evalvault 1.66.0__py3-none-any.whl → 1.68.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -63,6 +63,12 @@ async def lifespan(app: FastAPI):
63
63
  # Startup: Initialize adapter
64
64
  adapter = create_adapter()
65
65
  app.state.adapter = adapter
66
+ try:
67
+ from evalvault.adapters.inbound.api.routers.chat import warm_rag_index
68
+
69
+ await warm_rag_index()
70
+ except Exception as exc:
71
+ logger.warning("RAG preload failed: %s", exc)
66
72
  yield
67
73
  # Shutdown: Cleanup if necessary
68
74
  pass
@@ -1,11 +1,14 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  import asyncio
4
+ import hashlib
4
5
  import json
6
+ import logging
5
7
  import os
6
8
  import re
7
9
  import time
8
10
  from collections.abc import AsyncGenerator
11
+ from datetime import UTC, datetime
9
12
  from pathlib import Path
10
13
  from typing import Any
11
14
 
@@ -16,9 +19,15 @@ from pydantic import BaseModel, Field
16
19
 
17
20
  router = APIRouter(tags=["chat"])
18
21
 
22
+ logger = logging.getLogger(__name__)
23
+
19
24
  MCP_URL = os.getenv("EVALVAULT_MCP_URL", "http://localhost:8000/api/v1/mcp")
20
25
  MCP_TOKEN = os.getenv("EVALVAULT_MCP_TOKEN", "mcp-local-dev-token")
21
26
 
27
+ USER_GUIDE_PATH = Path(os.getenv("EVALVAULT_RAG_USER_GUIDE", "docs/guides/USER_GUIDE.md"))
28
+ RAG_INDEX_DIR = Path(os.getenv("EVALVAULT_RAG_INDEX_DIR", "data/rag"))
29
+ RAG_INDEX_PATH = RAG_INDEX_DIR / "user_guide_bm25.json"
30
+
22
31
  _RAG_RETRIEVER = None
23
32
  _RAG_DOCS_COUNT = 0
24
33
  _RAG_TEXTS: list[str] = []
@@ -129,25 +138,98 @@ def _summarize_result(tool_name: str, payload: dict[str, Any]) -> str:
129
138
  return str(payload)
130
139
 
131
140
 
132
- def _load_text_files(root: Path, extensions: tuple[str, ...], limit: int) -> list[str]:
133
- texts: list[str] = []
134
- for path in root.rglob("*"):
135
- if not path.is_file():
136
- continue
137
- if path.suffix.lower() not in extensions:
138
- continue
139
- if limit and len(texts) >= limit:
140
- break
141
- try:
142
- content = path.read_text(encoding="utf-8")
143
- except Exception:
144
- continue
145
- if content.strip():
146
- texts.append(content)
147
- return texts
141
def _load_user_guide_text() -> str | None:
    """Read the user guide file located at ``USER_GUIDE_PATH``.

    Returns:
        The file's text, or ``None`` when the file does not exist, cannot be
        read, or contains only whitespace. Missing and unreadable files are
        logged as warnings.
    """
    if USER_GUIDE_PATH.exists():
        try:
            text = USER_GUIDE_PATH.read_text(encoding="utf-8")
        except Exception as exc:
            logger.warning("Failed to read USER_GUIDE.md: %s", exc)
            return None
        # A whitespace-only guide is reported the same way as a missing one.
        return text if text.strip() else None
    logger.warning("USER_GUIDE.md not found at %s", USER_GUIDE_PATH)
    return None
153
+
154
+
155
+ def _hash_text(text: str) -> str:
156
+ return hashlib.sha256(text.encode("utf-8")).hexdigest()
157
+
158
+
159
+ def _chunk_user_guide(content: str, chunk_limit: int) -> list[str]:
160
+ try:
161
+ from evalvault.adapters.outbound.nlp.korean.document_chunker import ParagraphChunker
162
+ from evalvault.adapters.outbound.nlp.korean.kiwi_tokenizer import KiwiTokenizer
163
+
164
+ tokenizer = KiwiTokenizer()
165
+ chunker = ParagraphChunker(tokenizer=tokenizer, chunk_size=450, overlap_tokens=80)
166
+ chunks = [
167
+ chunk.text
168
+ for chunk in chunker.chunk_with_metadata(content, source=str(USER_GUIDE_PATH))
169
+ ]
170
+ if chunk_limit > 0:
171
+ return chunks[:chunk_limit]
172
+ return chunks
173
+ except Exception as exc:
174
+ logger.warning("Failed to chunk USER_GUIDE.md, using fallback split: %s", exc)
175
+ paragraphs = [block.strip() for block in content.split("\n\n") if block.strip()]
176
+ if chunk_limit > 0:
177
+ return paragraphs[:chunk_limit]
178
+ return paragraphs
148
179
 
149
180
 
150
- async def _get_rag_retriever():
181
+ def _build_bm25_tokens(texts: list[str]) -> list[list[str]]:
182
+ try:
183
+ from evalvault.adapters.outbound.nlp.korean.kiwi_tokenizer import KiwiTokenizer
184
+
185
+ tokenizer = KiwiTokenizer()
186
+ tokens = []
187
+ for text in texts:
188
+ doc_tokens = tokenizer.tokenize(text)
189
+ if not doc_tokens:
190
+ doc_tokens = re.findall(r"[A-Za-z0-9가-힣]+", text)
191
+ tokens.append(doc_tokens)
192
+ return tokens
193
+ except Exception as exc:
194
+ logger.warning("Failed to tokenize with Kiwi, using regex: %s", exc)
195
+ return [re.findall(r"[A-Za-z0-9가-힣]+", text) for text in texts]
196
+
197
+
198
def _load_bm25_index() -> dict[str, Any] | None:
    """Load the persisted BM25 index from ``RAG_INDEX_PATH``.

    Returns:
        The decoded JSON object, or ``None`` when the file is absent, cannot
        be read or parsed (a warning is logged), or does not decode to a dict.
    """
    if RAG_INDEX_PATH.exists():
        try:
            data = json.loads(RAG_INDEX_PATH.read_text(encoding="utf-8"))
        except Exception as exc:
            logger.warning("Failed to read BM25 index: %s", exc)
        else:
            if isinstance(data, dict):
                return data
    return None
209
+
210
+
211
def _save_bm25_index(payload: dict[str, Any]) -> None:
    """Write ``payload`` as indented UTF-8 JSON to ``RAG_INDEX_PATH``.

    ``RAG_INDEX_DIR`` is created first (including parents) if needed.
    """
    RAG_INDEX_DIR.mkdir(parents=True, exist_ok=True)
    serialized = json.dumps(payload, ensure_ascii=False, indent=2)
    RAG_INDEX_PATH.write_text(serialized, encoding="utf-8")
214
+
215
+
216
def _build_bm25_index(content: str, chunk_limit: int) -> dict[str, Any] | None:
    """Build the BM25 index payload for the user guide.

    Args:
        content: Full text of the user guide.
        chunk_limit: Passed to ``_chunk_user_guide`` to cap the chunk count.

    Returns:
        A dict with the chunked documents, their tokens, and bookkeeping
        fields (version, source path, content hash, chunk limit, creation
        time in UTC), or ``None`` when chunking produces nothing.
    """
    documents = _chunk_user_guide(content, chunk_limit)
    if not documents:
        return None

    token_lists = _build_bm25_tokens(documents)
    index: dict[str, Any] = {"version": 1}
    index["source"] = str(USER_GUIDE_PATH)
    # The hash covers the full guide text, not the (possibly truncated) chunks.
    index["source_hash"] = _hash_text(content)
    index["chunk_limit"] = chunk_limit
    index["created_at"] = datetime.now(UTC).isoformat()
    index["documents"] = documents
    index["tokens"] = token_lists
    return index
230
+
231
+
232
+ async def _get_rag_retriever() -> tuple[Any | None, int]:
151
233
  global _RAG_RETRIEVER
152
234
  global _RAG_DOCS_COUNT
153
235
  global _RAG_TEXTS
@@ -156,50 +238,51 @@ async def _get_rag_retriever():
156
238
  if _RAG_RETRIEVER is not None:
157
239
  return _RAG_RETRIEVER, _RAG_DOCS_COUNT
158
240
 
159
- if not _RAG_INITIALIZED:
160
- docs_root = Path(os.getenv("EVALVAULT_RAG_DOCS", "docs"))
161
- src_root = Path(os.getenv("EVALVAULT_RAG_SRC", "src"))
162
- docs_limit = int(os.getenv("EVALVAULT_RAG_DOCS_LIMIT", "120"))
163
- src_limit = int(os.getenv("EVALVAULT_RAG_SRC_LIMIT", "120"))
164
-
165
- texts: list[str] = []
166
- if docs_root.exists():
167
- texts.extend(_load_text_files(docs_root, (".md", ".txt"), docs_limit))
168
- if src_root.exists():
169
- texts.extend(_load_text_files(src_root, (".py",), src_limit))
241
+ user_guide_limit = int(os.getenv("EVALVAULT_RAG_USER_GUIDE_LIMIT", "80"))
242
+ content = _load_user_guide_text()
243
+ if content is None:
244
+ return None, 0
245
+ source_hash = _hash_text(content)
246
+
247
+ index_payload = _load_bm25_index()
248
+ if index_payload is None or index_payload.get("source_hash") != source_hash:
249
+ index_payload = _build_bm25_index(content, user_guide_limit)
250
+ if index_payload is None:
251
+ return None, 0
252
+ _save_bm25_index(index_payload)
253
+
254
+ documents = index_payload.get("documents")
255
+ tokens = index_payload.get("tokens")
256
+ if not isinstance(documents, list) or not isinstance(tokens, list):
257
+ return None, 0
170
258
 
171
- _RAG_TEXTS = texts
172
- _RAG_DOCS_COUNT = len(texts)
173
- _RAG_INITIALIZED = True
259
+ _RAG_TEXTS = documents
260
+ _RAG_DOCS_COUNT = len(documents)
261
+ _RAG_INITIALIZED = True
174
262
 
175
263
  if not _RAG_TEXTS:
176
264
  return None, 0
177
265
 
178
- from evalvault.adapters.outbound.llm.ollama_adapter import OllamaAdapter
179
- from evalvault.adapters.outbound.nlp.korean.toolkit_factory import try_create_korean_toolkit
180
- from evalvault.config.settings import Settings
181
-
182
- settings = Settings()
183
- ollama_adapter = OllamaAdapter(settings)
184
- toolkit = try_create_korean_toolkit()
185
- if toolkit is None:
186
- return None, 0
266
+ from evalvault.adapters.outbound.nlp.korean.bm25_retriever import KoreanBM25Retriever
267
+ from evalvault.adapters.outbound.nlp.korean.kiwi_tokenizer import KiwiTokenizer
187
268
 
188
- use_hybrid = os.getenv("EVALVAULT_RAG_USE_HYBRID", "true").lower() == "true"
189
- retriever = toolkit.build_retriever(
190
- documents=_RAG_TEXTS,
191
- use_hybrid=use_hybrid,
192
- ollama_adapter=ollama_adapter if use_hybrid else None,
193
- embedding_profile=os.getenv("EVALVAULT_RAG_EMBEDDING_PROFILE", "dev"),
194
- verbose=False,
195
- )
196
- if retriever is None:
197
- return None, 0
269
+ tokenizer = KiwiTokenizer()
270
+ retriever = KoreanBM25Retriever(tokenizer=tokenizer)
271
+ retriever.index(list(_RAG_TEXTS))
272
+ if tokens and len(tokens) == len(_RAG_TEXTS):
273
+ retriever._tokenized_docs = tokens
198
274
 
199
275
  _RAG_RETRIEVER = retriever
200
276
  return retriever, _RAG_DOCS_COUNT
201
277
 
202
278
 
279
async def warm_rag_index() -> None:
    """Call ``_get_rag_retriever`` once so its result is prepared in advance.

    Best-effort: any exception is logged as a warning and not re-raised.
    """
    try:
        await _get_rag_retriever()
    except Exception as error:
        logger.warning("RAG preload failed: %s", error)
284
+
285
+
203
286
  async def _direct_chat_answer(user_text: str) -> str | None:
204
287
  payload = {
205
288
  "model": os.getenv("OLLAMA_CHAT_MODEL", "gpt-oss-safeguard:20b"),
@@ -351,15 +434,17 @@ async def _resolve_tool_with_llm(user_text: str) -> dict[str, Any] | None:
351
434
 
352
435
 
353
436
  def _extract_json_content(result: Any) -> dict[str, Any] | None:
354
- if isinstance(result, dict) and isinstance(result.get("structuredContent"), dict):
355
- return result.get("structuredContent")
356
-
357
- if hasattr(result, "structuredContent"):
358
- payload = result.structuredContent
359
- if isinstance(payload, dict):
360
- return payload
437
+ if isinstance(result, dict):
438
+ structured = result.get("structuredContent")
439
+ if isinstance(structured, dict):
440
+ return structured
441
+ else:
442
+ if hasattr(result, "structuredContent"):
443
+ payload = result.structuredContent
444
+ if isinstance(payload, dict):
445
+ return payload
361
446
 
362
- if hasattr(result, "content"):
447
+ if not isinstance(result, dict) and hasattr(result, "content"):
363
448
  content = result.content
364
449
  elif isinstance(result, dict):
365
450
  content = result.get("content")
@@ -385,17 +470,6 @@ def _extract_json_content(result: Any) -> dict[str, Any] | None:
385
470
  return None
386
471
  if isinstance(parsed, dict):
387
472
  return parsed
388
- else:
389
- item_type = getattr(item, "type", None)
390
- if item_type == "text":
391
- text = getattr(item, "text", None)
392
- if isinstance(text, str):
393
- try:
394
- parsed = json.loads(text)
395
- except Exception:
396
- return None
397
- if isinstance(parsed, dict):
398
- return parsed
399
473
  return None
400
474
 
401
475
 
@@ -0,0 +1,144 @@
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ from dataclasses import asdict
5
+ from typing import Any
6
+
7
+ from fastapi import APIRouter, Depends, HTTPException, Request
8
+ from pydantic import BaseModel, Field
9
+
10
+ from evalvault.adapters.inbound.mcp import tools as mcp_tools
11
+ from evalvault.config.settings import Settings, get_settings
12
+
13
+ router = APIRouter(tags=["mcp"])
14
+
15
+
16
class JsonRpcRequest(BaseModel):
    """Request body of the MCP endpoint, shaped as a JSON-RPC 2.0 call."""

    # Must match "2.0" exactly; that is also the default when omitted.
    jsonrpc: str = Field(default="2.0", pattern=r"^2\.0$")
    # Caller-chosen request id, echoed back in the response envelope.
    id: int | str | None = None
    # Name of the method to invoke.
    method: str
    # Optional named parameters for the method.
    params: dict[str, Any] | None = None
21
+
22
+
23
+ def _normalize_tokens(raw_tokens: str | None) -> set[str]:
24
+ if not raw_tokens:
25
+ return set()
26
+ return {token.strip() for token in raw_tokens.split(",") if token.strip()}
27
+
28
+
29
def _require_mcp_token(
    request: Request,
    settings: Settings = Depends(get_settings),
) -> None:
    """FastAPI dependency that authorizes a request to the MCP endpoint.

    The accepted tokens come from ``settings.mcp_auth_tokens``; when that
    yields no tokens, ``settings.api_auth_tokens`` is used instead. The
    caller must send one of them as ``Authorization: Bearer <token>``.

    Args:
        request: Incoming request whose ``Authorization`` header is checked.
        settings: Application settings (injected).

    Raises:
        HTTPException: 404 when MCP is disabled; 401 when no tokens are
            configured, the header is missing or not a Bearer header, or
            the presented token matches none of the configured tokens.
    """
    import hmac

    if not settings.mcp_enabled:
        raise HTTPException(status_code=404, detail="MCP is disabled")
    tokens = _normalize_tokens(settings.mcp_auth_tokens) or _normalize_tokens(
        settings.api_auth_tokens
    )
    if not tokens:
        raise HTTPException(status_code=401, detail="MCP auth tokens are required")
    auth_header = request.headers.get("Authorization", "")
    if not auth_header.lower().startswith("bearer "):
        raise HTTPException(status_code=401, detail="Invalid or missing MCP token")
    # Skip the 7-character "bearer " prefix checked above.
    presented = auth_header[7:].strip().encode("utf-8")
    # Compare against every configured token with a constant-time check.
    # The list (rather than a generator) keeps any() from stopping at the
    # first match, so the work done does not depend on which token matched.
    matches = [
        hmac.compare_digest(presented, candidate.encode("utf-8")) for candidate in tokens
    ]
    if not any(matches):
        raise HTTPException(status_code=401, detail="Invalid or missing MCP token")
46
+
47
+
48
def _tool_registry() -> dict[str, Any]:
    """Map each dispatchable MCP tool name to its callable in ``mcp_tools``."""
    tool_names = (
        "list_runs",
        "get_run_summary",
        "run_evaluation",
        "analyze_compare",
        "get_artifacts",
    )
    # Every tool is registered under the same name as its mcp_tools attribute.
    return {tool_name: getattr(mcp_tools, tool_name) for tool_name in tool_names}
56
+
57
+
58
+ def _allowed_tools(settings: Settings) -> set[str]:
59
+ if settings.mcp_allowed_tools:
60
+ return {name.strip() for name in settings.mcp_allowed_tools.split(",") if name.strip()}
61
+ return set(_tool_registry().keys())
62
+
63
+
64
+ def _serialize_result(result: Any) -> Any:
65
+ if hasattr(result, "model_dump"):
66
+ return result.model_dump()
67
+ if hasattr(result, "dict"):
68
+ return result.dict()
69
+ try:
70
+ return asdict(result)
71
+ except TypeError:
72
+ return result
73
+
74
+
75
+ def _jsonrpc_result(rpc_id: int | str | None, payload: Any) -> dict[str, Any]:
76
+ return {"jsonrpc": "2.0", "id": rpc_id, "result": payload}
77
+
78
+
79
+ def _jsonrpc_error(rpc_id: int | str | None, code: int, message: str) -> dict[str, Any]:
80
+ return {"jsonrpc": "2.0", "id": rpc_id, "error": {"code": code, "message": message}}
81
+
82
+
83
@router.post("")
def handle_mcp_request(
    request: JsonRpcRequest,
    settings: Settings = Depends(get_settings),
    _: None = Depends(_require_mcp_token),
) -> dict[str, Any]:
    """Handle one JSON-RPC 2.0 request sent to the MCP endpoint.

    Supported methods:
        * ``initialize`` - returns protocol version, server info and capabilities.
        * ``initialized`` / ``notifications/initialized`` - acknowledged with a
          ``None`` result.
        * ``tools/list`` - tool specs filtered to the allowed tool names.
        * ``tools/call`` - runs the tool named in ``params["name"]`` with
          ``params["arguments"]`` (an empty dict when absent).
        * ``ping`` - returns ``{"status": "ok"}``.

    Args:
        request: Parsed JSON-RPC request body.
        settings: Application settings (injected).
        _: Unused; present so the token check runs before the handler.

    Returns:
        A JSON-RPC envelope. Failures are reported inside the envelope:
        -32602 for a missing tool name, -32601 for a disallowed or unknown
        tool or an unknown method, -32000 when the tool raises, and -32603
        when the tool's result cannot be encoded as JSON.
    """
    method = request.method
    params = request.params or {}

    if method == "initialize":
        return _jsonrpc_result(
            request.id,
            {
                "protocolVersion": settings.mcp_protocol_version,
                "serverInfo": {
                    "name": "evalvault-mcp",
                    "version": settings.mcp_server_version,
                },
                "capabilities": {"tools": {"listChanged": False}},
            },
        )

    if method in {"initialized", "notifications/initialized"}:
        return _jsonrpc_result(request.id, None)

    if method == "tools/list":
        allowed = _allowed_tools(settings)
        tools = [tool for tool in mcp_tools.get_tool_specs() if tool.get("name") in allowed]
        return _jsonrpc_result(request.id, {"tools": tools})

    if method == "tools/call":
        tool_name = params.get("name")
        tool_args = params.get("arguments") or {}
        if not tool_name:
            return _jsonrpc_error(request.id, -32602, "Missing tool name")

        # The allow-list is checked before the registry so a disallowed
        # name is rejected regardless of whether the tool exists.
        allowed = _allowed_tools(settings)
        if tool_name not in allowed:
            return _jsonrpc_error(request.id, -32601, "Tool not allowed")

        tool_fn = _tool_registry().get(tool_name)
        if tool_fn is None:
            return _jsonrpc_error(request.id, -32601, "Tool not found")

        try:
            result = tool_fn(tool_args)
        except Exception as exc:
            return _jsonrpc_error(request.id, -32000, f"Tool execution failed: {exc}")

        payload = _serialize_result(result)
        try:
            text = json.dumps(payload, ensure_ascii=False)
        except (TypeError, ValueError) as exc:
            # Without this guard an unserializable result would raise out of
            # the handler instead of being reported as a JSON-RPC error.
            return _jsonrpc_error(
                request.id, -32603, f"Tool result is not JSON serializable: {exc}"
            )
        return _jsonrpc_result(
            request.id,
            {
                "content": [{"type": "text", "text": text}],
                "structuredContent": payload,
            },
        )

    if method == "ping":
        return _jsonrpc_result(request.id, {"status": "ok"})

    return _jsonrpc_error(request.id, -32601, "Method not found")
@@ -3,6 +3,7 @@
3
3
  import json
4
4
  from abc import ABC, abstractmethod
5
5
  from pathlib import Path
6
+ from typing import Any
6
7
 
7
8
  from evalvault.domain.entities.dataset import Dataset
8
9
 
@@ -118,6 +119,45 @@ class BaseDatasetLoader(ABC):
118
119
  # Fall back to pipe-separated format
119
120
  return [ctx.strip() for ctx in contexts_str.split("|")]
120
121
 
122
+ def _parse_metadata_cell(self, raw: Any) -> dict[str, Any]:
123
+ if raw is None or (isinstance(raw, float) and str(raw) == "nan"):
124
+ return {}
125
+ text = str(raw).strip()
126
+ if not text:
127
+ return {}
128
+ try:
129
+ parsed = json.loads(text)
130
+ except json.JSONDecodeError as exc:
131
+ raise ValueError("Invalid metadata JSON") from exc
132
+ if not isinstance(parsed, dict):
133
+ raise ValueError("metadata must be a JSON object")
134
+ return parsed
135
+
136
+ def _parse_summary_tags_cell(self, raw: Any) -> list[str]:
137
+ if raw is None or (isinstance(raw, float) and str(raw) == "nan"):
138
+ return []
139
+ if isinstance(raw, list):
140
+ return [str(item).strip().lower() for item in raw if str(item).strip()]
141
+ text = str(raw).strip()
142
+ if not text:
143
+ return []
144
+ if text.startswith("["):
145
+ try:
146
+ parsed = json.loads(text)
147
+ except json.JSONDecodeError:
148
+ parsed = None
149
+ if isinstance(parsed, list):
150
+ return [str(item).strip().lower() for item in parsed if str(item).strip()]
151
+ delimiter = "," if "," in text else "|" if "|" in text else None
152
+ parts = text.split(delimiter) if delimiter else [text]
153
+ return [part.strip().lower() for part in parts if part.strip()]
154
+
155
+ def _parse_summary_intent_cell(self, raw: Any) -> str | None:
156
+ if raw is None or (isinstance(raw, float) and str(raw) == "nan"):
157
+ return None
158
+ text = str(raw).strip()
159
+ return text or None
160
+
121
161
  def _get_default_name(self, file_path: Path) -> str:
122
162
  """Get default dataset name from file path.
123
163
 
@@ -123,12 +123,28 @@ class CSVDatasetLoader(BaseDatasetLoader):
123
123
  else None
124
124
  )
125
125
 
126
+ metadata = {}
127
+ if "metadata" in df.columns:
128
+ try:
129
+ metadata = self._parse_metadata_cell(row["metadata"])
130
+ except ValueError as exc:
131
+ raise ValueError(f"Test case {row['id']}: {exc}") from exc
132
+ if "summary_tags" in df.columns:
133
+ tags = self._parse_summary_tags_cell(row["summary_tags"])
134
+ if tags:
135
+ metadata["summary_tags"] = tags
136
+ if "summary_intent" in df.columns:
137
+ intent = self._parse_summary_intent_cell(row["summary_intent"])
138
+ if intent:
139
+ metadata["summary_intent"] = intent
140
+
126
141
  test_case = TestCase(
127
142
  id=str(row["id"]),
128
143
  question=str(row["question"]),
129
144
  answer=str(row["answer"]),
130
145
  contexts=contexts,
131
146
  ground_truth=ground_truth,
147
+ metadata=metadata,
132
148
  )
133
149
  test_cases.append(test_case)
134
150
 
@@ -96,12 +96,28 @@ class ExcelDatasetLoader(BaseDatasetLoader):
96
96
  else None
97
97
  )
98
98
 
99
+ metadata = {}
100
+ if "metadata" in df.columns:
101
+ try:
102
+ metadata = self._parse_metadata_cell(row["metadata"])
103
+ except ValueError as exc:
104
+ raise ValueError(f"Test case {row['id']}: {exc}") from exc
105
+ if "summary_tags" in df.columns:
106
+ tags = self._parse_summary_tags_cell(row["summary_tags"])
107
+ if tags:
108
+ metadata["summary_tags"] = tags
109
+ if "summary_intent" in df.columns:
110
+ intent = self._parse_summary_intent_cell(row["summary_intent"])
111
+ if intent:
112
+ metadata["summary_intent"] = intent
113
+
99
114
  test_case = TestCase(
100
115
  id=str(row["id"]),
101
116
  question=str(row["question"]),
102
117
  answer=str(row["answer"]),
103
118
  contexts=contexts,
104
119
  ground_truth=ground_truth,
120
+ metadata=metadata,
105
121
  )
106
122
  test_cases.append(test_case)
107
123
 
@@ -424,9 +424,13 @@ def apply_profile(settings: Settings, profile_name: str) -> Settings:
424
424
  """
425
425
  from evalvault.config.model_config import get_model_config
426
426
 
427
+ normalized = profile_name.strip() if isinstance(profile_name, str) else profile_name
428
+ if not normalized:
429
+ return settings
430
+
427
431
  try:
428
432
  model_config = get_model_config()
429
- profile = model_config.get_profile(profile_name)
433
+ profile = model_config.get_profile(normalized)
430
434
 
431
435
  # LLM 설정 적용 (모델명과 provider만)
432
436
  settings.llm_provider = profile.llm.provider
@@ -449,9 +453,16 @@ def apply_profile(settings: Settings, profile_name: str) -> Settings:
449
453
  elif profile.embedding.provider == "vllm":
450
454
  settings.vllm_embedding_model = profile.embedding.model
451
455
 
452
- except FileNotFoundError:
453
- # 설정 파일이 없으면 프로필 무시
454
- pass
456
+ except FileNotFoundError as exc:
457
+ raise ValueError(
458
+ "Model profile config not found. Create 'config/models.yaml' or 'evalvault.yaml' "
459
+ f"to use profile '{normalized}'."
460
+ ) from exc
461
+ except KeyError as exc:
462
+ available = ", ".join(sorted(model_config.profiles.keys()))
463
+ raise ValueError(
464
+ f"Unknown profile '{normalized}'. Available profiles: {available}"
465
+ ) from exc
455
466
 
456
467
  return settings
457
468
 
@@ -60,18 +60,16 @@ class StageEvent:
60
60
 
61
61
  @classmethod
62
62
  def from_dict(cls, payload: dict[str, Any]) -> StageEvent:
63
- if "run_id" not in payload:
64
- raise ValueError("StageEvent requires 'run_id'")
65
- if "stage_type" not in payload:
66
- raise ValueError("StageEvent requires 'stage_type'")
63
+ run_id = _require_str(payload, "run_id")
64
+ stage_type = _normalize_stage_type(payload)
67
65
 
68
66
  trace_payload = payload.get("trace") or {}
69
67
  input_ref = _parse_payload_ref(payload.get("input_ref"))
70
68
  output_ref = _parse_payload_ref(payload.get("output_ref"))
71
69
 
72
70
  return cls(
73
- run_id=str(payload["run_id"]),
74
- stage_type=str(payload["stage_type"]),
71
+ run_id=run_id,
72
+ stage_type=stage_type,
75
73
  stage_id=str(payload.get("stage_id") or uuid4()),
76
74
  stage_name=_optional_str(payload.get("stage_name")),
77
75
  parent_stage_id=_optional_str(payload.get("parent_stage_id")),
@@ -187,6 +185,24 @@ def _parse_datetime(value: Any) -> datetime | None:
187
185
  raise ValueError("Invalid datetime value")
188
186
 
189
187
 
188
+ def _require_str(payload: dict[str, Any], key: str) -> str:
189
+ if key not in payload:
190
+ raise ValueError(f"StageEvent requires '{key}'")
191
+ value = str(payload.get(key, "")).strip()
192
+ if not value:
193
+ raise ValueError(f"StageEvent requires non-empty '{key}'")
194
+ return value
195
+
196
+
197
+ def _normalize_stage_type(payload: dict[str, Any]) -> str:
198
+ if "stage_type" not in payload:
199
+ raise ValueError("StageEvent requires 'stage_type'")
200
+ value = str(payload.get("stage_type", "")).strip()
201
+ if not value:
202
+ raise ValueError("StageEvent requires non-empty 'stage_type'")
203
+ return value.lower()
204
+
205
+
190
206
  @overload
191
207
  def _ensure_dict(value: None, *, allow_none: Literal[True]) -> None: ...
192
208
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: evalvault
3
- Version: 1.66.0
3
+ Version: 1.68.0
4
4
  Summary: RAG evaluation system using Ragas with Phoenix/Langfuse tracing
5
5
  Project-URL: Homepage, https://github.com/ntts9990/EvalVault
6
6
  Project-URL: Documentation, https://github.com/ntts9990/EvalVault#readme
@@ -6,13 +6,14 @@ evalvault/adapters/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuF
6
6
  evalvault/adapters/inbound/__init__.py,sha256=SG1svel1PwqetnqVpKFLSv612_WwGwLTbFpYgwk6FMw,166
7
7
  evalvault/adapters/inbound/api/__init__.py,sha256=LeVVttCA3tLKoHA2PO4z3y8VkfVcf3Bq8CZSzo91lf4,34
8
8
  evalvault/adapters/inbound/api/adapter.py,sha256=HgWSYyUxvJPlaSG158WVzpPckpPCYV9Ec3CWN8rLFdI,69118
9
- evalvault/adapters/inbound/api/main.py,sha256=skYtmDngdOBryyLXQpNGlSd2Te6RF6GtfIwcMACPHFU,7068
9
+ evalvault/adapters/inbound/api/main.py,sha256=RPcstctf_mFH9TPUhld6plA0104Kb6Iccb6Cu26oFR8,7271
10
10
  evalvault/adapters/inbound/api/routers/__init__.py,sha256=q07_YF9TnBl68bqcRCvhPU4-zRTyvmPoHVehwO6W7QM,19
11
11
  evalvault/adapters/inbound/api/routers/benchmark.py,sha256=yevntbZcNtMvbVODsITUBgR1Ka4pdFQrXBJJ4K4Jyr4,4477
12
- evalvault/adapters/inbound/api/routers/chat.py,sha256=3S6-ljiY1COlDuVDH5yzMJs9SO0EkuosRcJIYScHWvI,18143
12
+ evalvault/adapters/inbound/api/routers/chat.py,sha256=hCA6rWr5GT_gCqu75uCqYwy2gOEUd85mlcc5y-ruFTY,20661
13
13
  evalvault/adapters/inbound/api/routers/config.py,sha256=LygN0fVMr8NFtj5zuQXnVFhoafx56Txa98vpwtPa4Jc,4104
14
14
  evalvault/adapters/inbound/api/routers/domain.py,sha256=RsR7GIFMjccDN7vpG1uDyk9n1DnCTH18JDGAX7o4Qqc,3648
15
15
  evalvault/adapters/inbound/api/routers/knowledge.py,sha256=yb_e7OEPtwldOAzHTGiWe7jShHw2JdpOFnzGPMceRsg,7109
16
+ evalvault/adapters/inbound/api/routers/mcp.py,sha256=yHANV7qIXig-7YSiQgXzSTuabqFStH5yT3URyQGY2W4,4764
16
17
  evalvault/adapters/inbound/api/routers/pipeline.py,sha256=8UgQzNFHcuqS61s69mOrPee4OMwfxVdvRWHJ2_qYBF0,17175
17
18
  evalvault/adapters/inbound/api/routers/runs.py,sha256=rydOvwWk24QIYafu3XYS3oL_VVCE_jHDmjADhA19T1s,40059
18
19
  evalvault/adapters/inbound/cli/__init__.py,sha256=a42flC5NK-VfbdbBrE49IrUL5zAyKdXZYJVM6E3NTE0,675
@@ -117,9 +118,9 @@ evalvault/adapters/outbound/cache/__init__.py,sha256=LcsKzxnx1AnAwS07iSCdws11CfE
117
118
  evalvault/adapters/outbound/cache/hybrid_cache.py,sha256=AVhctQVOIbQWwvn_K0kxSq3lkhucuM7tezmSkPDbCrA,12711
118
119
  evalvault/adapters/outbound/cache/memory_cache.py,sha256=jvjIgXp7YRj08_AzBFaJ58jjXNzUlYbG_zX6fQJP4C0,3533
119
120
  evalvault/adapters/outbound/dataset/__init__.py,sha256=SDFnjmieEgz0uH5MpdXx8pmjnIMjRLkMFmFioMxCju0,1183
120
- evalvault/adapters/outbound/dataset/base.py,sha256=9ExRuKa3ZHdkeyK2SiBrWxZFA7DGo98ox9FVKizPFA8,3923
121
- evalvault/adapters/outbound/dataset/csv_loader.py,sha256=Zb-FhWxyauvcdkxLhFJWSAgSjkaKaMFbkATg-W2OGzc,4870
122
- evalvault/adapters/outbound/dataset/excel_loader.py,sha256=Am6T23MQVLs_HoQ_r2T-x7j5chFNXtP1opTuaodFanY,3769
121
+ evalvault/adapters/outbound/dataset/base.py,sha256=4rxpQgxpFty0G5XRv1SP-XJ9mpZ9YO6PAMDgp71JiJQ,5547
122
+ evalvault/adapters/outbound/dataset/csv_loader.py,sha256=xHg2QadMvLfHTHzeex6WxXmagLJog3LN-ui6dFxD8HY,5595
123
+ evalvault/adapters/outbound/dataset/excel_loader.py,sha256=MUl-63r1s1GjVVmDgdag1DpMJvIVX_agGx20NQzEZN8,4494
123
124
  evalvault/adapters/outbound/dataset/json_loader.py,sha256=4wG7APg1LLADPxJ-wQZo2zBcvVX12sqo9VUIb-0Kww4,4923
124
125
  evalvault/adapters/outbound/dataset/loader_factory.py,sha256=32sjGuW2Yta12lpKy4DLH4I5B4Pi-YuHTvGG1Pr4VAk,1361
125
126
  evalvault/adapters/outbound/dataset/method_input_loader.py,sha256=d7pB4OPvvr-q-Y5DlvjX3X719jCCQ2vRDfT_ov0dUFU,3833
@@ -206,7 +207,7 @@ evalvault/config/langfuse_support.py,sha256=DEzVMfMGGf1V45W_2oUG-NCDfsYI4UUdnYJI
206
207
  evalvault/config/model_config.py,sha256=KlzDbGyDLeOGE7ElekFFk5YjjT5u8i6KO2B4EyZkLnI,3542
207
208
  evalvault/config/phoenix_support.py,sha256=e6RPWd6Qb7KU6Q8pLaYTpJGWULtvEEU6B0xHWyVyOH0,13604
208
209
  evalvault/config/secret_manager.py,sha256=YjPMuNqeBrAR2BzCJvsBNUExaU4TBSFyZ8kVYZZifqA,4172
209
- evalvault/config/settings.py,sha256=xvoNma4CHAd8R_nF0DL4MUWXBWCR5M0C68NPSPLT5JQ,18285
210
+ evalvault/config/settings.py,sha256=DY170XUoMo8yQx8_CJjPt96QsGg7tyTx5wJ-ptcfdY0,18766
210
211
  evalvault/config/playbooks/improvement_playbook.yaml,sha256=9F9WVVCydFfz6zUuGYzZ4PKdW1LLtcBKVF36T7xT764,26965
211
212
  evalvault/domain/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
212
213
  evalvault/domain/entities/__init__.py,sha256=wszRJ1Imdc5NJ1bQPC2udk-mAgFdlw4uZV5IPNjLpHQ,3669
@@ -227,7 +228,7 @@ evalvault/domain/entities/prompt.py,sha256=lQlRnHEKY69GWTC-cUIu0DMuPfJ9UWm6Sm4KT
227
228
  evalvault/domain/entities/prompt_suggestion.py,sha256=Ep_XSjdYUj7pFSCMyeeZKs8yTnp74AVx05Zqr7829PE,1243
228
229
  evalvault/domain/entities/rag_trace.py,sha256=sZgnkG4fK6KOe3Np6TYAZ_tPnsRbOmucDSQns35U1n4,11868
229
230
  evalvault/domain/entities/result.py,sha256=OaGHMDLWMW2O4fNVuVTUvWFVBQ1iu93OD_oI3NumrCQ,10697
230
- evalvault/domain/entities/stage.py,sha256=UqS59sjoMs_bhMupNtvagbIx8QgHgFjWoRPhJ3uJP2s,7426
231
+ evalvault/domain/entities/stage.py,sha256=KyR-v3tyusPJ7pfTXtHE2_23tVvNSRU9Q1RT-R5akXg,7914
231
232
  evalvault/domain/metrics/__init__.py,sha256=Ros3CWg5in1xlEdMa0WUSG602SBVkxw2Zbro-XUlmxU,1214
232
233
  evalvault/domain/metrics/analysis_registry.py,sha256=JZpBrBs7-JExHKYuEML6Vg_uYLm-WniBE3BfiU5OtJg,7641
233
234
  evalvault/domain/metrics/confidence.py,sha256=AX4oeN28OvmMkwD0pT-jskkOlXh87C1pe2W9P1sF69g,17224
@@ -338,8 +339,8 @@ evalvault/reports/__init__.py,sha256=Bb1X4871msAN8I6PM6nKGED3psPwZt88hXZBAOdH06Y
338
339
  evalvault/reports/release_notes.py,sha256=pZj0PBFT-4F_Ty-Kv5P69BuoOnmTCn4kznDcORFJd0w,4011
339
340
  evalvault/scripts/__init__.py,sha256=NwEeIFQbkX4ml2R_PhtIoNtArDSX_suuoymgG_7Kwso,89
340
341
  evalvault/scripts/regression_runner.py,sha256=SxZori5BZ8jVQ057Mf5V5FPgIVDccrV5oRONmnhuk8w,8438
341
- evalvault-1.66.0.dist-info/METADATA,sha256=f6jzeYkN1iuFwYJTcI8r5L52hVNZwACOlQuWYvVz_JY,26159
342
- evalvault-1.66.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
343
- evalvault-1.66.0.dist-info/entry_points.txt,sha256=Oj9Xc5gYcyUYYNmQfWI8NYGw7nN-3M-h2ipHIMlVn6o,65
344
- evalvault-1.66.0.dist-info/licenses/LICENSE.md,sha256=3RNWY4jjtrQ_yYa-D-7I3XO12Ti7YzxsLV_dpykujvo,11358
345
- evalvault-1.66.0.dist-info/RECORD,,
342
+ evalvault-1.68.0.dist-info/METADATA,sha256=bEWK-9BGROeWrWf3kNoGytr-GbAa2gzLCDZ1PwWBzEM,26159
343
+ evalvault-1.68.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
344
+ evalvault-1.68.0.dist-info/entry_points.txt,sha256=Oj9Xc5gYcyUYYNmQfWI8NYGw7nN-3M-h2ipHIMlVn6o,65
345
+ evalvault-1.68.0.dist-info/licenses/LICENSE.md,sha256=3RNWY4jjtrQ_yYa-D-7I3XO12Ti7YzxsLV_dpykujvo,11358
346
+ evalvault-1.68.0.dist-info/RECORD,,