evalvault 1.66.0__py3-none-any.whl → 1.67.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
evalvault/adapters/inbound/api/main.py

@@ -63,6 +63,12 @@ async def lifespan(app: FastAPI):
     # Startup: Initialize adapter
     adapter = create_adapter()
     app.state.adapter = adapter
+    try:
+        from evalvault.adapters.inbound.api.routers.chat import warm_rag_index
+
+        await warm_rag_index()
+    except Exception as exc:
+        logger.warning("RAG preload failed: %s", exc)
     yield
     # Shutdown: Cleanup if necessary
     pass
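The new startup block treats the RAG warm-up as best-effort: any exception is logged and swallowed, so a missing guide or index never blocks the API from booting. A minimal sketch of how that contract could be exercised, assuming the FastAPI instance is exposed as `main.app` (the test name and patch target are illustrative):

```python
# Hypothetical smoke test: startup must succeed even when the warm-up raises.
from fastapi.testclient import TestClient

from evalvault.adapters.inbound.api import main  # assumes `main.app` exists


def test_startup_survives_rag_failure(monkeypatch):
    async def boom() -> None:
        raise RuntimeError("index unavailable")

    # lifespan imports warm_rag_index lazily, so patch it at its source module.
    monkeypatch.setattr(
        "evalvault.adapters.inbound.api.routers.chat.warm_rag_index", boom
    )
    with TestClient(main.app) as client:  # lifespan (and the warm-up) runs here
        assert client.app is main.app  # startup completed despite the failure
```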
evalvault/adapters/inbound/api/routers/chat.py

@@ -1,11 +1,14 @@
 from __future__ import annotations
 
 import asyncio
+import hashlib
 import json
+import logging
 import os
 import re
 import time
 from collections.abc import AsyncGenerator
+from datetime import UTC, datetime
 from pathlib import Path
 from typing import Any
 
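One portability note on the new imports: `datetime.UTC` only exists on Python 3.11+, so this module now implies a 3.11 floor; older interpreters would spell it `timezone.utc`. A quick check:

```python
# datetime.UTC is an alias for timezone.utc, available since Python 3.11.
from datetime import UTC, datetime, timezone

assert UTC is timezone.utc
print(datetime.now(UTC).isoformat())  # e.g. 2025-01-01T00:00:00.000000+00:00
```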
@@ -16,9 +19,15 @@ from pydantic import BaseModel, Field
 
 router = APIRouter(tags=["chat"])
 
+logger = logging.getLogger(__name__)
+
 MCP_URL = os.getenv("EVALVAULT_MCP_URL", "http://localhost:8000/api/v1/mcp")
 MCP_TOKEN = os.getenv("EVALVAULT_MCP_TOKEN", "mcp-local-dev-token")
 
+USER_GUIDE_PATH = Path(os.getenv("EVALVAULT_RAG_USER_GUIDE", "docs/guides/USER_GUIDE.md"))
+RAG_INDEX_DIR = Path(os.getenv("EVALVAULT_RAG_INDEX_DIR", "data/rag"))
+RAG_INDEX_PATH = RAG_INDEX_DIR / "user_guide_bm25.json"
+
 _RAG_RETRIEVER = None
 _RAG_DOCS_COUNT = 0
 _RAG_TEXTS: list[str] = []
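The retrieval corpus is now pinned to a single user guide, with the source path and index directory overridable per deployment. Because these constants are bound at import time, overrides must land in the process environment before the router is imported. A sketch with illustrative paths:

```python
# Override the defaults before the chat router module is imported.
import os

os.environ["EVALVAULT_RAG_USER_GUIDE"] = "/srv/evalvault/docs/guides/USER_GUIDE.md"
os.environ["EVALVAULT_RAG_INDEX_DIR"] = "/var/cache/evalvault/rag"

# chat.py resolves these once at import time into USER_GUIDE_PATH,
# RAG_INDEX_DIR, and RAG_INDEX_PATH (<index dir>/user_guide_bm25.json).
from evalvault.adapters.inbound.api.routers import chat

print(chat.RAG_INDEX_PATH)  # /var/cache/evalvault/rag/user_guide_bm25.json
```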
@@ -129,25 +138,98 @@ def _summarize_result(tool_name: str, payload: dict[str, Any]) -> str:
     return str(payload)
 
 
-def _load_text_files(root: Path, extensions: tuple[str, ...], limit: int) -> list[str]:
-    texts: list[str] = []
-    for path in root.rglob("*"):
-        if not path.is_file():
-            continue
-        if path.suffix.lower() not in extensions:
-            continue
-        if limit and len(texts) >= limit:
-            break
-        try:
-            content = path.read_text(encoding="utf-8")
-        except Exception:
-            continue
-        if content.strip():
-            texts.append(content)
-    return texts
+def _load_user_guide_text() -> str | None:
+    if not USER_GUIDE_PATH.exists():
+        logger.warning("USER_GUIDE.md not found at %s", USER_GUIDE_PATH)
+        return None
+    try:
+        content = USER_GUIDE_PATH.read_text(encoding="utf-8")
+    except Exception as exc:
+        logger.warning("Failed to read USER_GUIDE.md: %s", exc)
+        return None
+    if not content.strip():
+        return None
+    return content
+
+
+def _hash_text(text: str) -> str:
+    return hashlib.sha256(text.encode("utf-8")).hexdigest()
+
+
+def _chunk_user_guide(content: str, chunk_limit: int) -> list[str]:
+    try:
+        from evalvault.adapters.outbound.nlp.korean.document_chunker import ParagraphChunker
+        from evalvault.adapters.outbound.nlp.korean.kiwi_tokenizer import KiwiTokenizer
+
+        tokenizer = KiwiTokenizer()
+        chunker = ParagraphChunker(tokenizer=tokenizer, chunk_size=450, overlap_tokens=80)
+        chunks = [
+            chunk.text
+            for chunk in chunker.chunk_with_metadata(content, source=str(USER_GUIDE_PATH))
+        ]
+        if chunk_limit > 0:
+            return chunks[:chunk_limit]
+        return chunks
+    except Exception as exc:
+        logger.warning("Failed to chunk USER_GUIDE.md, using fallback split: %s", exc)
+        paragraphs = [block.strip() for block in content.split("\n\n") if block.strip()]
+        if chunk_limit > 0:
+            return paragraphs[:chunk_limit]
+        return paragraphs
 
 
-async def _get_rag_retriever():
+def _build_bm25_tokens(texts: list[str]) -> list[list[str]]:
+    try:
+        from evalvault.adapters.outbound.nlp.korean.kiwi_tokenizer import KiwiTokenizer
+
+        tokenizer = KiwiTokenizer()
+        tokens = []
+        for text in texts:
+            doc_tokens = tokenizer.tokenize(text)
+            if not doc_tokens:
+                doc_tokens = re.findall(r"[A-Za-z0-9가-힣]+", text)
+            tokens.append(doc_tokens)
+        return tokens
+    except Exception as exc:
+        logger.warning("Failed to tokenize with Kiwi, using regex: %s", exc)
+        return [re.findall(r"[A-Za-z0-9가-힣]+", text) for text in texts]
+
+
+def _load_bm25_index() -> dict[str, Any] | None:
+    if not RAG_INDEX_PATH.exists():
+        return None
+    try:
+        payload = json.loads(RAG_INDEX_PATH.read_text(encoding="utf-8"))
+    except Exception as exc:
+        logger.warning("Failed to read BM25 index: %s", exc)
+        return None
+    if not isinstance(payload, dict):
+        return None
+    return payload
+
+
+def _save_bm25_index(payload: dict[str, Any]) -> None:
+    RAG_INDEX_DIR.mkdir(parents=True, exist_ok=True)
+    RAG_INDEX_PATH.write_text(json.dumps(payload, ensure_ascii=False, indent=2), encoding="utf-8")
+
+
+def _build_bm25_index(content: str, chunk_limit: int) -> dict[str, Any] | None:
+    chunks = _chunk_user_guide(content, chunk_limit)
+    if not chunks:
+        return None
+    tokens = _build_bm25_tokens(chunks)
+    return {
+        "version": 1,
+        "source": str(USER_GUIDE_PATH),
+        "source_hash": _hash_text(content),
+        "chunk_limit": chunk_limit,
+        "created_at": datetime.now(UTC).isoformat(),
+        "documents": chunks,
+        "tokens": tokens,
+    }
+
+
+async def _get_rag_retriever() -> tuple[Any | None, int]:
     global _RAG_RETRIEVER
     global _RAG_DOCS_COUNT
     global _RAG_TEXTS
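Taken together, the new helpers persist a self-describing JSON index keyed by a SHA-256 of the guide text, so editing USER_GUIDE.md invalidates the cache on the next request. A sketch of the on-disk shape `_build_bm25_index` produces, with illustrative field values:

```python
# Illustrative contents of data/rag/user_guide_bm25.json.
example_index = {
    "version": 1,
    "source": "docs/guides/USER_GUIDE.md",
    "source_hash": "<sha256 hex of the full guide text>",
    "chunk_limit": 80,
    "created_at": "2025-01-01T00:00:00+00:00",
    "documents": ["## Quickstart\n...", "## Running evaluations\n..."],
    "tokens": [["quickstart"], ["running", "evaluations"]],
}

# _get_rag_retriever refuses the cache unless both lists are present:
assert isinstance(example_index["documents"], list)
assert isinstance(example_index["tokens"], list)
```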
@@ -156,50 +238,51 @@ async def _get_rag_retriever():
     if _RAG_RETRIEVER is not None:
         return _RAG_RETRIEVER, _RAG_DOCS_COUNT
 
-    if not _RAG_INITIALIZED:
-        docs_root = Path(os.getenv("EVALVAULT_RAG_DOCS", "docs"))
-        src_root = Path(os.getenv("EVALVAULT_RAG_SRC", "src"))
-        docs_limit = int(os.getenv("EVALVAULT_RAG_DOCS_LIMIT", "120"))
-        src_limit = int(os.getenv("EVALVAULT_RAG_SRC_LIMIT", "120"))
-
-        texts: list[str] = []
-        if docs_root.exists():
-            texts.extend(_load_text_files(docs_root, (".md", ".txt"), docs_limit))
-        if src_root.exists():
-            texts.extend(_load_text_files(src_root, (".py",), src_limit))
+    user_guide_limit = int(os.getenv("EVALVAULT_RAG_USER_GUIDE_LIMIT", "80"))
+    content = _load_user_guide_text()
+    if content is None:
+        return None, 0
+    source_hash = _hash_text(content)
+
+    index_payload = _load_bm25_index()
+    if index_payload is None or index_payload.get("source_hash") != source_hash:
+        index_payload = _build_bm25_index(content, user_guide_limit)
+        if index_payload is None:
+            return None, 0
+        _save_bm25_index(index_payload)
+
+    documents = index_payload.get("documents")
+    tokens = index_payload.get("tokens")
+    if not isinstance(documents, list) or not isinstance(tokens, list):
+        return None, 0
 
-        _RAG_TEXTS = texts
-        _RAG_DOCS_COUNT = len(texts)
-        _RAG_INITIALIZED = True
+    _RAG_TEXTS = documents
+    _RAG_DOCS_COUNT = len(documents)
+    _RAG_INITIALIZED = True
 
     if not _RAG_TEXTS:
         return None, 0
 
-    from evalvault.adapters.outbound.llm.ollama_adapter import OllamaAdapter
-    from evalvault.adapters.outbound.nlp.korean.toolkit_factory import try_create_korean_toolkit
-    from evalvault.config.settings import Settings
-
-    settings = Settings()
-    ollama_adapter = OllamaAdapter(settings)
-    toolkit = try_create_korean_toolkit()
-    if toolkit is None:
-        return None, 0
+    from evalvault.adapters.outbound.nlp.korean.bm25_retriever import KoreanBM25Retriever
+    from evalvault.adapters.outbound.nlp.korean.kiwi_tokenizer import KiwiTokenizer
 
-    use_hybrid = os.getenv("EVALVAULT_RAG_USE_HYBRID", "true").lower() == "true"
-    retriever = toolkit.build_retriever(
-        documents=_RAG_TEXTS,
-        use_hybrid=use_hybrid,
-        ollama_adapter=ollama_adapter if use_hybrid else None,
-        embedding_profile=os.getenv("EVALVAULT_RAG_EMBEDDING_PROFILE", "dev"),
-        verbose=False,
-    )
-    if retriever is None:
-        return None, 0
+    tokenizer = KiwiTokenizer()
+    retriever = KoreanBM25Retriever(tokenizer=tokenizer)
+    retriever.index(list(_RAG_TEXTS))
+    if tokens and len(tokens) == len(_RAG_TEXTS):
+        retriever._tokenized_docs = tokens
 
     _RAG_RETRIEVER = retriever
     return retriever, _RAG_DOCS_COUNT
 
 
+async def warm_rag_index() -> None:
+    try:
+        await _get_rag_retriever()
+    except Exception as exc:
+        logger.warning("RAG preload failed: %s", exc)
+
+
 async def _direct_chat_answer(user_text: str) -> str | None:
     payload = {
         "model": os.getenv("OLLAMA_CHAT_MODEL", "gpt-oss-safeguard:20b"),
@@ -351,15 +434,17 @@ async def _resolve_tool_with_llm(user_text: str) -> dict[str, Any] | None:
 
 
 def _extract_json_content(result: Any) -> dict[str, Any] | None:
-    if isinstance(result, dict) and isinstance(result.get("structuredContent"), dict):
-        return result.get("structuredContent")
-
-    if hasattr(result, "structuredContent"):
-        payload = result.structuredContent
-        if isinstance(payload, dict):
-            return payload
+    if isinstance(result, dict):
+        structured = result.get("structuredContent")
+        if isinstance(structured, dict):
+            return structured
+    else:
+        if hasattr(result, "structuredContent"):
+            payload = result.structuredContent
+            if isinstance(payload, dict):
+                return payload
 
-    if hasattr(result, "content"):
+    if not isinstance(result, dict) and hasattr(result, "content"):
         content = result.content
     elif isinstance(result, dict):
         content = result.get("content")
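The rewrite keeps the dict and object paths strictly separate: dict results are only probed with `.get()`, SDK-style objects only with attribute access, never both on one value. The branching, mirrored in isolation:

```python
# The two result shapes the restructured branch distinguishes (sketch).
from types import SimpleNamespace
from typing import Any


def extract_structured(result: Any) -> dict[str, Any] | None:
    # Mirrors _extract_json_content's new branching: dicts via .get(),
    # everything else via attribute access.
    if isinstance(result, dict):
        structured = result.get("structuredContent")
        return structured if isinstance(structured, dict) else None
    payload = getattr(result, "structuredContent", None)
    return payload if isinstance(payload, dict) else None


assert extract_structured({"structuredContent": {"ok": True}}) == {"ok": True}
assert extract_structured(SimpleNamespace(structuredContent={"ok": True})) == {"ok": True}
assert extract_structured({"content": []}) is None
```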
@@ -385,17 +470,6 @@ def _extract_json_content(result: Any) -> dict[str, Any] | None:
                 return None
             if isinstance(parsed, dict):
                 return parsed
-        else:
-            item_type = getattr(item, "type", None)
-            if item_type == "text":
-                text = getattr(item, "text", None)
-                if isinstance(text, str):
-                    try:
-                        parsed = json.loads(text)
-                    except Exception:
-                        return None
-                    if isinstance(parsed, dict):
-                        return parsed
     return None
 
 
evalvault/adapters/inbound/api/routers/mcp.py

@@ -0,0 +1,144 @@
+from __future__ import annotations
+
+import json
+from dataclasses import asdict
+from typing import Any
+
+from fastapi import APIRouter, Depends, HTTPException, Request
+from pydantic import BaseModel, Field
+
+from evalvault.adapters.inbound.mcp import tools as mcp_tools
+from evalvault.config.settings import Settings, get_settings
+
+router = APIRouter(tags=["mcp"])
+
+
+class JsonRpcRequest(BaseModel):
+    jsonrpc: str = Field("2.0", pattern=r"^2\.0$")
+    id: int | str | None = None
+    method: str
+    params: dict[str, Any] | None = None
+
+
+def _normalize_tokens(raw_tokens: str | None) -> set[str]:
+    if not raw_tokens:
+        return set()
+    return {token.strip() for token in raw_tokens.split(",") if token.strip()}
+
+
+def _require_mcp_token(
+    request: Request,
+    settings: Settings = Depends(get_settings),
+) -> None:
+    if not settings.mcp_enabled:
+        raise HTTPException(status_code=404, detail="MCP is disabled")
+    tokens = _normalize_tokens(settings.mcp_auth_tokens) or _normalize_tokens(
+        settings.api_auth_tokens
+    )
+    if not tokens:
+        raise HTTPException(status_code=401, detail="MCP auth tokens are required")
+    auth_header = request.headers.get("Authorization", "")
+    if not auth_header.lower().startswith("bearer "):
+        raise HTTPException(status_code=401, detail="Invalid or missing MCP token")
+    token = auth_header[7:].strip()
+    if token not in tokens:
+        raise HTTPException(status_code=401, detail="Invalid or missing MCP token")
+
+
+def _tool_registry() -> dict[str, Any]:
+    return {
+        "list_runs": mcp_tools.list_runs,
+        "get_run_summary": mcp_tools.get_run_summary,
+        "run_evaluation": mcp_tools.run_evaluation,
+        "analyze_compare": mcp_tools.analyze_compare,
+        "get_artifacts": mcp_tools.get_artifacts,
+    }
+
+
+def _allowed_tools(settings: Settings) -> set[str]:
+    if settings.mcp_allowed_tools:
+        return {name.strip() for name in settings.mcp_allowed_tools.split(",") if name.strip()}
+    return set(_tool_registry().keys())
+
+
+def _serialize_result(result: Any) -> Any:
+    if hasattr(result, "model_dump"):
+        return result.model_dump()
+    if hasattr(result, "dict"):
+        return result.dict()
+    try:
+        return asdict(result)
+    except TypeError:
+        return result
+
+
+def _jsonrpc_result(rpc_id: int | str | None, payload: Any) -> dict[str, Any]:
+    return {"jsonrpc": "2.0", "id": rpc_id, "result": payload}
+
+
+def _jsonrpc_error(rpc_id: int | str | None, code: int, message: str) -> dict[str, Any]:
+    return {"jsonrpc": "2.0", "id": rpc_id, "error": {"code": code, "message": message}}
+
+
+@router.post("")
+def handle_mcp_request(
+    request: JsonRpcRequest,
+    settings: Settings = Depends(get_settings),
+    _: None = Depends(_require_mcp_token),
+) -> dict[str, Any]:
+    method = request.method
+    params = request.params or {}
+
+    if method == "initialize":
+        return _jsonrpc_result(
+            request.id,
+            {
+                "protocolVersion": settings.mcp_protocol_version,
+                "serverInfo": {
+                    "name": "evalvault-mcp",
+                    "version": settings.mcp_server_version,
+                },
+                "capabilities": {"tools": {"listChanged": False}},
+            },
+        )
+
+    if method in {"initialized", "notifications/initialized"}:
+        return _jsonrpc_result(request.id, None)
+
+    if method == "tools/list":
+        allowed = _allowed_tools(settings)
+        tools = [tool for tool in mcp_tools.get_tool_specs() if tool.get("name") in allowed]
+        return _jsonrpc_result(request.id, {"tools": tools})
+
+    if method == "tools/call":
+        tool_name = params.get("name")
+        tool_args = params.get("arguments") or {}
+        if not tool_name:
+            return _jsonrpc_error(request.id, -32602, "Missing tool name")
+
+        allowed = _allowed_tools(settings)
+        if tool_name not in allowed:
+            return _jsonrpc_error(request.id, -32601, "Tool not allowed")
+
+        tool_fn = _tool_registry().get(tool_name)
+        if tool_fn is None:
+            return _jsonrpc_error(request.id, -32601, "Tool not found")
+
+        try:
+            result = tool_fn(tool_args)
+        except Exception as exc:
+            return _jsonrpc_error(request.id, -32000, f"Tool execution failed: {exc}")
+
+        payload = _serialize_result(result)
+        return _jsonrpc_result(
+            request.id,
+            {
+                "content": [{"type": "text", "text": json.dumps(payload, ensure_ascii=False)}],
+                "structuredContent": payload,
+            },
+        )
+
+    if method == "ping":
+        return _jsonrpc_result(request.id, {"status": "ok"})
+
+    return _jsonrpc_error(request.id, -32601, "Method not found")
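End to end, the new router speaks plain JSON-RPC 2.0 over a single POST with bearer auth, so it is reachable with nothing but the standard library. A hedged client sketch: the URL and token are the dev defaults from chat.py, and the `list_runs` argument names are illustrative, not confirmed by this diff:

```python
# Minimal JSON-RPC 2.0 client for the new MCP endpoint (stdlib only; sketch).
import json
import urllib.request

MCP_URL = "http://localhost:8000/api/v1/mcp"  # default from chat.py
MCP_TOKEN = "mcp-local-dev-token"             # dev default; configure real tokens in prod


def rpc(method: str, params: dict | None = None, rpc_id: int = 1) -> dict:
    body = json.dumps(
        {"jsonrpc": "2.0", "id": rpc_id, "method": method, "params": params}
    ).encode("utf-8")
    req = urllib.request.Request(
        MCP_URL,
        data=body,
        headers={
            "Content-Type": "application/json",
            "Authorization": f"Bearer {MCP_TOKEN}",
        },
    )
    with urllib.request.urlopen(req) as resp:
        return json.loads(resp.read())


print(rpc("initialize"))
print(rpc("tools/list"))
# Argument names are illustrative; consult the specs returned by tools/list.
print(rpc("tools/call", {"name": "list_runs", "arguments": {}}))
```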
evalvault-1.67.0.dist-info/METADATA

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: evalvault
-Version: 1.66.0
+Version: 1.67.0
 Summary: RAG evaluation system using Ragas with Phoenix/Langfuse tracing
 Project-URL: Homepage, https://github.com/ntts9990/EvalVault
 Project-URL: Documentation, https://github.com/ntts9990/EvalVault#readme
evalvault-1.67.0.dist-info/RECORD

@@ -6,13 +6,14 @@ evalvault/adapters/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuF
 evalvault/adapters/inbound/__init__.py,sha256=SG1svel1PwqetnqVpKFLSv612_WwGwLTbFpYgwk6FMw,166
 evalvault/adapters/inbound/api/__init__.py,sha256=LeVVttCA3tLKoHA2PO4z3y8VkfVcf3Bq8CZSzo91lf4,34
 evalvault/adapters/inbound/api/adapter.py,sha256=HgWSYyUxvJPlaSG158WVzpPckpPCYV9Ec3CWN8rLFdI,69118
-evalvault/adapters/inbound/api/main.py,sha256=skYtmDngdOBryyLXQpNGlSd2Te6RF6GtfIwcMACPHFU,7068
+evalvault/adapters/inbound/api/main.py,sha256=RPcstctf_mFH9TPUhld6plA0104Kb6Iccb6Cu26oFR8,7271
 evalvault/adapters/inbound/api/routers/__init__.py,sha256=q07_YF9TnBl68bqcRCvhPU4-zRTyvmPoHVehwO6W7QM,19
 evalvault/adapters/inbound/api/routers/benchmark.py,sha256=yevntbZcNtMvbVODsITUBgR1Ka4pdFQrXBJJ4K4Jyr4,4477
-evalvault/adapters/inbound/api/routers/chat.py,sha256=3S6-ljiY1COlDuVDH5yzMJs9SO0EkuosRcJIYScHWvI,18143
+evalvault/adapters/inbound/api/routers/chat.py,sha256=hCA6rWr5GT_gCqu75uCqYwy2gOEUd85mlcc5y-ruFTY,20661
 evalvault/adapters/inbound/api/routers/config.py,sha256=LygN0fVMr8NFtj5zuQXnVFhoafx56Txa98vpwtPa4Jc,4104
 evalvault/adapters/inbound/api/routers/domain.py,sha256=RsR7GIFMjccDN7vpG1uDyk9n1DnCTH18JDGAX7o4Qqc,3648
 evalvault/adapters/inbound/api/routers/knowledge.py,sha256=yb_e7OEPtwldOAzHTGiWe7jShHw2JdpOFnzGPMceRsg,7109
+evalvault/adapters/inbound/api/routers/mcp.py,sha256=yHANV7qIXig-7YSiQgXzSTuabqFStH5yT3URyQGY2W4,4764
 evalvault/adapters/inbound/api/routers/pipeline.py,sha256=8UgQzNFHcuqS61s69mOrPee4OMwfxVdvRWHJ2_qYBF0,17175
 evalvault/adapters/inbound/api/routers/runs.py,sha256=rydOvwWk24QIYafu3XYS3oL_VVCE_jHDmjADhA19T1s,40059
 evalvault/adapters/inbound/cli/__init__.py,sha256=a42flC5NK-VfbdbBrE49IrUL5zAyKdXZYJVM6E3NTE0,675

@@ -338,8 +339,8 @@ evalvault/reports/__init__.py,sha256=Bb1X4871msAN8I6PM6nKGED3psPwZt88hXZBAOdH06Y
 evalvault/reports/release_notes.py,sha256=pZj0PBFT-4F_Ty-Kv5P69BuoOnmTCn4kznDcORFJd0w,4011
 evalvault/scripts/__init__.py,sha256=NwEeIFQbkX4ml2R_PhtIoNtArDSX_suuoymgG_7Kwso,89
 evalvault/scripts/regression_runner.py,sha256=SxZori5BZ8jVQ057Mf5V5FPgIVDccrV5oRONmnhuk8w,8438
-evalvault-1.66.0.dist-info/METADATA,sha256=f6jzeYkN1iuFwYJTcI8r5L52hVNZwACOlQuWYvVz_JY,26159
-evalvault-1.66.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
-evalvault-1.66.0.dist-info/entry_points.txt,sha256=Oj9Xc5gYcyUYYNmQfWI8NYGw7nN-3M-h2ipHIMlVn6o,65
-evalvault-1.66.0.dist-info/licenses/LICENSE.md,sha256=3RNWY4jjtrQ_yYa-D-7I3XO12Ti7YzxsLV_dpykujvo,11358
-evalvault-1.66.0.dist-info/RECORD,,
+evalvault-1.67.0.dist-info/METADATA,sha256=8KLerbvqXFjDF3iio6CNKbIr5O3YmnnNg5C91hmXqdo,26159
+evalvault-1.67.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
+evalvault-1.67.0.dist-info/entry_points.txt,sha256=Oj9Xc5gYcyUYYNmQfWI8NYGw7nN-3M-h2ipHIMlVn6o,65
+evalvault-1.67.0.dist-info/licenses/LICENSE.md,sha256=3RNWY4jjtrQ_yYa-D-7I3XO12Ti7YzxsLV_dpykujvo,11358
+evalvault-1.67.0.dist-info/RECORD,,