evalvault 1.76.0__py3-none-any.whl → 1.77.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- evalvault/adapters/inbound/api/adapter.py +24 -1
- evalvault/adapters/inbound/api/main.py +2 -0
- evalvault/adapters/inbound/cli/app.py +3 -0
- evalvault/adapters/inbound/cli/commands/analyze.py +6 -1
- evalvault/adapters/inbound/cli/commands/method.py +1 -1
- evalvault/adapters/inbound/cli/commands/run.py +9 -4
- evalvault/adapters/inbound/cli/commands/run_helpers.py +18 -16
- evalvault/adapters/outbound/analysis/llm_report_module.py +515 -33
- evalvault/adapters/outbound/phoenix/sync_service.py +1 -1
- evalvault/adapters/outbound/report/markdown_adapter.py +92 -0
- evalvault/adapters/outbound/storage/factory.py +1 -4
- evalvault/adapters/outbound/tracker/phoenix_adapter.py +25 -8
- evalvault/config/runtime_services.py +122 -0
- {evalvault-1.76.0.dist-info → evalvault-1.77.0.dist-info}/METADATA +2 -1
- {evalvault-1.76.0.dist-info → evalvault-1.77.0.dist-info}/RECORD +18 -17
- {evalvault-1.76.0.dist-info → evalvault-1.77.0.dist-info}/WHEEL +0 -0
- {evalvault-1.76.0.dist-info → evalvault-1.77.0.dist-info}/entry_points.txt +0 -0
- {evalvault-1.76.0.dist-info → evalvault-1.77.0.dist-info}/licenses/LICENSE.md +0 -0
|
@@ -15,7 +15,7 @@ class PhoenixSyncError(RuntimeError):
|
|
|
15
15
|
def _normalize_base_url(endpoint: str) -> str:
|
|
16
16
|
"""Convert OTLP endpoint (…/v1/traces) to Phoenix REST base URL."""
|
|
17
17
|
|
|
18
|
-
if not endpoint:
|
|
18
|
+
if not isinstance(endpoint, str) or not endpoint:
|
|
19
19
|
return "http://localhost:6006"
|
|
20
20
|
base = endpoint.strip()
|
|
21
21
|
suffix = "/v1/traces"
|
|
@@ -50,6 +50,15 @@ class MarkdownReportAdapter:
|
|
|
50
50
|
# 통계 분석
|
|
51
51
|
if bundle.statistical:
|
|
52
52
|
sections.append(self._generate_statistical_section(bundle.statistical))
|
|
53
|
+
sections.append(self._generate_reason_section(bundle.statistical))
|
|
54
|
+
sections.append(self._generate_meaning_section(bundle.statistical))
|
|
55
|
+
sections.append(self._generate_dataset_delta_section(bundle.statistical))
|
|
56
|
+
sections.append(self._generate_improvement_plan_section(bundle.statistical))
|
|
57
|
+
else:
|
|
58
|
+
sections.append(self._generate_reason_section(None))
|
|
59
|
+
sections.append(self._generate_meaning_section(None))
|
|
60
|
+
sections.append(self._generate_dataset_delta_section(None))
|
|
61
|
+
sections.append(self._generate_improvement_plan_section(None))
|
|
53
62
|
|
|
54
63
|
# NLP 분석
|
|
55
64
|
if include_nlp and bundle.has_nlp and bundle.nlp:
|
|
@@ -208,6 +217,89 @@ class MarkdownReportAdapter:
|
|
|
208
217
|
|
|
209
218
|
return "\n".join(lines)
|
|
210
219
|
|
|
220
|
+
def _generate_reason_section(self, stat: StatisticalAnalysis | None) -> str:
|
|
221
|
+
lines = ["## 원인/근거"]
|
|
222
|
+
if stat is None:
|
|
223
|
+
lines.append(
|
|
224
|
+
"- 통계 분석 결과가 없어 원인/근거를 도출할 수 없습니다. (추가 데이터 필요)"
|
|
225
|
+
)
|
|
226
|
+
return "\n".join(lines)
|
|
227
|
+
|
|
228
|
+
if stat.low_performers:
|
|
229
|
+
for lp in stat.low_performers[:5]:
|
|
230
|
+
lines.append(
|
|
231
|
+
f"- {lp.test_case_id}: {lp.metric_name} {lp.score:.2f} < {lp.threshold:.2f}"
|
|
232
|
+
)
|
|
233
|
+
elif stat.insights:
|
|
234
|
+
for insight in stat.insights[:5]:
|
|
235
|
+
lines.append(f"- {insight}")
|
|
236
|
+
else:
|
|
237
|
+
lines.append("- 추가 데이터 필요")
|
|
238
|
+
return "\n".join(lines)
|
|
239
|
+
|
|
240
|
+
def _generate_meaning_section(self, stat: StatisticalAnalysis | None) -> str:
|
|
241
|
+
lines = ["## 결과 의미"]
|
|
242
|
+
if stat is None:
|
|
243
|
+
lines.append("- 통계 분석 결과가 없어 의미를 해석할 수 없습니다. (추가 데이터 필요)")
|
|
244
|
+
return "\n".join(lines)
|
|
245
|
+
|
|
246
|
+
if stat.overall_pass_rate < 0.7:
|
|
247
|
+
lines.append("- 전체 통과율이 낮아 사용자 신뢰/정확성 리스크가 큽니다.")
|
|
248
|
+
else:
|
|
249
|
+
lines.append("- 전체 통과율이 기준 이상으로 기본 품질은 유지됩니다.")
|
|
250
|
+
|
|
251
|
+
low_metrics = [
|
|
252
|
+
metric for metric, rate in (stat.metric_pass_rates or {}).items() if rate < 0.7
|
|
253
|
+
]
|
|
254
|
+
if low_metrics:
|
|
255
|
+
metrics_str = ", ".join(sorted(low_metrics)[:6])
|
|
256
|
+
lines.append(f"- 기준 미달 메트릭: {metrics_str}")
|
|
257
|
+
return "\n".join(lines)
|
|
258
|
+
|
|
259
|
+
def _generate_dataset_delta_section(self, stat: StatisticalAnalysis | None) -> str:
|
|
260
|
+
lines = ["## 데이터셋 차이"]
|
|
261
|
+
if stat is None:
|
|
262
|
+
lines.append("- 데이터셋 기준 차이를 판단할 수 없습니다. (추가 데이터 필요)")
|
|
263
|
+
return "\n".join(lines)
|
|
264
|
+
|
|
265
|
+
low_metrics = [
|
|
266
|
+
metric for metric, rate in (stat.metric_pass_rates or {}).items() if rate < 0.7
|
|
267
|
+
]
|
|
268
|
+
if low_metrics:
|
|
269
|
+
lines.append("- 데이터셋 기준 미달 지표: " + ", ".join(sorted(low_metrics)[:6]))
|
|
270
|
+
else:
|
|
271
|
+
lines.append("- 데이터셋 기준 미달 지표가 없습니다.")
|
|
272
|
+
return "\n".join(lines)
|
|
273
|
+
|
|
274
|
+
def _generate_improvement_plan_section(self, stat: StatisticalAnalysis | None) -> str:
|
|
275
|
+
lines = ["## 개선 방향"]
|
|
276
|
+
if stat is None:
|
|
277
|
+
lines.append("- 개선 방향 도출을 위한 분석 결과가 부족합니다. (추가 데이터 필요)")
|
|
278
|
+
return "\n".join(lines)
|
|
279
|
+
|
|
280
|
+
action_map = {
|
|
281
|
+
"context_precision": "랭커/리랭커 도입 및 상위 문서 필터링 강화",
|
|
282
|
+
"context_recall": "검색 범위 확장 또는 하드 네거티브 추가",
|
|
283
|
+
"mrr": "상위 K 재정렬 및 쿼리 재작성 적용",
|
|
284
|
+
"ndcg": "랭킹 품질 지표 최적화(리랭킹/하이브리드 검색)",
|
|
285
|
+
"hit_rate": "검색 후보군 확대 또는 인덱싱 개선",
|
|
286
|
+
"answer_relevancy": "답변 포맷/질문 의도 정렬 프롬프트 강화",
|
|
287
|
+
"faithfulness": "근거 인용/검증 단계 추가",
|
|
288
|
+
"factual_correctness": "정답 검증 규칙 강화 및 근거 필터링",
|
|
289
|
+
"semantic_similarity": "정답 기준 문장 재정의 및 평가셋 보강",
|
|
290
|
+
}
|
|
291
|
+
|
|
292
|
+
low_metrics = [
|
|
293
|
+
metric for metric, rate in (stat.metric_pass_rates or {}).items() if rate < 0.7
|
|
294
|
+
]
|
|
295
|
+
if low_metrics:
|
|
296
|
+
for metric in sorted(low_metrics)[:5]:
|
|
297
|
+
action = action_map.get(metric, "실험을 통해 개선 방향을 재검증")
|
|
298
|
+
lines.append(f"- {metric}: {action}")
|
|
299
|
+
else:
|
|
300
|
+
lines.append("- 개선 대상 지표가 명확하지 않습니다. (추가 데이터 필요)")
|
|
301
|
+
return "\n".join(lines)
|
|
302
|
+
|
|
211
303
|
def _generate_nlp_section(self, nlp: NLPAnalysis) -> str:
|
|
212
304
|
"""NLP 분석 섹션 생성."""
|
|
213
305
|
lines = ["## NLP 분석"]
|
|
@@ -19,12 +19,9 @@ def build_storage_adapter(
|
|
|
19
19
|
) -> StoragePort:
|
|
20
20
|
resolved_settings = settings or Settings()
|
|
21
21
|
|
|
22
|
-
if db_path is not None:
|
|
23
|
-
return SQLiteStorageAdapter(db_path=db_path)
|
|
24
|
-
|
|
25
22
|
backend = getattr(resolved_settings, "db_backend", "postgres")
|
|
26
23
|
if backend == "sqlite":
|
|
27
|
-
resolved_db_path = resolved_settings.evalvault_db_path
|
|
24
|
+
resolved_db_path = db_path or resolved_settings.evalvault_db_path
|
|
28
25
|
if resolved_db_path is None:
|
|
29
26
|
raise RuntimeError("SQLite backend selected but evalvault_db_path is not set.")
|
|
30
27
|
return SQLiteStorageAdapter(db_path=resolved_db_path)
|
|
@@ -164,6 +164,20 @@ class PhoenixAdapter(TrackerPort):
|
|
|
164
164
|
from opentelemetry.trace import format_span_id
|
|
165
165
|
|
|
166
166
|
span_id = format_span_id(span.get_span_context().span_id)
|
|
167
|
+
spans_client = getattr(client, "spans", None)
|
|
168
|
+
add_span_annotation = (
|
|
169
|
+
getattr(spans_client, "add_span_annotation", None) if spans_client else None
|
|
170
|
+
)
|
|
171
|
+
if callable(add_span_annotation):
|
|
172
|
+
add_span_annotation(
|
|
173
|
+
annotation_name=name,
|
|
174
|
+
annotator_kind="CODE",
|
|
175
|
+
span_id=span_id,
|
|
176
|
+
label=label,
|
|
177
|
+
score=score,
|
|
178
|
+
explanation=explanation,
|
|
179
|
+
)
|
|
180
|
+
return
|
|
167
181
|
client.annotations.add_span_annotation(
|
|
168
182
|
annotation_name=name,
|
|
169
183
|
annotator_kind="CODE",
|
|
@@ -339,6 +353,7 @@ class PhoenixAdapter(TrackerPort):
|
|
|
339
353
|
|
|
340
354
|
# Calculate per-metric summary
|
|
341
355
|
metric_summary = {}
|
|
356
|
+
total_count = len(run.results) if run.results else 0
|
|
342
357
|
for metric_name in run.metrics_evaluated:
|
|
343
358
|
passed_count = sum(
|
|
344
359
|
1
|
|
@@ -351,9 +366,9 @@ class PhoenixAdapter(TrackerPort):
|
|
|
351
366
|
"average_score": round(avg_score, 4) if avg_score else 0.0,
|
|
352
367
|
"threshold": threshold,
|
|
353
368
|
"passed": passed_count,
|
|
354
|
-
"failed":
|
|
355
|
-
"total":
|
|
356
|
-
"pass_rate": round(passed_count /
|
|
369
|
+
"failed": total_count - passed_count,
|
|
370
|
+
"total": total_count,
|
|
371
|
+
"pass_rate": round(passed_count / total_count, 4) if total_count else 0.0,
|
|
357
372
|
}
|
|
358
373
|
|
|
359
374
|
# Start root trace
|
|
@@ -805,11 +820,13 @@ class PhoenixAdapter(TrackerPort):
|
|
|
805
820
|
if not output_preview and data.generation and data.generation.response:
|
|
806
821
|
output_preview = sanitize_text(data.generation.response, max_chars=MAX_LOG_CHARS)
|
|
807
822
|
if not output_preview and data.retrieval:
|
|
808
|
-
previews = [
|
|
809
|
-
|
|
810
|
-
|
|
811
|
-
|
|
812
|
-
|
|
823
|
+
previews: list[str] = []
|
|
824
|
+
for doc in data.retrieval.candidates:
|
|
825
|
+
if not doc.content:
|
|
826
|
+
continue
|
|
827
|
+
preview = sanitize_text(doc.content, max_chars=MAX_CONTEXT_CHARS)
|
|
828
|
+
if preview:
|
|
829
|
+
previews.append(preview)
|
|
813
830
|
output_preview = "\n".join(previews[:3])
|
|
814
831
|
if output_preview:
|
|
815
832
|
span.set_attribute("rag.final_answer", output_preview)
|
|
@@ -0,0 +1,122 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import logging
|
|
4
|
+
import socket
|
|
5
|
+
import subprocess
|
|
6
|
+
from dataclasses import dataclass
|
|
7
|
+
from shutil import which
|
|
8
|
+
from urllib.parse import urlparse
|
|
9
|
+
|
|
10
|
+
from evalvault.config.settings import Settings, is_production_profile
|
|
11
|
+
|
|
12
|
+
logger = logging.getLogger(__name__)
|
|
13
|
+
|
|
14
|
+
_PHOENIX_CONTAINER = "evalvault-phoenix"
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
@dataclass(frozen=True)
|
|
18
|
+
class Endpoint:
|
|
19
|
+
host: str
|
|
20
|
+
port: int
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def _is_local_host(host: str | None) -> bool:
|
|
24
|
+
if not host:
|
|
25
|
+
return False
|
|
26
|
+
return host in {"localhost", "127.0.0.1", "0.0.0.0"}
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def _port_is_open(host: str, port: int) -> bool:
|
|
30
|
+
try:
|
|
31
|
+
with socket.create_connection((host, port), timeout=0.2):
|
|
32
|
+
return True
|
|
33
|
+
except OSError:
|
|
34
|
+
return False
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def _parse_http_endpoint(url: str | None, default_port: int) -> Endpoint | None:
|
|
38
|
+
if not url or not isinstance(url, str):
|
|
39
|
+
return None
|
|
40
|
+
parsed = urlparse(url)
|
|
41
|
+
if parsed.scheme and parsed.scheme not in {"http", "https"}:
|
|
42
|
+
return None
|
|
43
|
+
host = parsed.hostname or ""
|
|
44
|
+
port = parsed.port or default_port
|
|
45
|
+
if not host or port <= 0:
|
|
46
|
+
return None
|
|
47
|
+
return Endpoint(host=host, port=port)
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def _start_mlflow(port: int) -> bool:
|
|
51
|
+
if which("mlflow") is None:
|
|
52
|
+
logger.warning("MLflow CLI not found. Install with: uv sync --extra mlflow")
|
|
53
|
+
return False
|
|
54
|
+
try:
|
|
55
|
+
subprocess.Popen(
|
|
56
|
+
["mlflow", "server", "--host", "0.0.0.0", "--port", str(port)],
|
|
57
|
+
stdout=subprocess.DEVNULL,
|
|
58
|
+
stderr=subprocess.DEVNULL,
|
|
59
|
+
)
|
|
60
|
+
logger.info("Started MLflow server on port %s", port)
|
|
61
|
+
return True
|
|
62
|
+
except Exception as exc: # pragma: no cover - safety net
|
|
63
|
+
logger.warning("Failed to start MLflow server: %s", exc)
|
|
64
|
+
return False
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def _start_phoenix(port: int) -> bool:
|
|
68
|
+
if which("docker") is None:
|
|
69
|
+
logger.warning("Docker not found. Phoenix auto-start skipped.")
|
|
70
|
+
return False
|
|
71
|
+
try:
|
|
72
|
+
start = subprocess.run(
|
|
73
|
+
["docker", "start", _PHOENIX_CONTAINER],
|
|
74
|
+
check=False,
|
|
75
|
+
capture_output=True,
|
|
76
|
+
text=True,
|
|
77
|
+
)
|
|
78
|
+
if start.returncode != 0:
|
|
79
|
+
subprocess.run(
|
|
80
|
+
[
|
|
81
|
+
"docker",
|
|
82
|
+
"run",
|
|
83
|
+
"-d",
|
|
84
|
+
"-p",
|
|
85
|
+
f"{port}:6006",
|
|
86
|
+
"--name",
|
|
87
|
+
_PHOENIX_CONTAINER,
|
|
88
|
+
"arizephoenix/phoenix:latest",
|
|
89
|
+
],
|
|
90
|
+
check=False,
|
|
91
|
+
capture_output=True,
|
|
92
|
+
text=True,
|
|
93
|
+
)
|
|
94
|
+
logger.info("Ensured Phoenix container is running on port %s", port)
|
|
95
|
+
return True
|
|
96
|
+
except Exception as exc: # pragma: no cover - safety net
|
|
97
|
+
logger.warning("Failed to start Phoenix container: %s", exc)
|
|
98
|
+
return False
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
def ensure_local_observability(settings: Settings) -> None:
|
|
102
|
+
if is_production_profile(settings.evalvault_profile):
|
|
103
|
+
return
|
|
104
|
+
|
|
105
|
+
phoenix_endpoint = _parse_http_endpoint(
|
|
106
|
+
getattr(settings, "phoenix_endpoint", None) or "http://localhost:6006/v1/traces",
|
|
107
|
+
6006,
|
|
108
|
+
)
|
|
109
|
+
if (
|
|
110
|
+
phoenix_endpoint
|
|
111
|
+
and _is_local_host(phoenix_endpoint.host)
|
|
112
|
+
and not _port_is_open(phoenix_endpoint.host, phoenix_endpoint.port)
|
|
113
|
+
):
|
|
114
|
+
_start_phoenix(phoenix_endpoint.port)
|
|
115
|
+
|
|
116
|
+
mlflow_endpoint = _parse_http_endpoint(getattr(settings, "mlflow_tracking_uri", None), 5000)
|
|
117
|
+
if (
|
|
118
|
+
mlflow_endpoint
|
|
119
|
+
and _is_local_host(mlflow_endpoint.host)
|
|
120
|
+
and not _port_is_open(mlflow_endpoint.host, mlflow_endpoint.port)
|
|
121
|
+
):
|
|
122
|
+
_start_mlflow(mlflow_endpoint.port)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: evalvault
|
|
3
|
-
Version: 1.
|
|
3
|
+
Version: 1.77.0
|
|
4
4
|
Summary: RAG evaluation system using Ragas with Phoenix/Langfuse tracing
|
|
5
5
|
Project-URL: Homepage, https://github.com/ntts9990/EvalVault
|
|
6
6
|
Project-URL: Documentation, https://github.com/ntts9990/EvalVault#readme
|
|
@@ -66,6 +66,7 @@ Requires-Dist: ijson>=3.3.0; extra == 'dev'
|
|
|
66
66
|
Requires-Dist: kiwipiepy>=0.18.0; extra == 'dev'
|
|
67
67
|
Requires-Dist: langchain-anthropic; extra == 'dev'
|
|
68
68
|
Requires-Dist: lm-eval[api]>=0.4.0; extra == 'dev'
|
|
69
|
+
Requires-Dist: manim>=0.18.0; extra == 'dev'
|
|
69
70
|
Requires-Dist: mkdocs-material>=9.5.0; extra == 'dev'
|
|
70
71
|
Requires-Dist: mkdocs>=1.5.0; extra == 'dev'
|
|
71
72
|
Requires-Dist: mkdocstrings[python]>=0.24.0; extra == 'dev'
|
|
@@ -5,8 +5,8 @@ evalvault/mkdocs_helpers.py,sha256=1AKVQ1W2_VO4qclhfyefyU9Dz1Hzkh1DWDwsFMe24jc,3
|
|
|
5
5
|
evalvault/adapters/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
6
6
|
evalvault/adapters/inbound/__init__.py,sha256=SG1svel1PwqetnqVpKFLSv612_WwGwLTbFpYgwk6FMw,166
|
|
7
7
|
evalvault/adapters/inbound/api/__init__.py,sha256=LeVVttCA3tLKoHA2PO4z3y8VkfVcf3Bq8CZSzo91lf4,34
|
|
8
|
-
evalvault/adapters/inbound/api/adapter.py,sha256=
|
|
9
|
-
evalvault/adapters/inbound/api/main.py,sha256=
|
|
8
|
+
evalvault/adapters/inbound/api/adapter.py,sha256=fYI8NBg4Ropav5W4sVwSus6SFAkhXdWoAEY70_UryaY,87797
|
|
9
|
+
evalvault/adapters/inbound/api/main.py,sha256=B22zDra7MawI_fbQ4uJOzlf7Lrbxy7hycebT9kO9pL0,7651
|
|
10
10
|
evalvault/adapters/inbound/api/routers/__init__.py,sha256=q07_YF9TnBl68bqcRCvhPU4-zRTyvmPoHVehwO6W7QM,19
|
|
11
11
|
evalvault/adapters/inbound/api/routers/benchmark.py,sha256=yevntbZcNtMvbVODsITUBgR1Ka4pdFQrXBJJ4K4Jyr4,4477
|
|
12
12
|
evalvault/adapters/inbound/api/routers/calibration.py,sha256=FzEOe7KqDtfGDlOKREsAoYId1jTILyqkDm_Y3LyHk54,4130
|
|
@@ -18,10 +18,10 @@ evalvault/adapters/inbound/api/routers/mcp.py,sha256=yHANV7qIXig-7YSiQgXzSTuabqF
|
|
|
18
18
|
evalvault/adapters/inbound/api/routers/pipeline.py,sha256=VCoPYg69fg2hc5Z8oWIPbHzYYT2NnGoRZW48C48lApE,17442
|
|
19
19
|
evalvault/adapters/inbound/api/routers/runs.py,sha256=eQu_6Vp-8lcp4oVaEFhlmR8uCyQ8ys_-VRHO5AKDQvs,43617
|
|
20
20
|
evalvault/adapters/inbound/cli/__init__.py,sha256=a42flC5NK-VfbdbBrE49IrUL5zAyKdXZYJVM6E3NTE0,675
|
|
21
|
-
evalvault/adapters/inbound/cli/app.py,sha256=
|
|
21
|
+
evalvault/adapters/inbound/cli/app.py,sha256=7owV7vDZCZlAByHnalY84mmPjxdf1nSciigTJ_smr20,1774
|
|
22
22
|
evalvault/adapters/inbound/cli/commands/__init__.py,sha256=TB3evGuv3_AQoudWiR2bH5EH_AY9f9E7lQcSOWH6TO8,4091
|
|
23
23
|
evalvault/adapters/inbound/cli/commands/agent.py,sha256=YlOYMEzzS1aSKDKD_a7UK3St18X6GXGkdTatrzyd8Zc,7555
|
|
24
|
-
evalvault/adapters/inbound/cli/commands/analyze.py,sha256=
|
|
24
|
+
evalvault/adapters/inbound/cli/commands/analyze.py,sha256=p7J4bcn9tp2YR5v8655z_xiqelx8Y-sxD4rHBTvasdU,50745
|
|
25
25
|
evalvault/adapters/inbound/cli/commands/api.py,sha256=YdbJ_-QEajnFcjTa7P2heLMjFKpeQ4nWP_p-HvfYkEo,1943
|
|
26
26
|
evalvault/adapters/inbound/cli/commands/artifacts.py,sha256=bE8FQxmnU0mMIAPx5en8aKrtfNNkrbWoLxIX4ZT9D5c,3776
|
|
27
27
|
evalvault/adapters/inbound/cli/commands/benchmark.py,sha256=rcmLcId5w_cM5ZfMzu2V49B-pDAZMXBcYgiB8v1X0Eo,41306
|
|
@@ -39,15 +39,15 @@ evalvault/adapters/inbound/cli/commands/history.py,sha256=dGkoRz59FZCbs-Af29MxgV
|
|
|
39
39
|
evalvault/adapters/inbound/cli/commands/init.py,sha256=7q86fUeBVA08fU_N0lAV6Lakxirq4val2jIyALlDy3E,8822
|
|
40
40
|
evalvault/adapters/inbound/cli/commands/kg.py,sha256=ycV9Xj6SUUJLTyTfLZcjXDVLcZqwo7Gw878ZhZAeDoc,19155
|
|
41
41
|
evalvault/adapters/inbound/cli/commands/langfuse.py,sha256=aExhZ5WYT0FzJI4v1sF-a1jqy9b1BF46_HBtfiQjVGI,4085
|
|
42
|
-
evalvault/adapters/inbound/cli/commands/method.py,sha256=
|
|
42
|
+
evalvault/adapters/inbound/cli/commands/method.py,sha256=Bh-kcSEk2QFeLLUrL9OB8baHndiR4FmpMoolDEDF4fM,19100
|
|
43
43
|
evalvault/adapters/inbound/cli/commands/ops.py,sha256=jh0ryYIDvj2VDhOR6bESPHnyogtzAfSs81gKMGlOczs,4042
|
|
44
44
|
evalvault/adapters/inbound/cli/commands/phoenix.py,sha256=LQi3KTLq1ybjjBuz92oQ6lYyBS3mHrCHk0qe-7bqB4U,15611
|
|
45
45
|
evalvault/adapters/inbound/cli/commands/pipeline.py,sha256=nBsgSG9RvwDRy_X-01DqZe0jS9zpp1j4hkccni3sruc,12253
|
|
46
46
|
evalvault/adapters/inbound/cli/commands/profile_difficulty.py,sha256=PF8MbYzufbNVlKKnBp6JF9PrBe-ncGDOe8JHWW0L278,6329
|
|
47
47
|
evalvault/adapters/inbound/cli/commands/prompts.py,sha256=ExbTG_8yZupuw6em6OPOdTOMc4T-YPc7tUx5_cbXeiU,26708
|
|
48
48
|
evalvault/adapters/inbound/cli/commands/regress.py,sha256=VzwTucxJav71Xmd18qCb93XG_mTvfjmW52SQRv8Sgu4,19082
|
|
49
|
-
evalvault/adapters/inbound/cli/commands/run.py,sha256=
|
|
50
|
-
evalvault/adapters/inbound/cli/commands/run_helpers.py,sha256=
|
|
49
|
+
evalvault/adapters/inbound/cli/commands/run.py,sha256=G_6AhvD-VByY1fj3JqYvmrYUleeyAdXRLY3a-rG7IIE,136860
|
|
50
|
+
evalvault/adapters/inbound/cli/commands/run_helpers.py,sha256=VWZKtkY4MwMpLbgq2zzl3uSu6OrXcixvpn3GYgBgBtM,48055
|
|
51
51
|
evalvault/adapters/inbound/cli/commands/stage.py,sha256=XiPfJrdtUWrdSw5EzLgvz5WEEBW1qz1FALEhDYamMjE,23236
|
|
52
52
|
evalvault/adapters/inbound/cli/utils/__init__.py,sha256=QPNKneZS-Z-tTnYYxtgJXgcJWY6puUlRQcKrn7Mlv1M,685
|
|
53
53
|
evalvault/adapters/inbound/cli/utils/analysis_io.py,sha256=RHkKEq4e-PtbtRDlXAJWU80RYHNPw-O5V9_GujdaGfc,13393
|
|
@@ -84,7 +84,7 @@ evalvault/adapters/outbound/analysis/embedding_searcher_module.py,sha256=DcAMuV7
|
|
|
84
84
|
evalvault/adapters/outbound/analysis/hybrid_rrf_module.py,sha256=kaHSc7z3Jg_KrRLBqPMTV_9XXsL6v1dmbz-3dDO6IMw,3255
|
|
85
85
|
evalvault/adapters/outbound/analysis/hybrid_weighted_module.py,sha256=AO-7thmnFGerUDWd8l9ydxeAkHkACo7Raf9O0RfW_nE,3671
|
|
86
86
|
evalvault/adapters/outbound/analysis/hypothesis_generator_module.py,sha256=tx9fWgS0rBoK5eJPmwK5POoV78yN03hkFmWhCx71Ln0,13337
|
|
87
|
-
evalvault/adapters/outbound/analysis/llm_report_module.py,sha256=
|
|
87
|
+
evalvault/adapters/outbound/analysis/llm_report_module.py,sha256=sYwgeS2W3jJRJzvFMYb3tnXag9x__iAVVP-8YzHXACU,91812
|
|
88
88
|
evalvault/adapters/outbound/analysis/low_performer_extractor_module.py,sha256=Pt0Tmtc5Etqp_3SBDCPAzqWI2EF9woSg0mmBucEHlQw,1291
|
|
89
89
|
evalvault/adapters/outbound/analysis/model_analyzer_module.py,sha256=28rHdXBXYIFpLHixbbZcv6-j2QVgl3yaGN0vU1Q0gFc,2682
|
|
90
90
|
evalvault/adapters/outbound/analysis/morpheme_analyzer_module.py,sha256=Hrh4mluMsOhQHPrliD2w0FVKokJpfikXOFKT6sNwk74,4158
|
|
@@ -186,12 +186,12 @@ evalvault/adapters/outbound/nlp/korean/toolkit.py,sha256=iMaY5VueC5P0rxm5SCdz69C
|
|
|
186
186
|
evalvault/adapters/outbound/nlp/korean/toolkit_factory.py,sha256=x3v-AAkVInOabC4PtOtStsZrFnHun0IOqZDyQGaQVm8,586
|
|
187
187
|
evalvault/adapters/outbound/ops/__init__.py,sha256=_QiDVPuiYWkIwW_ELEVKD_v6dLojjyvIJWs4qVNxehw,164
|
|
188
188
|
evalvault/adapters/outbound/ops/report_renderer.py,sha256=mezVKdIsnJSNvBW6xkhpNG3MOFXHZLZspmHk5o-e8Cg,6354
|
|
189
|
-
evalvault/adapters/outbound/phoenix/sync_service.py,sha256=
|
|
189
|
+
evalvault/adapters/outbound/phoenix/sync_service.py,sha256=D5OUfArTq6K9D7oCe3CbIKOqqfRK9r4WoyqFcL03uxs,13304
|
|
190
190
|
evalvault/adapters/outbound/report/__init__.py,sha256=8VeMrfj63mDR-xUHct-drNNBA5M-m-B7sgC1qUJF7g4,660
|
|
191
191
|
evalvault/adapters/outbound/report/ci_report_formatter.py,sha256=5YD8BwtOjLnHcNbbG0HJziOifD9BDhBtZT1oItd6zJE,1233
|
|
192
192
|
evalvault/adapters/outbound/report/dashboard_generator.py,sha256=g0SANOrOS_mSfLfKbfbxflxZmCNbZGrGzA0g03_Yb48,8356
|
|
193
193
|
evalvault/adapters/outbound/report/llm_report_generator.py,sha256=i_iXfY8qutIb8TsvLKyMLnijsA0yiNJ3rBEFg4zVqcE,26858
|
|
194
|
-
evalvault/adapters/outbound/report/markdown_adapter.py,sha256=
|
|
194
|
+
evalvault/adapters/outbound/report/markdown_adapter.py,sha256=5KaMeStOLgKsBDTf2DeDbbnpN_eWp2PhkdAwpe1KrLo,21346
|
|
195
195
|
evalvault/adapters/outbound/report/pr_comment_formatter.py,sha256=FxWWfZQU5ErejVqE_F8rKoUxoBTbbmw_ok9xbEYiA4E,1661
|
|
196
196
|
evalvault/adapters/outbound/retriever/__init__.py,sha256=o5qK4pIrlXm0mIs_fo0aLPQHuHkrmrXKc6ZSg8a0t4g,201
|
|
197
197
|
evalvault/adapters/outbound/retriever/graph_rag_adapter.py,sha256=xTI7uMFp4WKstg1s2zY9R_QsWPA-Rz_KUnzekwso8z0,11790
|
|
@@ -199,7 +199,7 @@ evalvault/adapters/outbound/retriever/pgvector_store.py,sha256=gf2oO_aWpkaDJxBhc
|
|
|
199
199
|
evalvault/adapters/outbound/storage/__init__.py,sha256=n5R6thAPTx1leSwv6od6nBWcLWFa-UYD6cOLzN89T8I,614
|
|
200
200
|
evalvault/adapters/outbound/storage/base_sql.py,sha256=WzA57p11m65zf-F7jjiJ9GG0BxnnAahKRckVOiK3xcY,58441
|
|
201
201
|
evalvault/adapters/outbound/storage/benchmark_storage_adapter.py,sha256=Qgf9xSSIkYQRpG4uLzcUdoYO9LTQDQ4tFRkkMYer-WA,9803
|
|
202
|
-
evalvault/adapters/outbound/storage/factory.py,sha256=
|
|
202
|
+
evalvault/adapters/outbound/storage/factory.py,sha256=lU5nsBbTPtt_yFbw4GHms2lbdY7AACesWE2D8iK6oXU,1909
|
|
203
203
|
evalvault/adapters/outbound/storage/postgres_adapter.py,sha256=SZb4Dx2ZYNu-pDqX4rVgT4bRfJ2cQ75DXcWIIEu8aws,59933
|
|
204
204
|
evalvault/adapters/outbound/storage/postgres_schema.sql,sha256=CIQn1S93ZrH1B_qumePf5aKi-25L284QmvpllnUfeEI,12375
|
|
205
205
|
evalvault/adapters/outbound/storage/schema.sql,sha256=LEtrKFpa1SbIBN-igkNiQXJqVLGp-liX-6KR_sZAMwM,14283
|
|
@@ -214,7 +214,7 @@ evalvault/adapters/outbound/tracker/__init__.py,sha256=Suu5BznOK5uTuD5_jS8JMZd8R
|
|
|
214
214
|
evalvault/adapters/outbound/tracker/langfuse_adapter.py,sha256=uI-t5v9AC5VUMYsIc1FHYImourZeErGMXB0_prOMErc,18839
|
|
215
215
|
evalvault/adapters/outbound/tracker/log_sanitizer.py,sha256=ilKTTSzsHslQYc-elnWu0Z3HKNNw1D1iI0_cCvYbo1M,2653
|
|
216
216
|
evalvault/adapters/outbound/tracker/mlflow_adapter.py,sha256=8vo53d2ZpcYDz3J798iru7Sk-Wm3XhARJMVmS5d6pRE,13495
|
|
217
|
-
evalvault/adapters/outbound/tracker/phoenix_adapter.py,sha256=
|
|
217
|
+
evalvault/adapters/outbound/tracker/phoenix_adapter.py,sha256=ajH8B9kgmUBywrko20Fo8DUi-kbzsTiNZZUZSwfBfOA,33663
|
|
218
218
|
evalvault/config/__init__.py,sha256=UCgeDx62M2gOuFvdN29wWwny2fdH4bPY_uUC3-42eDw,1297
|
|
219
219
|
evalvault/config/agent_types.py,sha256=EP2Pv3ZtOzDXIvIa-Hnd1to9JIbMUtGitrlwzZtx0Ys,13418
|
|
220
220
|
evalvault/config/domain_config.py,sha256=rOgNA2T8NWlDzcEFC0shdUCCww0lI1E5fUm5QrKQSZI,9264
|
|
@@ -222,6 +222,7 @@ evalvault/config/instrumentation.py,sha256=BW2a3OqHH6aQNjjlL0oyEI6K3pXuORy6x__-Z
|
|
|
222
222
|
evalvault/config/langfuse_support.py,sha256=DEzVMfMGGf1V45W_2oUG-NCDfsYI4UUdnYJIgBSrN2o,582
|
|
223
223
|
evalvault/config/model_config.py,sha256=KlzDbGyDLeOGE7ElekFFk5YjjT5u8i6KO2B4EyZkLnI,3542
|
|
224
224
|
evalvault/config/phoenix_support.py,sha256=w2W8UQTprac8BWdIfV9kPjr2sEj7EF9pNBXX2JB1x4M,13810
|
|
225
|
+
evalvault/config/runtime_services.py,sha256=-dktvBVmqudfk0Q07A1vY8KuhtJTnoU9ZxLGR5_ifmw,3642
|
|
225
226
|
evalvault/config/secret_manager.py,sha256=YjPMuNqeBrAR2BzCJvsBNUExaU4TBSFyZ8kVYZZifqA,4172
|
|
226
227
|
evalvault/config/settings.py,sha256=2EoO6WutkwD2F7rkyECxx5-VenivntOXPZwemVQ9ZtU,20696
|
|
227
228
|
evalvault/config/playbooks/improvement_playbook.yaml,sha256=9F9WVVCydFfz6zUuGYzZ4PKdW1LLtcBKVF36T7xT764,26965
|
|
@@ -364,8 +365,8 @@ evalvault/reports/__init__.py,sha256=Bb1X4871msAN8I6PM6nKGED3psPwZt88hXZBAOdH06Y
|
|
|
364
365
|
evalvault/reports/release_notes.py,sha256=pZj0PBFT-4F_Ty-Kv5P69BuoOnmTCn4kznDcORFJd0w,4011
|
|
365
366
|
evalvault/scripts/__init__.py,sha256=NwEeIFQbkX4ml2R_PhtIoNtArDSX_suuoymgG_7Kwso,89
|
|
366
367
|
evalvault/scripts/regression_runner.py,sha256=SxZori5BZ8jVQ057Mf5V5FPgIVDccrV5oRONmnhuk8w,8438
|
|
367
|
-
evalvault-1.
|
|
368
|
-
evalvault-1.
|
|
369
|
-
evalvault-1.
|
|
370
|
-
evalvault-1.
|
|
371
|
-
evalvault-1.
|
|
368
|
+
evalvault-1.77.0.dist-info/METADATA,sha256=5ALEi-ipXLZDaMenmFQFAlYOsmHjNHtUWuR_yxzCpIM,8797
|
|
369
|
+
evalvault-1.77.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
|
|
370
|
+
evalvault-1.77.0.dist-info/entry_points.txt,sha256=Oj9Xc5gYcyUYYNmQfWI8NYGw7nN-3M-h2ipHIMlVn6o,65
|
|
371
|
+
evalvault-1.77.0.dist-info/licenses/LICENSE.md,sha256=3RNWY4jjtrQ_yYa-D-7I3XO12Ti7YzxsLV_dpykujvo,11358
|
|
372
|
+
evalvault-1.77.0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|