evalvault 1.64.0__py3-none-any.whl → 1.66.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- evalvault/adapters/inbound/api/adapter.py +14 -0
- evalvault/adapters/inbound/api/main.py +14 -4
- evalvault/adapters/inbound/api/routers/chat.py +543 -0
- evalvault/adapters/inbound/cli/commands/__init__.py +14 -7
- evalvault/adapters/inbound/cli/commands/artifacts.py +107 -0
- evalvault/adapters/inbound/cli/commands/calibrate_judge.py +283 -0
- evalvault/adapters/inbound/cli/commands/compare.py +290 -0
- evalvault/adapters/inbound/cli/commands/history.py +13 -85
- evalvault/adapters/inbound/cli/commands/ops.py +110 -0
- evalvault/adapters/inbound/cli/commands/profile_difficulty.py +160 -0
- evalvault/adapters/inbound/cli/commands/regress.py +251 -0
- evalvault/adapters/inbound/cli/commands/run.py +14 -0
- evalvault/adapters/inbound/cli/commands/run_helpers.py +21 -2
- evalvault/adapters/outbound/analysis/comparison_pipeline_adapter.py +49 -0
- evalvault/adapters/outbound/artifact_fs.py +16 -0
- evalvault/adapters/outbound/filesystem/__init__.py +3 -0
- evalvault/adapters/outbound/filesystem/difficulty_profile_writer.py +50 -0
- evalvault/adapters/outbound/filesystem/ops_snapshot_writer.py +13 -0
- evalvault/adapters/outbound/judge_calibration_adapter.py +36 -0
- evalvault/adapters/outbound/judge_calibration_reporter.py +57 -0
- evalvault/adapters/outbound/report/llm_report_generator.py +13 -1
- evalvault/adapters/outbound/storage/base_sql.py +41 -1
- evalvault/adapters/outbound/tracker/langfuse_adapter.py +13 -7
- evalvault/adapters/outbound/tracker/mlflow_adapter.py +5 -0
- evalvault/adapters/outbound/tracker/phoenix_adapter.py +68 -14
- evalvault/config/settings.py +21 -0
- evalvault/domain/entities/__init__.py +10 -0
- evalvault/domain/entities/judge_calibration.py +50 -0
- evalvault/domain/entities/prompt.py +1 -1
- evalvault/domain/entities/stage.py +11 -3
- evalvault/domain/metrics/__init__.py +8 -0
- evalvault/domain/metrics/registry.py +39 -3
- evalvault/domain/metrics/summary_accuracy.py +189 -0
- evalvault/domain/metrics/summary_needs_followup.py +45 -0
- evalvault/domain/metrics/summary_non_definitive.py +41 -0
- evalvault/domain/metrics/summary_risk_coverage.py +45 -0
- evalvault/domain/services/artifact_lint_service.py +268 -0
- evalvault/domain/services/benchmark_runner.py +1 -6
- evalvault/domain/services/custom_metric_snapshot.py +233 -0
- evalvault/domain/services/dataset_preprocessor.py +26 -0
- evalvault/domain/services/difficulty_profile_reporter.py +25 -0
- evalvault/domain/services/difficulty_profiling_service.py +304 -0
- evalvault/domain/services/evaluator.py +282 -27
- evalvault/domain/services/judge_calibration_service.py +495 -0
- evalvault/domain/services/ops_snapshot_service.py +159 -0
- evalvault/domain/services/prompt_registry.py +39 -10
- evalvault/domain/services/regression_gate_service.py +199 -0
- evalvault/domain/services/run_comparison_service.py +159 -0
- evalvault/domain/services/stage_event_builder.py +6 -1
- evalvault/domain/services/stage_metric_service.py +83 -18
- evalvault/domain/services/threshold_profiles.py +4 -0
- evalvault/domain/services/visual_space_service.py +79 -4
- evalvault/ports/outbound/__init__.py +4 -0
- evalvault/ports/outbound/artifact_fs_port.py +12 -0
- evalvault/ports/outbound/comparison_pipeline_port.py +22 -0
- evalvault/ports/outbound/difficulty_profile_port.py +15 -0
- evalvault/ports/outbound/judge_calibration_port.py +22 -0
- evalvault/ports/outbound/ops_snapshot_port.py +8 -0
- {evalvault-1.64.0.dist-info → evalvault-1.66.0.dist-info}/METADATA +25 -1
- {evalvault-1.64.0.dist-info → evalvault-1.66.0.dist-info}/RECORD +63 -31
- {evalvault-1.64.0.dist-info → evalvault-1.66.0.dist-info}/WHEEL +0 -0
- {evalvault-1.64.0.dist-info → evalvault-1.66.0.dist-info}/entry_points.txt +0 -0
- {evalvault-1.64.0.dist-info → evalvault-1.66.0.dist-info}/licenses/LICENSE.md +0 -0
|
@@ -233,13 +233,13 @@ def _aggregate_stage_metrics(metrics: Iterable[StageMetric]) -> dict[str, dict[s
|
|
|
233
233
|
|
|
234
234
|
aggregated: dict[str, dict[str, float]] = {}
|
|
235
235
|
for name, entries in buckets.items():
|
|
236
|
-
scores = [m.score for m in entries]
|
|
236
|
+
scores = [m.score for m in entries if m.score is not None]
|
|
237
237
|
threshold = next(
|
|
238
238
|
(m.threshold for m in entries if m.threshold is not None),
|
|
239
239
|
DEFAULT_STAGE_THRESHOLDS.get(name),
|
|
240
240
|
)
|
|
241
241
|
aggregated[name] = {
|
|
242
|
-
"avg": mean(scores) if scores else
|
|
242
|
+
"avg": mean(scores) if scores else 0.0,
|
|
243
243
|
"threshold": threshold if threshold is not None else DEFAULT_METRIC_THRESHOLD,
|
|
244
244
|
}
|
|
245
245
|
return aggregated
|
|
@@ -770,6 +770,77 @@ def _build_case_coords(result: TestCaseResult) -> dict[str, float | None]:
|
|
|
770
770
|
),
|
|
771
771
|
]
|
|
772
772
|
)
|
|
773
|
+
|
|
774
|
+
if x_value is None:
|
|
775
|
+
x_value = _weighted_average(
|
|
776
|
+
[
|
|
777
|
+
(
|
|
778
|
+
_centered_norm(
|
|
779
|
+
scores.get("summary_accuracy"), thresholds.get("summary_accuracy")
|
|
780
|
+
),
|
|
781
|
+
0.4,
|
|
782
|
+
),
|
|
783
|
+
(
|
|
784
|
+
_centered_norm(
|
|
785
|
+
scores.get("summary_risk_coverage"),
|
|
786
|
+
thresholds.get("summary_risk_coverage"),
|
|
787
|
+
),
|
|
788
|
+
0.3,
|
|
789
|
+
),
|
|
790
|
+
(
|
|
791
|
+
_centered_norm(
|
|
792
|
+
scores.get("summary_faithfulness"),
|
|
793
|
+
thresholds.get("summary_faithfulness"),
|
|
794
|
+
),
|
|
795
|
+
0.2,
|
|
796
|
+
),
|
|
797
|
+
(
|
|
798
|
+
_centered_norm(scores.get("summary_score"), thresholds.get("summary_score")),
|
|
799
|
+
0.1,
|
|
800
|
+
),
|
|
801
|
+
(
|
|
802
|
+
_centered_norm(
|
|
803
|
+
scores.get("entity_preservation"),
|
|
804
|
+
thresholds.get("entity_preservation"),
|
|
805
|
+
),
|
|
806
|
+
0.2,
|
|
807
|
+
),
|
|
808
|
+
]
|
|
809
|
+
)
|
|
810
|
+
|
|
811
|
+
if y_value is None:
|
|
812
|
+
y_value = _weighted_average(
|
|
813
|
+
[
|
|
814
|
+
(
|
|
815
|
+
_centered_norm(
|
|
816
|
+
scores.get("summary_accuracy"), thresholds.get("summary_accuracy")
|
|
817
|
+
),
|
|
818
|
+
0.35,
|
|
819
|
+
),
|
|
820
|
+
(
|
|
821
|
+
_centered_norm(
|
|
822
|
+
scores.get("summary_non_definitive"),
|
|
823
|
+
thresholds.get("summary_non_definitive"),
|
|
824
|
+
),
|
|
825
|
+
0.35,
|
|
826
|
+
),
|
|
827
|
+
(
|
|
828
|
+
_centered_norm(
|
|
829
|
+
scores.get("summary_needs_followup"),
|
|
830
|
+
thresholds.get("summary_needs_followup"),
|
|
831
|
+
),
|
|
832
|
+
0.3,
|
|
833
|
+
),
|
|
834
|
+
(
|
|
835
|
+
_centered_norm(
|
|
836
|
+
scores.get("entity_preservation"),
|
|
837
|
+
thresholds.get("entity_preservation"),
|
|
838
|
+
),
|
|
839
|
+
0.2,
|
|
840
|
+
),
|
|
841
|
+
]
|
|
842
|
+
)
|
|
843
|
+
|
|
773
844
|
return {"x": x_value, "y": y_value}
|
|
774
845
|
|
|
775
846
|
|
|
@@ -799,8 +870,12 @@ def _build_cluster_points(
|
|
|
799
870
|
|
|
800
871
|
points = []
|
|
801
872
|
for cluster_id, coords_list in clusters.items():
|
|
802
|
-
x_values = [
|
|
803
|
-
|
|
873
|
+
x_values = [
|
|
874
|
+
value for value in (c.get("x") for c in coords_list) if isinstance(value, (int, float))
|
|
875
|
+
]
|
|
876
|
+
y_values = [
|
|
877
|
+
value for value in (c.get("y") for c in coords_list) if isinstance(value, (int, float))
|
|
878
|
+
]
|
|
804
879
|
x_avg = mean(x_values) if x_values else None
|
|
805
880
|
y_avg = mean(y_values) if y_values else None
|
|
806
881
|
quadrant = _quadrant_label(x_avg, y_avg)
|
|
@@ -11,6 +11,7 @@ from evalvault.ports.outbound.benchmark_port import (
|
|
|
11
11
|
BenchmarkTaskResult,
|
|
12
12
|
)
|
|
13
13
|
from evalvault.ports.outbound.causal_analysis_port import CausalAnalysisPort
|
|
14
|
+
from evalvault.ports.outbound.comparison_pipeline_port import ComparisonPipelinePort
|
|
14
15
|
from evalvault.ports.outbound.dataset_port import DatasetPort
|
|
15
16
|
from evalvault.ports.outbound.domain_memory_port import (
|
|
16
17
|
BehaviorMemoryPort,
|
|
@@ -38,6 +39,7 @@ from evalvault.ports.outbound.improvement_port import (
|
|
|
38
39
|
PlaybookPort,
|
|
39
40
|
)
|
|
40
41
|
from evalvault.ports.outbound.intent_classifier_port import IntentClassifierPort
|
|
42
|
+
from evalvault.ports.outbound.judge_calibration_port import JudgeCalibrationPort
|
|
41
43
|
from evalvault.ports.outbound.korean_nlp_port import (
|
|
42
44
|
FaithfulnessResultProtocol,
|
|
43
45
|
KoreanNLPToolkitPort,
|
|
@@ -58,6 +60,7 @@ from evalvault.ports.outbound.tracker_port import TrackerPort
|
|
|
58
60
|
__all__ = [
|
|
59
61
|
"AnalysisCachePort",
|
|
60
62
|
"AnalysisPort",
|
|
63
|
+
"ComparisonPipelinePort",
|
|
61
64
|
"CausalAnalysisPort",
|
|
62
65
|
"DatasetPort",
|
|
63
66
|
"DomainMemoryPort",
|
|
@@ -83,6 +86,7 @@ __all__ = [
|
|
|
83
86
|
"PatternDefinitionProtocol",
|
|
84
87
|
"MetricPlaybookProtocol",
|
|
85
88
|
"ClaimImprovementProtocol",
|
|
89
|
+
"JudgeCalibrationPort",
|
|
86
90
|
"LLMFactoryPort",
|
|
87
91
|
"LLMPort",
|
|
88
92
|
"MethodRuntime",
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
from typing import Protocol
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class ArtifactFileSystemPort(Protocol):
|
|
8
|
+
def exists(self, path: Path) -> bool: ...
|
|
9
|
+
|
|
10
|
+
def is_dir(self, path: Path) -> bool: ...
|
|
11
|
+
|
|
12
|
+
def read_text(self, path: Path) -> str: ...
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import Protocol
|
|
4
|
+
|
|
5
|
+
from evalvault.domain.entities.analysis_pipeline import PipelineResult
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class ComparisonPipelinePort(Protocol):
|
|
9
|
+
def run_comparison(
|
|
10
|
+
self,
|
|
11
|
+
*,
|
|
12
|
+
run_ids: list[str],
|
|
13
|
+
compare_metrics: list[str] | None,
|
|
14
|
+
test_type: str,
|
|
15
|
+
parallel: bool,
|
|
16
|
+
concurrency: int | None,
|
|
17
|
+
report_type: str,
|
|
18
|
+
use_llm_report: bool,
|
|
19
|
+
) -> PipelineResult: ...
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
__all__ = ["ComparisonPipelinePort"]
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
from typing import Protocol
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class DifficultyProfileWriterPort(Protocol):
|
|
8
|
+
def write_profile(
|
|
9
|
+
self,
|
|
10
|
+
*,
|
|
11
|
+
output_path: Path,
|
|
12
|
+
artifacts_dir: Path,
|
|
13
|
+
envelope: dict[str, object],
|
|
14
|
+
artifacts: dict[str, object],
|
|
15
|
+
) -> dict[str, object]: ...
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import Protocol
|
|
4
|
+
|
|
5
|
+
from evalvault.domain.entities import EvaluationRun, SatisfactionFeedback
|
|
6
|
+
from evalvault.domain.entities.judge_calibration import JudgeCalibrationResult
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class JudgeCalibrationPort(Protocol):
|
|
10
|
+
def calibrate(
|
|
11
|
+
self,
|
|
12
|
+
run: EvaluationRun,
|
|
13
|
+
feedbacks: list[SatisfactionFeedback],
|
|
14
|
+
*,
|
|
15
|
+
labels_source: str,
|
|
16
|
+
method: str,
|
|
17
|
+
metrics: list[str],
|
|
18
|
+
holdout_ratio: float,
|
|
19
|
+
seed: int,
|
|
20
|
+
parallel: bool = False,
|
|
21
|
+
concurrency: int = 8,
|
|
22
|
+
) -> JudgeCalibrationResult: ...
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: evalvault
|
|
3
|
-
Version: 1.64.0
|
|
3
|
+
Version: 1.66.0
|
|
4
4
|
Summary: RAG evaluation system using Ragas with Phoenix/Langfuse tracing
|
|
5
5
|
Project-URL: Homepage, https://github.com/ntts9990/EvalVault
|
|
6
6
|
Project-URL: Documentation, https://github.com/ntts9990/EvalVault#readme
|
|
@@ -25,6 +25,7 @@ Classifier: Topic :: Software Development :: Quality Assurance
|
|
|
25
25
|
Classifier: Topic :: Software Development :: Testing
|
|
26
26
|
Classifier: Typing :: Typed
|
|
27
27
|
Requires-Python: >=3.12
|
|
28
|
+
Requires-Dist: chainlit>=2.9.5
|
|
28
29
|
Requires-Dist: chardet
|
|
29
30
|
Requires-Dist: fastapi>=0.128.0
|
|
30
31
|
Requires-Dist: instructor
|
|
@@ -137,12 +138,17 @@ English version? See `README.en.md`.
|
|
|
137
138
|
## Quick Links
|
|
138
139
|
|
|
139
140
|
- 문서 허브: `docs/INDEX.md`
|
|
141
|
+
- CLI 실행 시나리오 가이드: `docs/guides/RAG_CLI_WORKFLOW_TEMPLATES.md`
|
|
140
142
|
- 사용자 가이드: `docs/guides/USER_GUIDE.md`
|
|
141
143
|
- 개발 가이드: `docs/guides/DEV_GUIDE.md`
|
|
142
144
|
- 상태/로드맵: `docs/STATUS.md`, `docs/ROADMAP.md`
|
|
143
145
|
- 개발 백서(설계/운영/품질 기준): `docs/new_whitepaper/INDEX.md`
|
|
144
146
|
- Open RAG Trace: `docs/architecture/open-rag-trace-spec.md`
|
|
145
147
|
|
|
148
|
+
### 다음 개선 작업 메모
|
|
149
|
+
- 보험 요약 메트릭 확장 계획: `docs/guides/INSURANCE_SUMMARY_METRICS_PLAN.md`
|
|
150
|
+
- Prompt 반복 적용 계획: `docs/guides/repeat_query.md`
|
|
151
|
+
|
|
146
152
|
---
|
|
147
153
|
|
|
148
154
|
## EvalVault가 해결하는 문제
|
|
@@ -470,6 +476,24 @@ npm run dev
|
|
|
470
476
|
- Ragas 계열: `faithfulness`, `answer_relevancy`, `context_precision`, `context_recall`, `factual_correctness`, `semantic_similarity`
|
|
471
477
|
- 커스텀 예시(도메인): `insurance_term_accuracy`
|
|
472
478
|
|
|
479
|
+
### 요약 메트릭 설계 근거 (summary_score, summary_faithfulness, entity_preservation)
|
|
480
|
+
|
|
481
|
+
### 커스텀 메트릭 스냅샷 (평가 방식/과정/결과 기록)
|
|
482
|
+
- 평가 방식/입출력/규칙/구현 파일 해시를 `run.tracker_metadata.custom_metric_snapshot`에 기록합니다.
|
|
483
|
+
- Excel `CustomMetrics` 시트와 Langfuse/Phoenix/MLflow artifact에도 함께 저장됩니다.
|
|
484
|
+
|
|
485
|
+
- `summary_faithfulness`: 요약의 모든 주장이 컨텍스트에 근거하는지 평가합니다. 환각/왜곡 리스크를 직접적으로 측정합니다.
|
|
486
|
+
- `summary_score`: 컨텍스트 대비 요약의 핵심 정보 보존/간결성 균형을 평가합니다. 정답 요약 단일 기준의 편향을 줄입니다.
|
|
487
|
+
- `entity_preservation`: 금액·기간·조건·면책 등 보험 약관에서 중요한 엔티티가 요약에 유지되는지 측정합니다.
|
|
488
|
+
|
|
489
|
+
**보험 도메인 특화 근거**
|
|
490
|
+
- 보험 약관에서 치명적인 요소(면책, 자기부담, 한도, 조건 등)를 키워드로 직접 반영하고, 금액/기간/비율 같은 핵심 엔티티를 보존하도록 설계했습니다.
|
|
491
|
+
- 범용 규칙(숫자/기간/금액)과 보험 특화 키워드를 함께 사용하므로, 현재 상태는 “보험 리스크 중심의 약한 도메인 특화”로 보는 것이 정확합니다.
|
|
492
|
+
|
|
493
|
+
**해석 주의사항**
|
|
494
|
+
- 세 메트릭 모두 `contexts` 품질에 크게 의존합니다. 컨텍스트가 부정확/과도하면 점수가 낮아질 수 있습니다.
|
|
495
|
+
- `summary_score`는 키프레이즈 기반이므로, 표현이 달라지면 점수가 낮게 나올 수 있습니다.
|
|
496
|
+
|
|
473
497
|
정확한 옵션/운영 레시피는 `docs/guides/USER_GUIDE.md`를 기준으로 최신화합니다.
|
|
474
498
|
|
|
475
499
|
---
|
|
@@ -5,10 +5,11 @@ evalvault/mkdocs_helpers.py,sha256=1AKVQ1W2_VO4qclhfyefyU9Dz1Hzkh1DWDwsFMe24jc,3
|
|
|
5
5
|
evalvault/adapters/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
6
6
|
evalvault/adapters/inbound/__init__.py,sha256=SG1svel1PwqetnqVpKFLSv612_WwGwLTbFpYgwk6FMw,166
|
|
7
7
|
evalvault/adapters/inbound/api/__init__.py,sha256=LeVVttCA3tLKoHA2PO4z3y8VkfVcf3Bq8CZSzo91lf4,34
|
|
8
|
-
evalvault/adapters/inbound/api/adapter.py,sha256=
|
|
9
|
-
evalvault/adapters/inbound/api/main.py,sha256=
|
|
8
|
+
evalvault/adapters/inbound/api/adapter.py,sha256=HgWSYyUxvJPlaSG158WVzpPckpPCYV9Ec3CWN8rLFdI,69118
|
|
9
|
+
evalvault/adapters/inbound/api/main.py,sha256=skYtmDngdOBryyLXQpNGlSd2Te6RF6GtfIwcMACPHFU,7068
|
|
10
10
|
evalvault/adapters/inbound/api/routers/__init__.py,sha256=q07_YF9TnBl68bqcRCvhPU4-zRTyvmPoHVehwO6W7QM,19
|
|
11
11
|
evalvault/adapters/inbound/api/routers/benchmark.py,sha256=yevntbZcNtMvbVODsITUBgR1Ka4pdFQrXBJJ4K4Jyr4,4477
|
|
12
|
+
evalvault/adapters/inbound/api/routers/chat.py,sha256=3S6-ljiY1COlDuVDH5yzMJs9SO0EkuosRcJIYScHWvI,18143
|
|
12
13
|
evalvault/adapters/inbound/api/routers/config.py,sha256=LygN0fVMr8NFtj5zuQXnVFhoafx56Txa98vpwtPa4Jc,4104
|
|
13
14
|
evalvault/adapters/inbound/api/routers/domain.py,sha256=RsR7GIFMjccDN7vpG1uDyk9n1DnCTH18JDGAX7o4Qqc,3648
|
|
14
15
|
evalvault/adapters/inbound/api/routers/knowledge.py,sha256=yb_e7OEPtwldOAzHTGiWe7jShHw2JdpOFnzGPMceRsg,7109
|
|
@@ -16,28 +17,34 @@ evalvault/adapters/inbound/api/routers/pipeline.py,sha256=8UgQzNFHcuqS61s69mOrPe
|
|
|
16
17
|
evalvault/adapters/inbound/api/routers/runs.py,sha256=rydOvwWk24QIYafu3XYS3oL_VVCE_jHDmjADhA19T1s,40059
|
|
17
18
|
evalvault/adapters/inbound/cli/__init__.py,sha256=a42flC5NK-VfbdbBrE49IrUL5zAyKdXZYJVM6E3NTE0,675
|
|
18
19
|
evalvault/adapters/inbound/cli/app.py,sha256=ytNgHRg9ZTAl33AkB1wIL8RKfQ_Cf8fsy0gSsLTs7Ew,1603
|
|
19
|
-
evalvault/adapters/inbound/cli/commands/__init__.py,sha256=
|
|
20
|
+
evalvault/adapters/inbound/cli/commands/__init__.py,sha256=kw0SAEwOce1v92Pd6YpQjSYsdwLU95TQqbKGM44fNhY,3995
|
|
20
21
|
evalvault/adapters/inbound/cli/commands/agent.py,sha256=YlOYMEzzS1aSKDKD_a7UK3St18X6GXGkdTatrzyd8Zc,7555
|
|
21
22
|
evalvault/adapters/inbound/cli/commands/analyze.py,sha256=aMi1BEDOX3yhN-ppBftDssPQLB5TdzIfpx9U7CZEgWo,48932
|
|
22
23
|
evalvault/adapters/inbound/cli/commands/api.py,sha256=YdbJ_-QEajnFcjTa7P2heLMjFKpeQ4nWP_p-HvfYkEo,1943
|
|
24
|
+
evalvault/adapters/inbound/cli/commands/artifacts.py,sha256=bE8FQxmnU0mMIAPx5en8aKrtfNNkrbWoLxIX4ZT9D5c,3776
|
|
23
25
|
evalvault/adapters/inbound/cli/commands/benchmark.py,sha256=RZ4nRTF7d6hDZug-Pw8dGcFEyWdOKclwqkvS-gN4VWo,41097
|
|
24
26
|
evalvault/adapters/inbound/cli/commands/calibrate.py,sha256=-UnT0LQH40U5lzMLqMJ7DOTLa3mt5P_fJL2XzqIkvu4,4223
|
|
27
|
+
evalvault/adapters/inbound/cli/commands/calibrate_judge.py,sha256=hJBlNl9Rt-ZtoIu-HKfudhZb2j2HOoEnRbiG4n5TOTE,10348
|
|
28
|
+
evalvault/adapters/inbound/cli/commands/compare.py,sha256=X_uyJoT_yQP43RTWMLCwMuHwhOb8wCqFShjy477V-2c,10384
|
|
25
29
|
evalvault/adapters/inbound/cli/commands/config.py,sha256=Mv9IQHBFHZ3I2stUzHDgLDn-Znt_Awdy3j-sk5ruUmw,6069
|
|
26
30
|
evalvault/adapters/inbound/cli/commands/debug.py,sha256=KU-hL1gLhpjV2ZybDQgGMwRfm-hCynkrqY4UzETfL9k,2234
|
|
27
31
|
evalvault/adapters/inbound/cli/commands/domain.py,sha256=dL9iqBlnr5mDeS1unXW6uxE0qp6yfnxj-ls6k3EenwI,27279
|
|
28
32
|
evalvault/adapters/inbound/cli/commands/experiment.py,sha256=jficaFOsZ9EMHrPHCOZjq6jpFrgmqCwmIo--wA_OcvQ,10389
|
|
29
33
|
evalvault/adapters/inbound/cli/commands/gate.py,sha256=SxBSHALhekw9OVuJcuk64tkS8YMDDsgmhMALTE38wwY,9956
|
|
30
34
|
evalvault/adapters/inbound/cli/commands/generate.py,sha256=7IPvd0WAwPxt9uaxmzqWCwt0b2VC_wXiVxyJ3lP-xys,8562
|
|
31
|
-
evalvault/adapters/inbound/cli/commands/history.py,sha256=
|
|
35
|
+
evalvault/adapters/inbound/cli/commands/history.py,sha256=3xf1l-I8IW-1Vtne9ypepDMDRRbwOpEvAjh4Qf9tV2w,8420
|
|
32
36
|
evalvault/adapters/inbound/cli/commands/init.py,sha256=7q86fUeBVA08fU_N0lAV6Lakxirq4val2jIyALlDy3E,8822
|
|
33
37
|
evalvault/adapters/inbound/cli/commands/kg.py,sha256=ycV9Xj6SUUJLTyTfLZcjXDVLcZqwo7Gw878ZhZAeDoc,19155
|
|
34
38
|
evalvault/adapters/inbound/cli/commands/langfuse.py,sha256=aExhZ5WYT0FzJI4v1sF-a1jqy9b1BF46_HBtfiQjVGI,4085
|
|
35
39
|
evalvault/adapters/inbound/cli/commands/method.py,sha256=OWdoofhvsDJchgNKnGGjXfIsZ-IHKZEo6RlmTsZRRYM,19124
|
|
40
|
+
evalvault/adapters/inbound/cli/commands/ops.py,sha256=2r6hdrZ7STnWMhtzYmv8jF_ukBq4HuKB1El6YnyxwrY,4035
|
|
36
41
|
evalvault/adapters/inbound/cli/commands/phoenix.py,sha256=LQi3KTLq1ybjjBuz92oQ6lYyBS3mHrCHk0qe-7bqB4U,15611
|
|
37
42
|
evalvault/adapters/inbound/cli/commands/pipeline.py,sha256=NeqWLzO9kRDuZd0pHAIHglP3F7VzoNOU4JI0QcSZ120,7788
|
|
43
|
+
evalvault/adapters/inbound/cli/commands/profile_difficulty.py,sha256=nOJH3iqgLAlXq4keLBj5oqpiRCg0jjGgT-7Q57HxEh8,6665
|
|
38
44
|
evalvault/adapters/inbound/cli/commands/prompts.py,sha256=lddde5VbjYaqN_9gHPLNu6DWpg5fE-KqZzjN-XYwvJw,27153
|
|
39
|
-
evalvault/adapters/inbound/cli/commands/
|
|
40
|
-
evalvault/adapters/inbound/cli/commands/
|
|
45
|
+
evalvault/adapters/inbound/cli/commands/regress.py,sha256=Dy8hUOdjapxOW9Hoov0DHHblkMaExiqWfYS14CaC9Kk,8806
|
|
46
|
+
evalvault/adapters/inbound/cli/commands/run.py,sha256=aKoZcQbOJ1KB_4zPk4L-AWw3u9vGWg3SaooR7A3Xd_Y,119910
|
|
47
|
+
evalvault/adapters/inbound/cli/commands/run_helpers.py,sha256=93jFUg8QLrD38QU2JhOhFMoHDWUphSEKRdJ5KcUvrkQ,40806
|
|
41
48
|
evalvault/adapters/inbound/cli/commands/stage.py,sha256=oRC9c5CysLX90Iy5Ba1pc_00DaOBS78lcBvzkbdrGRM,17123
|
|
42
49
|
evalvault/adapters/inbound/cli/utils/__init__.py,sha256=QPNKneZS-Z-tTnYYxtgJXgcJWY6puUlRQcKrn7Mlv1M,685
|
|
43
50
|
evalvault/adapters/inbound/cli/utils/analysis_io.py,sha256=RHkKEq4e-PtbtRDlXAJWU80RYHNPw-O5V9_GujdaGfc,13393
|
|
@@ -52,6 +59,9 @@ evalvault/adapters/inbound/mcp/__init__.py,sha256=kctJsmaP4fY94T3WCOhgANk3TCLdfb
|
|
|
52
59
|
evalvault/adapters/inbound/mcp/schemas.py,sha256=KUKm4gEc-UDyF8sUbyzAnAIzyZ6DcXsaCEIVR3oESNQ,4469
|
|
53
60
|
evalvault/adapters/inbound/mcp/tools.py,sha256=fnvkWS5p93o3FNmUSbh3EW4jCAVwtBKHX6kDuEbXkK8,24219
|
|
54
61
|
evalvault/adapters/outbound/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
62
|
+
evalvault/adapters/outbound/artifact_fs.py,sha256=nySk7-10B9rQpV9EslBmRUr0gFIi9V1E_E_wdC1WpIU,439
|
|
63
|
+
evalvault/adapters/outbound/judge_calibration_adapter.py,sha256=vu5dVJVo5AXa5ULcx8WE-6YgfD718BC8Ci1_HAdwy20,1155
|
|
64
|
+
evalvault/adapters/outbound/judge_calibration_reporter.py,sha256=R0nId6P1jYQ3M3636knPNFztuc8kewDBKsg2LLcUw58,2005
|
|
55
65
|
evalvault/adapters/outbound/analysis/__init__.py,sha256=TLuS-eKfXg97_Db5td1nTZkD3BErRLZLic1v2EAM2sA,6185
|
|
56
66
|
evalvault/adapters/outbound/analysis/analysis_report_module.py,sha256=xah3wgJErHD_Hpb1YAwWRsxr8xaC8SW--CpNA7IgfxI,3957
|
|
57
67
|
evalvault/adapters/outbound/analysis/base_module.py,sha256=eUN77SSD2KR4WKU7gLY8TlVewETx_YIZvPT4LUnBv4o,2523
|
|
@@ -59,6 +69,7 @@ evalvault/adapters/outbound/analysis/bm25_searcher_module.py,sha256=I8BsXrHaOVxg
|
|
|
59
69
|
evalvault/adapters/outbound/analysis/causal_adapter.py,sha256=Rt5QcoLDEjx8u_yidACz3u8SbAVYSJO6lLu6udwnd4U,27410
|
|
60
70
|
evalvault/adapters/outbound/analysis/causal_analyzer_module.py,sha256=hBcTx7ZyUZ6HQ6I6W2VvSZ1ndatlgMen2KjKXk_Ltx4,6780
|
|
61
71
|
evalvault/adapters/outbound/analysis/common.py,sha256=H1RqNBiOt7WRcHUM3jFydd3850GFQhEDUu8WBEhtMws,5734
|
|
72
|
+
evalvault/adapters/outbound/analysis/comparison_pipeline_adapter.py,sha256=D_ZPVgQHz3Cn3fxxl-TgLEoo_RNbhsOJCvqH-cJ2Lf4,1577
|
|
62
73
|
evalvault/adapters/outbound/analysis/comparison_report_module.py,sha256=0tTMZB5qpGMaxlcWtTtTln7Y_jFEDFaaW7V-UyboBDM,2343
|
|
63
74
|
evalvault/adapters/outbound/analysis/data_loader_module.py,sha256=6X0-ZcFtEfonQnbJ0POqmHXstJ1Wq1NvpijtbKSeEm0,3749
|
|
64
75
|
evalvault/adapters/outbound/analysis/detailed_report_module.py,sha256=59CjuNQthlroJyGEhQap3PgahWfzXciKx_DD10gHXjM,3897
|
|
@@ -125,6 +136,9 @@ evalvault/adapters/outbound/documents/ocr/paddleocr_backend.py,sha256=AORA9JUV5u
|
|
|
125
136
|
evalvault/adapters/outbound/domain_memory/__init__.py,sha256=ksMX1IkNiDqQHLtJe9TOXiLC1iouGt6_QSdPLiALHHs,229
|
|
126
137
|
evalvault/adapters/outbound/domain_memory/domain_memory_schema.sql,sha256=APlNhJNFZdcm7Sb2tvr7V8JMiLinmXkx1gd6pgTf9ZI,11268
|
|
127
138
|
evalvault/adapters/outbound/domain_memory/sqlite_adapter.py,sha256=RWobnFgvxiItxFAr6niY89sT19O-cnExTbP0I7UAY78,85186
|
|
139
|
+
evalvault/adapters/outbound/filesystem/__init__.py,sha256=eTQLuVPMpEctE92TtegKQT3wuJTIhiBS38BzfxRV-N0,122
|
|
140
|
+
evalvault/adapters/outbound/filesystem/difficulty_profile_writer.py,sha256=9qO9_3E-SL6ngDOia6zcw680S1fQloxo32f6hx76YHs,1626
|
|
141
|
+
evalvault/adapters/outbound/filesystem/ops_snapshot_writer.py,sha256=sOuUk8VD8yWwG8508uw1zqSnNsp3dQrF9bp9T2z-n48,448
|
|
128
142
|
evalvault/adapters/outbound/improvement/__init__.py,sha256=tXA6vaZOLvqwJpyjGMiC8WrvszMmvUPzJnHjvJhQxSI,1143
|
|
129
143
|
evalvault/adapters/outbound/improvement/insight_generator.py,sha256=U16l0euCZy0_08Zb_i0eijXSjS5t-iq0iMUfttwPqgI,17636
|
|
130
144
|
evalvault/adapters/outbound/improvement/pattern_detector.py,sha256=uFFjWNy8A4KIihw_ANtL6At73RirwNnFnN4rFsEvcXk,24602
|
|
@@ -164,10 +178,10 @@ evalvault/adapters/outbound/nlp/korean/toolkit_factory.py,sha256=x3v-AAkVInOabC4
|
|
|
164
178
|
evalvault/adapters/outbound/phoenix/sync_service.py,sha256=i6gHpNiZXKQ5yzV9B2TPb-P1N45k_Ck5ruzh3oqp4d8,9122
|
|
165
179
|
evalvault/adapters/outbound/report/__init__.py,sha256=8OUduTHnWkBLHYrc7mBg45DnAwz0RgvSJmz1HqxVjLY,477
|
|
166
180
|
evalvault/adapters/outbound/report/dashboard_generator.py,sha256=Dcu18NTK4lS8XNKnnnquagpZkd-4TSf5Mb2isFNW5Pk,7800
|
|
167
|
-
evalvault/adapters/outbound/report/llm_report_generator.py,sha256=
|
|
181
|
+
evalvault/adapters/outbound/report/llm_report_generator.py,sha256=i_iXfY8qutIb8TsvLKyMLnijsA0yiNJ3rBEFg4zVqcE,26858
|
|
168
182
|
evalvault/adapters/outbound/report/markdown_adapter.py,sha256=5PS72h_qe4ZtYs-umhX5TqQL2k5SuDaCUc6rRw9AKRw,16761
|
|
169
183
|
evalvault/adapters/outbound/storage/__init__.py,sha256=n5R6thAPTx1leSwv6od6nBWcLWFa-UYD6cOLzN89T8I,614
|
|
170
|
-
evalvault/adapters/outbound/storage/base_sql.py,sha256=
|
|
184
|
+
evalvault/adapters/outbound/storage/base_sql.py,sha256=bNjJr941wqeLgv4E772JlOer1Q8OpJWxyotsNNn_R98,42536
|
|
171
185
|
evalvault/adapters/outbound/storage/benchmark_storage_adapter.py,sha256=Qgf9xSSIkYQRpG4uLzcUdoYO9LTQDQ4tFRkkMYer-WA,9803
|
|
172
186
|
evalvault/adapters/outbound/storage/postgres_adapter.py,sha256=HLaoQ3YJDFwOxeY0S92oPIqb-7EgWSasgt89RM86vr0,47148
|
|
173
187
|
evalvault/adapters/outbound/storage/postgres_schema.sql,sha256=A9MfO0pjf4kjxoRj2KPI0Gg1cbX13I2YE3oieT-PGiI,8906
|
|
@@ -180,10 +194,10 @@ evalvault/adapters/outbound/tracer/open_rag_trace_decorators.py,sha256=LFnk-3FSL
|
|
|
180
194
|
evalvault/adapters/outbound/tracer/open_rag_trace_helpers.py,sha256=D48Mbj-ioDKztjhV9513Q5DiUNiVdO60B_2sWMFEmnI,3520
|
|
181
195
|
evalvault/adapters/outbound/tracer/phoenix_tracer_adapter.py,sha256=inmTAolAVsm0IrszE9VTJoI7HSvGGAnGNZVu_vZRAGg,741
|
|
182
196
|
evalvault/adapters/outbound/tracker/__init__.py,sha256=Suu5BznOK5uTuD5_jS8JMZd8RPfQNlddLxHCBvMTm_4,358
|
|
183
|
-
evalvault/adapters/outbound/tracker/langfuse_adapter.py,sha256=
|
|
197
|
+
evalvault/adapters/outbound/tracker/langfuse_adapter.py,sha256=uI-t5v9AC5VUMYsIc1FHYImourZeErGMXB0_prOMErc,18839
|
|
184
198
|
evalvault/adapters/outbound/tracker/log_sanitizer.py,sha256=ilKTTSzsHslQYc-elnWu0Z3HKNNw1D1iI0_cCvYbo1M,2653
|
|
185
|
-
evalvault/adapters/outbound/tracker/mlflow_adapter.py,sha256=
|
|
186
|
-
evalvault/adapters/outbound/tracker/phoenix_adapter.py,sha256=
|
|
199
|
+
evalvault/adapters/outbound/tracker/mlflow_adapter.py,sha256=6pSxbxSDZE7jN7uSMU6VFg0JlO7cBiMLYcd53NYpfcY,7350
|
|
200
|
+
evalvault/adapters/outbound/tracker/phoenix_adapter.py,sha256=8p2qJeKn6OvIxNbD16h_QrhmCzKIBIf8_ej535MNn_A,26443
|
|
187
201
|
evalvault/config/__init__.py,sha256=UCgeDx62M2gOuFvdN29wWwny2fdH4bPY_uUC3-42eDw,1297
|
|
188
202
|
evalvault/config/agent_types.py,sha256=EP2Pv3ZtOzDXIvIa-Hnd1to9JIbMUtGitrlwzZtx0Ys,13418
|
|
189
203
|
evalvault/config/domain_config.py,sha256=rOgNA2T8NWlDzcEFC0shdUCCww0lI1E5fUm5QrKQSZI,9264
|
|
@@ -192,10 +206,10 @@ evalvault/config/langfuse_support.py,sha256=DEzVMfMGGf1V45W_2oUG-NCDfsYI4UUdnYJI
|
|
|
192
206
|
evalvault/config/model_config.py,sha256=KlzDbGyDLeOGE7ElekFFk5YjjT5u8i6KO2B4EyZkLnI,3542
|
|
193
207
|
evalvault/config/phoenix_support.py,sha256=e6RPWd6Qb7KU6Q8pLaYTpJGWULtvEEU6B0xHWyVyOH0,13604
|
|
194
208
|
evalvault/config/secret_manager.py,sha256=YjPMuNqeBrAR2BzCJvsBNUExaU4TBSFyZ8kVYZZifqA,4172
|
|
195
|
-
evalvault/config/settings.py,sha256=
|
|
209
|
+
evalvault/config/settings.py,sha256=xvoNma4CHAd8R_nF0DL4MUWXBWCR5M0C68NPSPLT5JQ,18285
|
|
196
210
|
evalvault/config/playbooks/improvement_playbook.yaml,sha256=9F9WVVCydFfz6zUuGYzZ4PKdW1LLtcBKVF36T7xT764,26965
|
|
197
211
|
evalvault/domain/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
198
|
-
evalvault/domain/entities/__init__.py,sha256=
|
|
212
|
+
evalvault/domain/entities/__init__.py,sha256=wszRJ1Imdc5NJ1bQPC2udk-mAgFdlw4uZV5IPNjLpHQ,3669
|
|
199
213
|
evalvault/domain/entities/analysis.py,sha256=gcMtumC66g-AIqb2LgfMpm5BMzwJhJkjg-zuybNoJCM,15208
|
|
200
214
|
evalvault/domain/entities/analysis_pipeline.py,sha256=hD9rFHMa4rUq0InRkSKhh6HQ9ZeNYAHKADzs-kWRP04,16845
|
|
201
215
|
evalvault/domain/entities/benchmark.py,sha256=CVbz_eW7Y9eM7wG7xA_xmldTIs72csdoTmu3E0NKoMU,18475
|
|
@@ -205,42 +219,51 @@ evalvault/domain/entities/debug.py,sha256=r92lgvOpq2svw70syJIo78muRAvrSn5h1JByH_
|
|
|
205
219
|
evalvault/domain/entities/experiment.py,sha256=oWjbu0IJZ6oIRcnA-8ppeJDgp57Tv8ZjQ3UOZ0X9KJ8,2576
|
|
206
220
|
evalvault/domain/entities/feedback.py,sha256=xiaZaUQhyuxyW_i2scXt8eKZshMC6tXe3981e-uukw8,1604
|
|
207
221
|
evalvault/domain/entities/improvement.py,sha256=WHI7q1jXRxkuHhBWOrpk8UdLaH0UwjZVjRIDsqVDyZo,19322
|
|
222
|
+
evalvault/domain/entities/judge_calibration.py,sha256=fhQEI7g2nZuG1OliikhxgefcFAJldDqEmfTs9Mp-FPk,1234
|
|
208
223
|
evalvault/domain/entities/kg.py,sha256=8awN1M4vxAGQZk_ZG8i2CXKTizQ8FA1VCLhUWHZq0U8,3001
|
|
209
224
|
evalvault/domain/entities/memory.py,sha256=bfS75q8K8_jNrB7IYh4mjP8Lkyj-I0TVsmHCP0FuICw,8423
|
|
210
225
|
evalvault/domain/entities/method.py,sha256=a3jZi7SjcpK3HeVyVwQkUMwpnmg2RbxCnH4NqYPLCOI,1157
|
|
211
|
-
evalvault/domain/entities/prompt.py,sha256=
|
|
226
|
+
evalvault/domain/entities/prompt.py,sha256=lQlRnHEKY69GWTC-cUIu0DMuPfJ9UWm6Sm4KTNjVwfY,2920
|
|
212
227
|
evalvault/domain/entities/prompt_suggestion.py,sha256=Ep_XSjdYUj7pFSCMyeeZKs8yTnp74AVx05Zqr7829PE,1243
|
|
213
228
|
evalvault/domain/entities/rag_trace.py,sha256=sZgnkG4fK6KOe3Np6TYAZ_tPnsRbOmucDSQns35U1n4,11868
|
|
214
229
|
evalvault/domain/entities/result.py,sha256=OaGHMDLWMW2O4fNVuVTUvWFVBQ1iu93OD_oI3NumrCQ,10697
|
|
215
|
-
evalvault/domain/entities/stage.py,sha256=
|
|
216
|
-
evalvault/domain/metrics/__init__.py,sha256=
|
|
230
|
+
evalvault/domain/entities/stage.py,sha256=UqS59sjoMs_bhMupNtvagbIx8QgHgFjWoRPhJ3uJP2s,7426
|
|
231
|
+
evalvault/domain/metrics/__init__.py,sha256=Ros3CWg5in1xlEdMa0WUSG602SBVkxw2Zbro-XUlmxU,1214
|
|
217
232
|
evalvault/domain/metrics/analysis_registry.py,sha256=JZpBrBs7-JExHKYuEML6Vg_uYLm-WniBE3BfiU5OtJg,7641
|
|
218
233
|
evalvault/domain/metrics/confidence.py,sha256=AX4oeN28OvmMkwD0pT-jskkOlXh87C1pe2W9P1sF69g,17224
|
|
219
234
|
evalvault/domain/metrics/contextual_relevancy.py,sha256=xAPYUv_0TM4j4WOutOSGftNln_l-2Ev6qpANeu4REv8,11057
|
|
220
235
|
evalvault/domain/metrics/entity_preservation.py,sha256=uSCbaETceE5PbGn-230Rm8pryOA8jDkkeOwAkWxA65g,6500
|
|
221
236
|
evalvault/domain/metrics/insurance.py,sha256=5NPeAi_86rpuZRgV4KhzomGrq3Uw2jjglN6FfA_AO8o,4040
|
|
222
237
|
evalvault/domain/metrics/no_answer.py,sha256=x6vRyOa1jw-qsnw9kOYT8YMPdLElaDRu7zjNCpyJfqM,8237
|
|
223
|
-
evalvault/domain/metrics/registry.py,sha256=
|
|
238
|
+
evalvault/domain/metrics/registry.py,sha256=1CKPxSjdXK739zhzwodukGDL-dIhlJJH30cuP-czQWc,6926
|
|
224
239
|
evalvault/domain/metrics/retrieval_rank.py,sha256=F55ByadJBowyKHKBmKAZ0T0qN_R1_7UNu-MiLnT4Ypg,14675
|
|
240
|
+
evalvault/domain/metrics/summary_accuracy.py,sha256=Hr4QS1e4Rxt1MgcTj5rElKuPw9rWS-zGkI0d8wB5dwA,5988
|
|
241
|
+
evalvault/domain/metrics/summary_needs_followup.py,sha256=5kExtZxxankP7csAAIZe_1uRFeBD7NQK-N15b5d0awM,1357
|
|
242
|
+
evalvault/domain/metrics/summary_non_definitive.py,sha256=1EE-z0Ib66gpjc0MGZHmZJHJfpoACSIldgOwFkUNxg0,1029
|
|
243
|
+
evalvault/domain/metrics/summary_risk_coverage.py,sha256=Fo-dMg_jU4MCr0YqOZzBZymwEbG9y2H6eLX-jmuS8IU,1777
|
|
225
244
|
evalvault/domain/metrics/terms_dictionary.json,sha256=-ZQmpx6yMOYoAOpcLj-xK2LkAeCbAw0EUb6-syIOKS0,3801
|
|
226
245
|
evalvault/domain/metrics/text_match.py,sha256=P-YTZs9ekDqEmxLNBP8eXnMRymPdC8V4dJPtwG2ajVM,10219
|
|
227
246
|
evalvault/domain/services/__init__.py,sha256=X5Af1kf_vSt3S3mFwOV6OQdro-lFxwbVdNd7nJznkC8,1024
|
|
228
247
|
evalvault/domain/services/analysis_service.py,sha256=oUEtfJHB3bNJ_Ksygx-pjnLm4CTk7_rDvDbqfkAfFD4,10838
|
|
248
|
+
evalvault/domain/services/artifact_lint_service.py,sha256=80P46weoj9lBxOqg_ViHZEQ6Cfo69XV4cniZlmMsti0,8434
|
|
229
249
|
evalvault/domain/services/async_batch_executor.py,sha256=qYFRl7CGmv56XppeRhInde7Fw0GESCoZh8V-Iv_1hQQ,11140
|
|
230
250
|
evalvault/domain/services/batch_executor.py,sha256=cYA_Q1es46n_PYeyyfm0iM2b7GGVtDoOGoMxexrf6tI,1243
|
|
231
251
|
evalvault/domain/services/benchmark_report_service.py,sha256=IF-zqtvpsJ0ONJWUEw4ghKiC7ka_PWxUBO10lPaDRmI,15083
|
|
232
|
-
evalvault/domain/services/benchmark_runner.py,sha256=
|
|
252
|
+
evalvault/domain/services/benchmark_runner.py,sha256=4tvQEDrfvp2fC2luUPuPBcRjEPLHdrdystLpe3PnBqM,26046
|
|
233
253
|
evalvault/domain/services/benchmark_service.py,sha256=TrmnvBMAPmcs0PewGZcn2rxHbviZ8KxmDvJCeyqm28I,6286
|
|
234
254
|
evalvault/domain/services/cache_metrics.py,sha256=FKNZoxym30lc1SxTGmTn3Pr-PDNoAqgC9_d_IdF_jOQ,3463
|
|
235
255
|
evalvault/domain/services/cluster_map_builder.py,sha256=qPKMPj-eSqECJSCOKvv3ZETgIwxwiKWbU3d6_feCoDg,6885
|
|
236
|
-
evalvault/domain/services/
|
|
256
|
+
evalvault/domain/services/custom_metric_snapshot.py,sha256=_MLOzBlHTRyTQ2NuunZ_lrLVF0__kvEcCUxXVVCeoRA,9684
|
|
257
|
+
evalvault/domain/services/dataset_preprocessor.py,sha256=PnhLiPk0E9DIzjUr8N75296CCfl1AUXGv-lpaXBi0Ok,14797
|
|
237
258
|
evalvault/domain/services/debug_report_service.py,sha256=SGdFh8tctAIq7RotFbg47eetxdYSS4Yju7-LOzpCMCM,4386
|
|
259
|
+
evalvault/domain/services/difficulty_profile_reporter.py,sha256=uIj9-eiO2dDvQ6tP-DJBddfBq8VT63st0wtNC8Co4NQ,680
|
|
260
|
+
evalvault/domain/services/difficulty_profiling_service.py,sha256=wB3T2iz_dZjvj7wiU2fnM0XT-doMNokV_YqSt24Wc6A,11078
|
|
238
261
|
evalvault/domain/services/document_chunker.py,sha256=u05N1xSBcJuJPUfP7WmpY_EyHuUMuGMsPSM9qs-ID8c,2494
|
|
239
262
|
evalvault/domain/services/document_versioning.py,sha256=M1qZaMpQ2exVT1wkVAmvEPPuoYibJDt0F7pYfTK7mvE,3323
|
|
240
263
|
evalvault/domain/services/domain_learning_hook.py,sha256=rhKBmdnrJyfGzFNsNxzyv8jZO26-WOosHSmBV_9qdJg,7176
|
|
241
264
|
evalvault/domain/services/embedding_overlay.py,sha256=ZTNxUPXpHGbQ3Uri5DD3feTUFn7qrhuNshhyCQEvRuM,3559
|
|
242
265
|
evalvault/domain/services/entity_extractor.py,sha256=f3Rf5saK8QsgetLNK1Hbxzt8PtttJZCicSR63S8DJ5k,14141
|
|
243
|
-
evalvault/domain/services/evaluator.py,sha256=
|
|
266
|
+
evalvault/domain/services/evaluator.py,sha256=Fvth2VdckDJvGuwxbXPnvPfQU59WZSJHV63H4qji4lM,78815
|
|
244
267
|
evalvault/domain/services/experiment_comparator.py,sha256=IBrxIwux-8GucwlLx6e5lUqB9miSPvBLGJK9ctoW7Y0,3299
|
|
245
268
|
evalvault/domain/services/experiment_manager.py,sha256=2k-qGiAUyZuqqmcp4P-M3Z9HTXwwcqW5HQYKNkcIHuI,4863
|
|
246
269
|
evalvault/domain/services/experiment_reporter.py,sha256=QYlVmCFSx8hKTPMezc7QjJE07b3MSQ82Q4QVucSHLVY,1420
|
|
@@ -249,53 +272,62 @@ evalvault/domain/services/experiment_statistics.py,sha256=aOrqbBjB1swHPaFRziID1m
|
|
|
249
272
|
evalvault/domain/services/holdout_splitter.py,sha256=Sos61Zy_bBjStt8LPHJ3KxDNda-OmX7AVUsT24K1n6Q,1910
|
|
250
273
|
evalvault/domain/services/improvement_guide_service.py,sha256=gMoVFlDsprOEEfRGKmdbk9_Due62J63Q-rL2zr65Q0s,17881
|
|
251
274
|
evalvault/domain/services/intent_classifier.py,sha256=hsWivDXqXJjCJEE-OI7eUGeYrewpYxlz67Z0TI3oskU,11707
|
|
275
|
+
evalvault/domain/services/judge_calibration_service.py,sha256=cOaAsbfMBlaDxoMAXe8MacDDRK0tCD-tXRnYjB6sEPs,19264
|
|
252
276
|
evalvault/domain/services/kg_generator.py,sha256=oEugjPdn8Pb2Q3r5yAZl0dZJibNUkEherlRVquknB6k,24969
|
|
253
277
|
evalvault/domain/services/memory_aware_evaluator.py,sha256=vTiYoxiMfZ_CMjSBjqwkBRdpiXRwQ2zXnQ2pXzVHYts,5249
|
|
254
278
|
evalvault/domain/services/memory_based_analysis.py,sha256=oh2irCy3le7fWiTtL31SMEhPyu7fyBVz-giO2hlNifE,4499
|
|
255
279
|
evalvault/domain/services/method_runner.py,sha256=pABqKZeaALpWZYDfzAbd-VOZt2djQggRNIPuuPQeUSw,3571
|
|
280
|
+
evalvault/domain/services/ops_snapshot_service.py,sha256=1CqJN2p3tM6SgzLCZKcVEM213fd1cDGexTRPG_3e59w,5138
|
|
256
281
|
evalvault/domain/services/pipeline_orchestrator.py,sha256=yriVlEVZYDtt0Vwt4Ae6xyW1H6Dj4Hxdn8XQSvQNSoQ,19436
|
|
257
282
|
evalvault/domain/services/pipeline_template_registry.py,sha256=aWqXLQ24grpSZo9M4tZLRo1ysD10c6hUpW3JupZH9e0,28083
|
|
258
283
|
evalvault/domain/services/prompt_candidate_service.py,sha256=Ibyb5EaWK28Ju2HnTqHHGOoiA9Q-VwY3hjxVODALwGY,3997
|
|
259
284
|
evalvault/domain/services/prompt_manifest.py,sha256=5s5Kd6-_Dn-xrjjlU99CVo6njsPhvE50H5m_85U-H6U,5612
|
|
260
|
-
evalvault/domain/services/prompt_registry.py,sha256=
|
|
285
|
+
evalvault/domain/services/prompt_registry.py,sha256=QyL4yIcKT93uv6L0-Q_iaNXno8QnsC19YcGekuSRMtE,5247
|
|
261
286
|
evalvault/domain/services/prompt_scoring_service.py,sha256=SlvfuIbhj92RJu4RQAJ1BGKhKkOAUOt3cZNH21HtsX4,9833
|
|
262
287
|
evalvault/domain/services/prompt_status.py,sha256=r1dFLGz4SfRxXaxsULQsr0-HpJkG9YfZ_yLIxF1MMBo,6731
|
|
263
288
|
evalvault/domain/services/prompt_suggestion_reporter.py,sha256=Fc6sCPebUMk8SZVpjoJ6bCEun0ma-YmayEQnulBVv8s,10577
|
|
264
289
|
evalvault/domain/services/ragas_prompt_overrides.py,sha256=4BecYE2KrreUBbIM3ssP9WzHcK_wRc8jW7CE_k58QOU,1412
|
|
290
|
+
evalvault/domain/services/regression_gate_service.py,sha256=qBMODgpizmEzqEL8_JX-FYSVyARiroMW7MFVzlz7gjc,6579
|
|
265
291
|
evalvault/domain/services/retrieval_metrics.py,sha256=dtrQPLMrXSyWLcgF8EGcLNFwzwA59WDzEh41JRToHAY,2980
|
|
266
292
|
evalvault/domain/services/retriever_context.py,sha256=ySQ-GuadiggS0LVAib4AxA_0JpasYz4S9hbjau0eyIA,6482
|
|
293
|
+
evalvault/domain/services/run_comparison_service.py,sha256=_NScltCRcY3zrvdyYDiPmssTxCDv1GyjCLdP3uAxJts,5631
|
|
267
294
|
evalvault/domain/services/satisfaction_calibration_service.py,sha256=H7Z8opOyPHRO5qVIw-XDsNhIwdCteAS9_a3BTlfIqHg,11906
|
|
268
|
-
evalvault/domain/services/stage_event_builder.py,sha256=
|
|
295
|
+
evalvault/domain/services/stage_event_builder.py,sha256=FAT34Wmylvd2Yz5rDlhaTh1lqSCDhGApCXMi7Hjkib0,9748
|
|
269
296
|
evalvault/domain/services/stage_metric_guide_service.py,sha256=_JdRsBRWirO24qYFlh6hG-dkoWlX6_XWEYKf_uUlKIQ,8807
|
|
270
|
-
evalvault/domain/services/stage_metric_service.py,sha256=
|
|
297
|
+
evalvault/domain/services/stage_metric_service.py,sha256=_u6ThZ8rGw8H9h3TNpu0j8XhpIfukHSoyc1ZpCa3Z00,18031
|
|
271
298
|
evalvault/domain/services/stage_summary_service.py,sha256=VVtuAr4vwzvmNFn8rqURJrhKFqAMG4CaBmyGiUk_xG0,1590
|
|
272
299
|
evalvault/domain/services/synthetic_qa_generator.py,sha256=aiOTPoHZbKRTEeodABQ2I5lq8-Vs_kQtuzcGWd4MTGE,16526
|
|
273
300
|
evalvault/domain/services/testset_generator.py,sha256=6IpiZ0pqhKEymo-AlUdfJjDkF2P1n8Md_QKV4nOheyg,4470
|
|
274
|
-
evalvault/domain/services/threshold_profiles.py,sha256=
|
|
301
|
+
evalvault/domain/services/threshold_profiles.py,sha256=yYJ7o8SIRufI7kUN8edh8am-dVOq_TEhvDqlHe0WQUQ,1433
|
|
275
302
|
evalvault/domain/services/unified_report_service.py,sha256=lG3VpMLC1MTYUlcGl-MUEE4PUopkyrhcgj4_ye9c_vM,11829
|
|
276
|
-
evalvault/domain/services/visual_space_service.py,sha256=
|
|
303
|
+
evalvault/domain/services/visual_space_service.py,sha256=3_qyBsThr5lzP1le6qkXf9ByX3JjoYGX15iMIHe8gQs,34958
|
|
277
304
|
evalvault/ports/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
278
305
|
evalvault/ports/inbound/__init__.py,sha256=2Wsc0vNzH8_ZaErk4OHxP93hRonLUkMbn3W28DtTDO0,562
|
|
279
306
|
evalvault/ports/inbound/analysis_pipeline_port.py,sha256=RJfKtp22AYEqnmRk6RDawAK52rEmyAhuk0FUPJQUwQU,1758
|
|
280
307
|
evalvault/ports/inbound/evaluator_port.py,sha256=rDvouIRUjBD7uICgrpeo11vNPvo27_0CdylRHPodPSE,1323
|
|
281
308
|
evalvault/ports/inbound/learning_hook_port.py,sha256=EeJeMl3chcPHlj5mkLOj6tm8s_qdDRvoCwK1-0l70tI,3297
|
|
282
309
|
evalvault/ports/inbound/web_port.py,sha256=b4uMhwOMLXy3LeILc7ZK3RR-XtoW4p4NzoTpj4syptg,5578
|
|
283
|
-
evalvault/ports/outbound/__init__.py,sha256=
|
|
310
|
+
evalvault/ports/outbound/__init__.py,sha256=x3LseXtwX0NONM1mnhT3fMchz7U6gEDRUX0TDswpg5E,3591
|
|
284
311
|
evalvault/ports/outbound/analysis_cache_port.py,sha256=zPSdUVK_yw3PMWPII2YvS1WLmCGlg5bDScSuYINW9yc,1386
|
|
285
312
|
evalvault/ports/outbound/analysis_module_port.py,sha256=QYzkvie9-BbONj8ZgiQUjm8I-bn8mgzlXTzIXMhehmQ,1881
|
|
286
313
|
evalvault/ports/outbound/analysis_port.py,sha256=gE-iXToTgdQomj9JwNZJY4nwut8q0J6EurUmJNsnptQ,2127
|
|
314
|
+
evalvault/ports/outbound/artifact_fs_port.py,sha256=SN966vwHiIjLA06MBWePr7V0NmafbiQbSLFlXAN3YKU,273
|
|
287
315
|
evalvault/ports/outbound/benchmark_port.py,sha256=pgo3rNbvvJS8x03UxBVQPBBgxc7X5kfG70ZlIf3sopE,7173
|
|
288
316
|
evalvault/ports/outbound/causal_analysis_port.py,sha256=IsyVdFrs66mHcOc-_VbxrZQriwMrDxx-5a_4ElX5Bp0,941
|
|
317
|
+
evalvault/ports/outbound/comparison_pipeline_port.py,sha256=IOLK6vZdzjSV6Qcvkl9GD-wRxx6Waa3dsYOCFdD1mXY,503
|
|
289
318
|
evalvault/ports/outbound/dataset_port.py,sha256=OpEBlkvFwpSRbmi-Lt3wK7n0wljmQ6m985mjyNn_qFk,990
|
|
319
|
+
evalvault/ports/outbound/difficulty_profile_port.py,sha256=hQY-TR64WyUNnCxD9Mw-QraO3ZBw0VUP8KoCmVsQYBE,347
|
|
290
320
|
evalvault/ports/outbound/domain_memory_port.py,sha256=SZFurqsoBmTw1Kt_pej-YpMbooVeyV35jekhaDRojus,23320
|
|
291
321
|
evalvault/ports/outbound/embedding_port.py,sha256=ZHeKRMRBNjpZKWxsLKrD8jJz0M66JTwNcrJbkRaklK4,2034
|
|
292
322
|
evalvault/ports/outbound/improvement_port.py,sha256=fIXhcG4n6OJ1hdvWeqEoLBrVsCNdHZRgtEZjR8lf3qA,2325
|
|
293
323
|
evalvault/ports/outbound/intent_classifier_port.py,sha256=gqMIk0rH6Z43ceuMMRX4vqXurgHZz-CJX2bR5PVAkjQ,2253
|
|
324
|
+
evalvault/ports/outbound/judge_calibration_port.py,sha256=kShZ2MZGvgQZaY7XxwkmLXtquK_RFKcwuWRBfJOrILA,602
|
|
294
325
|
evalvault/ports/outbound/korean_nlp_port.py,sha256=mJCnxBAkV8a5Nd_VX6QcjfDucY62er8GlaNO4HQA8q8,1572
|
|
295
326
|
evalvault/ports/outbound/llm_factory_port.py,sha256=lzoDJi6A6ltk-t3N4oY8DSwMBMfnvXGgSduILOpzoas,305
|
|
296
327
|
evalvault/ports/outbound/llm_port.py,sha256=YAW0i-41yT8KzMuzZGEO5yPDkHN0onGxj55eL0cdPHY,4393
|
|
297
328
|
evalvault/ports/outbound/method_port.py,sha256=sntcKgwagAdJGxp0dI-S_bhBQcOW9QpnND3fOjrsX9E,1377
|
|
298
329
|
evalvault/ports/outbound/nlp_analysis_port.py,sha256=QDJHAsSpynTenuaKp78t1s--U036mtYeUEX0p5vQw24,3046
|
|
330
|
+
evalvault/ports/outbound/ops_snapshot_port.py,sha256=6v72W41tlnxjkJfbfHhFiJMPlRSAQ-BvrI2T09_yddk,214
|
|
299
331
|
evalvault/ports/outbound/relation_augmenter_port.py,sha256=cMcHQnmK111WzZr50vYr7affeHhOtpFZxPARwkg9xbk,651
|
|
300
332
|
evalvault/ports/outbound/report_port.py,sha256=wgReSYL4SupXIoALFh0QFWfX2kzPftXpWTvGLCMd2B8,1315
|
|
301
333
|
evalvault/ports/outbound/stage_storage_port.py,sha256=Nlf9upsXxgCABQB5cJdpLQYsoZNiGRAU5zE5D-Ptp2I,1201
|
|
@@ -306,8 +338,8 @@ evalvault/reports/__init__.py,sha256=Bb1X4871msAN8I6PM6nKGED3psPwZt88hXZBAOdH06Y
|
|
|
306
338
|
evalvault/reports/release_notes.py,sha256=pZj0PBFT-4F_Ty-Kv5P69BuoOnmTCn4kznDcORFJd0w,4011
|
|
307
339
|
evalvault/scripts/__init__.py,sha256=NwEeIFQbkX4ml2R_PhtIoNtArDSX_suuoymgG_7Kwso,89
|
|
308
340
|
evalvault/scripts/regression_runner.py,sha256=SxZori5BZ8jVQ057Mf5V5FPgIVDccrV5oRONmnhuk8w,8438
|
|
309
|
-
evalvault-1.
|
|
310
|
-
evalvault-1.
|
|
311
|
-
evalvault-1.
|
|
312
|
-
evalvault-1.
|
|
313
|
-
evalvault-1.
|
|
341
|
+
evalvault-1.66.0.dist-info/METADATA,sha256=f6jzeYkN1iuFwYJTcI8r5L52hVNZwACOlQuWYvVz_JY,26159
|
|
342
|
+
evalvault-1.66.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
|
|
343
|
+
evalvault-1.66.0.dist-info/entry_points.txt,sha256=Oj9Xc5gYcyUYYNmQfWI8NYGw7nN-3M-h2ipHIMlVn6o,65
|
|
344
|
+
evalvault-1.66.0.dist-info/licenses/LICENSE.md,sha256=3RNWY4jjtrQ_yYa-D-7I3XO12Ti7YzxsLV_dpykujvo,11358
|
|
345
|
+
evalvault-1.66.0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|