evalvault 1.70.1__py3-none-any.whl → 1.72.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- evalvault/adapters/inbound/api/adapter.py +367 -3
- evalvault/adapters/inbound/api/main.py +17 -1
- evalvault/adapters/inbound/api/routers/calibration.py +133 -0
- evalvault/adapters/inbound/api/routers/runs.py +71 -1
- evalvault/adapters/inbound/cli/commands/__init__.py +2 -0
- evalvault/adapters/inbound/cli/commands/analyze.py +1 -0
- evalvault/adapters/inbound/cli/commands/compare.py +1 -1
- evalvault/adapters/inbound/cli/commands/experiment.py +27 -1
- evalvault/adapters/inbound/cli/commands/graph_rag.py +303 -0
- evalvault/adapters/inbound/cli/commands/history.py +1 -1
- evalvault/adapters/inbound/cli/commands/regress.py +169 -1
- evalvault/adapters/inbound/cli/commands/run.py +225 -1
- evalvault/adapters/inbound/cli/commands/run_helpers.py +57 -0
- evalvault/adapters/outbound/analysis/network_analyzer_module.py +17 -4
- evalvault/adapters/outbound/dataset/__init__.py +6 -0
- evalvault/adapters/outbound/dataset/multiturn_json_loader.py +111 -0
- evalvault/adapters/outbound/report/__init__.py +6 -0
- evalvault/adapters/outbound/report/ci_report_formatter.py +43 -0
- evalvault/adapters/outbound/report/dashboard_generator.py +24 -9
- evalvault/adapters/outbound/report/pr_comment_formatter.py +50 -0
- evalvault/adapters/outbound/retriever/__init__.py +8 -0
- evalvault/adapters/outbound/retriever/graph_rag_adapter.py +326 -0
- evalvault/adapters/outbound/storage/base_sql.py +291 -0
- evalvault/adapters/outbound/storage/postgres_adapter.py +130 -0
- evalvault/adapters/outbound/storage/postgres_schema.sql +60 -0
- evalvault/adapters/outbound/storage/schema.sql +63 -0
- evalvault/adapters/outbound/storage/sqlite_adapter.py +107 -0
- evalvault/domain/entities/__init__.py +20 -0
- evalvault/domain/entities/graph_rag.py +30 -0
- evalvault/domain/entities/multiturn.py +78 -0
- evalvault/domain/metrics/__init__.py +10 -0
- evalvault/domain/metrics/multiturn_metrics.py +113 -0
- evalvault/domain/metrics/registry.py +36 -0
- evalvault/domain/services/__init__.py +8 -0
- evalvault/domain/services/evaluator.py +5 -2
- evalvault/domain/services/graph_rag_experiment.py +155 -0
- evalvault/domain/services/multiturn_evaluator.py +187 -0
- evalvault/ports/inbound/__init__.py +2 -0
- evalvault/ports/inbound/multiturn_port.py +23 -0
- evalvault/ports/inbound/web_port.py +4 -0
- evalvault/ports/outbound/graph_retriever_port.py +24 -0
- evalvault/ports/outbound/storage_port.py +25 -0
- {evalvault-1.70.1.dist-info → evalvault-1.72.0.dist-info}/METADATA +1 -1
- {evalvault-1.70.1.dist-info → evalvault-1.72.0.dist-info}/RECORD +47 -33
- {evalvault-1.70.1.dist-info → evalvault-1.72.0.dist-info}/WHEEL +0 -0
- {evalvault-1.70.1.dist-info → evalvault-1.72.0.dist-info}/entry_points.txt +0 -0
- {evalvault-1.70.1.dist-info → evalvault-1.72.0.dist-info}/licenses/LICENSE.md +0 -0
|
@@ -0,0 +1,187 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import asyncio
|
|
4
|
+
from dataclasses import dataclass
|
|
5
|
+
|
|
6
|
+
from evalvault.domain.entities.dataset import Dataset, TestCase
|
|
7
|
+
from evalvault.domain.entities.multiturn import (
|
|
8
|
+
ConversationTurn,
|
|
9
|
+
DriftAnalysis,
|
|
10
|
+
MultiTurnEvaluationResult,
|
|
11
|
+
MultiTurnTestCase,
|
|
12
|
+
MultiTurnTurnResult,
|
|
13
|
+
)
|
|
14
|
+
from evalvault.domain.metrics.multiturn_metrics import (
|
|
15
|
+
calculate_context_coherence,
|
|
16
|
+
calculate_drift_rate,
|
|
17
|
+
calculate_turn_faithfulness,
|
|
18
|
+
calculate_turn_latency_p95,
|
|
19
|
+
)
|
|
20
|
+
from evalvault.domain.services.evaluator import RagasEvaluator
|
|
21
|
+
from evalvault.ports.inbound.multiturn_port import MultiTurnEvaluatorPort
|
|
22
|
+
from evalvault.ports.outbound.llm_port import LLMPort
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
@dataclass(frozen=True)
class _TurnMapping:
    """Immutable link between a generated test case and its source turn.

    Instances are produced while a conversation is flattened into a dataset
    so that per-test-case evaluation results can be traced back to the turn
    they were derived from.
    """

    # Identifier of the test case that was built for the turn.
    test_case_id: str
    # The conversation turn the test case was derived from.
    turn: ConversationTurn
    # Position of the turn inside the conversation.
    turn_index: int
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
class MultiTurnEvaluator(MultiTurnEvaluatorPort):
    """Evaluate a multi-turn conversation turn by turn and summarise it.

    Every non-user turn of a conversation is converted into a single-turn
    test case (the most recent user message becomes the question) and scored
    with the injected ``RagasEvaluator``.  Conversation-level metrics listed
    in ``MULTITURN_METRICS`` are then computed on top of the turn results.
    """

    # Names of the conversation-level metrics computed by this class itself;
    # any other requested metric is looked up in the base evaluator's maps.
    MULTITURN_METRICS = {"turn_faithfulness", "context_coherence", "drift_rate", "turn_latency"}

    def __init__(
        self, *, evaluator: RagasEvaluator | None = None, llm: LLMPort | None = None
    ) -> None:
        """Store the optional collaborators.

        Args:
            evaluator: Single-turn evaluator used for base metrics.
            llm: LLM handed to ``evaluator``.

        Both are only required when a base metric is requested.
        """
        self._evaluator = evaluator
        self._llm = llm

    def evaluate_conversation(
        self,
        conversation: MultiTurnTestCase,
        metrics: list[str],
    ) -> MultiTurnEvaluationResult:
        """Score every evaluable turn and build the conversation summary.

        Args:
            conversation: Conversation to evaluate.
            metrics: Requested metric names.  Names that are neither
                multi-turn metrics nor known to ``RagasEvaluator`` are
                ignored.

        Returns:
            The per-turn results plus a summary dict holding the requested
            multi-turn metrics, ``turn_count`` and ``conversation_id``.

        Raises:
            ValueError: If a base metric is needed but the evaluator or the
                LLM was not supplied.
        """
        dataset, mappings = self._build_turn_dataset(conversation)
        base_metrics = [metric for metric in metrics if self._is_base_metric(metric)]
        # turn_faithfulness is derived from per-turn "faithfulness" scores,
        # so that base metric has to be evaluated even if not requested.
        if "turn_faithfulness" in metrics and "faithfulness" not in base_metrics:
            base_metrics.append("faithfulness")

        if base_metrics:
            if self._evaluator is None or self._llm is None:
                raise ValueError("MultiTurnEvaluator requires evaluator and llm for base metrics")
            turn_results = self._score_turns(conversation, dataset, mappings, base_metrics)
        else:
            # Nothing to score: emit one placeholder result per turn.
            turn_results = [
                MultiTurnTurnResult(
                    conversation_id=conversation.conversation_id,
                    turn_id=mapping.turn.turn_id,
                    turn_index=mapping.turn_index,
                    role=mapping.turn.role,
                    metrics={},
                    passed=False,
                    latency_ms=None,
                    metadata=self._turn_metadata(conversation, mapping),
                )
                for mapping in mappings
            ]

        summary: dict[str, object] = {}
        if "turn_faithfulness" in metrics:
            summary["turn_faithfulness"] = calculate_turn_faithfulness(turn_results)
        if "context_coherence" in metrics:
            summary["context_coherence"] = calculate_context_coherence(conversation.turns)
        if "drift_rate" in metrics:
            summary["drift_rate"] = calculate_drift_rate(conversation.turns)
        if "turn_latency" in metrics:
            summary["turn_latency"] = calculate_turn_latency_p95(
                [result.latency_ms for result in turn_results]
            )

        summary["turn_count"] = len(turn_results)
        summary["conversation_id"] = conversation.conversation_id

        return MultiTurnEvaluationResult(
            conversation_id=conversation.conversation_id,
            turn_results=turn_results,
            summary=summary,
        )

    def detect_drift(
        self,
        conversation: MultiTurnTestCase,
        threshold: float = 0.1,
    ) -> DriftAnalysis:
        """Compute the drift rate of a conversation and compare it to a threshold.

        Args:
            conversation: Conversation whose turns are analysed.
            threshold: Drift is reported when the score is greater than or
                equal to this value.

        Returns:
            A ``DriftAnalysis`` with the score, the threshold, the detection
            flag and an empty ``notes`` list.
        """
        drift_score = calculate_drift_rate(conversation.turns)
        return DriftAnalysis(
            conversation_id=conversation.conversation_id,
            drift_score=drift_score,
            drift_threshold=threshold,
            drift_detected=drift_score >= threshold,
            notes=[],
        )

    def _score_turns(
        self,
        conversation: MultiTurnTestCase,
        dataset: Dataset,
        mappings: list[_TurnMapping],
        base_metrics: list[str],
    ) -> list[MultiTurnTurnResult]:
        """Run the base metrics and convert each result into a turn result."""
        evaluation = self._run_base_metrics(dataset, base_metrics)
        # Index once instead of scanning the mapping list for every result.
        mapping_by_case = {mapping.test_case_id: mapping for mapping in mappings}

        turn_results: list[MultiTurnTurnResult] = []
        for result in evaluation.results:
            mapping = mapping_by_case.get(result.test_case_id)
            if mapping is None:
                # Result for a test case this conversation did not produce.
                continue
            turn_results.append(
                MultiTurnTurnResult(
                    conversation_id=conversation.conversation_id,
                    turn_id=mapping.turn.turn_id,
                    turn_index=mapping.turn_index,
                    role=mapping.turn.role,
                    metrics={metric.name: metric.score for metric in result.metrics},
                    passed=result.all_passed,
                    latency_ms=result.latency_ms,
                    metadata=self._turn_metadata(conversation, mapping),
                )
            )
        return turn_results

    @staticmethod
    def _turn_metadata(
        conversation: MultiTurnTestCase, mapping: _TurnMapping
    ) -> dict[str, object]:
        """Return a fresh metadata dict describing the turn behind ``mapping``."""
        return {
            "conversation_id": conversation.conversation_id,
            "turn_index": mapping.turn_index,
            "turn_id": mapping.turn.turn_id,
            "role": mapping.turn.role,
        }

    def _run_base_metrics(self, dataset: Dataset, metrics: list[str]):
        """Run the asynchronous base evaluator and return its result synchronously.

        ``asyncio.run`` cannot be used while an event loop is already running
        in the current thread, so in that situation the evaluation is executed
        on a short-lived worker thread that owns its own loop.  The calling
        thread blocks until the evaluation finishes in both cases.
        """

        def _evaluate():
            # The coroutine is created inside the thread that runs it.
            return asyncio.run(
                self._evaluator.evaluate(
                    dataset=dataset,
                    metrics=metrics,
                    llm=self._llm,
                )
            )

        try:
            asyncio.get_running_loop()
        except RuntimeError:
            # No loop is running in this thread: asyncio.run is safe here.
            return _evaluate()

        from concurrent.futures import ThreadPoolExecutor

        with ThreadPoolExecutor(max_workers=1) as pool:
            return pool.submit(_evaluate).result()

    def _is_base_metric(self, metric: str) -> bool:
        """Return True when ``metric`` is handled by the base evaluator.

        Multi-turn metric names are never base metrics, even if the base
        evaluator happens to know a metric with the same name.
        """
        if metric in self.MULTITURN_METRICS:
            return False
        if metric in RagasEvaluator.METRIC_MAP:
            return True
        return metric in RagasEvaluator.CUSTOM_METRIC_MAP

    @staticmethod
    def _build_turn_dataset(conversation: MultiTurnTestCase) -> tuple[Dataset, list[_TurnMapping]]:
        """Flatten a conversation into a dataset with one test case per non-user turn.

        User turns are not evaluated; their content is remembered and used as
        the question of the following non-user turns (an empty string when no
        user turn precedes them).  Turn indices are 1-based and count every
        turn, including the user turns.

        Returns:
            The dataset and, in the same order as its test cases, the mapping
            from each test-case id back to its turn.
        """
        test_cases: list[TestCase] = []
        mappings: list[_TurnMapping] = []
        last_user_content: str | None = None

        for index, turn in enumerate(conversation.turns, start=1):
            if turn.role == "user":
                last_user_content = turn.content
                continue
            question = last_user_content or ""
            # The index keeps the id unique even if turn ids repeat.
            test_case_id = f"{conversation.conversation_id}:{index}:{turn.turn_id}"
            test_case = TestCase(
                id=test_case_id,
                question=question,
                answer=turn.content,
                contexts=turn.contexts or [],
                ground_truth=turn.ground_truth,
                metadata={
                    "conversation_id": conversation.conversation_id,
                    "turn_index": index,
                    "turn_id": turn.turn_id,
                    "role": turn.role,
                },
            )
            test_cases.append(test_case)
            mappings.append(_TurnMapping(test_case_id=test_case_id, turn=turn, turn_index=index))

        dataset = Dataset(
            name=f"multiturn:{conversation.conversation_id}",
            version="1.0.0",
            test_cases=test_cases,
            metadata={"conversation_id": conversation.conversation_id},
        )
        return dataset, mappings
|
|
@@ -3,6 +3,7 @@
|
|
|
3
3
|
from evalvault.ports.inbound.analysis_pipeline_port import AnalysisPipelinePort
|
|
4
4
|
from evalvault.ports.inbound.evaluator_port import EvaluatorPort
|
|
5
5
|
from evalvault.ports.inbound.learning_hook_port import DomainLearningHookPort
|
|
6
|
+
from evalvault.ports.inbound.multiturn_port import MultiTurnEvaluatorPort
|
|
6
7
|
from evalvault.ports.inbound.web_port import (
|
|
7
8
|
EvalProgress,
|
|
8
9
|
EvalRequest,
|
|
@@ -15,6 +16,7 @@ __all__ = [
|
|
|
15
16
|
"EvaluatorPort",
|
|
16
17
|
"DomainLearningHookPort",
|
|
17
18
|
"AnalysisPipelinePort",
|
|
19
|
+
"MultiTurnEvaluatorPort",
|
|
18
20
|
"WebUIPort",
|
|
19
21
|
"EvalRequest",
|
|
20
22
|
"EvalProgress",
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import Protocol
|
|
4
|
+
|
|
5
|
+
from evalvault.domain.entities.multiturn import (
|
|
6
|
+
DriftAnalysis,
|
|
7
|
+
MultiTurnEvaluationResult,
|
|
8
|
+
MultiTurnTestCase,
|
|
9
|
+
)
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class MultiTurnEvaluatorPort(Protocol):
    """Inbound port describing a multi-turn conversation evaluator."""

    def evaluate_conversation(
        self,
        conversation: MultiTurnTestCase,
        metrics: list[str],
    ) -> MultiTurnEvaluationResult:
        """Evaluate ``conversation`` against the requested ``metrics``."""
        ...

    def detect_drift(
        self,
        conversation: MultiTurnTestCase,
        threshold: float = 0.1,
    ) -> DriftAnalysis:
        """Analyse drift in ``conversation`` relative to ``threshold``."""
        ...
|
|
@@ -166,6 +166,8 @@ class WebUIPort(Protocol):
|
|
|
166
166
|
*,
|
|
167
167
|
include_nlp: bool = True,
|
|
168
168
|
include_causal: bool = True,
|
|
169
|
+
use_cache: bool = True,
|
|
170
|
+
save: bool = False,
|
|
169
171
|
) -> str:
|
|
170
172
|
"""보고서 생성.
|
|
171
173
|
|
|
@@ -174,6 +176,8 @@ class WebUIPort(Protocol):
|
|
|
174
176
|
output_format: 출력 포맷 (markdown, html)
|
|
175
177
|
include_nlp: NLP 분석 포함 여부
|
|
176
178
|
include_causal: 인과 분석 포함 여부
|
|
179
|
+
use_cache: 캐시된 보고서 사용 여부
|
|
180
|
+
save: DB 저장 여부
|
|
177
181
|
|
|
178
182
|
Returns:
|
|
179
183
|
생성된 보고서 문자열
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import Protocol
|
|
4
|
+
|
|
5
|
+
from evalvault.domain.entities.graph_rag import EntityNode, KnowledgeSubgraph
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class GraphRetrieverPort(Protocol):
    """Outbound port describing a graph-based retriever."""

    def extract_entities(self, text: str) -> list[EntityNode]:
        """Extract entities from the text."""
        ...

    def build_subgraph(
        self,
        query: str,
        max_hops: int = 2,
        max_nodes: int = 20,
    ) -> KnowledgeSubgraph:
        """Build the subgraph related to the query."""
        ...

    def generate_context(
        self,
        subgraph: KnowledgeSubgraph,
    ) -> str:
        """Convert the subgraph into LLM context."""
        ...
|
|
@@ -6,6 +6,9 @@ from typing import Any, Protocol
|
|
|
6
6
|
from evalvault.domain.entities import (
|
|
7
7
|
EvaluationRun,
|
|
8
8
|
FeedbackSummary,
|
|
9
|
+
MultiTurnConversationRecord,
|
|
10
|
+
MultiTurnRunRecord,
|
|
11
|
+
MultiTurnTurnResult,
|
|
9
12
|
PromptSetBundle,
|
|
10
13
|
RunClusterMap,
|
|
11
14
|
RunClusterMapInfo,
|
|
@@ -32,12 +35,25 @@ class StoragePort(Protocol):
|
|
|
32
35
|
"""
|
|
33
36
|
...
|
|
34
37
|
|
|
38
|
+
def save_multiturn_run(
    self,
    run: MultiTurnRunRecord,
    conversations: list[MultiTurnConversationRecord],
    turn_results: list[MultiTurnTurnResult],
    *,
    metric_thresholds: dict[str, float] | None = None,
) -> str:
    """Save the results of a multi-turn evaluation run.

    Args:
        run: Run-level record.
        conversations: Conversation-level records belonging to the run.
        turn_results: Turn-level results belonging to the run.
        metric_thresholds: Optional per-metric thresholds.
    """
    ...
|
|
48
|
+
|
|
35
49
|
def save_prompt_set(self, bundle: PromptSetBundle) -> None:
    """Persist a prompt set together with its prompt items."""
    ...
|
|
38
52
|
|
|
39
53
|
def export_run_to_excel(
    self,
    run_id: str,
    output_path: str | Path,
) -> Path:
    """Export the run identified by ``run_id`` to Excel at ``output_path``."""
    ...
|
|
40
54
|
|
|
55
|
+
def export_multiturn_run_to_excel(
    self,
    run_id: str,
    output_path: str | Path,
) -> Path:
    """Export the multi-turn run identified by ``run_id`` to Excel at ``output_path``."""
    ...
|
|
56
|
+
|
|
41
57
|
def link_prompt_set_to_run(
    self,
    run_id: str,
    prompt_set_id: str,
) -> None:
    """Associate the given prompt set with the given evaluation run."""
    ...
|
|
@@ -204,6 +220,15 @@ class StoragePort(Protocol):
|
|
|
204
220
|
created_at: str | None = None,
|
|
205
221
|
) -> str: ...
|
|
206
222
|
|
|
223
|
+
def list_analysis_reports(
    self,
    *,
    run_id: str,
    report_type: str | None = None,
    format: str | None = None,
    limit: int = 20,
) -> list[dict[str, Any]]:
    """List analysis reports stored for a run.

    Args:
        run_id: Run whose reports are listed.
        report_type: Optional report-type filter.
        format: Optional format filter.
        limit: Maximum number of entries (20 by default).
    """
    ...
|
|
231
|
+
|
|
207
232
|
def list_pipeline_results(
    self,
    limit: int = 50,
) -> list[dict[str, Any]]:
    """Retrieve the list of pipeline analysis results."""
    ...
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: evalvault
|
|
3
|
-
Version: 1.70.1
|
|
3
|
+
Version: 1.72.0
|
|
4
4
|
Summary: RAG evaluation system using Ragas with Phoenix/Langfuse tracing
|
|
5
5
|
Project-URL: Homepage, https://github.com/ntts9990/EvalVault
|
|
6
6
|
Project-URL: Documentation, https://github.com/ntts9990/EvalVault#readme
|
|
@@ -5,35 +5,37 @@ evalvault/mkdocs_helpers.py,sha256=1AKVQ1W2_VO4qclhfyefyU9Dz1Hzkh1DWDwsFMe24jc,3
|
|
|
5
5
|
evalvault/adapters/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
6
6
|
evalvault/adapters/inbound/__init__.py,sha256=SG1svel1PwqetnqVpKFLSv612_WwGwLTbFpYgwk6FMw,166
|
|
7
7
|
evalvault/adapters/inbound/api/__init__.py,sha256=LeVVttCA3tLKoHA2PO4z3y8VkfVcf3Bq8CZSzo91lf4,34
|
|
8
|
-
evalvault/adapters/inbound/api/adapter.py,sha256=
|
|
9
|
-
evalvault/adapters/inbound/api/main.py,sha256=
|
|
8
|
+
evalvault/adapters/inbound/api/adapter.py,sha256=Igg2grCUxQzMuvDOAhBK08wY0nxjmnvnaGS5rLVF3i4,83388
|
|
9
|
+
evalvault/adapters/inbound/api/main.py,sha256=QgLxzHEy7aycGKIFLtN12tWTjnpWLtQ2XDXKV_2FDvg,7531
|
|
10
10
|
evalvault/adapters/inbound/api/routers/__init__.py,sha256=q07_YF9TnBl68bqcRCvhPU4-zRTyvmPoHVehwO6W7QM,19
|
|
11
11
|
evalvault/adapters/inbound/api/routers/benchmark.py,sha256=yevntbZcNtMvbVODsITUBgR1Ka4pdFQrXBJJ4K4Jyr4,4477
|
|
12
|
+
evalvault/adapters/inbound/api/routers/calibration.py,sha256=ZnJSEW8hV-94S95lU_nDmzcLyaUoH1suM3sFUpJ3w5k,4130
|
|
12
13
|
evalvault/adapters/inbound/api/routers/chat.py,sha256=hCA6rWr5GT_gCqu75uCqYwy2gOEUd85mlcc5y-ruFTY,20661
|
|
13
14
|
evalvault/adapters/inbound/api/routers/config.py,sha256=LygN0fVMr8NFtj5zuQXnVFhoafx56Txa98vpwtPa4Jc,4104
|
|
14
15
|
evalvault/adapters/inbound/api/routers/domain.py,sha256=RsR7GIFMjccDN7vpG1uDyk9n1DnCTH18JDGAX7o4Qqc,3648
|
|
15
16
|
evalvault/adapters/inbound/api/routers/knowledge.py,sha256=yb_e7OEPtwldOAzHTGiWe7jShHw2JdpOFnzGPMceRsg,7109
|
|
16
17
|
evalvault/adapters/inbound/api/routers/mcp.py,sha256=yHANV7qIXig-7YSiQgXzSTuabqFStH5yT3URyQGY2W4,4764
|
|
17
18
|
evalvault/adapters/inbound/api/routers/pipeline.py,sha256=8UgQzNFHcuqS61s69mOrPee4OMwfxVdvRWHJ2_qYBF0,17175
|
|
18
|
-
evalvault/adapters/inbound/api/routers/runs.py,sha256=
|
|
19
|
+
evalvault/adapters/inbound/api/routers/runs.py,sha256=eIJ4xkFu-Bn2gRUVBajWZBi-QD2Spl6_wDuaRMeEI84,42744
|
|
19
20
|
evalvault/adapters/inbound/cli/__init__.py,sha256=a42flC5NK-VfbdbBrE49IrUL5zAyKdXZYJVM6E3NTE0,675
|
|
20
21
|
evalvault/adapters/inbound/cli/app.py,sha256=ytNgHRg9ZTAl33AkB1wIL8RKfQ_Cf8fsy0gSsLTs7Ew,1603
|
|
21
|
-
evalvault/adapters/inbound/cli/commands/__init__.py,sha256=
|
|
22
|
+
evalvault/adapters/inbound/cli/commands/__init__.py,sha256=TB3evGuv3_AQoudWiR2bH5EH_AY9f9E7lQcSOWH6TO8,4091
|
|
22
23
|
evalvault/adapters/inbound/cli/commands/agent.py,sha256=YlOYMEzzS1aSKDKD_a7UK3St18X6GXGkdTatrzyd8Zc,7555
|
|
23
|
-
evalvault/adapters/inbound/cli/commands/analyze.py,sha256=
|
|
24
|
+
evalvault/adapters/inbound/cli/commands/analyze.py,sha256=sffmFRbHyxBd0yy10OwxkBSfzIIEhcshW6TOydQcmY0,48974
|
|
24
25
|
evalvault/adapters/inbound/cli/commands/api.py,sha256=YdbJ_-QEajnFcjTa7P2heLMjFKpeQ4nWP_p-HvfYkEo,1943
|
|
25
26
|
evalvault/adapters/inbound/cli/commands/artifacts.py,sha256=bE8FQxmnU0mMIAPx5en8aKrtfNNkrbWoLxIX4ZT9D5c,3776
|
|
26
27
|
evalvault/adapters/inbound/cli/commands/benchmark.py,sha256=RZ4nRTF7d6hDZug-Pw8dGcFEyWdOKclwqkvS-gN4VWo,41097
|
|
27
28
|
evalvault/adapters/inbound/cli/commands/calibrate.py,sha256=-UnT0LQH40U5lzMLqMJ7DOTLa3mt5P_fJL2XzqIkvu4,4223
|
|
28
29
|
evalvault/adapters/inbound/cli/commands/calibrate_judge.py,sha256=hJBlNl9Rt-ZtoIu-HKfudhZb2j2HOoEnRbiG4n5TOTE,10348
|
|
29
|
-
evalvault/adapters/inbound/cli/commands/compare.py,sha256=
|
|
30
|
+
evalvault/adapters/inbound/cli/commands/compare.py,sha256=qqgBN9noE9kiYJs-EZWiU_yA1mW4mnKFavlFTS63OzU,10385
|
|
30
31
|
evalvault/adapters/inbound/cli/commands/config.py,sha256=Mv9IQHBFHZ3I2stUzHDgLDn-Znt_Awdy3j-sk5ruUmw,6069
|
|
31
32
|
evalvault/adapters/inbound/cli/commands/debug.py,sha256=KU-hL1gLhpjV2ZybDQgGMwRfm-hCynkrqY4UzETfL9k,2234
|
|
32
33
|
evalvault/adapters/inbound/cli/commands/domain.py,sha256=dL9iqBlnr5mDeS1unXW6uxE0qp6yfnxj-ls6k3EenwI,27279
|
|
33
|
-
evalvault/adapters/inbound/cli/commands/experiment.py,sha256=
|
|
34
|
+
evalvault/adapters/inbound/cli/commands/experiment.py,sha256=Jtz-jp7URy8HYWf15pje8Ecrctx_Q5kYw-QccT0vMDA,11400
|
|
34
35
|
evalvault/adapters/inbound/cli/commands/gate.py,sha256=SxBSHALhekw9OVuJcuk64tkS8YMDDsgmhMALTE38wwY,9956
|
|
35
36
|
evalvault/adapters/inbound/cli/commands/generate.py,sha256=7IPvd0WAwPxt9uaxmzqWCwt0b2VC_wXiVxyJ3lP-xys,8562
|
|
36
|
-
evalvault/adapters/inbound/cli/commands/
|
|
37
|
+
evalvault/adapters/inbound/cli/commands/graph_rag.py,sha256=UWaTgWuBE9B6KJJnfN9PBYl6gMOKaa641KjXw8Xwqho,11047
|
|
38
|
+
evalvault/adapters/inbound/cli/commands/history.py,sha256=bo7mtHgSCniI7WfeewKQIJzI_HNUTrDiAkOXLFxabVc,8434
|
|
37
39
|
evalvault/adapters/inbound/cli/commands/init.py,sha256=7q86fUeBVA08fU_N0lAV6Lakxirq4val2jIyALlDy3E,8822
|
|
38
40
|
evalvault/adapters/inbound/cli/commands/kg.py,sha256=ycV9Xj6SUUJLTyTfLZcjXDVLcZqwo7Gw878ZhZAeDoc,19155
|
|
39
41
|
evalvault/adapters/inbound/cli/commands/langfuse.py,sha256=aExhZ5WYT0FzJI4v1sF-a1jqy9b1BF46_HBtfiQjVGI,4085
|
|
@@ -43,9 +45,9 @@ evalvault/adapters/inbound/cli/commands/phoenix.py,sha256=LQi3KTLq1ybjjBuz92oQ6l
|
|
|
43
45
|
evalvault/adapters/inbound/cli/commands/pipeline.py,sha256=NeqWLzO9kRDuZd0pHAIHglP3F7VzoNOU4JI0QcSZ120,7788
|
|
44
46
|
evalvault/adapters/inbound/cli/commands/profile_difficulty.py,sha256=nOJH3iqgLAlXq4keLBj5oqpiRCg0jjGgT-7Q57HxEh8,6665
|
|
45
47
|
evalvault/adapters/inbound/cli/commands/prompts.py,sha256=lddde5VbjYaqN_9gHPLNu6DWpg5fE-KqZzjN-XYwvJw,27153
|
|
46
|
-
evalvault/adapters/inbound/cli/commands/regress.py,sha256=
|
|
47
|
-
evalvault/adapters/inbound/cli/commands/run.py,sha256=
|
|
48
|
-
evalvault/adapters/inbound/cli/commands/run_helpers.py,sha256=
|
|
48
|
+
evalvault/adapters/inbound/cli/commands/regress.py,sha256=rugJiX4Qujx9JlAwMUlCIEc2eMa-6mZscmb79pYNWfI,15527
|
|
49
|
+
evalvault/adapters/inbound/cli/commands/run.py,sha256=DMobs36HxZ0AnG3DWYuJuiq9x7M4qQu7GbMz02Yf-rM,130361
|
|
50
|
+
evalvault/adapters/inbound/cli/commands/run_helpers.py,sha256=WCKSaZ2vdBwvYgS0gb3Z_O0eNEAjtWEqH0-8KS1c3gI,43211
|
|
49
51
|
evalvault/adapters/inbound/cli/commands/stage.py,sha256=oRC9c5CysLX90Iy5Ba1pc_00DaOBS78lcBvzkbdrGRM,17123
|
|
50
52
|
evalvault/adapters/inbound/cli/utils/__init__.py,sha256=QPNKneZS-Z-tTnYYxtgJXgcJWY6puUlRQcKrn7Mlv1M,685
|
|
51
53
|
evalvault/adapters/inbound/cli/utils/analysis_io.py,sha256=RHkKEq4e-PtbtRDlXAJWU80RYHNPw-O5V9_GujdaGfc,13393
|
|
@@ -87,7 +89,7 @@ evalvault/adapters/outbound/analysis/model_analyzer_module.py,sha256=28rHdXBXYIF
|
|
|
87
89
|
evalvault/adapters/outbound/analysis/morpheme_analyzer_module.py,sha256=Hrh4mluMsOhQHPrliD2w0FVKokJpfikXOFKT6sNwk74,4158
|
|
88
90
|
evalvault/adapters/outbound/analysis/morpheme_quality_checker_module.py,sha256=_uRKDXdwGbfYduf_3XT77vF8X3-_zW3stHYc3HKYQTE,2216
|
|
89
91
|
evalvault/adapters/outbound/analysis/multiturn_analyzer_module.py,sha256=6R_lcbJyQr5CEEI_zpDJDdw6G4n3ZnkUI0ovfUPGrtU,7557
|
|
90
|
-
evalvault/adapters/outbound/analysis/network_analyzer_module.py,sha256=
|
|
92
|
+
evalvault/adapters/outbound/analysis/network_analyzer_module.py,sha256=RTCeIEfEpe5cjhnSJkYMCmQRGxZiw2ZO6sSMFwLXEN4,8961
|
|
91
93
|
evalvault/adapters/outbound/analysis/nlp_adapter.py,sha256=aLtF_fns-7IEtitwON2EYS_lweq_IdldFsRm47alN0Q,29561
|
|
92
94
|
evalvault/adapters/outbound/analysis/nlp_analyzer_module.py,sha256=kVuG9pVMQO6OYY5zxj_w9nNQZ1-qIO0y6XcXo6lG-n0,8221
|
|
93
95
|
evalvault/adapters/outbound/analysis/pattern_detector_module.py,sha256=SyCDO_VS-r-tjGh8WrW-t1GCSC9ouxirdVk4NizFPXo,1882
|
|
@@ -118,13 +120,14 @@ evalvault/adapters/outbound/benchmark/lm_eval_adapter.py,sha256=xFj_Cgny3JN_COA1
|
|
|
118
120
|
evalvault/adapters/outbound/cache/__init__.py,sha256=LcsKzxnx1AnAwS07iSCdws11CfEYuxkUjRkogN0SviI,317
|
|
119
121
|
evalvault/adapters/outbound/cache/hybrid_cache.py,sha256=AVhctQVOIbQWwvn_K0kxSq3lkhucuM7tezmSkPDbCrA,12711
|
|
120
122
|
evalvault/adapters/outbound/cache/memory_cache.py,sha256=jvjIgXp7YRj08_AzBFaJ58jjXNzUlYbG_zX6fQJP4C0,3533
|
|
121
|
-
evalvault/adapters/outbound/dataset/__init__.py,sha256=
|
|
123
|
+
evalvault/adapters/outbound/dataset/__init__.py,sha256=hijY1l0uPvO9no-RB4XyYi9g653U-4_VqVSVN5P_5XA,1361
|
|
122
124
|
evalvault/adapters/outbound/dataset/base.py,sha256=4rxpQgxpFty0G5XRv1SP-XJ9mpZ9YO6PAMDgp71JiJQ,5547
|
|
123
125
|
evalvault/adapters/outbound/dataset/csv_loader.py,sha256=xHg2QadMvLfHTHzeex6WxXmagLJog3LN-ui6dFxD8HY,5595
|
|
124
126
|
evalvault/adapters/outbound/dataset/excel_loader.py,sha256=MUl-63r1s1GjVVmDgdag1DpMJvIVX_agGx20NQzEZN8,4494
|
|
125
127
|
evalvault/adapters/outbound/dataset/json_loader.py,sha256=4wG7APg1LLADPxJ-wQZo2zBcvVX12sqo9VUIb-0Kww4,4923
|
|
126
128
|
evalvault/adapters/outbound/dataset/loader_factory.py,sha256=32sjGuW2Yta12lpKy4DLH4I5B4Pi-YuHTvGG1Pr4VAk,1361
|
|
127
129
|
evalvault/adapters/outbound/dataset/method_input_loader.py,sha256=d7pB4OPvvr-q-Y5DlvjX3X719jCCQ2vRDfT_ov0dUFU,3833
|
|
130
|
+
evalvault/adapters/outbound/dataset/multiturn_json_loader.py,sha256=XJDDYgabFlLMqwTAa-aelptirhq07xJDkaB0kbrxPmU,4194
|
|
128
131
|
evalvault/adapters/outbound/dataset/streaming_loader.py,sha256=pLSJDG3zkaX-KyKebEtNdIVJhpIXDCJpzNBglWlU9tw,17995
|
|
129
132
|
evalvault/adapters/outbound/dataset/templates.py,sha256=5gfae7kqs66SRAP-OyWX6N2cKhr3wy7qAzfhWO_5zXY,3316
|
|
130
133
|
evalvault/adapters/outbound/dataset/thresholds.py,sha256=5Vodqar6QrCL7R_Pq0gTxv1pJh_OiH_3pNimvJaUQA4,2199
|
|
@@ -178,17 +181,21 @@ evalvault/adapters/outbound/nlp/korean/korean_stopwords.py,sha256=UemEFCJudg2Eps
|
|
|
178
181
|
evalvault/adapters/outbound/nlp/korean/toolkit.py,sha256=EYGpd89ilpn4Wg5t8pALYt4Qi0aDHYOfXGuYbQx7do0,4246
|
|
179
182
|
evalvault/adapters/outbound/nlp/korean/toolkit_factory.py,sha256=x3v-AAkVInOabC4PtOtStsZrFnHun0IOqZDyQGaQVm8,586
|
|
180
183
|
evalvault/adapters/outbound/phoenix/sync_service.py,sha256=i6gHpNiZXKQ5yzV9B2TPb-P1N45k_Ck5ruzh3oqp4d8,9122
|
|
181
|
-
evalvault/adapters/outbound/report/__init__.py,sha256=
|
|
182
|
-
evalvault/adapters/outbound/report/
|
|
184
|
+
evalvault/adapters/outbound/report/__init__.py,sha256=8VeMrfj63mDR-xUHct-drNNBA5M-m-B7sgC1qUJF7g4,660
|
|
185
|
+
evalvault/adapters/outbound/report/ci_report_formatter.py,sha256=5YD8BwtOjLnHcNbbG0HJziOifD9BDhBtZT1oItd6zJE,1233
|
|
186
|
+
evalvault/adapters/outbound/report/dashboard_generator.py,sha256=g0SANOrOS_mSfLfKbfbxflxZmCNbZGrGzA0g03_Yb48,8356
|
|
183
187
|
evalvault/adapters/outbound/report/llm_report_generator.py,sha256=i_iXfY8qutIb8TsvLKyMLnijsA0yiNJ3rBEFg4zVqcE,26858
|
|
184
188
|
evalvault/adapters/outbound/report/markdown_adapter.py,sha256=5PS72h_qe4ZtYs-umhX5TqQL2k5SuDaCUc6rRw9AKRw,16761
|
|
189
|
+
evalvault/adapters/outbound/report/pr_comment_formatter.py,sha256=FxWWfZQU5ErejVqE_F8rKoUxoBTbbmw_ok9xbEYiA4E,1661
|
|
190
|
+
evalvault/adapters/outbound/retriever/__init__.py,sha256=o5qK4pIrlXm0mIs_fo0aLPQHuHkrmrXKc6ZSg8a0t4g,201
|
|
191
|
+
evalvault/adapters/outbound/retriever/graph_rag_adapter.py,sha256=xTI7uMFp4WKstg1s2zY9R_QsWPA-Rz_KUnzekwso8z0,11790
|
|
185
192
|
evalvault/adapters/outbound/storage/__init__.py,sha256=n5R6thAPTx1leSwv6od6nBWcLWFa-UYD6cOLzN89T8I,614
|
|
186
|
-
evalvault/adapters/outbound/storage/base_sql.py,sha256=
|
|
193
|
+
evalvault/adapters/outbound/storage/base_sql.py,sha256=sNCkUD9YfCBUTUXeLCyScXPwcgmfn6wX15j8WcjZm7w,52550
|
|
187
194
|
evalvault/adapters/outbound/storage/benchmark_storage_adapter.py,sha256=Qgf9xSSIkYQRpG4uLzcUdoYO9LTQDQ4tFRkkMYer-WA,9803
|
|
188
|
-
evalvault/adapters/outbound/storage/postgres_adapter.py,sha256=
|
|
189
|
-
evalvault/adapters/outbound/storage/postgres_schema.sql,sha256=
|
|
190
|
-
evalvault/adapters/outbound/storage/schema.sql,sha256=
|
|
191
|
-
evalvault/adapters/outbound/storage/sqlite_adapter.py,sha256=
|
|
195
|
+
evalvault/adapters/outbound/storage/postgres_adapter.py,sha256=fLg_0JA3iBKQh8nRI_Njf4564H4evcnWGTjX183IrFA,52194
|
|
196
|
+
evalvault/adapters/outbound/storage/postgres_schema.sql,sha256=xaN4rSkhtBNfKw5gOYqqhYP0xIN2Sn3drOnFpkBo1_Q,11255
|
|
197
|
+
evalvault/adapters/outbound/storage/schema.sql,sha256=GXelsMmthV6ugC1tv9oVwr1x-Ni-N3BpBom7iLDLlZA,13015
|
|
198
|
+
evalvault/adapters/outbound/storage/sqlite_adapter.py,sha256=qX4SHaDi8U0kY-rAksQ_GJX58bg9EwXkXQ2hffcoWDQ,56090
|
|
192
199
|
evalvault/adapters/outbound/tracer/__init__.py,sha256=xrvQQuAvF_UI02mKLMV7GTrG3zn836n5zwCRrrmhq_U,1054
|
|
193
200
|
evalvault/adapters/outbound/tracer/open_rag_log_handler.py,sha256=aq96FIWD-bBaSkq-bygWhQArC9LWghSwi-S03Mga0mI,2827
|
|
194
201
|
evalvault/adapters/outbound/tracer/open_rag_trace_adapter.py,sha256=P-4PN1UweITXu5uN3LJVCEL3wRwiExzhgs3y2GN78xM,4784
|
|
@@ -211,7 +218,7 @@ evalvault/config/secret_manager.py,sha256=YjPMuNqeBrAR2BzCJvsBNUExaU4TBSFyZ8kVYZ
|
|
|
211
218
|
evalvault/config/settings.py,sha256=DY170XUoMo8yQx8_CJjPt96QsGg7tyTx5wJ-ptcfdY0,18766
|
|
212
219
|
evalvault/config/playbooks/improvement_playbook.yaml,sha256=9F9WVVCydFfz6zUuGYzZ4PKdW1LLtcBKVF36T7xT764,26965
|
|
213
220
|
evalvault/domain/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
214
|
-
evalvault/domain/entities/__init__.py,sha256=
|
|
221
|
+
evalvault/domain/entities/__init__.py,sha256=7adWG4vwr67fC_ysT1rSvYG5U7qVc2CfWfUf7gDhx-k,4244
|
|
215
222
|
evalvault/domain/entities/analysis.py,sha256=gcMtumC66g-AIqb2LgfMpm5BMzwJhJkjg-zuybNoJCM,15208
|
|
216
223
|
evalvault/domain/entities/analysis_pipeline.py,sha256=hD9rFHMa4rUq0InRkSKhh6HQ9ZeNYAHKADzs-kWRP04,16845
|
|
217
224
|
evalvault/domain/entities/benchmark.py,sha256=CVbz_eW7Y9eM7wG7xA_xmldTIs72csdoTmu3E0NKoMU,18475
|
|
@@ -220,24 +227,27 @@ evalvault/domain/entities/dataset.py,sha256=WsC_5ivGluy-o2nXxLGmoC8DYl5UafVSo2hS
|
|
|
220
227
|
evalvault/domain/entities/debug.py,sha256=r92lgvOpq2svw70syJIo78muRAvrSn5h1JByH_Hvz-s,1493
|
|
221
228
|
evalvault/domain/entities/experiment.py,sha256=oWjbu0IJZ6oIRcnA-8ppeJDgp57Tv8ZjQ3UOZ0X9KJ8,2576
|
|
222
229
|
evalvault/domain/entities/feedback.py,sha256=xiaZaUQhyuxyW_i2scXt8eKZshMC6tXe3981e-uukw8,1604
|
|
230
|
+
evalvault/domain/entities/graph_rag.py,sha256=LvCJRyrXihJOFQSH2xB7O124_GG_lrsqfIVCgwJDu_0,610
|
|
223
231
|
evalvault/domain/entities/improvement.py,sha256=WHI7q1jXRxkuHhBWOrpk8UdLaH0UwjZVjRIDsqVDyZo,19322
|
|
224
232
|
evalvault/domain/entities/judge_calibration.py,sha256=fhQEI7g2nZuG1OliikhxgefcFAJldDqEmfTs9Mp-FPk,1234
|
|
225
233
|
evalvault/domain/entities/kg.py,sha256=8awN1M4vxAGQZk_ZG8i2CXKTizQ8FA1VCLhUWHZq0U8,3001
|
|
226
234
|
evalvault/domain/entities/memory.py,sha256=bfS75q8K8_jNrB7IYh4mjP8Lkyj-I0TVsmHCP0FuICw,8423
|
|
227
235
|
evalvault/domain/entities/method.py,sha256=a3jZi7SjcpK3HeVyVwQkUMwpnmg2RbxCnH4NqYPLCOI,1157
|
|
236
|
+
evalvault/domain/entities/multiturn.py,sha256=V9ay30rix6zxNcDRXeLudMgikC1b4f3kt01Hj2ZH7wE,2012
|
|
228
237
|
evalvault/domain/entities/prompt.py,sha256=lQlRnHEKY69GWTC-cUIu0DMuPfJ9UWm6Sm4KTNjVwfY,2920
|
|
229
238
|
evalvault/domain/entities/prompt_suggestion.py,sha256=Ep_XSjdYUj7pFSCMyeeZKs8yTnp74AVx05Zqr7829PE,1243
|
|
230
239
|
evalvault/domain/entities/rag_trace.py,sha256=sZgnkG4fK6KOe3Np6TYAZ_tPnsRbOmucDSQns35U1n4,11868
|
|
231
240
|
evalvault/domain/entities/result.py,sha256=OaGHMDLWMW2O4fNVuVTUvWFVBQ1iu93OD_oI3NumrCQ,10697
|
|
232
241
|
evalvault/domain/entities/stage.py,sha256=KyR-v3tyusPJ7pfTXtHE2_23tVvNSRU9Q1RT-R5akXg,7914
|
|
233
|
-
evalvault/domain/metrics/__init__.py,sha256=
|
|
242
|
+
evalvault/domain/metrics/__init__.py,sha256=RKHfCRKPHfKh7HmlZd4bazZi9V4jvRtAOZd15J4rMDk,1529
|
|
234
243
|
evalvault/domain/metrics/analysis_registry.py,sha256=JZpBrBs7-JExHKYuEML6Vg_uYLm-WniBE3BfiU5OtJg,7641
|
|
235
244
|
evalvault/domain/metrics/confidence.py,sha256=AX4oeN28OvmMkwD0pT-jskkOlXh87C1pe2W9P1sF69g,17224
|
|
236
245
|
evalvault/domain/metrics/contextual_relevancy.py,sha256=xAPYUv_0TM4j4WOutOSGftNln_l-2Ev6qpANeu4REv8,11057
|
|
237
246
|
evalvault/domain/metrics/entity_preservation.py,sha256=uSCbaETceE5PbGn-230Rm8pryOA8jDkkeOwAkWxA65g,6500
|
|
238
247
|
evalvault/domain/metrics/insurance.py,sha256=5NPeAi_86rpuZRgV4KhzomGrq3Uw2jjglN6FfA_AO8o,4040
|
|
248
|
+
evalvault/domain/metrics/multiturn_metrics.py,sha256=lGBN91VAQaa02bsvDOuUggRCQEB_dSORH31A4N72n2c,3385
|
|
239
249
|
evalvault/domain/metrics/no_answer.py,sha256=x6vRyOa1jw-qsnw9kOYT8YMPdLElaDRu7zjNCpyJfqM,8237
|
|
240
|
-
evalvault/domain/metrics/registry.py,sha256=
|
|
250
|
+
evalvault/domain/metrics/registry.py,sha256=6mMCN1kVQZlLrxGOPClOkaDed0IAc3L0rT1MERF4Ruk,8086
|
|
241
251
|
evalvault/domain/metrics/retrieval_rank.py,sha256=F55ByadJBowyKHKBmKAZ0T0qN_R1_7UNu-MiLnT4Ypg,14675
|
|
242
252
|
evalvault/domain/metrics/summary_accuracy.py,sha256=Hr4QS1e4Rxt1MgcTj5rElKuPw9rWS-zGkI0d8wB5dwA,5988
|
|
243
253
|
evalvault/domain/metrics/summary_needs_followup.py,sha256=5kExtZxxankP7csAAIZe_1uRFeBD7NQK-N15b5d0awM,1357
|
|
@@ -245,7 +255,7 @@ evalvault/domain/metrics/summary_non_definitive.py,sha256=1EE-z0Ib66gpjc0MGZHmZJ
|
|
|
245
255
|
evalvault/domain/metrics/summary_risk_coverage.py,sha256=Fo-dMg_jU4MCr0YqOZzBZymwEbG9y2H6eLX-jmuS8IU,1777
|
|
246
256
|
evalvault/domain/metrics/terms_dictionary.json,sha256=-ZQmpx6yMOYoAOpcLj-xK2LkAeCbAw0EUb6-syIOKS0,3801
|
|
247
257
|
evalvault/domain/metrics/text_match.py,sha256=P-YTZs9ekDqEmxLNBP8eXnMRymPdC8V4dJPtwG2ajVM,10219
|
|
248
|
-
evalvault/domain/services/__init__.py,sha256=
|
|
258
|
+
evalvault/domain/services/__init__.py,sha256=gpkco8bZGjuy7xAgSPMPCVTrw5xxHVvdILF8opTOS6c,1302
|
|
249
259
|
evalvault/domain/services/analysis_service.py,sha256=oUEtfJHB3bNJ_Ksygx-pjnLm4CTk7_rDvDbqfkAfFD4,10838
|
|
250
260
|
evalvault/domain/services/artifact_lint_service.py,sha256=80P46weoj9lBxOqg_ViHZEQ6Cfo69XV4cniZlmMsti0,8434
|
|
251
261
|
evalvault/domain/services/async_batch_executor.py,sha256=qYFRl7CGmv56XppeRhInde7Fw0GESCoZh8V-Iv_1hQQ,11140
|
|
@@ -265,12 +275,13 @@ evalvault/domain/services/document_versioning.py,sha256=M1qZaMpQ2exVT1wkVAmvEPPu
|
|
|
265
275
|
evalvault/domain/services/domain_learning_hook.py,sha256=rhKBmdnrJyfGzFNsNxzyv8jZO26-WOosHSmBV_9qdJg,7176
|
|
266
276
|
evalvault/domain/services/embedding_overlay.py,sha256=ZTNxUPXpHGbQ3Uri5DD3feTUFn7qrhuNshhyCQEvRuM,3559
|
|
267
277
|
evalvault/domain/services/entity_extractor.py,sha256=f3Rf5saK8QsgetLNK1Hbxzt8PtttJZCicSR63S8DJ5k,14141
|
|
268
|
-
evalvault/domain/services/evaluator.py,sha256=
|
|
278
|
+
evalvault/domain/services/evaluator.py,sha256=Iud5KWompKyMm59qsaH0jWkNL4nl_A7UI3pES-rCjcA,78921
|
|
269
279
|
evalvault/domain/services/experiment_comparator.py,sha256=IBrxIwux-8GucwlLx6e5lUqB9miSPvBLGJK9ctoW7Y0,3299
|
|
270
280
|
evalvault/domain/services/experiment_manager.py,sha256=2k-qGiAUyZuqqmcp4P-M3Z9HTXwwcqW5HQYKNkcIHuI,4863
|
|
271
281
|
evalvault/domain/services/experiment_reporter.py,sha256=QYlVmCFSx8hKTPMezc7QjJE07b3MSQ82Q4QVucSHLVY,1420
|
|
272
282
|
evalvault/domain/services/experiment_repository.py,sha256=1OQSvBmM7llNJFWgGHBL3XvJvn_OIQsKo7zophYtV1g,2423
|
|
273
283
|
evalvault/domain/services/experiment_statistics.py,sha256=aOrqbBjB1swHPaFRziID1mrAowoV-pBS4XdLFhAhMLE,1115
|
|
284
|
+
evalvault/domain/services/graph_rag_experiment.py,sha256=guSMw8OvfQ-Fz8PLygepDr4Pt-xKyWxwJPdQNRgWU-0,5027
|
|
274
285
|
evalvault/domain/services/holdout_splitter.py,sha256=Sos61Zy_bBjStt8LPHJ3KxDNda-OmX7AVUsT24K1n6Q,1910
|
|
275
286
|
evalvault/domain/services/improvement_guide_service.py,sha256=gMoVFlDsprOEEfRGKmdbk9_Due62J63Q-rL2zr65Q0s,17881
|
|
276
287
|
evalvault/domain/services/intent_classifier.py,sha256=hsWivDXqXJjCJEE-OI7eUGeYrewpYxlz67Z0TI3oskU,11707
|
|
@@ -279,6 +290,7 @@ evalvault/domain/services/kg_generator.py,sha256=oEugjPdn8Pb2Q3r5yAZl0dZJibNUkEh
|
|
|
279
290
|
evalvault/domain/services/memory_aware_evaluator.py,sha256=vTiYoxiMfZ_CMjSBjqwkBRdpiXRwQ2zXnQ2pXzVHYts,5249
|
|
280
291
|
evalvault/domain/services/memory_based_analysis.py,sha256=oh2irCy3le7fWiTtL31SMEhPyu7fyBVz-giO2hlNifE,4499
|
|
281
292
|
evalvault/domain/services/method_runner.py,sha256=pABqKZeaALpWZYDfzAbd-VOZt2djQggRNIPuuPQeUSw,3571
|
|
293
|
+
evalvault/domain/services/multiturn_evaluator.py,sha256=fipi5hEyidq_cnGGr0GpvoprLtjm6dHLuAkSotbT3YA,7202
|
|
282
294
|
evalvault/domain/services/ops_snapshot_service.py,sha256=1CqJN2p3tM6SgzLCZKcVEM213fd1cDGexTRPG_3e59w,5138
|
|
283
295
|
evalvault/domain/services/pipeline_orchestrator.py,sha256=yriVlEVZYDtt0Vwt4Ae6xyW1H6Dj4Hxdn8XQSvQNSoQ,19436
|
|
284
296
|
evalvault/domain/services/pipeline_template_registry.py,sha256=k5Ce1BC3NgcYqCLiUZpXsl_6WwDHOXONoYDH7KzX2L4,28809
|
|
@@ -304,11 +316,12 @@ evalvault/domain/services/threshold_profiles.py,sha256=yYJ7o8SIRufI7kUN8edh8am-d
|
|
|
304
316
|
evalvault/domain/services/unified_report_service.py,sha256=lG3VpMLC1MTYUlcGl-MUEE4PUopkyrhcgj4_ye9c_vM,11829
|
|
305
317
|
evalvault/domain/services/visual_space_service.py,sha256=3_qyBsThr5lzP1le6qkXf9ByX3JjoYGX15iMIHe8gQs,34958
|
|
306
318
|
evalvault/ports/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
307
|
-
evalvault/ports/inbound/__init__.py,sha256=
|
|
319
|
+
evalvault/ports/inbound/__init__.py,sha256=a0BA3oy21besJsf0m1_zNBAt0eFo8UDL4XKOTHtpmdc,666
|
|
308
320
|
evalvault/ports/inbound/analysis_pipeline_port.py,sha256=RJfKtp22AYEqnmRk6RDawAK52rEmyAhuk0FUPJQUwQU,1758
|
|
309
321
|
evalvault/ports/inbound/evaluator_port.py,sha256=rDvouIRUjBD7uICgrpeo11vNPvo27_0CdylRHPodPSE,1323
|
|
310
322
|
evalvault/ports/inbound/learning_hook_port.py,sha256=EeJeMl3chcPHlj5mkLOj6tm8s_qdDRvoCwK1-0l70tI,3297
|
|
311
|
-
evalvault/ports/inbound/
|
|
323
|
+
evalvault/ports/inbound/multiturn_port.py,sha256=qZNV6h9Qvw77FE_9hZdyYUOvj_7cXSBHVkj6B9HztJI,524
|
|
324
|
+
evalvault/ports/inbound/web_port.py,sha256=6MJuPQy9CKvuQKiRGyDaDgeqBYj1T_Q9wCjHPbd5GYE,5730
|
|
312
325
|
evalvault/ports/outbound/__init__.py,sha256=x3LseXtwX0NONM1mnhT3fMchz7U6gEDRUX0TDswpg5E,3591
|
|
313
326
|
evalvault/ports/outbound/analysis_cache_port.py,sha256=zPSdUVK_yw3PMWPII2YvS1WLmCGlg5bDScSuYINW9yc,1386
|
|
314
327
|
evalvault/ports/outbound/analysis_module_port.py,sha256=QYzkvie9-BbONj8ZgiQUjm8I-bn8mgzlXTzIXMhehmQ,1881
|
|
@@ -321,6 +334,7 @@ evalvault/ports/outbound/dataset_port.py,sha256=OpEBlkvFwpSRbmi-Lt3wK7n0wljmQ6m9
|
|
|
321
334
|
evalvault/ports/outbound/difficulty_profile_port.py,sha256=hQY-TR64WyUNnCxD9Mw-QraO3ZBw0VUP8KoCmVsQYBE,347
|
|
322
335
|
evalvault/ports/outbound/domain_memory_port.py,sha256=SZFurqsoBmTw1Kt_pej-YpMbooVeyV35jekhaDRojus,23320
|
|
323
336
|
evalvault/ports/outbound/embedding_port.py,sha256=ZHeKRMRBNjpZKWxsLKrD8jJz0M66JTwNcrJbkRaklK4,2034
|
|
337
|
+
evalvault/ports/outbound/graph_retriever_port.py,sha256=2jaaXYRfG4Gi7UE38_NmFBaQcqAGb5svizcWl1PVScs,637
|
|
324
338
|
evalvault/ports/outbound/improvement_port.py,sha256=fIXhcG4n6OJ1hdvWeqEoLBrVsCNdHZRgtEZjR8lf3qA,2325
|
|
325
339
|
evalvault/ports/outbound/intent_classifier_port.py,sha256=gqMIk0rH6Z43ceuMMRX4vqXurgHZz-CJX2bR5PVAkjQ,2253
|
|
326
340
|
evalvault/ports/outbound/judge_calibration_port.py,sha256=kShZ2MZGvgQZaY7XxwkmLXtquK_RFKcwuWRBfJOrILA,602
|
|
@@ -333,15 +347,15 @@ evalvault/ports/outbound/ops_snapshot_port.py,sha256=6v72W41tlnxjkJfbfHhFiJMPlRS
|
|
|
333
347
|
evalvault/ports/outbound/relation_augmenter_port.py,sha256=cMcHQnmK111WzZr50vYr7affeHhOtpFZxPARwkg9xbk,651
|
|
334
348
|
evalvault/ports/outbound/report_port.py,sha256=wgReSYL4SupXIoALFh0QFWfX2kzPftXpWTvGLCMd2B8,1315
|
|
335
349
|
evalvault/ports/outbound/stage_storage_port.py,sha256=Nlf9upsXxgCABQB5cJdpLQYsoZNiGRAU5zE5D-Ptp2I,1201
|
|
336
|
-
evalvault/ports/outbound/storage_port.py,sha256=
|
|
350
|
+
evalvault/ports/outbound/storage_port.py,sha256=F6k-c0Yz7IWvLRrTdQAmpHjDYVz-Zo9eL6Qj0HNBJ4Y,6632
|
|
337
351
|
evalvault/ports/outbound/tracer_port.py,sha256=kTqJCUIJHnvvDzMxxGhHSfiz8_Q4CZ0WSPvIUVVOcyw,623
|
|
338
352
|
evalvault/ports/outbound/tracker_port.py,sha256=05LA3AWnuE1XmGQC16Zle9i2sEV3q69Nt8ZUye_w1_Y,2532
|
|
339
353
|
evalvault/reports/__init__.py,sha256=Bb1X4871msAN8I6PM6nKGED3psPwZt88hXZBAOdH06Y,113
|
|
340
354
|
evalvault/reports/release_notes.py,sha256=pZj0PBFT-4F_Ty-Kv5P69BuoOnmTCn4kznDcORFJd0w,4011
|
|
341
355
|
evalvault/scripts/__init__.py,sha256=NwEeIFQbkX4ml2R_PhtIoNtArDSX_suuoymgG_7Kwso,89
|
|
342
356
|
evalvault/scripts/regression_runner.py,sha256=SxZori5BZ8jVQ057Mf5V5FPgIVDccrV5oRONmnhuk8w,8438
|
|
343
|
-
evalvault-1.
|
|
344
|
-
evalvault-1.
|
|
345
|
-
evalvault-1.
|
|
346
|
-
evalvault-1.
|
|
347
|
-
evalvault-1.
|
|
357
|
+
evalvault-1.72.0.dist-info/METADATA,sha256=-h7WdCBxLkrvGWBAMifdpc9BpoBfUqpEEAz8GLM6fgU,26159
|
|
358
|
+
evalvault-1.72.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
|
|
359
|
+
evalvault-1.72.0.dist-info/entry_points.txt,sha256=Oj9Xc5gYcyUYYNmQfWI8NYGw7nN-3M-h2ipHIMlVn6o,65
|
|
360
|
+
evalvault-1.72.0.dist-info/licenses/LICENSE.md,sha256=3RNWY4jjtrQ_yYa-D-7I3XO12Ti7YzxsLV_dpykujvo,11358
|
|
361
|
+
evalvault-1.72.0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|