evalvault-1.68.1-py3-none-any.whl → evalvault-1.70.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
--- /dev/null
+++ b/evalvault/adapters/outbound/analysis/multiturn_analyzer_module.py
@@ -0,0 +1,212 @@
+ """
+ Multi-turn evaluation summary module.
+ """
+
+ from __future__ import annotations
+
+ from collections import defaultdict
+ from typing import Any
+
+ from evalvault.adapters.outbound.analysis.base_module import BaseAnalysisModule
+ from evalvault.adapters.outbound.analysis.pipeline_helpers import get_upstream_output, safe_mean
+ from evalvault.domain.entities import EvaluationRun
+
+
+ class MultiTurnAnalyzerModule(BaseAnalysisModule):
+     """Aggregates results at the conversation (multi-turn) level."""
+
+     module_id = "multiturn_analyzer"
+     name = "Multi-turn analysis"
+     description = "Summarizes multi-turn performance based on conversation/turn metadata."
+     input_types = ["run"]
+     output_types = ["multiturn_summary", "multiturn_conversations", "multiturn_turns"]
+     requires = ["data_loader"]
+     tags = ["analysis", "multiturn"]
+
+     def execute(
+         self,
+         inputs: dict[str, Any],
+         params: dict[str, Any] | None = None,
+     ) -> dict[str, Any]:
+         loader_output = get_upstream_output(inputs, "load_data", "data_loader") or {}
+         run = loader_output.get("run")
+         if not isinstance(run, EvaluationRun):
+             return {
+                 "available": False,
+                 "summary": {},
+                 "conversations": [],
+                 "turns": [],
+                 "coverage": {},
+             }
+
+         retrieval_meta = run.retrieval_metadata or {}
+         cases = run.results
+         total_cases = len(cases)
+
+         coverage = {
+             "total_cases": total_cases,
+             "has_conversation_id": 0,
+             "has_turn_index": 0,
+         }
+
+         grouped: dict[str, list[dict[str, Any]]] = defaultdict(list)
+         turns: list[dict[str, Any]] = []
+
+         for result in cases:
+             case_meta = _resolve_case_metadata(retrieval_meta, result.test_case_id)
+             conversation_id = _coerce_text(case_meta.get("conversation_id"))
+             turn_index = _coerce_turn_index(case_meta.get("turn_index"))
+             turn_id = _coerce_text(case_meta.get("turn_id"))
+
+             if conversation_id:
+                 coverage["has_conversation_id"] += 1
+             if turn_index is not None:
+                 coverage["has_turn_index"] += 1
+
+             metrics = {
+                 metric.name: metric.score for metric in result.metrics if metric.score is not None
+             }
+             avg_score = safe_mean(metrics.values()) if metrics else 0.0
+             failed_metrics = [metric.name for metric in result.metrics if not metric.passed]
+             entry = {
+                 "test_case_id": result.test_case_id,
+                 "conversation_id": conversation_id,
+                 "turn_index": turn_index,
+                 "turn_id": turn_id,
+                 "avg_score": round(avg_score, 4),
+                 "metrics": metrics,
+                 "failed_metrics": failed_metrics,
+                 "passed_all": result.all_passed,
+             }
+             turns.append(entry)
+             if conversation_id:
+                 grouped[conversation_id].append(entry)
+
+         conversations: list[dict[str, Any]] = []
+         first_failure_hist: dict[str, int] = defaultdict(int)
+
+         for conversation_id, entries in grouped.items():
+             entries_sorted = _sort_turns(entries)
+             avg_scores = [item["avg_score"] for item in entries_sorted]
+             metric_scores: dict[str, list[float]] = defaultdict(list)
+             for item in entries_sorted:
+                 for name, score in (item.get("metrics") or {}).items():
+                     metric_scores[name].append(float(score))
+
+             metric_means = {
+                 name: round(safe_mean(values), 4) for name, values in metric_scores.items()
+             }
+             passed_all = all(item.get("passed_all") for item in entries_sorted)
+             failure_turn = _first_failure_turn(entries_sorted)
+             if failure_turn is not None:
+                 first_failure_hist[str(failure_turn)] += 1
+
+             worst_turn = _select_worst_turn(entries_sorted)
+
+             conversations.append(
+                 {
+                     "conversation_id": conversation_id,
+                     "turn_count": len(entries_sorted),
+                     "avg_score": round(safe_mean(avg_scores), 4),
+                     "passed_all_turns": passed_all,
+                     "first_failure_turn_index": failure_turn,
+                     "worst_turn": worst_turn,
+                     "metric_means": metric_means,
+                 }
+             )
+
+         conversation_count = len(grouped)
+         turn_count = sum(len(items) for items in grouped.values())
+         summary = {
+             "conversation_count": conversation_count,
+             "turn_count": turn_count,
+             "avg_turns_per_conversation": round(
+                 (turn_count / conversation_count) if conversation_count else 0.0, 3
+             ),
+             "conversation_pass_rate": round(
+                 (
+                     sum(1 for item in conversations if item.get("passed_all_turns"))
+                     / conversation_count
+                 )
+                 if conversation_count
+                 else 0.0,
+                 4,
+             ),
+             "first_failure_turn_histogram": dict(first_failure_hist),
+         }
+
+         if total_cases:
+             coverage["has_conversation_id"] = round(
+                 coverage["has_conversation_id"] / total_cases, 4
+             )
+             coverage["has_turn_index"] = round(coverage["has_turn_index"] / total_cases, 4)
+
+         return {
+             "available": True,
+             "summary": summary,
+             "conversations": conversations,
+             "turns": turns,
+             "coverage": coverage,
+         }
+
+
+ def _resolve_case_metadata(
+     retrieval_metadata: dict[str, dict[str, Any]],
+     test_case_id: str,
+ ) -> dict[str, Any]:
+     meta = retrieval_metadata.get(test_case_id)
+     if isinstance(meta, dict):
+         nested = meta.get("test_case_metadata")
+         if isinstance(nested, dict):
+             merged = dict(nested)
+             merged.update({k: v for k, v in meta.items() if k != "test_case_metadata"})
+             return merged
+         return dict(meta)
+     return {}
+
+
+ def _coerce_text(value: Any) -> str | None:
+     if value is None:
+         return None
+     if isinstance(value, str):
+         trimmed = value.strip()
+         return trimmed or None
+     return str(value)
+
+
+ def _coerce_turn_index(value: Any) -> int | None:
+     if value is None:
+         return None
+     if isinstance(value, int):
+         return value
+     if isinstance(value, float) and value.is_integer():
+         return int(value)
+     if isinstance(value, str) and value.strip().isdigit():
+         return int(value.strip())
+     return None
+
+
+ def _sort_turns(entries: list[dict[str, Any]]) -> list[dict[str, Any]]:
+     if all(item.get("turn_index") is None for item in entries):
+         return list(entries)
+     return sorted(
+         entries, key=lambda item: (item.get("turn_index") is None, item.get("turn_index") or 0)
+     )
+
+
+ def _first_failure_turn(entries: list[dict[str, Any]]) -> int | None:
+     for item in entries:
+         if not item.get("passed_all"):
+             return item.get("turn_index")
+     return None
+
+
+ def _select_worst_turn(entries: list[dict[str, Any]]) -> dict[str, Any] | None:
+     if not entries:
+         return None
+     worst = min(entries, key=lambda item: item.get("avg_score", 0.0))
+     return {
+         "test_case_id": worst.get("test_case_id"),
+         "avg_score": worst.get("avg_score"),
+         "failed_metrics": worst.get("failed_metrics", []),
+     }
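For orientation, the per-conversation aggregation above reduces to a few dictionary passes. A minimal sketch with hand-built turn entries in the shape the module emits (stdlib only, no EvalVault types):

from collections import defaultdict

# Toy turn entries shaped like the module's per-turn output.
turns = [
    {"test_case_id": "tc-1", "conversation_id": "conv-a", "turn_index": 0, "avg_score": 0.91, "passed_all": True},
    {"test_case_id": "tc-2", "conversation_id": "conv-a", "turn_index": 1, "avg_score": 0.42, "passed_all": False},
    {"test_case_id": "tc-3", "conversation_id": "conv-b", "turn_index": 0, "avg_score": 0.88, "passed_all": True},
]

grouped = defaultdict(list)
for entry in turns:
    grouped[entry["conversation_id"]].append(entry)

first_failure_hist = defaultdict(int)
for conv_id, entries in grouped.items():
    entries.sort(key=lambda item: item["turn_index"])
    # First turn in order whose metrics did not all pass, None if the whole conversation passed.
    failure = next((e["turn_index"] for e in entries if not e["passed_all"]), None)
    if failure is not None:
        first_failure_hist[str(failure)] += 1
    worst = min(entries, key=lambda item: item["avg_score"])
    print(conv_id, "worst turn:", worst["test_case_id"], "first failure:", failure)

print("first_failure_turn_histogram:", dict(first_failure_hist))
# conv-a worst turn: tc-2 first failure: 1
# conv-b worst turn: tc-3 first failure: None
# first_failure_turn_histogram: {'1': 1}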
--- a/evalvault/adapters/outbound/analysis/pipeline_factory.py
+++ b/evalvault/adapters/outbound/analysis/pipeline_factory.py
@@ -58,6 +58,7 @@ def build_analysis_pipeline_service(
      service.register_module(analysis_modules.DiagnosticPlaybookModule())
      service.register_module(analysis_modules.RootCauseAnalyzerModule())
      service.register_module(analysis_modules.PatternDetectorModule())
+     service.register_module(analysis_modules.MultiTurnAnalyzerModule())
      service.register_module(analysis_modules.TimeSeriesAnalyzerModule())
      service.register_module(analysis_modules.TimeSeriesAdvancedModule())
      service.register_module(analysis_modules.TrendDetectorModule())
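Registration follows the module_id convention used throughout the analysis adapters: each module class declares its own id, and the service indexes it under that key. A simplified stand-in for the pattern (AnalysisService here is a hypothetical minimal registry, not the package's actual service class):

from typing import Any


class AnalysisService:
    """Hypothetical minimal registry mirroring the register_module calls above."""

    def __init__(self) -> None:
        self._modules: dict[str, Any] = {}

    def register_module(self, module: Any) -> None:
        # module_id comes from the class attribute, e.g. "multiturn_analyzer".
        self._modules[module.module_id] = module

    def get(self, module_id: str) -> Any:
        return self._modules[module_id]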
--- a/evalvault/adapters/outbound/analysis/retrieval_analyzer_module.py
+++ b/evalvault/adapters/outbound/analysis/retrieval_analyzer_module.py
@@ -37,6 +37,8 @@ class RetrievalAnalyzerModule(BaseAnalysisModule):

          params = params or {}
          max_cases = int(params.get("max_cases", 150))
+         max_examples = int(params.get("max_examples", 5))
+         max_graphrag_docs = int(params.get("max_graphrag_docs", 5))

          context_counts: list[int] = []
          context_token_counts: list[int] = []
@@ -96,6 +98,11 @@ class RetrievalAnalyzerModule(BaseAnalysisModule):
          retrieval_meta = run.retrieval_metadata or {}
          retrieval_times: list[float] = []
          retrieval_scores: list[float] = []
+         graph_nodes: list[int] = []
+         graph_edges: list[int] = []
+         subgraph_sizes: list[int] = []
+         graphrag_cases = 0
+         graphrag_examples: list[dict[str, Any]] = []
          for item in retrieval_meta.values():
              if isinstance(item, dict):
                  if "retrieval_time_ms" in item:
@@ -103,6 +110,20 @@ class RetrievalAnalyzerModule(BaseAnalysisModule):
                  scores = item.get("scores")
                  if isinstance(scores, list) and scores:
                      retrieval_scores.append(safe_mean([float(s) for s in scores]))
+                 if "graph_nodes" in item:
+                     graph_nodes.append(int(item["graph_nodes"]))
+                 if "graph_edges" in item:
+                     graph_edges.append(int(item["graph_edges"]))
+                 if "subgraph_size" in item:
+                     subgraph_sizes.append(int(item["subgraph_size"]))
+                 if item.get("retriever") == "graphrag":
+                     graphrag_cases += 1
+                     if len(graphrag_examples) < max_examples:
+                         graphrag_details = item.get("graphrag")
+                         if isinstance(graphrag_details, dict):
+                             graphrag_examples.append(
+                                 _trim_graphrag_example(graphrag_details, max_docs=max_graphrag_docs)
+                             )

          summary = {
              "total_cases": total_cases,
@@ -122,6 +143,15 @@ class RetrievalAnalyzerModule(BaseAnalysisModule):
              summary["avg_retrieval_time_ms"] = round(safe_mean(retrieval_times), 2)
          if retrieval_scores:
              summary["avg_retrieval_score"] = round(safe_mean(retrieval_scores), 4)
+         if graph_nodes:
+             summary["avg_graph_nodes"] = round(safe_mean(graph_nodes), 2)
+         if graph_edges:
+             summary["avg_graph_edges"] = round(safe_mean(graph_edges), 2)
+         if subgraph_sizes:
+             summary["avg_subgraph_size"] = round(safe_mean(subgraph_sizes), 2)
+         if total_cases:
+             summary["graphrag_case_rate"] = round(graphrag_cases / total_cases, 4)
+             summary["graphrag_case_count"] = graphrag_cases

          insights = []
          if summary["avg_contexts"] < 1:
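The new graph statistics are simple per-case aggregates over retrieval_metadata. A minimal, self-contained sketch of the same aggregation over a toy metadata dict (statistics.fmean stands in for the package's safe_mean helper):

from statistics import fmean

# Toy retrieval_metadata shaped like what the analyzer iterates over.
retrieval_meta = {
    "tc-1": {"retriever": "graphrag", "graph_nodes": 14, "graph_edges": 22, "subgraph_size": 9},
    "tc-2": {"retriever": "dense", "retrieval_time_ms": 12.5},
}

graph_nodes = [int(m["graph_nodes"]) for m in retrieval_meta.values() if "graph_nodes" in m]
graphrag_cases = sum(1 for m in retrieval_meta.values() if m.get("retriever") == "graphrag")
total_cases = len(retrieval_meta)

summary = {}
if graph_nodes:
    summary["avg_graph_nodes"] = round(fmean(graph_nodes), 2)
if total_cases:
    summary["graphrag_case_rate"] = round(graphrag_cases / total_cases, 4)
    summary["graphrag_case_count"] = graphrag_cases

print(summary)  # {'avg_graph_nodes': 14.0, 'graphrag_case_rate': 0.5, 'graphrag_case_count': 1}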
@@ -138,6 +168,38 @@ class RetrievalAnalyzerModule(BaseAnalysisModule):
              "context_token_counts": context_token_counts[:100],
              "keyword_overlap_scores": keyword_overlap_scores[:100],
              "faithfulness_scores": faithfulness_scores[:100],
+             "graph_nodes": graph_nodes[:100],
+             "graph_edges": graph_edges[:100],
+             "subgraph_sizes": subgraph_sizes[:100],
+             "graphrag_examples": graphrag_examples,
          },
          "insights": insights,
      }
+
+
+ def _trim_graphrag_example(payload: dict[str, Any], *, max_docs: int) -> dict[str, Any]:
+     docs = payload.get("docs")
+     if isinstance(docs, list):
+         trimmed_docs = []
+         for entry in docs[:max_docs]:
+             if not isinstance(entry, dict):
+                 continue
+             trimmed_docs.append(_trim_graphrag_doc(entry))
+         docs = trimmed_docs
+     else:
+         docs = []
+     return {
+         "docs": docs,
+         "max_docs": max_docs,
+     }
+
+
+ def _trim_graphrag_doc(entry: dict[str, Any]) -> dict[str, Any]:
+     output: dict[str, Any] = {}
+     for key in ("doc_id", "rank", "score"):
+         if key in entry:
+             output[key] = entry[key]
+     sources = entry.get("sources")
+     if isinstance(sources, dict):
+         output["sources"] = sources
+     return output
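Assuming the two helpers above are in scope, a toy payload trims as follows: extra fields such as "text" are dropped, non-dict entries are skipped, and the doc list is capped by max_docs.

# Toy payload in the "graphrag" metadata shape.
payload = {
    "docs": [
        {"doc_id": "d1", "rank": 1, "score": 0.83, "text": "long body...", "sources": {"bm25": {"rank": 2}}},
        {"doc_id": "d2", "rank": 2, "score": 0.61, "text": "another long body..."},
        "not-a-dict",  # cut by the max_docs slice here; also skipped by the isinstance check
    ]
}
print(_trim_graphrag_example(payload, max_docs=2))
# {'docs': [{'doc_id': 'd1', 'rank': 1, 'score': 0.83, 'sources': {'bm25': {'rank': 2}}},
#           {'doc_id': 'd2', 'rank': 2, 'score': 0.61}], 'max_docs': 2}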
--- a/evalvault/domain/services/pipeline_template_registry.py
+++ b/evalvault/domain/services/pipeline_template_registry.py
@@ -658,6 +658,12 @@ class PipelineTemplateRegistry:
                  module="statistical_analyzer",
                  depends_on=["load_data"],
              ),
+             AnalysisNode(
+                 id="retrieval_analysis",
+                 name="Retrieval analysis",
+                 module="retrieval_analyzer",
+                 depends_on=["load_data"],
+             ),
              AnalysisNode(
                  id="priority_summary",
                  name="Priority summary",
@@ -669,7 +675,7 @@ class PipelineTemplateRegistry:
                  name="LLM summary report",
                  module="llm_report",
                  params={"report_type": "summary"},
-                 depends_on=["load_data", "statistics"],
+                 depends_on=["load_data", "statistics", "retrieval_analysis"],
              ),
          ]
          return AnalysisPipeline(
@@ -698,6 +704,12 @@ class PipelineTemplateRegistry:
                  module="ragas_evaluator",
                  depends_on=["load_data"],
              ),
+             AnalysisNode(
+                 id="retrieval_analysis",
+                 name="Retrieval analysis",
+                 module="retrieval_analyzer",
+                 depends_on=["load_data"],
+             ),
              AnalysisNode(
                  id="low_samples",
                  name="Low-performing case extraction",
@@ -710,6 +722,12 @@ class PipelineTemplateRegistry:
                  module="diagnostic_playbook",
                  depends_on=["load_data", "ragas_eval"],
              ),
+             AnalysisNode(
+                 id="multiturn",
+                 name="Multi-turn analysis",
+                 module="multiturn_analyzer",
+                 depends_on=["load_data", "ragas_eval"],
+             ),
              AnalysisNode(
                  id="nlp_analysis",
                  name="NLP analysis",
@@ -767,11 +785,13 @@ class PipelineTemplateRegistry:
                  "load_data",
                  "statistics",
                  "ragas_eval",
+                 "retrieval_analysis",
                  "nlp_analysis",
                  "pattern_detection",
                  "causal_analysis",
                  "root_cause",
                  "priority_summary",
+                 "multiturn",
                  "trend_detection",
              ],
          ),
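The depends_on lists define a DAG that the orchestrator has to execute in dependency order. A sketch of one valid order using stdlib graphlib over a subset of the edges declared above (the report node's id is abbreviated here as llm_report; the diff shows only its module name, so that id is an assumption):

from graphlib import TopologicalSorter

# node -> its depends_on list, as declared in the templates above.
deps = {
    "load_data": [],
    "statistics": ["load_data"],
    "ragas_eval": ["load_data"],
    "retrieval_analysis": ["load_data"],
    "multiturn": ["load_data", "ragas_eval"],
    "llm_report": ["load_data", "statistics", "retrieval_analysis"],
}
print(list(TopologicalSorter(deps).static_order()))
# e.g. ['load_data', 'statistics', 'ragas_eval', 'retrieval_analysis', 'multiturn', 'llm_report']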
--- a/evalvault/domain/services/retriever_context.py
+++ b/evalvault/domain/services/retriever_context.py
@@ -48,6 +48,14 @@ def apply_retriever_to_dataset(
          if scores:
              metadata["scores"] = scores
          metadata.update(_extract_graph_attributes(results))
+         graphrag_details = _build_graphrag_details(
+             results,
+             doc_ids=resolved_doc_ids,
+             max_docs=top_k,
+         )
+         if graphrag_details:
+             metadata["retriever"] = "graphrag"
+             metadata["graphrag"] = graphrag_details
          retrieval_metadata[test_case.id] = metadata

      return retrieval_metadata
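A minimal sketch of the tagging step above: when GraphRAG details can be built for a case, the metadata entry is marked so the retrieval analyzer can count and sample it downstream. The stub below stands in for _build_graphrag_details and returns a canned payload.

from typing import Any

def _build_graphrag_details_stub() -> dict[str, Any] | None:
    # Stand-in: the real helper inspects retriever results (see below).
    return {"docs": [{"doc_id": "d1", "rank": 1}], "max_docs": 5}

metadata: dict[str, Any] = {"scores": [0.8, 0.6]}
graphrag_details = _build_graphrag_details_stub()
if graphrag_details:
    metadata["retriever"] = "graphrag"
    metadata["graphrag"] = graphrag_details

print(metadata["retriever"])  # graphrag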
@@ -164,6 +172,114 @@ def _compact_values(values: set[str]) -> str | list[str]:
      return sorted(values)


+ def _build_graphrag_details(
+     results: Sequence[RetrieverResultProtocol],
+     *,
+     doc_ids: Sequence[str],
+     max_docs: int,
+     max_entities: int = 20,
+     max_relations: int = 20,
+ ) -> dict[str, Any] | None:
+     details: list[dict[str, Any]] = []
+     for rank, result in enumerate(results, start=1):
+         metadata = getattr(result, "metadata", None)
+         if not isinstance(metadata, dict):
+             continue
+
+         kg_meta = metadata.get("kg") if isinstance(metadata.get("kg"), dict) else None
+         bm25_meta = metadata.get("bm25") if isinstance(metadata.get("bm25"), dict) else None
+         dense_meta = metadata.get("dense") if isinstance(metadata.get("dense"), dict) else None
+         community_id = metadata.get("community_id")
+
+         if not (kg_meta or bm25_meta or dense_meta or community_id is not None):
+             continue
+
+         doc_id = _resolve_doc_id(result, doc_ids, rank)
+         entry: dict[str, Any] = {
+             "doc_id": doc_id,
+             "rank": rank,
+         }
+         score = _extract_score(result)
+         if score is not None:
+             entry["score"] = score
+
+         sources: dict[str, Any] = {}
+         if kg_meta:
+             sources["kg"] = {
+                 "entity_score": _coerce_float_or_none(kg_meta.get("entity_score")),
+                 "relation_score": _coerce_float_or_none(kg_meta.get("relation_score")),
+                 "entities": _limit_strings(kg_meta.get("entities"), max_entities),
+                 "relations": _limit_strings(kg_meta.get("relations"), max_relations),
+                 "community_id": _coerce_text_or_list(kg_meta.get("community_id")),
+             }
+         if bm25_meta:
+             sources["bm25"] = _build_rank_score(bm25_meta)
+         if dense_meta:
+             sources["dense"] = _build_rank_score(dense_meta)
+         if community_id is not None:
+             sources["community_id"] = _coerce_text_or_list(community_id)
+         if sources:
+             entry["sources"] = sources
+
+         details.append(entry)
+         if len(details) >= max_docs:
+             break
+
+     if not details:
+         return None
+
+     return {
+         "docs": details,
+         "max_docs": max_docs,
+         "max_entities": max_entities,
+         "max_relations": max_relations,
+     }
+
+
+ def _build_rank_score(payload: dict[str, Any]) -> dict[str, Any]:
+     out: dict[str, Any] = {}
+     rank = _coerce_int_optional(payload.get("rank"))
+     if rank is not None:
+         out["rank"] = rank
+     score = _coerce_float_or_none(payload.get("score"))
+     if score is not None:
+         out["score"] = score
+     return out
+
+
+ def _coerce_float_or_none(value: Any) -> float | None:
+     try:
+         if value is None:
+             return None
+         return float(value)
+     except (TypeError, ValueError):
+         return None
+
+
+ def _coerce_int_optional(value: Any) -> int | None:
+     try:
+         if value is None:
+             return None
+         return int(value)
+     except (TypeError, ValueError):
+         return None
+
+
+ def _coerce_text_or_list(value: Any) -> str | list[str] | None:
+     if value is None:
+         return None
+     if isinstance(value, (list, tuple, set)):
+         return [str(item) for item in value]
+     return str(value)
+
+
+ def _limit_strings(value: Any, limit: int) -> list[str]:
+     if not value:
+         return []
+     items = list(value) if isinstance(value, (list, tuple, set)) else [value]
+     return [str(item) for item in items[:limit]]
+
+
  def apply_versioned_retriever_to_dataset(
      *,
      dataset: Dataset,
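Assuming the coercion helpers above are in scope, their edge-case behavior looks like this:

print(_coerce_float_or_none("0.73"))          # 0.73
print(_coerce_float_or_none("n/a"))           # None (ValueError swallowed)
print(_coerce_int_optional(3.0))              # 3
print(_coerce_text_or_list(("c1", "c2")))     # ['c1', 'c2']
print(_limit_strings({"e1", "e2", "e3"}, 2))  # two of the entities; set order is unspecified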
--- a/evalvault-1.68.1.dist-info/METADATA
+++ b/evalvault-1.70.0.dist-info/METADATA
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: evalvault
- Version: 1.68.1
+ Version: 1.70.0
  Summary: RAG evaluation system using Ragas with Phoenix/Langfuse tracing
  Project-URL: Homepage, https://github.com/ntts9990/EvalVault
  Project-URL: Documentation, https://github.com/ntts9990/EvalVault#readme
--- a/evalvault-1.68.1.dist-info/RECORD
+++ b/evalvault-1.70.0.dist-info/RECORD
@@ -86,15 +86,16 @@ evalvault/adapters/outbound/analysis/low_performer_extractor_module.py,sha256=Pt
  evalvault/adapters/outbound/analysis/model_analyzer_module.py,sha256=28rHdXBXYIFpLHixbbZcv6-j2QVgl3yaGN0vU1Q0gFc,2682
  evalvault/adapters/outbound/analysis/morpheme_analyzer_module.py,sha256=Hrh4mluMsOhQHPrliD2w0FVKokJpfikXOFKT6sNwk74,4158
  evalvault/adapters/outbound/analysis/morpheme_quality_checker_module.py,sha256=_uRKDXdwGbfYduf_3XT77vF8X3-_zW3stHYc3HKYQTE,2216
+ evalvault/adapters/outbound/analysis/multiturn_analyzer_module.py,sha256=6R_lcbJyQr5CEEI_zpDJDdw6G4n3ZnkUI0ovfUPGrtU,7557
  evalvault/adapters/outbound/analysis/network_analyzer_module.py,sha256=ITUVnt_CI5pHy5SAESBSi004yMtiAhGFsbhC61VTezk,8475
  evalvault/adapters/outbound/analysis/nlp_adapter.py,sha256=aLtF_fns-7IEtitwON2EYS_lweq_IdldFsRm47alN0Q,29561
  evalvault/adapters/outbound/analysis/nlp_analyzer_module.py,sha256=kVuG9pVMQO6OYY5zxj_w9nNQZ1-qIO0y6XcXo6lG-n0,8221
  evalvault/adapters/outbound/analysis/pattern_detector_module.py,sha256=SyCDO_VS-r-tjGh8WrW-t1GCSC9ouxirdVk4NizFPXo,1882
- evalvault/adapters/outbound/analysis/pipeline_factory.py,sha256=XvcCbKCN_otv1pGUzk0oE76RV19yFga8r6RngBvgEFo,3691
+ evalvault/adapters/outbound/analysis/pipeline_factory.py,sha256=Yk-VPagdAZXbbD08pCSOleg-URuVAzJks4oGl61mKAs,3763
  evalvault/adapters/outbound/analysis/pipeline_helpers.py,sha256=8E8IrYI5JvRrpnjxe0DS7srbPzB0XAxxXhLLYgfwsgU,5756
  evalvault/adapters/outbound/analysis/priority_summary_module.py,sha256=o8Y0rfHjYYE9WNTwKtpJulwfvLA3MNMhYjdSg15Vacc,10802
  evalvault/adapters/outbound/analysis/ragas_evaluator_module.py,sha256=Cd-spGn56zMcqOdoTLUHTYVOFqHqR17tPFyJs7rmnbw,7659
- evalvault/adapters/outbound/analysis/retrieval_analyzer_module.py,sha256=D24GTaKabHacSBI-UqCd_jy61hnne8-QG1p4rqW1Bzk,5748
+ evalvault/adapters/outbound/analysis/retrieval_analyzer_module.py,sha256=STRHWapVAEz0YbSxR3NzT6zV7wfwlPxjKZunuWpfTmE,8340
  evalvault/adapters/outbound/analysis/retrieval_benchmark_module.py,sha256=_duIBlYhAsFygEpC7DuwoAqfTbVG2xgp70JjW1LJAGE,9312
  evalvault/adapters/outbound/analysis/retrieval_quality_checker_module.py,sha256=K1IJn4bvvz-BfqQmhd5Ik9oATjq_-G7V1AZSW8zKtSE,3121
  evalvault/adapters/outbound/analysis/root_cause_analyzer_module.py,sha256=UagHWb2d1vD7aCH0vLl3tSJx86gkkxNarrF-rwtEBhU,2811
@@ -280,7 +281,7 @@ evalvault/domain/services/memory_based_analysis.py,sha256=oh2irCy3le7fWiTtL31SME
  evalvault/domain/services/method_runner.py,sha256=pABqKZeaALpWZYDfzAbd-VOZt2djQggRNIPuuPQeUSw,3571
  evalvault/domain/services/ops_snapshot_service.py,sha256=1CqJN2p3tM6SgzLCZKcVEM213fd1cDGexTRPG_3e59w,5138
  evalvault/domain/services/pipeline_orchestrator.py,sha256=yriVlEVZYDtt0Vwt4Ae6xyW1H6Dj4Hxdn8XQSvQNSoQ,19436
- evalvault/domain/services/pipeline_template_registry.py,sha256=aWqXLQ24grpSZo9M4tZLRo1ysD10c6hUpW3JupZH9e0,28083
+ evalvault/domain/services/pipeline_template_registry.py,sha256=k5Ce1BC3NgcYqCLiUZpXsl_6WwDHOXONoYDH7KzX2L4,28809
  evalvault/domain/services/prompt_candidate_service.py,sha256=Ibyb5EaWK28Ju2HnTqHHGOoiA9Q-VwY3hjxVODALwGY,3997
  evalvault/domain/services/prompt_manifest.py,sha256=5s5Kd6-_Dn-xrjjlU99CVo6njsPhvE50H5m_85U-H6U,5612
  evalvault/domain/services/prompt_registry.py,sha256=QyL4yIcKT93uv6L0-Q_iaNXno8QnsC19YcGekuSRMtE,5247
@@ -290,7 +291,7 @@ evalvault/domain/services/prompt_suggestion_reporter.py,sha256=Fc6sCPebUMk8SZVpj
  evalvault/domain/services/ragas_prompt_overrides.py,sha256=4BecYE2KrreUBbIM3ssP9WzHcK_wRc8jW7CE_k58QOU,1412
  evalvault/domain/services/regression_gate_service.py,sha256=qBMODgpizmEzqEL8_JX-FYSVyARiroMW7MFVzlz7gjc,6579
  evalvault/domain/services/retrieval_metrics.py,sha256=dtrQPLMrXSyWLcgF8EGcLNFwzwA59WDzEh41JRToHAY,2980
- evalvault/domain/services/retriever_context.py,sha256=ySQ-GuadiggS0LVAib4AxA_0JpasYz4S9hbjau0eyIA,6482
+ evalvault/domain/services/retriever_context.py,sha256=TeJ9UgT4l3lXxOXcYMz_7PdVMlV7JsW2ewTXdv9dI2M,10185
  evalvault/domain/services/run_comparison_service.py,sha256=_NScltCRcY3zrvdyYDiPmssTxCDv1GyjCLdP3uAxJts,5631
  evalvault/domain/services/satisfaction_calibration_service.py,sha256=H7Z8opOyPHRO5qVIw-XDsNhIwdCteAS9_a3BTlfIqHg,11906
  evalvault/domain/services/stage_event_builder.py,sha256=FAT34Wmylvd2Yz5rDlhaTh1lqSCDhGApCXMi7Hjkib0,9748
@@ -339,8 +340,8 @@ evalvault/reports/__init__.py,sha256=Bb1X4871msAN8I6PM6nKGED3psPwZt88hXZBAOdH06Y
  evalvault/reports/release_notes.py,sha256=pZj0PBFT-4F_Ty-Kv5P69BuoOnmTCn4kznDcORFJd0w,4011
  evalvault/scripts/__init__.py,sha256=NwEeIFQbkX4ml2R_PhtIoNtArDSX_suuoymgG_7Kwso,89
  evalvault/scripts/regression_runner.py,sha256=SxZori5BZ8jVQ057Mf5V5FPgIVDccrV5oRONmnhuk8w,8438
- evalvault-1.68.1.dist-info/METADATA,sha256=BRG7UFXRx1fT_JDFqSsdOuB_nk_LVnaNSNYzVyYWyyU,26159
- evalvault-1.68.1.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
- evalvault-1.68.1.dist-info/entry_points.txt,sha256=Oj9Xc5gYcyUYYNmQfWI8NYGw7nN-3M-h2ipHIMlVn6o,65
- evalvault-1.68.1.dist-info/licenses/LICENSE.md,sha256=3RNWY4jjtrQ_yYa-D-7I3XO12Ti7YzxsLV_dpykujvo,11358
- evalvault-1.68.1.dist-info/RECORD,,
+ evalvault-1.70.0.dist-info/METADATA,sha256=Bm7z86HYTWoMfyTK9VpmxjNk-mhp2LHsIJ9Gt8s-onw,26159
+ evalvault-1.70.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
+ evalvault-1.70.0.dist-info/entry_points.txt,sha256=Oj9Xc5gYcyUYYNmQfWI8NYGw7nN-3M-h2ipHIMlVn6o,65
+ evalvault-1.70.0.dist-info/licenses/LICENSE.md,sha256=3RNWY4jjtrQ_yYa-D-7I3XO12Ti7YzxsLV_dpykujvo,11358
+ evalvault-1.70.0.dist-info/RECORD,,