evalvault 1.67.0__py3-none-any.whl → 1.68.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- evalvault/adapters/outbound/dataset/base.py +40 -0
- evalvault/adapters/outbound/dataset/csv_loader.py +16 -0
- evalvault/adapters/outbound/dataset/excel_loader.py +16 -0
- evalvault/config/settings.py +15 -4
- evalvault/domain/entities/stage.py +22 -6
- {evalvault-1.67.0.dist-info → evalvault-1.68.1.dist-info}/METADATA +1 -1
- {evalvault-1.67.0.dist-info → evalvault-1.68.1.dist-info}/RECORD +10 -10
- {evalvault-1.67.0.dist-info → evalvault-1.68.1.dist-info}/WHEEL +0 -0
- {evalvault-1.67.0.dist-info → evalvault-1.68.1.dist-info}/entry_points.txt +0 -0
- {evalvault-1.67.0.dist-info → evalvault-1.68.1.dist-info}/licenses/LICENSE.md +0 -0
evalvault/adapters/outbound/dataset/base.py
CHANGED

@@ -3,6 +3,7 @@
 import json
 from abc import ABC, abstractmethod
 from pathlib import Path
+from typing import Any

 from evalvault.domain.entities.dataset import Dataset

@@ -118,6 +119,45 @@ class BaseDatasetLoader(ABC):
         # Fall back to pipe-separated format
         return [ctx.strip() for ctx in contexts_str.split("|")]

+    def _parse_metadata_cell(self, raw: Any) -> dict[str, Any]:
+        if raw is None or (isinstance(raw, float) and str(raw) == "nan"):
+            return {}
+        text = str(raw).strip()
+        if not text:
+            return {}
+        try:
+            parsed = json.loads(text)
+        except json.JSONDecodeError as exc:
+            raise ValueError("Invalid metadata JSON") from exc
+        if not isinstance(parsed, dict):
+            raise ValueError("metadata must be a JSON object")
+        return parsed
+
+    def _parse_summary_tags_cell(self, raw: Any) -> list[str]:
+        if raw is None or (isinstance(raw, float) and str(raw) == "nan"):
+            return []
+        if isinstance(raw, list):
+            return [str(item).strip().lower() for item in raw if str(item).strip()]
+        text = str(raw).strip()
+        if not text:
+            return []
+        if text.startswith("["):
+            try:
+                parsed = json.loads(text)
+            except json.JSONDecodeError:
+                parsed = None
+            if isinstance(parsed, list):
+                return [str(item).strip().lower() for item in parsed if str(item).strip()]
+        delimiter = "," if "," in text else "|" if "|" in text else None
+        parts = text.split(delimiter) if delimiter else [text]
+        return [part.strip().lower() for part in parts if part.strip()]
+
+    def _parse_summary_intent_cell(self, raw: Any) -> str | None:
+        if raw is None or (isinstance(raw, float) and str(raw) == "nan"):
+            return None
+        text = str(raw).strip()
+        return text or None
+
     def _get_default_name(self, file_path: Path) -> str:
         """Get default dataset name from file path.

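For illustration only, a minimal standalone sketch (not part of evalvault) that mirrors the tag-cell normalization added in _parse_summary_tags_cell above, with the expected results shown as asserts:

import json
from typing import Any


def parse_summary_tags(raw: Any) -> list[str]:
    """Accept a JSON list, a comma- or pipe-separated string, or one bare tag."""
    if raw is None or (isinstance(raw, float) and str(raw) == "nan"):
        return []
    if isinstance(raw, list):
        return [str(item).strip().lower() for item in raw if str(item).strip()]
    text = str(raw).strip()
    if not text:
        return []
    if text.startswith("["):
        try:
            parsed = json.loads(text)
        except json.JSONDecodeError:
            parsed = None
        if isinstance(parsed, list):
            return [str(item).strip().lower() for item in parsed if str(item).strip()]
    delimiter = "," if "," in text else "|" if "|" in text else None
    parts = text.split(delimiter) if delimiter else [text]
    return [part.strip().lower() for part in parts if part.strip()]


assert parse_summary_tags('["Billing", "Refund"]') == ["billing", "refund"]
assert parse_summary_tags("Billing, Refund") == ["billing", "refund"]
assert parse_summary_tags("billing|refund") == ["billing", "refund"]
assert parse_summary_tags(float("nan")) == []
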
evalvault/adapters/outbound/dataset/csv_loader.py
CHANGED

@@ -123,12 +123,28 @@ class CSVDatasetLoader(BaseDatasetLoader):
                 else None
             )

+            metadata = {}
+            if "metadata" in df.columns:
+                try:
+                    metadata = self._parse_metadata_cell(row["metadata"])
+                except ValueError as exc:
+                    raise ValueError(f"Test case {row['id']}: {exc}") from exc
+            if "summary_tags" in df.columns:
+                tags = self._parse_summary_tags_cell(row["summary_tags"])
+                if tags:
+                    metadata["summary_tags"] = tags
+            if "summary_intent" in df.columns:
+                intent = self._parse_summary_intent_cell(row["summary_intent"])
+                if intent:
+                    metadata["summary_intent"] = intent
+
             test_case = TestCase(
                 id=str(row["id"]),
                 question=str(row["question"]),
                 answer=str(row["answer"]),
                 contexts=contexts,
                 ground_truth=ground_truth,
+                metadata=metadata,
             )
             test_cases.append(test_case)

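A hedged illustration of the new optional CSV columns. The column names match those checked in the hunk above; the example row and the resulting metadata shown in comments are hypothetical:

# Hypothetical CSV (header plus one row); only the last three columns are new.
csv_text = (
    "id,question,answer,contexts,ground_truth,metadata,summary_tags,summary_intent\n"
    "tc-1,What is covered?,Fire damage is covered.,ctx a|ctx b,Fire damage.,"
    '"{""source"": ""faq""}","Billing, Claims",compare coverage\n'
)

# Per the hunk above, the loader would attach roughly this to the TestCase:
# metadata == {
#     "source": "faq",                        # parsed from the metadata JSON cell
#     "summary_tags": ["billing", "claims"],  # lowercased, comma-split
#     "summary_intent": "compare coverage",
# }
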
evalvault/adapters/outbound/dataset/excel_loader.py
CHANGED

@@ -96,12 +96,28 @@ class ExcelDatasetLoader(BaseDatasetLoader):
                 else None
             )

+            metadata = {}
+            if "metadata" in df.columns:
+                try:
+                    metadata = self._parse_metadata_cell(row["metadata"])
+                except ValueError as exc:
+                    raise ValueError(f"Test case {row['id']}: {exc}") from exc
+            if "summary_tags" in df.columns:
+                tags = self._parse_summary_tags_cell(row["summary_tags"])
+                if tags:
+                    metadata["summary_tags"] = tags
+            if "summary_intent" in df.columns:
+                intent = self._parse_summary_intent_cell(row["summary_intent"])
+                if intent:
+                    metadata["summary_intent"] = intent
+
             test_case = TestCase(
                 id=str(row["id"]),
                 question=str(row["question"]),
                 answer=str(row["answer"]),
                 contexts=contexts,
                 ground_truth=ground_truth,
+                metadata=metadata,
             )
             test_cases.append(test_case)

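The Excel path applies the same parsing. As a hedged sketch, a workbook with the new optional columns could be produced like this; pandas with an Excel writer backend such as openpyxl is assumed and is not dictated by this diff:

import pandas as pd

# One hypothetical test case; metadata is a JSON string, summary_tags may be
# a comma- or pipe-separated string, summary_intent is free text.
pd.DataFrame(
    [
        {
            "id": "tc-1",
            "question": "What is covered?",
            "answer": "Fire damage is covered.",
            "contexts": "ctx a|ctx b",
            "ground_truth": "Fire damage.",
            "metadata": '{"source": "faq"}',
            "summary_tags": "billing|claims",
            "summary_intent": "compare coverage",
        }
    ]
).to_excel("dataset.xlsx", index=False)
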
evalvault/config/settings.py
CHANGED
@@ -424,9 +424,13 @@ def apply_profile(settings: Settings, profile_name: str) -> Settings:
     """
     from evalvault.config.model_config import get_model_config

+    normalized = profile_name.strip() if isinstance(profile_name, str) else profile_name
+    if not normalized:
+        return settings
+
     try:
         model_config = get_model_config()
-        profile = model_config.get_profile(profile_name)
+        profile = model_config.get_profile(normalized)

         # LLM 설정 적용 (모델명과 provider만)
         settings.llm_provider = profile.llm.provider

@@ -449,9 +453,16 @@ def apply_profile(settings: Settings, profile_name: str) -> Settings:
         elif profile.embedding.provider == "vllm":
             settings.vllm_embedding_model = profile.embedding.model

-    except FileNotFoundError:
-
-
+    except FileNotFoundError as exc:
+        raise ValueError(
+            "Model profile config not found. Create 'config/models.yaml' or 'evalvault.yaml' "
+            f"to use profile '{normalized}'."
+        ) from exc
+    except KeyError as exc:
+        available = ", ".join(sorted(model_config.profiles.keys()))
+        raise ValueError(
+            f"Unknown profile '{normalized}'. Available profiles: {available}"
+        ) from exc

     return settings

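A hedged sketch of the revised apply_profile behavior. The Settings construction and the profile names ("dev", "prod") are assumptions; only the blank-name short-circuit and the ValueError messages come from the hunks above:

from evalvault.config.settings import Settings, apply_profile  # assumed import path

settings = Settings()                      # assumed default construction
settings = apply_profile(settings, "   ")  # blank/whitespace-only name: returned unchanged

try:
    apply_profile(settings, "staging")     # name not defined in the profile config
except ValueError as exc:
    print(exc)  # e.g. "Unknown profile 'staging'. Available profiles: dev, prod"
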
evalvault/domain/entities/stage.py
CHANGED

@@ -60,18 +60,16 @@ class StageEvent:

     @classmethod
     def from_dict(cls, payload: dict[str, Any]) -> StageEvent:
-
-
-        if "stage_type" not in payload:
-            raise ValueError("StageEvent requires 'stage_type'")
+        run_id = _require_str(payload, "run_id")
+        stage_type = _normalize_stage_type(payload)

         trace_payload = payload.get("trace") or {}
         input_ref = _parse_payload_ref(payload.get("input_ref"))
         output_ref = _parse_payload_ref(payload.get("output_ref"))

         return cls(
-            run_id=
-            stage_type=
+            run_id=run_id,
+            stage_type=stage_type,
             stage_id=str(payload.get("stage_id") or uuid4()),
             stage_name=_optional_str(payload.get("stage_name")),
             parent_stage_id=_optional_str(payload.get("parent_stage_id")),

@@ -187,6 +185,24 @@ def _parse_datetime(value: Any) -> datetime | None:
     raise ValueError("Invalid datetime value")


+def _require_str(payload: dict[str, Any], key: str) -> str:
+    if key not in payload:
+        raise ValueError(f"StageEvent requires '{key}'")
+    value = str(payload.get(key, "")).strip()
+    if not value:
+        raise ValueError(f"StageEvent requires non-empty '{key}'")
+    return value
+
+
+def _normalize_stage_type(payload: dict[str, Any]) -> str:
+    if "stage_type" not in payload:
+        raise ValueError("StageEvent requires 'stage_type'")
+    value = str(payload.get("stage_type", "")).strip()
+    if not value:
+        raise ValueError("StageEvent requires non-empty 'stage_type'")
+    return value.lower()
+
+
 @overload
 def _ensure_dict(value: None, *, allow_none: Literal[True]) -> None: ...

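A hedged sketch of the stricter StageEvent.from_dict validation; it assumes the fields not shown in this diff all tolerate being omitted from the payload:

from evalvault.domain.entities.stage import StageEvent

event = StageEvent.from_dict({"run_id": "run-42", "stage_type": "RETRIEVAL"})
print(event.stage_type)  # normalized to "retrieval" before construction (per _normalize_stage_type)

StageEvent.from_dict({"stage_type": "retrieval"})
# raises ValueError: StageEvent requires 'run_id'

StageEvent.from_dict({"run_id": " ", "stage_type": "retrieval"})
# raises ValueError: StageEvent requires non-empty 'run_id'
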
{evalvault-1.67.0.dist-info → evalvault-1.68.1.dist-info}/METADATA
CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: evalvault
-Version: 1.67.0
+Version: 1.68.1
 Summary: RAG evaluation system using Ragas with Phoenix/Langfuse tracing
 Project-URL: Homepage, https://github.com/ntts9990/EvalVault
 Project-URL: Documentation, https://github.com/ntts9990/EvalVault#readme

{evalvault-1.67.0.dist-info → evalvault-1.68.1.dist-info}/RECORD
CHANGED

@@ -118,9 +118,9 @@ evalvault/adapters/outbound/cache/__init__.py,sha256=LcsKzxnx1AnAwS07iSCdws11CfE
 evalvault/adapters/outbound/cache/hybrid_cache.py,sha256=AVhctQVOIbQWwvn_K0kxSq3lkhucuM7tezmSkPDbCrA,12711
 evalvault/adapters/outbound/cache/memory_cache.py,sha256=jvjIgXp7YRj08_AzBFaJ58jjXNzUlYbG_zX6fQJP4C0,3533
 evalvault/adapters/outbound/dataset/__init__.py,sha256=SDFnjmieEgz0uH5MpdXx8pmjnIMjRLkMFmFioMxCju0,1183
-evalvault/adapters/outbound/dataset/base.py,sha256=
-evalvault/adapters/outbound/dataset/csv_loader.py,sha256=
-evalvault/adapters/outbound/dataset/excel_loader.py,sha256=
+evalvault/adapters/outbound/dataset/base.py,sha256=4rxpQgxpFty0G5XRv1SP-XJ9mpZ9YO6PAMDgp71JiJQ,5547
+evalvault/adapters/outbound/dataset/csv_loader.py,sha256=xHg2QadMvLfHTHzeex6WxXmagLJog3LN-ui6dFxD8HY,5595
+evalvault/adapters/outbound/dataset/excel_loader.py,sha256=MUl-63r1s1GjVVmDgdag1DpMJvIVX_agGx20NQzEZN8,4494
 evalvault/adapters/outbound/dataset/json_loader.py,sha256=4wG7APg1LLADPxJ-wQZo2zBcvVX12sqo9VUIb-0Kww4,4923
 evalvault/adapters/outbound/dataset/loader_factory.py,sha256=32sjGuW2Yta12lpKy4DLH4I5B4Pi-YuHTvGG1Pr4VAk,1361
 evalvault/adapters/outbound/dataset/method_input_loader.py,sha256=d7pB4OPvvr-q-Y5DlvjX3X719jCCQ2vRDfT_ov0dUFU,3833

@@ -207,7 +207,7 @@ evalvault/config/langfuse_support.py,sha256=DEzVMfMGGf1V45W_2oUG-NCDfsYI4UUdnYJI
 evalvault/config/model_config.py,sha256=KlzDbGyDLeOGE7ElekFFk5YjjT5u8i6KO2B4EyZkLnI,3542
 evalvault/config/phoenix_support.py,sha256=e6RPWd6Qb7KU6Q8pLaYTpJGWULtvEEU6B0xHWyVyOH0,13604
 evalvault/config/secret_manager.py,sha256=YjPMuNqeBrAR2BzCJvsBNUExaU4TBSFyZ8kVYZZifqA,4172
-evalvault/config/settings.py,sha256=
+evalvault/config/settings.py,sha256=DY170XUoMo8yQx8_CJjPt96QsGg7tyTx5wJ-ptcfdY0,18766
 evalvault/config/playbooks/improvement_playbook.yaml,sha256=9F9WVVCydFfz6zUuGYzZ4PKdW1LLtcBKVF36T7xT764,26965
 evalvault/domain/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 evalvault/domain/entities/__init__.py,sha256=wszRJ1Imdc5NJ1bQPC2udk-mAgFdlw4uZV5IPNjLpHQ,3669

@@ -228,7 +228,7 @@ evalvault/domain/entities/prompt.py,sha256=lQlRnHEKY69GWTC-cUIu0DMuPfJ9UWm6Sm4KT
 evalvault/domain/entities/prompt_suggestion.py,sha256=Ep_XSjdYUj7pFSCMyeeZKs8yTnp74AVx05Zqr7829PE,1243
 evalvault/domain/entities/rag_trace.py,sha256=sZgnkG4fK6KOe3Np6TYAZ_tPnsRbOmucDSQns35U1n4,11868
 evalvault/domain/entities/result.py,sha256=OaGHMDLWMW2O4fNVuVTUvWFVBQ1iu93OD_oI3NumrCQ,10697
-evalvault/domain/entities/stage.py,sha256=
+evalvault/domain/entities/stage.py,sha256=KyR-v3tyusPJ7pfTXtHE2_23tVvNSRU9Q1RT-R5akXg,7914
 evalvault/domain/metrics/__init__.py,sha256=Ros3CWg5in1xlEdMa0WUSG602SBVkxw2Zbro-XUlmxU,1214
 evalvault/domain/metrics/analysis_registry.py,sha256=JZpBrBs7-JExHKYuEML6Vg_uYLm-WniBE3BfiU5OtJg,7641
 evalvault/domain/metrics/confidence.py,sha256=AX4oeN28OvmMkwD0pT-jskkOlXh87C1pe2W9P1sF69g,17224

@@ -339,8 +339,8 @@ evalvault/reports/__init__.py,sha256=Bb1X4871msAN8I6PM6nKGED3psPwZt88hXZBAOdH06Y
 evalvault/reports/release_notes.py,sha256=pZj0PBFT-4F_Ty-Kv5P69BuoOnmTCn4kznDcORFJd0w,4011
 evalvault/scripts/__init__.py,sha256=NwEeIFQbkX4ml2R_PhtIoNtArDSX_suuoymgG_7Kwso,89
 evalvault/scripts/regression_runner.py,sha256=SxZori5BZ8jVQ057Mf5V5FPgIVDccrV5oRONmnhuk8w,8438
-evalvault-1.67.0.dist-info/METADATA,sha256=
-evalvault-1.67.0.dist-info/WHEEL,sha256=
-evalvault-1.67.0.dist-info/entry_points.txt,sha256=
-evalvault-1.67.0.dist-info/licenses/LICENSE.md,sha256=
-evalvault-1.67.0.dist-info/RECORD,,
+evalvault-1.68.1.dist-info/METADATA,sha256=BRG7UFXRx1fT_JDFqSsdOuB_nk_LVnaNSNYzVyYWyyU,26159
+evalvault-1.68.1.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
+evalvault-1.68.1.dist-info/entry_points.txt,sha256=Oj9Xc5gYcyUYYNmQfWI8NYGw7nN-3M-h2ipHIMlVn6o,65
+evalvault-1.68.1.dist-info/licenses/LICENSE.md,sha256=3RNWY4jjtrQ_yYa-D-7I3XO12Ti7YzxsLV_dpykujvo,11358
+evalvault-1.68.1.dist-info/RECORD,,

{evalvault-1.67.0.dist-info → evalvault-1.68.1.dist-info}/WHEEL
File without changes

{evalvault-1.67.0.dist-info → evalvault-1.68.1.dist-info}/entry_points.txt
File without changes

{evalvault-1.67.0.dist-info → evalvault-1.68.1.dist-info}/licenses/LICENSE.md
File without changes