judgeval 0.0.44__py3-none-any.whl → 0.0.46__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- judgeval/__init__.py +5 -4
- judgeval/clients.py +6 -6
- judgeval/common/__init__.py +7 -2
- judgeval/common/exceptions.py +2 -3
- judgeval/common/logger.py +74 -49
- judgeval/common/s3_storage.py +30 -23
- judgeval/common/tracer.py +1273 -939
- judgeval/common/utils.py +416 -244
- judgeval/constants.py +73 -61
- judgeval/data/__init__.py +1 -1
- judgeval/data/custom_example.py +3 -2
- judgeval/data/datasets/dataset.py +80 -54
- judgeval/data/datasets/eval_dataset_client.py +131 -181
- judgeval/data/example.py +67 -43
- judgeval/data/result.py +11 -9
- judgeval/data/scorer_data.py +4 -2
- judgeval/data/tool.py +25 -16
- judgeval/data/trace.py +57 -29
- judgeval/data/trace_run.py +5 -11
- judgeval/evaluation_run.py +22 -82
- judgeval/integrations/langgraph.py +546 -184
- judgeval/judges/base_judge.py +1 -2
- judgeval/judges/litellm_judge.py +33 -11
- judgeval/judges/mixture_of_judges.py +128 -78
- judgeval/judges/together_judge.py +22 -9
- judgeval/judges/utils.py +14 -5
- judgeval/judgment_client.py +259 -271
- judgeval/rules.py +169 -142
- judgeval/run_evaluation.py +462 -305
- judgeval/scorers/api_scorer.py +20 -11
- judgeval/scorers/exceptions.py +1 -0
- judgeval/scorers/judgeval_scorer.py +77 -58
- judgeval/scorers/judgeval_scorers/api_scorers/__init__.py +46 -15
- judgeval/scorers/judgeval_scorers/api_scorers/answer_correctness.py +3 -2
- judgeval/scorers/judgeval_scorers/api_scorers/answer_relevancy.py +3 -2
- judgeval/scorers/judgeval_scorers/api_scorers/classifier_scorer.py +12 -11
- judgeval/scorers/judgeval_scorers/api_scorers/comparison.py +7 -5
- judgeval/scorers/judgeval_scorers/api_scorers/contextual_precision.py +3 -2
- judgeval/scorers/judgeval_scorers/api_scorers/contextual_recall.py +3 -2
- judgeval/scorers/judgeval_scorers/api_scorers/contextual_relevancy.py +5 -2
- judgeval/scorers/judgeval_scorers/api_scorers/derailment_scorer.py +2 -1
- judgeval/scorers/judgeval_scorers/api_scorers/execution_order.py +17 -8
- judgeval/scorers/judgeval_scorers/api_scorers/faithfulness.py +3 -2
- judgeval/scorers/judgeval_scorers/api_scorers/groundedness.py +3 -2
- judgeval/scorers/judgeval_scorers/api_scorers/hallucination.py +3 -2
- judgeval/scorers/judgeval_scorers/api_scorers/instruction_adherence.py +3 -2
- judgeval/scorers/judgeval_scorers/api_scorers/json_correctness.py +8 -9
- judgeval/scorers/judgeval_scorers/api_scorers/summarization.py +4 -4
- judgeval/scorers/judgeval_scorers/api_scorers/tool_dependency.py +5 -5
- judgeval/scorers/judgeval_scorers/api_scorers/tool_order.py +5 -2
- judgeval/scorers/judgeval_scorers/classifiers/text2sql/text2sql_scorer.py +9 -10
- judgeval/scorers/prompt_scorer.py +48 -37
- judgeval/scorers/score.py +86 -53
- judgeval/scorers/utils.py +11 -7
- judgeval/tracer/__init__.py +1 -1
- judgeval/utils/alerts.py +23 -12
- judgeval/utils/{data_utils.py → file_utils.py} +5 -9
- judgeval/utils/requests.py +29 -0
- judgeval/version_check.py +5 -2
- {judgeval-0.0.44.dist-info → judgeval-0.0.46.dist-info}/METADATA +79 -135
- judgeval-0.0.46.dist-info/RECORD +69 -0
- judgeval-0.0.44.dist-info/RECORD +0 -68
- {judgeval-0.0.44.dist-info → judgeval-0.0.46.dist-info}/WHEEL +0 -0
- {judgeval-0.0.44.dist-info → judgeval-0.0.46.dist-info}/licenses/LICENSE.md +0 -0
judgeval/data/datasets/eval_dataset_client.py
CHANGED
@@ -1,24 +1,21 @@
-
 from typing import Optional, List
-import requests
+from requests import Response, exceptions
+from judgeval.utils.requests import requests
 from rich.progress import Progress, SpinnerColumn, TextColumn
 
 from judgeval.common.logger import debug, error, warning, info
 from judgeval.constants import (
     JUDGMENT_DATASETS_PUSH_API_URL,
     JUDGMENT_DATASETS_APPEND_EXAMPLES_API_URL,
-    JUDGMENT_DATASETS_PULL_API_URL,
+    JUDGMENT_DATASETS_PULL_API_URL,
     JUDGMENT_DATASETS_PROJECT_STATS_API_URL,
     JUDGMENT_DATASETS_DELETE_API_URL,
-
-    JUDGMENT_DATASETS_EXPORT_JSONL_API_URL
+    JUDGMENT_DATASETS_EXPORT_JSONL_API_URL,
 )
 from judgeval.data import Example, Trace
 from judgeval.data.datasets import EvalDataset
 
 
-
-
 class EvalDatasetClient:
     def __init__(self, judgment_api_key: str, organization_id: str):
         self.judgment_api_key = judgment_api_key
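Note on the import change above: `import requests` is replaced by `from judgeval.utils.requests import requests`, so call sites keep the `requests.post(...)` spelling while every HTTP call is routed through the new `judgeval/utils/requests.py` module (+29 -0 in the file list). That module's body is not shown in this diff, so the following is only a plausible sketch of such a drop-in wrapper; the default timeout and the method set are assumptions, not code from the wheel.

    # Hypothetical sketch of a drop-in wrapper like judgeval/utils/requests.py.
    # Only the import path is visible in this diff; the defaults are assumed.
    import requests as _requests


    class _RequestsWrapper:
        """Forwards to the real `requests` module, applying shared defaults."""

        def __init__(self, timeout: float = 30.0):
            self._timeout = timeout  # assumed default, not taken from the wheel

        def post(self, url, **kwargs):
            kwargs.setdefault("timeout", self._timeout)  # avoid hanging calls
            return _requests.post(url, **kwargs)

        def get(self, url, **kwargs):
            kwargs.setdefault("timeout", self._timeout)
            return _requests.get(url, **kwargs)


    # Client code then imports this instance as a stand-in for `requests`:
    requests = _RequestsWrapper()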
@@ -26,8 +23,14 @@ class EvalDatasetClient:
 
     def create_dataset(self) -> EvalDataset:
         return EvalDataset(judgment_api_key=self.judgment_api_key)
-
-    def push(self, dataset: EvalDataset, alias: str, project_name: str, overwrite: Optional[bool] = False) -> bool:
+
+    def push(
+        self,
+        dataset: EvalDataset,
+        alias: str,
+        project_name: str,
+        overwrite: Optional[bool] = False,
+    ) -> bool:
         debug(f"Pushing dataset with alias '{alias}' (overwrite={overwrite})")
         if overwrite:
             warning(f"Overwrite enabled for alias '{alias}'")
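Note: this hunk is a formatter-style reflow; `push` still takes the same four parameters. A call under the new signature looks like the sketch below (the key, organization id, alias, and project name are placeholders):

    client = EvalDatasetClient(
        judgment_api_key="sk-placeholder",
        organization_id="org-placeholder",
    )
    dataset = client.create_dataset()
    client.push(
        dataset=dataset,
        alias="qa-regression-v1",  # dataset alias on the Judgment platform
        project_name="my-project",
        overwrite=False,  # Optional[bool], defaults to False
    )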
@@ -55,45 +58,46 @@ class EvalDatasetClient:
                 total=100,
             )
             content = {
-                "dataset_alias": alias,
-                "project_name": project_name,
-                "examples": [e.to_dict() for e in dataset.examples],
-                "traces": [t.model_dump() for t in dataset.traces],
-                "overwrite": overwrite
-            }
+                "dataset_alias": alias,
+                "project_name": project_name,
+                "examples": [e.to_dict() for e in dataset.examples],
+                "traces": [t.model_dump() for t in dataset.traces],
+                "overwrite": overwrite,
+            }
             try:
                 response = requests.post(
-                    JUDGMENT_DATASETS_PUSH_API_URL,
+                    JUDGMENT_DATASETS_PUSH_API_URL,
                     json=content,
                     headers={
                         "Content-Type": "application/json",
                         "Authorization": f"Bearer {self.judgment_api_key}",
-                        "X-Organization-Id": self.organization_id
+                        "X-Organization-Id": self.organization_id,
                     },
-                    verify=True
+                    verify=True,
                 )
                 if response.status_code != 200:
                     error(f"Server error during push: {response.json()}")
                     raise Exception(f"Server error during push: {response.json()}")
                 response.raise_for_status()
-            except requests.exceptions.HTTPError as err:
+            except exceptions.HTTPError as err:
                 if response.status_code == 422:
                     error(f"Validation error during push: {err.response.json()}")
                 else:
                     error(f"HTTP error during push: {err}")
-
+
             info(f"Successfully pushed dataset with alias '{alias}'")
             payload = response.json()
             dataset._alias = payload.get("_alias")
             dataset._id = payload.get("_id")
             progress.update(
-                task_id,
-                description=f"{progress.tasks[task_id].description} [rgb(25,227,160)]Done!)",
-            )
+                task_id,
+                description=f"{progress.tasks[task_id].description} [rgb(25,227,160)]Done!)",
+            )
             return True
-
 
-    def append_examples(self, alias: str, examples: List[Example], project_name: str) -> bool:
+    def append_examples(
+        self, alias: str, examples: List[Example], project_name: str
+    ) -> bool:
         debug(f"Appending dataset with alias '{alias}'")
         """
         Appends the dataset to Judgment platform
@@ -119,37 +123,37 @@ class EvalDatasetClient:
                 total=100,
             )
             content = {
-                "dataset_alias": alias,
-                "project_name": project_name,
-                "examples": [e.to_dict() for e in examples]
-            }
+                "dataset_alias": alias,
+                "project_name": project_name,
+                "examples": [e.to_dict() for e in examples],
+            }
             try:
                 response = requests.post(
-                    JUDGMENT_DATASETS_APPEND_EXAMPLES_API_URL,
+                    JUDGMENT_DATASETS_APPEND_EXAMPLES_API_URL,
                     json=content,
                     headers={
                         "Content-Type": "application/json",
                         "Authorization": f"Bearer {self.judgment_api_key}",
-                        "X-Organization-Id": self.organization_id
+                        "X-Organization-Id": self.organization_id,
                     },
-                    verify=True
+                    verify=True,
                 )
                 if response.status_code != 200:
                     error(f"Server error during append: {response.json()}")
                     raise Exception(f"Server error during append: {response.json()}")
                 response.raise_for_status()
-            except requests.exceptions.HTTPError as err:
+            except exceptions.HTTPError as err:
                 if response.status_code == 422:
                     error(f"Validation error during append: {err.response.json()}")
                 else:
                     error(f"HTTP error during append: {err}")
-
+
             progress.update(
-                task_id,
-                description=f"{progress.tasks[task_id].description} [rgb(25,227,160)]Done!)",
-            )
+                task_id,
+                description=f"{progress.tasks[task_id].description} [rgb(25,227,160)]Done!)",
+            )
             return True
-
+
     def pull(self, alias: str, project_name: str) -> EvalDataset:
         debug(f"Pulling dataset with alias '{alias}'")
         """
@@ -171,81 +175,75 @@ class EvalDatasetClient:
         dataset = self.create_dataset()
 
         with Progress(
-            SpinnerColumn(style="rgb(106,0,255)"),
-            TextColumn("[progress.description]{task.description}"),
-            transient=False,
-        ) as progress:
-            task_id = progress.add_task(
-                f"Pulling [rgb(106,0,255)]'{alias}'[/rgb(106,0,255)] from Judgment...",
-                total=100,
-            )
-            request_body = {
-                "dataset_alias": alias,
-                "project_name": project_name
-            }
-
-            try:
-                response = requests.post(
-                    JUDGMENT_DATASETS_PULL_API_URL,
-                    json=request_body,
-                    headers={
-                        "Content-Type": "application/json",
-                        "Authorization": f"Bearer {self.judgment_api_key}",
-                        "X-Organization-Id": self.organization_id
-                    },
-                    verify=True
-                )
-                response.raise_for_status()
-            except requests.exceptions.RequestException as e:
-                error(f"Error pulling dataset: {str(e)}")
-                raise
-
-            info(f"Successfully pulled dataset with alias '{alias}'")
-            payload = response.json()
-            dataset.examples = [Example(**e) for e in payload.get("examples", [])]
-            dataset.traces = [Trace(**t) for t in payload.get("traces", [])]
-            dataset._alias = payload.get("alias")
-            dataset._id = payload.get("id")
-            progress.update(
-                task_id,
-                description=f"{progress.tasks[task_id].description} [rgb(25,227,160)]Done!)",
-            )
-
-            return dataset
-
+            SpinnerColumn(style="rgb(106,0,255)"),
+            TextColumn("[progress.description]{task.description}"),
+            transient=False,
+        ) as progress:
+            task_id = progress.add_task(
+                f"Pulling [rgb(106,0,255)]'{alias}'[/rgb(106,0,255)] from Judgment...",
+                total=100,
+            )
+            request_body = {"dataset_alias": alias, "project_name": project_name}
+
+            try:
+                response = requests.post(
+                    JUDGMENT_DATASETS_PULL_API_URL,
+                    json=request_body,
+                    headers={
+                        "Content-Type": "application/json",
+                        "Authorization": f"Bearer {self.judgment_api_key}",
+                        "X-Organization-Id": self.organization_id,
+                    },
+                    verify=True,
+                )
+                response.raise_for_status()
+            except exceptions.RequestException as e:
+                error(f"Error pulling dataset: {str(e)}")
+                raise
+
+            info(f"Successfully pulled dataset with alias '{alias}'")
+            payload = response.json()
+            dataset.examples = [Example(**e) for e in payload.get("examples", [])]
+            dataset.traces = [Trace(**t) for t in payload.get("traces", [])]
+            dataset._alias = payload.get("alias")
+            dataset._id = payload.get("id")
+            progress.update(
+                task_id,
+                description=f"{progress.tasks[task_id].description} [rgb(25,227,160)]Done!)",
+            )
+
+            return dataset
+
     def delete(self, alias: str, project_name: str) -> bool:
         with Progress(
-            SpinnerColumn(style="rgb(106,0,255)"),
-            TextColumn("[progress.description]{task.description}"),
-            transient=False,
-        ) as progress:
-            progress.add_task(
-                f"Deleting [rgb(106,0,255)]'{alias}'[/rgb(106,0,255)] from Judgment...",
-                total=100,
-            )
-            request_body = {
-                "dataset_alias": alias,
-                "project_name": project_name
-            }
-
-            try:
-                response = requests.post(
-                    JUDGMENT_DATASETS_DELETE_API_URL,
-                    json=request_body,
-                    headers={
-                        "Content-Type": "application/json",
-                        "Authorization": f"Bearer {self.judgment_api_key}",
-                        "X-Organization-Id": self.organization_id
-                    },
-                    verify=True
-                )
-                response.raise_for_status()
-            except requests.exceptions.RequestException as e:
-                error(f"Error deleting dataset: {str(e)}")
-                raise
-
-            return True
-
+            SpinnerColumn(style="rgb(106,0,255)"),
+            TextColumn("[progress.description]{task.description}"),
+            transient=False,
+        ) as progress:
+            progress.add_task(
+                f"Deleting [rgb(106,0,255)]'{alias}'[/rgb(106,0,255)] from Judgment...",
+                total=100,
+            )
+            request_body = {"dataset_alias": alias, "project_name": project_name}
+
+            try:
+                response = requests.post(
+                    JUDGMENT_DATASETS_DELETE_API_URL,
+                    json=request_body,
+                    headers={
+                        "Content-Type": "application/json",
+                        "Authorization": f"Bearer {self.judgment_api_key}",
+                        "X-Organization-Id": self.organization_id,
+                    },
+                    verify=True,
+                )
+                response.raise_for_status()
+            except exceptions.RequestException as e:
+                error(f"Error deleting dataset: {str(e)}")
+                raise
+
+            return True
+
     def pull_project_dataset_stats(self, project_name: str) -> dict:
         debug(f"Pulling project datasets stats for project_name: {project_name}'")
         """
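Note: the rewritten `pull` fills the returned EvalDataset from the response payload (examples, traces, alias, id), and both `pull` and `delete` now catch `exceptions.RequestException` via the new top-level `from requests import Response, exceptions` import instead of `requests.exceptions.RequestException`. A caller-side sketch, reusing the `client` from the earlier push example (alias and project name are placeholders):

    from requests import exceptions

    pulled = client.pull(alias="qa-regression-v1", project_name="my-project")
    print(len(pulled.examples), len(pulled.traces))  # populated from the payload

    try:
        client.delete(alias="qa-regression-v1", project_name="my-project")
    except exceptions.RequestException as err:
        print(f"Delete failed: {err}")  # delete() logs, then re-raises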
@@ -265,91 +263,43 @@ class EvalDatasetClient:
         # Make a POST request to the Judgment API to get the dataset
 
         with Progress(
-            SpinnerColumn(style="rgb(106,0,255)"),
-            TextColumn("[progress.description]{task.description}"),
-            transient=False,
-        ) as progress:
-            task_id = progress.add_task(
-                f"Pulling [rgb(106,0,255)]' datasets'[/rgb(106,0,255)] from Judgment...",
-                total=100,
-            )
-            request_body = {
-                "project_name": project_name
-            }
-
-            try:
-                response = requests.post(
-                    JUDGMENT_DATASETS_PROJECT_STATS_API_URL,
-                    json=request_body,
-                    headers={
-                        "Content-Type": "application/json",
-                        "Authorization": f"Bearer {self.judgment_api_key}",
-                        "X-Organization-Id": self.organization_id
-                    },
-                    verify=True
-                )
-                response.raise_for_status()
-            except requests.exceptions.RequestException as e:
-                error(f"Error pulling dataset: {str(e)}")
-                raise
-
-            info(f"Successfully pulled datasets for userid: {self.judgment_api_key}'")
-            payload = response.json()
-
-            progress.update(
-                task_id,
-                description=f"{progress.tasks[task_id].description} [rgb(25,227,160)]Done!)",
-            )
-
-            return payload
-
-    def insert_dataset(self, alias: str, examples: List[Example], project_name: str) -> bool:
-        """
-        Edits the dataset on Judgment platform by adding new examples
-
-        Mock request:
-        {
-            "alias": alias,
-            "examples": [...],
-            "project_name": project_name
-        }
-        """
-        with Progress(
-            SpinnerColumn(style="rgb(106,0,255)"),
-            TextColumn("[progress.description]{task.description}"),
-            transient=False,
-        ) as progress:
-            task_id = progress.add_task(
-
-                total=100,
-            )
-
-            content = {
-                "dataset_alias": alias,
-                "examples": [e.to_dict() for e in examples],
-                "project_name": project_name
-            }
-
-            try:
-                response = requests.post(
-
-                    json=
-                    headers={
-                        "Content-Type": "application/json",
-                        "Authorization": f"Bearer {self.judgment_api_key}",
-                        "X-Organization-Id": self.organization_id
-                    },
-                    verify=True
-                )
-                response.raise_for_status()
-            except requests.exceptions.RequestException as e:
-                error(f"Error
-                raise
-
-                info(f"Successfully
-
-
-    def export_jsonl(self, alias: str, project_name: str) -> requests.Response:
+            SpinnerColumn(style="rgb(106,0,255)"),
+            TextColumn("[progress.description]{task.description}"),
+            transient=False,
+        ) as progress:
+            task_id = progress.add_task(
+                "Pulling [rgb(106,0,255)]' datasets'[/rgb(106,0,255)] from Judgment...",
+                total=100,
+            )
+            request_body = {"project_name": project_name}
+
+            try:
+                response = requests.post(
+                    JUDGMENT_DATASETS_PROJECT_STATS_API_URL,
+                    json=request_body,
+                    headers={
+                        "Content-Type": "application/json",
+                        "Authorization": f"Bearer {self.judgment_api_key}",
+                        "X-Organization-Id": self.organization_id,
+                    },
+                    verify=True,
+                )
+                response.raise_for_status()
+            except exceptions.RequestException as e:
+                error(f"Error pulling dataset: {str(e)}")
+                raise
+
+            info(f"Successfully pulled datasets for userid: {self.judgment_api_key}'")
+            payload = response.json()
+
+            progress.update(
+                task_id,
+                description=f"{progress.tasks[task_id].description} [rgb(25,227,160)]Done!)",
+            )
+
+            return payload
+
+    def export_jsonl(self, alias: str, project_name: str) -> Response:
         """Export dataset in JSONL format from Judgment platform"""
         debug(f"Exporting dataset with alias '{alias}' as JSONL")
         with Progress(
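Note: the hunk above removes `insert_dataset` outright (several of its deleted lines, such as the task description and URL constant, were truncated in the source rendering and are left as fragments). The closest surviving method in 0.0.46 is `append_examples`, which takes the same alias/examples/project_name triple. A migration sketch, assuming the removed method behaved as its docstring describes:

    # 0.0.44:
    # client.insert_dataset(alias="qa-regression-v1", examples=new_examples,
    #                       project_name="my-project")
    # 0.0.46:
    new_examples = [Example(input="Capital of France?", actual_output="Paris")]
    client.append_examples(
        alias="qa-regression-v1",
        examples=new_examples,
        project_name="my-project",
    )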
@@ -368,13 +318,13 @@ class EvalDatasetClient:
                     headers={
                         "Content-Type": "application/json",
                         "Authorization": f"Bearer {self.judgment_api_key}",
-                        "X-Organization-Id": self.organization_id
+                        "X-Organization-Id": self.organization_id,
                     },
                     stream=True,
-                    verify=True
+                    verify=True,
                 )
                 response.raise_for_status()
-            except requests.exceptions.HTTPError as err:
+            except exceptions.HTTPError as err:
                 if err.response.status_code == 404:
                     error(f"Dataset not found: {alias}")
                 else:
@@ -383,11 +333,11 @@ class EvalDatasetClient:
             except Exception as e:
                 error(f"Error during export: {str(e)}")
                 raise
-
+
             info(f"Successfully exported dataset with alias '{alias}'")
             progress.update(
                 task_id,
                 description=f"{progress.tasks[task_id].description} [rgb(25,227,160)]Done!)",
             )
-
+
             return response
judgeval/data/example.py
CHANGED
@@ -2,14 +2,12 @@
 Classes for representing examples in a dataset.
 """
 
-
 from typing import Optional, Any, Dict, List, Union
 from uuid import uuid4
 from pydantic import BaseModel, Field, field_validator
 from enum import Enum
 from datetime import datetime
 from judgeval.data.tool import Tool
-import time
 
 
 class ExampleParams(Enum):
@@ -38,103 +36,129 @@ class Example(BaseModel):
     example_index: Optional[int] = None
     created_at: Optional[str] = None
     trace_id: Optional[str] = None
-
+
     def __init__(self, **data):
-        if "example_id" not in data:
-            data["example_id"] = str(uuid4())
+        if "example_id" not in data:
+            data["example_id"] = str(uuid4())
         # Set timestamp if not provided
-        if "created_at" not in data:
-            data["created_at"] = datetime.now().isoformat()
+        if "created_at" not in data:
+            data["created_at"] = datetime.now().isoformat()
         super().__init__(**data)
-
-    @field_validator("input", mode="before")
+
+    @field_validator("input", mode="before")
     @classmethod
     def validate_input(cls, v):
         if v is not None:
             if not isinstance(v, (str, dict)):
-                raise ValueError(f"Input must be a string or dictionary but got {v} of type {type(v)}")
-
+                raise ValueError(
+                    f"Input must be a string or dictionary but got {v} of type {type(v)}"
+                )
+
             # If it's a string, check that it's not empty
             if isinstance(v, str) and not v:
                 raise ValueError(f"Input string must be non-empty but got '{v}'")
-
+
             # If it's a dictionary, check that it's not empty
             if isinstance(v, dict) and not v:
                 raise ValueError(f"Input dictionary must be non-empty but got {v}")
-
+
             return v
-
-    @field_validator("actual_output", mode="before")
+
+    @field_validator("actual_output", mode="before")
     @classmethod
     def validate_actual_output(cls, v):
         if v is not None:
             if not isinstance(v, (str, list)):
-                raise ValueError(f"Actual output must be a string or a list of strings but got {v} of type {type(v)}")
+                raise ValueError(
+                    f"Actual output must be a string or a list of strings but got {v} of type {type(v)}"
+                )
             if isinstance(v, list) and not all(isinstance(item, str) for item in v):
-                raise ValueError(f"All items in actual_output must be strings but got {v}")
+                raise ValueError(
+                    f"All items in actual_output must be strings but got {v}"
+                )
             return v
-
-    @field_validator("expected_output", mode="before")
+
+    @field_validator("expected_output", mode="before")
     @classmethod
     def validate_expected_output(cls, v):
         if v is not None and not isinstance(v, (str, list)):
-            raise ValueError(f"Expected output must be a string, a list of strings, or None but got {v} of type {type(v)}")
+            raise ValueError(
+                f"Expected output must be a string, a list of strings, or None but got {v} of type {type(v)}"
+            )
         if isinstance(v, list) and not all(isinstance(item, str) for item in v):
-            raise ValueError(f"All items in expected_output must be strings but got {v}")
+            raise ValueError(
+                f"All items in expected_output must be strings but got {v}"
+            )
         return v
-
-    @field_validator("expected_tools")
+
+    @field_validator("expected_tools")
    @classmethod
     def validate_expected_tools(cls, v):
         if v is not None:
             if not isinstance(v, list):
-                raise ValueError(f"Expected tools must be a list of Tools or None but got {v} of type {type(v)}")
-
+                raise ValueError(
+                    f"Expected tools must be a list of Tools or None but got {v} of type {type(v)}"
+                )
+
             # Check that each item in the list is a Tool
             for i, item in enumerate(v):
                 if not isinstance(item, Tool):
-                    raise ValueError(f"Expected tools must be a list of Tools, but item at index {i} is {item} of type {type(item)}")
-
+                    raise ValueError(
+                        f"Expected tools must be a list of Tools, but item at index {i} is {item} of type {type(item)}"
+                    )
+
             return v
-
-    @field_validator("context", "retrieval_context", "tools_called", mode="before")
+
+    @field_validator("context", "retrieval_context", "tools_called", mode="before")
     @classmethod
     def validate_string_lists(cls, v, info):
         field_name = info.field_name
         if v is not None:
             if not isinstance(v, list):
-                raise ValueError(f"{field_name} must be a list of strings or None but got {v} of type {type(v)}")
+                raise ValueError(
+                    f"{field_name} must be a list of strings or None but got {v} of type {type(v)}"
+                )
             for i, item in enumerate(v):
                 if not isinstance(item, str):
-                    raise ValueError(f"All items in {field_name} must be strings but item at index {i} is {item} of type {type(item)}")
+                    raise ValueError(
+                        f"All items in {field_name} must be strings but item at index {i} is {item} of type {type(item)}"
+                    )
             return v
-
-    @field_validator("additional_metadata", mode="before")
+
+    @field_validator("additional_metadata", mode="before")
     @classmethod
     def validate_additional_metadata(cls, v):
         if v is not None and not isinstance(v, dict):
-            raise ValueError(f"Additional metadata must be a dictionary or None but got {v} of type {type(v)}")
+            raise ValueError(
+                f"Additional metadata must be a dictionary or None but got {v} of type {type(v)}"
+            )
         return v
-
-    @field_validator("example_index", mode="before")
+
+    @field_validator("example_index", mode="before")
     @classmethod
     def validate_example_index(cls, v):
         if v is not None and not isinstance(v, int):
-            raise ValueError(f"Example index must be an integer or None but got {v} of type {type(v)}")
+            raise ValueError(
+                f"Example index must be an integer or None but got {v} of type {type(v)}"
+            )
         return v
-
-    @field_validator("created_at", mode="before")
+
+    @field_validator("created_at", mode="before")
     @classmethod
     def validate_created_at(cls, v):
         if v is not None and not isinstance(v, str):
-            raise ValueError(f"Timestamp must be a string or None but got {v} of type {type(v)}")
+            raise ValueError(
+                f"Timestamp must be a string or None but got {v} of type {type(v)}"
+            )
         return v
-
-    @field_validator("trace_id", mode="before")
+
+    @field_validator("trace_id", mode="before")
     @classmethod
     def validate_trace_id(cls, v):
         if v is not None and not isinstance(v, str):
-            raise ValueError(f"Trace ID must be a string or None but got {v} of type {type(v)}")
+            raise ValueError(
+                f"Trace ID must be a string or None but got {v} of type {type(v)}"
+            )
         return v
 
     def to_dict(self):
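Note: the validator changes in this hunk are formatter reflows of the long `raise ValueError(...)` lines; the checks themselves are unchanged. Because most validators run with mode="before", malformed fields fail at construction time, and `__init__` auto-fills `example_id` and `created_at`. An illustrative round trip (values are made up):

    from pydantic import ValidationError
    from judgeval.data import Example

    ok = Example(input="What is 2 + 2?", actual_output="4")
    assert ok.created_at is not None  # ISO-8601 timestamp filled in __init__

    try:
        Example(input="")  # rejected by validate_input
    except ValidationError as err:
        print(err)  # wraps "Input string must be non-empty but got ''"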