judgeval 0.0.44__py3-none-any.whl → 0.0.46__py3-none-any.whl

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (64) hide show
  1. judgeval/__init__.py +5 -4
  2. judgeval/clients.py +6 -6
  3. judgeval/common/__init__.py +7 -2
  4. judgeval/common/exceptions.py +2 -3
  5. judgeval/common/logger.py +74 -49
  6. judgeval/common/s3_storage.py +30 -23
  7. judgeval/common/tracer.py +1273 -939
  8. judgeval/common/utils.py +416 -244
  9. judgeval/constants.py +73 -61
  10. judgeval/data/__init__.py +1 -1
  11. judgeval/data/custom_example.py +3 -2
  12. judgeval/data/datasets/dataset.py +80 -54
  13. judgeval/data/datasets/eval_dataset_client.py +131 -181
  14. judgeval/data/example.py +67 -43
  15. judgeval/data/result.py +11 -9
  16. judgeval/data/scorer_data.py +4 -2
  17. judgeval/data/tool.py +25 -16
  18. judgeval/data/trace.py +57 -29
  19. judgeval/data/trace_run.py +5 -11
  20. judgeval/evaluation_run.py +22 -82
  21. judgeval/integrations/langgraph.py +546 -184
  22. judgeval/judges/base_judge.py +1 -2
  23. judgeval/judges/litellm_judge.py +33 -11
  24. judgeval/judges/mixture_of_judges.py +128 -78
  25. judgeval/judges/together_judge.py +22 -9
  26. judgeval/judges/utils.py +14 -5
  27. judgeval/judgment_client.py +259 -271
  28. judgeval/rules.py +169 -142
  29. judgeval/run_evaluation.py +462 -305
  30. judgeval/scorers/api_scorer.py +20 -11
  31. judgeval/scorers/exceptions.py +1 -0
  32. judgeval/scorers/judgeval_scorer.py +77 -58
  33. judgeval/scorers/judgeval_scorers/api_scorers/__init__.py +46 -15
  34. judgeval/scorers/judgeval_scorers/api_scorers/answer_correctness.py +3 -2
  35. judgeval/scorers/judgeval_scorers/api_scorers/answer_relevancy.py +3 -2
  36. judgeval/scorers/judgeval_scorers/api_scorers/classifier_scorer.py +12 -11
  37. judgeval/scorers/judgeval_scorers/api_scorers/comparison.py +7 -5
  38. judgeval/scorers/judgeval_scorers/api_scorers/contextual_precision.py +3 -2
  39. judgeval/scorers/judgeval_scorers/api_scorers/contextual_recall.py +3 -2
  40. judgeval/scorers/judgeval_scorers/api_scorers/contextual_relevancy.py +5 -2
  41. judgeval/scorers/judgeval_scorers/api_scorers/derailment_scorer.py +2 -1
  42. judgeval/scorers/judgeval_scorers/api_scorers/execution_order.py +17 -8
  43. judgeval/scorers/judgeval_scorers/api_scorers/faithfulness.py +3 -2
  44. judgeval/scorers/judgeval_scorers/api_scorers/groundedness.py +3 -2
  45. judgeval/scorers/judgeval_scorers/api_scorers/hallucination.py +3 -2
  46. judgeval/scorers/judgeval_scorers/api_scorers/instruction_adherence.py +3 -2
  47. judgeval/scorers/judgeval_scorers/api_scorers/json_correctness.py +8 -9
  48. judgeval/scorers/judgeval_scorers/api_scorers/summarization.py +4 -4
  49. judgeval/scorers/judgeval_scorers/api_scorers/tool_dependency.py +5 -5
  50. judgeval/scorers/judgeval_scorers/api_scorers/tool_order.py +5 -2
  51. judgeval/scorers/judgeval_scorers/classifiers/text2sql/text2sql_scorer.py +9 -10
  52. judgeval/scorers/prompt_scorer.py +48 -37
  53. judgeval/scorers/score.py +86 -53
  54. judgeval/scorers/utils.py +11 -7
  55. judgeval/tracer/__init__.py +1 -1
  56. judgeval/utils/alerts.py +23 -12
  57. judgeval/utils/{data_utils.py → file_utils.py} +5 -9
  58. judgeval/utils/requests.py +29 -0
  59. judgeval/version_check.py +5 -2
  60. {judgeval-0.0.44.dist-info → judgeval-0.0.46.dist-info}/METADATA +79 -135
  61. judgeval-0.0.46.dist-info/RECORD +69 -0
  62. judgeval-0.0.44.dist-info/RECORD +0 -68
  63. {judgeval-0.0.44.dist-info → judgeval-0.0.46.dist-info}/WHEEL +0 -0
  64. {judgeval-0.0.44.dist-info → judgeval-0.0.46.dist-info}/licenses/LICENSE.md +0 -0
@@ -1,24 +1,21 @@
1
-
2
1
  from typing import Optional, List
3
- import requests
2
+ from requests import Response, exceptions
3
+ from judgeval.utils.requests import requests
4
4
  from rich.progress import Progress, SpinnerColumn, TextColumn
5
5
 
6
6
  from judgeval.common.logger import debug, error, warning, info
7
7
  from judgeval.constants import (
8
8
  JUDGMENT_DATASETS_PUSH_API_URL,
9
9
  JUDGMENT_DATASETS_APPEND_EXAMPLES_API_URL,
10
- JUDGMENT_DATASETS_PULL_API_URL,
10
+ JUDGMENT_DATASETS_PULL_API_URL,
11
11
  JUDGMENT_DATASETS_PROJECT_STATS_API_URL,
12
12
  JUDGMENT_DATASETS_DELETE_API_URL,
13
- JUDGMENT_DATASETS_INSERT_API_URL,
14
- JUDGMENT_DATASETS_EXPORT_JSONL_API_URL
13
+ JUDGMENT_DATASETS_EXPORT_JSONL_API_URL,
15
14
  )
16
15
  from judgeval.data import Example, Trace
17
16
  from judgeval.data.datasets import EvalDataset
18
17
 
19
18
 
20
-
21
-
22
19
  class EvalDatasetClient:
23
20
  def __init__(self, judgment_api_key: str, organization_id: str):
24
21
  self.judgment_api_key = judgment_api_key
@@ -26,8 +23,14 @@ class EvalDatasetClient:
26
23
 
27
24
  def create_dataset(self) -> EvalDataset:
28
25
  return EvalDataset(judgment_api_key=self.judgment_api_key)
29
-
30
- def push(self, dataset: EvalDataset, alias: str, project_name: str, overwrite: Optional[bool] = False) -> bool:
26
+
27
+ def push(
28
+ self,
29
+ dataset: EvalDataset,
30
+ alias: str,
31
+ project_name: str,
32
+ overwrite: Optional[bool] = False,
33
+ ) -> bool:
31
34
  debug(f"Pushing dataset with alias '{alias}' (overwrite={overwrite})")
32
35
  if overwrite:
33
36
  warning(f"Overwrite enabled for alias '{alias}'")
@@ -55,45 +58,46 @@ class EvalDatasetClient:
55
58
  total=100,
56
59
  )
57
60
  content = {
58
- "dataset_alias": alias,
59
- "project_name": project_name,
60
- "examples": [e.to_dict() for e in dataset.examples],
61
- "traces": [t.model_dump() for t in dataset.traces],
62
- "overwrite": overwrite,
63
- }
61
+ "dataset_alias": alias,
62
+ "project_name": project_name,
63
+ "examples": [e.to_dict() for e in dataset.examples],
64
+ "traces": [t.model_dump() for t in dataset.traces],
65
+ "overwrite": overwrite,
66
+ }
64
67
  try:
65
68
  response = requests.post(
66
- JUDGMENT_DATASETS_PUSH_API_URL,
69
+ JUDGMENT_DATASETS_PUSH_API_URL,
67
70
  json=content,
68
71
  headers={
69
72
  "Content-Type": "application/json",
70
73
  "Authorization": f"Bearer {self.judgment_api_key}",
71
- "X-Organization-Id": self.organization_id
74
+ "X-Organization-Id": self.organization_id,
72
75
  },
73
- verify=True
76
+ verify=True,
74
77
  )
75
78
  if response.status_code != 200:
76
79
  error(f"Server error during push: {response.json()}")
77
80
  raise Exception(f"Server error during push: {response.json()}")
78
81
  response.raise_for_status()
79
- except requests.exceptions.HTTPError as err:
82
+ except exceptions.HTTPError as err:
80
83
  if response.status_code == 422:
81
84
  error(f"Validation error during push: {err.response.json()}")
82
85
  else:
83
86
  error(f"HTTP error during push: {err}")
84
-
87
+
85
88
  info(f"Successfully pushed dataset with alias '{alias}'")
86
89
  payload = response.json()
87
90
  dataset._alias = payload.get("_alias")
88
91
  dataset._id = payload.get("_id")
89
92
  progress.update(
90
- task_id,
91
- description=f"{progress.tasks[task_id].description} [rgb(25,227,160)]Done!)",
92
- )
93
+ task_id,
94
+ description=f"{progress.tasks[task_id].description} [rgb(25,227,160)]Done!)",
95
+ )
93
96
  return True
94
-
95
97
 
96
- def append_examples(self, alias: str, examples: List[Example], project_name: str) -> bool:
98
+ def append_examples(
99
+ self, alias: str, examples: List[Example], project_name: str
100
+ ) -> bool:
97
101
  debug(f"Appending dataset with alias '{alias}'")
98
102
  """
99
103
  Appends the dataset to Judgment platform
@@ -119,37 +123,37 @@ class EvalDatasetClient:
119
123
  total=100,
120
124
  )
121
125
  content = {
122
- "dataset_alias": alias,
123
- "project_name": project_name,
124
- "examples": [e.to_dict() for e in examples],
125
- }
126
+ "dataset_alias": alias,
127
+ "project_name": project_name,
128
+ "examples": [e.to_dict() for e in examples],
129
+ }
126
130
  try:
127
131
  response = requests.post(
128
- JUDGMENT_DATASETS_APPEND_EXAMPLES_API_URL,
132
+ JUDGMENT_DATASETS_APPEND_EXAMPLES_API_URL,
129
133
  json=content,
130
134
  headers={
131
135
  "Content-Type": "application/json",
132
136
  "Authorization": f"Bearer {self.judgment_api_key}",
133
- "X-Organization-Id": self.organization_id
137
+ "X-Organization-Id": self.organization_id,
134
138
  },
135
- verify=True
139
+ verify=True,
136
140
  )
137
141
  if response.status_code != 200:
138
142
  error(f"Server error during append: {response.json()}")
139
143
  raise Exception(f"Server error during append: {response.json()}")
140
144
  response.raise_for_status()
141
- except requests.exceptions.HTTPError as err:
145
+ except exceptions.HTTPError as err:
142
146
  if response.status_code == 422:
143
147
  error(f"Validation error during append: {err.response.json()}")
144
148
  else:
145
149
  error(f"HTTP error during append: {err}")
146
-
150
+
147
151
  progress.update(
148
- task_id,
149
- description=f"{progress.tasks[task_id].description} [rgb(25,227,160)]Done!)",
150
- )
152
+ task_id,
153
+ description=f"{progress.tasks[task_id].description} [rgb(25,227,160)]Done!)",
154
+ )
151
155
  return True
152
-
156
+
153
157
  def pull(self, alias: str, project_name: str) -> EvalDataset:
154
158
  debug(f"Pulling dataset with alias '{alias}'")
155
159
  """
@@ -171,81 +175,75 @@ class EvalDatasetClient:
171
175
  dataset = self.create_dataset()
172
176
 
173
177
  with Progress(
174
- SpinnerColumn(style="rgb(106,0,255)"),
175
- TextColumn("[progress.description]{task.description}"),
176
- transient=False,
177
- ) as progress:
178
- task_id = progress.add_task(
179
- f"Pulling [rgb(106,0,255)]'{alias}'[/rgb(106,0,255)] from Judgment...",
180
- total=100,
178
+ SpinnerColumn(style="rgb(106,0,255)"),
179
+ TextColumn("[progress.description]{task.description}"),
180
+ transient=False,
181
+ ) as progress:
182
+ task_id = progress.add_task(
183
+ f"Pulling [rgb(106,0,255)]'{alias}'[/rgb(106,0,255)] from Judgment...",
184
+ total=100,
185
+ )
186
+ request_body = {"dataset_alias": alias, "project_name": project_name}
187
+
188
+ try:
189
+ response = requests.post(
190
+ JUDGMENT_DATASETS_PULL_API_URL,
191
+ json=request_body,
192
+ headers={
193
+ "Content-Type": "application/json",
194
+ "Authorization": f"Bearer {self.judgment_api_key}",
195
+ "X-Organization-Id": self.organization_id,
196
+ },
197
+ verify=True,
181
198
  )
182
- request_body = {
183
- "dataset_alias": alias,
184
- "project_name": project_name
185
- }
199
+ response.raise_for_status()
200
+ except exceptions.RequestException as e:
201
+ error(f"Error pulling dataset: {str(e)}")
202
+ raise
186
203
 
187
- try:
188
- response = requests.post(
189
- JUDGMENT_DATASETS_PULL_API_URL,
190
- json=request_body,
191
- headers={
192
- "Content-Type": "application/json",
193
- "Authorization": f"Bearer {self.judgment_api_key}",
194
- "X-Organization-Id": self.organization_id
195
- },
196
- verify=True
197
- )
198
- response.raise_for_status()
199
- except requests.exceptions.RequestException as e:
200
- error(f"Error pulling dataset: {str(e)}")
201
- raise
204
+ info(f"Successfully pulled dataset with alias '{alias}'")
205
+ payload = response.json()
206
+ dataset.examples = [Example(**e) for e in payload.get("examples", [])]
207
+ dataset.traces = [Trace(**t) for t in payload.get("traces", [])]
208
+ dataset._alias = payload.get("alias")
209
+ dataset._id = payload.get("id")
210
+ progress.update(
211
+ task_id,
212
+ description=f"{progress.tasks[task_id].description} [rgb(25,227,160)]Done!)",
213
+ )
202
214
 
203
- info(f"Successfully pulled dataset with alias '{alias}'")
204
- payload = response.json()
205
- dataset.examples = [Example(**e) for e in payload.get("examples", [])]
206
- dataset.traces = [Trace(**t) for t in payload.get("traces", [])]
207
- dataset._alias = payload.get("alias")
208
- dataset._id = payload.get("id")
209
- progress.update(
210
- task_id,
211
- description=f"{progress.tasks[task_id].description} [rgb(25,227,160)]Done!)",
212
- )
215
+ return dataset
213
216
 
214
- return dataset
215
-
216
217
  def delete(self, alias: str, project_name: str) -> bool:
217
218
  with Progress(
218
- SpinnerColumn(style="rgb(106,0,255)"),
219
- TextColumn("[progress.description]{task.description}"),
220
- transient=False,
221
- ) as progress:
222
- task_id = progress.add_task(
223
- f"Deleting [rgb(106,0,255)]'{alias}'[/rgb(106,0,255)] from Judgment...",
224
- total=100,
219
+ SpinnerColumn(style="rgb(106,0,255)"),
220
+ TextColumn("[progress.description]{task.description}"),
221
+ transient=False,
222
+ ) as progress:
223
+ progress.add_task(
224
+ f"Deleting [rgb(106,0,255)]'{alias}'[/rgb(106,0,255)] from Judgment...",
225
+ total=100,
226
+ )
227
+ request_body = {"dataset_alias": alias, "project_name": project_name}
228
+
229
+ try:
230
+ response = requests.post(
231
+ JUDGMENT_DATASETS_DELETE_API_URL,
232
+ json=request_body,
233
+ headers={
234
+ "Content-Type": "application/json",
235
+ "Authorization": f"Bearer {self.judgment_api_key}",
236
+ "X-Organization-Id": self.organization_id,
237
+ },
238
+ verify=True,
225
239
  )
226
- request_body = {
227
- "dataset_alias": alias,
228
- "project_name": project_name
229
- }
240
+ response.raise_for_status()
241
+ except exceptions.RequestException as e:
242
+ error(f"Error deleting dataset: {str(e)}")
243
+ raise
230
244
 
231
- try:
232
- response = requests.post(
233
- JUDGMENT_DATASETS_DELETE_API_URL,
234
- json=request_body,
235
- headers={
236
- "Content-Type": "application/json",
237
- "Authorization": f"Bearer {self.judgment_api_key}",
238
- "X-Organization-Id": self.organization_id
239
- },
240
- verify=True
241
- )
242
- response.raise_for_status()
243
- except requests.exceptions.RequestException as e:
244
- error(f"Error deleting dataset: {str(e)}")
245
- raise
245
+ return True
246
246
 
247
- return True
248
-
249
247
  def pull_project_dataset_stats(self, project_name: str) -> dict:
250
248
  debug(f"Pulling project datasets stats for project_name: {project_name}'")
251
249
  """
@@ -265,91 +263,43 @@ class EvalDatasetClient:
265
263
  # Make a POST request to the Judgment API to get the dataset
266
264
 
267
265
  with Progress(
268
- SpinnerColumn(style="rgb(106,0,255)"),
269
- TextColumn("[progress.description]{task.description}"),
270
- transient=False,
271
- ) as progress:
272
- task_id = progress.add_task(
273
- f"Pulling [rgb(106,0,255)]' datasets'[/rgb(106,0,255)] from Judgment...",
274
- total=100,
275
- )
276
- request_body = {
277
- "project_name": project_name
278
- }
279
-
280
- try:
281
- response = requests.post(
282
- JUDGMENT_DATASETS_PROJECT_STATS_API_URL,
283
- json=request_body,
284
- headers={
285
- "Content-Type": "application/json",
286
- "Authorization": f"Bearer {self.judgment_api_key}",
287
- "X-Organization-Id": self.organization_id
288
- },
289
- verify=True
290
- )
291
- response.raise_for_status()
292
- except requests.exceptions.RequestException as e:
293
- error(f"Error pulling dataset: {str(e)}")
294
- raise
295
-
296
- info(f"Successfully pulled datasets for userid: {self.judgment_api_key}'")
297
- payload = response.json()
298
-
299
- progress.update(
300
- task_id,
301
- description=f"{progress.tasks[task_id].description} [rgb(25,227,160)]Done!)",
302
- )
303
-
304
- return payload
305
-
306
- def insert_dataset(self, alias: str, examples: List[Example], project_name: str) -> bool:
307
- """
308
- Edits the dataset on Judgment platform by adding new examples
309
-
310
- Mock request:
311
- {
312
- "alias": alias,
313
- "examples": [...],
314
- "project_name": project_name
315
- }
316
- """
317
- with Progress(
318
- SpinnerColumn(style="rgb(106,0,255)"),
319
- TextColumn("[progress.description]{task.description}"),
320
- transient=False,
321
- ) as progress:
266
+ SpinnerColumn(style="rgb(106,0,255)"),
267
+ TextColumn("[progress.description]{task.description}"),
268
+ transient=False,
269
+ ) as progress:
322
270
  task_id = progress.add_task(
323
- f"Editing dataset [rgb(106,0,255)]'{alias}'[/rgb(106,0,255)] on Judgment...",
271
+ "Pulling [rgb(106,0,255)]' datasets'[/rgb(106,0,255)] from Judgment...",
324
272
  total=100,
325
273
  )
326
-
327
- content = {
328
- "dataset_alias": alias,
329
- "examples": [e.to_dict() for e in examples],
330
- "project_name": project_name
331
- }
274
+ request_body = {"project_name": project_name}
332
275
 
333
276
  try:
334
277
  response = requests.post(
335
- JUDGMENT_DATASETS_INSERT_API_URL,
336
- json=content,
278
+ JUDGMENT_DATASETS_PROJECT_STATS_API_URL,
279
+ json=request_body,
337
280
  headers={
338
281
  "Content-Type": "application/json",
339
282
  "Authorization": f"Bearer {self.judgment_api_key}",
340
- "X-Organization-Id": self.organization_id
283
+ "X-Organization-Id": self.organization_id,
341
284
  },
342
- verify=True
285
+ verify=True,
343
286
  )
344
287
  response.raise_for_status()
345
- except requests.exceptions.RequestException as e:
346
- error(f"Error editing dataset: {str(e)}")
347
- return False
348
-
349
- info(f"Successfully edited dataset '{alias}'")
350
- return True
288
+ except exceptions.RequestException as e:
289
+ error(f"Error pulling dataset: {str(e)}")
290
+ raise
291
+
292
+ info(f"Successfully pulled datasets for userid: {self.judgment_api_key}'")
293
+ payload = response.json()
294
+
295
+ progress.update(
296
+ task_id,
297
+ description=f"{progress.tasks[task_id].description} [rgb(25,227,160)]Done!)",
298
+ )
351
299
 
352
- def export_jsonl(self, alias: str, project_name: str) -> requests.Response:
300
+ return payload
301
+
302
+ def export_jsonl(self, alias: str, project_name: str) -> Response:
353
303
  """Export dataset in JSONL format from Judgment platform"""
354
304
  debug(f"Exporting dataset with alias '{alias}' as JSONL")
355
305
  with Progress(
@@ -368,13 +318,13 @@ class EvalDatasetClient:
368
318
  headers={
369
319
  "Content-Type": "application/json",
370
320
  "Authorization": f"Bearer {self.judgment_api_key}",
371
- "X-Organization-Id": self.organization_id
321
+ "X-Organization-Id": self.organization_id,
372
322
  },
373
323
  stream=True,
374
- verify=True
324
+ verify=True,
375
325
  )
376
326
  response.raise_for_status()
377
- except requests.exceptions.HTTPError as err:
327
+ except exceptions.HTTPError as err:
378
328
  if err.response.status_code == 404:
379
329
  error(f"Dataset not found: {alias}")
380
330
  else:
@@ -383,11 +333,11 @@ class EvalDatasetClient:
383
333
  except Exception as e:
384
334
  error(f"Error during export: {str(e)}")
385
335
  raise
386
-
336
+
387
337
  info(f"Successfully exported dataset with alias '{alias}'")
388
338
  progress.update(
389
339
  task_id,
390
340
  description=f"{progress.tasks[task_id].description} [rgb(25,227,160)]Done!)",
391
341
  )
392
-
342
+
393
343
  return response
judgeval/data/example.py CHANGED
@@ -2,14 +2,12 @@
2
2
  Classes for representing examples in a dataset.
3
3
  """
4
4
 
5
-
6
5
  from typing import Optional, Any, Dict, List, Union
7
6
  from uuid import uuid4
8
7
  from pydantic import BaseModel, Field, field_validator
9
8
  from enum import Enum
10
9
  from datetime import datetime
11
10
  from judgeval.data.tool import Tool
12
- import time
13
11
 
14
12
 
15
13
  class ExampleParams(Enum):
@@ -38,103 +36,129 @@ class Example(BaseModel):
38
36
  example_index: Optional[int] = None
39
37
  created_at: Optional[str] = None
40
38
  trace_id: Optional[str] = None
41
-
39
+
42
40
  def __init__(self, **data):
43
- if 'example_id' not in data:
44
- data['example_id'] = str(uuid4())
41
+ if "example_id" not in data:
42
+ data["example_id"] = str(uuid4())
45
43
  # Set timestamp if not provided
46
- if 'created_at' not in data:
47
- data['created_at'] = datetime.now().isoformat()
44
+ if "created_at" not in data:
45
+ data["created_at"] = datetime.now().isoformat()
48
46
  super().__init__(**data)
49
-
50
- @field_validator('input', mode='before')
47
+
48
+ @field_validator("input", mode="before")
51
49
  @classmethod
52
50
  def validate_input(cls, v):
53
51
  if v is not None:
54
52
  if not isinstance(v, (str, dict)):
55
- raise ValueError(f"Input must be a string or dictionary but got {v} of type {type(v)}")
56
-
53
+ raise ValueError(
54
+ f"Input must be a string or dictionary but got {v} of type {type(v)}"
55
+ )
56
+
57
57
  # If it's a string, check that it's not empty
58
58
  if isinstance(v, str) and not v:
59
59
  raise ValueError(f"Input string must be non-empty but got '{v}'")
60
-
60
+
61
61
  # If it's a dictionary, check that it's not empty
62
62
  if isinstance(v, dict) and not v:
63
63
  raise ValueError(f"Input dictionary must be non-empty but got {v}")
64
-
64
+
65
65
  return v
66
-
67
- @field_validator('actual_output', mode='before')
66
+
67
+ @field_validator("actual_output", mode="before")
68
68
  @classmethod
69
69
  def validate_actual_output(cls, v):
70
70
  if v is not None:
71
71
  if not isinstance(v, (str, list)):
72
- raise ValueError(f"Actual output must be a string or a list of strings but got {v} of type {type(v)}")
72
+ raise ValueError(
73
+ f"Actual output must be a string or a list of strings but got {v} of type {type(v)}"
74
+ )
73
75
  if isinstance(v, list) and not all(isinstance(item, str) for item in v):
74
- raise ValueError(f"All items in actual_output must be strings but got {v}")
76
+ raise ValueError(
77
+ f"All items in actual_output must be strings but got {v}"
78
+ )
75
79
  return v
76
-
77
- @field_validator('expected_output', mode='before')
80
+
81
+ @field_validator("expected_output", mode="before")
78
82
  @classmethod
79
83
  def validate_expected_output(cls, v):
80
84
  if v is not None and not isinstance(v, (str, list)):
81
- raise ValueError(f"Expected output must be a string, a list of strings, or None but got {v} of type {type(v)}")
85
+ raise ValueError(
86
+ f"Expected output must be a string, a list of strings, or None but got {v} of type {type(v)}"
87
+ )
82
88
  if isinstance(v, list) and not all(isinstance(item, str) for item in v):
83
- raise ValueError(f"All items in expected_output must be strings but got {v}")
89
+ raise ValueError(
90
+ f"All items in expected_output must be strings but got {v}"
91
+ )
84
92
  return v
85
-
86
- @field_validator('expected_tools')
93
+
94
+ @field_validator("expected_tools")
87
95
  @classmethod
88
96
  def validate_expected_tools(cls, v):
89
97
  if v is not None:
90
98
  if not isinstance(v, list):
91
- raise ValueError(f"Expected tools must be a list of Tools or None but got {v} of type {type(v)}")
92
-
99
+ raise ValueError(
100
+ f"Expected tools must be a list of Tools or None but got {v} of type {type(v)}"
101
+ )
102
+
93
103
  # Check that each item in the list is a Tool
94
104
  for i, item in enumerate(v):
95
105
  if not isinstance(item, Tool):
96
- raise ValueError(f"Expected tools must be a list of Tools, but item at index {i} is {item} of type {type(item)}")
97
-
106
+ raise ValueError(
107
+ f"Expected tools must be a list of Tools, but item at index {i} is {item} of type {type(item)}"
108
+ )
109
+
98
110
  return v
99
-
100
- @field_validator('context', 'retrieval_context', 'tools_called', mode='before')
111
+
112
+ @field_validator("context", "retrieval_context", "tools_called", mode="before")
101
113
  @classmethod
102
114
  def validate_string_lists(cls, v, info):
103
115
  field_name = info.field_name
104
116
  if v is not None:
105
117
  if not isinstance(v, list):
106
- raise ValueError(f"{field_name} must be a list of strings or None but got {v} of type {type(v)}")
118
+ raise ValueError(
119
+ f"{field_name} must be a list of strings or None but got {v} of type {type(v)}"
120
+ )
107
121
  for i, item in enumerate(v):
108
122
  if not isinstance(item, str):
109
- raise ValueError(f"All items in {field_name} must be strings but item at index {i} is {item} of type {type(item)}")
123
+ raise ValueError(
124
+ f"All items in {field_name} must be strings but item at index {i} is {item} of type {type(item)}"
125
+ )
110
126
  return v
111
-
112
- @field_validator('additional_metadata', mode='before')
127
+
128
+ @field_validator("additional_metadata", mode="before")
113
129
  @classmethod
114
130
  def validate_additional_metadata(cls, v):
115
131
  if v is not None and not isinstance(v, dict):
116
- raise ValueError(f"Additional metadata must be a dictionary or None but got {v} of type {type(v)}")
132
+ raise ValueError(
133
+ f"Additional metadata must be a dictionary or None but got {v} of type {type(v)}"
134
+ )
117
135
  return v
118
-
119
- @field_validator('example_index', mode='before')
136
+
137
+ @field_validator("example_index", mode="before")
120
138
  @classmethod
121
139
  def validate_example_index(cls, v):
122
140
  if v is not None and not isinstance(v, int):
123
- raise ValueError(f"Example index must be an integer or None but got {v} of type {type(v)}")
141
+ raise ValueError(
142
+ f"Example index must be an integer or None but got {v} of type {type(v)}"
143
+ )
124
144
  return v
125
-
126
- @field_validator('created_at', mode='before')
145
+
146
+ @field_validator("created_at", mode="before")
127
147
  @classmethod
128
148
  def validate_created_at(cls, v):
129
149
  if v is not None and not isinstance(v, str):
130
- raise ValueError(f"Timestamp must be a string or None but got {v} of type {type(v)}")
150
+ raise ValueError(
151
+ f"Timestamp must be a string or None but got {v} of type {type(v)}"
152
+ )
131
153
  return v
132
-
133
- @field_validator('trace_id', mode='before')
154
+
155
+ @field_validator("trace_id", mode="before")
134
156
  @classmethod
135
157
  def validate_trace_id(cls, v):
136
158
  if v is not None and not isinstance(v, str):
137
- raise ValueError(f"Trace ID must be a string or None but got {v} of type {type(v)}")
159
+ raise ValueError(
160
+ f"Trace ID must be a string or None but got {v} of type {type(v)}"
161
+ )
138
162
  return v
139
163
 
140
164
  def to_dict(self):