judgeval 0.0.54__py3-none-any.whl → 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43)
  1. judgeval/common/api/__init__.py +3 -0
  2. judgeval/common/api/api.py +352 -0
  3. judgeval/common/api/constants.py +165 -0
  4. judgeval/common/storage/__init__.py +6 -0
  5. judgeval/common/tracer/__init__.py +31 -0
  6. judgeval/common/tracer/constants.py +22 -0
  7. judgeval/common/tracer/core.py +1916 -0
  8. judgeval/common/tracer/otel_exporter.py +108 -0
  9. judgeval/common/tracer/otel_span_processor.py +234 -0
  10. judgeval/common/tracer/span_processor.py +37 -0
  11. judgeval/common/tracer/span_transformer.py +211 -0
  12. judgeval/common/tracer/trace_manager.py +92 -0
  13. judgeval/common/utils.py +2 -2
  14. judgeval/constants.py +3 -30
  15. judgeval/data/datasets/eval_dataset_client.py +29 -156
  16. judgeval/data/judgment_types.py +4 -12
  17. judgeval/data/result.py +1 -1
  18. judgeval/data/scorer_data.py +2 -2
  19. judgeval/data/scripts/openapi_transform.py +1 -1
  20. judgeval/data/trace.py +66 -1
  21. judgeval/data/trace_run.py +0 -3
  22. judgeval/evaluation_run.py +0 -2
  23. judgeval/integrations/langgraph.py +43 -164
  24. judgeval/judgment_client.py +17 -211
  25. judgeval/run_evaluation.py +209 -611
  26. judgeval/scorers/__init__.py +2 -6
  27. judgeval/scorers/base_scorer.py +4 -23
  28. judgeval/scorers/judgeval_scorers/api_scorers/__init__.py +3 -3
  29. judgeval/scorers/judgeval_scorers/api_scorers/prompt_scorer.py +215 -0
  30. judgeval/scorers/score.py +2 -1
  31. judgeval/scorers/utils.py +1 -13
  32. judgeval/utils/requests.py +21 -0
  33. judgeval-0.1.0.dist-info/METADATA +202 -0
  34. {judgeval-0.0.54.dist-info → judgeval-0.1.0.dist-info}/RECORD +37 -29
  35. judgeval/common/tracer.py +0 -3215
  36. judgeval/scorers/judgeval_scorers/api_scorers/classifier_scorer.py +0 -73
  37. judgeval/scorers/judgeval_scorers/classifiers/__init__.py +0 -3
  38. judgeval/scorers/judgeval_scorers/classifiers/text2sql/__init__.py +0 -3
  39. judgeval/scorers/judgeval_scorers/classifiers/text2sql/text2sql_scorer.py +0 -53
  40. judgeval-0.0.54.dist-info/METADATA +0 -1384
  41. /judgeval/common/{s3_storage.py → storage/s3_storage.py} +0 -0
  42. {judgeval-0.0.54.dist-info → judgeval-0.1.0.dist-info}/WHEEL +0 -0
  43. {judgeval-0.0.54.dist-info → judgeval-0.1.0.dist-info}/licenses/LICENSE.md +0 -0
judgeval/common/utils.py CHANGED
@@ -13,6 +13,7 @@ import asyncio
  import concurrent.futures
  import os
  from types import TracebackType
+ from judgeval.common.api.constants import ROOT_API
  from judgeval.utils.requests import requests
  import pprint
  from typing import Any, Dict, List, Mapping, Optional, TypeAlias, Union, TypeGuard
@@ -27,7 +28,6 @@ from judgeval.clients import async_together_client, together_client
  from judgeval.constants import (
      ACCEPTABLE_MODELS,
      MAX_WORKER_THREADS,
-     ROOT_API,
      TOGETHER_SUPPORTED_MODELS,
      LITELLM_SUPPORTED_MODELS,
  )
@@ -128,7 +128,7 @@ def validate_api_key(judgment_api_key: str):
          "Content-Type": "application/json",
          "Authorization": f"Bearer {judgment_api_key}",
      },
-     json={},  # Empty body now
+     json={},
      verify=True,
  )
  if response.status_code == 200:
judgeval/constants.py CHANGED
@@ -39,36 +39,6 @@ UNBOUNDED_SCORERS: set[APIScorerType] = (
      set()
  )  # scorers whose scores are not bounded between 0-1

- ROOT_API = os.getenv("JUDGMENT_API_URL", "https://api.judgmentlabs.ai")
- # API URLs
- JUDGMENT_EVAL_API_URL = f"{ROOT_API}/evaluate/"
- JUDGMENT_TRACE_EVAL_API_URL = f"{ROOT_API}/evaluate_trace/"
- JUDGMENT_DATASETS_PUSH_API_URL = f"{ROOT_API}/datasets/push/"
- JUDGMENT_DATASETS_APPEND_EXAMPLES_API_URL = f"{ROOT_API}/datasets/insert_examples/"
- JUDGMENT_DATASETS_PULL_API_URL = f"{ROOT_API}/datasets/pull_for_judgeval/"
- JUDGMENT_DATASETS_DELETE_API_URL = f"{ROOT_API}/datasets/delete/"
- JUDGMENT_DATASETS_EXPORT_JSONL_API_URL = f"{ROOT_API}/datasets/export_jsonl/"
- JUDGMENT_DATASETS_PROJECT_STATS_API_URL = f"{ROOT_API}/datasets/fetch_stats_by_project/"
- JUDGMENT_DATASETS_INSERT_API_URL = f"{ROOT_API}/datasets/insert_examples/"
- JUDGMENT_EVAL_LOG_API_URL = f"{ROOT_API}/log_eval_results/"
- JUDGMENT_EVAL_FETCH_API_URL = f"{ROOT_API}/fetch_experiment_run/"
- JUDGMENT_EVAL_DELETE_API_URL = (
-     f"{ROOT_API}/delete_eval_results_by_project_and_run_names/"
- )
- JUDGMENT_EVAL_DELETE_PROJECT_API_URL = f"{ROOT_API}/delete_eval_results_by_project/"
- JUDGMENT_PROJECT_DELETE_API_URL = f"{ROOT_API}/projects/delete/"
- JUDGMENT_PROJECT_CREATE_API_URL = f"{ROOT_API}/projects/add/"
- JUDGMENT_TRACES_FETCH_API_URL = f"{ROOT_API}/traces/fetch/"
- JUDGMENT_TRACES_SAVE_API_URL = f"{ROOT_API}/traces/save/"
- JUDGMENT_TRACES_UPSERT_API_URL = f"{ROOT_API}/traces/upsert/"
- JUDGMENT_TRACES_DELETE_API_URL = f"{ROOT_API}/traces/delete/"
- JUDGMENT_TRACES_ADD_ANNOTATION_API_URL = f"{ROOT_API}/traces/add_annotation/"
- JUDGMENT_TRACES_SPANS_BATCH_API_URL = f"{ROOT_API}/traces/spans/batch/"
- JUDGMENT_TRACES_EVALUATION_RUNS_BATCH_API_URL = (
-     f"{ROOT_API}/traces/evaluation_runs/batch/"
- )
- JUDGMENT_ADD_TO_RUN_EVAL_QUEUE_API_URL = f"{ROOT_API}/add_to_run_eval_queue/"
- JUDGMENT_GET_EVAL_STATUS_API_URL = f"{ROOT_API}/get_evaluation_status/"
  # RabbitMQ
  RABBITMQ_HOST = os.getenv(
      "RABBITMQ_HOST", "rabbitmq-networklb-faa155df16ec9085.elb.us-west-1.amazonaws.com"
@@ -145,3 +115,6 @@ MAX_WORKER_THREADS = 10

  # Maximum number of concurrent operations for evaluation runs
  MAX_CONCURRENT_EVALUATIONS = 50  # Adjust based on system capabilities
+
+ # Span lifecycle management
+ SPAN_LIFECYCLE_END_UPDATE_ID = 20  # Default ending number for completed spans
judgeval/data/datasets/eval_dataset_client.py CHANGED
@@ -1,27 +1,17 @@
  from typing import Optional, List
- from requests import Response, exceptions
- from judgeval.utils.requests import requests
  from rich.progress import Progress, SpinnerColumn, TextColumn
  from judgeval.common.logger import judgeval_logger
- from judgeval.constants import (
-     JUDGMENT_DATASETS_PUSH_API_URL,
-     JUDGMENT_DATASETS_APPEND_EXAMPLES_API_URL,
-     JUDGMENT_DATASETS_PULL_API_URL,
-     JUDGMENT_DATASETS_PROJECT_STATS_API_URL,
-     JUDGMENT_DATASETS_DELETE_API_URL,
-     JUDGMENT_DATASETS_EXPORT_JSONL_API_URL,
- )
+ from judgeval.common.api import JudgmentApiClient
  from judgeval.data import Example, Trace
  from judgeval.data.datasets import EvalDataset


  class EvalDatasetClient:
      def __init__(self, judgment_api_key: str, organization_id: str):
-         self.judgment_api_key = judgment_api_key
-         self.organization_id = organization_id
+         self.api_client = JudgmentApiClient(judgment_api_key, organization_id)

      def create_dataset(self) -> EvalDataset:
-         return EvalDataset(judgment_api_key=self.judgment_api_key)
+         return EvalDataset(judgment_api_key=self.api_client.api_key)

      def push(
          self,
@@ -55,39 +45,17 @@ class EvalDatasetClient:
                  f"Pushing [rgb(106,0,255)]'{alias}' to Judgment...",
                  total=100,
              )
-             content = {
-                 "dataset_alias": alias,
-                 "project_name": project_name,
-                 "examples": [e.to_dict() for e in dataset.examples],
-                 "traces": [t.model_dump() for t in dataset.traces],
-                 "overwrite": overwrite,
-             }
              try:
-                 response = requests.post(
-                     JUDGMENT_DATASETS_PUSH_API_URL,
-                     json=content,
-                     headers={
-                         "Content-Type": "application/json",
-                         "Authorization": f"Bearer {self.judgment_api_key}",
-                         "X-Organization-Id": self.organization_id,
-                     },
-                     verify=True,
+                 payload = self.api_client.push_dataset(
+                     dataset_alias=alias,
+                     project_name=project_name,
+                     examples=[e.to_dict() for e in dataset.examples],
+                     traces=[t.model_dump() for t in dataset.traces],
+                     overwrite=overwrite or False,
                  )
-                 if response.status_code != 200:
-                     judgeval_logger.error(
-                         f"Server error during push: {response.json()}"
-                     )
-                     raise Exception(f"Server error during push: {response.json()}")
-                 response.raise_for_status()
-             except exceptions.HTTPError as err:
-                 if response.status_code == 422:
-                     judgeval_logger.error(
-                         f"Validation error during push: {err.response.json()}"
-                     )
-                 else:
-                     judgeval_logger.error(f"HTTP error during push: {err}")
-
-             payload = response.json()
+             except Exception as e:
+                 judgeval_logger.error(f"Error during push: {e}")
+                 raise
              dataset._alias = payload.get("_alias")
              dataset._id = payload.get("_id")
              progress.update(
@@ -122,35 +90,15 @@ class EvalDatasetClient:
                  f"Appending [rgb(106,0,255)]'{alias}' to Judgment...",
                  total=100,
              )
-             content = {
-                 "dataset_alias": alias,
-                 "project_name": project_name,
-                 "examples": [e.to_dict() for e in examples],
-             }
              try:
-                 response = requests.post(
-                     JUDGMENT_DATASETS_APPEND_EXAMPLES_API_URL,
-                     json=content,
-                     headers={
-                         "Content-Type": "application/json",
-                         "Authorization": f"Bearer {self.judgment_api_key}",
-                         "X-Organization-Id": self.organization_id,
-                     },
-                     verify=True,
+                 self.api_client.append_examples(
+                     dataset_alias=alias,
+                     project_name=project_name,
+                     examples=[e.to_dict() for e in examples],
                  )
-                 if response.status_code != 200:
-                     judgeval_logger.error(
-                         f"Server error during append: {response.json()}"
-                     )
-                     raise Exception(f"Server error during append: {response.json()}")
-                 response.raise_for_status()
-             except exceptions.HTTPError as err:
-                 if response.status_code == 422:
-                     judgeval_logger.error(
-                         f"Validation error during append: {err.response.json()}"
-                     )
-                 else:
-                     judgeval_logger.error(f"HTTP error during append: {err}")
+             except Exception as e:
+                 judgeval_logger.error(f"Error during append: {e}")
+                 raise

              progress.update(
                  task_id,
@@ -186,25 +134,14 @@ class EvalDatasetClient:
                  f"Pulling [rgb(106,0,255)]'{alias}'[/rgb(106,0,255)] from Judgment...",
                  total=100,
              )
-             request_body = {"dataset_alias": alias, "project_name": project_name}
-
              try:
-                 response = requests.post(
-                     JUDGMENT_DATASETS_PULL_API_URL,
-                     json=request_body,
-                     headers={
-                         "Content-Type": "application/json",
-                         "Authorization": f"Bearer {self.judgment_api_key}",
-                         "X-Organization-Id": self.organization_id,
-                     },
-                     verify=True,
+                 payload = self.api_client.pull_dataset(
+                     dataset_alias=alias,
+                     project_name=project_name,
                  )
-                 response.raise_for_status()
-             except exceptions.RequestException as e:
+             except Exception as e:
                  judgeval_logger.error(f"Error pulling dataset: {str(e)}")
                  raise
-
-             payload = response.json()
              dataset.examples = [Example(**e) for e in payload.get("examples", [])]
              dataset.traces = [Trace(**t) for t in payload.get("traces", [])]
              dataset._alias = payload.get("alias")
@@ -226,21 +163,12 @@ class EvalDatasetClient:
                  f"Deleting [rgb(106,0,255)]'{alias}'[/rgb(106,0,255)] from Judgment...",
                  total=100,
              )
-             request_body = {"dataset_alias": alias, "project_name": project_name}
-
              try:
-                 response = requests.post(
-                     JUDGMENT_DATASETS_DELETE_API_URL,
-                     json=request_body,
-                     headers={
-                         "Content-Type": "application/json",
-                         "Authorization": f"Bearer {self.judgment_api_key}",
-                         "X-Organization-Id": self.organization_id,
-                     },
-                     verify=True,
+                 self.api_client.delete_dataset(
+                     dataset_alias=alias,
+                     project_name=project_name,
                  )
-                 response.raise_for_status()
-             except exceptions.RequestException as e:
+             except Exception as e:
                  judgeval_logger.error(f"Error deleting dataset: {str(e)}")
                  raise

@@ -272,70 +200,15 @@ class EvalDatasetClient:
                  "Pulling [rgb(106,0,255)]' datasets'[/rgb(106,0,255)] from Judgment...",
                  total=100,
              )
-             request_body = {"project_name": project_name}
-
              try:
-                 response = requests.post(
-                     JUDGMENT_DATASETS_PROJECT_STATS_API_URL,
-                     json=request_body,
-                     headers={
-                         "Content-Type": "application/json",
-                         "Authorization": f"Bearer {self.judgment_api_key}",
-                         "X-Organization-Id": self.organization_id,
-                     },
-                     verify=True,
-                 )
-                 response.raise_for_status()
-             except exceptions.RequestException as e:
+                 payload = self.api_client.get_project_dataset_stats(project_name)
+             except Exception as e:
                  judgeval_logger.error(f"Error pulling dataset: {str(e)}")
                  raise

-             payload = response.json()
-
              progress.update(
                  task_id,
                  description=f"{progress.tasks[task_id].description} [rgb(25,227,160)]Done!)",
              )

              return payload
-
-     def export_jsonl(self, alias: str, project_name: str) -> Response:
-         """Export dataset in JSONL format from Judgment platform"""
-         with Progress(
-             SpinnerColumn(style="rgb(106,0,255)"),
-             TextColumn("[progress.description]{task.description}"),
-             transient=False,
-         ) as progress:
-             task_id = progress.add_task(
-                 f"Exporting [rgb(106,0,255)]'{alias}'[/rgb(106,0,255)] as JSONL...",
-                 total=100,
-             )
-             try:
-                 response = requests.post(
-                     JUDGMENT_DATASETS_EXPORT_JSONL_API_URL,
-                     json={"dataset_alias": alias, "project_name": project_name},
-                     headers={
-                         "Content-Type": "application/json",
-                         "Authorization": f"Bearer {self.judgment_api_key}",
-                         "X-Organization-Id": self.organization_id,
-                     },
-                     stream=True,
-                     verify=True,
-                 )
-                 response.raise_for_status()
-             except exceptions.HTTPError as err:
-                 if err.response.status_code == 404:
-                     judgeval_logger.error(f"Dataset not found: {alias}")
-                 else:
-                     judgeval_logger.error(f"HTTP error during export: {err}")
-                 raise
-             except Exception as e:
-                 judgeval_logger.error(f"Error during export: {str(e)}")
-                 raise
-
-             progress.update(
-                 task_id,
-                 description=f"{progress.tasks[task_id].description} [rgb(25,227,160)]Done!)",
-             )
-
-             return response
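Every method in this file follows the same refactor: the hand-rolled requests.post call (URL constant, auth headers, manual status-code checks) collapses into a single JudgmentApiClient method wrapped in a try/except, with the client assumed to raise on failure. A minimal usage sketch of the new pattern, using only the constructor and pull_dataset call visible above; the credential and dataset names are placeholders:

    from judgeval.common.api import JudgmentApiClient
    from judgeval.common.logger import judgeval_logger

    api_client = JudgmentApiClient("my-api-key", "my-org-id")  # placeholder credentials
    try:
        # One call replaces the old URL constant + headers + status handling
        payload = api_client.pull_dataset(dataset_alias="qa-examples", project_name="demo")
    except Exception as e:
        judgeval_logger.error(f"Error pulling dataset: {e}")
        raise
    examples = payload.get("examples", [])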
judgeval/data/judgment_types.py CHANGED
@@ -1,6 +1,6 @@
  # generated by datamodel-codegen:
  #   filename: openapi_new.json
- #   timestamp: 2025-07-12T17:11:33+00:00
+ #   timestamp: 2025-07-17T03:14:16+00:00

  from __future__ import annotations

@@ -94,9 +94,6 @@ class TraceSpanJudgmentType(BaseModel):
      output: Annotated[Any, Field(title="Output")] = None
      usage: Optional[TraceUsageJudgmentType] = None
      duration: Annotated[Optional[float], Field(title="Duration")] = None
-     annotation: Annotated[Optional[List[Dict[str, Any]]], Field(title="Annotation")] = (
-         None
-     )
      expected_tools: Annotated[
          Optional[List[ToolJudgmentType]], Field(title="Expected Tools")
      ] = None
@@ -176,6 +173,7 @@ class ScoringResultJudgmentType(BaseModel):
      ] = None
      trace_id: Annotated[Optional[str], Field(title="Trace Id")] = None
      run_duration: Annotated[Optional[float], Field(title="Run Duration")] = None
+     evaluation_cost: Annotated[Optional[float], Field(title="Evaluation Cost")] = None


  class TraceRunJudgmentType(BaseModel):
@@ -184,11 +182,8 @@ class TraceRunJudgmentType(BaseModel):
      traces: Annotated[List[TraceJudgmentType], Field(title="Traces")]
      scorers: Annotated[List[ScorerConfigJudgmentType], Field(title="Scorers")]
      model: Annotated[str, Field(title="Model")]
-     judgment_api_key: Annotated[Optional[str], Field(title="Judgment Api Key")] = None
      append: Annotated[Optional[bool], Field(title="Append")] = False
-     override_existing_eval_run_name: Annotated[
-         Optional[bool], Field(title="Override Existing Eval Run Name")
-     ] = False
+     override: Annotated[Optional[bool], Field(title="Override")] = False
      trace_span_id: Annotated[Optional[str], Field(title="Trace Span Id")] = None
      tools: Annotated[Optional[List[Dict[str, Any]]], Field(title="Tools")] = None

@@ -199,11 +194,8 @@ class JudgmentEvalJudgmentType(BaseModel):
      examples: Annotated[List[ExampleJudgmentType], Field(title="Examples")]
      scorers: Annotated[List[ScorerConfigJudgmentType], Field(title="Scorers")]
      model: Annotated[str, Field(title="Model")]
-     judgment_api_key: Annotated[Optional[str], Field(title="Judgment Api Key")] = None
      append: Annotated[Optional[bool], Field(title="Append")] = False
-     override_existing_eval_run_name: Annotated[
-         Optional[bool], Field(title="Override Existing Eval Run Name")
-     ] = False
+     override: Annotated[Optional[bool], Field(title="Override")] = False
      trace_span_id: Annotated[Optional[str], Field(title="Trace Span Id")] = None

judgeval/data/result.py CHANGED
@@ -30,7 +30,7 @@ class ScoringResult(ScoringResultJudgmentType):
      def __str__(self) -> str:
          return f"ScoringResult(\
          success={self.success}, \
-         scorer_data={self.scorers_data}, \
+         scorers_data={self.scorers_data}, \
          data_object={self.data_object}, \
          run_duration={self.run_duration})"

judgeval/data/scorer_data.py CHANGED
@@ -54,7 +54,7 @@ def create_scorer_data(scorer: BaseScorer) -> List[ScorerData]:
          reason=scorer.reason,
          success=scorer.success,
          strict_mode=scorer.strict_mode,
-         evaluation_model=scorer.evaluation_model,
+         evaluation_model=scorer.model,
          error=scorer.error,
          additional_metadata=scorer.additional_metadata,
      )
@@ -68,7 +68,7 @@ def create_scorer_data(scorer: BaseScorer) -> List[ScorerData]:
          reason=scorer.internal_scorer.reason,
          success=scorer.internal_scorer.success,
          strict_mode=scorer.internal_scorer.strict_mode,
-         evaluation_model=scorer.internal_scorer.evaluation_model,
+         evaluation_model=scorer.internal_scorer.model,
          error=scorer.internal_scorer.error,
          additional_metadata=scorer.internal_scorer.additional_metadata,
      )
judgeval/data/scripts/openapi_transform.py CHANGED
@@ -1,7 +1,7 @@
  import json
  import sys
  from typing import Any, Dict, Generator, List
- import requests
+ from judgeval.utils.requests import requests

  spec_file = sys.argv[1] if len(sys.argv) > 1 else "http://localhost:8000/openapi.json"

judgeval/data/trace.py CHANGED
@@ -8,6 +8,7 @@ from judgeval.data.judgment_types import (
      TraceSpanJudgmentType,
      TraceJudgmentType,
  )
+ from judgeval.constants import SPAN_LIFECYCLE_END_UPDATE_ID
  from pydantic import BaseModel


@@ -55,6 +56,22 @@ class TraceSpan(TraceSpanJudgmentType):
              self.update_id += 1
              return self.update_id

+     def set_update_id_to_ending_number(
+         self, ending_number: int = SPAN_LIFECYCLE_END_UPDATE_ID
+     ) -> int:
+         """
+         Thread-safe method to set the update_id to a predetermined ending number.
+
+         Args:
+             ending_number (int): The number to set update_id to. Defaults to SPAN_LIFECYCLE_END_UPDATE_ID.
+
+         Returns:
+             int: The new update_id value after setting
+         """
+         with self._update_id_lock:
+             self.update_id = ending_number
+             return self.update_id
+
      def print_span(self):
          """Print the span with proper formatting and parent relationship information."""
          indent = " " * self.depth
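A quick usage sketch of the new helper: given an already-constructed TraceSpan (construction details are outside this diff), pinning the span's update_id to the lifecycle-end sentinel looks like the following; the variable names are illustrative only:

    from judgeval.constants import SPAN_LIFECYCLE_END_UPDATE_ID

    # `span` is assumed to be an existing TraceSpan produced by the tracer.
    final_update_id = span.set_update_id_to_ending_number()  # defaults to the sentinel
    assert final_update_id == SPAN_LIFECYCLE_END_UPDATE_ID   # 20 in 0.1.0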
@@ -73,8 +90,56 @@ class TraceSpan(TraceSpanJudgmentType):

      def safe_stringify(self, output, function_name):
          """
-         Safely converts an object to a string or repr, handling serialization issues gracefully.
+         Safely converts an object to a JSON-serializable structure, handling common object types intelligently.
          """
+         # Handle Pydantic models
+         if hasattr(output, "model_dump"):
+             try:
+                 return output.model_dump()
+             except Exception:
+                 pass
+
+         # Handle LangChain messages and similar objects with content/type
+         if hasattr(output, "content") and hasattr(output, "type"):
+             try:
+                 result = {"type": output.type, "content": output.content}
+                 # Add additional fields if they exist
+                 if hasattr(output, "additional_kwargs"):
+                     result["additional_kwargs"] = output.additional_kwargs
+                 if hasattr(output, "response_metadata"):
+                     result["response_metadata"] = output.response_metadata
+                 if hasattr(output, "name"):
+                     result["name"] = output.name
+                 return result
+             except Exception:
+                 pass
+
+         if hasattr(output, "dict"):
+             try:
+                 return output.dict()
+             except Exception:
+                 pass
+
+         if hasattr(output, "to_dict"):
+             try:
+                 return output.to_dict()
+             except Exception:
+                 pass
+
+         if hasattr(output, "__dataclass_fields__"):
+             try:
+                 import dataclasses
+
+                 return dataclasses.asdict(output)
+             except Exception:
+                 pass
+
+         if hasattr(output, "__dict__"):
+             try:
+                 return output.__dict__
+             except Exception:
+                 pass
+
          try:
              return str(output)
          except (TypeError, OverflowError, ValueError):
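The rewritten safe_stringify now prefers structured output (model_dump for Pydantic models, content/type for LangChain-style messages, then dict/to_dict, dataclasses, __dict__) and only then falls back to str(). A small illustration of that fallback order, assuming `span` is an existing TraceSpan; the example classes are hypothetical:

    import dataclasses
    from pydantic import BaseModel

    class Answer(BaseModel):          # hypothetical tool output
        text: str

    @dataclasses.dataclass
    class Step:                       # hypothetical plan step
        name: str

    span.safe_stringify(Answer(text="hi"), "my_tool")   # -> {"text": "hi"} via model_dump()
    span.safe_stringify(Step(name="plan"), "my_tool")   # -> {"name": "plan"} via dataclasses.asdict()
    span.safe_stringify(3 + 4j, "my_tool")              # -> "(3+4j)" via the str() fallback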
judgeval/data/trace_run.py CHANGED
@@ -16,7 +16,6 @@ class TraceRun(BaseModel):
          scorers (List[Union[JudgmentScorer, BaseScorer]]): A list of scorers to use for evaluation
          model (str): The model used as a judge when using LLM as a Judge
          metadata (Optional[Dict[str, Any]]): Additional metadata to include for this evaluation run, e.g. comments, dataset name, purpose, etc.
-         judgment_api_key (Optional[str]): The API key for running evaluations on the Judgment API
          rules (Optional[List[Rule]]): Rules to evaluate against scoring results
          append (Optional[bool]): Whether to append to existing evaluation results
          tools (Optional[List[Dict[str, Any]]]): List of tools to use for evaluation
@@ -30,8 +29,6 @@ class TraceRun(BaseModel):
      model: Optional[str] = "gpt-4.1"
      trace_span_id: Optional[str] = None
      append: Optional[bool] = False
-     # API Key will be "" until user calls client.run_eval(), then API Key will be set
-     judgment_api_key: Optional[str] = ""
      override: Optional[bool] = False
      rules: Optional[List[Rule]] = None
      tools: Optional[List[Dict[str, Any]]] = None
judgeval/evaluation_run.py CHANGED
@@ -17,7 +17,6 @@ class EvaluationRun(BaseModel):
          scorers (List[Union[JudgmentScorer, BaseScorer]]): A list of scorers to use for evaluation
          model (str): The model used as a judge when using LLM as a Judge
          metadata (Optional[Dict[str, Any]]): Additional metadata to include for this evaluation run, e.g. comments, dataset name, purpose, etc.
-         judgment_api_key (Optional[str]): The API key for running evaluations on the Judgment API
      """

      organization_id: Optional[str] = None
@@ -28,7 +27,6 @@ class EvaluationRun(BaseModel):
      model: Optional[str] = "gpt-4.1"
      trace_span_id: Optional[str] = None
      # API Key will be "" until user calls client.run_eval(), then API Key will be set
-     judgment_api_key: Optional[str] = ""
      override: Optional[bool] = False
      append: Optional[bool] = False