judgeval 0.5.0__py3-none-any.whl → 0.6.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- judgeval/cli.py +65 -0
- judgeval/common/api/api.py +44 -38
- judgeval/common/api/constants.py +18 -5
- judgeval/common/api/json_encoder.py +8 -9
- judgeval/common/tracer/core.py +278 -256
- judgeval/common/tracer/otel_span_processor.py +1 -1
- judgeval/common/tracer/span_processor.py +1 -1
- judgeval/common/tracer/span_transformer.py +2 -1
- judgeval/data/evaluation_run.py +104 -0
- judgeval/data/judgment_types.py +37 -8
- judgeval/data/trace.py +1 -0
- judgeval/data/trace_run.py +0 -2
- judgeval/integrations/langgraph.py +2 -1
- judgeval/judgment_client.py +102 -47
- judgeval/local_eval_queue.py +3 -5
- judgeval/run_evaluation.py +33 -192
- judgeval/scorers/base_scorer.py +9 -10
- judgeval/scorers/judgeval_scorers/api_scorers/prompt_scorer.py +17 -3
- {judgeval-0.5.0.dist-info → judgeval-0.6.0.dist-info}/METADATA +3 -1
- {judgeval-0.5.0.dist-info → judgeval-0.6.0.dist-info}/RECORD +23 -21
- judgeval-0.6.0.dist-info/entry_points.txt +2 -0
- judgeval/evaluation_run.py +0 -80
- {judgeval-0.5.0.dist-info → judgeval-0.6.0.dist-info}/WHEEL +0 -0
- {judgeval-0.5.0.dist-info → judgeval-0.6.0.dist-info}/licenses/LICENSE.md +0 -0
judgeval/common/tracer/core.py
CHANGED
@@ -45,7 +45,7 @@ from judgeval.common.tracer.trace_manager import TraceManagerClient
|
|
45
45
|
|
46
46
|
from judgeval.data import Example, Trace, TraceSpan, TraceUsage
|
47
47
|
from judgeval.scorers import APIScorerConfig, BaseScorer
|
48
|
-
from judgeval.evaluation_run import EvaluationRun
|
48
|
+
from judgeval.data.evaluation_run import EvaluationRun
|
49
49
|
from judgeval.local_eval_queue import LocalEvaluationQueue
|
50
50
|
from judgeval.common.api import JudgmentApiClient
|
51
51
|
from judgeval.common.utils import OptExcInfo, validate_api_key
|
@@ -183,8 +183,10 @@ class TraceClient:
|
|
183
183
|
eval_run_name = (
|
184
184
|
f"{self.name.capitalize()}-{span_id}-{scorer.score_type.capitalize()}"
|
185
185
|
)
|
186
|
-
|
187
|
-
|
186
|
+
hosted_scoring = isinstance(scorer, APIScorerConfig) or (
|
187
|
+
isinstance(scorer, BaseScorer) and scorer.server_hosted
|
188
|
+
)
|
189
|
+
if hosted_scoring:
|
188
190
|
eval_run = EvaluationRun(
|
189
191
|
organization_id=self.tracer.organization_id,
|
190
192
|
project_name=self.project_name,
|
@@ -203,7 +205,7 @@ class TraceClient:
|
|
203
205
|
self.otel_span_processor.queue_evaluation_run(
|
204
206
|
eval_run, span_id=span_id, span_data=current_span
|
205
207
|
)
|
206
|
-
|
208
|
+
else:
|
207
209
|
# Handle custom scorers using local evaluation queue
|
208
210
|
eval_run = EvaluationRun(
|
209
211
|
organization_id=self.tracer.organization_id,
|
@@ -212,9 +214,7 @@ class TraceClient:
|
|
212
214
|
examples=[example],
|
213
215
|
scorers=[scorer],
|
214
216
|
model=model,
|
215
|
-
judgment_api_key=self.tracer.api_key,
|
216
217
|
trace_span_id=span_id,
|
217
|
-
trace_id=self.trace_id,
|
218
218
|
)
|
219
219
|
|
220
220
|
self.add_eval_run(eval_run, start_time)
|
@@ -251,6 +251,14 @@ class TraceClient:
|
|
251
251
|
|
252
252
|
self.otel_span_processor.queue_span_update(span, span_state="agent_name")
|
253
253
|
|
254
|
+
def record_class_name(self, class_name: str):
|
255
|
+
current_span_id = self.get_current_span()
|
256
|
+
if current_span_id:
|
257
|
+
span = self.span_id_to_span[current_span_id]
|
258
|
+
span.class_name = class_name
|
259
|
+
|
260
|
+
self.otel_span_processor.queue_span_update(span, span_state="class_name")
|
261
|
+
|
254
262
|
def record_state_before(self, state: dict):
|
255
263
|
"""Records the agent's state before a tool execution on the current span.
|
256
264
|
|
@@ -277,35 +285,13 @@ class TraceClient:
|
|
277
285
|
|
278
286
|
self.otel_span_processor.queue_span_update(span, span_state="state_after")
|
279
287
|
|
280
|
-
async def _update_coroutine(self, span: TraceSpan, coroutine: Any, field: str):
|
281
|
-
"""Helper method to update the output of a trace entry once the coroutine completes"""
|
282
|
-
try:
|
283
|
-
result = await coroutine
|
284
|
-
setattr(span, field, result)
|
285
|
-
|
286
|
-
if field == "output":
|
287
|
-
self.otel_span_processor.queue_span_update(span, span_state="output")
|
288
|
-
|
289
|
-
return result
|
290
|
-
except Exception as e:
|
291
|
-
setattr(span, field, f"Error: {str(e)}")
|
292
|
-
|
293
|
-
if field == "output":
|
294
|
-
self.otel_span_processor.queue_span_update(span, span_state="output")
|
295
|
-
|
296
|
-
raise
|
297
|
-
|
298
288
|
def record_output(self, output: Any):
|
299
289
|
current_span_id = self.get_current_span()
|
300
290
|
if current_span_id:
|
301
291
|
span = self.span_id_to_span[current_span_id]
|
302
|
-
span.output =
|
303
|
-
|
304
|
-
if inspect.iscoroutine(output):
|
305
|
-
asyncio.create_task(self._update_coroutine(span, output, "output"))
|
292
|
+
span.output = output
|
306
293
|
|
307
|
-
|
308
|
-
self.otel_span_processor.queue_span_update(span, span_state="output")
|
294
|
+
self.otel_span_processor.queue_span_update(span, span_state="output")
|
309
295
|
|
310
296
|
return span
|
311
297
|
return None
|
@@ -642,6 +628,7 @@ class _DeepTracer:
|
|
642
628
|
|
643
629
|
qual_name = self._get_qual_name(frame)
|
644
630
|
instance_name = None
|
631
|
+
class_name = None
|
645
632
|
if "self" in frame.f_locals:
|
646
633
|
instance = frame.f_locals["self"]
|
647
634
|
class_name = instance.__class__.__name__
|
@@ -715,6 +702,7 @@ class _DeepTracer:
|
|
715
702
|
parent_span_id=parent_span_id,
|
716
703
|
function=qual_name,
|
717
704
|
agent_name=instance_name,
|
705
|
+
class_name=class_name,
|
718
706
|
)
|
719
707
|
current_trace.add_span(span)
|
720
708
|
|
@@ -1063,10 +1051,10 @@ class Tracer:
|
|
1063
1051
|
# Reset the context variable
|
1064
1052
|
self.reset_current_trace(token)
|
1065
1053
|
|
1066
|
-
def
|
1054
|
+
def agent(
|
1067
1055
|
self,
|
1068
|
-
identifier: str,
|
1069
|
-
track_state: bool = False,
|
1056
|
+
identifier: Optional[str] = None,
|
1057
|
+
track_state: Optional[bool] = False,
|
1070
1058
|
track_attributes: Optional[List[str]] = None,
|
1071
1059
|
field_mappings: Optional[Dict[str, str]] = None,
|
1072
1060
|
):
|
@@ -1104,11 +1092,18 @@ class Tracer:
|
|
1104
1092
|
"track_state": track_state,
|
1105
1093
|
"track_attributes": track_attributes,
|
1106
1094
|
"field_mappings": field_mappings or {},
|
1095
|
+
"class_name": class_name,
|
1107
1096
|
}
|
1108
1097
|
return cls
|
1109
1098
|
|
1110
1099
|
return decorator
|
1111
1100
|
|
1101
|
+
def identify(self, *args, **kwargs):
|
1102
|
+
judgeval_logger.warning(
|
1103
|
+
"identify() is deprecated and may not be supported in future versions of judgeval. Use the agent() decorator instead."
|
1104
|
+
)
|
1105
|
+
return self.agent(*args, **kwargs)
|
1106
|
+
|
1112
1107
|
def _capture_instance_state(
|
1113
1108
|
self, instance: Any, class_config: Dict[str, Any]
|
1114
1109
|
) -> Dict[str, Any]:
|
@@ -1213,125 +1208,256 @@ class Tracer:
|
|
1213
1208
|
except Exception:
|
1214
1209
|
return func
|
1215
1210
|
|
1216
|
-
|
1211
|
+
def _record_span_data(span, args, kwargs):
|
1212
|
+
"""Helper function to record inputs, agent info, and state on a span."""
|
1213
|
+
# Get class and agent info
|
1214
|
+
class_name = None
|
1215
|
+
agent_name = None
|
1216
|
+
if args and hasattr(args[0], "__class__"):
|
1217
|
+
class_name = args[0].__class__.__name__
|
1218
|
+
agent_name = get_instance_prefixed_name(
|
1219
|
+
args[0], class_name, self.class_identifiers
|
1220
|
+
)
|
1217
1221
|
|
1218
|
-
|
1219
|
-
|
1220
|
-
|
1221
|
-
|
1222
|
-
|
1223
|
-
|
1222
|
+
# Record inputs, agent name, class name
|
1223
|
+
inputs = combine_args_kwargs(func, args, kwargs)
|
1224
|
+
span.record_input(inputs)
|
1225
|
+
if agent_name:
|
1226
|
+
span.record_agent_name(agent_name)
|
1227
|
+
if class_name and class_name in self.class_identifiers:
|
1228
|
+
span.record_class_name(class_name)
|
1229
|
+
|
1230
|
+
# Capture state before execution
|
1231
|
+
self._conditionally_capture_and_record_state(span, args, is_before=True)
|
1232
|
+
|
1233
|
+
return class_name, agent_name
|
1234
|
+
|
1235
|
+
def _finalize_span_data(span, result, args):
|
1236
|
+
"""Helper function to record outputs and final state on a span."""
|
1237
|
+
# Record output
|
1238
|
+
span.record_output(result)
|
1239
|
+
|
1240
|
+
# Capture state after execution
|
1241
|
+
self._conditionally_capture_and_record_state(span, args, is_before=False)
|
1242
|
+
|
1243
|
+
def _cleanup_trace(current_trace, trace_token, wrapper_type="function"):
|
1244
|
+
"""Helper function to handle trace cleanup in finally blocks."""
|
1245
|
+
try:
|
1246
|
+
trace_id, server_response = current_trace.save(final_save=True)
|
1247
|
+
|
1248
|
+
complete_trace_data = {
|
1249
|
+
"trace_id": current_trace.trace_id,
|
1250
|
+
"name": current_trace.name,
|
1251
|
+
"project_name": current_trace.project_name,
|
1252
|
+
"created_at": datetime.fromtimestamp(
|
1253
|
+
current_trace.start_time or time.time(),
|
1254
|
+
timezone.utc,
|
1255
|
+
).isoformat(),
|
1256
|
+
"duration": current_trace.get_duration(),
|
1257
|
+
"trace_spans": [
|
1258
|
+
span.model_dump() for span in current_trace.trace_spans
|
1259
|
+
],
|
1260
|
+
"evaluation_runs": [
|
1261
|
+
run.model_dump() for run in current_trace.evaluation_runs
|
1262
|
+
],
|
1263
|
+
"offline_mode": self.offline_mode,
|
1264
|
+
"parent_trace_id": current_trace.parent_trace_id,
|
1265
|
+
"parent_name": current_trace.parent_name,
|
1266
|
+
"customer_id": current_trace.customer_id,
|
1267
|
+
"tags": current_trace.tags,
|
1268
|
+
"metadata": current_trace.metadata,
|
1269
|
+
"update_id": current_trace.update_id,
|
1270
|
+
}
|
1271
|
+
self.traces.append(complete_trace_data)
|
1272
|
+
self.reset_current_trace(trace_token)
|
1273
|
+
except Exception as e:
|
1274
|
+
judgeval_logger.warning(f"Issue with {wrapper_type} cleanup: {e}")
|
1275
|
+
|
1276
|
+
def _execute_in_span(
|
1277
|
+
current_trace, span_name, span_type, execution_func, args, kwargs
|
1278
|
+
):
|
1279
|
+
"""Helper function to execute code within a span context."""
|
1280
|
+
with current_trace.span(span_name, span_type=span_type) as span:
|
1281
|
+
_record_span_data(span, args, kwargs)
|
1282
|
+
|
1283
|
+
try:
|
1284
|
+
result = execution_func()
|
1285
|
+
_finalize_span_data(span, result, args)
|
1286
|
+
return result
|
1287
|
+
except Exception as e:
|
1288
|
+
_capture_exception_for_trace(current_trace, sys.exc_info())
|
1289
|
+
raise e
|
1290
|
+
|
1291
|
+
async def _execute_in_span_async(
|
1292
|
+
current_trace, span_name, span_type, async_execution_func, args, kwargs
|
1293
|
+
):
|
1294
|
+
"""Helper function to execute async code within a span context."""
|
1295
|
+
with current_trace.span(span_name, span_type=span_type) as span:
|
1296
|
+
_record_span_data(span, args, kwargs)
|
1297
|
+
|
1298
|
+
try:
|
1299
|
+
result = await async_execution_func()
|
1300
|
+
_finalize_span_data(span, result, args)
|
1301
|
+
return result
|
1302
|
+
except Exception as e:
|
1303
|
+
_capture_exception_for_trace(current_trace, sys.exc_info())
|
1304
|
+
raise e
|
1305
|
+
|
1306
|
+
def _create_new_trace(self, span_name):
|
1307
|
+
"""Helper function to create a new trace and set it as current."""
|
1308
|
+
trace_id = str(uuid.uuid4())
|
1309
|
+
project = self.project_name
|
1310
|
+
|
1311
|
+
current_trace = TraceClient(
|
1312
|
+
self,
|
1313
|
+
trace_id,
|
1314
|
+
span_name,
|
1315
|
+
project_name=project,
|
1316
|
+
enable_monitoring=self.enable_monitoring,
|
1317
|
+
enable_evaluations=self.enable_evaluations,
|
1318
|
+
)
|
1319
|
+
|
1320
|
+
trace_token = self.set_current_trace(current_trace)
|
1321
|
+
return current_trace, trace_token
|
1322
|
+
|
1323
|
+
def _execute_with_auto_trace_creation(
|
1324
|
+
span_name, span_type, execution_func, args, kwargs
|
1325
|
+
):
|
1326
|
+
"""Helper function that handles automatic trace creation and span execution."""
|
1327
|
+
current_trace = self.get_current_trace()
|
1328
|
+
|
1329
|
+
if not current_trace:
|
1330
|
+
current_trace, trace_token = _create_new_trace(self, span_name)
|
1224
1331
|
|
1225
|
-
|
1226
|
-
|
1227
|
-
|
1228
|
-
|
1332
|
+
try:
|
1333
|
+
result = _execute_in_span(
|
1334
|
+
current_trace,
|
1335
|
+
span_name,
|
1336
|
+
span_type,
|
1337
|
+
execution_func,
|
1338
|
+
args,
|
1339
|
+
kwargs,
|
1229
1340
|
)
|
1341
|
+
return result
|
1342
|
+
finally:
|
1343
|
+
# Cleanup the trace we created
|
1344
|
+
_cleanup_trace(current_trace, trace_token, "auto_trace")
|
1345
|
+
else:
|
1346
|
+
# Use existing trace
|
1347
|
+
return _execute_in_span(
|
1348
|
+
current_trace, span_name, span_type, execution_func, args, kwargs
|
1349
|
+
)
|
1230
1350
|
|
1231
|
-
|
1351
|
+
async def _execute_with_auto_trace_creation_async(
|
1352
|
+
span_name, span_type, async_execution_func, args, kwargs
|
1353
|
+
):
|
1354
|
+
"""Helper function that handles automatic trace creation and async span execution."""
|
1355
|
+
current_trace = self.get_current_trace()
|
1232
1356
|
|
1233
|
-
|
1234
|
-
|
1235
|
-
project = self.project_name
|
1357
|
+
if not current_trace:
|
1358
|
+
current_trace, trace_token = _create_new_trace(self, span_name)
|
1236
1359
|
|
1237
|
-
|
1238
|
-
|
1239
|
-
|
1360
|
+
try:
|
1361
|
+
result = await _execute_in_span_async(
|
1362
|
+
current_trace,
|
1240
1363
|
span_name,
|
1241
|
-
|
1242
|
-
|
1243
|
-
|
1364
|
+
span_type,
|
1365
|
+
async_execution_func,
|
1366
|
+
args,
|
1367
|
+
kwargs,
|
1244
1368
|
)
|
1369
|
+
return result
|
1370
|
+
finally:
|
1371
|
+
# Cleanup the trace we created
|
1372
|
+
_cleanup_trace(current_trace, trace_token, "async_auto_trace")
|
1373
|
+
else:
|
1374
|
+
# Use existing trace
|
1375
|
+
return await _execute_in_span_async(
|
1376
|
+
current_trace,
|
1377
|
+
span_name,
|
1378
|
+
span_type,
|
1379
|
+
async_execution_func,
|
1380
|
+
args,
|
1381
|
+
kwargs,
|
1382
|
+
)
|
1245
1383
|
|
1246
|
-
|
1384
|
+
# Check for generator functions first
|
1385
|
+
if inspect.isgeneratorfunction(func):
|
1247
1386
|
|
1248
|
-
|
1249
|
-
|
1250
|
-
|
1251
|
-
|
1252
|
-
if agent_name:
|
1253
|
-
span.record_agent_name(agent_name)
|
1254
|
-
|
1255
|
-
self._conditionally_capture_and_record_state(
|
1256
|
-
span, args, is_before=True
|
1257
|
-
)
|
1387
|
+
@functools.wraps(func)
|
1388
|
+
def generator_wrapper(*args, **kwargs):
|
1389
|
+
# Get the generator from the original function
|
1390
|
+
generator = func(*args, **kwargs)
|
1258
1391
|
|
1259
|
-
|
1260
|
-
|
1261
|
-
|
1262
|
-
|
1263
|
-
|
1264
|
-
|
1265
|
-
|
1266
|
-
|
1267
|
-
|
1268
|
-
|
1269
|
-
|
1270
|
-
|
1271
|
-
self._conditionally_capture_and_record_state(
|
1272
|
-
span, args, is_before=False
|
1392
|
+
# Create wrapper generator that creates spans for each yield
|
1393
|
+
def traced_generator():
|
1394
|
+
while True:
|
1395
|
+
try:
|
1396
|
+
# Handle automatic trace creation and span execution
|
1397
|
+
item = _execute_with_auto_trace_creation(
|
1398
|
+
original_span_name,
|
1399
|
+
span_type,
|
1400
|
+
lambda: next(generator),
|
1401
|
+
args,
|
1402
|
+
kwargs,
|
1273
1403
|
)
|
1404
|
+
yield item
|
1405
|
+
except StopIteration:
|
1406
|
+
break
|
1407
|
+
|
1408
|
+
return traced_generator()
|
1274
1409
|
|
1275
|
-
|
1276
|
-
|
1277
|
-
|
1410
|
+
return generator_wrapper
|
1411
|
+
|
1412
|
+
# Check for async generator functions
|
1413
|
+
elif inspect.isasyncgenfunction(func):
|
1414
|
+
|
1415
|
+
@functools.wraps(func)
|
1416
|
+
def async_generator_wrapper(*args, **kwargs):
|
1417
|
+
# Get the async generator from the original function
|
1418
|
+
async_generator = func(*args, **kwargs)
|
1419
|
+
|
1420
|
+
# Create wrapper async generator that creates spans for each yield
|
1421
|
+
async def traced_async_generator():
|
1422
|
+
while True:
|
1278
1423
|
try:
|
1279
|
-
|
1280
|
-
|
1281
|
-
|
1282
|
-
|
1283
|
-
|
1284
|
-
|
1285
|
-
|
1286
|
-
"duration": current_trace.get_duration(),
|
1287
|
-
"trace_spans": [
|
1288
|
-
span.model_dump()
|
1289
|
-
for span in current_trace.trace_spans
|
1290
|
-
],
|
1291
|
-
"offline_mode": self.offline_mode,
|
1292
|
-
"parent_trace_id": current_trace.parent_trace_id,
|
1293
|
-
"parent_name": current_trace.parent_name,
|
1294
|
-
}
|
1295
|
-
|
1296
|
-
trace_id, server_response = current_trace.save(
|
1297
|
-
final_save=True
|
1424
|
+
# Handle automatic trace creation and span execution
|
1425
|
+
item = await _execute_with_auto_trace_creation_async(
|
1426
|
+
original_span_name,
|
1427
|
+
span_type,
|
1428
|
+
lambda: async_generator.__anext__(),
|
1429
|
+
args,
|
1430
|
+
kwargs,
|
1298
1431
|
)
|
1432
|
+
if inspect.iscoroutine(item):
|
1433
|
+
item = await item
|
1434
|
+
yield item
|
1435
|
+
except StopAsyncIteration:
|
1436
|
+
break
|
1299
1437
|
|
1300
|
-
|
1438
|
+
return traced_async_generator()
|
1301
1439
|
|
1302
|
-
|
1303
|
-
except Exception as e:
|
1304
|
-
judgeval_logger.warning(f"Issue with async_wrapper: {e}")
|
1305
|
-
pass
|
1306
|
-
else:
|
1307
|
-
with current_trace.span(span_name, span_type=span_type) as span:
|
1308
|
-
inputs = combine_args_kwargs(func, args, kwargs)
|
1309
|
-
span.record_input(inputs)
|
1310
|
-
if agent_name:
|
1311
|
-
span.record_agent_name(agent_name)
|
1312
|
-
|
1313
|
-
# Capture state before execution
|
1314
|
-
self._conditionally_capture_and_record_state(
|
1315
|
-
span, args, is_before=True
|
1316
|
-
)
|
1440
|
+
return async_generator_wrapper
|
1317
1441
|
|
1318
|
-
|
1319
|
-
if self.deep_tracing:
|
1320
|
-
with _DeepTracer(self):
|
1321
|
-
result = await func(*args, **kwargs)
|
1322
|
-
else:
|
1323
|
-
result = await func(*args, **kwargs)
|
1324
|
-
except Exception as e:
|
1325
|
-
_capture_exception_for_trace(current_trace, sys.exc_info())
|
1326
|
-
raise e
|
1327
|
-
|
1328
|
-
# Capture state after execution
|
1329
|
-
self._conditionally_capture_and_record_state(
|
1330
|
-
span, args, is_before=False
|
1331
|
-
)
|
1442
|
+
elif asyncio.iscoroutinefunction(func):
|
1332
1443
|
|
1333
|
-
|
1334
|
-
|
1444
|
+
@functools.wraps(func)
|
1445
|
+
async def async_wrapper(*args, **kwargs):
|
1446
|
+
nonlocal original_span_name
|
1447
|
+
span_name = original_span_name
|
1448
|
+
|
1449
|
+
async def async_execution():
|
1450
|
+
if self.deep_tracing:
|
1451
|
+
with _DeepTracer(self):
|
1452
|
+
return await func(*args, **kwargs)
|
1453
|
+
else:
|
1454
|
+
return await func(*args, **kwargs)
|
1455
|
+
|
1456
|
+
result = await _execute_with_auto_trace_creation_async(
|
1457
|
+
span_name, span_type, async_execution, args, kwargs
|
1458
|
+
)
|
1459
|
+
|
1460
|
+
return result
|
1335
1461
|
|
1336
1462
|
return async_wrapper
|
1337
1463
|
else:
|
@@ -1339,122 +1465,18 @@ class Tracer:
|
|
1339
1465
|
@functools.wraps(func)
|
1340
1466
|
def wrapper(*args, **kwargs):
|
1341
1467
|
nonlocal original_span_name
|
1342
|
-
class_name = None
|
1343
1468
|
span_name = original_span_name
|
1344
|
-
agent_name = None
|
1345
|
-
if args and hasattr(args[0], "__class__"):
|
1346
|
-
class_name = args[0].__class__.__name__
|
1347
|
-
agent_name = get_instance_prefixed_name(
|
1348
|
-
args[0], class_name, self.class_identifiers
|
1349
|
-
)
|
1350
|
-
# Get current trace from context
|
1351
|
-
current_trace = self.get_current_trace()
|
1352
|
-
|
1353
|
-
# If there's no current trace, create a root trace
|
1354
|
-
if not current_trace:
|
1355
|
-
trace_id = str(uuid.uuid4())
|
1356
|
-
project = self.project_name
|
1357
|
-
|
1358
|
-
# Create a new trace client to serve as the root
|
1359
|
-
current_trace = TraceClient(
|
1360
|
-
self,
|
1361
|
-
trace_id,
|
1362
|
-
span_name,
|
1363
|
-
project_name=project,
|
1364
|
-
enable_monitoring=self.enable_monitoring,
|
1365
|
-
enable_evaluations=self.enable_evaluations,
|
1366
|
-
)
|
1367
|
-
|
1368
|
-
trace_token = self.set_current_trace(current_trace)
|
1369
|
-
|
1370
|
-
try:
|
1371
|
-
with current_trace.span(span_name, span_type=span_type) as span:
|
1372
|
-
# Record inputs
|
1373
|
-
inputs = combine_args_kwargs(func, args, kwargs)
|
1374
|
-
span.record_input(inputs)
|
1375
|
-
if agent_name:
|
1376
|
-
span.record_agent_name(agent_name)
|
1377
|
-
# Capture state before execution
|
1378
|
-
self._conditionally_capture_and_record_state(
|
1379
|
-
span, args, is_before=True
|
1380
|
-
)
|
1381
|
-
|
1382
|
-
try:
|
1383
|
-
if self.deep_tracing:
|
1384
|
-
with _DeepTracer(self):
|
1385
|
-
result = func(*args, **kwargs)
|
1386
|
-
else:
|
1387
|
-
result = func(*args, **kwargs)
|
1388
|
-
except Exception as e:
|
1389
|
-
_capture_exception_for_trace(
|
1390
|
-
current_trace, sys.exc_info()
|
1391
|
-
)
|
1392
|
-
raise e
|
1393
|
-
|
1394
|
-
# Capture state after execution
|
1395
|
-
self._conditionally_capture_and_record_state(
|
1396
|
-
span, args, is_before=False
|
1397
|
-
)
|
1398
|
-
|
1399
|
-
# Record output
|
1400
|
-
span.record_output(result)
|
1401
|
-
return result
|
1402
|
-
finally:
|
1403
|
-
try:
|
1404
|
-
trace_id, server_response = current_trace.save(
|
1405
|
-
final_save=True
|
1406
|
-
)
|
1407
1469
|
|
1408
|
-
|
1409
|
-
|
1410
|
-
|
1411
|
-
|
1412
|
-
|
1413
|
-
|
1414
|
-
).isoformat(),
|
1415
|
-
"duration": current_trace.get_duration(),
|
1416
|
-
"trace_spans": [
|
1417
|
-
span.model_dump()
|
1418
|
-
for span in current_trace.trace_spans
|
1419
|
-
],
|
1420
|
-
"offline_mode": self.offline_mode,
|
1421
|
-
"parent_trace_id": current_trace.parent_trace_id,
|
1422
|
-
"parent_name": current_trace.parent_name,
|
1423
|
-
}
|
1424
|
-
self.traces.append(complete_trace_data)
|
1425
|
-
self.reset_current_trace(trace_token)
|
1426
|
-
except Exception as e:
|
1427
|
-
judgeval_logger.warning(f"Issue with save: {e}")
|
1428
|
-
pass
|
1429
|
-
else:
|
1430
|
-
with current_trace.span(span_name, span_type=span_type) as span:
|
1431
|
-
inputs = combine_args_kwargs(func, args, kwargs)
|
1432
|
-
span.record_input(inputs)
|
1433
|
-
if agent_name:
|
1434
|
-
span.record_agent_name(agent_name)
|
1435
|
-
|
1436
|
-
# Capture state before execution
|
1437
|
-
self._conditionally_capture_and_record_state(
|
1438
|
-
span, args, is_before=True
|
1439
|
-
)
|
1470
|
+
def sync_execution():
|
1471
|
+
if self.deep_tracing:
|
1472
|
+
with _DeepTracer(self):
|
1473
|
+
return func(*args, **kwargs)
|
1474
|
+
else:
|
1475
|
+
return func(*args, **kwargs)
|
1440
1476
|
|
1441
|
-
|
1442
|
-
|
1443
|
-
|
1444
|
-
result = func(*args, **kwargs)
|
1445
|
-
else:
|
1446
|
-
result = func(*args, **kwargs)
|
1447
|
-
except Exception as e:
|
1448
|
-
_capture_exception_for_trace(current_trace, sys.exc_info())
|
1449
|
-
raise e
|
1450
|
-
|
1451
|
-
# Capture state after execution
|
1452
|
-
self._conditionally_capture_and_record_state(
|
1453
|
-
span, args, is_before=False
|
1454
|
-
)
|
1455
|
-
|
1456
|
-
span.record_output(result)
|
1457
|
-
return result
|
1477
|
+
return _execute_with_auto_trace_creation(
|
1478
|
+
span_name, span_type, sync_execution, args, kwargs
|
1479
|
+
)
|
1458
1480
|
|
1459
1481
|
return wrapper
|
1460
1482
|
|
@@ -2223,13 +2245,13 @@ def get_instance_prefixed_name(instance, class_name, class_identifiers):
|
|
2223
2245
|
"""
|
2224
2246
|
if class_name in class_identifiers:
|
2225
2247
|
class_config = class_identifiers[class_name]
|
2226
|
-
attr = class_config
|
2227
|
-
|
2228
|
-
|
2229
|
-
|
2230
|
-
|
2231
|
-
|
2232
|
-
|
2233
|
-
|
2234
|
-
|
2235
|
-
|
2248
|
+
attr = class_config.get("identifier")
|
2249
|
+
if attr:
|
2250
|
+
if hasattr(instance, attr) and not callable(getattr(instance, attr)):
|
2251
|
+
instance_name = getattr(instance, attr)
|
2252
|
+
return instance_name
|
2253
|
+
else:
|
2254
|
+
raise Exception(
|
2255
|
+
f"Attribute {attr} does not exist for {class_name}. Check your agent() decorator."
|
2256
|
+
)
|
2257
|
+
return None
|
@@ -21,7 +21,7 @@ from judgeval.common.tracer.otel_exporter import JudgmentAPISpanExporter
|
|
21
21
|
from judgeval.common.tracer.span_processor import SpanProcessorBase
|
22
22
|
from judgeval.common.tracer.span_transformer import SpanTransformer
|
23
23
|
from judgeval.data import TraceSpan
|
24
|
-
from judgeval.evaluation_run import EvaluationRun
|
24
|
+
from judgeval.data.evaluation_run import EvaluationRun
|
25
25
|
|
26
26
|
|
27
27
|
class SimpleReadableSpan(ReadableSpan):
|
@@ -7,7 +7,7 @@ When monitoring is enabled, we use JudgmentSpanProcessor which overrides the met
|
|
7
7
|
"""
|
8
8
|
|
9
9
|
from judgeval.data import TraceSpan
|
10
|
-
from judgeval.evaluation_run import EvaluationRun
|
10
|
+
from judgeval.data.evaluation_run import EvaluationRun
|
11
11
|
|
12
12
|
|
13
13
|
class SpanProcessorBase:
|