judgeval 0.0.40__py3-none-any.whl → 0.0.41__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- judgeval/common/tracer.py +160 -38
- judgeval/common/utils.py +5 -1
- judgeval/data/datasets/dataset.py +12 -6
- judgeval/data/datasets/eval_dataset_client.py +3 -1
- judgeval/data/trace.py +6 -2
- judgeval/judgment_client.py +9 -1
- judgeval/run_evaluation.py +17 -3
- judgeval/scorers/judgeval_scorer.py +4 -1
- judgeval/scorers/prompt_scorer.py +3 -0
- {judgeval-0.0.40.dist-info → judgeval-0.0.41.dist-info}/METADATA +25 -16
- {judgeval-0.0.40.dist-info → judgeval-0.0.41.dist-info}/RECORD +13 -13
- {judgeval-0.0.40.dist-info → judgeval-0.0.41.dist-info}/WHEEL +0 -0
- {judgeval-0.0.40.dist-info → judgeval-0.0.41.dist-info}/licenses/LICENSE.md +0 -0
judgeval/common/tracer.py
CHANGED
@@ -5,7 +5,6 @@ Tracing system for judgeval that allows for function tracing using decorators.
 import asyncio
 import functools
 import inspect
-import json
 import os
 import site
 import sysconfig
@@ -16,6 +15,7 @@ import uuid
 import warnings
 import contextvars
 import sys
+import json
 from contextlib import contextmanager, asynccontextmanager, AbstractAsyncContextManager, AbstractContextManager # Import context manager bases
 from dataclasses import dataclass, field
 from datetime import datetime
@@ -29,20 +29,16 @@ from typing import (
     Literal,
     Optional,
     Tuple,
-    Type,
-    TypeVar,
     Union,
     AsyncGenerator,
     TypeAlias,
-    Set
 )
 from rich import print as rprint
-import types
+import types
 
 # Third-party imports
 import requests
 from litellm import cost_per_token as _original_cost_per_token
-from pydantic import BaseModel
 from rich import print as rprint
 from openai import OpenAI, AsyncOpenAI
 from together import Together, AsyncTogether
@@ -64,8 +60,7 @@ from judgeval.data import Example, Trace, TraceSpan, TraceUsage
 from judgeval.scorers import APIJudgmentScorer, JudgevalScorer
 from judgeval.rules import Rule
 from judgeval.evaluation_run import EvaluationRun
-from judgeval.
-from judgeval.common.utils import validate_api_key
+from judgeval.common.utils import ExcInfo, validate_api_key
 from judgeval.common.exceptions import JudgmentAPIError
 
 # Standard library imports needed for the new class
@@ -307,7 +302,7 @@ class TraceClient:
         tracer: Optional["Tracer"],
         trace_id: Optional[str] = None,
         name: str = "default",
-        project_name: str =
+        project_name: str = None,
         overwrite: bool = False,
         rules: Optional[List[Rule]] = None,
         enable_monitoring: bool = True,
@@ -317,7 +312,7 @@ class TraceClient:
     ):
         self.name = name
         self.trace_id = trace_id or str(uuid.uuid4())
-        self.project_name = project_name
+        self.project_name = project_name or str(uuid.uuid4())
        self.overwrite = overwrite
        self.tracer = tracer
        self.rules = rules or []
@@ -507,6 +502,28 @@ class TraceClient:
             span = self.span_id_to_span[current_span_id]
             span.agent_name = agent_name
 
+    def record_state_before(self, state: dict):
+        """Records the agent's state before a tool execution on the current span.
+
+        Args:
+            state: A dictionary representing the agent's state.
+        """
+        current_span_id = current_span_var.get()
+        if current_span_id:
+            span = self.span_id_to_span[current_span_id]
+            span.state_before = state
+
+    def record_state_after(self, state: dict):
+        """Records the agent's state after a tool execution on the current span.
+
+        Args:
+            state: A dictionary representing the agent's state.
+        """
+        current_span_id = current_span_var.get()
+        if current_span_id:
+            span = self.span_id_to_span[current_span_id]
+            span.state_after = state
+
     async def _update_coroutine(self, span: TraceSpan, coroutine: Any, field: str):
         """Helper method to update the output of a trace entry once the coroutine completes"""
         try:
@@ -540,7 +557,7 @@ class TraceClient:
         # Removed else block - original didn't have one
         return None # Return None if no span_id found
 
-    def record_error(self, error: Any):
+    def record_error(self, error: Dict[str, Any]):
         current_span_id = current_span_var.get()
         if current_span_id:
             span = self.span_id_to_span[current_span_id]
@@ -579,7 +596,7 @@ class TraceClient:
             "project_name": self.project_name,
             "created_at": datetime.utcfromtimestamp(self.start_time).isoformat(),
             "duration": total_duration,
-            "
+            "trace_spans": [span.model_dump() for span in self.trace_spans],
             "evaluation_runs": [run.model_dump() for run in self.evaluation_runs],
             "overwrite": overwrite,
             "offline_mode": self.tracer.offline_mode,
@@ -599,7 +616,7 @@ class TraceClient:
     def delete(self):
         return self.trace_manager_client.delete_trace(self.trace_id)
 
-def _capture_exception_for_trace(current_trace: Optional['TraceClient'], exc_info:
+def _capture_exception_for_trace(current_trace: Optional['TraceClient'], exc_info: ExcInfo):
     if not current_trace:
         return
 
@@ -609,6 +626,27 @@ def _capture_exception_for_trace(current_trace: Optional['TraceClient'], exc_inf
         "message": str(exc_value) if exc_value else "No exception message",
         "traceback": traceback.format_tb(exc_traceback_obj) if exc_traceback_obj else []
     }
+
+    # This is where we specially handle exceptions that we might want to collect additional data for.
+    # When we do this, always try checking the module from sys.modules instead of importing. This will
+    # Let us support a wider range of exceptions without needing to import them for all clients.
+
+    # Most clients (requests, httpx, urllib) support the standard format of exposing error.request.url and error.response.status_code
+    # The alternative is to hand select libraries we want from sys.modules and check for them:
+    # As an example: requests_module = sys.modules.get("requests", None) // then do things with requests_module;
+
+    # General HTTP Like errors
+    try:
+        url = getattr(getattr(exc_value, "request", None), "url", None)
+        status_code = getattr(getattr(exc_value, "response", None), "status_code", None)
+        if status_code:
+            formatted_exception["http"] = {
+                "url": url if url else "Unknown URL",
+                "status_code": status_code if status_code else None,
+            }
+    except Exception as e:
+        pass
+
     current_trace.record_error(formatted_exception)
 class _DeepTracer:
     _instance: Optional["_DeepTracer"] = None
@@ -907,7 +945,7 @@ class Tracer:
     def __init__(
         self,
         api_key: str = os.getenv("JUDGMENT_API_KEY"),
-        project_name: str =
+        project_name: str = None,
         rules: Optional[List[Rule]] = None,  # Added rules parameter
         organization_id: str = os.getenv("JUDGMENT_ORG_ID"),
         enable_monitoring: bool = os.getenv("JUDGMENT_MONITORING", "true").lower() == "true",
@@ -935,7 +973,7 @@ class Tracer:
             raise ValueError("S3 bucket name must be provided when use_s3 is True")
 
         self.api_key: str = api_key
-        self.project_name: str = project_name
+        self.project_name: str = project_name or str(uuid.uuid4())
         self.organization_id: str = organization_id
         self._current_trace: Optional[str] = None
         self._active_trace_client: Optional[TraceClient] = None # Add active trace client attribute
@@ -1068,32 +1106,92 @@ class Tracer:
 
         rprint(f"[bold]{label}:[/bold] {msg}")
 
-    def identify(self, identifier: str):
+    def identify(self, identifier: str, track_state: bool = False, track_attributes: Optional[List[str]] = None, field_mappings: Optional[Dict[str, str]] = None):
         """
-        Class decorator that associates a class with a custom identifier.
+        Class decorator that associates a class with a custom identifier and enables state tracking.
 
         This decorator creates a mapping between the class name and the provided
         identifier, which can be useful for tagging, grouping, or referencing
-        classes in a standardized way.
+        classes in a standardized way. It also enables automatic state capture
+        for instances of the decorated class when used with tracing.
 
         Args:
-            identifier: The identifier to associate with the decorated class
-
-
-
+            identifier: The identifier to associate with the decorated class.
+                This will be used as the instance name in traces.
+            track_state: Whether to automatically capture the state (attributes)
+                of instances before and after function execution. Defaults to False.
+            track_attributes: Optional list of specific attribute names to track.
+                If None, all non-private attributes (not starting with '_')
+                will be tracked when track_state=True.
+            field_mappings: Optional dictionary mapping internal attribute names to
+                display names in the captured state. For example:
+                {"system_prompt": "instructions"} will capture the
+                'instructions' attribute as 'system_prompt' in the state.
 
         Example:
-            @tracer.identify(identifier="user_model")
+            @tracer.identify(identifier="user_model", track_state=True, track_attributes=["name", "age"], field_mappings={"system_prompt": "instructions"})
            class User:
                # Class implementation
        """
        def decorator(cls):
            class_name = cls.__name__
-            self.class_identifiers[class_name] =
+            self.class_identifiers[class_name] = {
+                "identifier": identifier,
+                "track_state": track_state,
+                "track_attributes": track_attributes,
+                "field_mappings": field_mappings or {}
+            }
            return cls

        return decorator

+    def _capture_instance_state(self, instance: Any, class_config: Dict[str, Any]) -> Dict[str, Any]:
+        """
+        Capture the state of an instance based on class configuration.
+        Args:
+            instance: The instance to capture the state of.
+            class_config: Configuration dictionary for state capture,
+                expected to contain 'track_attributes' and 'field_mappings'.
+        """
+        track_attributes = class_config.get('track_attributes')
+        field_mappings = class_config.get('field_mappings')
+
+        if track_attributes:
+            state = {attr: getattr(instance, attr, None) for attr in track_attributes}
+        else:
+            state = {k: v for k, v in instance.__dict__.items() if not k.startswith('_')}
+
+        if field_mappings:
+            state['field_mappings'] = field_mappings
+
+        return state
+
+    def _get_instance_state_if_tracked(self, args):
+        """
+        Extract instance state if the instance should be tracked.
+
+        Returns the captured state dict if tracking is enabled, None otherwise.
+        """
+        if args and hasattr(args[0], '__class__'):
+            instance = args[0]
+            class_name = instance.__class__.__name__
+            if (class_name in self.class_identifiers and
+                isinstance(self.class_identifiers[class_name], dict) and
+                self.class_identifiers[class_name].get('track_state', False)):
+                return self._capture_instance_state(instance, self.class_identifiers[class_name])
+
+    def _conditionally_capture_and_record_state(self, trace_client_instance: TraceClient, args: tuple, is_before: bool):
+        """Captures instance state if tracked and records it via the trace_client."""
+        state = self._get_instance_state_if_tracked(args)
+        if state:
+            if is_before:
+                trace_client_instance.record_state_before(state)
+            else:
+                trace_client_instance.record_state_after(state)
+
     def observe(self, func=None, *, name=None, span_type: SpanType = "span", project_name: str = None, overwrite: bool = False, deep_tracing: bool = None):
         """
         Decorator to trace function execution with detailed entry/exit information.
@@ -1171,6 +1269,9 @@ class Tracer:
                     span.record_input(inputs)
                     if agent_name:
                         span.record_agent_name(agent_name)
+
+                    # Capture state before execution
+                    self._conditionally_capture_and_record_state(span, args, is_before=True)
 
                     if use_deep_tracing:
                         with _DeepTracer():
@@ -1181,7 +1282,10 @@ class Tracer:
                     except Exception as e:
                         _capture_exception_for_trace(current_trace, sys.exc_info())
                         raise e
-
+
+                    # Capture state after execution
+                    self._conditionally_capture_and_record_state(span, args, is_before=False)
+
                     # Record output
                     span.record_output(result)
                     return result
@@ -1199,6 +1303,9 @@ class Tracer:
                     if agent_name:
                         span.record_agent_name(agent_name)
 
+                    # Capture state before execution
+                    self._conditionally_capture_and_record_state(span, args, is_before=True)
+
                     if use_deep_tracing:
                         with _DeepTracer():
                             result = await func(*args, **kwargs)
@@ -1208,6 +1315,9 @@ class Tracer:
                     except Exception as e:
                         _capture_exception_for_trace(current_trace, sys.exc_info())
                         raise e
+
+                    # Capture state after execution
+                    self._conditionally_capture_and_record_state(span, args, is_before=False)
 
                     span.record_output(result)
                     return result
@@ -1258,6 +1368,9 @@ class Tracer:
                     span.record_input(inputs)
                     if agent_name:
                         span.record_agent_name(agent_name)
+                    # Capture state before execution
+                    self._conditionally_capture_and_record_state(span, args, is_before=True)
+
                     if use_deep_tracing:
                         with _DeepTracer():
                             result = func(*args, **kwargs)
@@ -1267,6 +1380,10 @@ class Tracer:
                     except Exception as e:
                         _capture_exception_for_trace(current_trace, sys.exc_info())
                         raise e
+
+                    # Capture state after execution
+                    self._conditionally_capture_and_record_state(span, args, is_before=False)
+
 
                     # Record output
                     span.record_output(result)
@@ -1286,6 +1403,9 @@ class Tracer:
                     if agent_name:
                         span.record_agent_name(agent_name)
 
+                    # Capture state before execution
+                    self._conditionally_capture_and_record_state(span, args, is_before=True)
+
                     if use_deep_tracing:
                         with _DeepTracer():
                             result = func(*args, **kwargs)
@@ -1296,6 +1416,9 @@ class Tracer:
                         _capture_exception_for_trace(current_trace, sys.exc_info())
                         raise e
 
+                    # Capture state after execution
+                    self._conditionally_capture_and_record_state(span, args, is_before=False)
+
                     span.record_output(result)
                     return result
 
@@ -1369,13 +1492,6 @@ def wrap(client: Any) -> Any:
         span.record_usage(usage)
         return response
 
-    def _handle_error(span, e, is_async):
-        """Handle and record errors"""
-        call_type = "async" if is_async else "sync"
-        print(f"Error during wrapped {call_type} API call ({span_name}): {e}")
-        span.record_output({"error": str(e)})
-        raise
-
     # --- Traced Async Functions ---
     async def traced_create_async(*args, **kwargs):
         current_trace = current_trace_var.get()
@@ -1389,7 +1505,8 @@ def wrap(client: Any) -> Any:
                 response_or_iterator = await original_create(*args, **kwargs)
                 return _format_and_record_output(span, response_or_iterator, is_streaming, True, False)
             except Exception as e:
-
+                _capture_exception_for_trace(span, sys.exc_info())
+                raise e
 
     # Async responses for OpenAI clients
     async def traced_response_create_async(*args, **kwargs):
@@ -1404,7 +1521,8 @@ def wrap(client: Any) -> Any:
                 response_or_iterator = await original_responses_create(*args, **kwargs)
                 return _format_and_record_output(span, response_or_iterator, is_streaming, True, True)
             except Exception as e:
-
+                _capture_exception_for_trace(span, sys.exc_info())
+                raise e
 
     # Function replacing .stream() for async clients
     def traced_stream_async(*args, **kwargs):
@@ -1435,7 +1553,8 @@ def wrap(client: Any) -> Any:
                 response_or_iterator = original_create(*args, **kwargs)
                 return _format_and_record_output(span, response_or_iterator, is_streaming, False, False)
             except Exception as e:
-
+                _capture_exception_for_trace(span, sys.exc_info())
+                raise e
 
     def traced_response_create_sync(*args, **kwargs):
         current_trace = current_trace_var.get()
@@ -1449,7 +1568,8 @@ def wrap(client: Any) -> Any:
                 response_or_iterator = original_responses_create(*args, **kwargs)
                 return _format_and_record_output(span, response_or_iterator, is_streaming, False, True)
             except Exception as e:
-
+                _capture_exception_for_trace(span, sys.exc_info())
+                raise e
 
     # Function replacing sync .stream()
     def traced_stream_sync(*args, **kwargs):
@@ -1990,10 +2110,12 @@ def get_instance_prefixed_name(instance, class_name, class_identifiers):
     Otherwise, returns None.
     """
     if class_name in class_identifiers:
-
+        class_config = class_identifiers[class_name]
+        attr = class_config['identifier']
+
         if hasattr(instance, attr):
             instance_name = getattr(instance, attr)
             return instance_name
         else:
-            raise Exception(f"Attribute {
+            raise Exception(f"Attribute {attr} does not exist for {class_name}. Check your identify() decorator.")
     return None
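The state-tracking pieces above fit together roughly as follows. This is a minimal illustrative sketch, not code from the package: the class, attribute names, and project name are made up, and JUDGMENT_API_KEY / JUDGMENT_ORG_ID are assumed to be set in the environment.

from judgeval.common.tracer import Tracer

judgment = Tracer(project_name="state-tracking-demo")  # illustrative project name

@judgment.identify(
    identifier="name",                                  # instance attribute used as the agent name in traces
    track_state=True,                                   # capture attributes before/after each traced call
    track_attributes=["name", "memory"],                # limit capture to these attributes (illustrative)
    field_mappings={"system_prompt": "instructions"},   # record 'instructions' under 'system_prompt'
)
class ResearchAgent:
    def __init__(self, name: str):
        self.name = name
        self.instructions = "You are a research assistant."
        self.memory = []

    @judgment.observe(span_type="tool")
    def remember(self, note: str) -> int:
        # With track_state=True, state_before/state_after on this span reflect 'memory' around the call
        self.memory.append(note)
        return len(self.memory)

ResearchAgent("alice").remember("judgeval 0.0.41 adds span state capture")

With track_state enabled, the observe wrappers call _conditionally_capture_and_record_state before and after the function body, and the captured dictionaries are stored on the span as state_before / state_after (see the TraceSpan changes in judgeval/data/trace.py below).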
judgeval/common/utils.py
CHANGED
@@ -12,9 +12,10 @@ NOTE: any function beginning with 'a', e.g. 'afetch_together_api_response', is a
 import asyncio
 import concurrent.futures
 import os
+from types import TracebackType
 import requests
 import pprint
-from typing import Any, Dict, List, Literal, Mapping, Optional, Union
+from typing import Any, Dict, List, Literal, Mapping, Optional, TypeAlias, Union
 
 # Third-party imports
 import litellm
@@ -782,3 +783,6 @@ if __name__ == "__main__":
     ]
     ]
     ))
+
+ExcInfo: TypeAlias = tuple[type[BaseException], BaseException, TracebackType]
+OptExcInfo: TypeAlias = ExcInfo | tuple[None, None, None]
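For reference, the new ExcInfo / OptExcInfo aliases describe the tuple returned by sys.exc_info(), which _capture_exception_for_trace in tracer.py now accepts. A minimal sketch (not from the package) of typing a handler against them:

import sys
from judgeval.common.utils import OptExcInfo

def log_exception(exc_info: OptExcInfo) -> None:
    # sys.exc_info() returns (None, None, None) when no exception is being handled
    exc_type, exc_value, exc_traceback = exc_info
    if exc_type is not None:
        print(f"{exc_type.__name__}: {exc_value}")

try:
    1 / 0
except ZeroDivisionError:
    log_exception(sys.exc_info())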
judgeval/data/datasets/dataset.py
CHANGED
@@ -5,14 +5,15 @@ import json
 import os
 import yaml
 from dataclasses import dataclass, field
-from typing import List, Union, Literal
+from typing import List, Union, Literal, Optional
 
-from judgeval.data import Example
+from judgeval.data import Example, Trace
 from judgeval.common.logger import debug, error, warning, info
 
 @dataclass
 class EvalDataset:
     examples: List[Example]
+    traces: List[Trace]
     _alias: Union[str, None] = field(default=None)
     _id: Union[str, None] = field(default=None)
     judgment_api_key: str = field(default="")
@@ -20,12 +21,13 @@ class EvalDataset:
     def __init__(self,
                  judgment_api_key: str = os.getenv("JUDGMENT_API_KEY"),
                  organization_id: str = os.getenv("JUDGMENT_ORG_ID"),
-                 examples: List[Example] =
+                 examples: Optional[List[Example]] = None,
+                 traces: Optional[List[Trace]] = None
                  ):
-        debug(f"Initializing EvalDataset with {len(examples)} examples")
         if not judgment_api_key:
             warning("No judgment_api_key provided")
-        self.examples = examples
+        self.examples = examples or []
+        self.traces = traces or []
         self._alias = None
         self._id = None
         self.judgment_api_key = judgment_api_key
@@ -218,8 +220,11 @@ class EvalDataset:
             self.add_example(e)
 
     def add_example(self, e: Example) -> None:
-        self.examples
+        self.examples.append(e)
         # TODO if we need to add rank, then we need to do it here
+
+    def add_trace(self, t: Trace) -> None:
+        self.traces.append(t)
 
     def save_as(self, file_type: Literal["json", "csv", "yaml"], dir_path: str, save_name: str = None) -> None:
         """
@@ -307,6 +312,7 @@ class EvalDataset:
         return (
             f"{self.__class__.__name__}("
             f"examples={self.examples}, "
+            f"traces={self.traces}, "
             f"_alias={self._alias}, "
             f"_id={self._id}"
             f")"
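A short sketch of the new trace support on EvalDataset; the Example fields are illustrative, and credentials are assumed to come from JUDGMENT_API_KEY / JUDGMENT_ORG_ID:

from judgeval.data import Example
from judgeval.data.datasets import EvalDataset

# examples and traces now default to empty lists instead of being required
dataset = EvalDataset()
dataset.add_example(Example(input="What is 2 + 2?", actual_output="4"))

# A Trace collected elsewhere (for instance, pulled from a tracer run) can be
# stored alongside the examples and is pushed/pulled by EvalDatasetClient as
# dataset.traces:
# dataset.add_trace(collected_trace)

print(dataset)  # __repr__ now includes traces=[...]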
judgeval/data/datasets/eval_dataset_client.py
CHANGED
@@ -13,7 +13,7 @@ from judgeval.constants import (
     JUDGMENT_DATASETS_INSERT_API_URL,
     JUDGMENT_DATASETS_EXPORT_JSONL_API_URL
 )
-from judgeval.data import Example
+from judgeval.data import Example, Trace
 from judgeval.data.datasets import EvalDataset
 
 
@@ -58,6 +58,7 @@ class EvalDatasetClient:
             "dataset_alias": alias,
             "project_name": project_name,
             "examples": [e.to_dict() for e in dataset.examples],
+            "traces": [t.model_dump() for t in dataset.traces],
             "overwrite": overwrite,
         }
         try:
@@ -202,6 +203,7 @@ class EvalDatasetClient:
             info(f"Successfully pulled dataset with alias '{alias}'")
             payload = response.json()
             dataset.examples = [Example(**e) for e in payload.get("examples", [])]
+            dataset.traces = [Trace(**t) for t in payload.get("traces", [])]
             dataset._alias = payload.get("alias")
             dataset._id = payload.get("id")
             progress.update(
judgeval/data/trace.py
CHANGED
@@ -33,6 +33,8 @@ class TraceSpan(BaseModel):
     additional_metadata: Optional[Dict[str, Any]] = None
     has_evaluation: Optional[bool] = False
     agent_name: Optional[str] = None
+    state_before: Optional[Dict[str, Any]] = None
+    state_after: Optional[Dict[str, Any]] = None
 
     def model_dump(self, **kwargs):
         return {
@@ -50,7 +52,9 @@ class TraceSpan(BaseModel):
             "span_type": self.span_type,
             "usage": self.usage.model_dump() if self.usage else None,
             "has_evaluation": self.has_evaluation,
-            "agent_name": self.agent_name
+            "agent_name": self.agent_name,
+            "state_before": self.state_before,
+            "state_after": self.state_after
         }
 
     def print_span(self):
@@ -113,7 +117,7 @@ class Trace(BaseModel):
     name: str
     created_at: str
     duration: float
-
+    trace_spans: List[TraceSpan]
     overwrite: bool = False
     offline_mode: bool = False
     rules: Optional[Dict[str, Any]] = None
judgeval/judgment_client.py
CHANGED
@@ -63,7 +63,15 @@ class SingletonMeta(type):
         return cls._instances[cls]
 
 class JudgmentClient(metaclass=SingletonMeta):
-    def __init__(self, judgment_api_key: str = os.getenv("JUDGMENT_API_KEY"), organization_id: str = os.getenv("JUDGMENT_ORG_ID")):
+    def __init__(self, judgment_api_key: Optional[str] = os.getenv("JUDGMENT_API_KEY"), organization_id: Optional[str] = os.getenv("JUDGMENT_ORG_ID")):
+        # Check if API key is None
+        if judgment_api_key is None:
+            raise ValueError("JUDGMENT_API_KEY cannot be None. Please provide a valid API key or set the JUDGMENT_API_KEY environment variable.")
+
+        # Check if organization ID is None
+        if organization_id is None:
+            raise ValueError("JUDGMENT_ORG_ID cannot be None. Please provide a valid organization ID or set the JUDGMENT_ORG_ID environment variable.")
+
         self.judgment_api_key = judgment_api_key
         self.organization_id = organization_id
         self.eval_dataset_client = EvalDatasetClient(judgment_api_key, organization_id)
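A minimal sketch of the new fail-fast behavior when credentials are missing:

from judgeval.judgment_client import JudgmentClient

# Passing None (or leaving JUDGMENT_API_KEY / JUDGMENT_ORG_ID unset) now raises
# immediately instead of creating a client with missing credentials.
try:
    JudgmentClient(judgment_api_key=None, organization_id=None)
except ValueError as err:
    print(err)  # "JUDGMENT_API_KEY cannot be None. ..."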
judgeval/run_evaluation.py
CHANGED
@@ -1,6 +1,7 @@
 import asyncio
 import requests
 import time
+import json
 import sys
 import itertools
 import threading
@@ -362,14 +363,26 @@ def check_examples(examples: List[Example], scorers: List[Union[APIJudgmentScore
     """
     Checks if the example contains the necessary parameters for the scorer.
     """
+    prompt_user = False
     for scorer in scorers:
         for example in examples:
             missing_params = []
             for param in scorer.required_params:
                 if getattr(example, param.value) is None:
-                    missing_params.append(f"
+                    missing_params.append(f"{param.value}")
             if missing_params:
-
+                rprint(f"[yellow]⚠️ WARNING:[/yellow] Example is missing required parameters for scorer [bold]{scorer.score_type.value}[/bold]")
+                rprint(f"Missing parameters: {', '.join(missing_params)}")
+                rprint(f"Example: {json.dumps(example.model_dump(), indent=2)}")
+                rprint("-"*40)
+                prompt_user = True
+
+    if prompt_user:
+        user_input = input("Do you want to continue? (y/n)")
+        if user_input.lower() != "y":
+            sys.exit(0)
+        else:
+            rprint("[green]Continuing...[/green]")
 
 def run_trace_eval(trace_run: TraceRun, override: bool = False, ignore_errors: bool = True, function: Optional[Callable] = None, tracer: Optional[Union[Tracer, BaseCallbackHandler]] = None, examples: Optional[List[Example]] = None) -> List[ScoringResult]:
     # Call endpoint to check to see if eval run name exists (if we DON'T want to override and DO want to log results)
@@ -407,7 +420,7 @@ def run_trace_eval(trace_run: TraceRun, override: bool = False, ignore_errors: b
         for i, trace in enumerate(tracer.traces):
             # We set the root-level trace span with the expected tools of the Trace
             trace = Trace(**trace)
-            trace.
+            trace.trace_spans[0].expected_tools = examples[i].expected_tools
             new_traces.append(trace)
         trace_run.traces = new_traces
         tracer.traces = []
@@ -894,6 +907,7 @@ def run_eval(evaluation_run: EvaluationRun, override: bool = False, ignore_error
             f"Processing evaluation '{evaluation_run.eval_name}': "
         )
     else:
+        check_examples(evaluation_run.examples, evaluation_run.scorers)
         if judgment_scorers:
             # Execute evaluation using Judgment API
             info("Starting API evaluation")
judgeval/scorers/judgeval_scorer.py
CHANGED
@@ -12,7 +12,7 @@ from judgeval.common.logger import debug, info, warning, error
 from judgeval.judges import JudgevalJudge
 from judgeval.judges.utils import create_judge
 from judgeval.constants import UNBOUNDED_SCORERS
-
+from judgeval.data.example import ExampleParams
 class JudgevalScorer:
     """
     Base class for scorers in `judgeval`.
@@ -39,6 +39,7 @@ class JudgevalScorer:
     evaluation_cost: Optional[float] = None # The cost of running the scorer
     verbose_logs: Optional[str] = None # The verbose logs of the scorer
     additional_metadata: Optional[Dict] = None # Additional metadata for the scorer
+    required_params: Optional[List[ExampleParams]] = None # The required parameters for the scorer
     error: Optional[str] = None
     success: Optional[bool] = None
 
@@ -51,6 +52,7 @@ class JudgevalScorer:
         reason: Optional[str] = None,
         success: Optional[bool] = None,
         evaluation_model: Optional[str] = None,
+        required_params: Optional[List[ExampleParams]] = None,
         strict_mode: bool = False,
         async_mode: bool = True,
         verbose_mode: bool = True,
@@ -87,6 +89,7 @@ class JudgevalScorer:
         self.evaluation_cost = evaluation_cost
         self.verbose_logs = verbose_logs
         self.additional_metadata = additional_metadata
+        self.required_params = required_params
 
     def _add_model(self, model: Optional[Union[str, List[str], JudgevalJudge]] = None):
         """
judgeval/scorers/prompt_scorer.py
CHANGED
@@ -30,6 +30,7 @@ from typing import List, Optional, Tuple, Any, Mapping
 from pydantic import BaseModel, model_serializer, Field
 
 from judgeval.data import Example
+from judgeval.data.example import ExampleParams
 from judgeval.scorers import JudgevalScorer
 from judgeval.scorers.utils import (
     scorer_progress_meter,
@@ -64,6 +65,7 @@ class PromptScorer(JudgevalScorer, BaseModel):
         async_mode: bool = True,
         strict_mode: bool = False,
         verbose_mode: bool = False,
+        required_params: Optional[List[ExampleParams]] = None,
     ):
         # Initialize BaseModel first
         BaseModel.__init__(
@@ -85,6 +87,7 @@ class PromptScorer(JudgevalScorer, BaseModel):
             async_mode=async_mode,
             strict_mode=strict_mode,
             verbose_mode=verbose_mode,
+            required_params=required_params,
         )
 
     def score_example(
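A sketch of how a custom scorer might declare the new required_params. Only required_params is new in this release; the scorer name and the score_type/threshold arguments are assumptions about JudgevalScorer's existing constructor.

from judgeval.data.example import ExampleParams
from judgeval.scorers import JudgevalScorer

class AnswerPresenceScorer(JudgevalScorer):
    def __init__(self):
        super().__init__(
            score_type="answer_presence",   # illustrative name (assumed existing constructor argument)
            threshold=0.5,                  # assumed existing constructor argument
            required_params=[ExampleParams.INPUT, ExampleParams.ACTUAL_OUTPUT],
        )

check_examples() in run_evaluation.py (see above) reads these required_params, warns about examples whose corresponding fields are None, and asks whether to continue before running the evaluation.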
{judgeval-0.0.40.dist-info → judgeval-0.0.41.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: judgeval
-Version: 0.0.
+Version: 0.0.41
 Summary: Judgeval Package
 Project-URL: Homepage, https://github.com/JudgmentLabs/judgeval
 Project-URL: Issues, https://github.com/JudgmentLabs/judgeval/issues
@@ -37,11 +37,11 @@ Description-Content-Type: text/markdown
 
 <br>
 
-## [🌐 Landing Page](https://www.judgmentlabs.ai/) • [📚 Docs](https://
+## [🌐 Landing Page](https://www.judgmentlabs.ai/) • [📚 Docs](https://docs.judgmentlabs.ai/introduction) • [🚀 Demos](https://www.youtube.com/@AlexShan-j3o)
 
 [](https://x.com/JudgmentLabs)
 [](https://www.linkedin.com/company/judgmentlabs)
-[](https://discord.gg/
+[](https://discord.gg/ZCnSXYug)
 
 </div>
 
@@ -56,19 +56,28 @@ We support tracing agents built with LangGraph, OpenAI SDK, Anthropic, ... and a
 Judgeval is created and maintained by [Judgment Labs](https://judgmentlabs.ai/).
 
 ## 📋 Table of Contents
-
-
-
-
-
-
-
-
-
-
-
-
-
+- [🌐 Landing Page • 📚 Docs • 🚀 Demos](#-landing-page----docs---demos)
+- [Judgeval: open-source testing, monitoring, and optimization for AI agents](#judgeval-open-source-testing-monitoring-and-optimization-for-ai-agents)
+- [📋 Table of Contents](#-table-of-contents)
+- [✨ Features](#-features)
+- [🛠️ Installation](#️-installation)
+- [🏁 Get Started](#-get-started)
+- [🛰️ Tracing](#️-tracing)
+- [📝 Offline Evaluations](#-offline-evaluations)
+- [📡 Online Evaluations](#-online-evaluations)
+- [🏢 Self-Hosting](#-self-hosting)
+- [Key Features](#key-features)
+- [Getting Started](#getting-started)
+- [📚 Cookbooks](#-cookbooks)
+- [Sample Agents](#sample-agents)
+- [💰 LangGraph Financial QA Agent](#-langgraph-financial-qa-agent)
+- [✈️ OpenAI Travel Agent](#️-openai-travel-agent)
+- [Custom Evaluators](#custom-evaluators)
+- [🔍 PII Detection](#-pii-detection)
+- [📧 Cold Email Generation](#-cold-email-generation)
+- [💻 Development with Cursor](#-development-with-cursor)
+- [⭐ Star Us on GitHub](#-star-us-on-github)
+- [❤️ Contributors](#️-contributors)
 
 <!-- Created by https://github.com/ekalinin/github-markdown-toc -->
 
{judgeval-0.0.40.dist-info → judgeval-0.0.41.dist-info}/RECORD
CHANGED
@@ -2,27 +2,27 @@ judgeval/__init__.py,sha256=x9HWt4waJwJMAqTuJSg2MezF9Zg-macEjeU-ajbly-8,330
 judgeval/clients.py,sha256=EiTmvvWksTPyWIuMC9jz06SPY2vFzokIJUIGoScpisA,989
 judgeval/constants.py,sha256=xuO-Und5c0-K3yTRn2fAkwyY2uTf8b7dGd39CPVqkSQ,5661
 judgeval/evaluation_run.py,sha256=KNGtaGAwD18pDNOKF7PCMlLnQe9SpRLTs0XWFMrCiLc,6684
-judgeval/judgment_client.py,sha256=
+judgeval/judgment_client.py,sha256=JO3AkU-disPHQVK5g1SM-bs_EUSy8QZ3AaAj_Q2ag6s,24968
 judgeval/rules.py,sha256=jkh1cXXcUf8oRY7xJUZfcQBYWn_rjUW4GvrhRt15PeU,20265
-judgeval/run_evaluation.py,sha256=
+judgeval/run_evaluation.py,sha256=MshtOGvWm_eGj2JamEtiMWvPjdCwrKTp9WcAUrBm2Fs,49673
 judgeval/version_check.py,sha256=bvJEidB7rAeXozoUbN9Yb97QOR_s2hgvpvj74jJ5HlY,943
 judgeval/common/__init__.py,sha256=7d24BRxtncpMj3AAJCj8RS7TqgjXmW777HVZH6-3sBs,289
 judgeval/common/exceptions.py,sha256=U-TxHLn7oVMezsMuoYouNDb2XuS8RCggfntYf5_6u4E,565
 judgeval/common/logger.py,sha256=KO75wWXCxhUHUMvLaTU31ZzOk6tkZBa7heQ7y0f-zFE,6062
 judgeval/common/s3_storage.py,sha256=W8wq9S7qJZdqdBR4sk3aEZ4K3-pz40DOoolOJrWs9Vo,3768
-judgeval/common/tracer.py,sha256=
-judgeval/common/utils.py,sha256=
+judgeval/common/tracer.py,sha256=rYNmyB3Z955xfnKmlase6gub8Xf5xz6nQefONs_Td5U,90870
+judgeval/common/utils.py,sha256=sWdHfqgiF6AnKTQNmeUBfoEsddXgInI5M24t2-QYexk,34271
 judgeval/data/__init__.py,sha256=GX_GloDtBB35mv3INWbSTP2r9cwCU2IeIYjzRT0SAd8,530
 judgeval/data/custom_example.py,sha256=QRBqiRiZS8UgVeTRHY0r1Jzm6yAYsyg6zmHxQGxdiQs,739
 judgeval/data/example.py,sha256=jcK78ff-TKNl9Qtxvbd1g61crpo-s4fWHaqyMIbQNq0,6877
 judgeval/data/result.py,sha256=KfU9lhAKG_Xo2eGDm2uKVVRZpf177IDASg1cIwedJwE,3184
 judgeval/data/scorer_data.py,sha256=JVlaTx1EP2jw2gh3Vgx1CSEsvIFABAN26IquKyxwiJQ,3273
 judgeval/data/tool.py,sha256=eEEvGDNNYWhcQiI6cjDv3rO1VoOJJS5LWGS76Gb_gtY,1813
-judgeval/data/trace.py,sha256=
+judgeval/data/trace.py,sha256=S9IQunatke-Kcxi2-qXg3CtbmxBk8VGBDJzWshx7zJg,4798
 judgeval/data/trace_run.py,sha256=fiB5Z5il9U9XqvksdA2DbLNd96U_Wrz8K00RuFJBy38,2324
 judgeval/data/datasets/__init__.py,sha256=IdNKhQv9yYZ_op0rdBacrFaFVmiiYQ3JTzXzxOTsEVQ,176
-judgeval/data/datasets/dataset.py,sha256=
-judgeval/data/datasets/eval_dataset_client.py,sha256=
+judgeval/data/datasets/dataset.py,sha256=pq9-A1mg2Brpjg1TufDU_eLo9sQhX0nw-UTGaf3jCXA,12952
+judgeval/data/datasets/eval_dataset_client.py,sha256=LJ1bf1sZAC4ZBCRTQ1Y4VrJuNSslYBQ1y9YKuhYxwqY,15176
 judgeval/integrations/langgraph.py,sha256=L9zPPWVLGL2HWuwHPqM5Kic4S7EfQ_Y1Y3YKBJNfGCA,23004
 judgeval/judges/__init__.py,sha256=6X7VSwrwsdxGBNxCyapVRWGghhKOy3MVxFNMQ62kCXM,308
 judgeval/judges/base_judge.py,sha256=ch_S7uBB7lyv44Lf1d7mIGFpveOO58zOkkpImKgd9_4,994
@@ -33,8 +33,8 @@ judgeval/judges/utils.py,sha256=vL-15_udU94JHUAiyrAvHAKMj6Fqypg01ek4YH5zVCM,2687
 judgeval/scorers/__init__.py,sha256=VKPveyGCv5Rc0YtuT7iAxSv-M5EuikqAVeaGNnYMuWE,1340
 judgeval/scorers/api_scorer.py,sha256=NQ_CrrUPhSUk1k2Q8rKpCG_TU2FT32sFEqvb-Yi54B0,2688
 judgeval/scorers/exceptions.py,sha256=eGW5CuJgZ5YJBFrE4FHDSF651PO1dKAZ379mJ8gOsfo,178
-judgeval/scorers/judgeval_scorer.py,sha256=
-judgeval/scorers/prompt_scorer.py,sha256=
+judgeval/scorers/judgeval_scorer.py,sha256=_qtXzl5aa1FH_50kVPnRfiwyCtuXPKyrGU71_3pOrBw,7288
+judgeval/scorers/prompt_scorer.py,sha256=Uf_QZhytd78cInKZv8wr66Angz5sxLklP5hEEcoabq4,12001
 judgeval/scorers/score.py,sha256=h4eVlbItqG8R0nQgSgeyicYSIraZV9MvV-RRaFu46mg,18762
 judgeval/scorers/utils.py,sha256=iHQVTlIANbmCTXz9kTeSdOytgUZ_T74Re61ajqsk_WQ,6827
 judgeval/scorers/judgeval_scorers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -62,7 +62,7 @@ judgeval/scorers/judgeval_scorers/classifiers/text2sql/text2sql_scorer.py,sha256
 judgeval/tracer/__init__.py,sha256=wy3DYpH8U_z0GO_K_gOSkK0tTTD-u5eLDo0T5xIBoAc,147
 judgeval/utils/alerts.py,sha256=O19Xj7DA0YVjl8PWiuH4zfdZeu3yiLVvHfY8ah2wG0g,2759
 judgeval/utils/data_utils.py,sha256=pB4GBWi8XoM2zSR2NlLXH5kqcQ029BVhDxaVKkdmiBY,1860
-judgeval-0.0.
-judgeval-0.0.
-judgeval-0.0.
-judgeval-0.0.
+judgeval-0.0.41.dist-info/METADATA,sha256=-sO68MUEmN3s4ji7Vf1gTuPv60R7Ny6bMcuuKlFSSI8,57358
+judgeval-0.0.41.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+judgeval-0.0.41.dist-info/licenses/LICENSE.md,sha256=tKmCg7k5QOmxPK19XMfzim04QiQJPmgIm0pAn55IJwk,11352
+judgeval-0.0.41.dist-info/RECORD,,
{judgeval-0.0.40.dist-info → judgeval-0.0.41.dist-info}/WHEEL
File without changes
{judgeval-0.0.40.dist-info → judgeval-0.0.41.dist-info}/licenses/LICENSE.md
File without changes