judgeval 0.0.13__py3-none-any.whl → 0.0.14__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- judgeval/common/tracer.py +19 -30
- judgeval/data/datasets/dataset.py +3 -2
- judgeval/data/datasets/eval_dataset_client.py +16 -9
- judgeval/data/example.py +8 -1
- judgeval/evaluation_run.py +1 -0
- judgeval/judgment_client.py +18 -12
- judgeval/run_evaluation.py +10 -6
- {judgeval-0.0.13.dist-info → judgeval-0.0.14.dist-info}/METADATA +1 -1
- {judgeval-0.0.13.dist-info → judgeval-0.0.14.dist-info}/RECORD +11 -11
- {judgeval-0.0.13.dist-info → judgeval-0.0.14.dist-info}/WHEEL +0 -0
- {judgeval-0.0.13.dist-info → judgeval-0.0.14.dist-info}/licenses/LICENSE.md +0 -0
judgeval/common/tracer.py
CHANGED
@@ -188,8 +188,9 @@ class TraceManagerClient:
         - Saving a trace
         - Deleting a trace
     """
-    def __init__(self, judgment_api_key: str):
+    def __init__(self, judgment_api_key: str, organization_id: str):
         self.judgment_api_key = judgment_api_key
+        self.organization_id = organization_id
 
     def fetch_trace(self, trace_id: str):
         """
@@ -199,11 +200,11 @@
             JUDGMENT_TRACES_FETCH_API_URL,
             json={
                 "trace_id": trace_id,
-                # "judgment_api_key": self.judgment_api_key,
             },
             headers={
                 "Content-Type": "application/json",
-                "Authorization": f"Bearer {self.judgment_api_key}"
+                "Authorization": f"Bearer {self.judgment_api_key}",
+                "X-Organization-Id": self.organization_id
             }
         )
 
@@ -226,7 +227,8 @@ class TraceManagerClient:
             json=trace_data,
             headers={
                 "Content-Type": "application/json",
-                "Authorization": f"Bearer {self.judgment_api_key}"
+                "Authorization": f"Bearer {self.judgment_api_key}",
+                "X-Organization-Id": self.organization_id
             }
         )
 
@@ -245,12 +247,12 @@ class TraceManagerClient:
         response = requests.delete(
             JUDGMENT_TRACES_DELETE_API_URL,
             json={
-                "judgment_api_key": self.judgment_api_key,
                 "trace_ids": [trace_id],
             },
             headers={
                 "Content-Type": "application/json",
-                "Authorization": f"Bearer {self.judgment_api_key}"
+                "Authorization": f"Bearer {self.judgment_api_key}",
+                "X-Organization-Id": self.organization_id
             }
         )
 
@@ -266,12 +268,12 @@ class TraceManagerClient:
         response = requests.delete(
             JUDGMENT_TRACES_DELETE_API_URL,
             json={
-                # "judgment_api_key": self.judgment_api_key,
                 "trace_ids": trace_ids,
             },
             headers={
                 "Content-Type": "application/json",
-                "Authorization": f"Bearer {self.judgment_api_key}"
+                "Authorization": f"Bearer {self.judgment_api_key}",
+                "X-Organization-Id": self.organization_id
             }
         )
 
@@ -294,7 +296,7 @@ class TraceClient:
         self.span_type = None
         self._current_span: Optional[TraceEntry] = None
         self.overwrite = overwrite
-        self.trace_manager_client = TraceManagerClient(tracer.api_key)  # Manages DB operations for trace data
+        self.trace_manager_client = TraceManagerClient(tracer.api_key, tracer.organization_id)  # Manages DB operations for trace data
 
     @contextmanager
     def span(self, name: str, span_type: SpanType = "span"):
@@ -371,6 +373,7 @@ class TraceClient:
             raise ValueError(f"Failed to load scorers: {str(e)}")
 
         eval_run = EvaluationRun(
+            organization_id=self.tracer.organization_id,
             log_results=log_results,
             project_name=self.project_name,
             eval_name=f"{self.name.capitalize()}-"
@@ -546,7 +549,6 @@ class TraceClient:
         # Create trace document
         trace_data = {
             "trace_id": self.trace_id,
-            "api_key": self.tracer.api_key,
             "name": self.name,
             "project_name": self.project_name,
             "created_at": datetime.fromtimestamp(self.start_time).isoformat(),
@@ -568,6 +570,8 @@ class TraceClient:
             channel = connection.channel()
 
             channel.queue_declare(queue=RABBITMQ_QUEUE, durable=True)
+            trace_data["judgment_api_key"] = self.tracer.api_key
+            trace_data["organization_id"] = self.tracer.organization_id
 
             channel.basic_publish(
                 exchange='',
@@ -580,25 +584,6 @@ class TraceClient:
 
         self.trace_manager_client.save_trace(trace_data, empty_save)
 
-
-        # Save trace data by making POST request to API
-        response = requests.post(
-            JUDGMENT_TRACES_SAVE_API_URL,
-            json=trace_data,
-            headers={
-                "Content-Type": "application/json",
-                "Authorization": f"Bearer {self.tracer.api_key}"  # Bearer token format
-            }
-        )
-
-        if response.status_code == HTTPStatus.BAD_REQUEST:
-            raise ValueError(f"Failed to save trace data: Check your Trace name for conflicts, set overwrite=True to overwrite existing traces: {response.text}")
-        elif response.status_code != HTTPStatus.OK:
-            raise ValueError(f"Failed to save trace data: {response.text}")
-
-        if not empty_save and "ui_results_url" in response.json():
-            rprint(f"\n🔍 You can view your trace data here: [rgb(106,0,255)]{response.json()['ui_results_url']}[/]\n")
-
         return self.trace_id, trace_data
 
     def delete(self):
@@ -612,14 +597,18 @@ class Tracer:
             cls._instance = super(Tracer, cls).__new__(cls)
         return cls._instance
 
-    def __init__(self, api_key: str = os.getenv("JUDGMENT_API_KEY"), project_name: str = "default_project"):
+    def __init__(self, api_key: str = os.getenv("JUDGMENT_API_KEY"), project_name: str = "default_project", organization_id: str = os.getenv("ORGANIZATION_ID")):
         if not hasattr(self, 'initialized'):
             if not api_key:
                 raise ValueError("Tracer must be configured with a Judgment API key")
 
+            if not organization_id:
+                raise ValueError("Tracer must be configured with an Organization ID")
+
             self.api_key: str = api_key
             self.project_name: str = project_name
             self.client: JudgmentClient = JudgmentClient(judgment_api_key=api_key)
+            self.organization_id: str = organization_id
            self.depth: int = 0
            self._current_trace: Optional[str] = None
            self.initialized: bool = True
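Taken together, the tracer changes thread an organization ID from `Tracer` through `TraceManagerClient` and onto every request as an `X-Organization-Id` header. A minimal usage sketch based on the signatures shown above; the environment variables are simply the defaults from the new signature and must be set in your environment:

```python
import os

from judgeval.common.tracer import Tracer

# Both arguments fall back to environment variables in 0.0.14:
# api_key -> JUDGMENT_API_KEY, organization_id -> ORGANIZATION_ID.
# A missing organization_id now raises ValueError at construction time.
tracer = Tracer(
    api_key=os.getenv("JUDGMENT_API_KEY"),
    project_name="default_project",
    organization_id=os.getenv("ORGANIZATION_ID"),
)
```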
judgeval/data/datasets/dataset.py
CHANGED
@@ -17,9 +17,10 @@ class EvalDataset:
     _alias: Union[str, None] = field(default=None)
     _id: Union[str, None] = field(default=None)
     judgment_api_key: str = field(default="")
-
+    organization_id: str = field(default="")
     def __init__(self,
                  judgment_api_key: str = os.getenv("JUDGMENT_API_KEY"),
+                 organization_id: str = os.getenv("ORGANIZATION_ID"),
                  ground_truths: List[GroundTruthExample] = [],
                  examples: List[Example] = [],
                  ):
@@ -31,7 +32,7 @@ class EvalDataset:
         self._alias = None
         self._id = None
         self.judgment_api_key = judgment_api_key
-
+        self.organization_id = organization_id
 
     def add_from_json(self, file_path: str) -> None:
         debug(f"Loading dataset from JSON file: {file_path}")
judgeval/data/datasets/eval_dataset_client.py
CHANGED
@@ -19,8 +19,9 @@ from judgeval.data.datasets.ground_truth import GroundTruthExample
 
 
 class EvalDatasetClient:
-    def __init__(self, judgment_api_key: str):
+    def __init__(self, judgment_api_key: str, organization_id: str):
         self.judgment_api_key = judgment_api_key
+        self.organization_id = organization_id
 
     def create_dataset(self) -> EvalDataset:
         return EvalDataset(judgment_api_key=self.judgment_api_key)
@@ -58,7 +59,6 @@ class EvalDatasetClient:
             "ground_truths": [g.to_dict() for g in dataset.ground_truths],
             "examples": [e.to_dict() for e in dataset.examples],
             "overwrite": overwrite,
-            # "judgment_api_key": dataset.judgment_api_key
         }
         try:
             response = requests.post(
@@ -66,7 +66,8 @@ class EvalDatasetClient:
                 json=content,
                 headers={
                     "Content-Type": "application/json",
-                    "Authorization": f"Bearer {self.judgment_api_key}"
+                    "Authorization": f"Bearer {self.judgment_api_key}",
+                    "X-Organization-Id": self.organization_id
                 }
             )
             if response.status_code == 500:
@@ -121,7 +122,6 @@ class EvalDatasetClient:
         )
         request_body = {
             "alias": alias,
-            # "judgment_api_key": self.judgment_api_key
         }
 
         try:
@@ -130,7 +130,8 @@ class EvalDatasetClient:
                 json=request_body,
                 headers={
                     "Content-Type": "application/json",
-                    "Authorization": f"Bearer {self.judgment_api_key}"
+                    "Authorization": f"Bearer {self.judgment_api_key}",
+                    "X-Organization-Id": self.organization_id
                 }
             )
             response.raise_for_status()
@@ -179,7 +180,6 @@ class EvalDatasetClient:
             total=100,
         )
         request_body = {
-            # "judgment_api_key": self.judgment_api_key
         }
 
         try:
@@ -188,7 +188,8 @@ class EvalDatasetClient:
                 json=request_body,
                 headers={
                     "Content-Type": "application/json",
-                    "Authorization": f"Bearer {self.judgment_api_key}"
+                    "Authorization": f"Bearer {self.judgment_api_key}",
+                    "X-Organization-Id": self.organization_id
                 }
             )
             response.raise_for_status()
@@ -238,7 +239,12 @@ class EvalDatasetClient:
         try:
             response = requests.post(
                 JUDGMENT_DATASETS_EDIT_API_URL,
-                json=content
+                json=content,
+                headers={
+                    "Content-Type": "application/json",
+                    "Authorization": f"Bearer {self.judgment_api_key}",
+                    "X-Organization-Id": self.organization_id
+                }
             )
             response.raise_for_status()
         except requests.exceptions.RequestException as e:
@@ -266,7 +272,8 @@ class EvalDatasetClient:
             json={"alias": alias},
             headers={
                 "Content-Type": "application/json",
-                "Authorization": f"Bearer {self.judgment_api_key}"
+                "Authorization": f"Bearer {self.judgment_api_key}",
+                "X-Organization-Id": self.organization_id
             },
             stream=True
         )
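The dataset classes follow the same pattern: `EvalDataset` gains an `organization_id` field and `EvalDatasetClient` now requires one alongside the API key. A short construction sketch, assuming the environment-variable defaults shown in the diff:

```python
import os

from judgeval.data.datasets.dataset import EvalDataset
from judgeval.data.datasets.eval_dataset_client import EvalDatasetClient

# EvalDataset defaults organization_id to os.getenv("ORGANIZATION_ID");
# EvalDatasetClient takes it as an explicit second argument.
dataset = EvalDataset(
    judgment_api_key=os.getenv("JUDGMENT_API_KEY"),
    organization_id=os.getenv("ORGANIZATION_ID"),
)
client = EvalDatasetClient(
    os.getenv("JUDGMENT_API_KEY"),
    os.getenv("ORGANIZATION_ID"),
)
```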
judgeval/data/example.py
CHANGED
@@ -5,7 +5,7 @@ Classes for representing examples in a dataset.
 
 from typing import TypeVar, Optional, Any, Dict, List
 from uuid import uuid4
-from pydantic import BaseModel, Field
+from pydantic import BaseModel, Field, field_validator
 from enum import Enum
 from datetime import datetime
 import time
@@ -40,6 +40,13 @@ class Example(BaseModel):
     timestamp: Optional[str] = None
     trace_id: Optional[str] = None
 
+    @field_validator('input', 'actual_output', mode='before')
+    def convert_to_str(cls, value):
+        try:
+            return str(value)
+        except Exception:
+            return repr(value)
+
     def __init__(self, **data):
         if 'example_id' not in data:
             data['example_id'] = str(uuid4())
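The new `field_validator` on `Example` runs in `mode="before"`, so `input` and `actual_output` are coerced to strings before Pydantic's own type checks. A small illustration of the intended effect; the field values here are invented:

```python
from judgeval.data.example import Example

# Non-string values are passed through str() (or repr() as a fallback)
# by the validator added in 0.0.14.
example = Example(
    input={"question": "What is the capital of France?"},
    actual_output=42,
)
assert isinstance(example.input, str)
assert example.actual_output == "42"
```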
judgeval/evaluation_run.py
CHANGED
@@ -24,6 +24,7 @@ class EvaluationRun(BaseModel):
 
     # The user will specify whether they want log_results when they call run_eval
     log_results: bool = False  # NOTE: log_results has to be set first because it is used to validate project_name and eval_name
+    organization_id: Optional[str] = None
     project_name: Optional[str] = None
     eval_name: Optional[str] = None
     examples: List[Example]
judgeval/judgment_client.py
CHANGED
@@ -34,9 +34,10 @@ class EvalRunRequestBody(BaseModel):
 
 
 class JudgmentClient:
-    def __init__(self, judgment_api_key: str = os.getenv("JUDGMENT_API_KEY")):
+    def __init__(self, judgment_api_key: str = os.getenv("JUDGMENT_API_KEY"), organization_id: str = os.getenv("ORGANIZATION_ID")):
         self.judgment_api_key = judgment_api_key
-        self.
+        self.organization_id = organization_id
+        self.eval_dataset_client = EvalDatasetClient(judgment_api_key, organization_id)
 
         # Verify API key is valid
         result, response = self._validate_api_key()
@@ -78,7 +79,8 @@ class JudgmentClient:
                 model=model,
                 aggregator=aggregator,
                 metadata=metadata,
-                judgment_api_key=self.judgment_api_key
+                judgment_api_key=self.judgment_api_key,
+                organization_id=self.organization_id
             )
             return run_eval(eval, override)
         except ValueError as e:
@@ -115,7 +117,8 @@ class JudgmentClient:
                 model=model,
                 aggregator=aggregator,
                 metadata=metadata,
-                judgment_api_key=self.judgment_api_key
+                judgment_api_key=self.judgment_api_key,
+                organization_id=self.organization_id
             )
             return run_eval(evaluation_run)
         except ValueError as e:
@@ -189,7 +192,8 @@ class JudgmentClient:
         eval_run = requests.post(JUDGMENT_EVAL_FETCH_API_URL,
                                  headers={
                                      "Content-Type": "application/json",
-                                     "Authorization": f"Bearer {self.judgment_api_key}"
+                                     "Authorization": f"Bearer {self.judgment_api_key}",
+                                     "X-Organization-Id": self.organization_id
                                  },
                                  json=eval_run_request_body.model_dump())
         if eval_run.status_code != requests.codes.ok:
@@ -222,7 +226,8 @@ class JudgmentClient:
             json=eval_run_request_body.model_dump(),
             headers={
                 "Content-Type": "application/json",
-                "Authorization": f"Bearer {self.judgment_api_key}"
+                "Authorization": f"Bearer {self.judgment_api_key}",
+                "X-Organization-Id": self.organization_id
             })
         if response.status_code != requests.codes.ok:
             raise ValueError(f"Error deleting eval results: {response.json()}")
@@ -241,11 +246,12 @@ class JudgmentClient:
         response = requests.delete(JUDGMENT_EVAL_DELETE_PROJECT_API_URL,
             json={
                 "project_name": project_name,
-                "judgment_api_key": self.judgment_api_key
+                "judgment_api_key": self.judgment_api_key,
             },
             headers={
                 "Content-Type": "application/json",
-                "Authorization": f"Bearer {self.judgment_api_key}"
+                "Authorization": f"Bearer {self.judgment_api_key}",
+                "X-Organization-Id": self.organization_id
             })
         if response.status_code != requests.codes.ok:
             raise ValueError(f"Error deleting eval results: {response.json()}")
@@ -283,7 +289,6 @@ class JudgmentClient:
         """
         request_body = {
             "slug": slug,
-            # "judgment_api_key": self.judgment_api_key
         }
 
         response = requests.post(
@@ -291,7 +296,8 @@ class JudgmentClient:
             json=request_body,
             headers={
                 "Content-Type": "application/json",
-                "Authorization": f"Bearer {self.judgment_api_key}"
+                "Authorization": f"Bearer {self.judgment_api_key}",
+                "X-Organization-Id": self.organization_id
             }
         )
 
@@ -325,7 +331,6 @@ class JudgmentClient:
             "name": scorer.name,
             "conversation": scorer.conversation,
             "options": scorer.options,
-            # "judgment_api_key": self.judgment_api_key,
             "slug": slug
         }
 
@@ -334,7 +339,8 @@ class JudgmentClient:
             json=request_body,
             headers={
                 "Content-Type": "application/json",
-                "Authorization": f"Bearer {self.judgment_api_key}"
+                "Authorization": f"Bearer {self.judgment_api_key}",
+                "X-Organization-Id": self.organization_id
             }
         )
 
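`JudgmentClient` mirrors the tracer: it reads `ORGANIZATION_ID` by default, attaches the ID to every `EvaluationRun` it builds, and sends it as `X-Organization-Id` on its HTTP calls. A minimal construction sketch, assuming both environment variables are set:

```python
import os

from judgeval.judgment_client import JudgmentClient

# Both keyword arguments default to environment variables in 0.0.14.
client = JudgmentClient(
    judgment_api_key=os.getenv("JUDGMENT_API_KEY"),
    organization_id=os.getenv("ORGANIZATION_ID"),
)
```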
judgeval/run_evaluation.py
CHANGED
@@ -50,7 +50,8 @@ def execute_api_eval(evaluation_run: EvaluationRun) -> List[Dict]:
     response = requests.post(
         JUDGMENT_EVAL_API_URL, headers={
             "Content-Type": "application/json",
-            "Authorization": f"Bearer {evaluation_run.judgment_api_key}"
+            "Authorization": f"Bearer {evaluation_run.judgment_api_key}",
+            "X-Organization-Id": evaluation_run.organization_id
         },
         json=payload)
     response_data = response.json()
@@ -140,7 +141,7 @@ def check_missing_scorer_data(results: List[ScoringResult]) -> List[ScoringResul
     return results
 
 
-def check_eval_run_name_exists(eval_name: str, project_name: str, judgment_api_key: str) -> None:
+def check_eval_run_name_exists(eval_name: str, project_name: str, judgment_api_key: str, organization_id: str) -> None:
     """
     Checks if an evaluation run name already exists for a given project.
 
@@ -158,7 +159,8 @@ def check_eval_run_name_exists(eval_name: str, project_name: str, judgment_api_k
         f"{ROOT_API}/eval-run-name-exists/",
         headers={
             "Content-Type": "application/json",
-            "Authorization": f"Bearer {judgment_api_key}"
+            "Authorization": f"Bearer {judgment_api_key}",
+            "X-Organization-Id": organization_id
         },
         json={
             "eval_name": eval_name,
@@ -199,11 +201,11 @@ def log_evaluation_results(merged_results: List[ScoringResult], evaluation_run:
         JUDGMENT_EVAL_LOG_API_URL,
         headers={
             "Content-Type": "application/json",
-            "Authorization": f"Bearer {evaluation_run.judgment_api_key}"
+            "Authorization": f"Bearer {evaluation_run.judgment_api_key}",
+            "X-Organization-Id": evaluation_run.organization_id
         },
         json={
             "results": [result.to_dict() for result in merged_results],
-            "judgment_api_key": evaluation_run.judgment_api_key,
             "project_name": evaluation_run.project_name,
             "eval_name": evaluation_run.eval_name,
         }
@@ -254,7 +256,8 @@ def run_eval(evaluation_run: EvaluationRun, override: bool = False) -> List[Scor
         check_eval_run_name_exists(
             evaluation_run.eval_name,
             evaluation_run.project_name,
-            evaluation_run.judgment_api_key
+            evaluation_run.judgment_api_key,
+            evaluation_run.organization_id
         )
 
         # Set example IDs if not already set
@@ -312,6 +315,7 @@ def run_eval(evaluation_run: EvaluationRun, override: bool = False) -> List[Scor
         aggregator=evaluation_run.aggregator,
         metadata=evaluation_run.metadata,
         judgment_api_key=evaluation_run.judgment_api_key,
+        organization_id=evaluation_run.organization_id,
         log_results=evaluation_run.log_results
     )
     debug("Sending request to Judgment API")
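Every request site touched in this release ends up building the same header dictionary. The helper below is not part of judgeval; it is a hypothetical sketch that only illustrates the shared header shape introduced in 0.0.14:

```python
def build_judgment_headers(judgment_api_key: str, organization_id: str) -> dict:
    """Hypothetical helper showing the header shape used across 0.0.14 requests."""
    return {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {judgment_api_key}",
        "X-Organization-Id": organization_id,
    }
```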
{judgeval-0.0.13.dist-info → judgeval-0.0.14.dist-info}/RECORD
CHANGED
@@ -1,22 +1,22 @@
 judgeval/__init__.py,sha256=xiiG4CkeaOtey4fusCd9CBz0BVqzTIbV-K2EFIU0rUM,283
 judgeval/clients.py,sha256=Ns5ljrgPPXUMo7fSPJxO12H64lcPyKeQPIVG_RMi2cM,1162
 judgeval/constants.py,sha256=43hGesvBbX1uzc4KXvjLCVdd6cyZRMSnEJp11oA7h74,2794
-judgeval/evaluation_run.py,sha256=
-judgeval/judgment_client.py,sha256=
-judgeval/run_evaluation.py,sha256=
+judgeval/evaluation_run.py,sha256=59lG8AUFTKqbY_JVEEA0I093-Pmiy0ERYDK5BuXuEGg,5965
+judgeval/judgment_client.py,sha256=ryGT3A9-Him6oco3WvuHbjB-FVvAR3wCiiGz03eO_Q4,15409
+judgeval/run_evaluation.py,sha256=Cc7BS07WyqsNpQ38HdMdRI782N3DANjM8UcIq9AwaGA,20769
 judgeval/common/__init__.py,sha256=7d24BRxtncpMj3AAJCj8RS7TqgjXmW777HVZH6-3sBs,289
 judgeval/common/exceptions.py,sha256=U-TxHLn7oVMezsMuoYouNDb2XuS8RCggfntYf5_6u4E,565
 judgeval/common/logger.py,sha256=QXN3UMymmKu2iMEMEgATLBnMDjGr_pE2iOSEFoICgg8,6092
-judgeval/common/tracer.py,sha256=
+judgeval/common/tracer.py,sha256=qam2suh-0_Cu_B7AWg3AMfEo2TisRZVY1SnAfqhiFQo,33211
 judgeval/common/utils.py,sha256=3WRyyX0tvnnj_VAVlEdtZrfzyWj6zfX04xdpCtE1m5Y,33736
 judgeval/data/__init__.py,sha256=YferxwmUqoBi18hrdgro0BD0h4pt20LAqISeUzGMcVU,474
 judgeval/data/api_example.py,sha256=vwWFbI6eJr5VgURCRbuSiMtEXLUbTCih_BcaqEBy-pg,4108
-judgeval/data/example.py,sha256=
+judgeval/data/example.py,sha256=Rd-eDEM-giYfkfsGh_PBS2wwl15QlQPzbMV-J64Yj5E,2991
 judgeval/data/result.py,sha256=8FIO-bFKPegZuByKRjA2_sumjb8oGWQ5ZeQ1RVz5z2w,4393
 judgeval/data/scorer_data.py,sha256=pYljblCPZrlMIv5Eg7R-clnmsqzUBAwokKjZpwa0DXE,3280
 judgeval/data/datasets/__init__.py,sha256=eO6ayeM_bTGwIt0eDSlTBIIBvXvIWRWWSfYZrZROPiQ,265
-judgeval/data/datasets/dataset.py,sha256=
-judgeval/data/datasets/eval_dataset_client.py,sha256=
+judgeval/data/datasets/dataset.py,sha256=KdAY0KRUB2jxcGmc1XXXheFFcPsGFOIGY-kTwBNQS_Y,12080
+judgeval/data/datasets/eval_dataset_client.py,sha256=DzxWQIiHlbpg6FpmWY6brcSP_h_rGcztk2A_6tQNFys,11411
 judgeval/data/datasets/ground_truth.py,sha256=OTBs3VZe-Wp0vEXEsq14GPZHYtpWT16bhGQTycIvkKc,2057
 judgeval/data/datasets/utils.py,sha256=lQxyl7mevct7JcDSyIrU_8QOzT-EYPWEvoUiAeOdeek,2502
 judgeval/judges/__init__.py,sha256=tyQ5KY88Kp1Ctfw2IJxnVEpy8DnFCtmy04JdPOpp-As,339
@@ -78,7 +78,7 @@ judgeval/scorers/judgeval_scorers/local_implementations/summarization/summarizat
 judgeval/scorers/judgeval_scorers/local_implementations/tool_correctness/__init__.py,sha256=JUB3TMqS1OHr6PqpIGqkyiBNbyfUaw7lZuUATjU3_ek,168
 judgeval/scorers/judgeval_scorers/local_implementations/tool_correctness/tool_correctness_scorer.py,sha256=CYGRJY5EuyICYzHrmFdLykwXakX8AC7G3Bhj7p6szfY,5493
 judgeval/tracer/__init__.py,sha256=wy3DYpH8U_z0GO_K_gOSkK0tTTD-u5eLDo0T5xIBoAc,147
-judgeval-0.0.
-judgeval-0.0.
-judgeval-0.0.
-judgeval-0.0.
+judgeval-0.0.14.dist-info/METADATA,sha256=ZmCAECDNWwzpuES1slYKWcY_U-SMOsjaOdtSoj6wu0I,1283
+judgeval-0.0.14.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+judgeval-0.0.14.dist-info/licenses/LICENSE.md,sha256=tKmCg7k5QOmxPK19XMfzim04QiQJPmgIm0pAn55IJwk,11352
+judgeval-0.0.14.dist-info/RECORD,,
{judgeval-0.0.13.dist-info → judgeval-0.0.14.dist-info}/WHEEL
File without changes
{judgeval-0.0.13.dist-info → judgeval-0.0.14.dist-info}/licenses/LICENSE.md
File without changes