judgeval 0.0.12__py3-none-any.whl → 0.0.14__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
judgeval/common/tracer.py CHANGED
@@ -188,8 +188,9 @@ class TraceManagerClient:
        - Saving a trace
        - Deleting a trace
    """
-    def __init__(self, judgment_api_key: str):
+    def __init__(self, judgment_api_key: str, organization_id: str):
        self.judgment_api_key = judgment_api_key
+        self.organization_id = organization_id

    def fetch_trace(self, trace_id: str):
        """
@@ -199,10 +200,11 @@ class TraceManagerClient:
            JUDGMENT_TRACES_FETCH_API_URL,
            json={
                "trace_id": trace_id,
-                "judgment_api_key": self.judgment_api_key,
            },
            headers={
                "Content-Type": "application/json",
+                "Authorization": f"Bearer {self.judgment_api_key}",
+                "X-Organization-Id": self.organization_id
            }
        )

@@ -225,6 +227,8 @@ class TraceManagerClient:
            json=trace_data,
            headers={
                "Content-Type": "application/json",
+                "Authorization": f"Bearer {self.judgment_api_key}",
+                "X-Organization-Id": self.organization_id
            }
        )

@@ -243,11 +247,12 @@ class TraceManagerClient:
        response = requests.delete(
            JUDGMENT_TRACES_DELETE_API_URL,
            json={
-                "judgment_api_key": self.judgment_api_key,
                "trace_ids": [trace_id],
            },
            headers={
                "Content-Type": "application/json",
+                "Authorization": f"Bearer {self.judgment_api_key}",
+                "X-Organization-Id": self.organization_id
            }
        )

@@ -263,11 +268,12 @@ class TraceManagerClient:
        response = requests.delete(
            JUDGMENT_TRACES_DELETE_API_URL,
            json={
-                "judgment_api_key": self.judgment_api_key,
                "trace_ids": trace_ids,
            },
            headers={
                "Content-Type": "application/json",
+                "Authorization": f"Bearer {self.judgment_api_key}",
+                "X-Organization-Id": self.organization_id
            }
        )

@@ -290,7 +296,7 @@ class TraceClient:
        self.span_type = None
        self._current_span: Optional[TraceEntry] = None
        self.overwrite = overwrite
-        self.trace_manager_client = TraceManagerClient(tracer.api_key)  # Manages DB operations for trace data
+        self.trace_manager_client = TraceManagerClient(tracer.api_key, tracer.organization_id)  # Manages DB operations for trace data

    @contextmanager
    def span(self, name: str, span_type: SpanType = "span"):
@@ -367,6 +373,7 @@ class TraceClient:
            raise ValueError(f"Failed to load scorers: {str(e)}")

        eval_run = EvaluationRun(
+            organization_id=self.tracer.organization_id,
            log_results=log_results,
            project_name=self.project_name,
            eval_name=f"{self.name.capitalize()}-"
@@ -542,7 +549,6 @@ class TraceClient:
        # Create trace document
        trace_data = {
            "trace_id": self.trace_id,
-            "api_key": self.tracer.api_key,
            "name": self.name,
            "project_name": self.project_name,
            "created_at": datetime.fromtimestamp(self.start_time).isoformat(),
@@ -564,6 +570,8 @@ class TraceClient:
        channel = connection.channel()

        channel.queue_declare(queue=RABBITMQ_QUEUE, durable=True)
+        trace_data["judgment_api_key"] = self.tracer.api_key
+        trace_data["organization_id"] = self.tracer.organization_id

        channel.basic_publish(
            exchange='',
@@ -589,14 +597,18 @@ class Tracer:
            cls._instance = super(Tracer, cls).__new__(cls)
        return cls._instance

-    def __init__(self, api_key: str = os.getenv("JUDGMENT_API_KEY"), project_name: str = "default_project"):
+    def __init__(self, api_key: str = os.getenv("JUDGMENT_API_KEY"), project_name: str = "default_project", organization_id: str = os.getenv("ORGANIZATION_ID")):
        if not hasattr(self, 'initialized'):
            if not api_key:
                raise ValueError("Tracer must be configured with a Judgment API key")

+            if not organization_id:
+                raise ValueError("Tracer must be configured with an Organization ID")
+
            self.api_key: str = api_key
            self.project_name: str = project_name
            self.client: JudgmentClient = JudgmentClient(judgment_api_key=api_key)
+            self.organization_id: str = organization_id
            self.depth: int = 0
            self._current_trace: Optional[str] = None
            self.initialized: bool = True
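Taken together, the tracer changes move authentication out of JSON request bodies and into Authorization and X-Organization-Id headers, and make the organization ID a required part of Tracer construction. A minimal usage sketch against the 0.0.14 signatures shown above (the project name is illustrative, not from the diff):

import os
from judgeval.common.tracer import Tracer

# Both credentials are now required; __init__ raises ValueError if either is missing.
tracer = Tracer(
    api_key=os.getenv("JUDGMENT_API_KEY"),
    organization_id=os.getenv("ORGANIZATION_ID"),  # new in 0.0.14
    project_name="my_project",  # illustrative
)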
judgeval/constants.py CHANGED
@@ -36,7 +36,9 @@ ROOT_API = os.getenv("JUDGMENT_API_URL", "https://api.judgmentlabs.ai")
 JUDGMENT_EVAL_API_URL = f"{ROOT_API}/evaluate/"
 JUDGMENT_DATASETS_PUSH_API_URL = f"{ROOT_API}/datasets/push/"
 JUDGMENT_DATASETS_PULL_API_URL = f"{ROOT_API}/datasets/pull/"
+JUDGMENT_DATASETS_EXPORT_JSONL_API_URL = f"{ROOT_API}/datasets/export_jsonl/"
 JUDGMENT_DATASETS_PULL_ALL_API_URL = f"{ROOT_API}/datasets/get_all_stats/"
+JUDGMENT_DATASETS_EDIT_API_URL = f"{ROOT_API}/datasets/edit/"
 JUDGMENT_EVAL_LOG_API_URL = f"{ROOT_API}/log_eval_results/"
 JUDGMENT_EVAL_FETCH_API_URL = f"{ROOT_API}/fetch_eval_results/"
 JUDGMENT_EVAL_DELETE_API_URL = f"{ROOT_API}/delete_eval_results_by_project_and_run_name/"
judgeval/data/datasets/dataset.py CHANGED
@@ -17,9 +17,10 @@ class EvalDataset:
    _alias: Union[str, None] = field(default=None)
    _id: Union[str, None] = field(default=None)
    judgment_api_key: str = field(default="")
-
+    organization_id: str = field(default="")
    def __init__(self,
                 judgment_api_key: str = os.getenv("JUDGMENT_API_KEY"),
+                 organization_id: str = os.getenv("ORGANIZATION_ID"),
                 ground_truths: List[GroundTruthExample] = [],
                 examples: List[Example] = [],
                 ):
@@ -31,7 +32,7 @@ class EvalDataset:
        self._alias = None
        self._id = None
        self.judgment_api_key = judgment_api_key
-
+        self.organization_id = organization_id

    def add_from_json(self, file_path: str) -> None:
        debug(f"Loading dataset from JSON file: {file_path}")
@@ -162,7 +163,8 @@ class EvalDataset:
                "additional_metadata": ast.literal_eval(row["additional_metadata"]) if pd.notna(row["additional_metadata"]) else dict(),
                "tools_called": row["tools_called"].split(";") if pd.notna(row["tools_called"]) else [],
                "expected_tools": row["expected_tools"].split(";") if pd.notna(row["expected_tools"]) else [],
-                "trace_id": row["trace_id"] if pd.notna(row["trace_id"]) else None
+                "trace_id": row["trace_id"] if pd.notna(row["trace_id"]) else None,
+                "example_id": str(row["example_id"]) if pd.notna(row["example_id"]) else None
            }
            if row["example"]:
                data["name"] = row["name"] if pd.notna(row["name"]) else None
judgeval/data/datasets/eval_dataset_client.py CHANGED
@@ -1,5 +1,5 @@

-from typing import Optional
+from typing import Optional, List
 import requests
 from rich.progress import Progress, SpinnerColumn, TextColumn

@@ -7,7 +7,9 @@ from judgeval.common.logger import debug, error, warning, info
 from judgeval.constants import (
     JUDGMENT_DATASETS_PUSH_API_URL,
     JUDGMENT_DATASETS_PULL_API_URL,
-     JUDGMENT_DATASETS_PULL_ALL_API_URL
+     JUDGMENT_DATASETS_PULL_ALL_API_URL,
+     JUDGMENT_DATASETS_EDIT_API_URL,
+     JUDGMENT_DATASETS_EXPORT_JSONL_API_URL
 )
 from judgeval.data import Example
 from judgeval.data.datasets import EvalDataset
@@ -17,13 +19,14 @@ from judgeval.data.datasets.ground_truth import GroundTruthExample


 class EvalDatasetClient:
-    def __init__(self, judgment_api_key: str):
+    def __init__(self, judgment_api_key: str, organization_id: str):
        self.judgment_api_key = judgment_api_key
+        self.organization_id = organization_id

    def create_dataset(self) -> EvalDataset:
        return EvalDataset(judgment_api_key=self.judgment_api_key)

-    def push(self, dataset: EvalDataset, alias: str,overwrite: Optional[bool] = False) -> bool:
+    def push(self, dataset: EvalDataset, alias: str, overwrite: Optional[bool] = False) -> bool:
        debug(f"Pushing dataset with alias '{alias}' (overwrite={overwrite})")
        if overwrite:
            warning(f"Overwrite enabled for alias '{alias}'")
@@ -56,12 +59,16 @@ class EvalDatasetClient:
                "ground_truths": [g.to_dict() for g in dataset.ground_truths],
                "examples": [e.to_dict() for e in dataset.examples],
                "overwrite": overwrite,
-                "judgment_api_key": dataset.judgment_api_key
            }
            try:
                response = requests.post(
                    JUDGMENT_DATASETS_PUSH_API_URL,
-                    json=content
+                    json=content,
+                    headers={
+                        "Content-Type": "application/json",
+                        "Authorization": f"Bearer {self.judgment_api_key}",
+                        "X-Organization-Id": self.organization_id
+                    }
                )
                if response.status_code == 500:
                    error(f"Server error during push: {content.get('message')}")
@@ -115,13 +122,17 @@ class EvalDatasetClient:
            )
            request_body = {
                "alias": alias,
-                "judgment_api_key": self.judgment_api_key
            }

            try:
                response = requests.post(
                    JUDGMENT_DATASETS_PULL_API_URL,
-                    json=request_body
+                    json=request_body,
+                    headers={
+                        "Content-Type": "application/json",
+                        "Authorization": f"Bearer {self.judgment_api_key}",
+                        "X-Organization-Id": self.organization_id
+                    }
                )
                response.raise_for_status()
            except requests.exceptions.RequestException as e:
@@ -169,13 +180,17 @@ class EvalDatasetClient:
                total=100,
            )
            request_body = {
-                "judgment_api_key": self.judgment_api_key
            }

            try:
                response = requests.post(
                    JUDGMENT_DATASETS_PULL_ALL_API_URL,
-                    json=request_body
+                    json=request_body,
+                    headers={
+                        "Content-Type": "application/json",
+                        "Authorization": f"Bearer {self.judgment_api_key}",
+                        "X-Organization-Id": self.organization_id
+                    }
                )
                response.raise_for_status()
            except requests.exceptions.RequestException as e:
@@ -191,3 +206,92 @@ class EvalDatasetClient:
            )

        return payload
+
+    def edit_dataset(self, alias: str, examples: List[Example], ground_truths: List[GroundTruthExample]) -> bool:
+        """
+        Edits the dataset on Judgment platform by adding new examples and ground truths
+
+        Mock request:
+        {
+            "alias": alias,
+            "examples": [...],
+            "ground_truths": [...],
+            "judgment_api_key": self.judgment_api_key
+        }
+        """
+        with Progress(
+            SpinnerColumn(style="rgb(106,0,255)"),
+            TextColumn("[progress.description]{task.description}"),
+            transient=False,
+        ) as progress:
+            task_id = progress.add_task(
+                f"Editing dataset [rgb(106,0,255)]'{alias}'[/rgb(106,0,255)] on Judgment...",
+                total=100,
+            )
+
+            content = {
+                "alias": alias,
+                "examples": [e.to_dict() for e in examples],
+                "ground_truths": [g.to_dict() for g in ground_truths],
+                "judgment_api_key": self.judgment_api_key
+            }
+
+            try:
+                response = requests.post(
+                    JUDGMENT_DATASETS_EDIT_API_URL,
+                    json=content,
+                    headers={
+                        "Content-Type": "application/json",
+                        "Authorization": f"Bearer {self.judgment_api_key}",
+                        "X-Organization-Id": self.organization_id
+                    }
+                )
+                response.raise_for_status()
+            except requests.exceptions.RequestException as e:
+                error(f"Error editing dataset: {str(e)}")
+                return False
+
+            info(f"Successfully edited dataset '{alias}'")
+            return True
+
+    def export_jsonl(self, alias: str) -> requests.Response:
+        """Export dataset in JSONL format from Judgment platform"""
+        debug(f"Exporting dataset with alias '{alias}' as JSONL")
+        with Progress(
+            SpinnerColumn(style="rgb(106,0,255)"),
+            TextColumn("[progress.description]{task.description}"),
+            transient=False,
+        ) as progress:
+            task_id = progress.add_task(
+                f"Exporting [rgb(106,0,255)]'{alias}'[/rgb(106,0,255)] as JSONL...",
+                total=100,
+            )
+            try:
+                response = requests.post(
+                    JUDGMENT_DATASETS_EXPORT_JSONL_API_URL,
+                    json={"alias": alias},
+                    headers={
+                        "Content-Type": "application/json",
+                        "Authorization": f"Bearer {self.judgment_api_key}",
+                        "X-Organization-Id": self.organization_id
+                    },
+                    stream=True
+                )
+                response.raise_for_status()
+            except requests.exceptions.HTTPError as err:
+                if err.response.status_code == 404:
+                    error(f"Dataset not found: {alias}")
+                else:
+                    error(f"HTTP error during export: {err}")
+                raise
+            except Exception as e:
+                error(f"Error during export: {str(e)}")
+                raise
+
+            info(f"Successfully exported dataset with alias '{alias}'")
+            progress.update(
+                task_id,
+                description=f"{progress.tasks[task_id].description} [rgb(25,227,160)]Done!)",
+            )
+
+            return response
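The dataset client gains two endpoints in this release: edit_dataset, which appends examples and ground truths to an existing dataset, and export_jsonl, which streams a dataset back as JSONL. A hedged sketch of how they compose, assuming the constructor arguments shown above; the alias, example contents, and file name are illustrative:

import os
from judgeval.data import Example
from judgeval.data.datasets import EvalDatasetClient

client = EvalDatasetClient(
    judgment_api_key=os.getenv("JUDGMENT_API_KEY"),
    organization_id=os.getenv("ORGANIZATION_ID"),
)

# Append new examples to an existing dataset; returns False on request failure.
client.edit_dataset("my_dataset", examples=[Example(input="q", actual_output="a")], ground_truths=[])

# export_jsonl returns a streaming requests.Response, so write it out in chunks.
response = client.export_jsonl("my_dataset")
with open("my_dataset.jsonl", "wb") as f:
    for chunk in response.iter_content(chunk_size=8192):
        f.write(chunk)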
judgeval/data/example.py CHANGED
@@ -4,9 +4,11 @@ Classes for representing examples in a dataset.


 from typing import TypeVar, Optional, Any, Dict, List
-from pydantic import BaseModel
+from uuid import uuid4
+from pydantic import BaseModel, Field, field_validator
 from enum import Enum
 from datetime import datetime
+import time


 Input = TypeVar('Input')
@@ -33,15 +35,26 @@ class Example(BaseModel):
    tools_called: Optional[List[str]] = None
    expected_tools: Optional[List[str]] = None
    name: Optional[str] = None
-    example_id: Optional[str] = None
+    example_id: str = Field(default_factory=lambda: str(uuid4()))
+    example_index: Optional[int] = None
    timestamp: Optional[str] = None
    trace_id: Optional[str] = None

+    @field_validator('input', 'actual_output', mode='before')
+    def convert_to_str(cls, value):
+        try:
+            return str(value)
+        except Exception:
+            return repr(value)
+
    def __init__(self, **data):
-        super().__init__(**data)
+        if 'example_id' not in data:
+            data['example_id'] = str(uuid4())
        # Set timestamp if not provided
-        if self.timestamp is None:
-            self.timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+        if 'timestamp' not in data:
+            data['timestamp'] = datetime.now().strftime("%Y%m%d_%H%M%S")
+        super().__init__(**data)
+

    def to_dict(self):
        return {
@@ -55,6 +68,7 @@ class Example(BaseModel):
            "expected_tools": self.expected_tools,
            "name": self.name,
            "example_id": self.example_id,
+            "example_index": self.example_index,
            "timestamp": self.timestamp,
            "trace_id": self.trace_id
        }
@@ -71,6 +85,7 @@ class Example(BaseModel):
            f"expected_tools={self.expected_tools}, "
            f"name={self.name}, "
            f"example_id={self.example_id}, "
+            f"example_index={self.example_index}, "
            f"timestamp={self.timestamp}, "
            f"trace_id={self.trace_id})"
        )
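The Example changes are behavioral as well as additive: example_id switches from an optional, caller-supplied value to a UUID string generated at construction, a numeric example_index field is introduced, and a before-mode validator coerces input and actual_output to strings. A small sketch of the new behavior, following the code above:

from judgeval.data import Example

ex = Example(input={"question": "2+2?"}, actual_output=4)

print(ex.example_id)     # auto-generated UUID string, e.g. 'd1f5c3a0-...'
print(type(ex.input))    # <class 'str'>, coerced by the field_validator
print(ex.example_index)  # None here; run_eval assigns the numeric index later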
judgeval/evaluation_run.py CHANGED
@@ -24,6 +24,7 @@ class EvaluationRun(BaseModel):

    # The user will specify whether they want log_results when they call run_eval
    log_results: bool = False  # NOTE: log_results has to be set first because it is used to validate project_name and eval_name
+    organization_id: Optional[str] = None
    project_name: Optional[str] = None
    eval_name: Optional[str] = None
    examples: List[Example]
judgeval/judgment_client.py CHANGED
@@ -6,7 +6,7 @@ from typing import Optional, List, Dict, Any, Union
 import requests

 from judgeval.constants import ROOT_API
-from judgeval.data.datasets import EvalDataset, EvalDatasetClient
+from judgeval.data.datasets import EvalDataset, EvalDatasetClient, GroundTruthExample
 from judgeval.data import (
     ScoringResult,
     Example
@@ -34,9 +34,10 @@ class EvalRunRequestBody(BaseModel):


 class JudgmentClient:
-    def __init__(self, judgment_api_key: str = os.getenv("JUDGMENT_API_KEY")):
+    def __init__(self, judgment_api_key: str = os.getenv("JUDGMENT_API_KEY"), organization_id: str = os.getenv("ORGANIZATION_ID")):
        self.judgment_api_key = judgment_api_key
-        self.eval_dataset_client = EvalDatasetClient(judgment_api_key)
+        self.organization_id = organization_id
+        self.eval_dataset_client = EvalDatasetClient(judgment_api_key, organization_id)

        # Verify API key is valid
        result, response = self._validate_api_key()
@@ -78,7 +79,8 @@ class JudgmentClient:
                model=model,
                aggregator=aggregator,
                metadata=metadata,
-                judgment_api_key=self.judgment_api_key
+                judgment_api_key=self.judgment_api_key,
+                organization_id=self.organization_id
            )
            return run_eval(eval, override)
        except ValueError as e:
@@ -115,7 +117,8 @@ class JudgmentClient:
                model=model,
                aggregator=aggregator,
                metadata=metadata,
-                judgment_api_key=self.judgment_api_key
+                judgment_api_key=self.judgment_api_key,
+                organization_id=self.organization_id
            )
            return run_eval(evaluation_run)
        except ValueError as e:
@@ -164,6 +167,11 @@ class JudgmentClient:
        """
        return self.eval_dataset_client.pull_all_user_dataset_stats()

+    def edit_dataset(self, alias: str, examples: List[Example], ground_truths: List[GroundTruthExample]) -> bool:
+        """
+        Edits the dataset on Judgment platform by adding new examples and ground truths
+        """
+        return self.eval_dataset_client.edit_dataset(alias, examples, ground_truths)

    # Maybe add option where you can pass in the EvaluationRun object and it will pull the eval results from the backend
    def pull_eval(self, project_name: str, eval_run_name: str) -> List[Dict[str, Union[str, List[ScoringResult]]]]:
@@ -182,6 +190,11 @@ class JudgmentClient:
                                                 eval_name=eval_run_name,
                                                 judgment_api_key=self.judgment_api_key)
        eval_run = requests.post(JUDGMENT_EVAL_FETCH_API_URL,
+                                 headers={
+                                     "Content-Type": "application/json",
+                                     "Authorization": f"Bearer {self.judgment_api_key}",
+                                     "X-Organization-Id": self.organization_id
+                                 },
                                 json=eval_run_request_body.model_dump())
        if eval_run.status_code != requests.codes.ok:
            raise ValueError(f"Error fetching eval results: {eval_run.json()}")
@@ -213,6 +226,8 @@ class JudgmentClient:
                                   json=eval_run_request_body.model_dump(),
                                   headers={
                                       "Content-Type": "application/json",
+                                       "Authorization": f"Bearer {self.judgment_api_key}",
+                                       "X-Organization-Id": self.organization_id
                                   })
        if response.status_code != requests.codes.ok:
            raise ValueError(f"Error deleting eval results: {response.json()}")
@@ -231,10 +246,12 @@ class JudgmentClient:
        response = requests.delete(JUDGMENT_EVAL_DELETE_PROJECT_API_URL,
                                   json={
                                       "project_name": project_name,
-                                       "judgment_api_key": self.judgment_api_key
+                                       "judgment_api_key": self.judgment_api_key,
                                   },
                                   headers={
                                       "Content-Type": "application/json",
+                                       "Authorization": f"Bearer {self.judgment_api_key}",
+                                       "X-Organization-Id": self.organization_id
                                   })
        if response.status_code != requests.codes.ok:
            raise ValueError(f"Error deleting eval results: {response.json()}")
@@ -246,7 +263,11 @@ class JudgmentClient:
        """
        response = requests.post(
            f"{ROOT_API}/validate_api_key/",
-            json={"api_key": self.judgment_api_key}
+            headers={
+                "Content-Type": "application/json",
+                "Authorization": f"Bearer {self.judgment_api_key}",
+            },
+            json={}  # Empty body now
        )
        if response.status_code == 200:
            return True, response.json()
@@ -268,12 +289,16 @@ class JudgmentClient:
        """
        request_body = {
            "slug": slug,
-            "judgment_api_key": self.judgment_api_key
        }

        response = requests.post(
            f"{ROOT_API}/fetch_scorer/",
-            json=request_body
+            json=request_body,
+            headers={
+                "Content-Type": "application/json",
+                "Authorization": f"Bearer {self.judgment_api_key}",
+                "X-Organization-Id": self.organization_id
+            }
        )

        if response.status_code == 500:
@@ -306,13 +331,17 @@ class JudgmentClient:
            "name": scorer.name,
            "conversation": scorer.conversation,
            "options": scorer.options,
-            "judgment_api_key": self.judgment_api_key,
            "slug": slug
        }

        response = requests.post(
            f"{ROOT_API}/save_scorer/",
-            json=request_body
+            json=request_body,
+            headers={
+                "Content-Type": "application/json",
+                "Authorization": f"Bearer {self.judgment_api_key}",
+                "X-Organization-Id": self.organization_id
+            }
        )

        if response.status_code == 500:
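JudgmentClient follows the same pattern: the organization ID becomes a constructor argument (defaulting to the ORGANIZATION_ID environment variable), requests carry the new auth headers, and dataset editing is exposed directly on the client. A sketch under those assumptions; note that construction also validates the API key against the backend, per the check above:

import os
from judgeval.judgment_client import JudgmentClient

client = JudgmentClient(
    judgment_api_key=os.getenv("JUDGMENT_API_KEY"),
    organization_id=os.getenv("ORGANIZATION_ID"),  # new in 0.0.14
)

# Dataset edits are now available without touching EvalDatasetClient directly
# (alias and contents illustrative):
# client.edit_dataset("my_dataset", examples=[...], ground_truths=[])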
judgeval/run_evaluation.py CHANGED
@@ -47,7 +47,13 @@ def execute_api_eval(evaluation_run: EvaluationRun) -> List[Dict]:
    try:
        # submit API request to execute evals
        payload = evaluation_run.model_dump(warnings=False)
-        response = requests.post(JUDGMENT_EVAL_API_URL, json=payload)
+        response = requests.post(
+            JUDGMENT_EVAL_API_URL, headers={
+                "Content-Type": "application/json",
+                "Authorization": f"Bearer {evaluation_run.judgment_api_key}",
+                "X-Organization-Id": evaluation_run.organization_id
+            },
+            json=payload)
        response_data = response.json()
    except Exception as e:
        error(f"Error: {e}")
@@ -135,7 +141,7 @@ def check_missing_scorer_data(results: List[ScoringResult]) -> List[ScoringResul
    return results


-def check_eval_run_name_exists(eval_name: str, project_name: str, judgment_api_key: str) -> None:
+def check_eval_run_name_exists(eval_name: str, project_name: str, judgment_api_key: str, organization_id: str) -> None:
    """
    Checks if an evaluation run name already exists for a given project.

@@ -151,6 +157,11 @@ def check_eval_run_name_exists(eval_name: str, project_name: str, judgment_api_k
    try:
        response = requests.post(
            f"{ROOT_API}/eval-run-name-exists/",
+            headers={
+                "Content-Type": "application/json",
+                "Authorization": f"Bearer {judgment_api_key}",
+                "X-Organization-Id": organization_id
+            },
            json={
                "eval_name": eval_name,
                "project_name": project_name,
@@ -188,9 +199,13 @@ def log_evaluation_results(merged_results: List[ScoringResult], evaluation_run:
    try:
        res = requests.post(
            JUDGMENT_EVAL_LOG_API_URL,
+            headers={
+                "Content-Type": "application/json",
+                "Authorization": f"Bearer {evaluation_run.judgment_api_key}",
+                "X-Organization-Id": evaluation_run.organization_id
+            },
            json={
                "results": [result.to_dict() for result in merged_results],
-                "judgment_api_key": evaluation_run.judgment_api_key,
                "project_name": evaluation_run.project_name,
                "eval_name": evaluation_run.eval_name,
            }
@@ -241,18 +256,17 @@ def run_eval(evaluation_run: EvaluationRun, override: bool = False) -> List[Scor
    check_eval_run_name_exists(
        evaluation_run.eval_name,
        evaluation_run.project_name,
-        evaluation_run.judgment_api_key
+        evaluation_run.judgment_api_key,
+        evaluation_run.organization_id
    )

    # Set example IDs if not already set
    debug("Initializing examples with IDs and timestamps")
    for idx, example in enumerate(evaluation_run.examples):
-        if example.example_id is None:
-            example.example_id = idx
-            debug(f"Set example ID {idx} for input: {example.input[:50]}...")
+        example.example_index = idx  # Set numeric index
        example.timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        with example_logging_context(example.timestamp, example.example_id):
-            debug(f"Initialized example {example.example_id}")
+            debug(f"Initialized example {example.example_id} (index: {example.example_index})")
            debug(f"Input: {example.input}")
            debug(f"Actual output: {example.actual_output}")
            if example.expected_output:
@@ -301,6 +315,7 @@ def run_eval(evaluation_run: EvaluationRun, override: bool = False) -> List[Scor
        aggregator=evaluation_run.aggregator,
        metadata=evaluation_run.metadata,
        judgment_api_key=evaluation_run.judgment_api_key,
+        organization_id=evaluation_run.organization_id,
        log_results=evaluation_run.log_results
    )
    debug("Sending request to Judgment API")
judgeval-0.0.12.dist-info/METADATA → judgeval-0.0.14.dist-info/METADATA RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: judgeval
-Version: 0.0.12
+Version: 0.0.14
 Summary: Judgeval Package
 Project-URL: Homepage, https://github.com/JudgmentLabs/judgeval
 Project-URL: Issues, https://github.com/JudgmentLabs/judgeval/issues
judgeval-0.0.12.dist-info/RECORD → judgeval-0.0.14.dist-info/RECORD RENAMED
@@ -1,22 +1,22 @@
 judgeval/__init__.py,sha256=xiiG4CkeaOtey4fusCd9CBz0BVqzTIbV-K2EFIU0rUM,283
 judgeval/clients.py,sha256=Ns5ljrgPPXUMo7fSPJxO12H64lcPyKeQPIVG_RMi2cM,1162
-judgeval/constants.py,sha256=oL3kWHg9CzQJiTInDTgJgxRhF3fgylhvEVP360UqG8A,2654
-judgeval/evaluation_run.py,sha256=ev-IbL34SwRv8lwB4KHfYag1jYo6b049R8mmwNBqmnM,5923
-judgeval/judgment_client.py,sha256=thmSXi2essIlmd_j5SjlBw9_8qJJp6N3djoWdLaMrj0,13770
-judgeval/run_evaluation.py,sha256=YOQ6s9RuUrXPTgoYexf7r6Hl1QKIMSTdvHl9kw-ZMzw,20103
+judgeval/constants.py,sha256=43hGesvBbX1uzc4KXvjLCVdd6cyZRMSnEJp11oA7h74,2794
+judgeval/evaluation_run.py,sha256=59lG8AUFTKqbY_JVEEA0I093-Pmiy0ERYDK5BuXuEGg,5965
+judgeval/judgment_client.py,sha256=ryGT3A9-Him6oco3WvuHbjB-FVvAR3wCiiGz03eO_Q4,15409
+judgeval/run_evaluation.py,sha256=Cc7BS07WyqsNpQ38HdMdRI782N3DANjM8UcIq9AwaGA,20769
 judgeval/common/__init__.py,sha256=7d24BRxtncpMj3AAJCj8RS7TqgjXmW777HVZH6-3sBs,289
 judgeval/common/exceptions.py,sha256=U-TxHLn7oVMezsMuoYouNDb2XuS8RCggfntYf5_6u4E,565
 judgeval/common/logger.py,sha256=QXN3UMymmKu2iMEMEgATLBnMDjGr_pE2iOSEFoICgg8,6092
-judgeval/common/tracer.py,sha256=1WmHF5dGT-fesskT8BH39BZ65eQ9WURN49yGg9A6YKM,32397
+judgeval/common/tracer.py,sha256=qam2suh-0_Cu_B7AWg3AMfEo2TisRZVY1SnAfqhiFQo,33211
 judgeval/common/utils.py,sha256=3WRyyX0tvnnj_VAVlEdtZrfzyWj6zfX04xdpCtE1m5Y,33736
 judgeval/data/__init__.py,sha256=YferxwmUqoBi18hrdgro0BD0h4pt20LAqISeUzGMcVU,474
 judgeval/data/api_example.py,sha256=vwWFbI6eJr5VgURCRbuSiMtEXLUbTCih_BcaqEBy-pg,4108
-judgeval/data/example.py,sha256=lymGZ3jG818-r2vyFunt6OLFrhESOyJnbhao_ljTjlA,2471
+judgeval/data/example.py,sha256=Rd-eDEM-giYfkfsGh_PBS2wwl15QlQPzbMV-J64Yj5E,2991
 judgeval/data/result.py,sha256=8FIO-bFKPegZuByKRjA2_sumjb8oGWQ5ZeQ1RVz5z2w,4393
 judgeval/data/scorer_data.py,sha256=pYljblCPZrlMIv5Eg7R-clnmsqzUBAwokKjZpwa0DXE,3280
 judgeval/data/datasets/__init__.py,sha256=eO6ayeM_bTGwIt0eDSlTBIIBvXvIWRWWSfYZrZROPiQ,265
-judgeval/data/datasets/dataset.py,sha256=AGdU21vZ4iVjqbjQ7JY-u29FzJrdDFTgdvhzvYVJNyo,11833
-judgeval/data/datasets/eval_dataset_client.py,sha256=TaCDzymGFNFjGRrieEdQB8dT8xqNPpsEi2XLGFyrJno,7113
+judgeval/data/datasets/dataset.py,sha256=KdAY0KRUB2jxcGmc1XXXheFFcPsGFOIGY-kTwBNQS_Y,12080
+judgeval/data/datasets/eval_dataset_client.py,sha256=DzxWQIiHlbpg6FpmWY6brcSP_h_rGcztk2A_6tQNFys,11411
 judgeval/data/datasets/ground_truth.py,sha256=OTBs3VZe-Wp0vEXEsq14GPZHYtpWT16bhGQTycIvkKc,2057
 judgeval/data/datasets/utils.py,sha256=lQxyl7mevct7JcDSyIrU_8QOzT-EYPWEvoUiAeOdeek,2502
 judgeval/judges/__init__.py,sha256=tyQ5KY88Kp1Ctfw2IJxnVEpy8DnFCtmy04JdPOpp-As,339
@@ -78,7 +78,7 @@ judgeval/scorers/judgeval_scorers/local_implementations/summarization/summarizat
 judgeval/scorers/judgeval_scorers/local_implementations/tool_correctness/__init__.py,sha256=JUB3TMqS1OHr6PqpIGqkyiBNbyfUaw7lZuUATjU3_ek,168
 judgeval/scorers/judgeval_scorers/local_implementations/tool_correctness/tool_correctness_scorer.py,sha256=CYGRJY5EuyICYzHrmFdLykwXakX8AC7G3Bhj7p6szfY,5493
 judgeval/tracer/__init__.py,sha256=wy3DYpH8U_z0GO_K_gOSkK0tTTD-u5eLDo0T5xIBoAc,147
-judgeval-0.0.12.dist-info/METADATA,sha256=QabQInkXXIceknwYzcLrqn9YbGk7nURNgseoD2TfM24,1283
-judgeval-0.0.12.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-judgeval-0.0.12.dist-info/licenses/LICENSE.md,sha256=tKmCg7k5QOmxPK19XMfzim04QiQJPmgIm0pAn55IJwk,11352
-judgeval-0.0.12.dist-info/RECORD,,
+judgeval-0.0.14.dist-info/METADATA,sha256=ZmCAECDNWwzpuES1slYKWcY_U-SMOsjaOdtSoj6wu0I,1283
+judgeval-0.0.14.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+judgeval-0.0.14.dist-info/licenses/LICENSE.md,sha256=tKmCg7k5QOmxPK19XMfzim04QiQJPmgIm0pAn55IJwk,11352
+judgeval-0.0.14.dist-info/RECORD,,