judgeval 0.0.35__py3-none-any.whl → 0.0.36__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -232,11 +232,17 @@ class JudgmentClient(metaclass=SingletonMeta):
232
232
  dataset.judgment_api_key = self.judgment_api_key
233
233
  return self.eval_dataset_client.push(dataset, alias, project_name, overwrite)
234
234
 
235
- def append_dataset(self, alias: str, examples: List[Example], project_name: str) -> bool:
235
+ def append_example_dataset(self, alias: str, examples: List[Example], project_name: str) -> bool:
236
236
  """
237
237
  Appends an `EvalDataset` to the Judgment platform for storage.
238
238
  """
239
- return self.eval_dataset_client.append(alias, examples, project_name)
239
+ return self.eval_dataset_client.append_examples(alias, examples, project_name)
240
+
241
+ def append_sequence_dataset(self, alias: str, sequences: List[Sequence], project_name: str) -> bool:
242
+ """
243
+ Appends a `Sequence` to the Judgment platform for storage.
244
+ """
245
+ return self.eval_dataset_client.append_sequences(alias, sequences, project_name)
240
246
 
241
247
  def pull_dataset(self, alias: str, project_name: str) -> EvalDataset:
242
248
  """
@@ -198,6 +198,40 @@ def check_missing_scorer_data(results: List[ScoringResult]) -> List[ScoringResul
198
198
  )
199
199
  return results
200
200
 
201
+ def check_experiment_type(eval_name: str, project_name: str, judgment_api_key: str, organization_id: str, is_sequence: bool) -> None:
202
+ """
203
+ Checks if the current experiment, if one exists, has the same type (examples or sequences)
204
+ """
205
+ try:
206
+ response = requests.post(
207
+ f"{ROOT_API}/check_experiment_type/",
208
+ headers={
209
+ "Content-Type": "application/json",
210
+ "Authorization": f"Bearer {judgment_api_key}",
211
+ "X-Organization-Id": organization_id
212
+ },
213
+ json={
214
+ "eval_name": eval_name,
215
+ "project_name": project_name,
216
+ "judgment_api_key": judgment_api_key,
217
+ "is_sequence": is_sequence
218
+ },
219
+ verify=True
220
+ )
221
+
222
+ if response.status_code == 422:
223
+ error(f"{response.json()}")
224
+ raise ValueError(f"{response.json()}")
225
+
226
+ if not response.ok:
227
+ response_data = response.json()
228
+ error_message = response_data.get('detail', 'An unknown error occurred.')
229
+ error(f"Error checking eval run name: {error_message}")
230
+ raise JudgmentAPIError(error_message)
231
+
232
+ except requests.exceptions.RequestException as e:
233
+ error(f"Failed to check if experiment type exists: {str(e)}")
234
+ raise JudgmentAPIError(f"Failed to check if experiment type exists: {str(e)}")
201
235
 
202
236
  def check_eval_run_name_exists(eval_name: str, project_name: str, judgment_api_key: str, organization_id: str) -> None:
203
237
  """
@@ -264,7 +298,7 @@ def log_evaluation_results(merged_results: List[ScoringResult], run: Union[Evalu
264
298
  "X-Organization-Id": run.organization_id
265
299
  },
266
300
  json={
267
- "results": [result.model_dump(warnings=False) for result in merged_results],
301
+ "results": merged_results,
268
302
  "run": run.model_dump(warnings=False)
269
303
  },
270
304
  verify=True
@@ -318,21 +352,18 @@ def run_with_spinner(message: str, func, *args, **kwargs) -> Any:
318
352
 
319
353
  return result
320
354
 
321
- def check_examples(examples: List[Example], scorers: List[APIJudgmentScorer]) -> None:
355
+ def check_examples(examples: List[Example], scorers: List[Union[APIJudgmentScorer, JudgevalScorer]]) -> None:
322
356
  """
323
357
  Checks if the example contains the necessary parameters for the scorer.
324
358
  """
325
359
  for scorer in scorers:
326
- if isinstance(scorer, APIJudgmentScorer):
327
- for example in examples:
328
- missing_params = []
329
- for param in scorer.required_params:
330
- if getattr(example, param.value) is None:
331
- missing_params.append(f"'{param.value}'")
332
- if missing_params:
333
- # We do this because we want to inform users that an example is missing parameters for a scorer
334
- # Example ID (usually random UUID) does not provide any helpful information for the user but printing the entire example is overdoing it
335
- print(f"WARNING: Example {example.example_id} is missing the following parameters: {missing_params} for scorer {scorer.score_type.value}")
360
+ for example in examples:
361
+ missing_params = []
362
+ for param in scorer.required_params:
363
+ if getattr(example, param.value) is None:
364
+ missing_params.append(f"'{param.value}'")
365
+ if missing_params:
366
+ print(f"WARNING: Example {example.example_id} is missing the following parameters: {missing_params} for scorer {scorer.score_type.value}")
336
367
 
337
368
  def run_sequence_eval(sequence_run: SequenceRun, override: bool = False, ignore_errors: bool = True) -> List[ScoringResult]:
338
369
  # Call endpoint to check to see if eval run name exists (if we DON'T want to override and DO want to log results)
@@ -344,6 +375,17 @@ def run_sequence_eval(sequence_run: SequenceRun, override: bool = False, ignore_
344
375
  sequence_run.organization_id
345
376
  )
346
377
 
378
+ if sequence_run.append:
379
+ # Check that the current experiment, if one exists, has the same type (examples or sequences)
380
+ check_experiment_type(
381
+ sequence_run.eval_name,
382
+ sequence_run.project_name,
383
+ sequence_run.judgment_api_key,
384
+ sequence_run.organization_id,
385
+ True
386
+ )
387
+
388
+
347
389
  # Execute evaluation using Judgment API
348
390
  info("Starting API evaluation")
349
391
  try: # execute an EvaluationRun with just JudgmentScorers
@@ -359,13 +401,9 @@ def run_sequence_eval(sequence_run: SequenceRun, override: bool = False, ignore_
359
401
 
360
402
  # Convert the response data to `ScoringResult` objects
361
403
  debug("Processing API results")
362
- api_results = []
363
- for result in response_data["results"]:
364
- api_results.append(ScoringResult(**result))
365
-
366
404
  # TODO: allow for custom scorer on sequences
367
405
  if sequence_run.log_results:
368
- pretty_str = run_with_spinner("Logging Results: ", log_evaluation_results, api_results, sequence_run)
406
+ pretty_str = run_with_spinner("Logging Results: ", log_evaluation_results, response_data["results"], sequence_run)
369
407
  rprint(pretty_str)
370
408
 
371
409
 
@@ -404,6 +442,16 @@ def run_eval(evaluation_run: EvaluationRun, override: bool = False, ignore_error
404
442
  evaluation_run.organization_id
405
443
  )
406
444
 
445
+ if evaluation_run.append:
446
+ # Check that the current experiment, if one exists, has the same type (examples or sequences)
447
+ check_experiment_type(
448
+ evaluation_run.eval_name,
449
+ evaluation_run.project_name,
450
+ evaluation_run.judgment_api_key,
451
+ evaluation_run.organization_id,
452
+ False
453
+ )
454
+
407
455
  # Set example IDs if not already set
408
456
  debug("Initializing examples with IDs and timestamps")
409
457
  for idx, example in enumerate(evaluation_run.examples):
@@ -539,7 +587,8 @@ def run_eval(evaluation_run: EvaluationRun, override: bool = False, ignore_error
539
587
  # )
540
588
  # print(merged_results)
541
589
  if evaluation_run.log_results:
542
- pretty_str = run_with_spinner("Logging Results: ", log_evaluation_results, merged_results, evaluation_run)
590
+ send_results = [result.model_dump(warnings=False) for result in merged_results]
591
+ pretty_str = run_with_spinner("Logging Results: ", log_evaluation_results, send_results, evaluation_run)
543
592
  rprint(pretty_str)
544
593
 
545
594
  for i, result in enumerate(merged_results):
judgeval/scorers/score.py CHANGED
@@ -271,6 +271,7 @@ async def a_execute_scoring(
271
271
  Returns:
272
272
  List[ScoringResult]: A list of `ScoringResult` objects containing the evaluation results.
273
273
  """
274
+
274
275
  semaphore = asyncio.Semaphore(max_concurrent)
275
276
 
276
277
  async def execute_with_semaphore(func: Callable, *args, **kwargs):
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: judgeval
3
- Version: 0.0.35
3
+ Version: 0.0.36
4
4
  Summary: Judgeval Package
5
5
  Project-URL: Homepage, https://github.com/JudgmentLabs/judgeval
6
6
  Project-URL: Issues, https://github.com/JudgmentLabs/judgeval/issues
@@ -11,7 +11,6 @@ Classifier: Operating System :: OS Independent
11
11
  Classifier: Programming Language :: Python :: 3
12
12
  Requires-Python: >=3.11
13
13
  Requires-Dist: anthropic
14
- Requires-Dist: boto3==1.38.3
15
14
  Requires-Dist: fastapi
16
15
  Requires-Dist: google-genai
17
16
  Requires-Dist: langchain
@@ -1,16 +1,16 @@
1
1
  judgeval/__init__.py,sha256=x9HWt4waJwJMAqTuJSg2MezF9Zg-macEjeU-ajbly-8,330
2
2
  judgeval/clients.py,sha256=6VQmEqmfCngUdS2MuPBIpHvtDFqOENm8-_BmMvjLyRQ,944
3
- judgeval/constants.py,sha256=_XmVAkebMyGrDvvanAVlMgVd4p6MLHdEVsTQFI0kz1k,5411
3
+ judgeval/constants.py,sha256=Gc1xpft2BkFRUIjj-puCzILsG1EUOEs8V-bUWP9b1WM,5508
4
4
  judgeval/evaluation_run.py,sha256=WGzx-Ug2qhSmunFo8NrmSstBRsOUc5KpKq0Lc51rqsM,6739
5
- judgeval/judgment_client.py,sha256=brRYmphZR-2IUre9kdOhfse1mYDilcIqUzzH21ROAdk,22208
5
+ judgeval/judgment_client.py,sha256=slYLE80FqEIsqgShMtML4I64p-RrEfELbMgZnlXhxP0,22515
6
6
  judgeval/rules.py,sha256=jkh1cXXcUf8oRY7xJUZfcQBYWn_rjUW4GvrhRt15PeU,20265
7
- judgeval/run_evaluation.py,sha256=elMpFHahyeukKKa09fmJM3c_afwJ00mbZRqm18l5f00,28481
7
+ judgeval/run_evaluation.py,sha256=1G-KYNHowfMKTD5j3cDd4EuEme00AqZkn6wpP3zMKUo,30241
8
8
  judgeval/version_check.py,sha256=bvJEidB7rAeXozoUbN9Yb97QOR_s2hgvpvj74jJ5HlY,943
9
9
  judgeval/common/__init__.py,sha256=7d24BRxtncpMj3AAJCj8RS7TqgjXmW777HVZH6-3sBs,289
10
10
  judgeval/common/exceptions.py,sha256=U-TxHLn7oVMezsMuoYouNDb2XuS8RCggfntYf5_6u4E,565
11
11
  judgeval/common/logger.py,sha256=KO75wWXCxhUHUMvLaTU31ZzOk6tkZBa7heQ7y0f-zFE,6062
12
12
  judgeval/common/s3_storage.py,sha256=W8wq9S7qJZdqdBR4sk3aEZ4K3-pz40DOoolOJrWs9Vo,3768
13
- judgeval/common/tracer.py,sha256=YsObK8VQXp1DDbU9xncU8NjuY-JUI54BqmG4olezrZc,92507
13
+ judgeval/common/tracer.py,sha256=bkN0Jol0mNosJeEJMtjM54jJDhEYL3OSBtkS4FB1m8E,105461
14
14
  judgeval/common/utils.py,sha256=LUQV5JfDr6wj7xHAJoNq-gofNZ6mjXbeKrGKzBME1KM,33533
15
15
  judgeval/data/__init__.py,sha256=xuKx_KCVHGp6CXvQuVmKl3v7pJp-qDaz0NccKxwjtO0,481
16
16
  judgeval/data/custom_example.py,sha256=QRBqiRiZS8UgVeTRHY0r1Jzm6yAYsyg6zmHxQGxdiQs,739
@@ -20,9 +20,9 @@ judgeval/data/scorer_data.py,sha256=JVlaTx1EP2jw2gh3Vgx1CSEsvIFABAN26IquKyxwiJQ,
20
20
  judgeval/data/sequence.py,sha256=FmKVdzQP5VTujRCHDWk097MKRR-rJgbsdrxyCKee6tA,1994
21
21
  judgeval/data/sequence_run.py,sha256=RmYjfWKMWg-pcF5PLeiWfrhuDkjDZi5VEmAIEXN3Ib0,2104
22
22
  judgeval/data/datasets/__init__.py,sha256=IdNKhQv9yYZ_op0rdBacrFaFVmiiYQ3JTzXzxOTsEVQ,176
23
- judgeval/data/datasets/dataset.py,sha256=dhLo30hvpmmOK2R6O5wDs_neawUJ4lS8bb4S42SufNQ,13034
24
- judgeval/data/datasets/eval_dataset_client.py,sha256=xjj66BO9Es9IxXqzQe1RT_e0kpeKlt7OrhRoSuj4KHM,15085
25
- judgeval/integrations/langgraph.py,sha256=J-cQfFP52TjJewdSTe-fcsUC4HDvjNbXoxmbmF0SgiE,11743
23
+ judgeval/data/datasets/dataset.py,sha256=RmZ28oyDPfRsCx4k5ftMscoq0M0LN78MW6ofTiM81BI,13134
24
+ judgeval/data/datasets/eval_dataset_client.py,sha256=uirHpkpLOfygXIz0xKAGTPx1qjbBTzdLFQK6yyoZduU,17544
25
+ judgeval/integrations/langgraph.py,sha256=7LpWDpb8wgOkeRJvlr2COvF_O1f01zm-cwsI5trKoiw,123150
26
26
  judgeval/judges/__init__.py,sha256=6X7VSwrwsdxGBNxCyapVRWGghhKOy3MVxFNMQ62kCXM,308
27
27
  judgeval/judges/base_judge.py,sha256=ch_S7uBB7lyv44Lf1d7mIGFpveOO58zOkkpImKgd9_4,994
28
28
  judgeval/judges/litellm_judge.py,sha256=EIL58Teptv8DzZUO3yP2RDQCDq-aoBB6HPZzPdK6KTg,2424
@@ -34,7 +34,7 @@ judgeval/scorers/api_scorer.py,sha256=NQ_CrrUPhSUk1k2Q8rKpCG_TU2FT32sFEqvb-Yi54B
34
34
  judgeval/scorers/exceptions.py,sha256=eGW5CuJgZ5YJBFrE4FHDSF651PO1dKAZ379mJ8gOsfo,178
35
35
  judgeval/scorers/judgeval_scorer.py,sha256=79-JJurqHP-qTaWNWInx4SjvQYwXc9lvfPPNgwsh2yA,6773
36
36
  judgeval/scorers/prompt_scorer.py,sha256=PaAs2qRolw1P3_I061Xvk9qzvF4O-JR8g_39RqXnHcM,17728
37
- judgeval/scorers/score.py,sha256=r9QiT4-LIvivcJ6XxByrbswKSO8eQTtAD1UlXT_lcmo,18741
37
+ judgeval/scorers/score.py,sha256=fZuaZPumqkLCWcZdpTn3bJeHPNHXaDqgyb0WBp2EYgE,18742
38
38
  judgeval/scorers/utils.py,sha256=iHQVTlIANbmCTXz9kTeSdOytgUZ_T74Re61ajqsk_WQ,6827
39
39
  judgeval/scorers/judgeval_scorers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
40
40
  judgeval/scorers/judgeval_scorers/api_scorers/__init__.py,sha256=_sDUBxSG536KGqXNi6dFpaYKghjEAadxBxaaxV9HuuE,1764
@@ -57,7 +57,7 @@ judgeval/scorers/judgeval_scorers/classifiers/text2sql/__init__.py,sha256=8iTzMv
57
57
  judgeval/scorers/judgeval_scorers/classifiers/text2sql/text2sql_scorer.py,sha256=ly72Z7s_c8NID6-nQnuW8qEGEW2MqdvpJ-5WfXzbAQg,2579
58
58
  judgeval/tracer/__init__.py,sha256=wy3DYpH8U_z0GO_K_gOSkK0tTTD-u5eLDo0T5xIBoAc,147
59
59
  judgeval/utils/alerts.py,sha256=O19Xj7DA0YVjl8PWiuH4zfdZeu3yiLVvHfY8ah2wG0g,2759
60
- judgeval-0.0.35.dist-info/METADATA,sha256=oAaDqpJCCZxUBOoVPTFbSjZgZ5xJMpGTxjngoJqmTO8,6126
61
- judgeval-0.0.35.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
62
- judgeval-0.0.35.dist-info/licenses/LICENSE.md,sha256=tKmCg7k5QOmxPK19XMfzim04QiQJPmgIm0pAn55IJwk,11352
63
- judgeval-0.0.35.dist-info/RECORD,,
60
+ judgeval-0.0.36.dist-info/METADATA,sha256=oexg66X9idECkevPAF2VkuQJBt-hYHvKmsZz5p5Y-LI,6097
61
+ judgeval-0.0.36.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
62
+ judgeval-0.0.36.dist-info/licenses/LICENSE.md,sha256=tKmCg7k5QOmxPK19XMfzim04QiQJPmgIm0pAn55IJwk,11352
63
+ judgeval-0.0.36.dist-info/RECORD,,