judgeval 0.0.38__py3-none-any.whl → 0.0.39__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
judgeval/constants.py CHANGED
@@ -59,6 +59,7 @@ JUDGMENT_TRACES_SAVE_API_URL = f"{ROOT_API}/traces/save/"
  JUDGMENT_TRACES_DELETE_API_URL = f"{ROOT_API}/traces/delete/"
  JUDGMENT_TRACES_ADD_ANNOTATION_API_URL = f"{ROOT_API}/traces/add_annotation/"
  JUDGMENT_ADD_TO_RUN_EVAL_QUEUE_API_URL = f"{ROOT_API}/add_to_run_eval_queue/"
+ JUDGMENT_GET_EVAL_STATUS_API_URL = f"{ROOT_API}/get_evaluation_status/"
  # RabbitMQ
  RABBITMQ_HOST = os.getenv("RABBITMQ_HOST", "rabbitmq-networklb-faa155df16ec9085.elb.us-west-1.amazonaws.com")
  RABBITMQ_PORT = os.getenv("RABBITMQ_PORT", 5672)
judgeval/data/example.py CHANGED
@@ -8,6 +8,7 @@ from uuid import uuid4
  from pydantic import BaseModel, Field, field_validator
  from enum import Enum
  from datetime import datetime
+ from judgeval.data.tool import Tool
  import time


@@ -31,7 +32,7 @@ class Example(BaseModel):
      retrieval_context: Optional[List[str]] = None
      additional_metadata: Optional[Dict[str, Any]] = None
      tools_called: Optional[List[str]] = None
-     expected_tools: Optional[List[Dict[str, Any]]] = None
+     expected_tools: Optional[List[Tool]] = None
      name: Optional[str] = None
      example_id: str = Field(default_factory=lambda: str(uuid4()))
      example_index: Optional[int] = None
@@ -82,17 +83,17 @@ class Example(BaseModel):
              raise ValueError(f"All items in expected_output must be strings but got {v}")
          return v

-     @field_validator('expected_tools', mode='before')
+     @field_validator('expected_tools')
      @classmethod
      def validate_expected_tools(cls, v):
          if v is not None:
              if not isinstance(v, list):
-                 raise ValueError(f"Expected tools must be a list of dictionaries or None but got {v} of type {type(v)}")
+                 raise ValueError(f"Expected tools must be a list of Tools or None but got {v} of type {type(v)}")

-             # Check that each item in the list is a dictionary
+             # Check that each item in the list is a Tool
              for i, item in enumerate(v):
-                 if not isinstance(item, dict):
-                     raise ValueError(f"Expected tools must be a list of dictionaries, but item at index {i} is {item} of type {type(item)}")
+                 if not isinstance(item, Tool):
+                     raise ValueError(f"Expected tools must be a list of Tools, but item at index {i} is {item} of type {type(item)}")

          return v

judgeval/data/tool.py ADDED
@@ -0,0 +1,19 @@
+ from pydantic import BaseModel, field_validator
+ from typing import Dict, Any, Optional
+ import warnings
+
+ class Tool(BaseModel):
+     tool_name: str
+     parameters: Optional[Dict[str, Any]] = None
+
+     @field_validator('tool_name')
+     def validate_tool_name(cls, v):
+         if not v:
+             warnings.warn("Tool name is empty or None", UserWarning)
+         return v
+
+     @field_validator('parameters')
+     def validate_parameters(cls, v):
+         if v is not None and not isinstance(v, dict):
+             warnings.warn(f"Parameters should be a dictionary, got {type(v)}", UserWarning)
+         return v
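
The new `Tool` model validates with warnings rather than exceptions, and `Example.expected_tools` (see the example.py hunk above) is now typed against it instead of raw dictionaries. A minimal sketch of the new surface, using only names visible in this diff:

    import warnings

    from judgeval.data.tool import Tool

    # A well-formed tool expectation: a name plus an optional parameters dict.
    search_tool = Tool(tool_name="web_search", parameters={"query": "weather in SF"})

    # The field validators warn instead of raising: an empty tool_name still
    # constructs a Tool but emits a UserWarning ("Tool name is empty or None").
    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")
        Tool(tool_name="")
    assert any(issubclass(w.category, UserWarning) for w in caught)

    # Example.expected_tools is now Optional[List[Tool]], so expectations are
    # passed as Tool objects, e.g. Example(..., expected_tools=[search_tool]).

Because `validate_expected_tools` dropped `mode='before'`, the check now runs after pydantic has parsed the field, i.e. against `Tool` instances rather than raw input.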
judgeval/data/trace.py CHANGED
@@ -1,6 +1,7 @@
  from pydantic import BaseModel
  from typing import Optional, Dict, Any, List
  from judgeval.evaluation_run import EvaluationRun
+ from judgeval.data.tool import Tool
  import json
  from datetime import datetime, timezone

@@ -17,7 +18,7 @@ class TraceSpan(BaseModel):
      duration: Optional[float] = None
      annotation: Optional[List[Dict[str, Any]]] = None
      evaluation_runs: Optional[List[EvaluationRun]] = []
-     expected_tools: Optional[List[Dict[str, Any]]] = None
+     expected_tools: Optional[List[Tool]] = None
      additional_metadata: Optional[Dict[str, Any]] = None

      def model_dump(self, **kwargs):
judgeval/judgment_client.py CHANGED
@@ -480,7 +480,7 @@ class JudgmentClient(metaclass=SingletonMeta):

          return response.json()["slug"]

-     def assert_test(
+     async def assert_test(
          self,
          scorers: List[Union[APIJudgmentScorer, JudgevalScorer]],
          examples: Optional[List[Example]] = None,
@@ -494,7 +494,8 @@ class JudgmentClient(metaclass=SingletonMeta):
          override: bool = False,
          rules: Optional[List[Rule]] = None,
          function: Optional[Callable] = None,
-         tracer: Optional[Union[Tracer, BaseCallbackHandler]] = None
+         tracer: Optional[Union[Tracer, BaseCallbackHandler]] = None,
+         async_execution: bool = False
      ) -> None:
          """
          Asserts a test by running the evaluation and checking the results for success
@@ -532,7 +533,7 @@ class JudgmentClient(metaclass=SingletonMeta):
                  test_file=test_file
              )
          else:
-             results = self.run_evaluation(
+             results = await self.run_evaluation(
                  examples=examples,
                  scorers=scorers,
                  model=model,
@@ -542,7 +543,8 @@ class JudgmentClient(metaclass=SingletonMeta):
                  project_name=project_name,
                  eval_run_name=eval_run_name,
                  override=override,
-                 rules=rules
+                 rules=rules,
+                 async_execution=async_execution
              )

          assert_test(results)
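
`assert_test` is now a coroutine and forwards the new `async_execution` flag down to `run_evaluation`, so callers must await it. A minimal usage sketch; the client is assumed to read its credentials from the environment as in earlier releases, and the project/run names are placeholders, not part of this diff:

    import asyncio
    from typing import List

    from judgeval.data.example import Example
    from judgeval.judgment_client import JudgmentClient

    async def run_ci_check(scorers: List, examples: List[Example]) -> None:
        client = JudgmentClient()  # assumed: API key and organization come from env vars
        # assert_test must now be awaited; async_execution is forwarded to run_evaluation().
        await client.assert_test(
            scorers=scorers,
            examples=examples,
            project_name="demo-project",        # placeholder names
            eval_run_name="nightly-regression",
            async_execution=True,
        )

    # asyncio.run(run_ci_check(my_scorers, my_examples))  # scorer/example lists built elsewhere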
judgeval/run_evaluation.py CHANGED
@@ -28,12 +28,15 @@ from judgeval.constants import (
      JUDGMENT_EVAL_LOG_API_URL,
      MAX_CONCURRENT_EVALUATIONS,
      JUDGMENT_ADD_TO_RUN_EVAL_QUEUE_API_URL,
+     JUDGMENT_GET_EVAL_STATUS_API_URL,
+     JUDGMENT_EVAL_FETCH_API_URL
  )
  from judgeval.common.exceptions import JudgmentAPIError
  from judgeval.common.logger import (
      debug,
      info,
-     error,
+     error,
+     warning,
      example_logging_context
  )
  from judgeval.evaluation_run import EvaluationRun
@@ -427,7 +430,307 @@ def run_trace_eval(trace_run: TraceRun, override: bool = False, ignore_errors: b



- def run_eval(evaluation_run: EvaluationRun, override: bool = False, ignore_errors: bool = True, async_execution: bool = False) -> List[ScoringResult]:
+ async def get_evaluation_status(eval_name: str, project_name: str, judgment_api_key: str, organization_id: str) -> Dict:
+     """
+     Gets the status of an async evaluation run.
+
+     Args:
+         eval_name (str): Name of the evaluation run
+         project_name (str): Name of the project
+         judgment_api_key (str): API key for authentication
+         organization_id (str): Organization ID for the evaluation
+
+     Returns:
+         Dict: Status information including:
+             - status: 'pending', 'running', 'completed', or 'failed'
+             - results: List of ScoringResult objects if completed
+             - error: Error message if failed
+     """
+     try:
+         response = requests.get(
+             JUDGMENT_GET_EVAL_STATUS_API_URL,
+             headers={
+                 "Content-Type": "application/json",
+                 "Authorization": f"Bearer {judgment_api_key}",
+                 "X-Organization-Id": organization_id
+             },
+             params={
+                 "eval_name": eval_name,
+                 "project_name": project_name,
+             },
+             verify=True
+         )
+
+         if not response.ok:
+             error_message = response.json().get('detail', 'An unknown error occurred.')
+             error(f"Error checking evaluation status: {error_message}")
+             raise JudgmentAPIError(error_message)
+
+         return response.json()
+     except requests.exceptions.RequestException as e:
+         error(f"Failed to check evaluation status: {str(e)}")
+         raise JudgmentAPIError(f"Failed to check evaluation status: {str(e)}")
+
+ async def _poll_evaluation_until_complete(eval_name: str, project_name: str, judgment_api_key: str, organization_id: str, poll_interval_seconds: int = 5, original_examples: Optional[List[Example]] = None) -> List[ScoringResult]:
+     """
+     Polls until the evaluation is complete and returns the results.
+
+     Args:
+         eval_name (str): Name of the evaluation run
+         project_name (str): Name of the project
+         judgment_api_key (str): API key for authentication
+         organization_id (str): Organization ID for the evaluation
+         poll_interval_seconds (int, optional): Time between status checks in seconds. Defaults to 5.
+         original_examples (List[Example], optional): The original examples sent for evaluation.
+             If provided, will match results with original examples.
+
+     Returns:
+         List[ScoringResult]: The evaluation results
+     """
+     poll_count = 0
+     # Create example_id to Example mapping if original examples are provided
+     original_example_map = {}
+     if original_examples:
+         for example in original_examples:
+             original_example_map[example.example_id] = example
+
+     # Remove the expected scorer names extraction and checking
+     # We'll instead verify all examples have consistent scorer data
+     while True:
+         poll_count += 1
+         try:
+             # Log polling attempt
+             if poll_count % 4 == 0:  # Log every 4th poll to avoid excess logging
+                 info(f"Polling for evaluation '{eval_name}' in project '{project_name}' (attempt {poll_count})")
+
+             # Check status
+             response = requests.get(
+                 JUDGMENT_GET_EVAL_STATUS_API_URL,
+                 headers={
+                     "Content-Type": "application/json",
+                     "Authorization": f"Bearer {judgment_api_key}",
+                     "X-Organization-Id": organization_id
+                 },
+                 params={
+                     "eval_name": eval_name,
+                     "project_name": project_name
+                 },
+                 verify=True
+             )
+
+             if not response.ok:
+                 error_message = response.json().get('detail', 'An unknown error occurred.')
+                 error(f"Error checking evaluation status: {error_message}")
+                 # Don't raise exception immediately, just log and continue polling
+                 await asyncio.sleep(poll_interval_seconds)
+                 continue
+
+             status_data = response.json()
+             status = status_data.get("status")
+
+             # If complete, get results and return
+             if status == "completed" or status == "complete":
+                 info(f"Evaluation '{eval_name}' reported as completed, fetching and verifying results...")
+                 results_response = requests.post(
+                     JUDGMENT_EVAL_FETCH_API_URL,
+                     headers={
+                         "Content-Type": "application/json",
+                         "Authorization": f"Bearer {judgment_api_key}",
+                         "X-Organization-Id": organization_id
+                     },
+                     json={
+                         "project_name": project_name,
+                         "eval_name": eval_name
+                     },
+                     verify=True
+                 )
+
+                 if not results_response.ok:
+                     error_message = results_response.json().get('detail', 'An unknown error occurred.')
+                     error(f"Error fetching evaluation results: {error_message}")
+                     raise JudgmentAPIError(error_message)
+
+                 result_data = results_response.json()
+
+                 if "examples" in result_data:
+                     examples_data = result_data.get("examples", [])
+
+
+                     info(f"Successfully fetched {len(examples_data)} results for evaluation '{eval_name}'")
+
+                     # Check for result validity if original examples are provided
+                     if original_example_map:
+                         # Verify all returned examples have matching original examples
+                         has_invalid_results = False
+                         for example_data in examples_data:
+                             example_id = example_data.get("example_id")
+
+                             if example_id not in original_example_map:
+                                 warning(f"Server returned example with ID {example_id} not found in original examples. " +
+                                         f"This indicates stale or incorrect data. Continuing to poll...")
+                                 has_invalid_results = True
+                                 break
+
+                         # If any invalid examples found, continue polling
+                         if has_invalid_results:
+                             info("Detected stale data. Waiting before polling again...")
+                             await asyncio.sleep(poll_interval_seconds)
+                             continue
+
+                         # Check if we received the expected number of results
+                         if len(original_examples) != len(examples_data):
+                             warning(f"Expected {len(original_examples)} results but got {len(examples_data)} results. " +
+                                     f"This indicates incomplete data. Continuing to poll...")
+                             await asyncio.sleep(poll_interval_seconds)
+                             continue
+
+                         # Collect all example IDs from scorer data
+                         scorer_example_ids = set()
+                         for example_data in examples_data:
+                             scorer_data_list = example_data.get("scorer_data", [])
+                             for scorer_data in scorer_data_list:
+                                 if "example_id" in scorer_data:
+                                     scorer_example_ids.add(scorer_data["example_id"])
+
+                         # Get the set of original example IDs
+                         original_example_ids = set(original_example_map.keys())
+
+                         # Check if the sets are equal
+                         missing_in_scorer = original_example_ids - scorer_example_ids
+                         extra_in_scorer = scorer_example_ids - original_example_ids
+
+                         if missing_in_scorer or extra_in_scorer:
+                             if missing_in_scorer:
+                                 warning(f"Examples missing in scorer data: {missing_in_scorer}")
+                             if extra_in_scorer:
+                                 warning(f"Extra examples in scorer data: {extra_in_scorer}")
+                             info("Detected mismatched example IDs in scorer data. Waiting before polling again...")
+                             await asyncio.sleep(poll_interval_seconds)
+                             continue
+
+                     # Create ScoringResult objects from the raw data
+                     scoring_results = []
+
+                     for example_data in examples_data:
+                         # Extract example_id from the server response
+                         example_id = example_data.get("example_id")
+
+                         # Create ScorerData objects
+                         scorer_data_list = []
+                         for raw_scorer_data in example_data.get("scorer_data", []):
+                             scorer_data_list.append(ScorerData(**raw_scorer_data))
+
+                         # Use the original Example object if we have it and the ID matches
+                         if original_example_map:
+                             example = original_example_map[example_id]
+                             debug(f"Matched result with original example {example_id}")
+                         else:
+                             # Create Example from example data (excluding scorer_data) if no original examples provided
+                             example_dict = {k: v for k, v in example_data.items() if k != "scorer_data"}
+                             example = Example(**example_dict)
+
+                         # Calculate success based on whether all scorer_data entries were successful
+                         success = all(scorer_data.success for scorer_data in scorer_data_list) if scorer_data_list else False
+
+                         # Create ScoringResult
+                         scoring_result = ScoringResult(
+                             success=success,  # Set based on all scorer data success values
+                             scorers_data=scorer_data_list,
+                             data_object=example
+                         )
+                         scoring_results.append(scoring_result)
+
+                     # If we got here, all validation checks passed
+                     info(f"Verified complete results for all {len(scoring_results)} examples with all expected scorer data")
+                     return scoring_results
+                 else:
+                     # No examples found
+                     info(f"No example results found for completed evaluation '{eval_name}'")
+                     return []
+
+             elif status == "failed":
+                 # Evaluation failed
+                 error_message = status_data.get("error", "Unknown error")
+                 error(f"Evaluation '{eval_name}' failed: {error_message}")
+                 raise JudgmentAPIError(f"Evaluation failed: {error_message}")
+
+             elif status == "pending" or status == "running":
+                 # Only log occasionally for pending/running to avoid flooding logs
+                 if poll_count % 4 == 0:
+                     info(f"Evaluation '{eval_name}' status: {status}")
+
+             # Wait before checking again
+             await asyncio.sleep(poll_interval_seconds)
+
+         except Exception as e:
+             if isinstance(e, JudgmentAPIError):
+                 raise
+
+             # For other exceptions, log and continue polling
+             error(f"Error checking evaluation status: {str(e)}")
+             if poll_count > 20:  # Only raise exception after many failed attempts
+                 raise JudgmentAPIError(f"Error checking evaluation status after {poll_count} attempts: {str(e)}")
+
+             # Continue polling after a delay
+             await asyncio.sleep(poll_interval_seconds)
+
+ async def await_with_spinner(task, message: str = "Awaiting async task: "):
+     """
+     Display a spinner while awaiting an async task.
+
+     Args:
+         task: The asyncio task to await
+         message (str): Message to display with the spinner
+
+     Returns:
+         Any: The result of the awaited task
+     """
+     spinner = itertools.cycle(['|', '/', '-', '\\'])
+
+     # Create an event to signal when to stop the spinner
+     stop_spinner_event = asyncio.Event()
+
+     async def display_spinner():
+         while not stop_spinner_event.is_set():
+             sys.stdout.write(f'\r{message}{next(spinner)}')
+             sys.stdout.flush()
+             await asyncio.sleep(0.1)
+
+     # Start the spinner in a separate task
+     spinner_task = asyncio.create_task(display_spinner())
+
+     try:
+         # Await the actual task
+         result = await task
+     finally:
+         # Signal the spinner to stop and wait for it to finish
+         stop_spinner_event.set()
+         await spinner_task
+
+         # Clear the spinner line
+         sys.stdout.write('\r' + ' ' * (len(message) + 1) + '\r')
+         sys.stdout.flush()
+
+     return result
+
+ class SpinnerWrappedTask:
+     """
+     A wrapper for an asyncio task that displays a spinner when awaited.
+     """
+     def __init__(self, task, message: str):
+         self.task = task
+         self.message = message
+
+     def __await__(self):
+         async def _spin_and_await():
+             return await await_with_spinner(self.task, self.message)
+         return _spin_and_await().__await__()
+
+     # Proxy all Task attributes and methods to the underlying task
+     def __getattr__(self, name):
+         return getattr(self.task, name)
+
+ def run_eval(evaluation_run: EvaluationRun, override: bool = False, ignore_errors: bool = True, async_execution: bool = False) -> Union[List[ScoringResult], asyncio.Task]:
      """
      Executes an evaluation of `Example`s using one or more `Scorer`s

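The status helper added above can also be called on its own; a small sketch of standalone use (all argument values are placeholders):

    import asyncio

    from judgeval.run_evaluation import get_evaluation_status

    async def check_once() -> str:
        # Returns the raw status payload; per the docstring above, "status" is
        # one of 'pending', 'running', 'completed', or 'failed'.
        status_data = await get_evaluation_status(
            eval_name="nightly-regression",
            project_name="demo-project",
            judgment_api_key="<your-api-key>",
            organization_id="<your-org-id>",
        )
        return status_data.get("status", "unknown")

    # print(asyncio.run(check_once()))
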
@@ -435,21 +738,12 @@ def run_eval(evaluation_run: EvaluationRun, override: bool = False, ignore_error
          evaluation_run (EvaluationRun): Stores example and evaluation together for running
          override (bool, optional): Whether to override existing evaluation run with same name. Defaults to False.
          ignore_errors (bool, optional): Whether to ignore scorer errors during evaluation. Defaults to True.
+         async_execution (bool, optional): Whether to execute the evaluation asynchronously. Defaults to False.

-     Args:
-         project_name (str): The name of the project the evaluation results belong to
-         eval_name (str): The name of the evaluation run
-         examples (List[Example]): The examples to evaluate
-         scorers (List[Union[JudgmentScorer, JudgevalScorer]]): A list of scorers to use for evaluation
-         model (str): The model used as a judge when using LLM as a Judge
-         aggregator (Optional[str]): The aggregator to use for evaluation if using Mixture of Judges
-         metadata (Optional[Dict[str, Any]]): Additional metadata to include for this evaluation run, e.g. comments, dataset name, purpose, etc.
-         judgment_api_key (Optional[str]): The API key for running evaluations on the Judgment API
-         log_results (bool): Whether to log the results to the Judgment API
-         rules (Optional[List[Rule]]): Rules to evaluate against scoring results
-
      Returns:
-         List[ScoringResult]: The results of the evaluation. Each result is a dictionary containing the fields of a `ScoringResult` object.
+         Union[List[ScoringResult], Union[asyncio.Task, SpinnerWrappedTask]]:
+             - If async_execution is False, returns a list of ScoringResult objects
+             - If async_execution is True, returns a Task that will resolve to a list of ScoringResult objects when awaited

      """
      # Call endpoint to check to see if eval run name exists (if we DON'T want to override and DO want to log results)
@@ -520,21 +814,51 @@ def run_eval(evaluation_run: EvaluationRun, override: bool = False, ignore_error
      if async_execution:
          if len(local_scorers) > 0:
              error("Local scorers are not supported in async execution")
+             raise ValueError("Local scorers are not supported in async execution")

          check_examples(evaluation_run.examples, evaluation_run.scorers)
          info("Starting async evaluation")
-         payload = evaluation_run.model_dump(warnings=False)
-         requests.post(
-             JUDGMENT_ADD_TO_RUN_EVAL_QUEUE_API_URL,
-             headers={
-                 "Content-Type": "application/json",
-                 "Authorization": f"Bearer {evaluation_run.judgment_api_key}",
-                 "X-Organization-Id": evaluation_run.organization_id
-             },
-             json=payload,
-             verify=True
+
+         async def _async_evaluation_workflow():
+             # Create a payload
+             payload = evaluation_run.model_dump(warnings=False)
+
+             # Send the evaluation to the queue
+             response = requests.post(
+                 JUDGMENT_ADD_TO_RUN_EVAL_QUEUE_API_URL,
+                 headers={
+                     "Content-Type": "application/json",
+                     "Authorization": f"Bearer {evaluation_run.judgment_api_key}",
+                     "X-Organization-Id": evaluation_run.organization_id
+                 },
+                 json=payload,
+                 verify=True
+             )
+
+             if not response.ok:
+                 error_message = response.json().get('detail', 'An unknown error occurred.')
+                 error(f"Error adding evaluation to queue: {error_message}")
+                 raise JudgmentAPIError(error_message)
+
+             info(f"Successfully added evaluation '{evaluation_run.eval_name}' to queue")
+
+             # Poll until the evaluation is complete
+             return await _poll_evaluation_until_complete(
+                 eval_name=evaluation_run.eval_name,
+                 project_name=evaluation_run.project_name,
+                 judgment_api_key=evaluation_run.judgment_api_key,
+                 organization_id=evaluation_run.organization_id,
+                 original_examples=evaluation_run.examples  # Pass the original examples
+             )
+
+         # Create a regular task
+         task = asyncio.create_task(_async_evaluation_workflow())
+
+         # Wrap it in our custom awaitable that will show a spinner only when awaited
+         return SpinnerWrappedTask(
+             task,
+             f"Processing evaluation '{evaluation_run.eval_name}': "
          )
-         print("Successfully added evaluation to queue")
      else:
          if judgment_scorers:
              # Execute evaluation using Judgment API
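
Putting the pieces together: with `async_execution=True`, `run_eval` queues the run, returns a `SpinnerWrappedTask` immediately, and the task polls via `_poll_evaluation_until_complete` when awaited. A sketch of how a caller might consume it, assuming `evaluation_run` is an already constructed `EvaluationRun`:

    import asyncio

    from judgeval.run_evaluation import run_eval

    async def main(evaluation_run) -> None:
        # Must be called with a running event loop, since run_eval uses asyncio.create_task.
        pending = run_eval(evaluation_run, async_execution=True)

        # The wrapper proxies asyncio.Task attributes such as done() and cancel() ...
        print("finished already?", pending.done())

        # ... and awaiting it shows the spinner while the polling loop runs,
        # then yields the List[ScoringResult].
        results = await pending
        print(f"{sum(r.success for r in results)}/{len(results)} examples passed")

    # asyncio.run(main(my_evaluation_run))  # EvaluationRun built elsewhere

Note that local (JudgevalScorer) scorers now raise a ValueError in this path, per the hunk above.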
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: judgeval
- Version: 0.0.38
+ Version: 0.0.39
  Summary: Judgeval Package
  Project-URL: Homepage, https://github.com/JudgmentLabs/judgeval
  Project-URL: Issues, https://github.com/JudgmentLabs/judgeval/issues
@@ -1,10 +1,10 @@
  judgeval/__init__.py,sha256=x9HWt4waJwJMAqTuJSg2MezF9Zg-macEjeU-ajbly-8,330
  judgeval/clients.py,sha256=6VQmEqmfCngUdS2MuPBIpHvtDFqOENm8-_BmMvjLyRQ,944
- judgeval/constants.py,sha256=qemyUNf5G5-W6YQ9tNkxbFa7L7XR6cDtWCVFKRwT3TM,5519
+ judgeval/constants.py,sha256=aDEy51CUbzp_CWARFmw3Fie5fZ-2pkaYPc_gUEbvT4Y,5591
  judgeval/evaluation_run.py,sha256=V9xMyiJ7e9lqHRblaeeMh6oyx1MEtGwfSxYtbi-EeXY,6746
- judgeval/judgment_client.py,sha256=ozNMDeM3lNnaNq4zY40x3z1TwHYL1e25BlxGnSYO0yw,23275
+ judgeval/judgment_client.py,sha256=eQQ6J3iUPHfBu9v83-8F-yNMqf015b1NoGsbLOzy2s4,23375
  judgeval/rules.py,sha256=jkh1cXXcUf8oRY7xJUZfcQBYWn_rjUW4GvrhRt15PeU,20265
- judgeval/run_evaluation.py,sha256=-7oiebkggP7lf6nVRxqDKE3QkuPSA0sAVkZl_n2nZtI,32437
+ judgeval/run_evaluation.py,sha256=bYNbMubqOOUNlsplY5Iw9IpUxuuqsJHIs-RweWC45E4,47474
  judgeval/version_check.py,sha256=bvJEidB7rAeXozoUbN9Yb97QOR_s2hgvpvj74jJ5HlY,943
  judgeval/common/__init__.py,sha256=7d24BRxtncpMj3AAJCj8RS7TqgjXmW777HVZH6-3sBs,289
  judgeval/common/exceptions.py,sha256=U-TxHLn7oVMezsMuoYouNDb2XuS8RCggfntYf5_6u4E,565
@@ -14,10 +14,11 @@ judgeval/common/tracer.py,sha256=EkWkg2AsS5FIj2ffh912qZZ9ew5h3hu2rynPBDsMszw,804
  judgeval/common/utils.py,sha256=w1SjpDtB1DTJapFSAvLzr_a3gGI45iacEoxIUnQXx4Q,34087
  judgeval/data/__init__.py,sha256=Q4WiIva20U_NgxGr-MU-9FWN_eFzUZBVgCsBmoo7IM8,501
  judgeval/data/custom_example.py,sha256=QRBqiRiZS8UgVeTRHY0r1Jzm6yAYsyg6zmHxQGxdiQs,739
- judgeval/data/example.py,sha256=MD0rA9oNI4cyaRgz7I7EOKv0gD2dp22Q_5z-NWdFHhE,6891
+ judgeval/data/example.py,sha256=XptCg2dLMS46SfDYa4kLgq1zXnlDnhOmR15Ci_08p90,6882
  judgeval/data/result.py,sha256=KfU9lhAKG_Xo2eGDm2uKVVRZpf177IDASg1cIwedJwE,3184
  judgeval/data/scorer_data.py,sha256=JVlaTx1EP2jw2gh3Vgx1CSEsvIFABAN26IquKyxwiJQ,3273
- judgeval/data/trace.py,sha256=IjL06YNElxTuJC0HrPUh69rtXkfkSpzDoZdNiXFUvwY,5043
+ judgeval/data/tool.py,sha256=x6YsdTTfeIwSn5f1xIDU3j1xJgSCzho0FW1ojR-L0Ac,612
+ judgeval/data/trace.py,sha256=euYIbwYsGqATWIeOZwBzNWS3hh3wefVzMJ7v5rHvG6c,5069
  judgeval/data/trace_run.py,sha256=G_OsHNK_nZzJKhtdiyWp7GFyyns5AOJZ956GM_4jXM0,2192
  judgeval/data/datasets/__init__.py,sha256=IdNKhQv9yYZ_op0rdBacrFaFVmiiYQ3JTzXzxOTsEVQ,176
  judgeval/data/datasets/dataset.py,sha256=oU9hvZTifK2x8em3FhL3oIqgHOByfJWH6C_9rIKnL5g,12773
@@ -59,7 +60,7 @@ judgeval/scorers/judgeval_scorers/classifiers/text2sql/text2sql_scorer.py,sha256
  judgeval/tracer/__init__.py,sha256=wy3DYpH8U_z0GO_K_gOSkK0tTTD-u5eLDo0T5xIBoAc,147
  judgeval/utils/alerts.py,sha256=O19Xj7DA0YVjl8PWiuH4zfdZeu3yiLVvHfY8ah2wG0g,2759
  judgeval/utils/data_utils.py,sha256=pB4GBWi8XoM2zSR2NlLXH5kqcQ029BVhDxaVKkdmiBY,1860
- judgeval-0.0.38.dist-info/METADATA,sha256=jlCQMfdz2Ni9nRi9cOu5svHnLqIinll2odC37dqkE3U,11860
- judgeval-0.0.38.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
- judgeval-0.0.38.dist-info/licenses/LICENSE.md,sha256=tKmCg7k5QOmxPK19XMfzim04QiQJPmgIm0pAn55IJwk,11352
- judgeval-0.0.38.dist-info/RECORD,,
+ judgeval-0.0.39.dist-info/METADATA,sha256=Q4wTRKXRoTozgF96BJFFoGwOoy-vLnAGs0HOQ9PCZ_k,11860
+ judgeval-0.0.39.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+ judgeval-0.0.39.dist-info/licenses/LICENSE.md,sha256=tKmCg7k5QOmxPK19XMfzim04QiQJPmgIm0pAn55IJwk,11352
+ judgeval-0.0.39.dist-info/RECORD,,