judgeval 0.0.32__py3-none-any.whl → 0.0.33__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48)
  1. judgeval/common/s3_storage.py +93 -0
  2. judgeval/common/tracer.py +612 -123
  3. judgeval/data/sequence.py +4 -10
  4. judgeval/judgment_client.py +25 -86
  5. judgeval/rules.py +4 -7
  6. judgeval/run_evaluation.py +1 -1
  7. judgeval/scorers/__init__.py +4 -4
  8. judgeval/scorers/judgeval_scorers/__init__.py +0 -176
  9. {judgeval-0.0.32.dist-info → judgeval-0.0.33.dist-info}/METADATA +15 -2
  10. judgeval-0.0.33.dist-info/RECORD +63 -0
  11. judgeval/scorers/base_scorer.py +0 -58
  12. judgeval/scorers/judgeval_scorers/local_implementations/__init__.py +0 -27
  13. judgeval/scorers/judgeval_scorers/local_implementations/answer_correctness/__init__.py +0 -4
  14. judgeval/scorers/judgeval_scorers/local_implementations/answer_correctness/answer_correctness_scorer.py +0 -276
  15. judgeval/scorers/judgeval_scorers/local_implementations/answer_correctness/prompts.py +0 -169
  16. judgeval/scorers/judgeval_scorers/local_implementations/answer_relevancy/__init__.py +0 -4
  17. judgeval/scorers/judgeval_scorers/local_implementations/answer_relevancy/answer_relevancy_scorer.py +0 -298
  18. judgeval/scorers/judgeval_scorers/local_implementations/answer_relevancy/prompts.py +0 -174
  19. judgeval/scorers/judgeval_scorers/local_implementations/comparison/__init__.py +0 -0
  20. judgeval/scorers/judgeval_scorers/local_implementations/comparison/comparison_scorer.py +0 -161
  21. judgeval/scorers/judgeval_scorers/local_implementations/comparison/prompts.py +0 -222
  22. judgeval/scorers/judgeval_scorers/local_implementations/contextual_precision/__init__.py +0 -3
  23. judgeval/scorers/judgeval_scorers/local_implementations/contextual_precision/contextual_precision_scorer.py +0 -264
  24. judgeval/scorers/judgeval_scorers/local_implementations/contextual_precision/prompts.py +0 -106
  25. judgeval/scorers/judgeval_scorers/local_implementations/contextual_recall/__init__.py +0 -3
  26. judgeval/scorers/judgeval_scorers/local_implementations/contextual_recall/contextual_recall_scorer.py +0 -254
  27. judgeval/scorers/judgeval_scorers/local_implementations/contextual_recall/prompts.py +0 -142
  28. judgeval/scorers/judgeval_scorers/local_implementations/contextual_relevancy/__init__.py +0 -3
  29. judgeval/scorers/judgeval_scorers/local_implementations/contextual_relevancy/contextual_relevancy_scorer.py +0 -245
  30. judgeval/scorers/judgeval_scorers/local_implementations/contextual_relevancy/prompts.py +0 -121
  31. judgeval/scorers/judgeval_scorers/local_implementations/execution_order/__init__.py +0 -3
  32. judgeval/scorers/judgeval_scorers/local_implementations/execution_order/execution_order.py +0 -156
  33. judgeval/scorers/judgeval_scorers/local_implementations/faithfulness/__init__.py +0 -3
  34. judgeval/scorers/judgeval_scorers/local_implementations/faithfulness/faithfulness_scorer.py +0 -318
  35. judgeval/scorers/judgeval_scorers/local_implementations/faithfulness/prompts.py +0 -268
  36. judgeval/scorers/judgeval_scorers/local_implementations/hallucination/__init__.py +0 -3
  37. judgeval/scorers/judgeval_scorers/local_implementations/hallucination/hallucination_scorer.py +0 -264
  38. judgeval/scorers/judgeval_scorers/local_implementations/hallucination/prompts.py +0 -104
  39. judgeval/scorers/judgeval_scorers/local_implementations/instruction_adherence/instruction_adherence.py +0 -232
  40. judgeval/scorers/judgeval_scorers/local_implementations/instruction_adherence/prompt.py +0 -102
  41. judgeval/scorers/judgeval_scorers/local_implementations/json_correctness/__init__.py +0 -5
  42. judgeval/scorers/judgeval_scorers/local_implementations/json_correctness/json_correctness_scorer.py +0 -134
  43. judgeval/scorers/judgeval_scorers/local_implementations/summarization/__init__.py +0 -3
  44. judgeval/scorers/judgeval_scorers/local_implementations/summarization/prompts.py +0 -247
  45. judgeval/scorers/judgeval_scorers/local_implementations/summarization/summarization_scorer.py +0 -551
  46. judgeval-0.0.32.dist-info/RECORD +0 -97
  47. {judgeval-0.0.32.dist-info → judgeval-0.0.33.dist-info}/WHEEL +0 -0
  48. {judgeval-0.0.32.dist-info → judgeval-0.0.33.dist-info}/licenses/LICENSE.md +0 -0
judgeval/data/sequence.py CHANGED
@@ -1,7 +1,7 @@
 from pydantic import BaseModel, Field, field_validator, model_validator
 from typing import List, Optional, Union, Any
 from judgeval.data.example import Example
-from judgeval.scorers import ScorerWrapper, JudgevalScorer
+from judgeval.scorers import JudgevalScorer, APIJudgmentScorer
 from uuid import uuid4
 from datetime import datetime, timezone
 
@@ -22,16 +22,10 @@ class Sequence(BaseModel):
 
     @field_validator("scorers")
     def validate_scorer(cls, v):
-        loaded_scorers = []
         for scorer in v or []:
-            try:
-                if isinstance(scorer, ScorerWrapper):
-                    loaded_scorers.append(scorer.load_implementation())
-                else:
-                    loaded_scorers.append(scorer)
-            except Exception as e:
-                raise ValueError(f"Failed to load implementation for scorer {scorer}: {str(e)}")
-        return loaded_scorers
+            if not isinstance(scorer, APIJudgmentScorer) and not isinstance(scorer, JudgevalScorer):
+                raise ValueError(f"Invalid scorer type: {type(scorer)}")
+        return v
 
     @model_validator(mode="after")
     def populate_sequence_metadata(self) -> "Sequence":
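Editor's note: the net effect of this validator change is that Sequence no longer lazy-loads ScorerWrapper implementations; it only accepts scorer objects that are already instantiated. A minimal sketch of the 0.0.33 behavior (the import path for Sequence and any fields beyond scorers are assumptions, not shown in this diff):

from judgeval.data import Sequence  # assumed re-export of judgeval/data/sequence.py
from judgeval.scorers import FaithfulnessScorer

# Concrete APIJudgmentScorer/JudgevalScorer instances pass validation.
seq = Sequence(scorers=[FaithfulnessScorer(threshold=0.7)])

# Anything else now fails fast:
# Sequence(scorers=["faithfulness"])  ->  ValueError: Invalid scorer type: <class 'str'>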
judgeval/judgment_client.py CHANGED
@@ -17,7 +17,6 @@ from judgeval.scorers import (
     APIJudgmentScorer,
     JudgevalScorer,
     ClassifierScorer,
-    ScorerWrapper,
 )
 from judgeval.evaluation_run import EvaluationRun
 from judgeval.run_evaluation import (
@@ -74,7 +73,7 @@ class JudgmentClient(metaclass=SingletonMeta):
     def a_run_evaluation(
         self,
         examples: List[Example],
-        scorers: List[Union[ScorerWrapper, JudgevalScorer]],
+        scorers: List[Union[APIJudgmentScorer, JudgevalScorer]],
         model: Union[str, List[str], JudgevalJudge],
         aggregator: Optional[str] = None,
         metadata: Optional[Dict[str, Any]] = None,
@@ -83,21 +82,32 @@ class JudgmentClient(metaclass=SingletonMeta):
         eval_run_name: str = "default_eval_run",
         override: bool = False,
         append: bool = False,
-        use_judgment: bool = True,
         ignore_errors: bool = True,
         rules: Optional[List[Rule]] = None
     ) -> List[ScoringResult]:
-        return self.run_evaluation(examples, scorers, model, aggregator, metadata, log_results, project_name, eval_run_name, override, append, use_judgment, ignore_errors, True, rules)
+        return self.run_evaluation(
+            examples=examples,
+            scorers=scorers,
+            model=model,
+            aggregator=aggregator,
+            metadata=metadata,
+            log_results=log_results,
+            project_name=project_name,
+            eval_run_name=eval_run_name,
+            override=override,
+            append=append,
+            ignore_errors=ignore_errors,
+            rules=rules
+        )
 
     def run_sequence_evaluation(
         self,
         sequences: List[Sequence],
         model: Union[str, List[str], JudgevalJudge],
-        scorers: List[Union[ScorerWrapper, JudgevalScorer]],
+        scorers: List[Union[APIJudgmentScorer, JudgevalScorer]],
         aggregator: Optional[str] = None,
         project_name: str = "default_project",
         eval_run_name: str = "default_eval_sequence",
-        use_judgment: bool = True,
         log_results: bool = True,
         append: bool = False,
         override: bool = False,
@@ -105,16 +115,6 @@ class JudgmentClient(metaclass=SingletonMeta):
         rules: Optional[List[Rule]] = None
     ) -> List[ScoringResult]:
         try:
-            loaded_scorers = []
-            for scorer in scorers:
-                try:
-                    if isinstance(scorer, ScorerWrapper):
-                        loaded_scorers.append(scorer.load_implementation())
-                    else:
-                        loaded_scorers.append(scorer)
-                except Exception as e:
-                    raise ValueError(f"Failed to load implementation for scorer {scorer}: {str(e)}")
-
             def get_all_sequences(root: Sequence) -> List[Sequence]:
                 all_sequences = [root]
 
@@ -132,31 +132,7 @@ class JudgmentClient(metaclass=SingletonMeta):
 
             flattened_sequences = flatten_sequence_list(sequences)
             for sequence in flattened_sequences:
-                sequence.scorers = loaded_scorers
-
-            if rules:
-                loaded_rules = []
-                for rule in rules:
-                    try:
-                        processed_conditions = []
-                        for condition in rule.conditions:
-                            # Convert metric if it's a ScorerWrapper
-                            if isinstance(condition.metric, ScorerWrapper):
-                                try:
-                                    condition_copy = condition.model_copy()
-                                    condition_copy.metric = condition.metric.load_implementation(use_judgment=use_judgment)
-                                    processed_conditions.append(condition_copy)
-                                except Exception as e:
-                                    raise ValueError(f"Failed to convert ScorerWrapper to implementation in rule '{rule.name}', condition metric '{condition.metric}': {str(e)}")
-                            else:
-                                processed_conditions.append(condition)
-
-                        # Create new rule with processed conditions
-                        new_rule = rule.model_copy()
-                        new_rule.conditions = processed_conditions
-                        loaded_rules.append(new_rule)
-                    except Exception as e:
-                        raise ValueError(f"Failed to process rule '{rule.name}': {str(e)}")
+                sequence.scorers = scorers
 
             sequence_run = SequenceRun(
                 project_name=project_name,
@@ -169,7 +145,7 @@ class JudgmentClient(metaclass=SingletonMeta):
                 judgment_api_key=self.judgment_api_key,
                 organization_id=self.organization_id
             )
-            return run_sequence_eval(sequence_run, override, ignore_errors, use_judgment)
+            return run_sequence_eval(sequence_run, override, ignore_errors)
         except ValueError as e:
             raise ValueError(f"Please check your SequenceRun object, one or more fields are invalid: \n{str(e)}")
         except Exception as e:
@@ -178,7 +154,7 @@ class JudgmentClient(metaclass=SingletonMeta):
     def run_evaluation(
         self,
         examples: Union[List[Example], List[CustomExample]],
-        scorers: List[Union[ScorerWrapper, JudgevalScorer]],
+        scorers: List[Union[APIJudgmentScorer, JudgevalScorer]],
         model: Union[str, List[str], JudgevalJudge],
         aggregator: Optional[str] = None,
         metadata: Optional[Dict[str, Any]] = None,
@@ -187,7 +163,6 @@ class JudgmentClient(metaclass=SingletonMeta):
         eval_run_name: str = "default_eval_run",
         override: bool = False,
         append: bool = False,
-        use_judgment: bool = True,
         ignore_errors: bool = True,
         async_execution: bool = False,
         rules: Optional[List[Rule]] = None
@@ -197,7 +172,7 @@ class JudgmentClient(metaclass=SingletonMeta):
 
         Args:
             examples (Union[List[Example], List[CustomExample]]): The examples to evaluate
-            scorers (List[Union[ScorerWrapper, JudgevalScorer]]): A list of scorers to use for evaluation
+            scorers (List[Union[APIJudgmentScorer, JudgevalScorer]]): A list of scorers to use for evaluation
             model (Union[str, List[str], JudgevalJudge]): The model used as a judge when using LLM as a Judge
             aggregator (Optional[str]): The aggregator to use for evaluation if using Mixture of Judges
             metadata (Optional[Dict[str, Any]]): Additional metadata to include for this evaluation run
@@ -205,7 +180,6 @@ class JudgmentClient(metaclass=SingletonMeta):
             project_name (str): The name of the project the evaluation results belong to
             eval_run_name (str): A name for this evaluation run
             override (bool): Whether to override an existing evaluation run with the same name
-            use_judgment (bool): Whether to use Judgment API for evaluation
             ignore_errors (bool): Whether to ignore errors during evaluation (safely handled)
             rules (Optional[List[Rule]]): Rules to evaluate against scoring results
 
@@ -216,58 +190,21 @@ class JudgmentClient(metaclass=SingletonMeta):
             raise ValueError("Cannot set both override and append to True. Please choose one.")
 
         try:
-            # Load appropriate implementations for all scorers
-            loaded_scorers: List[Union[JudgevalScorer, APIJudgmentScorer]] = []
-            for scorer in scorers:
-                try:
-                    if isinstance(scorer, ScorerWrapper):
-                        loaded_scorers.append(scorer.load_implementation(use_judgment=use_judgment))
-                    else:
-                        loaded_scorers.append(scorer)
-                except Exception as e:
-                    raise ValueError(f"Failed to load implementation for scorer {scorer}: {str(e)}")
-
-            # Prevent using JudgevalScorer with rules - only APIJudgmentScorer allowed with rules
-            if rules and any(isinstance(scorer, JudgevalScorer) for scorer in loaded_scorers):
+            if rules and any(isinstance(scorer, JudgevalScorer) for scorer in scorers):
                 raise ValueError("Cannot use Judgeval scorers (only API scorers) when using rules. Please either remove rules or use only APIJudgmentScorer types.")
 
-            # Convert ScorerWrapper in rules to their implementations
-            loaded_rules = None
-            if rules:
-                loaded_rules = []
-                for rule in rules:
-                    try:
-                        processed_conditions = []
-                        for condition in rule.conditions:
-                            # Convert metric if it's a ScorerWrapper
-                            if isinstance(condition.metric, ScorerWrapper):
-                                try:
-                                    condition_copy = condition.model_copy()
-                                    condition_copy.metric = condition.metric.load_implementation(use_judgment=use_judgment)
-                                    processed_conditions.append(condition_copy)
-                                except Exception as e:
-                                    raise ValueError(f"Failed to convert ScorerWrapper to implementation in rule '{rule.name}', condition metric '{condition.metric}': {str(e)}")
-                            else:
-                                processed_conditions.append(condition)
-
-                        # Create new rule with processed conditions
-                        new_rule = rule.model_copy()
-                        new_rule.conditions = processed_conditions
-                        loaded_rules.append(new_rule)
-                    except Exception as e:
-                        raise ValueError(f"Failed to process rule '{rule.name}': {str(e)}")
             eval = EvaluationRun(
                 log_results=log_results,
                 append=append,
                 project_name=project_name,
                 eval_name=eval_run_name,
                 examples=examples,
-                scorers=loaded_scorers,
+                scorers=scorers,
                 model=model,
                 aggregator=aggregator,
                 metadata=metadata,
                 judgment_api_key=self.judgment_api_key,
-                rules=loaded_rules,
+                rules=rules,
                 organization_id=self.organization_id
             )
             return run_eval(eval, override, ignore_errors=ignore_errors, async_execution=async_execution)
@@ -505,6 +442,8 @@ class JudgmentClient(metaclass=SingletonMeta):
             raise JudgmentAPIError(f"Failed to fetch classifier scorer '{slug}': {response.json().get('detail', '')}")
 
         scorer_config = response.json()
+        created_at = scorer_config.pop("created_at")
+        updated_at = scorer_config.pop("updated_at")
 
         try:
             return ClassifierScorer(**scorer_config)
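Editor's note: with use_judgment removed and the wrapper-loading step gone, scorers and rules are passed straight through to EvaluationRun. One behavioral detail visible above: a_run_evaluation previously forwarded True positionally for async_execution, while the rewritten keyword call omits it, so async_execution now falls back to its default (False). A sketch of the 0.0.33 call shape (example data and model name are illustrative):

from judgeval.judgment_client import JudgmentClient
from judgeval.data import Example  # assumed re-export
from judgeval.scorers import AnswerRelevancyScorer, FaithfulnessScorer

client = JudgmentClient()  # picks up the Judgment API key from the environment

results = client.run_evaluation(
    examples=[Example(input="What does judgeval do?", actual_output="It evaluates LLM outputs.")],
    scorers=[FaithfulnessScorer(threshold=0.7), AnswerRelevancyScorer(threshold=0.5)],
    model="gpt-4o",
    project_name="default_project",
    eval_run_name="smoke_test",
    # use_judgment=True  ->  TypeError in 0.0.33; the parameter no longer exists
)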
judgeval/rules.py CHANGED
@@ -10,7 +10,7 @@ from concurrent.futures import ThreadPoolExecutor
 import time
 import uuid
 
-from judgeval.scorers import APIJudgmentScorer, JudgevalScorer, ScorerWrapper
+from judgeval.scorers import APIJudgmentScorer, JudgevalScorer
 
 class AlertStatus(str, Enum):
     """Status of an alert evaluation."""
@@ -23,22 +23,19 @@ class Condition(BaseModel):
 
     Example:
         {
-            "metric": FaithfulnessScorer(threshold=0.7) # Must be a scorer object: APIJudgmentScorer, JudgevalScorer, or ScorerWrapper
+            "metric": FaithfulnessScorer(threshold=0.7) # Must be a scorer object: APIJudgmentScorer, JudgevalScorer
         }
 
     The Condition class uses the scorer's threshold and success function internally.
     """
     model_config = ConfigDict(arbitrary_types_allowed=True)
 
-    metric: Union[APIJudgmentScorer, JudgevalScorer, ScorerWrapper]
+    metric: Union[APIJudgmentScorer, JudgevalScorer]
 
     @property
     def metric_name(self) -> str:
         """Get the name of the metric for lookups in scores dictionary."""
-        if isinstance(self.metric, ScorerWrapper):
-            # Handle ScorerWrapper case specifically
-            return self.metric.scorer.score_type if hasattr(self.metric.scorer, 'score_type') else str(self.metric.scorer)
-        elif hasattr(self.metric, 'score_type'):
+        if hasattr(self.metric, 'score_type'):
             # Handle APIJudgmentScorer and JudgevalScorer which have score_type
             return self.metric.score_type
         elif hasattr(self.metric, '__name__'):
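Editor's note: the Condition docstring above already shows the intended usage; only the wrapper branch is gone. A minimal sketch:

from judgeval.rules import Condition
from judgeval.scorers import FaithfulnessScorer

# The metric must now be a concrete APIJudgmentScorer or JudgevalScorer instance.
cond = Condition(metric=FaithfulnessScorer(threshold=0.7))
print(cond.metric_name)  # resolved via the metric's score_type attribute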
judgeval/run_evaluation.py CHANGED
@@ -334,7 +334,7 @@ def check_examples(examples: List[Example], scorers: List[APIJudgmentScorer]) ->
             # Example ID (usually random UUID) does not provide any helpful information for the user but printing the entire example is overdoing it
             print(f"WARNING: Example {example.example_id} is missing the following parameters: {missing_params} for scorer {scorer.score_type.value}")
 
-def run_sequence_eval(sequence_run: SequenceRun, override: bool = False, ignore_errors: bool = True, async_execution: bool = False) -> List[ScoringResult]:
+def run_sequence_eval(sequence_run: SequenceRun, override: bool = False, ignore_errors: bool = True) -> List[ScoringResult]:
     # Call endpoint to check to see if eval run name exists (if we DON'T want to override and DO want to log results)
     if not override and sequence_run.log_results and not sequence_run.append:
         check_eval_run_name_exists(
judgeval/scorers/__init__.py CHANGED
@@ -1,7 +1,7 @@
 from judgeval.scorers.api_scorer import APIJudgmentScorer
 from judgeval.scorers.judgeval_scorer import JudgevalScorer
 from judgeval.scorers.prompt_scorer import PromptScorer, ClassifierScorer
-from judgeval.scorers.judgeval_scorers import (
+from judgeval.scorers.judgeval_scorers.api_scorers import (
     ExecutionOrderScorer,
     JSONCorrectnessScorer,
     SummarizationScorer,
@@ -11,14 +11,15 @@ from judgeval.scorers.judgeval_scorers import (
     ContextualPrecisionScorer,
     ContextualRecallScorer,
     AnswerRelevancyScorer,
-    ScorerWrapper,
     AnswerCorrectnessScorer,
-    Text2SQLScorer,
     ComparisonScorer,
     InstructionAdherenceScorer,
     GroundednessScorer,
     DerailmentScorer,
 )
+from judgeval.scorers.judgeval_scorers.classifiers import (
+    Text2SQLScorer,
+)
 
 __all__ = [
     "APIJudgmentScorer",
@@ -34,7 +35,6 @@ __all__ = [
     "ContextualPrecisionScorer",
     "ContextualRecallScorer",
     "AnswerRelevancyScorer",
-    "ScorerWrapper",
     "AnswerCorrectnessScorer",
     "Text2SQLScorer",
     "ComparisonScorer",
judgeval/scorers/judgeval_scorers/__init__.py CHANGED
@@ -1,176 +0,0 @@
-from typing import Type, Optional, Any
-
-# Import implementations
-from judgeval.scorers.judgeval_scorers.api_scorers import (
-    ExecutionOrderScorer as APIExecutionOrderScorer,
-    JSONCorrectnessScorer as APIJSONCorrectnessScorer,
-    SummarizationScorer as APISummarizationScorer,
-    HallucinationScorer as APIHallucinationScorer,
-    FaithfulnessScorer as APIFaithfulnessScorer,
-    ContextualRelevancyScorer as APIContextualRelevancyScorer,
-    ContextualPrecisionScorer as APIContextualPrecisionScorer,
-    ContextualRecallScorer as APIContextualRecallScorer,
-    AnswerRelevancyScorer as APIAnswerRelevancyScorer,
-    AnswerCorrectnessScorer as APIAnswerCorrectnessScorer,
-    ComparisonScorer as APIComparisonScorer,
-    InstructionAdherenceScorer as APIInstructionAdherenceScorer,
-    GroundednessScorer as APIGroundednessScorer,
-    DerailmentScorer as APIDerailmentScorer,
-)
-
-from judgeval.scorers.judgeval_scorers.local_implementations import (
-    AnswerRelevancyScorer as LocalAnswerRelevancyScorer,
-    ContextualPrecisionScorer as LocalContextualPrecisionScorer,
-    ContextualRecallScorer as LocalContextualRecallScorer,
-    ContextualRelevancyScorer as LocalContextualRelevancyScorer,
-    FaithfulnessScorer as LocalFaithfulnessScorer,
-    JsonCorrectnessScorer as LocalJsonCorrectnessScorer,
-    ExecutionOrderScorer as LocalExecutionOrderScorer,
-    HallucinationScorer as LocalHallucinationScorer,
-    SummarizationScorer as LocalSummarizationScorer,
-    AnswerCorrectnessScorer as LocalAnswerCorrectnessScorer,
-    ComparisonScorer as LocalComparisonScorer,
-    InstructionAdherenceScorer as LocalInstructionAdherenceScorer,
-)
-
-from judgeval.scorers.judgeval_scorers.classifiers import Text2SQLScorer
-
-
-class ScorerWrapper:
-    """
-    Wrapper class that can dynamically load either API or local implementation of a scorer.
-    """
-    def __init__(self, api_implementation: Type, local_implementation: Optional[Type] = None):
-        self.api_implementation = api_implementation
-        self.local_implementation = local_implementation
-        self._instance = None
-        self._init_args = None
-        self._init_kwargs = None
-
-    def __call__(self, *args, **kwargs):
-        """Store initialization arguments for later use when implementation is loaded"""
-        self._init_args = args
-        self._init_kwargs = kwargs
-        return self
-
-    def load_implementation(self, use_judgment: bool = True) -> Any:
-        """
-        Load the appropriate implementation based on the use_judgment flag.
-
-        Args:
-            use_judgment (bool): If True, use API implementation. If False, use local implementation.
-
-        Returns:
-            Instance of the appropriate implementation
-
-        Raises:
-            ValueError: If local implementation is requested but not available
-        """
-        if self._instance is not None:
-            return self._instance
-
-        if use_judgment:
-            implementation = self.api_implementation
-        else:
-            if self.local_implementation is None:
-                raise ValueError("No local implementation available for this scorer")
-            implementation = self.local_implementation
-
-        args = self._init_args or ()
-        kwargs = self._init_kwargs or {}
-        self._instance = implementation(*args, **kwargs)
-        return self._instance
-
-    def __getattr__(self, name):
-        """Defer all attribute access to the loaded implementation"""
-        if self._instance is None:
-            raise RuntimeError("Implementation not loaded. Call load_implementation() first")
-        return getattr(self._instance, name)
-
-# Create wrapped versions of all scorers
-
-AnswerCorrectnessScorer = ScorerWrapper(
-    api_implementation=APIAnswerCorrectnessScorer,
-    local_implementation=LocalAnswerCorrectnessScorer
-)
-
-AnswerRelevancyScorer = ScorerWrapper(
-    api_implementation=APIAnswerRelevancyScorer,
-    local_implementation=LocalAnswerRelevancyScorer
-)
-
-ExecutionOrderScorer = ScorerWrapper(
-    api_implementation=APIExecutionOrderScorer,
-    local_implementation=LocalExecutionOrderScorer
-)
-
-JSONCorrectnessScorer = ScorerWrapper(
-    api_implementation=APIJSONCorrectnessScorer,
-    local_implementation=LocalJsonCorrectnessScorer
-)
-
-SummarizationScorer = ScorerWrapper(
-    api_implementation=APISummarizationScorer,
-    local_implementation=LocalSummarizationScorer
-)
-
-HallucinationScorer = ScorerWrapper(
-    api_implementation=APIHallucinationScorer,
-    local_implementation=LocalHallucinationScorer
-)
-
-FaithfulnessScorer = ScorerWrapper(
-    api_implementation=APIFaithfulnessScorer,
-    local_implementation=LocalFaithfulnessScorer
-)
-
-ContextualRelevancyScorer = ScorerWrapper(
-    api_implementation=APIContextualRelevancyScorer,
-    local_implementation=LocalContextualRelevancyScorer
-)
-
-ContextualPrecisionScorer = ScorerWrapper(
-    api_implementation=APIContextualPrecisionScorer,
-    local_implementation=LocalContextualPrecisionScorer
-)
-
-ContextualRecallScorer = ScorerWrapper(
-    api_implementation=APIContextualRecallScorer,
-    local_implementation=LocalContextualRecallScorer
-)
-
-InstructionAdherenceScorer = ScorerWrapper(
-    api_implementation=APIInstructionAdherenceScorer,
-    local_implementation=LocalInstructionAdherenceScorer
-)
-
-def ComparisonScorer(threshold: float, criteria: str, description: str):
-    return ScorerWrapper(
-        api_implementation=APIComparisonScorer,
-        local_implementation=LocalComparisonScorer
-    )(threshold=threshold, criteria=criteria, description=description)
-
-GroundednessScorer = ScorerWrapper(
-    api_implementation=APIGroundednessScorer,
-)
-
-DerailmentScorer = ScorerWrapper(
-    api_implementation=APIDerailmentScorer,
-    local_implementation=LocalInstructionAdherenceScorer # TODO: add local implementation
-)
-
-__all__ = [
-    "ExecutionOrderScorer",
-    "JSONCorrectnessScorer",
-    "SummarizationScorer",
-    "HallucinationScorer",
-    "FaithfulnessScorer",
-    "ContextualRelevancyScorer",
-    "ContextualPrecisionScorer",
-    "ContextualRecallScorer",
-    "AnswerRelevancyScorer",
-    "Text2SQLScorer",
-    "ComparisonScorer",
-    "GroundednessScorer",
-    "DerailmentScorer",
-]
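Editor's note: a rough migration sketch for callers of the deleted wrapper. The 0.0.32 half mirrors the file above; note that the local (non-API) implementations have no direct replacement in 0.0.33:

# 0.0.32: names like FaithfulnessScorer were ScorerWrapper instances that
# resolved to an API or local class on demand:
#   scorer = FaithfulnessScorer(threshold=0.7)
#   impl = scorer.load_implementation(use_judgment=True)

# 0.0.33: the same import yields the API-backed scorer directly; use it as-is.
from judgeval.scorers import FaithfulnessScorer

scorer = FaithfulnessScorer(threshold=0.7)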
{judgeval-0.0.32.dist-info → judgeval-0.0.33.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: judgeval
-Version: 0.0.32
+Version: 0.0.33
 Summary: Judgeval Package
 Project-URL: Homepage, https://github.com/JudgmentLabs/judgeval
 Project-URL: Issues, https://github.com/JudgmentLabs/judgeval/issues
@@ -12,12 +12,13 @@ Classifier: Programming Language :: Python :: 3
 Requires-Python: >=3.11
 Requires-Dist: anthropic
 Requires-Dist: fastapi
+Requires-Dist: google-genai
 Requires-Dist: langchain
 Requires-Dist: langchain-anthropic
 Requires-Dist: langchain-core
 Requires-Dist: langchain-huggingface
 Requires-Dist: langchain-openai
-Requires-Dist: litellm
+Requires-Dist: litellm==1.38.12
 Requires-Dist: nest-asyncio
 Requires-Dist: openai
 Requires-Dist: openpyxl
@@ -94,9 +95,21 @@ Create a file named `traces.py` with the following code:
 from judgeval.common.tracer import Tracer, wrap
 from openai import OpenAI
 
+# Basic initialization
 client = wrap(OpenAI())
 judgment = Tracer(project_name="my_project")
 
+# Or with S3 storage enabled
+# NOTE: Make sure AWS creds correspond to an account with write access to the specified S3 bucket
+judgment = Tracer(
+    project_name="my_project",
+    use_s3=True,
+    s3_bucket_name="my-traces-bucket",  # Bucket created automatically if it doesn't exist
+    s3_aws_access_key_id="your-access-key",  # Optional: defaults to AWS_ACCESS_KEY_ID env var
+    s3_aws_secret_access_key="your-secret-key",  # Optional: defaults to AWS_SECRET_ACCESS_KEY env var
+    s3_region_name="us-west-1"  # Optional: defaults to AWS_REGION env var or "us-west-1"
+)
+
 @judgment.observe(span_type="tool")
 def my_tool():
     return "Hello world!"
judgeval-0.0.33.dist-info/RECORD ADDED
@@ -0,0 +1,63 @@
+judgeval/__init__.py,sha256=x9HWt4waJwJMAqTuJSg2MezF9Zg-macEjeU-ajbly-8,330
+judgeval/clients.py,sha256=6VQmEqmfCngUdS2MuPBIpHvtDFqOENm8-_BmMvjLyRQ,944
+judgeval/constants.py,sha256=_XmVAkebMyGrDvvanAVlMgVd4p6MLHdEVsTQFI0kz1k,5411
+judgeval/evaluation_run.py,sha256=WGzx-Ug2qhSmunFo8NrmSstBRsOUc5KpKq0Lc51rqsM,6739
+judgeval/judgment_client.py,sha256=brRYmphZR-2IUre9kdOhfse1mYDilcIqUzzH21ROAdk,22208
+judgeval/rules.py,sha256=jkh1cXXcUf8oRY7xJUZfcQBYWn_rjUW4GvrhRt15PeU,20265
+judgeval/run_evaluation.py,sha256=elMpFHahyeukKKa09fmJM3c_afwJ00mbZRqm18l5f00,28481
+judgeval/version_check.py,sha256=bvJEidB7rAeXozoUbN9Yb97QOR_s2hgvpvj74jJ5HlY,943
+judgeval/common/__init__.py,sha256=7d24BRxtncpMj3AAJCj8RS7TqgjXmW777HVZH6-3sBs,289
+judgeval/common/exceptions.py,sha256=U-TxHLn7oVMezsMuoYouNDb2XuS8RCggfntYf5_6u4E,565
+judgeval/common/logger.py,sha256=KO75wWXCxhUHUMvLaTU31ZzOk6tkZBa7heQ7y0f-zFE,6062
+judgeval/common/s3_storage.py,sha256=W8wq9S7qJZdqdBR4sk3aEZ4K3-pz40DOoolOJrWs9Vo,3768
+judgeval/common/tracer.py,sha256=YsObK8VQXp1DDbU9xncU8NjuY-JUI54BqmG4olezrZc,92507
+judgeval/common/utils.py,sha256=LUQV5JfDr6wj7xHAJoNq-gofNZ6mjXbeKrGKzBME1KM,33533
+judgeval/data/__init__.py,sha256=xuKx_KCVHGp6CXvQuVmKl3v7pJp-qDaz0NccKxwjtO0,481
+judgeval/data/custom_example.py,sha256=QRBqiRiZS8UgVeTRHY0r1Jzm6yAYsyg6zmHxQGxdiQs,739
+judgeval/data/example.py,sha256=cJrmPGLel_P2sy1UaRvuVSAi35EnA9XMR11Lhp4aDLo,5930
+judgeval/data/result.py,sha256=Gb9tiSDsk1amXgh0cFG6JmlW_BMKxS2kuTwNA0rrHjA,3184
+judgeval/data/scorer_data.py,sha256=JVlaTx1EP2jw2gh3Vgx1CSEsvIFABAN26IquKyxwiJQ,3273
+judgeval/data/sequence.py,sha256=FmKVdzQP5VTujRCHDWk097MKRR-rJgbsdrxyCKee6tA,1994
+judgeval/data/sequence_run.py,sha256=RmYjfWKMWg-pcF5PLeiWfrhuDkjDZi5VEmAIEXN3Ib0,2104
+judgeval/data/datasets/__init__.py,sha256=IdNKhQv9yYZ_op0rdBacrFaFVmiiYQ3JTzXzxOTsEVQ,176
+judgeval/data/datasets/dataset.py,sha256=dhLo30hvpmmOK2R6O5wDs_neawUJ4lS8bb4S42SufNQ,13034
+judgeval/data/datasets/eval_dataset_client.py,sha256=xjj66BO9Es9IxXqzQe1RT_e0kpeKlt7OrhRoSuj4KHM,15085
+judgeval/integrations/langgraph.py,sha256=J-cQfFP52TjJewdSTe-fcsUC4HDvjNbXoxmbmF0SgiE,11743
+judgeval/judges/__init__.py,sha256=6X7VSwrwsdxGBNxCyapVRWGghhKOy3MVxFNMQ62kCXM,308
+judgeval/judges/base_judge.py,sha256=ch_S7uBB7lyv44Lf1d7mIGFpveOO58zOkkpImKgd9_4,994
+judgeval/judges/litellm_judge.py,sha256=EIL58Teptv8DzZUO3yP2RDQCDq-aoBB6HPZzPdK6KTg,2424
+judgeval/judges/mixture_of_judges.py,sha256=IJoi4Twk8ze1CJWVEp69k6TSqTCTGrmVYQ0qdffer60,15549
+judgeval/judges/together_judge.py,sha256=l00hhPerAZXg3oYBd8cyMtWsOTNt_0FIqoxhKJKQe3k,2302
+judgeval/judges/utils.py,sha256=9lvUxziGV86ISvVFxYBWc09TWFyAQgUTyPf_a9mD5Rs,2686
+judgeval/scorers/__init__.py,sha256=Mk-mWUt_gNpJqY_WIEuQynD6fxc34fWSRSuobMSrj94,1238
+judgeval/scorers/api_scorer.py,sha256=NQ_CrrUPhSUk1k2Q8rKpCG_TU2FT32sFEqvb-Yi54B0,2688
+judgeval/scorers/exceptions.py,sha256=eGW5CuJgZ5YJBFrE4FHDSF651PO1dKAZ379mJ8gOsfo,178
+judgeval/scorers/judgeval_scorer.py,sha256=79-JJurqHP-qTaWNWInx4SjvQYwXc9lvfPPNgwsh2yA,6773
+judgeval/scorers/prompt_scorer.py,sha256=PaAs2qRolw1P3_I061Xvk9qzvF4O-JR8g_39RqXnHcM,17728
+judgeval/scorers/score.py,sha256=r9QiT4-LIvivcJ6XxByrbswKSO8eQTtAD1UlXT_lcmo,18741
+judgeval/scorers/utils.py,sha256=iHQVTlIANbmCTXz9kTeSdOytgUZ_T74Re61ajqsk_WQ,6827
+judgeval/scorers/judgeval_scorers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+judgeval/scorers/judgeval_scorers/api_scorers/__init__.py,sha256=_sDUBxSG536KGqXNi6dFpaYKghjEAadxBxaaxV9HuuE,1764
+judgeval/scorers/judgeval_scorers/api_scorers/answer_correctness.py,sha256=Fnd9CVIOZ73sWEWymsU5eBrrZqPFjMZ0BKpeW-PDyTg,711
+judgeval/scorers/judgeval_scorers/api_scorers/answer_relevancy.py,sha256=oETeN9K0HSIRdL2SDqn82Vskpwh5SlKnZvs5VDm2OBU,658
+judgeval/scorers/judgeval_scorers/api_scorers/comparison.py,sha256=kuzf9OWvpY38yYSwlBgneLkUZwJNM4FQqvbS66keA90,1249
+judgeval/scorers/judgeval_scorers/api_scorers/contextual_precision.py,sha256=tpSuzFAaW8X9xqA0aLLKwh7qmBK0Pc_bJZMIe_q412U,770
+judgeval/scorers/judgeval_scorers/api_scorers/contextual_recall.py,sha256=pFVhk4pLtQ-FnNlbI-dFF-SIh69Jza7erHqiPkFWoBo,758
+judgeval/scorers/judgeval_scorers/api_scorers/contextual_relevancy.py,sha256=RQ6DZwEhChfecd89Ey-T7ke--7qTaXZlRsNxwH8gaME,823
+judgeval/scorers/judgeval_scorers/api_scorers/derailment_scorer.py,sha256=V9WPuwNMm097V7IknKs8UkmAk0yjnBXTcJha_BHXxTA,475
+judgeval/scorers/judgeval_scorers/api_scorers/execution_order.py,sha256=Pb3CiNF2Ca826B92wJCVAi_68lJjLhqqCKwQKaflSUg,1294
+judgeval/scorers/judgeval_scorers/api_scorers/faithfulness.py,sha256=-BwOapqjryYNKNydtdkUiKIij76dY0O1jBmdc6dKazQ,692
+judgeval/scorers/judgeval_scorers/api_scorers/groundedness.py,sha256=ntEEeTANEOsGlcbiTAF_3r6BeSJEaVDns8po8T0L6Vg,692
+judgeval/scorers/judgeval_scorers/api_scorers/hallucination.py,sha256=k5gDOki-8KXrZXydvdSqDt3NZqQ28hXoOCHQf6jNxr4,686
+judgeval/scorers/judgeval_scorers/api_scorers/instruction_adherence.py,sha256=XnSGEkQfwVqaqnHEGMCsxNiHVzrsrej48uDbLoWc8CQ,678
+judgeval/scorers/judgeval_scorers/api_scorers/json_correctness.py,sha256=mMKEuR87_yanEuZJ5YSGFMHDD_oLVZ6-rQuciFaDOMA,1095
+judgeval/scorers/judgeval_scorers/api_scorers/summarization.py,sha256=QmWB8bVbDYHY5FcF0rYZE_3c2XXgMLRmR6aXJWfdMC4,655
+judgeval/scorers/judgeval_scorers/classifiers/__init__.py,sha256=Qt81W5ZCwMvBAne0LfQDb8xvg5iOG1vEYP7WizgwAZo,67
+judgeval/scorers/judgeval_scorers/classifiers/text2sql/__init__.py,sha256=8iTzMvou1Dr8pybul6lZHKjc9Ye2-0_racRGYkhEdTY,74
+judgeval/scorers/judgeval_scorers/classifiers/text2sql/text2sql_scorer.py,sha256=ly72Z7s_c8NID6-nQnuW8qEGEW2MqdvpJ-5WfXzbAQg,2579
+judgeval/tracer/__init__.py,sha256=wy3DYpH8U_z0GO_K_gOSkK0tTTD-u5eLDo0T5xIBoAc,147
+judgeval/utils/alerts.py,sha256=O19Xj7DA0YVjl8PWiuH4zfdZeu3yiLVvHfY8ah2wG0g,2759
+judgeval-0.0.33.dist-info/METADATA,sha256=KzTkGTHYE8Uplehvtk_7x30XrV0xe1bpd-tU5lt0mHg,6097
+judgeval-0.0.33.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+judgeval-0.0.33.dist-info/licenses/LICENSE.md,sha256=tKmCg7k5QOmxPK19XMfzim04QiQJPmgIm0pAn55IJwk,11352
+judgeval-0.0.33.dist-info/RECORD,,
judgeval/scorers/base_scorer.py DELETED
@@ -1,58 +0,0 @@
-"""
-Judgment Scorer class.
-
-Scores `Example`s using ready-made Judgment evaluators.
-"""
-
-from pydantic import BaseModel, field_validator
-from judgeval.common.logger import debug, info, warning, error
-
-from judgeval.constants import APIScorer, UNBOUNDED_SCORERS
-
-
-class APIJudgmentScorer(BaseModel):
-    """
-    Class for ready-made, "out-of-the-box" scorer that uses Judgment evaluators to score `Example`s.
-
-    Args:
-        score_type (APIScorer): The Judgment metric to use for scoring `Example`s
-        threshold (float): A value between 0 and 1 that determines the scoring threshold
-    """
-    score_type: APIScorer
-    threshold: float
-
-    @field_validator('threshold')
-    def validate_threshold(cls, v, info):
-        """
-        Validates that the threshold is between 0 and 1 inclusive.
-        """
-        score_type = info.data.get('score_type')
-        if score_type in UNBOUNDED_SCORERS:
-            if v < 0:
-                error(f"Threshold for {score_type} must be greater than 0, got: {v}")
-                raise ValueError(f"Threshold for {score_type} must be greater than 0, got: {v}")
-        else:
-            if not 0 <= v <= 1:
-                error(f"Threshold for {score_type} must be between 0 and 1, got: {v}")
-                raise ValueError(f"Threshold for {score_type} must be between 0 and 1, got: {v}")
-        return v
-
-    @field_validator('score_type')
-    def convert_to_enum_value(cls, v):
-        """
-        Validates that the `score_type` is a valid `JudgmentMetric` enum value.
-        Converts string values to `JudgmentMetric` enum values.
-        """
-        debug(f"Attempting to convert score_type value: {v}")
-        if isinstance(v, APIScorer):
-            info(f"Using existing JudgmentMetric: {v.value}")
-            return v.value
-        elif isinstance(v, str):
-            debug(f"Converting string value to JudgmentMetric enum: {v}")
-            return APIScorer[v.upper()].value
-        error(f"Invalid score_type value: {v}")
-        raise ValueError(f"Invalid value for score_type: {v}")
-
-    def __str__(self):
-        return f"JudgmentScorer(score_type={self.score_type.value}, threshold={self.threshold})"
-