judgeval 0.0.44__py3-none-any.whl → 0.0.46__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries. Removed (`-`) lines may appear truncated or blank in this diff view.
- judgeval/__init__.py +5 -4
- judgeval/clients.py +6 -6
- judgeval/common/__init__.py +7 -2
- judgeval/common/exceptions.py +2 -3
- judgeval/common/logger.py +74 -49
- judgeval/common/s3_storage.py +30 -23
- judgeval/common/tracer.py +1273 -939
- judgeval/common/utils.py +416 -244
- judgeval/constants.py +73 -61
- judgeval/data/__init__.py +1 -1
- judgeval/data/custom_example.py +3 -2
- judgeval/data/datasets/dataset.py +80 -54
- judgeval/data/datasets/eval_dataset_client.py +131 -181
- judgeval/data/example.py +67 -43
- judgeval/data/result.py +11 -9
- judgeval/data/scorer_data.py +4 -2
- judgeval/data/tool.py +25 -16
- judgeval/data/trace.py +57 -29
- judgeval/data/trace_run.py +5 -11
- judgeval/evaluation_run.py +22 -82
- judgeval/integrations/langgraph.py +546 -184
- judgeval/judges/base_judge.py +1 -2
- judgeval/judges/litellm_judge.py +33 -11
- judgeval/judges/mixture_of_judges.py +128 -78
- judgeval/judges/together_judge.py +22 -9
- judgeval/judges/utils.py +14 -5
- judgeval/judgment_client.py +259 -271
- judgeval/rules.py +169 -142
- judgeval/run_evaluation.py +462 -305
- judgeval/scorers/api_scorer.py +20 -11
- judgeval/scorers/exceptions.py +1 -0
- judgeval/scorers/judgeval_scorer.py +77 -58
- judgeval/scorers/judgeval_scorers/api_scorers/__init__.py +46 -15
- judgeval/scorers/judgeval_scorers/api_scorers/answer_correctness.py +3 -2
- judgeval/scorers/judgeval_scorers/api_scorers/answer_relevancy.py +3 -2
- judgeval/scorers/judgeval_scorers/api_scorers/classifier_scorer.py +12 -11
- judgeval/scorers/judgeval_scorers/api_scorers/comparison.py +7 -5
- judgeval/scorers/judgeval_scorers/api_scorers/contextual_precision.py +3 -2
- judgeval/scorers/judgeval_scorers/api_scorers/contextual_recall.py +3 -2
- judgeval/scorers/judgeval_scorers/api_scorers/contextual_relevancy.py +5 -2
- judgeval/scorers/judgeval_scorers/api_scorers/derailment_scorer.py +2 -1
- judgeval/scorers/judgeval_scorers/api_scorers/execution_order.py +17 -8
- judgeval/scorers/judgeval_scorers/api_scorers/faithfulness.py +3 -2
- judgeval/scorers/judgeval_scorers/api_scorers/groundedness.py +3 -2
- judgeval/scorers/judgeval_scorers/api_scorers/hallucination.py +3 -2
- judgeval/scorers/judgeval_scorers/api_scorers/instruction_adherence.py +3 -2
- judgeval/scorers/judgeval_scorers/api_scorers/json_correctness.py +8 -9
- judgeval/scorers/judgeval_scorers/api_scorers/summarization.py +4 -4
- judgeval/scorers/judgeval_scorers/api_scorers/tool_dependency.py +5 -5
- judgeval/scorers/judgeval_scorers/api_scorers/tool_order.py +5 -2
- judgeval/scorers/judgeval_scorers/classifiers/text2sql/text2sql_scorer.py +9 -10
- judgeval/scorers/prompt_scorer.py +48 -37
- judgeval/scorers/score.py +86 -53
- judgeval/scorers/utils.py +11 -7
- judgeval/tracer/__init__.py +1 -1
- judgeval/utils/alerts.py +23 -12
- judgeval/utils/{data_utils.py → file_utils.py} +5 -9
- judgeval/utils/requests.py +29 -0
- judgeval/version_check.py +5 -2
- {judgeval-0.0.44.dist-info → judgeval-0.0.46.dist-info}/METADATA +79 -135
- judgeval-0.0.46.dist-info/RECORD +69 -0
- judgeval-0.0.44.dist-info/RECORD +0 -68
- {judgeval-0.0.44.dist-info → judgeval-0.0.46.dist-info}/WHEEL +0 -0
- {judgeval-0.0.44.dist-info → judgeval-0.0.46.dist-info}/licenses/LICENSE.md +0 -0
judgeval/scorers/api_scorer.py
CHANGED
```diff
@@ -6,7 +6,7 @@ Scores `Example`s using ready-made Judgment evaluators.
 
 from pydantic import BaseModel, field_validator
 from typing import List
-from judgeval.common.logger import debug, info,
+from judgeval.common.logger import debug, info, error
 from judgeval.data import ExampleParams
 from judgeval.constants import APIScorer, UNBOUNDED_SCORERS
 
@@ -19,27 +19,34 @@ class APIJudgmentScorer(BaseModel):
         score_type (APIScorer): The Judgment metric to use for scoring `Example`s
         threshold (float): A value between 0 and 1 that determines the scoring threshold
     """
+
     score_type: APIScorer
     threshold: float
-    required_params: List[
+    required_params: List[
+        ExampleParams
+    ] = []  # List of the required parameters on examples for the scorer
 
-    @field_validator(
+    @field_validator("threshold")
    def validate_threshold(cls, v, info):
        """
        Validates that the threshold is between 0 and 1 inclusive.
        """
-        score_type = info.data.get(
+        score_type = info.data.get("score_type")
        if score_type in UNBOUNDED_SCORERS:
            if v < 0:
                error(f"Threshold for {score_type} must be greater than 0, got: {v}")
-                raise ValueError(
+                raise ValueError(
+                    f"Threshold for {score_type} must be greater than 0, got: {v}"
+                )
        else:
            if not 0 <= v <= 1:
                error(f"Threshold for {score_type} must be between 0 and 1, got: {v}")
-                raise ValueError(
+                raise ValueError(
+                    f"Threshold for {score_type} must be between 0 and 1, got: {v}"
+                )
        return v
 
-    @field_validator(
+    @field_validator("score_type")
    def convert_to_enum_value(cls, v):
        """
        Validates that the `score_type` is a valid `APIScorer` enum value.
@@ -61,11 +68,13 @@ class APIJudgmentScorer(BaseModel):
    def to_dict(self) -> dict:
        """
        Converts the scorer configuration to a dictionary format.
-
+
        Returns:
            dict: A dictionary containing the scorer's configuration
        """
        return {
-            "score_type": str(
-
-
+            "score_type": str(
+                self.score_type.value
+            ),  # Convert enum to string for serialization
+            "threshold": self.threshold,
+        }
```
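As a rough illustration of the validation above, here is a minimal sketch of how the threshold check surfaces to callers, assuming the subclasses shown later in this diff (e.g. `FaithfulnessScorer`) take a single `threshold` argument as shown; the serialized enum value in the comment is an assumption.

```python
# Sketch: bounded scorers must have a threshold in [0, 1]; only scorers listed
# in UNBOUNDED_SCORERS accept larger values.
from judgeval.scorers.judgeval_scorers.api_scorers.faithfulness import (
    FaithfulnessScorer,
)

scorer = FaithfulnessScorer(threshold=0.8)  # within [0, 1], passes validation
print(scorer.to_dict())  # e.g. {"score_type": "faithfulness", "threshold": 0.8} (enum value assumed)

try:
    FaithfulnessScorer(threshold=1.5)  # bounded scorer, rejects values above 1
except ValueError as err:  # pydantic surfaces the validator's ValueError
    print(err)
```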
judgeval/scorers/judgeval_scorer.py
CHANGED
```diff
@@ -13,21 +13,26 @@ from judgeval.judges import JudgevalJudge
 from judgeval.judges.utils import create_judge
 from judgeval.constants import UNBOUNDED_SCORERS
 from judgeval.data.example import ExampleParams
+
+
 class JudgevalScorer:
     """
     Base class for scorers in `judgeval`.
 
     In practice, you should not implement this class unless you are creating a custom scorer.
     Judgeval offers 10+ default scorers that you can use out of the box.
-
+
     If you want to create a scorer that does not fall under any of the ready-made Judgment scorers,
     you can create a custom scorer by extending this class.
     """
+
     score_type: str  # name of your new scorer
     threshold: float  # The threshold to pass a test while using this scorer as a scorer
     score: Optional[float] = None  # The float score of the scorer run on the test case
-    score_breakdown: Dict = None
-    reason: Optional[str] =
+    score_breakdown: Optional[Dict] = None
+    reason: Optional[str] = (
+        None  # The reason for the score when evaluating the test case
+    )
     success: Optional[bool] = None  # Whether the test case passed or failed
     evaluation_model: Optional[str] = None  # The model used to evaluate the test case
     strict_mode: bool = False  # Whether to run the scorer in strict mode
@@ -39,61 +44,67 @@ class JudgevalScorer:
     evaluation_cost: Optional[float] = None  # The cost of running the scorer
     verbose_logs: Optional[str] = None  # The verbose logs of the scorer
     additional_metadata: Optional[Dict] = None  # Additional metadata for the scorer
-    required_params: Optional[List[ExampleParams]] =
-
-
+    required_params: Optional[List[ExampleParams]] = (
+        None  # The required parameters for the scorer
+    )
 
     def __init__(
-        self,
-        score_type: str,
-        threshold: float,
-        score: Optional[float] = None,
-        score_breakdown: Optional[Dict] = None,
-        reason: Optional[str] = None,
-        success: Optional[bool] = None,
-        evaluation_model: Optional[str] = None,
+        self,
+        score_type: str,
+        threshold: float,
+        score: Optional[float] = None,
+        score_breakdown: Optional[Dict] = None,
+        reason: Optional[str] = None,
+        success: Optional[bool] = None,
+        evaluation_model: Optional[str] = None,
         required_params: Optional[List[ExampleParams]] = None,
-        strict_mode: bool = False,
-        async_mode: bool = True,
-        verbose_mode: bool = True,
-        include_reason: bool = False,
+        strict_mode: bool = False,
+        async_mode: bool = True,
+        verbose_mode: bool = True,
+        include_reason: bool = False,
         custom_example: bool = False,
-        error: Optional[str] = None,
-        evaluation_cost: Optional[float] = None,
-        verbose_logs: Optional[str] = None,
-        additional_metadata: Optional[Dict] = None
-… (old lines 65-92 removed; their content is truncated in this diff view)
+        error: Optional[str] = None,
+        evaluation_cost: Optional[float] = None,
+        verbose_logs: Optional[str] = None,
+        additional_metadata: Optional[Dict] = None,
+    ):
+        debug(
+            f"Initializing JudgevalScorer with score_type={score_type}, threshold={threshold}"
+        )
+        if score_type in UNBOUNDED_SCORERS:
+            if threshold < 0:
+                raise ValueError(
+                    f"Threshold for {score_type} must be greater than 0, got: {threshold}"
+                )
+        else:
+            if not 0 <= threshold <= 1:
+                raise ValueError(
+                    f"Threshold for {score_type} must be between 0 and 1, got: {threshold}"
+                )
+        if strict_mode:
+            warning("Strict mode enabled - scoring will be more rigorous")
+        info(f"JudgevalScorer initialized with evaluation_model: {evaluation_model}")
+        self.score_type = score_type
+        self.threshold = threshold
+        self.score = score
+        self.score_breakdown = score_breakdown
+        self.reason = reason
+        self.success = success
+        self.evaluation_model = evaluation_model
+        self.strict_mode = strict_mode
+        self.async_mode = async_mode
+        self.verbose_mode = verbose_mode
+        self.include_reason = include_reason
+        self.custom_example = custom_example
+        self.error = error
+        self.evaluation_cost = evaluation_cost
+        self.verbose_logs = verbose_logs
+        self.additional_metadata = additional_metadata
+        self.required_params = required_params
 
     def _add_model(self, model: Optional[Union[str, List[str], JudgevalJudge]] = None):
         """
-        Adds the evaluation model to the JudgevalScorer instance
+        Adds the evaluation model to the JudgevalScorer instance
 
         This method is used at eval time
         """
@@ -107,7 +118,9 @@ class JudgevalScorer:
         """
         warning("Attempting to call unimplemented score_example method")
         error("score_example method not implemented")
-        raise NotImplementedError(
+        raise NotImplementedError(
+            "You must implement the `score` method in your custom scorer"
+        )
 
     @abstractmethod
     async def a_score_example(self, example, *args, **kwargs) -> float:
@@ -116,8 +129,10 @@ class JudgevalScorer:
         """
         warning("Attempting to call unimplemented a_score_example method")
         error("a_score_example method not implemented")
-        raise NotImplementedError(
-
+        raise NotImplementedError(
+            "You must implement the `a_score` method in your custom scorer"
+        )
+
     @abstractmethod
     def _success_check(self) -> bool:
         """
@@ -125,7 +140,9 @@ class JudgevalScorer:
         """
         warning("Attempting to call unimplemented success_check method")
         error("_success_check method not implemented")
-        raise NotImplementedError(
+        raise NotImplementedError(
+            "You must implement the `_success_check` method in your custom scorer"
+        )
 
     def __str__(self):
         debug("Converting JudgevalScorer instance to string representation")
@@ -150,9 +167,11 @@ class JudgevalScorer:
             "additional_metadata": self.additional_metadata,
         }
         return f"JudgevalScorer({attributes})"
-
+
     def to_dict(self):
         return {
-            "score_type": str(
-
+            "score_type": str(
+                self.score_type
+            ),  # Convert enum to string for serialization
+            "threshold": self.threshold,
         }
```
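For context on the `NotImplementedError` messages above, here is a minimal sketch of a custom scorer built on this base class; the `example.actual_output` attribute name is an assumption inferred from the `ExampleParams` members used throughout this diff.

```python
# Sketch of a toy custom scorer extending JudgevalScorer.
from judgeval.scorers.judgeval_scorer import JudgevalScorer


class ContainsGreetingScorer(JudgevalScorer):
    """Passes when the model output contains a greeting."""

    def __init__(self, threshold: float = 1.0):
        super().__init__(score_type="Contains Greeting", threshold=threshold)

    def score_example(self, example, *args, **kwargs) -> float:
        # `actual_output` is a hypothetical field name; see ExampleParams.ACTUAL_OUTPUT above.
        self.score = 1.0 if "hello" in example.actual_output.lower() else 0.0
        self.success = self.score >= self.threshold
        return self.score

    async def a_score_example(self, example, *args, **kwargs) -> float:
        return self.score_example(example, *args, **kwargs)

    def _success_check(self) -> bool:
        return bool(self.success)
```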
judgeval/scorers/judgeval_scorers/api_scorers/__init__.py
CHANGED
```diff
@@ -1,20 +1,51 @@
-from judgeval.scorers.judgeval_scorers.api_scorers.execution_order import
-
-
-from judgeval.scorers.judgeval_scorers.api_scorers.
-
-
-from judgeval.scorers.judgeval_scorers.api_scorers.
-
-
-from judgeval.scorers.judgeval_scorers.api_scorers.
+from judgeval.scorers.judgeval_scorers.api_scorers.execution_order import (
+    ExecutionOrderScorer,
+)
+from judgeval.scorers.judgeval_scorers.api_scorers.json_correctness import (
+    JSONCorrectnessScorer,
+)
+from judgeval.scorers.judgeval_scorers.api_scorers.summarization import (
+    SummarizationScorer,
+)
+from judgeval.scorers.judgeval_scorers.api_scorers.hallucination import (
+    HallucinationScorer,
+)
+from judgeval.scorers.judgeval_scorers.api_scorers.faithfulness import (
+    FaithfulnessScorer,
+)
+from judgeval.scorers.judgeval_scorers.api_scorers.contextual_relevancy import (
+    ContextualRelevancyScorer,
+)
+from judgeval.scorers.judgeval_scorers.api_scorers.contextual_precision import (
+    ContextualPrecisionScorer,
+)
+from judgeval.scorers.judgeval_scorers.api_scorers.contextual_recall import (
+    ContextualRecallScorer,
+)
+from judgeval.scorers.judgeval_scorers.api_scorers.answer_relevancy import (
+    AnswerRelevancyScorer,
+)
+from judgeval.scorers.judgeval_scorers.api_scorers.answer_correctness import (
+    AnswerCorrectnessScorer,
+)
 from judgeval.scorers.judgeval_scorers.api_scorers.comparison import ComparisonScorer
-from judgeval.scorers.judgeval_scorers.api_scorers.instruction_adherence import
-
-
+from judgeval.scorers.judgeval_scorers.api_scorers.instruction_adherence import (
+    InstructionAdherenceScorer,
+)
+from judgeval.scorers.judgeval_scorers.api_scorers.groundedness import (
+    GroundednessScorer,
+)
+from judgeval.scorers.judgeval_scorers.api_scorers.derailment_scorer import (
+    DerailmentScorer,
+)
 from judgeval.scorers.judgeval_scorers.api_scorers.tool_order import ToolOrderScorer
-from judgeval.scorers.judgeval_scorers.api_scorers.classifier_scorer import
-
+from judgeval.scorers.judgeval_scorers.api_scorers.classifier_scorer import (
+    ClassifierScorer,
+)
+from judgeval.scorers.judgeval_scorers.api_scorers.tool_dependency import (
+    ToolDependencyScorer,
+)
+
 __all__ = [
     "ExecutionOrderScorer",
     "JSONCorrectnessScorer",
```
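The reformatted imports only change line wrapping (Black-style parentheses); the re-exported names are unchanged. A minimal usage sketch, using module paths taken verbatim from this diff and the single-argument constructors shown below:

```python
# The api_scorers package re-exports every scorer listed in __all__ above.
from judgeval.scorers.judgeval_scorers.api_scorers import (
    AnswerRelevancyScorer,
    FaithfulnessScorer,
)

scorers = [
    AnswerRelevancyScorer(threshold=0.7),
    FaithfulnessScorer(threshold=0.9),
]
```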
judgeval/scorers/judgeval_scorers/api_scorers/answer_correctness.py
CHANGED
```diff
@@ -10,16 +10,17 @@ from judgeval.scorers.api_scorer import APIJudgmentScorer
 from judgeval.constants import APIScorer
 from judgeval.data import ExampleParams
 
+
 class AnswerCorrectnessScorer(APIJudgmentScorer):
     def __init__(self, threshold: float):
         super().__init__(
-            threshold=threshold,
+            threshold=threshold,
             score_type=APIScorer.ANSWER_CORRECTNESS,
             required_params=[
                 ExampleParams.INPUT,
                 ExampleParams.ACTUAL_OUTPUT,
                 ExampleParams.EXPECTED_OUTPUT,
-            ]
+            ],
         )
 
     @property
```
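A minimal sketch of what the `required_params` list above implies for callers; the `Example` keyword fields are assumptions based on the `ExampleParams` members named in this diff.

```python
# Sketch: this scorer needs input, actual_output, and expected_output on the Example.
from judgeval.data import Example  # assumed re-export of the Example model
from judgeval.scorers.judgeval_scorers.api_scorers.answer_correctness import (
    AnswerCorrectnessScorer,
)

example = Example(
    input="What is the capital of France?",
    actual_output="Paris is the capital of France.",
    expected_output="Paris",
)
scorer = AnswerCorrectnessScorer(threshold=0.7)
```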
judgeval/scorers/judgeval_scorers/api_scorers/answer_relevancy.py
CHANGED
```diff
@@ -10,15 +10,16 @@ from judgeval.scorers.api_scorer import APIJudgmentScorer
 from judgeval.constants import APIScorer
 from judgeval.data import ExampleParams
 
+
 class AnswerRelevancyScorer(APIJudgmentScorer):
     def __init__(self, threshold: float):
         super().__init__(
-            threshold=threshold,
+            threshold=threshold,
             score_type=APIScorer.ANSWER_RELEVANCY,
             required_params=[
                 ExampleParams.INPUT,
                 ExampleParams.ACTUAL_OUTPUT,
-            ]
+            ],
         )
 
     @property
```
judgeval/scorers/judgeval_scorers/api_scorers/classifier_scorer.py
CHANGED
```diff
@@ -1,11 +1,11 @@
 from judgeval.scorers.api_scorer import APIJudgmentScorer
 from judgeval.constants import APIScorer
-from typing import List, Mapping, Optional
-
+from typing import List, Mapping, Optional
+
 
 class ClassifierScorer(APIJudgmentScorer):
     """
-    In the Judgment backend, this scorer is implemented as a PromptScorer that takes
+    In the Judgment backend, this scorer is implemented as a PromptScorer that takes
     1. a system role that may involve the Example object
     2. options for scores on the example
 
@@ -14,7 +14,7 @@ class ClassifierScorer(APIJudgmentScorer):
     ex:
     system_role = "You are a judge that evaluates whether the response is positive or negative. The response is: {example.actual_output}"
     options = {"positive": 1, "negative": 0}
-
+
     Args:
         name (str): The name of the scorer
         slug (str): A unique identifier for the scorer
@@ -25,14 +25,15 @@ class ClassifierScorer(APIJudgmentScorer):
         strict_mode (bool): Whether to use strict mode (default: False)
         verbose_mode (bool): Whether to include verbose logging (default: False)
     """
+
     name: Optional[str] = None
     slug: Optional[str] = None
     conversation: Optional[List[dict]] = None
     options: Optional[Mapping[str, float]] = None
     verbose_mode: bool = False
     strict_mode: bool = False
-    include_reason: bool = True
-    async_mode: bool = True
+    include_reason: bool = True
+    async_mode: bool = True
     threshold: float = 0.5
 
     def __init__(
@@ -65,26 +66,26 @@ class ClassifierScorer(APIJudgmentScorer):
         Updates the name of the scorer.
         """
         self.name = name
-
+
     def update_threshold(self, threshold: float):
         """
         Updates the threshold of the scorer.
         """
         self.threshold = threshold
-
+
     def update_conversation(self, conversation: List[dict]):
         """
         Updates the conversation with the new conversation.
-
+
         Sample conversation:
         [{'role': 'system', 'content': "Did the chatbot answer the user's question in a kind way?: {{actual_output}}."}]
         """
         self.conversation = conversation
-
+
     def update_options(self, options: Mapping[str, float]):
         """
         Updates the options with the new options.
-
+
         Sample options:
         {"yes": 1, "no": 0}
         """
```
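A sketch of the `ClassifierScorer` surface visible in this diff. Its full `__init__` signature is not shown here, so constructing it from the pydantic fields by keyword is an assumption; the conversation and options samples come from the docstrings above.

```python
from judgeval.scorers.judgeval_scorers.api_scorers.classifier_scorer import (
    ClassifierScorer,
)

# Assumed keyword construction from the declared fields (name, slug, conversation, options).
scorer = ClassifierScorer(
    name="Kindness Check",
    slug="kindness-check",
    conversation=[
        {
            "role": "system",
            "content": "Did the chatbot answer the user's question in a kind way?: {{actual_output}}.",
        }
    ],
    options={"yes": 1, "no": 0},
)

# The update_* methods are taken verbatim from the diff above.
scorer.update_threshold(0.5)
scorer.update_options({"yes": 1.0, "partially": 0.5, "no": 0.0})
```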
judgeval/scorers/judgeval_scorers/api_scorers/comparison.py
CHANGED
```diff
@@ -10,34 +10,36 @@ from judgeval.scorers.api_scorer import APIJudgmentScorer
 from judgeval.constants import APIScorer
 from typing import Optional, Dict
 from judgeval.data import ExampleParams
+
+
 class ComparisonScorer(APIJudgmentScorer):
     kwargs: Optional[Dict] = None
 
     def __init__(self, threshold: float, criteria: str, description: str):
         super().__init__(
-            threshold=threshold,
+            threshold=threshold,
             score_type=APIScorer.COMPARISON,
             required_params=[
                 ExampleParams.INPUT,
                 ExampleParams.ACTUAL_OUTPUT,
                 ExampleParams.EXPECTED_OUTPUT,
-            ]
+            ],
         )
         self.kwargs = {"criteria": criteria, "description": description}
 
     @property
     def __name__(self):
         return f"Comparison-{self.kwargs['criteria']}"
-
+
     def to_dict(self) -> dict:
         """
         Converts the scorer configuration to a dictionary format.
-
+
         Returns:
             dict: A dictionary containing the scorer's configuration
         """
         return {
             "score_type": self.score_type,
             "threshold": self.threshold,
-            "kwargs": self.kwargs
+            "kwargs": self.kwargs,
         }
```
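A minimal sketch of the `ComparisonScorer` signature and `to_dict()` output shown above; the criteria and description strings are illustrative.

```python
from judgeval.scorers.judgeval_scorers.api_scorers.comparison import ComparisonScorer

scorer = ComparisonScorer(
    threshold=0.5,
    criteria="tone",
    description="Which response has the more professional tone?",
)
print(scorer.__name__)   # "Comparison-tone"
print(scorer.to_dict())  # includes "kwargs": {"criteria": ..., "description": ...}
```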
judgeval/scorers/judgeval_scorers/api_scorers/contextual_precision.py
CHANGED
```diff
@@ -10,17 +10,18 @@ from judgeval.scorers.api_scorer import APIJudgmentScorer
 from judgeval.constants import APIScorer
 from judgeval.data import ExampleParams
 
+
 class ContextualPrecisionScorer(APIJudgmentScorer):
     def __init__(self, threshold: float):
         super().__init__(
-            threshold=threshold,
+            threshold=threshold,
             score_type=APIScorer.CONTEXTUAL_PRECISION,
             required_params=[
                 ExampleParams.INPUT,
                 ExampleParams.ACTUAL_OUTPUT,
                 ExampleParams.RETRIEVAL_CONTEXT,
                 ExampleParams.EXPECTED_OUTPUT,
-            ]
+            ],
         )
 
     @property
```
judgeval/scorers/judgeval_scorers/api_scorers/contextual_recall.py
CHANGED
```diff
@@ -14,15 +14,16 @@ from judgeval.data import ExampleParams
 class ContextualRecallScorer(APIJudgmentScorer):
     def __init__(self, threshold: float):
         super().__init__(
-            threshold=threshold,
+            threshold=threshold,
             score_type=APIScorer.CONTEXTUAL_RECALL,
             required_params=[
                 ExampleParams.INPUT,
                 ExampleParams.ACTUAL_OUTPUT,
                 ExampleParams.EXPECTED_OUTPUT,
                 ExampleParams.RETRIEVAL_CONTEXT,
-            ]
+            ],
         )
+
     @property
     def __name__(self):
         return "Contextual Recall"
```
judgeval/scorers/judgeval_scorers/api_scorers/contextual_relevancy.py
CHANGED
```diff
@@ -10,20 +10,23 @@ from judgeval.scorers.api_scorer import APIJudgmentScorer
 from judgeval.constants import APIScorer
 from judgeval.data import ExampleParams
 
+
 class ContextualRelevancyScorer(APIJudgmentScorer):
     """
     Scorer that checks if the output of a model is relevant to the retrieval context
     """
+
     def __init__(self, threshold: float):
         super().__init__(
-            threshold=threshold,
+            threshold=threshold,
             score_type=APIScorer.CONTEXTUAL_RELEVANCY,
             required_params=[
                 ExampleParams.INPUT,
                 ExampleParams.ACTUAL_OUTPUT,
                 ExampleParams.RETRIEVAL_CONTEXT,
-            ]
+            ],
         )
+
     @property
     def __name__(self):
         return "Contextual Relevancy"
```
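The contextual (RAG) scorers above all require `RETRIEVAL_CONTEXT`, so an `Example` must carry its retrieved chunks. A minimal sketch, assuming `Example` accepts `retrieval_context` as a list of strings (field names inferred from the `ExampleParams` members in this diff):

```python
from judgeval.data import Example  # assumed re-export of the Example model
from judgeval.scorers.judgeval_scorers.api_scorers.contextual_relevancy import (
    ContextualRelevancyScorer,
)

example = Example(
    input="When was the Eiffel Tower built?",
    actual_output="It was completed in 1889.",
    retrieval_context=["The Eiffel Tower was completed in 1889 for the World's Fair."],
)
scorer = ContextualRelevancyScorer(threshold=0.7)
```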
judgeval/scorers/judgeval_scorers/api_scorers/derailment_scorer.py
CHANGED
```diff
@@ -9,10 +9,11 @@ TODO add link to docs page for this scorer
 from judgeval.scorers.api_scorer import APIJudgmentScorer
 from judgeval.constants import APIScorer
 
+
 class DerailmentScorer(APIJudgmentScorer):
     def __init__(self, threshold: float):
         super().__init__(
-            threshold=threshold,
+            threshold=threshold,
             score_type=APIScorer.DERAILMENT,
         )
 
```
judgeval/scorers/judgeval_scorers/api_scorers/execution_order.py
CHANGED
```diff
@@ -8,22 +8,31 @@ TODO add link to docs page for this scorer
 # Internal imports
 from judgeval.scorers.api_scorer import APIJudgmentScorer
 from judgeval.constants import APIScorer
-from typing import Optional, Dict
+from typing import Optional, Dict
 from judgeval.data import ExampleParams
 
+
 class ExecutionOrderScorer(APIJudgmentScorer):
     kwargs: Optional[Dict] = None
 
-    def __init__(
+    def __init__(
+        self,
+        threshold: float,
+        should_exact_match: bool = False,
+        should_consider_ordering: bool = False,
+    ):
         super().__init__(
-            threshold=threshold,
+            threshold=threshold,
             score_type=APIScorer.EXECUTION_ORDER,
             required_params=[
                 ExampleParams.ACTUAL_OUTPUT,
                 ExampleParams.EXPECTED_OUTPUT,
-            ]
+            ],
         )
-        self.kwargs = {
+        self.kwargs = {
+            "should_exact_match": should_exact_match,
+            "should_consider_ordering": should_consider_ordering,
+        }
 
     @property
     def __name__(self):
@@ -32,12 +41,12 @@ class ExecutionOrderScorer(APIJudgmentScorer):
     def to_dict(self) -> dict:
         """
         Converts the scorer configuration to a dictionary format.
-
+
         Returns:
             dict: A dictionary containing the scorer's configuration
         """
         return {
             "score_type": self.score_type,
             "threshold": self.threshold,
-            "kwargs": self.kwargs
-        }
+            "kwargs": self.kwargs,
+        }
```
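A minimal sketch of the expanded `ExecutionOrderScorer` signature above and the kwargs it serializes; the threshold value is illustrative.

```python
from judgeval.scorers.judgeval_scorers.api_scorers.execution_order import (
    ExecutionOrderScorer,
)

scorer = ExecutionOrderScorer(
    threshold=1.0,
    should_exact_match=True,
    should_consider_ordering=True,
)
print(scorer.to_dict())
# {"score_type": ..., "threshold": 1.0,
#  "kwargs": {"should_exact_match": True, "should_consider_ordering": True}}
```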
judgeval/scorers/judgeval_scorers/api_scorers/faithfulness.py
CHANGED
```diff
@@ -10,16 +10,17 @@ from judgeval.scorers.api_scorer import APIJudgmentScorer
 from judgeval.constants import APIScorer
 from judgeval.data import ExampleParams
 
+
 class FaithfulnessScorer(APIJudgmentScorer):
     def __init__(self, threshold: float):
         super().__init__(
-            threshold=threshold,
+            threshold=threshold,
             score_type=APIScorer.FAITHFULNESS,
             required_params=[
                 ExampleParams.INPUT,
                 ExampleParams.ACTUAL_OUTPUT,
                 ExampleParams.RETRIEVAL_CONTEXT,
-            ]
+            ],
         )
 
     @property
```
|