judgeval 0.0.17__py3-none-any.whl → 0.0.19__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56)
  1. judgeval/__init__.py +1 -3
  2. judgeval/clients.py +0 -7
  3. judgeval/common/logger.py +0 -1
  4. judgeval/common/tracer.py +250 -42
  5. judgeval/common/utils.py +9 -5
  6. judgeval/constants.py +6 -1
  7. judgeval/data/__init__.py +2 -0
  8. judgeval/data/api_example.py +2 -2
  9. judgeval/data/datasets/__init__.py +1 -2
  10. judgeval/data/datasets/dataset.py +4 -5
  11. judgeval/data/datasets/eval_dataset_client.py +1 -2
  12. judgeval/data/datasets/utils.py +1 -2
  13. judgeval/data/example.py +71 -16
  14. judgeval/data/scorer_data.py +1 -1
  15. judgeval/evaluation_run.py +2 -2
  16. judgeval/judges/__init__.py +0 -1
  17. judgeval/judges/base_judge.py +1 -1
  18. judgeval/judges/mixture_of_judges.py +7 -2
  19. judgeval/judgment_client.py +8 -4
  20. judgeval/rules.py +2 -4
  21. judgeval/run_evaluation.py +2 -5
  22. judgeval/scorers/__init__.py +6 -0
  23. judgeval/scorers/api_scorer.py +12 -6
  24. judgeval/scorers/base_scorer.py +12 -6
  25. judgeval/scorers/judgeval_scorer.py +7 -3
  26. judgeval/scorers/judgeval_scorers/__init__.py +24 -3
  27. judgeval/scorers/judgeval_scorers/api_scorers/__init__.py +6 -0
  28. judgeval/scorers/judgeval_scorers/api_scorers/comparison.py +35 -0
  29. judgeval/scorers/judgeval_scorers/api_scorers/groundedness.py +19 -0
  30. judgeval/scorers/judgeval_scorers/api_scorers/instruction_adherence.py +19 -0
  31. judgeval/scorers/judgeval_scorers/local_implementations/__init__.py +4 -1
  32. judgeval/scorers/judgeval_scorers/local_implementations/answer_correctness/answer_correctness_scorer.py +0 -1
  33. judgeval/scorers/judgeval_scorers/local_implementations/answer_correctness/prompts.py +2 -2
  34. judgeval/scorers/judgeval_scorers/local_implementations/answer_relevancy/answer_relevancy_scorer.py +7 -6
  35. judgeval/scorers/judgeval_scorers/local_implementations/answer_relevancy/prompts.py +2 -2
  36. judgeval/scorers/judgeval_scorers/local_implementations/comparison/__init__.py +0 -0
  37. judgeval/scorers/judgeval_scorers/local_implementations/comparison/comparison_scorer.py +161 -0
  38. judgeval/scorers/judgeval_scorers/local_implementations/comparison/prompts.py +222 -0
  39. judgeval/scorers/judgeval_scorers/local_implementations/contextual_precision/prompts.py +2 -2
  40. judgeval/scorers/judgeval_scorers/local_implementations/contextual_recall/prompts.py +2 -2
  41. judgeval/scorers/judgeval_scorers/local_implementations/contextual_relevancy/prompts.py +2 -2
  42. judgeval/scorers/judgeval_scorers/local_implementations/faithfulness/faithfulness_scorer.py +1 -8
  43. judgeval/scorers/judgeval_scorers/local_implementations/hallucination/hallucination_scorer.py +7 -6
  44. judgeval/scorers/judgeval_scorers/local_implementations/hallucination/prompts.py +2 -2
  45. judgeval/scorers/judgeval_scorers/local_implementations/instruction_adherence/instruction_adherence.py +232 -0
  46. judgeval/scorers/judgeval_scorers/local_implementations/instruction_adherence/prompt.py +102 -0
  47. judgeval/scorers/judgeval_scorers/local_implementations/json_correctness/json_correctness_scorer.py +7 -7
  48. judgeval/scorers/judgeval_scorers/local_implementations/summarization/summarization_scorer.py +7 -6
  49. judgeval/scorers/judgeval_scorers/local_implementations/tool_correctness/tool_correctness_scorer.py +1 -2
  50. judgeval/scorers/prompt_scorer.py +7 -5
  51. judgeval/scorers/utils.py +1 -1
  52. {judgeval-0.0.17.dist-info → judgeval-0.0.19.dist-info}/METADATA +1 -1
  53. {judgeval-0.0.17.dist-info → judgeval-0.0.19.dist-info}/RECORD +56 -48
  54. /judgeval/data/{datasets/ground_truth.py → ground_truth.py} +0 -0
  55. {judgeval-0.0.17.dist-info → judgeval-0.0.19.dist-info}/WHEEL +0 -0
  56. {judgeval-0.0.17.dist-info → judgeval-0.0.19.dist-info}/licenses/LICENSE.md +0 -0
judgeval/data/scorer_data.py CHANGED
@@ -5,7 +5,7 @@ ScorerData holds the information related to a single, completed Scorer evaluatio
  """

  from typing import List, Union, Optional, Dict
- from pydantic import BaseModel, Field
+ from pydantic import BaseModel

  from judgeval.scorers import JudgevalScorer

judgeval/evaluation_run.py CHANGED
@@ -111,7 +111,7 @@ class EvaluationRun(BaseModel):
          # Check if model is string or list of strings
          if isinstance(v, str):
              if v not in ACCEPTABLE_MODELS:
-                 raise ValueError(f"Model name {v} not recognized.")
+                 raise ValueError(f"Model name {v} not recognized. Please select a valid model name.)")
              return v

          if isinstance(v, list):
@@ -119,7 +119,7 @@ class EvaluationRun(BaseModel):
                  raise ValueError("When providing a list of models, all elements must be strings")
              for m in v:
                  if m not in ACCEPTABLE_MODELS:
-                     raise ValueError(f"Model name {m} not recognized.")
+                     raise ValueError(f"Model name {m} not recognized. Please select a valid model name.")
              return v

          raise ValueError(f"Model must be one of: string, list of strings, or JudgevalJudge instance. Received type {type(v)}.")
judgeval/judges/__init__.py CHANGED
@@ -1,4 +1,3 @@
- from pydantic import BaseModel
  from judgeval.judges.base_judge import JudgevalJudge
  from judgeval.judges.litellm_judge import LiteLLMJudge
  from judgeval.judges.together_judge import TogetherJudge
judgeval/judges/base_judge.py CHANGED
@@ -3,7 +3,7 @@ Implements the base class for all Judgeval Judge models.
  """

  from abc import ABC, abstractmethod
- from typing import Optional, List
+ from typing import Optional


  class JudgevalJudge(ABC):
judgeval/judges/mixture_of_judges.py CHANGED
@@ -5,9 +5,14 @@ Enables client to use multiple models to generate responses and then aggregate t
  """
  from judgeval import *
  import pydantic
- from typing import List, Union, Mapping, Dict
+ from typing import List, Union, Mapping
  from judgeval.judges import JudgevalJudge
- from judgeval.common.utils import get_completion_multiple_models, get_chat_completion, aget_completion_multiple_models, aget_chat_completion
+ from judgeval.common.utils import (
+     get_completion_multiple_models,
+     get_chat_completion,
+     aget_completion_multiple_models,
+     aget_chat_completion
+ )
  from judgeval.common.logger import debug, error

  def build_dynamic_mixture_prompt(
judgeval/judgment_client.py CHANGED
@@ -6,17 +6,17 @@ from typing import Optional, List, Dict, Any, Union
  import requests

  from judgeval.constants import ROOT_API
- from judgeval.data.datasets import EvalDataset, EvalDatasetClient, GroundTruthExample
+ from judgeval.data.datasets import EvalDataset, EvalDatasetClient
  from judgeval.data import (
      ScoringResult,
-     Example
+     Example,
+     GroundTruthExample
  )
  from judgeval.scorers import (
      APIJudgmentScorer,
      JudgevalScorer,
      ClassifierScorer,
      ScorerWrapper,
-     score,
  )
  from judgeval.evaluation_run import EvaluationRun
  from judgeval.run_evaluation import (
@@ -24,7 +24,11 @@ from judgeval.run_evaluation import (
      assert_test
  )
  from judgeval.judges import JudgevalJudge
- from judgeval.constants import JUDGMENT_EVAL_FETCH_API_URL, JUDGMENT_EVAL_DELETE_API_URL, JUDGMENT_EVAL_DELETE_PROJECT_API_URL
+ from judgeval.constants import (
+     JUDGMENT_EVAL_FETCH_API_URL,
+     JUDGMENT_EVAL_DELETE_API_URL,
+     JUDGMENT_EVAL_DELETE_PROJECT_API_URL
+ )
  from judgeval.common.exceptions import JudgmentAPIError
  from pydantic import BaseModel
  from judgeval.rules import Rule
judgeval/rules.py CHANGED
@@ -5,14 +5,12 @@ Rules system for Judgeval that enables alerts based on metric thresholds.
  from typing import Dict, List, Optional, Union, Any, Set, Tuple
  from pydantic import BaseModel, Field, field_validator, ConfigDict
  from enum import Enum
- from datetime import datetime
  import asyncio
  from concurrent.futures import ThreadPoolExecutor
  import time
- import uuid # Add import for uuid module
+ import uuid

- from judgeval.scorers import APIJudgmentScorer, JudgevalScorer
- from judgeval.scorers.judgeval_scorers import ScorerWrapper # Import from the correct module
+ from judgeval.scorers import APIJudgmentScorer, JudgevalScorer, ScorerWrapper

  class AlertStatus(str, Enum):
      """Status of an alert evaluation."""
judgeval/run_evaluation.py CHANGED
@@ -5,7 +5,6 @@ from datetime import datetime
  from rich import print as rprint

  from judgeval.data import (
-     Example,
      ScorerData,
      ScoringResult
  )
@@ -25,13 +24,11 @@ from judgeval.constants import (
  from judgeval.common.exceptions import JudgmentAPIError
  from judgeval.evaluation_run import EvaluationRun
  from judgeval.common.logger import (
-     enable_logging,
      debug,
      info,
      error,
      example_logging_context
  )
- from judgeval.rules import RulesEngine, Rule, AlertResult, AlertStatus


  def execute_api_eval(evaluation_run: EvaluationRun) -> List[Dict]:
@@ -174,8 +171,8 @@ def check_eval_run_name_exists(eval_name: str, project_name: str, judgment_api_k
      )

      if response.status_code == 409:
-         error(f"Evaluation run name '{eval_name}' already exists for this project")
-         raise ValueError(f"Evaluation run name '{eval_name}' already exists for this project")
+         error(f"Eval run name '{eval_name}' already exists for this project. Please choose a different name or set the `override` flag to true.")
+         raise ValueError(f"Eval run name '{eval_name}' already exists for this project. Please choose a different name or set the `override` flag to true.")

      if not response.ok:
          response_data = response.json()
judgeval/scorers/__init__.py CHANGED
@@ -14,6 +14,9 @@ from judgeval.scorers.judgeval_scorers import (
      ScorerWrapper,
      AnswerCorrectnessScorer,
      Text2SQLScorer,
+     ComparisonScorer,
+     InstructionAdherenceScorer,
+     GroundednessScorer,
  )

  __all__ = [
@@ -33,4 +36,7 @@ __all__ = [
      "ScorerWrapper",
      "AnswerCorrectnessScorer",
      "Text2SQLScorer",
+     "ComparisonScorer",
+     "InstructionAdherenceScorer",
+     "GroundednessScorer",
  ]
judgeval/scorers/api_scorer.py CHANGED
@@ -7,7 +7,7 @@ Scores `Example`s using ready-made Judgment evaluators.
  from pydantic import BaseModel, field_validator
  from judgeval.common.logger import debug, info, warning, error

- from judgeval.constants import APIScorer
+ from judgeval.constants import APIScorer, UNBOUNDED_SCORERS


  class APIJudgmentScorer(BaseModel):
@@ -18,17 +18,23 @@ class APIJudgmentScorer(BaseModel):
          score_type (APIScorer): The Judgment metric to use for scoring `Example`s
          threshold (float): A value between 0 and 1 that determines the scoring threshold
      """
-     threshold: float
      score_type: APIScorer
+     threshold: float

      @field_validator('threshold')
-     def validate_threshold(cls, v):
+     def validate_threshold(cls, v, info):
          """
          Validates that the threshold is between 0 and 1 inclusive.
          """
-         if not 0 <= v <= 1:
-             error(f"Threshold must be between 0 and 1, got: {v}")
-             raise ValueError(f"Threshold must be between 0 and 1, got: {v}")
+         score_type = info.data.get('score_type')
+         if score_type in UNBOUNDED_SCORERS:
+             if v < 0:
+                 error(f"Threshold for {score_type} must be greater than 0, got: {v}")
+                 raise ValueError(f"Threshold for {score_type} must be greater than 0, got: {v}")
+         else:
+             if not 0 <= v <= 1:
+                 error(f"Threshold for {score_type} must be between 0 and 1, got: {v}")
+                 raise ValueError(f"Threshold for {score_type} must be between 0 and 1, got: {v}")
          return v

      @field_validator('score_type')
judgeval/scorers/base_scorer.py CHANGED
@@ -7,7 +7,7 @@ Scores `Example`s using ready-made Judgment evaluators.
  from pydantic import BaseModel, field_validator
  from judgeval.common.logger import debug, info, warning, error

- from judgeval.constants import APIScorer
+ from judgeval.constants import APIScorer, UNBOUNDED_SCORERS


  class APIJudgmentScorer(BaseModel):
@@ -18,17 +18,23 @@ class APIJudgmentScorer(BaseModel):
          score_type (APIScorer): The Judgment metric to use for scoring `Example`s
          threshold (float): A value between 0 and 1 that determines the scoring threshold
      """
-     threshold: float
      score_type: APIScorer
+     threshold: float

      @field_validator('threshold')
-     def validate_threshold(cls, v):
+     def validate_threshold(cls, v, info):
          """
          Validates that the threshold is between 0 and 1 inclusive.
          """
-         if not 0 <= v <= 1:
-             error(f"Threshold must be between 0 and 1, got: {v}")
-             raise ValueError(f"Threshold must be between 0 and 1, got: {v}")
+         score_type = info.data.get('score_type')
+         if score_type in UNBOUNDED_SCORERS:
+             if v < 0:
+                 error(f"Threshold for {score_type} must be greater than 0, got: {v}")
+                 raise ValueError(f"Threshold for {score_type} must be greater than 0, got: {v}")
+         else:
+             if not 0 <= v <= 1:
+                 error(f"Threshold for {score_type} must be between 0 and 1, got: {v}")
+                 raise ValueError(f"Threshold for {score_type} must be between 0 and 1, got: {v}")
          return v

      @field_validator('score_type')
judgeval/scorers/judgeval_scorer.py CHANGED
@@ -11,7 +11,7 @@ from abc import abstractmethod
  from judgeval.common.logger import debug, info, warning, error
  from judgeval.judges import JudgevalJudge
  from judgeval.judges.utils import create_judge
-
+ from judgeval.constants import UNBOUNDED_SCORERS

  class JudgevalScorer:
      """
@@ -58,8 +58,12 @@ class JudgevalScorer:
          additional_metadata: Optional[Dict] = None
      ):
          debug(f"Initializing JudgevalScorer with score_type={score_type}, threshold={threshold}")
-         if not 0 <= threshold <= 1:
-             raise ValueError("Threshold must be between 0 and 1")
+         if score_type in UNBOUNDED_SCORERS:
+             if threshold < 0:
+                 raise ValueError(f"Threshold for {score_type} must be greater than 0, got: {threshold}")
+         else:
+             if not 0 <= threshold <= 1:
+                 raise ValueError(f"Threshold for {score_type} must be between 0 and 1, got: {threshold}")
          if strict_mode:
              warning("Strict mode enabled - scoring will be more rigorous")
          info(f"JudgevalScorer initialized with evaluation_model: {evaluation_model}")
judgeval/scorers/judgeval_scorers/__init__.py CHANGED
@@ -1,5 +1,4 @@
  from typing import Type, Optional, Any
- from functools import wraps

  # Import implementations
  from judgeval.scorers.judgeval_scorers.api_scorers import (
@@ -12,7 +11,10 @@ from judgeval.scorers.judgeval_scorers.api_scorers import (
      ContextualPrecisionScorer as APIContextualPrecisionScorer,
      ContextualRecallScorer as APIContextualRecallScorer,
      AnswerRelevancyScorer as APIAnswerRelevancyScorer,
-     AnswerCorrectnessScorer as APIAnswerCorrectnessScorer,
+     AnswerCorrectnessScorer as APIAnswerCorrectnessScorer,
+     ComparisonScorer as APIComparisonScorer,
+     InstructionAdherenceScorer as APIInstructionAdherenceScorer,
+     GroundednessScorer as APIGroundednessScorer,
  )

  from judgeval.scorers.judgeval_scorers.local_implementations import (
@@ -25,7 +27,9 @@ from judgeval.scorers.judgeval_scorers.local_implementations import (
      ToolCorrectnessScorer as LocalToolCorrectnessScorer,
      HallucinationScorer as LocalHallucinationScorer,
      SummarizationScorer as LocalSummarizationScorer,
-     AnswerCorrectnessScorer as LocalAnswerCorrectnessScorer
+     AnswerCorrectnessScorer as LocalAnswerCorrectnessScorer,
+     ComparisonScorer as LocalComparisonScorer,
+     InstructionAdherenceScorer as LocalInstructionAdherenceScorer,
  )

  from judgeval.scorers.judgeval_scorers.classifiers import Text2SQLScorer
@@ -134,6 +138,21 @@ ContextualRecallScorer = ScorerWrapper(
      local_implementation=LocalContextualRecallScorer
  )

+ InstructionAdherenceScorer = ScorerWrapper(
+     api_implementation=APIInstructionAdherenceScorer,
+     local_implementation=LocalInstructionAdherenceScorer
+ )
+
+ def ComparisonScorer(threshold: float, criteria: str, description: str):
+     return ScorerWrapper(
+         api_implementation=APIComparisonScorer,
+         local_implementation=LocalComparisonScorer
+     )(threshold=threshold, criteria=criteria, description=description)
+
+ GroundednessScorer = ScorerWrapper(
+     api_implementation=APIGroundednessScorer,
+ )
+
  __all__ = [
      "ToolCorrectnessScorer",
      "JSONCorrectnessScorer",
@@ -145,4 +164,6 @@ __all__ = [
      "ContextualRecallScorer",
      "AnswerRelevancyScorer",
      "Text2SQLScorer",
+     "ComparisonScorer",
+     "GroundednessScorer",
  ]
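
Based only on the wrapper definitions above, the new scorers would be used from judgeval.scorers roughly as follows. This is a hedged sketch: how ScorerWrapper resolves to the API or local implementation is not shown in this diff, and the call pattern for the non-factory wrappers is inferred from the ComparisonScorer factory above.

from judgeval.scorers import ComparisonScorer, GroundednessScorer, InstructionAdherenceScorer

# ComparisonScorer is a factory function: it builds a ScorerWrapper and immediately
# calls it with the comparison-specific arguments.
comparison = ComparisonScorer(
    threshold=2,
    criteria="tone",
    description="The response should match the tone of the expected output.",
)

# GroundednessScorer registers no local implementation, so it presumably always
# resolves to the API scorer; InstructionAdherenceScorer registers both.
groundedness = GroundednessScorer(threshold=0.8)      # assumed call pattern via ScorerWrapper
adherence = InstructionAdherenceScorer(threshold=0.9)  # assumed call pattern via ScorerWrapper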
judgeval/scorers/judgeval_scorers/api_scorers/__init__.py CHANGED
@@ -8,6 +8,9 @@ from judgeval.scorers.judgeval_scorers.api_scorers.contextual_precision import C
  from judgeval.scorers.judgeval_scorers.api_scorers.contextual_recall import ContextualRecallScorer
  from judgeval.scorers.judgeval_scorers.api_scorers.answer_relevancy import AnswerRelevancyScorer
  from judgeval.scorers.judgeval_scorers.api_scorers.answer_correctness import AnswerCorrectnessScorer
+ from judgeval.scorers.judgeval_scorers.api_scorers.comparison import ComparisonScorer
+ from judgeval.scorers.judgeval_scorers.api_scorers.instruction_adherence import InstructionAdherenceScorer
+ from judgeval.scorers.judgeval_scorers.api_scorers.groundedness import GroundednessScorer

  __all__ = [
      "ToolCorrectnessScorer",
@@ -20,4 +23,7 @@ __all__ = [
      "ContextualRecallScorer",
      "AnswerRelevancyScorer",
      "AnswerCorrectnessScorer",
+     "ComparisonScorer",
+     "InstructionAdherenceScorer",
+     "GroundednessScorer",
  ]
judgeval/scorers/judgeval_scorers/api_scorers/comparison.py ADDED
@@ -0,0 +1,35 @@
+ """
+ `judgeval` comparison scorer
+
+ TODO add link to docs page for this scorer
+
+ """
+
+ # Internal imports
+ from judgeval.scorers.api_scorer import APIJudgmentScorer
+ from judgeval.constants import APIScorer
+ from typing import Optional, Dict
+
+ class ComparisonScorer(APIJudgmentScorer):
+     kwargs: Optional[Dict] = None
+
+     def __init__(self, threshold: float, criteria: str, description: str):
+         super().__init__(threshold=threshold, score_type=APIScorer.COMPARISON)
+         self.kwargs = {"criteria": criteria, "description": description}
+
+     @property
+     def __name__(self):
+         return f"Comparison-{self.kwargs['criteria']}"
+
+     def to_dict(self) -> dict:
+         """
+         Converts the scorer configuration to a dictionary format.
+
+         Returns:
+             dict: A dictionary containing the scorer's configuration
+         """
+         return {
+             "score_type": self.score_type,
+             "threshold": self.threshold,
+             "kwargs": self.kwargs
+         }
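
A short sketch of what this API-side ComparisonScorer carries, using only the constructor and to_dict() defined above. The threshold of 2 assumes APIScorer.COMPARISON is in UNBOUNDED_SCORERS, which this diff implies but does not show.

from judgeval.scorers.judgeval_scorers.api_scorers.comparison import ComparisonScorer

scorer = ComparisonScorer(
    threshold=2,
    criteria="tone",
    description="The response should match the expected output's tone.",
)
print(scorer.__name__)   # "Comparison-tone"
print(scorer.to_dict())  # roughly: {"score_type": APIScorer.COMPARISON, "threshold": 2.0, "kwargs": {...}}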
judgeval/scorers/judgeval_scorers/api_scorers/groundedness.py ADDED
@@ -0,0 +1,19 @@
+ """
+ `judgeval` Groundedness scorer
+
+ TODO add link to docs page for this scorer
+
+ """
+
+ # Internal imports
+ from judgeval.scorers.api_scorer import APIJudgmentScorer
+ from judgeval.constants import APIScorer
+
+
+ class GroundednessScorer(APIJudgmentScorer):
+     def __init__(self, threshold: float):
+         super().__init__(threshold=threshold, score_type=APIScorer.GROUNDEDNESS)
+
+     @property
+     def __name__(self):
+         return "Groundedness"
judgeval/scorers/judgeval_scorers/api_scorers/instruction_adherence.py ADDED
@@ -0,0 +1,19 @@
+ """
+ `judgeval` instruction adherence scorer
+
+ TODO add link to docs page for this scorer
+
+ """
+
+ # Internal imports
+ from judgeval.scorers.api_scorer import APIJudgmentScorer
+ from judgeval.constants import APIScorer
+
+
+ class InstructionAdherenceScorer(APIJudgmentScorer):
+     def __init__(self, threshold: float):
+         super().__init__(threshold=threshold, score_type=APIScorer.INSTRUCTION_ADHERENCE)
+
+     @property
+     def __name__(self):
+         return "Instruction Adherence"
judgeval/scorers/judgeval_scorers/local_implementations/__init__.py CHANGED
@@ -8,11 +8,13 @@ from judgeval.scorers.judgeval_scorers.local_implementations.tool_correctness.to
  from judgeval.scorers.judgeval_scorers.local_implementations.hallucination.hallucination_scorer import HallucinationScorer
  from judgeval.scorers.judgeval_scorers.local_implementations.summarization.summarization_scorer import SummarizationScorer
  from judgeval.scorers.judgeval_scorers.local_implementations.answer_correctness.answer_correctness_scorer import AnswerCorrectnessScorer
-
+ from judgeval.scorers.judgeval_scorers.local_implementations.comparison.comparison_scorer import ComparisonScorer
+ from judgeval.scorers.judgeval_scorers.local_implementations.instruction_adherence.instruction_adherence import InstructionAdherenceScorer

  __all__ = [
      "AnswerCorrectnessScorer",
      "AnswerRelevancyScorer",
+     "ComparisonScorer",
      "ContextualPrecisionScorer",
      "ContextualRecallScorer",
      "ContextualRelevancyScorer",
@@ -21,4 +23,5 @@ __all__ = [
      "ToolCorrectnessScorer",
      "HallucinationScorer",
      "SummarizationScorer",
+     "InstructionAdherenceScorer",
  ]
judgeval/scorers/judgeval_scorers/local_implementations/answer_correctness/answer_correctness_scorer.py CHANGED
@@ -1,5 +1,4 @@
  from typing import Optional, List, Union, Tuple
- from pydantic import BaseModel

  from judgeval.constants import APIScorer
  from judgeval.judges import JudgevalJudge
judgeval/scorers/judgeval_scorers/local_implementations/answer_correctness/prompts.py CHANGED
@@ -2,8 +2,8 @@
  Util prompts for AnswerCorrectnessScorer
  """

- from typing import List, Optional, Tuple
- from pydantic import BaseModel, Field
+ from typing import List, Tuple
+ from pydantic import BaseModel


  # BaseModels to enforce formatting in LLM JSON response
judgeval/scorers/judgeval_scorers/local_implementations/answer_relevancy/answer_relevancy_scorer.py CHANGED
@@ -1,12 +1,13 @@
  from typing import Optional, List, Union, Tuple

  from judgeval.constants import APIScorer
- from judgeval.scorers.utils import (get_or_create_event_loop,
-                                     scorer_progress_meter,
-                                     create_verbose_logs,
-                                     parse_response_json,
-                                     check_example_params
-                                     )
+ from judgeval.scorers.utils import (
+     get_or_create_event_loop,
+     scorer_progress_meter,
+     create_verbose_logs,
+     parse_response_json,
+     check_example_params
+ )
  from judgeval.scorers import JudgevalScorer
  from judgeval.judges import JudgevalJudge
  from judgeval.judges.utils import create_judge
judgeval/scorers/judgeval_scorers/local_implementations/answer_relevancy/prompts.py CHANGED
@@ -2,8 +2,8 @@
  Util prompts for AnswerRelevancyScorer
  """

- from typing import List, Optional, Tuple
- from pydantic import BaseModel, Field
+ from typing import List, Tuple
+ from pydantic import BaseModel


  # BaseModels to enforce formatting in LLM JSON response
judgeval/scorers/judgeval_scorers/local_implementations/comparison/comparison_scorer.py ADDED
@@ -0,0 +1,161 @@
+ from typing import Optional, Union, List
+ from pydantic import BaseModel
+
+ from judgeval.constants import APIScorer
+ from judgeval.scorers import JudgevalScorer
+ from judgeval.judges import JudgevalJudge
+ from judgeval.judges.utils import create_judge
+ from judgeval.data import Example, ExampleParams
+ from judgeval.scorers.utils import (
+     get_or_create_event_loop,
+     scorer_progress_meter,
+     create_verbose_logs,
+     parse_response_json,
+     check_example_params
+ )
+ from .prompts import ComparisonTemplate
+
+ required_params = [
+     ExampleParams.INPUT,
+     ExampleParams.ACTUAL_OUTPUT,
+     ExampleParams.EXPECTED_OUTPUT,
+ ]
+
+ class ComparisonDifference(BaseModel):
+     actual_output_sentence: str
+     expected_output_sentence: str
+     reason: str
+
+ class ComparisonDifferences(BaseModel):
+     differences: List[ComparisonDifference]
+
+ class ComparisonScorer(JudgevalScorer):
+     def __init__(
+         self,
+         criteria: str,
+         description: str,
+         threshold: float = 1,
+         model: Optional[Union[str, JudgevalJudge]] = None,
+         include_reason: bool = True,
+         async_mode: bool = True,
+         verbose_mode: bool = False,
+     ):
+         super().__init__(
+             score_type=APIScorer.COMPARISON,
+             threshold=threshold,
+             evaluation_model=None,
+             include_reason=include_reason,
+             async_mode=async_mode,
+             verbose_mode=verbose_mode
+         )
+         self.model, self.using_native_model = create_judge(model)
+         self.evaluation_model = self.model.get_model_name()
+         self.criteria = criteria
+         self.description = description
+
+     def score_example(
+         self,
+         example: Example,
+         _show_indicator: bool = True,
+     ) -> float:
+         check_example_params(example, required_params, self)
+
+         with scorer_progress_meter(self, display_meter=_show_indicator):
+             if self.async_mode:
+                 loop = get_or_create_event_loop()
+                 loop.run_until_complete(
+                     self.a_score_example(
+                         example,
+                         _show_indicator=False
+                     )
+                 )
+             else:
+                 self.differences = self._find_differences(example)
+                 self.score = len(self.differences)
+                 self.reason = str(self.differences)
+                 self.success = self.score <= self.threshold
+                 self.verbose_logs = create_verbose_logs(
+                     self,
+                     steps=[
+                         f"Score: {self.score}\nReason: {self.reason}",
+                     ],
+                 )
+
+             return len(self.differences)
+
+     async def a_score_example(
+         self,
+         example: Example,
+         _show_indicator: bool = True
+     ) -> float:
+         check_example_params(example, required_params, self)
+
+         with scorer_progress_meter(
+             self, async_mode=True, display_meter=_show_indicator
+         ):
+             self.differences = self.a_find_differences(example)
+             self.score = len(self.differences)
+             self.reason = str(self.differences)
+             self.success = self.score <= self.threshold
+             self.verbose_logs = create_verbose_logs(
+                 self,
+                 steps=[
+                     f"Score: {self.score}\nReason: {self.reason}",
+                 ],
+             )
+
+             return self.score
+
+     def _find_differences(self, example: Example) -> float:
+         prompt = ComparisonTemplate.find_differences(
+             criteria=self.criteria,
+             description=self.description,
+             actual_output=example.actual_output,
+             expected_output=example.expected_output
+         )
+         if self.using_native_model:
+             res = self.model.generate(prompt)
+             data = parse_response_json(res, self)
+             return data["differences"]
+         else:
+             try:
+                 res: ComparisonDifferences = self.model.generate(prompt, schema=ComparisonDifferences)
+                 return res.differences
+             except TypeError:
+                 res = self.model.generate(prompt)
+                 data = parse_response_json(res, self)
+                 return data["differences"]
+
+     async def a_find_differences(self, example: Example) -> float:
+         prompt = ComparisonTemplate.find_differences(
+             criteria=self.criteria,
+             description=self.description,
+             actual_output=example.actual_output,
+             expected_output=example.expected_output
+         )
+         if self.using_native_model:
+             res = await self.model.a_generate(prompt)
+             data = parse_response_json(res, self)
+             return data["differences"]
+         else:
+             try:
+                 res: ComparisonDifferences = await self.model.a_generate(prompt, schema=ComparisonDifferences)
+                 return res.differences
+             except TypeError:
+                 res = await self.model.a_generate(prompt)
+                 data = parse_response_json(res, self)
+                 return data["differences"]
+
+     def _success_check(self) -> bool:
+         if self.error is not None:
+             self.success = False
+         else:
+             try:
+                 self.success = self.score <= self.threshold
+             except:
+                 self.success = False
+         return self.success
+
+     @property
+     def __name__(self):
+         return f"Comparison - {self.criteria}"