judgeval 0.0.2__py3-none-any.whl → 0.0.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67)
  1. judgeval/__init__.py +0 -71
  2. judgeval/common/tracer.py +57 -31
  3. judgeval/constants.py +1 -0
  4. judgeval/data/__init__.py +2 -1
  5. judgeval/data/scorer_data.py +2 -2
  6. judgeval/evaluation_run.py +16 -15
  7. judgeval/judges/__init__.py +2 -2
  8. judgeval/judges/base_judge.py +1 -1
  9. judgeval/judges/litellm_judge.py +2 -2
  10. judgeval/judges/mixture_of_judges.py +2 -2
  11. judgeval/judges/together_judge.py +2 -2
  12. judgeval/judges/utils.py +4 -4
  13. judgeval/judgment_client.py +67 -15
  14. judgeval/run_evaluation.py +79 -14
  15. judgeval/scorers/__init__.py +8 -4
  16. judgeval/scorers/api_scorer.py +64 -0
  17. judgeval/scorers/base_scorer.py +3 -2
  18. judgeval/scorers/exceptions.py +11 -0
  19. judgeval/scorers/{custom_scorer.py → judgeval_scorer.py} +9 -5
  20. judgeval/scorers/judgeval_scorers/__init__.py +132 -9
  21. judgeval/scorers/judgeval_scorers/api_scorers/__init__.py +23 -0
  22. judgeval/scorers/judgeval_scorers/api_scorers/answer_correctness.py +19 -0
  23. judgeval/scorers/judgeval_scorers/{answer_relevancy.py → api_scorers/answer_relevancy.py} +2 -2
  24. judgeval/scorers/judgeval_scorers/{contextual_precision.py → api_scorers/contextual_precision.py} +2 -2
  25. judgeval/scorers/judgeval_scorers/{contextual_recall.py → api_scorers/contextual_recall.py} +2 -2
  26. judgeval/scorers/judgeval_scorers/{contextual_relevancy.py → api_scorers/contextual_relevancy.py} +2 -2
  27. judgeval/scorers/judgeval_scorers/{faithfulness.py → api_scorers/faithfulness.py} +2 -2
  28. judgeval/scorers/judgeval_scorers/{hallucination.py → api_scorers/hallucination.py} +2 -2
  29. judgeval/scorers/judgeval_scorers/{json_correctness.py → api_scorers/json_correctness.py} +7 -7
  30. judgeval/scorers/judgeval_scorers/{summarization.py → api_scorers/summarization.py} +2 -2
  31. judgeval/scorers/judgeval_scorers/{tool_correctness.py → api_scorers/tool_correctness.py} +2 -2
  32. judgeval/scorers/judgeval_scorers/local_implementations/__init__.py +24 -0
  33. judgeval/scorers/judgeval_scorers/local_implementations/answer_correctness/__init__.py +4 -0
  34. judgeval/scorers/judgeval_scorers/local_implementations/answer_correctness/answer_correctness_scorer.py +272 -0
  35. judgeval/scorers/judgeval_scorers/local_implementations/answer_correctness/prompts.py +169 -0
  36. judgeval/scorers/judgeval_scorers/local_implementations/answer_relevancy/__init__.py +4 -0
  37. judgeval/scorers/judgeval_scorers/local_implementations/answer_relevancy/answer_relevancy_scorer.py +292 -0
  38. judgeval/scorers/judgeval_scorers/local_implementations/answer_relevancy/prompts.py +174 -0
  39. judgeval/scorers/judgeval_scorers/local_implementations/contextual_precision/__init__.py +3 -0
  40. judgeval/scorers/judgeval_scorers/local_implementations/contextual_precision/contextual_precision_scorer.py +259 -0
  41. judgeval/scorers/judgeval_scorers/local_implementations/contextual_precision/prompts.py +106 -0
  42. judgeval/scorers/judgeval_scorers/local_implementations/contextual_recall/__init__.py +3 -0
  43. judgeval/scorers/judgeval_scorers/local_implementations/contextual_recall/contextual_recall_scorer.py +249 -0
  44. judgeval/scorers/judgeval_scorers/local_implementations/contextual_recall/prompts.py +142 -0
  45. judgeval/scorers/judgeval_scorers/local_implementations/contextual_relevancy/__init__.py +3 -0
  46. judgeval/scorers/judgeval_scorers/local_implementations/contextual_relevancy/contextual_relevancy_scorer.py +240 -0
  47. judgeval/scorers/judgeval_scorers/local_implementations/contextual_relevancy/prompts.py +121 -0
  48. judgeval/scorers/judgeval_scorers/local_implementations/faithfulness/__init__.py +3 -0
  49. judgeval/scorers/judgeval_scorers/local_implementations/faithfulness/faithfulness_scorer.py +318 -0
  50. judgeval/scorers/judgeval_scorers/local_implementations/faithfulness/prompts.py +265 -0
  51. judgeval/scorers/judgeval_scorers/local_implementations/hallucination/__init__.py +3 -0
  52. judgeval/scorers/judgeval_scorers/local_implementations/hallucination/hallucination_scorer.py +258 -0
  53. judgeval/scorers/judgeval_scorers/local_implementations/hallucination/prompts.py +104 -0
  54. judgeval/scorers/judgeval_scorers/local_implementations/json_correctness/json_correctness_scorer.py +127 -0
  55. judgeval/scorers/judgeval_scorers/local_implementations/summarization/__init__.py +3 -0
  56. judgeval/scorers/judgeval_scorers/local_implementations/summarization/prompts.py +247 -0
  57. judgeval/scorers/judgeval_scorers/local_implementations/summarization/summarization_scorer.py +541 -0
  58. judgeval/scorers/judgeval_scorers/local_implementations/tool_correctness/__init__.py +3 -0
  59. judgeval/scorers/judgeval_scorers/local_implementations/tool_correctness/tool_correctness_scorer.py +151 -0
  60. judgeval/scorers/prompt_scorer.py +4 -4
  61. judgeval/scorers/score.py +14 -14
  62. judgeval/scorers/utils.py +40 -6
  63. {judgeval-0.0.2.dist-info → judgeval-0.0.4.dist-info}/METADATA +11 -12
  64. judgeval-0.0.4.dist-info/RECORD +78 -0
  65. judgeval-0.0.2.dist-info/RECORD +0 -46
  66. {judgeval-0.0.2.dist-info → judgeval-0.0.4.dist-info}/WHEEL +0 -0
  67. {judgeval-0.0.2.dist-info → judgeval-0.0.4.dist-info}/licenses/LICENSE.md +0 -0
judgeval/run_evaluation.py
@@ -10,8 +10,8 @@ from judgeval.data import (
     ScoringResult
 )
 from judgeval.scorers import (
-    CustomScorer,
-    JudgmentScorer,
+    JudgevalScorer,
+    APIJudgmentScorer,
     ClassifierScorer
 )
 from judgeval.scorers.score import a_execute_scoring
@@ -64,7 +64,7 @@ def execute_api_eval(evaluation_run: EvaluationRun) -> List[Dict]:
 
 def merge_results(api_results: List[ScoringResult], local_results: List[ScoringResult]) -> List[ScoringResult]:
     """
-    When executing scorers that come from both the Judgment API and custom scorers, we're left with
+    When executing scorers that come from both the Judgment API and local scorers, we're left with
     results for each type of scorer. This function merges the results from the API and local evaluations,
     grouped by example. In particular, we merge the `scorers_data` field of each `ScoringResult` object.
 
@@ -127,6 +127,7 @@ def check_missing_scorer_data(results: List[ScoringResult]) -> List[ScoringResult]:
         )
     return results
 
+
 def check_eval_run_name_exists(eval_name: str, project_name: str, judgment_api_key: str) -> None:
     """
     Checks if an evaluation run name already exists for a given project.
@@ -164,6 +165,7 @@ def check_eval_run_name_exists(eval_name: str, project_name: str, judgment_api_key: str) -> None:
         error(f"Failed to check if eval run name exists: {str(e)}")
         raise JudgmentAPIError(f"Failed to check if eval run name exists: {str(e)}")
 
+
 def log_evaluation_results(merged_results: List[ScoringResult], evaluation_run: EvaluationRun) -> None:
     """
     Logs evaluation results to the Judgment API database.
@@ -203,6 +205,7 @@ def log_evaluation_results(merged_results: List[ScoringResult], evaluation_run: EvaluationRun) -> None:
         error(f"Failed to save evaluation results to DB: {str(e)}")
         raise ValueError(f"Failed to save evaluation results to DB: {str(e)}")
 
+
 def run_eval(evaluation_run: EvaluationRun, override: bool = False) -> List[ScoringResult]:
     """
     Executes an evaluation of `Example`s using one or more `Scorer`s
@@ -214,7 +217,7 @@ def run_eval(evaluation_run: EvaluationRun, override: bool = False) -> List[ScoringResult]:
         project_name (str): The name of the project the evaluation results belong to
         eval_name (str): The name of the evaluation run
         examples (List[Example]): The examples to evaluate
-        scorers (List[Union[JudgmentScorer, CustomScorer]]): A list of scorers to use for evaluation
+        scorers (List[Union[JudgmentScorer, JudgevalScorer]]): A list of scorers to use for evaluation
         model (str): The model used as a judge when using LLM as a Judge
         aggregator (Optional[str]): The aggregator to use for evaluation if using Mixture of Judges
         metadata (Optional[Dict[str, Any]]): Additional metadata to include for this evaluation run, e.g. comments, dataset name, purpose, etc.
@@ -254,19 +257,19 @@ def run_eval(evaluation_run: EvaluationRun, override: bool = False) -> List[ScoringResult]:
 
     debug(f"Starting evaluation run with {len(evaluation_run.examples)} examples")
 
-    # Group JudgmentScorers and CustomScorers, then evaluate them in parallel
+    # Group APIJudgmentScorers and JudgevalScorers, then evaluate them in parallel
     debug("Grouping scorers by type")
-    judgment_scorers: List[JudgmentScorer] = []
-    custom_scorers: List[CustomScorer] = []
+    judgment_scorers: List[APIJudgmentScorer] = []
+    local_scorers: List[JudgevalScorer] = []
     for scorer in evaluation_run.scorers:
-        if isinstance(scorer, (JudgmentScorer, ClassifierScorer)):
+        if isinstance(scorer, (APIJudgmentScorer, ClassifierScorer)):
             judgment_scorers.append(scorer)
             debug(f"Added judgment scorer: {type(scorer).__name__}")
         else:
-            custom_scorers.append(scorer)
-            debug(f"Added custom scorer: {type(scorer).__name__}")
+            local_scorers.append(scorer)
+            debug(f"Added local scorer: {type(scorer).__name__}")
 
-    debug(f"Found {len(judgment_scorers)} judgment scorers and {len(custom_scorers)} custom scorers")
+    debug(f"Found {len(judgment_scorers)} judgment scorers and {len(local_scorers)} local scorers")
 
     api_results: List[ScoringResult] = []
     local_results: List[ScoringResult] = []
@@ -288,7 +291,7 @@ def run_eval(evaluation_run: EvaluationRun, override: bool = False) -> List[ScoringResult]:
                 log_results=evaluation_run.log_results
             )
             debug("Sending request to Judgment API")
-            response_data: List[Dict] = execute_api_eval(api_evaluation_run)  # ScoringResults
+            response_data: List[Dict] = execute_api_eval(api_evaluation_run)  # Dicts are `ScoringResult` objs
             info(f"Received {len(response_data['results'])} results from API")
         except JudgmentAPIError as e:
             error(f"An error occurred while executing the Judgment API request: {str(e)}")
@@ -317,7 +320,7 @@ def run_eval(evaluation_run: EvaluationRun, override: bool = False) -> List[ScoringResult]:
             api_results.append(ScoringResult(**filtered_result))
 
     # Run local evals
-    if custom_scorers:  # List[CustomScorer]
+    if local_scorers:  # List[JudgevalScorer]
         info("Starting local evaluation")
         for example in evaluation_run.examples:
             with example_logging_context(example.timestamp, example.example_id):
@@ -326,7 +329,7 @@ def run_eval(evaluation_run: EvaluationRun, override: bool = False) -> List[ScoringResult]:
         results: List[ScoringResult] = asyncio.run(
             a_execute_scoring(
                 evaluation_run.examples,
-                custom_scorers,
+                local_scorers,
                 model=evaluation_run.model,
                 ignore_errors=True,
                 skip_on_missing_params=True,
@@ -353,3 +356,65 @@ def run_eval(evaluation_run: EvaluationRun, override: bool = False) -> List[ScoringResult]:
         if not result.scorers_data:  # none of the scorers could be executed on this example
             info(f"None of the scorers could be executed on example {i}. This is usually because the Example is missing the fields needed by the scorers. Try checking that the Example has the necessary fields for your scorers.")
     return merged_results
+
+def assert_test(scoring_results: List[ScoringResult]) -> None:
+    """
+    Collects all failed scorers from the scoring results.
+
+    Args:
+        ScoringResults (List[ScoringResult]): List of scoring results to check
+
+    Returns:
+        None. Raises exceptions for any failed test cases.
+    """
+    failed_cases: List[ScorerData] = []
+
+    for result in scoring_results:
+        if not result.success:
+
+            # Create a test case context with all relevant fields
+            test_case = {
+                'input': result.input,
+                'actual_output': result.actual_output,
+                'expected_output': result.expected_output,
+                'context': result.context,
+                'retrieval_context': result.retrieval_context,
+                'eval_run_name': result.eval_run_name,
+                'failed_scorers': []
+            }
+            if result.scorers_data:
+                # If the result was not successful, check each scorer_data
+                for scorer_data in result.scorers_data:
+                    if not scorer_data.success:
+                        test_case['failed_scorers'].append(scorer_data)
+            failed_cases.append(test_case)
+
+    if failed_cases:
+        error_msg = f"The following test cases failed: \n"
+        for fail_case in failed_cases:
+            error_msg += f"\nInput: {fail_case['input']}\n"
+            error_msg += f"Actual Output: {fail_case['actual_output']}\n"
+            error_msg += f"Expected Output: {fail_case['expected_output']}\n"
+            error_msg += f"Context: {fail_case['context']}\n"
+            error_msg += f"Retrieval Context: {fail_case['retrieval_context']}\n"
+            error_msg += f"Eval Run Name: {fail_case['eval_run_name']}\n"
+
+            for fail_scorer in fail_case['failed_scorers']:
+
+                error_msg += (
+                    f"\nScorer Name: {fail_scorer.name}\n"
+                    f"Threshold: {fail_scorer.threshold}\n"
+                    f"Success: {fail_scorer.success}\n"
+                    f"Score: {fail_scorer.score}\n"
+                    f"Reason: {fail_scorer.reason}\n"
+                    f"Strict Mode: {fail_scorer.strict_mode}\n"
+                    f"Evaluation Model: {fail_scorer.evaluation_model}\n"
+                    f"Error: {fail_scorer.error}\n"
+                    f"Evaluation Cost: {fail_scorer.evaluation_cost}\n"
+                    f"Verbose Logs: {fail_scorer.verbose_logs}\n"
+                    f"Additional Metadata: {fail_scorer.additional_metadata}\n"
+                )
+            error_msg += "-"*100
+
+        raise AssertionError(error_msg)
+
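The new `assert_test` helper turns a list of `ScoringResult`s into a hard pass/fail signal, which makes `run_eval` usable inside a test suite. A minimal usage sketch, assuming `my_run` is an `EvaluationRun` constructed elsewhere (its fields are not part of this hunk):

from judgeval.run_evaluation import run_eval, assert_test

results = run_eval(my_run, override=True)  # List[ScoringResult]
assert_test(results)  # raises AssertionError describing every failed scorer; returns None otherwise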
judgeval/scorers/__init__.py
@@ -1,5 +1,5 @@
-from judgeval.scorers.base_scorer import JudgmentScorer
-from judgeval.scorers.custom_scorer import CustomScorer
+from judgeval.scorers.api_scorer import APIJudgmentScorer
+from judgeval.scorers.judgeval_scorer import JudgevalScorer
 from judgeval.scorers.prompt_scorer import PromptScorer, ClassifierScorer
 from judgeval.scorers.judgeval_scorers import (
     ToolCorrectnessScorer,
@@ -11,11 +11,13 @@ from judgeval.scorers.judgeval_scorers import (
     ContextualPrecisionScorer,
     ContextualRecallScorer,
     AnswerRelevancyScorer,
+    ScorerWrapper,
+    AnswerCorrectnessScorer,
 )
 
 __all__ = [
-    "JudgmentScorer",
-    "CustomScorer",
+    "APIJudgmentScorer",
+    "JudgevalScorer",
     "PromptScorer",
     "ClassifierScorer",
     "ToolCorrectnessScorer",
@@ -27,4 +29,6 @@ __all__ = [
     "ContextualPrecisionScorer",
     "ContextualRecallScorer",
     "AnswerRelevancyScorer",
+    "ScorerWrapper",
+    "AnswerCorrectnessScorer",
 ]
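For callers, the main effect of this hunk is a rename of the two base classes exported from `judgeval.scorers`; code written against 0.0.2 only needs its imports updated:

# 0.0.2
from judgeval.scorers import JudgmentScorer, CustomScorer

# 0.0.4
from judgeval.scorers import APIJudgmentScorer, JudgevalScorer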
judgeval/scorers/api_scorer.py (new file)
@@ -0,0 +1,64 @@
+"""
+Judgment Scorer class.
+
+Scores `Example`s using ready-made Judgment evaluators.
+"""
+
+from pydantic import BaseModel, field_validator
+from judgeval.common.logger import debug, info, warning, error
+
+from judgeval.constants import APIScorer
+
+
+class APIJudgmentScorer(BaseModel):
+    """
+    Class for ready-made, "out-of-the-box" scorer that uses Judgment evaluators to score `Example`s.
+
+    Args:
+        score_type (APIScorer): The Judgment metric to use for scoring `Example`s
+        threshold (float): A value between 0 and 1 that determines the scoring threshold
+    """
+    threshold: float
+    score_type: APIScorer
+
+    @field_validator('threshold')
+    def validate_threshold(cls, v):
+        """
+        Validates that the threshold is between 0 and 1 inclusive.
+        """
+        if not 0 <= v <= 1:
+            error(f"Threshold must be between 0 and 1, got: {v}")
+            raise ValueError(f"Threshold must be between 0 and 1, got: {v}")
+        return v
+
+    @field_validator('score_type')
+    def convert_to_enum_value(cls, v):
+        """
+        Validates that the `score_type` is a valid `JudgmentMetric` enum value.
+        Converts string values to `JudgmentMetric` enum values.
+        """
+        debug(f"Attempting to convert score_type value: {v}")
+        if isinstance(v, APIScorer):
+            info(f"Using existing JudgmentMetric: {v.value}")
+            return v.value
+        elif isinstance(v, str):
+            debug(f"Converting string value to JudgmentMetric enum: {v}")
+            return APIScorer[v.upper()].value
+        error(f"Invalid score_type value: {v}")
+        raise ValueError(f"Invalid value for score_type: {v}")
+
+    def __str__(self):
+        return f"JudgmentScorer(score_type={self.score_type}, threshold={self.threshold})"
+
+    def to_dict(self) -> dict:
+        """
+        Converts the scorer configuration to a dictionary format.
+
+        Returns:
+            dict: A dictionary containing the scorer's configuration
+        """
+        return {
+            "score_type": self.score_type,
+            "threshold": self.threshold
+        }
+
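Since `APIJudgmentScorer` is a pydantic model, both validators run at construction time. A minimal sketch of the expected behaviour, assuming the `APIScorer.FAITHFULNESS` member defined in judgeval/constants.py:

from judgeval.scorers.api_scorer import APIJudgmentScorer
from judgeval.constants import APIScorer

scorer = APIJudgmentScorer(threshold=0.7, score_type=APIScorer.FAITHFULNESS)
print(scorer)            # JudgmentScorer(score_type=..., threshold=0.7)
print(scorer.to_dict())  # {'score_type': ..., 'threshold': 0.7}

# A threshold outside [0, 1] is rejected by validate_threshold, so construction
# with threshold=1.5 fails validation instead of creating a scorer.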
judgeval/scorers/base_scorer.py
@@ -10,7 +10,7 @@ from judgeval.common.logger import debug, info, warning, error
 from judgeval.constants import APIScorer
 
 
-class JudgmentScorer(BaseModel):
+class APIJudgmentScorer(BaseModel):
     """
     Class for ready-made, "out-of-the-box" scorer that uses Judgment evaluators to score `Example`s.
 
@@ -48,4 +48,5 @@ class JudgmentScorer(BaseModel):
         raise ValueError(f"Invalid value for score_type: {v}")
 
     def __str__(self):
-        return f"JudgmentScorer(score_type={self.score_type}, threshold={self.threshold})"
+        return f"JudgmentScorer(score_type={self.score_type}, threshold={self.threshold})"
+
judgeval/scorers/exceptions.py (new file)
@@ -0,0 +1,11 @@
+"""
+Error handling for scorers
+
+"""
+
+
+class MissingExampleParamsError(Exception):
+    """
+    Error raised when a scorer is missing required example parameters.
+    """
+    pass
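The new exception gives local scorer implementations a dedicated error type for incomplete `Example`s. A small illustrative sketch (the helper below is hypothetical, not part of the package):

from judgeval.scorers.exceptions import MissingExampleParamsError

def require_retrieval_context(example):
    # Hypothetical guard a local scorer might use before scoring.
    if getattr(example, "retrieval_context", None) is None:
        raise MissingExampleParamsError("Example is missing `retrieval_context`")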
judgeval/scorers/{custom_scorer.py → judgeval_scorer.py}
@@ -9,15 +9,19 @@ from typing import Optional, Dict, Union, List
 from abc import abstractmethod
 
 from judgeval.common.logger import debug, info, warning, error
-from judgeval.judges import judgevalJudge
+from judgeval.judges import JudgevalJudge
 from judgeval.judges.utils import create_judge
 
 
-class CustomScorer:
+class JudgevalScorer:
     """
+    Base class for scorers in `judgeval`.
+
+    In practice, you should not implement this class unless you are creating a custom scorer.
+    Judgeval offers 10+ default scorers that you can use out of the box.
+
     If you want to create a scorer that does not fall under any of the ready-made Judgment scorers,
-    you can create a custom scorer by extending this class. This is best used for special use cases
-    where none of Judgment's scorers are suitable.
+    you can create a custom scorer by extending this class.
     """
     score_type: str  # name of your new scorer
     threshold: float  # The threshold to pass a test while using this scorer as a scorer
@@ -73,7 +77,7 @@ class CustomScorer:
         self.verbose_logs = verbose_logs
         self.additional_metadata = additional_metadata
 
-    def _add_model(self, model: Optional[Union[str, List[str], judgevalJudge]] = None):
+    def _add_model(self, model: Optional[Union[str, List[str], JudgevalJudge]] = None):
         """
         Adds the evaluation model to the CustomScorer instance
 
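`JudgevalScorer` is now the base class for user-written local scorers. Only part of its interface appears in this diff, so the subclass below is a rough sketch: the `score_type` and `threshold` attributes come from the hunk above, while the `__init__` signature, the `measure` method name, and the result attributes are assumptions.

from judgeval.scorers import JudgevalScorer

class ExactMatchScorer(JudgevalScorer):
    # Hypothetical custom scorer: passes when actual output equals expected output.
    def __init__(self, threshold: float = 1.0):
        super().__init__(score_type="Exact Match", threshold=threshold)  # assumed signature

    def measure(self, example):  # method name assumed; not shown in this diff
        self.score = float(example.actual_output == example.expected_output)
        self.success = self.score >= self.threshold
        self.reason = "exact string match" if self.success else "outputs differ"
        return self.score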
judgeval/scorers/judgeval_scorers/__init__.py
@@ -1,12 +1,135 @@
-from judgeval.scorers.judgeval_scorers.tool_correctness import ToolCorrectnessScorer
-from judgeval.scorers.judgeval_scorers.json_correctness import JSONCorrectnessScorer
-from judgeval.scorers.judgeval_scorers.summarization import SummarizationScorer
-from judgeval.scorers.judgeval_scorers.hallucination import HallucinationScorer
-from judgeval.scorers.judgeval_scorers.faithfulness import FaithfulnessScorer
-from judgeval.scorers.judgeval_scorers.contextual_relevancy import ContextualRelevancyScorer
-from judgeval.scorers.judgeval_scorers.contextual_precision import ContextualPrecisionScorer
-from judgeval.scorers.judgeval_scorers.contextual_recall import ContextualRecallScorer
-from judgeval.scorers.judgeval_scorers.answer_relevancy import AnswerRelevancyScorer
+from typing import Type, Optional, Any
+from functools import wraps
+
+# Import implementations
+from judgeval.scorers.judgeval_scorers.api_scorers import (
+    ToolCorrectnessScorer as APIToolCorrectnessScorer,
+    JSONCorrectnessScorer as APIJSONCorrectnessScorer,
+    SummarizationScorer as APISummarizationScorer,
+    HallucinationScorer as APIHallucinationScorer,
+    FaithfulnessScorer as APIFaithfulnessScorer,
+    ContextualRelevancyScorer as APIContextualRelevancyScorer,
+    ContextualPrecisionScorer as APIContextualPrecisionScorer,
+    ContextualRecallScorer as APIContextualRecallScorer,
+    AnswerRelevancyScorer as APIAnswerRelevancyScorer,
+    AnswerCorrectnessScorer as APIAnswerCorrectnessScorer,
+)
+
+from judgeval.scorers.judgeval_scorers.local_implementations import (
+    AnswerRelevancyScorer as LocalAnswerRelevancyScorer,
+    ContextualPrecisionScorer as LocalContextualPrecisionScorer,
+    ContextualRecallScorer as LocalContextualRecallScorer,
+    ContextualRelevancyScorer as LocalContextualRelevancyScorer,
+    FaithfulnessScorer as LocalFaithfulnessScorer,
+    JsonCorrectnessScorer as LocalJsonCorrectnessScorer,
+    ToolCorrectnessScorer as LocalToolCorrectnessScorer,
+    HallucinationScorer as LocalHallucinationScorer,
+    SummarizationScorer as LocalSummarizationScorer,
+    AnswerCorrectnessScorer as LocalAnswerCorrectnessScorer
+)
+
+class ScorerWrapper:
+    """
+    Wrapper class that can dynamically load either API or local implementation of a scorer.
+    """
+    def __init__(self, api_implementation: Type, local_implementation: Optional[Type] = None):
+        self.api_implementation = api_implementation
+        self.local_implementation = local_implementation
+        self._instance = None
+        self._init_args = None
+        self._init_kwargs = None
+
+    def __call__(self, *args, **kwargs):
+        """Store initialization arguments for later use when implementation is loaded"""
+        self._init_args = args
+        self._init_kwargs = kwargs
+        return self
+
+    def load_implementation(self, use_judgment: bool = True) -> Any:
+        """
+        Load the appropriate implementation based on the use_judgment flag.
+
+        Args:
+            use_judgment (bool): If True, use API implementation. If False, use local implementation.
+
+        Returns:
+            Instance of the appropriate implementation
+
+        Raises:
+            ValueError: If local implementation is requested but not available
+        """
+        if self._instance is not None:
+            return self._instance
+
+        if use_judgment:
+            implementation = self.api_implementation
+        else:
+            if self.local_implementation is None:
+                raise ValueError("No local implementation available for this scorer")
+            implementation = self.local_implementation
+
+        args = self._init_args or ()
+        kwargs = self._init_kwargs or {}
+        self._instance = implementation(*args, **kwargs)
+        return self._instance
+
+    def __getattr__(self, name):
+        """Defer all attribute access to the loaded implementation"""
+        if self._instance is None:
+            raise RuntimeError("Implementation not loaded. Call load_implementation() first")
+        return getattr(self._instance, name)
+
+# Create wrapped versions of all scorers
+
+AnswerCorrectnessScorer = ScorerWrapper(
+    api_implementation=APIAnswerCorrectnessScorer,
+    local_implementation=LocalAnswerCorrectnessScorer
+)
+
+AnswerRelevancyScorer = ScorerWrapper(
+    api_implementation=APIAnswerRelevancyScorer,
+    local_implementation=LocalAnswerRelevancyScorer
+)
+
+ToolCorrectnessScorer = ScorerWrapper(
+    api_implementation=APIToolCorrectnessScorer,
+    local_implementation=LocalToolCorrectnessScorer
+)
+
+JSONCorrectnessScorer = ScorerWrapper(
+    api_implementation=APIJSONCorrectnessScorer,
+    local_implementation=LocalJsonCorrectnessScorer
+)
+
+SummarizationScorer = ScorerWrapper(
+    api_implementation=APISummarizationScorer,
+    local_implementation=LocalSummarizationScorer
+)
+
+HallucinationScorer = ScorerWrapper(
+    api_implementation=APIHallucinationScorer,
+    local_implementation=LocalHallucinationScorer
+)
+
+FaithfulnessScorer = ScorerWrapper(
+    api_implementation=APIFaithfulnessScorer,
+    local_implementation=LocalFaithfulnessScorer
+)
+
+ContextualRelevancyScorer = ScorerWrapper(
+    api_implementation=APIContextualRelevancyScorer,
+    local_implementation=LocalContextualRelevancyScorer
+)
+
+ContextualPrecisionScorer = ScorerWrapper(
+    api_implementation=APIContextualPrecisionScorer,
+    local_implementation=LocalContextualPrecisionScorer
+)
+
+ContextualRecallScorer = ScorerWrapper(
+    api_implementation=APIContextualRecallScorer,
+    local_implementation=LocalContextualRecallScorer
+)
 
 __all__ = [
     "ToolCorrectnessScorer",
judgeval/scorers/judgeval_scorers/api_scorers/__init__.py (new file)
@@ -0,0 +1,23 @@
+from judgeval.scorers.judgeval_scorers.api_scorers.tool_correctness import ToolCorrectnessScorer
+from judgeval.scorers.judgeval_scorers.api_scorers.json_correctness import JSONCorrectnessScorer
+from judgeval.scorers.judgeval_scorers.api_scorers.summarization import SummarizationScorer
+from judgeval.scorers.judgeval_scorers.api_scorers.hallucination import HallucinationScorer
+from judgeval.scorers.judgeval_scorers.api_scorers.faithfulness import FaithfulnessScorer
+from judgeval.scorers.judgeval_scorers.api_scorers.contextual_relevancy import ContextualRelevancyScorer
+from judgeval.scorers.judgeval_scorers.api_scorers.contextual_precision import ContextualPrecisionScorer
+from judgeval.scorers.judgeval_scorers.api_scorers.contextual_recall import ContextualRecallScorer
+from judgeval.scorers.judgeval_scorers.api_scorers.answer_relevancy import AnswerRelevancyScorer
+from judgeval.scorers.judgeval_scorers.api_scorers.answer_correctness import AnswerCorrectnessScorer
+
+__all__ = [
+    "ToolCorrectnessScorer",
+    "JSONCorrectnessScorer",
+    "SummarizationScorer",
+    "HallucinationScorer",
+    "FaithfulnessScorer",
+    "ContextualRelevancyScorer",
+    "ContextualPrecisionScorer",
+    "ContextualRecallScorer",
+    "AnswerRelevancyScorer",
+    "AnswerCorrectnessScorer",
+]
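Importing from the new `api_scorers` subpackage yields the plain `APIJudgmentScorer` subclasses directly, without the `ScorerWrapper` indirection used by `judgeval.scorers.judgeval_scorers`:

from judgeval.scorers.judgeval_scorers.api_scorers import AnswerRelevancyScorer

scorer = AnswerRelevancyScorer(threshold=0.5)  # a real APIJudgmentScorer; no load_implementation() needed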
judgeval/scorers/judgeval_scorers/api_scorers/answer_correctness.py (new file)
@@ -0,0 +1,19 @@
+"""
+`judgeval` answer relevancy scorer
+
+TODO add link to docs page for this scorer
+
+"""
+
+# Internal imports
+from judgeval.scorers.api_scorer import APIJudgmentScorer
+from judgeval.constants import APIScorer
+
+
+class AnswerCorrectnessScorer(APIJudgmentScorer):
+    def __init__(self, threshold: float):
+        super().__init__(threshold=threshold, score_type=APIScorer.ANSWER_CORRECTNESS)
+
+    @property
+    def __name__(self):
+        return "Answer Correctness"
judgeval/scorers/judgeval_scorers/{answer_relevancy.py → api_scorers/answer_relevancy.py}
@@ -6,11 +6,11 @@ TODO add link to docs page for this scorer
 """
 
 # Internal imports
-from judgeval.scorers.base_scorer import JudgmentScorer
+from judgeval.scorers.api_scorer import APIJudgmentScorer
 from judgeval.constants import APIScorer
 
 
-class AnswerRelevancyScorer(JudgmentScorer):
+class AnswerRelevancyScorer(APIJudgmentScorer):
     def __init__(self, threshold: float):
         super().__init__(threshold=threshold, score_type=APIScorer.ANSWER_RELEVANCY)
 
judgeval/scorers/judgeval_scorers/{contextual_precision.py → api_scorers/contextual_precision.py}
@@ -6,11 +6,11 @@ TODO add link to docs page for this scorer
 """
 
 # Internal imports
-from judgeval.scorers.base_scorer import JudgmentScorer
+from judgeval.scorers.api_scorer import APIJudgmentScorer
 from judgeval.constants import APIScorer
 
 
-class ContextualPrecisionScorer(JudgmentScorer):
+class ContextualPrecisionScorer(APIJudgmentScorer):
     def __init__(self, threshold: float):
         super().__init__(threshold=threshold, score_type=APIScorer.CONTEXTUAL_PRECISION)
 
judgeval/scorers/judgeval_scorers/{contextual_recall.py → api_scorers/contextual_recall.py}
@@ -6,11 +6,11 @@ TODO add link to docs page for this scorer
 """
 
 # Internal imports
-from judgeval.scorers.base_scorer import JudgmentScorer
+from judgeval.scorers.api_scorer import APIJudgmentScorer
 from judgeval.constants import APIScorer
 
 
-class ContextualRecallScorer(JudgmentScorer):
+class ContextualRecallScorer(APIJudgmentScorer):
     def __init__(self, threshold: float):
         super().__init__(threshold=threshold, score_type=APIScorer.CONTEXTUAL_RECALL)
 
judgeval/scorers/judgeval_scorers/{contextual_relevancy.py → api_scorers/contextual_relevancy.py}
@@ -6,11 +6,11 @@ TODO add link to docs page for this scorer
 """
 
 # Internal imports
-from judgeval.scorers.base_scorer import JudgmentScorer
+from judgeval.scorers.api_scorer import APIJudgmentScorer
 from judgeval.constants import APIScorer
 
 
-class ContextualRelevancyScorer(JudgmentScorer):
+class ContextualRelevancyScorer(APIJudgmentScorer):
     """
     Scorer that checks if the output of a model is relevant to the retrieval context
     """
judgeval/scorers/judgeval_scorers/{faithfulness.py → api_scorers/faithfulness.py}
@@ -6,11 +6,11 @@ TODO add link to docs page for this scorer
 """
 
 # Internal imports
-from judgeval.scorers.base_scorer import JudgmentScorer
+from judgeval.scorers.api_scorer import APIJudgmentScorer
 from judgeval.constants import APIScorer
 
 
-class FaithfulnessScorer(JudgmentScorer):
+class FaithfulnessScorer(APIJudgmentScorer):
     def __init__(self, threshold: float):
         super().__init__(threshold=threshold, score_type=APIScorer.FAITHFULNESS)
 
judgeval/scorers/judgeval_scorers/{hallucination.py → api_scorers/hallucination.py}
@@ -6,11 +6,11 @@ TODO add link to docs page for this scorer
 """
 
 # Internal imports
-from judgeval.scorers.base_scorer import JudgmentScorer
+from judgeval.scorers.api_scorer import APIJudgmentScorer
 from judgeval.constants import APIScorer
 
 
-class HallucinationScorer(JudgmentScorer):
+class HallucinationScorer(APIJudgmentScorer):
     def __init__(self, threshold: float):
         super().__init__(threshold=threshold, score_type=APIScorer.HALLUCINATION)
 
judgeval/scorers/judgeval_scorers/{json_correctness.py → api_scorers/json_correctness.py}
@@ -9,23 +9,23 @@ TODO add link to docs page for this scorer
 # External imports
 from pydantic import BaseModel, Field
 # Internal imports
-from judgeval.scorers.base_scorer import JudgmentScorer
+from judgeval.scorers.api_scorer import APIJudgmentScorer
 from judgeval.constants import APIScorer
 
 
-class JSONCorrectnessScorer(JudgmentScorer):
+class JSONCorrectnessScorer(APIJudgmentScorer):
     json_schema: BaseModel = Field(None, exclude=True)
 
     def __init__(self, threshold: float, json_schema: BaseModel):
         super().__init__(threshold=threshold, score_type=APIScorer.JSON_CORRECTNESS)
         object.__setattr__(self, 'json_schema', json_schema)
-
+
     def to_dict(self):
-        return {
-            "score_type": self.score_type,
-            "threshold": self.threshold,
-            "kwargs": {"json_schema": self.json_schema.model_json_schema()}
+        base_dict = super().to_dict()  # Get the parent class's dictionary
+        base_dict["kwargs"] = {
+            "json_schema": self.json_schema.model_json_schema()
         }
+        return base_dict
 
     @property
     def __name__(self):
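With this refactor, `to_dict` layers the JSON-schema kwargs on top of the dictionary returned by `APIJudgmentScorer.to_dict`. A small sketch of the resulting payload, using a hypothetical pydantic schema (the exact `score_type` value comes from the `APIScorer` enum and is elided here):

from pydantic import BaseModel
from judgeval.scorers.judgeval_scorers.api_scorers import JSONCorrectnessScorer

class Answer(BaseModel):
    title: str
    body: str

# Passing the model class here; model_json_schema() is available on the class as well as instances.
scorer = JSONCorrectnessScorer(threshold=1.0, json_schema=Answer)
scorer.to_dict()
# -> {"score_type": <APIScorer.JSON_CORRECTNESS value>, "threshold": 1.0,
#     "kwargs": {"json_schema": Answer.model_json_schema()}}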
judgeval/scorers/judgeval_scorers/{summarization.py → api_scorers/summarization.py}
@@ -6,11 +6,11 @@ TODO add link to docs page for this scorer
 """
 
 # Internal imports
-from judgeval.scorers.base_scorer import JudgmentScorer
+from judgeval.scorers.api_scorer import APIJudgmentScorer
 from judgeval.constants import APIScorer
 
 
-class SummarizationScorer(JudgmentScorer):
+class SummarizationScorer(APIJudgmentScorer):
     def __init__(self, threshold: float):
         super().__init__(threshold=threshold, score_type=APIScorer.SUMMARIZATION)