judgeval 0.0.16__py3-none-any.whl → 0.0.18__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- judgeval/__init__.py +1 -3
- judgeval/clients.py +0 -6
- judgeval/common/logger.py +0 -1
- judgeval/common/tracer.py +270 -62
- judgeval/common/utils.py +9 -5
- judgeval/constants.py +7 -2
- judgeval/data/__init__.py +2 -0
- judgeval/data/api_example.py +2 -2
- judgeval/data/datasets/__init__.py +1 -2
- judgeval/data/datasets/dataset.py +4 -5
- judgeval/data/datasets/eval_dataset_client.py +11 -7
- judgeval/data/datasets/utils.py +1 -2
- judgeval/data/example.py +72 -17
- judgeval/data/scorer_data.py +1 -1
- judgeval/evaluation_run.py +2 -2
- judgeval/judges/__init__.py +0 -1
- judgeval/judges/base_judge.py +1 -1
- judgeval/judges/mixture_of_judges.py +7 -2
- judgeval/judgment_client.py +16 -8
- judgeval/rules.py +2 -4
- judgeval/run_evaluation.py +8 -8
- judgeval/scorers/__init__.py +6 -0
- judgeval/scorers/api_scorer.py +12 -6
- judgeval/scorers/base_scorer.py +12 -6
- judgeval/scorers/judgeval_scorer.py +7 -3
- judgeval/scorers/judgeval_scorers/__init__.py +24 -3
- judgeval/scorers/judgeval_scorers/api_scorers/__init__.py +6 -0
- judgeval/scorers/judgeval_scorers/api_scorers/comparison.py +35 -0
- judgeval/scorers/judgeval_scorers/api_scorers/groundedness.py +19 -0
- judgeval/scorers/judgeval_scorers/api_scorers/instruction_adherence.py +19 -0
- judgeval/scorers/judgeval_scorers/local_implementations/__init__.py +4 -1
- judgeval/scorers/judgeval_scorers/local_implementations/answer_correctness/answer_correctness_scorer.py +0 -1
- judgeval/scorers/judgeval_scorers/local_implementations/answer_correctness/prompts.py +2 -2
- judgeval/scorers/judgeval_scorers/local_implementations/answer_relevancy/answer_relevancy_scorer.py +7 -6
- judgeval/scorers/judgeval_scorers/local_implementations/answer_relevancy/prompts.py +2 -2
- judgeval/scorers/judgeval_scorers/local_implementations/comparison/__init__.py +0 -0
- judgeval/scorers/judgeval_scorers/local_implementations/comparison/comparison_scorer.py +161 -0
- judgeval/scorers/judgeval_scorers/local_implementations/comparison/prompts.py +222 -0
- judgeval/scorers/judgeval_scorers/local_implementations/contextual_precision/prompts.py +2 -2
- judgeval/scorers/judgeval_scorers/local_implementations/contextual_recall/prompts.py +2 -2
- judgeval/scorers/judgeval_scorers/local_implementations/contextual_relevancy/prompts.py +2 -2
- judgeval/scorers/judgeval_scorers/local_implementations/faithfulness/faithfulness_scorer.py +1 -8
- judgeval/scorers/judgeval_scorers/local_implementations/hallucination/hallucination_scorer.py +7 -6
- judgeval/scorers/judgeval_scorers/local_implementations/hallucination/prompts.py +2 -2
- judgeval/scorers/judgeval_scorers/local_implementations/instruction_adherence/instruction_adherence.py +232 -0
- judgeval/scorers/judgeval_scorers/local_implementations/instruction_adherence/prompt.py +102 -0
- judgeval/scorers/judgeval_scorers/local_implementations/json_correctness/json_correctness_scorer.py +7 -7
- judgeval/scorers/judgeval_scorers/local_implementations/summarization/summarization_scorer.py +7 -6
- judgeval/scorers/judgeval_scorers/local_implementations/tool_correctness/tool_correctness_scorer.py +1 -2
- judgeval/scorers/prompt_scorer.py +7 -5
- judgeval/scorers/utils.py +1 -1
- {judgeval-0.0.16.dist-info → judgeval-0.0.18.dist-info}/METADATA +1 -1
- {judgeval-0.0.16.dist-info → judgeval-0.0.18.dist-info}/RECORD +56 -48
- /judgeval/data/{datasets/ground_truth.py → ground_truth.py} +0 -0
- {judgeval-0.0.16.dist-info → judgeval-0.0.18.dist-info}/WHEEL +0 -0
- {judgeval-0.0.16.dist-info → judgeval-0.0.18.dist-info}/licenses/LICENSE.md +0 -0
judgeval/data/example.py
CHANGED
@@ -2,17 +2,12 @@
 Classes for representing examples in a dataset.
 """
 
-
-from typing import TypeVar, Optional, Any, Dict, List
+from typing import Optional, Any, Dict, List
 from uuid import uuid4
-from pydantic import BaseModel, Field
+from pydantic import BaseModel, Field
 from enum import Enum
 from datetime import datetime
-import time
-
 
-Input = TypeVar('Input')
-Output = TypeVar('Output')
 
 class ExampleParams(Enum):
     INPUT = "input"
@@ -23,11 +18,12 @@ class ExampleParams(Enum):
     TOOLS_CALLED = "tools_called"
     EXPECTED_TOOLS = "expected_tools"
     REASONING = "reasoning"
+    ADDITIONAL_METADATA = "additional_metadata"
 
 
 class Example(BaseModel):
-    input:
-    actual_output:
+    input: str
+    actual_output: str
     expected_output: Optional[str] = None
     context: Optional[List[str]] = None
     retrieval_context: Optional[List[str]] = None
@@ -39,22 +35,81 @@ class Example(BaseModel):
     example_index: Optional[int] = None
     timestamp: Optional[str] = None
     trace_id: Optional[str] = None
-
-    @field_validator('input', 'actual_output', mode='before')
-    def convert_to_str(cls, value):
-        try:
-            return str(value)
-        except Exception:
-            return repr(value)
 
     def __init__(self, **data):
+        # Check that required fields are provided
+        if 'input' not in data:
+            raise ValueError("Example must be initialized with 'input' field.")
+        if 'actual_output' not in data:
+            raise ValueError("Example must be initialized with 'actual_output' field.")
+
         if 'example_id' not in data:
             data['example_id'] = str(uuid4())
         # Set timestamp if not provided
         if 'timestamp' not in data:
             data['timestamp'] = datetime.now().strftime("%Y%m%d_%H%M%S")
         super().__init__(**data)
-
+
+    @field_validator('input', mode='before')
+    @classmethod
+    def validate_input(cls, v):
+        if not v or not isinstance(v, str):
+            raise ValueError(f"Input must be a non-empty string but got '{v}' of type {type(v)}")
+        return v
+
+    @field_validator('actual_output', mode='before')
+    @classmethod
+    def validate_actual_output(cls, v):
+        if not isinstance(v, str):
+            raise ValueError(f"Actual output must be a string but got '{v}' of type {type(v)}")
+        return v
+
+    @field_validator('expected_output', mode='before')
+    @classmethod
+    def validate_expected_output(cls, v):
+        if v is not None and not isinstance(v, str):
+            raise ValueError(f"Expected output must be a string or None but got {v} of type {type(v)}")
+        return v
+
+    @field_validator('context', 'retrieval_context', 'tools_called', 'expected_tools', mode='before')
+    @classmethod
+    def validate_string_lists(cls, v, info):
+        field_name = info.field_name
+        if v is not None:
+            if not isinstance(v, list):
+                raise ValueError(f"{field_name} must be a list of strings or None but got {v} of type {type(v)}")
+            for i, item in enumerate(v):
+                if not isinstance(item, str):
+                    raise ValueError(f"All items in {field_name} must be strings but item at index {i} is {item} of type {type(item)}")
+        return v
+
+    @field_validator('additional_metadata', mode='before')
+    @classmethod
+    def validate_additional_metadata(cls, v):
+        if v is not None and not isinstance(v, dict):
+            raise ValueError(f"Additional metadata must be a dictionary or None but got {v} of type {type(v)}")
+        return v
+
+    @field_validator('example_index', mode='before')
+    @classmethod
+    def validate_example_index(cls, v):
+        if v is not None and not isinstance(v, int):
+            raise ValueError(f"Example index must be an integer or None but got {v} of type {type(v)}")
+        return v
+
+    @field_validator('timestamp', mode='before')
+    @classmethod
+    def validate_timestamp(cls, v):
+        if v is not None and not isinstance(v, str):
+            raise ValueError(f"Timestamp must be a string or None but got {v} of type {type(v)}")
+        return v
+
+    @field_validator('trace_id', mode='before')
+    @classmethod
+    def validate_trace_id(cls, v):
+        if v is not None and not isinstance(v, str):
+            raise ValueError(f"Trace ID must be a string or None but got {v} of type {type(v)}")
+        return v
 
     def to_dict(self):
         return {
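In practice, the net effect of this rewrite is that 0.0.18 rejects malformed fields instead of silently coercing them with str() as 0.0.16 did. A minimal sketch of the new behavior, assuming only what the validators above show:

    from judgeval.data import Example

    # Passes: both required fields are present and are non-empty strings.
    ok = Example(input="What is the capital of France?", actual_output="Paris")

    # Raises ValueError("Example must be initialized with 'input' field.")
    Example(actual_output="Paris")

    # Raises ValueError from validate_input: input must be a non-empty string,
    # whereas 0.0.16 would have converted 42 to "42".
    Example(input=42, actual_output="Paris")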
judgeval/data/scorer_data.py
CHANGED
judgeval/evaluation_run.py
CHANGED
@@ -111,7 +111,7 @@ class EvaluationRun(BaseModel):
         # Check if model is string or list of strings
         if isinstance(v, str):
             if v not in ACCEPTABLE_MODELS:
-                raise ValueError(f"Model name {v} not recognized.")
+                raise ValueError(f"Model name {v} not recognized. Please select a valid model name.)")
             return v
 
         if isinstance(v, list):
@@ -119,7 +119,7 @@ class EvaluationRun(BaseModel):
                 raise ValueError("When providing a list of models, all elements must be strings")
             for m in v:
                 if m not in ACCEPTABLE_MODELS:
-                    raise ValueError(f"Model name {m} not recognized.")
+                    raise ValueError(f"Model name {m} not recognized. Please select a valid model name.")
             return v
         raise ValueError(f"Model must be one of: string, list of strings, or JudgevalJudge instance. Received type {type(v)}.")
 
judgeval/judges/__init__.py
CHANGED
judgeval/judges/base_judge.py
CHANGED
judgeval/judges/mixture_of_judges.py
CHANGED
@@ -5,9 +5,14 @@ Enables client to use multiple models to generate responses and then aggregate t
 """
 from judgeval import *
 import pydantic
-from typing import List, Union, Mapping
+from typing import List, Union, Mapping
 from judgeval.judges import JudgevalJudge
-from judgeval.common.utils import
+from judgeval.common.utils import (
+    get_completion_multiple_models,
+    get_chat_completion,
+    aget_completion_multiple_models,
+    aget_chat_completion
+)
 from judgeval.common.logger import debug, error
 
 def build_dynamic_mixture_prompt(
judgeval/judgment_client.py
CHANGED
@@ -6,17 +6,17 @@ from typing import Optional, List, Dict, Any, Union
 import requests
 
 from judgeval.constants import ROOT_API
-from judgeval.data.datasets import EvalDataset, EvalDatasetClient
+from judgeval.data.datasets import EvalDataset, EvalDatasetClient
 from judgeval.data import (
     ScoringResult,
-    Example
+    Example,
+    GroundTruthExample
 )
 from judgeval.scorers import (
     APIJudgmentScorer,
     JudgevalScorer,
     ClassifierScorer,
     ScorerWrapper,
-    score,
 )
 from judgeval.evaluation_run import EvaluationRun
 from judgeval.run_evaluation import (
@@ -24,7 +24,11 @@ from judgeval.run_evaluation import (
     assert_test
 )
 from judgeval.judges import JudgevalJudge
-from judgeval.constants import
+from judgeval.constants import (
+    JUDGMENT_EVAL_FETCH_API_URL,
+    JUDGMENT_EVAL_DELETE_API_URL,
+    JUDGMENT_EVAL_DELETE_PROJECT_API_URL
+)
 from judgeval.common.exceptions import JudgmentAPIError
 from pydantic import BaseModel
 from judgeval.rules import Rule
@@ -306,7 +310,8 @@ class JudgmentClient:
                 "Authorization": f"Bearer {self.judgment_api_key}",
                 "X-Organization-Id": self.organization_id
             },
-            json=eval_run_request_body.model_dump()
+            json=eval_run_request_body.model_dump(),
+            verify=True)
         if eval_run.status_code != requests.codes.ok:
             raise ValueError(f"Error fetching eval results: {eval_run.json()}")
 
@@ -378,7 +383,8 @@ class JudgmentClient:
                 "Content-Type": "application/json",
                 "Authorization": f"Bearer {self.judgment_api_key}",
             },
-            json={}  # Empty body now
+            json={},  # Empty body now
+            verify=True
         )
         if response.status_code == 200:
             return True, response.json()
@@ -409,7 +415,8 @@ class JudgmentClient:
                 "Content-Type": "application/json",
                 "Authorization": f"Bearer {self.judgment_api_key}",
                 "X-Organization-Id": self.organization_id
-            }
+            },
+            verify=True
         )
 
         if response.status_code == 500:
@@ -452,7 +459,8 @@ class JudgmentClient:
                 "Content-Type": "application/json",
                 "Authorization": f"Bearer {self.judgment_api_key}",
                 "X-Organization-Id": self.organization_id
-            }
+            },
+            verify=True
         )
 
         if response.status_code == 500:
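A note on the recurring verify=True additions in this file (and in run_evaluation.py below): in the requests library this flag controls TLS certificate verification and already defaults to True, so behavior should be unchanged; the edit simply makes the intent explicit. A minimal sketch using only the standard requests API (the URL is a placeholder, not from the package):

    import requests

    # verify=True enables TLS certificate verification (the requests default).
    response = requests.post(
        "https://api.example.com/evaluate",  # placeholder URL
        headers={"Content-Type": "application/json"},
        json={},
        verify=True,
    )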
judgeval/rules.py
CHANGED
@@ -5,14 +5,12 @@ Rules system for Judgeval that enables alerts based on metric thresholds.
 from typing import Dict, List, Optional, Union, Any, Set, Tuple
 from pydantic import BaseModel, Field, field_validator, ConfigDict
 from enum import Enum
-from datetime import datetime
 import asyncio
 from concurrent.futures import ThreadPoolExecutor
 import time
-import uuid
+import uuid
 
-from judgeval.scorers import APIJudgmentScorer, JudgevalScorer
-from judgeval.scorers.judgeval_scorers import ScorerWrapper  # Import from the correct module
+from judgeval.scorers import APIJudgmentScorer, JudgevalScorer, ScorerWrapper
 
 class AlertStatus(str, Enum):
     """Status of an alert evaluation."""
judgeval/run_evaluation.py
CHANGED
@@ -5,7 +5,6 @@ from datetime import datetime
 from rich import print as rprint
 
 from judgeval.data import (
-    Example,
     ScorerData,
     ScoringResult
 )
@@ -25,13 +24,11 @@ from judgeval.constants import (
 from judgeval.common.exceptions import JudgmentAPIError
 from judgeval.evaluation_run import EvaluationRun
 from judgeval.common.logger import (
-    enable_logging,
     debug,
     info,
     error,
     example_logging_context
 )
-from judgeval.rules import RulesEngine, Rule, AlertResult, AlertStatus
 
 
 def execute_api_eval(evaluation_run: EvaluationRun) -> List[Dict]:
@@ -55,7 +52,8 @@ def execute_api_eval(evaluation_run: EvaluationRun) -> List[Dict]:
                 "Authorization": f"Bearer {evaluation_run.judgment_api_key}",
                 "X-Organization-Id": evaluation_run.organization_id
             },
-            json=payload
+            json=payload,
+            verify=True)
         response_data = response.json()
     except Exception as e:
         error(f"Error: {e}")
@@ -168,12 +166,13 @@ def check_eval_run_name_exists(eval_name: str, project_name: str, judgment_api_k
             "eval_name": eval_name,
             "project_name": project_name,
             "judgment_api_key": judgment_api_key,
-        }
+        },
+        verify=True
    )
 
    if response.status_code == 409:
-        error(f"
-        raise ValueError(f"
+        error(f"Eval run name '{eval_name}' already exists for this project. Please choose a different name or set the `override` flag to true.")
+        raise ValueError(f"Eval run name '{eval_name}' already exists for this project. Please choose a different name or set the `override` flag to true.")
 
    if not response.ok:
        response_data = response.json()
@@ -210,7 +209,8 @@ def log_evaluation_results(merged_results: List[ScoringResult], evaluation_run:
            "results": [result.to_dict() for result in merged_results],
            "project_name": evaluation_run.project_name,
            "eval_name": evaluation_run.eval_name,
-        }
+        },
+        verify=True
    )
 
    if not res.ok:
judgeval/scorers/__init__.py
CHANGED
@@ -14,6 +14,9 @@ from judgeval.scorers.judgeval_scorers import (
     ScorerWrapper,
     AnswerCorrectnessScorer,
     Text2SQLScorer,
+    ComparisonScorer,
+    InstructionAdherenceScorer,
+    GroundednessScorer,
 )
 
 __all__ = [
@@ -33,4 +36,7 @@ __all__ = [
     "ScorerWrapper",
     "AnswerCorrectnessScorer",
     "Text2SQLScorer",
+    "ComparisonScorer",
+    "InstructionAdherenceScorer",
+    "GroundednessScorer",
 ]
judgeval/scorers/api_scorer.py
CHANGED
@@ -7,7 +7,7 @@ Scores `Example`s using ready-made Judgment evaluators.
 from pydantic import BaseModel, field_validator
 from judgeval.common.logger import debug, info, warning, error
 
-from judgeval.constants import APIScorer
+from judgeval.constants import APIScorer, UNBOUNDED_SCORERS
 
 
 class APIJudgmentScorer(BaseModel):
@@ -18,17 +18,23 @@ class APIJudgmentScorer(BaseModel):
         score_type (APIScorer): The Judgment metric to use for scoring `Example`s
         threshold (float): A value between 0 and 1 that determines the scoring threshold
     """
-    threshold: float
     score_type: APIScorer
+    threshold: float
 
     @field_validator('threshold')
-    def validate_threshold(cls, v):
+    def validate_threshold(cls, v, info):
         """
         Validates that the threshold is between 0 and 1 inclusive.
         """
-
-
-
+        score_type = info.data.get('score_type')
+        if score_type in UNBOUNDED_SCORERS:
+            if v < 0:
+                error(f"Threshold for {score_type} must be greater than 0, got: {v}")
+                raise ValueError(f"Threshold for {score_type} must be greater than 0, got: {v}")
+        else:
+            if not 0 <= v <= 1:
+                error(f"Threshold for {score_type} must be between 0 and 1, got: {v}")
+                raise ValueError(f"Threshold for {score_type} must be between 0 and 1, got: {v}")
         return v
 
     @field_validator('score_type')
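Worth noting: swapping the declaration order so score_type precedes threshold is what makes info.data.get('score_type') work, because a pydantic v2 field_validator only sees previously validated fields in info.data. A stand-alone sketch of the pattern, using illustrative names rather than the package's constants:

    from pydantic import BaseModel, ValidationInfo, field_validator

    UNBOUNDED = {"comparison"}  # stand-in for judgeval's UNBOUNDED_SCORERS

    class Scorer(BaseModel):
        score_type: str   # declared first so it is available in info.data
        threshold: float

        @field_validator("threshold")
        def check_threshold(cls, v: float, info: ValidationInfo) -> float:
            if info.data.get("score_type") in UNBOUNDED:
                if v < 0:
                    raise ValueError("threshold must be non-negative")
            elif not 0 <= v <= 1:
                raise ValueError("threshold must be between 0 and 1")
            return v

    Scorer(score_type="comparison", threshold=3.0)    # accepted: unbounded scorer
    Scorer(score_type="faithfulness", threshold=0.7)  # accepted: bounded scorer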
judgeval/scorers/base_scorer.py
CHANGED
@@ -7,7 +7,7 @@ Scores `Example`s using ready-made Judgment evaluators.
 from pydantic import BaseModel, field_validator
 from judgeval.common.logger import debug, info, warning, error
 
-from judgeval.constants import APIScorer
+from judgeval.constants import APIScorer, UNBOUNDED_SCORERS
 
 
 class APIJudgmentScorer(BaseModel):
@@ -18,17 +18,23 @@ class APIJudgmentScorer(BaseModel):
         score_type (APIScorer): The Judgment metric to use for scoring `Example`s
         threshold (float): A value between 0 and 1 that determines the scoring threshold
     """
-    threshold: float
     score_type: APIScorer
+    threshold: float
 
     @field_validator('threshold')
-    def validate_threshold(cls, v):
+    def validate_threshold(cls, v, info):
         """
         Validates that the threshold is between 0 and 1 inclusive.
         """
-
-
-
+        score_type = info.data.get('score_type')
+        if score_type in UNBOUNDED_SCORERS:
+            if v < 0:
+                error(f"Threshold for {score_type} must be greater than 0, got: {v}")
+                raise ValueError(f"Threshold for {score_type} must be greater than 0, got: {v}")
+        else:
+            if not 0 <= v <= 1:
+                error(f"Threshold for {score_type} must be between 0 and 1, got: {v}")
+                raise ValueError(f"Threshold for {score_type} must be between 0 and 1, got: {v}")
         return v
 
     @field_validator('score_type')
judgeval/scorers/judgeval_scorer.py
CHANGED
@@ -11,7 +11,7 @@ from abc import abstractmethod
 from judgeval.common.logger import debug, info, warning, error
 from judgeval.judges import JudgevalJudge
 from judgeval.judges.utils import create_judge
-
+from judgeval.constants import UNBOUNDED_SCORERS
 
 class JudgevalScorer:
     """
@@ -58,8 +58,12 @@
         additional_metadata: Optional[Dict] = None
     ):
         debug(f"Initializing JudgevalScorer with score_type={score_type}, threshold={threshold}")
-        if
-
+        if score_type in UNBOUNDED_SCORERS:
+            if threshold < 0:
+                raise ValueError(f"Threshold for {score_type} must be greater than 0, got: {threshold}")
+        else:
+            if not 0 <= threshold <= 1:
+                raise ValueError(f"Threshold for {score_type} must be between 0 and 1, got: {threshold}")
         if strict_mode:
             warning("Strict mode enabled - scoring will be more rigorous")
         info(f"JudgevalScorer initialized with evaluation_model: {evaluation_model}")
judgeval/scorers/judgeval_scorers/__init__.py
CHANGED
@@ -1,5 +1,4 @@
 from typing import Type, Optional, Any
-from functools import wraps
 
 # Import implementations
 from judgeval.scorers.judgeval_scorers.api_scorers import (
@@ -12,7 +11,10 @@ from judgeval.scorers.judgeval_scorers.api_scorers import (
     ContextualPrecisionScorer as APIContextualPrecisionScorer,
     ContextualRecallScorer as APIContextualRecallScorer,
     AnswerRelevancyScorer as APIAnswerRelevancyScorer,
-    AnswerCorrectnessScorer as APIAnswerCorrectnessScorer,
+    AnswerCorrectnessScorer as APIAnswerCorrectnessScorer,
+    ComparisonScorer as APIComparisonScorer,
+    InstructionAdherenceScorer as APIInstructionAdherenceScorer,
+    GroundednessScorer as APIGroundednessScorer,
 )
 
 from judgeval.scorers.judgeval_scorers.local_implementations import (
@@ -25,7 +27,9 @@ from judgeval.scorers.judgeval_scorers.local_implementations import (
     ToolCorrectnessScorer as LocalToolCorrectnessScorer,
     HallucinationScorer as LocalHallucinationScorer,
     SummarizationScorer as LocalSummarizationScorer,
-    AnswerCorrectnessScorer as LocalAnswerCorrectnessScorer
+    AnswerCorrectnessScorer as LocalAnswerCorrectnessScorer,
+    ComparisonScorer as LocalComparisonScorer,
+    InstructionAdherenceScorer as LocalInstructionAdherenceScorer,
 )
 
 from judgeval.scorers.judgeval_scorers.classifiers import Text2SQLScorer
@@ -134,6 +138,21 @@ ContextualRecallScorer = ScorerWrapper(
     local_implementation=LocalContextualRecallScorer
 )
 
+InstructionAdherenceScorer = ScorerWrapper(
+    api_implementation=APIInstructionAdherenceScorer,
+    local_implementation=LocalInstructionAdherenceScorer
+)
+
+def ComparisonScorer(threshold: float, criteria: str, description: str):
+    return ScorerWrapper(
+        api_implementation=APIComparisonScorer,
+        local_implementation=LocalComparisonScorer
+    )(threshold=threshold, criteria=criteria, description=description)
+
+GroundednessScorer = ScorerWrapper(
+    api_implementation=APIGroundednessScorer,
+)
+
 __all__ = [
     "ToolCorrectnessScorer",
     "JSONCorrectnessScorer",
@@ -145,4 +164,6 @@ __all__ = [
     "ContextualRecallScorer",
     "AnswerRelevancyScorer",
     "Text2SQLScorer",
+    "ComparisonScorer",
+    "GroundednessScorer",
 ]
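Judging purely from the wrapper code above, the comparison scorer is now built through a small factory rather than assigned as a bare ScorerWrapper, so its extra arguments travel with it. A hypothetical call (argument values are illustrative, not taken from the package's docs):

    from judgeval.scorers import ComparisonScorer

    # threshold, criteria and description are forwarded to the wrapped implementation.
    tone_scorer = ComparisonScorer(
        threshold=1,
        criteria="tone",
        description="The response should match the formal tone of the expected output.",
    )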
judgeval/scorers/judgeval_scorers/api_scorers/__init__.py
CHANGED
@@ -8,6 +8,9 @@ from judgeval.scorers.judgeval_scorers.api_scorers.contextual_precision import C
 from judgeval.scorers.judgeval_scorers.api_scorers.contextual_recall import ContextualRecallScorer
 from judgeval.scorers.judgeval_scorers.api_scorers.answer_relevancy import AnswerRelevancyScorer
 from judgeval.scorers.judgeval_scorers.api_scorers.answer_correctness import AnswerCorrectnessScorer
+from judgeval.scorers.judgeval_scorers.api_scorers.comparison import ComparisonScorer
+from judgeval.scorers.judgeval_scorers.api_scorers.instruction_adherence import InstructionAdherenceScorer
+from judgeval.scorers.judgeval_scorers.api_scorers.groundedness import GroundednessScorer
 
 __all__ = [
     "ToolCorrectnessScorer",
@@ -20,4 +23,7 @@ __all__ = [
     "ContextualRecallScorer",
     "AnswerRelevancyScorer",
     "AnswerCorrectnessScorer",
+    "ComparisonScorer",
+    "InstructionAdherenceScorer",
+    "GroundednessScorer",
 ]
judgeval/scorers/judgeval_scorers/api_scorers/comparison.py
ADDED
@@ -0,0 +1,35 @@
+"""
+`judgeval` comparison scorer
+
+TODO add link to docs page for this scorer
+
+"""
+
+# Internal imports
+from judgeval.scorers.api_scorer import APIJudgmentScorer
+from judgeval.constants import APIScorer
+from typing import Optional, Dict
+
+class ComparisonScorer(APIJudgmentScorer):
+    kwargs: Optional[Dict] = None
+
+    def __init__(self, threshold: float, criteria: str, description: str):
+        super().__init__(threshold=threshold, score_type=APIScorer.COMPARISON)
+        self.kwargs = {"criteria": criteria, "description": description}
+
+    @property
+    def __name__(self):
+        return f"Comparison-{self.kwargs['criteria']}"
+
+    def to_dict(self) -> dict:
+        """
+        Converts the scorer configuration to a dictionary format.
+
+        Returns:
+            dict: A dictionary containing the scorer's configuration
+        """
+        return {
+            "score_type": self.score_type,
+            "threshold": self.threshold,
+            "kwargs": self.kwargs
+        }
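For the API-side class just added, the serialized payload follows directly from to_dict(); a minimal sketch (the concrete APIScorer.COMPARISON value is not shown in this diff, so it is left symbolic):

    from judgeval.scorers.judgeval_scorers.api_scorers.comparison import ComparisonScorer

    scorer = ComparisonScorer(
        threshold=1,
        criteria="conciseness",
        description="Prefer the shorter of two otherwise equivalent answers.",
    )
    print(scorer.__name__)   # Comparison-conciseness
    print(scorer.to_dict())
    # {'score_type': APIScorer.COMPARISON, 'threshold': 1,
    #  'kwargs': {'criteria': 'conciseness', 'description': 'Prefer the shorter ...'}}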
judgeval/scorers/judgeval_scorers/api_scorers/groundedness.py
ADDED
@@ -0,0 +1,19 @@
+"""
+`judgeval` Groundedness scorer
+
+TODO add link to docs page for this scorer
+
+"""
+
+# Internal imports
+from judgeval.scorers.api_scorer import APIJudgmentScorer
+from judgeval.constants import APIScorer
+
+
+class GroundednessScorer(APIJudgmentScorer):
+    def __init__(self, threshold: float):
+        super().__init__(threshold=threshold, score_type=APIScorer.GROUNDEDNESS)
+
+    @property
+    def __name__(self):
+        return "Groundedness"
judgeval/scorers/judgeval_scorers/api_scorers/instruction_adherence.py
ADDED
@@ -0,0 +1,19 @@
+"""
+`judgeval` instruction adherence scorer
+
+TODO add link to docs page for this scorer
+
+"""
+
+# Internal imports
+from judgeval.scorers.api_scorer import APIJudgmentScorer
+from judgeval.constants import APIScorer
+
+
+class InstructionAdherenceScorer(APIJudgmentScorer):
+    def __init__(self, threshold: float):
+        super().__init__(threshold=threshold, score_type=APIScorer.INSTRUCTION_ADHERENCE)
+
+    @property
+    def __name__(self):
+        return "Instruction Adherence"
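Both new API scorers above take only a threshold; a minimal sketch of constructing them through the api_scorers package shown earlier:

    from judgeval.scorers.judgeval_scorers.api_scorers import (
        GroundednessScorer,
        InstructionAdherenceScorer,
    )

    grounded = GroundednessScorer(threshold=0.8)
    adherence = InstructionAdherenceScorer(threshold=0.8)
    print(grounded.__name__, adherence.__name__)  # Groundedness Instruction Adherence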
judgeval/scorers/judgeval_scorers/local_implementations/__init__.py
CHANGED
@@ -8,11 +8,13 @@ from judgeval.scorers.judgeval_scorers.local_implementations.tool_correctness.to
 from judgeval.scorers.judgeval_scorers.local_implementations.hallucination.hallucination_scorer import HallucinationScorer
 from judgeval.scorers.judgeval_scorers.local_implementations.summarization.summarization_scorer import SummarizationScorer
 from judgeval.scorers.judgeval_scorers.local_implementations.answer_correctness.answer_correctness_scorer import AnswerCorrectnessScorer
-
+from judgeval.scorers.judgeval_scorers.local_implementations.comparison.comparison_scorer import ComparisonScorer
+from judgeval.scorers.judgeval_scorers.local_implementations.instruction_adherence.instruction_adherence import InstructionAdherenceScorer
 
 __all__ = [
     "AnswerCorrectnessScorer",
     "AnswerRelevancyScorer",
+    "ComparisonScorer",
     "ContextualPrecisionScorer",
     "ContextualRecallScorer",
     "ContextualRelevancyScorer",
@@ -21,4 +23,5 @@ __all__ = [
     "ToolCorrectnessScorer",
     "HallucinationScorer",
     "SummarizationScorer",
+    "InstructionAdherenceScorer",
 ]
|