PyPI - judgeval - Versions diffs - 0.0.51__py3-none-any.whl → 0.0.53__py3-none-any.whl - Mend

judgeval 0.0.51__py3-none-any.whl → 0.0.53__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (60) hide show

judgeval/common/logger.py +46 -199
judgeval/common/s3_storage.py +2 -6
judgeval/common/tracer.py +182 -262
judgeval/common/utils.py +16 -36
judgeval/constants.py +14 -20
judgeval/data/__init__.py +0 -2
judgeval/data/datasets/dataset.py +6 -10
judgeval/data/datasets/eval_dataset_client.py +25 -27
judgeval/data/example.py +5 -138
judgeval/data/judgment_types.py +214 -0
judgeval/data/result.py +7 -25
judgeval/data/scorer_data.py +28 -40
judgeval/data/scripts/fix_default_factory.py +23 -0
judgeval/data/scripts/openapi_transform.py +123 -0
judgeval/data/tool.py +3 -54
judgeval/data/trace.py +31 -50
judgeval/data/trace_run.py +3 -3
judgeval/evaluation_run.py +16 -23
judgeval/integrations/langgraph.py +11 -12
judgeval/judges/litellm_judge.py +3 -6
judgeval/judges/mixture_of_judges.py +8 -25
judgeval/judges/together_judge.py +3 -6
judgeval/judgment_client.py +22 -24
judgeval/rules.py +7 -19
judgeval/run_evaluation.py +79 -242
judgeval/scorers/__init__.py +4 -20
judgeval/scorers/agent_scorer.py +21 -0
judgeval/scorers/api_scorer.py +28 -38
judgeval/scorers/base_scorer.py +98 -0
judgeval/scorers/example_scorer.py +19 -0
judgeval/scorers/judgeval_scorers/api_scorers/__init__.py +0 -20
judgeval/scorers/judgeval_scorers/api_scorers/answer_correctness.py +10 -17
judgeval/scorers/judgeval_scorers/api_scorers/answer_relevancy.py +9 -24
judgeval/scorers/judgeval_scorers/api_scorers/classifier_scorer.py +16 -68
judgeval/scorers/judgeval_scorers/api_scorers/derailment_scorer.py +4 -12
judgeval/scorers/judgeval_scorers/api_scorers/execution_order.py +4 -4
judgeval/scorers/judgeval_scorers/api_scorers/faithfulness.py +10 -17
judgeval/scorers/judgeval_scorers/api_scorers/hallucination.py +4 -4
judgeval/scorers/judgeval_scorers/api_scorers/instruction_adherence.py +4 -4
judgeval/scorers/judgeval_scorers/api_scorers/tool_dependency.py +4 -4
judgeval/scorers/judgeval_scorers/api_scorers/tool_order.py +18 -14
judgeval/scorers/score.py +45 -330
judgeval/scorers/utils.py +6 -88
judgeval/utils/file_utils.py +4 -6
judgeval/version_check.py +3 -2
{judgeval-0.0.51.dist-info → judgeval-0.0.53.dist-info}/METADATA +3 -2
judgeval-0.0.53.dist-info/RECORD +65 -0
judgeval/data/custom_example.py +0 -19
judgeval/scorers/judgeval_scorer.py +0 -177
judgeval/scorers/judgeval_scorers/api_scorers/comparison.py +0 -45
judgeval/scorers/judgeval_scorers/api_scorers/contextual_precision.py +0 -29
judgeval/scorers/judgeval_scorers/api_scorers/contextual_recall.py +0 -29
judgeval/scorers/judgeval_scorers/api_scorers/contextual_relevancy.py +0 -32
judgeval/scorers/judgeval_scorers/api_scorers/groundedness.py +0 -28
judgeval/scorers/judgeval_scorers/api_scorers/json_correctness.py +0 -38
judgeval/scorers/judgeval_scorers/api_scorers/summarization.py +0 -27
judgeval/scorers/prompt_scorer.py +0 -296
judgeval-0.0.51.dist-info/RECORD +0 -69
{judgeval-0.0.51.dist-info → judgeval-0.0.53.dist-info}/WHEEL +0 -0
{judgeval-0.0.51.dist-info → judgeval-0.0.53.dist-info}/licenses/LICENSE.md +0 -0

{judgeval-0.0.51.dist-info → judgeval-0.0.53.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: judgeval
-Version: 0.0.51
+Version: 0.0.53
 Summary: Judgeval Package
 Project-URL: Homepage, https://github.com/JudgmentLabs/judgeval
 Project-URL: Issues, https://github.com/JudgmentLabs/judgeval/issues
@@ -12,6 +12,7 @@ Classifier: Programming Language :: Python :: 3
 Requires-Python: >=3.11
 Requires-Dist: anthropic
 Requires-Dist: boto3
+Requires-Dist: datamodel-code-generator>=0.31.1
 Requires-Dist: google-genai
 Requires-Dist: langchain-anthropic
 Requires-Dist: langchain-core
@@ -51,7 +52,7 @@ We're hiring! Join us in our mission to enable self-learning agents by providing
 </div>
-Judgeval offers **open-source tooling** for tracing, evaluating, and monitoring LLM agents. **Provides comprehensive data from agent-environment interactions** for continuous learning and self-improvement—**enabling the future of autonomous agents**.
+Judgeval offers **open-source tooling** for tracing and evaluating autonomous, stateful agents. It **provides runtime data from agent-environment interactions** for continuous learning and self-improvement.
 ## 🎬 See Judgeval in Action

judgeval-0.0.53.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,65 @@
+judgeval/__init__.py,sha256=HM1M8hmqRum6G554QKkXhB4DF4f5eh_xtYo0Kf-t3kw,332
+judgeval/clients.py,sha256=JnB8n90GyXiYaGmSEYaA67mdJSnr3SIrzArao7NGebw,980
+judgeval/constants.py,sha256=lqPVUR7XAr1zbmByJil3i0eY24ymWGzcgg88Npk-U20,5772
+judgeval/evaluation_run.py,sha256=B5w6UiB2cu8km93p4XT3jtganOtIKAZJI3UKc5Qgrew,2936
+judgeval/judgment_client.py,sha256=QT6jV1moshs_-1xjX8jAhQpr9vjznqqcXuobQ7eDBks,21343
+judgeval/rules.py,sha256=CoQjqmP8daEXewMkplmA-7urubDtweOr5O6z8klVwLI,20031
+judgeval/run_evaluation.py,sha256=WXQi2AIKu_iPSLZWnhgLarVbHE6nzyjHJcbKSHu3zYc,42568
+judgeval/version_check.py,sha256=FoLEtpCjDw2HuDQdpw5yT29UtwumSc6ZZN6AV_c9Mnw,1057
+judgeval/common/__init__.py,sha256=KH-QJyWtQ60R6yFIBDYS3WGRiNpEu1guynpxivZvpBQ,309
+judgeval/common/exceptions.py,sha256=OkgDznu2wpBQZMXiZarLJYNk1HIcC8qYW7VypDC3Ook,556
+judgeval/common/logger.py,sha256=514eFLYWS_UL8VY-zAR2ePUlpQe4rbYlleLASFllLE4,1511
+judgeval/common/s3_storage.py,sha256=UvAKGSa0S1BnNprzDKHMAfyT-8zlMAOM5kCrXcVN0HE,3743
+judgeval/common/tracer.py,sha256=qrvriShLG6INpE58sAhlQ6YZfZa3TtfJfsP-cVDyBe4,126135
+judgeval/common/utils.py,sha256=wkdBg86OHROQBXpIPtMyNku5cGckwPpaiATeuilLNbE,34304
+judgeval/data/__init__.py,sha256=1QagDcSQtfnJ632t9Dnq8d7XjAqhmY4mInOWt8qH9tM,455
+judgeval/data/example.py,sha256=6xtPTwWUsZ0HdErU-g954nCv64fsbnS1I5xuEvs14EA,2027
+judgeval/data/judgment_types.py,sha256=VM941NM7_uqwx6bKABV1cH2cocuYgclfORxCK3sPQZo,9853
+judgeval/data/result.py,sha256=7FFD9kOla6ijvu2-Wx3tFE98Ry7ECeV-f8aiDeHNaHs,2449
+judgeval/data/scorer_data.py,sha256=ty4clGts-Zp6NiU1SZXKbrVsyKvHhD5Tm1kbXx6we1k,2977
+judgeval/data/tool.py,sha256=iWQSdy5uNbIeACu3gQy1DC2oGYxRVYNfkkczWdQMAiA,99
+judgeval/data/trace.py,sha256=szugEHAb2R0YljmBQllQEVE5pOlBUC6eOSzbm_WXf-Y,4830
+judgeval/data/trace_run.py,sha256=kovRZduC0l-9nM5YWM6lKaQNEVy_WtHwt4lvIwPbHvY,1825
+judgeval/data/datasets/__init__.py,sha256=IdNKhQv9yYZ_op0rdBacrFaFVmiiYQ3JTzXzxOTsEVQ,176
+judgeval/data/datasets/dataset.py,sha256=dDmTYSBRj4YEUhgYOebAcDm4N14nj3tcCqHj9y2Z1z0,12725
+judgeval/data/datasets/eval_dataset_client.py,sha256=0XS8irOA-gI1aEX3hk0LikzLjb6DOLuj18j2w64BoQM,12614
+judgeval/data/scripts/fix_default_factory.py,sha256=lvp2JwYZqz-XpD9LZNa3mANZVP-jJSZoNzolI6JWERM,591
+judgeval/data/scripts/openapi_transform.py,sha256=mT8qrzhvtMrMMC6Q_amSOGt-X-hUbDlT3xvpgEfcuEs,3828
+judgeval/integrations/langgraph.py,sha256=WuaHqer8i2QV_yZWoB18RNDLAYeH_Z_quVERvTOySQU,36151
+judgeval/judges/__init__.py,sha256=6X7VSwrwsdxGBNxCyapVRWGghhKOy3MVxFNMQ62kCXM,308
+judgeval/judges/base_judge.py,sha256=_dz0qWsKRxzXxpRY9l6mrxTRYPSF2FE4ZXkrzhZ4gbY,986
+judgeval/judges/litellm_judge.py,sha256=LX4_KXb1Jp8IXif3vvOiKfRYH7ZkbQLs9AtWPGmj544,2483
+judgeval/judges/mixture_of_judges.py,sha256=wcHwLi9zU0uwKMqRVhcPdjiYKgWflX4dpUbU2kS9yg0,14825
+judgeval/judges/together_judge.py,sha256=r5k8ZcC6lnsFttGHhrocFtmglx2Cb3G-4ORKAeK-Nmw,2253
+judgeval/judges/utils.py,sha256=0CF9qtIUQUL3-W-qTGpmTjZbkUUBAM6TslDsrCHnTBU,2725
+judgeval/scorers/__init__.py,sha256=7f_zsJV29gO_u4o0n2011SovJ1ZGAI5Zk11WPUBPWcs,858
+judgeval/scorers/agent_scorer.py,sha256=TjwD_YglSywr3EowEojiCyg5qDgCRa5LRGc5nFdmIBc,703
+judgeval/scorers/api_scorer.py,sha256=xlhqkeMUBFxl8daSXOTWOYwZjBAz7o6b4sVD5f8cIHw,2523
+judgeval/scorers/base_scorer.py,sha256=rZfRPolxbsghWS0-FMqXrbJKuLobysMGjAeZkqn0cr0,3581
+judgeval/scorers/example_scorer.py,sha256=2n45y3LMV1Q-ARyXLHqvVWETlnY1DqS7OLzPu9IBGz8,716
+judgeval/scorers/exceptions.py,sha256=ACDHK5-TWiF3NTk-wycaedpbrdobm-CvvC1JA_iP-Mk,179
+judgeval/scorers/score.py,sha256=oQC6LMsalL4XAtXlA3S84MB9YiHjqYIgMhRRi-zaXJ4,6577
+judgeval/scorers/utils.py,sha256=I13XwyBKMUpZK2oacgkwaieUOGlQbKxKKn6SdiA4lmE,4532
+judgeval/scorers/judgeval_scorers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+judgeval/scorers/judgeval_scorers/api_scorers/__init__.py,sha256=saQmMol_CMzp1yovjgiF3YYhLTu-4O9xtmhygj1LRh8,1496
+judgeval/scorers/judgeval_scorers/api_scorers/answer_correctness.py,sha256=zJsU0VrUmRhY9qav48c6jTyDqUwI3JzhV9ajtlJCe0M,544
+judgeval/scorers/judgeval_scorers/api_scorers/answer_relevancy.py,sha256=UDfzTO9Fx0FA5o0wfD8kprrGA4eW-43Rn9Gc0BQtKgY,393
+judgeval/scorers/judgeval_scorers/api_scorers/classifier_scorer.py,sha256=rbG80J88cer7yfVRvLpu-x2cdwiTl-ztnF2wgOoIlcE,2624
+judgeval/scorers/judgeval_scorers/api_scorers/derailment_scorer.py,sha256=mbBvirNcivu9dP6deM7FogDXrdwI9o8yqsO8IeKPSb4,309
+judgeval/scorers/judgeval_scorers/api_scorers/execution_order.py,sha256=NABO_iBdkOo3fdPVcoWfUkeN-FTX3t3-bErMjdqBXdk,1361
+judgeval/scorers/judgeval_scorers/api_scorers/faithfulness.py,sha256=ps51bTgQsD9xGYsk1v9bx0WxQMqywSllCE9_xlJkLd8,531
+judgeval/scorers/judgeval_scorers/api_scorers/hallucination.py,sha256=SnFLvU4FGsMeUVUp0SGHSy_6wgfwr_vHPGnZx5YJl_Q,691
+judgeval/scorers/judgeval_scorers/api_scorers/instruction_adherence.py,sha256=aQzu-TiGqG74JDQ927evv5yGmnZw2AOolyHvlIhiUbI,683
+judgeval/scorers/judgeval_scorers/api_scorers/tool_dependency.py,sha256=Mcp1CjMNyOax9UkvoRdSyUYdO2Os1-Nko43y89m2Luo,594
+judgeval/scorers/judgeval_scorers/api_scorers/tool_order.py,sha256=Z2FLGBC7m_CLx-CMgXVuTvYvN0vY5yOcWA0ImBkeBfY,787
+judgeval/scorers/judgeval_scorers/classifiers/__init__.py,sha256=Qt81W5ZCwMvBAne0LfQDb8xvg5iOG1vEYP7WizgwAZo,67
+judgeval/scorers/judgeval_scorers/classifiers/text2sql/__init__.py,sha256=8iTzMvou1Dr8pybul6lZHKjc9Ye2-0_racRGYkhEdTY,74
+judgeval/scorers/judgeval_scorers/classifiers/text2sql/text2sql_scorer.py,sha256=gloLzThkFsr8sHQargDAH8XaDrlF6OCuc_69hyNslFU,2589
+judgeval/tracer/__init__.py,sha256=wkuXtOGDCrwgPPXlh_sSJmvGuWaAMHyNzk1TzB5f9aI,148
+judgeval/utils/alerts.py,sha256=3w_AjQrgfmOZvfqCridW8WAnHVxHHXokX9jNzVFyGjA,3297
+judgeval/utils/file_utils.py,sha256=wIEn8kjM0WrP216RGU_yhZhFOMWIS5ckigyHbzFSOMk,1774
+judgeval/utils/requests.py,sha256=rbmZTaiyWI8t2YUkhk11SIe3dF7j2j25L1BuFp_1PII,770
+judgeval-0.0.53.dist-info/METADATA,sha256=dwUw2htkiBkwbwWouoezCpKM-g5IsOkMr0KRgKytMQg,54767
+judgeval-0.0.53.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+judgeval-0.0.53.dist-info/licenses/LICENSE.md,sha256=tKmCg7k5QOmxPK19XMfzim04QiQJPmgIm0pAn55IJwk,11352
+judgeval-0.0.53.dist-info/RECORD,,

judgeval/data/custom_example.py DELETED Viewed

@@ -1,19 +0,0 @@
-from pydantic import BaseModel, Field
-from typing import Optional, List, Dict, Any
-from uuid import uuid4
-class CustomExample(BaseModel):
-    input: Optional[Dict[str, Any]] = None
-    actual_output: Optional[Dict[str, Any]] = None
-    expected_output: Optional[Dict[str, Any]] = None
-    context: Optional[List[str]] = None
-    retrieval_context: Optional[List[str]] = None
-    additional_metadata: Optional[Dict[str, Any]] = None
-    tools_called: Optional[List[str]] = None
-    expected_tools: Optional[List[str]] = None
-    name: Optional[str] = None
-    example_id: str = Field(default_factory=lambda: str(uuid4()))
-    example_index: Optional[int] = None
-    timestamp: Optional[str] = None
-    trace_id: Optional[str] = None

judgeval/scorers/judgeval_scorer.py DELETED Viewed

@@ -1,177 +0,0 @@
-"""
-Judgeval Scorer class
-Enables client to create custom scorers that do not fall under any of the ready-made Judgment scorers.
-To create a custom scorer, extend this class and implement the `score_example`, `a_score_example`, and `success_check` methods.
-"""
-from typing import Optional, Dict, Union, List
-from abc import abstractmethod
-from judgeval.common.logger import debug, info, warning, error
-from judgeval.judges import JudgevalJudge
-from judgeval.judges.utils import create_judge
-from judgeval.constants import UNBOUNDED_SCORERS
-from judgeval.data.example import ExampleParams
-class JudgevalScorer:
-    """
-    Base class for scorers in `judgeval`.
-    In practice, you should not implement this class unless you are creating a custom scorer.
-    Judgeval offers 10+ default scorers that you can use out of the box.
-    If you want to create a scorer that does not fall under any of the ready-made Judgment scorers,
-    you can create a custom scorer by extending this class.
-    """
-    score_type: str  # name of your new scorer
-    threshold: float  # The threshold to pass a test while using this scorer as a scorer
-    score: Optional[float] = None  # The float score of the scorer run on the test case
-    score_breakdown: Optional[Dict] = None
-    reason: Optional[str] = (
-        None  # The reason for the score when evaluating the test case
-    )
-    success: Optional[bool] = None  # Whether the test case passed or failed
-    evaluation_model: Optional[str] = None  # The model used to evaluate the test case
-    strict_mode: bool = False  # Whether to run the scorer in strict mode
-    async_mode: bool = True  # Whether to run the scorer in async mode
-    verbose_mode: bool = True  # Whether to run the scorer in verbose mode
-    include_reason: bool = False  # Whether to include the reason in the output
-    custom_example: bool = False  # Whether the scorer corresponds to CustomExamples
-    error: Optional[str] = None  # The error message if the scorer failed
-    evaluation_cost: Optional[float] = None  # The cost of running the scorer
-    verbose_logs: Optional[str] = None  # The verbose logs of the scorer
-    additional_metadata: Optional[Dict] = None  # Additional metadata for the scorer
-    required_params: Optional[List[ExampleParams]] = (
-        None  # The required parameters for the scorer
-    )
-    def __init__(
-        self,
-        score_type: str,
-        threshold: float,
-        score: Optional[float] = None,
-        score_breakdown: Optional[Dict] = None,
-        reason: Optional[str] = None,
-        success: Optional[bool] = None,
-        evaluation_model: Optional[str] = None,
-        required_params: Optional[List[ExampleParams]] = None,
-        strict_mode: bool = False,
-        async_mode: bool = True,
-        verbose_mode: bool = True,
-        include_reason: bool = False,
-        custom_example: bool = False,
-        error: Optional[str] = None,
-        evaluation_cost: Optional[float] = None,
-        verbose_logs: Optional[str] = None,
-        additional_metadata: Optional[Dict] = None,
-    ):
-        debug(
-            f"Initializing JudgevalScorer with score_type={score_type}, threshold={threshold}"
-        )
-        if score_type in UNBOUNDED_SCORERS:
-            if threshold < 0:
-                raise ValueError(
-                    f"Threshold for {score_type} must be greater than 0, got: {threshold}"
-                )
-        else:
-            if not 0 <= threshold <= 1:
-                raise ValueError(
-                    f"Threshold for {score_type} must be between 0 and 1, got: {threshold}"
-                )
-        if strict_mode:
-            warning("Strict mode enabled - scoring will be more rigorous")
-        info(f"JudgevalScorer initialized with evaluation_model: {evaluation_model}")
-        self.score_type = score_type
-        self.threshold = threshold
-        self.score = score
-        self.score_breakdown = score_breakdown
-        self.reason = reason
-        self.success = success
-        self.evaluation_model = evaluation_model
-        self.strict_mode = strict_mode
-        self.async_mode = async_mode
-        self.verbose_mode = verbose_mode
-        self.include_reason = include_reason
-        self.custom_example = custom_example
-        self.error = error
-        self.evaluation_cost = evaluation_cost
-        self.verbose_logs = verbose_logs
-        self.additional_metadata = additional_metadata
-        self.required_params = required_params
-    def _add_model(self, model: Optional[Union[str, List[str], JudgevalJudge]] = None):
-        """
-        Adds the evaluation model to the JudgevalScorer instance
-        This method is used at eval time
-        """
-        self.model, self.using_native_model = create_judge(model)
-        self.evaluation_model = self.model.get_model_name()
-    @abstractmethod
-    def score_example(self, example, *args, **kwargs) -> float:
-        """
-        Measures the score on a single example
-        """
-        warning("Attempting to call unimplemented score_example method")
-        error("score_example method not implemented")
-        raise NotImplementedError(
-            "You must implement the `score` method in your custom scorer"
-        )
-    @abstractmethod
-    async def a_score_example(self, example, *args, **kwargs) -> float:
-        """
-        Asynchronously measures the score on a single example
-        """
-        warning("Attempting to call unimplemented a_score_example method")
-        error("a_score_example method not implemented")
-        raise NotImplementedError(
-            "You must implement the `a_score` method in your custom scorer"
-        )
-    @abstractmethod
-    def _success_check(self) -> bool:
-        """
-        For unit testing, determines whether the test case passes or fails
-        """
-        warning("Attempting to call unimplemented success_check method")
-        error("_success_check method not implemented")
-        raise NotImplementedError(
-            "You must implement the `_success_check` method in your custom scorer"
-        )
-    def __str__(self):
-        debug("Converting JudgevalScorer instance to string representation")
-        if self.error:
-            warning(f"JudgevalScorer contains error: {self.error}")
-        info(f"JudgevalScorer status - success: {self.success}, score: {self.score}")
-        attributes = {
-            "score_type": self.score_type,
-            "threshold": self.threshold,
-            "score": self.score,
-            "score_breakdown": self.score_breakdown,
-            "reason": self.reason,
-            "success": self.success,
-            "evaluation_model": self.evaluation_model,
-            "strict_mode": self.strict_mode,
-            "async_mode": self.async_mode,
-            "verbose_mode": self.verbose_mode,
-            "include_reason": self.include_reason,
-            "error": self.error,
-            "evaluation_cost": self.evaluation_cost,
-            "verbose_logs": self.verbose_logs,
-            "additional_metadata": self.additional_metadata,
-        }
-        return f"JudgevalScorer({attributes})"
-    def to_dict(self):
-        return {
-            "score_type": str(
-                self.score_type
-            ),  # Convert enum to string for serialization
-            "threshold": self.threshold,
-        }

judgeval/scorers/judgeval_scorers/api_scorers/comparison.py DELETED Viewed

@@ -1,45 +0,0 @@
-"""
-`judgeval` comparison scorer
-TODO add link to docs page for this scorer
-"""
-# Internal imports
-from judgeval.scorers.api_scorer import APIJudgmentScorer
-from judgeval.constants import APIScorer
-from typing import Optional, Dict
-from judgeval.data import ExampleParams
-class ComparisonScorer(APIJudgmentScorer):
-    kwargs: Optional[Dict] = None
-    def __init__(self, threshold: float, criteria: str, description: str):
-        super().__init__(
-            threshold=threshold,
-            score_type=APIScorer.COMPARISON,
-            required_params=[
-                ExampleParams.INPUT,
-                ExampleParams.ACTUAL_OUTPUT,
-                ExampleParams.EXPECTED_OUTPUT,
-            ],
-        )
-        self.kwargs = {"criteria": criteria, "description": description}
-    @property
-    def __name__(self):
-        return f"Comparison-{self.kwargs['criteria']}"
-    def to_dict(self) -> dict:
-        """
-        Converts the scorer configuration to a dictionary format.
-        Returns:
-            dict: A dictionary containing the scorer's configuration
-        """
-        return {
-            "score_type": self.score_type,
-            "threshold": self.threshold,
-            "kwargs": self.kwargs,
-        }

judgeval/scorers/judgeval_scorers/api_scorers/contextual_precision.py DELETED Viewed

@@ -1,29 +0,0 @@
-"""
-`judgeval` contextual precision scorer
-TODO add link to docs page for this scorer
-"""
-# Internal imports
-from judgeval.scorers.api_scorer import APIJudgmentScorer
-from judgeval.constants import APIScorer
-from judgeval.data import ExampleParams
-class ContextualPrecisionScorer(APIJudgmentScorer):
-    def __init__(self, threshold: float):
-        super().__init__(
-            threshold=threshold,
-            score_type=APIScorer.CONTEXTUAL_PRECISION,
-            required_params=[
-                ExampleParams.INPUT,
-                ExampleParams.ACTUAL_OUTPUT,
-                ExampleParams.RETRIEVAL_CONTEXT,
-                ExampleParams.EXPECTED_OUTPUT,
-            ],
-        )
-    @property
-    def __name__(self):
-        return "Contextual Precision"

judgeval/scorers/judgeval_scorers/api_scorers/contextual_recall.py DELETED Viewed

@@ -1,29 +0,0 @@
-"""
-`judgeval` contextual recall scorer
-TODO add link to docs page for this scorer
-"""
-# Internal imports
-from judgeval.scorers.api_scorer import APIJudgmentScorer
-from judgeval.constants import APIScorer
-from judgeval.data import ExampleParams
-class ContextualRecallScorer(APIJudgmentScorer):
-    def __init__(self, threshold: float):
-        super().__init__(
-            threshold=threshold,
-            score_type=APIScorer.CONTEXTUAL_RECALL,
-            required_params=[
-                ExampleParams.INPUT,
-                ExampleParams.ACTUAL_OUTPUT,
-                ExampleParams.EXPECTED_OUTPUT,
-                ExampleParams.RETRIEVAL_CONTEXT,
-            ],
-        )
-    @property
-    def __name__(self):
-        return "Contextual Recall"

judgeval/scorers/judgeval_scorers/api_scorers/contextual_relevancy.py DELETED Viewed

@@ -1,32 +0,0 @@
-"""
-`judgeval` contextual relevancy scorer
-TODO add link to docs page for this scorer
-"""
-# Internal imports
-from judgeval.scorers.api_scorer import APIJudgmentScorer
-from judgeval.constants import APIScorer
-from judgeval.data import ExampleParams
-class ContextualRelevancyScorer(APIJudgmentScorer):
-    """
-    Scorer that checks if the output of a model is relevant to the retrieval context
-    """
-    def __init__(self, threshold: float):
-        super().__init__(
-            threshold=threshold,
-            score_type=APIScorer.CONTEXTUAL_RELEVANCY,
-            required_params=[
-                ExampleParams.INPUT,
-                ExampleParams.ACTUAL_OUTPUT,
-                ExampleParams.RETRIEVAL_CONTEXT,
-            ],
-        )
-    @property
-    def __name__(self):
-        return "Contextual Relevancy"

judgeval/scorers/judgeval_scorers/api_scorers/groundedness.py DELETED Viewed

@@ -1,28 +0,0 @@
-"""
-`judgeval` Groundedness scorer
-TODO add link to docs page for this scorer
-"""
-# Internal imports
-from judgeval.scorers.api_scorer import APIJudgmentScorer
-from judgeval.constants import APIScorer
-from judgeval.data import ExampleParams
-class GroundednessScorer(APIJudgmentScorer):
-    def __init__(self, threshold: float):
-        super().__init__(
-            threshold=threshold,
-            score_type=APIScorer.GROUNDEDNESS,
-            required_params=[
-                ExampleParams.INPUT,
-                ExampleParams.ACTUAL_OUTPUT,
-                ExampleParams.RETRIEVAL_CONTEXT,
-            ],
-        )
-    @property
-    def __name__(self):
-        return "Groundedness"

judgeval/scorers/judgeval_scorers/api_scorers/json_correctness.py DELETED Viewed

@@ -1,38 +0,0 @@
-"""
-`judgeval` JSON correctness scorer
-TODO add link to docs page for this scorer
-"""
-# External imports
-from pydantic import BaseModel, Field
-# Internal imports
-from judgeval.scorers.api_scorer import APIJudgmentScorer
-from judgeval.constants import APIScorer
-from judgeval.data import ExampleParams
-class JSONCorrectnessScorer(APIJudgmentScorer):
-    json_schema: BaseModel = Field(None, exclude=True)
-    def __init__(self, threshold: float, json_schema: BaseModel):
-        super().__init__(
-            threshold=threshold,
-            score_type=APIScorer.JSON_CORRECTNESS,
-            required_params=[
-                ExampleParams.INPUT,
-                ExampleParams.ACTUAL_OUTPUT,
-            ],
-        )
-        object.__setattr__(self, "json_schema", json_schema)
-    def to_dict(self):
-        base_dict = super().to_dict()  # Get the parent class's dictionary
-        base_dict["kwargs"] = {"json_schema": self.json_schema.model_json_schema()}
-        return base_dict
-    @property
-    def __name__(self):
-        return "JSON Correctness"

judgeval/scorers/judgeval_scorers/api_scorers/summarization.py DELETED Viewed

@@ -1,27 +0,0 @@
-"""
-`judgeval` summarization scorer
-TODO add link to docs page for this scorer
-"""
-# Internal imports
-from judgeval.scorers.api_scorer import APIJudgmentScorer
-from judgeval.constants import APIScorer
-from judgeval.data import ExampleParams
-class SummarizationScorer(APIJudgmentScorer):
-    def __init__(self, threshold: float):
-        super().__init__(
-            threshold=threshold,
-            score_type=APIScorer.SUMMARIZATION,
-            required_params=[
-                ExampleParams.INPUT,
-                ExampleParams.ACTUAL_OUTPUT,
-            ],
-        )
-    @property
-    def __name__(self):
-        return "Summarization"

judgeval 0.0.51__py3-none-any.whl → 0.0.53__py3-none-any.whl

judgeval 0.0.51__py3-none-any.whl → 0.0.53__py3-none-any.whl