judgeval 0.0.11__py3-none-any.whl → 0.22.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of judgeval might be problematic.
- judgeval/__init__.py +177 -12
- judgeval/api/__init__.py +519 -0
- judgeval/api/api_types.py +407 -0
- judgeval/cli.py +79 -0
- judgeval/constants.py +76 -47
- judgeval/data/__init__.py +3 -3
- judgeval/data/evaluation_run.py +125 -0
- judgeval/data/example.py +15 -56
- judgeval/data/judgment_types.py +450 -0
- judgeval/data/result.py +29 -73
- judgeval/data/scorer_data.py +29 -62
- judgeval/data/scripts/fix_default_factory.py +23 -0
- judgeval/data/scripts/openapi_transform.py +123 -0
- judgeval/data/trace.py +121 -0
- judgeval/dataset/__init__.py +264 -0
- judgeval/env.py +52 -0
- judgeval/evaluation/__init__.py +344 -0
- judgeval/exceptions.py +27 -0
- judgeval/integrations/langgraph/__init__.py +13 -0
- judgeval/integrations/openlit/__init__.py +50 -0
- judgeval/judges/__init__.py +2 -3
- judgeval/judges/base_judge.py +2 -3
- judgeval/judges/litellm_judge.py +100 -20
- judgeval/judges/together_judge.py +101 -20
- judgeval/judges/utils.py +20 -24
- judgeval/logger.py +62 -0
- judgeval/prompt/__init__.py +330 -0
- judgeval/scorers/__init__.py +18 -25
- judgeval/scorers/agent_scorer.py +17 -0
- judgeval/scorers/api_scorer.py +45 -41
- judgeval/scorers/base_scorer.py +83 -38
- judgeval/scorers/example_scorer.py +17 -0
- judgeval/scorers/exceptions.py +1 -0
- judgeval/scorers/judgeval_scorers/__init__.py +0 -148
- judgeval/scorers/judgeval_scorers/api_scorers/__init__.py +19 -17
- judgeval/scorers/judgeval_scorers/api_scorers/answer_correctness.py +13 -19
- judgeval/scorers/judgeval_scorers/api_scorers/answer_relevancy.py +12 -19
- judgeval/scorers/judgeval_scorers/api_scorers/faithfulness.py +13 -19
- judgeval/scorers/judgeval_scorers/api_scorers/instruction_adherence.py +15 -0
- judgeval/scorers/judgeval_scorers/api_scorers/prompt_scorer.py +327 -0
- judgeval/scorers/score.py +77 -306
- judgeval/scorers/utils.py +4 -199
- judgeval/tracer/__init__.py +1122 -2
- judgeval/tracer/constants.py +1 -0
- judgeval/tracer/exporters/__init__.py +40 -0
- judgeval/tracer/exporters/s3.py +119 -0
- judgeval/tracer/exporters/store.py +59 -0
- judgeval/tracer/exporters/utils.py +32 -0
- judgeval/tracer/keys.py +63 -0
- judgeval/tracer/llm/__init__.py +7 -0
- judgeval/tracer/llm/config.py +78 -0
- judgeval/tracer/llm/constants.py +9 -0
- judgeval/tracer/llm/llm_anthropic/__init__.py +3 -0
- judgeval/tracer/llm/llm_anthropic/config.py +6 -0
- judgeval/tracer/llm/llm_anthropic/messages.py +452 -0
- judgeval/tracer/llm/llm_anthropic/messages_stream.py +322 -0
- judgeval/tracer/llm/llm_anthropic/wrapper.py +59 -0
- judgeval/tracer/llm/llm_google/__init__.py +3 -0
- judgeval/tracer/llm/llm_google/config.py +6 -0
- judgeval/tracer/llm/llm_google/generate_content.py +127 -0
- judgeval/tracer/llm/llm_google/wrapper.py +30 -0
- judgeval/tracer/llm/llm_openai/__init__.py +3 -0
- judgeval/tracer/llm/llm_openai/beta_chat_completions.py +216 -0
- judgeval/tracer/llm/llm_openai/chat_completions.py +501 -0
- judgeval/tracer/llm/llm_openai/config.py +6 -0
- judgeval/tracer/llm/llm_openai/responses.py +506 -0
- judgeval/tracer/llm/llm_openai/utils.py +42 -0
- judgeval/tracer/llm/llm_openai/wrapper.py +63 -0
- judgeval/tracer/llm/llm_together/__init__.py +3 -0
- judgeval/tracer/llm/llm_together/chat_completions.py +406 -0
- judgeval/tracer/llm/llm_together/config.py +6 -0
- judgeval/tracer/llm/llm_together/wrapper.py +52 -0
- judgeval/tracer/llm/providers.py +19 -0
- judgeval/tracer/managers.py +167 -0
- judgeval/tracer/processors/__init__.py +220 -0
- judgeval/tracer/utils.py +19 -0
- judgeval/trainer/__init__.py +14 -0
- judgeval/trainer/base_trainer.py +122 -0
- judgeval/trainer/config.py +128 -0
- judgeval/trainer/console.py +144 -0
- judgeval/trainer/fireworks_trainer.py +396 -0
- judgeval/trainer/trainable_model.py +243 -0
- judgeval/trainer/trainer.py +70 -0
- judgeval/utils/async_utils.py +39 -0
- judgeval/utils/decorators/__init__.py +0 -0
- judgeval/utils/decorators/dont_throw.py +37 -0
- judgeval/utils/decorators/use_once.py +13 -0
- judgeval/utils/file_utils.py +97 -0
- judgeval/utils/guards.py +36 -0
- judgeval/utils/meta.py +27 -0
- judgeval/utils/project.py +15 -0
- judgeval/utils/serialize.py +253 -0
- judgeval/utils/testing.py +70 -0
- judgeval/utils/url.py +10 -0
- judgeval/utils/version_check.py +28 -0
- judgeval/utils/wrappers/README.md +3 -0
- judgeval/utils/wrappers/__init__.py +15 -0
- judgeval/utils/wrappers/immutable_wrap_async.py +74 -0
- judgeval/utils/wrappers/immutable_wrap_async_iterator.py +84 -0
- judgeval/utils/wrappers/immutable_wrap_sync.py +66 -0
- judgeval/utils/wrappers/immutable_wrap_sync_iterator.py +84 -0
- judgeval/utils/wrappers/mutable_wrap_async.py +67 -0
- judgeval/utils/wrappers/mutable_wrap_sync.py +67 -0
- judgeval/utils/wrappers/py.typed +0 -0
- judgeval/utils/wrappers/utils.py +35 -0
- judgeval/version.py +5 -0
- judgeval/warnings.py +4 -0
- judgeval-0.22.2.dist-info/METADATA +265 -0
- judgeval-0.22.2.dist-info/RECORD +112 -0
- judgeval-0.22.2.dist-info/entry_points.txt +2 -0
- judgeval/clients.py +0 -39
- judgeval/common/__init__.py +0 -8
- judgeval/common/exceptions.py +0 -28
- judgeval/common/logger.py +0 -189
- judgeval/common/tracer.py +0 -798
- judgeval/common/utils.py +0 -763
- judgeval/data/api_example.py +0 -111
- judgeval/data/datasets/__init__.py +0 -5
- judgeval/data/datasets/dataset.py +0 -286
- judgeval/data/datasets/eval_dataset_client.py +0 -193
- judgeval/data/datasets/ground_truth.py +0 -54
- judgeval/data/datasets/utils.py +0 -74
- judgeval/evaluation_run.py +0 -132
- judgeval/judges/mixture_of_judges.py +0 -248
- judgeval/judgment_client.py +0 -354
- judgeval/run_evaluation.py +0 -439
- judgeval/scorers/judgeval_scorer.py +0 -140
- judgeval/scorers/judgeval_scorers/api_scorers/contextual_precision.py +0 -19
- judgeval/scorers/judgeval_scorers/api_scorers/contextual_recall.py +0 -19
- judgeval/scorers/judgeval_scorers/api_scorers/contextual_relevancy.py +0 -22
- judgeval/scorers/judgeval_scorers/api_scorers/hallucination.py +0 -19
- judgeval/scorers/judgeval_scorers/api_scorers/json_correctness.py +0 -32
- judgeval/scorers/judgeval_scorers/api_scorers/summarization.py +0 -20
- judgeval/scorers/judgeval_scorers/api_scorers/tool_correctness.py +0 -19
- judgeval/scorers/judgeval_scorers/classifiers/__init__.py +0 -3
- judgeval/scorers/judgeval_scorers/classifiers/text2sql/__init__.py +0 -3
- judgeval/scorers/judgeval_scorers/classifiers/text2sql/text2sql_scorer.py +0 -54
- judgeval/scorers/judgeval_scorers/local_implementations/__init__.py +0 -24
- judgeval/scorers/judgeval_scorers/local_implementations/answer_correctness/__init__.py +0 -4
- judgeval/scorers/judgeval_scorers/local_implementations/answer_correctness/answer_correctness_scorer.py +0 -277
- judgeval/scorers/judgeval_scorers/local_implementations/answer_correctness/prompts.py +0 -169
- judgeval/scorers/judgeval_scorers/local_implementations/answer_relevancy/__init__.py +0 -4
- judgeval/scorers/judgeval_scorers/local_implementations/answer_relevancy/answer_relevancy_scorer.py +0 -298
- judgeval/scorers/judgeval_scorers/local_implementations/answer_relevancy/prompts.py +0 -174
- judgeval/scorers/judgeval_scorers/local_implementations/contextual_precision/__init__.py +0 -3
- judgeval/scorers/judgeval_scorers/local_implementations/contextual_precision/contextual_precision_scorer.py +0 -264
- judgeval/scorers/judgeval_scorers/local_implementations/contextual_precision/prompts.py +0 -106
- judgeval/scorers/judgeval_scorers/local_implementations/contextual_recall/__init__.py +0 -3
- judgeval/scorers/judgeval_scorers/local_implementations/contextual_recall/contextual_recall_scorer.py +0 -254
- judgeval/scorers/judgeval_scorers/local_implementations/contextual_recall/prompts.py +0 -142
- judgeval/scorers/judgeval_scorers/local_implementations/contextual_relevancy/__init__.py +0 -3
- judgeval/scorers/judgeval_scorers/local_implementations/contextual_relevancy/contextual_relevancy_scorer.py +0 -245
- judgeval/scorers/judgeval_scorers/local_implementations/contextual_relevancy/prompts.py +0 -121
- judgeval/scorers/judgeval_scorers/local_implementations/faithfulness/__init__.py +0 -3
- judgeval/scorers/judgeval_scorers/local_implementations/faithfulness/faithfulness_scorer.py +0 -325
- judgeval/scorers/judgeval_scorers/local_implementations/faithfulness/prompts.py +0 -268
- judgeval/scorers/judgeval_scorers/local_implementations/hallucination/__init__.py +0 -3
- judgeval/scorers/judgeval_scorers/local_implementations/hallucination/hallucination_scorer.py +0 -263
- judgeval/scorers/judgeval_scorers/local_implementations/hallucination/prompts.py +0 -104
- judgeval/scorers/judgeval_scorers/local_implementations/json_correctness/__init__.py +0 -5
- judgeval/scorers/judgeval_scorers/local_implementations/json_correctness/json_correctness_scorer.py +0 -134
- judgeval/scorers/judgeval_scorers/local_implementations/summarization/__init__.py +0 -3
- judgeval/scorers/judgeval_scorers/local_implementations/summarization/prompts.py +0 -247
- judgeval/scorers/judgeval_scorers/local_implementations/summarization/summarization_scorer.py +0 -550
- judgeval/scorers/judgeval_scorers/local_implementations/tool_correctness/__init__.py +0 -3
- judgeval/scorers/judgeval_scorers/local_implementations/tool_correctness/tool_correctness_scorer.py +0 -157
- judgeval/scorers/prompt_scorer.py +0 -439
- judgeval-0.0.11.dist-info/METADATA +0 -36
- judgeval-0.0.11.dist-info/RECORD +0 -84
- {judgeval-0.0.11.dist-info → judgeval-0.22.2.dist-info}/WHEEL +0 -0
- {judgeval-0.0.11.dist-info → judgeval-0.22.2.dist-info}/licenses/LICENSE.md +0 -0
judgeval/scorers/judgeval_scorers/__init__.py
@@ -1,148 +0,0 @@
-from typing import Type, Optional, Any
-from functools import wraps
-
-# Import implementations
-from judgeval.scorers.judgeval_scorers.api_scorers import (
-    ToolCorrectnessScorer as APIToolCorrectnessScorer,
-    JSONCorrectnessScorer as APIJSONCorrectnessScorer,
-    SummarizationScorer as APISummarizationScorer,
-    HallucinationScorer as APIHallucinationScorer,
-    FaithfulnessScorer as APIFaithfulnessScorer,
-    ContextualRelevancyScorer as APIContextualRelevancyScorer,
-    ContextualPrecisionScorer as APIContextualPrecisionScorer,
-    ContextualRecallScorer as APIContextualRecallScorer,
-    AnswerRelevancyScorer as APIAnswerRelevancyScorer,
-    AnswerCorrectnessScorer as APIAnswerCorrectnessScorer,
-)
-
-from judgeval.scorers.judgeval_scorers.local_implementations import (
-    AnswerRelevancyScorer as LocalAnswerRelevancyScorer,
-    ContextualPrecisionScorer as LocalContextualPrecisionScorer,
-    ContextualRecallScorer as LocalContextualRecallScorer,
-    ContextualRelevancyScorer as LocalContextualRelevancyScorer,
-    FaithfulnessScorer as LocalFaithfulnessScorer,
-    JsonCorrectnessScorer as LocalJsonCorrectnessScorer,
-    ToolCorrectnessScorer as LocalToolCorrectnessScorer,
-    HallucinationScorer as LocalHallucinationScorer,
-    SummarizationScorer as LocalSummarizationScorer,
-    AnswerCorrectnessScorer as LocalAnswerCorrectnessScorer
-)
-
-from judgeval.scorers.judgeval_scorers.classifiers import Text2SQLScorer
-
-
-class ScorerWrapper:
-    """
-    Wrapper class that can dynamically load either API or local implementation of a scorer.
-    """
-    def __init__(self, api_implementation: Type, local_implementation: Optional[Type] = None):
-        self.api_implementation = api_implementation
-        self.local_implementation = local_implementation
-        self._instance = None
-        self._init_args = None
-        self._init_kwargs = None
-
-    def __call__(self, *args, **kwargs):
-        """Store initialization arguments for later use when implementation is loaded"""
-        self._init_args = args
-        self._init_kwargs = kwargs
-        return self
-
-    def load_implementation(self, use_judgment: bool = True) -> Any:
-        """
-        Load the appropriate implementation based on the use_judgment flag.
-
-        Args:
-            use_judgment (bool): If True, use API implementation. If False, use local implementation.
-
-        Returns:
-            Instance of the appropriate implementation
-
-        Raises:
-            ValueError: If local implementation is requested but not available
-        """
-        if self._instance is not None:
-            return self._instance
-
-        if use_judgment:
-            implementation = self.api_implementation
-        else:
-            if self.local_implementation is None:
-                raise ValueError("No local implementation available for this scorer")
-            implementation = self.local_implementation
-
-        args = self._init_args or ()
-        kwargs = self._init_kwargs or {}
-        self._instance = implementation(*args, **kwargs)
-        return self._instance
-
-    def __getattr__(self, name):
-        """Defer all attribute access to the loaded implementation"""
-        if self._instance is None:
-            raise RuntimeError("Implementation not loaded. Call load_implementation() first")
-        return getattr(self._instance, name)
-
-# Create wrapped versions of all scorers
-
-AnswerCorrectnessScorer = ScorerWrapper(
-    api_implementation=APIAnswerCorrectnessScorer,
-    local_implementation=LocalAnswerCorrectnessScorer
-)
-
-AnswerRelevancyScorer = ScorerWrapper(
-    api_implementation=APIAnswerRelevancyScorer,
-    local_implementation=LocalAnswerRelevancyScorer
-)
-
-ToolCorrectnessScorer = ScorerWrapper(
-    api_implementation=APIToolCorrectnessScorer,
-    local_implementation=LocalToolCorrectnessScorer
-)
-
-JSONCorrectnessScorer = ScorerWrapper(
-    api_implementation=APIJSONCorrectnessScorer,
-    local_implementation=LocalJsonCorrectnessScorer
-)
-
-SummarizationScorer = ScorerWrapper(
-    api_implementation=APISummarizationScorer,
-    local_implementation=LocalSummarizationScorer
-)
-
-HallucinationScorer = ScorerWrapper(
-    api_implementation=APIHallucinationScorer,
-    local_implementation=LocalHallucinationScorer
-)
-
-FaithfulnessScorer = ScorerWrapper(
-    api_implementation=APIFaithfulnessScorer,
-    local_implementation=LocalFaithfulnessScorer
-)
-
-ContextualRelevancyScorer = ScorerWrapper(
-    api_implementation=APIContextualRelevancyScorer,
-    local_implementation=LocalContextualRelevancyScorer
-)
-
-ContextualPrecisionScorer = ScorerWrapper(
-    api_implementation=APIContextualPrecisionScorer,
-    local_implementation=LocalContextualPrecisionScorer
-)
-
-ContextualRecallScorer = ScorerWrapper(
-    api_implementation=APIContextualRecallScorer,
-    local_implementation=LocalContextualRecallScorer
-)
-
-__all__ = [
-    "ToolCorrectnessScorer",
-    "JSONCorrectnessScorer",
-    "SummarizationScorer",
-    "HallucinationScorer",
-    "FaithfulnessScorer",
-    "ContextualRelevancyScorer",
-    "ContextualPrecisionScorer",
-    "ContextualRecallScorer",
-    "AnswerRelevancyScorer",
-    "Text2SQLScorer",
-]
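For orientation: the deleted module exported ScorerWrapper instances rather than scorer classes, so calling a name only cached the constructor arguments, and the real scorer was built lazily by load_implementation(). A minimal sketch of that old calling pattern, as hypothetical caller code inferred from the deleted lines above:

    from judgeval.scorers.judgeval_scorers import FaithfulnessScorer

    wrapper = FaithfulnessScorer(threshold=0.8)  # __call__ just stores the args and returns the wrapper
    scorer = wrapper.load_implementation(use_judgment=False)  # builds the local implementation
    # Any attribute access before load_implementation() raised RuntimeError via __getattr__.

In 0.22.2 this indirection is gone: the local_implementations package is removed outright (see the deleted files in the list above) and the API scorers are imported directly.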
judgeval/scorers/judgeval_scorers/api_scorers/__init__.py
@@ -1,23 +1,25 @@
-from judgeval.scorers.judgeval_scorers.api_scorers.
-
-
-from judgeval.scorers.judgeval_scorers.api_scorers.
-
-
-from judgeval.scorers.judgeval_scorers.api_scorers.
-
-
-from judgeval.scorers.judgeval_scorers.api_scorers.
+from judgeval.scorers.judgeval_scorers.api_scorers.faithfulness import (
+    FaithfulnessScorer,
+)
+from judgeval.scorers.judgeval_scorers.api_scorers.answer_relevancy import (
+    AnswerRelevancyScorer,
+)
+from judgeval.scorers.judgeval_scorers.api_scorers.answer_correctness import (
+    AnswerCorrectnessScorer,
+)
+from judgeval.scorers.judgeval_scorers.api_scorers.instruction_adherence import (
+    InstructionAdherenceScorer,
+)
+from judgeval.scorers.judgeval_scorers.api_scorers.prompt_scorer import (
+    TracePromptScorer,
+    PromptScorer,
+)
 
 __all__ = [
-    "ToolCorrectnessScorer",
-    "JSONCorrectnessScorer",
-    "SummarizationScorer",
-    "HallucinationScorer",
     "FaithfulnessScorer",
-    "ContextualRelevancyScorer",
-    "ContextualPrecisionScorer",
-    "ContextualRecallScorer",
     "AnswerRelevancyScorer",
     "AnswerCorrectnessScorer",
+    "InstructionAdherenceScorer",
+    "TracePromptScorer",
+    "PromptScorer",
 ]
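The net effect on the public surface: the removed import lines (their module paths are truncated in the source diff) and the trimmed __all__ shrink the re-exports from ten names to six. A quick smoke test of the new surface, assuming judgeval 0.22.2 is installed:

    from judgeval.scorers.judgeval_scorers.api_scorers import (
        FaithfulnessScorer,
        AnswerRelevancyScorer,
        AnswerCorrectnessScorer,
        InstructionAdherenceScorer,
        TracePromptScorer,
        PromptScorer,
    )

    # Names dropped from __all__ should no longer resolve, e.g.:
    # from judgeval.scorers.judgeval_scorers.api_scorers import HallucinationScorer  # ImportError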
judgeval/scorers/judgeval_scorers/api_scorers/answer_correctness.py
@@ -1,19 +1,13 @@
-
-
-
-
-
-
-
-
-
-
-
-
-
-    def __init__(self, threshold: float):
-        super().__init__(threshold=threshold, score_type=APIScorer.ANSWER_CORRECTNESS)
-
-    @property
-    def __name__(self):
-        return "Answer Correctness"
+from judgeval.scorers.api_scorer import ExampleAPIScorerConfig
+from judgeval.constants import APIScorerType
+from judgeval.data import ExampleParams
+from typing import List
+
+
+class AnswerCorrectnessScorer(ExampleAPIScorerConfig):
+    score_type: APIScorerType = APIScorerType.ANSWER_CORRECTNESS
+    required_params: List[ExampleParams] = [
+        ExampleParams.INPUT,
+        ExampleParams.ACTUAL_OUTPUT,
+        ExampleParams.EXPECTED_OUTPUT,
+    ]
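The rewrite turns each scorer from an __init__-based subclass into a declarative config: score_type and required_params are class-level field defaults, so construction reduces to passing tuning knobs. The answer_relevancy and faithfulness hunks below follow the same pattern, differing only in required_params. A sketch of the new instantiation, assuming ExampleAPIScorerConfig still exposes the threshold field the old constructor took as a keyword:

    from judgeval.constants import APIScorerType
    from judgeval.scorers.judgeval_scorers.api_scorers.answer_correctness import (
        AnswerCorrectnessScorer,
    )

    scorer = AnswerCorrectnessScorer(threshold=0.9)  # threshold keyword is an assumption here
    assert scorer.score_type == APIScorerType.ANSWER_CORRECTNESS  # fixed by the class default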
judgeval/scorers/judgeval_scorers/api_scorers/answer_relevancy.py
@@ -1,19 +1,12 @@
-
-
-
-
-
-
-
-
-
-
-
-
-class AnswerRelevancyScorer(APIJudgmentScorer):
-    def __init__(self, threshold: float):
-        super().__init__(threshold=threshold, score_type=APIScorer.ANSWER_RELEVANCY)
-
-    @property
-    def __name__(self):
-        return "Answer Relevancy"
+from judgeval.scorers.api_scorer import ExampleAPIScorerConfig
+from judgeval.constants import APIScorerType
+from judgeval.data import ExampleParams
+from typing import List
+
+
+class AnswerRelevancyScorer(ExampleAPIScorerConfig):
+    score_type: APIScorerType = APIScorerType.ANSWER_RELEVANCY
+    required_params: List[ExampleParams] = [
+        ExampleParams.INPUT,
+        ExampleParams.ACTUAL_OUTPUT,
+    ]
judgeval/scorers/judgeval_scorers/api_scorers/faithfulness.py
@@ -1,19 +1,13 @@
-
-
-
-
-
-
-
-
-
-
-
-
-
-    def __init__(self, threshold: float):
-        super().__init__(threshold=threshold, score_type=APIScorer.FAITHFULNESS)
-
-    @property
-    def __name__(self):
-        return "Faithfulness"
+from judgeval.scorers.api_scorer import ExampleAPIScorerConfig
+from judgeval.constants import APIScorerType
+from judgeval.data import ExampleParams
+from typing import List
+
+
+class FaithfulnessScorer(ExampleAPIScorerConfig):
+    score_type: APIScorerType = APIScorerType.FAITHFULNESS
+    required_params: List[ExampleParams] = [
+        ExampleParams.INPUT,
+        ExampleParams.ACTUAL_OUTPUT,
+        ExampleParams.RETRIEVAL_CONTEXT,
+    ]
judgeval/scorers/judgeval_scorers/api_scorers/instruction_adherence.py
@@ -0,0 +1,15 @@
+from judgeval.scorers.api_scorer import ExampleAPIScorerConfig
+from judgeval.constants import APIScorerType
+from judgeval.data import ExampleParams
+
+
+class InstructionAdherenceScorer(ExampleAPIScorerConfig):
+    def __init__(self, threshold: float):
+        super().__init__(
+            threshold=threshold,
+            score_type=APIScorerType.INSTRUCTION_ADHERENCE,
+            required_params=[
+                ExampleParams.INPUT,
+                ExampleParams.ACTUAL_OUTPUT,
+            ],
+        )
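Note the inconsistency: unlike the three rewritten scorers above, this new file keeps the constructor style and passes required_params through super().__init__ rather than declaring them as field defaults. From the caller's side usage looks the same (a sketch, under the same threshold assumption as above):

    from judgeval.scorers.judgeval_scorers.api_scorers.instruction_adherence import (
        InstructionAdherenceScorer,
    )

    scorer = InstructionAdherenceScorer(threshold=0.7)  # INPUT and ACTUAL_OUTPUT are wired in by __init__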
judgeval/scorers/judgeval_scorers/api_scorers/prompt_scorer.py
@@ -0,0 +1,327 @@
+from judgeval.scorers.api_scorer import (
+    APIScorerConfig,
+    ExampleAPIScorerConfig,
+    TraceAPIScorerConfig,
+)
+from judgeval.constants import APIScorerType
+from typing import Dict, Any, Optional
+from judgeval.api import JudgmentSyncClient
+from judgeval.exceptions import JudgmentAPIError
+import os
+from judgeval.logger import judgeval_logger
+from abc import ABC
+from judgeval.env import JUDGMENT_DEFAULT_GPT_MODEL
+from copy import copy
+from judgeval.utils.decorators.dont_throw import dont_throw
+
+
+def push_prompt_scorer(
+    name: str,
+    prompt: str,
+    threshold: float,
+    options: Optional[Dict[str, float]] = None,
+    model: str = JUDGMENT_DEFAULT_GPT_MODEL,
+    description: Optional[str] = None,
+    judgment_api_key: str = os.getenv("JUDGMENT_API_KEY") or "",
+    organization_id: str = os.getenv("JUDGMENT_ORG_ID") or "",
+    is_trace: bool = False,
+) -> str:
+    client = JudgmentSyncClient(judgment_api_key, organization_id)
+    try:
+        r = client.save_scorer(
+            payload={
+                "name": name,
+                "prompt": prompt,
+                "threshold": threshold,
+                "options": options,
+                "model": model,
+                "description": description,
+                "is_trace": is_trace,
+            }
+        )
+    except JudgmentAPIError as e:
+        raise JudgmentAPIError(
+            status_code=e.status_code,
+            detail=f"Failed to save prompt scorer: {e.detail}",
+            response=e.response,
+        )
+    return r["scorer_response"]["name"]
+
+
+def fetch_prompt_scorer(
+    name: str,
+    judgment_api_key: str = os.getenv("JUDGMENT_API_KEY") or "",
+    organization_id: str = os.getenv("JUDGMENT_ORG_ID") or "",
+):
+    client = JudgmentSyncClient(judgment_api_key, organization_id)
+    try:
+        fetched_scorers = client.fetch_scorers({"names": [name]})
+        if len(fetched_scorers["scorers"]) == 0:
+            judgeval_logger.error(f"Prompt scorer '{name}' not found")
+            raise JudgmentAPIError(
+                status_code=404,
+                detail=f"Prompt scorer '{name}' not found",
+                response=None,  # type: ignore
+            )
+        else:
+            scorer_config = fetched_scorers["scorers"][0]
+            scorer_config.pop("created_at")
+            scorer_config.pop("updated_at")
+            return scorer_config
+    except JudgmentAPIError as e:
+        raise JudgmentAPIError(
+            status_code=e.status_code,
+            detail=f"Failed to fetch prompt scorer '{name}': {e.detail}",
+            response=e.response,
+        )
+
+
+def scorer_exists(
+    name: str,
+    judgment_api_key: str = os.getenv("JUDGMENT_API_KEY") or "",
+    organization_id: str = os.getenv("JUDGMENT_ORG_ID") or "",
+):
+    client = JudgmentSyncClient(judgment_api_key, organization_id)
+    try:
+        return client.scorer_exists({"name": name})["exists"]
+    except JudgmentAPIError as e:
+        if e.status_code == 500:
+            raise JudgmentAPIError(
+                status_code=e.status_code,
+                detail=f"The server is temporarily unavailable. Please try your request again in a few moments. Error details: {e.detail}",
+                response=e.response,
+            )
+        raise JudgmentAPIError(
+            status_code=e.status_code,
+            detail=f"Failed to check if scorer exists: {e.detail}",
+            response=e.response,
+        )
+
+
+class BasePromptScorer(ABC, APIScorerConfig):
+    score_type: APIScorerType
+    prompt: str
+    options: Optional[Dict[str, float]] = None
+    description: Optional[str] = None
+    judgment_api_key: str = os.getenv("JUDGMENT_API_KEY") or ""
+    organization_id: str = os.getenv("JUDGMENT_ORG_ID") or ""
+
+    @classmethod
+    @dont_throw
+    def get(
+        cls,
+        name: str,
+        judgment_api_key: str = os.getenv("JUDGMENT_API_KEY") or "",
+        organization_id: str = os.getenv("JUDGMENT_ORG_ID") or "",
+    ):
+        scorer_config = fetch_prompt_scorer(name, judgment_api_key, organization_id)
+        if scorer_config["is_trace"] != issubclass(cls, TracePromptScorer):
+            raise JudgmentAPIError(
+                status_code=400,
+                detail=f"Scorer with name {name} is not a {cls.__name__}",
+                response=None,  # type: ignore
+            )
+        if issubclass(cls, TracePromptScorer):
+            score_type = APIScorerType.TRACE_PROMPT_SCORER
+        else:
+            score_type = APIScorerType.PROMPT_SCORER
+        return cls(
+            score_type=score_type,
+            name=name,
+            prompt=scorer_config["prompt"],
+            threshold=scorer_config["threshold"],
+            options=scorer_config.get("options"),
+            model=scorer_config.get("model"),
+            description=scorer_config.get("description"),
+            judgment_api_key=judgment_api_key,
+            organization_id=organization_id,
+        )
+
+    @classmethod
+    def create(
+        cls,
+        name: str,
+        prompt: str,
+        threshold: float = 0.5,
+        options: Optional[Dict[str, float]] = None,
+        model: str = JUDGMENT_DEFAULT_GPT_MODEL,
+        description: Optional[str] = None,
+        judgment_api_key: str = os.getenv("JUDGMENT_API_KEY") or "",
+        organization_id: str = os.getenv("JUDGMENT_ORG_ID") or "",
+    ):
+        if not scorer_exists(name, judgment_api_key, organization_id):
+            if issubclass(cls, TracePromptScorer):
+                is_trace = True
+                score_type = APIScorerType.TRACE_PROMPT_SCORER
+            else:
+                is_trace = False
+                score_type = APIScorerType.PROMPT_SCORER
+            push_prompt_scorer(
+                name,
+                prompt,
+                threshold,
+                options,
+                model,
+                description,
+                judgment_api_key,
+                organization_id,
+                is_trace,
+            )
+            judgeval_logger.info(f"Successfully created PromptScorer: {name}")
+            return cls(
+                score_type=score_type,
+                name=name,
+                prompt=prompt,
+                threshold=threshold,
+                options=options,
+                model=model,
+                description=description,
+                judgment_api_key=judgment_api_key,
+                organization_id=organization_id,
+            )
+        else:
+            raise JudgmentAPIError(
+                status_code=400,
+                detail=f"Scorer with name {name} already exists. Either use the existing scorer with the get() method or use a new name.",
+                response=None,  # type: ignore
+            )
+
+    # Setter functions. Each setter function pushes the scorer to the DB.
+    def set_threshold(self, threshold: float):
+        """
+        Updates the threshold of the scorer.
+        """
+        self.threshold = threshold
+        self.push_prompt_scorer()
+
+    def set_prompt(self, prompt: str):
+        """
+        Updates the prompt with the new prompt.
+
+        Sample prompt:
+        "Did the chatbot answer the user's question in a kind way?"
+        """
+        self.prompt = prompt
+        self.push_prompt_scorer()
+        judgeval_logger.info(f"Successfully updated prompt for {self.name}")
+
+    def set_model(self, model: str):
+        """
+        Updates the model of the scorer.
+        """
+        self.model = model
+        self.push_prompt_scorer()
+        judgeval_logger.info(f"Successfully updated model for {self.name}")
+
+    def set_options(self, options: Optional[Dict[str, float]]):
+        """
+        Updates the options of the scorer.
+        """
+        self.options = options
+        self.push_prompt_scorer()
+        judgeval_logger.info(f"Successfully updated options for {self.name}")
+
+    def set_description(self, description: Optional[str]):
+        """
+        Updates the description of the scorer.
+        """
+        self.description = description
+        self.push_prompt_scorer()
+        judgeval_logger.info(f"Successfully updated description for {self.name}")
+
+    def append_to_prompt(self, prompt_addition: str):
+        """
+        Appends a string to the prompt.
+        """
+        self.prompt += prompt_addition
+        self.push_prompt_scorer()
+        judgeval_logger.info(f"Successfully appended to prompt for {self.name}")
+
+    # Getters
+    def get_threshold(self) -> float:
+        """
+        Returns the threshold of the scorer.
+        """
+        return self.threshold
+
+    def get_prompt(self) -> str:
+        """
+        Returns the prompt of the scorer.
+        """
+        return self.prompt
+
+    def get_model(self) -> str:
+        """
+        Returns the model of the scorer.
+        """
+        return self.model
+
+    def get_options(self) -> Dict[str, float] | None:
+        """
+        Returns the options of the scorer.
+        """
+        return copy(self.options) if self.options is not None else None
+
+    def get_description(self) -> str | None:
+        """
+        Returns the description of the scorer.
+        """
+        return self.description
+
+    def get_name(self) -> str:
+        """
+        Returns the name of the scorer.
+        """
+        return self.name
+
+    def get_config(self) -> dict:
+        """
+        Returns a dictionary with all the fields in the scorer.
+        """
+        return {
+            "name": self.name,
+            "model": self.model,
+            "prompt": self.prompt,
+            "threshold": self.threshold,
+            "options": self.options,
+            "description": self.description,
+        }
+
+    def push_prompt_scorer(self):
+        """
+        Pushes the scorer to the DB.
+        """
+        push_prompt_scorer(
+            self.name,
+            self.prompt,
+            self.threshold,
+            self.options,
+            self.model,
+            self.description,
+            self.judgment_api_key,
+            self.organization_id,
+            isinstance(self, TracePromptScorer),
+        )
+
+    def __str__(self):
+        return f"PromptScorer(name={self.name}, model={self.model}, prompt={self.prompt}, threshold={self.threshold}, options={self.options}, description={self.description})"
+
+    def model_dump(self, *args, **kwargs) -> Dict[str, Any]:
+        base = super().model_dump(*args, **kwargs)
+        base_fields = set(APIScorerConfig.model_fields.keys())
+        all_fields = set(self.__class__.model_fields.keys())
+
+        extra_fields = all_fields - base_fields - {"kwargs"}
+
+        base["kwargs"] = {
+            k: getattr(self, k) for k in extra_fields if getattr(self, k) is not None
+        }
+        return base
+
+
+class PromptScorer(BasePromptScorer, ExampleAPIScorerConfig):
+    pass
+
+
+class TracePromptScorer(BasePromptScorer, TraceAPIScorerConfig):
+    pass
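Taken together, the new module gives prompt scorers a full remote lifecycle: create() registers a scorer (and raises a 400 if the name is taken), get() rehydrates an existing one by name, and every setter immediately pushes the change back through push_prompt_scorer(). A usage sketch grounded in the classmethods above; the scorer name is hypothetical, and JUDGMENT_API_KEY and JUDGMENT_ORG_ID are assumed to be set in the environment:

    from judgeval.scorers.judgeval_scorers.api_scorers.prompt_scorer import PromptScorer

    # "kindness" is a hypothetical name; the prompt is the sample from the set_prompt docstring.
    scorer = PromptScorer.create(
        name="kindness",
        prompt="Did the chatbot answer the user's question in a kind way?",
        threshold=0.5,
    )
    scorer.set_threshold(0.8)                  # each setter round-trips to the DB
    same = PromptScorer.get(name="kindness")   # loads the stored config by name

One sharp edge worth flagging: get() is wrapped in @dont_throw, which (judging by the decorator's name) likely swallows the JudgmentAPIError paths and returns None on failure, whereas create() raises.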