PyPI - judgeval - Versions diffs - 0.9.4__py3-none-any.whl → 0.10.0__py3-none-any.whl - Mend

judgeval 0.9.4__py3-none-any.whl → 0.10.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (35) hide show

judgeval/__init__.py +2 -2
judgeval/api/__init__.py +28 -96
judgeval/api/api_types.py +49 -140
judgeval/constants.py +1 -5
judgeval/data/__init__.py +1 -3
judgeval/data/example.py +4 -2
judgeval/data/judgment_types.py +57 -165
judgeval/data/result.py +1 -2
judgeval/data/trace.py +14 -40
judgeval/dataset/__init__.py +15 -42
judgeval/evaluation/__init__.py +23 -34
judgeval/scorers/__init__.py +9 -7
judgeval/scorers/api_scorer.py +8 -0
judgeval/scorers/base_scorer.py +0 -1
judgeval/scorers/judgeval_scorers/api_scorers/__init__.py +2 -10
judgeval/scorers/judgeval_scorers/api_scorers/answer_correctness.py +2 -2
judgeval/scorers/judgeval_scorers/api_scorers/answer_relevancy.py +2 -2
judgeval/scorers/judgeval_scorers/api_scorers/faithfulness.py +2 -2
judgeval/scorers/judgeval_scorers/api_scorers/instruction_adherence.py +2 -2
judgeval/scorers/judgeval_scorers/api_scorers/prompt_scorer.py +43 -4
judgeval/tracer/__init__.py +13 -50
judgeval/tracer/local_eval_queue.py +2 -2
judgeval/tracer/processors/__init__.py +1 -1
judgeval/tracer/utils.py +1 -1
judgeval/trainer/trainer.py +4 -4
{judgeval-0.9.4.dist-info → judgeval-0.10.0.dist-info}/METADATA +1 -1
{judgeval-0.9.4.dist-info → judgeval-0.10.0.dist-info}/RECORD +30 -35
judgeval/data/trace_run.py +0 -39
judgeval/scorers/judgeval_scorers/api_scorers/derailment_scorer.py +0 -14
judgeval/scorers/judgeval_scorers/api_scorers/tool_dependency.py +0 -20
judgeval/scorers/judgeval_scorers/api_scorers/tool_order.py +0 -27
judgeval/scorers/trace_api_scorer.py +0 -5
{judgeval-0.9.4.dist-info → judgeval-0.10.0.dist-info}/WHEEL +0 -0
{judgeval-0.9.4.dist-info → judgeval-0.10.0.dist-info}/entry_points.txt +0 -0
{judgeval-0.9.4.dist-info → judgeval-0.10.0.dist-info}/licenses/LICENSE.md +0 -0

judgeval/scorers/base_scorer.py CHANGED Viewed

@@ -85,7 +85,6 @@ class BaseScorer(BaseModel):
         This method is used at eval time
         """
         self.model_client, self.using_native_model = create_judge(model)
-        self.model = self.model_client.get_model_name() or model
     def success_check(self) -> bool:
         """

judgeval/scorers/judgeval_scorers/api_scorers/__init__.py CHANGED Viewed

@@ -10,24 +10,16 @@ from judgeval.scorers.judgeval_scorers.api_scorers.answer_correctness import (
 from judgeval.scorers.judgeval_scorers.api_scorers.instruction_adherence import (
     InstructionAdherenceScorer,
 )
-from judgeval.scorers.judgeval_scorers.api_scorers.derailment_scorer import (
-    DerailmentScorer,
-)
-from judgeval.scorers.judgeval_scorers.api_scorers.tool_order import ToolOrderScorer
 from judgeval.scorers.judgeval_scorers.api_scorers.prompt_scorer import (
+    TracePromptScorer,
     PromptScorer,
 )
-from judgeval.scorers.judgeval_scorers.api_scorers.tool_dependency import (
-    ToolDependencyScorer,
-)
 __all__ = [
     "FaithfulnessScorer",
     "AnswerRelevancyScorer",
     "AnswerCorrectnessScorer",
     "InstructionAdherenceScorer",
-    "DerailmentScorer",
-    "ToolOrderScorer",
+    "TracePromptScorer",
     "PromptScorer",
-    "ToolDependencyScorer",
 ]

judgeval/scorers/judgeval_scorers/api_scorers/answer_correctness.py CHANGED Viewed

@@ -6,13 +6,13 @@ TODO add link to docs page for this scorer
 """
 # Internal imports
-from judgeval.scorers.api_scorer import APIScorerConfig
+from judgeval.scorers.api_scorer import ExampleAPIScorerConfig
 from judgeval.constants import APIScorerType
 from judgeval.data import ExampleParams
 from typing import List
-class AnswerCorrectnessScorer(APIScorerConfig):
+class AnswerCorrectnessScorer(ExampleAPIScorerConfig):
     score_type: APIScorerType = APIScorerType.ANSWER_CORRECTNESS
     required_params: List[ExampleParams] = [
         ExampleParams.INPUT,

judgeval/scorers/judgeval_scorers/api_scorers/answer_relevancy.py CHANGED Viewed

@@ -1,10 +1,10 @@
-from judgeval.scorers.api_scorer import APIScorerConfig
+from judgeval.scorers.api_scorer import ExampleAPIScorerConfig
 from judgeval.constants import APIScorerType
 from judgeval.data import ExampleParams
 from typing import List
-class AnswerRelevancyScorer(APIScorerConfig):
+class AnswerRelevancyScorer(ExampleAPIScorerConfig):
     score_type: APIScorerType = APIScorerType.ANSWER_RELEVANCY
     required_params: List[ExampleParams] = [
         ExampleParams.INPUT,

judgeval/scorers/judgeval_scorers/api_scorers/faithfulness.py CHANGED Viewed

@@ -6,13 +6,13 @@ TODO add link to docs page for this scorer
 """
 # Internal imports
-from judgeval.scorers.api_scorer import APIScorerConfig
+from judgeval.scorers.api_scorer import ExampleAPIScorerConfig
 from judgeval.constants import APIScorerType
 from judgeval.data import ExampleParams
 from typing import List
-class FaithfulnessScorer(APIScorerConfig):
+class FaithfulnessScorer(ExampleAPIScorerConfig):
     score_type: APIScorerType = APIScorerType.FAITHFULNESS
     required_params: List[ExampleParams] = [
         ExampleParams.INPUT,

judgeval/scorers/judgeval_scorers/api_scorers/instruction_adherence.py CHANGED Viewed

@@ -6,12 +6,12 @@ TODO add link to docs page for this scorer
 """
 # Internal imports
-from judgeval.scorers.api_scorer import APIScorerConfig
+from judgeval.scorers.api_scorer import ExampleAPIScorerConfig
 from judgeval.constants import APIScorerType
 from judgeval.data import ExampleParams
-class InstructionAdherenceScorer(APIScorerConfig):
+class InstructionAdherenceScorer(ExampleAPIScorerConfig):
     def __init__(self, threshold: float):
         super().__init__(
             threshold=threshold,

judgeval/scorers/judgeval_scorers/api_scorers/prompt_scorer.py CHANGED Viewed

@@ -1,4 +1,8 @@
-from judgeval.scorers.api_scorer import APIScorerConfig
+from judgeval.scorers.api_scorer import (
+    APIScorerConfig,
+    ExampleAPIScorerConfig,
+    TraceAPIScorerConfig,
+)
 from judgeval.constants import APIScorerType
 from typing import Dict, Any, Optional
 from judgeval.api import JudgmentSyncClient
@@ -6,6 +10,7 @@ from judgeval.exceptions import JudgmentAPIError
 import os
 from copy import copy
 from judgeval.logger import judgeval_logger
+from abc import ABC
 def push_prompt_scorer(
@@ -15,6 +20,7 @@ def push_prompt_scorer(
     options: Optional[Dict[str, float]] = None,
     judgment_api_key: str = os.getenv("JUDGMENT_API_KEY") or "",
     organization_id: str = os.getenv("JUDGMENT_ORG_ID") or "",
+    is_trace: Optional[bool] = None,
 ) -> str:
     client = JudgmentSyncClient(judgment_api_key, organization_id)
     try:
@@ -24,6 +30,7 @@ def push_prompt_scorer(
                 "prompt": prompt,
                 "threshold": threshold,
                 "options": options,
+                "is_trace": is_trace,
             }
         )
     except JudgmentAPIError as e:
@@ -88,7 +95,7 @@ def scorer_exists(
         )
-class PromptScorer(APIScorerConfig):
+class BasePromptScorer(ABC, APIScorerConfig):
     """
     In the Judgment backend, this scorer is implemented as a PromptScorer that takes
     1. a system role that may involve the Example object
@@ -97,9 +104,9 @@ class PromptScorer(APIScorerConfig):
     and uses a judge to execute the evaluation from the system role and classify into one of the options
     """
+    score_type: APIScorerType
     prompt: str
     options: Optional[Dict[str, float]] = None
-    score_type: APIScorerType = APIScorerType.PROMPT_SCORER
     judgment_api_key: str = os.getenv("JUDGMENT_API_KEY") or ""
     organization_id: str = os.getenv("JUDGMENT_ORG_ID") or ""
@@ -111,7 +118,18 @@ class PromptScorer(APIScorerConfig):
         organization_id: str = os.getenv("JUDGMENT_ORG_ID") or "",
     ):
         scorer_config = fetch_prompt_scorer(name, judgment_api_key, organization_id)
+        if scorer_config["is_trace"] != issubclass(cls, TracePromptScorer):
+            raise JudgmentAPIError(
+                status_code=400,
+                detail=f"Scorer with name {name} is not a {cls.__name__}",
+                response=None,  # type: ignore
+            )
+        if issubclass(cls, TracePromptScorer):
+            score_type = APIScorerType.TRACE_PROMPT_SCORER
+        else:
+            score_type = APIScorerType.PROMPT_SCORER
         return cls(
+            score_type=score_type,
             name=name,
             prompt=scorer_config["prompt"],
             threshold=scorer_config["threshold"],
@@ -131,11 +149,24 @@ class PromptScorer(APIScorerConfig):
         organization_id: str = os.getenv("JUDGMENT_ORG_ID") or "",
     ):
         if not scorer_exists(name, judgment_api_key, organization_id):
+            if issubclass(cls, TracePromptScorer):
+                is_trace = True
+                score_type = APIScorerType.TRACE_PROMPT_SCORER
+            else:
+                is_trace = False
+                score_type = APIScorerType.PROMPT_SCORER
             push_prompt_scorer(
-                name, prompt, threshold, options, judgment_api_key, organization_id
+                name,
+                prompt,
+                threshold,
+                options,
+                judgment_api_key,
+                organization_id,
+                is_trace,
             )
             judgeval_logger.info(f"Successfully created PromptScorer: {name}")
             return cls(
+                score_type=score_type,
                 name=name,
                 prompt=prompt,
                 threshold=threshold,
@@ -251,3 +282,11 @@ class PromptScorer(APIScorerConfig):
             k: getattr(self, k) for k in extra_fields if getattr(self, k) is not None
         }
         return base
+class PromptScorer(BasePromptScorer, ExampleAPIScorerConfig):
+    pass
+class TracePromptScorer(BasePromptScorer, TraceAPIScorerConfig):
+    pass

judgeval/tracer/__init__.py CHANGED Viewed

@@ -43,8 +43,7 @@ from judgeval.env import (
     JUDGMENT_ORG_ID,
 )
 from judgeval.logger import judgeval_logger
-from judgeval.scorers.api_scorer import APIScorerConfig
-from judgeval.scorers.trace_api_scorer import TraceAPIScorerConfig
+from judgeval.scorers.api_scorer import ExampleAPIScorerConfig, TraceAPIScorerConfig
 from judgeval.scorers.base_scorer import BaseScorer
 from judgeval.tracer.constants import JUDGEVAL_TRACER_INSTRUMENTING_MODULE_NAME
 from judgeval.tracer.managers import (
@@ -485,11 +484,11 @@ class Tracer:
                         safe_serialize(format_inputs(f, args, kwargs)),
                     )
+                    self.judgment_processor.emit_partial()
                     if scorer_config:
                         self._set_pending_trace_eval(span, scorer_config, args, kwargs)
-                    self.judgment_processor.emit_partial()
                     result = f(*args, **kwargs)
                 except Exception as user_exc:
                     span.record_exception(user_exc)
@@ -537,13 +536,13 @@ class Tracer:
                         safe_serialize(format_inputs(f, args, kwargs)),
                     )
+                    self.judgment_processor.emit_partial()
                     if scorer_config:
                         self._set_pending_trace_eval(
                             main_span, scorer_config, args, kwargs
                         )
-                    self.judgment_processor.emit_partial()
                     generator = f(*args, **kwargs)
                     set_span_attribute(
                         main_span, AttributeKeys.JUDGMENT_OUTPUT, "<generator>"
@@ -587,11 +586,11 @@ class Tracer:
                         safe_serialize(format_inputs(f, args, kwargs)),
                     )
+                    self.judgment_processor.emit_partial()
                     if scorer_config:
                         self._set_pending_trace_eval(span, scorer_config, args, kwargs)
-                    self.judgment_processor.emit_partial()
                     result = await f(*args, **kwargs)
                 except Exception as user_exc:
                     span.record_exception(user_exc)
@@ -639,13 +638,13 @@ class Tracer:
                         safe_serialize(format_inputs(f, args, kwargs)),
                     )
+                    self.judgment_processor.emit_partial()
                     if scorer_config:
                         self._set_pending_trace_eval(
                             main_span, scorer_config, args, kwargs
                         )
-                    self.judgment_processor.emit_partial()
                     async_generator = f(*args, **kwargs)
                     set_span_attribute(
                         main_span, AttributeKeys.JUDGMENT_OUTPUT, "<async_generator>"
@@ -825,42 +824,6 @@ class Tracer:
             return sync_wrapper
-    @overload
-    def observe_tools(
-        self,
-        cls: Cls,
-        /,
-        *,
-        exclude_methods: List[str] = [],
-        include_private: bool = False,
-    ) -> Cls: ...
-    @overload
-    def observe_tools(
-        self,
-        cls: None = None,
-        /,
-        *,
-        exclude_methods: List[str] = [],
-        include_private: bool = False,
-    ) -> Callable[[Cls], Cls]: ...
-    def observe_tools(
-        self,
-        cls: Cls | None = None,
-        /,
-        *,
-        exclude_methods: List[str] = [],
-        include_private: bool = False,
-    ) -> Cls | Callable[[Cls], Cls]:
-        if cls is None:
-            return partial(
-                self.observe_tools,
-                exclude_methods=exclude_methods,
-                include_private=include_private,
-            )
-        return cls
     def wrap(self, client: ApiClient) -> ApiClient:
         return wrap_provider(self, client)
@@ -899,7 +862,7 @@ class Tracer:
         self,
         /,
         *,
-        scorer: Union[APIScorerConfig, BaseScorer],
+        scorer: Union[ExampleAPIScorerConfig, BaseScorer],
         example: Example,
         model: str = JUDGMENT_DEFAULT_GPT_MODEL,
         sampling_rate: float = 1.0,
@@ -908,9 +871,9 @@ class Tracer:
             judgeval_logger.info("Evaluation is not enabled, skipping evaluation")
             return
-        if not isinstance(scorer, (APIScorerConfig, BaseScorer)):
+        if not isinstance(scorer, (ExampleAPIScorerConfig, BaseScorer)):
             judgeval_logger.error(
-                "Scorer must be an instance of APIScorerConfig or BaseScorer, got %s, skipping evaluation."
+                "Scorer must be an instance of ExampleAPIScorerConfig or BaseScorer, got %s, skipping evaluation."
                 % type(scorer)
             )
             return
@@ -939,7 +902,7 @@ class Tracer:
         span_context = self.get_current_span().get_span_context()
         trace_id = format(span_context.trace_id, "032x")
         span_id = format(span_context.span_id, "016x")
-        hosted_scoring = isinstance(scorer, APIScorerConfig) or (
+        hosted_scoring = isinstance(scorer, ExampleAPIScorerConfig) or (
             isinstance(scorer, BaseScorer) and scorer.server_hosted
         )
         eval_run_name = f"async_evaluate_{span_id}"  # note this name doesnt matter because we don't save the experiment only the example and scorer_data

judgeval/tracer/local_eval_queue.py CHANGED Viewed

@@ -24,7 +24,7 @@ class LocalEvaluationQueue:
     """Lightweight in-memory queue for local evaluation runs.
     Only supports EvaluationRuns with local scorers (BaseScorer instances).
-    API scorers (APIScorerConfig) are not supported as they have their own queue.
+    API scorers (ExampleAPIScorerConfig) are not supported as they have their own queue.
     """
     def __init__(
@@ -54,7 +54,7 @@ class LocalEvaluationQueue:
         if not evaluation_run.custom_scorers:
             raise ValueError(
                 "LocalEvaluationQueue only supports runs with local scorers (BaseScorer). "
-                "Found only APIScorerConfig instances."
+                "Found only ExampleAPIScorerConfig instances."
             )
         return safe_run_async(

judgeval/tracer/processors/__init__.py CHANGED Viewed

@@ -97,7 +97,7 @@ class JudgmentSpanProcessor(BatchSpanProcessor):
             resource_attributes[ResourceKeys.JUDGMENT_PROJECT_ID] = self.project_id
         else:
             judgeval_logger.error(
-                f"Failed to resolve project {self.project_name}, please create it first at https://app.judgmentlabs.ai/projects. Skipping Judgment export."
+                f"Failed to resolve project {self.project_name}, please create it first at https://app.judgmentlabs.ai/org/{self.organization_id}/projects. Skipping Judgment export."
             )
         self.resource_attributes = resource_attributes

judgeval/tracer/utils.py CHANGED Viewed

@@ -2,7 +2,7 @@ from typing import Any
 from opentelemetry.trace import Span
 from pydantic import BaseModel
 from typing import Callable, Optional
-from judgeval.scorers.trace_api_scorer import TraceAPIScorerConfig
+from judgeval.scorers.api_scorer import TraceAPIScorerConfig
 from judgeval.env import JUDGMENT_DEFAULT_GPT_MODEL

judgeval/trainer/trainer.py CHANGED Viewed

@@ -10,7 +10,7 @@ from judgeval.tracer.exporters.store import SpanStore
 from judgeval.tracer.exporters import InMemorySpanExporter
 from judgeval.tracer.keys import AttributeKeys
 from judgeval import JudgmentClient
-from judgeval.scorers import BaseScorer, APIScorerConfig
+from judgeval.scorers import BaseScorer, ExampleAPIScorerConfig
 from judgeval.data import Example
 from .console import _spinner_progress, _print_progress, _print_progress_update
 from judgeval.exceptions import JudgmentRuntimeError
@@ -154,7 +154,7 @@ class JudgmentTrainer:
     async def generate_rollouts_and_rewards(
         self,
         agent_function: Callable[[Any], Any],
-        scorers: List[Union[APIScorerConfig, BaseScorer]],
+        scorers: List[Union[ExampleAPIScorerConfig, BaseScorer]],
         prompts: List[Any],
         num_prompts_per_step: Optional[int] = None,
         num_generations_per_prompt: Optional[int] = None,
@@ -264,7 +264,7 @@ class JudgmentTrainer:
     async def run_reinforcement_learning(
         self,
         agent_function: Callable[[Any], Any],
-        scorers: List[Union[APIScorerConfig, BaseScorer]],
+        scorers: List[Union[ExampleAPIScorerConfig, BaseScorer]],
         prompts: List[Any],
     ) -> ModelConfig:
         """
@@ -370,7 +370,7 @@ class JudgmentTrainer:
     async def train(
         self,
         agent_function: Callable[[Any], Any],
-        scorers: List[Union[APIScorerConfig, BaseScorer]],
+        scorers: List[Union[ExampleAPIScorerConfig, BaseScorer]],
         prompts: List[Any],
         rft_provider: Optional[str] = None,
     ) -> ModelConfig:

{judgeval-0.9.4.dist-info → judgeval-0.10.0.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: judgeval
-Version: 0.9.4
+Version: 0.10.0
 Summary: Judgeval Package
 Project-URL: Homepage, https://github.com/JudgmentLabs/judgeval
 Project-URL: Issues, https://github.com/JudgmentLabs/judgeval/issues

{judgeval-0.9.4.dist-info → judgeval-0.10.0.dist-info}/RECORD RENAMED Viewed

@@ -1,69 +1,64 @@
-judgeval/__init__.py,sha256=1af4bHzNfPIajg0F19xg95sxT7_-vI2jbloic2XhX5M,4948
+judgeval/__init__.py,sha256=MqB1s0zp-Fr_KvKFjGKnRHUeulutmrlMcUyjNRRAU_4,4962
 judgeval/cli.py,sha256=R5IiIQmSVg21kQHX2kL3sOeXCxvvAMSqyva3Z9AoSXc,1560
-judgeval/constants.py,sha256=fqzSY7tDfseWy1trLjCSGC6WVOFEm_4hvA8IFpv7CUc,3683
+judgeval/constants.py,sha256=h7Cuf_2uvNzHZi8nqRFoMpvsQUZMS3mlNB3s2uduse8,3557
 judgeval/env.py,sha256=R0bj7XU29RIVVQjkVMa11ObhOYVMbaE_3LTvL3I9dWM,2212
 judgeval/exceptions.py,sha256=tTbfe4yoOtPXmn22UQz9-6a-5PT9uOko85xaRRwr0Sw,621
 judgeval/logger.py,sha256=ZWbp0QfT1CJnQIjV-Zle4n489nFCKEmD2-ukx--iiow,1553
 judgeval/version.py,sha256=kJtYsih3hTYZ_rY_Lt0RcFqvjAfF5Xo1uNq0jZWJ5pw,73
 judgeval/warnings.py,sha256=LbGte14ppiFjrkp-JJYueZ40NWFvMkWRvPXr6r-fUWw,73
-judgeval/api/__init__.py,sha256=DZ-dijtkzUsjY3CBdCh6TH_PHC5qlI_tAFCBgvAZNjU,14538
-judgeval/api/api_types.py,sha256=4xyqlmV9mEoTUIbii-bj7oS0fVwWrJ_UhYxpXvcBywA,9198
-judgeval/data/__init__.py,sha256=1QagDcSQtfnJ632t9Dnq8d7XjAqhmY4mInOWt8qH9tM,455
+judgeval/api/__init__.py,sha256=RWQDwzT93nXWih3WYMPl1OL2ga9uk0dUGYV7fEDzBso,12764
+judgeval/api/api_types.py,sha256=uyz8ePQI-ec88PVwhHN-KVmldAmNgRjOVmesVDKIBUw,6461
+judgeval/data/__init__.py,sha256=1tU0EN0ThIfQ1fad5I3dKxAfTcZ5U8cvTLcQ6qLVLU0,407
 judgeval/data/evaluation_run.py,sha256=G7ad4eDQTjketfcQRITk8bs8CIO8rm058H1G_qkLmhc,4729
-judgeval/data/example.py,sha256=aTZg0GWQmUEBHk1n9Asw8sz-8YBWKlFsMZYjwq1DfrI,917
-judgeval/data/judgment_types.py,sha256=b2pDeEOSl_zHJLDzqr0AGYbZ5zrooJMr5VmK-bDrN4o,17082
-judgeval/data/result.py,sha256=JQ6f0XzL9p0oPmx-_z2NKUcISO6pISsVZ5dT1jkBeZs,2120
+judgeval/data/example.py,sha256=eGJpF-lyUH734Cg90B7WtU9f8iKoS3VFGeV6R-GVCCc,1039
+judgeval/data/judgment_types.py,sha256=JkhNG6fRBFdryG8ogVZsMWtq3W3JmWh0AYIR8LdBAT4,11773
+judgeval/data/result.py,sha256=LA0OzwcVKwD5NkmtmFuA_EusmYRyE10mjDMXa2bgU1g,2067
 judgeval/data/scorer_data.py,sha256=g9PE0DNLikW0LgxGWhgpCiNVOX8PzqEaZKivifLOUDI,2997
 judgeval/data/tool.py,sha256=bj_WxFg22mypUUVR5KqQRxMDHWvKwiE1MMPjLnTCoDU,99
-judgeval/data/trace.py,sha256=HTeucJqNdFsQI3Ybb6mJ8NkmHkc1vIddzQ7BtQs25k8,1315
-judgeval/data/trace_run.py,sha256=VCQUdDlrHixyiqWW1RUiCtLgqMt-3oW1M1A7CCer2Ok,1635
+judgeval/data/trace.py,sha256=R9RF1kv1JHeOpjXLjErJcxV2RrNrJUSqWcWe73l3f9k,503
 judgeval/data/scripts/fix_default_factory.py,sha256=lvp2JwYZqz-XpD9LZNa3mANZVP-jJSZoNzolI6JWERM,591
 judgeval/data/scripts/openapi_transform.py,sha256=Sm04JClzyP1ga8KA3gkIdsae8Hlx-XU7-x0gHCQYOhg,3877
-judgeval/dataset/__init__.py,sha256=xlg4VgEvbReWOlk6MK4GqJubSOeo17VqoIyjdMwmIf8,6573
-judgeval/evaluation/__init__.py,sha256=O0sk3zP5jbHPtknT6DuB6ijkJ3-0I54mMf1UzDPKMF0,15409
+judgeval/dataset/__init__.py,sha256=S1iLL7ivDLIT3aTNO1ardHqhIRxXMuoW5PFLFIkt4uY,5731
+judgeval/evaluation/__init__.py,sha256=u-aDyLTRebPZigeBbJHpnZk3wQAS7jv_VgLXIi-jMGU,15075
 judgeval/integrations/langgraph/__init__.py,sha256=VvqCKOk65A2gLlr8uWrJVzpRF5OnIja5zwF4hGPEFsw,27540
 judgeval/judges/__init__.py,sha256=e7JnTc1TG_SwqydDHTXHIP0EBazQxt-ydMQG7ghSU5A,228
 judgeval/judges/base_judge.py,sha256=_dz0qWsKRxzXxpRY9l6mrxTRYPSF2FE4ZXkrzhZ4gbY,986
 judgeval/judges/litellm_judge.py,sha256=5vEF0IUo7HVWnOF2ww-DMke8Xkarnz32B_qbgKjc0-I,4182
 judgeval/judges/together_judge.py,sha256=GzwlXZJzle8hT-vWKmq39JyIeanJqJfHDOkrksUbzk0,4398
 judgeval/judges/utils.py,sha256=ITbYwvjU3o9-FIAReFvxh24yJrx9LV3l9BnSBgKUpxg,2068
-judgeval/scorers/__init__.py,sha256=a5f_QcC7P9DjoOu_DMmADlkIXebo0d3zEJDJ7mhN3tM,640
+judgeval/scorers/__init__.py,sha256=34PMPsfR2_3n7T96wpSfAZJWzWlU6v53S3mGX2PE87k,665
 judgeval/scorers/agent_scorer.py,sha256=V1NSwhGWgtXPsX-blKLkDLsPPbEiP-A4614X-95dtlQ,565
-judgeval/scorers/api_scorer.py,sha256=M7cwJ2YY2Mw0pCo1UH-29jwrNd2PdiBRdQtmWS5ijXA,2173
-judgeval/scorers/base_scorer.py,sha256=8uhkmj78R6-Stenl1eo6IVqKSBgkLpoqR0acGi-Fxik,2788
+judgeval/scorers/api_scorer.py,sha256=8TUJut9r74v-qMACiSKAUbDI1v3ZItPXrTz8s4_Lrgk,2287
+judgeval/scorers/base_scorer.py,sha256=naGiZYHnkn9HVwY-jpOY7O6cYPJJJe5dHbrRBSOikxw,2723
 judgeval/scorers/example_scorer.py,sha256=o_BGUztJXjnKnuOqIa9T4PXe0wPoWg63FyH518N1LxA,561
 judgeval/scorers/exceptions.py,sha256=ACDHK5-TWiF3NTk-wycaedpbrdobm-CvvC1JA_iP-Mk,179
 judgeval/scorers/score.py,sha256=xquM59SCtNeuAsrBsHFgBQk3CHp4-bms4oFs24xfcU0,7176
-judgeval/scorers/trace_api_scorer.py,sha256=B2Vp8Jj2I7N-G1weHMm1b_9gVbn0BMcOtestMFNtx08,112
 judgeval/scorers/utils.py,sha256=iSZONwK0HecxUPz-cMCyra_87DSCag1E8BdpF2a4_44,377
 judgeval/scorers/judgeval_scorers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-judgeval/scorers/judgeval_scorers/api_scorers/__init__.py,sha256=MFsxDPZoZibJlsz4RgtLehA3rVcEfS9o5cw0l8gI5IM,1046
-judgeval/scorers/judgeval_scorers/api_scorers/answer_correctness.py,sha256=zJsU0VrUmRhY9qav48c6jTyDqUwI3JzhV9ajtlJCe0M,544
-judgeval/scorers/judgeval_scorers/api_scorers/answer_relevancy.py,sha256=UDfzTO9Fx0FA5o0wfD8kprrGA4eW-43Rn9Gc0BQtKgY,393
-judgeval/scorers/judgeval_scorers/api_scorers/derailment_scorer.py,sha256=mbBvirNcivu9dP6deM7FogDXrdwI9o8yqsO8IeKPSb4,309
-judgeval/scorers/judgeval_scorers/api_scorers/faithfulness.py,sha256=ps51bTgQsD9xGYsk1v9bx0WxQMqywSllCE9_xlJkLd8,531
-judgeval/scorers/judgeval_scorers/api_scorers/instruction_adherence.py,sha256=aQzu-TiGqG74JDQ927evv5yGmnZw2AOolyHvlIhiUbI,683
-judgeval/scorers/judgeval_scorers/api_scorers/prompt_scorer.py,sha256=C-9Q7s9K7mcgFMcEL0I_7XQZMRqrL5MFRi9G6Dx8-v8,8505
-judgeval/scorers/judgeval_scorers/api_scorers/tool_dependency.py,sha256=BhrLnIASZOTT9XJ6giYSoVfdR7NYsjRRTOTNioNtEiU,610
-judgeval/scorers/judgeval_scorers/api_scorers/tool_order.py,sha256=bMu0WMJaXdMyDTN42sVLoWV-lrUHCEa8iDrCI_K7nlQ,808
-judgeval/tracer/__init__.py,sha256=0DM6ixBI75FuVG7UMG_k-KHJm1MyFbRyhAUPm2GYu9A,36057
+judgeval/scorers/judgeval_scorers/api_scorers/__init__.py,sha256=wrq7y9I30GZbwDXIrSh81KRO_-j7i-1DjwX5Hc3PScI,728
+judgeval/scorers/judgeval_scorers/api_scorers/answer_correctness.py,sha256=_qa1sOHUwJubBCfyx6lsE_4vZsUh65VoTZba1NSouis,558
+judgeval/scorers/judgeval_scorers/api_scorers/answer_relevancy.py,sha256=ciiFBQQC4UDsk9qou9OiKbAR31s82eRUY1ZTt1gdM-0,407
+judgeval/scorers/judgeval_scorers/api_scorers/faithfulness.py,sha256=lIJ3GgOI9tfbrC7voZMvlxXdK3X1bhdj2zNxqdaGIkM,545
+judgeval/scorers/judgeval_scorers/api_scorers/instruction_adherence.py,sha256=bSwbpVNhpkpEeX3GtCJuyz5vFyY1gbyqYEfaBF2KTVY,697
+judgeval/scorers/judgeval_scorers/api_scorers/prompt_scorer.py,sha256=E2_TVO88iLSBAdcKYnfHYp4cUyffgG_p1th5aCpjCd8,9680
+judgeval/tracer/__init__.py,sha256=mQQaca8XJRYwSRn7a5x63dFQeA8xGjwfoZYikQCAAyI,35214
 judgeval/tracer/constants.py,sha256=ae8tivAW97awJQxdRB9OMqX50wOLX3zqChT_AGkPBu0,85
 judgeval/tracer/keys.py,sha256=qXPoZSkEhVF-YYfQ9-zeDMVdr4GtpPf2W7MPJaN2AQo,2889
-judgeval/tracer/local_eval_queue.py,sha256=Amt7xkdmVJH1l2itm-ogiIW5oDaLnACisGfsdZjazn0,7228
+judgeval/tracer/local_eval_queue.py,sha256=iv9on1G4woGlhYn1mZATEMkzCiz-qVn2cdzEINzQFYQ,7242
 judgeval/tracer/managers.py,sha256=h2ZHJ61_vf3cS-HlEUiodFzKDUuQWIhYC6n7pMVyM9c,6113
-judgeval/tracer/utils.py,sha256=jljfr-oiCy8agOh0apAoR04tR2XRAzFg51On_LPzue8,600
+judgeval/tracer/utils.py,sha256=3_8ZjjF4XgNyAu9LpThq5dVOcwdwI-E3vb-HRl_Px8c,594
 judgeval/tracer/exporters/__init__.py,sha256=lnZXfPGaQH844HAIuZCQqjqhnmZGA98kHY8Xp-Oi4Ws,1220
 judgeval/tracer/exporters/s3.py,sha256=N9gmw17cnR0VkfAQQkLsNj5BksgNRETThR5qYhWRjP4,4360
 judgeval/tracer/exporters/store.py,sha256=KQV3cyqteesByQjR-9VdPXT9OlUZ-6F08ogqj837_c0,1012
 judgeval/tracer/exporters/utils.py,sha256=JRcoSQuEHxMDJbXfyrUIfA2SHBVkZM82h4bTbYGxkNw,1154
 judgeval/tracer/llm/__init__.py,sha256=p9uwWPg9k-NcWjj9TbwQj55sHhBOqRYx2-Ld6YHaFUs,42625
 judgeval/tracer/llm/providers.py,sha256=QQLJlSNnDjXRAc2Wqw78o254COJUSXX39D7D_mx3NVA,2651
-judgeval/tracer/processors/__init__.py,sha256=fjk3zGxQGp6adnj1-QdSaiRJk-VhyzuKG5vCalvbucI,8645
+judgeval/tracer/processors/__init__.py,sha256=tXbQaXGMQeutgM_7d5Y2EFTeSjbVEBky685Dst_v3rg,8672
 judgeval/trainer/__init__.py,sha256=h_DDVV7HFF7HUPAJFpt2d9wjqgnmEVcHxqZyB1k7pPQ,257
 judgeval/trainer/config.py,sha256=8s0X8B334PJomorwONaUpb6K8cAMxRdYAeQdtx7HPHs,4258
 judgeval/trainer/console.py,sha256=PJ0rCnDwC7aoW-VsLDS96ZyMyagh-l9EOJKff1ATIpo,4342
 judgeval/trainer/trainable_model.py,sha256=vSDtHJJ-fLczC2gkaY9jG6TQvLgWqaVjElm1l8YlJcU,8959
-judgeval/trainer/trainer.py,sha256=_dlV0NSD4jfNgTb2GwghWGBmnoNsooQq85nvIWW5VR4,16550
+judgeval/trainer/trainer.py,sha256=YhepEm3M-5z1RB50cAEsLbZiOIE_fOWiX-thyvBj6v4,16578
 judgeval/utils/async_utils.py,sha256=lgCgi8gkLUcAEepruEkx-AGQgJnAJpKmBIhZx6Y0q2s,935
 judgeval/utils/decorators.py,sha256=rdqY1w0zNL6O6GU6Wdeo0-x5EgpFTEhU2vkgiWsRYdc,525
 judgeval/utils/file_utils.py,sha256=3LI1YCZwO5ogTgJreyOgRgDksey3natO2Td1PQqaPyY,3252
@@ -73,8 +68,8 @@ judgeval/utils/serialize.py,sha256=QXR-8Nj5rqOrI9zLx0oRLdk6DW6Bc7j8eyF4zQ7PLxA,6
 judgeval/utils/testing.py,sha256=kJOq4LlEXaNThfg9oSIRqSK7IH8AwLgbukjn5uxMY7A,3661
 judgeval/utils/url.py,sha256=Shf0v3XcbaWpL0m1eGJEEO_z4TsQCnDB2Rl25OTUmiI,195
 judgeval/utils/version_check.py,sha256=kcF6SvB6GbVKI0Gv9QRVm-kvBn9_z-c3jmPORsXO3h0,1015
-judgeval-0.9.4.dist-info/METADATA,sha256=Hr4y27-wt-658_DJd_D7oAUpDebQS5a9jfdQwGvfmbg,8869
-judgeval-0.9.4.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-judgeval-0.9.4.dist-info/entry_points.txt,sha256=-eoeD-oDLn4A7MSgeBS9Akwanf3_0r0cgEleBcIOjg0,46
-judgeval-0.9.4.dist-info/licenses/LICENSE.md,sha256=tKmCg7k5QOmxPK19XMfzim04QiQJPmgIm0pAn55IJwk,11352
-judgeval-0.9.4.dist-info/RECORD,,
+judgeval-0.10.0.dist-info/METADATA,sha256=vpsStrROABbjYIuuO8UqssmVjq70k4rLH2AvEz4jie8,8870
+judgeval-0.10.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+judgeval-0.10.0.dist-info/entry_points.txt,sha256=-eoeD-oDLn4A7MSgeBS9Akwanf3_0r0cgEleBcIOjg0,46
+judgeval-0.10.0.dist-info/licenses/LICENSE.md,sha256=tKmCg7k5QOmxPK19XMfzim04QiQJPmgIm0pAn55IJwk,11352
+judgeval-0.10.0.dist-info/RECORD,,

judgeval/data/trace_run.py DELETED Viewed

@@ -1,39 +0,0 @@
-from pydantic import BaseModel
-from typing import List, Optional, Dict, Any, Union
-from judgeval.data import Trace
-from judgeval.scorers import APIScorerConfig, BaseScorer
-from judgeval.env import JUDGMENT_DEFAULT_GPT_MODEL
-class TraceRun(BaseModel):
-    """
-    Stores example and evaluation scorers together for running an eval task
-    Args:
-        project_name (str): The name of the project the evaluation results belong to
-        eval_name (str): A name for this evaluation run
-        traces (List[Trace]): The traces to evaluate
-        scorers (List[Union[JudgmentScorer, BaseScorer]]): A list of scorers to use for evaluation
-        model (str): The model used as a judge when using LLM as a Judge
-        metadata (Optional[Dict[str, Any]]): Additional metadata to include for this evaluation run, e.g. comments, dataset name, purpose, etc.
-        rules (Optional[List[Rule]]): Rules to evaluate against scoring results
-        append (Optional[bool]): Whether to append to existing evaluation results
-        tools (Optional[List[Dict[str, Any]]]): List of tools to use for evaluation
-    """
-    organization_id: Optional[str] = None
-    project_name: Optional[str] = None
-    eval_name: Optional[str] = None
-    traces: Optional[List[Trace]] = None
-    scorers: List[Union[APIScorerConfig, BaseScorer]]
-    model: Optional[str] = JUDGMENT_DEFAULT_GPT_MODEL
-    trace_span_id: Optional[str] = None
-    append: Optional[bool] = False
-    override: Optional[bool] = False
-    # TODO: ?
-    rules: Any = None
-    tools: Optional[List[Dict[str, Any]]] = None
-    class Config:
-        arbitrary_types_allowed = True

judgeval/scorers/judgeval_scorers/api_scorers/derailment_scorer.py DELETED Viewed

@@ -1,14 +0,0 @@
-"""
-`judgeval` answer relevancy scorer
-TODO add link to docs page for this scorer
-"""
-# Internal imports
-from judgeval.scorers.api_scorer import APIScorerConfig
-from judgeval.constants import APIScorerType
-class DerailmentScorer(APIScorerConfig):
-    score_type: APIScorerType = APIScorerType.DERAILMENT

judgeval/scorers/judgeval_scorers/api_scorers/tool_dependency.py DELETED Viewed

@@ -1,20 +0,0 @@
-"""
-`judgeval` tool dependency scorer
-"""
-# Internal imports
-from judgeval.scorers.trace_api_scorer import TraceAPIScorerConfig
-from judgeval.constants import APIScorerType
-from typing import Optional, Dict
-class ToolDependencyScorer(TraceAPIScorerConfig):
-    kwargs: Optional[Dict] = None
-    def __init__(self, threshold: float = 1.0, enable_param_checking: bool = True):
-        super().__init__(threshold=threshold, score_type=APIScorerType.TOOL_DEPENDENCY)
-        self.kwargs = {"enable_param_checking": enable_param_checking}
-    @property
-    def __name__(self):
-        return "Tool Dependency"

judgeval/scorers/judgeval_scorers/api_scorers/tool_order.py DELETED Viewed

@@ -1,27 +0,0 @@
-"""
-`judgeval` tool order scorer
-"""
-# Internal imports
-from judgeval.scorers.trace_api_scorer import TraceAPIScorerConfig
-from judgeval.constants import APIScorerType
-from typing import Dict, Any
-class ToolOrderScorer(TraceAPIScorerConfig):
-    score_type: APIScorerType = APIScorerType.TOOL_ORDER
-    threshold: float = 1.0
-    exact_match: bool = False
-    def model_dump(self, *args, **kwargs) -> Dict[str, Any]:
-        base = super().model_dump(*args, **kwargs)
-        base_fields = set(TraceAPIScorerConfig.model_fields.keys())
-        all_fields = set(self.__class__.model_fields.keys())
-        extra_fields = all_fields - base_fields - {"kwargs"}
-        base["kwargs"] = {
-            k: getattr(self, k) for k in extra_fields if getattr(self, k) is not None
-        }
-        return base

judgeval 0.9.4__py3-none-any.whl → 0.10.0__py3-none-any.whl

judgeval 0.9.4__py3-none-any.whl → 0.10.0__py3-none-any.whl