judgeval 0.9.3__py3-none-any.whl → 0.10.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- judgeval/__init__.py +2 -2
- judgeval/api/__init__.py +28 -96
- judgeval/api/api_types.py +49 -140
- judgeval/constants.py +1 -5
- judgeval/data/__init__.py +1 -3
- judgeval/data/example.py +4 -2
- judgeval/data/judgment_types.py +57 -165
- judgeval/data/result.py +1 -2
- judgeval/data/trace.py +14 -40
- judgeval/dataset/__init__.py +15 -42
- judgeval/evaluation/__init__.py +23 -34
- judgeval/scorers/__init__.py +9 -7
- judgeval/scorers/api_scorer.py +8 -0
- judgeval/scorers/base_scorer.py +0 -1
- judgeval/scorers/judgeval_scorers/api_scorers/__init__.py +2 -10
- judgeval/scorers/judgeval_scorers/api_scorers/answer_correctness.py +2 -2
- judgeval/scorers/judgeval_scorers/api_scorers/answer_relevancy.py +2 -2
- judgeval/scorers/judgeval_scorers/api_scorers/faithfulness.py +2 -2
- judgeval/scorers/judgeval_scorers/api_scorers/instruction_adherence.py +2 -2
- judgeval/scorers/judgeval_scorers/api_scorers/prompt_scorer.py +43 -4
- judgeval/tracer/__init__.py +40 -93
- judgeval/tracer/local_eval_queue.py +2 -2
- judgeval/tracer/processors/__init__.py +84 -6
- judgeval/tracer/utils.py +1 -1
- judgeval/trainer/trainer.py +4 -4
- judgeval/utils/serialize.py +7 -1
- {judgeval-0.9.3.dist-info → judgeval-0.10.0.dist-info}/METADATA +2 -2
- {judgeval-0.9.3.dist-info → judgeval-0.10.0.dist-info}/RECORD +31 -36
- judgeval/data/trace_run.py +0 -39
- judgeval/scorers/judgeval_scorers/api_scorers/derailment_scorer.py +0 -14
- judgeval/scorers/judgeval_scorers/api_scorers/tool_dependency.py +0 -20
- judgeval/scorers/judgeval_scorers/api_scorers/tool_order.py +0 -27
- judgeval/scorers/trace_api_scorer.py +0 -5
- {judgeval-0.9.3.dist-info → judgeval-0.10.0.dist-info}/WHEEL +0 -0
- {judgeval-0.9.3.dist-info → judgeval-0.10.0.dist-info}/entry_points.txt +0 -0
- {judgeval-0.9.3.dist-info → judgeval-0.10.0.dist-info}/licenses/LICENSE.md +0 -0
judgeval/scorers/judgeval_scorers/api_scorers/__init__.py
CHANGED
@@ -10,24 +10,16 @@ from judgeval.scorers.judgeval_scorers.api_scorers.answer_correctness import (
 from judgeval.scorers.judgeval_scorers.api_scorers.instruction_adherence import (
     InstructionAdherenceScorer,
 )
-from judgeval.scorers.judgeval_scorers.api_scorers.derailment_scorer import (
-    DerailmentScorer,
-)
-from judgeval.scorers.judgeval_scorers.api_scorers.tool_order import ToolOrderScorer
 from judgeval.scorers.judgeval_scorers.api_scorers.prompt_scorer import (
+    TracePromptScorer,
     PromptScorer,
 )
-from judgeval.scorers.judgeval_scorers.api_scorers.tool_dependency import (
-    ToolDependencyScorer,
-)
 
 __all__ = [
     "FaithfulnessScorer",
     "AnswerRelevancyScorer",
     "AnswerCorrectnessScorer",
     "InstructionAdherenceScorer",
-    "DerailmentScorer",
-    "ToolOrderScorer",
+    "TracePromptScorer",
    "PromptScorer",
-    "ToolDependencyScorer",
 ]
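
In 0.10.0 the derailment, tool-order, and tool-dependency scorers disappear from the public exports (their modules are deleted outright, per the file list above), and TracePromptScorer is added. A minimal import-migration sketch, using only the paths visible in this hunk:

    # judgeval 0.9.3 exports that no longer exist in 0.10.0:
    # from judgeval.scorers.judgeval_scorers.api_scorers import (
    #     DerailmentScorer, ToolOrderScorer, ToolDependencyScorer,
    # )

    # judgeval 0.10.0:
    from judgeval.scorers.judgeval_scorers.api_scorers import (
        PromptScorer,
        TracePromptScorer,  # new trace-level prompt scorer
    )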
judgeval/scorers/judgeval_scorers/api_scorers/answer_correctness.py
CHANGED
@@ -6,13 +6,13 @@ TODO add link to docs page for this scorer
 """
 
 # Internal imports
-from judgeval.scorers.api_scorer import APIScorerConfig
+from judgeval.scorers.api_scorer import ExampleAPIScorerConfig
 from judgeval.constants import APIScorerType
 from judgeval.data import ExampleParams
 from typing import List
 
 
-class AnswerCorrectnessScorer(APIScorerConfig):
+class AnswerCorrectnessScorer(ExampleAPIScorerConfig):
     score_type: APIScorerType = APIScorerType.ANSWER_CORRECTNESS
     required_params: List[ExampleParams] = [
         ExampleParams.INPUT,
judgeval/scorers/judgeval_scorers/api_scorers/answer_relevancy.py
CHANGED
@@ -1,10 +1,10 @@
-from judgeval.scorers.api_scorer import APIScorerConfig
+from judgeval.scorers.api_scorer import ExampleAPIScorerConfig
 from judgeval.constants import APIScorerType
 from judgeval.data import ExampleParams
 from typing import List
 
 
-class AnswerRelevancyScorer(APIScorerConfig):
+class AnswerRelevancyScorer(ExampleAPIScorerConfig):
     score_type: APIScorerType = APIScorerType.ANSWER_RELEVANCY
     required_params: List[ExampleParams] = [
         ExampleParams.INPUT,
judgeval/scorers/judgeval_scorers/api_scorers/faithfulness.py
CHANGED
@@ -6,13 +6,13 @@ TODO add link to docs page for this scorer
 """
 
 # Internal imports
-from judgeval.scorers.api_scorer import APIScorerConfig
+from judgeval.scorers.api_scorer import ExampleAPIScorerConfig
 from judgeval.constants import APIScorerType
 from judgeval.data import ExampleParams
 from typing import List
 
 
-class FaithfulnessScorer(APIScorerConfig):
+class FaithfulnessScorer(ExampleAPIScorerConfig):
     score_type: APIScorerType = APIScorerType.FAITHFULNESS
     required_params: List[ExampleParams] = [
         ExampleParams.INPUT,
judgeval/scorers/judgeval_scorers/api_scorers/instruction_adherence.py
CHANGED
@@ -6,12 +6,12 @@ TODO add link to docs page for this scorer
 """
 
 # Internal imports
-from judgeval.scorers.api_scorer import APIScorerConfig
+from judgeval.scorers.api_scorer import ExampleAPIScorerConfig
 from judgeval.constants import APIScorerType
 from judgeval.data import ExampleParams
 
 
-class InstructionAdherenceScorer(APIScorerConfig):
+class InstructionAdherenceScorer(ExampleAPIScorerConfig):
     def __init__(self, threshold: float):
         super().__init__(
             threshold=threshold,
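
The four hunks above are the same mechanical change: every built-in example-level scorer now extends ExampleAPIScorerConfig instead of the old APIScorerConfig base. A hedged sketch of the equivalent update for a downstream subclass; the class name is hypothetical and the field shapes are assumed to match the built-ins shown above:

    from typing import List

    from judgeval.scorers.api_scorer import ExampleAPIScorerConfig
    from judgeval.constants import APIScorerType
    from judgeval.data import ExampleParams

    # Previously: class MyScorer(APIScorerConfig): ...
    class MyScorer(ExampleAPIScorerConfig):  # hypothetical subclass
        score_type: APIScorerType = APIScorerType.ANSWER_RELEVANCY  # real member reused for illustration
        required_params: List[ExampleParams] = [ExampleParams.INPUT]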
judgeval/scorers/judgeval_scorers/api_scorers/prompt_scorer.py
CHANGED
@@ -1,4 +1,8 @@
-from judgeval.scorers.api_scorer import APIScorerConfig
+from judgeval.scorers.api_scorer import (
+    APIScorerConfig,
+    ExampleAPIScorerConfig,
+    TraceAPIScorerConfig,
+)
 from judgeval.constants import APIScorerType
 from typing import Dict, Any, Optional
 from judgeval.api import JudgmentSyncClient
@@ -6,6 +10,7 @@ from judgeval.exceptions import JudgmentAPIError
 import os
 from copy import copy
 from judgeval.logger import judgeval_logger
+from abc import ABC
 
 
 def push_prompt_scorer(
@@ -15,6 +20,7 @@ def push_prompt_scorer(
     options: Optional[Dict[str, float]] = None,
     judgment_api_key: str = os.getenv("JUDGMENT_API_KEY") or "",
     organization_id: str = os.getenv("JUDGMENT_ORG_ID") or "",
+    is_trace: Optional[bool] = None,
 ) -> str:
     client = JudgmentSyncClient(judgment_api_key, organization_id)
     try:
@@ -24,6 +30,7 @@ def push_prompt_scorer(
                 "prompt": prompt,
                 "threshold": threshold,
                 "options": options,
+                "is_trace": is_trace,
             }
         )
     except JudgmentAPIError as e:
@@ -88,7 +95,7 @@ def scorer_exists(
     )
 
 
-class PromptScorer(APIScorerConfig):
+class BasePromptScorer(ABC, APIScorerConfig):
     """
     In the Judgment backend, this scorer is implemented as a PromptScorer that takes
    1. a system role that may involve the Example object
@@ -97,9 +104,9 @@ class PromptScorer(APIScorerConfig):
     and uses a judge to execute the evaluation from the system role and classify into one of the options
     """
 
+    score_type: APIScorerType
     prompt: str
     options: Optional[Dict[str, float]] = None
-    score_type: APIScorerType = APIScorerType.PROMPT_SCORER
     judgment_api_key: str = os.getenv("JUDGMENT_API_KEY") or ""
     organization_id: str = os.getenv("JUDGMENT_ORG_ID") or ""
 
@@ -111,7 +118,18 @@ class PromptScorer(APIScorerConfig):
         organization_id: str = os.getenv("JUDGMENT_ORG_ID") or "",
     ):
         scorer_config = fetch_prompt_scorer(name, judgment_api_key, organization_id)
+        if scorer_config["is_trace"] != issubclass(cls, TracePromptScorer):
+            raise JudgmentAPIError(
+                status_code=400,
+                detail=f"Scorer with name {name} is not a {cls.__name__}",
+                response=None,  # type: ignore
+            )
+        if issubclass(cls, TracePromptScorer):
+            score_type = APIScorerType.TRACE_PROMPT_SCORER
+        else:
+            score_type = APIScorerType.PROMPT_SCORER
         return cls(
+            score_type=score_type,
             name=name,
             prompt=scorer_config["prompt"],
             threshold=scorer_config["threshold"],
@@ -131,11 +149,24 @@ class PromptScorer(APIScorerConfig):
         organization_id: str = os.getenv("JUDGMENT_ORG_ID") or "",
     ):
         if not scorer_exists(name, judgment_api_key, organization_id):
+            if issubclass(cls, TracePromptScorer):
+                is_trace = True
+                score_type = APIScorerType.TRACE_PROMPT_SCORER
+            else:
+                is_trace = False
+                score_type = APIScorerType.PROMPT_SCORER
             push_prompt_scorer(
-                name, prompt, threshold, options, judgment_api_key, organization_id
+                name,
+                prompt,
+                threshold,
+                options,
+                judgment_api_key,
+                organization_id,
+                is_trace,
             )
             judgeval_logger.info(f"Successfully created PromptScorer: {name}")
             return cls(
+                score_type=score_type,
                 name=name,
                 prompt=prompt,
                 threshold=threshold,
@@ -251,3 +282,11 @@ class PromptScorer(APIScorerConfig):
             k: getattr(self, k) for k in extra_fields if getattr(self, k) is not None
         }
         return base
+
+
+class PromptScorer(BasePromptScorer, ExampleAPIScorerConfig):
+    pass
+
+
+class TracePromptScorer(BasePromptScorer, TraceAPIScorerConfig):
+    pass
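
The net effect of these hunks: the old concrete PromptScorer(APIScorerConfig) becomes an abstract BasePromptScorer, with PromptScorer (example-level) and TracePromptScorer (trace-level) as thin concrete subclasses, and is_trace is persisted server-side so fetching a scorer through the wrong class fails with a 400. A hedged usage sketch; the classmethod names get and create are assumptions, since the hunks show only their bodies:

    from judgeval.scorers.judgeval_scorers.api_scorers.prompt_scorer import (
        PromptScorer,
        TracePromptScorer,
    )

    # Assumed classmethod name; creates the scorer server-side with is_trace=False.
    scorer = PromptScorer.create(name="helpfulness", prompt="...", threshold=0.5)

    # Fetching an example-level scorer through TracePromptScorer now raises
    # JudgmentAPIError(400) because scorer_config["is_trace"] won't match the class.
    trace_scorer = TracePromptScorer.get(name="my_trace_scorer")  # assumed name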
judgeval/tracer/__init__.py
CHANGED
@@ -43,8 +43,7 @@ from judgeval.env import (
     JUDGMENT_ORG_ID,
 )
 from judgeval.logger import judgeval_logger
-from judgeval.scorers.api_scorer import APIScorerConfig
-from judgeval.scorers.trace_api_scorer import TraceAPIScorerConfig
+from judgeval.scorers.api_scorer import ExampleAPIScorerConfig, TraceAPIScorerConfig
 from judgeval.scorers.base_scorer import BaseScorer
 from judgeval.tracer.constants import JUDGEVAL_TRACER_INSTRUMENTING_MODULE_NAME
 from judgeval.tracer.managers import (
@@ -57,7 +56,7 @@ from judgeval.utils.serialize import safe_serialize
 from judgeval.version import get_version
 from judgeval.warnings import JudgmentWarning
 
-from judgeval.tracer.keys import AttributeKeys, InternalAttributeKeys, ResourceKeys
+from judgeval.tracer.keys import AttributeKeys, InternalAttributeKeys
 from judgeval.api import JudgmentSyncClient
 from judgeval.tracer.llm import wrap_provider
 from judgeval.utils.url import url_for
@@ -65,6 +64,7 @@ from judgeval.tracer.local_eval_queue import LocalEvaluationQueue
 from judgeval.tracer.processors import (
     JudgmentSpanProcessor,
     NoOpJudgmentSpanProcessor,
+    NoOpSpanProcessor,
 )
 from judgeval.tracer.utils import set_span_attribute, TraceScorerConfig
 
@@ -85,19 +85,6 @@ class AgentContext(TypedDict):
     parent_agent_id: str | None
 
 
-def resolve_project_id(
-    api_key: str, organization_id: str, project_name: str
-) -> str | None:
-    try:
-        client = JudgmentSyncClient(
-            api_key=api_key,
-            organization_id=organization_id,
-        )
-        return client.projects_resolve({"project_name": project_name})["project_id"]
-    except Exception:
-        return None
-
-
 class Tracer:
     _active_tracers: List[Tracer] = []
 
@@ -188,38 +175,20 @@ class Tracer:
         self.cost_context = ContextVar("current_cost_context", default=None)
 
         if self.enable_monitoring:
-            project_id = resolve_project_id(
-                self.api_key, self.organization_id, self.project_name
-            )
-
-            resource_attributes = resource_attributes or {}
-            resource_attributes.update(
-                {
-                    ResourceKeys.SERVICE_NAME: self.project_name,
-                    ResourceKeys.TELEMETRY_SDK_NAME: "judgeval",
-                    ResourceKeys.TELEMETRY_SDK_VERSION: get_version(),
-                }
-            )
-
-            if project_id is not None:
-                resource_attributes[ResourceKeys.JUDGMENT_PROJECT_ID] = project_id
-            else:
-                judgeval_logger.error(
-                    f"Failed to resolve project {self.project_name}, please create it first at https://app.judgmentlabs.ai/projects. Skipping Judgment export."
-                )
-
-            resource = Resource.create(resource_attributes)
-
             self.judgment_processor = JudgmentSpanProcessor(
                 self,
-                self.
+                self.project_name,
                 self.api_key,
                 self.organization_id,
                 max_queue_size=2**18,
                 export_timeout_millis=30000,
+                resource_attributes=resource_attributes,
             )
-
+
+            resource = Resource.create(self.judgment_processor.resource_attributes)
             self.provider = TracerProvider(resource=resource)
+
+            self.processors.append(self.judgment_processor)
             for processor in self.processors:
                 self.provider.add_span_processor(processor)
 
@@ -253,6 +222,14 @@ class Tracer:
     def get_current_cost_context(self):
         return self.cost_context
 
+    def get_processor(self):
+        """Get the judgment span processor instance.
+
+        Returns:
+            The JudgmentSpanProcessor or NoOpJudgmentSpanProcessor instance used by this tracer.
+        """
+        return self.judgment_processor
+
     def set_customer_id(self, customer_id: str) -> None:
         span = self.get_current_span()
         if span and span.is_recording():
@@ -507,11 +484,11 @@
                 safe_serialize(format_inputs(f, args, kwargs)),
             )
 
+            self.judgment_processor.emit_partial()
+
             if scorer_config:
                 self._set_pending_trace_eval(span, scorer_config, args, kwargs)
 
-            self.judgment_processor.emit_partial()
-
             result = f(*args, **kwargs)
         except Exception as user_exc:
             span.record_exception(user_exc)
@@ -559,13 +536,13 @@
                 safe_serialize(format_inputs(f, args, kwargs)),
             )
 
+            self.judgment_processor.emit_partial()
+
             if scorer_config:
                 self._set_pending_trace_eval(
                     main_span, scorer_config, args, kwargs
                 )
 
-            self.judgment_processor.emit_partial()
-
             generator = f(*args, **kwargs)
             set_span_attribute(
                 main_span, AttributeKeys.JUDGMENT_OUTPUT, "<generator>"
@@ -609,11 +586,11 @@
                 safe_serialize(format_inputs(f, args, kwargs)),
             )
 
+            self.judgment_processor.emit_partial()
+
             if scorer_config:
                 self._set_pending_trace_eval(span, scorer_config, args, kwargs)
 
-            self.judgment_processor.emit_partial()
-
             result = await f(*args, **kwargs)
         except Exception as user_exc:
             span.record_exception(user_exc)
@@ -661,13 +638,13 @@
                 safe_serialize(format_inputs(f, args, kwargs)),
             )
 
+            self.judgment_processor.emit_partial()
+
             if scorer_config:
                 self._set_pending_trace_eval(
                     main_span, scorer_config, args, kwargs
                 )
 
-            self.judgment_processor.emit_partial()
-
             async_generator = f(*args, **kwargs)
             set_span_attribute(
                 main_span, AttributeKeys.JUDGMENT_OUTPUT, "<async_generator>"
@@ -847,42 +824,6 @@ class Tracer:
 
         return sync_wrapper
 
-    @overload
-    def observe_tools(
-        self,
-        cls: Cls,
-        /,
-        *,
-        exclude_methods: List[str] = [],
-        include_private: bool = False,
-    ) -> Cls: ...
-
-    @overload
-    def observe_tools(
-        self,
-        cls: None = None,
-        /,
-        *,
-        exclude_methods: List[str] = [],
-        include_private: bool = False,
-    ) -> Callable[[Cls], Cls]: ...
-
-    def observe_tools(
-        self,
-        cls: Cls | None = None,
-        /,
-        *,
-        exclude_methods: List[str] = [],
-        include_private: bool = False,
-    ) -> Cls | Callable[[Cls], Cls]:
-        if cls is None:
-            return partial(
-                self.observe_tools,
-                exclude_methods=exclude_methods,
-                include_private=include_private,
-            )
-        return cls
-
     def wrap(self, client: ApiClient) -> ApiClient:
         return wrap_provider(self, client)
 
@@ -913,11 +854,7 @@
         proper cleanup before program termination.
         """
         try:
-
-            if not success:
-                judgeval_logger.warning(
-                    "Some spans may not have been exported before program exit"
-                )
+            self.force_flush(timeout_millis=30000)
         except Exception as e:
             judgeval_logger.warning(f"Error during atexit flush: {e}")
 
@@ -925,7 +862,7 @@
         self,
         /,
         *,
-        scorer: Union[APIScorerConfig, BaseScorer],
+        scorer: Union[ExampleAPIScorerConfig, BaseScorer],
         example: Example,
         model: str = JUDGMENT_DEFAULT_GPT_MODEL,
         sampling_rate: float = 1.0,
@@ -934,9 +871,9 @@
             judgeval_logger.info("Evaluation is not enabled, skipping evaluation")
             return
 
-        if not isinstance(scorer, (APIScorerConfig, BaseScorer)):
+        if not isinstance(scorer, (ExampleAPIScorerConfig, BaseScorer)):
             judgeval_logger.error(
-                "Scorer must be an instance of APIScorerConfig or BaseScorer, got %s, skipping evaluation."
+                "Scorer must be an instance of ExampleAPIScorerConfig or BaseScorer, got %s, skipping evaluation."
                 % type(scorer)
             )
             return
@@ -965,7 +902,7 @@
         span_context = self.get_current_span().get_span_context()
         trace_id = format(span_context.trace_id, "032x")
         span_id = format(span_context.span_id, "016x")
-        hosted_scoring = isinstance(scorer, APIScorerConfig) or (
+        hosted_scoring = isinstance(scorer, ExampleAPIScorerConfig) or (
             isinstance(scorer, BaseScorer) and scorer.server_hosted
         )
         eval_run_name = f"async_evaluate_{span_id}"  # note this name doesnt matter because we don't save the experiment only the example and scorer_data
@@ -1074,3 +1011,13 @@ def format_inputs(
         return inputs
     except Exception:
         return {}
+
+
+# Export processor classes for direct access
+__all__ = [
+    "Tracer",
+    "wrap",
+    "JudgmentSpanProcessor",
+    "NoOpJudgmentSpanProcessor",
+    "NoOpSpanProcessor",
+]
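
Two behavioral changes stand out above: emit_partial() now runs before the pending trace eval is registered (in all four wrappers), and project resolution plus resource-attribute assembly move out of Tracer.__init__ into JudgmentSpanProcessor, which the new get_processor() accessor exposes. A minimal sketch, assuming Tracer accepts the project_name keyword used throughout this file:

    from judgeval.tracer import Tracer

    tracer = Tracer(project_name="my_project")  # assumed constructor kwarg

    # New in 0.10.0: the processor (JudgmentSpanProcessor or the no-op variant)
    # is reachable directly, e.g. to flush partial spans by hand.
    processor = tracer.get_processor()
    processor.emit_partial()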
judgeval/tracer/local_eval_queue.py
CHANGED
@@ -24,7 +24,7 @@ class LocalEvaluationQueue:
     """Lightweight in-memory queue for local evaluation runs.
 
     Only supports EvaluationRuns with local scorers (BaseScorer instances).
-    API scorers (APIScorerConfig) are not supported as they have their own queue.
+    API scorers (ExampleAPIScorerConfig) are not supported as they have their own queue.
     """
 
     def __init__(
@@ -54,7 +54,7 @@ class LocalEvaluationQueue:
         if not evaluation_run.custom_scorers:
             raise ValueError(
                 "LocalEvaluationQueue only supports runs with local scorers (BaseScorer). "
-                "Found only APIScorerConfig instances."
+                "Found only ExampleAPIScorerConfig instances."
             )
 
         return safe_run_async(
judgeval/tracer/processors/__init__.py
CHANGED
@@ -6,8 +6,13 @@ from opentelemetry.sdk.trace import ReadableSpan, Span, SpanProcessor, SpanContext
 from opentelemetry.sdk.trace.export import (
     BatchSpanProcessor,
 )
+from opentelemetry.sdk.resources import Resource
 from judgeval.tracer.exporters import JudgmentSpanExporter
-from judgeval.tracer.keys import AttributeKeys, InternalAttributeKeys
+from judgeval.tracer.keys import AttributeKeys, InternalAttributeKeys, ResourceKeys
+from judgeval.api import JudgmentSyncClient
+from judgeval.logger import judgeval_logger
+from judgeval.utils.url import url_for
+from judgeval.version import get_version
 
 if TYPE_CHECKING:
     from judgeval.tracer import Tracer
@@ -31,15 +36,27 @@ class JudgmentSpanProcessor(BatchSpanProcessor):
     def __init__(
         self,
         tracer: Tracer,
-        endpoint: str,
+        project_name: str,
         api_key: str,
         organization_id: str,
         /,
         *,
         max_queue_size: int = 2**18,
         export_timeout_millis: int = 30000,
+        resource_attributes: Optional[dict[str, Any]] = None,
     ):
         self.tracer = tracer
+        self.project_name = project_name
+        self.api_key = api_key
+        self.organization_id = organization_id
+
+        # Resolve project_id
+        self.project_id = self._resolve_project_id()
+
+        # Set up resource attributes with project_id
+        self._setup_resource_attributes(resource_attributes or {})
+
+        endpoint = url_for("/otel/v1/traces")
         super().__init__(
             JudgmentSpanExporter(
                 endpoint=endpoint,
@@ -53,6 +70,38 @@ class JudgmentSpanProcessor(BatchSpanProcessor):
             defaultdict(dict)
         )
 
+    def _resolve_project_id(self) -> str | None:
+        """Resolve project_id from project_name using the API."""
+        try:
+            client = JudgmentSyncClient(
+                api_key=self.api_key,
+                organization_id=self.organization_id,
+            )
+            return client.projects_resolve({"project_name": self.project_name})[
+                "project_id"
+            ]
+        except Exception:
+            return None
+
+    def _setup_resource_attributes(self, resource_attributes: dict[str, Any]) -> None:
+        """Set up resource attributes including project_id."""
+        resource_attributes.update(
+            {
+                ResourceKeys.SERVICE_NAME: self.project_name,
+                ResourceKeys.TELEMETRY_SDK_NAME: "judgeval",
+                ResourceKeys.TELEMETRY_SDK_VERSION: get_version(),
+            }
+        )
+
+        if self.project_id is not None:
+            resource_attributes[ResourceKeys.JUDGMENT_PROJECT_ID] = self.project_id
+        else:
+            judgeval_logger.error(
+                f"Failed to resolve project {self.project_name}, please create it first at https://app.judgmentlabs.ai/org/{self.organization_id}/projects. Skipping Judgment export."
+            )
+
+        self.resource_attributes = resource_attributes
+
     def _get_span_key(self, span_context: SpanContext) -> tuple[int, int]:
         return (span_context.trace_id, span_context.span_id)
 
@@ -103,11 +152,18 @@ class JudgmentSpanProcessor(BatchSpanProcessor):
 
         attributes = dict(current_span.attributes or {})
         attributes[AttributeKeys.JUDGMENT_UPDATE_ID] = current_update_id
+
+        existing_resource_attrs = (
+            dict(current_span.resource.attributes) if current_span.resource else {}
+        )
+        merged_resource_attrs = {**existing_resource_attrs, **self.resource_attributes}
+        merged_resource = Resource.create(merged_resource_attrs)
+
         partial_span = ReadableSpan(
             name=current_span.name,
             context=span_context,
             parent=current_span.parent,
-            resource=current_span.resource,
+            resource=merged_resource,
             attributes=attributes,
             events=current_span.events,
             links=current_span.links,
@@ -137,11 +193,20 @@ class JudgmentSpanProcessor(BatchSpanProcessor):
         attributes = dict(span.attributes or {})
         attributes[AttributeKeys.JUDGMENT_UPDATE_ID] = 20
 
+        existing_resource_attrs = (
+            dict(span.resource.attributes) if span.resource else {}
+        )
+        merged_resource_attrs = {
+            **existing_resource_attrs,
+            **self.resource_attributes,
+        }
+        merged_resource = Resource.create(merged_resource_attrs)
+
         final_span = ReadableSpan(
             name=span.name,
             context=span.context,
             parent=span.parent,
-            resource=span.resource,
+            resource=merged_resource,
             attributes=attributes,
             events=span.events,
             links=span.links,
@@ -160,7 +225,7 @@ class JudgmentSpanProcessor(BatchSpanProcessor):
 
 class NoOpJudgmentSpanProcessor(JudgmentSpanProcessor):
     def __init__(self):
-
+        pass
 
     def on_start(self, span: Span, parent_context: Optional[Context] = None) -> None:
         pass
@@ -177,5 +242,18 @@ class NoOpJudgmentSpanProcessor(JudgmentSpanProcessor):
     def emit_partial(self) -> None:
         pass
 
+    def set_internal_attribute(
+        self, span_context: SpanContext, key: str, value: Any
+    ) -> None:
+        pass
+
+    def get_internal_attribute(
+        self, span_context: SpanContext, key: str, default: Any = None
+    ) -> Any:
+        return default
+
+    def increment_update_id(self, span_context: SpanContext) -> int:
+        return 0
+
 
-__all__ = ["JudgmentSpanProcessor", "NoOpJudgmentSpanProcessor"]
+__all__ = ["NoOpSpanProcessor", "JudgmentSpanProcessor", "NoOpJudgmentSpanProcessor"]
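
With resolution moved here, JudgmentSpanProcessor's second positional argument is now the project name (the OTLP endpoint is derived via url_for instead), and caller-supplied resource attributes are merged with judgeval's defaults and stamped onto both partial and final spans via Resource.create. A hedged construction sketch; the tracer instance and the extra attribute are assumptions:

    import os

    from judgeval.tracer.processors import JudgmentSpanProcessor

    processor = JudgmentSpanProcessor(
        tracer,  # an existing Tracer instance (assumed)
        "my_project",  # project_name
        os.environ["JUDGMENT_API_KEY"],
        os.environ["JUDGMENT_ORG_ID"],
        resource_attributes={"deployment.environment": "staging"},
    )
    # processor.resource_attributes now carries service.name, the telemetry SDK
    # name/version, and the resolved judgment project id (when resolution succeeds).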
judgeval/tracer/utils.py
CHANGED
@@ -2,7 +2,7 @@ from typing import Any
 from opentelemetry.trace import Span
 from pydantic import BaseModel
 from typing import Callable, Optional
-from judgeval.scorers.trace_api_scorer import TraceAPIScorerConfig
+from judgeval.scorers.api_scorer import TraceAPIScorerConfig
 from judgeval.env import JUDGMENT_DEFAULT_GPT_MODEL
 
 
judgeval/trainer/trainer.py
CHANGED
@@ -10,7 +10,7 @@ from judgeval.tracer.exporters.store import SpanStore
 from judgeval.tracer.exporters import InMemorySpanExporter
 from judgeval.tracer.keys import AttributeKeys
 from judgeval import JudgmentClient
-from judgeval.scorers import BaseScorer, APIScorerConfig
+from judgeval.scorers import BaseScorer, ExampleAPIScorerConfig
 from judgeval.data import Example
 from .console import _spinner_progress, _print_progress, _print_progress_update
 from judgeval.exceptions import JudgmentRuntimeError
@@ -154,7 +154,7 @@ class JudgmentTrainer:
     async def generate_rollouts_and_rewards(
         self,
         agent_function: Callable[[Any], Any],
-        scorers: List[Union[APIScorerConfig, BaseScorer]],
+        scorers: List[Union[ExampleAPIScorerConfig, BaseScorer]],
         prompts: List[Any],
         num_prompts_per_step: Optional[int] = None,
         num_generations_per_prompt: Optional[int] = None,
@@ -264,7 +264,7 @@ class JudgmentTrainer:
     async def run_reinforcement_learning(
         self,
         agent_function: Callable[[Any], Any],
-        scorers: List[Union[APIScorerConfig, BaseScorer]],
+        scorers: List[Union[ExampleAPIScorerConfig, BaseScorer]],
         prompts: List[Any],
     ) -> ModelConfig:
         """
@@ -370,7 +370,7 @@ class JudgmentTrainer:
     async def train(
         self,
         agent_function: Callable[[Any], Any],
-        scorers: List[Union[APIScorerConfig, BaseScorer]],
+        scorers: List[Union[ExampleAPIScorerConfig, BaseScorer]],
         prompts: List[Any],
         rft_provider: Optional[str] = None,
     ) -> ModelConfig:
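
The trainer's three public entry points change only their scorer list type: ExampleAPIScorerConfig replaces APIScorerConfig, matching the top-level export change in judgeval/scorers. A hedged call sketch; the trainer setup is elided and the threshold kwarg is assumed from the pydantic-style configs above:

    from typing import Any, List, Union

    from judgeval.scorers import BaseScorer, ExampleAPIScorerConfig
    from judgeval.scorers.judgeval_scorers.api_scorers.answer_relevancy import (
        AnswerRelevancyScorer,
    )

    scorers: List[Union[ExampleAPIScorerConfig, BaseScorer]] = [
        AnswerRelevancyScorer(threshold=0.7),  # threshold kwarg assumed
    ]
    # model_config = await trainer.train(agent_function, scorers, prompts)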