PyPI - judgeval - Versions diffs - 0.1.0__py3-none-any.whl → 0.23.0__py3-none-any.whl - Mend

judgeval 0.1.0__py3-none-any.whl → 0.23.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (234) hide show

judgeval/__init__.py +173 -10
judgeval/api/__init__.py +523 -0
judgeval/api/api_types.py +413 -0
judgeval/cli.py +112 -0
judgeval/constants.py +7 -30
judgeval/data/__init__.py +1 -3
judgeval/data/evaluation_run.py +125 -0
judgeval/data/example.py +14 -40
judgeval/data/judgment_types.py +396 -146
judgeval/data/result.py +11 -18
judgeval/data/scorer_data.py +3 -26
judgeval/data/scripts/openapi_transform.py +5 -5
judgeval/data/trace.py +115 -194
judgeval/dataset/__init__.py +335 -0
judgeval/env.py +55 -0
judgeval/evaluation/__init__.py +346 -0
judgeval/exceptions.py +28 -0
judgeval/integrations/langgraph/__init__.py +13 -0
judgeval/integrations/openlit/__init__.py +51 -0
judgeval/judges/__init__.py +2 -2
judgeval/judges/litellm_judge.py +77 -16
judgeval/judges/together_judge.py +88 -17
judgeval/judges/utils.py +7 -20
judgeval/judgment_attribute_keys.py +55 -0
judgeval/{common/logger.py → logger.py} +24 -8
judgeval/prompt/__init__.py +330 -0
judgeval/scorers/__init__.py +11 -11
judgeval/scorers/agent_scorer.py +15 -19
judgeval/scorers/api_scorer.py +21 -23
judgeval/scorers/base_scorer.py +54 -36
judgeval/scorers/example_scorer.py +1 -3
judgeval/scorers/judgeval_scorers/api_scorers/__init__.py +2 -24
judgeval/scorers/judgeval_scorers/api_scorers/answer_correctness.py +2 -10
judgeval/scorers/judgeval_scorers/api_scorers/answer_relevancy.py +2 -2
judgeval/scorers/judgeval_scorers/api_scorers/faithfulness.py +2 -10
judgeval/scorers/judgeval_scorers/api_scorers/instruction_adherence.py +2 -14
judgeval/scorers/judgeval_scorers/api_scorers/prompt_scorer.py +171 -59
judgeval/scorers/score.py +64 -47
judgeval/scorers/utils.py +2 -107
judgeval/tracer/__init__.py +1111 -2
judgeval/tracer/constants.py +1 -0
judgeval/tracer/exporters/__init__.py +40 -0
judgeval/tracer/exporters/s3.py +119 -0
judgeval/tracer/exporters/store.py +59 -0
judgeval/tracer/exporters/utils.py +32 -0
judgeval/tracer/keys.py +63 -0
judgeval/tracer/llm/__init__.py +7 -0
judgeval/tracer/llm/config.py +78 -0
judgeval/tracer/llm/constants.py +9 -0
judgeval/tracer/llm/llm_anthropic/__init__.py +3 -0
judgeval/tracer/llm/llm_anthropic/config.py +6 -0
judgeval/tracer/llm/llm_anthropic/messages.py +452 -0
judgeval/tracer/llm/llm_anthropic/messages_stream.py +322 -0
judgeval/tracer/llm/llm_anthropic/wrapper.py +59 -0
judgeval/tracer/llm/llm_google/__init__.py +3 -0
judgeval/tracer/llm/llm_google/config.py +6 -0
judgeval/tracer/llm/llm_google/generate_content.py +127 -0
judgeval/tracer/llm/llm_google/wrapper.py +30 -0
judgeval/tracer/llm/llm_openai/__init__.py +3 -0
judgeval/tracer/llm/llm_openai/beta_chat_completions.py +216 -0
judgeval/tracer/llm/llm_openai/chat_completions.py +501 -0
judgeval/tracer/llm/llm_openai/config.py +6 -0
judgeval/tracer/llm/llm_openai/responses.py +506 -0
judgeval/tracer/llm/llm_openai/utils.py +42 -0
judgeval/tracer/llm/llm_openai/wrapper.py +63 -0
judgeval/tracer/llm/llm_together/__init__.py +3 -0
judgeval/tracer/llm/llm_together/chat_completions.py +406 -0
judgeval/tracer/llm/llm_together/config.py +6 -0
judgeval/tracer/llm/llm_together/wrapper.py +52 -0
judgeval/tracer/llm/providers.py +19 -0
judgeval/tracer/managers.py +167 -0
judgeval/tracer/processors/__init__.py +220 -0
judgeval/tracer/utils.py +19 -0
judgeval/trainer/__init__.py +14 -0
judgeval/trainer/base_trainer.py +122 -0
judgeval/trainer/config.py +123 -0
judgeval/trainer/console.py +144 -0
judgeval/trainer/fireworks_trainer.py +392 -0
judgeval/trainer/trainable_model.py +252 -0
judgeval/trainer/trainer.py +70 -0
judgeval/utils/async_utils.py +39 -0
judgeval/utils/decorators/__init__.py +0 -0
judgeval/utils/decorators/dont_throw.py +37 -0
judgeval/utils/decorators/use_once.py +13 -0
judgeval/utils/file_utils.py +74 -28
judgeval/utils/guards.py +36 -0
judgeval/utils/meta.py +27 -0
judgeval/utils/project.py +15 -0
judgeval/utils/serialize.py +253 -0
judgeval/utils/testing.py +70 -0
judgeval/utils/url.py +10 -0
judgeval/{version_check.py → utils/version_check.py} +5 -3
judgeval/utils/wrappers/README.md +3 -0
judgeval/utils/wrappers/__init__.py +15 -0
judgeval/utils/wrappers/immutable_wrap_async.py +74 -0
judgeval/utils/wrappers/immutable_wrap_async_iterator.py +84 -0
judgeval/utils/wrappers/immutable_wrap_sync.py +66 -0
judgeval/utils/wrappers/immutable_wrap_sync_iterator.py +84 -0
judgeval/utils/wrappers/mutable_wrap_async.py +67 -0
judgeval/utils/wrappers/mutable_wrap_sync.py +67 -0
judgeval/utils/wrappers/py.typed +0 -0
judgeval/utils/wrappers/utils.py +35 -0
judgeval/v1/__init__.py +88 -0
judgeval/v1/data/__init__.py +7 -0
judgeval/v1/data/example.py +44 -0
judgeval/v1/data/scorer_data.py +42 -0
judgeval/v1/data/scoring_result.py +44 -0
judgeval/v1/datasets/__init__.py +6 -0
judgeval/v1/datasets/dataset.py +214 -0
judgeval/v1/datasets/dataset_factory.py +94 -0
judgeval/v1/evaluation/__init__.py +6 -0
judgeval/v1/evaluation/evaluation.py +182 -0
judgeval/v1/evaluation/evaluation_factory.py +17 -0
judgeval/v1/instrumentation/__init__.py +6 -0
judgeval/v1/instrumentation/llm/__init__.py +7 -0
judgeval/v1/instrumentation/llm/config.py +78 -0
judgeval/v1/instrumentation/llm/constants.py +11 -0
judgeval/v1/instrumentation/llm/llm_anthropic/__init__.py +5 -0
judgeval/v1/instrumentation/llm/llm_anthropic/config.py +6 -0
judgeval/v1/instrumentation/llm/llm_anthropic/messages.py +414 -0
judgeval/v1/instrumentation/llm/llm_anthropic/messages_stream.py +307 -0
judgeval/v1/instrumentation/llm/llm_anthropic/wrapper.py +61 -0
judgeval/v1/instrumentation/llm/llm_google/__init__.py +5 -0
judgeval/v1/instrumentation/llm/llm_google/config.py +6 -0
judgeval/v1/instrumentation/llm/llm_google/generate_content.py +121 -0
judgeval/v1/instrumentation/llm/llm_google/wrapper.py +30 -0
judgeval/v1/instrumentation/llm/llm_openai/__init__.py +5 -0
judgeval/v1/instrumentation/llm/llm_openai/beta_chat_completions.py +212 -0
judgeval/v1/instrumentation/llm/llm_openai/chat_completions.py +477 -0
judgeval/v1/instrumentation/llm/llm_openai/config.py +6 -0
judgeval/v1/instrumentation/llm/llm_openai/responses.py +472 -0
judgeval/v1/instrumentation/llm/llm_openai/utils.py +41 -0
judgeval/v1/instrumentation/llm/llm_openai/wrapper.py +63 -0
judgeval/v1/instrumentation/llm/llm_together/__init__.py +5 -0
judgeval/v1/instrumentation/llm/llm_together/chat_completions.py +382 -0
judgeval/v1/instrumentation/llm/llm_together/config.py +6 -0
judgeval/v1/instrumentation/llm/llm_together/wrapper.py +57 -0
judgeval/v1/instrumentation/llm/providers.py +19 -0
judgeval/v1/integrations/claude_agent_sdk/__init__.py +119 -0
judgeval/v1/integrations/claude_agent_sdk/wrapper.py +564 -0
judgeval/v1/integrations/langgraph/__init__.py +13 -0
judgeval/v1/integrations/openlit/__init__.py +47 -0
judgeval/v1/internal/api/__init__.py +525 -0
judgeval/v1/internal/api/api_types.py +413 -0
judgeval/v1/prompts/__init__.py +6 -0
judgeval/v1/prompts/prompt.py +29 -0
judgeval/v1/prompts/prompt_factory.py +189 -0
judgeval/v1/py.typed +0 -0
judgeval/v1/scorers/__init__.py +6 -0
judgeval/v1/scorers/api_scorer.py +82 -0
judgeval/v1/scorers/base_scorer.py +17 -0
judgeval/v1/scorers/built_in/__init__.py +17 -0
judgeval/v1/scorers/built_in/answer_correctness.py +28 -0
judgeval/v1/scorers/built_in/answer_relevancy.py +28 -0
judgeval/v1/scorers/built_in/built_in_factory.py +26 -0
judgeval/v1/scorers/built_in/faithfulness.py +28 -0
judgeval/v1/scorers/built_in/instruction_adherence.py +28 -0
judgeval/v1/scorers/custom_scorer/__init__.py +6 -0
judgeval/v1/scorers/custom_scorer/custom_scorer.py +50 -0
judgeval/v1/scorers/custom_scorer/custom_scorer_factory.py +16 -0
judgeval/v1/scorers/prompt_scorer/__init__.py +6 -0
judgeval/v1/scorers/prompt_scorer/prompt_scorer.py +86 -0
judgeval/v1/scorers/prompt_scorer/prompt_scorer_factory.py +85 -0
judgeval/v1/scorers/scorers_factory.py +49 -0
judgeval/v1/tracer/__init__.py +7 -0
judgeval/v1/tracer/base_tracer.py +520 -0
judgeval/v1/tracer/exporters/__init__.py +14 -0
judgeval/v1/tracer/exporters/in_memory_span_exporter.py +25 -0
judgeval/v1/tracer/exporters/judgment_span_exporter.py +42 -0
judgeval/v1/tracer/exporters/noop_span_exporter.py +19 -0
judgeval/v1/tracer/exporters/span_store.py +50 -0
judgeval/v1/tracer/judgment_tracer_provider.py +70 -0
judgeval/v1/tracer/processors/__init__.py +6 -0
judgeval/v1/tracer/processors/_lifecycles/__init__.py +28 -0
judgeval/v1/tracer/processors/_lifecycles/agent_id_processor.py +53 -0
judgeval/v1/tracer/processors/_lifecycles/context_keys.py +11 -0
judgeval/v1/tracer/processors/_lifecycles/customer_id_processor.py +29 -0
judgeval/v1/tracer/processors/_lifecycles/registry.py +18 -0
judgeval/v1/tracer/processors/judgment_span_processor.py +165 -0
judgeval/v1/tracer/processors/noop_span_processor.py +42 -0
judgeval/v1/tracer/tracer.py +67 -0
judgeval/v1/tracer/tracer_factory.py +38 -0
judgeval/v1/trainers/__init__.py +5 -0
judgeval/v1/trainers/base_trainer.py +62 -0
judgeval/v1/trainers/config.py +123 -0
judgeval/v1/trainers/console.py +144 -0
judgeval/v1/trainers/fireworks_trainer.py +392 -0
judgeval/v1/trainers/trainable_model.py +252 -0
judgeval/v1/trainers/trainers_factory.py +37 -0
judgeval/v1/utils.py +18 -0
judgeval/version.py +5 -0
judgeval/warnings.py +4 -0
judgeval-0.23.0.dist-info/METADATA +266 -0
judgeval-0.23.0.dist-info/RECORD +201 -0
judgeval-0.23.0.dist-info/entry_points.txt +2 -0
judgeval/clients.py +0 -34
judgeval/common/__init__.py +0 -13
judgeval/common/api/__init__.py +0 -3
judgeval/common/api/api.py +0 -352
judgeval/common/api/constants.py +0 -165
judgeval/common/exceptions.py +0 -27
judgeval/common/storage/__init__.py +0 -6
judgeval/common/storage/s3_storage.py +0 -98
judgeval/common/tracer/__init__.py +0 -31
judgeval/common/tracer/constants.py +0 -22
judgeval/common/tracer/core.py +0 -1916
judgeval/common/tracer/otel_exporter.py +0 -108
judgeval/common/tracer/otel_span_processor.py +0 -234
judgeval/common/tracer/span_processor.py +0 -37
judgeval/common/tracer/span_transformer.py +0 -211
judgeval/common/tracer/trace_manager.py +0 -92
judgeval/common/utils.py +0 -940
judgeval/data/datasets/__init__.py +0 -4
judgeval/data/datasets/dataset.py +0 -341
judgeval/data/datasets/eval_dataset_client.py +0 -214
judgeval/data/tool.py +0 -5
judgeval/data/trace_run.py +0 -37
judgeval/evaluation_run.py +0 -75
judgeval/integrations/langgraph.py +0 -843
judgeval/judges/mixture_of_judges.py +0 -286
judgeval/judgment_client.py +0 -369
judgeval/rules.py +0 -521
judgeval/run_evaluation.py +0 -684
judgeval/scorers/judgeval_scorers/api_scorers/derailment_scorer.py +0 -14
judgeval/scorers/judgeval_scorers/api_scorers/execution_order.py +0 -52
judgeval/scorers/judgeval_scorers/api_scorers/hallucination.py +0 -28
judgeval/scorers/judgeval_scorers/api_scorers/tool_dependency.py +0 -20
judgeval/scorers/judgeval_scorers/api_scorers/tool_order.py +0 -27
judgeval/utils/alerts.py +0 -93
judgeval/utils/requests.py +0 -50
judgeval-0.1.0.dist-info/METADATA +0 -202
judgeval-0.1.0.dist-info/RECORD +0 -73
{judgeval-0.1.0.dist-info → judgeval-0.23.0.dist-info}/WHEEL +0 -0
{judgeval-0.1.0.dist-info → judgeval-0.23.0.dist-info}/licenses/LICENSE.md +0 -0

judgeval/v1/tracer/base_tracer.py ADDED Viewed

@@ -0,0 +1,520 @@
+from __future__ import annotations
+import datetime
+import functools
+import inspect
+import time
+from abc import ABC, abstractmethod
+from typing import Any, Callable, Dict, Optional, Tuple, TypeVar, overload
+from opentelemetry import trace
+from opentelemetry.sdk.trace.export import SpanExporter
+from opentelemetry.trace import Span, SpanContext, Status, StatusCode
+from judgeval.logger import judgeval_logger
+from judgeval.utils.decorators.dont_throw import dont_throw
+from judgeval.v1.data.example import Example
+from judgeval.v1.instrumentation import wrap_provider
+from judgeval.v1.instrumentation.llm.providers import ApiClient
+from judgeval.v1.internal.api import JudgmentSyncClient
+from judgeval.v1.utils import resolve_project_id
+from judgeval.v1.internal.api.api_types import (
+    ExampleEvaluationRun,
+    TraceEvaluationRun,
+)
+from judgeval.v1.scorers.base_scorer import BaseScorer
+from judgeval.judgment_attribute_keys import AttributeKeys
+from judgeval.v1.scorers.custom_scorer.custom_scorer import CustomScorer
+from judgeval.v1.tracer.exporters.judgment_span_exporter import JudgmentSpanExporter
+from judgeval.v1.tracer.processors.judgment_span_processor import JudgmentSpanProcessor
+from uuid import uuid4
+from opentelemetry.context import attach, detach, get_value, set_value
+from judgeval.v1.tracer.processors._lifecycles import (
+    AGENT_ID_KEY,
+    PARENT_AGENT_ID_KEY,
+    CUSTOMER_ID_KEY,
+    AGENT_CLASS_NAME_KEY,
+    AGENT_INSTANCE_NAME_KEY,
+)
+C = TypeVar("C", bound=Callable[..., Any])
+class BaseTracer(ABC):
+    __slots__ = (
+        "project_name",
+        "enable_evaluation",
+        "api_client",
+        "serializer",
+        "project_id",
+    )
+    TRACER_NAME = "judgeval"
+    def __init__(
+        self,
+        project_name: str,
+        enable_evaluation: bool,
+        api_client: JudgmentSyncClient,
+        serializer: Callable[[Any], str],
+    ):
+        self.project_name = project_name
+        self.enable_evaluation = enable_evaluation
+        self.api_client = api_client
+        self.serializer = serializer
+        self.project_id = resolve_project_id(api_client, project_name)
+        if self.project_id is None:
+            judgeval_logger.error(
+                f"Failed to resolve project {project_name}, "
+                f"please create it first at https://app.judgmentlabs.ai/org/{self.api_client.organization_id}/projects. "
+                "Skipping Judgment export."
+            )
+    @abstractmethod
+    def initialize(self) -> None:
+        """Abstract hook that concrete tracers must implement; not called within this class."""
+        pass
+    @abstractmethod
+    def force_flush(self, timeout_millis: int) -> bool:
+        """Abstract hook that concrete tracers must implement.
+        NOTE(review): ``timeout_millis`` is presumably a timeout in milliseconds
+        and the bool a success flag -- confirm against the implementations.
+        """
+        pass
+    @abstractmethod
+    def shutdown(self, timeout_millis: int) -> None:
+        """Abstract hook that concrete tracers must implement.
+        NOTE(review): ``timeout_millis`` is presumably a timeout in milliseconds
+        -- confirm against the implementations.
+        """
+        pass
+    def get_span_exporter(self) -> SpanExporter:
+        if self.project_id is not None:
+            return JudgmentSpanExporter(
+                endpoint=self._build_endpoint(self.api_client.base_url),
+                api_key=self.api_client.api_key,
+                organization_id=self.api_client.organization_id,
+                project_id=self.project_id,
+            )
+        else:
+            judgeval_logger.error(
+                "Project not resolved; cannot create exporter, returning NoOpSpanExporter"
+            )
+            from judgeval.v1.tracer.exporters.noop_span_exporter import NoOpSpanExporter
+            return NoOpSpanExporter()
+    def get_span_processor(self) -> JudgmentSpanProcessor:
+        if self.project_id is not None:
+            return JudgmentSpanProcessor(
+                self,
+                self.get_span_exporter(),
+            )
+        else:
+            judgeval_logger.error(
+                "Project not resolved; cannot create processor, returning NoOpSpanProcessor"
+            )
+            from judgeval.v1.tracer.processors.noop_span_processor import (
+                NoOpJudgmentSpanProcessor,
+            )
+            return NoOpJudgmentSpanProcessor()
+    def get_tracer(self) -> trace.Tracer:
+        """Return the OpenTelemetry tracer registered under ``TRACER_NAME``."""
+        return trace.get_tracer(self.TRACER_NAME)
+    def set_span_kind(self, kind: str) -> None:
+        if kind is None:
+            return
+        current_span = trace.get_current_span()
+        if current_span is not None:
+            current_span.set_attribute(AttributeKeys.JUDGMENT_SPAN_KIND, kind)
+    @dont_throw
+    def set_attribute(self, key: str, value: Any) -> None:
+        if not self._is_valid_key(key):
+            return
+        if value is None:
+            return
+        current_span = trace.get_current_span()
+        if current_span is not None:
+            serialized_value = (
+                self.serializer(value)
+                if not isinstance(value, (str, int, float, bool))
+                else value
+            )
+            current_span.set_attribute(key, serialized_value)
+    def set_attributes(self, attributes: Dict[str, Any]) -> None:
+        if attributes is None:
+            return
+        for key, value in attributes.items():
+            self.set_attribute(key, value)
+    def set_customer_id(self, customer_id: str) -> None:
+        ctx = set_value(CUSTOMER_ID_KEY, customer_id)
+        attach(ctx)
+    def set_llm_span(self) -> None:
+        """Mark the current span with span kind ``"llm"``."""
+        self.set_span_kind("llm")
+    def set_tool_span(self) -> None:
+        """Mark the current span with span kind ``"tool"``."""
+        self.set_span_kind("tool")
+    def set_general_span(self) -> None:
+        """Mark the current span with span kind ``"span"``."""
+        self.set_span_kind("span")
+    def set_input(self, input_data: Any) -> None:
+        """Store ``input_data`` on the current span under JUDGMENT_INPUT via ``set_attribute``."""
+        self.set_attribute(AttributeKeys.JUDGMENT_INPUT, input_data)
+    def set_output(self, output_data: Any) -> None:
+        """Store ``output_data`` on the current span under JUDGMENT_OUTPUT via ``set_attribute``."""
+        self.set_attribute(AttributeKeys.JUDGMENT_OUTPUT, output_data)
+    def span(self, span_name: str, callable_func: Callable[[], Any]) -> Any:
+        tracer = self.get_tracer()
+        with tracer.start_as_current_span(span_name) as span:
+            try:
+                return callable_func()
+            except Exception as e:
+                span.set_status(trace.Status(trace.StatusCode.ERROR))
+                span.record_exception(e)
+                raise
+    @staticmethod
+    def start_span(span_name: str) -> Span:
+        """Start and return a span named ``span_name`` from the ``TRACER_NAME`` tracer.
+        Uses ``tracer.start_span`` rather than ``start_as_current_span``; this
+        method does not end the span it returns.
+        """
+        tracer = trace.get_tracer(BaseTracer.TRACER_NAME)
+        return tracer.start_span(span_name)
+    @dont_throw
+    def async_evaluate(
+        self,
+        scorer: BaseScorer,
+        example: Example,
+    ) -> None:
+        if not self.enable_evaluation:
+            return
+        span_context = self._get_sampled_span_context()
+        if span_context is None:
+            return
+        trace_id = span_context.trace_id
+        span_id = span_context.span_id
+        trace_id_hex = format(trace_id, "032x")
+        span_id_hex = format(span_id, "016x")
+        self._log_evaluation_info(
+            "asyncEvaluate", trace_id_hex, span_id_hex, scorer.get_name()
+        )
+        evaluation_run = self._create_evaluation_run(
+            scorer, example, trace_id_hex, span_id_hex
+        )
+        self._enqueue_evaluation(evaluation_run)
+    @dont_throw
+    def async_trace_evaluate(
+        self,
+        scorer: BaseScorer,
+    ) -> None:
+        if not self.enable_evaluation:
+            return
+        current_span = self._get_sampled_span()
+        if current_span is None:
+            return
+        span_context = current_span.get_span_context()
+        trace_id = span_context.trace_id
+        span_id = span_context.span_id
+        trace_id_hex = format(trace_id, "032x")
+        span_id_hex = format(span_id, "016x")
+        self._log_evaluation_info(
+            "asyncTraceEvaluate", trace_id_hex, span_id_hex, scorer.get_name()
+        )
+        evaluation_run = self._create_trace_evaluation_run(
+            scorer, trace_id_hex, span_id_hex
+        )
+        try:
+            trace_eval_json = self.serializer(evaluation_run)
+            current_span.set_attribute(
+                AttributeKeys.JUDGMENT_PENDING_TRACE_EVAL, trace_eval_json
+            )
+        except Exception as e:
+            judgeval_logger.error(f"Failed to serialize trace evaluation: {e}")
+    def _build_endpoint(self, base_url: str) -> str:
+        return (
+            base_url + "otel/v1/traces"
+            if base_url.endswith("/")
+            else base_url + "/otel/v1/traces"
+        )
+    def _generate_run_id(self, prefix: str, span_id: Optional[str]) -> str:
+        return prefix + (
+            span_id if span_id is not None else str(int(time.time() * 1000))
+        )
+    def _create_evaluation_run(
+        self,
+        scorer: BaseScorer,
+        example: Example,
+        trace_id: str,
+        span_id: str,
+    ) -> ExampleEvaluationRun:
+        run_id = self._generate_run_id("async_evaluate_", span_id)
+        judgment_scorers = (
+            [] if isinstance(scorer, CustomScorer) else [scorer.get_scorer_config()]
+        )
+        custom_scorers = [scorer.to_dict()] if isinstance(scorer, CustomScorer) else []
+        return ExampleEvaluationRun(
+            project_name=self.project_name,
+            eval_name=run_id,
+            trace_id=trace_id,
+            trace_span_id=span_id,
+            examples=[example.to_dict()],
+            judgment_scorers=judgment_scorers,
+            custom_scorers=custom_scorers,
+            created_at=datetime.datetime.now(datetime.timezone.utc).isoformat(),
+        )
+    def _create_trace_evaluation_run(
+        self,
+        scorer: BaseScorer,
+        trace_id: str,
+        span_id: str,
+    ) -> TraceEvaluationRun:
+        eval_name = self._generate_run_id("async_trace_evaluate_", span_id)
+        judgment_scorers = (
+            [] if isinstance(scorer, CustomScorer) else [scorer.get_scorer_config()]
+        )
+        custom_scorers = [scorer.to_dict()] if isinstance(scorer, CustomScorer) else []
+        return TraceEvaluationRun(
+            project_name=self.project_name,
+            eval_name=eval_name,
+            trace_and_span_ids=[[trace_id, span_id]],
+            judgment_scorers=judgment_scorers,
+            custom_scorers=custom_scorers,
+            is_offline=False,
+            is_bucket_run=False,
+            created_at=datetime.datetime.now(datetime.timezone.utc).isoformat(),
+        )
+    def _enqueue_evaluation(self, evaluation_run: ExampleEvaluationRun) -> None:
+        """Send ``evaluation_run`` to the API client's example evaluation queue.
+        Any exception from the client call is logged as an error and swallowed,
+        so a failed enqueue never propagates to the caller.
+        """
+        try:
+            self.api_client.add_to_run_eval_queue_examples(evaluation_run)
+        except Exception as e:
+            judgeval_logger.error(f"Failed to enqueue evaluation run: {e}")
+    def _get_sampled_span_context(self) -> Optional[SpanContext]:
+        current_span = trace.get_current_span()
+        if current_span is None:
+            return None
+        span_context = current_span.get_span_context()
+        if not span_context.is_valid or not span_context.trace_flags.sampled:
+            return None
+        return span_context
+    def _get_sampled_span(self) -> Optional[Span]:
+        current_span = trace.get_current_span()
+        if current_span is None:
+            return None
+        span_context = current_span.get_span_context()
+        if not span_context.is_valid or not span_context.trace_flags.sampled:
+            return None
+        return current_span
+    def _log_evaluation_info(
+        self, method: str, trace_id: str, span_id: str, scorer_name: str
+    ) -> None:
+        judgeval_logger.info(
+            f"{method}: project={self.project_name}, traceId={trace_id}, spanId={span_id}, scorer={scorer_name}"
+        )
+    @staticmethod
+    def _is_valid_key(key: str) -> bool:
+        return key is not None and len(key) > 0
+    @overload
+    def observe(
+        self,
+        func: C,
+        span_type: Optional[str] = "span",
+        span_name: Optional[str] = None,
+    ) -> C: ...
+    @overload
+    def observe(
+        self,
+        func: None = None,
+        span_type: Optional[str] = "span",
+        span_name: Optional[str] = None,
+    ) -> Callable[[C], C]: ...
+    def observe(
+        self,
+        func: Optional[C] = None,
+        span_type: Optional[str] = "span",
+        span_name: Optional[str] = None,
+    ) -> C | Callable[[C], C]:
+        if func is None:
+            return lambda f: self.observe(f, span_type, span_name)  # type: ignore[return-value]
+        tracer = self.get_tracer()
+        name = span_name or func.__name__
+        if inspect.iscoroutinefunction(func):
+            @functools.wraps(func)
+            async def async_wrapper(*args: Any, **kwargs: Any) -> Any:
+                with tracer.start_as_current_span(name) as span:
+                    if span_type:
+                        span.set_attribute(AttributeKeys.JUDGMENT_SPAN_KIND, span_type)
+                    try:
+                        input_data = _format_inputs(func, args, kwargs)
+                        span.set_attribute(
+                            AttributeKeys.JUDGMENT_INPUT, self.serializer(input_data)
+                        )
+                        self.get_span_processor().emit_partial()
+                        result = await func(*args, **kwargs)
+                        span.set_attribute(
+                            AttributeKeys.JUDGMENT_OUTPUT, self.serializer(result)
+                        )
+                        return result
+                    except Exception as e:
+                        span.record_exception(e)
+                        span.set_status(Status(StatusCode.ERROR, str(e)))
+                        raise
+            return async_wrapper  # type: ignore[return-value]
+        else:
+            @functools.wraps(func)
+            def sync_wrapper(*args: Any, **kwargs: Any) -> Any:
+                with tracer.start_as_current_span(name) as span:
+                    if span_type:
+                        span.set_attribute(AttributeKeys.JUDGMENT_SPAN_KIND, span_type)
+                    try:
+                        input_data = _format_inputs(func, args, kwargs)
+                        span.set_attribute(
+                            AttributeKeys.JUDGMENT_INPUT, self.serializer(input_data)
+                        )
+                        self.get_span_processor().emit_partial()
+                        result = func(*args, **kwargs)
+                        span.set_attribute(
+                            AttributeKeys.JUDGMENT_OUTPUT, self.serializer(result)
+                        )
+                        return result
+                    except Exception as e:
+                        span.record_exception(e)
+                        span.set_status(Status(StatusCode.ERROR, str(e)))
+                        raise
+            return sync_wrapper  # type: ignore[return-value]
+    @overload
+    def agent(self, func: C, /, *, identifier: Optional[str] = None) -> C: ...
+    @overload
+    def agent(
+        self, func: None = None, /, *, identifier: Optional[str] = None
+    ) -> Callable[[C], C]: ...
+    def agent(
+        self, func: Optional[C] = None, /, *, identifier: Optional[str] = None
+    ) -> C | Callable[[C], C]:
+        if func is None:
+            return lambda f: self.agent(f, identifier=identifier)  # type: ignore[return-value]
+        class_name = None
+        if hasattr(func, "__qualname__") and "." in func.__qualname__:
+            parts = func.__qualname__.split(".")
+            if len(parts) >= 2:
+                class_name = parts[-2]
+        if inspect.iscoroutinefunction(func):
+            @functools.wraps(func)
+            async def async_wrapper(*args: Any, **kwargs: Any) -> Any:
+                agent_id = str(uuid4())
+                parent_agent_id = get_value(AGENT_ID_KEY)
+                ctx = set_value(AGENT_ID_KEY, agent_id)
+                if parent_agent_id:
+                    ctx = set_value(PARENT_AGENT_ID_KEY, parent_agent_id, context=ctx)
+                if class_name:
+                    ctx = set_value(AGENT_CLASS_NAME_KEY, class_name, context=ctx)
+                if identifier and args:
+                    instance = args[0]
+                    if hasattr(instance, identifier):
+                        instance_name = str(getattr(instance, identifier))
+                        ctx = set_value(
+                            AGENT_INSTANCE_NAME_KEY, instance_name, context=ctx
+                        )
+                token = attach(ctx)
+                try:
+                    return await func(*args, **kwargs)
+                finally:
+                    detach(token)
+            return async_wrapper  # type: ignore[return-value]
+        else:
+            @functools.wraps(func)
+            def sync_wrapper(*args: Any, **kwargs: Any) -> Any:
+                agent_id = str(uuid4())
+                parent_agent_id = get_value(AGENT_ID_KEY)
+                ctx = set_value(AGENT_ID_KEY, agent_id)
+                if parent_agent_id:
+                    ctx = set_value(PARENT_AGENT_ID_KEY, parent_agent_id, context=ctx)
+                if class_name:
+                    ctx = set_value(AGENT_CLASS_NAME_KEY, class_name, context=ctx)
+                if identifier and args:
+                    instance = args[0]
+                    if hasattr(instance, identifier):
+                        instance_name = str(getattr(instance, identifier))
+                        ctx = set_value(
+                            AGENT_INSTANCE_NAME_KEY, instance_name, context=ctx
+                        )
+                token = attach(ctx)
+                try:
+                    return func(*args, **kwargs)
+                finally:
+                    detach(token)
+            return sync_wrapper  # type: ignore[return-value]
+    def wrap(self, client: ApiClient) -> ApiClient:
+        """Pass ``client`` and this tracer to ``wrap_provider`` and return its result."""
+        return wrap_provider(self, client)
+def _format_inputs(
+    f: Callable[..., Any], args: Tuple[Any, ...], kwargs: Dict[str, Any]
+) -> Dict[str, Any]:
+    try:
+        params = list(inspect.signature(f).parameters.values())
+        inputs: Dict[str, Any] = {}
+        arg_i = 0
+        for param in params:
+            if param.kind == inspect.Parameter.POSITIONAL_OR_KEYWORD:
+                if arg_i < len(args):
+                    inputs[param.name] = args[arg_i]
+                    arg_i += 1
+                elif param.name in kwargs:
+                    inputs[param.name] = kwargs[param.name]
+            elif param.kind == inspect.Parameter.VAR_POSITIONAL:
+                inputs[param.name] = args[arg_i:]
+                arg_i = len(args)
+            elif param.kind == inspect.Parameter.VAR_KEYWORD:
+                inputs[param.name] = kwargs
+        return inputs
+    except Exception:
+        return {}

judgeval/v1/tracer/exporters/__init__.py ADDED Viewed

@@ -0,0 +1,14 @@
+from __future__ import annotations
+from judgeval.v1.tracer.exporters.judgment_span_exporter import JudgmentSpanExporter
+from judgeval.v1.tracer.exporters.noop_span_exporter import NoOpSpanExporter
+from judgeval.v1.tracer.exporters.span_store import ABCSpanStore, SpanStore
+from judgeval.v1.tracer.exporters.in_memory_span_exporter import InMemorySpanExporter
+# Names exported by ``from judgeval.v1.tracer.exporters import *``.
+__all__ = [
+    "JudgmentSpanExporter",
+    "NoOpSpanExporter",
+    "ABCSpanStore",
+    "SpanStore",
+    "InMemorySpanExporter",
+]

judgeval/v1/tracer/exporters/in_memory_span_exporter.py ADDED Viewed

@@ -0,0 +1,25 @@
+from __future__ import annotations
+from typing import Sequence
+from opentelemetry.sdk.trace import ReadableSpan
+from opentelemetry.sdk.trace.export import SpanExporter, SpanExportResult
+from judgeval.v1.tracer.exporters.span_store import ABCSpanStore
+class InMemorySpanExporter(SpanExporter):
+    __slots__ = ("_store",)
+    def __init__(self, store: ABCSpanStore) -> None:
+        self._store = store
+    def export(self, spans: Sequence[ReadableSpan]) -> SpanExportResult:
+        self._store.add(*spans)
+        return SpanExportResult.SUCCESS
+    def shutdown(self) -> None:
+        pass
+    def force_flush(self, timeout_millis: int = 30000) -> bool:
+        return True

judgeval/v1/tracer/exporters/judgment_span_exporter.py ADDED Viewed

@@ -0,0 +1,42 @@
+from __future__ import annotations
+from typing import Sequence
+from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter
+from opentelemetry.sdk.trace import ReadableSpan
+from opentelemetry.sdk.trace.export import SpanExporter, SpanExportResult
+from judgeval.logger import judgeval_logger
+class JudgmentSpanExporter(SpanExporter):
+    __slots__ = ("_delegate",)
+    def __init__(
+        self,
+        endpoint: str,
+        api_key: str,
+        organization_id: str,
+        project_id: str,
+    ):
+        if not project_id or len(project_id.strip()) == 0:
+            raise ValueError("project_id is required for JudgmentSpanExporter")
+        self._delegate = OTLPSpanExporter(
+            endpoint=endpoint,
+            headers={
+                "Authorization": f"Bearer {api_key}",
+                "X-Organization-Id": organization_id,
+                "X-Project-Id": project_id,
+            },
+        )
+    def export(self, spans: Sequence[ReadableSpan]) -> SpanExportResult:
+        judgeval_logger.info(f"Exported {len(spans)} spans")
+        return self._delegate.export(spans)
+    def shutdown(self) -> None:
+        self._delegate.shutdown()
+    def force_flush(self, timeout_millis: int = 30000) -> bool:
+        return self._delegate.force_flush(timeout_millis)

judgeval/v1/tracer/exporters/noop_span_exporter.py ADDED Viewed

@@ -0,0 +1,19 @@
+from __future__ import annotations
+from typing import Sequence
+from opentelemetry.sdk.trace import ReadableSpan
+from opentelemetry.sdk.trace.export import SpanExporter, SpanExportResult
+class NoOpSpanExporter(SpanExporter):
+    __slots__ = ()
+    def export(self, spans: Sequence[ReadableSpan]) -> SpanExportResult:
+        return SpanExportResult.SUCCESS
+    def shutdown(self) -> None:
+        pass
+    def force_flush(self, timeout_millis: int = 30000) -> bool:
+        return True

judgeval/v1/tracer/exporters/span_store.py ADDED Viewed

@@ -0,0 +1,50 @@
+from __future__ import annotations
+from abc import ABC, abstractmethod
+from typing import Dict, List
+from opentelemetry.sdk.trace import ReadableSpan
+class ABCSpanStore(ABC):
+    @abstractmethod
+    def add(self, *spans: ReadableSpan) -> None: ...
+    @abstractmethod
+    def get_all(self) -> List[ReadableSpan]: ...
+    @abstractmethod
+    def get_by_trace_id(self, trace_id: str) -> List[ReadableSpan]: ...
+    @abstractmethod
+    def clear_trace(self, trace_id: str) -> None: ...
+class SpanStore(ABCSpanStore):
+    __slots__ = ("_spans_by_trace",)
+    def __init__(self) -> None:
+        self._spans_by_trace: Dict[str, List[ReadableSpan]] = {}
+    def add(self, *spans: ReadableSpan) -> None:
+        for span in spans:
+            context = span.get_span_context()
+            if context is None:
+                continue
+            trace_id = format(context.trace_id, "032x")
+            if trace_id not in self._spans_by_trace:
+                self._spans_by_trace[trace_id] = []
+            self._spans_by_trace[trace_id].append(span)
+    def get_all(self) -> List[ReadableSpan]:
+        all_spans = []
+        for spans in self._spans_by_trace.values():
+            all_spans.extend(spans)
+        return all_spans
+    def get_by_trace_id(self, trace_id: str) -> List[ReadableSpan]:
+        return self._spans_by_trace.get(trace_id, [])
+    def clear_trace(self, trace_id: str) -> None:
+        if trace_id in self._spans_by_trace:
+            del self._spans_by_trace[trace_id]

judgeval 0.1.0__py3-none-any.whl → 0.23.0__py3-none-any.whl

judgeval 0.1.0__py3-none-any.whl → 0.23.0__py3-none-any.whl