PyPI - judgeval - Versions diffs - 0.1.0__py3-none-any.whl → 0.23.0__py3-none-any.whl - Mend

judgeval 0.1.0__py3-none-any.whl → 0.23.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (234) hide show

judgeval/__init__.py +173 -10
judgeval/api/__init__.py +523 -0
judgeval/api/api_types.py +413 -0
judgeval/cli.py +112 -0
judgeval/constants.py +7 -30
judgeval/data/__init__.py +1 -3
judgeval/data/evaluation_run.py +125 -0
judgeval/data/example.py +14 -40
judgeval/data/judgment_types.py +396 -146
judgeval/data/result.py +11 -18
judgeval/data/scorer_data.py +3 -26
judgeval/data/scripts/openapi_transform.py +5 -5
judgeval/data/trace.py +115 -194
judgeval/dataset/__init__.py +335 -0
judgeval/env.py +55 -0
judgeval/evaluation/__init__.py +346 -0
judgeval/exceptions.py +28 -0
judgeval/integrations/langgraph/__init__.py +13 -0
judgeval/integrations/openlit/__init__.py +51 -0
judgeval/judges/__init__.py +2 -2
judgeval/judges/litellm_judge.py +77 -16
judgeval/judges/together_judge.py +88 -17
judgeval/judges/utils.py +7 -20
judgeval/judgment_attribute_keys.py +55 -0
judgeval/{common/logger.py → logger.py} +24 -8
judgeval/prompt/__init__.py +330 -0
judgeval/scorers/__init__.py +11 -11
judgeval/scorers/agent_scorer.py +15 -19
judgeval/scorers/api_scorer.py +21 -23
judgeval/scorers/base_scorer.py +54 -36
judgeval/scorers/example_scorer.py +1 -3
judgeval/scorers/judgeval_scorers/api_scorers/__init__.py +2 -24
judgeval/scorers/judgeval_scorers/api_scorers/answer_correctness.py +2 -10
judgeval/scorers/judgeval_scorers/api_scorers/answer_relevancy.py +2 -2
judgeval/scorers/judgeval_scorers/api_scorers/faithfulness.py +2 -10
judgeval/scorers/judgeval_scorers/api_scorers/instruction_adherence.py +2 -14
judgeval/scorers/judgeval_scorers/api_scorers/prompt_scorer.py +171 -59
judgeval/scorers/score.py +64 -47
judgeval/scorers/utils.py +2 -107
judgeval/tracer/__init__.py +1111 -2
judgeval/tracer/constants.py +1 -0
judgeval/tracer/exporters/__init__.py +40 -0
judgeval/tracer/exporters/s3.py +119 -0
judgeval/tracer/exporters/store.py +59 -0
judgeval/tracer/exporters/utils.py +32 -0
judgeval/tracer/keys.py +63 -0
judgeval/tracer/llm/__init__.py +7 -0
judgeval/tracer/llm/config.py +78 -0
judgeval/tracer/llm/constants.py +9 -0
judgeval/tracer/llm/llm_anthropic/__init__.py +3 -0
judgeval/tracer/llm/llm_anthropic/config.py +6 -0
judgeval/tracer/llm/llm_anthropic/messages.py +452 -0
judgeval/tracer/llm/llm_anthropic/messages_stream.py +322 -0
judgeval/tracer/llm/llm_anthropic/wrapper.py +59 -0
judgeval/tracer/llm/llm_google/__init__.py +3 -0
judgeval/tracer/llm/llm_google/config.py +6 -0
judgeval/tracer/llm/llm_google/generate_content.py +127 -0
judgeval/tracer/llm/llm_google/wrapper.py +30 -0
judgeval/tracer/llm/llm_openai/__init__.py +3 -0
judgeval/tracer/llm/llm_openai/beta_chat_completions.py +216 -0
judgeval/tracer/llm/llm_openai/chat_completions.py +501 -0
judgeval/tracer/llm/llm_openai/config.py +6 -0
judgeval/tracer/llm/llm_openai/responses.py +506 -0
judgeval/tracer/llm/llm_openai/utils.py +42 -0
judgeval/tracer/llm/llm_openai/wrapper.py +63 -0
judgeval/tracer/llm/llm_together/__init__.py +3 -0
judgeval/tracer/llm/llm_together/chat_completions.py +406 -0
judgeval/tracer/llm/llm_together/config.py +6 -0
judgeval/tracer/llm/llm_together/wrapper.py +52 -0
judgeval/tracer/llm/providers.py +19 -0
judgeval/tracer/managers.py +167 -0
judgeval/tracer/processors/__init__.py +220 -0
judgeval/tracer/utils.py +19 -0
judgeval/trainer/__init__.py +14 -0
judgeval/trainer/base_trainer.py +122 -0
judgeval/trainer/config.py +123 -0
judgeval/trainer/console.py +144 -0
judgeval/trainer/fireworks_trainer.py +392 -0
judgeval/trainer/trainable_model.py +252 -0
judgeval/trainer/trainer.py +70 -0
judgeval/utils/async_utils.py +39 -0
judgeval/utils/decorators/__init__.py +0 -0
judgeval/utils/decorators/dont_throw.py +37 -0
judgeval/utils/decorators/use_once.py +13 -0
judgeval/utils/file_utils.py +74 -28
judgeval/utils/guards.py +36 -0
judgeval/utils/meta.py +27 -0
judgeval/utils/project.py +15 -0
judgeval/utils/serialize.py +253 -0
judgeval/utils/testing.py +70 -0
judgeval/utils/url.py +10 -0
judgeval/{version_check.py → utils/version_check.py} +5 -3
judgeval/utils/wrappers/README.md +3 -0
judgeval/utils/wrappers/__init__.py +15 -0
judgeval/utils/wrappers/immutable_wrap_async.py +74 -0
judgeval/utils/wrappers/immutable_wrap_async_iterator.py +84 -0
judgeval/utils/wrappers/immutable_wrap_sync.py +66 -0
judgeval/utils/wrappers/immutable_wrap_sync_iterator.py +84 -0
judgeval/utils/wrappers/mutable_wrap_async.py +67 -0
judgeval/utils/wrappers/mutable_wrap_sync.py +67 -0
judgeval/utils/wrappers/py.typed +0 -0
judgeval/utils/wrappers/utils.py +35 -0
judgeval/v1/__init__.py +88 -0
judgeval/v1/data/__init__.py +7 -0
judgeval/v1/data/example.py +44 -0
judgeval/v1/data/scorer_data.py +42 -0
judgeval/v1/data/scoring_result.py +44 -0
judgeval/v1/datasets/__init__.py +6 -0
judgeval/v1/datasets/dataset.py +214 -0
judgeval/v1/datasets/dataset_factory.py +94 -0
judgeval/v1/evaluation/__init__.py +6 -0
judgeval/v1/evaluation/evaluation.py +182 -0
judgeval/v1/evaluation/evaluation_factory.py +17 -0
judgeval/v1/instrumentation/__init__.py +6 -0
judgeval/v1/instrumentation/llm/__init__.py +7 -0
judgeval/v1/instrumentation/llm/config.py +78 -0
judgeval/v1/instrumentation/llm/constants.py +11 -0
judgeval/v1/instrumentation/llm/llm_anthropic/__init__.py +5 -0
judgeval/v1/instrumentation/llm/llm_anthropic/config.py +6 -0
judgeval/v1/instrumentation/llm/llm_anthropic/messages.py +414 -0
judgeval/v1/instrumentation/llm/llm_anthropic/messages_stream.py +307 -0
judgeval/v1/instrumentation/llm/llm_anthropic/wrapper.py +61 -0
judgeval/v1/instrumentation/llm/llm_google/__init__.py +5 -0
judgeval/v1/instrumentation/llm/llm_google/config.py +6 -0
judgeval/v1/instrumentation/llm/llm_google/generate_content.py +121 -0
judgeval/v1/instrumentation/llm/llm_google/wrapper.py +30 -0
judgeval/v1/instrumentation/llm/llm_openai/__init__.py +5 -0
judgeval/v1/instrumentation/llm/llm_openai/beta_chat_completions.py +212 -0
judgeval/v1/instrumentation/llm/llm_openai/chat_completions.py +477 -0
judgeval/v1/instrumentation/llm/llm_openai/config.py +6 -0
judgeval/v1/instrumentation/llm/llm_openai/responses.py +472 -0
judgeval/v1/instrumentation/llm/llm_openai/utils.py +41 -0
judgeval/v1/instrumentation/llm/llm_openai/wrapper.py +63 -0
judgeval/v1/instrumentation/llm/llm_together/__init__.py +5 -0
judgeval/v1/instrumentation/llm/llm_together/chat_completions.py +382 -0
judgeval/v1/instrumentation/llm/llm_together/config.py +6 -0
judgeval/v1/instrumentation/llm/llm_together/wrapper.py +57 -0
judgeval/v1/instrumentation/llm/providers.py +19 -0
judgeval/v1/integrations/claude_agent_sdk/__init__.py +119 -0
judgeval/v1/integrations/claude_agent_sdk/wrapper.py +564 -0
judgeval/v1/integrations/langgraph/__init__.py +13 -0
judgeval/v1/integrations/openlit/__init__.py +47 -0
judgeval/v1/internal/api/__init__.py +525 -0
judgeval/v1/internal/api/api_types.py +413 -0
judgeval/v1/prompts/__init__.py +6 -0
judgeval/v1/prompts/prompt.py +29 -0
judgeval/v1/prompts/prompt_factory.py +189 -0
judgeval/v1/py.typed +0 -0
judgeval/v1/scorers/__init__.py +6 -0
judgeval/v1/scorers/api_scorer.py +82 -0
judgeval/v1/scorers/base_scorer.py +17 -0
judgeval/v1/scorers/built_in/__init__.py +17 -0
judgeval/v1/scorers/built_in/answer_correctness.py +28 -0
judgeval/v1/scorers/built_in/answer_relevancy.py +28 -0
judgeval/v1/scorers/built_in/built_in_factory.py +26 -0
judgeval/v1/scorers/built_in/faithfulness.py +28 -0
judgeval/v1/scorers/built_in/instruction_adherence.py +28 -0
judgeval/v1/scorers/custom_scorer/__init__.py +6 -0
judgeval/v1/scorers/custom_scorer/custom_scorer.py +50 -0
judgeval/v1/scorers/custom_scorer/custom_scorer_factory.py +16 -0
judgeval/v1/scorers/prompt_scorer/__init__.py +6 -0
judgeval/v1/scorers/prompt_scorer/prompt_scorer.py +86 -0
judgeval/v1/scorers/prompt_scorer/prompt_scorer_factory.py +85 -0
judgeval/v1/scorers/scorers_factory.py +49 -0
judgeval/v1/tracer/__init__.py +7 -0
judgeval/v1/tracer/base_tracer.py +520 -0
judgeval/v1/tracer/exporters/__init__.py +14 -0
judgeval/v1/tracer/exporters/in_memory_span_exporter.py +25 -0
judgeval/v1/tracer/exporters/judgment_span_exporter.py +42 -0
judgeval/v1/tracer/exporters/noop_span_exporter.py +19 -0
judgeval/v1/tracer/exporters/span_store.py +50 -0
judgeval/v1/tracer/judgment_tracer_provider.py +70 -0
judgeval/v1/tracer/processors/__init__.py +6 -0
judgeval/v1/tracer/processors/_lifecycles/__init__.py +28 -0
judgeval/v1/tracer/processors/_lifecycles/agent_id_processor.py +53 -0
judgeval/v1/tracer/processors/_lifecycles/context_keys.py +11 -0
judgeval/v1/tracer/processors/_lifecycles/customer_id_processor.py +29 -0
judgeval/v1/tracer/processors/_lifecycles/registry.py +18 -0
judgeval/v1/tracer/processors/judgment_span_processor.py +165 -0
judgeval/v1/tracer/processors/noop_span_processor.py +42 -0
judgeval/v1/tracer/tracer.py +67 -0
judgeval/v1/tracer/tracer_factory.py +38 -0
judgeval/v1/trainers/__init__.py +5 -0
judgeval/v1/trainers/base_trainer.py +62 -0
judgeval/v1/trainers/config.py +123 -0
judgeval/v1/trainers/console.py +144 -0
judgeval/v1/trainers/fireworks_trainer.py +392 -0
judgeval/v1/trainers/trainable_model.py +252 -0
judgeval/v1/trainers/trainers_factory.py +37 -0
judgeval/v1/utils.py +18 -0
judgeval/version.py +5 -0
judgeval/warnings.py +4 -0
judgeval-0.23.0.dist-info/METADATA +266 -0
judgeval-0.23.0.dist-info/RECORD +201 -0
judgeval-0.23.0.dist-info/entry_points.txt +2 -0
judgeval/clients.py +0 -34
judgeval/common/__init__.py +0 -13
judgeval/common/api/__init__.py +0 -3
judgeval/common/api/api.py +0 -352
judgeval/common/api/constants.py +0 -165
judgeval/common/exceptions.py +0 -27
judgeval/common/storage/__init__.py +0 -6
judgeval/common/storage/s3_storage.py +0 -98
judgeval/common/tracer/__init__.py +0 -31
judgeval/common/tracer/constants.py +0 -22
judgeval/common/tracer/core.py +0 -1916
judgeval/common/tracer/otel_exporter.py +0 -108
judgeval/common/tracer/otel_span_processor.py +0 -234
judgeval/common/tracer/span_processor.py +0 -37
judgeval/common/tracer/span_transformer.py +0 -211
judgeval/common/tracer/trace_manager.py +0 -92
judgeval/common/utils.py +0 -940
judgeval/data/datasets/__init__.py +0 -4
judgeval/data/datasets/dataset.py +0 -341
judgeval/data/datasets/eval_dataset_client.py +0 -214
judgeval/data/tool.py +0 -5
judgeval/data/trace_run.py +0 -37
judgeval/evaluation_run.py +0 -75
judgeval/integrations/langgraph.py +0 -843
judgeval/judges/mixture_of_judges.py +0 -286
judgeval/judgment_client.py +0 -369
judgeval/rules.py +0 -521
judgeval/run_evaluation.py +0 -684
judgeval/scorers/judgeval_scorers/api_scorers/derailment_scorer.py +0 -14
judgeval/scorers/judgeval_scorers/api_scorers/execution_order.py +0 -52
judgeval/scorers/judgeval_scorers/api_scorers/hallucination.py +0 -28
judgeval/scorers/judgeval_scorers/api_scorers/tool_dependency.py +0 -20
judgeval/scorers/judgeval_scorers/api_scorers/tool_order.py +0 -27
judgeval/utils/alerts.py +0 -93
judgeval/utils/requests.py +0 -50
judgeval-0.1.0.dist-info/METADATA +0 -202
judgeval-0.1.0.dist-info/RECORD +0 -73
{judgeval-0.1.0.dist-info → judgeval-0.23.0.dist-info}/WHEEL +0 -0
{judgeval-0.1.0.dist-info → judgeval-0.23.0.dist-info}/licenses/LICENSE.md +0 -0

judgeval/data/result.py CHANGED Viewed

@@ -1,10 +1,9 @@
 from typing import List, Union
 from judgeval.data import ScorerData, Example
-from judgeval.data.trace import TraceSpan
-from judgeval.data.judgment_types import ScoringResultJudgmentType
+from judgeval.data.judgment_types import ScoringResult as JudgmentScoringResult
-class ScoringResult(ScoringResultJudgmentType):
+class ScoringResult(JudgmentScoringResult):
     """
     A ScoringResult contains the output of one or more scorers applied to a single example.
     Ie: One input, one actual_output, one expected_output, etc..., and 1+ scorer (Faithfulness, Hallucination, Summarization, etc...)
@@ -17,15 +16,14 @@ class ScoringResult(ScoringResultJudgmentType):
     """
-    def to_dict(self) -> dict:
-        """Convert the ScoringResult instance to a dictionary, properly serializing scorer_data."""
-        return {
-            "success": self.success,
-            "scorers_data": [scorer_data.to_dict() for scorer_data in self.scorers_data]
-            if self.scorers_data
-            else None,
-            "data_object": self.data_object.to_dict() if self.data_object else None,
-        }
+    # Need to override this so that it uses this repo's Example class
+    data_object: Example
+    scorers_data: List[ScorerData]
+    def model_dump(self, **kwargs):
+        data = super().model_dump(**kwargs)
+        data["data_object"] = self.data_object.model_dump()
+        return data
     def __str__(self) -> str:
         return f"ScoringResult(\
@@ -36,7 +34,7 @@ class ScoringResult(ScoringResultJudgmentType):
 def generate_scoring_result(
-    data_object: Union[Example, TraceSpan],
+    data_object: Union[Example],
     scorers_data: List[ScorerData],
     run_duration: float,
     success: bool,
@@ -47,12 +45,7 @@ def generate_scoring_result(
     When an LLMTestCase is executed, it turns into an LLMApiTestCase and the progress of the evaluation run is tracked.
     At the end of the evaluation run, we create a TestResult object out of the completed LLMApiTestCase.
     """
-    if hasattr(data_object, "name") and data_object.name is not None:
-        name = data_object.name
-    else:
-        name = "Test Case Placeholder"
     scoring_result = ScoringResult(
-        name=name,
         data_object=data_object,
         success=success,
         scorers_data=scorers_data,

judgeval/data/scorer_data.py CHANGED Viewed

@@ -4,36 +4,13 @@ Implementation of the ScorerData class.
 ScorerData holds the information related to a single, completed Scorer evaluation run.
 """
-from judgeval.data.judgment_types import ScorerDataJudgmentType
+from __future__ import annotations
+from judgeval.data.judgment_types import ScorerData
 from judgeval.scorers import BaseScorer
 from typing import List
-class ScorerData(ScorerDataJudgmentType):
-    """
-    ScorerData holds the information related to a single, completed Scorer evaluation run.
-    For example, if running the Judgment Faithfulness scorer on an example, the ScorerData
-    object will contain whether the example passed its threshold expectation, as well as more detailed
-    information surrounding the evaluation run such as the claims and verdicts generated by the
-    judge model(s).
-    """
-    def to_dict(self) -> dict:
-        """Convert the ScorerData instance to a JSON-serializable dictionary."""
-        return {
-            "name": self.name,
-            "threshold": self.threshold,
-            "success": self.success,
-            "score": self.score,
-            "reason": self.reason,
-            "strict_mode": self.strict_mode,
-            "evaluation_model": self.evaluation_model,
-            "error": self.error,
-            "additional_metadata": self.additional_metadata,
-        }
 def create_scorer_data(scorer: BaseScorer) -> List[ScorerData]:
     """
     After a `scorer` is run, it contains information about the example that was evaluated

judgeval/data/scripts/openapi_transform.py CHANGED Viewed

@@ -1,7 +1,7 @@
-import json
+import orjson
 import sys
 from typing import Any, Dict, Generator, List
-from judgeval.utils.requests import requests
+import requests
 spec_file = sys.argv[1] if len(sys.argv) > 1 else "http://localhost:8000/openapi.json"
@@ -10,8 +10,8 @@ if spec_file.startswith("http"):
     r.raise_for_status()
     SPEC = r.json()
 else:
-    with open(spec_file, "r") as f:
-        SPEC = json.load(f)
+    with open(spec_file, "rb") as f:
+        SPEC = orjson.loads(f.read())
 JUDGEVAL_PATHS: List[str] = [
     "/log_eval_results/",
@@ -120,4 +120,4 @@ spec = {
     },
 }
-print(json.dumps(spec, indent=4))
+print(orjson.dumps(spec, option=orjson.OPT_INDENT_2).decode("utf-8"))

judgeval/data/trace.py CHANGED Viewed

@@ -1,200 +1,121 @@
-from typing import Any
-import json
-import sys
-import threading
-from datetime import datetime, timezone
-from judgeval.data.judgment_types import (
-    TraceUsageJudgmentType,
-    TraceSpanJudgmentType,
-    TraceJudgmentType,
-)
-from judgeval.constants import SPAN_LIFECYCLE_END_UPDATE_ID
+from typing import Optional, List, Dict, Any
 from pydantic import BaseModel
+from .judgment_types import (
+    OtelSpanDetailScores,
+    OtelSpanDetail,
+    OtelTraceListItem,
+)
-class TraceUsage(TraceUsageJudgmentType):
-    pass
+class TraceUsage(BaseModel):
+    prompt_tokens: Optional[int] = None
+    completion_tokens: Optional[int] = None
+    cache_creation_input_tokens: Optional[int] = None
+    cache_read_input_tokens: Optional[int] = None
+    total_tokens: Optional[int] = None
+    prompt_tokens_cost_usd: Optional[float] = None
+    completion_tokens_cost_usd: Optional[float] = None
+    total_cost_usd: Optional[float] = None
+    model_name: Optional[str] = None
-class TraceSpan(TraceSpanJudgmentType):
-    def model_dump(self, **kwargs):
-        return {
-            "span_id": self.span_id,
-            "trace_id": self.trace_id,
-            "depth": self.depth,
-            "created_at": datetime.fromtimestamp(
-                self.created_at, tz=timezone.utc
-            ).isoformat(),
-            "inputs": self._serialize_value(self.inputs),
-            "output": self._serialize_value(self.output),
-            "error": self._serialize_value(self.error),
-            "parent_span_id": self.parent_span_id,
-            "function": self.function,
-            "duration": self.duration,
-            "span_type": self.span_type,
-            "usage": self.usage.model_dump() if self.usage else None,
-            "has_evaluation": self.has_evaluation,
-            "agent_name": self.agent_name,
-            "state_before": self.state_before,
-            "state_after": self.state_after,
-            "additional_metadata": self._serialize_value(self.additional_metadata),
-            "update_id": self.update_id,
-        }
-    def __init__(self, **data):
-        super().__init__(**data)
-        # Initialize thread lock for thread-safe update_id increment
-        self._update_id_lock = threading.Lock()
-    def increment_update_id(self) -> int:
-        """
-        Thread-safe method to increment the update_id counter.
-        Returns:
-            int: The new update_id value after incrementing
-        """
-        with self._update_id_lock:
-            self.update_id += 1
-            return self.update_id
-    def set_update_id_to_ending_number(
-        self, ending_number: int = SPAN_LIFECYCLE_END_UPDATE_ID
-    ) -> int:
-        """
-        Thread-safe method to set the update_id to a predetermined ending number.
-        Args:
-            ending_number (int): The number to set update_id to. Defaults to SPAN_LIFECYCLE_END_UPDATE_ID.
-        Returns:
-            int: The new update_id value after setting
-        """
-        with self._update_id_lock:
-            self.update_id = ending_number
-            return self.update_id
-    def print_span(self):
-        """Print the span with proper formatting and parent relationship information."""
-        indent = "  " * self.depth
-        parent_info = (
-            f" (parent_id: {self.parent_span_id})" if self.parent_span_id else ""
-        )
-        print(f"{indent}→ {self.function} (id: {self.span_id}){parent_info}")
-    def _is_json_serializable(self, obj: Any) -> bool:
-        """Helper method to check if an object is JSON serializable."""
-        try:
-            json.dumps(obj)
-            return True
-        except (TypeError, OverflowError, ValueError):
-            return False
-    def safe_stringify(self, output, function_name):
-        """
-        Safely converts an object to a JSON-serializable structure, handling common object types intelligently.
-        """
-        # Handle Pydantic models
-        if hasattr(output, "model_dump"):
-            try:
-                return output.model_dump()
-            except Exception:
-                pass
-        # Handle LangChain messages and similar objects with content/type
-        if hasattr(output, "content") and hasattr(output, "type"):
-            try:
-                result = {"type": output.type, "content": output.content}
-                # Add additional fields if they exist
-                if hasattr(output, "additional_kwargs"):
-                    result["additional_kwargs"] = output.additional_kwargs
-                if hasattr(output, "response_metadata"):
-                    result["response_metadata"] = output.response_metadata
-                if hasattr(output, "name"):
-                    result["name"] = output.name
-                return result
-            except Exception:
-                pass
-        if hasattr(output, "dict"):
-            try:
-                return output.dict()
-            except Exception:
-                pass
-        if hasattr(output, "to_dict"):
-            try:
-                return output.to_dict()
-            except Exception:
-                pass
-        if hasattr(output, "__dataclass_fields__"):
-            try:
-                import dataclasses
-                return dataclasses.asdict(output)
-            except Exception:
-                pass
-        if hasattr(output, "__dict__"):
-            try:
-                return output.__dict__
-            except Exception:
-                pass
-        try:
-            return str(output)
-        except (TypeError, OverflowError, ValueError):
-            pass
-        try:
-            return repr(output)
-        except (TypeError, OverflowError, ValueError):
-            pass
-        return None
-    def _serialize_value(self, value: Any) -> Any:
-        """Helper method to deep serialize a value safely supporting Pydantic Models / regular PyObjects."""
-        if value is None:
-            return None
-        recursion_limit = sys.getrecursionlimit()
-        recursion_limit = int(recursion_limit * 0.75)
-        def serialize_value(value, current_depth=0):
-            try:
-                if current_depth > recursion_limit:
-                    return {"error": "max_depth_reached: " + type(value).__name__}
-                if isinstance(value, BaseModel):
-                    return value.model_dump()
-                elif isinstance(value, dict):
-                    # Recursively serialize dictionary values
-                    return {
-                        k: serialize_value(v, current_depth + 1)
-                        for k, v in value.items()
-                    }
-                elif isinstance(value, (list, tuple)):
-                    # Recursively serialize list/tuple items
-                    return [serialize_value(item, current_depth + 1) for item in value]
-                else:
-                    # Try direct JSON serialization first
-                    try:
-                        json.dumps(value)
-                        return value
-                    except (TypeError, OverflowError, ValueError):
-                        # Fallback to safe stringification
-                        return self.safe_stringify(value, self.function)
-                    except Exception:
-                        return {"error": "Unable to serialize"}
-            except Exception:
-                return {"error": "Unable to serialize"}
-        # Start serialization with the top-level value
-        try:
-            return serialize_value(value, current_depth=0)
-        except Exception:
-            return {"error": "Unable to serialize"}
-class Trace(TraceJudgmentType):
+class TraceScore(OtelSpanDetailScores):
+    """Score information for a trace or span."""
     pass
+class TraceRule(BaseModel):
+    """Rule that was triggered for a trace."""
+    rule_id: str
+    rule_name: str
+class TraceSpan(OtelSpanDetail):
+    """Individual span within a trace with complete telemetry data."""
+    @classmethod
+    def from_otel_span_detail(cls, span_detail: OtelSpanDetail) -> "TraceSpan":
+        """Create TraceSpan from OtelSpanDetail, converting scores to TraceScore."""
+        data = span_detail.model_dump()
+        if "scores" in data and data["scores"]:
+            data["scores"] = [TraceScore(**score) for score in data["scores"]]
+        return cls(**data)
+    def to_dict(self) -> Dict[str, Any]:
+        """Convert TraceSpan to dictionary."""
+        return self.model_dump(exclude_none=True)
+class Trace(OtelTraceListItem):
+    """Complete trace with metadata and all associated spans."""
+    spans: List[TraceSpan] = []
+    rules: Optional[List[TraceRule]] = []
+    @classmethod
+    def from_dataset_trace_with_spans(cls, dataset_trace: Any) -> "Trace":
+        """Create Trace from DatasetTraceWithSpans (handles both API and judgment types)."""
+        if hasattr(dataset_trace, "trace_detail"):
+            trace_detail = dataset_trace.trace_detail
+            spans_data = dataset_trace.spans
+        else:
+            trace_detail = dataset_trace.get("trace_detail", {})
+            spans_data = dataset_trace.get("spans", [])
+        if hasattr(trace_detail, "model_dump"):
+            trace_data = trace_detail.model_dump()
+        elif isinstance(trace_detail, dict):
+            trace_data = trace_detail.copy()
+        else:
+            trace_data = dict(trace_detail)
+        spans = []
+        for span in spans_data:
+            if hasattr(span, "model_dump"):
+                spans.append(TraceSpan.from_otel_span_detail(span))
+            else:
+                # Handle dict spans
+                span_data = dict(span) if not isinstance(span, dict) else span.copy()
+                if "scores" in span_data and span_data["scores"]:
+                    span_data["scores"] = [
+                        TraceScore(**score)
+                        if isinstance(score, dict)
+                        else TraceScore(**score.model_dump())
+                        for score in span_data["scores"]
+                    ]
+                spans.append(TraceSpan(**span_data))
+        rules = []
+        if "rule_id" in trace_data and trace_data["rule_id"]:
+            rules = [
+                TraceRule(
+                    rule_id=trace_data["rule_id"],
+                    rule_name=f"Rule {trace_data['rule_id']}",
+                )
+            ]
+        trace_data.pop("scores", [])
+        trace_data.pop("rule_id", None)
+        trace = cls(**trace_data)
+        trace.spans = spans
+        trace.rules = rules
+        return trace
+    def to_dict(self) -> Dict[str, Any]:
+        """Convert Trace to dictionary."""
+        return self.model_dump(exclude_none=True)
+    def __len__(self) -> int:
+        """Return the number of spans in the trace."""
+        return len(self.spans)
+    def __iter__(self):
+        """Iterate over spans in the trace."""
+        return iter(self.spans)

judgeval 0.1.0__py3-none-any.whl → 0.23.0__py3-none-any.whl

judgeval 0.1.0__py3-none-any.whl → 0.23.0__py3-none-any.whl