judgeval 0.12.0__tar.gz → 0.13.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {judgeval-0.12.0 → judgeval-0.13.1}/.pre-commit-config.yaml +3 -3
- {judgeval-0.12.0 → judgeval-0.13.1}/PKG-INFO +1 -7
- judgeval-0.13.1/assets/brand/company.jpg +0 -0
- judgeval-0.13.1/assets/brand/company_banner.jpg +0 -0
- judgeval-0.13.1/assets/brand/darkmode.svg +7 -0
- judgeval-0.13.1/assets/brand/full_logo.png +0 -0
- judgeval-0.13.1/assets/brand/icon.png +0 -0
- judgeval-0.13.1/assets/brand/lightmode.svg +7 -0
- judgeval-0.13.1/assets/brand/white_background.png +0 -0
- {judgeval-0.12.0 → judgeval-0.13.1}/pyproject.toml +6 -9
- {judgeval-0.12.0 → judgeval-0.13.1}/src/judgeval/__init__.py +2 -2
- {judgeval-0.12.0 → judgeval-0.13.1}/src/judgeval/api/api_types.py +81 -12
- {judgeval-0.12.0 → judgeval-0.13.1}/src/judgeval/cli.py +2 -1
- {judgeval-0.12.0 → judgeval-0.13.1}/src/judgeval/constants.py +0 -6
- {judgeval-0.12.0 → judgeval-0.13.1}/src/judgeval/data/evaluation_run.py +2 -5
- {judgeval-0.12.0 → judgeval-0.13.1}/src/judgeval/data/judgment_types.py +97 -12
- judgeval-0.13.1/src/judgeval/data/trace.py +121 -0
- {judgeval-0.12.0 → judgeval-0.13.1}/src/judgeval/dataset/__init__.py +72 -23
- {judgeval-0.12.0 → judgeval-0.13.1}/src/judgeval/env.py +5 -20
- judgeval-0.13.1/src/judgeval/integrations/langgraph/__init__.py +13 -0
- {judgeval-0.12.0 → judgeval-0.13.1}/src/judgeval/scorers/api_scorer.py +7 -12
- {judgeval-0.12.0 → judgeval-0.13.1}/src/judgeval/scorers/judgeval_scorers/api_scorers/answer_correctness.py +0 -8
- {judgeval-0.12.0 → judgeval-0.13.1}/src/judgeval/scorers/judgeval_scorers/api_scorers/faithfulness.py +0 -8
- {judgeval-0.12.0 → judgeval-0.13.1}/src/judgeval/scorers/judgeval_scorers/api_scorers/instruction_adherence.py +0 -12
- {judgeval-0.12.0 → judgeval-0.13.1}/src/judgeval/scorers/judgeval_scorers/api_scorers/prompt_scorer.py +22 -33
- {judgeval-0.12.0 → judgeval-0.13.1}/src/judgeval/scorers/score.py +1 -1
- {judgeval-0.12.0 → judgeval-0.13.1}/src/judgeval/scorers/utils.py +1 -4
- {judgeval-0.12.0 → judgeval-0.13.1}/src/judgeval/tracer/__init__.py +175 -156
- {judgeval-0.12.0 → judgeval-0.13.1}/src/judgeval/tracer/exporters/__init__.py +4 -1
- judgeval-0.13.1/src/judgeval/tracer/keys.py +57 -0
- {judgeval-0.12.0 → judgeval-0.13.1}/src/judgeval/tracer/llm/__init__.py +0 -1
- judgeval-0.13.1/src/judgeval/tracer/llm/anthropic/__init__.py +20 -0
- judgeval-0.13.1/src/judgeval/tracer/llm/google/__init__.py +21 -0
- judgeval-0.13.1/src/judgeval/tracer/llm/groq/__init__.py +20 -0
- judgeval-0.13.1/src/judgeval/tracer/llm/openai/__init__.py +32 -0
- judgeval-0.13.1/src/judgeval/tracer/llm/providers.py +63 -0
- judgeval-0.13.1/src/judgeval/tracer/llm/together/__init__.py +20 -0
- {judgeval-0.12.0 → judgeval-0.13.1}/src/judgeval/tracer/managers.py +23 -48
- {judgeval-0.12.0 → judgeval-0.13.1}/src/judgeval/tracer/processors/__init__.py +36 -75
- {judgeval-0.12.0 → judgeval-0.13.1}/src/judgeval/tracer/utils.py +1 -2
- {judgeval-0.12.0 → judgeval-0.13.1}/src/judgeval/utils/file_utils.py +0 -2
- judgeval-0.13.1/src/judgeval/utils/meta.py +27 -0
- {judgeval-0.12.0 → judgeval-0.13.1}/src/judgeval/utils/testing.py +0 -14
- {judgeval-0.12.0 → judgeval-0.13.1}/src/judgeval/utils/version_check.py +2 -0
- {judgeval-0.12.0 → judgeval-0.13.1}/src/judgeval/version.py +1 -1
- {judgeval-0.12.0 → judgeval-0.13.1}/uv.lock +316 -34
- judgeval-0.12.0/src/judgeval/data/trace.py +0 -14
- judgeval-0.12.0/src/judgeval/integrations/langgraph/__init__.py +0 -789
- judgeval-0.12.0/src/judgeval/tracer/keys.py +0 -67
- judgeval-0.12.0/src/judgeval/tracer/llm/providers.py +0 -114
- judgeval-0.12.0/src/judgeval/utils/meta.py +0 -14
- {judgeval-0.12.0 → judgeval-0.13.1}/.github/ISSUE_TEMPLATE/bug_report.md +0 -0
- {judgeval-0.12.0 → judgeval-0.13.1}/.github/ISSUE_TEMPLATE/config.yml +0 -0
- {judgeval-0.12.0 → judgeval-0.13.1}/.github/ISSUE_TEMPLATE/feature_request.md +0 -0
- {judgeval-0.12.0 → judgeval-0.13.1}/.github/pull_request_template.md +0 -0
- {judgeval-0.12.0 → judgeval-0.13.1}/.github/workflows/blocked-pr.yaml +0 -0
- {judgeval-0.12.0 → judgeval-0.13.1}/.github/workflows/ci.yaml +0 -0
- {judgeval-0.12.0 → judgeval-0.13.1}/.github/workflows/claude-code-review.yml +0 -0
- {judgeval-0.12.0 → judgeval-0.13.1}/.github/workflows/claude.yml +0 -0
- {judgeval-0.12.0 → judgeval-0.13.1}/.github/workflows/lint.yaml +0 -0
- {judgeval-0.12.0 → judgeval-0.13.1}/.github/workflows/merge-branch-check.yaml +0 -0
- {judgeval-0.12.0 → judgeval-0.13.1}/.github/workflows/mypy.yaml +0 -0
- {judgeval-0.12.0 → judgeval-0.13.1}/.github/workflows/pre-commit-autoupdate.yaml +0 -0
- {judgeval-0.12.0 → judgeval-0.13.1}/.github/workflows/release.yaml +0 -0
- {judgeval-0.12.0 → judgeval-0.13.1}/.github/workflows/validate-branch.yaml +0 -0
- {judgeval-0.12.0 → judgeval-0.13.1}/.gitignore +0 -0
- {judgeval-0.12.0 → judgeval-0.13.1}/LICENSE.md +0 -0
- {judgeval-0.12.0 → judgeval-0.13.1}/README.md +0 -0
- {judgeval-0.12.0 → judgeval-0.13.1}/assets/Screenshot 2025-05-17 at 8.14.27 PM.png +0 -0
- {judgeval-0.12.0 → judgeval-0.13.1}/assets/agent.gif +0 -0
- {judgeval-0.12.0 → judgeval-0.13.1}/assets/agent_trace_example.png +0 -0
- {judgeval-0.12.0 → judgeval-0.13.1}/assets/data.gif +0 -0
- {judgeval-0.12.0 → judgeval-0.13.1}/assets/dataset_clustering_screenshot.png +0 -0
- {judgeval-0.12.0 → judgeval-0.13.1}/assets/dataset_clustering_screenshot_dm.png +0 -0
- {judgeval-0.12.0 → judgeval-0.13.1}/assets/datasets_preview_screenshot.png +0 -0
- {judgeval-0.12.0 → judgeval-0.13.1}/assets/document.gif +0 -0
- {judgeval-0.12.0 → judgeval-0.13.1}/assets/error_analysis_dashboard.png +0 -0
- {judgeval-0.12.0 → judgeval-0.13.1}/assets/errors.png +0 -0
- {judgeval-0.12.0 → judgeval-0.13.1}/assets/experiments_dashboard_screenshot.png +0 -0
- {judgeval-0.12.0 → judgeval-0.13.1}/assets/experiments_page.png +0 -0
- {judgeval-0.12.0 → judgeval-0.13.1}/assets/experiments_pagev2.png +0 -0
- {judgeval-0.12.0 → judgeval-0.13.1}/assets/logo-dark.svg +0 -0
- {judgeval-0.12.0 → judgeval-0.13.1}/assets/logo-light.svg +0 -0
- {judgeval-0.12.0 → judgeval-0.13.1}/assets/monitoring_screenshot.png +0 -0
- {judgeval-0.12.0 → judgeval-0.13.1}/assets/new_darkmode.svg +0 -0
- {judgeval-0.12.0 → judgeval-0.13.1}/assets/new_lightmode.svg +0 -0
- {judgeval-0.12.0 → judgeval-0.13.1}/assets/online_eval.png +0 -0
- {judgeval-0.12.0 → judgeval-0.13.1}/assets/product_shot.png +0 -0
- {judgeval-0.12.0 → judgeval-0.13.1}/assets/test.png +0 -0
- {judgeval-0.12.0 → judgeval-0.13.1}/assets/tests.png +0 -0
- {judgeval-0.12.0 → judgeval-0.13.1}/assets/trace.gif +0 -0
- {judgeval-0.12.0 → judgeval-0.13.1}/assets/trace_demo.png +0 -0
- {judgeval-0.12.0 → judgeval-0.13.1}/assets/trace_screenshot.png +0 -0
- {judgeval-0.12.0 → judgeval-0.13.1}/assets/trace_screenshot_old.png +0 -0
- {judgeval-0.12.0 → judgeval-0.13.1}/pytest.ini +0 -0
- {judgeval-0.12.0 → judgeval-0.13.1}/scripts/api_generator.py +0 -0
- {judgeval-0.12.0 → judgeval-0.13.1}/scripts/openapi_transform.py +0 -0
- {judgeval-0.12.0 → judgeval-0.13.1}/scripts/update_types.sh +0 -0
- {judgeval-0.12.0 → judgeval-0.13.1}/src/judgeval/api/__init__.py +0 -0
- {judgeval-0.12.0 → judgeval-0.13.1}/src/judgeval/data/__init__.py +0 -0
- {judgeval-0.12.0 → judgeval-0.13.1}/src/judgeval/data/example.py +0 -0
- {judgeval-0.12.0 → judgeval-0.13.1}/src/judgeval/data/result.py +0 -0
- {judgeval-0.12.0 → judgeval-0.13.1}/src/judgeval/data/scorer_data.py +0 -0
- {judgeval-0.12.0 → judgeval-0.13.1}/src/judgeval/data/scripts/fix_default_factory.py +0 -0
- {judgeval-0.12.0 → judgeval-0.13.1}/src/judgeval/data/scripts/openapi_transform.py +0 -0
- {judgeval-0.12.0 → judgeval-0.13.1}/src/judgeval/evaluation/__init__.py +0 -0
- {judgeval-0.12.0 → judgeval-0.13.1}/src/judgeval/exceptions.py +0 -0
- {judgeval-0.12.0 → judgeval-0.13.1}/src/judgeval/judges/__init__.py +0 -0
- {judgeval-0.12.0 → judgeval-0.13.1}/src/judgeval/judges/base_judge.py +0 -0
- {judgeval-0.12.0 → judgeval-0.13.1}/src/judgeval/judges/litellm_judge.py +0 -0
- {judgeval-0.12.0 → judgeval-0.13.1}/src/judgeval/judges/together_judge.py +0 -0
- {judgeval-0.12.0 → judgeval-0.13.1}/src/judgeval/judges/utils.py +0 -0
- {judgeval-0.12.0 → judgeval-0.13.1}/src/judgeval/logger.py +0 -0
- {judgeval-0.12.0 → judgeval-0.13.1}/src/judgeval/scorers/__init__.py +0 -0
- {judgeval-0.12.0 → judgeval-0.13.1}/src/judgeval/scorers/agent_scorer.py +0 -0
- {judgeval-0.12.0 → judgeval-0.13.1}/src/judgeval/scorers/base_scorer.py +0 -0
- {judgeval-0.12.0 → judgeval-0.13.1}/src/judgeval/scorers/example_scorer.py +0 -0
- {judgeval-0.12.0 → judgeval-0.13.1}/src/judgeval/scorers/exceptions.py +0 -0
- {judgeval-0.12.0 → judgeval-0.13.1}/src/judgeval/scorers/judgeval_scorers/__init__.py +0 -0
- {judgeval-0.12.0 → judgeval-0.13.1}/src/judgeval/scorers/judgeval_scorers/api_scorers/__init__.py +0 -0
- {judgeval-0.12.0 → judgeval-0.13.1}/src/judgeval/scorers/judgeval_scorers/api_scorers/answer_relevancy.py +0 -0
- {judgeval-0.12.0 → judgeval-0.13.1}/src/judgeval/tracer/constants.py +0 -0
- {judgeval-0.12.0 → judgeval-0.13.1}/src/judgeval/tracer/exporters/s3.py +0 -0
- {judgeval-0.12.0 → judgeval-0.13.1}/src/judgeval/tracer/exporters/store.py +0 -0
- {judgeval-0.12.0 → judgeval-0.13.1}/src/judgeval/tracer/exporters/utils.py +0 -0
- {judgeval-0.12.0 → judgeval-0.13.1}/src/judgeval/tracer/local_eval_queue.py +0 -0
- {judgeval-0.12.0 → judgeval-0.13.1}/src/judgeval/trainer/__init__.py +0 -0
- {judgeval-0.12.0 → judgeval-0.13.1}/src/judgeval/trainer/config.py +0 -0
- {judgeval-0.12.0 → judgeval-0.13.1}/src/judgeval/trainer/console.py +0 -0
- {judgeval-0.12.0 → judgeval-0.13.1}/src/judgeval/trainer/trainable_model.py +0 -0
- {judgeval-0.12.0 → judgeval-0.13.1}/src/judgeval/trainer/trainer.py +0 -0
- {judgeval-0.12.0 → judgeval-0.13.1}/src/judgeval/utils/async_utils.py +0 -0
- {judgeval-0.12.0 → judgeval-0.13.1}/src/judgeval/utils/decorators.py +0 -0
- {judgeval-0.12.0 → judgeval-0.13.1}/src/judgeval/utils/guards.py +0 -0
- {judgeval-0.12.0 → judgeval-0.13.1}/src/judgeval/utils/serialize.py +0 -0
- {judgeval-0.12.0 → judgeval-0.13.1}/src/judgeval/utils/url.py +0 -0
- {judgeval-0.12.0 → judgeval-0.13.1}/src/judgeval/warnings.py +0 -0
- {judgeval-0.12.0 → judgeval-0.13.1}/update_version.py +0 -0
{judgeval-0.12.0 → judgeval-0.13.1}/.pre-commit-config.yaml
@@ -1,11 +1,11 @@
 repos:
   - repo: https://github.com/astral-sh/uv-pre-commit
-    rev: 0.8.
+    rev: 0.8.19
     hooks:
       - id: uv-lock

   - repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: v0.13.
+    rev: v0.13.1
     hooks:
       - id: ruff
         name: ruff (linter)
@@ -14,7 +14,7 @@ repos:
         name: ruff (formatter)

   - repo: https://github.com/pre-commit/mirrors-mypy
-    rev: v1.
+    rev: v1.18.2
     hooks:
       - id: mypy
         language: system
{judgeval-0.12.0 → judgeval-0.13.1}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: judgeval
-Version: 0.
+Version: 0.13.1
 Summary: Judgeval Package
 Project-URL: Homepage, https://github.com/JudgmentLabs/judgeval
 Project-URL: Issues, https://github.com/JudgmentLabs/judgeval/issues
@@ -17,14 +17,8 @@ Requires-Dist: httpx>=0.28.1
 Requires-Dist: litellm<1.75.0
 Requires-Dist: opentelemetry-exporter-otlp>=1.36.0
 Requires-Dist: opentelemetry-sdk>=1.36.0
-Requires-Dist: opentelemetry-semantic-conventions>=0.57b0
 Requires-Dist: orjson>=3.9.0
 Requires-Dist: typer>=0.9.0
-Provides-Extra: langchain
-Requires-Dist: langchain-anthropic; extra == 'langchain'
-Requires-Dist: langchain-core; extra == 'langchain'
-Requires-Dist: langchain-huggingface; extra == 'langchain'
-Requires-Dist: langchain-openai; extra == 'langchain'
 Provides-Extra: s3
 Requires-Dist: boto3>=1.40.11; extra == 's3'
 Provides-Extra: trainer
judgeval-0.13.1/assets/brand/company.jpg (new, binary file)
judgeval-0.13.1/assets/brand/company_banner.jpg (new, binary file)
judgeval-0.13.1/assets/brand/darkmode.svg (new file)
@@ -0,0 +1,7 @@
+<svg width="544" height="91" viewBox="0 0 544 91" fill="none" xmlns="http://www.w3.org/2000/svg">
+<path d="M31.2246 18H39.5512V51.3061L31.2246 59.6327V18Z" fill="#FF4B2E"/>
+<path d="M0 59.6328H31.2245L21.8571 69.0002H0V59.6328Z" fill="#FF4B2E"/>
+<path d="M52.041 18H43.7145V51.3061L52.041 59.6327V18Z" fill="#FF4B2E"/>
+<path d="M83.2656 59.6328H52.0411L62.4493 69.0002H83.2656V59.6328Z" fill="#FF4B2E"/>
+<path d="M111.45 61.3V54.37H116.63V59.55L121.39 64.24H133.36L137.35 60.32V20H142.67V62L135.67 69H119.15L111.45 61.3ZM147.896 62.56V34.14H153.076V60.95L156.576 64.38H163.576L172.256 55.7V34.14H177.436V69H172.396V61.58L164.976 69H154.336L147.896 62.56ZM182.363 62.56V40.58L188.803 34.14H202.243L207.983 39.18V19.02H213.163V69H208.123V62.63L201.753 69H188.803L182.363 62.56ZM200.633 64.38L207.983 57.03V44.64L201.263 38.76H191.043L187.543 42.19V60.95L191.043 64.38H200.633ZM248.869 34.14V77.89L242.499 84.26H225.209L219.819 78.87V74.6H224.999V77.19L227.449 79.64H240.189L243.689 76.21V63.19L237.249 69H224.509L218.069 62.56V40.58L224.509 34.14H237.739L243.829 40.23V34.14H248.869ZM243.689 46.11L236.339 38.76H226.749L223.249 42.19V60.95L226.749 64.38H236.409L243.689 57.59V46.11ZM254.474 34.14H259.514V40.86L266.234 34.14H274.564L280.024 39.6L285.484 34.14H296.474L302.914 40.58V69H297.734V42.19L294.234 38.76H286.534L281.634 43.66V69H276.594V42.19L273.094 38.76H267.214L259.654 46.32V69H254.474V34.14ZM307.458 62.56V40.58L313.898 34.14H331.468L337.978 40.58V53.11H312.638V60.95L316.138 64.38H329.228L332.728 60.95V58.29H337.908V62.56L331.468 69H313.898L307.458 62.56ZM332.798 48.63V42.19L329.298 38.76H316.138L312.638 42.19V48.63H332.798ZM342.496 34.14H347.536V41.56L354.956 34.14H365.666L372.106 40.58V69H366.926V42.19L363.426 38.76H356.356L347.676 47.44V69H342.496V34.14ZM379.848 62.56V38.69H373.548V34.14H379.988V22.8H385.028V34.14H395.948V38.69H385.028V60.95L388.528 64.45H395.948V69H386.288L379.848 62.56ZM411.613 20H416.933V64.31H441.853V69H411.613V20ZM442.227 63.26V54.37L447.967 48.7H466.587V42.05L463.087 38.62H451.187L447.687 42.05V44.92H442.507V40.58L448.947 34.14H465.257L471.697 40.58V69H466.727V62.84L460.287 69H447.967L442.227 63.26ZM459.237 64.52L466.587 57.45V53.18H450.207L447.407 55.91V61.79L450.207 64.52H459.237ZM476.932 62.56V19.02H482.112V40.93L488.902 34.14H501.152L507.592 40.58V62.56L501.152 69H483.372L476.932 62.56ZM498.912 64.38L502.412 60.95V42.19L498.912 38.76H490.372L482.112 47.02V60.95L485.612 64.38H498.912ZM510.751 63.26V58.92H515.931V61.79L518.731 64.52H531.611L534.411 61.79V56.4L531.611 53.6H516.561L511.031 48.07V39.88L516.771 34.14H533.151L538.891 39.88V44.22H533.711V41.35L530.911 38.62H519.011L516.211 41.35V46.46L519.011 49.26H533.851L539.591 55V63.26L533.851 69H516.491L510.751 63.26Z" fill="#F4F4F5"/>
+</svg>
judgeval-0.13.1/assets/brand/full_logo.png (new, binary file)
judgeval-0.13.1/assets/brand/icon.png (new, binary file)
judgeval-0.13.1/assets/brand/lightmode.svg (new file)
@@ -0,0 +1,7 @@
+<svg width="544" height="91" viewBox="0 0 544 91" fill="none" xmlns="http://www.w3.org/2000/svg">
+<path d="M31.2246 18H39.5512V51.3061L31.2246 59.6327V18Z" fill="#FF4B2E"/>
+<path d="M0 59.6328H31.2245L21.8571 69.0002H0V59.6328Z" fill="#FF4B2E"/>
+<path d="M52.041 18H43.7145V51.3061L52.041 59.6327V18Z" fill="#FF4B2E"/>
+<path d="M83.2656 59.6328H52.0411L62.4493 69.0002H83.2656V59.6328Z" fill="#FF4B2E"/>
+<path d="M111.45 61.3V54.37H116.63V59.55L121.39 64.24H133.36L137.35 60.32V20H142.67V62L135.67 69H119.15L111.45 61.3ZM147.896 62.56V34.14H153.076V60.95L156.576 64.38H163.576L172.256 55.7V34.14H177.436V69H172.396V61.58L164.976 69H154.336L147.896 62.56ZM182.363 62.56V40.58L188.803 34.14H202.243L207.983 39.18V19.02H213.163V69H208.123V62.63L201.753 69H188.803L182.363 62.56ZM200.633 64.38L207.983 57.03V44.64L201.263 38.76H191.043L187.543 42.19V60.95L191.043 64.38H200.633ZM248.869 34.14V77.89L242.499 84.26H225.209L219.819 78.87V74.6H224.999V77.19L227.449 79.64H240.189L243.689 76.21V63.19L237.249 69H224.509L218.069 62.56V40.58L224.509 34.14H237.739L243.829 40.23V34.14H248.869ZM243.689 46.11L236.339 38.76H226.749L223.249 42.19V60.95L226.749 64.38H236.409L243.689 57.59V46.11ZM254.474 34.14H259.514V40.86L266.234 34.14H274.564L280.024 39.6L285.484 34.14H296.474L302.914 40.58V69H297.734V42.19L294.234 38.76H286.534L281.634 43.66V69H276.594V42.19L273.094 38.76H267.214L259.654 46.32V69H254.474V34.14ZM307.458 62.56V40.58L313.898 34.14H331.468L337.978 40.58V53.11H312.638V60.95L316.138 64.38H329.228L332.728 60.95V58.29H337.908V62.56L331.468 69H313.898L307.458 62.56ZM332.798 48.63V42.19L329.298 38.76H316.138L312.638 42.19V48.63H332.798ZM342.496 34.14H347.536V41.56L354.956 34.14H365.666L372.106 40.58V69H366.926V42.19L363.426 38.76H356.356L347.676 47.44V69H342.496V34.14ZM379.848 62.56V38.69H373.548V34.14H379.988V22.8H385.028V34.14H395.948V38.69H385.028V60.95L388.528 64.45H395.948V69H386.288L379.848 62.56ZM411.613 20H416.933V64.31H441.853V69H411.613V20ZM442.227 63.26V54.37L447.967 48.7H466.587V42.05L463.087 38.62H451.187L447.687 42.05V44.92H442.507V40.58L448.947 34.14H465.257L471.697 40.58V69H466.727V62.84L460.287 69H447.967L442.227 63.26ZM459.237 64.52L466.587 57.45V53.18H450.207L447.407 55.91V61.79L450.207 64.52H459.237ZM476.932 62.56V19.02H482.112V40.93L488.902 34.14H501.152L507.592 40.58V62.56L501.152 69H483.372L476.932 62.56ZM498.912 64.38L502.412 60.95V42.19L498.912 38.76H490.372L482.112 47.02V60.95L485.612 64.38H498.912ZM510.751 63.26V58.92H515.931V61.79L518.731 64.52H531.611L534.411 61.79V56.4L531.611 53.6H516.561L511.031 48.07V39.88L516.771 34.14H533.151L538.891 39.88V44.22H533.711V41.35L530.911 38.62H519.011L516.211 41.35V46.46L519.011 49.26H533.851L539.591 55V63.26L533.851 69H516.491L510.751 63.26Z" fill="black"/>
+</svg>
judgeval-0.13.1/assets/brand/white_background.png (new, binary file)
{judgeval-0.12.0 → judgeval-0.13.1}/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "judgeval"
-version = "0.
+version = "0.13.1"
 authors = [
     { name = "Andrew Li", email = "andrew@judgmentlabs.ai" },
     { name = "Alex Shan", email = "alex@judgmentlabs.ai" },
@@ -19,10 +19,9 @@ license-files = ["LICENSE.md"]
 dependencies = [
     "dotenv",
     "httpx>=0.28.1",
-    "litellm<1.75.0",
+    "litellm<1.75.0", # https://github.com/BerriAI/litellm/issues/13081
     "opentelemetry-exporter-otlp>=1.36.0",
     "opentelemetry-sdk>=1.36.0",
-    "opentelemetry-semantic-conventions>=0.57b0",
     "orjson>=3.9.0",
     "click<8.2.0",
     "typer>=0.9.0",
@@ -45,12 +44,6 @@ packages = ["src/judgeval"]
 include = ["/src/judgeval", "/src/judgeval/**/*.py"]

 [project.optional-dependencies]
-langchain = [
-    "langchain-huggingface",
-    "langchain-openai",
-    "langchain-anthropic",
-    "langchain-core",
-]
 s3 = ["boto3>=1.40.11"]
 trainer = ["fireworks-ai>=0.19.18"]

@@ -76,6 +69,10 @@ dev = [
     "types-tqdm>=4.67.0.20250809",
     "pytest-asyncio>=1.1.0",
     "pytest-xdist>=3.8.0",
+    "langchain-openai>=0.3.23",
+    "langchain-tavily>=0.2.11",
+    "streamlit>=1.49.1",
+    "langchain-community>=0.3.29",
 ]

{judgeval-0.12.0 → judgeval-0.13.1}/src/judgeval/__init__.py
@@ -10,7 +10,7 @@ from judgeval.scorers import ExampleAPIScorerConfig
 from judgeval.scorers.example_scorer import ExampleScorer
 from judgeval.data.example import Example
 from judgeval.logger import judgeval_logger
-from judgeval.env import JUDGMENT_API_KEY,
+from judgeval.env import JUDGMENT_API_KEY, JUDGMENT_ORG_ID
 from judgeval.utils.meta import SingletonMeta
 from judgeval.exceptions import JudgmentRuntimeError, JudgmentTestError
 from judgeval.api import JudgmentSyncClient
@@ -42,7 +42,7 @@ class JudgmentClient(metaclass=SingletonMeta):
         scorers: Sequence[Union[ExampleAPIScorerConfig, ExampleScorer]],
         project_name: str = "default_project",
         eval_run_name: str = "default_eval_run",
-        model: str =
+        model: Optional[str] = None,
         assert_test: bool = False,
     ) -> List[ScoringResult]:
         try:
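Since `model` now defaults to `None`, callers can leave it off entirely. A minimal sketch, assuming the parameters above belong to `JudgmentClient.run_evaluation` and that the method also accepts `examples` (both outside this hunk), and that `FaithfulnessScorer` is one of the package's API scorers:

from judgeval import JudgmentClient
from judgeval.data.example import Example
from judgeval.scorers import FaithfulnessScorer  # assumed export; any ExampleAPIScorerConfig works

client = JudgmentClient()  # SingletonMeta: repeated constructions return one shared instance
results = client.run_evaluation(  # assumed method name
    examples=[Example(input="What is 2 + 2?", actual_output="4")],
    scorers=[FaithfulnessScorer()],
    project_name="default_project",
    eval_run_name="default_eval_run",
    # model omitted: now Optional[str] = None rather than a required argument
)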
{judgeval-0.12.0 → judgeval-0.13.1}/src/judgeval/api/api_types.py
@@ -1,6 +1,6 @@
 # generated by datamodel-codegen:
 # filename: .openapi.json
-# timestamp: 2025-09-
+# timestamp: 2025-09-24T18:25:18+00:00

 from __future__ import annotations
 from typing import Any, Dict, List, Literal, Optional, TypedDict, Union
@@ -52,8 +52,8 @@ class SavePromptScorerRequest(TypedDict):
     name: str
     prompt: str
     threshold: float
-
-    is_trace: NotRequired[
+    model: NotRequired[str]
+    is_trace: NotRequired[bool]


 class SavePromptScorerResponse(TypedDict):
@@ -117,6 +117,7 @@ class ScorerConfig(TypedDict):
     score_type: str
     name: NotRequired[Optional[str]]
     threshold: NotRequired[float]
+    model: NotRequired[Optional[str]]
     strict_mode: NotRequired[bool]
     required_params: NotRequired[List[str]]
     kwargs: NotRequired[Optional[Dict[str, Any]]]
@@ -141,7 +142,7 @@ class PromptScorer(TypedDict):
     name: str
     prompt: str
     threshold: float
-
+    model: NotRequired[str]
     created_at: NotRequired[Optional[str]]
     updated_at: NotRequired[Optional[str]]
     is_trace: NotRequired[Optional[bool]]
@@ -189,13 +190,28 @@ class OtelTraceSpan(TypedDict):
     state_before: NotRequired[Optional[Dict[str, Any]]]


+class OtelSpanListItemScores(TypedDict):
+    success: bool
+    score: float
+    reason: NotRequired[Optional[str]]
+    name: str
+
+
+class OtelSpanDetailScores(TypedDict):
+    success: bool
+    score: float
+    reason: NotRequired[Optional[str]]
+    name: str
+    data: NotRequired[Optional[Dict[str, Any]]]
+
+
 class ExampleEvaluationRun(TypedDict):
     id: NotRequired[str]
     project_name: str
     eval_name: str
     custom_scorers: NotRequired[List[BaseScorer]]
     judgment_scorers: NotRequired[List[ScorerConfig]]
-    model: str
+    model: NotRequired[Optional[str]]
     created_at: NotRequired[str]
     examples: List[Example]
     trace_span_id: NotRequired[Optional[str]]
@@ -212,7 +228,7 @@ class TraceEvaluationRun(TypedDict):
     eval_name: str
     custom_scorers: NotRequired[List[BaseScorer]]
     judgment_scorers: NotRequired[List[ScorerConfig]]
-    model: str
+    model: NotRequired[Optional[str]]
     created_at: NotRequired[str]
     trace_and_span_ids: List[TraceAndSpanId]
     is_offline: NotRequired[bool]
@@ -224,12 +240,6 @@ class DatasetInsertExamples(TypedDict):
     project_name: str


-class DatasetReturn(TypedDict):
-    name: str
-    project_name: str
-    examples: NotRequired[Optional[List[Example]]]
-
-
 class DatasetInfo(TypedDict):
     dataset_id: str
     name: str
@@ -261,6 +271,65 @@ class ScoringResult(TypedDict):
     evaluation_cost: NotRequired[Optional[float]]


+class OtelTraceListItem(TypedDict):
+    organization_id: str
+    project_id: str
+    trace_id: str
+    timestamp: str
+    duration: NotRequired[Optional[int]]
+    has_notification: NotRequired[Optional[bool]]
+    tags: NotRequired[Optional[List[str]]]
+    experiment_run_id: NotRequired[Optional[str]]
+    span_name: NotRequired[Optional[str]]
+    cumulative_llm_cost: NotRequired[Optional[float]]
+    error: NotRequired[Optional[Dict[str, Any]]]
+    scores: NotRequired[List[OtelSpanListItemScores]]
+    customer_id: NotRequired[Optional[str]]
+    input_preview: NotRequired[Optional[str]]
+    output_preview: NotRequired[Optional[str]]
+    annotation_count: NotRequired[int]
+    span_id: str
+    rule_id: NotRequired[Optional[str]]
+
+
+class OtelSpanDetail(TypedDict):
+    organization_id: str
+    project_id: str
+    timestamp: str
+    trace_id: str
+    span_id: str
+    parent_span_id: NotRequired[Optional[str]]
+    trace_state: NotRequired[Optional[str]]
+    span_name: NotRequired[Optional[str]]
+    span_kind: NotRequired[Optional[str]]
+    service_name: NotRequired[Optional[str]]
+    resource_attributes: NotRequired[Optional[Dict[str, Any]]]
+    span_attributes: NotRequired[Optional[Dict[str, Any]]]
+    duration: NotRequired[Optional[int]]
+    status_code: NotRequired[Optional[str]]
+    status_message: NotRequired[Optional[str]]
+    events: NotRequired[Optional[Union[List[Dict[str, Any]], Dict[str, Any]]]]
+    links: NotRequired[Optional[Union[List[Dict[str, Any]], Dict[str, Any]]]]
+    llm_cost: NotRequired[Optional[float]]
+    prompt_tokens: NotRequired[Optional[int]]
+    completion_tokens: NotRequired[Optional[int]]
+    scores: NotRequired[Optional[List[OtelSpanDetailScores]]]
+
+
 class EvalResults(TypedDict):
     results: List[ScoringResult]
     run: Union[ExampleEvaluationRun, TraceEvaluationRun]
+
+
+class DatasetTraceWithSpans(TypedDict):
+    dataset_id: str
+    trace_detail: OtelTraceListItem
+    spans: List[OtelSpanDetail]
+
+
+class DatasetReturn(TypedDict):
+    name: str
+    project_name: str
+    dataset_kind: DatasetKind
+    examples: NotRequired[Optional[List[Example]]]
+    traces: NotRequired[Optional[List[DatasetTraceWithSpans]]]
{judgeval-0.12.0 → judgeval-0.13.1}/src/judgeval/cli.py
@@ -5,6 +5,7 @@ from pathlib import Path
 from dotenv import load_dotenv
 from judgeval.logger import judgeval_logger
 from judgeval import JudgmentClient
+from judgeval.version import get_version

 load_dotenv()

@@ -56,7 +57,7 @@ def upload_scorer(
 @app.command()
 def version():
     """Show version info"""
-    judgeval_logger.info("
+    judgeval_logger.info(f"Judgeval CLI v{get_version()}")


 if __name__ == "__main__":
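The string the reworked `version` command logs can be reproduced directly from the library; a minimal sketch:

from judgeval.version import get_version

# Same message the CLI now emits via judgeval_logger.info(...)
print(f"Judgeval CLI v{get_version()}")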
{judgeval-0.12.0 → judgeval-0.13.1}/src/judgeval/constants.py
@@ -24,7 +24,6 @@ class APIScorerType(str, Enum):

     @classmethod
     def __missing__(cls, value: str) -> APIScorerType:
-        # Handle case-insensitive lookup
         for member in cls:
             if member.value == value.lower():
                 return member
@@ -32,11 +31,6 @@ class APIScorerType(str, Enum):
         raise ValueError(f"Invalid scorer type: {value}")


-UNBOUNDED_SCORERS: Set[APIScorerType] = (
-    set()
-)  # scorers whose scores are not bounded between 0-1
-
-
 LITELLM_SUPPORTED_MODELS: Set[str] = set(litellm.model_list)

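The surviving `__missing__` body implements case-insensitive lookup by retrying with the lowercased value. For value construction, Python's Enum routes misses through the `_missing_` hook; a self-contained sketch of the same idea using that hook (the member here is a stand-in, not a real APIScorerType member):

from enum import Enum

class ScorerType(str, Enum):
    FAITHFULNESS = "faithfulness"  # illustrative member

    @classmethod
    def _missing_(cls, value):
        # Retry with a lowercased value, mirroring the __missing__ body above.
        for member in cls:
            if member.value == value.lower():
                return member
        raise ValueError(f"Invalid scorer type: {value}")

assert ScorerType("FAITHFULNESS") is ScorerType.FAITHFULNESS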
{judgeval-0.12.0 → judgeval-0.13.1}/src/judgeval/data/evaluation_run.py
@@ -23,7 +23,7 @@ class EvaluationRun(BaseModel):
     scorers: Sequence[Union[ExampleScorer, APIScorerConfig]] = Field(
         default_factory=list
     )
-    model: str
+    model: Optional[str] = None

     def __init__(
         self,
@@ -77,11 +77,8 @@ class EvaluationRun(BaseModel):

     @field_validator("model")
     def validate_model(cls, v, values):
-        if not v:
-            raise ValueError("Model cannot be empty.")
-
         # Check if model is string or list of strings
-        if isinstance(v, str):
+        if v is not None and isinstance(v, str):
             if v not in ACCEPTABLE_MODELS:
                 raise ValueError(
                     f"Model name {v} not recognized. Please select a valid model name.)"
{judgeval-0.12.0 → judgeval-0.13.1}/src/judgeval/data/judgment_types.py
@@ -1,6 +1,6 @@
 # generated by datamodel-codegen:
 # filename: .openapi.json
-# timestamp: 2025-09-
+# timestamp: 2025-09-24T18:25:17+00:00

 from __future__ import annotations
 from typing import Annotated, Any, Dict, List, Optional, Union
@@ -54,8 +54,8 @@ class SavePromptScorerRequest(BaseModel):
     name: Annotated[str, Field(title="Name")]
     prompt: Annotated[str, Field(title="Prompt")]
     threshold: Annotated[float, Field(title="Threshold")]
-
-    is_trace: Annotated[Optional[bool], Field(title="Is Trace")] =
+    model: Annotated[Optional[str], Field(title="Model")] = "gpt-5"
+    is_trace: Annotated[Optional[bool], Field(title="Is Trace")] = False


 class SavePromptScorerResponse(BaseModel):
@@ -125,6 +125,7 @@ class ScorerConfig(BaseModel):
     score_type: Annotated[str, Field(title="Score Type")]
     name: Annotated[Optional[str], Field(title="Name")] = None
     threshold: Annotated[Optional[float], Field(title="Threshold")] = 0.5
+    model: Annotated[Optional[str], Field(title="Model")] = None
     strict_mode: Annotated[Optional[bool], Field(title="Strict Mode")] = False
     required_params: Annotated[Optional[List[str]], Field(title="Required Params")] = []
     kwargs: Annotated[Optional[Dict[str, Any]], Field(title="Kwargs")] = None
@@ -154,7 +155,7 @@ class PromptScorer(BaseModel):
     name: Annotated[str, Field(title="Name")]
     prompt: Annotated[str, Field(title="Prompt")]
     threshold: Annotated[float, Field(title="Threshold")]
-
+    model: Annotated[Optional[str], Field(title="Model")] = "gpt-5"
     created_at: Annotated[Optional[AwareDatetime], Field(title="Created At")] = None
     updated_at: Annotated[Optional[AwareDatetime], Field(title="Updated At")] = None
     is_trace: Annotated[Optional[bool], Field(title="Is Trace")] = False
@@ -212,6 +213,21 @@ class OtelTraceSpan(BaseModel):
     )


+class OtelSpanListItemScores(BaseModel):
+    success: Annotated[bool, Field(title="Success")]
+    score: Annotated[float, Field(title="Score")]
+    reason: Annotated[Optional[str], Field(title="Reason")] = None
+    name: Annotated[str, Field(title="Name")]
+
+
+class OtelSpanDetailScores(BaseModel):
+    success: Annotated[bool, Field(title="Success")]
+    score: Annotated[float, Field(title="Score")]
+    reason: Annotated[Optional[str], Field(title="Reason")] = None
+    name: Annotated[str, Field(title="Name")]
+    data: Annotated[Optional[Dict[str, Any]], Field(title="Data")] = None
+
+
 class ExampleEvaluationRun(BaseModel):
     id: Annotated[Optional[str], Field(title="Id")] = None
     project_name: Annotated[str, Field(title="Project Name")]
@@ -222,7 +238,7 @@ class ExampleEvaluationRun(BaseModel):
     judgment_scorers: Annotated[
         Optional[List[ScorerConfig]], Field(title="Judgment Scorers")
     ] = []
-    model: Annotated[str, Field(title="Model")]
+    model: Annotated[Optional[str], Field(title="Model")] = None
     created_at: Annotated[Optional[str], Field(title="Created At")] = None
     examples: Annotated[List[Example], Field(title="Examples")]
     trace_span_id: Annotated[Optional[str], Field(title="Trace Span Id")] = None
@@ -243,7 +259,7 @@ class TraceEvaluationRun(BaseModel):
     judgment_scorers: Annotated[
         Optional[List[ScorerConfig]], Field(title="Judgment Scorers")
     ] = []
-    model: Annotated[str, Field(title="Model")]
+    model: Annotated[Optional[str], Field(title="Model")] = None
     created_at: Annotated[Optional[str], Field(title="Created At")] = None
     trace_and_span_ids: Annotated[
         List[TraceAndSpanId], Field(title="Trace And Span Ids")
@@ -257,12 +273,6 @@ class DatasetInsertExamples(BaseModel):
     project_name: Annotated[str, Field(title="Project Name")]


-class DatasetReturn(BaseModel):
-    name: Annotated[str, Field(title="Name")]
-    project_name: Annotated[str, Field(title="Project Name")]
-    examples: Annotated[Optional[List[Example]], Field(title="Examples")] = None
-
-
 class DatasetInfo(BaseModel):
     dataset_id: Annotated[str, Field(title="Dataset Id")]
     name: Annotated[str, Field(title="Name")]
@@ -296,6 +306,81 @@ class ScoringResult(BaseModel):
     evaluation_cost: Annotated[Optional[float], Field(title="Evaluation Cost")] = None


+class OtelTraceListItem(BaseModel):
+    organization_id: Annotated[str, Field(title="Organization Id")]
+    project_id: Annotated[str, Field(title="Project Id")]
+    trace_id: Annotated[str, Field(title="Trace Id")]
+    timestamp: Annotated[str, Field(title="Timestamp")]
+    duration: Annotated[Optional[int], Field(title="Duration")] = None
+    has_notification: Annotated[Optional[bool], Field(title="Has Notification")] = None
+    tags: Annotated[Optional[List[str]], Field(title="Tags")] = None
+    experiment_run_id: Annotated[Optional[str], Field(title="Experiment Run Id")] = None
+    span_name: Annotated[Optional[str], Field(title="Span Name")] = None
+    cumulative_llm_cost: Annotated[
+        Optional[float], Field(title="Cumulative Llm Cost")
+    ] = None
+    error: Annotated[Optional[Dict[str, Any]], Field(title="Error")] = None
+    scores: Annotated[
+        Optional[List[OtelSpanListItemScores]], Field(title="Scores")
+    ] = []
+    customer_id: Annotated[Optional[str], Field(title="Customer Id")] = None
+    input_preview: Annotated[Optional[str], Field(title="Input Preview")] = None
+    output_preview: Annotated[Optional[str], Field(title="Output Preview")] = None
+    annotation_count: Annotated[Optional[int], Field(title="Annotation Count")] = 0
+    span_id: Annotated[str, Field(title="Span Id")]
+    rule_id: Annotated[Optional[str], Field(title="Rule Id")] = None
+
+
+class OtelSpanDetail(BaseModel):
+    organization_id: Annotated[str, Field(title="Organization Id")]
+    project_id: Annotated[str, Field(title="Project Id")]
+    timestamp: Annotated[str, Field(title="Timestamp")]
+    trace_id: Annotated[str, Field(title="Trace Id")]
+    span_id: Annotated[str, Field(title="Span Id")]
+    parent_span_id: Annotated[Optional[str], Field(title="Parent Span Id")] = None
+    trace_state: Annotated[Optional[str], Field(title="Trace State")] = None
+    span_name: Annotated[Optional[str], Field(title="Span Name")] = None
+    span_kind: Annotated[Optional[str], Field(title="Span Kind")] = None
+    service_name: Annotated[Optional[str], Field(title="Service Name")] = None
+    resource_attributes: Annotated[
+        Optional[Dict[str, Any]], Field(title="Resource Attributes")
+    ] = None
+    span_attributes: Annotated[
+        Optional[Dict[str, Any]], Field(title="Span Attributes")
+    ] = None
+    duration: Annotated[Optional[int], Field(title="Duration")] = None
+    status_code: Annotated[Optional[str], Field(title="Status Code")] = None
+    status_message: Annotated[Optional[str], Field(title="Status Message")] = None
+    events: Annotated[
+        Optional[Union[List[Dict[str, Any]], Dict[str, Any]]], Field(title="Events")
+    ] = None
+    links: Annotated[
+        Optional[Union[List[Dict[str, Any]], Dict[str, Any]]], Field(title="Links")
+    ] = None
+    llm_cost: Annotated[Optional[float], Field(title="Llm Cost")] = None
+    prompt_tokens: Annotated[Optional[int], Field(title="Prompt Tokens")] = None
+    completion_tokens: Annotated[Optional[int], Field(title="Completion Tokens")] = None
+    scores: Annotated[Optional[List[OtelSpanDetailScores]], Field(title="Scores")] = (
+        None
+    )
+
+
 class EvalResults(BaseModel):
     results: Annotated[List[ScoringResult], Field(title="Results")]
     run: Annotated[Union[ExampleEvaluationRun, TraceEvaluationRun], Field(title="Run")]
+
+
+class DatasetTraceWithSpans(BaseModel):
+    dataset_id: Annotated[str, Field(title="Dataset Id")]
+    trace_detail: OtelTraceListItem
+    spans: Annotated[List[OtelSpanDetail], Field(title="Spans")]
+
+
+class DatasetReturn(BaseModel):
+    name: Annotated[str, Field(title="Name")]
+    project_name: Annotated[str, Field(title="Project Name")]
+    dataset_kind: DatasetKind
+    examples: Annotated[Optional[List[Example]], Field(title="Examples")] = None
+    traces: Annotated[Optional[List[DatasetTraceWithSpans]], Field(title="Traces")] = (
+        None
+    )
judgeval-0.13.1/src/judgeval/data/trace.py (new file)
@@ -0,0 +1,121 @@
+from typing import Optional, List, Dict, Any
+from pydantic import BaseModel
+from .judgment_types import (
+    OtelSpanDetailScores,
+    OtelSpanDetail,
+    OtelTraceListItem,
+)
+
+
+class TraceUsage(BaseModel):
+    prompt_tokens: Optional[int] = None
+    completion_tokens: Optional[int] = None
+    cache_creation_input_tokens: Optional[int] = None
+    cache_read_input_tokens: Optional[int] = None
+    total_tokens: Optional[int] = None
+    prompt_tokens_cost_usd: Optional[float] = None
+    completion_tokens_cost_usd: Optional[float] = None
+    total_cost_usd: Optional[float] = None
+    model_name: Optional[str] = None
+
+
+class TraceScore(OtelSpanDetailScores):
+    """Score information for a trace or span."""
+
+    pass
+
+
+class TraceRule(BaseModel):
+    """Rule that was triggered for a trace."""
+
+    rule_id: str
+    rule_name: str
+
+
+class TraceSpan(OtelSpanDetail):
+    """Individual span within a trace with complete telemetry data."""
+
+    @classmethod
+    def from_otel_span_detail(cls, span_detail: OtelSpanDetail) -> "TraceSpan":
+        """Create TraceSpan from OtelSpanDetail, converting scores to TraceScore."""
+        data = span_detail.model_dump()
+
+        if "scores" in data and data["scores"]:
+            data["scores"] = [TraceScore(**score) for score in data["scores"]]
+
+        return cls(**data)
+
+    def to_dict(self) -> Dict[str, Any]:
+        """Convert TraceSpan to dictionary."""
+        return self.model_dump(exclude_none=True)
+
+
+class Trace(OtelTraceListItem):
+    """Complete trace with metadata and all associated spans."""
+
+    spans: List[TraceSpan] = []
+    rules: Optional[List[TraceRule]] = []
+
+    @classmethod
+    def from_dataset_trace_with_spans(cls, dataset_trace: Any) -> "Trace":
+        """Create Trace from DatasetTraceWithSpans (handles both API and judgment types)."""
+
+        if hasattr(dataset_trace, "trace_detail"):
+            trace_detail = dataset_trace.trace_detail
+            spans_data = dataset_trace.spans
+        else:
+            trace_detail = dataset_trace.get("trace_detail", {})
+            spans_data = dataset_trace.get("spans", [])
+
+        if hasattr(trace_detail, "model_dump"):
+            trace_data = trace_detail.model_dump()
+        elif isinstance(trace_detail, dict):
+            trace_data = trace_detail.copy()
+        else:
+            trace_data = dict(trace_detail)
+
+        spans = []
+        for span in spans_data:
+            if hasattr(span, "model_dump"):
+                spans.append(TraceSpan.from_otel_span_detail(span))
+            else:
+                # Handle dict spans
+                span_data = dict(span) if not isinstance(span, dict) else span.copy()
+                if "scores" in span_data and span_data["scores"]:
+                    span_data["scores"] = [
+                        TraceScore(**score)
+                        if isinstance(score, dict)
+                        else TraceScore(**score.model_dump())
+                        for score in span_data["scores"]
+                    ]
+                spans.append(TraceSpan(**span_data))
+
+        rules = []
+        if "rule_id" in trace_data and trace_data["rule_id"]:
+            rules = [
+                TraceRule(
+                    rule_id=trace_data["rule_id"],
+                    rule_name=f"Rule {trace_data['rule_id']}",
+                )
+            ]
+
+        trace_data.pop("scores", [])
+        trace_data.pop("rule_id", None)
+        trace = cls(**trace_data)
+
+        trace.spans = spans
+        trace.rules = rules
+
+        return trace
+
+    def to_dict(self) -> Dict[str, Any]:
+        """Convert Trace to dictionary."""
+        return self.model_dump(exclude_none=True)
+
+    def __len__(self) -> int:
+        """Return the number of spans in the trace."""
+        return len(self.spans)
+
+    def __iter__(self):
+        """Iterate over spans in the trace."""
+        return iter(self.spans)
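Because `from_dataset_trace_with_spans` accepts either pydantic objects or plain dicts, a raw payload hydrates directly; a small usage sketch with illustrative IDs:

from judgeval.data.trace import Trace

payload = {
    "trace_detail": {
        "organization_id": "org_123",
        "project_id": "proj_456",
        "trace_id": "tr_789",
        "span_id": "sp_001",
        "timestamp": "2025-09-24T18:25:18+00:00",
    },
    "spans": [
        {
            "organization_id": "org_123",
            "project_id": "proj_456",
            "trace_id": "tr_789",
            "span_id": "sp_001",
            "timestamp": "2025-09-24T18:25:18+00:00",
            "scores": [{"success": True, "score": 1.0, "name": "faithfulness"}],
        }
    ],
}

trace = Trace.from_dataset_trace_with_spans(payload)
print(len(trace))   # 1, via __len__ (counts spans)
for span in trace:  # __iter__ yields TraceSpan objects
    print(span.span_id, span.scores)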