PyPI - judgeval - Versions diffs - 0.8.0__py3-none-any.whl → 0.9.0__py3-none-any.whl - Mend

judgeval 0.8.0__py3-none-any.whl → 0.9.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (96)

judgeval/__init__.py +139 -12
judgeval/api/__init__.py +501 -0
judgeval/api/api_types.py +344 -0
judgeval/cli.py +2 -4
judgeval/constants.py +10 -26
judgeval/data/evaluation_run.py +49 -26
judgeval/data/example.py +2 -2
judgeval/data/judgment_types.py +266 -82
judgeval/data/result.py +4 -5
judgeval/data/scorer_data.py +4 -2
judgeval/data/tool.py +2 -2
judgeval/data/trace.py +7 -50
judgeval/data/trace_run.py +7 -4
judgeval/{dataset.py → dataset/__init__.py} +43 -28
judgeval/env.py +67 -0
judgeval/{run_evaluation.py → evaluation/__init__.py} +29 -95
judgeval/exceptions.py +27 -0
judgeval/integrations/langgraph/__init__.py +788 -0
judgeval/judges/__init__.py +2 -2
judgeval/judges/litellm_judge.py +75 -15
judgeval/judges/together_judge.py +86 -18
judgeval/judges/utils.py +7 -21
judgeval/{common/logger.py → logger.py} +8 -6
judgeval/scorers/__init__.py +0 -4
judgeval/scorers/agent_scorer.py +3 -7
judgeval/scorers/api_scorer.py +8 -13
judgeval/scorers/base_scorer.py +52 -32
judgeval/scorers/example_scorer.py +1 -3
judgeval/scorers/judgeval_scorers/api_scorers/__init__.py +0 -14
judgeval/scorers/judgeval_scorers/api_scorers/prompt_scorer.py +45 -20
judgeval/scorers/judgeval_scorers/api_scorers/tool_dependency.py +2 -2
judgeval/scorers/judgeval_scorers/api_scorers/tool_order.py +3 -3
judgeval/scorers/score.py +21 -31
judgeval/scorers/trace_api_scorer.py +5 -0
judgeval/scorers/utils.py +1 -103
judgeval/tracer/__init__.py +1075 -2
judgeval/tracer/constants.py +1 -0
judgeval/tracer/exporters/__init__.py +37 -0
judgeval/tracer/exporters/s3.py +119 -0
judgeval/tracer/exporters/store.py +43 -0
judgeval/tracer/exporters/utils.py +32 -0
judgeval/tracer/keys.py +67 -0
judgeval/tracer/llm/__init__.py +1233 -0
judgeval/{common/tracer → tracer/llm}/providers.py +5 -10
judgeval/{local_eval_queue.py → tracer/local_eval_queue.py} +15 -10
judgeval/tracer/managers.py +188 -0
judgeval/tracer/processors/__init__.py +181 -0
judgeval/tracer/utils.py +20 -0
judgeval/trainer/__init__.py +5 -0
judgeval/{common/trainer → trainer}/config.py +12 -9
judgeval/{common/trainer → trainer}/console.py +2 -9
judgeval/{common/trainer → trainer}/trainable_model.py +12 -7
judgeval/{common/trainer → trainer}/trainer.py +119 -17
judgeval/utils/async_utils.py +2 -3
judgeval/utils/decorators.py +24 -0
judgeval/utils/file_utils.py +37 -4
judgeval/utils/guards.py +32 -0
judgeval/utils/meta.py +14 -0
judgeval/{common/api/json_encoder.py → utils/serialize.py} +7 -1
judgeval/utils/testing.py +88 -0
judgeval/utils/url.py +10 -0
judgeval/{version_check.py → utils/version_check.py} +3 -3
judgeval/version.py +5 -0
judgeval/warnings.py +4 -0
{judgeval-0.8.0.dist-info → judgeval-0.9.0.dist-info}/METADATA +12 -14
judgeval-0.9.0.dist-info/RECORD +80 -0
judgeval/clients.py +0 -35
judgeval/common/__init__.py +0 -13
judgeval/common/api/__init__.py +0 -3
judgeval/common/api/api.py +0 -375
judgeval/common/api/constants.py +0 -186
judgeval/common/exceptions.py +0 -27
judgeval/common/storage/__init__.py +0 -6
judgeval/common/storage/s3_storage.py +0 -97
judgeval/common/tracer/__init__.py +0 -31
judgeval/common/tracer/constants.py +0 -22
judgeval/common/tracer/core.py +0 -2427
judgeval/common/tracer/otel_exporter.py +0 -108
judgeval/common/tracer/otel_span_processor.py +0 -188
judgeval/common/tracer/span_processor.py +0 -37
judgeval/common/tracer/span_transformer.py +0 -207
judgeval/common/tracer/trace_manager.py +0 -101
judgeval/common/trainer/__init__.py +0 -5
judgeval/common/utils.py +0 -948
judgeval/integrations/langgraph.py +0 -844
judgeval/judges/mixture_of_judges.py +0 -287
judgeval/judgment_client.py +0 -267
judgeval/rules.py +0 -521
judgeval/scorers/judgeval_scorers/api_scorers/execution_order.py +0 -52
judgeval/scorers/judgeval_scorers/api_scorers/hallucination.py +0 -28
judgeval/utils/alerts.py +0 -93
judgeval/utils/requests.py +0 -50
judgeval-0.8.0.dist-info/RECORD +0 -82
{judgeval-0.8.0.dist-info → judgeval-0.9.0.dist-info}/WHEEL +0 -0
{judgeval-0.8.0.dist-info → judgeval-0.9.0.dist-info}/entry_points.txt +0 -0
{judgeval-0.8.0.dist-info → judgeval-0.9.0.dist-info}/licenses/LICENSE.md +0 -0

judgeval/data/judgment_types.py CHANGED Viewed

@@ -1,57 +1,129 @@
 # generated by datamodel-codegen:
-#   filename:  openapi_new.json
-#   timestamp: 2025-08-08T18:50:51+00:00
+#   filename:  .openapi.json
+#   timestamp: 2025-08-29T04:49:38+00:00
 from __future__ import annotations
 from typing import Annotated, Any, Dict, List, Optional, Union
+from pydantic import AwareDatetime, BaseModel, ConfigDict, Field, RootModel
-from pydantic import BaseModel, ConfigDict, Field
+class TraceAndSpanId(RootModel[List]):
+    root: Annotated[List, Field(max_length=2, min_length=2)]
+class EvalResultsFetch(BaseModel):
+    experiment_run_id: Annotated[str, Field(title="Experiment Run Id")]
+    project_name: Annotated[str, Field(title="Project Name")]
-class ValidationErrorJudgmentType(BaseModel):
-    loc: Annotated[List[Union[str, int]], Field(title="Location")]
-    msg: Annotated[str, Field(title="Message")]
-    type: Annotated[str, Field(title="Error Type")]
+class DatasetFetch(BaseModel):
+    dataset_alias: Annotated[str, Field(title="Dataset Alias")]
+    project_name: Annotated[str, Field(title="Project Name")]
-class ScorerDataJudgmentType(BaseModel):
+class TraceSave(BaseModel):
+    project_name: Annotated[str, Field(title="Project Name")]
+    trace_id: Annotated[str, Field(title="Trace Id")]
     name: Annotated[str, Field(title="Name")]
+    created_at: Annotated[str, Field(title="Created At")]
+    duration: Annotated[float, Field(title="Duration")]
+    offline_mode: Annotated[Optional[bool], Field(title="Offline Mode")] = False
+    has_notification: Annotated[Optional[bool], Field(title="Has Notification")] = False
+    customer_id: Annotated[Optional[str], Field(title="Customer Id")] = None
+    tags: Annotated[Optional[List[str]], Field(title="Tags")] = None
+    metadata: Annotated[Optional[Dict[str, Any]], Field(title="Metadata")] = None
+    update_id: Annotated[Optional[int], Field(title="Update Id")] = 1
+class TraceFetch(BaseModel):
+    trace_id: Annotated[str, Field(title="Trace Id")]
+class TraceAddToDataset(BaseModel):
+    trace_id: Annotated[str, Field(title="Trace Id")]
+    trace_span_id: Annotated[str, Field(title="Trace Span Id")]
+    dataset_alias: Annotated[str, Field(title="Dataset Alias")]
+    project_name: Annotated[str, Field(title="Project Name")]
+class EvaluationRunsBatchRequest(BaseModel):
+    organization_id: Annotated[str, Field(title="Organization Id")]
+    evaluation_entries: Annotated[
+        List[Dict[str, Any]], Field(title="Evaluation Entries")
+    ]
+class ProjectAdd(BaseModel):
+    project_name: Annotated[str, Field(title="Project Name")]
+class ProjectAddResponse(BaseModel):
+    project_id: Annotated[str, Field(title="Project Id")]
+class ProjectDeleteFromJudgevalResponse(BaseModel):
+    project_name: Annotated[str, Field(title="Project Name")]
+class ProjectDeleteResponse(BaseModel):
+    message: Annotated[str, Field(title="Message")]
+class ScorerExistsRequest(BaseModel):
+    name: Annotated[str, Field(title="Name")]
+class ScorerExistsResponse(BaseModel):
+    exists: Annotated[bool, Field(title="Exists")]
+class SavePromptScorerRequest(BaseModel):
+    name: Annotated[str, Field(title="Name")]
+    prompt: Annotated[str, Field(title="Prompt")]
     threshold: Annotated[float, Field(title="Threshold")]
-    success: Annotated[bool, Field(title="Success")]
-    score: Annotated[Optional[float], Field(title="Score")] = None
-    reason: Annotated[Optional[str], Field(title="Reason")] = None
-    strict_mode: Annotated[Optional[bool], Field(title="Strict Mode")] = None
-    evaluation_model: Annotated[
-        Optional[Union[List[str], str]], Field(title="Evaluation Model")
-    ] = None
-    error: Annotated[Optional[str], Field(title="Error")] = None
-    additional_metadata: Annotated[
-        Optional[Dict[str, Any]], Field(title="Additional Metadata")
-    ] = None
+    options: Annotated[Optional[Dict[str, float]], Field(title="Options")] = None
+    is_trace: Annotated[Optional[bool], Field(title="Is Trace")] = False
-class ExampleJudgmentType(BaseModel):
-    model_config = ConfigDict(
-        extra="allow",
-    )
-    example_id: Annotated[str, Field(title="Example Id")]
-    created_at: Annotated[str, Field(title="Created At")]
-    name: Annotated[Optional[str], Field(title="Name")] = None
+class SavePromptScorerResponse(BaseModel):
+    message: Annotated[str, Field(title="Message")]
+    name: Annotated[str, Field(title="Name")]
-class ScorerConfigJudgmentType(BaseModel):
-    score_type: Annotated[str, Field(title="Score Type")]
-    name: Annotated[Optional[str], Field(title="Name")] = None
-    threshold: Annotated[Optional[float], Field(title="Threshold")] = 0.5
-    strict_mode: Annotated[Optional[bool], Field(title="Strict Mode")] = False
-    required_params: Annotated[Optional[List[str]], Field(title="Required Params")] = (
-        Field(default_factory=list)
-    )
-    kwargs: Annotated[Optional[Dict[str, Any]], Field(title="Kwargs")] = None
+class FetchPromptScorerRequest(BaseModel):
+    name: Annotated[str, Field(title="Name")]
+class CustomScorerUploadPayload(BaseModel):
+    scorer_name: Annotated[str, Field(title="Scorer Name")]
+    scorer_code: Annotated[str, Field(title="Scorer Code")]
+    requirements_text: Annotated[str, Field(title="Requirements Text")]
+class CustomScorerTemplateResponse(BaseModel):
+    scorer_name: Annotated[str, Field(title="Scorer Name")]
+    status: Annotated[str, Field(title="Status")]
+    message: Annotated[str, Field(title="Message")]
-class BaseScorerJudgmentType(BaseModel):
+class ResolveProjectNameRequest(BaseModel):
+    project_name: Annotated[str, Field(title="Project Name")]
+class ResolveProjectNameResponse(BaseModel):
+    project_id: Annotated[str, Field(title="Project Id")]
+class TraceIdRequest(BaseModel):
+    trace_id: Annotated[str, Field(title="Trace Id")]
+class SpanScoreRequest(BaseModel):
+    span_id: Annotated[str, Field(title="Span Id")]
+    trace_id: Annotated[str, Field(title="Trace Id")]
+class BaseScorer(BaseModel):
     score_type: Annotated[str, Field(title="Score Type")]
     threshold: Annotated[Optional[float], Field(title="Threshold")] = 0.5
     name: Annotated[Optional[str], Field(title="Name")] = None
@@ -76,7 +148,87 @@ class BaseScorerJudgmentType(BaseModel):
     server_hosted: Annotated[Optional[bool], Field(title="Server Hosted")] = False
-class TraceUsageJudgmentType(BaseModel):
+class ScorerConfig(BaseModel):
+    score_type: Annotated[str, Field(title="Score Type")]
+    name: Annotated[Optional[str], Field(title="Name")] = None
+    threshold: Annotated[Optional[float], Field(title="Threshold")] = 0.5
+    strict_mode: Annotated[Optional[bool], Field(title="Strict Mode")] = False
+    required_params: Annotated[Optional[List[str]], Field(title="Required Params")] = []
+    kwargs: Annotated[Optional[Dict[str, Any]], Field(title="Kwargs")] = None
+class Example(BaseModel):
+    model_config = ConfigDict(
+        extra="allow",
+    )
+    example_id: Annotated[str, Field(title="Example Id")]
+    created_at: Annotated[str, Field(title="Created At")]
+    name: Annotated[Optional[str], Field(title="Name")] = None
+class ValidationError(BaseModel):
+    loc: Annotated[List[Union[str, int]], Field(title="Location")]
+    msg: Annotated[str, Field(title="Message")]
+    type: Annotated[str, Field(title="Error Type")]
+class SpanBatchItem(BaseModel):
+    span_id: Annotated[str, Field(title="Span Id")]
+    trace_id: Annotated[str, Field(title="Trace Id")]
+    function: Annotated[str, Field(title="Function")]
+    created_at: Annotated[Any, Field(title="Created At")] = None
+    parent_span_id: Annotated[Optional[str], Field(title="Parent Span Id")] = None
+    span_type: Annotated[Optional[str], Field(title="Span Type")] = "span"
+    inputs: Annotated[Optional[Dict[str, Any]], Field(title="Inputs")] = None
+    output: Annotated[Any, Field(title="Output")] = None
+    error: Annotated[Optional[Dict[str, Any]], Field(title="Error")] = None
+    usage: Annotated[Optional[Dict[str, Any]], Field(title="Usage")] = None
+    duration: Annotated[Optional[float], Field(title="Duration")] = None
+    expected_tools: Annotated[
+        Optional[List[Dict[str, Any]]], Field(title="Expected Tools")
+    ] = None
+    additional_metadata: Annotated[
+        Optional[Dict[str, Any]], Field(title="Additional Metadata")
+    ] = None
+    has_evaluation: Annotated[Optional[bool], Field(title="Has Evaluation")] = False
+    agent_name: Annotated[Optional[str], Field(title="Agent Name")] = None
+    class_name: Annotated[Optional[str], Field(title="Class Name")] = None
+    state_before: Annotated[Optional[Dict[str, Any]], Field(title="State Before")] = (
+        None
+    )
+    state_after: Annotated[Optional[Dict[str, Any]], Field(title="State After")] = None
+    span_state: Annotated[str, Field(title="Span State")]
+    update_id: Annotated[Optional[int], Field(title="Update Id")] = 1
+    queued_at: Annotated[float, Field(title="Queued At")]
+class PromptScorer(BaseModel):
+    name: Annotated[str, Field(title="Name")]
+    prompt: Annotated[str, Field(title="Prompt")]
+    threshold: Annotated[float, Field(title="Threshold")]
+    options: Annotated[Optional[Dict[str, float]], Field(title="Options")] = None
+    created_at: Annotated[Optional[AwareDatetime], Field(title="Created At")] = None
+    updated_at: Annotated[Optional[AwareDatetime], Field(title="Updated At")] = None
+    is_trace: Annotated[Optional[bool], Field(title="Is Trace")] = False
+class ScorerData(BaseModel):
+    name: Annotated[str, Field(title="Name")]
+    threshold: Annotated[float, Field(title="Threshold")]
+    success: Annotated[bool, Field(title="Success")]
+    score: Annotated[Optional[float], Field(title="Score")] = None
+    reason: Annotated[Optional[str], Field(title="Reason")] = None
+    strict_mode: Annotated[Optional[bool], Field(title="Strict Mode")] = None
+    evaluation_model: Annotated[
+        Optional[Union[List[str], str]], Field(title="Evaluation Model")
+    ] = None
+    error: Annotated[Optional[str], Field(title="Error")] = None
+    additional_metadata: Annotated[
+        Optional[Dict[str, Any]], Field(title="Additional Metadata")
+    ] = None
+class TraceUsage(BaseModel):
     prompt_tokens: Annotated[Optional[int], Field(title="Prompt Tokens")] = None
     completion_tokens: Annotated[Optional[int], Field(title="Completion Tokens")] = None
     cache_creation_input_tokens: Annotated[
@@ -96,7 +248,7 @@ class TraceUsageJudgmentType(BaseModel):
     model_name: Annotated[Optional[str], Field(title="Model Name")] = None
-class ToolJudgmentType(BaseModel):
+class Tool(BaseModel):
     tool_name: Annotated[str, Field(title="Tool Name")]
     parameters: Annotated[Optional[Dict[str, Any]], Field(title="Parameters")] = None
     agent_name: Annotated[Optional[str], Field(title="Agent Name")] = None
@@ -109,45 +261,75 @@ class ToolJudgmentType(BaseModel):
     require_all: Annotated[Optional[bool], Field(title="Require All")] = None
-class HTTPValidationErrorJudgmentType(BaseModel):
-    detail: Annotated[
-        Optional[List[ValidationErrorJudgmentType]], Field(title="Detail")
-    ] = None
-class EvaluationRunJudgmentType(BaseModel):
+class ExampleEvaluationRun(BaseModel):
     id: Annotated[Optional[str], Field(title="Id")] = None
     project_name: Annotated[Optional[str], Field(title="Project Name")] = None
     eval_name: Annotated[Optional[str], Field(title="Eval Name")] = None
-    examples: Annotated[List[ExampleJudgmentType], Field(title="Examples")]
     custom_scorers: Annotated[
-        Optional[List[BaseScorerJudgmentType]], Field(title="Custom Scorers")
-    ] = Field(default_factory=list)
+        Optional[List[BaseScorer]], Field(title="Custom Scorers")
+    ] = []
     judgment_scorers: Annotated[
-        Optional[List[ScorerConfigJudgmentType]], Field(title="Judgment Scorers")
-    ] = Field(default_factory=list)
+        Optional[List[ScorerConfig]], Field(title="Judgment Scorers")
+    ] = []
     model: Annotated[str, Field(title="Model")]
+    created_at: Annotated[Optional[str], Field(title="Created At")] = None
+    examples: Annotated[List[Example], Field(title="Examples")]
     trace_span_id: Annotated[Optional[str], Field(title="Trace Span Id")] = None
     trace_id: Annotated[Optional[str], Field(title="Trace Id")] = None
+class HTTPValidationError(BaseModel):
+    detail: Annotated[Optional[List[ValidationError]], Field(title="Detail")] = None
+class TraceEvaluationRun(BaseModel):
+    id: Annotated[Optional[str], Field(title="Id")] = None
+    project_name: Annotated[Optional[str], Field(title="Project Name")] = None
+    eval_name: Annotated[Optional[str], Field(title="Eval Name")] = None
+    custom_scorers: Annotated[
+        Optional[List[BaseScorer]], Field(title="Custom Scorers")
+    ] = []
+    judgment_scorers: Annotated[
+        Optional[List[ScorerConfig]], Field(title="Judgment Scorers")
+    ] = []
+    model: Annotated[str, Field(title="Model")]
     created_at: Annotated[Optional[str], Field(title="Created At")] = None
+    trace_and_span_ids: Annotated[
+        List[TraceAndSpanId], Field(title="Trace And Span Ids")
+    ]
+    is_offline: Annotated[Optional[bool], Field(title="Is Offline")] = False
+class DatasetInsertExamples(BaseModel):
+    dataset_alias: Annotated[str, Field(title="Dataset Alias")]
+    examples: Annotated[List[Example], Field(title="Examples")]
+    project_name: Annotated[str, Field(title="Project Name")]
+class SpansBatchRequest(BaseModel):
+    spans: Annotated[List[SpanBatchItem], Field(title="Spans")]
+    organization_id: Annotated[str, Field(title="Organization Id")]
-class TraceSpanJudgmentType(BaseModel):
+class FetchPromptScorerResponse(BaseModel):
+    scorer: PromptScorer
+class TraceSpan(BaseModel):
     span_id: Annotated[str, Field(title="Span Id")]
     trace_id: Annotated[str, Field(title="Trace Id")]
     function: Annotated[str, Field(title="Function")]
-    depth: Annotated[int, Field(title="Depth")]
     created_at: Annotated[Any, Field(title="Created At")] = None
     parent_span_id: Annotated[Optional[str], Field(title="Parent Span Id")] = None
     span_type: Annotated[Optional[str], Field(title="Span Type")] = "span"
     inputs: Annotated[Optional[Dict[str, Any]], Field(title="Inputs")] = None
     error: Annotated[Optional[Dict[str, Any]], Field(title="Error")] = None
     output: Annotated[Any, Field(title="Output")] = None
-    usage: Optional[TraceUsageJudgmentType] = None
+    usage: Optional[TraceUsage] = None
     duration: Annotated[Optional[float], Field(title="Duration")] = None
-    expected_tools: Annotated[
-        Optional[List[ToolJudgmentType]], Field(title="Expected Tools")
-    ] = None
+    expected_tools: Annotated[Optional[List[Tool]], Field(title="Expected Tools")] = (
+        None
+    )
     additional_metadata: Annotated[
         Optional[Dict[str, Any]], Field(title="Additional Metadata")
     ] = None
@@ -161,54 +343,56 @@ class TraceSpanJudgmentType(BaseModel):
     update_id: Annotated[Optional[int], Field(title="Update Id")] = 1
-class TraceJudgmentType(BaseModel):
+class Trace(BaseModel):
     trace_id: Annotated[str, Field(title="Trace Id")]
     name: Annotated[str, Field(title="Name")]
     created_at: Annotated[str, Field(title="Created At")]
     duration: Annotated[float, Field(title="Duration")]
-    trace_spans: Annotated[List[TraceSpanJudgmentType], Field(title="Trace Spans")]
+    trace_spans: Annotated[List[TraceSpan], Field(title="Trace Spans")]
     offline_mode: Annotated[Optional[bool], Field(title="Offline Mode")] = False
-    rules: Annotated[Optional[Dict[str, Any]], Field(title="Rules")] = Field(
-        default_factory=dict
-    )
+    rules: Annotated[Optional[Dict[str, Any]], Field(title="Rules")] = {}
     has_notification: Annotated[Optional[bool], Field(title="Has Notification")] = False
     customer_id: Annotated[Optional[str], Field(title="Customer Id")] = None
-    tags: Annotated[Optional[List[str]], Field(title="Tags")] = Field(
-        default_factory=list
-    )
-    metadata: Annotated[Optional[Dict[str, Any]], Field(title="Metadata")] = Field(
-        default_factory=dict
-    )
+    tags: Annotated[Optional[List[str]], Field(title="Tags")] = []
+    metadata: Annotated[Optional[Dict[str, Any]], Field(title="Metadata")] = {}
     update_id: Annotated[Optional[int], Field(title="Update Id")] = 1
-class ScoringResultJudgmentType(BaseModel):
+class ScoringResult(BaseModel):
     success: Annotated[bool, Field(title="Success")]
-    scorers_data: Annotated[
-        Optional[List[ScorerDataJudgmentType]], Field(title="Scorers Data")
-    ] = None
+    scorers_data: Annotated[Optional[List[ScorerData]], Field(title="Scorers Data")] = (
+        None
+    )
     name: Annotated[Optional[str], Field(title="Name")] = None
     data_object: Annotated[
-        Optional[Union[TraceSpanJudgmentType, ExampleJudgmentType]],
-        Field(title="Data Object"),
+        Optional[Union[TraceSpan, Example]], Field(title="Data Object")
     ] = None
     trace_id: Annotated[Optional[str], Field(title="Trace Id")] = None
     run_duration: Annotated[Optional[float], Field(title="Run Duration")] = None
     evaluation_cost: Annotated[Optional[float], Field(title="Evaluation Cost")] = None
-class TraceRunJudgmentType(BaseModel):
+class TraceRun(BaseModel):
     project_name: Annotated[Optional[str], Field(title="Project Name")] = None
     eval_name: Annotated[Optional[str], Field(title="Eval Name")] = None
-    traces: Annotated[List[TraceJudgmentType], Field(title="Traces")]
-    scorers: Annotated[List[ScorerConfigJudgmentType], Field(title="Scorers")]
+    traces: Annotated[List[Trace], Field(title="Traces")]
+    scorers: Annotated[List[ScorerConfig], Field(title="Scorers")]
     model: Annotated[str, Field(title="Model")]
     trace_span_id: Annotated[Optional[str], Field(title="Trace Span Id")] = None
     tools: Annotated[Optional[List[Dict[str, Any]]], Field(title="Tools")] = None
-class EvalResultsJudgmentType(BaseModel):
-    results: Annotated[List[ScoringResultJudgmentType], Field(title="Results")]
-    run: Annotated[
-        Union[TraceRunJudgmentType, EvaluationRunJudgmentType], Field(title="Run")
-    ]
+class EvalResults(BaseModel):
+    results: Annotated[List[ScoringResult], Field(title="Results")]
+    run: Annotated[Union[ExampleEvaluationRun, TraceEvaluationRun], Field(title="Run")]
+class DatasetPush(BaseModel):
+    dataset_alias: Annotated[str, Field(title="Dataset Alias")]
+    comments: Annotated[Optional[str], Field(title="Comments")] = None
+    source_file: Annotated[Optional[str], Field(title="Source File")] = None
+    examples: Annotated[Optional[List[Example]], Field(title="Examples")] = None
+    traces: Annotated[Optional[List[Trace]], Field(title="Traces")] = None
+    is_trace: Annotated[Optional[bool], Field(title="Is Trace")] = False
+    project_name: Annotated[str, Field(title="Project Name")]
+    overwrite: Annotated[Optional[bool], Field(title="Overwrite")] = False

judgeval/data/result.py CHANGED Viewed

@@ -1,10 +1,10 @@
 from typing import List, Union
 from judgeval.data import ScorerData, Example
 from judgeval.data.trace import TraceSpan
-from judgeval.data.judgment_types import ScoringResultJudgmentType
+from judgeval.data.judgment_types import ScoringResult as JudgmentScoringResult
-class ScoringResult(ScoringResultJudgmentType):
+class ScoringResult(JudgmentScoringResult):
     """
     A ScoringResult contains the output of one or more scorers applied to a single example.
     Ie: One input, one actual_output, one expected_output, etc..., and 1+ scorer (Faithfulness, Hallucination, Summarization, etc...)
@@ -17,9 +17,8 @@ class ScoringResult(ScoringResultJudgmentType):
     """
-    data_object: (
-        Example  # Need to override this so that it uses this repo's Example class
-    )
+    # Need to override this so that it uses this repo's Example class
+    data_object: Example
     def model_dump(self, **kwargs):
         data = super().model_dump(**kwargs)

judgeval/data/scorer_data.py CHANGED Viewed

@@ -4,12 +4,14 @@ Implementation of the ScorerData class.
 ScorerData holds the information related to a single, completed Scorer evaluation run.
 """
-from judgeval.data.judgment_types import ScorerDataJudgmentType
+from __future__ import annotations
+from judgeval.data.judgment_types import ScorerData as JudgmentScorerData
 from judgeval.scorers import BaseScorer
 from typing import List
-class ScorerData(ScorerDataJudgmentType):
+class ScorerData(JudgmentScorerData):
     """
     ScorerData holds the information related to a single, completed Scorer evaluation run.

judgeval/data/tool.py CHANGED Viewed

@@ -1,5 +1,5 @@
-from judgeval.data.judgment_types import ToolJudgmentType
+from judgeval.data.judgment_types import Tool as JudgmentTool
-class Tool(ToolJudgmentType):
+class Tool(JudgmentTool):
     pass

judgeval/data/trace.py CHANGED Viewed

@@ -1,24 +1,21 @@
-import threading
 from datetime import datetime, timezone
 from judgeval.data.judgment_types import (
-    TraceUsageJudgmentType,
-    TraceSpanJudgmentType,
-    TraceJudgmentType,
+    TraceUsage as JudgmentTraceUsage,
+    TraceSpan as JudgmentTraceSpan,
+    Trace as JudgmentTrace,
 )
-from judgeval.constants import SPAN_LIFECYCLE_END_UPDATE_ID
-from judgeval.common.api.json_encoder import json_encoder
+from judgeval.utils.serialize import json_encoder
-class TraceUsage(TraceUsageJudgmentType):
+class TraceUsage(JudgmentTraceUsage):
     pass
-class TraceSpan(TraceSpanJudgmentType):
+class TraceSpan(JudgmentTraceSpan):
     def model_dump(self, **kwargs):
         return {
             "span_id": self.span_id,
             "trace_id": self.trace_id,
-            "depth": self.depth,
             "created_at": datetime.fromtimestamp(
                 self.created_at, tz=timezone.utc
             ).isoformat(),
@@ -32,52 +29,12 @@ class TraceSpan(TraceSpanJudgmentType):
             "usage": self.usage.model_dump() if self.usage else None,
             "has_evaluation": self.has_evaluation,
             "agent_name": self.agent_name,
-            "class_name": self.class_name,
             "state_before": self.state_before,
             "state_after": self.state_after,
             "additional_metadata": json_encoder(self.additional_metadata),
             "update_id": self.update_id,
         }
-    def __init__(self, **data):
-        super().__init__(**data)
-        # Initialize thread lock for thread-safe update_id increment
-        self._update_id_lock = threading.Lock()
-    def increment_update_id(self) -> int:
-        """
-        Thread-safe method to increment the update_id counter.
-        Returns:
-            int: The new update_id value after incrementing
-        """
-        with self._update_id_lock:
-            self.update_id += 1
-            return self.update_id
-    def set_update_id_to_ending_number(
-        self, ending_number: int = SPAN_LIFECYCLE_END_UPDATE_ID
-    ) -> int:
-        """
-        Thread-safe method to set the update_id to a predetermined ending number.
-        Args:
-            ending_number (int): The number to set update_id to. Defaults to SPAN_LIFECYCLE_END_UPDATE_ID.
-        Returns:
-            int: The new update_id value after setting
-        """
-        with self._update_id_lock:
-            self.update_id = ending_number
-            return self.update_id
-    def print_span(self):
-        """Print the span with proper formatting and parent relationship information."""
-        indent = "  " * self.depth
-        parent_info = (
-            f" (parent_id: {self.parent_span_id})" if self.parent_span_id else ""
-        )
-        print(f"{indent}→ {self.function} (id: {self.span_id}){parent_info}")
-class Trace(TraceJudgmentType):
+class Trace(JudgmentTrace):
     pass

judgeval/data/trace_run.py CHANGED Viewed

@@ -2,8 +2,7 @@ from pydantic import BaseModel
 from typing import List, Optional, Dict, Any, Union
 from judgeval.data import Trace
 from judgeval.scorers import APIScorerConfig, BaseScorer
-from judgeval.rules import Rule
-from judgeval.constants import DEFAULT_GPT_MODEL
+from judgeval.env import JUDGMENT_DEFAULT_GPT_MODEL
 class TraceRun(BaseModel):
@@ -27,9 +26,13 @@ class TraceRun(BaseModel):
     eval_name: Optional[str] = None
     traces: Optional[List[Trace]] = None
     scorers: List[Union[APIScorerConfig, BaseScorer]]
-    model: Optional[str] = DEFAULT_GPT_MODEL
+    model: Optional[str] = JUDGMENT_DEFAULT_GPT_MODEL
     trace_span_id: Optional[str] = None
-    rules: Optional[List[Rule]] = None
+    append: Optional[bool] = False
+    override: Optional[bool] = False
+    # TODO: ?
+    rules: Any = None
     tools: Optional[List[Dict[str, Any]]] = None
     class Config:

judgeval 0.8.0__py3-none-any.whl → 0.9.0__py3-none-any.whl

judgeval 0.8.0__py3-none-any.whl → 0.9.0__py3-none-any.whl