PyPI - judgeval - Versions diffs - 0.1.0__py3-none-any.whl → 0.23.0__py3-none-any.whl - Mend

judgeval 0.1.0-py3-none-any.whl → 0.23.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (234) hide show

judgeval/__init__.py +173 -10
judgeval/api/__init__.py +523 -0
judgeval/api/api_types.py +413 -0
judgeval/cli.py +112 -0
judgeval/constants.py +7 -30
judgeval/data/__init__.py +1 -3
judgeval/data/evaluation_run.py +125 -0
judgeval/data/example.py +14 -40
judgeval/data/judgment_types.py +396 -146
judgeval/data/result.py +11 -18
judgeval/data/scorer_data.py +3 -26
judgeval/data/scripts/openapi_transform.py +5 -5
judgeval/data/trace.py +115 -194
judgeval/dataset/__init__.py +335 -0
judgeval/env.py +55 -0
judgeval/evaluation/__init__.py +346 -0
judgeval/exceptions.py +28 -0
judgeval/integrations/langgraph/__init__.py +13 -0
judgeval/integrations/openlit/__init__.py +51 -0
judgeval/judges/__init__.py +2 -2
judgeval/judges/litellm_judge.py +77 -16
judgeval/judges/together_judge.py +88 -17
judgeval/judges/utils.py +7 -20
judgeval/judgment_attribute_keys.py +55 -0
judgeval/{common/logger.py → logger.py} +24 -8
judgeval/prompt/__init__.py +330 -0
judgeval/scorers/__init__.py +11 -11
judgeval/scorers/agent_scorer.py +15 -19
judgeval/scorers/api_scorer.py +21 -23
judgeval/scorers/base_scorer.py +54 -36
judgeval/scorers/example_scorer.py +1 -3
judgeval/scorers/judgeval_scorers/api_scorers/__init__.py +2 -24
judgeval/scorers/judgeval_scorers/api_scorers/answer_correctness.py +2 -10
judgeval/scorers/judgeval_scorers/api_scorers/answer_relevancy.py +2 -2
judgeval/scorers/judgeval_scorers/api_scorers/faithfulness.py +2 -10
judgeval/scorers/judgeval_scorers/api_scorers/instruction_adherence.py +2 -14
judgeval/scorers/judgeval_scorers/api_scorers/prompt_scorer.py +171 -59
judgeval/scorers/score.py +64 -47
judgeval/scorers/utils.py +2 -107
judgeval/tracer/__init__.py +1111 -2
judgeval/tracer/constants.py +1 -0
judgeval/tracer/exporters/__init__.py +40 -0
judgeval/tracer/exporters/s3.py +119 -0
judgeval/tracer/exporters/store.py +59 -0
judgeval/tracer/exporters/utils.py +32 -0
judgeval/tracer/keys.py +63 -0
judgeval/tracer/llm/__init__.py +7 -0
judgeval/tracer/llm/config.py +78 -0
judgeval/tracer/llm/constants.py +9 -0
judgeval/tracer/llm/llm_anthropic/__init__.py +3 -0
judgeval/tracer/llm/llm_anthropic/config.py +6 -0
judgeval/tracer/llm/llm_anthropic/messages.py +452 -0
judgeval/tracer/llm/llm_anthropic/messages_stream.py +322 -0
judgeval/tracer/llm/llm_anthropic/wrapper.py +59 -0
judgeval/tracer/llm/llm_google/__init__.py +3 -0
judgeval/tracer/llm/llm_google/config.py +6 -0
judgeval/tracer/llm/llm_google/generate_content.py +127 -0
judgeval/tracer/llm/llm_google/wrapper.py +30 -0
judgeval/tracer/llm/llm_openai/__init__.py +3 -0
judgeval/tracer/llm/llm_openai/beta_chat_completions.py +216 -0
judgeval/tracer/llm/llm_openai/chat_completions.py +501 -0
judgeval/tracer/llm/llm_openai/config.py +6 -0
judgeval/tracer/llm/llm_openai/responses.py +506 -0
judgeval/tracer/llm/llm_openai/utils.py +42 -0
judgeval/tracer/llm/llm_openai/wrapper.py +63 -0
judgeval/tracer/llm/llm_together/__init__.py +3 -0
judgeval/tracer/llm/llm_together/chat_completions.py +406 -0
judgeval/tracer/llm/llm_together/config.py +6 -0
judgeval/tracer/llm/llm_together/wrapper.py +52 -0
judgeval/tracer/llm/providers.py +19 -0
judgeval/tracer/managers.py +167 -0
judgeval/tracer/processors/__init__.py +220 -0
judgeval/tracer/utils.py +19 -0
judgeval/trainer/__init__.py +14 -0
judgeval/trainer/base_trainer.py +122 -0
judgeval/trainer/config.py +123 -0
judgeval/trainer/console.py +144 -0
judgeval/trainer/fireworks_trainer.py +392 -0
judgeval/trainer/trainable_model.py +252 -0
judgeval/trainer/trainer.py +70 -0
judgeval/utils/async_utils.py +39 -0
judgeval/utils/decorators/__init__.py +0 -0
judgeval/utils/decorators/dont_throw.py +37 -0
judgeval/utils/decorators/use_once.py +13 -0
judgeval/utils/file_utils.py +74 -28
judgeval/utils/guards.py +36 -0
judgeval/utils/meta.py +27 -0
judgeval/utils/project.py +15 -0
judgeval/utils/serialize.py +253 -0
judgeval/utils/testing.py +70 -0
judgeval/utils/url.py +10 -0
judgeval/{version_check.py → utils/version_check.py} +5 -3
judgeval/utils/wrappers/README.md +3 -0
judgeval/utils/wrappers/__init__.py +15 -0
judgeval/utils/wrappers/immutable_wrap_async.py +74 -0
judgeval/utils/wrappers/immutable_wrap_async_iterator.py +84 -0
judgeval/utils/wrappers/immutable_wrap_sync.py +66 -0
judgeval/utils/wrappers/immutable_wrap_sync_iterator.py +84 -0
judgeval/utils/wrappers/mutable_wrap_async.py +67 -0
judgeval/utils/wrappers/mutable_wrap_sync.py +67 -0
judgeval/utils/wrappers/py.typed +0 -0
judgeval/utils/wrappers/utils.py +35 -0
judgeval/v1/__init__.py +88 -0
judgeval/v1/data/__init__.py +7 -0
judgeval/v1/data/example.py +44 -0
judgeval/v1/data/scorer_data.py +42 -0
judgeval/v1/data/scoring_result.py +44 -0
judgeval/v1/datasets/__init__.py +6 -0
judgeval/v1/datasets/dataset.py +214 -0
judgeval/v1/datasets/dataset_factory.py +94 -0
judgeval/v1/evaluation/__init__.py +6 -0
judgeval/v1/evaluation/evaluation.py +182 -0
judgeval/v1/evaluation/evaluation_factory.py +17 -0
judgeval/v1/instrumentation/__init__.py +6 -0
judgeval/v1/instrumentation/llm/__init__.py +7 -0
judgeval/v1/instrumentation/llm/config.py +78 -0
judgeval/v1/instrumentation/llm/constants.py +11 -0
judgeval/v1/instrumentation/llm/llm_anthropic/__init__.py +5 -0
judgeval/v1/instrumentation/llm/llm_anthropic/config.py +6 -0
judgeval/v1/instrumentation/llm/llm_anthropic/messages.py +414 -0
judgeval/v1/instrumentation/llm/llm_anthropic/messages_stream.py +307 -0
judgeval/v1/instrumentation/llm/llm_anthropic/wrapper.py +61 -0
judgeval/v1/instrumentation/llm/llm_google/__init__.py +5 -0
judgeval/v1/instrumentation/llm/llm_google/config.py +6 -0
judgeval/v1/instrumentation/llm/llm_google/generate_content.py +121 -0
judgeval/v1/instrumentation/llm/llm_google/wrapper.py +30 -0
judgeval/v1/instrumentation/llm/llm_openai/__init__.py +5 -0
judgeval/v1/instrumentation/llm/llm_openai/beta_chat_completions.py +212 -0
judgeval/v1/instrumentation/llm/llm_openai/chat_completions.py +477 -0
judgeval/v1/instrumentation/llm/llm_openai/config.py +6 -0
judgeval/v1/instrumentation/llm/llm_openai/responses.py +472 -0
judgeval/v1/instrumentation/llm/llm_openai/utils.py +41 -0
judgeval/v1/instrumentation/llm/llm_openai/wrapper.py +63 -0
judgeval/v1/instrumentation/llm/llm_together/__init__.py +5 -0
judgeval/v1/instrumentation/llm/llm_together/chat_completions.py +382 -0
judgeval/v1/instrumentation/llm/llm_together/config.py +6 -0
judgeval/v1/instrumentation/llm/llm_together/wrapper.py +57 -0
judgeval/v1/instrumentation/llm/providers.py +19 -0
judgeval/v1/integrations/claude_agent_sdk/__init__.py +119 -0
judgeval/v1/integrations/claude_agent_sdk/wrapper.py +564 -0
judgeval/v1/integrations/langgraph/__init__.py +13 -0
judgeval/v1/integrations/openlit/__init__.py +47 -0
judgeval/v1/internal/api/__init__.py +525 -0
judgeval/v1/internal/api/api_types.py +413 -0
judgeval/v1/prompts/__init__.py +6 -0
judgeval/v1/prompts/prompt.py +29 -0
judgeval/v1/prompts/prompt_factory.py +189 -0
judgeval/v1/py.typed +0 -0
judgeval/v1/scorers/__init__.py +6 -0
judgeval/v1/scorers/api_scorer.py +82 -0
judgeval/v1/scorers/base_scorer.py +17 -0
judgeval/v1/scorers/built_in/__init__.py +17 -0
judgeval/v1/scorers/built_in/answer_correctness.py +28 -0
judgeval/v1/scorers/built_in/answer_relevancy.py +28 -0
judgeval/v1/scorers/built_in/built_in_factory.py +26 -0
judgeval/v1/scorers/built_in/faithfulness.py +28 -0
judgeval/v1/scorers/built_in/instruction_adherence.py +28 -0
judgeval/v1/scorers/custom_scorer/__init__.py +6 -0
judgeval/v1/scorers/custom_scorer/custom_scorer.py +50 -0
judgeval/v1/scorers/custom_scorer/custom_scorer_factory.py +16 -0
judgeval/v1/scorers/prompt_scorer/__init__.py +6 -0
judgeval/v1/scorers/prompt_scorer/prompt_scorer.py +86 -0
judgeval/v1/scorers/prompt_scorer/prompt_scorer_factory.py +85 -0
judgeval/v1/scorers/scorers_factory.py +49 -0
judgeval/v1/tracer/__init__.py +7 -0
judgeval/v1/tracer/base_tracer.py +520 -0
judgeval/v1/tracer/exporters/__init__.py +14 -0
judgeval/v1/tracer/exporters/in_memory_span_exporter.py +25 -0
judgeval/v1/tracer/exporters/judgment_span_exporter.py +42 -0
judgeval/v1/tracer/exporters/noop_span_exporter.py +19 -0
judgeval/v1/tracer/exporters/span_store.py +50 -0
judgeval/v1/tracer/judgment_tracer_provider.py +70 -0
judgeval/v1/tracer/processors/__init__.py +6 -0
judgeval/v1/tracer/processors/_lifecycles/__init__.py +28 -0
judgeval/v1/tracer/processors/_lifecycles/agent_id_processor.py +53 -0
judgeval/v1/tracer/processors/_lifecycles/context_keys.py +11 -0
judgeval/v1/tracer/processors/_lifecycles/customer_id_processor.py +29 -0
judgeval/v1/tracer/processors/_lifecycles/registry.py +18 -0
judgeval/v1/tracer/processors/judgment_span_processor.py +165 -0
judgeval/v1/tracer/processors/noop_span_processor.py +42 -0
judgeval/v1/tracer/tracer.py +67 -0
judgeval/v1/tracer/tracer_factory.py +38 -0
judgeval/v1/trainers/__init__.py +5 -0
judgeval/v1/trainers/base_trainer.py +62 -0
judgeval/v1/trainers/config.py +123 -0
judgeval/v1/trainers/console.py +144 -0
judgeval/v1/trainers/fireworks_trainer.py +392 -0
judgeval/v1/trainers/trainable_model.py +252 -0
judgeval/v1/trainers/trainers_factory.py +37 -0
judgeval/v1/utils.py +18 -0
judgeval/version.py +5 -0
judgeval/warnings.py +4 -0
judgeval-0.23.0.dist-info/METADATA +266 -0
judgeval-0.23.0.dist-info/RECORD +201 -0
judgeval-0.23.0.dist-info/entry_points.txt +2 -0
judgeval/clients.py +0 -34
judgeval/common/__init__.py +0 -13
judgeval/common/api/__init__.py +0 -3
judgeval/common/api/api.py +0 -352
judgeval/common/api/constants.py +0 -165
judgeval/common/exceptions.py +0 -27
judgeval/common/storage/__init__.py +0 -6
judgeval/common/storage/s3_storage.py +0 -98
judgeval/common/tracer/__init__.py +0 -31
judgeval/common/tracer/constants.py +0 -22
judgeval/common/tracer/core.py +0 -1916
judgeval/common/tracer/otel_exporter.py +0 -108
judgeval/common/tracer/otel_span_processor.py +0 -234
judgeval/common/tracer/span_processor.py +0 -37
judgeval/common/tracer/span_transformer.py +0 -211
judgeval/common/tracer/trace_manager.py +0 -92
judgeval/common/utils.py +0 -940
judgeval/data/datasets/__init__.py +0 -4
judgeval/data/datasets/dataset.py +0 -341
judgeval/data/datasets/eval_dataset_client.py +0 -214
judgeval/data/tool.py +0 -5
judgeval/data/trace_run.py +0 -37
judgeval/evaluation_run.py +0 -75
judgeval/integrations/langgraph.py +0 -843
judgeval/judges/mixture_of_judges.py +0 -286
judgeval/judgment_client.py +0 -369
judgeval/rules.py +0 -521
judgeval/run_evaluation.py +0 -684
judgeval/scorers/judgeval_scorers/api_scorers/derailment_scorer.py +0 -14
judgeval/scorers/judgeval_scorers/api_scorers/execution_order.py +0 -52
judgeval/scorers/judgeval_scorers/api_scorers/hallucination.py +0 -28
judgeval/scorers/judgeval_scorers/api_scorers/tool_dependency.py +0 -20
judgeval/scorers/judgeval_scorers/api_scorers/tool_order.py +0 -27
judgeval/utils/alerts.py +0 -93
judgeval/utils/requests.py +0 -50
judgeval-0.1.0.dist-info/METADATA +0 -202
judgeval-0.1.0.dist-info/RECORD +0 -73
{judgeval-0.1.0.dist-info → judgeval-0.23.0.dist-info}/WHEEL +0 -0
{judgeval-0.1.0.dist-info → judgeval-0.23.0.dist-info}/licenses/LICENSE.md +0 -0

judgeval/data/judgment_types.py CHANGED Viewed

@@ -1,206 +1,456 @@
 # generated by datamodel-codegen:
-#   filename:  openapi_new.json
-#   timestamp: 2025-07-17T03:14:16+00:00
+#   filename:  .openapi.json
+#   timestamp: 2025-11-18T18:52:09+00:00
 from __future__ import annotations
 from typing import Annotated, Any, Dict, List, Optional, Union
+from pydantic import AwareDatetime, BaseModel, ConfigDict, Field, RootModel
+from enum import Enum
-from pydantic import BaseModel, Field
+class TraceAndSpanId(RootModel[List]):
+    root: Annotated[List, Field(max_length=2, min_length=2)]
-class ValidationErrorJudgmentType(BaseModel):
-    loc: Annotated[List[Union[str, int]], Field(title="Location")]
-    msg: Annotated[str, Field(title="Message")]
-    type: Annotated[str, Field(title="Error Type")]
+class LogEvalResultsResponse(BaseModel):
+    ui_results_url: Annotated[str, Field(title="Ui Results Url")]
+class EvalResultsFetch(BaseModel):
+    experiment_run_id: Annotated[str, Field(title="Experiment Run Id")]
+    project_name: Annotated[str, Field(title="Project Name")]
+class FetchExperimentRunResponse(BaseModel):
+    results: Annotated[Optional[List], Field(title="Results")] = None
+    ui_results_url: Annotated[Optional[str], Field(title="Ui Results Url")] = None
+class DatasetFetch(BaseModel):
+    dataset_name: Annotated[str, Field(title="Dataset Name")]
+    project_name: Annotated[str, Field(title="Project Name")]
+class DatasetsFetch(BaseModel):
+    project_name: Annotated[str, Field(title="Project Name")]
+class ProjectAdd(BaseModel):
+    project_name: Annotated[str, Field(title="Project Name")]
+class ProjectAddResponse(BaseModel):
+    project_id: Annotated[str, Field(title="Project Id")]
+class ProjectDeleteFromJudgevalResponse(BaseModel):
+    project_name: Annotated[str, Field(title="Project Name")]
+class ProjectDeleteResponse(BaseModel):
+    message: Annotated[str, Field(title="Message")]
+class ScorerExistsRequest(BaseModel):
+    name: Annotated[str, Field(title="Name")]
+class ScorerExistsResponse(BaseModel):
+    exists: Annotated[bool, Field(title="Exists")]
-class ScorerDataJudgmentType(BaseModel):
+class SavePromptScorerRequest(BaseModel):
     name: Annotated[str, Field(title="Name")]
+    prompt: Annotated[str, Field(title="Prompt")]
     threshold: Annotated[float, Field(title="Threshold")]
-    success: Annotated[bool, Field(title="Success")]
+    model: Annotated[Optional[str], Field(title="Model")] = "gpt-5-mini"
+    is_trace: Annotated[Optional[bool], Field(title="Is Trace")] = False
+    options: Annotated[Optional[Dict[str, float]], Field(title="Options")] = None
+    description: Annotated[Optional[str], Field(title="Description")] = None
+class FetchPromptScorersRequest(BaseModel):
+    names: Annotated[Optional[List[str]], Field(title="Names")] = None
+    is_trace: Annotated[Optional[bool], Field(title="Is Trace")] = None
+class CustomScorerUploadPayload(BaseModel):
+    scorer_name: Annotated[str, Field(title="Scorer Name")]
+    scorer_code: Annotated[str, Field(title="Scorer Code")]
+    requirements_text: Annotated[str, Field(title="Requirements Text")]
+    overwrite: Annotated[Optional[bool], Field(title="Overwrite")] = False
+class CustomScorerTemplateResponse(BaseModel):
+    scorer_name: Annotated[str, Field(title="Scorer Name")]
+    status: Annotated[str, Field(title="Status")]
+    message: Annotated[str, Field(title="Message")]
+class PromptInsertRequest(BaseModel):
+    project_id: Annotated[str, Field(title="Project Id")]
+    name: Annotated[str, Field(title="Name")]
+    prompt: Annotated[str, Field(title="Prompt")]
+    tags: Annotated[List[str], Field(title="Tags")]
+class PromptInsertResponse(BaseModel):
+    commit_id: Annotated[str, Field(title="Commit Id")]
+    parent_commit_id: Annotated[Optional[str], Field(title="Parent Commit Id")] = None
+    created_at: Annotated[str, Field(title="Created At")]
+class PromptTagRequest(BaseModel):
+    project_id: Annotated[str, Field(title="Project Id")]
+    name: Annotated[str, Field(title="Name")]
+    commit_id: Annotated[str, Field(title="Commit Id")]
+    tags: Annotated[List[str], Field(title="Tags")]
+class PromptTagResponse(BaseModel):
+    commit_id: Annotated[str, Field(title="Commit Id")]
+class PromptUntagRequest(BaseModel):
+    project_id: Annotated[str, Field(title="Project Id")]
+    name: Annotated[str, Field(title="Name")]
+    tags: Annotated[List[str], Field(title="Tags")]
+class PromptUntagResponse(BaseModel):
+    commit_ids: Annotated[List[str], Field(title="Commit Ids")]
+class ResolveProjectNameRequest(BaseModel):
+    project_name: Annotated[str, Field(title="Project Name")]
+class ResolveProjectNameResponse(BaseModel):
+    project_id: Annotated[str, Field(title="Project Id")]
+class TraceIdRequest(BaseModel):
+    trace_id: Annotated[str, Field(title="Trace Id")]
+class SpanScoreRequest(BaseModel):
+    span_id: Annotated[str, Field(title="Span Id")]
+    trace_id: Annotated[str, Field(title="Trace Id")]
+class BaseScorer(BaseModel):
+    score_type: Annotated[str, Field(title="Score Type")]
+    threshold: Annotated[Optional[float], Field(title="Threshold")] = 0.5
+    name: Annotated[Optional[str], Field(title="Name")] = None
+    class_name: Annotated[Optional[str], Field(title="Class Name")] = None
     score: Annotated[Optional[float], Field(title="Score")] = None
-    reason: Annotated[Optional[str], Field(title="Reason")] = None
-    strict_mode: Annotated[Optional[bool], Field(title="Strict Mode")] = None
-    evaluation_model: Annotated[
-        Optional[Union[List[str], str]], Field(title="Evaluation Model")
+    score_breakdown: Annotated[
+        Optional[Dict[str, Any]], Field(title="Score Breakdown")
     ] = None
+    reason: Annotated[Optional[str], Field(title="Reason")] = ""
+    using_native_model: Annotated[Optional[bool], Field(title="Using Native Model")] = (
+        None
+    )
+    success: Annotated[Optional[bool], Field(title="Success")] = None
+    model: Annotated[Optional[str], Field(title="Model")] = None
+    model_client: Annotated[Any, Field(title="Model Client")] = None
+    strict_mode: Annotated[Optional[bool], Field(title="Strict Mode")] = False
     error: Annotated[Optional[str], Field(title="Error")] = None
     additional_metadata: Annotated[
         Optional[Dict[str, Any]], Field(title="Additional Metadata")
     ] = None
+    user: Annotated[Optional[str], Field(title="User")] = None
+    server_hosted: Annotated[Optional[bool], Field(title="Server Hosted")] = False
-class ScorerConfigJudgmentType(BaseModel):
+class ScorerConfig(BaseModel):
     score_type: Annotated[str, Field(title="Score Type")]
     name: Annotated[Optional[str], Field(title="Name")] = None
     threshold: Annotated[Optional[float], Field(title="Threshold")] = 0.5
     strict_mode: Annotated[Optional[bool], Field(title="Strict Mode")] = False
-    required_params: Annotated[Optional[List[str]], Field(title="Required Params")] = (
-        Field(default_factory=list)
-    )
+    required_params: Annotated[Optional[List[str]], Field(title="Required Params")] = []
     kwargs: Annotated[Optional[Dict[str, Any]], Field(title="Kwargs")] = None
-class TraceUsageJudgmentType(BaseModel):
-    prompt_tokens: Annotated[Optional[int], Field(title="Prompt Tokens")] = None
-    completion_tokens: Annotated[Optional[int], Field(title="Completion Tokens")] = None
-    cache_creation_input_tokens: Annotated[
-        Optional[int], Field(title="Cache Creation Input Tokens")
-    ] = None
-    cache_read_input_tokens: Annotated[
-        Optional[int], Field(title="Cache Read Input Tokens")
-    ] = None
-    total_tokens: Annotated[Optional[int], Field(title="Total Tokens")] = None
-    prompt_tokens_cost_usd: Annotated[
-        Optional[float], Field(title="Prompt Tokens Cost Usd")
-    ] = None
-    completion_tokens_cost_usd: Annotated[
-        Optional[float], Field(title="Completion Tokens Cost Usd")
-    ] = None
-    total_cost_usd: Annotated[Optional[float], Field(title="Total Cost Usd")] = None
-    model_name: Annotated[Optional[str], Field(title="Model Name")] = None
+class Example(BaseModel):
+    model_config = ConfigDict(
+        extra="allow",
+    )
+    example_id: Annotated[Optional[str], Field(title="Example Id")] = None
+    created_at: Annotated[Optional[str], Field(title="Created At")] = None
+    name: Annotated[Optional[str], Field(title="Name")] = None
-class ToolJudgmentType(BaseModel):
-    tool_name: Annotated[str, Field(title="Tool Name")]
-    parameters: Annotated[Optional[Dict[str, Any]], Field(title="Parameters")] = None
-    agent_name: Annotated[Optional[str], Field(title="Agent Name")] = None
-    result_dependencies: Annotated[
-        Optional[List[Dict[str, Any]]], Field(title="Result Dependencies")
-    ] = None
-    action_dependencies: Annotated[
-        Optional[List[Dict[str, Any]]], Field(title="Action Dependencies")
-    ] = None
-    require_all: Annotated[Optional[bool], Field(title="Require All")] = None
+class ValidationError(BaseModel):
+    loc: Annotated[List[Union[str, int]], Field(title="Location")]
+    msg: Annotated[str, Field(title="Message")]
+    type: Annotated[str, Field(title="Error Type")]
-class HTTPValidationErrorJudgmentType(BaseModel):
-    detail: Annotated[
-        Optional[List[ValidationErrorJudgmentType]], Field(title="Detail")
-    ] = None
+class UsageInfo(BaseModel):
+    total_judgees: Annotated[int, Field(title="Total Judgees")]
+    regular_use: Annotated[int, Field(title="Regular Use")]
+    pay_as_you_go_use: Annotated[int, Field(title="Pay As You Go Use")]
+    remaining_regular: Annotated[int, Field(title="Remaining Regular")]
+    remaining_after: Annotated[int, Field(title="Remaining After")]
-class TraceSpanJudgmentType(BaseModel):
-    span_id: Annotated[str, Field(title="Span Id")]
-    trace_id: Annotated[str, Field(title="Trace Id")]
-    function: Annotated[str, Field(title="Function")]
-    depth: Annotated[int, Field(title="Depth")]
-    created_at: Annotated[Any, Field(title="Created At")] = None
-    parent_span_id: Annotated[Optional[str], Field(title="Parent Span Id")] = None
-    span_type: Annotated[Optional[str], Field(title="Span Type")] = "span"
-    inputs: Annotated[Optional[Dict[str, Any]], Field(title="Inputs")] = None
-    error: Annotated[Optional[Dict[str, Any]], Field(title="Error")] = None
-    output: Annotated[Any, Field(title="Output")] = None
-    usage: Optional[TraceUsageJudgmentType] = None
-    duration: Annotated[Optional[float], Field(title="Duration")] = None
-    expected_tools: Annotated[
-        Optional[List[ToolJudgmentType]], Field(title="Expected Tools")
-    ] = None
+class DatasetKind(Enum):
+    trace = "trace"
+    example = "example"
+class PromptScorer(BaseModel):
+    id: Annotated[str, Field(title="Id")]
+    user_id: Annotated[str, Field(title="User Id")]
+    organization_id: Annotated[str, Field(title="Organization Id")]
+    name: Annotated[str, Field(title="Name")]
+    prompt: Annotated[str, Field(title="Prompt")]
+    threshold: Annotated[float, Field(title="Threshold")]
+    model: Annotated[Optional[str], Field(title="Model")] = "gpt-5-mini"
+    options: Annotated[Optional[Dict[str, float]], Field(title="Options")] = None
+    description: Annotated[Optional[str], Field(title="Description")] = None
+    created_at: Annotated[Optional[AwareDatetime], Field(title="Created At")] = None
+    updated_at: Annotated[Optional[AwareDatetime], Field(title="Updated At")] = None
+    is_trace: Annotated[Optional[bool], Field(title="Is Trace")] = False
+    is_bucket_rubric: Annotated[Optional[bool], Field(title="Is Bucket Rubric")] = None
+class PromptCommitInfo(BaseModel):
+    name: Annotated[str, Field(title="Name")]
+    prompt: Annotated[str, Field(title="Prompt")]
+    tags: Annotated[List[str], Field(title="Tags")]
+    commit_id: Annotated[str, Field(title="Commit Id")]
+    parent_commit_id: Annotated[Optional[str], Field(title="Parent Commit Id")] = None
+    created_at: Annotated[str, Field(title="Created At")]
+    first_name: Annotated[str, Field(title="First Name")]
+    last_name: Annotated[str, Field(title="Last Name")]
+    user_email: Annotated[str, Field(title="User Email")]
+class ScorerData(BaseModel):
+    id: Annotated[Optional[str], Field(title="Id")] = None
+    name: Annotated[str, Field(title="Name")]
+    threshold: Annotated[float, Field(title="Threshold")]
+    success: Annotated[bool, Field(title="Success")]
+    score: Annotated[Optional[float], Field(title="Score")] = None
+    reason: Annotated[Optional[str], Field(title="Reason")] = None
+    strict_mode: Annotated[Optional[bool], Field(title="Strict Mode")] = None
+    evaluation_model: Annotated[Optional[str], Field(title="Evaluation Model")] = None
+    error: Annotated[Optional[str], Field(title="Error")] = None
     additional_metadata: Annotated[
         Optional[Dict[str, Any]], Field(title="Additional Metadata")
     ] = None
-    has_evaluation: Annotated[Optional[bool], Field(title="Has Evaluation")] = False
-    agent_name: Annotated[Optional[str], Field(title="Agent Name")] = None
-    state_before: Annotated[Optional[Dict[str, Any]], Field(title="State Before")] = (
-        None
-    )
-    state_after: Annotated[Optional[Dict[str, Any]], Field(title="State After")] = None
-    update_id: Annotated[Optional[int], Field(title="Update Id")] = 1
-class ExampleJudgmentType(BaseModel):
-    input: Annotated[Optional[Union[str, Dict[str, Any]]], Field(title="Input")] = None
-    actual_output: Annotated[
-        Optional[Union[str, List[str]]], Field(title="Actual Output")
-    ] = None
-    expected_output: Annotated[
-        Optional[Union[str, List[str]]], Field(title="Expected Output")
-    ] = None
-    context: Annotated[Optional[List[str]], Field(title="Context")] = None
-    retrieval_context: Annotated[
-        Optional[List[str]], Field(title="Retrieval Context")
+class OtelTraceSpan(BaseModel):
+    organization_id: Annotated[str, Field(title="Organization Id")]
+    project_id: Annotated[Optional[str], Field(title="Project Id")] = None
+    user_id: Annotated[str, Field(title="User Id")]
+    timestamp: Annotated[str, Field(title="Timestamp")]
+    trace_id: Annotated[str, Field(title="Trace Id")]
+    span_id: Annotated[str, Field(title="Span Id")]
+    parent_span_id: Annotated[Optional[str], Field(title="Parent Span Id")] = None
+    trace_state: Annotated[Optional[str], Field(title="Trace State")] = None
+    span_name: Annotated[Optional[str], Field(title="Span Name")] = None
+    span_kind: Annotated[Optional[str], Field(title="Span Kind")] = None
+    service_name: Annotated[Optional[str], Field(title="Service Name")] = None
+    resource_attributes: Annotated[
+        Optional[Dict[str, Any]], Field(title="Resource Attributes")
     ] = None
-    additional_metadata: Annotated[
-        Optional[Dict[str, Any]], Field(title="Additional Metadata")
+    span_attributes: Annotated[
+        Optional[Dict[str, Any]], Field(title="Span Attributes")
     ] = None
-    tools_called: Annotated[Optional[List[str]], Field(title="Tools Called")] = Field(
-        default_factory=list
-    )
-    expected_tools: Annotated[
-        Optional[List[ToolJudgmentType]], Field(title="Expected Tools")
-    ] = Field(default_factory=list)
-    name: Annotated[Optional[str], Field(title="Name")] = None
-    example_id: Annotated[str, Field(title="Example Id")]
-    example_index: Annotated[Optional[int], Field(title="Example Index")] = None
+    duration: Annotated[Optional[int], Field(title="Duration")] = None
+    status_code: Annotated[Optional[int], Field(title="Status Code")] = None
+    status_message: Annotated[Optional[str], Field(title="Status Message")] = None
+    events: Annotated[Optional[List[Dict[str, Any]]], Field(title="Events")] = None
+    links: Annotated[Optional[List[Dict[str, Any]]], Field(title="Links")] = None
+class OtelSpanListItemScores(BaseModel):
+    success: Annotated[bool, Field(title="Success")]
+    score: Annotated[float, Field(title="Score")]
+    reason: Annotated[Optional[str], Field(title="Reason")] = None
+    name: Annotated[str, Field(title="Name")]
+class OtelSpanDetailScores(BaseModel):
+    success: Annotated[bool, Field(title="Success")]
+    score: Annotated[float, Field(title="Score")]
+    reason: Annotated[Optional[str], Field(title="Reason")] = None
+    name: Annotated[str, Field(title="Name")]
+    example_id: Annotated[Optional[str], Field(title="Example Id")] = None
+class ExampleEvaluationRun(BaseModel):
+    id: Annotated[Optional[str], Field(title="Id")] = None
+    project_name: Annotated[str, Field(title="Project Name")]
+    eval_name: Annotated[str, Field(title="Eval Name")]
+    custom_scorers: Annotated[
+        Optional[List[BaseScorer]], Field(title="Custom Scorers")
+    ] = []
+    judgment_scorers: Annotated[
+        Optional[List[ScorerConfig]], Field(title="Judgment Scorers")
+    ] = []
     created_at: Annotated[Optional[str], Field(title="Created At")] = None
-    trace_id: Annotated[Optional[str], Field(title="Trace Id")] = None
+    examples: Annotated[List[Example], Field(title="Examples")]
     trace_span_id: Annotated[Optional[str], Field(title="Trace Span Id")] = None
-    dataset_id: Annotated[Optional[str], Field(title="Dataset Id")] = None
+    trace_id: Annotated[Optional[str], Field(title="Trace Id")] = None
-class TraceJudgmentType(BaseModel):
-    trace_id: Annotated[str, Field(title="Trace Id")]
+class HTTPValidationError(BaseModel):
+    detail: Annotated[Optional[List[ValidationError]], Field(title="Detail")] = None
+class TraceEvaluationRun(BaseModel):
+    id: Annotated[Optional[str], Field(title="Id")] = None
+    project_name: Annotated[str, Field(title="Project Name")]
+    eval_name: Annotated[str, Field(title="Eval Name")]
+    custom_scorers: Annotated[
+        Optional[List[BaseScorer]], Field(title="Custom Scorers")
+    ] = []
+    judgment_scorers: Annotated[
+        Optional[List[ScorerConfig]], Field(title="Judgment Scorers")
+    ] = []
+    created_at: Annotated[Optional[str], Field(title="Created At")] = None
+    trace_and_span_ids: Annotated[
+        List[TraceAndSpanId], Field(title="Trace And Span Ids")
+    ]
+    is_offline: Annotated[Optional[bool], Field(title="Is Offline")] = False
+    is_bucket_run: Annotated[Optional[bool], Field(title="Is Bucket Run")] = False
+class DatasetInsertExamples(BaseModel):
+    dataset_name: Annotated[str, Field(title="Dataset Name")]
+    examples: Annotated[List[Example], Field(title="Examples")]
+    project_name: Annotated[str, Field(title="Project Name")]
+class DatasetInfo(BaseModel):
+    dataset_id: Annotated[str, Field(title="Dataset Id")]
     name: Annotated[str, Field(title="Name")]
     created_at: Annotated[str, Field(title="Created At")]
-    duration: Annotated[float, Field(title="Duration")]
-    trace_spans: Annotated[List[TraceSpanJudgmentType], Field(title="Trace Spans")]
-    overwrite: Annotated[Optional[bool], Field(title="Overwrite")] = False
-    offline_mode: Annotated[Optional[bool], Field(title="Offline Mode")] = False
-    rules: Annotated[Optional[Dict[str, Any]], Field(title="Rules")] = Field(
-        default_factory=dict
-    )
-    has_notification: Annotated[Optional[bool], Field(title="Has Notification")] = False
-    customer_id: Annotated[Optional[str], Field(title="Customer Id")] = None
-    tags: Annotated[Optional[List[str]], Field(title="Tags")] = Field(
-        default_factory=list
-    )
-    metadata: Annotated[Optional[Dict[str, Any]], Field(title="Metadata")] = Field(
-        default_factory=dict
-    )
-    update_id: Annotated[Optional[int], Field(title="Update Id")] = 1
+    kind: DatasetKind
+    entries: Annotated[int, Field(title="Entries")]
+    creator: Annotated[str, Field(title="Creator")]
+class DatasetCreate(BaseModel):
+    name: Annotated[str, Field(title="Name")]
+    dataset_kind: DatasetKind
+    project_name: Annotated[str, Field(title="Project Name")]
+    examples: Annotated[List[Example], Field(title="Examples")]
+    overwrite: Annotated[bool, Field(title="Overwrite")]
-class ScoringResultJudgmentType(BaseModel):
+class SavePromptScorerResponse(BaseModel):
+    scorer_response: PromptScorer
+class FetchPromptScorersResponse(BaseModel):
+    scorers: Annotated[List[PromptScorer], Field(title="Scorers")]
+class PromptFetchResponse(BaseModel):
+    commit: Optional[PromptCommitInfo] = None
+class PromptVersionsResponse(BaseModel):
+    versions: Annotated[List[PromptCommitInfo], Field(title="Versions")]
+class ScoringResult(BaseModel):
     success: Annotated[bool, Field(title="Success")]
-    scorers_data: Annotated[
-        Optional[List[ScorerDataJudgmentType]], Field(title="Scorers Data")
-    ]
+    scorers_data: Annotated[List[ScorerData], Field(title="Scorers Data")]
     name: Annotated[Optional[str], Field(title="Name")] = None
     data_object: Annotated[
-        Optional[Union[TraceSpanJudgmentType, ExampleJudgmentType]],
-        Field(title="Data Object"),
+        Optional[Union[OtelTraceSpan, Example]], Field(title="Data Object")
     ] = None
     trace_id: Annotated[Optional[str], Field(title="Trace Id")] = None
     run_duration: Annotated[Optional[float], Field(title="Run Duration")] = None
     evaluation_cost: Annotated[Optional[float], Field(title="Evaluation Cost")] = None
-class TraceRunJudgmentType(BaseModel):
-    project_name: Annotated[Optional[str], Field(title="Project Name")] = None
-    eval_name: Annotated[Optional[str], Field(title="Eval Name")] = None
-    traces: Annotated[List[TraceJudgmentType], Field(title="Traces")]
-    scorers: Annotated[List[ScorerConfigJudgmentType], Field(title="Scorers")]
-    model: Annotated[str, Field(title="Model")]
-    append: Annotated[Optional[bool], Field(title="Append")] = False
-    override: Annotated[Optional[bool], Field(title="Override")] = False
-    trace_span_id: Annotated[Optional[str], Field(title="Trace Span Id")] = None
-    tools: Annotated[Optional[List[Dict[str, Any]]], Field(title="Tools")] = None
+class OtelTraceListItem(BaseModel):
+    organization_id: Annotated[str, Field(title="Organization Id")]
+    project_id: Annotated[str, Field(title="Project Id")]
+    trace_id: Annotated[str, Field(title="Trace Id")]
+    created_at: Annotated[AwareDatetime, Field(title="Created At")]
+    duration: Annotated[Optional[int], Field(title="Duration")] = None
+    tags: Annotated[Optional[List[str]], Field(title="Tags")] = None
+    experiment_run_id: Annotated[Optional[str], Field(title="Experiment Run Id")] = None
+    span_name: Annotated[Optional[str], Field(title="Span Name")] = None
+    llm_cost: Annotated[Optional[float], Field(title="Llm Cost")] = None
+    error: Annotated[Optional[str], Field(title="Error")] = ""
+    scores: Annotated[
+        Optional[List[OtelSpanListItemScores]], Field(title="Scores")
+    ] = []
+    rules_invoked: Annotated[Optional[List[str]], Field(title="Rules Invoked")] = []
+    customer_id: Annotated[Optional[str], Field(title="Customer Id")] = None
+    input: Annotated[Optional[str], Field(title="Input")] = None
+    output: Annotated[Optional[str], Field(title="Output")] = None
+    input_preview: Annotated[Optional[str], Field(title="Input Preview")] = None
+    output_preview: Annotated[Optional[str], Field(title="Output Preview")] = None
+    annotation_count: Annotated[Optional[int], Field(title="Annotation Count")] = 0
+    span_id: Annotated[str, Field(title="Span Id")]
+    rule_id: Annotated[Optional[str], Field(title="Rule Id")] = None
-class JudgmentEvalJudgmentType(BaseModel):
-    project_name: Annotated[Optional[str], Field(title="Project Name")] = None
-    eval_name: Annotated[Optional[str], Field(title="Eval Name")] = None
-    examples: Annotated[List[ExampleJudgmentType], Field(title="Examples")]
-    scorers: Annotated[List[ScorerConfigJudgmentType], Field(title="Scorers")]
-    model: Annotated[str, Field(title="Model")]
-    append: Annotated[Optional[bool], Field(title="Append")] = False
-    override: Annotated[Optional[bool], Field(title="Override")] = False
-    trace_span_id: Annotated[Optional[str], Field(title="Trace Span Id")] = None
+class OtelSpanDetail(BaseModel):
+    organization_id: Annotated[str, Field(title="Organization Id")]
+    project_id: Annotated[str, Field(title="Project Id")]
+    timestamp: Annotated[AwareDatetime, Field(title="Timestamp")]
+    trace_id: Annotated[str, Field(title="Trace Id")]
+    span_id: Annotated[str, Field(title="Span Id")]
+    parent_span_id: Annotated[Optional[str], Field(title="Parent Span Id")] = None
+    trace_state: Annotated[Optional[str], Field(title="Trace State")] = None
+    span_name: Annotated[Optional[str], Field(title="Span Name")] = None
+    span_kind: Annotated[Optional[str], Field(title="Span Kind")] = None
+    service_name: Annotated[Optional[str], Field(title="Service Name")] = None
+    resource_attributes: Annotated[
+        Optional[Dict[str, Any]], Field(title="Resource Attributes")
+    ] = None
+    span_attributes: Annotated[
+        Optional[Dict[str, Any]], Field(title="Span Attributes")
+    ] = None
+    duration: Annotated[Optional[int], Field(title="Duration")] = None
+    status_code: Annotated[Optional[int], Field(title="Status Code")] = None
+    status_message: Annotated[Optional[str], Field(title="Status Message")] = None
+    events: Annotated[Optional[List[Dict[str, Any]]], Field(title="Events")] = None
+    links: Annotated[
+        Optional[Union[List[Dict[str, Any]], Dict[str, Any]]], Field(title="Links")
+    ] = None
+    llm_cost: Annotated[Optional[float], Field(title="Llm Cost")] = None
+    prompt_tokens: Annotated[Optional[int], Field(title="Prompt Tokens")] = None
+    completion_tokens: Annotated[Optional[int], Field(title="Completion Tokens")] = None
+    scores: Annotated[Optional[List[OtelSpanDetailScores]], Field(title="Scores")] = (
+        None
+    )
-class EvalResultsJudgmentType(BaseModel):
-    results: Annotated[List[ScoringResultJudgmentType], Field(title="Results")]
-    run: Annotated[
-        Union[TraceRunJudgmentType, JudgmentEvalJudgmentType], Field(title="Run")
-    ]
+class EvaluateResponse(BaseModel):
+    status: Annotated[str, Field(title="Status")]
+    results: Annotated[List[ScoringResult], Field(title="Results")]
+    resource_usage: Optional[UsageInfo] = None
+class EvalResults(BaseModel):
+    results: Annotated[List[ScoringResult], Field(title="Results")]
+    run: Annotated[Union[ExampleEvaluationRun, TraceEvaluationRun], Field(title="Run")]
+class DatasetTraceWithSpans(BaseModel):
+    dataset_id: Annotated[str, Field(title="Dataset Id")]
+    trace_detail: OtelTraceListItem
+    spans: Annotated[List[OtelSpanDetail], Field(title="Spans")]
+class DatasetReturn(BaseModel):
+    name: Annotated[str, Field(title="Name")]
+    project_name: Annotated[str, Field(title="Project Name")]
+    dataset_kind: DatasetKind
+    examples: Annotated[Optional[List[Example]], Field(title="Examples")] = None
+    traces: Annotated[Optional[List[DatasetTraceWithSpans]], Field(title="Traces")] = (
+        None
+    )

judgeval 0.1.0__py3-none-any.whl → 0.23.0__py3-none-any.whl

judgeval 0.1.0py3-none-any.whl → 0.23.0py3-none-any.whl