arize-phoenix 11.23.1__py3-none-any.whl → 12.28.1__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
- {arize_phoenix-11.23.1.dist-info → arize_phoenix-12.28.1.dist-info}/METADATA +61 -36
- {arize_phoenix-11.23.1.dist-info → arize_phoenix-12.28.1.dist-info}/RECORD +212 -162
- {arize_phoenix-11.23.1.dist-info → arize_phoenix-12.28.1.dist-info}/WHEEL +1 -1
- {arize_phoenix-11.23.1.dist-info → arize_phoenix-12.28.1.dist-info}/licenses/IP_NOTICE +1 -1
- phoenix/__generated__/__init__.py +0 -0
- phoenix/__generated__/classification_evaluator_configs/__init__.py +20 -0
- phoenix/__generated__/classification_evaluator_configs/_document_relevance_classification_evaluator_config.py +17 -0
- phoenix/__generated__/classification_evaluator_configs/_hallucination_classification_evaluator_config.py +17 -0
- phoenix/__generated__/classification_evaluator_configs/_models.py +18 -0
- phoenix/__generated__/classification_evaluator_configs/_tool_selection_classification_evaluator_config.py +17 -0
- phoenix/__init__.py +2 -1
- phoenix/auth.py +27 -2
- phoenix/config.py +1594 -81
- phoenix/db/README.md +546 -28
- phoenix/db/bulk_inserter.py +119 -116
- phoenix/db/engines.py +140 -33
- phoenix/db/facilitator.py +22 -1
- phoenix/db/helpers.py +818 -65
- phoenix/db/iam_auth.py +64 -0
- phoenix/db/insertion/dataset.py +133 -1
- phoenix/db/insertion/document_annotation.py +9 -6
- phoenix/db/insertion/evaluation.py +2 -3
- phoenix/db/insertion/helpers.py +2 -2
- phoenix/db/insertion/session_annotation.py +176 -0
- phoenix/db/insertion/span_annotation.py +3 -4
- phoenix/db/insertion/trace_annotation.py +3 -4
- phoenix/db/insertion/types.py +41 -18
- phoenix/db/migrations/versions/01a8342c9cdf_add_user_id_on_datasets.py +40 -0
- phoenix/db/migrations/versions/0df286449799_add_session_annotations_table.py +105 -0
- phoenix/db/migrations/versions/272b66ff50f8_drop_single_indices.py +119 -0
- phoenix/db/migrations/versions/58228d933c91_dataset_labels.py +67 -0
- phoenix/db/migrations/versions/699f655af132_experiment_tags.py +57 -0
- phoenix/db/migrations/versions/735d3d93c33e_add_composite_indices.py +41 -0
- phoenix/db/migrations/versions/ab513d89518b_add_user_id_on_dataset_versions.py +40 -0
- phoenix/db/migrations/versions/d0690a79ea51_users_on_experiments.py +40 -0
- phoenix/db/migrations/versions/deb2c81c0bb2_dataset_splits.py +139 -0
- phoenix/db/migrations/versions/e76cbd66ffc3_add_experiments_dataset_examples.py +87 -0
- phoenix/db/models.py +364 -56
- phoenix/db/pg_config.py +10 -0
- phoenix/db/types/trace_retention.py +7 -6
- phoenix/experiments/functions.py +69 -19
- phoenix/inferences/inferences.py +1 -2
- phoenix/server/api/auth.py +9 -0
- phoenix/server/api/auth_messages.py +46 -0
- phoenix/server/api/context.py +60 -0
- phoenix/server/api/dataloaders/__init__.py +36 -0
- phoenix/server/api/dataloaders/annotation_summaries.py +60 -8
- phoenix/server/api/dataloaders/average_experiment_repeated_run_group_latency.py +50 -0
- phoenix/server/api/dataloaders/average_experiment_run_latency.py +17 -24
- phoenix/server/api/dataloaders/cache/two_tier_cache.py +1 -2
- phoenix/server/api/dataloaders/dataset_dataset_splits.py +52 -0
- phoenix/server/api/dataloaders/dataset_example_revisions.py +0 -1
- phoenix/server/api/dataloaders/dataset_example_splits.py +40 -0
- phoenix/server/api/dataloaders/dataset_examples_and_versions_by_experiment_run.py +47 -0
- phoenix/server/api/dataloaders/dataset_labels.py +36 -0
- phoenix/server/api/dataloaders/document_evaluation_summaries.py +2 -2
- phoenix/server/api/dataloaders/document_evaluations.py +6 -9
- phoenix/server/api/dataloaders/experiment_annotation_summaries.py +88 -34
- phoenix/server/api/dataloaders/experiment_dataset_splits.py +43 -0
- phoenix/server/api/dataloaders/experiment_error_rates.py +21 -28
- phoenix/server/api/dataloaders/experiment_repeated_run_group_annotation_summaries.py +77 -0
- phoenix/server/api/dataloaders/experiment_repeated_run_groups.py +57 -0
- phoenix/server/api/dataloaders/experiment_runs_by_experiment_and_example.py +44 -0
- phoenix/server/api/dataloaders/latency_ms_quantile.py +40 -8
- phoenix/server/api/dataloaders/record_counts.py +37 -10
- phoenix/server/api/dataloaders/session_annotations_by_session.py +29 -0
- phoenix/server/api/dataloaders/span_cost_summary_by_experiment_repeated_run_group.py +64 -0
- phoenix/server/api/dataloaders/span_cost_summary_by_project.py +28 -14
- phoenix/server/api/dataloaders/span_costs.py +3 -9
- phoenix/server/api/dataloaders/table_fields.py +2 -2
- phoenix/server/api/dataloaders/token_prices_by_model.py +30 -0
- phoenix/server/api/dataloaders/trace_annotations_by_trace.py +27 -0
- phoenix/server/api/exceptions.py +5 -1
- phoenix/server/api/helpers/playground_clients.py +263 -83
- phoenix/server/api/helpers/playground_spans.py +2 -1
- phoenix/server/api/helpers/playground_users.py +26 -0
- phoenix/server/api/helpers/prompts/conversions/google.py +103 -0
- phoenix/server/api/helpers/prompts/models.py +61 -19
- phoenix/server/api/input_types/{SpanAnnotationFilter.py → AnnotationFilter.py} +22 -14
- phoenix/server/api/input_types/ChatCompletionInput.py +3 -0
- phoenix/server/api/input_types/CreateProjectSessionAnnotationInput.py +37 -0
- phoenix/server/api/input_types/DatasetFilter.py +5 -2
- phoenix/server/api/input_types/ExperimentRunSort.py +237 -0
- phoenix/server/api/input_types/GenerativeModelInput.py +3 -0
- phoenix/server/api/input_types/ProjectSessionSort.py +158 -1
- phoenix/server/api/input_types/PromptVersionInput.py +47 -1
- phoenix/server/api/input_types/SpanSort.py +3 -2
- phoenix/server/api/input_types/UpdateAnnotationInput.py +34 -0
- phoenix/server/api/input_types/UserRoleInput.py +1 -0
- phoenix/server/api/mutations/__init__.py +8 -0
- phoenix/server/api/mutations/annotation_config_mutations.py +8 -8
- phoenix/server/api/mutations/api_key_mutations.py +15 -20
- phoenix/server/api/mutations/chat_mutations.py +106 -37
- phoenix/server/api/mutations/dataset_label_mutations.py +243 -0
- phoenix/server/api/mutations/dataset_mutations.py +21 -16
- phoenix/server/api/mutations/dataset_split_mutations.py +351 -0
- phoenix/server/api/mutations/experiment_mutations.py +2 -2
- phoenix/server/api/mutations/export_events_mutations.py +3 -3
- phoenix/server/api/mutations/model_mutations.py +11 -9
- phoenix/server/api/mutations/project_mutations.py +4 -4
- phoenix/server/api/mutations/project_session_annotations_mutations.py +158 -0
- phoenix/server/api/mutations/project_trace_retention_policy_mutations.py +8 -4
- phoenix/server/api/mutations/prompt_label_mutations.py +74 -65
- phoenix/server/api/mutations/prompt_mutations.py +65 -129
- phoenix/server/api/mutations/prompt_version_tag_mutations.py +11 -8
- phoenix/server/api/mutations/span_annotations_mutations.py +15 -10
- phoenix/server/api/mutations/trace_annotations_mutations.py +13 -8
- phoenix/server/api/mutations/trace_mutations.py +3 -3
- phoenix/server/api/mutations/user_mutations.py +55 -26
- phoenix/server/api/queries.py +501 -617
- phoenix/server/api/routers/__init__.py +2 -2
- phoenix/server/api/routers/auth.py +141 -87
- phoenix/server/api/routers/ldap.py +229 -0
- phoenix/server/api/routers/oauth2.py +349 -101
- phoenix/server/api/routers/v1/__init__.py +22 -4
- phoenix/server/api/routers/v1/annotation_configs.py +19 -30
- phoenix/server/api/routers/v1/annotations.py +455 -13
- phoenix/server/api/routers/v1/datasets.py +355 -68
- phoenix/server/api/routers/v1/documents.py +142 -0
- phoenix/server/api/routers/v1/evaluations.py +20 -28
- phoenix/server/api/routers/v1/experiment_evaluations.py +16 -6
- phoenix/server/api/routers/v1/experiment_runs.py +335 -59
- phoenix/server/api/routers/v1/experiments.py +475 -47
- phoenix/server/api/routers/v1/projects.py +16 -50
- phoenix/server/api/routers/v1/prompts.py +50 -39
- phoenix/server/api/routers/v1/sessions.py +108 -0
- phoenix/server/api/routers/v1/spans.py +156 -96
- phoenix/server/api/routers/v1/traces.py +51 -77
- phoenix/server/api/routers/v1/users.py +64 -24
- phoenix/server/api/routers/v1/utils.py +3 -7
- phoenix/server/api/subscriptions.py +257 -93
- phoenix/server/api/types/Annotation.py +90 -23
- phoenix/server/api/types/ApiKey.py +13 -17
- phoenix/server/api/types/AuthMethod.py +1 -0
- phoenix/server/api/types/ChatCompletionSubscriptionPayload.py +1 -0
- phoenix/server/api/types/Dataset.py +199 -72
- phoenix/server/api/types/DatasetExample.py +88 -18
- phoenix/server/api/types/DatasetExperimentAnnotationSummary.py +10 -0
- phoenix/server/api/types/DatasetLabel.py +57 -0
- phoenix/server/api/types/DatasetSplit.py +98 -0
- phoenix/server/api/types/DatasetVersion.py +49 -4
- phoenix/server/api/types/DocumentAnnotation.py +212 -0
- phoenix/server/api/types/Experiment.py +215 -68
- phoenix/server/api/types/ExperimentComparison.py +3 -9
- phoenix/server/api/types/ExperimentRepeatedRunGroup.py +155 -0
- phoenix/server/api/types/ExperimentRepeatedRunGroupAnnotationSummary.py +9 -0
- phoenix/server/api/types/ExperimentRun.py +120 -70
- phoenix/server/api/types/ExperimentRunAnnotation.py +158 -39
- phoenix/server/api/types/GenerativeModel.py +95 -42
- phoenix/server/api/types/GenerativeProvider.py +1 -1
- phoenix/server/api/types/ModelInterface.py +7 -2
- phoenix/server/api/types/PlaygroundModel.py +12 -2
- phoenix/server/api/types/Project.py +218 -185
- phoenix/server/api/types/ProjectSession.py +146 -29
- phoenix/server/api/types/ProjectSessionAnnotation.py +187 -0
- phoenix/server/api/types/ProjectTraceRetentionPolicy.py +1 -1
- phoenix/server/api/types/Prompt.py +119 -39
- phoenix/server/api/types/PromptLabel.py +42 -25
- phoenix/server/api/types/PromptVersion.py +11 -8
- phoenix/server/api/types/PromptVersionTag.py +65 -25
- phoenix/server/api/types/Span.py +130 -123
- phoenix/server/api/types/SpanAnnotation.py +189 -42
- phoenix/server/api/types/SystemApiKey.py +65 -1
- phoenix/server/api/types/Trace.py +184 -53
- phoenix/server/api/types/TraceAnnotation.py +149 -50
- phoenix/server/api/types/User.py +128 -33
- phoenix/server/api/types/UserApiKey.py +73 -26
- phoenix/server/api/types/node.py +10 -0
- phoenix/server/api/types/pagination.py +11 -2
- phoenix/server/app.py +154 -36
- phoenix/server/authorization.py +5 -4
- phoenix/server/bearer_auth.py +13 -5
- phoenix/server/cost_tracking/cost_model_lookup.py +42 -14
- phoenix/server/cost_tracking/model_cost_manifest.json +1085 -194
- phoenix/server/daemons/generative_model_store.py +61 -9
- phoenix/server/daemons/span_cost_calculator.py +10 -8
- phoenix/server/dml_event.py +13 -0
- phoenix/server/email/sender.py +29 -2
- phoenix/server/grpc_server.py +9 -9
- phoenix/server/jwt_store.py +8 -6
- phoenix/server/ldap.py +1449 -0
- phoenix/server/main.py +9 -3
- phoenix/server/oauth2.py +330 -12
- phoenix/server/prometheus.py +43 -6
- phoenix/server/rate_limiters.py +4 -9
- phoenix/server/retention.py +33 -20
- phoenix/server/session_filters.py +49 -0
- phoenix/server/static/.vite/manifest.json +51 -53
- phoenix/server/static/assets/components-BreFUQQa.js +6702 -0
- phoenix/server/static/assets/{index-BPCwGQr8.js → index-CTQoemZv.js} +42 -35
- phoenix/server/static/assets/pages-DBE5iYM3.js +9524 -0
- phoenix/server/static/assets/vendor-BGzfc4EU.css +1 -0
- phoenix/server/static/assets/vendor-DCE4v-Ot.js +920 -0
- phoenix/server/static/assets/vendor-codemirror-D5f205eT.js +25 -0
- phoenix/server/static/assets/{vendor-recharts-Bw30oz1A.js → vendor-recharts-V9cwpXsm.js} +7 -7
- phoenix/server/static/assets/{vendor-shiki-DZajAPeq.js → vendor-shiki-Do--csgv.js} +1 -1
- phoenix/server/static/assets/vendor-three-CmB8bl_y.js +3840 -0
- phoenix/server/templates/index.html +7 -1
- phoenix/server/thread_server.py +1 -2
- phoenix/server/utils.py +74 -0
- phoenix/session/client.py +55 -1
- phoenix/session/data_extractor.py +5 -0
- phoenix/session/evaluation.py +8 -4
- phoenix/session/session.py +44 -8
- phoenix/settings.py +2 -0
- phoenix/trace/attributes.py +80 -13
- phoenix/trace/dsl/query.py +2 -0
- phoenix/trace/projects.py +5 -0
- phoenix/utilities/template_formatters.py +1 -1
- phoenix/version.py +1 -1
- phoenix/server/api/types/Evaluation.py +0 -39
- phoenix/server/static/assets/components-D0DWAf0l.js +0 -5650
- phoenix/server/static/assets/pages-Creyamao.js +0 -8612
- phoenix/server/static/assets/vendor-CU36oj8y.js +0 -905
- phoenix/server/static/assets/vendor-CqDb5u4o.css +0 -1
- phoenix/server/static/assets/vendor-arizeai-Ctgw0e1G.js +0 -168
- phoenix/server/static/assets/vendor-codemirror-Cojjzqb9.js +0 -25
- phoenix/server/static/assets/vendor-three-BLWp5bic.js +0 -2998
- phoenix/utilities/deprecation.py +0 -31
- {arize_phoenix-11.23.1.dist-info → arize_phoenix-12.28.1.dist-info}/entry_points.txt +0 -0
- {arize_phoenix-11.23.1.dist-info → arize_phoenix-12.28.1.dist-info}/licenses/LICENSE +0 -0
```diff
--- arize_phoenix-11.23.1/phoenix/server/api/routers/v1/experiments.py
+++ arize_phoenix-12.28.1/phoenix/server/api/routers/v1/experiments.py
@@ -4,26 +4,38 @@ from random import getrandbits
 from typing import Any, Optional
 
 import pandas as pd
-
+import sqlalchemy as sa
+from fastapi import APIRouter, Depends, HTTPException, Path, Query, Response
 from pydantic import Field
-from sqlalchemy import and_, func, select
+from sqlalchemy import and_, case, func, select
 from sqlalchemy.ext.asyncio import AsyncSession
 from sqlalchemy.orm import joinedload
 from starlette.requests import Request
 from starlette.responses import PlainTextResponse
-from starlette.status import HTTP_200_OK, HTTP_404_NOT_FOUND, HTTP_422_UNPROCESSABLE_ENTITY
 from strawberry.relay import GlobalID
 
 from phoenix.db import models
-from phoenix.db.helpers import
+from phoenix.db.helpers import (
+    SupportedSQLDialect,
+    get_experiment_incomplete_runs_query,
+    insert_experiment_with_examples_snapshot,
+)
 from phoenix.db.insertion.helpers import insert_on_conflict
+from phoenix.server.api.routers.v1.datasets import DatasetExample
 from phoenix.server.api.types.node import from_global_id_with_expected_type
 from phoenix.server.authorization import is_not_locked
+from phoenix.server.bearer_auth import PhoenixUser
 from phoenix.server.dml_event import ExperimentInsertEvent
 from phoenix.server.experiments.utils import generate_experiment_project_name
 
+from .datasets import _resolve_split_identifiers
 from .models import V1RoutesBaseModel
-from .utils import
+from .utils import (
+    PaginatedResponseBody,
+    ResponseBody,
+    add_errors_to_responses,
+    add_text_csv_content_to_responses,
+)
 
 router = APIRouter(tags=["experiments"], include_in_schema=True)
 
@@ -46,13 +58,19 @@ class Experiment(V1RoutesBaseModel):
     dataset_version_id: str = Field(
         description="The ID of the dataset version associated with the experiment"
     )
-    repetitions: int = Field(description="Number of times the experiment is repeated")
+    repetitions: int = Field(description="Number of times the experiment is repeated", gt=0)
     metadata: dict[str, Any] = Field(description="Metadata of the experiment")
     project_name: Optional[str] = Field(
        description="The name of the project associated with the experiment"
     )
     created_at: datetime = Field(description="The creation timestamp of the experiment")
     updated_at: datetime = Field(description="The last update timestamp of the experiment")
+    example_count: int = Field(description="Number of examples in the experiment")
+    successful_run_count: int = Field(description="Number of successful runs in the experiment")
+    failed_run_count: int = Field(description="Number of failed runs in the experiment")
+    missing_run_count: int = Field(
+        description="Number of missing (not yet executed) runs in the experiment"
+    )
 
 
 class CreateExperimentRequestBody(V1RoutesBaseModel):
@@ -77,6 +95,10 @@ class CreateExperimentRequestBody(V1RoutesBaseModel):
             "(if omitted, the latest version will be used)"
         ),
     )
+    splits: Optional[list[str]] = Field(
+        default=None,
+        description="List of dataset split identifiers (GlobalIDs or names) to filter by",
+    )
     repetitions: int = Field(
         default=1, description="Number of times the experiment should be repeated for each example"
     )
@@ -92,7 +114,7 @@ class CreateExperimentResponseBody(ResponseBody[Experiment]):
     operation_id="createExperiment",
     summary="Create experiment on a dataset",
     responses=add_errors_to_responses(
-        [{"status_code":
+        [{"status_code": 404, "description": "Dataset or DatasetVersion not found"}]
     ),
     response_description="Experiment retrieved successfully",
 )
@@ -101,26 +123,38 @@ async def create_experiment(
     request_body: CreateExperimentRequestBody,
     dataset_id: str = Path(..., title="Dataset ID"),
 ) -> CreateExperimentResponseBody:
-
+    try:
+        dataset_globalid = GlobalID.from_id(dataset_id)
+    except Exception as e:
+        raise HTTPException(
+            detail=f"Invalid dataset ID format: {dataset_id}",
+            status_code=422,
+        ) from e
     try:
         dataset_rowid = from_global_id_with_expected_type(dataset_globalid, "Dataset")
     except ValueError:
         raise HTTPException(
             detail="Dataset with ID {dataset_globalid} does not exist",
-            status_code=
+            status_code=404,
         )
 
     dataset_version_globalid_str = request_body.version_id
     if dataset_version_globalid_str is not None:
         try:
             dataset_version_globalid = GlobalID.from_id(dataset_version_globalid_str)
+        except Exception as e:
+            raise HTTPException(
+                detail=f"Invalid dataset version ID format: {dataset_version_globalid_str}",
+                status_code=422,
+            ) from e
+        try:
             dataset_version_id = from_global_id_with_expected_type(
                 dataset_version_globalid, "DatasetVersion"
             )
         except ValueError:
             raise HTTPException(
                 detail=f"DatasetVersion with ID {dataset_version_globalid_str} does not exist",
-                status_code=
+                status_code=404,
             )
 
     async with request.app.state.db() as session:
@@ -130,7 +164,7 @@ async def create_experiment(
         if result is None:
             raise HTTPException(
                 detail=f"Dataset with ID {dataset_globalid} does not exist",
-                status_code=
+                status_code=404,
             )
         dataset_name = result.name
         if dataset_version_globalid_str is None:
@@ -143,7 +177,7 @@ async def create_experiment(
             if not dataset_version:
                 raise HTTPException(
                     detail=f"Dataset {dataset_globalid} does not have any versions",
-                    status_code=
+                    status_code=404,
                 )
             dataset_version_id = dataset_version.id
             dataset_version_globalid = GlobalID("DatasetVersion", str(dataset_version_id))
@@ -155,8 +189,11 @@ async def create_experiment(
             if not dataset_version:
                 raise HTTPException(
                     detail=f"DatasetVersion with ID {dataset_version_globalid} does not exist",
-                    status_code=
+                    status_code=404,
                 )
+        user_id: Optional[int] = None
+        if request.app.state.authentication_enabled and isinstance(request.user, PhoenixUser):
+            user_id = int(request.user.identity)
 
         # generate a semi-unique name for the experiment
         experiment_name = request_body.name or _generate_experiment_name(dataset_name)
@@ -172,9 +209,23 @@ async def create_experiment(
             repetitions=request_body.repetitions,
             metadata_=request_body.metadata or {},
             project_name=project_name,
+            user_id=user_id,
         )
-
-
+
+        if request_body.splits is not None:
+            # Resolve split identifiers (IDs or names) to IDs and names
+            resolved_split_ids, _ = await _resolve_split_identifiers(session, request_body.splits)
+
+            # Generate experiment dataset splits relation
+            # prior to the crosswalk table insert
+            # in insert_experiment_with_examples_snapshot
+            experiment.experiment_dataset_splits = [
+                models.ExperimentDatasetSplit(dataset_split_id=split_id)
+                for split_id in resolved_split_ids
+            ]
+
+        # crosswalk table assumes the relation is already present
+        await insert_experiment_with_examples_snapshot(session, experiment)
 
         dialect = SupportedSQLDialect(session.bind.dialect.name)
         project_rowid = await session.scalar(
@@ -197,6 +248,19 @@ async def create_experiment(
         dataset_version_globalid = GlobalID(
             "DatasetVersion", str(experiment.dataset_version_id)
         )
+
+        # Optimization: We just created this experiment, so we know there are 0 runs.
+        # No need to query ExperimentRun table - just count the examples.
+        example_count = await session.scalar(
+            select(func.count())
+            .select_from(models.ExperimentDatasetExample)
+            .where(models.ExperimentDatasetExample.experiment_id == experiment.id)
+        )
+
+        # No runs exist yet for a newly created experiment
+        successful_run_count = 0
+        failed_run_count = 0
+        missing_run_count = (example_count or 0) * experiment.repetitions
         request.state.event_queue.put(ExperimentInsertEvent((experiment.id,)))
         return CreateExperimentResponseBody(
             data=Experiment(
@@ -208,6 +272,10 @@ async def create_experiment(
                 project_name=experiment.project_name,
                 created_at=experiment.created_at,
                 updated_at=experiment.updated_at,
+                example_count=example_count or 0,
+                successful_run_count=successful_run_count or 0,
+                failed_run_count=failed_run_count or 0,
+                missing_run_count=missing_run_count,
             )
         )
 
@@ -221,18 +289,24 @@ class GetExperimentResponseBody(ResponseBody[Experiment]):
     operation_id="getExperiment",
     summary="Get experiment by ID",
     responses=add_errors_to_responses(
-        [{"status_code":
+        [{"status_code": 404, "description": "Experiment not found"}]
     ),
     response_description="Experiment retrieved successfully",
 )
 async def get_experiment(request: Request, experiment_id: str) -> GetExperimentResponseBody:
-
+    try:
+        experiment_globalid = GlobalID.from_id(experiment_id)
+    except Exception as e:
+        raise HTTPException(
+            detail=f"Invalid experiment ID format: {experiment_id}",
+            status_code=422,
+        ) from e
     try:
         experiment_rowid = from_global_id_with_expected_type(experiment_globalid, "Experiment")
     except ValueError:
         raise HTTPException(
             detail="Experiment with ID {experiment_globalid} does not exist",
-            status_code=
+            status_code=404,
         )
 
     async with request.app.state.db() as session:
@@ -243,11 +317,48 @@ async def get_experiment(request: Request, experiment_id: str) -> GetExperimentResponseBody:
         if not experiment:
             raise HTTPException(
                 detail=f"Experiment with ID {experiment_globalid} does not exist",
-                status_code=
+                status_code=404,
             )
 
         dataset_globalid = GlobalID("Dataset", str(experiment.dataset_id))
         dataset_version_globalid = GlobalID("DatasetVersion", str(experiment.dataset_version_id))
+
+        # Get counts efficiently: use CASE to count successful and failed in single table scan
+        run_counts_subq = (
+            select(
+                func.sum(case((models.ExperimentRun.error.is_(None), 1), else_=0)).label(
+                    "successful_run_count"
+                ),
+                func.sum(case((models.ExperimentRun.error.is_not(None), 1), else_=0)).label(
+                    "failed_run_count"
+                ),
+            )
+            .select_from(models.ExperimentRun)
+            .where(models.ExperimentRun.experiment_id == experiment_rowid)
+            .subquery()
+        )
+
+        counts_result = await session.execute(
+            select(
+                select(func.count())
+                .select_from(models.ExperimentDatasetExample)
+                .where(models.ExperimentDatasetExample.experiment_id == experiment_rowid)
+                .scalar_subquery()
+                .label("example_count"),
+                run_counts_subq.c.successful_run_count,
+                run_counts_subq.c.failed_run_count,
+            ).select_from(run_counts_subq)
+        )
+        counts = counts_result.one()
+        example_count = counts.example_count
+        successful_run_count = counts.successful_run_count
+        failed_run_count = counts.failed_run_count
+
+        # Calculate missing runs (no database query needed)
+        total_expected_runs = (example_count or 0) * experiment.repetitions
+        missing_run_count = (
+            total_expected_runs - (successful_run_count or 0) - (failed_run_count or 0)
+        )
         return GetExperimentResponseBody(
             data=Experiment(
                 id=str(experiment_globalid),
@@ -258,31 +369,246 @@ async def get_experiment(request: Request, experiment_id: str) -> GetExperimentResponseBody:
                 project_name=experiment.project_name,
                 created_at=experiment.created_at,
                 updated_at=experiment.updated_at,
+                example_count=example_count or 0,
+                successful_run_count=successful_run_count or 0,
+                failed_run_count=failed_run_count or 0,
+                missing_run_count=missing_run_count,
+            )
+        )
+
+
+@router.delete(
+    "/experiments/{experiment_id}",
+    operation_id="deleteExperiment",
+    summary="Delete experiment by ID",
+    responses=add_errors_to_responses(
+        [{"status_code": 404, "description": "Experiment not found"}]
+    ),
+    response_description="Experiment deleted successfully",
+    status_code=204,
+)
+async def delete_experiment(
+    request: Request,
+    experiment_id: str,
+) -> None:
+    try:
+        experiment_globalid = GlobalID.from_id(experiment_id)
+    except Exception as e:
+        raise HTTPException(
+            detail=f"Invalid experiment ID format: {experiment_id}",
+            status_code=422,
+        ) from e
+    try:
+        experiment_rowid = from_global_id_with_expected_type(experiment_globalid, "Experiment")
+    except ValueError:
+        raise HTTPException(
+            detail=f"Experiment with ID {experiment_globalid} does not exist",
+            status_code=404,
         )
+
+    stmt = (
+        sa.delete(models.Experiment)
+        .where(models.Experiment.id == experiment_rowid)
+        .returning(models.Experiment.id)
     )
+    async with request.app.state.db() as session:
+        if (await session.scalar(stmt)) is None:
+            raise HTTPException(detail="Experiment does not exist", status_code=404)
 
 
-class ListExperimentsResponseBody(
+class ListExperimentsResponseBody(PaginatedResponseBody[Experiment]):
     pass
 
 
+class IncompleteExperimentRun(V1RoutesBaseModel):
+    """
+    Information about incomplete runs for a dataset example
+    """
+
+    dataset_example: DatasetExample = Field(description="The dataset example")
+    repetition_numbers: list[int] = Field(
+        description="List of repetition numbers that need to be run"
+    )
+
+
+class GetIncompleteExperimentRunsResponseBody(PaginatedResponseBody[IncompleteExperimentRun]):
+    pass
+
+
+@router.get(
+    "/experiments/{experiment_id}/incomplete-runs",
+    operation_id="getIncompleteExperimentRuns",
+    summary="Get incomplete runs for an experiment",
+    responses=add_errors_to_responses(
+        [
+            {"status_code": 404, "description": "Experiment not found"},
+            {"status_code": 422, "description": "Invalid cursor format"},
+        ]
+    ),
+    response_description="Incomplete runs retrieved successfully",
+)
+async def get_incomplete_runs(
+    request: Request,
+    experiment_id: str,
+    cursor: Optional[str] = Query(default=None, description="Cursor for pagination"),
+    limit: int = Query(
+        default=50, description="Maximum number of examples with incomplete runs to return", gt=0
+    ),
+) -> GetIncompleteExperimentRunsResponseBody:
+    """
+    Get runs that need to be completed for this experiment.
+
+    Returns all incomplete runs, including both missing runs (not yet attempted)
+    and failed runs (attempted but have errors).
+
+    Args:
+        experiment_id: The ID of the experiment
+        cursor: Cursor for pagination
+        limit: Maximum number of results to return
+
+    Returns:
+        Paginated list of incomplete runs grouped by dataset example,
+        with repetition numbers that need to be run
+    """
+    try:
+        experiment_globalid = GlobalID.from_id(experiment_id)
+    except Exception as e:
+        raise HTTPException(
+            detail=f"Invalid experiment ID format: {experiment_id}",
+            status_code=422,
+        ) from e
+    try:
+        id_ = from_global_id_with_expected_type(experiment_globalid, "Experiment")
+    except ValueError:
+        raise HTTPException(
+            detail=f"Experiment with ID {experiment_globalid} does not exist",
+            status_code=404,
+        )
+
+    # Parse cursor if provided
+    cursor_example_rowid: Optional[int] = None
+    if cursor:
+        try:
+            cursor_gid = GlobalID.from_id(cursor)
+            cursor_example_rowid = from_global_id_with_expected_type(cursor_gid, "DatasetExample")
+        except (ValueError, AttributeError):
+            raise HTTPException(
+                detail=f"Invalid cursor format: {cursor}",
+                status_code=422,
+            )
+
+    # Fetch experiment first (we need its repetitions count for the query)
+    async with request.app.state.db() as session:
+        experiment_result = await session.execute(select(models.Experiment).filter_by(id=id_))
+        experiment = experiment_result.scalar()
+        if not experiment:
+            raise HTTPException(
+                detail=f"Experiment with ID {experiment_globalid} does not exist",
+                status_code=404,
+            )
+
+        dialect = request.app.state.db.dialect
+
+        stmt = get_experiment_incomplete_runs_query(
+            experiment,
+            dialect,
+            cursor_example_rowid=cursor_example_rowid,
+            limit=limit,
+        )
+
+        result = await session.execute(stmt)
+        all_examples = result.all()
+
+        # Check if there's a next page
+        has_next_page = len(all_examples) > limit
+        if has_next_page:
+            # Remove the extra row
+            examples_to_process = all_examples[:limit]
+            # The cursor points to the FIRST item of the NEXT page
+            next_item_id = all_examples[limit][0].dataset_example_id
+            next_cursor = str(GlobalID("DatasetExample", str(next_item_id)))
+        else:
+            examples_to_process = all_examples
+            next_cursor = None
+
+        # Parse incomplete repetitions and build response
+        # Optimization: Precompute the "all repetitions" list for completely missing examples
+        # to avoid recomputing it for every missing example
+        all_repetitions = list(range(1, experiment.repetitions + 1))
+        incomplete_runs_list: list[IncompleteExperimentRun] = []
+
+        for revision, successful_count, incomplete_reps in examples_to_process:
+            example_id = revision.dataset_example_id
+
+            # Three regimes:
+            # 1. Completely missing (successful_count = 0): all repetitions are incomplete
+            # 2. Partially completed (0 < successful_count < R): parse from SQL result
+            # 3. Totally completed (successful_count = R): filtered out by SQL HAVING clause
+
+            if successful_count == 0:
+                # Regime 1: Completely missing - use precomputed list
+                incomplete = all_repetitions
+            else:
+                # Regime 2: Partially completed - parse incomplete reps from SQL
+                if dialect is SupportedSQLDialect.POSTGRESQL:
+                    # PostgreSQL returns array (list), filter out nulls
+                    incomplete = [r for r in incomplete_reps if r is not None]
+                else:
+                    # SQLite returns JSON string
+                    incomplete = [r for r in json.loads(incomplete_reps) if r is not None]
+
+            # Build response
+            example_globalid = GlobalID("DatasetExample", str(example_id))
+            incomplete_runs_list.append(
+                IncompleteExperimentRun(
+                    dataset_example=DatasetExample(
+                        id=str(example_globalid),
+                        input=revision.input,
+                        output=revision.output,
+                        metadata=revision.metadata_,
+                        updated_at=revision.created_at,
+                    ),
+                    repetition_numbers=sorted(incomplete),
+                )
+            )
+
+    return GetIncompleteExperimentRunsResponseBody(
+        data=incomplete_runs_list, next_cursor=next_cursor
+    )
+
+
 @router.get(
     "/datasets/{dataset_id}/experiments",
     operation_id="listExperiments",
     summary="List experiments by dataset",
-
+    description="Retrieve a paginated list of experiments for the specified dataset.",
+    response_description="Paginated list of experiments for the dataset",
+    responses=add_errors_to_responses([422]),
 )
 async def list_experiments(
     request: Request,
     dataset_id: str = Path(..., title="Dataset ID"),
+    cursor: Optional[str] = Query(
+        default=None,
+        description="Cursor for pagination (base64-encoded experiment ID)",
+    ),
+    limit: int = Query(
+        default=50, description="The max number of experiments to return at a time.", gt=0
+    ),
 ) -> ListExperimentsResponseBody:
-
+    try:
+        dataset_gid = GlobalID.from_id(dataset_id)
+    except Exception as e:
+        raise HTTPException(
+            detail=f"Invalid dataset ID format: {dataset_id}",
+            status_code=422,
+        ) from e
     try:
         dataset_rowid = from_global_id_with_expected_type(dataset_gid, "Dataset")
     except ValueError:
         raise HTTPException(
             detail=f"Dataset with ID {dataset_gid} does not exist",
-            status_code=
+            status_code=404,
         )
     async with request.app.state.db() as session:
         query = (
@@ -291,29 +617,119 @@ async def list_experiments(
             .order_by(models.Experiment.id.desc())
         )
 
+        # Handle cursor for pagination
+        if cursor:
+            try:
+                cursor_gid = GlobalID.from_id(cursor)
+                cursor_rowid = from_global_id_with_expected_type(cursor_gid, "Experiment")
+                query = query.where(models.Experiment.id <= cursor_rowid)
+            except (ValueError, Exception):
+                raise HTTPException(
+                    detail=f"Invalid cursor format: {cursor}",
+                    status_code=422,
+                )
+
+        # Overfetch by 1 to determine if there's a next page
+        query = query.limit(limit + 1)
+
         result = await session.execute(query)
         experiments = result.scalars().all()
 
         if not experiments:
-            return ListExperimentsResponseBody(data=[])
-
-
-
-
-
-
-
+            return ListExperimentsResponseBody(data=[], next_cursor=None)
+
+        # Get example counts and successful run counts for all experiments in a single query
+        experiment_ids = [exp.id for exp in experiments]
+
+        # Create subqueries for counts
+        example_count_subq = (
+            select(
+                models.ExperimentDatasetExample.experiment_id, func.count().label("example_count")
+            )
+            .where(models.ExperimentDatasetExample.experiment_id.in_(experiment_ids))
+            .group_by(models.ExperimentDatasetExample.experiment_id)
+            .subquery()
+        )
+
+        # Optimize: Use CASE to count successful and failed in single table scan
+        run_counts_subq = (
+            select(
+                models.ExperimentRun.experiment_id,
+                func.sum(case((models.ExperimentRun.error.is_(None), 1), else_=0)).label(
+                    "successful_run_count"
+                ),
+                func.sum(case((models.ExperimentRun.error.is_not(None), 1), else_=0)).label(
+                    "failed_run_count"
                 ),
-                repetitions=experiment.repetitions,
-                metadata=experiment.metadata_,
-                project_name=experiment.project_name,
-                created_at=experiment.created_at,
-                updated_at=experiment.updated_at,
             )
-
-
+            .where(models.ExperimentRun.experiment_id.in_(experiment_ids))
+            .group_by(models.ExperimentRun.experiment_id)
+            .subquery()
+        )
 
-
+        # Get all counts in one query using outer join
+        counts_result = await session.execute(
+            select(
+                func.coalesce(
+                    example_count_subq.c.experiment_id,
+                    run_counts_subq.c.experiment_id,
+                ).label("experiment_id"),
+                func.coalesce(example_count_subq.c.example_count, 0).label("example_count"),
+                func.coalesce(run_counts_subq.c.successful_run_count, 0).label(
+                    "successful_run_count"
+                ),
+                func.coalesce(run_counts_subq.c.failed_run_count, 0).label("failed_run_count"),
+            )
+            .select_from(example_count_subq)
+            .outerjoin(
+                run_counts_subq,
+                example_count_subq.c.experiment_id == run_counts_subq.c.experiment_id,
+            )
+        )
+
+        counts_by_experiment = {
+            row.experiment_id: (row.example_count, row.successful_run_count, row.failed_run_count)
+            for row in counts_result
+        }
+
+        # Handle pagination: check if we have a next page
+        next_cursor = None
+        if len(experiments) == limit + 1:
+            last_experiment = experiments[-1]
+            next_cursor = str(GlobalID("Experiment", str(last_experiment.id)))
+            experiments = experiments[:-1]  # Remove the extra overfetched experiment
+
+        data = []
+        for experiment in experiments:
+            counts = counts_by_experiment.get(experiment.id, (0, 0, 0))
+            example_count = counts[0]
+            successful_run_count = counts[1]
+            failed_run_count = counts[2]
+
+            # Calculate missing runs (no database query needed)
+            total_expected_runs = example_count * experiment.repetitions
+            missing_run_count = total_expected_runs - successful_run_count - failed_run_count
+
+            data.append(
+                Experiment(
+                    id=str(GlobalID("Experiment", str(experiment.id))),
+                    dataset_id=str(GlobalID("Dataset", str(experiment.dataset_id))),
+                    dataset_version_id=str(
+                        GlobalID("DatasetVersion", str(experiment.dataset_version_id))
+                    ),
+                    repetitions=experiment.repetitions,
+                    metadata=experiment.metadata_,
+                    project_name=experiment.project_name,
+                    created_at=experiment.created_at,
+                    updated_at=experiment.updated_at,
+                    example_count=example_count,
+                    successful_run_count=successful_run_count,
+                    failed_run_count=failed_run_count,
+                    missing_run_count=missing_run_count,
+                )
+            )
+
+        return ListExperimentsResponseBody(data=data, next_cursor=next_cursor)
 
 
 async def _get_experiment_runs_and_revisions(
@@ -321,7 +737,7 @@ async def _get_experiment_runs_and_revisions(
 ) -> tuple[models.Experiment, tuple[models.ExperimentRun], tuple[models.DatasetExampleRevision]]:
     experiment = await session.get(models.Experiment, experiment_rowid)
     if not experiment:
-        raise HTTPException(detail="Experiment not found", status_code=
+        raise HTTPException(detail="Experiment not found", status_code=404)
     revision_ids = (
         select(func.max(models.DatasetExampleRevision.id))
         .join(
@@ -370,7 +786,7 @@ async def _get_experiment_runs_and_revisions(
     if not runs_and_revisions:
         raise HTTPException(
             detail="Experiment has no runs",
-            status_code=
+            status_code=404,
         )
     runs, revisions = zip(*runs_and_revisions)
     return experiment, runs, revisions
@@ -383,7 +799,7 @@ async def _get_experiment_runs_and_revisions(
     response_class=PlainTextResponse,
     responses=add_errors_to_responses(
         [
-            {"status_code":
+            {"status_code": 404, "description": "Experiment not found"},
         ]
     ),
 )
@@ -391,13 +807,19 @@ async def get_experiment_json(
     request: Request,
     experiment_id: str = Path(..., title="Experiment ID"),
 ) -> Response:
-
+    try:
+        experiment_globalid = GlobalID.from_id(experiment_id)
+    except Exception as e:
+        raise HTTPException(
+            detail=f"Invalid experiment ID format: {experiment_id}",
+            status_code=422,
+        ) from e
     try:
         experiment_rowid = from_global_id_with_expected_type(experiment_globalid, "Experiment")
     except ValueError:
         raise HTTPException(
             detail=f"Invalid experiment ID: {experiment_globalid}",
-            status_code=
+            status_code=422,
         )
 
     async with request.app.state.db() as session:
@@ -452,19 +874,25 @@ async def get_experiment_json(
     "/experiments/{experiment_id}/csv",
     operation_id="getExperimentCSV",
     summary="Download experiment runs as a CSV file",
-    responses={**add_text_csv_content_to_responses(
+    responses={**add_text_csv_content_to_responses(200)},
 )
 async def get_experiment_csv(
     request: Request,
     experiment_id: str = Path(..., title="Experiment ID"),
 ) -> Response:
-
+    try:
+        experiment_globalid = GlobalID.from_id(experiment_id)
+    except Exception as e:
+        raise HTTPException(
+            detail=f"Invalid experiment ID format: {experiment_id}",
+            status_code=422,
+        ) from e
     try:
         experiment_rowid = from_global_id_with_expected_type(experiment_globalid, "Experiment")
     except ValueError:
         raise HTTPException(
             detail=f"Invalid experiment ID: {experiment_globalid}",
-            status_code=
+            status_code=422,
        )
 
     async with request.app.state.db() as session:
```