arize-phoenix 11.23.1__py3-none-any.whl → 12.28.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (221)
  1. {arize_phoenix-11.23.1.dist-info → arize_phoenix-12.28.1.dist-info}/METADATA +61 -36
  2. {arize_phoenix-11.23.1.dist-info → arize_phoenix-12.28.1.dist-info}/RECORD +212 -162
  3. {arize_phoenix-11.23.1.dist-info → arize_phoenix-12.28.1.dist-info}/WHEEL +1 -1
  4. {arize_phoenix-11.23.1.dist-info → arize_phoenix-12.28.1.dist-info}/licenses/IP_NOTICE +1 -1
  5. phoenix/__generated__/__init__.py +0 -0
  6. phoenix/__generated__/classification_evaluator_configs/__init__.py +20 -0
  7. phoenix/__generated__/classification_evaluator_configs/_document_relevance_classification_evaluator_config.py +17 -0
  8. phoenix/__generated__/classification_evaluator_configs/_hallucination_classification_evaluator_config.py +17 -0
  9. phoenix/__generated__/classification_evaluator_configs/_models.py +18 -0
  10. phoenix/__generated__/classification_evaluator_configs/_tool_selection_classification_evaluator_config.py +17 -0
  11. phoenix/__init__.py +2 -1
  12. phoenix/auth.py +27 -2
  13. phoenix/config.py +1594 -81
  14. phoenix/db/README.md +546 -28
  15. phoenix/db/bulk_inserter.py +119 -116
  16. phoenix/db/engines.py +140 -33
  17. phoenix/db/facilitator.py +22 -1
  18. phoenix/db/helpers.py +818 -65
  19. phoenix/db/iam_auth.py +64 -0
  20. phoenix/db/insertion/dataset.py +133 -1
  21. phoenix/db/insertion/document_annotation.py +9 -6
  22. phoenix/db/insertion/evaluation.py +2 -3
  23. phoenix/db/insertion/helpers.py +2 -2
  24. phoenix/db/insertion/session_annotation.py +176 -0
  25. phoenix/db/insertion/span_annotation.py +3 -4
  26. phoenix/db/insertion/trace_annotation.py +3 -4
  27. phoenix/db/insertion/types.py +41 -18
  28. phoenix/db/migrations/versions/01a8342c9cdf_add_user_id_on_datasets.py +40 -0
  29. phoenix/db/migrations/versions/0df286449799_add_session_annotations_table.py +105 -0
  30. phoenix/db/migrations/versions/272b66ff50f8_drop_single_indices.py +119 -0
  31. phoenix/db/migrations/versions/58228d933c91_dataset_labels.py +67 -0
  32. phoenix/db/migrations/versions/699f655af132_experiment_tags.py +57 -0
  33. phoenix/db/migrations/versions/735d3d93c33e_add_composite_indices.py +41 -0
  34. phoenix/db/migrations/versions/ab513d89518b_add_user_id_on_dataset_versions.py +40 -0
  35. phoenix/db/migrations/versions/d0690a79ea51_users_on_experiments.py +40 -0
  36. phoenix/db/migrations/versions/deb2c81c0bb2_dataset_splits.py +139 -0
  37. phoenix/db/migrations/versions/e76cbd66ffc3_add_experiments_dataset_examples.py +87 -0
  38. phoenix/db/models.py +364 -56
  39. phoenix/db/pg_config.py +10 -0
  40. phoenix/db/types/trace_retention.py +7 -6
  41. phoenix/experiments/functions.py +69 -19
  42. phoenix/inferences/inferences.py +1 -2
  43. phoenix/server/api/auth.py +9 -0
  44. phoenix/server/api/auth_messages.py +46 -0
  45. phoenix/server/api/context.py +60 -0
  46. phoenix/server/api/dataloaders/__init__.py +36 -0
  47. phoenix/server/api/dataloaders/annotation_summaries.py +60 -8
  48. phoenix/server/api/dataloaders/average_experiment_repeated_run_group_latency.py +50 -0
  49. phoenix/server/api/dataloaders/average_experiment_run_latency.py +17 -24
  50. phoenix/server/api/dataloaders/cache/two_tier_cache.py +1 -2
  51. phoenix/server/api/dataloaders/dataset_dataset_splits.py +52 -0
  52. phoenix/server/api/dataloaders/dataset_example_revisions.py +0 -1
  53. phoenix/server/api/dataloaders/dataset_example_splits.py +40 -0
  54. phoenix/server/api/dataloaders/dataset_examples_and_versions_by_experiment_run.py +47 -0
  55. phoenix/server/api/dataloaders/dataset_labels.py +36 -0
  56. phoenix/server/api/dataloaders/document_evaluation_summaries.py +2 -2
  57. phoenix/server/api/dataloaders/document_evaluations.py +6 -9
  58. phoenix/server/api/dataloaders/experiment_annotation_summaries.py +88 -34
  59. phoenix/server/api/dataloaders/experiment_dataset_splits.py +43 -0
  60. phoenix/server/api/dataloaders/experiment_error_rates.py +21 -28
  61. phoenix/server/api/dataloaders/experiment_repeated_run_group_annotation_summaries.py +77 -0
  62. phoenix/server/api/dataloaders/experiment_repeated_run_groups.py +57 -0
  63. phoenix/server/api/dataloaders/experiment_runs_by_experiment_and_example.py +44 -0
  64. phoenix/server/api/dataloaders/latency_ms_quantile.py +40 -8
  65. phoenix/server/api/dataloaders/record_counts.py +37 -10
  66. phoenix/server/api/dataloaders/session_annotations_by_session.py +29 -0
  67. phoenix/server/api/dataloaders/span_cost_summary_by_experiment_repeated_run_group.py +64 -0
  68. phoenix/server/api/dataloaders/span_cost_summary_by_project.py +28 -14
  69. phoenix/server/api/dataloaders/span_costs.py +3 -9
  70. phoenix/server/api/dataloaders/table_fields.py +2 -2
  71. phoenix/server/api/dataloaders/token_prices_by_model.py +30 -0
  72. phoenix/server/api/dataloaders/trace_annotations_by_trace.py +27 -0
  73. phoenix/server/api/exceptions.py +5 -1
  74. phoenix/server/api/helpers/playground_clients.py +263 -83
  75. phoenix/server/api/helpers/playground_spans.py +2 -1
  76. phoenix/server/api/helpers/playground_users.py +26 -0
  77. phoenix/server/api/helpers/prompts/conversions/google.py +103 -0
  78. phoenix/server/api/helpers/prompts/models.py +61 -19
  79. phoenix/server/api/input_types/{SpanAnnotationFilter.py → AnnotationFilter.py} +22 -14
  80. phoenix/server/api/input_types/ChatCompletionInput.py +3 -0
  81. phoenix/server/api/input_types/CreateProjectSessionAnnotationInput.py +37 -0
  82. phoenix/server/api/input_types/DatasetFilter.py +5 -2
  83. phoenix/server/api/input_types/ExperimentRunSort.py +237 -0
  84. phoenix/server/api/input_types/GenerativeModelInput.py +3 -0
  85. phoenix/server/api/input_types/ProjectSessionSort.py +158 -1
  86. phoenix/server/api/input_types/PromptVersionInput.py +47 -1
  87. phoenix/server/api/input_types/SpanSort.py +3 -2
  88. phoenix/server/api/input_types/UpdateAnnotationInput.py +34 -0
  89. phoenix/server/api/input_types/UserRoleInput.py +1 -0
  90. phoenix/server/api/mutations/__init__.py +8 -0
  91. phoenix/server/api/mutations/annotation_config_mutations.py +8 -8
  92. phoenix/server/api/mutations/api_key_mutations.py +15 -20
  93. phoenix/server/api/mutations/chat_mutations.py +106 -37
  94. phoenix/server/api/mutations/dataset_label_mutations.py +243 -0
  95. phoenix/server/api/mutations/dataset_mutations.py +21 -16
  96. phoenix/server/api/mutations/dataset_split_mutations.py +351 -0
  97. phoenix/server/api/mutations/experiment_mutations.py +2 -2
  98. phoenix/server/api/mutations/export_events_mutations.py +3 -3
  99. phoenix/server/api/mutations/model_mutations.py +11 -9
  100. phoenix/server/api/mutations/project_mutations.py +4 -4
  101. phoenix/server/api/mutations/project_session_annotations_mutations.py +158 -0
  102. phoenix/server/api/mutations/project_trace_retention_policy_mutations.py +8 -4
  103. phoenix/server/api/mutations/prompt_label_mutations.py +74 -65
  104. phoenix/server/api/mutations/prompt_mutations.py +65 -129
  105. phoenix/server/api/mutations/prompt_version_tag_mutations.py +11 -8
  106. phoenix/server/api/mutations/span_annotations_mutations.py +15 -10
  107. phoenix/server/api/mutations/trace_annotations_mutations.py +13 -8
  108. phoenix/server/api/mutations/trace_mutations.py +3 -3
  109. phoenix/server/api/mutations/user_mutations.py +55 -26
  110. phoenix/server/api/queries.py +501 -617
  111. phoenix/server/api/routers/__init__.py +2 -2
  112. phoenix/server/api/routers/auth.py +141 -87
  113. phoenix/server/api/routers/ldap.py +229 -0
  114. phoenix/server/api/routers/oauth2.py +349 -101
  115. phoenix/server/api/routers/v1/__init__.py +22 -4
  116. phoenix/server/api/routers/v1/annotation_configs.py +19 -30
  117. phoenix/server/api/routers/v1/annotations.py +455 -13
  118. phoenix/server/api/routers/v1/datasets.py +355 -68
  119. phoenix/server/api/routers/v1/documents.py +142 -0
  120. phoenix/server/api/routers/v1/evaluations.py +20 -28
  121. phoenix/server/api/routers/v1/experiment_evaluations.py +16 -6
  122. phoenix/server/api/routers/v1/experiment_runs.py +335 -59
  123. phoenix/server/api/routers/v1/experiments.py +475 -47
  124. phoenix/server/api/routers/v1/projects.py +16 -50
  125. phoenix/server/api/routers/v1/prompts.py +50 -39
  126. phoenix/server/api/routers/v1/sessions.py +108 -0
  127. phoenix/server/api/routers/v1/spans.py +156 -96
  128. phoenix/server/api/routers/v1/traces.py +51 -77
  129. phoenix/server/api/routers/v1/users.py +64 -24
  130. phoenix/server/api/routers/v1/utils.py +3 -7
  131. phoenix/server/api/subscriptions.py +257 -93
  132. phoenix/server/api/types/Annotation.py +90 -23
  133. phoenix/server/api/types/ApiKey.py +13 -17
  134. phoenix/server/api/types/AuthMethod.py +1 -0
  135. phoenix/server/api/types/ChatCompletionSubscriptionPayload.py +1 -0
  136. phoenix/server/api/types/Dataset.py +199 -72
  137. phoenix/server/api/types/DatasetExample.py +88 -18
  138. phoenix/server/api/types/DatasetExperimentAnnotationSummary.py +10 -0
  139. phoenix/server/api/types/DatasetLabel.py +57 -0
  140. phoenix/server/api/types/DatasetSplit.py +98 -0
  141. phoenix/server/api/types/DatasetVersion.py +49 -4
  142. phoenix/server/api/types/DocumentAnnotation.py +212 -0
  143. phoenix/server/api/types/Experiment.py +215 -68
  144. phoenix/server/api/types/ExperimentComparison.py +3 -9
  145. phoenix/server/api/types/ExperimentRepeatedRunGroup.py +155 -0
  146. phoenix/server/api/types/ExperimentRepeatedRunGroupAnnotationSummary.py +9 -0
  147. phoenix/server/api/types/ExperimentRun.py +120 -70
  148. phoenix/server/api/types/ExperimentRunAnnotation.py +158 -39
  149. phoenix/server/api/types/GenerativeModel.py +95 -42
  150. phoenix/server/api/types/GenerativeProvider.py +1 -1
  151. phoenix/server/api/types/ModelInterface.py +7 -2
  152. phoenix/server/api/types/PlaygroundModel.py +12 -2
  153. phoenix/server/api/types/Project.py +218 -185
  154. phoenix/server/api/types/ProjectSession.py +146 -29
  155. phoenix/server/api/types/ProjectSessionAnnotation.py +187 -0
  156. phoenix/server/api/types/ProjectTraceRetentionPolicy.py +1 -1
  157. phoenix/server/api/types/Prompt.py +119 -39
  158. phoenix/server/api/types/PromptLabel.py +42 -25
  159. phoenix/server/api/types/PromptVersion.py +11 -8
  160. phoenix/server/api/types/PromptVersionTag.py +65 -25
  161. phoenix/server/api/types/Span.py +130 -123
  162. phoenix/server/api/types/SpanAnnotation.py +189 -42
  163. phoenix/server/api/types/SystemApiKey.py +65 -1
  164. phoenix/server/api/types/Trace.py +184 -53
  165. phoenix/server/api/types/TraceAnnotation.py +149 -50
  166. phoenix/server/api/types/User.py +128 -33
  167. phoenix/server/api/types/UserApiKey.py +73 -26
  168. phoenix/server/api/types/node.py +10 -0
  169. phoenix/server/api/types/pagination.py +11 -2
  170. phoenix/server/app.py +154 -36
  171. phoenix/server/authorization.py +5 -4
  172. phoenix/server/bearer_auth.py +13 -5
  173. phoenix/server/cost_tracking/cost_model_lookup.py +42 -14
  174. phoenix/server/cost_tracking/model_cost_manifest.json +1085 -194
  175. phoenix/server/daemons/generative_model_store.py +61 -9
  176. phoenix/server/daemons/span_cost_calculator.py +10 -8
  177. phoenix/server/dml_event.py +13 -0
  178. phoenix/server/email/sender.py +29 -2
  179. phoenix/server/grpc_server.py +9 -9
  180. phoenix/server/jwt_store.py +8 -6
  181. phoenix/server/ldap.py +1449 -0
  182. phoenix/server/main.py +9 -3
  183. phoenix/server/oauth2.py +330 -12
  184. phoenix/server/prometheus.py +43 -6
  185. phoenix/server/rate_limiters.py +4 -9
  186. phoenix/server/retention.py +33 -20
  187. phoenix/server/session_filters.py +49 -0
  188. phoenix/server/static/.vite/manifest.json +51 -53
  189. phoenix/server/static/assets/components-BreFUQQa.js +6702 -0
  190. phoenix/server/static/assets/{index-BPCwGQr8.js → index-CTQoemZv.js} +42 -35
  191. phoenix/server/static/assets/pages-DBE5iYM3.js +9524 -0
  192. phoenix/server/static/assets/vendor-BGzfc4EU.css +1 -0
  193. phoenix/server/static/assets/vendor-DCE4v-Ot.js +920 -0
  194. phoenix/server/static/assets/vendor-codemirror-D5f205eT.js +25 -0
  195. phoenix/server/static/assets/{vendor-recharts-Bw30oz1A.js → vendor-recharts-V9cwpXsm.js} +7 -7
  196. phoenix/server/static/assets/{vendor-shiki-DZajAPeq.js → vendor-shiki-Do--csgv.js} +1 -1
  197. phoenix/server/static/assets/vendor-three-CmB8bl_y.js +3840 -0
  198. phoenix/server/templates/index.html +7 -1
  199. phoenix/server/thread_server.py +1 -2
  200. phoenix/server/utils.py +74 -0
  201. phoenix/session/client.py +55 -1
  202. phoenix/session/data_extractor.py +5 -0
  203. phoenix/session/evaluation.py +8 -4
  204. phoenix/session/session.py +44 -8
  205. phoenix/settings.py +2 -0
  206. phoenix/trace/attributes.py +80 -13
  207. phoenix/trace/dsl/query.py +2 -0
  208. phoenix/trace/projects.py +5 -0
  209. phoenix/utilities/template_formatters.py +1 -1
  210. phoenix/version.py +1 -1
  211. phoenix/server/api/types/Evaluation.py +0 -39
  212. phoenix/server/static/assets/components-D0DWAf0l.js +0 -5650
  213. phoenix/server/static/assets/pages-Creyamao.js +0 -8612
  214. phoenix/server/static/assets/vendor-CU36oj8y.js +0 -905
  215. phoenix/server/static/assets/vendor-CqDb5u4o.css +0 -1
  216. phoenix/server/static/assets/vendor-arizeai-Ctgw0e1G.js +0 -168
  217. phoenix/server/static/assets/vendor-codemirror-Cojjzqb9.js +0 -25
  218. phoenix/server/static/assets/vendor-three-BLWp5bic.js +0 -2998
  219. phoenix/utilities/deprecation.py +0 -31
  220. {arize_phoenix-11.23.1.dist-info → arize_phoenix-12.28.1.dist-info}/entry_points.txt +0 -0
  221. {arize_phoenix-11.23.1.dist-info → arize_phoenix-12.28.1.dist-info}/licenses/LICENSE +0 -0
phoenix/db/helpers.py CHANGED
@@ -1,25 +1,28 @@
1
1
  from collections.abc import Callable, Hashable, Iterable
2
2
  from datetime import datetime
3
3
  from enum import Enum
4
- from typing import Any, Literal, Optional, TypeVar, Union
4
+ from typing import Any, Literal, Optional, Sequence, TypeVar, Union
5
5
 
6
6
  import sqlalchemy as sa
7
- from openinference.semconv.trace import (
8
- OpenInferenceSpanKindValues,
9
- RerankerAttributes,
10
- SpanAttributes,
11
- )
12
7
  from sqlalchemy import (
13
- Integer,
8
+ Insert,
14
9
  Select,
15
10
  SQLColumnExpression,
16
11
  and_,
17
12
  case,
18
13
  distinct,
14
+ exists,
19
15
  func,
16
+ insert,
17
+ literal,
18
+ literal_column,
19
+ or_,
20
20
  select,
21
+ util,
21
22
  )
23
+ from sqlalchemy.ext.asyncio import AsyncSession
22
24
  from sqlalchemy.orm import QueryableAttribute
25
+ from sqlalchemy.sql.roles import InElementRole
23
26
  from typing_extensions import assert_never
24
27
 
25
28
  from phoenix.config import PLAYGROUND_PROJECT_NAME
@@ -37,30 +40,6 @@ class SupportedSQLDialect(Enum):
37
40
  raise ValueError(f"`{v}` is not a supported SQL backend/dialect.")
38
41
 
39
42
 
40
- def num_docs_col(dialect: SupportedSQLDialect) -> SQLColumnExpression[Integer]:
41
- if dialect is SupportedSQLDialect.POSTGRESQL:
42
- array_length = func.jsonb_array_length
43
- elif dialect is SupportedSQLDialect.SQLITE:
44
- array_length = func.json_array_length
45
- else:
46
- assert_never(dialect)
47
- retrieval_docs = models.Span.attributes[_RETRIEVAL_DOCUMENTS]
48
- num_retrieval_docs = array_length(retrieval_docs)
49
- reranker_docs = models.Span.attributes[_RERANKER_OUTPUT_DOCUMENTS]
50
- num_reranker_docs = array_length(reranker_docs)
51
- return case(
52
- (
53
- func.upper(models.Span.span_kind) == OpenInferenceSpanKindValues.RERANKER.value.upper(),
54
- num_reranker_docs,
55
- ),
56
- else_=num_retrieval_docs,
57
- ).label("num_docs")
58
-
59
-
60
- _RETRIEVAL_DOCUMENTS = SpanAttributes.RETRIEVAL_DOCUMENTS.split(".")
61
- _RERANKER_OUTPUT_DOCUMENTS = RerankerAttributes.RERANKER_OUTPUT_DOCUMENTS.split(".")
62
-
63
-
64
43
  def get_eval_trace_ids_for_datasets(*dataset_ids: int) -> Select[tuple[Optional[str]]]:
65
44
  return (
66
45
  select(distinct(models.ExperimentRunAnnotation.trace_id))
@@ -118,50 +97,204 @@ def dedup(
118
97
  return ans
119
98
 
120
99
 
121
- def get_dataset_example_revisions(
100
def _build_ranked_revisions_query(
    dataset_version_id: int,
    /,
    *,
    dataset_id: Optional[int] = None,
    example_ids: Optional[Union[Sequence[int], InElementRole]] = None,
) -> Select[tuple[int]]:
    """
    Rank every revision of every example at or before a dataset version.

    Uses ROW_NUMBER() partitioned by example and ordered by descending
    version id, so the row labeled ``rn == 1`` is each example's latest
    revision within the given version.

    Args:
        dataset_version_id: Upper bound (inclusive) on revision versions.
        dataset_id: When supplied, filters directly on the dataset id and
            skips the scalar-subquery lookup of the version's dataset.
        example_ids: Optional restriction to specific example ids; may be a
            concrete sequence or an IN-capable subquery.

    Returns:
        A SELECT of the "rn" ranking column, joined to DatasetExample and
        filtered to the relevant dataset (and examples, if requested).
    """
    # Resolve the dataset filter first: either the caller told us the
    # dataset id, or we look it up from the version row via a scalar subquery.
    if dataset_id is not None:
        dataset_filter = models.DatasetExample.dataset_id == dataset_id
    else:
        owning_dataset_id = (
            select(models.DatasetVersion.dataset_id)
            .filter_by(id=dataset_version_id)
            .scalar_subquery()
        )
        dataset_filter = models.DatasetExample.dataset_id == owning_dataset_id

    rank_column = (
        func.row_number()
        .over(
            partition_by=models.DatasetExampleRevision.dataset_example_id,
            order_by=models.DatasetExampleRevision.dataset_version_id.desc(),
        )
        .label("rn")
    )

    query = (
        select(rank_column)
        .join(models.DatasetExample)
        .where(models.DatasetExampleRevision.dataset_version_id <= dataset_version_id)
        .where(dataset_filter)
    )

    if example_ids is not None:
        query = query.where(models.DatasetExampleRevision.dataset_example_id.in_(example_ids))

    return query
147
+
148
+
149
def get_dataset_example_revisions(
    dataset_version_id: int,
    /,
    *,
    dataset_id: Optional[int] = None,
    example_ids: Optional[Union[Sequence[int], InElementRole]] = None,
    split_ids: Optional[Union[Sequence[int], InElementRole]] = None,
    split_names: Optional[Union[Sequence[str], InElementRole]] = None,
) -> Select[tuple[models.DatasetExampleRevision]]:
    """
    Select each example's latest revision as of a dataset version.

    Examples whose latest revision is a DELETE are excluded.

    Args:
        dataset_version_id: The dataset version to resolve revisions for.
        dataset_id: Optional dataset id; when provided, skips an extra
            subquery lookup.
        example_ids: Optional restriction to specific example ids
            (sequence or subquery). None means no filtering; an empty
            input matches nothing.
        split_ids: Optional restriction by split ids (sequence or
            subquery). None means no filtering; an empty input matches
            nothing.
        split_names: Optional restriction by split names (sequence or
            subquery). None means no filtering; an empty input matches
            nothing.

    Raises:
        ValueError: If both split_ids and split_names are given — they
            are mutually exclusive. Prefer split_ids when available, as
            it avoids a JOIN against the splits table.
    """
    if split_ids is not None and split_names is not None:
        raise ValueError(
            "Cannot specify both split_ids and split_names - they are mutually exclusive"
        )

    # Ranking query plus the columns needed to re-join and filter below.
    ranked = _build_ranked_revisions_query(
        dataset_version_id,
        dataset_id=dataset_id,
        example_ids=example_ids,
    ).add_columns(
        models.DatasetExampleRevision.id,
        models.DatasetExampleRevision.revision_kind,
    )

    # Narrow to split membership when requested (by name or by id).
    if split_names is not None:
        member_ids = (
            select(models.DatasetSplitDatasetExample.dataset_example_id)
            .join(
                models.DatasetSplit,
                models.DatasetSplit.id == models.DatasetSplitDatasetExample.dataset_split_id,
            )
            .where(models.DatasetSplit.name.in_(split_names))
        )
        ranked = ranked.where(models.DatasetExample.id.in_(member_ids))
    elif split_ids is not None:
        member_ids = select(models.DatasetSplitDatasetExample.dataset_example_id).where(
            models.DatasetSplitDatasetExample.dataset_split_id.in_(split_ids)
        )
        ranked = ranked.where(models.DatasetExample.id.in_(member_ids))

    # Keep only each example's newest revision (rn == 1), dropping DELETEs.
    latest = ranked.subquery()
    return (
        select(models.DatasetExampleRevision)
        .join(
            latest,
            models.DatasetExampleRevision.id == latest.c.id,
        )
        .where(
            latest.c.rn == 1,
            latest.c.revision_kind != "DELETE",
        )
    )
+ )
225
+
226
+
227
def create_experiment_examples_snapshot_insert(
    experiment: models.Experiment,
) -> Insert:
    """
    Build an INSERT that snapshots the experiment's dataset examples.

    The snapshot records which examples (and which revision of each)
    belong to the experiment at creation time. When the experiment has
    dataset splits assigned, only examples in those splits are captured;
    otherwise every example in the dataset version is captured.

    Args:
        experiment: The (already flushed) experiment row to snapshot for.

    Returns:
        An executable SQLAlchemy INSERT ... FROM SELECT statement.
    """
    ranked = _build_ranked_revisions_query(
        experiment.dataset_version_id,
        dataset_id=experiment.dataset_id,
    ).add_columns(
        models.DatasetExampleRevision.id,
        models.DatasetExampleRevision.dataset_example_id,
        models.DatasetExampleRevision.revision_kind,
    )

    # Split ids assigned to this experiment, and the example ids that
    # belong to any of those splits.
    assigned_split_ids = select(models.ExperimentDatasetSplit.dataset_split_id).where(
        models.ExperimentDatasetSplit.experiment_id == experiment.id
    )
    experiment_has_splits = exists(assigned_split_ids)
    examples_in_splits = select(models.DatasetSplitDatasetExample.dataset_example_id).where(
        models.DatasetSplitDatasetExample.dataset_split_id.in_(assigned_split_ids)
    )

    ranked = ranked.where(
        or_(
            ~experiment_has_splits,  # No splits = include all examples
            models.DatasetExampleRevision.dataset_example_id.in_(
                examples_in_splits
            ),  # Has splits = filter by splits
        )
    )

    latest = ranked.subquery()
    # Insert only each example's newest non-DELETE revision (rn == 1).
    snapshot_rows = select(
        literal(experiment.id),
        latest.c.dataset_example_id,
        latest.c.id,
    ).where(
        latest.c.rn == 1,
        latest.c.revision_kind != "DELETE",
    )
    return insert(models.ExperimentDatasetExample).from_select(
        [
            models.ExperimentDatasetExample.experiment_id,
            models.ExperimentDatasetExample.dataset_example_id,
            models.ExperimentDatasetExample.dataset_example_revision_id,
        ],
        snapshot_rows,
    )
284
+
285
+
286
async def insert_experiment_with_examples_snapshot(
    session: AsyncSession,
    experiment: models.Experiment,
) -> None:
    """
    Persist an experiment together with its dataset-example snapshot.

    The experiment is flushed first so that its generated primary key is
    available to the snapshot INSERT statement.
    """
    session.add(experiment)
    await session.flush()  # assigns experiment.id, needed by the snapshot
    await session.execute(create_experiment_examples_snapshot_insert(experiment))
297
+
165
298
 
166
299
  _AnyTuple = TypeVar("_AnyTuple", bound=tuple[Any, ...])
167
300
 
@@ -355,3 +488,623 @@ def get_ancestor_span_rowids(parent_id: str) -> Select[tuple[int]]:
355
488
  )
356
489
  )
357
490
  return select(ancestors.c.id)
491
+
492
+
493
def truncate_name(name: str, max_len: int = 63) -> str:
    """
    Truncate an identifier to at most ``max_len`` characters.

    Mirrors SQLAlchemy's identifier-truncation scheme: keep a prefix of
    ``max_len - 8`` characters, then append ``"_"`` plus the last 4 hex
    digits of the name's MD5, so distinct long names remain distinct.
    https://github.com/sqlalchemy/sqlalchemy/blob/e263825e3c5060bf4f47eed0e833c6660a31658e/lib/sqlalchemy/sql/compiler.py#L7844-L7845

    Args:
        name: The identifier to (possibly) truncate.
        max_len: Maximum allowed length; defaults to 63, the PostgreSQL
            identifier limit.

    Returns:
        ``name`` unchanged when it already fits, otherwise the truncated
        form of length ``max_len - 3``.
    """
    # Use stdlib hashlib directly instead of SQLAlchemy's private
    # ``util.md5_hex`` helper (an internal API with no stability
    # guarantee); ``md5_hex(x)`` is exactly md5(x.encode("utf-8")).hexdigest().
    import hashlib

    if len(name) > max_len:
        digest = hashlib.md5(name.encode("utf-8")).hexdigest()
        return name[0 : max_len - 8] + "_" + digest[-4:]
    return name
498
+
499
+
500
def get_successful_run_counts_subquery(
    experiment_id: int,
    repetitions: int,
) -> Any:
    """
    Count successful runs per dataset example for an experiment.

    Outer-joins the experiment's snapshotted examples against their runs
    and sums a CASE expression that is 1 only for runs that both exist
    and have no error. The HAVING clause keeps only examples whose
    successful-run count is below the required number of repetitions.

    Args:
        experiment_id: The experiment whose runs are counted.
        repetitions: Required repetitions per example.

    Returns:
        A subquery exposing dataset_example_revision_id,
        dataset_example_id, and successful_count.
    """
    # A run contributes 1 only when it exists AND succeeded. The explicit
    # IS NOT NULL check on the run id is what distinguishes "no run at
    # all" from "a successful run" on the outer-join side.
    is_successful_run = case(
        (
            and_(
                models.ExperimentRun.id.is_not(None),  # Run exists
                models.ExperimentRun.error.is_(None),  # No error (successful)
            ),
            1,
        ),
        else_=0,
    )

    runs_for_this_example = and_(
        models.ExperimentRun.experiment_id == experiment_id,
        models.ExperimentRun.dataset_example_id
        == models.ExperimentDatasetExample.dataset_example_id,
    )

    counted = (
        select(
            models.ExperimentDatasetExample.dataset_example_revision_id,
            models.ExperimentDatasetExample.dataset_example_id,
            func.sum(is_successful_run).label("successful_count"),
        )
        .select_from(models.ExperimentDatasetExample)
        .outerjoin(models.ExperimentRun, runs_for_this_example)
        .where(models.ExperimentDatasetExample.experiment_id == experiment_id)
        .group_by(
            models.ExperimentDatasetExample.dataset_example_revision_id,
            models.ExperimentDatasetExample.dataset_example_id,
        )
        .having(
            # Keep only examples that still need runs (count < repetitions).
            func.coalesce(func.sum(is_successful_run), 0) < repetitions
        )
    )
    return counted.subquery()
561
+
562
+
563
def generate_expected_repetitions_cte(
    dialect: SupportedSQLDialect,
    run_counts_subquery: Any,
    repetitions: int,
) -> Any:
    """
    Produce a CTE of expected repetition numbers for partially complete examples.

    For each example with 0 < successful_count < repetitions, emits one row
    per expected repetition number in [1..repetitions]. The mechanism is
    dialect-specific: PostgreSQL uses generate_series, while SQLite builds
    the sequence with a recursive CTE.

    Args:
        dialect: Target SQL dialect (PostgreSQL or SQLite).
        run_counts_subquery: Output of get_successful_run_counts_subquery,
            with dataset_example_revision_id, dataset_example_id, and
            successful_count columns.
        repetitions: Total repetitions required per example.

    Returns:
        A CTE with dataset_example_revision_id, dataset_example_id,
        successful_count, and repetition_number columns.
    """
    # Restrict to examples that have at least one successful run; fully
    # unstarted examples are handled elsewhere.
    partially_complete = run_counts_subquery.c.successful_count > 0
    carried_columns = (
        run_counts_subquery.c.dataset_example_revision_id,
        run_counts_subquery.c.dataset_example_id,
        run_counts_subquery.c.successful_count,
    )

    if dialect is SupportedSQLDialect.POSTGRESQL:
        # generate_series expands each example row into `repetitions` rows.
        return (
            select(
                *carried_columns,
                func.generate_series(1, repetitions).label("repetition_number"),
            )
            .select_from(run_counts_subquery)
            .where(partially_complete)
            .cte("expected_runs")
        )
    if dialect is SupportedSQLDialect.SQLITE:
        # Anchor: repetition_number = 1 for each partially complete example.
        seed = (
            select(
                *carried_columns,
                literal_column("1").label("repetition_number"),
            )
            .select_from(run_counts_subquery)
            .where(partially_complete)
            .cte("expected_runs", recursive=True)
        )
        # Recursive step: keep incrementing until `repetitions` is reached.
        step = select(
            seed.c.dataset_example_revision_id,
            seed.c.dataset_example_id,
            seed.c.successful_count,
            (seed.c.repetition_number + 1).label("repetition_number"),
        ).where(seed.c.repetition_number < repetitions)
        return seed.union_all(step)
    assert_never(dialect)
633
+
634
+
635
def get_incomplete_repetitions_query(
    dialect: SupportedSQLDialect,
    expected_runs_cte: Any,
    experiment_id: int,
) -> Select[tuple[Any, Any, Any]]:
    """
    Find which repetition numbers are missing for partially complete examples.

    Outer-joins the expected repetition numbers against actual successful
    runs; rows with no matching run are the incomplete repetitions, which
    are then aggregated per example into an array (PostgreSQL) or JSON
    array string (SQLite).

    Args:
        dialect: Target SQL dialect (PostgreSQL or SQLite).
        expected_runs_cte: CTE from generate_expected_repetitions_cte.
        experiment_id: The experiment whose runs are examined.

    Returns:
        A SELECT of dataset_example_revision_id, successful_count, and
        incomplete_reps (array or JSON array of repetition numbers).
    """
    # Dialect-specific aggregation of the missing repetition numbers.
    if dialect is SupportedSQLDialect.POSTGRESQL:
        missing_reps = func.coalesce(
            func.array_agg(expected_runs_cte.c.repetition_number),
            literal_column("ARRAY[]::int[]"),
        )
    elif dialect is SupportedSQLDialect.SQLITE:
        missing_reps = func.coalesce(
            func.json_group_array(expected_runs_cte.c.repetition_number),
            literal_column("'[]'"),
        )
    else:
        assert_never(dialect)

    # Join only on successful runs, so failed/missing repetitions come
    # through as NULL on the run side of the outer join.
    matches_successful_run = and_(
        models.ExperimentRun.experiment_id == experiment_id,
        models.ExperimentRun.dataset_example_id == expected_runs_cte.c.dataset_example_id,
        models.ExperimentRun.repetition_number == expected_runs_cte.c.repetition_number,
        models.ExperimentRun.error.is_(None),
    )

    return (
        select(
            expected_runs_cte.c.dataset_example_revision_id,
            expected_runs_cte.c.successful_count,
            missing_reps.label("incomplete_reps"),
        )
        .select_from(expected_runs_cte)
        .outerjoin(models.ExperimentRun, matches_successful_run)
        .where(
            # NULL run id means this repetition has no successful run.
            models.ExperimentRun.id.is_(None)
        )
        .group_by(
            expected_runs_cte.c.dataset_example_revision_id,
            expected_runs_cte.c.successful_count,
        )
    )
703
+
704
+
705
def get_incomplete_runs_with_revisions_query(
    incomplete_runs_subquery: Any,
    *,
    cursor_example_rowid: Optional[int] = None,
    limit: Optional[int] = None,
) -> Select[tuple[models.DatasetExampleRevision, Any, Any]]:
    """
    Build the main query that joins incomplete runs with dataset example revisions.

    Joins a subquery of incomplete-run information with the DatasetExampleRevision
    table to retrieve the full example data, applying cursor-based pagination for
    efficient retrieval of large result sets.

    Args:
        incomplete_runs_subquery: Subquery with columns:
            - dataset_example_revision_id: ID of the example revision
            - successful_count: Count of successful runs for this example
            - incomplete_reps: Array/JSON array of incomplete repetition numbers
        cursor_example_rowid: Optional cursor position (dataset_example_id) for pagination.
            When provided, only returns examples with ID >= cursor_example_rowid
        limit: Optional maximum number of results to return. If provided, the query
            will fetch limit+1 rows to enable next-page detection

    Returns:
        SQLAlchemy SELECT query with columns:
        - DatasetExampleRevision: The full revision object
        - successful_count: Count of successful runs
        - incomplete_reps: Array/JSON array of incomplete repetition numbers

    Note:
        Results are ordered by dataset_example_id ascending for consistent pagination.
        When using limit, fetch one extra row to check if there's a next page.
    """
    revision = models.DatasetExampleRevision

    query = (
        select(
            revision,
            incomplete_runs_subquery.c.successful_count,
            incomplete_runs_subquery.c.incomplete_reps,
        )
        .select_from(incomplete_runs_subquery)
        .join(
            revision,
            revision.id == incomplete_runs_subquery.c.dataset_example_revision_id,
        )
        .order_by(revision.dataset_example_id.asc())
    )

    # Cursor filtering happens in SQL so pagination stays cheap on large datasets.
    if cursor_example_rowid is not None:
        query = query.where(revision.dataset_example_id >= cursor_example_rowid)

    # One extra row lets the caller detect whether a next page exists.
    if limit is not None:
        query = query.limit(limit + 1)

    return query
762
+
763
+
764
def get_successful_experiment_runs_query(
    experiment_id: int,
    *,
    cursor_run_rowid: Optional[int] = None,
    limit: Optional[int] = None,
) -> Select[tuple[models.ExperimentRun, int]]:
    """
    Build a query for successful experiment runs with their dataset example revision IDs.

    Retrieves all experiment runs that completed successfully (error IS NULL),
    joined with ExperimentDatasetExample to obtain the revision IDs. Results are
    ordered by run ID ascending for consistent pagination.

    Args:
        experiment_id: The experiment ID to query runs for
        cursor_run_rowid: Optional cursor position (experiment_run_id) for pagination.
            When provided, only returns runs with ID >= cursor_run_rowid
        limit: Optional maximum number of results to return. If provided, the query
            will fetch limit+1 rows to enable next-page detection

    Returns:
        SQLAlchemy SELECT query with columns:
        - ExperimentRun: The full experiment run object
        - dataset_example_revision_id: ID of the dataset example revision (int)

    Note:
        - Only includes successful runs (error IS NULL)
        - Results ordered by run ID ascending for consistent pagination
        - When using limit, fetch one extra row to check if there's a next page
    """
    run = models.ExperimentRun
    example_link = models.ExperimentDatasetExample

    query = (
        select(run, example_link.dataset_example_revision_id)
        .join(
            example_link,
            and_(
                example_link.experiment_id == experiment_id,
                example_link.dataset_example_id == run.dataset_example_id,
            ),
        )
        .where(run.experiment_id == experiment_id)
        .where(run.error.is_(None))  # Only successful task runs
        .order_by(run.id.asc())
    )

    if cursor_run_rowid is not None:
        query = query.where(run.id >= cursor_run_rowid)

    if limit is not None:
        # One extra row enables next-page detection.
        query = query.limit(limit + 1)

    return query
823
+
824
+
825
def get_experiment_run_annotations_query(
    run_ids: Sequence[int],
    evaluation_names: Sequence[str],
) -> Select[tuple[int, str, Optional[str]]]:
    """
    Build a query to get annotations for specific runs and evaluation names.

    Retrieves annotations (evaluations) for a set of experiment runs, filtered by
    specific evaluation names, returning only the fields needed to determine
    whether an evaluation is complete or has errors.

    Args:
        run_ids: List of experiment run IDs to query annotations for
        evaluation_names: List of evaluation names to filter by

    Returns:
        SQLAlchemy SELECT query with columns:
        - experiment_run_id: ID of the experiment run (int)
        - name: Name of the evaluation (str)
        - error: Error message if evaluation failed, None if successful (Optional[str])

    Example:
        >>> run_ids = [1, 2, 3]
        >>> eval_names = ["relevance", "coherence"]
        >>> query = get_experiment_run_annotations_query(run_ids, eval_names)
        >>> results = await session.execute(query)
        >>> for run_id, name, error in results:
        ...     # Process annotations...
    """
    annotation = models.ExperimentRunAnnotation
    # Both conditions are combined with AND by the single where() call.
    return select(
        annotation.experiment_run_id,
        annotation.name,
        annotation.error,
    ).where(
        annotation.experiment_run_id.in_(run_ids),
        annotation.name.in_(evaluation_names),
    )
863
+
864
+
865
def get_runs_with_incomplete_evaluations_query(
    experiment_id: int,
    evaluation_names: Sequence[str],
    dialect: SupportedSQLDialect,
    *,
    cursor_run_rowid: Optional[int] = None,
    limit: Optional[int] = None,
    include_annotations_and_revisions: bool = False,
) -> Select[Any]:
    """
    Get experiment runs that have incomplete evaluations.

    A run has incomplete evaluations if it's missing successful annotations for any of
    the requested evaluation names. Both missing (no annotation) and failed (error != NULL)
    evaluations are considered incomplete.

    Args:
        experiment_id: The experiment ID to query
        evaluation_names: Evaluation names to check for completeness
        dialect: SQL dialect (PostgreSQL or SQLite)
        cursor_run_rowid: Optional run ID for cursor-based pagination
        limit: Optional limit (fetches limit+1 for next-page detection)
        include_annotations_and_revisions: If True, also fetch revision and successful
            annotation names as JSON array

    Returns:
        Query returning (ExperimentRun, revision_id, [revision, annotations_json])
        Results ordered by run ID ascending
    """
    # Subquery: Count successful annotations per run
    successful_annotations_count = (
        select(
            models.ExperimentRunAnnotation.experiment_run_id,
            func.count().label("successful_count"),
        )
        .where(
            models.ExperimentRunAnnotation.name.in_(evaluation_names),
            models.ExperimentRunAnnotation.error.is_(None),
        )
        .group_by(models.ExperimentRunAnnotation.experiment_run_id)
        .subquery()
    )

    # Base query: Find runs where successful_count < required evaluations.
    # Runs with no successful annotations have no row in the subquery, so the
    # outer join yields NULL and coalesce treats them as zero.
    stmt = (
        select(
            models.ExperimentRun,
            models.ExperimentDatasetExample.dataset_example_revision_id,
        )
        .join(
            models.ExperimentDatasetExample,
            and_(
                models.ExperimentDatasetExample.experiment_id == experiment_id,
                models.ExperimentDatasetExample.dataset_example_id
                == models.ExperimentRun.dataset_example_id,
            ),
        )
        .outerjoin(
            successful_annotations_count,
            successful_annotations_count.c.experiment_run_id == models.ExperimentRun.id,
        )
        .where(
            models.ExperimentRun.experiment_id == experiment_id,
            models.ExperimentRun.error.is_(None),  # Only successful task runs
            func.coalesce(successful_annotations_count.c.successful_count, 0)
            < len(evaluation_names),
        )
    )

    # Optionally include revisions and successful annotation names
    if include_annotations_and_revisions:
        # Subquery: Aggregate successful annotation names as JSON array
        if dialect is SupportedSQLDialect.POSTGRESQL:
            json_agg_expr = func.cast(
                func.coalesce(
                    func.json_agg(models.ExperimentRunAnnotation.name),
                    literal_column("'[]'::json"),
                ),
                sa.String,
            )
        elif dialect is SupportedSQLDialect.SQLITE:
            json_agg_expr = func.cast(
                func.coalesce(
                    func.json_group_array(models.ExperimentRunAnnotation.name),
                    literal_column("'[]'"),
                ),
                sa.String,
            )
        else:
            # Exhaustiveness check, consistent with the other dialect branches
            # in this module: fail fast if a new dialect is ever added instead
            # of silently falling through to SQLite syntax.
            assert_never(dialect)

        successful_annotations_json = (
            select(
                models.ExperimentRunAnnotation.experiment_run_id,
                json_agg_expr.label("annotations_json"),
            )
            .where(
                models.ExperimentRunAnnotation.name.in_(evaluation_names),
                models.ExperimentRunAnnotation.error.is_(None),
            )
            .group_by(models.ExperimentRunAnnotation.experiment_run_id)
            .subquery()
        )

        stmt = (
            stmt.add_columns(
                models.DatasetExampleRevision,
                successful_annotations_json.c.annotations_json,
            )
            .join(
                models.DatasetExampleRevision,
                models.DatasetExampleRevision.id
                == models.ExperimentDatasetExample.dataset_example_revision_id,
            )
            .outerjoin(
                successful_annotations_json,
                successful_annotations_json.c.experiment_run_id == models.ExperimentRun.id,
            )
        )

    # Apply ordering, cursor, and limit
    stmt = stmt.order_by(models.ExperimentRun.id.asc())

    if cursor_run_rowid is not None:
        stmt = stmt.where(models.ExperimentRun.id >= cursor_run_rowid)

    if limit is not None:
        # Fetch one extra row so the caller can detect a next page.
        stmt = stmt.limit(limit + 1)

    return stmt
993
+
994
+
995
def _empty_incomplete_reps_literal(dialect: SupportedSQLDialect) -> Any:
    """Return the dialect-specific SQL literal for an empty incomplete_reps value."""
    if dialect is SupportedSQLDialect.POSTGRESQL:
        return literal_column("ARRAY[]::int[]")
    elif dialect is SupportedSQLDialect.SQLITE:
        return literal_column("'[]'")
    else:
        assert_never(dialect)


def get_experiment_incomplete_runs_query(
    experiment: models.Experiment,
    dialect: SupportedSQLDialect,
    *,
    cursor_example_rowid: Optional[int] = None,
    limit: Optional[int] = None,
) -> Select[tuple[models.DatasetExampleRevision, Any, Any]]:
    """
    High-level helper to build a complete query for incomplete runs in an experiment.

    This is the main entry point for querying incomplete runs. It encapsulates all the
    logic for finding runs that need to be completed, including both missing runs
    (not yet attempted) and failed runs (attempted but have errors).

    The function automatically chooses the optimal query strategy:
    - For repetitions=1: Simple fast path (no CTE needed)
    - For repetitions>1: Two-path optimization separating completely missing examples
      from partially complete examples

    Args:
        experiment: The Experiment model instance to query incomplete runs for
        dialect: The SQL dialect to use (PostgreSQL or SQLite)
        cursor_example_rowid: Optional cursor position (dataset_example_id) for pagination.
            When provided, only returns examples with ID >= cursor_example_rowid
        limit: Optional maximum number of results to return. If provided, the query
            will fetch limit+1 rows to enable next-page detection

    Returns:
        SQLAlchemy SELECT query with columns:
        - DatasetExampleRevision: The full revision object with example data
        - successful_count: Count of successful runs for this example (int)
        - incomplete_reps: Incomplete repetition numbers as:
            * PostgreSQL: Array of ints (or empty array for completely missing)
            * SQLite: JSON string array (or '[]' for completely missing)

    Note:
        For completely missing examples (successful_count=0), the incomplete_reps
        column will be an empty array/JSON. Callers should generate the full
        [1..repetitions] list when successful_count=0.

    Example:
        >>> experiment = session.get(models.Experiment, experiment_id)
        >>> dialect = SupportedSQLDialect(session.bind.dialect.name)
        >>> query = get_experiment_incomplete_runs_query(
        ...     experiment, dialect, cursor_example_rowid=100, limit=50
        ... )
        >>> results = await session.execute(query)
        >>> for revision, success_count, incomplete_reps in results:
        ...     # Process incomplete runs...
    """
    # Step 1: Get successful run counts for incomplete examples
    run_counts_subquery = get_successful_run_counts_subquery(experiment.id, experiment.repetitions)

    # Step 2: Build the combined incomplete runs subquery
    # The strategy depends on whether repetitions=1 or >1
    if experiment.repetitions == 1:
        # Fast path optimization for repetitions=1:
        # All incomplete examples have successful_count=0, so we can skip the expensive CTE
        combined_incomplete = (
            select(
                run_counts_subquery.c.dataset_example_revision_id,
                run_counts_subquery.c.successful_count,
                _empty_incomplete_reps_literal(dialect).label("incomplete_reps"),
            ).select_from(run_counts_subquery)
        ).subquery()
    else:
        # Two-path optimization for repetitions > 1:
        # Path 1: Completely missing examples (successful_count = 0) - no CTE needed
        # Path 2: Partially complete examples (0 < successful_count < R) - use CTE

        # Path 1: Completely missing examples
        completely_missing_stmt = (
            select(
                run_counts_subquery.c.dataset_example_revision_id,
                run_counts_subquery.c.successful_count,
                _empty_incomplete_reps_literal(dialect).label("incomplete_reps"),
            )
            .select_from(run_counts_subquery)
            .where(run_counts_subquery.c.successful_count == 0)
        )

        # Path 2: Partially complete examples
        expected_runs_cte = generate_expected_repetitions_cte(
            dialect, run_counts_subquery, experiment.repetitions
        )
        partially_complete_stmt = get_incomplete_repetitions_query(
            dialect, expected_runs_cte, experiment.id
        )

        # Combine both paths (sa is the module-level sqlalchemy import; no
        # function-level import needed).
        combined_incomplete = sa.union_all(
            completely_missing_stmt, partially_complete_stmt
        ).subquery()

    # Step 3: Join with revisions and apply pagination
    return get_incomplete_runs_with_revisions_query(
        combined_incomplete,
        cursor_example_rowid=cursor_example_rowid,
        limit=limit,
    )