PyPI - arize-phoenix - Versions diffs - 10.0.4__py3-none-any.whl → 12.28.1__py3-none-any.whl - Mend

arize-phoenix 10.0.4__py3-none-any.whl → 12.28.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (276) hide show

{arize_phoenix-10.0.4.dist-info → arize_phoenix-12.28.1.dist-info}/METADATA +124 -72
arize_phoenix-12.28.1.dist-info/RECORD +499 -0
{arize_phoenix-10.0.4.dist-info → arize_phoenix-12.28.1.dist-info}/WHEEL +1 -1
{arize_phoenix-10.0.4.dist-info → arize_phoenix-12.28.1.dist-info}/licenses/IP_NOTICE +1 -1
phoenix/__generated__/__init__.py +0 -0
phoenix/__generated__/classification_evaluator_configs/__init__.py +20 -0
phoenix/__generated__/classification_evaluator_configs/_document_relevance_classification_evaluator_config.py +17 -0
phoenix/__generated__/classification_evaluator_configs/_hallucination_classification_evaluator_config.py +17 -0
phoenix/__generated__/classification_evaluator_configs/_models.py +18 -0
phoenix/__generated__/classification_evaluator_configs/_tool_selection_classification_evaluator_config.py +17 -0
phoenix/__init__.py +5 -4
phoenix/auth.py +39 -2
phoenix/config.py +1763 -91
phoenix/datetime_utils.py +120 -2
phoenix/db/README.md +595 -25
phoenix/db/bulk_inserter.py +145 -103
phoenix/db/engines.py +140 -33
phoenix/db/enums.py +3 -12
phoenix/db/facilitator.py +302 -35
phoenix/db/helpers.py +1000 -65
phoenix/db/iam_auth.py +64 -0
phoenix/db/insertion/dataset.py +135 -2
phoenix/db/insertion/document_annotation.py +9 -6
phoenix/db/insertion/evaluation.py +2 -3
phoenix/db/insertion/helpers.py +17 -2
phoenix/db/insertion/session_annotation.py +176 -0
phoenix/db/insertion/span.py +15 -11
phoenix/db/insertion/span_annotation.py +3 -4
phoenix/db/insertion/trace_annotation.py +3 -4
phoenix/db/insertion/types.py +50 -20
phoenix/db/migrations/versions/01a8342c9cdf_add_user_id_on_datasets.py +40 -0
phoenix/db/migrations/versions/0df286449799_add_session_annotations_table.py +105 -0
phoenix/db/migrations/versions/272b66ff50f8_drop_single_indices.py +119 -0
phoenix/db/migrations/versions/58228d933c91_dataset_labels.py +67 -0
phoenix/db/migrations/versions/699f655af132_experiment_tags.py +57 -0
phoenix/db/migrations/versions/735d3d93c33e_add_composite_indices.py +41 -0
phoenix/db/migrations/versions/a20694b15f82_cost.py +196 -0
phoenix/db/migrations/versions/ab513d89518b_add_user_id_on_dataset_versions.py +40 -0
phoenix/db/migrations/versions/d0690a79ea51_users_on_experiments.py +40 -0
phoenix/db/migrations/versions/deb2c81c0bb2_dataset_splits.py +139 -0
phoenix/db/migrations/versions/e76cbd66ffc3_add_experiments_dataset_examples.py +87 -0
phoenix/db/models.py +669 -56
phoenix/db/pg_config.py +10 -0
phoenix/db/types/model_provider.py +4 -0
phoenix/db/types/token_price_customization.py +29 -0
phoenix/db/types/trace_retention.py +23 -15
phoenix/experiments/evaluators/utils.py +3 -3
phoenix/experiments/functions.py +160 -52
phoenix/experiments/tracing.py +2 -2
phoenix/experiments/types.py +1 -1
phoenix/inferences/inferences.py +1 -2
phoenix/server/api/auth.py +38 -7
phoenix/server/api/auth_messages.py +46 -0
phoenix/server/api/context.py +100 -4
phoenix/server/api/dataloaders/__init__.py +79 -5
phoenix/server/api/dataloaders/annotation_configs_by_project.py +31 -0
phoenix/server/api/dataloaders/annotation_summaries.py +60 -8
phoenix/server/api/dataloaders/average_experiment_repeated_run_group_latency.py +50 -0
phoenix/server/api/dataloaders/average_experiment_run_latency.py +17 -24
phoenix/server/api/dataloaders/cache/two_tier_cache.py +1 -2
phoenix/server/api/dataloaders/dataset_dataset_splits.py +52 -0
phoenix/server/api/dataloaders/dataset_example_revisions.py +0 -1
phoenix/server/api/dataloaders/dataset_example_splits.py +40 -0
phoenix/server/api/dataloaders/dataset_examples_and_versions_by_experiment_run.py +47 -0
phoenix/server/api/dataloaders/dataset_labels.py +36 -0
phoenix/server/api/dataloaders/document_evaluation_summaries.py +2 -2
phoenix/server/api/dataloaders/document_evaluations.py +6 -9
phoenix/server/api/dataloaders/experiment_annotation_summaries.py +88 -34
phoenix/server/api/dataloaders/experiment_dataset_splits.py +43 -0
phoenix/server/api/dataloaders/experiment_error_rates.py +21 -28
phoenix/server/api/dataloaders/experiment_repeated_run_group_annotation_summaries.py +77 -0
phoenix/server/api/dataloaders/experiment_repeated_run_groups.py +57 -0
phoenix/server/api/dataloaders/experiment_runs_by_experiment_and_example.py +44 -0
phoenix/server/api/dataloaders/last_used_times_by_generative_model_id.py +35 -0
phoenix/server/api/dataloaders/latency_ms_quantile.py +40 -8
phoenix/server/api/dataloaders/record_counts.py +37 -10
phoenix/server/api/dataloaders/session_annotations_by_session.py +29 -0
phoenix/server/api/dataloaders/span_cost_by_span.py +24 -0
phoenix/server/api/dataloaders/span_cost_detail_summary_entries_by_generative_model.py +56 -0
phoenix/server/api/dataloaders/span_cost_detail_summary_entries_by_project_session.py +57 -0
phoenix/server/api/dataloaders/span_cost_detail_summary_entries_by_span.py +43 -0
phoenix/server/api/dataloaders/span_cost_detail_summary_entries_by_trace.py +56 -0
phoenix/server/api/dataloaders/span_cost_details_by_span_cost.py +27 -0
phoenix/server/api/dataloaders/span_cost_summary_by_experiment.py +57 -0
phoenix/server/api/dataloaders/span_cost_summary_by_experiment_repeated_run_group.py +64 -0
phoenix/server/api/dataloaders/span_cost_summary_by_experiment_run.py +58 -0
phoenix/server/api/dataloaders/span_cost_summary_by_generative_model.py +55 -0
phoenix/server/api/dataloaders/span_cost_summary_by_project.py +152 -0
phoenix/server/api/dataloaders/span_cost_summary_by_project_session.py +56 -0
phoenix/server/api/dataloaders/span_cost_summary_by_trace.py +55 -0
phoenix/server/api/dataloaders/span_costs.py +29 -0
phoenix/server/api/dataloaders/table_fields.py +2 -2
phoenix/server/api/dataloaders/token_prices_by_model.py +30 -0
phoenix/server/api/dataloaders/trace_annotations_by_trace.py +27 -0
phoenix/server/api/dataloaders/types.py +29 -0
phoenix/server/api/exceptions.py +11 -1
phoenix/server/api/helpers/dataset_helpers.py +5 -1
phoenix/server/api/helpers/playground_clients.py +1243 -292
phoenix/server/api/helpers/playground_registry.py +2 -2
phoenix/server/api/helpers/playground_spans.py +8 -4
phoenix/server/api/helpers/playground_users.py +26 -0
phoenix/server/api/helpers/prompts/conversions/aws.py +83 -0
phoenix/server/api/helpers/prompts/conversions/google.py +103 -0
phoenix/server/api/helpers/prompts/models.py +205 -22
phoenix/server/api/input_types/{SpanAnnotationFilter.py → AnnotationFilter.py} +22 -14
phoenix/server/api/input_types/ChatCompletionInput.py +6 -2
phoenix/server/api/input_types/CreateProjectInput.py +27 -0
phoenix/server/api/input_types/CreateProjectSessionAnnotationInput.py +37 -0
phoenix/server/api/input_types/DatasetFilter.py +17 -0
phoenix/server/api/input_types/ExperimentRunSort.py +237 -0
phoenix/server/api/input_types/GenerativeCredentialInput.py +9 -0
phoenix/server/api/input_types/GenerativeModelInput.py +5 -0
phoenix/server/api/input_types/ProjectSessionSort.py +161 -1
phoenix/server/api/input_types/PromptFilter.py +14 -0
phoenix/server/api/input_types/PromptVersionInput.py +52 -1
phoenix/server/api/input_types/SpanSort.py +44 -7
phoenix/server/api/input_types/TimeBinConfig.py +23 -0
phoenix/server/api/input_types/UpdateAnnotationInput.py +34 -0
phoenix/server/api/input_types/UserRoleInput.py +1 -0
phoenix/server/api/mutations/__init__.py +10 -0
phoenix/server/api/mutations/annotation_config_mutations.py +8 -8
phoenix/server/api/mutations/api_key_mutations.py +19 -23
phoenix/server/api/mutations/chat_mutations.py +154 -47
phoenix/server/api/mutations/dataset_label_mutations.py +243 -0
phoenix/server/api/mutations/dataset_mutations.py +21 -16
phoenix/server/api/mutations/dataset_split_mutations.py +351 -0
phoenix/server/api/mutations/experiment_mutations.py +2 -2
phoenix/server/api/mutations/export_events_mutations.py +3 -3
phoenix/server/api/mutations/model_mutations.py +210 -0
phoenix/server/api/mutations/project_mutations.py +49 -10
phoenix/server/api/mutations/project_session_annotations_mutations.py +158 -0
phoenix/server/api/mutations/project_trace_retention_policy_mutations.py +8 -4
phoenix/server/api/mutations/prompt_label_mutations.py +74 -65
phoenix/server/api/mutations/prompt_mutations.py +65 -129
phoenix/server/api/mutations/prompt_version_tag_mutations.py +11 -8
phoenix/server/api/mutations/span_annotations_mutations.py +15 -10
phoenix/server/api/mutations/trace_annotations_mutations.py +14 -10
phoenix/server/api/mutations/trace_mutations.py +47 -3
phoenix/server/api/mutations/user_mutations.py +66 -41
phoenix/server/api/queries.py +768 -293
phoenix/server/api/routers/__init__.py +2 -2
phoenix/server/api/routers/auth.py +154 -88
phoenix/server/api/routers/ldap.py +229 -0
phoenix/server/api/routers/oauth2.py +369 -106
phoenix/server/api/routers/v1/__init__.py +24 -4
phoenix/server/api/routers/v1/annotation_configs.py +23 -31
phoenix/server/api/routers/v1/annotations.py +481 -17
phoenix/server/api/routers/v1/datasets.py +395 -81
phoenix/server/api/routers/v1/documents.py +142 -0
phoenix/server/api/routers/v1/evaluations.py +24 -31
phoenix/server/api/routers/v1/experiment_evaluations.py +19 -8
phoenix/server/api/routers/v1/experiment_runs.py +337 -59
phoenix/server/api/routers/v1/experiments.py +479 -48
phoenix/server/api/routers/v1/models.py +7 -0
phoenix/server/api/routers/v1/projects.py +18 -49
phoenix/server/api/routers/v1/prompts.py +54 -40
phoenix/server/api/routers/v1/sessions.py +108 -0
phoenix/server/api/routers/v1/spans.py +1091 -81
phoenix/server/api/routers/v1/traces.py +132 -78
phoenix/server/api/routers/v1/users.py +389 -0
phoenix/server/api/routers/v1/utils.py +3 -7
phoenix/server/api/subscriptions.py +305 -88
phoenix/server/api/types/Annotation.py +90 -23
phoenix/server/api/types/ApiKey.py +13 -17
phoenix/server/api/types/AuthMethod.py +1 -0
phoenix/server/api/types/ChatCompletionSubscriptionPayload.py +1 -0
phoenix/server/api/types/CostBreakdown.py +12 -0
phoenix/server/api/types/Dataset.py +226 -72
phoenix/server/api/types/DatasetExample.py +88 -18
phoenix/server/api/types/DatasetExperimentAnnotationSummary.py +10 -0
phoenix/server/api/types/DatasetLabel.py +57 -0
phoenix/server/api/types/DatasetSplit.py +98 -0
phoenix/server/api/types/DatasetVersion.py +49 -4
phoenix/server/api/types/DocumentAnnotation.py +212 -0
phoenix/server/api/types/Experiment.py +264 -59
phoenix/server/api/types/ExperimentComparison.py +5 -10
phoenix/server/api/types/ExperimentRepeatedRunGroup.py +155 -0
phoenix/server/api/types/ExperimentRepeatedRunGroupAnnotationSummary.py +9 -0
phoenix/server/api/types/ExperimentRun.py +169 -65
phoenix/server/api/types/ExperimentRunAnnotation.py +158 -39
phoenix/server/api/types/GenerativeModel.py +245 -3
phoenix/server/api/types/GenerativeProvider.py +70 -11
phoenix/server/api/types/{Model.py → InferenceModel.py} +1 -1
phoenix/server/api/types/ModelInterface.py +16 -0
phoenix/server/api/types/PlaygroundModel.py +20 -0
phoenix/server/api/types/Project.py +1278 -216
phoenix/server/api/types/ProjectSession.py +188 -28
phoenix/server/api/types/ProjectSessionAnnotation.py +187 -0
phoenix/server/api/types/ProjectTraceRetentionPolicy.py +1 -1
phoenix/server/api/types/Prompt.py +119 -39
phoenix/server/api/types/PromptLabel.py +42 -25
phoenix/server/api/types/PromptVersion.py +11 -8
phoenix/server/api/types/PromptVersionTag.py +65 -25
phoenix/server/api/types/ServerStatus.py +6 -0
phoenix/server/api/types/Span.py +167 -123
phoenix/server/api/types/SpanAnnotation.py +189 -42
phoenix/server/api/types/SpanCostDetailSummaryEntry.py +10 -0
phoenix/server/api/types/SpanCostSummary.py +10 -0
phoenix/server/api/types/SystemApiKey.py +65 -1
phoenix/server/api/types/TokenPrice.py +16 -0
phoenix/server/api/types/TokenUsage.py +3 -3
phoenix/server/api/types/Trace.py +223 -51
phoenix/server/api/types/TraceAnnotation.py +149 -50
phoenix/server/api/types/User.py +137 -32
phoenix/server/api/types/UserApiKey.py +73 -26
phoenix/server/api/types/node.py +10 -0
phoenix/server/api/types/pagination.py +11 -2
phoenix/server/app.py +290 -45
phoenix/server/authorization.py +38 -3
phoenix/server/bearer_auth.py +34 -24
phoenix/server/cost_tracking/cost_details_calculator.py +196 -0
phoenix/server/cost_tracking/cost_model_lookup.py +179 -0
phoenix/server/cost_tracking/helpers.py +68 -0
phoenix/server/cost_tracking/model_cost_manifest.json +3657 -830
phoenix/server/cost_tracking/regex_specificity.py +397 -0
phoenix/server/cost_tracking/token_cost_calculator.py +57 -0
phoenix/server/daemons/__init__.py +0 -0
phoenix/server/daemons/db_disk_usage_monitor.py +214 -0
phoenix/server/daemons/generative_model_store.py +103 -0
phoenix/server/daemons/span_cost_calculator.py +99 -0
phoenix/server/dml_event.py +17 -0
phoenix/server/dml_event_handler.py +5 -0
phoenix/server/email/sender.py +56 -3
phoenix/server/email/templates/db_disk_usage_notification.html +19 -0
phoenix/server/email/types.py +11 -0
phoenix/server/experiments/__init__.py +0 -0
phoenix/server/experiments/utils.py +14 -0
phoenix/server/grpc_server.py +11 -11
phoenix/server/jwt_store.py +17 -15
phoenix/server/ldap.py +1449 -0
phoenix/server/main.py +26 -10
phoenix/server/oauth2.py +330 -12
phoenix/server/prometheus.py +66 -6
phoenix/server/rate_limiters.py +4 -9
phoenix/server/retention.py +33 -20
phoenix/server/session_filters.py +49 -0
phoenix/server/static/.vite/manifest.json +55 -51
phoenix/server/static/assets/components-BreFUQQa.js +6702 -0
phoenix/server/static/assets/{index-E0M82BdE.js → index-CTQoemZv.js} +140 -56
phoenix/server/static/assets/pages-DBE5iYM3.js +9524 -0
phoenix/server/static/assets/vendor-BGzfc4EU.css +1 -0
phoenix/server/static/assets/vendor-DCE4v-Ot.js +920 -0
phoenix/server/static/assets/vendor-codemirror-D5f205eT.js +25 -0
phoenix/server/static/assets/vendor-recharts-V9cwpXsm.js +37 -0
phoenix/server/static/assets/vendor-shiki-Do--csgv.js +5 -0
phoenix/server/static/assets/vendor-three-CmB8bl_y.js +3840 -0
phoenix/server/templates/index.html +40 -6
phoenix/server/thread_server.py +1 -2
phoenix/server/types.py +14 -4
phoenix/server/utils.py +74 -0
phoenix/session/client.py +56 -3
phoenix/session/data_extractor.py +5 -0
phoenix/session/evaluation.py +14 -5
phoenix/session/session.py +45 -9
phoenix/settings.py +5 -0
phoenix/trace/attributes.py +80 -13
phoenix/trace/dsl/helpers.py +90 -1
phoenix/trace/dsl/query.py +8 -6
phoenix/trace/projects.py +5 -0
phoenix/utilities/template_formatters.py +1 -1
phoenix/version.py +1 -1
arize_phoenix-10.0.4.dist-info/RECORD +0 -405
phoenix/server/api/types/Evaluation.py +0 -39
phoenix/server/cost_tracking/cost_lookup.py +0 -255
phoenix/server/static/assets/components-DULKeDfL.js +0 -4365
phoenix/server/static/assets/pages-Cl0A-0U2.js +0 -7430
phoenix/server/static/assets/vendor-WIZid84E.css +0 -1
phoenix/server/static/assets/vendor-arizeai-Dy-0mSNw.js +0 -649
phoenix/server/static/assets/vendor-codemirror-DBtifKNr.js +0 -33
phoenix/server/static/assets/vendor-oB4u9zuV.js +0 -905
phoenix/server/static/assets/vendor-recharts-D-T4KPz2.js +0 -59
phoenix/server/static/assets/vendor-shiki-BMn4O_9F.js +0 -5
phoenix/server/static/assets/vendor-three-C5WAXd5r.js +0 -2998
phoenix/utilities/deprecation.py +0 -31
{arize_phoenix-10.0.4.dist-info → arize_phoenix-12.28.1.dist-info}/entry_points.txt +0 -0
{arize_phoenix-10.0.4.dist-info → arize_phoenix-12.28.1.dist-info}/licenses/LICENSE +0 -0

phoenix/db/pg_config.py CHANGED Viewed

@@ -10,12 +10,14 @@ from typing_extensions import assert_never
 def get_pg_config(
     url: URL,
     driver: Literal["psycopg", "asyncpg"],
+    enforce_ssl: bool = False,
 ) -> tuple[URL, dict[str, Any]]:
     """Convert SQLAlchemy URL to driver-specific configuration.
     Args:
         url: SQLAlchemy URL
         driver: "psycopg" or "asyncpg"
+        enforce_ssl: If True, ensure SSL is enabled (required for AWS RDS IAM auth)
     Returns:
         Tuple of (base_url, connect_args):
@@ -26,6 +28,14 @@ def get_pg_config(
     query = url.query
     ssl_args = _get_ssl_args(query)
+    if enforce_ssl and not ssl_args:
+        ssl_args = {"sslmode": "require"}
+    elif enforce_ssl and ssl_args.get("sslmode") == "disable":
+        raise ValueError(
+            "SSL cannot be disabled when using AWS RDS IAM authentication. "
+            "Remove 'sslmode=disable' from the connection string."
+        )
     # Create base URL without SSL parameters
     base_url = url.set(
         drivername=f"postgresql+{driver}",

phoenix/db/types/model_provider.py CHANGED Viewed

@@ -6,3 +6,7 @@ class ModelProvider(Enum):
     AZURE_OPENAI = "AZURE_OPENAI"
     ANTHROPIC = "ANTHROPIC"
     GOOGLE = "GOOGLE"
+    DEEPSEEK = "DEEPSEEK"
+    XAI = "XAI"
+    OLLAMA = "OLLAMA"
+    AWS = "AWS"

phoenix/db/types/token_price_customization.py ADDED Viewed

@@ -0,0 +1,29 @@
+from abc import ABC
+from typing import Any, Literal, Optional
+from pydantic import BaseModel, ValidationError
+class TokenPriceCustomization(BaseModel, ABC):
+    model_config = {"extra": "allow"}
+class ThresholdBasedTokenPriceCustomization(TokenPriceCustomization):
+    type: Literal["threshold_based"] = "threshold_based"
+    key: str
+    threshold: float
+    new_rate: float
+class TokenPriceCustomizationParser:
+    """Intended to be forward-compatible while maintaining the ability to round-trip."""
+    @staticmethod
+    def parse(data: Optional[dict[str, Any]]) -> Optional[TokenPriceCustomization]:
+        if not data:
+            return None
+        try:
+            return ThresholdBasedTokenPriceCustomization.model_validate(data)
+        except ValidationError:
+            pass
+        return TokenPriceCustomization.model_validate(data)

phoenix/db/types/trace_retention.py CHANGED Viewed

@@ -5,7 +5,9 @@ from typing import Annotated, Iterable, Literal, Optional, Union
 import sqlalchemy as sa
 from pydantic import AfterValidator, BaseModel, Field, RootModel
+from sqlalchemy import func
 from sqlalchemy.ext.asyncio import AsyncSession
+from sqlalchemy.sql.roles import InElementRole
 from phoenix.utilities import hour_of_week
@@ -25,19 +27,25 @@ class _MaxDays(BaseModel):
 class _MaxCount(BaseModel):
     max_count: Annotated[int, Field(ge=0)]
-    @property
-    def max_count_filter(self) -> sa.ColumnElement[bool]:
+    def max_count_filter(
+        self,
+        project_rowids: Union[Iterable[int], InElementRole],
+    ) -> sa.ColumnElement[bool]:
         if self.max_count <= 0:
             return sa.literal(False)
         from phoenix.db.models import Trace
-        return Trace.start_time < (
-            sa.select(Trace.start_time)
-            .order_by(Trace.start_time.desc())
-            .offset(self.max_count - 1)
-            .limit(1)
-            .scalar_subquery()
+        ranked = (
+            sa.select(
+                Trace.id,
+                func.row_number()
+                .over(partition_by=Trace.project_rowid, order_by=Trace.start_time.desc())
+                .label("rn"),
+            )
+            .where(Trace.project_rowid.in_(project_rowids))
+            .cte("ranked")
         )
+        return Trace.id.in_(sa.select(ranked.c.id).where(ranked.c.rn > self.max_count))
 class MaxDaysRule(_MaxDays, BaseModel):
@@ -49,7 +57,7 @@ class MaxDaysRule(_MaxDays, BaseModel):
     async def delete_traces(
         self,
         session: AsyncSession,
-        project_rowids: Union[Iterable[int], sa.ScalarSelect[int]],
+        project_rowids: Union[Iterable[int], InElementRole],
     ) -> set[int]:
         if self.max_days <= 0:
             return set()
@@ -73,7 +81,7 @@ class MaxCountRule(_MaxCount, BaseModel):
     async def delete_traces(
         self,
         session: AsyncSession,
-        project_rowids: Union[Iterable[int], sa.ScalarSelect[int]],
+        project_rowids: Union[Iterable[int], InElementRole],
     ) -> set[int]:
         if self.max_count <= 0:
             return set()
@@ -82,7 +90,7 @@ class MaxCountRule(_MaxCount, BaseModel):
         stmt = (
             sa.delete(Trace)
             .where(Trace.project_rowid.in_(project_rowids))
-            .where(self.max_count_filter)
+            .where(self.max_count_filter(project_rowids))
             .returning(Trace.project_rowid)
         )
         return set(await session.scalars(stmt))
@@ -97,7 +105,7 @@ class MaxDaysOrCountRule(_MaxDays, _MaxCount, BaseModel):
     async def delete_traces(
         self,
         session: AsyncSession,
-        project_rowids: Union[Iterable[int], sa.ScalarSelect[int]],
+        project_rowids: Union[Iterable[int], InElementRole],
     ) -> set[int]:
         if self.max_days <= 0 and self.max_count <= 0:
             return set()
@@ -106,7 +114,7 @@ class MaxDaysOrCountRule(_MaxDays, _MaxCount, BaseModel):
         stmt = (
             sa.delete(Trace)
             .where(Trace.project_rowid.in_(project_rowids))
-            .where(sa.or_(self.max_days_filter, self.max_count_filter))
+            .where(sa.or_(self.max_days_filter, self.max_count_filter(project_rowids)))
             .returning(Trace.project_rowid)
         )
         return set(await session.scalars(stmt))
@@ -123,7 +131,7 @@ class TraceRetentionRule(RootModel[Union[MaxDaysRule, MaxCountRule, MaxDaysOrCou
     async def delete_traces(
         self,
         session: AsyncSession,
-        project_rowids: Union[Iterable[int], sa.ScalarSelect[int]],
+        project_rowids: Union[Iterable[int], InElementRole],
     ) -> set[int]:
         return await self.root.delete_traces(session, project_rowids)
@@ -192,7 +200,7 @@ class TraceRetentionCronExpression(RootModel[str]):
 def _parse_field(field: str, min_val: int, max_val: int) -> set[int]:
     """
-    Parse a cron field and return the set of matching values.
+    Parses a cron field and returns the set of matching values.
     Args:
         field (str): The cron field to parse

phoenix/experiments/evaluators/utils.py CHANGED Viewed

@@ -19,9 +19,9 @@ def unwrap_json(obj: JSONSerializable) -> JSONSerializable:
         if len(obj) == 1:
             key = next(iter(obj.keys()))
             output = obj[key]
-            assert isinstance(
-                output, (dict, list, str, int, float, bool, type(None))
-            ), "Output must be JSON serializable"
+            assert isinstance(output, (dict, list, str, int, float, bool, type(None))), (
+                "Output must be JSON serializable"
+            )
             return output
     return obj

phoenix/experiments/functions.py CHANGED Viewed

@@ -10,7 +10,7 @@ from copy import deepcopy
 from dataclasses import replace
 from datetime import datetime, timezone
 from itertools import product
-from typing import Any, Literal, Optional, Union, cast
+from typing import TYPE_CHECKING, Any, Literal, Optional, Union, cast
 from urllib.parse import urljoin
 import httpx
@@ -65,6 +65,41 @@ from phoenix.trace.attributes import flatten
 from phoenix.utilities.client import VersionedAsyncClient, VersionedClient
 from phoenix.utilities.json import jsonify
+if TYPE_CHECKING:
+    from phoenix.client.resources.datasets import Dataset as ClientDataset
+def _convert_client_dataset(new_dataset: "ClientDataset") -> Dataset:
+    """
+    Converts Dataset objects from `phoenix.client` to Dataset objects compatible with experiments.
+    """
+    examples_dict: dict[str, Example] = {}
+    for example_data in new_dataset.examples:
+        legacy_example = Example(
+            id=example_data["id"],
+            input=example_data["input"],
+            output=example_data["output"],
+            metadata=example_data["metadata"],
+            updated_at=datetime.fromisoformat(example_data["updated_at"]),
+        )
+        examples_dict[legacy_example.id] = legacy_example
+    return Dataset(
+        id=new_dataset.id,
+        version_id=new_dataset.version_id,
+        examples=examples_dict,
+    )
+def _is_new_client_dataset(dataset: Any) -> bool:
+    """Check if dataset is from new client (has list examples)."""
+    try:
+        from phoenix.client.resources.datasets import Dataset as _ClientDataset
+        return isinstance(dataset, _ClientDataset)
+    except ImportError:
+        return False
 def _phoenix_clients() -> tuple[httpx.Client, httpx.AsyncClient]:
     return VersionedClient(
@@ -74,6 +109,64 @@ def _phoenix_clients() -> tuple[httpx.Client, httpx.AsyncClient]:
     )
+def _get_all_experiment_runs(
+    client: httpx.Client,
+    experiment_id: str,
+    page_size: int = 50,
+) -> list[ExperimentRun]:
+    """
+    Fetch all experiment runs using pagination to handle large datasets.
+    Args:
+        client: The HTTP client to use for requests.
+        experiment_id: The ID of the experiment.
+        page_size: Number of runs to fetch per page. Defaults to 50.
+    Returns:
+        List of all experiment runs as ExperimentRun objects.
+    """
+    all_runs: list[dict[str, Any]] = []
+    cursor = None
+    while True:
+        params: dict[str, Any] = {"limit": page_size}
+        if cursor:
+            params["cursor"] = cursor
+        try:
+            response = client.get(
+                f"v1/experiments/{experiment_id}/runs",
+                params=params,
+            )
+            response.raise_for_status()
+            data = response.json()
+            runs = data["data"]
+            all_runs.extend(runs)
+            # Check if there are more pages
+            cursor = data.get("next_cursor")
+            if not cursor:
+                break
+        except HTTPStatusError as e:
+            if e.response.status_code == 404:
+                # Experiment doesn't exist - treat as empty result
+                break
+            else:
+                raise
+    # Convert dicts to ExperimentRun objects
+    experiment_runs: list[ExperimentRun] = []
+    for run in all_runs:
+        # Parse datetime strings
+        run["start_time"] = datetime.fromisoformat(run["start_time"])
+        run["end_time"] = datetime.fromisoformat(run["end_time"])
+        experiment_runs.append(ExperimentRun.from_dict(run))
+    return experiment_runs
 Evaluators: TypeAlias = Union[
     ExperimentEvaluator,
     Sequence[ExperimentEvaluator],
@@ -85,7 +178,7 @@ RateLimitErrors: TypeAlias = Union[type[BaseException], Sequence[type[BaseExcept
 def run_experiment(
-    dataset: Dataset,
+    dataset: Union[Dataset, Any],  # Accept both legacy and new client datasets
     task: ExperimentTask,
     evaluators: Optional[Evaluators] = None,
     *,
@@ -166,11 +259,20 @@ def run_experiment(
         RanExperiment: The results of the experiment and evaluation. Additional evaluations can be
             added to the experiment using the `evaluate_experiment` function.
     """
+    # Auto-convert client Dataset objects to legacy format
+    normalized_dataset: Dataset
+    if _is_new_client_dataset(dataset):
+        normalized_dataset = _convert_client_dataset(cast("ClientDataset", dataset))
+    else:
+        normalized_dataset = dataset
     task_signature = inspect.signature(task)
     _validate_task_signature(task_signature)
-    if not dataset.examples:
-        raise ValueError(f"Dataset has no examples: {dataset.id=}, {dataset.version_id=}")
+    if not normalized_dataset.examples:
+        raise ValueError(
+            f"Dataset has no examples: {normalized_dataset.id=}, {normalized_dataset.version_id=}"
+        )
     # Add this to the params once supported in the UI
     repetitions = 1
     assert repetitions > 0, "Must run the experiment at least once."
@@ -179,7 +281,7 @@ def run_experiment(
     sync_client, async_client = _phoenix_clients()
     payload = {
-        "version_id": dataset.version_id,
+        "version_id": normalized_dataset.version_id,
         "name": experiment_name,
         "description": experiment_description,
         "metadata": experiment_metadata,
@@ -187,23 +289,23 @@ def run_experiment(
     }
     if not dry_run:
         experiment_response = sync_client.post(
-            f"/v1/datasets/{dataset.id}/experiments",
+            f"v1/datasets/{normalized_dataset.id}/experiments",
             json=payload,
         )
         experiment_response.raise_for_status()
         exp_json = experiment_response.json()["data"]
         project_name = exp_json["project_name"]
         experiment = Experiment(
-            dataset_id=dataset.id,
-            dataset_version_id=dataset.version_id,
+            dataset_id=normalized_dataset.id,
+            dataset_version_id=normalized_dataset.version_id,
             repetitions=repetitions,
             id=exp_json["id"],
             project_name=project_name,
         )
     else:
         experiment = Experiment(
-            dataset_id=dataset.id,
-            dataset_version_id=dataset.version_id,
+            dataset_id=normalized_dataset.id,
+            dataset_version_id=normalized_dataset.version_id,
             repetitions=repetitions,
             id=DRY_RUN,
             project_name="",
@@ -216,18 +318,18 @@ def run_experiment(
     print("🧪 Experiment started.")
     if dry_run:
         examples = {
-            (ex := dataset[i]).id: ex
-            for i in pd.Series(range(len(dataset)))
-            .sample(min(len(dataset), int(dry_run)), random_state=42)
+            (ex := normalized_dataset[i]).id: ex
+            for i in pd.Series(range(len(normalized_dataset)))
+            .sample(min(len(normalized_dataset), int(dry_run)), random_state=42)
             .sort_values()
         }
         id_selection = "\n".join(examples)
         print(f"🌵️ This is a dry-run for these example IDs:\n{id_selection}")
-        dataset = replace(dataset, examples=examples)
+        normalized_dataset = replace(normalized_dataset, examples=examples)
     else:
-        dataset_experiments_url = get_dataset_experiments_url(dataset_id=dataset.id)
+        dataset_experiments_url = get_dataset_experiments_url(dataset_id=normalized_dataset.id)
         experiment_compare_url = get_experiment_url(
-            dataset_id=dataset.id,
+            dataset_id=normalized_dataset.id,
             experiment_id=experiment.id,
         )
         print(f"📺 View dataset experiments: {dataset_experiments_url}")
@@ -259,7 +361,7 @@ def run_experiment(
                 try:
                     # Try to create the run directly
                     resp = sync_client.post(
-                        f"/v1/experiments/{experiment.id}/runs", json=jsonify(exp_run)
+                        f"v1/experiments/{experiment.id}/runs", json=jsonify(exp_run)
                     )
                     resp.raise_for_status()
                     exp_run = replace(exp_run, id=resp.json()["data"]["id"])
@@ -274,8 +376,11 @@ def run_experiment(
         error: Optional[BaseException] = None
         status = Status(StatusCode.OK)
         with ExitStack() as stack:
-            span: Span = stack.enter_context(
-                tracer.start_as_current_span(root_span_name, context=Context())
+            span = cast(
+                Span,
+                stack.enter_context(
+                    tracer.start_as_current_span(root_span_name, context=Context())
+                ),
             )
             stack.enter_context(capture_spans(resource))
             try:
@@ -316,9 +421,9 @@ def run_experiment(
             span.set_attribute(SpanAttributes.OPENINFERENCE_SPAN_KIND, root_span_kind)
             span.set_status(status)
-        assert isinstance(
-            output, (dict, list, str, int, float, bool, type(None))
-        ), "Output must be JSON serializable"
+        assert isinstance(output, (dict, list, str, int, float, bool, type(None))), (
+            "Output must be JSON serializable"
+        )
         exp_run = ExperimentRun(
             start_time=_decode_unix_nano(cast(int, span.start_time)),
@@ -334,7 +439,7 @@ def run_experiment(
             try:
                 # Try to create the run directly
                 resp = sync_client.post(
-                    f"/v1/experiments/{experiment.id}/runs", json=jsonify(exp_run)
+                    f"v1/experiments/{experiment.id}/runs", json=jsonify(exp_run)
                 )
                 resp.raise_for_status()
                 exp_run = replace(exp_run, id=resp.json()["data"]["id"])
@@ -373,7 +478,7 @@ def run_experiment(
                         None,
                         functools.partial(
                             sync_client.post,
-                            url=f"/v1/experiments/{experiment.id}/runs",
+                            url=f"v1/experiments/{experiment.id}/runs",
                             json=jsonify(exp_run),
                         ),
                     )
@@ -391,8 +496,11 @@ def run_experiment(
         error: Optional[BaseException] = None
         status = Status(StatusCode.OK)
         with ExitStack() as stack:
-            span: Span = stack.enter_context(
-                tracer.start_as_current_span(root_span_name, context=Context())
+            span = cast(
+                Span,
+                stack.enter_context(
+                    tracer.start_as_current_span(root_span_name, context=Context())
+                ),
             )
             stack.enter_context(capture_spans(resource))
             try:
@@ -427,9 +535,9 @@ def run_experiment(
             span.set_attribute(OPENINFERENCE_SPAN_KIND, root_span_kind)
             span.set_status(status)
-        assert isinstance(
-            output, (dict, list, str, int, float, bool, type(None))
-        ), "Output must be JSON serializable"
+        assert isinstance(output, (dict, list, str, int, float, bool, type(None))), (
+            "Output must be JSON serializable"
+        )
         exp_run = ExperimentRun(
             start_time=_decode_unix_nano(cast(int, span.start_time)),
@@ -448,7 +556,7 @@ def run_experiment(
                     None,
                     functools.partial(
                         sync_client.post,
-                        url=f"/v1/experiments/{experiment.id}/runs",
+                        url=f"v1/experiments/{experiment.id}/runs",
                         json=jsonify(exp_run),
                     ),
                 )
@@ -491,23 +599,17 @@ def run_experiment(
     test_cases = [
         TestCase(example=deepcopy(ex), repetition_number=rep)
-        for ex, rep in product(dataset.examples.values(), range(1, repetitions + 1))
+        for ex, rep in product(normalized_dataset.examples.values(), range(1, repetitions + 1))
     ]
     task_runs, _execution_details = executor.run(test_cases)
     print("✅ Task runs completed.")
     # Get the final state of runs from the database
     if not dry_run:
-        all_runs = sync_client.get(f"/v1/experiments/{experiment.id}/runs").json()["data"]
-        task_runs = []
-        for run in all_runs:
-            # Parse datetime strings
-            run["start_time"] = datetime.fromisoformat(run["start_time"])
-            run["end_time"] = datetime.fromisoformat(run["end_time"])
-            task_runs.append(ExperimentRun.from_dict(run))
+        task_runs = _get_all_experiment_runs(sync_client, experiment.id)
         # Check if we got all expected runs
-        expected_runs = len(dataset.examples) * repetitions
+        expected_runs = len(normalized_dataset.examples) * repetitions
         actual_runs = len(task_runs)
         if actual_runs < expected_runs:
             print(
@@ -515,12 +617,14 @@ def run_experiment(
                 "completed successfully."
             )
-    params = ExperimentParameters(n_examples=len(dataset.examples), n_repetitions=repetitions)
+    params = ExperimentParameters(
+        n_examples=len(normalized_dataset.examples), n_repetitions=repetitions
+    )
     task_summary = TaskSummary.from_task_runs(params, task_runs)
     ran_experiment: RanExperiment = object.__new__(RanExperiment)
     ran_experiment.__init__(  # type: ignore[misc]
         params=params,
-        dataset=dataset,
+        dataset=normalized_dataset,
         runs={r.id: r for r in task_runs if r is not None},
         task_summary=task_summary,
         **_asdict(experiment),
@@ -561,16 +665,14 @@ def evaluate_experiment(
     else:
         dataset = Dataset.from_dict(
             sync_client.get(
-                f"/v1/datasets/{dataset_id}/examples",
+                f"v1/datasets/{dataset_id}/examples",
                 params={"version_id": str(dataset_version_id)},
             ).json()["data"]
         )
         if not dataset.examples:
             raise ValueError(f"Dataset has no examples: {dataset_id=}, {dataset_version_id=}")
-        experiment_runs = {
-            exp_run["id"]: ExperimentRun.from_dict(exp_run)
-            for exp_run in sync_client.get(f"/v1/experiments/{experiment.id}/runs").json()["data"]
-        }
+        all_runs = _get_all_experiment_runs(sync_client, experiment.id)
+        experiment_runs = {exp_run.id: exp_run for exp_run in all_runs}
         if not experiment_runs:
             raise ValueError("Experiment has not been run")
         params = ExperimentParameters(n_examples=len(dataset.examples))
@@ -622,8 +724,11 @@ def evaluate_experiment(
         status = Status(StatusCode.OK)
         root_span_name = f"Evaluation: {evaluator.name}"
         with ExitStack() as stack:
-            span: Span = stack.enter_context(
-                tracer.start_as_current_span(root_span_name, context=Context())
+            span = cast(
+                Span,
+                stack.enter_context(
+                    tracer.start_as_current_span(root_span_name, context=Context())
+                ),
             )
             stack.enter_context(capture_spans(resource))
             try:
@@ -660,7 +765,7 @@ def evaluate_experiment(
             trace_id=_str_trace_id(span.get_span_context().trace_id),  # type: ignore[no-untyped-call]
         )
         if not dry_run:
-            resp = sync_client.post("/v1/experiment_evaluations", json=jsonify(eval_run))
+            resp = sync_client.post("v1/experiment_evaluations", json=jsonify(eval_run))
             resp.raise_for_status()
             eval_run = replace(eval_run, id=resp.json()["data"]["id"])
         return eval_run
@@ -674,8 +779,11 @@ def evaluate_experiment(
         status = Status(StatusCode.OK)
         root_span_name = f"Evaluation: {evaluator.name}"
         with ExitStack() as stack:
-            span: Span = stack.enter_context(
-                tracer.start_as_current_span(root_span_name, context=Context())
+            span = cast(
+                Span,
+                stack.enter_context(
+                    tracer.start_as_current_span(root_span_name, context=Context())
+                ),
             )
             stack.enter_context(capture_spans(resource))
             try:
@@ -719,7 +827,7 @@ def evaluate_experiment(
                 None,
                 functools.partial(
                     sync_client.post,
-                    url="/v1/experiment_evaluations",
+                    url="v1/experiment_evaluations",
                     json=jsonify(eval_run),
                 ),
             )

phoenix/experiments/tracing.py CHANGED Viewed

@@ -8,7 +8,7 @@ from typing import Any, Optional
 from opentelemetry.sdk.resources import Resource
 from opentelemetry.sdk.trace import ReadableSpan
-from opentelemetry.trace import INVALID_TRACE_ID
+from opentelemetry.trace import INVALID_SPAN_ID
 from wrapt import apply_patch, resolve_path, wrap_function_wrapper
@@ -29,7 +29,7 @@ class SpanModifier:
         Args:
           span: ReadableSpan: the span to modify
         """
-        if (ctx := span._context) is None or ctx.span_id == INVALID_TRACE_ID:
+        if (ctx := span._context) is None or ctx.span_id == INVALID_SPAN_ID:
             return
         span._resource = span._resource.merge(self._resource)

phoenix/experiments/types.py CHANGED Viewed

@@ -322,7 +322,7 @@ class _HasStats:
             text = self.stats.__str__()
         else:
             text = self.stats.to_markdown(index=False)
-        return f"{self.title}\n{'-'*len(self.title)}\n" + text
+        return f"{self.title}\n{'-' * len(self.title)}\n" + text
 @dataclass(frozen=True)

phoenix/inferences/inferences.py CHANGED Viewed

@@ -13,11 +13,10 @@ from pandas import DataFrame, Series, Timestamp, read_parquet
 from pandas.api.types import (
     is_numeric_dtype,
 )
-from typing_extensions import TypeAlias
+from typing_extensions import TypeAlias, deprecated
 from phoenix.config import GENERATED_INFERENCES_NAME_PREFIX, INFERENCES_DIR
 from phoenix.datetime_utils import normalize_timestamps
-from phoenix.utilities.deprecation import deprecated
 from . import errors as err
 from .schema import (

arize-phoenix 10.0.4__py3-none-any.whl → 12.28.1__py3-none-any.whl

arize-phoenix 10.0.4py3-none-any.whl → 12.28.1py3-none-any.whl