arize-phoenix 11.6.2__py3-none-any.whl → 11.8.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {arize_phoenix-11.6.2.dist-info → arize_phoenix-11.8.0.dist-info}/METADATA +22 -10
- {arize_phoenix-11.6.2.dist-info → arize_phoenix-11.8.0.dist-info}/RECORD +34 -33
- phoenix/config.py +33 -0
- phoenix/datetime_utils.py +112 -1
- phoenix/db/helpers.py +156 -1
- phoenix/experiments/functions.py +66 -20
- phoenix/experiments/tracing.py +2 -2
- phoenix/server/api/auth.py +28 -6
- phoenix/server/api/dataloaders/span_cost_summary_by_experiment.py +6 -7
- phoenix/server/api/exceptions.py +6 -0
- phoenix/server/api/input_types/TimeBinConfig.py +23 -0
- phoenix/server/api/routers/oauth2.py +19 -2
- phoenix/server/api/types/CostBreakdown.py +4 -7
- phoenix/server/api/types/Project.py +341 -73
- phoenix/server/app.py +7 -3
- phoenix/server/authorization.py +27 -2
- phoenix/server/cost_tracking/cost_details_calculator.py +22 -16
- phoenix/server/daemons/span_cost_calculator.py +2 -8
- phoenix/server/email/sender.py +2 -1
- phoenix/server/email/templates/db_disk_usage_notification.html +3 -0
- phoenix/server/static/.vite/manifest.json +36 -36
- phoenix/server/static/assets/{components-BZHttBll.js → components-5M9nebi4.js} +363 -283
- phoenix/server/static/assets/{index-AbJf1oNh.js → index-OU2WTnGN.js} +27 -20
- phoenix/server/static/assets/{pages-5yQ-jl70.js → pages-DF8rqxJ4.js} +416 -396
- phoenix/server/static/assets/{vendor-DRWIRkSJ.js → vendor-Bl7CyFDw.js} +147 -147
- phoenix/server/static/assets/{vendor-arizeai-DUhQaeau.js → vendor-arizeai-B_viEUUA.js} +18 -480
- phoenix/server/static/assets/{vendor-codemirror-D_6Q6Auv.js → vendor-codemirror-vlcH1_iR.js} +1 -1
- phoenix/server/static/assets/{vendor-recharts-BNBwj7vz.js → vendor-recharts-C9cQu72o.js} +25 -25
- phoenix/server/static/assets/{vendor-shiki-k1qj_XjP.js → vendor-shiki-BsknB7bv.js} +1 -1
- phoenix/version.py +1 -1
- {arize_phoenix-11.6.2.dist-info → arize_phoenix-11.8.0.dist-info}/WHEEL +0 -0
- {arize_phoenix-11.6.2.dist-info → arize_phoenix-11.8.0.dist-info}/entry_points.txt +0 -0
- {arize_phoenix-11.6.2.dist-info → arize_phoenix-11.8.0.dist-info}/licenses/IP_NOTICE +0 -0
- {arize_phoenix-11.6.2.dist-info → arize_phoenix-11.8.0.dist-info}/licenses/LICENSE +0 -0
phoenix/experiments/functions.py
CHANGED
@@ -10,7 +10,7 @@ from copy import deepcopy
 from dataclasses import replace
 from datetime import datetime, timezone
 from itertools import product
-from typing import Any, Literal, Optional, Union, cast
+from typing import TYPE_CHECKING, Any, Literal, Optional, Union, cast
 from urllib.parse import urljoin
 
 import httpx
@@ -65,6 +65,41 @@ from phoenix.trace.attributes import flatten
 from phoenix.utilities.client import VersionedAsyncClient, VersionedClient
 from phoenix.utilities.json import jsonify
 
+if TYPE_CHECKING:
+    from phoenix.client.resources.datasets import Dataset as ClientDataset
+
+
+def _convert_client_dataset(new_dataset: "ClientDataset") -> Dataset:
+    """
+    Converts Dataset objects from `phoenix.client` to Dataset objects compatible with experiments.
+    """
+    examples_dict: dict[str, Example] = {}
+    for example_data in new_dataset.examples:
+        legacy_example = Example(
+            id=example_data["id"],
+            input=example_data["input"],
+            output=example_data["output"],
+            metadata=example_data["metadata"],
+            updated_at=datetime.fromisoformat(example_data["updated_at"]),
+        )
+        examples_dict[legacy_example.id] = legacy_example
+
+    return Dataset(
+        id=new_dataset.id,
+        version_id=new_dataset.version_id,
+        examples=examples_dict,
+    )
+
+
+def _is_new_client_dataset(dataset: Any) -> bool:
+    """Check if dataset is from new client (has list examples)."""
+    try:
+        from phoenix.client.resources.datasets import Dataset as _ClientDataset
+
+        return isinstance(dataset, _ClientDataset)
+    except ImportError:
+        return False
+
 
 def _phoenix_clients() -> tuple[httpx.Client, httpx.AsyncClient]:
     return VersionedClient(
@@ -85,7 +120,7 @@ RateLimitErrors: TypeAlias = Union[type[BaseException], Sequence[type[BaseException]]]
 
 
 def run_experiment(
-    dataset: Dataset,
+    dataset: Union[Dataset, Any],  # Accept both legacy and new client datasets
     task: ExperimentTask,
     evaluators: Optional[Evaluators] = None,
     *,
@@ -166,11 +201,20 @@ def run_experiment(
         RanExperiment: The results of the experiment and evaluation. Additional evaluations can be
             added to the experiment using the `evaluate_experiment` function.
     """
+    # Auto-convert client Dataset objects to legacy format
+    normalized_dataset: Dataset
+    if _is_new_client_dataset(dataset):
+        normalized_dataset = _convert_client_dataset(cast("ClientDataset", dataset))
+    else:
+        normalized_dataset = dataset
+
     task_signature = inspect.signature(task)
     _validate_task_signature(task_signature)
 
-    if not dataset.examples:
-        raise ValueError(f"Dataset has no examples: {dataset.id=}, {dataset.version_id=}")
+    if not normalized_dataset.examples:
+        raise ValueError(
+            f"Dataset has no examples: {normalized_dataset.id=}, {normalized_dataset.version_id=}"
+        )
     # Add this to the params once supported in the UI
     repetitions = 1
     assert repetitions > 0, "Must run the experiment at least once."
@@ -179,7 +223,7 @@ def run_experiment(
     sync_client, async_client = _phoenix_clients()
 
     payload = {
-        "version_id": dataset.version_id,
+        "version_id": normalized_dataset.version_id,
         "name": experiment_name,
         "description": experiment_description,
         "metadata": experiment_metadata,
@@ -187,23 +231,23 @@ def run_experiment(
     }
     if not dry_run:
         experiment_response = sync_client.post(
-            f"/v1/datasets/{dataset.id}/experiments",
+            f"/v1/datasets/{normalized_dataset.id}/experiments",
            json=payload,
        )
        experiment_response.raise_for_status()
        exp_json = experiment_response.json()["data"]
        project_name = exp_json["project_name"]
        experiment = Experiment(
-            dataset_id=dataset.id,
-            dataset_version_id=dataset.version_id,
+            dataset_id=normalized_dataset.id,
+            dataset_version_id=normalized_dataset.version_id,
            repetitions=repetitions,
            id=exp_json["id"],
            project_name=project_name,
        )
    else:
        experiment = Experiment(
-            dataset_id=dataset.id,
-            dataset_version_id=dataset.version_id,
+            dataset_id=normalized_dataset.id,
+            dataset_version_id=normalized_dataset.version_id,
            repetitions=repetitions,
            id=DRY_RUN,
            project_name="",
@@ -216,18 +260,18 @@ def run_experiment(
     print("🧪 Experiment started.")
     if dry_run:
         examples = {
-            (ex := dataset[i]).id: ex
-            for i in pd.Series(range(len(dataset)))
-            .sample(min(len(dataset), int(dry_run)), random_state=42)
+            (ex := normalized_dataset[i]).id: ex
+            for i in pd.Series(range(len(normalized_dataset)))
+            .sample(min(len(normalized_dataset), int(dry_run)), random_state=42)
            .sort_values()
        }
        id_selection = "\n".join(examples)
        print(f"🌵️ This is a dry-run for these example IDs:\n{id_selection}")
-        dataset = replace(dataset, examples=examples)
+        normalized_dataset = replace(normalized_dataset, examples=examples)
    else:
-        dataset_experiments_url = get_dataset_experiments_url(dataset_id=dataset.id)
+        dataset_experiments_url = get_dataset_experiments_url(dataset_id=normalized_dataset.id)
        experiment_compare_url = get_experiment_url(
-            dataset_id=dataset.id,
+            dataset_id=normalized_dataset.id,
            experiment_id=experiment.id,
        )
        print(f"📺 View dataset experiments: {dataset_experiments_url}")
@@ -497,7 +541,7 @@ def run_experiment(
 
     test_cases = [
         TestCase(example=deepcopy(ex), repetition_number=rep)
-        for ex, rep in product(dataset.examples.values(), range(1, repetitions + 1))
+        for ex, rep in product(normalized_dataset.examples.values(), range(1, repetitions + 1))
     ]
     task_runs, _execution_details = executor.run(test_cases)
     print("✅ Task runs completed.")
@@ -513,7 +557,7 @@ def run_experiment(
                 task_runs.append(ExperimentRun.from_dict(run))
 
     # Check if we got all expected runs
-    expected_runs = len(dataset.examples) * repetitions
+    expected_runs = len(normalized_dataset.examples) * repetitions
     actual_runs = len(task_runs)
     if actual_runs < expected_runs:
         print(
@@ -521,12 +565,14 @@ def run_experiment(
             "completed successfully."
         )
 
-    params = ExperimentParameters(n_examples=len(dataset.examples), n_repetitions=repetitions)
+    params = ExperimentParameters(
+        n_examples=len(normalized_dataset.examples), n_repetitions=repetitions
+    )
     task_summary = TaskSummary.from_task_runs(params, task_runs)
     ran_experiment: RanExperiment = object.__new__(RanExperiment)
     ran_experiment.__init__(  # type: ignore[misc]
         params=params,
-        dataset=dataset,
+        dataset=normalized_dataset,
         runs={r.id: r for r in task_runs if r is not None},
         task_summary=task_summary,
         **_asdict(experiment),
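Note on the change above: run_experiment now also accepts Dataset objects returned by the newer phoenix.client package and converts them to the legacy format before running. The conversion is essentially re-keying a list of example dicts by id. Below is a self-contained sketch of that shape mapping using stand-in dataclasses; ExampleStub, DatasetStub, and normalize are illustrative names, not Phoenix APIs.

from dataclasses import dataclass, field
from datetime import datetime
from typing import Any


@dataclass
class ExampleStub:  # stand-in for the experiments Example type
    id: str
    input: dict[str, Any]
    output: dict[str, Any]
    metadata: dict[str, Any]
    updated_at: datetime


@dataclass
class DatasetStub:  # stand-in for the experiments Dataset type
    id: str
    version_id: str
    examples: dict[str, ExampleStub] = field(default_factory=dict)


def normalize(dataset_id: str, version_id: str, examples: list[dict[str, Any]]) -> DatasetStub:
    """Key each raw example dict by its id, mirroring what _convert_client_dataset does above."""
    keyed = {
        ex["id"]: ExampleStub(
            id=ex["id"],
            input=ex["input"],
            output=ex["output"],
            metadata=ex["metadata"],
            updated_at=datetime.fromisoformat(ex["updated_at"]),
        )
        for ex in examples
    }
    return DatasetStub(id=dataset_id, version_id=version_id, examples=keyed)


raw = [{"id": "ex-1", "input": {"question": "2 + 2?"}, "output": {"answer": "4"},
        "metadata": {}, "updated_at": "2025-01-01T00:00:00+00:00"}]
print(list(normalize("ds-1", "v-1", raw).examples))  # ['ex-1']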
phoenix/experiments/tracing.py
CHANGED
@@ -8,7 +8,7 @@ from typing import Any, Optional
 
 from opentelemetry.sdk.resources import Resource
 from opentelemetry.sdk.trace import ReadableSpan
-from opentelemetry.trace import
+from opentelemetry.trace import INVALID_SPAN_ID
 from wrapt import apply_patch, resolve_path, wrap_function_wrapper
 
 
@@ -29,7 +29,7 @@ class SpanModifier:
         Args:
             span: ReadableSpan: the span to modify
         """
-        if (ctx := span._context) is None or ctx.span_id ==
+        if (ctx := span._context) is None or ctx.span_id == INVALID_SPAN_ID:
             return
         span._resource = span._resource.merge(self._resource)
 
phoenix/server/api/auth.py
CHANGED
@@ -3,8 +3,10 @@ from typing import Any
 
 from strawberry import Info
 from strawberry.permission import BasePermission
+from typing_extensions import override
 
-from phoenix.
+from phoenix.config import get_env_support_email
+from phoenix.server.api.exceptions import InsufficientStorage, Unauthorized
 from phoenix.server.bearer_auth import PhoenixUser
 
 
@@ -20,15 +22,35 @@ class IsNotReadOnly(Authorization):
         return not info.context.read_only
 
 
-class IsLocked(
-    """
-    Disables mutations and subscriptions that create or update data but allows
-    queries and delete mutations.
+class IsLocked(BasePermission):
     """
+    Permission class that restricts data-modifying operations when insufficient storage.
+
+    When database storage capacity is exceeded, this permission blocks mutations and
+    subscriptions that create or update data, while allowing queries and delete mutations
+    to continue. This prevents database overflow while maintaining read access and the
+    ability to free up space through deletions.
 
-
+    Raises:
+        InsufficientStorage: When storage capacity is exceeded and data operations
+            are temporarily disabled. The error includes guidance for resolution
+            and support contact information if configured.
+    """
 
+    @override
+    def on_unauthorized(self) -> None:
+        """Create user-friendly error message when storage operations are blocked."""
+        message = (
+            "Database operations are disabled due to insufficient storage. "
+            "Please delete old data or increase storage."
+        )
+        if support_email := get_env_support_email():
+            message += f" Need help? Contact us at {support_email}"
+        raise InsufficientStorage(message)
+
+    @override
     def has_permission(self, source: Any, info: Info, **kwargs: Any) -> bool:
+        """Check if database operations are allowed based on storage capacity and lock status."""
         return not (info.context.db.should_not_insert_or_update or info.context.locked)
 
 
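For readers unfamiliar with Strawberry permission classes, the pattern IsLocked uses above (deny via has_permission, customize the failure via on_unauthorized) looks roughly like the following self-contained sketch. The schema, field names, and StorageUnavailable error are illustrative, not Phoenix's actual GraphQL schema.

from typing import Any

import strawberry
from strawberry.permission import BasePermission
from strawberry.types import Info


class StorageUnavailable(Exception):
    """Illustrative stand-in for a custom GraphQL error such as InsufficientStorage."""


class DenyWhenLocked(BasePermission):
    message = "Write operations are temporarily disabled."

    def has_permission(self, source: Any, info: Info, **kwargs: Any) -> bool:
        # Allow the operation only when the request context is not locked.
        return not getattr(info.context, "locked", False)

    def on_unauthorized(self) -> None:
        # Invoked when has_permission returns False; raise a custom error
        # instead of the default unauthorized error.
        raise StorageUnavailable(self.message)


@strawberry.type
class Query:
    @strawberry.field
    def ping(self) -> str:
        return "pong"


@strawberry.type
class Mutation:
    # The permission class guards only this mutation; queries remain unaffected.
    @strawberry.mutation(permission_classes=[DenyWhenLocked])
    def add_note(self, text: str) -> str:
        return text


schema = strawberry.Schema(query=Query, mutation=Mutation)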
phoenix/server/api/dataloaders/span_cost_summary_by_experiment.py
CHANGED
@@ -1,7 +1,6 @@
 from collections import defaultdict
 
 from sqlalchemy import func, select
-from sqlalchemy.sql.functions import coalesce
 from strawberry.dataloader import DataLoader
 from typing_extensions import TypeAlias
 
@@ -23,12 +22,12 @@ class SpanCostSummaryByExperimentDataLoader(DataLoader[Key, Result]):
         stmt = (
             select(
                 models.ExperimentRun.experiment_id,
-
-
-
-
-
-
+                func.sum(models.SpanCost.prompt_cost).label("prompt_cost"),
+                func.sum(models.SpanCost.completion_cost).label("completion_cost"),
+                func.sum(models.SpanCost.total_cost).label("total_cost"),
+                func.sum(models.SpanCost.prompt_tokens).label("prompt_tokens"),
+                func.sum(models.SpanCost.completion_tokens).label("completion_tokens"),
+                func.sum(models.SpanCost.total_tokens).label("total_tokens"),
             )
             .select_from(models.ExperimentRun)
             .join(models.Trace, models.ExperimentRun.trace_id == models.Trace.trace_id)
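The dataloader change above replaces the previous column expressions with plain func.sum(...).label(...) aggregates. A minimal, self-contained example of that SQLAlchemy pattern against a toy table and an in-memory SQLite database (not Phoenix's actual models):

from sqlalchemy import Column, Float, Integer, create_engine, func, select
from sqlalchemy.orm import Session, declarative_base

Base = declarative_base()


class SpanCost(Base):
    __tablename__ = "span_costs"
    id = Column(Integer, primary_key=True)
    experiment_id = Column(Integer, index=True)
    prompt_cost = Column(Float)
    completion_cost = Column(Float)


engine = create_engine("sqlite://")
Base.metadata.create_all(engine)

with Session(engine) as session:
    session.add_all(
        [
            SpanCost(experiment_id=1, prompt_cost=0.002, completion_cost=0.004),
            SpanCost(experiment_id=1, prompt_cost=0.001, completion_cost=0.003),
        ]
    )
    session.commit()

    # Group rows by experiment and sum each cost column, labeling the aggregates.
    stmt = (
        select(
            SpanCost.experiment_id,
            func.sum(SpanCost.prompt_cost).label("prompt_cost"),
            func.sum(SpanCost.completion_cost).label("completion_cost"),
        ).group_by(SpanCost.experiment_id)
    )
    for row in session.execute(stmt):
        print(row.experiment_id, row.prompt_cost, row.completion_cost)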
phoenix/server/api/exceptions.py
CHANGED
@@ -27,6 +27,12 @@ class Unauthorized(CustomGraphQLError):
     """
 
 
+class InsufficientStorage(CustomGraphQLError):
+    """
+    An error raised when the database has insufficient storage to complete a request.
+    """
+
+
 class Conflict(CustomGraphQLError):
     """
     An error raised when a mutation cannot be completed due to a conflict with
phoenix/server/api/input_types/TimeBinConfig.py
ADDED
@@ -0,0 +1,23 @@
+from enum import Enum
+
+import strawberry
+
+
+@strawberry.enum
+class TimeBinScale(Enum):
+    MINUTE = "minute"
+    HOUR = "hour"
+    DAY = "day"
+    WEEK = "week"
+    MONTH = "month"
+    YEAR = "year"
+
+
+@strawberry.input
+class TimeBinConfig:
+    scale: TimeBinScale = strawberry.field(
+        default=TimeBinScale.HOUR, description="The scale of time bins for aggregation."
+    )
+    utc_offset_minutes: int = strawberry.field(
+        default=0, description="Offset in minutes from UTC for local time binning."
+    )
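TimeBinConfig is a new GraphQL input used for time-bucketed aggregations with a local-time offset. Conceptually, binning a timestamp at a given scale with a UTC offset works like the illustrative sketch below; this is not the query-side logic Phoenix actually uses.

from datetime import datetime, timedelta, timezone


def bin_start(ts: datetime, scale: str = "hour", utc_offset_minutes: int = 0) -> datetime:
    """Return the UTC start of the bin containing ts, after shifting into a local offset."""
    local = ts.astimezone(timezone.utc) + timedelta(minutes=utc_offset_minutes)
    if scale == "minute":
        start = local.replace(second=0, microsecond=0)
    elif scale == "hour":
        start = local.replace(minute=0, second=0, microsecond=0)
    elif scale == "day":
        start = local.replace(hour=0, minute=0, second=0, microsecond=0)
    else:
        raise ValueError(f"unsupported scale: {scale}")
    # Shift back so the bin edge is expressed in UTC again.
    return start - timedelta(minutes=utc_offset_minutes)


ts = datetime(2025, 7, 1, 3, 30, tzinfo=timezone.utc)
print(bin_start(ts, "day", utc_offset_minutes=-300))  # 2025-06-30 05:00:00+00:00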
phoenix/server/api/routers/oauth2.py
CHANGED
@@ -169,7 +169,11 @@ async def create_tokens(
             error=f"OAuth2 IDP {idp_name} does not appear to support OpenID Connect.",
         )
     user_info = await oauth2_client.parse_id_token(token_data, nonce=stored_nonce)
-
+    try:
+        user_info = _parse_user_info(user_info)
+    except MissingEmailScope as error:
+        return _redirect_to_login(request=request, error=str(error))
+
     try:
         async with request.app.state.db() as session:
             user = await _process_oauth2_user(
@@ -237,7 +241,12 @@ def _parse_user_info(user_info: dict[str, Any]) -> UserInfo:
     """
     assert isinstance(subject := user_info.get("sub"), (str, int))
     idp_user_id = str(subject)
-
+    email = user_info.get("email")
+    if not isinstance(email, str):
+        raise MissingEmailScope(
+            "Please ensure your OIDC provider is configured to use the 'email' scope."
+        )
+
     assert isinstance(username := user_info.get("name"), str) or username is None
     assert (
         isinstance(profile_picture_url := user_info.get("picture"), str)
@@ -541,6 +550,14 @@ class NotInvited(Exception):
     pass
 
 
+class MissingEmailScope(Exception):
+    """
+    Raised when the OIDC provider does not return the email scope.
+    """
+
+    pass
+
+
 def _redirect_to_login(*, request: Request, error: str) -> RedirectResponse:
     """
     Creates a RedirectResponse to the login page to display an error message.
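The OAuth2 change above turns a missing email claim into a recoverable login error (a redirect back to the login page with a message) instead of a failed assertion. Below is a stripped-down sketch of that validation step on a raw OIDC claims dict; the exception name mirrors the diff, but require_email and the surrounding code are illustrative.

from typing import Any


class MissingEmailScope(Exception):
    """Raised when the identity provider's token contains no email claim."""


def require_email(claims: dict[str, Any]) -> str:
    """Return the email claim or raise a user-facing error explaining the fix."""
    email = claims.get("email")
    if not isinstance(email, str) or not email:
        raise MissingEmailScope(
            "Please ensure your OIDC provider is configured to use the 'email' scope."
        )
    return email


try:
    require_email({"sub": "1234", "name": "Jane Doe"})  # no email claim present
except MissingEmailScope as error:
    # In the router, this becomes a redirect back to the login page with the message.
    print(f"login error: {error}")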
phoenix/server/api/types/CostBreakdown.py
CHANGED
@@ -5,11 +5,8 @@ import strawberry
 
 @strawberry.type
 class CostBreakdown:
-    tokens: Optional[float] = None
+    tokens: Optional[float] = strawberry.field(
+        default=None,
+        description="Total number of tokens, including tokens for which no cost was computed.",
+    )
     cost: Optional[float] = None
-
-    @strawberry.field
-    def cost_per_token(self) -> Optional[float]:
-        if self.tokens and self.cost:
-            return self.cost / self.tokens
-        return None
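With the cost_per_token resolver removed from CostBreakdown, clients that still want the ratio can derive it from the two remaining fields. A small sketch reproducing the removed resolver's behavior (undefined when either value is missing or zero):

from typing import Optional


def cost_per_token(cost: Optional[float], tokens: Optional[float]) -> Optional[float]:
    # Mirrors the removed resolver: only defined when both values are present and non-zero.
    if cost and tokens:
        return cost / tokens
    return None


print(cost_per_token(0.12, 3000))   # 4e-05
print(cost_per_token(None, 3000))   # None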