arize-phoenix 11.6.2__py3-none-any.whl → 11.8.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of arize-phoenix might be problematic. Click here for more details.

Files changed (34)
  1. {arize_phoenix-11.6.2.dist-info → arize_phoenix-11.8.0.dist-info}/METADATA +22 -10
  2. {arize_phoenix-11.6.2.dist-info → arize_phoenix-11.8.0.dist-info}/RECORD +34 -33
  3. phoenix/config.py +33 -0
  4. phoenix/datetime_utils.py +112 -1
  5. phoenix/db/helpers.py +156 -1
  6. phoenix/experiments/functions.py +66 -20
  7. phoenix/experiments/tracing.py +2 -2
  8. phoenix/server/api/auth.py +28 -6
  9. phoenix/server/api/dataloaders/span_cost_summary_by_experiment.py +6 -7
  10. phoenix/server/api/exceptions.py +6 -0
  11. phoenix/server/api/input_types/TimeBinConfig.py +23 -0
  12. phoenix/server/api/routers/oauth2.py +19 -2
  13. phoenix/server/api/types/CostBreakdown.py +4 -7
  14. phoenix/server/api/types/Project.py +341 -73
  15. phoenix/server/app.py +7 -3
  16. phoenix/server/authorization.py +27 -2
  17. phoenix/server/cost_tracking/cost_details_calculator.py +22 -16
  18. phoenix/server/daemons/span_cost_calculator.py +2 -8
  19. phoenix/server/email/sender.py +2 -1
  20. phoenix/server/email/templates/db_disk_usage_notification.html +3 -0
  21. phoenix/server/static/.vite/manifest.json +36 -36
  22. phoenix/server/static/assets/{components-BZHttBll.js → components-5M9nebi4.js} +363 -283
  23. phoenix/server/static/assets/{index-AbJf1oNh.js → index-OU2WTnGN.js} +27 -20
  24. phoenix/server/static/assets/{pages-5yQ-jl70.js → pages-DF8rqxJ4.js} +416 -396
  25. phoenix/server/static/assets/{vendor-DRWIRkSJ.js → vendor-Bl7CyFDw.js} +147 -147
  26. phoenix/server/static/assets/{vendor-arizeai-DUhQaeau.js → vendor-arizeai-B_viEUUA.js} +18 -480
  27. phoenix/server/static/assets/{vendor-codemirror-D_6Q6Auv.js → vendor-codemirror-vlcH1_iR.js} +1 -1
  28. phoenix/server/static/assets/{vendor-recharts-BNBwj7vz.js → vendor-recharts-C9cQu72o.js} +25 -25
  29. phoenix/server/static/assets/{vendor-shiki-k1qj_XjP.js → vendor-shiki-BsknB7bv.js} +1 -1
  30. phoenix/version.py +1 -1
  31. {arize_phoenix-11.6.2.dist-info → arize_phoenix-11.8.0.dist-info}/WHEEL +0 -0
  32. {arize_phoenix-11.6.2.dist-info → arize_phoenix-11.8.0.dist-info}/entry_points.txt +0 -0
  33. {arize_phoenix-11.6.2.dist-info → arize_phoenix-11.8.0.dist-info}/licenses/IP_NOTICE +0 -0
  34. {arize_phoenix-11.6.2.dist-info → arize_phoenix-11.8.0.dist-info}/licenses/LICENSE +0 -0
@@ -10,7 +10,7 @@ from copy import deepcopy
10
10
  from dataclasses import replace
11
11
  from datetime import datetime, timezone
12
12
  from itertools import product
13
- from typing import Any, Literal, Optional, Union, cast
13
+ from typing import TYPE_CHECKING, Any, Literal, Optional, Union, cast
14
14
  from urllib.parse import urljoin
15
15
 
16
16
  import httpx
@@ -65,6 +65,41 @@ from phoenix.trace.attributes import flatten
65
65
  from phoenix.utilities.client import VersionedAsyncClient, VersionedClient
66
66
  from phoenix.utilities.json import jsonify
67
67
 
68
+ if TYPE_CHECKING:
69
+ from phoenix.client.resources.datasets import Dataset as ClientDataset
70
+
71
+
72
+ def _convert_client_dataset(new_dataset: "ClientDataset") -> Dataset:
73
+ """
74
+ Converts Dataset objects from `phoenix.client` to Dataset objects compatible with experiments.
75
+ """
76
+ examples_dict: dict[str, Example] = {}
77
+ for example_data in new_dataset.examples:
78
+ legacy_example = Example(
79
+ id=example_data["id"],
80
+ input=example_data["input"],
81
+ output=example_data["output"],
82
+ metadata=example_data["metadata"],
83
+ updated_at=datetime.fromisoformat(example_data["updated_at"]),
84
+ )
85
+ examples_dict[legacy_example.id] = legacy_example
86
+
87
+ return Dataset(
88
+ id=new_dataset.id,
89
+ version_id=new_dataset.version_id,
90
+ examples=examples_dict,
91
+ )
92
+
93
+
94
+ def _is_new_client_dataset(dataset: Any) -> bool:
95
+ """Check if dataset is from new client (has list examples)."""
96
+ try:
97
+ from phoenix.client.resources.datasets import Dataset as _ClientDataset
98
+
99
+ return isinstance(dataset, _ClientDataset)
100
+ except ImportError:
101
+ return False
102
+
68
103
 
69
104
  def _phoenix_clients() -> tuple[httpx.Client, httpx.AsyncClient]:
70
105
  return VersionedClient(
@@ -85,7 +120,7 @@ RateLimitErrors: TypeAlias = Union[type[BaseException], Sequence[type[BaseExcept
85
120
 
86
121
 
87
122
  def run_experiment(
88
- dataset: Dataset,
123
+ dataset: Union[Dataset, Any], # Accept both legacy and new client datasets
89
124
  task: ExperimentTask,
90
125
  evaluators: Optional[Evaluators] = None,
91
126
  *,
@@ -166,11 +201,20 @@ def run_experiment(
166
201
  RanExperiment: The results of the experiment and evaluation. Additional evaluations can be
167
202
  added to the experiment using the `evaluate_experiment` function.
168
203
  """
204
+ # Auto-convert client Dataset objects to legacy format
205
+ normalized_dataset: Dataset
206
+ if _is_new_client_dataset(dataset):
207
+ normalized_dataset = _convert_client_dataset(cast("ClientDataset", dataset))
208
+ else:
209
+ normalized_dataset = dataset
210
+
169
211
  task_signature = inspect.signature(task)
170
212
  _validate_task_signature(task_signature)
171
213
 
172
- if not dataset.examples:
173
- raise ValueError(f"Dataset has no examples: {dataset.id=}, {dataset.version_id=}")
214
+ if not normalized_dataset.examples:
215
+ raise ValueError(
216
+ f"Dataset has no examples: {normalized_dataset.id=}, {normalized_dataset.version_id=}"
217
+ )
174
218
  # Add this to the params once supported in the UI
175
219
  repetitions = 1
176
220
  assert repetitions > 0, "Must run the experiment at least once."
@@ -179,7 +223,7 @@ def run_experiment(
179
223
  sync_client, async_client = _phoenix_clients()
180
224
 
181
225
  payload = {
182
- "version_id": dataset.version_id,
226
+ "version_id": normalized_dataset.version_id,
183
227
  "name": experiment_name,
184
228
  "description": experiment_description,
185
229
  "metadata": experiment_metadata,
@@ -187,23 +231,23 @@ def run_experiment(
187
231
  }
188
232
  if not dry_run:
189
233
  experiment_response = sync_client.post(
190
- f"/v1/datasets/{dataset.id}/experiments",
234
+ f"/v1/datasets/{normalized_dataset.id}/experiments",
191
235
  json=payload,
192
236
  )
193
237
  experiment_response.raise_for_status()
194
238
  exp_json = experiment_response.json()["data"]
195
239
  project_name = exp_json["project_name"]
196
240
  experiment = Experiment(
197
- dataset_id=dataset.id,
198
- dataset_version_id=dataset.version_id,
241
+ dataset_id=normalized_dataset.id,
242
+ dataset_version_id=normalized_dataset.version_id,
199
243
  repetitions=repetitions,
200
244
  id=exp_json["id"],
201
245
  project_name=project_name,
202
246
  )
203
247
  else:
204
248
  experiment = Experiment(
205
- dataset_id=dataset.id,
206
- dataset_version_id=dataset.version_id,
249
+ dataset_id=normalized_dataset.id,
250
+ dataset_version_id=normalized_dataset.version_id,
207
251
  repetitions=repetitions,
208
252
  id=DRY_RUN,
209
253
  project_name="",
@@ -216,18 +260,18 @@ def run_experiment(
216
260
  print("🧪 Experiment started.")
217
261
  if dry_run:
218
262
  examples = {
219
- (ex := dataset[i]).id: ex
220
- for i in pd.Series(range(len(dataset)))
221
- .sample(min(len(dataset), int(dry_run)), random_state=42)
263
+ (ex := normalized_dataset[i]).id: ex
264
+ for i in pd.Series(range(len(normalized_dataset)))
265
+ .sample(min(len(normalized_dataset), int(dry_run)), random_state=42)
222
266
  .sort_values()
223
267
  }
224
268
  id_selection = "\n".join(examples)
225
269
  print(f"🌵️ This is a dry-run for these example IDs:\n{id_selection}")
226
- dataset = replace(dataset, examples=examples)
270
+ normalized_dataset = replace(normalized_dataset, examples=examples)
227
271
  else:
228
- dataset_experiments_url = get_dataset_experiments_url(dataset_id=dataset.id)
272
+ dataset_experiments_url = get_dataset_experiments_url(dataset_id=normalized_dataset.id)
229
273
  experiment_compare_url = get_experiment_url(
230
- dataset_id=dataset.id,
274
+ dataset_id=normalized_dataset.id,
231
275
  experiment_id=experiment.id,
232
276
  )
233
277
  print(f"📺 View dataset experiments: {dataset_experiments_url}")
@@ -497,7 +541,7 @@ def run_experiment(
497
541
 
498
542
  test_cases = [
499
543
  TestCase(example=deepcopy(ex), repetition_number=rep)
500
- for ex, rep in product(dataset.examples.values(), range(1, repetitions + 1))
544
+ for ex, rep in product(normalized_dataset.examples.values(), range(1, repetitions + 1))
501
545
  ]
502
546
  task_runs, _execution_details = executor.run(test_cases)
503
547
  print("✅ Task runs completed.")
@@ -513,7 +557,7 @@ def run_experiment(
513
557
  task_runs.append(ExperimentRun.from_dict(run))
514
558
 
515
559
  # Check if we got all expected runs
516
- expected_runs = len(dataset.examples) * repetitions
560
+ expected_runs = len(normalized_dataset.examples) * repetitions
517
561
  actual_runs = len(task_runs)
518
562
  if actual_runs < expected_runs:
519
563
  print(
@@ -521,12 +565,14 @@ def run_experiment(
521
565
  "completed successfully."
522
566
  )
523
567
 
524
- params = ExperimentParameters(n_examples=len(dataset.examples), n_repetitions=repetitions)
568
+ params = ExperimentParameters(
569
+ n_examples=len(normalized_dataset.examples), n_repetitions=repetitions
570
+ )
525
571
  task_summary = TaskSummary.from_task_runs(params, task_runs)
526
572
  ran_experiment: RanExperiment = object.__new__(RanExperiment)
527
573
  ran_experiment.__init__( # type: ignore[misc]
528
574
  params=params,
529
- dataset=dataset,
575
+ dataset=normalized_dataset,
530
576
  runs={r.id: r for r in task_runs if r is not None},
531
577
  task_summary=task_summary,
532
578
  **_asdict(experiment),
@@ -8,7 +8,7 @@ from typing import Any, Optional
8
8
 
9
9
  from opentelemetry.sdk.resources import Resource
10
10
  from opentelemetry.sdk.trace import ReadableSpan
11
- from opentelemetry.trace import INVALID_TRACE_ID
11
+ from opentelemetry.trace import INVALID_SPAN_ID
12
12
  from wrapt import apply_patch, resolve_path, wrap_function_wrapper
13
13
 
14
14
 
@@ -29,7 +29,7 @@ class SpanModifier:
29
29
  Args:
30
30
  span: ReadableSpan: the span to modify
31
31
  """
32
- if (ctx := span._context) is None or ctx.span_id == INVALID_TRACE_ID:
32
+ if (ctx := span._context) is None or ctx.span_id == INVALID_SPAN_ID:
33
33
  return
34
34
  span._resource = span._resource.merge(self._resource)
35
35
 
@@ -3,8 +3,10 @@ from typing import Any
3
3
 
4
4
  from strawberry import Info
5
5
  from strawberry.permission import BasePermission
6
+ from typing_extensions import override
6
7
 
7
- from phoenix.server.api.exceptions import Unauthorized
8
+ from phoenix.config import get_env_support_email
9
+ from phoenix.server.api.exceptions import InsufficientStorage, Unauthorized
8
10
  from phoenix.server.bearer_auth import PhoenixUser
9
11
 
10
12
 
@@ -20,15 +22,35 @@ class IsNotReadOnly(Authorization):
20
22
  return not info.context.read_only
21
23
 
22
24
 
23
- class IsLocked(Authorization):
24
- """
25
- Disables mutations and subscriptions that create or update data but allows
26
- queries and delete mutations.
25
+ class IsLocked(BasePermission):
27
26
  """
27
+ Permission class that restricts data-modifying operations when insufficient storage.
28
+
29
+ When database storage capacity is exceeded, this permission blocks mutations and
30
+ subscriptions that create or update data, while allowing queries and delete mutations
31
+ to continue. This prevents database overflow while maintaining read access and the
32
+ ability to free up space through deletions.
28
33
 
29
- message = "Operations that write or modify data are locked"
34
+ Raises:
35
+ InsufficientStorage: When storage capacity is exceeded and data operations
36
+ are temporarily disabled. The error includes guidance for resolution
37
+ and support contact information if configured.
38
+ """
30
39
 
40
+ @override
41
+ def on_unauthorized(self) -> None:
42
+ """Create user-friendly error message when storage operations are blocked."""
43
+ message = (
44
+ "Database operations are disabled due to insufficient storage. "
45
+ "Please delete old data or increase storage."
46
+ )
47
+ if support_email := get_env_support_email():
48
+ message += f" Need help? Contact us at {support_email}"
49
+ raise InsufficientStorage(message)
50
+
51
+ @override
31
52
  def has_permission(self, source: Any, info: Info, **kwargs: Any) -> bool:
53
+ """Check if database operations are allowed based on storage capacity and lock status."""
32
54
  return not (info.context.db.should_not_insert_or_update or info.context.locked)
33
55
 
34
56
 
@@ -1,7 +1,6 @@
1
1
  from collections import defaultdict
2
2
 
3
3
  from sqlalchemy import func, select
4
- from sqlalchemy.sql.functions import coalesce
5
4
  from strawberry.dataloader import DataLoader
6
5
  from typing_extensions import TypeAlias
7
6
 
@@ -23,12 +22,12 @@ class SpanCostSummaryByExperimentDataLoader(DataLoader[Key, Result]):
23
22
  stmt = (
24
23
  select(
25
24
  models.ExperimentRun.experiment_id,
26
- coalesce(func.sum(models.SpanCost.prompt_cost), 0).label("prompt_cost"),
27
- coalesce(func.sum(models.SpanCost.completion_cost), 0).label("completion_cost"),
28
- coalesce(func.sum(models.SpanCost.total_cost), 0).label("total_cost"),
29
- coalesce(func.sum(models.SpanCost.prompt_tokens), 0).label("prompt_tokens"),
30
- coalesce(func.sum(models.SpanCost.completion_tokens), 0).label("completion_tokens"),
31
- coalesce(func.sum(models.SpanCost.total_tokens), 0).label("total_tokens"),
25
+ func.sum(models.SpanCost.prompt_cost).label("prompt_cost"),
26
+ func.sum(models.SpanCost.completion_cost).label("completion_cost"),
27
+ func.sum(models.SpanCost.total_cost).label("total_cost"),
28
+ func.sum(models.SpanCost.prompt_tokens).label("prompt_tokens"),
29
+ func.sum(models.SpanCost.completion_tokens).label("completion_tokens"),
30
+ func.sum(models.SpanCost.total_tokens).label("total_tokens"),
32
31
  )
33
32
  .select_from(models.ExperimentRun)
34
33
  .join(models.Trace, models.ExperimentRun.trace_id == models.Trace.trace_id)
@@ -27,6 +27,12 @@ class Unauthorized(CustomGraphQLError):
27
27
  """
28
28
 
29
29
 
30
+ class InsufficientStorage(CustomGraphQLError):
31
+ """
32
+ An error raised when the database has insufficient storage to complete a request.
33
+ """
34
+
35
+
30
36
  class Conflict(CustomGraphQLError):
31
37
  """
32
38
  An error raised when a mutation cannot be completed due to a conflict with
@@ -0,0 +1,23 @@
1
+ from enum import Enum
2
+
3
+ import strawberry
4
+
5
+
6
+ @strawberry.enum
7
+ class TimeBinScale(Enum):
8
+ MINUTE = "minute"
9
+ HOUR = "hour"
10
+ DAY = "day"
11
+ WEEK = "week"
12
+ MONTH = "month"
13
+ YEAR = "year"
14
+
15
+
16
+ @strawberry.input
17
+ class TimeBinConfig:
18
+ scale: TimeBinScale = strawberry.field(
19
+ default=TimeBinScale.HOUR, description="The scale of time bins for aggregation."
20
+ )
21
+ utc_offset_minutes: int = strawberry.field(
22
+ default=0, description="Offset in minutes from UTC for local time binning."
23
+ )
@@ -169,7 +169,11 @@ async def create_tokens(
169
169
  error=f"OAuth2 IDP {idp_name} does not appear to support OpenID Connect.",
170
170
  )
171
171
  user_info = await oauth2_client.parse_id_token(token_data, nonce=stored_nonce)
172
- user_info = _parse_user_info(user_info)
172
+ try:
173
+ user_info = _parse_user_info(user_info)
174
+ except MissingEmailScope as error:
175
+ return _redirect_to_login(request=request, error=str(error))
176
+
173
177
  try:
174
178
  async with request.app.state.db() as session:
175
179
  user = await _process_oauth2_user(
@@ -237,7 +241,12 @@ def _parse_user_info(user_info: dict[str, Any]) -> UserInfo:
237
241
  """
238
242
  assert isinstance(subject := user_info.get("sub"), (str, int))
239
243
  idp_user_id = str(subject)
240
- assert isinstance(email := user_info.get("email"), str)
244
+ email = user_info.get("email")
245
+ if not isinstance(email, str):
246
+ raise MissingEmailScope(
247
+ "Please ensure your OIDC provider is configured to use the 'email' scope."
248
+ )
249
+
241
250
  assert isinstance(username := user_info.get("name"), str) or username is None
242
251
  assert (
243
252
  isinstance(profile_picture_url := user_info.get("picture"), str)
@@ -541,6 +550,14 @@ class NotInvited(Exception):
541
550
  pass
542
551
 
543
552
 
553
+ class MissingEmailScope(Exception):
554
+ """
555
+ Raised when the OIDC provider does not return the email scope.
556
+ """
557
+
558
+ pass
559
+
560
+
544
561
  def _redirect_to_login(*, request: Request, error: str) -> RedirectResponse:
545
562
  """
546
563
  Creates a RedirectResponse to the login page to display an error message.
@@ -5,11 +5,8 @@ import strawberry
5
5
 
6
6
  @strawberry.type
7
7
  class CostBreakdown:
8
- tokens: Optional[float] = None
8
+ tokens: Optional[float] = strawberry.field(
9
+ default=None,
10
+ description="Total number of tokens, including tokens for which no cost was computed.",
11
+ )
9
12
  cost: Optional[float] = None
10
-
11
- @strawberry.field
12
- def cost_per_token(self) -> Optional[float]:
13
- if self.tokens and self.cost:
14
- return self.cost / self.tokens
15
- return None