arize-phoenix 11.6.2__py3-none-any.whl → 11.8.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {arize_phoenix-11.6.2.dist-info → arize_phoenix-11.8.0.dist-info}/METADATA +22 -10
- {arize_phoenix-11.6.2.dist-info → arize_phoenix-11.8.0.dist-info}/RECORD +34 -33
- phoenix/config.py +33 -0
- phoenix/datetime_utils.py +112 -1
- phoenix/db/helpers.py +156 -1
- phoenix/experiments/functions.py +66 -20
- phoenix/experiments/tracing.py +2 -2
- phoenix/server/api/auth.py +28 -6
- phoenix/server/api/dataloaders/span_cost_summary_by_experiment.py +6 -7
- phoenix/server/api/exceptions.py +6 -0
- phoenix/server/api/input_types/TimeBinConfig.py +23 -0
- phoenix/server/api/routers/oauth2.py +19 -2
- phoenix/server/api/types/CostBreakdown.py +4 -7
- phoenix/server/api/types/Project.py +341 -73
- phoenix/server/app.py +7 -3
- phoenix/server/authorization.py +27 -2
- phoenix/server/cost_tracking/cost_details_calculator.py +22 -16
- phoenix/server/daemons/span_cost_calculator.py +2 -8
- phoenix/server/email/sender.py +2 -1
- phoenix/server/email/templates/db_disk_usage_notification.html +3 -0
- phoenix/server/static/.vite/manifest.json +36 -36
- phoenix/server/static/assets/{components-BZHttBll.js → components-5M9nebi4.js} +363 -283
- phoenix/server/static/assets/{index-AbJf1oNh.js → index-OU2WTnGN.js} +27 -20
- phoenix/server/static/assets/{pages-5yQ-jl70.js → pages-DF8rqxJ4.js} +416 -396
- phoenix/server/static/assets/{vendor-DRWIRkSJ.js → vendor-Bl7CyFDw.js} +147 -147
- phoenix/server/static/assets/{vendor-arizeai-DUhQaeau.js → vendor-arizeai-B_viEUUA.js} +18 -480
- phoenix/server/static/assets/{vendor-codemirror-D_6Q6Auv.js → vendor-codemirror-vlcH1_iR.js} +1 -1
- phoenix/server/static/assets/{vendor-recharts-BNBwj7vz.js → vendor-recharts-C9cQu72o.js} +25 -25
- phoenix/server/static/assets/{vendor-shiki-k1qj_XjP.js → vendor-shiki-BsknB7bv.js} +1 -1
- phoenix/version.py +1 -1
- {arize_phoenix-11.6.2.dist-info → arize_phoenix-11.8.0.dist-info}/WHEEL +0 -0
- {arize_phoenix-11.6.2.dist-info → arize_phoenix-11.8.0.dist-info}/entry_points.txt +0 -0
- {arize_phoenix-11.6.2.dist-info → arize_phoenix-11.8.0.dist-info}/licenses/IP_NOTICE +0 -0
- {arize_phoenix-11.6.2.dist-info → arize_phoenix-11.8.0.dist-info}/licenses/LICENSE +0 -0
phoenix/experiments/functions.py
CHANGED
@@ -10,7 +10,7 @@ from copy import deepcopy
 from dataclasses import replace
 from datetime import datetime, timezone
 from itertools import product
-from typing import Any, Literal, Optional, Union, cast
+from typing import TYPE_CHECKING, Any, Literal, Optional, Union, cast
 from urllib.parse import urljoin
 
 import httpx
@@ -65,6 +65,41 @@ from phoenix.trace.attributes import flatten
 from phoenix.utilities.client import VersionedAsyncClient, VersionedClient
 from phoenix.utilities.json import jsonify
 
+if TYPE_CHECKING:
+    from phoenix.client.resources.datasets import Dataset as ClientDataset
+
+
+def _convert_client_dataset(new_dataset: "ClientDataset") -> Dataset:
+    """
+    Converts Dataset objects from `phoenix.client` to Dataset objects compatible with experiments.
+    """
+    examples_dict: dict[str, Example] = {}
+    for example_data in new_dataset.examples:
+        legacy_example = Example(
+            id=example_data["id"],
+            input=example_data["input"],
+            output=example_data["output"],
+            metadata=example_data["metadata"],
+            updated_at=datetime.fromisoformat(example_data["updated_at"]),
+        )
+        examples_dict[legacy_example.id] = legacy_example
+
+    return Dataset(
+        id=new_dataset.id,
+        version_id=new_dataset.version_id,
+        examples=examples_dict,
+    )
+
+
+def _is_new_client_dataset(dataset: Any) -> bool:
+    """Check if dataset is from new client (has list examples)."""
+    try:
+        from phoenix.client.resources.datasets import Dataset as _ClientDataset
+
+        return isinstance(dataset, _ClientDataset)
+    except ImportError:
+        return False
+
 
 def _phoenix_clients() -> tuple[httpx.Client, httpx.AsyncClient]:
     return VersionedClient(
@@ -85,7 +120,7 @@ RateLimitErrors: TypeAlias = Union[type[BaseException], Sequence[type[BaseException]]]
 
 
 def run_experiment(
-    dataset: Dataset,
+    dataset: Union[Dataset, Any],  # Accept both legacy and new client datasets
     task: ExperimentTask,
     evaluators: Optional[Evaluators] = None,
     *,
@@ -166,11 +201,20 @@ def run_experiment(
         RanExperiment: The results of the experiment and evaluation. Additional evaluations can be
             added to the experiment using the `evaluate_experiment` function.
     """
+    # Auto-convert client Dataset objects to legacy format
+    normalized_dataset: Dataset
+    if _is_new_client_dataset(dataset):
+        normalized_dataset = _convert_client_dataset(cast("ClientDataset", dataset))
+    else:
+        normalized_dataset = dataset
+
     task_signature = inspect.signature(task)
     _validate_task_signature(task_signature)
 
-    if not dataset.examples:
-        raise ValueError(f"Dataset has no examples: {dataset.id=}, {dataset.version_id=}")
+    if not normalized_dataset.examples:
+        raise ValueError(
+            f"Dataset has no examples: {normalized_dataset.id=}, {normalized_dataset.version_id=}"
+        )
     # Add this to the params once supported in the UI
     repetitions = 1
     assert repetitions > 0, "Must run the experiment at least once."
@@ -179,7 +223,7 @@ def run_experiment(
     sync_client, async_client = _phoenix_clients()
 
     payload = {
-        "version_id": dataset.version_id,
+        "version_id": normalized_dataset.version_id,
         "name": experiment_name,
         "description": experiment_description,
         "metadata": experiment_metadata,
@@ -187,23 +231,23 @@ def run_experiment(
     }
     if not dry_run:
         experiment_response = sync_client.post(
-            f"/v1/datasets/{dataset.id}/experiments",
+            f"/v1/datasets/{normalized_dataset.id}/experiments",
            json=payload,
        )
        experiment_response.raise_for_status()
        exp_json = experiment_response.json()["data"]
        project_name = exp_json["project_name"]
        experiment = Experiment(
-            dataset_id=dataset.id,
-            dataset_version_id=dataset.version_id,
+            dataset_id=normalized_dataset.id,
+            dataset_version_id=normalized_dataset.version_id,
            repetitions=repetitions,
            id=exp_json["id"],
            project_name=project_name,
        )
    else:
        experiment = Experiment(
-            dataset_id=dataset.id,
-            dataset_version_id=dataset.version_id,
+            dataset_id=normalized_dataset.id,
+            dataset_version_id=normalized_dataset.version_id,
            repetitions=repetitions,
            id=DRY_RUN,
            project_name="",
@@ -216,18 +260,18 @@ def run_experiment(
     print("🧪 Experiment started.")
     if dry_run:
         examples = {
-            (ex := dataset[i]).id: ex
-            for i in pd.Series(range(len(dataset)))
-            .sample(min(len(dataset), int(dry_run)), random_state=42)
+            (ex := normalized_dataset[i]).id: ex
+            for i in pd.Series(range(len(normalized_dataset)))
+            .sample(min(len(normalized_dataset), int(dry_run)), random_state=42)
            .sort_values()
        }
        id_selection = "\n".join(examples)
        print(f"🌵️ This is a dry-run for these example IDs:\n{id_selection}")
-        dataset = replace(dataset, examples=examples)
+        normalized_dataset = replace(normalized_dataset, examples=examples)
    else:
-        dataset_experiments_url = get_dataset_experiments_url(dataset_id=dataset.id)
+        dataset_experiments_url = get_dataset_experiments_url(dataset_id=normalized_dataset.id)
        experiment_compare_url = get_experiment_url(
-            dataset_id=dataset.id,
+            dataset_id=normalized_dataset.id,
            experiment_id=experiment.id,
        )
        print(f"📺 View dataset experiments: {dataset_experiments_url}")
@@ -497,7 +541,7 @@ def run_experiment(
 
     test_cases = [
         TestCase(example=deepcopy(ex), repetition_number=rep)
-        for ex, rep in product(dataset.examples.values(), range(1, repetitions + 1))
+        for ex, rep in product(normalized_dataset.examples.values(), range(1, repetitions + 1))
     ]
     task_runs, _execution_details = executor.run(test_cases)
     print("✅ Task runs completed.")
@@ -513,7 +557,7 @@ def run_experiment(
                 task_runs.append(ExperimentRun.from_dict(run))
 
     # Check if we got all expected runs
-    expected_runs = len(dataset.examples) * repetitions
+    expected_runs = len(normalized_dataset.examples) * repetitions
     actual_runs = len(task_runs)
     if actual_runs < expected_runs:
         print(
@@ -521,12 +565,14 @@ def run_experiment(
             "completed successfully."
         )
 
-    params = ExperimentParameters(n_examples=len(dataset.examples), n_repetitions=repetitions)
+    params = ExperimentParameters(
+        n_examples=len(normalized_dataset.examples), n_repetitions=repetitions
+    )
     task_summary = TaskSummary.from_task_runs(params, task_runs)
     ran_experiment: RanExperiment = object.__new__(RanExperiment)
     ran_experiment.__init__(  # type: ignore[misc]
         params=params,
-        dataset=dataset,
+        dataset=normalized_dataset,
         runs={r.id: r for r in task_runs if r is not None},
         task_summary=task_summary,
         **_asdict(experiment),
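Note on the change above: run_experiment now also accepts Dataset objects returned by the newer phoenix.client package and converts them to the legacy format before running. The conversion is essentially re-keying a list of example dicts by id. Below is a self-contained sketch of that shape mapping using stand-in dataclasses; ExampleStub, DatasetStub, and normalize are illustrative names, not Phoenix APIs.

from dataclasses import dataclass, field
from datetime import datetime
from typing import Any


@dataclass
class ExampleStub:  # stand-in for the experiments Example type
    id: str
    input: dict[str, Any]
    output: dict[str, Any]
    metadata: dict[str, Any]
    updated_at: datetime


@dataclass
class DatasetStub:  # stand-in for the experiments Dataset type
    id: str
    version_id: str
    examples: dict[str, ExampleStub] = field(default_factory=dict)


def normalize(dataset_id: str, version_id: str, examples: list[dict[str, Any]]) -> DatasetStub:
    """Key each raw example dict by its id, mirroring what _convert_client_dataset does above."""
    keyed = {
        ex["id"]: ExampleStub(
            id=ex["id"],
            input=ex["input"],
            output=ex["output"],
            metadata=ex["metadata"],
            updated_at=datetime.fromisoformat(ex["updated_at"]),
        )
        for ex in examples
    }
    return DatasetStub(id=dataset_id, version_id=version_id, examples=keyed)


raw = [{"id": "ex-1", "input": {"question": "2 + 2?"}, "output": {"answer": "4"},
        "metadata": {}, "updated_at": "2025-01-01T00:00:00+00:00"}]
print(list(normalize("ds-1", "v-1", raw).examples))  # ['ex-1']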
phoenix/experiments/tracing.py
CHANGED
@@ -8,7 +8,7 @@ from typing import Any, Optional
 
 from opentelemetry.sdk.resources import Resource
 from opentelemetry.sdk.trace import ReadableSpan
-from opentelemetry.trace import
+from opentelemetry.trace import INVALID_SPAN_ID
 from wrapt import apply_patch, resolve_path, wrap_function_wrapper
 
 
@@ -29,7 +29,7 @@ class SpanModifier:
         Args:
             span: ReadableSpan: the span to modify
         """
-        if (ctx := span._context) is None or ctx.span_id ==
+        if (ctx := span._context) is None or ctx.span_id == INVALID_SPAN_ID:
             return
         span._resource = span._resource.merge(self._resource)
 
phoenix/server/api/auth.py
CHANGED
@@ -3,8 +3,10 @@ from typing import Any
 
 from strawberry import Info
 from strawberry.permission import BasePermission
+from typing_extensions import override
 
-from phoenix.
+from phoenix.config import get_env_support_email
+from phoenix.server.api.exceptions import InsufficientStorage, Unauthorized
 from phoenix.server.bearer_auth import PhoenixUser
 
 
@@ -20,15 +22,35 @@ class IsNotReadOnly(Authorization):
         return not info.context.read_only
 
 
-class IsLocked(
-    """
-    Disables mutations and subscriptions that create or update data but allows
-    queries and delete mutations.
+class IsLocked(BasePermission):
     """
+    Permission class that restricts data-modifying operations when insufficient storage.
+
+    When database storage capacity is exceeded, this permission blocks mutations and
+    subscriptions that create or update data, while allowing queries and delete mutations
+    to continue. This prevents database overflow while maintaining read access and the
+    ability to free up space through deletions.
 
-
+    Raises:
+        InsufficientStorage: When storage capacity is exceeded and data operations
+            are temporarily disabled. The error includes guidance for resolution
+            and support contact information if configured.
+    """
 
+    @override
+    def on_unauthorized(self) -> None:
+        """Create user-friendly error message when storage operations are blocked."""
+        message = (
+            "Database operations are disabled due to insufficient storage. "
+            "Please delete old data or increase storage."
+        )
+        if support_email := get_env_support_email():
+            message += f" Need help? Contact us at {support_email}"
+        raise InsufficientStorage(message)
+
+    @override
     def has_permission(self, source: Any, info: Info, **kwargs: Any) -> bool:
+        """Check if database operations are allowed based on storage capacity and lock status."""
         return not (info.context.db.should_not_insert_or_update or info.context.locked)
 
 
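For readers unfamiliar with Strawberry permission classes, the pattern IsLocked uses above (deny via has_permission, customize the failure via on_unauthorized) looks roughly like the following self-contained sketch. The schema, field names, and StorageUnavailable error are illustrative, not Phoenix's actual GraphQL schema.

from typing import Any

import strawberry
from strawberry.permission import BasePermission
from strawberry.types import Info


class StorageUnavailable(Exception):
    """Illustrative stand-in for a custom GraphQL error such as InsufficientStorage."""


class DenyWhenLocked(BasePermission):
    message = "Write operations are temporarily disabled."

    def has_permission(self, source: Any, info: Info, **kwargs: Any) -> bool:
        # Allow the operation only when the request context is not locked.
        return not getattr(info.context, "locked", False)

    def on_unauthorized(self) -> None:
        # Invoked when has_permission returns False; raise a custom error
        # instead of the default unauthorized error.
        raise StorageUnavailable(self.message)


@strawberry.type
class Query:
    @strawberry.field
    def ping(self) -> str:
        return "pong"


@strawberry.type
class Mutation:
    # The permission class guards only this mutation; queries remain unaffected.
    @strawberry.mutation(permission_classes=[DenyWhenLocked])
    def add_note(self, text: str) -> str:
        return text


schema = strawberry.Schema(query=Query, mutation=Mutation)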
phoenix/server/api/dataloaders/span_cost_summary_by_experiment.py
CHANGED
@@ -1,7 +1,6 @@
 from collections import defaultdict
 
 from sqlalchemy import func, select
-from sqlalchemy.sql.functions import coalesce
 from strawberry.dataloader import DataLoader
 from typing_extensions import TypeAlias
 
@@ -23,12 +22,12 @@ class SpanCostSummaryByExperimentDataLoader(DataLoader[Key, Result]):
         stmt = (
             select(
                 models.ExperimentRun.experiment_id,
-
-
-
-
-
-
+                func.sum(models.SpanCost.prompt_cost).label("prompt_cost"),
+                func.sum(models.SpanCost.completion_cost).label("completion_cost"),
+                func.sum(models.SpanCost.total_cost).label("total_cost"),
+                func.sum(models.SpanCost.prompt_tokens).label("prompt_tokens"),
+                func.sum(models.SpanCost.completion_tokens).label("completion_tokens"),
+                func.sum(models.SpanCost.total_tokens).label("total_tokens"),
             )
             .select_from(models.ExperimentRun)
             .join(models.Trace, models.ExperimentRun.trace_id == models.Trace.trace_id)
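The dataloader change above replaces the previous column expressions with plain func.sum(...).label(...) aggregates. A minimal, self-contained example of that SQLAlchemy pattern against a toy table and an in-memory SQLite database (not Phoenix's actual models):

from sqlalchemy import Column, Float, Integer, create_engine, func, select
from sqlalchemy.orm import Session, declarative_base

Base = declarative_base()


class SpanCost(Base):
    __tablename__ = "span_costs"
    id = Column(Integer, primary_key=True)
    experiment_id = Column(Integer, index=True)
    prompt_cost = Column(Float)
    completion_cost = Column(Float)


engine = create_engine("sqlite://")
Base.metadata.create_all(engine)

with Session(engine) as session:
    session.add_all(
        [
            SpanCost(experiment_id=1, prompt_cost=0.002, completion_cost=0.004),
            SpanCost(experiment_id=1, prompt_cost=0.001, completion_cost=0.003),
        ]
    )
    session.commit()

    # Group rows by experiment and sum each cost column, labeling the aggregates.
    stmt = (
        select(
            SpanCost.experiment_id,
            func.sum(SpanCost.prompt_cost).label("prompt_cost"),
            func.sum(SpanCost.completion_cost).label("completion_cost"),
        ).group_by(SpanCost.experiment_id)
    )
    for row in session.execute(stmt):
        print(row.experiment_id, row.prompt_cost, row.completion_cost)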
phoenix/server/api/exceptions.py
CHANGED
@@ -27,6 +27,12 @@ class Unauthorized(CustomGraphQLError):
     """
 
 
+class InsufficientStorage(CustomGraphQLError):
+    """
+    An error raised when the database has insufficient storage to complete a request.
+    """
+
+
 class Conflict(CustomGraphQLError):
     """
     An error raised when a mutation cannot be completed due to a conflict with
phoenix/server/api/input_types/TimeBinConfig.py
ADDED
@@ -0,0 +1,23 @@
+from enum import Enum
+
+import strawberry
+
+
+@strawberry.enum
+class TimeBinScale(Enum):
+    MINUTE = "minute"
+    HOUR = "hour"
+    DAY = "day"
+    WEEK = "week"
+    MONTH = "month"
+    YEAR = "year"
+
+
+@strawberry.input
+class TimeBinConfig:
+    scale: TimeBinScale = strawberry.field(
+        default=TimeBinScale.HOUR, description="The scale of time bins for aggregation."
+    )
+    utc_offset_minutes: int = strawberry.field(
+        default=0, description="Offset in minutes from UTC for local time binning."
+    )
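TimeBinConfig is a new GraphQL input used for time-bucketed aggregations with a local-time offset. Conceptually, binning a timestamp at a given scale with a UTC offset works like the illustrative sketch below; this is not the query-side logic Phoenix actually uses.

from datetime import datetime, timedelta, timezone


def bin_start(ts: datetime, scale: str = "hour", utc_offset_minutes: int = 0) -> datetime:
    """Return the UTC start of the bin containing ts, after shifting into a local offset."""
    local = ts.astimezone(timezone.utc) + timedelta(minutes=utc_offset_minutes)
    if scale == "minute":
        start = local.replace(second=0, microsecond=0)
    elif scale == "hour":
        start = local.replace(minute=0, second=0, microsecond=0)
    elif scale == "day":
        start = local.replace(hour=0, minute=0, second=0, microsecond=0)
    else:
        raise ValueError(f"unsupported scale: {scale}")
    # Shift back so the bin edge is expressed in UTC again.
    return start - timedelta(minutes=utc_offset_minutes)


ts = datetime(2025, 7, 1, 3, 30, tzinfo=timezone.utc)
print(bin_start(ts, "day", utc_offset_minutes=-300))  # 2025-06-30 05:00:00+00:00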
phoenix/server/api/routers/oauth2.py
CHANGED
@@ -169,7 +169,11 @@ async def create_tokens(
             error=f"OAuth2 IDP {idp_name} does not appear to support OpenID Connect.",
         )
     user_info = await oauth2_client.parse_id_token(token_data, nonce=stored_nonce)
-
+    try:
+        user_info = _parse_user_info(user_info)
+    except MissingEmailScope as error:
+        return _redirect_to_login(request=request, error=str(error))
+
     try:
         async with request.app.state.db() as session:
             user = await _process_oauth2_user(
@@ -237,7 +241,12 @@ def _parse_user_info(user_info: dict[str, Any]) -> UserInfo:
     """
     assert isinstance(subject := user_info.get("sub"), (str, int))
     idp_user_id = str(subject)
-
+    email = user_info.get("email")
+    if not isinstance(email, str):
+        raise MissingEmailScope(
+            "Please ensure your OIDC provider is configured to use the 'email' scope."
+        )
+
     assert isinstance(username := user_info.get("name"), str) or username is None
     assert (
         isinstance(profile_picture_url := user_info.get("picture"), str)
@@ -541,6 +550,14 @@ class NotInvited(Exception):
     pass
 
 
+class MissingEmailScope(Exception):
+    """
+    Raised when the OIDC provider does not return the email scope.
+    """
+
+    pass
+
+
 def _redirect_to_login(*, request: Request, error: str) -> RedirectResponse:
     """
     Creates a RedirectResponse to the login page to display an error message.
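The OAuth2 change above turns a missing email claim into a recoverable login error (a redirect back to the login page with a message) instead of a failed assertion. Below is a stripped-down sketch of that validation step on a raw OIDC claims dict; the exception name mirrors the diff, but require_email and the surrounding code are illustrative.

from typing import Any


class MissingEmailScope(Exception):
    """Raised when the identity provider's token contains no email claim."""


def require_email(claims: dict[str, Any]) -> str:
    """Return the email claim or raise a user-facing error explaining the fix."""
    email = claims.get("email")
    if not isinstance(email, str) or not email:
        raise MissingEmailScope(
            "Please ensure your OIDC provider is configured to use the 'email' scope."
        )
    return email


try:
    require_email({"sub": "1234", "name": "Jane Doe"})  # no email claim present
except MissingEmailScope as error:
    # In the router, this becomes a redirect back to the login page with the message.
    print(f"login error: {error}")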
phoenix/server/api/types/CostBreakdown.py
CHANGED
@@ -5,11 +5,8 @@ import strawberry
 
 @strawberry.type
 class CostBreakdown:
-    tokens: Optional[float] = None
+    tokens: Optional[float] = strawberry.field(
+        default=None,
+        description="Total number of tokens, including tokens for which no cost was computed.",
+    )
     cost: Optional[float] = None
-
-    @strawberry.field
-    def cost_per_token(self) -> Optional[float]:
-        if self.tokens and self.cost:
-            return self.cost / self.tokens
-        return None
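With the cost_per_token resolver removed from CostBreakdown, clients that still want the ratio can derive it from the two remaining fields. A small sketch reproducing the removed resolver's behavior (undefined when either value is missing or zero):

from typing import Optional


def cost_per_token(cost: Optional[float], tokens: Optional[float]) -> Optional[float]:
    # Mirrors the removed resolver: only defined when both values are present and non-zero.
    if cost and tokens:
        return cost / tokens
    return None


print(cost_per_token(0.12, 3000))   # 4e-05
print(cost_per_token(None, 3000))   # None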