arize-phoenix 11.18.0__py3-none-any.whl → 11.20.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of arize-phoenix might be problematic. Click here for more details.

@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: arize-phoenix
3
- Version: 11.18.0
3
+ Version: 11.20.0
4
4
  Summary: AI Observability and Evaluation
5
5
  Project-URL: Documentation, https://arize.com/docs/phoenix/
6
6
  Project-URL: Issues, https://github.com/Arize-ai/phoenix/issues
@@ -6,7 +6,7 @@ phoenix/exceptions.py,sha256=n2L2KKuecrdflB9MsCdAYCiSEvGJptIsfRkXMoJle7A,169
6
6
  phoenix/py.typed,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
7
7
  phoenix/services.py,sha256=ngkyKGVatX3cO2WJdo2hKdaVKP-xJCMvqthvga6kJss,5196
8
8
  phoenix/settings.py,sha256=2kHfT3BNOVd4dAO1bq-syEQbHSG8oX2-7NhOwK2QREk,896
9
- phoenix/version.py,sha256=674yIbDThxLNMaAj6918CK5bHaDOhKWT8kaou_ywuBk,24
9
+ phoenix/version.py,sha256=PWj_8RsVSxALf302x1wXXyxFVSuiHNYoMU_ppot504s,24
10
10
  phoenix/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
11
11
  phoenix/core/embedding_dimension.py,sha256=zKGbcvwOXgLf-yrJBpQyKtd-LEOPRKHnUToyAU8Owis,87
12
12
  phoenix/core/model.py,sha256=qBFraOtmwCCnWJltKNP18DDG0mULXigytlFsa6YOz6k,4837
@@ -20,7 +20,7 @@ phoenix/db/constants.py,sha256=-YE2rkzcROG06_rerfnX5hC7fLzOHx1Gjw4nXhX_um4,46
20
20
  phoenix/db/engines.py,sha256=tB_8iWMDz0folryVvw29sbBUxJOB2XZ-Xx0Uexj3uns,6889
21
21
  phoenix/db/enums.py,sha256=w3O5YuJEEzVTwVDZb8b2UUFhU8yK_GosF081VVrrno0,188
22
22
  phoenix/db/facilitator.py,sha256=UIC-l14p3R8GFVWPmz04NY-CDm_zAynXCAuIYpj_W_g,20254
23
- phoenix/db/helpers.py,sha256=dsGONSgkhmVtjMpJh-84KRVTf5uPdQ5c8O2AhUgHkRg,14150
23
+ phoenix/db/helpers.py,sha256=jqX13kO4qdGozDRdw6nME4BloPtzEQd0fweQpGHbj3I,15078
24
24
  phoenix/db/migrate.py,sha256=oUrXH8yEbcpL4eh09aSCuUiSrhFli0eT5D_j4ZmYChY,2797
25
25
  phoenix/db/models.py,sha256=bxyBRSST8rqBKcAyPyyDHmkv9AadaE3XmQnpcaMvvnk,61588
26
26
  phoenix/db/pg_config.py,sha256=h6mB7qF7t4Zk6VGvAiyefHGVu74o-yJynaWzeE39k9Y,6001
@@ -100,7 +100,7 @@ phoenix/server/grpc_server.py,sha256=ahHC394gFZYM3h4FmjQxZwL-a4x3mWmV2EdXYFlNEC8
100
100
  phoenix/server/jwt_store.py,sha256=B6uVildN_dQDTG_-aHHvuVSI7wIVK1yvED-_y6se2GU,16905
101
101
  phoenix/server/main.py,sha256=UBwxrQIEE7ci-SbE6GAlRYmbMHooI6JYG6sG-UpBFFs,18905
102
102
  phoenix/server/oauth2.py,sha256=GvUqZBoZ5dG-l2G1RMl1SUcN10jNAjaMXFznMSWz2Zs,3336
103
- phoenix/server/prometheus.py,sha256=c7G_5Rvb7teCzBHuEvwHMZjE2iYrsLLYAcg2mPU8y_c,8678
103
+ phoenix/server/prometheus.py,sha256=PV3XNUvtBQqpIjFS5ulyhGK_WF-2v-Wf3rsXjfDxeUs,8694
104
104
  phoenix/server/rate_limiters.py,sha256=cFc73D2NaxqNZZDbwfIDw4So-fRVOJPBtqxOZ8Qky_s,7155
105
105
  phoenix/server/retention.py,sha256=MQe1FWuc_NxhqgIq5q2hfFhWT8ddAmpppgI74xYEQ6c,3064
106
106
  phoenix/server/telemetry.py,sha256=4EluDDrhdDPxAjaW6lVSbi73xkB5XeUCZWOmZGdk0hg,2755
@@ -112,7 +112,7 @@ phoenix/server/api/auth.py,sha256=AyYhnZIbY9ALVjg2K6aC2UXSa3Pva5GVDBXyaZ3nD3o,27
112
112
  phoenix/server/api/context.py,sha256=mqsq_8Ru50e-PxKWNTzh9zptb1PFjYFUf58uW59UYL0,8996
113
113
  phoenix/server/api/exceptions.py,sha256=E2W0x63CBzc0CoQPptrLr9nZxPF9zIP8MCJ3RuJMddw,1322
114
114
  phoenix/server/api/interceptor.py,sha256=ykDnoC_apUd-llVli3m1CW18kNSIgjz2qZ6m5JmPDu8,1294
115
- phoenix/server/api/queries.py,sha256=ExhH6hftJArSKSdVlKOjbwyWMUT7s8nu1YyW_Wnj4vE,46482
115
+ phoenix/server/api/queries.py,sha256=RC-MS6x1gNqYhNQ6TnWA6zqDDnpiW4puKLBTrbtS8os,67102
116
116
  phoenix/server/api/schema.py,sha256=fcs36xQwFF_Qe41_5cWR8wYpDvOrnbcyTeo5WNMbDsA,1702
117
117
  phoenix/server/api/subscriptions.py,sha256=U7JZl-FGfsaIhRkIFdeSQLqR7xCS7CY1h-21BOAcaqY,25439
118
118
  phoenix/server/api/utils.py,sha256=quCBRcusc6PUq9tJq7M8PgwFZp7nXgVAxtbw8feribY,833
@@ -176,7 +176,7 @@ phoenix/server/api/helpers/__init__.py,sha256=m2-xaSPqUiSs91k62JaRDjFNfl-1byxBfY
176
176
  phoenix/server/api/helpers/annotations.py,sha256=9gMXKpMTfWEChoSCnvdWYuyB0hlSnNOp-qUdar9Vono,262
177
177
  phoenix/server/api/helpers/dataset_helpers.py,sha256=3bdGBoUzqrtg-sr5p2wpQLOU6dhg_3TKFHNeJj8p0TU,9155
178
178
  phoenix/server/api/helpers/experiment_run_filters.py,sha256=DOnVwrmn39eAkk2mwuZP8kIcAnR5jrOgllEwWSjsw94,29893
179
- phoenix/server/api/helpers/playground_clients.py,sha256=Fq4DNVIdnCiiVt0bh5mrZ7dJb2oOQcLjTttfq0Wcuv0,73589
179
+ phoenix/server/api/helpers/playground_clients.py,sha256=3EoJx4bYgCl0LXQnL8lbYdTklk4TxGKbNYcRZFDYxrY,73652
180
180
  phoenix/server/api/helpers/playground_registry.py,sha256=n0v4-KnvZJxeaEwOla5qBbnOQjSWznKmMhZnh9ziJt0,2584
181
181
  phoenix/server/api/helpers/playground_spans.py,sha256=QpXwPl_fFNwm_iA1A77XApUyXMl1aDmonw8aXuNZ_4k,17132
182
182
  phoenix/server/api/helpers/prompts/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -263,9 +263,9 @@ phoenix/server/api/routers/v1/experiment_evaluations.py,sha256=_xnVqFCwZoOUPravd
263
263
  phoenix/server/api/routers/v1/experiment_runs.py,sha256=LZeCQWQIEOZ9jK5Gp_C4JbiYY6AmnnWe85cVcvdkCLE,7107
264
264
  phoenix/server/api/routers/v1/experiments.py,sha256=wML-cad0NiflFGIysaJECFulXLqj5rjwMmNmKQVaGzw,20592
265
265
  phoenix/server/api/routers/v1/models.py,sha256=p3gJN-9SWiUYTUTft4bZMsZVCBNTb4nN1Foy68eRZzQ,1997
266
- phoenix/server/api/routers/v1/projects.py,sha256=32GwTLsaFgQLVNdjrlrGe90XT3pIX1N7-zX9D9_J_4w,12701
266
+ phoenix/server/api/routers/v1/projects.py,sha256=XR6uJxHXXtC1q8LNyS9W6iaj440sv1OKCu-OSBfxEys,12824
267
267
  phoenix/server/api/routers/v1/prompts.py,sha256=chRYcLkOYDJdJfVZVukVTUyIRnLPvsJCg41CuPxOIU8,26695
268
- phoenix/server/api/routers/v1/spans.py,sha256=ETH6I14O_zY9IW69Fo-LxL796BR3xgt8qdzwqzYAvbE,44208
268
+ phoenix/server/api/routers/v1/spans.py,sha256=4aEUlfbfoyxP78TxQxBzriJYFBUe5ObFU99uesbz-iI,49390
269
269
  phoenix/server/api/routers/v1/traces.py,sha256=Skn0N_L4ZjoJ7x76PBrqvbKPFiAk8xSe1yxfiOaQ0Gc,11285
270
270
  phoenix/server/api/routers/v1/users.py,sha256=eO8zMtGU33Td2_G1l9D7Z0a4CG1CwBUCj_Z9z2uk7wg,12089
271
271
  phoenix/server/api/routers/v1/utils.py,sha256=oXIOGPzPTkE0ZWUTRCoRIQQ7wTzoSwtWFaUSjlGBqts,4960
@@ -283,7 +283,7 @@ phoenix/server/api/types/CostBreakdown.py,sha256=yw9dlb0blGIB_dWNP8yEvDHJztHjpiV
283
283
  phoenix/server/api/types/CreateDatasetPayload.py,sha256=R-6zCmuD0f76RU9Giu78xwTHlASQs6Aq8yzvX1Kxc3g,140
284
284
  phoenix/server/api/types/CronExpression.py,sha256=R7oxuSSX_eTUHQWaoaSueQqWDmkkHr5dBKRN6q-6ROk,331
285
285
  phoenix/server/api/types/DataQualityMetric.py,sha256=Aieg3bHeBFaAf4mqeRcH1zT04sXAtQD8ATSHJt7FaBQ,1538
286
- phoenix/server/api/types/Dataset.py,sha256=WBgZORzdwjWjNsX5suSmPt0x7IwKY0jA_BdZt2nsrYA,12186
286
+ phoenix/server/api/types/Dataset.py,sha256=StVJmOE996Citau11JtFTmcgLqvN9IeZsHAbe-Y1gkg,12933
287
287
  phoenix/server/api/types/DatasetExample.py,sha256=_9byxGpXfYb-hmFMUJeG7Bw1wsRKSJaHwF2IPAbFpFw,3115
288
288
  phoenix/server/api/types/DatasetExampleRevision.py,sha256=c-jWR6dTguEZTm54IMlFr0Ic84I3nefyDnZb7nF5hnI,874
289
289
  phoenix/server/api/types/DatasetValues.py,sha256=7VbCOLlzOXpZN80-zYF2UGuafRcPsZF-8WQNc0YsKFc,1119
@@ -391,10 +391,10 @@ phoenix/server/static/apple-touch-icon-76x76.png,sha256=CT_xT12I0u2i0WU8JzBZBuOQ
391
391
  phoenix/server/static/apple-touch-icon.png,sha256=fOfpjqGpWYbJ0eAurKsyoZP1EAs6ZVooBJ_SGk2ZkDs,3801
392
392
  phoenix/server/static/favicon.ico,sha256=bY0vvCKRftemZfPShwZtE93DiiQdaYaozkPGwNFr6H8,34494
393
393
  phoenix/server/static/modernizr.js,sha256=mvK-XtkNqjOral-QvzoqsyOMECXIMu5BQwSVN_wcU9c,2564
394
- phoenix/server/static/.vite/manifest.json,sha256=wdIctWiLSvBXLW9VyoDGwL1JHv4egHSTv5LoL920QKU,2328
395
- phoenix/server/static/assets/components-B1Ec5V_g.js,sha256=4acB1c9xZ91quZtIby2N2jN-wX4GCA5G3RnlmW9FKwc,656940
396
- phoenix/server/static/assets/index-DYx39hbu.js,sha256=1xQZOioOvWwZOZfyjmyCV_TejQqY3C_CH2mN609RA-Q,63064
397
- phoenix/server/static/assets/pages-B77OHHSB.js,sha256=oQjHFiCc5ciitUdp0-LtGBshV-ZIIh0QxvSmZr0Sx5E,1218398
394
+ phoenix/server/static/.vite/manifest.json,sha256=VfeGEGEeidc2iRZCqPQ8Ae-XTEOyLCy0MQw8DsvFEc0,2328
395
+ phoenix/server/static/assets/components-BNcxEjYs.js,sha256=1_lobSTCfokeUlK4ClEylCmZ4gMIpa_zCsgZYZDGF9w,658582
396
+ phoenix/server/static/assets/index-CKIBKnVD.js,sha256=ig-C5VgtSHWxhrkqhMwj6k1WKTjCYXdYvqI30ajC3v0,63064
397
+ phoenix/server/static/assets/pages-3RoC-adr.js,sha256=Y5BgOTRvJRpBKJJ4rBMi7iTM3TNRDFe_v0nlxUEpV-0,1224388
398
398
  phoenix/server/static/assets/vendor-BbqekBfb.js,sha256=8xINQdH4ikfrf8nr8mlO0B9YrKJ2FPecrA9qu5kPILo,2588857
399
399
  phoenix/server/static/assets/vendor-CqDb5u4o.css,sha256=zIyFiNJKxMaQk8AvtLgt1rR01oO10d1MFndSDKH9Clw,5517
400
400
  phoenix/server/static/assets/vendor-arizeai-CEwHhYfL.js,sha256=EIl1d9G6uPn7_Fc8YyAdxWmyV1Y7k1nN7VeJmI4MxtA,121514
@@ -441,9 +441,9 @@ phoenix/utilities/project.py,sha256=auVpARXkDb-JgeX5f2aStyFIkeKvGwN9l7qrFeJMVxI,
441
441
  phoenix/utilities/re.py,sha256=6YyUWIkv0zc2SigsxfOWIHzdpjKA_TZo2iqKq7zJKvw,2081
442
442
  phoenix/utilities/span_store.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
443
443
  phoenix/utilities/template_formatters.py,sha256=gh9PJD6WEGw7TEYXfSst1UR4pWWwmjxMLrDVQ_CkpkQ,2779
444
- arize_phoenix-11.18.0.dist-info/METADATA,sha256=XaPsPlJWMGjRNAAtWtBTxz1RuLSV0A_U37UeL1u-1wI,30950
445
- arize_phoenix-11.18.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
446
- arize_phoenix-11.18.0.dist-info/entry_points.txt,sha256=Pgpn8Upxx9P8z8joPXZWl2LlnAlGc3gcQoVchb06X1Q,94
447
- arize_phoenix-11.18.0.dist-info/licenses/IP_NOTICE,sha256=JBqyyCYYxGDfzQ0TtsQgjts41IJoa-hiwDrBjCb9gHM,469
448
- arize_phoenix-11.18.0.dist-info/licenses/LICENSE,sha256=HFkW9REuMOkvKRACuwLPT0hRydHb3zNg-fdFt94td18,3794
449
- arize_phoenix-11.18.0.dist-info/RECORD,,
444
+ arize_phoenix-11.20.0.dist-info/METADATA,sha256=_ihQiuoj-PL_R0L83OBu6UdGvzM6wkRmjFQKJbPaU2M,30950
445
+ arize_phoenix-11.20.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
446
+ arize_phoenix-11.20.0.dist-info/entry_points.txt,sha256=Pgpn8Upxx9P8z8joPXZWl2LlnAlGc3gcQoVchb06X1Q,94
447
+ arize_phoenix-11.20.0.dist-info/licenses/IP_NOTICE,sha256=JBqyyCYYxGDfzQ0TtsQgjts41IJoa-hiwDrBjCb9gHM,469
448
+ arize_phoenix-11.20.0.dist-info/licenses/LICENSE,sha256=HFkW9REuMOkvKRACuwLPT0hRydHb3zNg-fdFt94td18,3794
449
+ arize_phoenix-11.20.0.dist-info/RECORD,,
phoenix/db/helpers.py CHANGED
@@ -328,3 +328,30 @@ def _date_trunc_for_sqlite(
328
328
 
329
329
  # Convert back to UTC by subtracting the offset
330
330
  return func.datetime(t, f"{-utc_offset_minutes} minutes")
331
+
332
+
333
+ def get_ancestor_span_rowids(parent_id: str) -> Select[tuple[int]]:
334
+ """
335
+ Get the database row IDs of all ancestor spans for a given parent_id using a recursive CTE.
336
+
337
+ This function returns a query that finds all ancestors of a span with the given parent_id.
338
+ It uses a recursive Common Table Expression (CTE) to traverse up the span hierarchy.
339
+
340
+ Args:
341
+ parent_id: The span_id of the parent span to start the ancestor search from.
342
+
343
+ Returns:
344
+ A Select query that returns tuples of (id,) holding the database row ID (not the span_id) of each ancestor span, starting with the span whose span_id equals parent_id.
345
+ """
346
+ ancestors = (
347
+ select(models.Span.id, models.Span.parent_id)
348
+ .where(models.Span.span_id == parent_id)
349
+ .cte(recursive=True)
350
+ )
351
+ child = ancestors.alias()
352
+ ancestors = ancestors.union_all(
353
+ select(models.Span.id, models.Span.parent_id).join(
354
+ child, models.Span.span_id == child.c.parent_id
355
+ )
356
+ )
357
+ return select(ancestors.c.id)
@@ -1677,6 +1677,8 @@ class AnthropicStreamingClient(PlaygroundStreamingClient):
1677
1677
  model_names=[
1678
1678
  "claude-sonnet-4-0",
1679
1679
  "claude-sonnet-4-20250514",
1680
+ "claude-opus-4-1",
1681
+ "claude-opus-4-1-20250805",
1680
1682
  "claude-opus-4-0",
1681
1683
  "claude-opus-4-20250514",
1682
1684
  "claude-3-7-sonnet-latest",
@@ -1,14 +1,14 @@
1
1
  import re
2
2
  from collections import defaultdict
3
3
  from datetime import datetime
4
- from typing import Iterable, Iterator, Optional, Union
4
+ from typing import Any, Iterable, Iterator, Optional, Union
5
5
  from typing import cast as type_cast
6
6
 
7
7
  import numpy as np
8
8
  import numpy.typing as npt
9
9
  import strawberry
10
- from sqlalchemy import String, and_, cast, distinct, func, select, text
11
- from sqlalchemy.orm import joinedload
10
+ from sqlalchemy import ColumnElement, String, and_, case, cast, distinct, func, select, text
11
+ from sqlalchemy.orm import aliased, joinedload
12
12
  from starlette.authentication import UnauthenticatedUser
13
13
  from strawberry import ID, UNSET
14
14
  from strawberry.relay import Connection, GlobalID, Node
@@ -23,6 +23,7 @@ from phoenix.config import (
23
23
  from phoenix.db import models
24
24
  from phoenix.db.constants import DEFAULT_PROJECT_TRACE_RETENTION_POLICY_ID
25
25
  from phoenix.db.helpers import SupportedSQLDialect, exclude_experiment_projects
26
+ from phoenix.db.models import LatencyMs
26
27
  from phoenix.pointcloud.clustering import Hdbscan
27
28
  from phoenix.server.api.auth import MSG_ADMIN_ONLY, IsAdmin
28
29
  from phoenix.server.api.context import Context
@@ -106,6 +107,32 @@ class DbTableStats:
106
107
  num_bytes: float
107
108
 
108
109
 
110
+ @strawberry.type
111
+ class MetricCounts:
112
+ num_increases: int
113
+ num_decreases: int
114
+ num_equal: int
115
+
116
+
117
+ @strawberry.type
118
+ class CompareExperimentRunMetricCounts:
119
+ compare_experiment_id: GlobalID
120
+ latency: MetricCounts
121
+ prompt_token_count: MetricCounts
122
+ completion_token_count: MetricCounts
123
+ total_token_count: MetricCounts
124
+ total_cost: MetricCounts
125
+
126
+
127
+ @strawberry.type
128
+ class CompareExperimentRunAnnotationMetricCounts:
129
+ annotation_name: str
130
+ compare_experiment_id: GlobalID
131
+ num_increases: int
132
+ num_decreases: int
133
+ num_equal: int
134
+
135
+
109
136
  @strawberry.type
110
137
  class Query:
111
138
  @strawberry.field
@@ -481,6 +508,409 @@ class Query:
481
508
  has_next_page=has_next_page,
482
509
  )
483
510
 
511
+ @strawberry.field
512
+ async def compare_experiment_run_metric_counts(
513
+ self,
514
+ info: Info[Context, None],
515
+ base_experiment_id: GlobalID,
516
+ compare_experiment_ids: list[GlobalID],
517
+ ) -> list[CompareExperimentRunMetricCounts]:
518
+ if base_experiment_id in compare_experiment_ids:
519
+ raise BadRequest("Compare experiment IDs cannot contain the base experiment ID")
520
+ if not compare_experiment_ids:
521
+ raise BadRequest("At least one compare experiment ID must be provided")
522
+ if len(set(compare_experiment_ids)) < len(compare_experiment_ids):
523
+ raise BadRequest("Compare experiment IDs must be unique")
524
+
525
+ try:
526
+ base_experiment_rowid = from_global_id_with_expected_type(
527
+ base_experiment_id, models.Experiment.__name__
528
+ )
529
+ except ValueError:
530
+ raise BadRequest(f"Invalid base experiment ID: {base_experiment_id}")
531
+
532
+ compare_experiment_rowids = []
533
+ for compare_experiment_id in compare_experiment_ids:
534
+ try:
535
+ compare_experiment_rowids.append(
536
+ from_global_id_with_expected_type(
537
+ compare_experiment_id, models.Experiment.__name__
538
+ )
539
+ )
540
+ except ValueError:
541
+ raise BadRequest(f"Invalid compare experiment ID: {compare_experiment_id}")
542
+
543
+ base_experiment_runs = (
544
+ select(models.ExperimentRun)
545
+ .where(models.ExperimentRun.experiment_id == base_experiment_rowid)
546
+ .subquery()
547
+ .alias("base_experiment_runs")
548
+ )
549
+ base_experiment_traces = aliased(models.Trace, name="base_experiment_traces")
550
+ base_experiment_span_costs = (
551
+ select(
552
+ models.SpanCost.trace_rowid,
553
+ func.coalesce(func.sum(models.SpanCost.total_tokens), 0).label("total_tokens"),
554
+ func.coalesce(func.sum(models.SpanCost.prompt_tokens), 0).label("prompt_tokens"),
555
+ func.coalesce(func.sum(models.SpanCost.completion_tokens), 0).label(
556
+ "completion_tokens"
557
+ ),
558
+ func.coalesce(func.sum(models.SpanCost.total_cost), 0).label("total_cost"),
559
+ )
560
+ .select_from(models.SpanCost)
561
+ .group_by(
562
+ models.SpanCost.trace_rowid,
563
+ )
564
+ .subquery()
565
+ .alias("base_experiment_span_costs")
566
+ )
567
+
568
+ query = (
569
+ select() # add selected columns below
570
+ .select_from(base_experiment_runs)
571
+ .join(
572
+ base_experiment_traces,
573
+ onclause=base_experiment_runs.c.trace_id == base_experiment_traces.trace_id,
574
+ isouter=True,
575
+ )
576
+ .join(
577
+ base_experiment_span_costs,
578
+ onclause=base_experiment_traces.id == base_experiment_span_costs.c.trace_rowid,
579
+ isouter=True,
580
+ )
581
+ )
582
+
583
+ base_experiment_run_latency = LatencyMs(
584
+ base_experiment_runs.c.start_time, base_experiment_runs.c.end_time
585
+ ).label("base_experiment_run_latency_ms")
586
+ base_experiment_run_prompt_token_count = base_experiment_span_costs.c.prompt_tokens
587
+ base_experiment_run_completion_token_count = base_experiment_span_costs.c.completion_tokens
588
+ base_experiment_run_total_token_count = base_experiment_span_costs.c.total_tokens
589
+ base_experiment_run_total_cost = base_experiment_span_costs.c.total_cost
590
+
591
+ for compare_experiment_index, compare_experiment_rowid in enumerate(
592
+ compare_experiment_rowids
593
+ ):
594
+ compare_experiment_runs = (
595
+ select(models.ExperimentRun)
596
+ .where(models.ExperimentRun.experiment_id == compare_experiment_rowid)
597
+ .subquery()
598
+ .alias(f"comp_exp_{compare_experiment_index}_runs")
599
+ )
600
+ compare_experiment_traces = aliased(
601
+ models.Trace, name=f"comp_exp_{compare_experiment_index}_traces"
602
+ )
603
+ compare_experiment_span_costs = (
604
+ select(
605
+ models.SpanCost.trace_rowid,
606
+ func.coalesce(func.sum(models.SpanCost.total_tokens), 0).label("total_tokens"),
607
+ func.coalesce(func.sum(models.SpanCost.prompt_tokens), 0).label(
608
+ "prompt_tokens"
609
+ ),
610
+ func.coalesce(func.sum(models.SpanCost.completion_tokens), 0).label(
611
+ "completion_tokens"
612
+ ),
613
+ func.coalesce(func.sum(models.SpanCost.total_cost), 0).label("total_cost"),
614
+ )
615
+ .select_from(models.SpanCost)
616
+ .group_by(models.SpanCost.trace_rowid)
617
+ .subquery()
618
+ .alias(f"comp_exp_{compare_experiment_index}_span_costs")
619
+ )
620
+ compare_experiment_run_latency = LatencyMs(
621
+ compare_experiment_runs.c.start_time, compare_experiment_runs.c.end_time
622
+ ).label(f"comp_exp_{compare_experiment_index}_run_latency_ms")
623
+ compare_experiment_run_prompt_token_count = (
624
+ compare_experiment_span_costs.c.prompt_tokens
625
+ )
626
+ compare_experiment_run_completion_token_count = (
627
+ compare_experiment_span_costs.c.completion_tokens
628
+ )
629
+ compare_experiment_run_total_token_count = compare_experiment_span_costs.c.total_tokens
630
+ compare_experiment_run_total_cost = compare_experiment_span_costs.c.total_cost
631
+
632
+ query = (
633
+ query.add_columns(
634
+ _count_rows(
635
+ base_experiment_run_latency < compare_experiment_run_latency,
636
+ ).label(f"comp_exp_{compare_experiment_index}_num_runs_increased_latency"),
637
+ _count_rows(
638
+ base_experiment_run_latency > compare_experiment_run_latency,
639
+ ).label(f"comp_exp_{compare_experiment_index}_num_runs_decreased_latency"),
640
+ _count_rows(
641
+ base_experiment_run_latency == compare_experiment_run_latency,
642
+ ).label(f"comp_exp_{compare_experiment_index}_num_runs_equal_latency"),
643
+ _count_rows(
644
+ base_experiment_run_prompt_token_count
645
+ < compare_experiment_run_prompt_token_count,
646
+ ).label(
647
+ f"comp_exp_{compare_experiment_index}_num_runs_increased_prompt_token_count"
648
+ ),
649
+ _count_rows(
650
+ base_experiment_run_prompt_token_count
651
+ > compare_experiment_run_prompt_token_count,
652
+ ).label(
653
+ f"comp_exp_{compare_experiment_index}_num_runs_decreased_prompt_token_count"
654
+ ),
655
+ _count_rows(
656
+ base_experiment_run_prompt_token_count
657
+ == compare_experiment_run_prompt_token_count,
658
+ ).label(
659
+ f"comp_exp_{compare_experiment_index}_num_runs_equal_prompt_token_count"
660
+ ),
661
+ _count_rows(
662
+ base_experiment_run_completion_token_count
663
+ < compare_experiment_run_completion_token_count,
664
+ ).label(
665
+ f"comp_exp_{compare_experiment_index}_num_runs_increased_completion_token_count"
666
+ ),
667
+ _count_rows(
668
+ base_experiment_run_completion_token_count
669
+ > compare_experiment_run_completion_token_count,
670
+ ).label(
671
+ f"comp_exp_{compare_experiment_index}_num_runs_decreased_completion_token_count"
672
+ ),
673
+ _count_rows(
674
+ base_experiment_run_completion_token_count
675
+ == compare_experiment_run_completion_token_count,
676
+ ).label(
677
+ f"comp_exp_{compare_experiment_index}_num_runs_equal_completion_token_count"
678
+ ),
679
+ _count_rows(
680
+ base_experiment_run_total_token_count
681
+ < compare_experiment_run_total_token_count,
682
+ ).label(
683
+ f"comp_exp_{compare_experiment_index}_num_runs_increased_total_token_count"
684
+ ),
685
+ _count_rows(
686
+ base_experiment_run_total_token_count
687
+ > compare_experiment_run_total_token_count,
688
+ ).label(
689
+ f"comp_exp_{compare_experiment_index}_num_runs_decreased_total_token_count"
690
+ ),
691
+ _count_rows(
692
+ base_experiment_run_total_token_count
693
+ == compare_experiment_run_total_token_count,
694
+ ).label(
695
+ f"comp_exp_{compare_experiment_index}_num_runs_equal_total_token_count"
696
+ ),
697
+ _count_rows(
698
+ base_experiment_run_total_cost < compare_experiment_run_total_cost,
699
+ ).label(f"comp_exp_{compare_experiment_index}_num_runs_increased_total_cost"),
700
+ _count_rows(
701
+ base_experiment_run_total_cost > compare_experiment_run_total_cost,
702
+ ).label(f"comp_exp_{compare_experiment_index}_num_runs_decreased_total_cost"),
703
+ _count_rows(
704
+ base_experiment_run_total_cost == compare_experiment_run_total_cost,
705
+ ).label(f"comp_exp_{compare_experiment_index}_num_runs_equal_total_cost"),
706
+ )
707
+ .join(
708
+ compare_experiment_runs,
709
+ onclause=base_experiment_runs.c.dataset_example_id
710
+ == compare_experiment_runs.c.dataset_example_id,
711
+ isouter=True,
712
+ )
713
+ .join(
714
+ compare_experiment_traces,
715
+ onclause=compare_experiment_runs.c.trace_id
716
+ == compare_experiment_traces.trace_id,
717
+ isouter=True,
718
+ )
719
+ .join(
720
+ compare_experiment_span_costs,
721
+ onclause=compare_experiment_traces.id
722
+ == compare_experiment_span_costs.c.trace_rowid,
723
+ isouter=True,
724
+ )
725
+ )
726
+
727
+ async with info.context.db() as session:
728
+ result = (await session.execute(query)).first()
729
+ assert result is not None
730
+
731
+ num_columns_per_compare_experiment = len(query.columns) // len(compare_experiment_ids)
732
+ counts = []
733
+ for compare_experiment_index, compare_experiment_id in enumerate(compare_experiment_ids):
734
+ start_index = compare_experiment_index * num_columns_per_compare_experiment
735
+ end_index = start_index + num_columns_per_compare_experiment
736
+ (
737
+ num_runs_with_increased_latency,
738
+ num_runs_with_decreased_latency,
739
+ num_runs_with_equal_latency,
740
+ num_runs_with_increased_prompt_token_count,
741
+ num_runs_with_decreased_prompt_token_count,
742
+ num_runs_with_equal_prompt_token_count,
743
+ num_runs_with_increased_completion_token_count,
744
+ num_runs_with_decreased_completion_token_count,
745
+ num_runs_with_equal_completion_token_count,
746
+ num_runs_with_increased_total_token_count,
747
+ num_runs_with_decreased_total_token_count,
748
+ num_runs_with_equal_total_token_count,
749
+ num_runs_with_increased_total_cost,
750
+ num_runs_with_decreased_total_cost,
751
+ num_runs_with_equal_total_cost,
752
+ ) = result[start_index:end_index]
753
+ counts.append(
754
+ CompareExperimentRunMetricCounts(
755
+ compare_experiment_id=compare_experiment_id,
756
+ latency=MetricCounts(
757
+ num_increases=num_runs_with_increased_latency,
758
+ num_decreases=num_runs_with_decreased_latency,
759
+ num_equal=num_runs_with_equal_latency,
760
+ ),
761
+ prompt_token_count=MetricCounts(
762
+ num_increases=num_runs_with_increased_prompt_token_count,
763
+ num_decreases=num_runs_with_decreased_prompt_token_count,
764
+ num_equal=num_runs_with_equal_prompt_token_count,
765
+ ),
766
+ completion_token_count=MetricCounts(
767
+ num_increases=num_runs_with_increased_completion_token_count,
768
+ num_decreases=num_runs_with_decreased_completion_token_count,
769
+ num_equal=num_runs_with_equal_completion_token_count,
770
+ ),
771
+ total_token_count=MetricCounts(
772
+ num_increases=num_runs_with_increased_total_token_count,
773
+ num_decreases=num_runs_with_decreased_total_token_count,
774
+ num_equal=num_runs_with_equal_total_token_count,
775
+ ),
776
+ total_cost=MetricCounts(
777
+ num_increases=num_runs_with_increased_total_cost,
778
+ num_decreases=num_runs_with_decreased_total_cost,
779
+ num_equal=num_runs_with_equal_total_cost,
780
+ ),
781
+ )
782
+ )
783
+ return counts
784
+
785
+ @strawberry.field
786
+ async def compare_experiment_run_annotation_metric_counts(
787
+ self,
788
+ info: Info[Context, None],
789
+ base_experiment_id: GlobalID,
790
+ compare_experiment_ids: list[GlobalID],
791
+ ) -> list[CompareExperimentRunAnnotationMetricCounts]:
792
+ if base_experiment_id in compare_experiment_ids:
793
+ raise BadRequest("Compare experiment IDs cannot contain the base experiment ID")
794
+ if not compare_experiment_ids:
795
+ raise BadRequest("At least one compare experiment ID must be provided")
796
+ if len(set(compare_experiment_ids)) < len(compare_experiment_ids):
797
+ raise BadRequest("Compare experiment IDs must be unique")
798
+
799
+ try:
800
+ base_experiment_rowid = from_global_id_with_expected_type(
801
+ base_experiment_id, models.Experiment.__name__
802
+ )
803
+ except ValueError:
804
+ raise BadRequest(f"Invalid base experiment ID: {base_experiment_id}")
805
+
806
+ compare_experiment_rowids = []
807
+ for compare_experiment_id in compare_experiment_ids:
808
+ try:
809
+ compare_experiment_rowids.append(
810
+ from_global_id_with_expected_type(
811
+ compare_experiment_id, models.Experiment.__name__
812
+ )
813
+ )
814
+ except ValueError:
815
+ raise BadRequest(f"Invalid compare experiment ID: {compare_experiment_id}")
816
+
817
+ base_experiment_runs = (
818
+ select(models.ExperimentRun)
819
+ .where(
820
+ models.ExperimentRun.experiment_id == base_experiment_rowid,
821
+ )
822
+ .subquery()
823
+ .alias("base_experiment_runs")
824
+ )
825
+ base_experiment_run_annotations = aliased(
826
+ models.ExperimentRunAnnotation, name="base_experiment_run_annotations"
827
+ )
828
+ query = (
829
+ select(base_experiment_run_annotations.name)
830
+ .select_from(base_experiment_runs)
831
+ .join(
832
+ base_experiment_run_annotations,
833
+ onclause=base_experiment_runs.c.id
834
+ == base_experiment_run_annotations.experiment_run_id,
835
+ isouter=True,
836
+ )
837
+ .group_by(base_experiment_run_annotations.name)
838
+ .order_by(base_experiment_run_annotations.name)
839
+ )
840
+ for compare_experiment_index, compare_experiment_rowid in enumerate(
841
+ compare_experiment_rowids
842
+ ):
843
+ compare_experiment_runs = (
844
+ select(models.ExperimentRun)
845
+ .where(
846
+ models.ExperimentRun.experiment_id == compare_experiment_rowid,
847
+ )
848
+ .subquery()
849
+ .alias(f"comp_exp_{compare_experiment_index}_runs")
850
+ )
851
+ compare_experiment_run_annotations = aliased(
852
+ models.ExperimentRunAnnotation,
853
+ name=f"comp_exp_{compare_experiment_index}_run_annotations",
854
+ )
855
+ query = (
856
+ query.add_columns(
857
+ _count_rows(
858
+ base_experiment_run_annotations.score
859
+ < compare_experiment_run_annotations.score,
860
+ ).label(f"comp_exp_{compare_experiment_index}_num_runs_increased_score"),
861
+ _count_rows(
862
+ base_experiment_run_annotations.score
863
+ > compare_experiment_run_annotations.score,
864
+ ).label(f"comp_exp_{compare_experiment_index}_num_runs_decreased_score"),
865
+ _count_rows(
866
+ base_experiment_run_annotations.score
867
+ == compare_experiment_run_annotations.score,
868
+ ).label(f"comp_exp_{compare_experiment_index}_num_runs_equal_score"),
869
+ )
870
+ .join(
871
+ compare_experiment_runs,
872
+ onclause=base_experiment_runs.c.dataset_example_id
873
+ == compare_experiment_runs.c.dataset_example_id,
874
+ isouter=True,
875
+ )
876
+ .join(
877
+ compare_experiment_run_annotations,
878
+ onclause=compare_experiment_runs.c.id
879
+ == compare_experiment_run_annotations.experiment_run_id,
880
+ isouter=True,
881
+ )
882
+ .where(
883
+ base_experiment_run_annotations.name == compare_experiment_run_annotations.name
884
+ )
885
+ )
886
+ async with info.context.db() as session:
887
+ result = (await session.execute(query)).all()
888
+ assert result is not None
889
+ num_columns_per_compare_experiment = (len(query.columns) - 1) // len(compare_experiment_ids)
890
+ metric_counts = []
891
+ for record in result:
892
+ annotation_name, *counts = record
893
+ for compare_experiment_index, compare_experiment_id in enumerate(
894
+ compare_experiment_ids
895
+ ):
896
+ start_index = compare_experiment_index * num_columns_per_compare_experiment
897
+ end_index = start_index + num_columns_per_compare_experiment
898
+ (
899
+ num_runs_with_increased_score,
900
+ num_runs_with_decreased_score,
901
+ num_runs_with_equal_score,
902
+ ) = counts[start_index:end_index]
903
+ metric_counts.append(
904
+ CompareExperimentRunAnnotationMetricCounts(
905
+ annotation_name=annotation_name,
906
+ compare_experiment_id=compare_experiment_id,
907
+ num_increases=num_runs_with_increased_score,
908
+ num_decreases=num_runs_with_decreased_score,
909
+ num_equal=num_runs_with_equal_score,
910
+ )
911
+ )
912
+ return metric_counts
913
+
484
914
  @strawberry.field
485
915
  async def validate_experiment_run_filter_condition(
486
916
  self,
@@ -1106,3 +1536,20 @@ def _longest_matching_prefix(s: str, prefixes: Iterable[str]) -> str:
1106
1536
  if s.startswith(prefix) and len(prefix) > len(longest):
1107
1537
  longest = prefix
1108
1538
  return longest
1539
+
1540
+
1541
+ def _count_rows(
1542
+ condition: ColumnElement[Any],
1543
+ ) -> ColumnElement[Any]:
1544
+ """
1545
+ Returns an expression that counts the number of rows satisfying the condition.
1546
+ """
1547
+ return func.coalesce(
1548
+ func.sum(
1549
+ case(
1550
+ (condition, 1),
1551
+ else_=0,
1552
+ )
1553
+ ),
1554
+ 0,
1555
+ )
@@ -15,6 +15,7 @@ from strawberry.relay import GlobalID
15
15
  from phoenix.config import DEFAULT_PROJECT_NAME
16
16
  from phoenix.db import models
17
17
  from phoenix.db.helpers import exclude_experiment_projects
18
+ from phoenix.db.models import UserRoleName
18
19
  from phoenix.server.api.routers.v1.models import V1RoutesBaseModel
19
20
  from phoenix.server.api.routers.v1.utils import (
20
21
  PaginatedResponseBody,
@@ -258,8 +259,8 @@ async def update_project(
258
259
  .join(models.User)
259
260
  .where(models.User.id == int(request.user.identity))
260
261
  )
261
- role_name = await session.scalar(stmt)
262
- if role_name != "ADMIN":
262
+ role_name: UserRoleName = await session.scalar(stmt)
263
+ if role_name != "ADMIN" and role_name != "SYSTEM":
263
264
  raise HTTPException(
264
265
  status_code=HTTP_403_FORBIDDEN,
265
266
  detail="Only admins can update projects",
@@ -318,8 +319,8 @@ async def delete_project(
318
319
  .join(models.User)
319
320
  .where(models.User.id == int(request.user.identity))
320
321
  )
321
- role_name = await session.scalar(stmt)
322
- if role_name != "ADMIN":
322
+ role_name: UserRoleName = await session.scalar(stmt)
323
+ if role_name != "ADMIN" and role_name != "SYSTEM":
323
324
  raise HTTPException(
324
325
  status_code=HTTP_403_FORBIDDEN,
325
326
  detail="Only admins can delete projects",