PyPI - arize-phoenix - Versions diffs - 11.18.0__py3-none-any.whl → 11.20.0__py3-none-any.whl - Mend

arize-phoenix: arize_phoenix-11.18.0-py3-none-any.whl → arize_phoenix-11.20.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of arize-phoenix might be problematic. Click here for more details.

Files changed (18)

{arize_phoenix-11.18.0.dist-info → arize_phoenix-11.20.0.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: arize-phoenix
-Version: 11.18.0
+Version: 11.20.0
 Summary: AI Observability and Evaluation
 Project-URL: Documentation, https://arize.com/docs/phoenix/
 Project-URL: Issues, https://github.com/Arize-ai/phoenix/issues

{arize_phoenix-11.18.0.dist-info → arize_phoenix-11.20.0.dist-info}/RECORD RENAMED Viewed

@@ -6,7 +6,7 @@ phoenix/exceptions.py,sha256=n2L2KKuecrdflB9MsCdAYCiSEvGJptIsfRkXMoJle7A,169
 phoenix/py.typed,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
 phoenix/services.py,sha256=ngkyKGVatX3cO2WJdo2hKdaVKP-xJCMvqthvga6kJss,5196
 phoenix/settings.py,sha256=2kHfT3BNOVd4dAO1bq-syEQbHSG8oX2-7NhOwK2QREk,896
-phoenix/version.py,sha256=674yIbDThxLNMaAj6918CK5bHaDOhKWT8kaou_ywuBk,24
+phoenix/version.py,sha256=PWj_8RsVSxALf302x1wXXyxFVSuiHNYoMU_ppot504s,24
 phoenix/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 phoenix/core/embedding_dimension.py,sha256=zKGbcvwOXgLf-yrJBpQyKtd-LEOPRKHnUToyAU8Owis,87
 phoenix/core/model.py,sha256=qBFraOtmwCCnWJltKNP18DDG0mULXigytlFsa6YOz6k,4837
@@ -20,7 +20,7 @@ phoenix/db/constants.py,sha256=-YE2rkzcROG06_rerfnX5hC7fLzOHx1Gjw4nXhX_um4,46
 phoenix/db/engines.py,sha256=tB_8iWMDz0folryVvw29sbBUxJOB2XZ-Xx0Uexj3uns,6889
 phoenix/db/enums.py,sha256=w3O5YuJEEzVTwVDZb8b2UUFhU8yK_GosF081VVrrno0,188
 phoenix/db/facilitator.py,sha256=UIC-l14p3R8GFVWPmz04NY-CDm_zAynXCAuIYpj_W_g,20254
-phoenix/db/helpers.py,sha256=dsGONSgkhmVtjMpJh-84KRVTf5uPdQ5c8O2AhUgHkRg,14150
+phoenix/db/helpers.py,sha256=jqX13kO4qdGozDRdw6nME4BloPtzEQd0fweQpGHbj3I,15078
 phoenix/db/migrate.py,sha256=oUrXH8yEbcpL4eh09aSCuUiSrhFli0eT5D_j4ZmYChY,2797
 phoenix/db/models.py,sha256=bxyBRSST8rqBKcAyPyyDHmkv9AadaE3XmQnpcaMvvnk,61588
 phoenix/db/pg_config.py,sha256=h6mB7qF7t4Zk6VGvAiyefHGVu74o-yJynaWzeE39k9Y,6001
@@ -100,7 +100,7 @@ phoenix/server/grpc_server.py,sha256=ahHC394gFZYM3h4FmjQxZwL-a4x3mWmV2EdXYFlNEC8
 phoenix/server/jwt_store.py,sha256=B6uVildN_dQDTG_-aHHvuVSI7wIVK1yvED-_y6se2GU,16905
 phoenix/server/main.py,sha256=UBwxrQIEE7ci-SbE6GAlRYmbMHooI6JYG6sG-UpBFFs,18905
 phoenix/server/oauth2.py,sha256=GvUqZBoZ5dG-l2G1RMl1SUcN10jNAjaMXFznMSWz2Zs,3336
-phoenix/server/prometheus.py,sha256=c7G_5Rvb7teCzBHuEvwHMZjE2iYrsLLYAcg2mPU8y_c,8678
+phoenix/server/prometheus.py,sha256=PV3XNUvtBQqpIjFS5ulyhGK_WF-2v-Wf3rsXjfDxeUs,8694
 phoenix/server/rate_limiters.py,sha256=cFc73D2NaxqNZZDbwfIDw4So-fRVOJPBtqxOZ8Qky_s,7155
 phoenix/server/retention.py,sha256=MQe1FWuc_NxhqgIq5q2hfFhWT8ddAmpppgI74xYEQ6c,3064
 phoenix/server/telemetry.py,sha256=4EluDDrhdDPxAjaW6lVSbi73xkB5XeUCZWOmZGdk0hg,2755
@@ -112,7 +112,7 @@ phoenix/server/api/auth.py,sha256=AyYhnZIbY9ALVjg2K6aC2UXSa3Pva5GVDBXyaZ3nD3o,27
 phoenix/server/api/context.py,sha256=mqsq_8Ru50e-PxKWNTzh9zptb1PFjYFUf58uW59UYL0,8996
 phoenix/server/api/exceptions.py,sha256=E2W0x63CBzc0CoQPptrLr9nZxPF9zIP8MCJ3RuJMddw,1322
 phoenix/server/api/interceptor.py,sha256=ykDnoC_apUd-llVli3m1CW18kNSIgjz2qZ6m5JmPDu8,1294
-phoenix/server/api/queries.py,sha256=ExhH6hftJArSKSdVlKOjbwyWMUT7s8nu1YyW_Wnj4vE,46482
+phoenix/server/api/queries.py,sha256=RC-MS6x1gNqYhNQ6TnWA6zqDDnpiW4puKLBTrbtS8os,67102
 phoenix/server/api/schema.py,sha256=fcs36xQwFF_Qe41_5cWR8wYpDvOrnbcyTeo5WNMbDsA,1702
 phoenix/server/api/subscriptions.py,sha256=U7JZl-FGfsaIhRkIFdeSQLqR7xCS7CY1h-21BOAcaqY,25439
 phoenix/server/api/utils.py,sha256=quCBRcusc6PUq9tJq7M8PgwFZp7nXgVAxtbw8feribY,833
@@ -176,7 +176,7 @@ phoenix/server/api/helpers/__init__.py,sha256=m2-xaSPqUiSs91k62JaRDjFNfl-1byxBfY
 phoenix/server/api/helpers/annotations.py,sha256=9gMXKpMTfWEChoSCnvdWYuyB0hlSnNOp-qUdar9Vono,262
 phoenix/server/api/helpers/dataset_helpers.py,sha256=3bdGBoUzqrtg-sr5p2wpQLOU6dhg_3TKFHNeJj8p0TU,9155
 phoenix/server/api/helpers/experiment_run_filters.py,sha256=DOnVwrmn39eAkk2mwuZP8kIcAnR5jrOgllEwWSjsw94,29893
-phoenix/server/api/helpers/playground_clients.py,sha256=Fq4DNVIdnCiiVt0bh5mrZ7dJb2oOQcLjTttfq0Wcuv0,73589
+phoenix/server/api/helpers/playground_clients.py,sha256=3EoJx4bYgCl0LXQnL8lbYdTklk4TxGKbNYcRZFDYxrY,73652
 phoenix/server/api/helpers/playground_registry.py,sha256=n0v4-KnvZJxeaEwOla5qBbnOQjSWznKmMhZnh9ziJt0,2584
 phoenix/server/api/helpers/playground_spans.py,sha256=QpXwPl_fFNwm_iA1A77XApUyXMl1aDmonw8aXuNZ_4k,17132
 phoenix/server/api/helpers/prompts/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -263,9 +263,9 @@ phoenix/server/api/routers/v1/experiment_evaluations.py,sha256=_xnVqFCwZoOUPravd
 phoenix/server/api/routers/v1/experiment_runs.py,sha256=LZeCQWQIEOZ9jK5Gp_C4JbiYY6AmnnWe85cVcvdkCLE,7107
 phoenix/server/api/routers/v1/experiments.py,sha256=wML-cad0NiflFGIysaJECFulXLqj5rjwMmNmKQVaGzw,20592
 phoenix/server/api/routers/v1/models.py,sha256=p3gJN-9SWiUYTUTft4bZMsZVCBNTb4nN1Foy68eRZzQ,1997
-phoenix/server/api/routers/v1/projects.py,sha256=32GwTLsaFgQLVNdjrlrGe90XT3pIX1N7-zX9D9_J_4w,12701
+phoenix/server/api/routers/v1/projects.py,sha256=XR6uJxHXXtC1q8LNyS9W6iaj440sv1OKCu-OSBfxEys,12824
 phoenix/server/api/routers/v1/prompts.py,sha256=chRYcLkOYDJdJfVZVukVTUyIRnLPvsJCg41CuPxOIU8,26695
-phoenix/server/api/routers/v1/spans.py,sha256=ETH6I14O_zY9IW69Fo-LxL796BR3xgt8qdzwqzYAvbE,44208
+phoenix/server/api/routers/v1/spans.py,sha256=4aEUlfbfoyxP78TxQxBzriJYFBUe5ObFU99uesbz-iI,49390
 phoenix/server/api/routers/v1/traces.py,sha256=Skn0N_L4ZjoJ7x76PBrqvbKPFiAk8xSe1yxfiOaQ0Gc,11285
 phoenix/server/api/routers/v1/users.py,sha256=eO8zMtGU33Td2_G1l9D7Z0a4CG1CwBUCj_Z9z2uk7wg,12089
 phoenix/server/api/routers/v1/utils.py,sha256=oXIOGPzPTkE0ZWUTRCoRIQQ7wTzoSwtWFaUSjlGBqts,4960
@@ -283,7 +283,7 @@ phoenix/server/api/types/CostBreakdown.py,sha256=yw9dlb0blGIB_dWNP8yEvDHJztHjpiV
 phoenix/server/api/types/CreateDatasetPayload.py,sha256=R-6zCmuD0f76RU9Giu78xwTHlASQs6Aq8yzvX1Kxc3g,140
 phoenix/server/api/types/CronExpression.py,sha256=R7oxuSSX_eTUHQWaoaSueQqWDmkkHr5dBKRN6q-6ROk,331
 phoenix/server/api/types/DataQualityMetric.py,sha256=Aieg3bHeBFaAf4mqeRcH1zT04sXAtQD8ATSHJt7FaBQ,1538
-phoenix/server/api/types/Dataset.py,sha256=WBgZORzdwjWjNsX5suSmPt0x7IwKY0jA_BdZt2nsrYA,12186
+phoenix/server/api/types/Dataset.py,sha256=StVJmOE996Citau11JtFTmcgLqvN9IeZsHAbe-Y1gkg,12933
 phoenix/server/api/types/DatasetExample.py,sha256=_9byxGpXfYb-hmFMUJeG7Bw1wsRKSJaHwF2IPAbFpFw,3115
 phoenix/server/api/types/DatasetExampleRevision.py,sha256=c-jWR6dTguEZTm54IMlFr0Ic84I3nefyDnZb7nF5hnI,874
 phoenix/server/api/types/DatasetValues.py,sha256=7VbCOLlzOXpZN80-zYF2UGuafRcPsZF-8WQNc0YsKFc,1119
@@ -391,10 +391,10 @@ phoenix/server/static/apple-touch-icon-76x76.png,sha256=CT_xT12I0u2i0WU8JzBZBuOQ
 phoenix/server/static/apple-touch-icon.png,sha256=fOfpjqGpWYbJ0eAurKsyoZP1EAs6ZVooBJ_SGk2ZkDs,3801
 phoenix/server/static/favicon.ico,sha256=bY0vvCKRftemZfPShwZtE93DiiQdaYaozkPGwNFr6H8,34494
 phoenix/server/static/modernizr.js,sha256=mvK-XtkNqjOral-QvzoqsyOMECXIMu5BQwSVN_wcU9c,2564
-phoenix/server/static/.vite/manifest.json,sha256=wdIctWiLSvBXLW9VyoDGwL1JHv4egHSTv5LoL920QKU,2328
-phoenix/server/static/assets/components-B1Ec5V_g.js,sha256=4acB1c9xZ91quZtIby2N2jN-wX4GCA5G3RnlmW9FKwc,656940
-phoenix/server/static/assets/index-DYx39hbu.js,sha256=1xQZOioOvWwZOZfyjmyCV_TejQqY3C_CH2mN609RA-Q,63064
-phoenix/server/static/assets/pages-B77OHHSB.js,sha256=oQjHFiCc5ciitUdp0-LtGBshV-ZIIh0QxvSmZr0Sx5E,1218398
+phoenix/server/static/.vite/manifest.json,sha256=VfeGEGEeidc2iRZCqPQ8Ae-XTEOyLCy0MQw8DsvFEc0,2328
+phoenix/server/static/assets/components-BNcxEjYs.js,sha256=1_lobSTCfokeUlK4ClEylCmZ4gMIpa_zCsgZYZDGF9w,658582
+phoenix/server/static/assets/index-CKIBKnVD.js,sha256=ig-C5VgtSHWxhrkqhMwj6k1WKTjCYXdYvqI30ajC3v0,63064
+phoenix/server/static/assets/pages-3RoC-adr.js,sha256=Y5BgOTRvJRpBKJJ4rBMi7iTM3TNRDFe_v0nlxUEpV-0,1224388
 phoenix/server/static/assets/vendor-BbqekBfb.js,sha256=8xINQdH4ikfrf8nr8mlO0B9YrKJ2FPecrA9qu5kPILo,2588857
 phoenix/server/static/assets/vendor-CqDb5u4o.css,sha256=zIyFiNJKxMaQk8AvtLgt1rR01oO10d1MFndSDKH9Clw,5517
 phoenix/server/static/assets/vendor-arizeai-CEwHhYfL.js,sha256=EIl1d9G6uPn7_Fc8YyAdxWmyV1Y7k1nN7VeJmI4MxtA,121514
@@ -441,9 +441,9 @@ phoenix/utilities/project.py,sha256=auVpARXkDb-JgeX5f2aStyFIkeKvGwN9l7qrFeJMVxI,
 phoenix/utilities/re.py,sha256=6YyUWIkv0zc2SigsxfOWIHzdpjKA_TZo2iqKq7zJKvw,2081
 phoenix/utilities/span_store.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 phoenix/utilities/template_formatters.py,sha256=gh9PJD6WEGw7TEYXfSst1UR4pWWwmjxMLrDVQ_CkpkQ,2779
-arize_phoenix-11.18.0.dist-info/METADATA,sha256=XaPsPlJWMGjRNAAtWtBTxz1RuLSV0A_U37UeL1u-1wI,30950
-arize_phoenix-11.18.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-arize_phoenix-11.18.0.dist-info/entry_points.txt,sha256=Pgpn8Upxx9P8z8joPXZWl2LlnAlGc3gcQoVchb06X1Q,94
-arize_phoenix-11.18.0.dist-info/licenses/IP_NOTICE,sha256=JBqyyCYYxGDfzQ0TtsQgjts41IJoa-hiwDrBjCb9gHM,469
-arize_phoenix-11.18.0.dist-info/licenses/LICENSE,sha256=HFkW9REuMOkvKRACuwLPT0hRydHb3zNg-fdFt94td18,3794
-arize_phoenix-11.18.0.dist-info/RECORD,,
+arize_phoenix-11.20.0.dist-info/METADATA,sha256=_ihQiuoj-PL_R0L83OBu6UdGvzM6wkRmjFQKJbPaU2M,30950
+arize_phoenix-11.20.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+arize_phoenix-11.20.0.dist-info/entry_points.txt,sha256=Pgpn8Upxx9P8z8joPXZWl2LlnAlGc3gcQoVchb06X1Q,94
+arize_phoenix-11.20.0.dist-info/licenses/IP_NOTICE,sha256=JBqyyCYYxGDfzQ0TtsQgjts41IJoa-hiwDrBjCb9gHM,469
+arize_phoenix-11.20.0.dist-info/licenses/LICENSE,sha256=HFkW9REuMOkvKRACuwLPT0hRydHb3zNg-fdFt94td18,3794
+arize_phoenix-11.20.0.dist-info/RECORD,,

phoenix/db/helpers.py CHANGED Viewed

@@ -328,3 +328,30 @@ def _date_trunc_for_sqlite(
     # Convert back to UTC by subtracting the offset
     return func.datetime(t, f"{-utc_offset_minutes} minutes")
+def get_ancestor_span_rowids(parent_id: str) -> Select[tuple[int]]:
+    """
+    Get all ancestor span IDs for a given parent_id using recursive CTE.
+    This function returns a query that finds all ancestors of a span with the given parent_id.
+    It uses a recursive Common Table Expression (CTE) to traverse up the span hierarchy.
+    Args:
+        parent_id: The span_id of the parent span to start the ancestor search from.
+    Returns:
+        A Select query that returns tuples of (span_id,) for all ancestor spans.
+    """
+    ancestors = (
+        select(models.Span.id, models.Span.parent_id)
+        .where(models.Span.span_id == parent_id)
+        .cte(recursive=True)
+    )
+    child = ancestors.alias()
+    ancestors = ancestors.union_all(
+        select(models.Span.id, models.Span.parent_id).join(
+            child, models.Span.span_id == child.c.parent_id
+        )
+    )
+    return select(ancestors.c.id)

phoenix/server/api/helpers/playground_clients.py CHANGED Viewed

@@ -1677,6 +1677,8 @@ class AnthropicStreamingClient(PlaygroundStreamingClient):
     model_names=[
         "claude-sonnet-4-0",
         "claude-sonnet-4-20250514",
+        "claude-opus-4-1",
+        "claude-opus-4-1-20250805",
         "claude-opus-4-0",
         "claude-opus-4-20250514",
         "claude-3-7-sonnet-latest",

phoenix/server/api/queries.py CHANGED Viewed

@@ -1,14 +1,14 @@
 import re
 from collections import defaultdict
 from datetime import datetime
-from typing import Iterable, Iterator, Optional, Union
+from typing import Any, Iterable, Iterator, Optional, Union
 from typing import cast as type_cast
 import numpy as np
 import numpy.typing as npt
 import strawberry
-from sqlalchemy import String, and_, cast, distinct, func, select, text
-from sqlalchemy.orm import joinedload
+from sqlalchemy import ColumnElement, String, and_, case, cast, distinct, func, select, text
+from sqlalchemy.orm import aliased, joinedload
 from starlette.authentication import UnauthenticatedUser
 from strawberry import ID, UNSET
 from strawberry.relay import Connection, GlobalID, Node
@@ -23,6 +23,7 @@ from phoenix.config import (
 from phoenix.db import models
 from phoenix.db.constants import DEFAULT_PROJECT_TRACE_RETENTION_POLICY_ID
 from phoenix.db.helpers import SupportedSQLDialect, exclude_experiment_projects
+from phoenix.db.models import LatencyMs
 from phoenix.pointcloud.clustering import Hdbscan
 from phoenix.server.api.auth import MSG_ADMIN_ONLY, IsAdmin
 from phoenix.server.api.context import Context
@@ -106,6 +107,32 @@ class DbTableStats:
     num_bytes: float
+@strawberry.type
+class MetricCounts:
+    num_increases: int
+    num_decreases: int
+    num_equal: int
+@strawberry.type
+class CompareExperimentRunMetricCounts:
+    compare_experiment_id: GlobalID
+    latency: MetricCounts
+    prompt_token_count: MetricCounts
+    completion_token_count: MetricCounts
+    total_token_count: MetricCounts
+    total_cost: MetricCounts
+@strawberry.type
+class CompareExperimentRunAnnotationMetricCounts:
+    annotation_name: str
+    compare_experiment_id: GlobalID
+    num_increases: int
+    num_decreases: int
+    num_equal: int
 @strawberry.type
 class Query:
     @strawberry.field
@@ -481,6 +508,409 @@ class Query:
             has_next_page=has_next_page,
         )
+    @strawberry.field
+    async def compare_experiment_run_metric_counts(
+        self,
+        info: Info[Context, None],
+        base_experiment_id: GlobalID,
+        compare_experiment_ids: list[GlobalID],
+    ) -> list[CompareExperimentRunMetricCounts]:
+        if base_experiment_id in compare_experiment_ids:
+            raise BadRequest("Compare experiment IDs cannot contain the base experiment ID")
+        if not compare_experiment_ids:
+            raise BadRequest("At least one compare experiment ID must be provided")
+        if len(set(compare_experiment_ids)) < len(compare_experiment_ids):
+            raise BadRequest("Compare experiment IDs must be unique")
+        try:
+            base_experiment_rowid = from_global_id_with_expected_type(
+                base_experiment_id, models.Experiment.__name__
+            )
+        except ValueError:
+            raise BadRequest(f"Invalid base experiment ID: {base_experiment_id}")
+        compare_experiment_rowids = []
+        for compare_experiment_id in compare_experiment_ids:
+            try:
+                compare_experiment_rowids.append(
+                    from_global_id_with_expected_type(
+                        compare_experiment_id, models.Experiment.__name__
+                    )
+                )
+            except ValueError:
+                raise BadRequest(f"Invalid compare experiment ID: {compare_experiment_id}")
+        base_experiment_runs = (
+            select(models.ExperimentRun)
+            .where(models.ExperimentRun.experiment_id == base_experiment_rowid)
+            .subquery()
+            .alias("base_experiment_runs")
+        )
+        base_experiment_traces = aliased(models.Trace, name="base_experiment_traces")
+        base_experiment_span_costs = (
+            select(
+                models.SpanCost.trace_rowid,
+                func.coalesce(func.sum(models.SpanCost.total_tokens), 0).label("total_tokens"),
+                func.coalesce(func.sum(models.SpanCost.prompt_tokens), 0).label("prompt_tokens"),
+                func.coalesce(func.sum(models.SpanCost.completion_tokens), 0).label(
+                    "completion_tokens"
+                ),
+                func.coalesce(func.sum(models.SpanCost.total_cost), 0).label("total_cost"),
+            )
+            .select_from(models.SpanCost)
+            .group_by(
+                models.SpanCost.trace_rowid,
+            )
+            .subquery()
+            .alias("base_experiment_span_costs")
+        )
+        query = (
+            select()  # add selected columns below
+            .select_from(base_experiment_runs)
+            .join(
+                base_experiment_traces,
+                onclause=base_experiment_runs.c.trace_id == base_experiment_traces.trace_id,
+                isouter=True,
+            )
+            .join(
+                base_experiment_span_costs,
+                onclause=base_experiment_traces.id == base_experiment_span_costs.c.trace_rowid,
+                isouter=True,
+            )
+        )
+        base_experiment_run_latency = LatencyMs(
+            base_experiment_runs.c.start_time, base_experiment_runs.c.end_time
+        ).label("base_experiment_run_latency_ms")
+        base_experiment_run_prompt_token_count = base_experiment_span_costs.c.prompt_tokens
+        base_experiment_run_completion_token_count = base_experiment_span_costs.c.completion_tokens
+        base_experiment_run_total_token_count = base_experiment_span_costs.c.total_tokens
+        base_experiment_run_total_cost = base_experiment_span_costs.c.total_cost
+        for compare_experiment_index, compare_experiment_rowid in enumerate(
+            compare_experiment_rowids
+        ):
+            compare_experiment_runs = (
+                select(models.ExperimentRun)
+                .where(models.ExperimentRun.experiment_id == compare_experiment_rowid)
+                .subquery()
+                .alias(f"comp_exp_{compare_experiment_index}_runs")
+            )
+            compare_experiment_traces = aliased(
+                models.Trace, name=f"comp_exp_{compare_experiment_index}_traces"
+            )
+            compare_experiment_span_costs = (
+                select(
+                    models.SpanCost.trace_rowid,
+                    func.coalesce(func.sum(models.SpanCost.total_tokens), 0).label("total_tokens"),
+                    func.coalesce(func.sum(models.SpanCost.prompt_tokens), 0).label(
+                        "prompt_tokens"
+                    ),
+                    func.coalesce(func.sum(models.SpanCost.completion_tokens), 0).label(
+                        "completion_tokens"
+                    ),
+                    func.coalesce(func.sum(models.SpanCost.total_cost), 0).label("total_cost"),
+                )
+                .select_from(models.SpanCost)
+                .group_by(models.SpanCost.trace_rowid)
+                .subquery()
+                .alias(f"comp_exp_{compare_experiment_index}_span_costs")
+            )
+            compare_experiment_run_latency = LatencyMs(
+                compare_experiment_runs.c.start_time, compare_experiment_runs.c.end_time
+            ).label(f"comp_exp_{compare_experiment_index}_run_latency_ms")
+            compare_experiment_run_prompt_token_count = (
+                compare_experiment_span_costs.c.prompt_tokens
+            )
+            compare_experiment_run_completion_token_count = (
+                compare_experiment_span_costs.c.completion_tokens
+            )
+            compare_experiment_run_total_token_count = compare_experiment_span_costs.c.total_tokens
+            compare_experiment_run_total_cost = compare_experiment_span_costs.c.total_cost
+            query = (
+                query.add_columns(
+                    _count_rows(
+                        base_experiment_run_latency < compare_experiment_run_latency,
+                    ).label(f"comp_exp_{compare_experiment_index}_num_runs_increased_latency"),
+                    _count_rows(
+                        base_experiment_run_latency > compare_experiment_run_latency,
+                    ).label(f"comp_exp_{compare_experiment_index}_num_runs_decreased_latency"),
+                    _count_rows(
+                        base_experiment_run_latency == compare_experiment_run_latency,
+                    ).label(f"comp_exp_{compare_experiment_index}_num_runs_equal_latency"),
+                    _count_rows(
+                        base_experiment_run_prompt_token_count
+                        < compare_experiment_run_prompt_token_count,
+                    ).label(
+                        f"comp_exp_{compare_experiment_index}_num_runs_increased_prompt_token_count"
+                    ),
+                    _count_rows(
+                        base_experiment_run_prompt_token_count
+                        > compare_experiment_run_prompt_token_count,
+                    ).label(
+                        f"comp_exp_{compare_experiment_index}_num_runs_decreased_prompt_token_count"
+                    ),
+                    _count_rows(
+                        base_experiment_run_prompt_token_count
+                        == compare_experiment_run_prompt_token_count,
+                    ).label(
+                        f"comp_exp_{compare_experiment_index}_num_runs_equal_prompt_token_count"
+                    ),
+                    _count_rows(
+                        base_experiment_run_completion_token_count
+                        < compare_experiment_run_completion_token_count,
+                    ).label(
+                        f"comp_exp_{compare_experiment_index}_num_runs_increased_completion_token_count"
+                    ),
+                    _count_rows(
+                        base_experiment_run_completion_token_count
+                        > compare_experiment_run_completion_token_count,
+                    ).label(
+                        f"comp_exp_{compare_experiment_index}_num_runs_decreased_completion_token_count"
+                    ),
+                    _count_rows(
+                        base_experiment_run_completion_token_count
+                        == compare_experiment_run_completion_token_count,
+                    ).label(
+                        f"comp_exp_{compare_experiment_index}_num_runs_equal_completion_token_count"
+                    ),
+                    _count_rows(
+                        base_experiment_run_total_token_count
+                        < compare_experiment_run_total_token_count,
+                    ).label(
+                        f"comp_exp_{compare_experiment_index}_num_runs_increased_total_token_count"
+                    ),
+                    _count_rows(
+                        base_experiment_run_total_token_count
+                        > compare_experiment_run_total_token_count,
+                    ).label(
+                        f"comp_exp_{compare_experiment_index}_num_runs_decreased_total_token_count"
+                    ),
+                    _count_rows(
+                        base_experiment_run_total_token_count
+                        == compare_experiment_run_total_token_count,
+                    ).label(
+                        f"comp_exp_{compare_experiment_index}_num_runs_equal_total_token_count"
+                    ),
+                    _count_rows(
+                        base_experiment_run_total_cost < compare_experiment_run_total_cost,
+                    ).label(f"comp_exp_{compare_experiment_index}_num_runs_increased_total_cost"),
+                    _count_rows(
+                        base_experiment_run_total_cost > compare_experiment_run_total_cost,
+                    ).label(f"comp_exp_{compare_experiment_index}_num_runs_decreased_total_cost"),
+                    _count_rows(
+                        base_experiment_run_total_cost == compare_experiment_run_total_cost,
+                    ).label(f"comp_exp_{compare_experiment_index}_num_runs_equal_total_cost"),
+                )
+                .join(
+                    compare_experiment_runs,
+                    onclause=base_experiment_runs.c.dataset_example_id
+                    == compare_experiment_runs.c.dataset_example_id,
+                    isouter=True,
+                )
+                .join(
+                    compare_experiment_traces,
+                    onclause=compare_experiment_runs.c.trace_id
+                    == compare_experiment_traces.trace_id,
+                    isouter=True,
+                )
+                .join(
+                    compare_experiment_span_costs,
+                    onclause=compare_experiment_traces.id
+                    == compare_experiment_span_costs.c.trace_rowid,
+                    isouter=True,
+                )
+            )
+        async with info.context.db() as session:
+            result = (await session.execute(query)).first()
+        assert result is not None
+        num_columns_per_compare_experiment = len(query.columns) // len(compare_experiment_ids)
+        counts = []
+        for compare_experiment_index, compare_experiment_id in enumerate(compare_experiment_ids):
+            start_index = compare_experiment_index * num_columns_per_compare_experiment
+            end_index = start_index + num_columns_per_compare_experiment
+            (
+                num_runs_with_increased_latency,
+                num_runs_with_decreased_latency,
+                num_runs_with_equal_latency,
+                num_runs_with_increased_prompt_token_count,
+                num_runs_with_decreased_prompt_token_count,
+                num_runs_with_equal_prompt_token_count,
+                num_runs_with_increased_completion_token_count,
+                num_runs_with_decreased_completion_token_count,
+                num_runs_with_equal_completion_token_count,
+                num_runs_with_increased_total_token_count,
+                num_runs_with_decreased_total_token_count,
+                num_runs_with_equal_total_token_count,
+                num_runs_with_increased_total_cost,
+                num_runs_with_decreased_total_cost,
+                num_runs_with_equal_total_cost,
+            ) = result[start_index:end_index]
+            counts.append(
+                CompareExperimentRunMetricCounts(
+                    compare_experiment_id=compare_experiment_id,
+                    latency=MetricCounts(
+                        num_increases=num_runs_with_increased_latency,
+                        num_decreases=num_runs_with_decreased_latency,
+                        num_equal=num_runs_with_equal_latency,
+                    ),
+                    prompt_token_count=MetricCounts(
+                        num_increases=num_runs_with_increased_prompt_token_count,
+                        num_decreases=num_runs_with_decreased_prompt_token_count,
+                        num_equal=num_runs_with_equal_prompt_token_count,
+                    ),
+                    completion_token_count=MetricCounts(
+                        num_increases=num_runs_with_increased_completion_token_count,
+                        num_decreases=num_runs_with_decreased_completion_token_count,
+                        num_equal=num_runs_with_equal_completion_token_count,
+                    ),
+                    total_token_count=MetricCounts(
+                        num_increases=num_runs_with_increased_total_token_count,
+                        num_decreases=num_runs_with_decreased_total_token_count,
+                        num_equal=num_runs_with_equal_total_token_count,
+                    ),
+                    total_cost=MetricCounts(
+                        num_increases=num_runs_with_increased_total_cost,
+                        num_decreases=num_runs_with_decreased_total_cost,
+                        num_equal=num_runs_with_equal_total_cost,
+                    ),
+                )
+            )
+        return counts
+    @strawberry.field
+    async def compare_experiment_run_annotation_metric_counts(
+        self,
+        info: Info[Context, None],
+        base_experiment_id: GlobalID,
+        compare_experiment_ids: list[GlobalID],
+    ) -> list[CompareExperimentRunAnnotationMetricCounts]:
+        if base_experiment_id in compare_experiment_ids:
+            raise BadRequest("Compare experiment IDs cannot contain the base experiment ID")
+        if not compare_experiment_ids:
+            raise BadRequest("At least one compare experiment ID must be provided")
+        if len(set(compare_experiment_ids)) < len(compare_experiment_ids):
+            raise BadRequest("Compare experiment IDs must be unique")
+        try:
+            base_experiment_rowid = from_global_id_with_expected_type(
+                base_experiment_id, models.Experiment.__name__
+            )
+        except ValueError:
+            raise BadRequest(f"Invalid base experiment ID: {base_experiment_id}")
+        compare_experiment_rowids = []
+        for compare_experiment_id in compare_experiment_ids:
+            try:
+                compare_experiment_rowids.append(
+                    from_global_id_with_expected_type(
+                        compare_experiment_id, models.Experiment.__name__
+                    )
+                )
+            except ValueError:
+                raise BadRequest(f"Invalid compare experiment ID: {compare_experiment_id}")
+        base_experiment_runs = (
+            select(models.ExperimentRun)
+            .where(
+                models.ExperimentRun.experiment_id == base_experiment_rowid,
+            )
+            .subquery()
+            .alias("base_experiment_runs")
+        )
+        base_experiment_run_annotations = aliased(
+            models.ExperimentRunAnnotation, name="base_experiment_run_annotations"
+        )
+        query = (
+            select(base_experiment_run_annotations.name)
+            .select_from(base_experiment_runs)
+            .join(
+                base_experiment_run_annotations,
+                onclause=base_experiment_runs.c.id
+                == base_experiment_run_annotations.experiment_run_id,
+                isouter=True,
+            )
+            .group_by(base_experiment_run_annotations.name)
+            .order_by(base_experiment_run_annotations.name)
+        )
+        for compare_experiment_index, compare_experiment_rowid in enumerate(
+            compare_experiment_rowids
+        ):
+            compare_experiment_runs = (
+                select(models.ExperimentRun)
+                .where(
+                    models.ExperimentRun.experiment_id == compare_experiment_rowid,
+                )
+                .subquery()
+                .alias(f"comp_exp_{compare_experiment_index}_runs")
+            )
+            compare_experiment_run_annotations = aliased(
+                models.ExperimentRunAnnotation,
+                name=f"comp_exp_{compare_experiment_index}_run_annotations",
+            )
+            query = (
+                query.add_columns(
+                    _count_rows(
+                        base_experiment_run_annotations.score
+                        < compare_experiment_run_annotations.score,
+                    ).label(f"comp_exp_{compare_experiment_index}_num_runs_increased_score"),
+                    _count_rows(
+                        base_experiment_run_annotations.score
+                        > compare_experiment_run_annotations.score,
+                    ).label(f"comp_exp_{compare_experiment_index}_num_runs_decreased_score"),
+                    _count_rows(
+                        base_experiment_run_annotations.score
+                        == compare_experiment_run_annotations.score,
+                    ).label(f"comp_exp_{compare_experiment_index}_num_runs_equal_score"),
+                )
+                .join(
+                    compare_experiment_runs,
+                    onclause=base_experiment_runs.c.dataset_example_id
+                    == compare_experiment_runs.c.dataset_example_id,
+                    isouter=True,
+                )
+                .join(
+                    compare_experiment_run_annotations,
+                    onclause=compare_experiment_runs.c.id
+                    == compare_experiment_run_annotations.experiment_run_id,
+                    isouter=True,
+                )
+                .where(
+                    base_experiment_run_annotations.name == compare_experiment_run_annotations.name
+                )
+            )
+        async with info.context.db() as session:
+            result = (await session.execute(query)).all()
+        assert result is not None
+        num_columns_per_compare_experiment = (len(query.columns) - 1) // len(compare_experiment_ids)
+        metric_counts = []
+        for record in result:
+            annotation_name, *counts = record
+            for compare_experiment_index, compare_experiment_id in enumerate(
+                compare_experiment_ids
+            ):
+                start_index = compare_experiment_index * num_columns_per_compare_experiment
+                end_index = start_index + num_columns_per_compare_experiment
+                (
+                    num_runs_with_increased_score,
+                    num_runs_with_decreased_score,
+                    num_runs_with_equal_score,
+                ) = counts[start_index:end_index]
+                metric_counts.append(
+                    CompareExperimentRunAnnotationMetricCounts(
+                        annotation_name=annotation_name,
+                        compare_experiment_id=compare_experiment_id,
+                        num_increases=num_runs_with_increased_score,
+                        num_decreases=num_runs_with_decreased_score,
+                        num_equal=num_runs_with_equal_score,
+                    )
+                )
+        return metric_counts
     @strawberry.field
     async def validate_experiment_run_filter_condition(
         self,
@@ -1106,3 +1536,20 @@ def _longest_matching_prefix(s: str, prefixes: Iterable[str]) -> str:
         if s.startswith(prefix) and len(prefix) > len(longest):
             longest = prefix
     return longest
+def _count_rows(
+    condition: ColumnElement[Any],
+) -> ColumnElement[Any]:
+    """
+    Returns an expression that counts the number of rows satisfying the condition.
+
+    The count is expressed as a conditional sum: each row contributes 1 when
+    `condition` matches and 0 otherwise, and the sum is wrapped in COALESCE
+    with a fallback of 0.
+
+    Args:
+        condition: The SQL expression tested for each row.
+
+    Returns:
+        A SQL expression of the form
+        `coalesce(sum(CASE WHEN condition THEN 1 ELSE 0 END), 0)`.
+    """
+    # The outer coalesce substitutes 0 whenever the SUM itself evaluates to
+    # NULL (presumably when no rows are aggregated), so callers get a number.
+    return func.coalesce(
+        func.sum(
+            case(
+                (condition, 1),
+                else_=0,
+            )
+        ),
+        0,
+    )

phoenix/server/api/routers/v1/projects.py CHANGED Viewed

@@ -15,6 +15,7 @@ from strawberry.relay import GlobalID
 from phoenix.config import DEFAULT_PROJECT_NAME
 from phoenix.db import models
 from phoenix.db.helpers import exclude_experiment_projects
+from phoenix.db.models import UserRoleName
 from phoenix.server.api.routers.v1.models import V1RoutesBaseModel
 from phoenix.server.api.routers.v1.utils import (
     PaginatedResponseBody,
@@ -258,8 +259,8 @@ async def update_project(
                 .join(models.User)
                 .where(models.User.id == int(request.user.identity))
             )
-            role_name = await session.scalar(stmt)
-        if role_name != "ADMIN":
+            role_name: UserRoleName = await session.scalar(stmt)
+        if role_name != "ADMIN" and role_name != "SYSTEM":
             raise HTTPException(
                 status_code=HTTP_403_FORBIDDEN,
                 detail="Only admins can update projects",
@@ -318,8 +319,8 @@ async def delete_project(
                 .join(models.User)
                 .where(models.User.id == int(request.user.identity))
             )
-            role_name = await session.scalar(stmt)
-        if role_name != "ADMIN":
+            role_name: UserRoleName = await session.scalar(stmt)
+        if role_name != "ADMIN" and role_name != "SYSTEM":
             raise HTTPException(
                 status_code=HTTP_403_FORBIDDEN,
                 detail="Only admins can delete projects",

arize-phoenix 11.18.0__py3-none-any.whl → 11.20.0__py3-none-any.whl

Potentially problematic release.

arize-phoenix 11.18.0py3-none-any.whl → 11.20.0py3-none-any.whl