arize-phoenix 11.20.0__py3-none-any.whl → 11.21.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of arize-phoenix might be problematic. Click here for more details.

@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: arize-phoenix
3
- Version: 11.20.0
3
+ Version: 11.21.1
4
4
  Summary: AI Observability and Evaluation
5
5
  Project-URL: Documentation, https://arize.com/docs/phoenix/
6
6
  Project-URL: Issues, https://github.com/Arize-ai/phoenix/issues
@@ -167,7 +167,7 @@ Description-Content-Type: text/markdown
167
167
  <a target="_blank" href="https://github.com/Arize-ai/phoenix/tree/main/js/packages/phoenix-mcp">
168
168
  <img src="https://badge.mcpx.dev?status=on" title="MCP Enabled"/>
169
169
  </a>
170
- <a href="https://cursor.com/install-mcp?name=phoenix&config=eyJjb21tYW5kIjoibnB4IC15IEBhcml6ZWFpL3Bob2VuaXgtbWNwQGxhdGVzdCAtLWJhc2VVcmwgaHR0cDovL2xvY2FsaG9zdDo2MDA2IC0tYXBpS2V5IHlvdXItYXBpLWtleSJ9"><img src="https://cursor.com/deeplink/mcp-install-dark.svg" alt="Add Arize Phoenix MCP server to Cursor" height=20 /></a>
170
+ <a href="cursor://anysphere.cursor-deeplink/mcp/install?name=phoenix&config=eyJjb21tYW5kIjoibnB4IC15IEBhcml6ZWFpL3Bob2VuaXgtbWNwQGxhdGVzdCAtLWJhc2VVcmwgaHR0cHM6Ly9teS1waG9lbml4LmNvbSAtLWFwaUtleSB5b3VyLWFwaS1rZXkifQ%3D%3D"><img src="https://cursor.com/deeplink/mcp-install-dark.svg" alt="Add Arize Phoenix MCP server to Cursor" height=20 /></a>
171
171
  </p>
172
172
 
173
173
  Phoenix is an open-source AI observability platform designed for experimentation, evaluation, and troubleshooting. It provides:
@@ -254,14 +254,16 @@ Phoenix is built on top of OpenTelemetry and is vendor, language, and framework
254
254
  | Integration | Package | Version Badge |
255
255
  | --------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
256
256
  | [LangChain4j](https://github.com/Arize-ai/openinference/tree/main/java/instrumentation/openinference-instrumentation-langchain4j) | `openinference-instrumentation-langchain4j` | [![Maven Central](https://img.shields.io/maven-central/v/com.arize/openinference-instrumentation-langchain4j.svg)](https://central.sonatype.com/artifact/com.arize/openinference-instrumentation-langchain4j) |
257
+ | [SpringAI](https://central.sonatype.com/artifact/com.arize/openinference-instrumentation-springAI) | `openinference-instrumentation-springAI` | [![Maven Central](https://img.shields.io/maven-central/v/com.arize/openinference-instrumentation-springAI.svg)](https://central.sonatype.com/artifact/com.arize/openinference-instrumentation-springAI) |
257
258
 
258
259
  ### Platforms
259
260
 
260
261
  | Platform | Description | Docs |
261
262
  | -------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------- |
263
+ | [BeeAI](https://docs.beeai.dev/observability/agents-traceability) | AI agent framework with built-in observability | [Integration Guide](https://docs.beeai.dev/observability/agents-traceability) |
262
264
  | [Dify](https://docs.dify.ai/en/guides/monitoring/integrate-external-ops-tools/integrate-phoenix) | Open-source LLM app development platform | [Integration Guide](https://docs.dify.ai/en/guides/monitoring/integrate-external-ops-tools/integrate-phoenix) |
265
+ | [Envoy AI Gateway](https://github.com/envoyproxy/ai-gateway) | AI Gateway built on Envoy Proxy for AI workloads | [Integration Guide](https://github.com/envoyproxy/ai-gateway/tree/main/cmd/aigw#opentelemetry-setup-with-phoenix) |
263
266
  | [LangFlow](https://arize.com/docs/phoenix/tracing/integrations-tracing/langflow) | Visual framework for building multi-agent and RAG applications | [Integration Guide](https://arize.com/docs/phoenix/tracing/integrations-tracing/langflow) |
264
- | [BeeAI](https://docs.beeai.dev/observability/agents-traceability) | AI agent framework with built-in observability | [Integration Guide](https://docs.beeai.dev/observability/agents-traceability) |
265
267
  | [LiteLLM Proxy](https://docs.litellm.ai/docs/observability/phoenix_integration#using-with-litellm-proxy) | Proxy server for LLMs | [Integration Guide](https://docs.litellm.ai/docs/observability/phoenix_integration#using-with-litellm-proxy) |
266
268
 
267
269
  ## Community
@@ -6,7 +6,7 @@ phoenix/exceptions.py,sha256=n2L2KKuecrdflB9MsCdAYCiSEvGJptIsfRkXMoJle7A,169
6
6
  phoenix/py.typed,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
7
7
  phoenix/services.py,sha256=ngkyKGVatX3cO2WJdo2hKdaVKP-xJCMvqthvga6kJss,5196
8
8
  phoenix/settings.py,sha256=2kHfT3BNOVd4dAO1bq-syEQbHSG8oX2-7NhOwK2QREk,896
9
- phoenix/version.py,sha256=PWj_8RsVSxALf302x1wXXyxFVSuiHNYoMU_ppot504s,24
9
+ phoenix/version.py,sha256=MKnna2l-ytcQjtXJcRSl2t1LDUmcw25A2XUiuU4Tln0,24
10
10
  phoenix/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
11
11
  phoenix/core/embedding_dimension.py,sha256=zKGbcvwOXgLf-yrJBpQyKtd-LEOPRKHnUToyAU8Owis,87
12
12
  phoenix/core/model.py,sha256=qBFraOtmwCCnWJltKNP18DDG0mULXigytlFsa6YOz6k,4837
@@ -112,7 +112,7 @@ phoenix/server/api/auth.py,sha256=AyYhnZIbY9ALVjg2K6aC2UXSa3Pva5GVDBXyaZ3nD3o,27
112
112
  phoenix/server/api/context.py,sha256=mqsq_8Ru50e-PxKWNTzh9zptb1PFjYFUf58uW59UYL0,8996
113
113
  phoenix/server/api/exceptions.py,sha256=E2W0x63CBzc0CoQPptrLr9nZxPF9zIP8MCJ3RuJMddw,1322
114
114
  phoenix/server/api/interceptor.py,sha256=ykDnoC_apUd-llVli3m1CW18kNSIgjz2qZ6m5JmPDu8,1294
115
- phoenix/server/api/queries.py,sha256=RC-MS6x1gNqYhNQ6TnWA6zqDDnpiW4puKLBTrbtS8os,67102
115
+ phoenix/server/api/queries.py,sha256=rHV8gBqCwNjuDCtfKTfBcJB-eWxbJFivZzFV3mId1HY,67524
116
116
  phoenix/server/api/schema.py,sha256=fcs36xQwFF_Qe41_5cWR8wYpDvOrnbcyTeo5WNMbDsA,1702
117
117
  phoenix/server/api/subscriptions.py,sha256=U7JZl-FGfsaIhRkIFdeSQLqR7xCS7CY1h-21BOAcaqY,25439
118
118
  phoenix/server/api/utils.py,sha256=quCBRcusc6PUq9tJq7M8PgwFZp7nXgVAxtbw8feribY,833
@@ -176,11 +176,11 @@ phoenix/server/api/helpers/__init__.py,sha256=m2-xaSPqUiSs91k62JaRDjFNfl-1byxBfY
176
176
  phoenix/server/api/helpers/annotations.py,sha256=9gMXKpMTfWEChoSCnvdWYuyB0hlSnNOp-qUdar9Vono,262
177
177
  phoenix/server/api/helpers/dataset_helpers.py,sha256=3bdGBoUzqrtg-sr5p2wpQLOU6dhg_3TKFHNeJj8p0TU,9155
178
178
  phoenix/server/api/helpers/experiment_run_filters.py,sha256=DOnVwrmn39eAkk2mwuZP8kIcAnR5jrOgllEwWSjsw94,29893
179
- phoenix/server/api/helpers/playground_clients.py,sha256=3EoJx4bYgCl0LXQnL8lbYdTklk4TxGKbNYcRZFDYxrY,73652
179
+ phoenix/server/api/helpers/playground_clients.py,sha256=R_lXbm58ejnsdHMvlbHbrasC7XsaLfqh5pZoHaL3DSg,72129
180
180
  phoenix/server/api/helpers/playground_registry.py,sha256=n0v4-KnvZJxeaEwOla5qBbnOQjSWznKmMhZnh9ziJt0,2584
181
181
  phoenix/server/api/helpers/playground_spans.py,sha256=QpXwPl_fFNwm_iA1A77XApUyXMl1aDmonw8aXuNZ_4k,17132
182
182
  phoenix/server/api/helpers/prompts/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
183
- phoenix/server/api/helpers/prompts/models.py,sha256=nlPtLZaGcHfWNRR0iNRaBUv8eoKOnoGqRm6zadrTt0I,23547
183
+ phoenix/server/api/helpers/prompts/models.py,sha256=pEPmloU27j5c4dM0dbDSAeIAzxpfsjPtjNRdF99mlhM,23558
184
184
  phoenix/server/api/helpers/prompts/conversions/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
185
185
  phoenix/server/api/helpers/prompts/conversions/anthropic.py,sha256=ZT--UqBwoGf7QMusajB6aeB7zyWGttaZigb113kgiY8,3571
186
186
  phoenix/server/api/helpers/prompts/conversions/aws.py,sha256=6vaT8K13r0bMXB9XHA8qY1MCjVsmR9TO0VIwyBMjQoY,2941
@@ -220,7 +220,7 @@ phoenix/server/api/input_types/ProjectSessionSort.py,sha256=KZzEtOMpcxtP11brL4fX
220
220
  phoenix/server/api/input_types/ProjectSort.py,sha256=ZTT-InFB6NvInDwLuivyHe9PxR5nsmQ8aXCHAPjZm9k,329
221
221
  phoenix/server/api/input_types/PromptFilter.py,sha256=f2F7fDlYRsNJp_rKkmvpgUt9rfgr_e-dyZxuHX8YfkU,256
222
222
  phoenix/server/api/input_types/PromptTemplateOptions.py,sha256=8ZJdH1F9fExcdH9dF8SJ29WycCvtEpK-Z6dZwFO7KgQ,232
223
- phoenix/server/api/input_types/PromptVersionInput.py,sha256=n6zBeSkK8ZFRHTjtVx4BK--azZIxXeYETa6Cufcet2I,3743
223
+ phoenix/server/api/input_types/PromptVersionInput.py,sha256=6iFWf2Ye9K1dwL1810L-wUTLkMtOIHemJTxar4xF4kw,3911
224
224
  phoenix/server/api/input_types/SpanAnnotationFilter.py,sha256=-djfIXYCxV6sV3GPOZQUV0SPfiWDhRlTORfeQ7tCBgQ,2671
225
225
  phoenix/server/api/input_types/SpanAnnotationSort.py,sha256=T5pAGzmh4MiJp9JMAzNDByFVTczfw02FH4WFWwFezyI,361
226
226
  phoenix/server/api/input_types/SpanSort.py,sha256=GReQx9yOo0Kehi2y4AtY69aZhRtcqvcg-9bSIFru69U,7540
@@ -362,7 +362,7 @@ phoenix/server/cost_tracking/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NM
362
362
  phoenix/server/cost_tracking/cost_details_calculator.py,sha256=Tt0YcuLhgPuXKWJemWVmYQfG0xQUvH4VziIj6KcDnoA,8945
363
363
  phoenix/server/cost_tracking/cost_model_lookup.py,sha256=jhtVdnQBzrTUHeOGPWgOebk-Io5hpJ1vAgWOu8ojeJ4,6801
364
364
  phoenix/server/cost_tracking/helpers.py,sha256=Pk6ECjnYreTxrldtRwxnwFcxIPVsvDq_yAwDA_spkOc,2122
365
- phoenix/server/cost_tracking/model_cost_manifest.json,sha256=XKOgaRE9bq109MnopfyC6jFY7F4ZZbLFPRB2juLqfBU,57556
365
+ phoenix/server/cost_tracking/model_cost_manifest.json,sha256=kE8VrBbvdqDy1ijk8KWWs_76U-L_vcRUT5hVT418evY,63488
366
366
  phoenix/server/cost_tracking/regex_specificity.py,sha256=9kqWuQ68C-hlwW25hr7BhFlRt5y2Nnpy0Ax3n9UN6Xk,11622
367
367
  phoenix/server/cost_tracking/token_cost_calculator.py,sha256=2JEZnvusx2-xbhp8krp9EarjWuyGH2KO4e-ZwJX-K0s,1598
368
368
  phoenix/server/daemons/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -391,10 +391,10 @@ phoenix/server/static/apple-touch-icon-76x76.png,sha256=CT_xT12I0u2i0WU8JzBZBuOQ
391
391
  phoenix/server/static/apple-touch-icon.png,sha256=fOfpjqGpWYbJ0eAurKsyoZP1EAs6ZVooBJ_SGk2ZkDs,3801
392
392
  phoenix/server/static/favicon.ico,sha256=bY0vvCKRftemZfPShwZtE93DiiQdaYaozkPGwNFr6H8,34494
393
393
  phoenix/server/static/modernizr.js,sha256=mvK-XtkNqjOral-QvzoqsyOMECXIMu5BQwSVN_wcU9c,2564
394
- phoenix/server/static/.vite/manifest.json,sha256=VfeGEGEeidc2iRZCqPQ8Ae-XTEOyLCy0MQw8DsvFEc0,2328
395
- phoenix/server/static/assets/components-BNcxEjYs.js,sha256=1_lobSTCfokeUlK4ClEylCmZ4gMIpa_zCsgZYZDGF9w,658582
396
- phoenix/server/static/assets/index-CKIBKnVD.js,sha256=ig-C5VgtSHWxhrkqhMwj6k1WKTjCYXdYvqI30ajC3v0,63064
397
- phoenix/server/static/assets/pages-3RoC-adr.js,sha256=Y5BgOTRvJRpBKJJ4rBMi7iTM3TNRDFe_v0nlxUEpV-0,1224388
394
+ phoenix/server/static/.vite/manifest.json,sha256=42hM44kTkxtzKywj80vA_-KqJ9LJO9p6Xvi52T8UnGU,2328
395
+ phoenix/server/static/assets/components-BHdwFK1u.js,sha256=aWWK46DXOMuDUTYquIY07idjdtqJqdF_H8Kc7vcE0ZU,658573
396
+ phoenix/server/static/assets/index-BuFamj8g.js,sha256=ijIj6Frv0WbowI5EYmfJQibFOsFD102TOQ7o6q4QkFk,63122
397
+ phoenix/server/static/assets/pages-wNrdUuht.js,sha256=OCYvFNXL1V7CiMFoeszBhHtQJ7CZnfmXkwRl8vj-A3A,1225254
398
398
  phoenix/server/static/assets/vendor-BbqekBfb.js,sha256=8xINQdH4ikfrf8nr8mlO0B9YrKJ2FPecrA9qu5kPILo,2588857
399
399
  phoenix/server/static/assets/vendor-CqDb5u4o.css,sha256=zIyFiNJKxMaQk8AvtLgt1rR01oO10d1MFndSDKH9Clw,5517
400
400
  phoenix/server/static/assets/vendor-arizeai-CEwHhYfL.js,sha256=EIl1d9G6uPn7_Fc8YyAdxWmyV1Y7k1nN7VeJmI4MxtA,121514
@@ -441,9 +441,9 @@ phoenix/utilities/project.py,sha256=auVpARXkDb-JgeX5f2aStyFIkeKvGwN9l7qrFeJMVxI,
441
441
  phoenix/utilities/re.py,sha256=6YyUWIkv0zc2SigsxfOWIHzdpjKA_TZo2iqKq7zJKvw,2081
442
442
  phoenix/utilities/span_store.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
443
443
  phoenix/utilities/template_formatters.py,sha256=gh9PJD6WEGw7TEYXfSst1UR4pWWwmjxMLrDVQ_CkpkQ,2779
444
- arize_phoenix-11.20.0.dist-info/METADATA,sha256=_ihQiuoj-PL_R0L83OBu6UdGvzM6wkRmjFQKJbPaU2M,30950
445
- arize_phoenix-11.20.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
446
- arize_phoenix-11.20.0.dist-info/entry_points.txt,sha256=Pgpn8Upxx9P8z8joPXZWl2LlnAlGc3gcQoVchb06X1Q,94
447
- arize_phoenix-11.20.0.dist-info/licenses/IP_NOTICE,sha256=JBqyyCYYxGDfzQ0TtsQgjts41IJoa-hiwDrBjCb9gHM,469
448
- arize_phoenix-11.20.0.dist-info/licenses/LICENSE,sha256=HFkW9REuMOkvKRACuwLPT0hRydHb3zNg-fdFt94td18,3794
449
- arize_phoenix-11.20.0.dist-info/RECORD,,
444
+ arize_phoenix-11.21.1.dist-info/METADATA,sha256=XaEthhkljvctwAGuXZs1mQwD6AXTMF8Jzx_ngrsjTQg,31612
445
+ arize_phoenix-11.21.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
446
+ arize_phoenix-11.21.1.dist-info/entry_points.txt,sha256=Pgpn8Upxx9P8z8joPXZWl2LlnAlGc3gcQoVchb06X1Q,94
447
+ arize_phoenix-11.21.1.dist-info/licenses/IP_NOTICE,sha256=JBqyyCYYxGDfzQ0TtsQgjts41IJoa-hiwDrBjCb9gHM,469
448
+ arize_phoenix-11.21.1.dist-info/licenses/LICENSE,sha256=HFkW9REuMOkvKRACuwLPT0hRydHb3zNg-fdFt94td18,3794
449
+ arize_phoenix-11.21.1.dist-info/RECORD,,
@@ -1141,6 +1141,10 @@ class OpenAIStreamingClient(OpenAIBaseStreamingClient):
1141
1141
 
1142
1142
 
1143
1143
  _OPENAI_REASONING_MODELS = [
1144
+ "gpt-5",
1145
+ "gpt-5-mini",
1146
+ "gpt-5-nano",
1147
+ "gpt-5-chat-latest",
1144
1148
  "o1",
1145
1149
  "o1-pro",
1146
1150
  "o1-2024-12-17",
@@ -1201,50 +1205,6 @@ class OpenAIReasoningNonStreamingClient(
1201
1205
  OpenAIReasoningReasoningModelsMixin,
1202
1206
  OpenAIStreamingClient,
1203
1207
  ):
1204
- @override
1205
- async def chat_completion_create(
1206
- self,
1207
- messages: list[
1208
- tuple[ChatCompletionMessageRole, str, Optional[str], Optional[list[JSONScalarType]]]
1209
- ],
1210
- tools: list[JSONScalarType],
1211
- **invocation_parameters: Any,
1212
- ) -> AsyncIterator[ChatCompletionChunk]:
1213
- from openai import NOT_GIVEN
1214
-
1215
- # Convert standard messages to OpenAI messages
1216
- openai_messages = []
1217
- for message in messages:
1218
- openai_message = self.to_openai_chat_completion_param(*message)
1219
- if openai_message is not None:
1220
- openai_messages.append(openai_message)
1221
-
1222
- throttled_create = self.rate_limiter._alimit(self.client.chat.completions.create)
1223
- response = await throttled_create(
1224
- messages=openai_messages,
1225
- model=self.model_name,
1226
- stream=False,
1227
- tools=tools or NOT_GIVEN,
1228
- **invocation_parameters,
1229
- )
1230
-
1231
- if response.usage is not None:
1232
- self._attributes.update(dict(self._llm_token_counts(response.usage)))
1233
-
1234
- choice = response.choices[0]
1235
- if choice.message.content:
1236
- yield TextChunk(content=choice.message.content)
1237
-
1238
- if choice.message.tool_calls:
1239
- for tool_call in choice.message.tool_calls:
1240
- yield ToolCallChunk(
1241
- id=tool_call.id,
1242
- function=FunctionCallChunk(
1243
- name=tool_call.function.name,
1244
- arguments=tool_call.function.arguments,
1245
- ),
1246
- )
1247
-
1248
1208
  def to_openai_chat_completion_param(
1249
1209
  self,
1250
1210
  role: ChatCompletionMessageRole,
@@ -329,7 +329,7 @@ class PromptOpenAIInvocationParametersContent(DBBaseModel):
329
329
  presence_penalty: float = UNDEFINED
330
330
  top_p: float = UNDEFINED
331
331
  seed: int = UNDEFINED
332
- reasoning_effort: Literal["low", "medium", "high"] = UNDEFINED
332
+ reasoning_effort: Literal["minimal", "low", "medium", "high"] = UNDEFINED
333
333
 
334
334
 
335
335
  class PromptOpenAIInvocationParameters(DBBaseModel):
@@ -83,6 +83,11 @@ class ChatPromptVersionInput:
83
83
  model_provider: ModelProvider
84
84
  model_name: str
85
85
 
86
+ def __post_init__(self) -> None:
87
+ self.invocation_parameters = {
88
+ k: v for k, v in self.invocation_parameters.items() if v is not None
89
+ }
90
+
86
91
 
87
92
  def to_pydantic_prompt_chat_template_v1(
88
93
  prompt_chat_template_input: PromptChatTemplateInput,
@@ -7,8 +7,8 @@ from typing import cast as type_cast
7
7
  import numpy as np
8
8
  import numpy.typing as npt
9
9
  import strawberry
10
- from sqlalchemy import ColumnElement, String, and_, case, cast, distinct, func, select, text
11
- from sqlalchemy.orm import aliased, joinedload
10
+ from sqlalchemy import ColumnElement, String, and_, case, cast, func, select, text
11
+ from sqlalchemy.orm import aliased, joinedload, load_only
12
12
  from starlette.authentication import UnauthenticatedUser
13
13
  from strawberry import ID, UNSET
14
14
  from strawberry.relay import Connection, GlobalID, Node
@@ -375,41 +375,52 @@ class Query:
375
375
  raise BadRequest("Compare experiment IDs cannot contain the base experiment ID")
376
376
  if len(set(compare_experiment_ids)) < len(compare_experiment_ids):
377
377
  raise BadRequest("Compare experiment IDs must be unique")
378
- experiment_ids = [
379
- from_global_id_with_expected_type(experiment_id, models.Experiment.__name__)
380
- for experiment_id in (base_experiment_id, *compare_experiment_ids)
381
- ]
378
+
379
+ try:
380
+ base_experiment_rowid = from_global_id_with_expected_type(
381
+ base_experiment_id, models.Experiment.__name__
382
+ )
383
+ except ValueError:
384
+ raise BadRequest(f"Invalid base experiment ID: {base_experiment_id}")
385
+
386
+ compare_experiment_rowids = []
387
+ for compare_experiment_id in compare_experiment_ids:
388
+ try:
389
+ compare_experiment_rowids.append(
390
+ from_global_id_with_expected_type(
391
+ compare_experiment_id, models.Experiment.__name__
392
+ )
393
+ )
394
+ except ValueError:
395
+ raise BadRequest(f"Invalid compare experiment ID: {compare_experiment_id}")
396
+
397
+ experiment_rowids = [base_experiment_rowid, *compare_experiment_rowids]
398
+
382
399
  cursor = Cursor.from_string(after) if after else None
383
400
  page_size = first or 50
384
401
 
385
402
  async with info.context.db() as session:
386
- validation_result = (
387
- await session.execute(
403
+ experiments = (
404
+ await session.scalars(
388
405
  select(
389
- func.count(distinct(models.DatasetVersion.dataset_id)),
390
- func.max(models.DatasetVersion.dataset_id),
391
- func.max(models.DatasetVersion.id),
392
- func.count(models.Experiment.id),
393
- )
394
- .select_from(models.DatasetVersion)
395
- .join(
396
406
  models.Experiment,
397
- models.Experiment.dataset_version_id == models.DatasetVersion.id,
398
407
  )
399
- .where(
400
- models.Experiment.id.in_(experiment_ids),
408
+ .where(models.Experiment.id.in_(experiment_rowids))
409
+ .options(
410
+ load_only(
411
+ models.Experiment.dataset_id, models.Experiment.dataset_version_id
412
+ )
401
413
  )
402
414
  )
403
- ).first()
404
- if validation_result is None:
405
- raise NotFound("No experiments could be found for input IDs.")
406
-
407
- num_datasets, dataset_id, version_id, num_resolved_experiment_ids = validation_result
408
- if num_datasets != 1:
409
- raise BadRequest("Experiments must belong to the same dataset.")
410
- if num_resolved_experiment_ids != len(experiment_ids):
415
+ ).all()
416
+ if not experiments or len(experiments) < len(experiment_rowids):
411
417
  raise NotFound("Unable to resolve one or more experiment IDs.")
412
-
418
+ num_datasets = len(set(experiment.dataset_id for experiment in experiments))
419
+ if num_datasets > 1:
420
+ raise BadRequest("Experiments must belong to the same dataset.")
421
+ base_experiment = next(
422
+ experiment for experiment in experiments if experiment.id == base_experiment_rowid
423
+ )
413
424
  revision_ids = (
414
425
  select(func.max(models.DatasetExampleRevision.id))
415
426
  .join(
@@ -418,8 +429,9 @@ class Query:
418
429
  )
419
430
  .where(
420
431
  and_(
421
- models.DatasetExampleRevision.dataset_version_id <= version_id,
422
- models.DatasetExample.dataset_id == dataset_id,
432
+ models.DatasetExampleRevision.dataset_version_id
433
+ <= base_experiment.dataset_version_id,
434
+ models.DatasetExample.dataset_id == base_experiment.dataset_id,
423
435
  )
424
436
  )
425
437
  .group_by(models.DatasetExampleRevision.dataset_example_id)
@@ -447,7 +459,7 @@ class Query:
447
459
  examples_query = update_examples_query_with_filter_condition(
448
460
  query=examples_query,
449
461
  filter_condition=filter_condition,
450
- experiment_ids=experiment_ids,
462
+ experiment_ids=experiment_rowids,
451
463
  )
452
464
 
453
465
  examples = (await session.scalars(examples_query)).all()
@@ -466,7 +478,7 @@ class Query:
466
478
  models.ExperimentRun.dataset_example_id.in_(
467
479
  example.id for example in examples
468
480
  ),
469
- models.ExperimentRun.experiment_id.in_(experiment_ids),
481
+ models.ExperimentRun.experiment_id.in_(experiment_rowids),
470
482
  )
471
483
  )
472
484
  .options(joinedload(models.ExperimentRun.trace).load_only(models.Trace.trace_id))
@@ -479,7 +491,7 @@ class Query:
479
491
  cursors_and_nodes = []
480
492
  for example in examples:
481
493
  run_comparison_items = []
482
- for experiment_id in experiment_ids:
494
+ for experiment_id in experiment_rowids:
483
495
  run_comparison_items.append(
484
496
  RunComparisonItem(
485
497
  experiment_id=GlobalID(Experiment.__name__, str(experiment_id)),
@@ -496,7 +508,7 @@ class Query:
496
508
  example=DatasetExample(
497
509
  id_attr=example.id,
498
510
  created_at=example.created_at,
499
- version_id=version_id,
511
+ version_id=base_experiment.dataset_version_id,
500
512
  ),
501
513
  run_comparison_items=run_comparison_items,
502
514
  )
@@ -1491,7 +1503,7 @@ class Query:
1491
1503
  return None
1492
1504
 
1493
1505
  @strawberry.field
1494
- async def get_project_session_by_otel_id(
1506
+ async def get_project_session_by_id(
1495
1507
  self,
1496
1508
  info: Info[Context, None],
1497
1509
  session_id: str,
@@ -341,6 +341,60 @@
341
341
  }
342
342
  ]
343
343
  },
344
+ {
345
+ "name": "claude-opus-4-1",
346
+ "name_pattern": "claude-opus-4-1",
347
+ "source": "litellm",
348
+ "token_prices": [
349
+ {
350
+ "base_rate": 0.000015,
351
+ "is_prompt": true,
352
+ "token_type": "input"
353
+ },
354
+ {
355
+ "base_rate": 0.000075,
356
+ "is_prompt": false,
357
+ "token_type": "output"
358
+ },
359
+ {
360
+ "base_rate": 1.5e-6,
361
+ "is_prompt": true,
362
+ "token_type": "cache_read"
363
+ },
364
+ {
365
+ "base_rate": 0.00001875,
366
+ "is_prompt": true,
367
+ "token_type": "cache_write"
368
+ }
369
+ ]
370
+ },
371
+ {
372
+ "name": "claude-opus-4-1-20250805",
373
+ "name_pattern": "claude-opus-4-1-20250805",
374
+ "source": "litellm",
375
+ "token_prices": [
376
+ {
377
+ "base_rate": 0.000015,
378
+ "is_prompt": true,
379
+ "token_type": "input"
380
+ },
381
+ {
382
+ "base_rate": 0.000075,
383
+ "is_prompt": false,
384
+ "token_type": "output"
385
+ },
386
+ {
387
+ "base_rate": 1.5e-6,
388
+ "is_prompt": true,
389
+ "token_type": "cache_read"
390
+ },
391
+ {
392
+ "base_rate": 0.00001875,
393
+ "is_prompt": true,
394
+ "token_type": "cache_write"
395
+ }
396
+ ]
397
+ },
344
398
  {
345
399
  "name": "claude-opus-4-20250514",
346
400
  "name_pattern": "claude-opus-4-20250514",
@@ -2078,6 +2132,182 @@
2078
2132
  }
2079
2133
  ]
2080
2134
  },
2135
+ {
2136
+ "name": "gpt-5",
2137
+ "name_pattern": "gpt-5",
2138
+ "source": "litellm",
2139
+ "token_prices": [
2140
+ {
2141
+ "base_rate": 1.25e-6,
2142
+ "is_prompt": true,
2143
+ "token_type": "input"
2144
+ },
2145
+ {
2146
+ "base_rate": 0.00001,
2147
+ "is_prompt": false,
2148
+ "token_type": "output"
2149
+ },
2150
+ {
2151
+ "base_rate": 1.25e-7,
2152
+ "is_prompt": true,
2153
+ "token_type": "cache_read"
2154
+ }
2155
+ ]
2156
+ },
2157
+ {
2158
+ "name": "gpt-5-2025-08-07",
2159
+ "name_pattern": "gpt-5-2025-08-07",
2160
+ "source": "litellm",
2161
+ "token_prices": [
2162
+ {
2163
+ "base_rate": 1.25e-6,
2164
+ "is_prompt": true,
2165
+ "token_type": "input"
2166
+ },
2167
+ {
2168
+ "base_rate": 0.00001,
2169
+ "is_prompt": false,
2170
+ "token_type": "output"
2171
+ },
2172
+ {
2173
+ "base_rate": 1.25e-7,
2174
+ "is_prompt": true,
2175
+ "token_type": "cache_read"
2176
+ }
2177
+ ]
2178
+ },
2179
+ {
2180
+ "name": "gpt-5-chat",
2181
+ "name_pattern": "gpt-5-chat",
2182
+ "source": "litellm",
2183
+ "token_prices": [
2184
+ {
2185
+ "base_rate": 1.25e-6,
2186
+ "is_prompt": true,
2187
+ "token_type": "input"
2188
+ },
2189
+ {
2190
+ "base_rate": 0.00001,
2191
+ "is_prompt": false,
2192
+ "token_type": "output"
2193
+ },
2194
+ {
2195
+ "base_rate": 1.25e-7,
2196
+ "is_prompt": true,
2197
+ "token_type": "cache_read"
2198
+ }
2199
+ ]
2200
+ },
2201
+ {
2202
+ "name": "gpt-5-chat-latest",
2203
+ "name_pattern": "gpt-5-chat-latest",
2204
+ "source": "litellm",
2205
+ "token_prices": [
2206
+ {
2207
+ "base_rate": 1.25e-6,
2208
+ "is_prompt": true,
2209
+ "token_type": "input"
2210
+ },
2211
+ {
2212
+ "base_rate": 0.00001,
2213
+ "is_prompt": false,
2214
+ "token_type": "output"
2215
+ },
2216
+ {
2217
+ "base_rate": 1.25e-7,
2218
+ "is_prompt": true,
2219
+ "token_type": "cache_read"
2220
+ }
2221
+ ]
2222
+ },
2223
+ {
2224
+ "name": "gpt-5-mini",
2225
+ "name_pattern": "gpt-5-mini",
2226
+ "source": "litellm",
2227
+ "token_prices": [
2228
+ {
2229
+ "base_rate": 2.5e-7,
2230
+ "is_prompt": true,
2231
+ "token_type": "input"
2232
+ },
2233
+ {
2234
+ "base_rate": 2e-6,
2235
+ "is_prompt": false,
2236
+ "token_type": "output"
2237
+ },
2238
+ {
2239
+ "base_rate": 2.5e-8,
2240
+ "is_prompt": true,
2241
+ "token_type": "cache_read"
2242
+ }
2243
+ ]
2244
+ },
2245
+ {
2246
+ "name": "gpt-5-mini-2025-08-07",
2247
+ "name_pattern": "gpt-5-mini-2025-08-07",
2248
+ "source": "litellm",
2249
+ "token_prices": [
2250
+ {
2251
+ "base_rate": 2.5e-7,
2252
+ "is_prompt": true,
2253
+ "token_type": "input"
2254
+ },
2255
+ {
2256
+ "base_rate": 2e-6,
2257
+ "is_prompt": false,
2258
+ "token_type": "output"
2259
+ },
2260
+ {
2261
+ "base_rate": 2.5e-8,
2262
+ "is_prompt": true,
2263
+ "token_type": "cache_read"
2264
+ }
2265
+ ]
2266
+ },
2267
+ {
2268
+ "name": "gpt-5-nano",
2269
+ "name_pattern": "gpt-5-nano",
2270
+ "source": "litellm",
2271
+ "token_prices": [
2272
+ {
2273
+ "base_rate": 5e-8,
2274
+ "is_prompt": true,
2275
+ "token_type": "input"
2276
+ },
2277
+ {
2278
+ "base_rate": 4e-7,
2279
+ "is_prompt": false,
2280
+ "token_type": "output"
2281
+ },
2282
+ {
2283
+ "base_rate": 5e-9,
2284
+ "is_prompt": true,
2285
+ "token_type": "cache_read"
2286
+ }
2287
+ ]
2288
+ },
2289
+ {
2290
+ "name": "gpt-5-nano-2025-08-07",
2291
+ "name_pattern": "gpt-5-nano-2025-08-07",
2292
+ "source": "litellm",
2293
+ "token_prices": [
2294
+ {
2295
+ "base_rate": 5e-8,
2296
+ "is_prompt": true,
2297
+ "token_type": "input"
2298
+ },
2299
+ {
2300
+ "base_rate": 4e-7,
2301
+ "is_prompt": false,
2302
+ "token_type": "output"
2303
+ },
2304
+ {
2305
+ "base_rate": 5e-9,
2306
+ "is_prompt": true,
2307
+ "token_type": "cache_read"
2308
+ }
2309
+ ]
2310
+ },
2081
2311
  {
2082
2312
  "name": "o1",
2083
2313
  "name_pattern": "o1",
@@ -2497,6 +2727,40 @@
2497
2727
  "token_type": "cache_read"
2498
2728
  }
2499
2729
  ]
2730
+ },
2731
+ {
2732
+ "name": "openai.gpt-oss-120b-1:0",
2733
+ "name_pattern": "openai\\.gpt-oss-120b-1:0",
2734
+ "source": "litellm",
2735
+ "token_prices": [
2736
+ {
2737
+ "base_rate": 1.5e-7,
2738
+ "is_prompt": true,
2739
+ "token_type": "input"
2740
+ },
2741
+ {
2742
+ "base_rate": 6e-7,
2743
+ "is_prompt": false,
2744
+ "token_type": "output"
2745
+ }
2746
+ ]
2747
+ },
2748
+ {
2749
+ "name": "openai.gpt-oss-20b-1:0",
2750
+ "name_pattern": "openai\\.gpt-oss-20b-1:0",
2751
+ "source": "litellm",
2752
+ "token_prices": [
2753
+ {
2754
+ "base_rate": 7e-8,
2755
+ "is_prompt": true,
2756
+ "token_type": "input"
2757
+ },
2758
+ {
2759
+ "base_rate": 3e-7,
2760
+ "is_prompt": false,
2761
+ "token_type": "output"
2762
+ }
2763
+ ]
2500
2764
  }
2501
2765
  ]
2502
2766
  }