arize-phoenix 10.0.4__py3-none-any.whl → 12.28.1__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package as they appear in their respective public registries; it is provided for informational purposes only.
Files changed (276)
  1. {arize_phoenix-10.0.4.dist-info → arize_phoenix-12.28.1.dist-info}/METADATA +124 -72
  2. arize_phoenix-12.28.1.dist-info/RECORD +499 -0
  3. {arize_phoenix-10.0.4.dist-info → arize_phoenix-12.28.1.dist-info}/WHEEL +1 -1
  4. {arize_phoenix-10.0.4.dist-info → arize_phoenix-12.28.1.dist-info}/licenses/IP_NOTICE +1 -1
  5. phoenix/__generated__/__init__.py +0 -0
  6. phoenix/__generated__/classification_evaluator_configs/__init__.py +20 -0
  7. phoenix/__generated__/classification_evaluator_configs/_document_relevance_classification_evaluator_config.py +17 -0
  8. phoenix/__generated__/classification_evaluator_configs/_hallucination_classification_evaluator_config.py +17 -0
  9. phoenix/__generated__/classification_evaluator_configs/_models.py +18 -0
  10. phoenix/__generated__/classification_evaluator_configs/_tool_selection_classification_evaluator_config.py +17 -0
  11. phoenix/__init__.py +5 -4
  12. phoenix/auth.py +39 -2
  13. phoenix/config.py +1763 -91
  14. phoenix/datetime_utils.py +120 -2
  15. phoenix/db/README.md +595 -25
  16. phoenix/db/bulk_inserter.py +145 -103
  17. phoenix/db/engines.py +140 -33
  18. phoenix/db/enums.py +3 -12
  19. phoenix/db/facilitator.py +302 -35
  20. phoenix/db/helpers.py +1000 -65
  21. phoenix/db/iam_auth.py +64 -0
  22. phoenix/db/insertion/dataset.py +135 -2
  23. phoenix/db/insertion/document_annotation.py +9 -6
  24. phoenix/db/insertion/evaluation.py +2 -3
  25. phoenix/db/insertion/helpers.py +17 -2
  26. phoenix/db/insertion/session_annotation.py +176 -0
  27. phoenix/db/insertion/span.py +15 -11
  28. phoenix/db/insertion/span_annotation.py +3 -4
  29. phoenix/db/insertion/trace_annotation.py +3 -4
  30. phoenix/db/insertion/types.py +50 -20
  31. phoenix/db/migrations/versions/01a8342c9cdf_add_user_id_on_datasets.py +40 -0
  32. phoenix/db/migrations/versions/0df286449799_add_session_annotations_table.py +105 -0
  33. phoenix/db/migrations/versions/272b66ff50f8_drop_single_indices.py +119 -0
  34. phoenix/db/migrations/versions/58228d933c91_dataset_labels.py +67 -0
  35. phoenix/db/migrations/versions/699f655af132_experiment_tags.py +57 -0
  36. phoenix/db/migrations/versions/735d3d93c33e_add_composite_indices.py +41 -0
  37. phoenix/db/migrations/versions/a20694b15f82_cost.py +196 -0
  38. phoenix/db/migrations/versions/ab513d89518b_add_user_id_on_dataset_versions.py +40 -0
  39. phoenix/db/migrations/versions/d0690a79ea51_users_on_experiments.py +40 -0
  40. phoenix/db/migrations/versions/deb2c81c0bb2_dataset_splits.py +139 -0
  41. phoenix/db/migrations/versions/e76cbd66ffc3_add_experiments_dataset_examples.py +87 -0
  42. phoenix/db/models.py +669 -56
  43. phoenix/db/pg_config.py +10 -0
  44. phoenix/db/types/model_provider.py +4 -0
  45. phoenix/db/types/token_price_customization.py +29 -0
  46. phoenix/db/types/trace_retention.py +23 -15
  47. phoenix/experiments/evaluators/utils.py +3 -3
  48. phoenix/experiments/functions.py +160 -52
  49. phoenix/experiments/tracing.py +2 -2
  50. phoenix/experiments/types.py +1 -1
  51. phoenix/inferences/inferences.py +1 -2
  52. phoenix/server/api/auth.py +38 -7
  53. phoenix/server/api/auth_messages.py +46 -0
  54. phoenix/server/api/context.py +100 -4
  55. phoenix/server/api/dataloaders/__init__.py +79 -5
  56. phoenix/server/api/dataloaders/annotation_configs_by_project.py +31 -0
  57. phoenix/server/api/dataloaders/annotation_summaries.py +60 -8
  58. phoenix/server/api/dataloaders/average_experiment_repeated_run_group_latency.py +50 -0
  59. phoenix/server/api/dataloaders/average_experiment_run_latency.py +17 -24
  60. phoenix/server/api/dataloaders/cache/two_tier_cache.py +1 -2
  61. phoenix/server/api/dataloaders/dataset_dataset_splits.py +52 -0
  62. phoenix/server/api/dataloaders/dataset_example_revisions.py +0 -1
  63. phoenix/server/api/dataloaders/dataset_example_splits.py +40 -0
  64. phoenix/server/api/dataloaders/dataset_examples_and_versions_by_experiment_run.py +47 -0
  65. phoenix/server/api/dataloaders/dataset_labels.py +36 -0
  66. phoenix/server/api/dataloaders/document_evaluation_summaries.py +2 -2
  67. phoenix/server/api/dataloaders/document_evaluations.py +6 -9
  68. phoenix/server/api/dataloaders/experiment_annotation_summaries.py +88 -34
  69. phoenix/server/api/dataloaders/experiment_dataset_splits.py +43 -0
  70. phoenix/server/api/dataloaders/experiment_error_rates.py +21 -28
  71. phoenix/server/api/dataloaders/experiment_repeated_run_group_annotation_summaries.py +77 -0
  72. phoenix/server/api/dataloaders/experiment_repeated_run_groups.py +57 -0
  73. phoenix/server/api/dataloaders/experiment_runs_by_experiment_and_example.py +44 -0
  74. phoenix/server/api/dataloaders/last_used_times_by_generative_model_id.py +35 -0
  75. phoenix/server/api/dataloaders/latency_ms_quantile.py +40 -8
  76. phoenix/server/api/dataloaders/record_counts.py +37 -10
  77. phoenix/server/api/dataloaders/session_annotations_by_session.py +29 -0
  78. phoenix/server/api/dataloaders/span_cost_by_span.py +24 -0
  79. phoenix/server/api/dataloaders/span_cost_detail_summary_entries_by_generative_model.py +56 -0
  80. phoenix/server/api/dataloaders/span_cost_detail_summary_entries_by_project_session.py +57 -0
  81. phoenix/server/api/dataloaders/span_cost_detail_summary_entries_by_span.py +43 -0
  82. phoenix/server/api/dataloaders/span_cost_detail_summary_entries_by_trace.py +56 -0
  83. phoenix/server/api/dataloaders/span_cost_details_by_span_cost.py +27 -0
  84. phoenix/server/api/dataloaders/span_cost_summary_by_experiment.py +57 -0
  85. phoenix/server/api/dataloaders/span_cost_summary_by_experiment_repeated_run_group.py +64 -0
  86. phoenix/server/api/dataloaders/span_cost_summary_by_experiment_run.py +58 -0
  87. phoenix/server/api/dataloaders/span_cost_summary_by_generative_model.py +55 -0
  88. phoenix/server/api/dataloaders/span_cost_summary_by_project.py +152 -0
  89. phoenix/server/api/dataloaders/span_cost_summary_by_project_session.py +56 -0
  90. phoenix/server/api/dataloaders/span_cost_summary_by_trace.py +55 -0
  91. phoenix/server/api/dataloaders/span_costs.py +29 -0
  92. phoenix/server/api/dataloaders/table_fields.py +2 -2
  93. phoenix/server/api/dataloaders/token_prices_by_model.py +30 -0
  94. phoenix/server/api/dataloaders/trace_annotations_by_trace.py +27 -0
  95. phoenix/server/api/dataloaders/types.py +29 -0
  96. phoenix/server/api/exceptions.py +11 -1
  97. phoenix/server/api/helpers/dataset_helpers.py +5 -1
  98. phoenix/server/api/helpers/playground_clients.py +1243 -292
  99. phoenix/server/api/helpers/playground_registry.py +2 -2
  100. phoenix/server/api/helpers/playground_spans.py +8 -4
  101. phoenix/server/api/helpers/playground_users.py +26 -0
  102. phoenix/server/api/helpers/prompts/conversions/aws.py +83 -0
  103. phoenix/server/api/helpers/prompts/conversions/google.py +103 -0
  104. phoenix/server/api/helpers/prompts/models.py +205 -22
  105. phoenix/server/api/input_types/{SpanAnnotationFilter.py → AnnotationFilter.py} +22 -14
  106. phoenix/server/api/input_types/ChatCompletionInput.py +6 -2
  107. phoenix/server/api/input_types/CreateProjectInput.py +27 -0
  108. phoenix/server/api/input_types/CreateProjectSessionAnnotationInput.py +37 -0
  109. phoenix/server/api/input_types/DatasetFilter.py +17 -0
  110. phoenix/server/api/input_types/ExperimentRunSort.py +237 -0
  111. phoenix/server/api/input_types/GenerativeCredentialInput.py +9 -0
  112. phoenix/server/api/input_types/GenerativeModelInput.py +5 -0
  113. phoenix/server/api/input_types/ProjectSessionSort.py +161 -1
  114. phoenix/server/api/input_types/PromptFilter.py +14 -0
  115. phoenix/server/api/input_types/PromptVersionInput.py +52 -1
  116. phoenix/server/api/input_types/SpanSort.py +44 -7
  117. phoenix/server/api/input_types/TimeBinConfig.py +23 -0
  118. phoenix/server/api/input_types/UpdateAnnotationInput.py +34 -0
  119. phoenix/server/api/input_types/UserRoleInput.py +1 -0
  120. phoenix/server/api/mutations/__init__.py +10 -0
  121. phoenix/server/api/mutations/annotation_config_mutations.py +8 -8
  122. phoenix/server/api/mutations/api_key_mutations.py +19 -23
  123. phoenix/server/api/mutations/chat_mutations.py +154 -47
  124. phoenix/server/api/mutations/dataset_label_mutations.py +243 -0
  125. phoenix/server/api/mutations/dataset_mutations.py +21 -16
  126. phoenix/server/api/mutations/dataset_split_mutations.py +351 -0
  127. phoenix/server/api/mutations/experiment_mutations.py +2 -2
  128. phoenix/server/api/mutations/export_events_mutations.py +3 -3
  129. phoenix/server/api/mutations/model_mutations.py +210 -0
  130. phoenix/server/api/mutations/project_mutations.py +49 -10
  131. phoenix/server/api/mutations/project_session_annotations_mutations.py +158 -0
  132. phoenix/server/api/mutations/project_trace_retention_policy_mutations.py +8 -4
  133. phoenix/server/api/mutations/prompt_label_mutations.py +74 -65
  134. phoenix/server/api/mutations/prompt_mutations.py +65 -129
  135. phoenix/server/api/mutations/prompt_version_tag_mutations.py +11 -8
  136. phoenix/server/api/mutations/span_annotations_mutations.py +15 -10
  137. phoenix/server/api/mutations/trace_annotations_mutations.py +14 -10
  138. phoenix/server/api/mutations/trace_mutations.py +47 -3
  139. phoenix/server/api/mutations/user_mutations.py +66 -41
  140. phoenix/server/api/queries.py +768 -293
  141. phoenix/server/api/routers/__init__.py +2 -2
  142. phoenix/server/api/routers/auth.py +154 -88
  143. phoenix/server/api/routers/ldap.py +229 -0
  144. phoenix/server/api/routers/oauth2.py +369 -106
  145. phoenix/server/api/routers/v1/__init__.py +24 -4
  146. phoenix/server/api/routers/v1/annotation_configs.py +23 -31
  147. phoenix/server/api/routers/v1/annotations.py +481 -17
  148. phoenix/server/api/routers/v1/datasets.py +395 -81
  149. phoenix/server/api/routers/v1/documents.py +142 -0
  150. phoenix/server/api/routers/v1/evaluations.py +24 -31
  151. phoenix/server/api/routers/v1/experiment_evaluations.py +19 -8
  152. phoenix/server/api/routers/v1/experiment_runs.py +337 -59
  153. phoenix/server/api/routers/v1/experiments.py +479 -48
  154. phoenix/server/api/routers/v1/models.py +7 -0
  155. phoenix/server/api/routers/v1/projects.py +18 -49
  156. phoenix/server/api/routers/v1/prompts.py +54 -40
  157. phoenix/server/api/routers/v1/sessions.py +108 -0
  158. phoenix/server/api/routers/v1/spans.py +1091 -81
  159. phoenix/server/api/routers/v1/traces.py +132 -78
  160. phoenix/server/api/routers/v1/users.py +389 -0
  161. phoenix/server/api/routers/v1/utils.py +3 -7
  162. phoenix/server/api/subscriptions.py +305 -88
  163. phoenix/server/api/types/Annotation.py +90 -23
  164. phoenix/server/api/types/ApiKey.py +13 -17
  165. phoenix/server/api/types/AuthMethod.py +1 -0
  166. phoenix/server/api/types/ChatCompletionSubscriptionPayload.py +1 -0
  167. phoenix/server/api/types/CostBreakdown.py +12 -0
  168. phoenix/server/api/types/Dataset.py +226 -72
  169. phoenix/server/api/types/DatasetExample.py +88 -18
  170. phoenix/server/api/types/DatasetExperimentAnnotationSummary.py +10 -0
  171. phoenix/server/api/types/DatasetLabel.py +57 -0
  172. phoenix/server/api/types/DatasetSplit.py +98 -0
  173. phoenix/server/api/types/DatasetVersion.py +49 -4
  174. phoenix/server/api/types/DocumentAnnotation.py +212 -0
  175. phoenix/server/api/types/Experiment.py +264 -59
  176. phoenix/server/api/types/ExperimentComparison.py +5 -10
  177. phoenix/server/api/types/ExperimentRepeatedRunGroup.py +155 -0
  178. phoenix/server/api/types/ExperimentRepeatedRunGroupAnnotationSummary.py +9 -0
  179. phoenix/server/api/types/ExperimentRun.py +169 -65
  180. phoenix/server/api/types/ExperimentRunAnnotation.py +158 -39
  181. phoenix/server/api/types/GenerativeModel.py +245 -3
  182. phoenix/server/api/types/GenerativeProvider.py +70 -11
  183. phoenix/server/api/types/{Model.py → InferenceModel.py} +1 -1
  184. phoenix/server/api/types/ModelInterface.py +16 -0
  185. phoenix/server/api/types/PlaygroundModel.py +20 -0
  186. phoenix/server/api/types/Project.py +1278 -216
  187. phoenix/server/api/types/ProjectSession.py +188 -28
  188. phoenix/server/api/types/ProjectSessionAnnotation.py +187 -0
  189. phoenix/server/api/types/ProjectTraceRetentionPolicy.py +1 -1
  190. phoenix/server/api/types/Prompt.py +119 -39
  191. phoenix/server/api/types/PromptLabel.py +42 -25
  192. phoenix/server/api/types/PromptVersion.py +11 -8
  193. phoenix/server/api/types/PromptVersionTag.py +65 -25
  194. phoenix/server/api/types/ServerStatus.py +6 -0
  195. phoenix/server/api/types/Span.py +167 -123
  196. phoenix/server/api/types/SpanAnnotation.py +189 -42
  197. phoenix/server/api/types/SpanCostDetailSummaryEntry.py +10 -0
  198. phoenix/server/api/types/SpanCostSummary.py +10 -0
  199. phoenix/server/api/types/SystemApiKey.py +65 -1
  200. phoenix/server/api/types/TokenPrice.py +16 -0
  201. phoenix/server/api/types/TokenUsage.py +3 -3
  202. phoenix/server/api/types/Trace.py +223 -51
  203. phoenix/server/api/types/TraceAnnotation.py +149 -50
  204. phoenix/server/api/types/User.py +137 -32
  205. phoenix/server/api/types/UserApiKey.py +73 -26
  206. phoenix/server/api/types/node.py +10 -0
  207. phoenix/server/api/types/pagination.py +11 -2
  208. phoenix/server/app.py +290 -45
  209. phoenix/server/authorization.py +38 -3
  210. phoenix/server/bearer_auth.py +34 -24
  211. phoenix/server/cost_tracking/cost_details_calculator.py +196 -0
  212. phoenix/server/cost_tracking/cost_model_lookup.py +179 -0
  213. phoenix/server/cost_tracking/helpers.py +68 -0
  214. phoenix/server/cost_tracking/model_cost_manifest.json +3657 -830
  215. phoenix/server/cost_tracking/regex_specificity.py +397 -0
  216. phoenix/server/cost_tracking/token_cost_calculator.py +57 -0
  217. phoenix/server/daemons/__init__.py +0 -0
  218. phoenix/server/daemons/db_disk_usage_monitor.py +214 -0
  219. phoenix/server/daemons/generative_model_store.py +103 -0
  220. phoenix/server/daemons/span_cost_calculator.py +99 -0
  221. phoenix/server/dml_event.py +17 -0
  222. phoenix/server/dml_event_handler.py +5 -0
  223. phoenix/server/email/sender.py +56 -3
  224. phoenix/server/email/templates/db_disk_usage_notification.html +19 -0
  225. phoenix/server/email/types.py +11 -0
  226. phoenix/server/experiments/__init__.py +0 -0
  227. phoenix/server/experiments/utils.py +14 -0
  228. phoenix/server/grpc_server.py +11 -11
  229. phoenix/server/jwt_store.py +17 -15
  230. phoenix/server/ldap.py +1449 -0
  231. phoenix/server/main.py +26 -10
  232. phoenix/server/oauth2.py +330 -12
  233. phoenix/server/prometheus.py +66 -6
  234. phoenix/server/rate_limiters.py +4 -9
  235. phoenix/server/retention.py +33 -20
  236. phoenix/server/session_filters.py +49 -0
  237. phoenix/server/static/.vite/manifest.json +55 -51
  238. phoenix/server/static/assets/components-BreFUQQa.js +6702 -0
  239. phoenix/server/static/assets/{index-E0M82BdE.js → index-CTQoemZv.js} +140 -56
  240. phoenix/server/static/assets/pages-DBE5iYM3.js +9524 -0
  241. phoenix/server/static/assets/vendor-BGzfc4EU.css +1 -0
  242. phoenix/server/static/assets/vendor-DCE4v-Ot.js +920 -0
  243. phoenix/server/static/assets/vendor-codemirror-D5f205eT.js +25 -0
  244. phoenix/server/static/assets/vendor-recharts-V9cwpXsm.js +37 -0
  245. phoenix/server/static/assets/vendor-shiki-Do--csgv.js +5 -0
  246. phoenix/server/static/assets/vendor-three-CmB8bl_y.js +3840 -0
  247. phoenix/server/templates/index.html +40 -6
  248. phoenix/server/thread_server.py +1 -2
  249. phoenix/server/types.py +14 -4
  250. phoenix/server/utils.py +74 -0
  251. phoenix/session/client.py +56 -3
  252. phoenix/session/data_extractor.py +5 -0
  253. phoenix/session/evaluation.py +14 -5
  254. phoenix/session/session.py +45 -9
  255. phoenix/settings.py +5 -0
  256. phoenix/trace/attributes.py +80 -13
  257. phoenix/trace/dsl/helpers.py +90 -1
  258. phoenix/trace/dsl/query.py +8 -6
  259. phoenix/trace/projects.py +5 -0
  260. phoenix/utilities/template_formatters.py +1 -1
  261. phoenix/version.py +1 -1
  262. arize_phoenix-10.0.4.dist-info/RECORD +0 -405
  263. phoenix/server/api/types/Evaluation.py +0 -39
  264. phoenix/server/cost_tracking/cost_lookup.py +0 -255
  265. phoenix/server/static/assets/components-DULKeDfL.js +0 -4365
  266. phoenix/server/static/assets/pages-Cl0A-0U2.js +0 -7430
  267. phoenix/server/static/assets/vendor-WIZid84E.css +0 -1
  268. phoenix/server/static/assets/vendor-arizeai-Dy-0mSNw.js +0 -649
  269. phoenix/server/static/assets/vendor-codemirror-DBtifKNr.js +0 -33
  270. phoenix/server/static/assets/vendor-oB4u9zuV.js +0 -905
  271. phoenix/server/static/assets/vendor-recharts-D-T4KPz2.js +0 -59
  272. phoenix/server/static/assets/vendor-shiki-BMn4O_9F.js +0 -5
  273. phoenix/server/static/assets/vendor-three-C5WAXd5r.js +0 -2998
  274. phoenix/utilities/deprecation.py +0 -31
  275. {arize_phoenix-10.0.4.dist-info → arize_phoenix-12.28.1.dist-info}/entry_points.txt +0 -0
  276. {arize_phoenix-10.0.4.dist-info → arize_phoenix-12.28.1.dist-info}/licenses/LICENSE +0 -0
phoenix/db/pg_config.py CHANGED
@@ -10,12 +10,14 @@ from typing_extensions import assert_never
 def get_pg_config(
     url: URL,
     driver: Literal["psycopg", "asyncpg"],
+    enforce_ssl: bool = False,
 ) -> tuple[URL, dict[str, Any]]:
     """Convert SQLAlchemy URL to driver-specific configuration.

     Args:
         url: SQLAlchemy URL
         driver: "psycopg" or "asyncpg"
+        enforce_ssl: If True, ensure SSL is enabled (required for AWS RDS IAM auth)

     Returns:
         Tuple of (base_url, connect_args):
@@ -26,6 +28,14 @@ def get_pg_config(
     query = url.query
     ssl_args = _get_ssl_args(query)

+    if enforce_ssl and not ssl_args:
+        ssl_args = {"sslmode": "require"}
+    elif enforce_ssl and ssl_args.get("sslmode") == "disable":
+        raise ValueError(
+            "SSL cannot be disabled when using AWS RDS IAM authentication. "
+            "Remove 'sslmode=disable' from the connection string."
+        )
+
     # Create base URL without SSL parameters
     base_url = url.set(
         drivername=f"postgresql+{driver}",
phoenix/db/types/model_provider.py CHANGED
@@ -6,3 +6,7 @@ class ModelProvider(Enum):
     AZURE_OPENAI = "AZURE_OPENAI"
     ANTHROPIC = "ANTHROPIC"
     GOOGLE = "GOOGLE"
+    DEEPSEEK = "DEEPSEEK"
+    XAI = "XAI"
+    OLLAMA = "OLLAMA"
+    AWS = "AWS"
phoenix/db/types/token_price_customization.py ADDED
@@ -0,0 +1,29 @@
+from abc import ABC
+from typing import Any, Literal, Optional
+
+from pydantic import BaseModel, ValidationError
+
+
+class TokenPriceCustomization(BaseModel, ABC):
+    model_config = {"extra": "allow"}
+
+
+class ThresholdBasedTokenPriceCustomization(TokenPriceCustomization):
+    type: Literal["threshold_based"] = "threshold_based"
+    key: str
+    threshold: float
+    new_rate: float
+
+
+class TokenPriceCustomizationParser:
+    """Intended to be forward-compatible while maintaining the ability to round-trip."""
+
+    @staticmethod
+    def parse(data: Optional[dict[str, Any]]) -> Optional[TokenPriceCustomization]:
+        if not data:
+            return None
+        try:
+            return ThresholdBasedTokenPriceCustomization.model_validate(data)
+        except ValidationError:
+            pass
+        return TokenPriceCustomization.model_validate(data)
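
The parser tries the concrete threshold-based shape first and falls back to the permissive base model, so unrecognized customizations survive a load/save cycle thanks to extra="allow". A brief sketch of that behavior; both payloads below are made-up examples:

    from phoenix.db.types.token_price_customization import TokenPriceCustomizationParser

    # A known shape parses into the concrete threshold-based model.
    known = TokenPriceCustomizationParser.parse(
        {"type": "threshold_based", "key": "input_tokens", "threshold": 128000, "new_rate": 2.5e-06}
    )

    # An unknown shape (hypothetical) still round-trips: extra fields are kept by the
    # base model's extra="allow" config and reappear in model_dump().
    unknown = TokenPriceCustomizationParser.parse({"type": "volume_discount", "tiers": [1000, 10000]})
    assert unknown is not None and unknown.model_dump()["type"] == "volume_discount"
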
phoenix/db/types/trace_retention.py CHANGED
@@ -5,7 +5,9 @@ from typing import Annotated, Iterable, Literal, Optional, Union

 import sqlalchemy as sa
 from pydantic import AfterValidator, BaseModel, Field, RootModel
+from sqlalchemy import func
 from sqlalchemy.ext.asyncio import AsyncSession
+from sqlalchemy.sql.roles import InElementRole

 from phoenix.utilities import hour_of_week

@@ -25,19 +27,25 @@ class _MaxDays(BaseModel):
 class _MaxCount(BaseModel):
     max_count: Annotated[int, Field(ge=0)]

-    @property
-    def max_count_filter(self) -> sa.ColumnElement[bool]:
+    def max_count_filter(
+        self,
+        project_rowids: Union[Iterable[int], InElementRole],
+    ) -> sa.ColumnElement[bool]:
         if self.max_count <= 0:
             return sa.literal(False)
         from phoenix.db.models import Trace

-        return Trace.start_time < (
-            sa.select(Trace.start_time)
-            .order_by(Trace.start_time.desc())
-            .offset(self.max_count - 1)
-            .limit(1)
-            .scalar_subquery()
+        ranked = (
+            sa.select(
+                Trace.id,
+                func.row_number()
+                .over(partition_by=Trace.project_rowid, order_by=Trace.start_time.desc())
+                .label("rn"),
+            )
+            .where(Trace.project_rowid.in_(project_rowids))
+            .cte("ranked")
         )
+        return Trace.id.in_(sa.select(ranked.c.id).where(ranked.c.rn > self.max_count))


 class MaxDaysRule(_MaxDays, BaseModel):
@@ -49,7 +57,7 @@ class MaxDaysRule(_MaxDays, BaseModel):
     async def delete_traces(
         self,
         session: AsyncSession,
-        project_rowids: Union[Iterable[int], sa.ScalarSelect[int]],
+        project_rowids: Union[Iterable[int], InElementRole],
     ) -> set[int]:
         if self.max_days <= 0:
             return set()
@@ -73,7 +81,7 @@ class MaxCountRule(_MaxCount, BaseModel):
     async def delete_traces(
         self,
         session: AsyncSession,
-        project_rowids: Union[Iterable[int], sa.ScalarSelect[int]],
+        project_rowids: Union[Iterable[int], InElementRole],
     ) -> set[int]:
         if self.max_count <= 0:
             return set()
@@ -82,7 +90,7 @@ class MaxCountRule(_MaxCount, BaseModel):
         stmt = (
             sa.delete(Trace)
             .where(Trace.project_rowid.in_(project_rowids))
-            .where(self.max_count_filter)
+            .where(self.max_count_filter(project_rowids))
             .returning(Trace.project_rowid)
         )
         return set(await session.scalars(stmt))
@@ -97,7 +105,7 @@ class MaxDaysOrCountRule(_MaxDays, _MaxCount, BaseModel):
     async def delete_traces(
         self,
         session: AsyncSession,
-        project_rowids: Union[Iterable[int], sa.ScalarSelect[int]],
+        project_rowids: Union[Iterable[int], InElementRole],
    ) -> set[int]:
         if self.max_days <= 0 and self.max_count <= 0:
             return set()
@@ -106,7 +114,7 @@ class MaxDaysOrCountRule(_MaxDays, _MaxCount, BaseModel):
         stmt = (
             sa.delete(Trace)
             .where(Trace.project_rowid.in_(project_rowids))
-            .where(sa.or_(self.max_days_filter, self.max_count_filter))
+            .where(sa.or_(self.max_days_filter, self.max_count_filter(project_rowids)))
             .returning(Trace.project_rowid)
         )
         return set(await session.scalars(stmt))
@@ -123,7 +131,7 @@ class TraceRetentionRule(RootModel[Union[MaxDaysRule, MaxCountRule, MaxDaysOrCou
     async def delete_traces(
         self,
         session: AsyncSession,
-        project_rowids: Union[Iterable[int], sa.ScalarSelect[int]],
+        project_rowids: Union[Iterable[int], InElementRole],
     ) -> set[int]:
         return await self.root.delete_traces(session, project_rowids)

@@ -192,7 +200,7 @@ class TraceRetentionCronExpression(RootModel[str]):

 def _parse_field(field: str, min_val: int, max_val: int) -> set[int]:
     """
-    Parse a cron field and return the set of matching values.
+    Parses a cron field and returns the set of matching values.

     Args:
         field (str): The cron field to parse
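
The window-function rewrite ranks traces within each project, so one DELETE statement can trim every project to its own newest max_count traces rather than applying a single global cutoff. A minimal driving sketch, assuming an AsyncSession bound to the Phoenix database and that MaxCountRule can be constructed from max_count alone:

    # Hypothetical caller; the session setup, project IDs, and the bare
    # MaxCountRule(max_count=...) constructor are assumptions for illustration.
    from sqlalchemy.ext.asyncio import AsyncSession

    from phoenix.db.types.trace_retention import MaxCountRule

    async def trim_projects(session: AsyncSession, project_rowids: list[int]) -> set[int]:
        rule = MaxCountRule(max_count=1000)  # keep only the newest 1000 traces per project
        # Returns the rowids of the projects that actually had traces deleted.
        return await rule.delete_traces(session, project_rowids)
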
phoenix/experiments/evaluators/utils.py CHANGED
@@ -19,9 +19,9 @@ def unwrap_json(obj: JSONSerializable) -> JSONSerializable:
     if len(obj) == 1:
         key = next(iter(obj.keys()))
         output = obj[key]
-        assert isinstance(
-            output, (dict, list, str, int, float, bool, type(None))
-        ), "Output must be JSON serializable"
+        assert isinstance(output, (dict, list, str, int, float, bool, type(None))), (
+            "Output must be JSON serializable"
+        )
         return output
     return obj

phoenix/experiments/functions.py CHANGED
@@ -10,7 +10,7 @@ from copy import deepcopy
 from dataclasses import replace
 from datetime import datetime, timezone
 from itertools import product
-from typing import Any, Literal, Optional, Union, cast
+from typing import TYPE_CHECKING, Any, Literal, Optional, Union, cast
 from urllib.parse import urljoin

 import httpx
@@ -65,6 +65,41 @@ from phoenix.trace.attributes import flatten
 from phoenix.utilities.client import VersionedAsyncClient, VersionedClient
 from phoenix.utilities.json import jsonify

+if TYPE_CHECKING:
+    from phoenix.client.resources.datasets import Dataset as ClientDataset
+
+
+def _convert_client_dataset(new_dataset: "ClientDataset") -> Dataset:
+    """
+    Converts Dataset objects from `phoenix.client` to Dataset objects compatible with experiments.
+    """
+    examples_dict: dict[str, Example] = {}
+    for example_data in new_dataset.examples:
+        legacy_example = Example(
+            id=example_data["id"],
+            input=example_data["input"],
+            output=example_data["output"],
+            metadata=example_data["metadata"],
+            updated_at=datetime.fromisoformat(example_data["updated_at"]),
+        )
+        examples_dict[legacy_example.id] = legacy_example
+
+    return Dataset(
+        id=new_dataset.id,
+        version_id=new_dataset.version_id,
+        examples=examples_dict,
+    )
+
+
+def _is_new_client_dataset(dataset: Any) -> bool:
+    """Check if dataset is from new client (has list examples)."""
+    try:
+        from phoenix.client.resources.datasets import Dataset as _ClientDataset
+
+        return isinstance(dataset, _ClientDataset)
+    except ImportError:
+        return False
+

 def _phoenix_clients() -> tuple[httpx.Client, httpx.AsyncClient]:
     return VersionedClient(
@@ -74,6 +109,64 @@ def _phoenix_clients() -> tuple[httpx.Client, httpx.AsyncClient]:
     )


+def _get_all_experiment_runs(
+    client: httpx.Client,
+    experiment_id: str,
+    page_size: int = 50,
+) -> list[ExperimentRun]:
+    """
+    Fetch all experiment runs using pagination to handle large datasets.
+
+    Args:
+        client: The HTTP client to use for requests.
+        experiment_id: The ID of the experiment.
+        page_size: Number of runs to fetch per page. Defaults to 50.
+
+    Returns:
+        List of all experiment runs as ExperimentRun objects.
+    """
+    all_runs: list[dict[str, Any]] = []
+    cursor = None
+
+    while True:
+        params: dict[str, Any] = {"limit": page_size}
+        if cursor:
+            params["cursor"] = cursor
+
+        try:
+            response = client.get(
+                f"v1/experiments/{experiment_id}/runs",
+                params=params,
+            )
+            response.raise_for_status()
+            data = response.json()
+
+            runs = data["data"]
+            all_runs.extend(runs)
+
+            # Check if there are more pages
+            cursor = data.get("next_cursor")
+            if not cursor:
+                break
+
+        except HTTPStatusError as e:
+            if e.response.status_code == 404:
+                # Experiment doesn't exist - treat as empty result
+                break
+            else:
+                raise
+
+    # Convert dicts to ExperimentRun objects
+    experiment_runs: list[ExperimentRun] = []
+    for run in all_runs:
+        # Parse datetime strings
+        run["start_time"] = datetime.fromisoformat(run["start_time"])
+        run["end_time"] = datetime.fromisoformat(run["end_time"])
+        experiment_runs.append(ExperimentRun.from_dict(run))
+
+    return experiment_runs
+
+
 Evaluators: TypeAlias = Union[
     ExperimentEvaluator,
     Sequence[ExperimentEvaluator],
@@ -85,7 +178,7 @@ RateLimitErrors: TypeAlias = Union[type[BaseException], Sequence[type[BaseExcept


 def run_experiment(
-    dataset: Dataset,
+    dataset: Union[Dataset, Any],  # Accept both legacy and new client datasets
     task: ExperimentTask,
     evaluators: Optional[Evaluators] = None,
     *,
@@ -166,11 +259,20 @@ def run_experiment(
         RanExperiment: The results of the experiment and evaluation. Additional evaluations can be
             added to the experiment using the `evaluate_experiment` function.
     """
+    # Auto-convert client Dataset objects to legacy format
+    normalized_dataset: Dataset
+    if _is_new_client_dataset(dataset):
+        normalized_dataset = _convert_client_dataset(cast("ClientDataset", dataset))
+    else:
+        normalized_dataset = dataset
+
     task_signature = inspect.signature(task)
     _validate_task_signature(task_signature)

-    if not dataset.examples:
-        raise ValueError(f"Dataset has no examples: {dataset.id=}, {dataset.version_id=}")
+    if not normalized_dataset.examples:
+        raise ValueError(
+            f"Dataset has no examples: {normalized_dataset.id=}, {normalized_dataset.version_id=}"
+        )
     # Add this to the params once supported in the UI
     repetitions = 1
     assert repetitions > 0, "Must run the experiment at least once."
@@ -179,7 +281,7 @@ def run_experiment(
     sync_client, async_client = _phoenix_clients()

     payload = {
-        "version_id": dataset.version_id,
+        "version_id": normalized_dataset.version_id,
         "name": experiment_name,
         "description": experiment_description,
         "metadata": experiment_metadata,
@@ -187,23 +289,23 @@
     }
     if not dry_run:
         experiment_response = sync_client.post(
-            f"/v1/datasets/{dataset.id}/experiments",
+            f"v1/datasets/{normalized_dataset.id}/experiments",
             json=payload,
         )
         experiment_response.raise_for_status()
         exp_json = experiment_response.json()["data"]
         project_name = exp_json["project_name"]
         experiment = Experiment(
-            dataset_id=dataset.id,
-            dataset_version_id=dataset.version_id,
+            dataset_id=normalized_dataset.id,
+            dataset_version_id=normalized_dataset.version_id,
             repetitions=repetitions,
             id=exp_json["id"],
             project_name=project_name,
         )
     else:
         experiment = Experiment(
-            dataset_id=dataset.id,
-            dataset_version_id=dataset.version_id,
+            dataset_id=normalized_dataset.id,
+            dataset_version_id=normalized_dataset.version_id,
             repetitions=repetitions,
             id=DRY_RUN,
             project_name="",
@@ -216,18 +318,18 @@
     print("🧪 Experiment started.")
     if dry_run:
         examples = {
-            (ex := dataset[i]).id: ex
-            for i in pd.Series(range(len(dataset)))
-            .sample(min(len(dataset), int(dry_run)), random_state=42)
+            (ex := normalized_dataset[i]).id: ex
+            for i in pd.Series(range(len(normalized_dataset)))
+            .sample(min(len(normalized_dataset), int(dry_run)), random_state=42)
            .sort_values()
         }
         id_selection = "\n".join(examples)
         print(f"🌵️ This is a dry-run for these example IDs:\n{id_selection}")
-        dataset = replace(dataset, examples=examples)
+        normalized_dataset = replace(normalized_dataset, examples=examples)
     else:
-        dataset_experiments_url = get_dataset_experiments_url(dataset_id=dataset.id)
+        dataset_experiments_url = get_dataset_experiments_url(dataset_id=normalized_dataset.id)
         experiment_compare_url = get_experiment_url(
-            dataset_id=dataset.id,
+            dataset_id=normalized_dataset.id,
             experiment_id=experiment.id,
         )
         print(f"📺 View dataset experiments: {dataset_experiments_url}")
@@ -259,7 +361,7 @@
         try:
             # Try to create the run directly
             resp = sync_client.post(
-                f"/v1/experiments/{experiment.id}/runs", json=jsonify(exp_run)
+                f"v1/experiments/{experiment.id}/runs", json=jsonify(exp_run)
             )
             resp.raise_for_status()
             exp_run = replace(exp_run, id=resp.json()["data"]["id"])
@@ -274,8 +376,11 @@
         error: Optional[BaseException] = None
         status = Status(StatusCode.OK)
         with ExitStack() as stack:
-            span: Span = stack.enter_context(
-                tracer.start_as_current_span(root_span_name, context=Context())
+            span = cast(
+                Span,
+                stack.enter_context(
+                    tracer.start_as_current_span(root_span_name, context=Context())
+                ),
             )
             stack.enter_context(capture_spans(resource))
             try:
@@ -316,9 +421,9 @@
            span.set_attribute(SpanAttributes.OPENINFERENCE_SPAN_KIND, root_span_kind)
            span.set_status(status)

-            assert isinstance(
-                output, (dict, list, str, int, float, bool, type(None))
-            ), "Output must be JSON serializable"
+            assert isinstance(output, (dict, list, str, int, float, bool, type(None))), (
+                "Output must be JSON serializable"
+            )

            exp_run = ExperimentRun(
                start_time=_decode_unix_nano(cast(int, span.start_time)),
@@ -334,7 +439,7 @@
        try:
            # Try to create the run directly
            resp = sync_client.post(
-                f"/v1/experiments/{experiment.id}/runs", json=jsonify(exp_run)
+                f"v1/experiments/{experiment.id}/runs", json=jsonify(exp_run)
            )
            resp.raise_for_status()
            exp_run = replace(exp_run, id=resp.json()["data"]["id"])
@@ -373,7 +478,7 @@
                None,
                functools.partial(
                    sync_client.post,
-                    url=f"/v1/experiments/{experiment.id}/runs",
+                    url=f"v1/experiments/{experiment.id}/runs",
                    json=jsonify(exp_run),
                ),
            )
@@ -391,8 +496,11 @@
        error: Optional[BaseException] = None
        status = Status(StatusCode.OK)
        with ExitStack() as stack:
-            span: Span = stack.enter_context(
-                tracer.start_as_current_span(root_span_name, context=Context())
+            span = cast(
+                Span,
+                stack.enter_context(
+                    tracer.start_as_current_span(root_span_name, context=Context())
+                ),
            )
            stack.enter_context(capture_spans(resource))
            try:
@@ -427,9 +535,9 @@
            span.set_attribute(OPENINFERENCE_SPAN_KIND, root_span_kind)
            span.set_status(status)

-            assert isinstance(
-                output, (dict, list, str, int, float, bool, type(None))
-            ), "Output must be JSON serializable"
+            assert isinstance(output, (dict, list, str, int, float, bool, type(None))), (
+                "Output must be JSON serializable"
+            )

            exp_run = ExperimentRun(
                start_time=_decode_unix_nano(cast(int, span.start_time)),
@@ -448,7 +556,7 @@
                None,
                functools.partial(
                    sync_client.post,
-                    url=f"/v1/experiments/{experiment.id}/runs",
+                    url=f"v1/experiments/{experiment.id}/runs",
                    json=jsonify(exp_run),
                ),
            )
@@ -491,23 +599,17 @@

     test_cases = [
         TestCase(example=deepcopy(ex), repetition_number=rep)
-        for ex, rep in product(dataset.examples.values(), range(1, repetitions + 1))
+        for ex, rep in product(normalized_dataset.examples.values(), range(1, repetitions + 1))
     ]
     task_runs, _execution_details = executor.run(test_cases)
     print("✅ Task runs completed.")

     # Get the final state of runs from the database
     if not dry_run:
-        all_runs = sync_client.get(f"/v1/experiments/{experiment.id}/runs").json()["data"]
-        task_runs = []
-        for run in all_runs:
-            # Parse datetime strings
-            run["start_time"] = datetime.fromisoformat(run["start_time"])
-            run["end_time"] = datetime.fromisoformat(run["end_time"])
-            task_runs.append(ExperimentRun.from_dict(run))
+        task_runs = _get_all_experiment_runs(sync_client, experiment.id)

         # Check if we got all expected runs
-        expected_runs = len(dataset.examples) * repetitions
+        expected_runs = len(normalized_dataset.examples) * repetitions
         actual_runs = len(task_runs)
         if actual_runs < expected_runs:
             print(
@@ -515,12 +617,14 @@
                 "completed successfully."
             )

-    params = ExperimentParameters(n_examples=len(dataset.examples), n_repetitions=repetitions)
+    params = ExperimentParameters(
+        n_examples=len(normalized_dataset.examples), n_repetitions=repetitions
+    )
     task_summary = TaskSummary.from_task_runs(params, task_runs)
     ran_experiment: RanExperiment = object.__new__(RanExperiment)
     ran_experiment.__init__(  # type: ignore[misc]
         params=params,
-        dataset=dataset,
+        dataset=normalized_dataset,
         runs={r.id: r for r in task_runs if r is not None},
         task_summary=task_summary,
         **_asdict(experiment),
@@ -561,16 +665,14 @@ def evaluate_experiment(
     else:
         dataset = Dataset.from_dict(
             sync_client.get(
-                f"/v1/datasets/{dataset_id}/examples",
+                f"v1/datasets/{dataset_id}/examples",
                 params={"version_id": str(dataset_version_id)},
             ).json()["data"]
         )
     if not dataset.examples:
         raise ValueError(f"Dataset has no examples: {dataset_id=}, {dataset_version_id=}")
-    experiment_runs = {
-        exp_run["id"]: ExperimentRun.from_dict(exp_run)
-        for exp_run in sync_client.get(f"/v1/experiments/{experiment.id}/runs").json()["data"]
-    }
+    all_runs = _get_all_experiment_runs(sync_client, experiment.id)
+    experiment_runs = {exp_run.id: exp_run for exp_run in all_runs}
     if not experiment_runs:
         raise ValueError("Experiment has not been run")
     params = ExperimentParameters(n_examples=len(dataset.examples))
@@ -622,8 +724,11 @@
            status = Status(StatusCode.OK)
            root_span_name = f"Evaluation: {evaluator.name}"
            with ExitStack() as stack:
-                span: Span = stack.enter_context(
-                    tracer.start_as_current_span(root_span_name, context=Context())
+                span = cast(
+                    Span,
+                    stack.enter_context(
+                        tracer.start_as_current_span(root_span_name, context=Context())
+                    ),
                )
                stack.enter_context(capture_spans(resource))
                try:
@@ -660,7 +765,7 @@
                trace_id=_str_trace_id(span.get_span_context().trace_id),  # type: ignore[no-untyped-call]
            )
            if not dry_run:
-                resp = sync_client.post("/v1/experiment_evaluations", json=jsonify(eval_run))
+                resp = sync_client.post("v1/experiment_evaluations", json=jsonify(eval_run))
                resp.raise_for_status()
                eval_run = replace(eval_run, id=resp.json()["data"]["id"])
            return eval_run
@@ -674,8 +779,11 @@
            status = Status(StatusCode.OK)
            root_span_name = f"Evaluation: {evaluator.name}"
            with ExitStack() as stack:
-                span: Span = stack.enter_context(
-                    tracer.start_as_current_span(root_span_name, context=Context())
+                span = cast(
+                    Span,
+                    stack.enter_context(
+                        tracer.start_as_current_span(root_span_name, context=Context())
+                    ),
                )
                stack.enter_context(capture_spans(resource))
                try:
@@ -719,7 +827,7 @@
                    None,
                    functools.partial(
                        sync_client.post,
-                        url="/v1/experiment_evaluations",
+                        url="v1/experiment_evaluations",
                        json=jsonify(eval_run),
                    ),
                )
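
Taken together, run_experiment now accepts either the legacy experiments Dataset or a phoenix.client Dataset (normalizing it up front via _convert_client_dataset) and pages through run results with _get_all_experiment_runs instead of a single unpaginated request. A minimal calling sketch; the phoenix.client import, the dataset getter call, and the toy task are assumptions about the separate client package, not guarantees of its API:

    # Hypothetical usage; assumes the optional arize-phoenix-client package is installed
    # and that its Client exposes a dataset getter roughly like the call below.
    from phoenix.client import Client
    from phoenix.experiments import run_experiment

    client = Client(base_url="http://localhost:6006")
    dataset = client.datasets.get_dataset(dataset="my-dataset")  # assumed client API

    def task(input):
        # Toy task for illustration; returns a JSON-serializable output.
        return {"answer": str(input)}

    # A phoenix.client Dataset is auto-converted to the legacy format before the run starts,
    # and completed runs are fetched back with cursor-based pagination.
    experiment = run_experiment(dataset, task, experiment_name="client-dataset-example")
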
phoenix/experiments/tracing.py CHANGED
@@ -8,7 +8,7 @@ from typing import Any, Optional

 from opentelemetry.sdk.resources import Resource
 from opentelemetry.sdk.trace import ReadableSpan
-from opentelemetry.trace import INVALID_TRACE_ID
+from opentelemetry.trace import INVALID_SPAN_ID
 from wrapt import apply_patch, resolve_path, wrap_function_wrapper


@@ -29,7 +29,7 @@ class SpanModifier:
         Args:
             span: ReadableSpan: the span to modify
         """
-        if (ctx := span._context) is None or ctx.span_id == INVALID_TRACE_ID:
+        if (ctx := span._context) is None or ctx.span_id == INVALID_SPAN_ID:
             return
         span._resource = span._resource.merge(self._resource)

phoenix/experiments/types.py CHANGED
@@ -322,7 +322,7 @@ class _HasStats:
             text = self.stats.__str__()
         else:
             text = self.stats.to_markdown(index=False)
-        return f"{self.title}\n{'-'*len(self.title)}\n" + text
+        return f"{self.title}\n{'-' * len(self.title)}\n" + text


 @dataclass(frozen=True)
phoenix/inferences/inferences.py CHANGED
@@ -13,11 +13,10 @@ from pandas import DataFrame, Series, Timestamp, read_parquet
 from pandas.api.types import (
     is_numeric_dtype,
 )
-from typing_extensions import TypeAlias
+from typing_extensions import TypeAlias, deprecated

 from phoenix.config import GENERATED_INFERENCES_NAME_PREFIX, INFERENCES_DIR
 from phoenix.datetime_utils import normalize_timestamps
-from phoenix.utilities.deprecation import deprecated

 from . import errors as err
 from .schema import (