arize-phoenix 3.16.1__py3-none-any.whl → 7.7.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of arize-phoenix might be problematic.

Files changed (338)
  1. arize_phoenix-7.7.1.dist-info/METADATA +261 -0
  2. arize_phoenix-7.7.1.dist-info/RECORD +345 -0
  3. {arize_phoenix-3.16.1.dist-info → arize_phoenix-7.7.1.dist-info}/WHEEL +1 -1
  4. arize_phoenix-7.7.1.dist-info/entry_points.txt +3 -0
  5. phoenix/__init__.py +86 -14
  6. phoenix/auth.py +309 -0
  7. phoenix/config.py +675 -45
  8. phoenix/core/model.py +32 -30
  9. phoenix/core/model_schema.py +102 -109
  10. phoenix/core/model_schema_adapter.py +48 -45
  11. phoenix/datetime_utils.py +24 -3
  12. phoenix/db/README.md +54 -0
  13. phoenix/db/__init__.py +4 -0
  14. phoenix/db/alembic.ini +85 -0
  15. phoenix/db/bulk_inserter.py +294 -0
  16. phoenix/db/engines.py +208 -0
  17. phoenix/db/enums.py +20 -0
  18. phoenix/db/facilitator.py +113 -0
  19. phoenix/db/helpers.py +159 -0
  20. phoenix/db/insertion/constants.py +2 -0
  21. phoenix/db/insertion/dataset.py +227 -0
  22. phoenix/db/insertion/document_annotation.py +171 -0
  23. phoenix/db/insertion/evaluation.py +191 -0
  24. phoenix/db/insertion/helpers.py +98 -0
  25. phoenix/db/insertion/span.py +193 -0
  26. phoenix/db/insertion/span_annotation.py +158 -0
  27. phoenix/db/insertion/trace_annotation.py +158 -0
  28. phoenix/db/insertion/types.py +256 -0
  29. phoenix/db/migrate.py +86 -0
  30. phoenix/db/migrations/data_migration_scripts/populate_project_sessions.py +199 -0
  31. phoenix/db/migrations/env.py +114 -0
  32. phoenix/db/migrations/script.py.mako +26 -0
  33. phoenix/db/migrations/versions/10460e46d750_datasets.py +317 -0
  34. phoenix/db/migrations/versions/3be8647b87d8_add_token_columns_to_spans_table.py +126 -0
  35. phoenix/db/migrations/versions/4ded9e43755f_create_project_sessions_table.py +66 -0
  36. phoenix/db/migrations/versions/cd164e83824f_users_and_tokens.py +157 -0
  37. phoenix/db/migrations/versions/cf03bd6bae1d_init.py +280 -0
  38. phoenix/db/models.py +807 -0
  39. phoenix/exceptions.py +5 -1
  40. phoenix/experiments/__init__.py +6 -0
  41. phoenix/experiments/evaluators/__init__.py +29 -0
  42. phoenix/experiments/evaluators/base.py +158 -0
  43. phoenix/experiments/evaluators/code_evaluators.py +184 -0
  44. phoenix/experiments/evaluators/llm_evaluators.py +473 -0
  45. phoenix/experiments/evaluators/utils.py +236 -0
  46. phoenix/experiments/functions.py +772 -0
  47. phoenix/experiments/tracing.py +86 -0
  48. phoenix/experiments/types.py +726 -0
  49. phoenix/experiments/utils.py +25 -0
  50. phoenix/inferences/__init__.py +0 -0
  51. phoenix/{datasets → inferences}/errors.py +6 -5
  52. phoenix/{datasets → inferences}/fixtures.py +49 -42
  53. phoenix/{datasets/dataset.py → inferences/inferences.py} +121 -105
  54. phoenix/{datasets → inferences}/schema.py +11 -11
  55. phoenix/{datasets → inferences}/validation.py +13 -14
  56. phoenix/logging/__init__.py +3 -0
  57. phoenix/logging/_config.py +90 -0
  58. phoenix/logging/_filter.py +6 -0
  59. phoenix/logging/_formatter.py +69 -0
  60. phoenix/metrics/__init__.py +5 -4
  61. phoenix/metrics/binning.py +4 -3
  62. phoenix/metrics/metrics.py +2 -1
  63. phoenix/metrics/mixins.py +7 -6
  64. phoenix/metrics/retrieval_metrics.py +2 -1
  65. phoenix/metrics/timeseries.py +5 -4
  66. phoenix/metrics/wrappers.py +9 -3
  67. phoenix/pointcloud/clustering.py +5 -5
  68. phoenix/pointcloud/pointcloud.py +7 -5
  69. phoenix/pointcloud/projectors.py +5 -6
  70. phoenix/pointcloud/umap_parameters.py +53 -52
  71. phoenix/server/api/README.md +28 -0
  72. phoenix/server/api/auth.py +44 -0
  73. phoenix/server/api/context.py +152 -9
  74. phoenix/server/api/dataloaders/__init__.py +91 -0
  75. phoenix/server/api/dataloaders/annotation_summaries.py +139 -0
  76. phoenix/server/api/dataloaders/average_experiment_run_latency.py +54 -0
  77. phoenix/server/api/dataloaders/cache/__init__.py +3 -0
  78. phoenix/server/api/dataloaders/cache/two_tier_cache.py +68 -0
  79. phoenix/server/api/dataloaders/dataset_example_revisions.py +131 -0
  80. phoenix/server/api/dataloaders/dataset_example_spans.py +38 -0
  81. phoenix/server/api/dataloaders/document_evaluation_summaries.py +144 -0
  82. phoenix/server/api/dataloaders/document_evaluations.py +31 -0
  83. phoenix/server/api/dataloaders/document_retrieval_metrics.py +89 -0
  84. phoenix/server/api/dataloaders/experiment_annotation_summaries.py +79 -0
  85. phoenix/server/api/dataloaders/experiment_error_rates.py +58 -0
  86. phoenix/server/api/dataloaders/experiment_run_annotations.py +36 -0
  87. phoenix/server/api/dataloaders/experiment_run_counts.py +49 -0
  88. phoenix/server/api/dataloaders/experiment_sequence_number.py +44 -0
  89. phoenix/server/api/dataloaders/latency_ms_quantile.py +188 -0
  90. phoenix/server/api/dataloaders/min_start_or_max_end_times.py +85 -0
  91. phoenix/server/api/dataloaders/project_by_name.py +31 -0
  92. phoenix/server/api/dataloaders/record_counts.py +116 -0
  93. phoenix/server/api/dataloaders/session_io.py +79 -0
  94. phoenix/server/api/dataloaders/session_num_traces.py +30 -0
  95. phoenix/server/api/dataloaders/session_num_traces_with_error.py +32 -0
  96. phoenix/server/api/dataloaders/session_token_usages.py +41 -0
  97. phoenix/server/api/dataloaders/session_trace_latency_ms_quantile.py +55 -0
  98. phoenix/server/api/dataloaders/span_annotations.py +26 -0
  99. phoenix/server/api/dataloaders/span_dataset_examples.py +31 -0
  100. phoenix/server/api/dataloaders/span_descendants.py +57 -0
  101. phoenix/server/api/dataloaders/span_projects.py +33 -0
  102. phoenix/server/api/dataloaders/token_counts.py +124 -0
  103. phoenix/server/api/dataloaders/trace_by_trace_ids.py +25 -0
  104. phoenix/server/api/dataloaders/trace_root_spans.py +32 -0
  105. phoenix/server/api/dataloaders/user_roles.py +30 -0
  106. phoenix/server/api/dataloaders/users.py +33 -0
  107. phoenix/server/api/exceptions.py +48 -0
  108. phoenix/server/api/helpers/__init__.py +12 -0
  109. phoenix/server/api/helpers/dataset_helpers.py +217 -0
  110. phoenix/server/api/helpers/experiment_run_filters.py +763 -0
  111. phoenix/server/api/helpers/playground_clients.py +948 -0
  112. phoenix/server/api/helpers/playground_registry.py +70 -0
  113. phoenix/server/api/helpers/playground_spans.py +455 -0
  114. phoenix/server/api/input_types/AddExamplesToDatasetInput.py +16 -0
  115. phoenix/server/api/input_types/AddSpansToDatasetInput.py +14 -0
  116. phoenix/server/api/input_types/ChatCompletionInput.py +38 -0
  117. phoenix/server/api/input_types/ChatCompletionMessageInput.py +24 -0
  118. phoenix/server/api/input_types/ClearProjectInput.py +15 -0
  119. phoenix/server/api/input_types/ClusterInput.py +2 -2
  120. phoenix/server/api/input_types/CreateDatasetInput.py +12 -0
  121. phoenix/server/api/input_types/CreateSpanAnnotationInput.py +18 -0
  122. phoenix/server/api/input_types/CreateTraceAnnotationInput.py +18 -0
  123. phoenix/server/api/input_types/DataQualityMetricInput.py +5 -2
  124. phoenix/server/api/input_types/DatasetExampleInput.py +14 -0
  125. phoenix/server/api/input_types/DatasetSort.py +17 -0
  126. phoenix/server/api/input_types/DatasetVersionSort.py +16 -0
  127. phoenix/server/api/input_types/DeleteAnnotationsInput.py +7 -0
  128. phoenix/server/api/input_types/DeleteDatasetExamplesInput.py +13 -0
  129. phoenix/server/api/input_types/DeleteDatasetInput.py +7 -0
  130. phoenix/server/api/input_types/DeleteExperimentsInput.py +7 -0
  131. phoenix/server/api/input_types/DimensionFilter.py +4 -4
  132. phoenix/server/api/input_types/GenerativeModelInput.py +17 -0
  133. phoenix/server/api/input_types/Granularity.py +1 -1
  134. phoenix/server/api/input_types/InvocationParameters.py +162 -0
  135. phoenix/server/api/input_types/PatchAnnotationInput.py +19 -0
  136. phoenix/server/api/input_types/PatchDatasetExamplesInput.py +35 -0
  137. phoenix/server/api/input_types/PatchDatasetInput.py +14 -0
  138. phoenix/server/api/input_types/PerformanceMetricInput.py +5 -2
  139. phoenix/server/api/input_types/ProjectSessionSort.py +29 -0
  140. phoenix/server/api/input_types/SpanAnnotationSort.py +17 -0
  141. phoenix/server/api/input_types/SpanSort.py +134 -69
  142. phoenix/server/api/input_types/TemplateOptions.py +10 -0
  143. phoenix/server/api/input_types/TraceAnnotationSort.py +17 -0
  144. phoenix/server/api/input_types/UserRoleInput.py +9 -0
  145. phoenix/server/api/mutations/__init__.py +28 -0
  146. phoenix/server/api/mutations/api_key_mutations.py +167 -0
  147. phoenix/server/api/mutations/chat_mutations.py +593 -0
  148. phoenix/server/api/mutations/dataset_mutations.py +591 -0
  149. phoenix/server/api/mutations/experiment_mutations.py +75 -0
  150. phoenix/server/api/{types/ExportEventsMutation.py → mutations/export_events_mutations.py} +21 -18
  151. phoenix/server/api/mutations/project_mutations.py +57 -0
  152. phoenix/server/api/mutations/span_annotations_mutations.py +128 -0
  153. phoenix/server/api/mutations/trace_annotations_mutations.py +127 -0
  154. phoenix/server/api/mutations/user_mutations.py +329 -0
  155. phoenix/server/api/openapi/__init__.py +0 -0
  156. phoenix/server/api/openapi/main.py +17 -0
  157. phoenix/server/api/openapi/schema.py +16 -0
  158. phoenix/server/api/queries.py +738 -0
  159. phoenix/server/api/routers/__init__.py +11 -0
  160. phoenix/server/api/routers/auth.py +284 -0
  161. phoenix/server/api/routers/embeddings.py +26 -0
  162. phoenix/server/api/routers/oauth2.py +488 -0
  163. phoenix/server/api/routers/v1/__init__.py +64 -0
  164. phoenix/server/api/routers/v1/datasets.py +1017 -0
  165. phoenix/server/api/routers/v1/evaluations.py +362 -0
  166. phoenix/server/api/routers/v1/experiment_evaluations.py +115 -0
  167. phoenix/server/api/routers/v1/experiment_runs.py +167 -0
  168. phoenix/server/api/routers/v1/experiments.py +308 -0
  169. phoenix/server/api/routers/v1/pydantic_compat.py +78 -0
  170. phoenix/server/api/routers/v1/spans.py +267 -0
  171. phoenix/server/api/routers/v1/traces.py +208 -0
  172. phoenix/server/api/routers/v1/utils.py +95 -0
  173. phoenix/server/api/schema.py +44 -241
  174. phoenix/server/api/subscriptions.py +597 -0
  175. phoenix/server/api/types/Annotation.py +21 -0
  176. phoenix/server/api/types/AnnotationSummary.py +55 -0
  177. phoenix/server/api/types/AnnotatorKind.py +16 -0
  178. phoenix/server/api/types/ApiKey.py +27 -0
  179. phoenix/server/api/types/AuthMethod.py +9 -0
  180. phoenix/server/api/types/ChatCompletionMessageRole.py +11 -0
  181. phoenix/server/api/types/ChatCompletionSubscriptionPayload.py +46 -0
  182. phoenix/server/api/types/Cluster.py +25 -24
  183. phoenix/server/api/types/CreateDatasetPayload.py +8 -0
  184. phoenix/server/api/types/DataQualityMetric.py +31 -13
  185. phoenix/server/api/types/Dataset.py +288 -63
  186. phoenix/server/api/types/DatasetExample.py +85 -0
  187. phoenix/server/api/types/DatasetExampleRevision.py +34 -0
  188. phoenix/server/api/types/DatasetVersion.py +14 -0
  189. phoenix/server/api/types/Dimension.py +32 -31
  190. phoenix/server/api/types/DocumentEvaluationSummary.py +9 -8
  191. phoenix/server/api/types/EmbeddingDimension.py +56 -49
  192. phoenix/server/api/types/Evaluation.py +25 -31
  193. phoenix/server/api/types/EvaluationSummary.py +30 -50
  194. phoenix/server/api/types/Event.py +20 -20
  195. phoenix/server/api/types/ExampleRevisionInterface.py +14 -0
  196. phoenix/server/api/types/Experiment.py +152 -0
  197. phoenix/server/api/types/ExperimentAnnotationSummary.py +13 -0
  198. phoenix/server/api/types/ExperimentComparison.py +17 -0
  199. phoenix/server/api/types/ExperimentRun.py +119 -0
  200. phoenix/server/api/types/ExperimentRunAnnotation.py +56 -0
  201. phoenix/server/api/types/GenerativeModel.py +9 -0
  202. phoenix/server/api/types/GenerativeProvider.py +85 -0
  203. phoenix/server/api/types/Inferences.py +80 -0
  204. phoenix/server/api/types/InferencesRole.py +23 -0
  205. phoenix/server/api/types/LabelFraction.py +7 -0
  206. phoenix/server/api/types/MimeType.py +2 -2
  207. phoenix/server/api/types/Model.py +54 -54
  208. phoenix/server/api/types/PerformanceMetric.py +8 -5
  209. phoenix/server/api/types/Project.py +407 -142
  210. phoenix/server/api/types/ProjectSession.py +139 -0
  211. phoenix/server/api/types/Segments.py +4 -4
  212. phoenix/server/api/types/Span.py +221 -176
  213. phoenix/server/api/types/SpanAnnotation.py +43 -0
  214. phoenix/server/api/types/SpanIOValue.py +15 -0
  215. phoenix/server/api/types/SystemApiKey.py +9 -0
  216. phoenix/server/api/types/TemplateLanguage.py +10 -0
  217. phoenix/server/api/types/TimeSeries.py +19 -15
  218. phoenix/server/api/types/TokenUsage.py +11 -0
  219. phoenix/server/api/types/Trace.py +154 -0
  220. phoenix/server/api/types/TraceAnnotation.py +45 -0
  221. phoenix/server/api/types/UMAPPoints.py +7 -7
  222. phoenix/server/api/types/User.py +60 -0
  223. phoenix/server/api/types/UserApiKey.py +45 -0
  224. phoenix/server/api/types/UserRole.py +15 -0
  225. phoenix/server/api/types/node.py +4 -112
  226. phoenix/server/api/types/pagination.py +156 -57
  227. phoenix/server/api/utils.py +34 -0
  228. phoenix/server/app.py +864 -115
  229. phoenix/server/bearer_auth.py +163 -0
  230. phoenix/server/dml_event.py +136 -0
  231. phoenix/server/dml_event_handler.py +256 -0
  232. phoenix/server/email/__init__.py +0 -0
  233. phoenix/server/email/sender.py +97 -0
  234. phoenix/server/email/templates/__init__.py +0 -0
  235. phoenix/server/email/templates/password_reset.html +19 -0
  236. phoenix/server/email/types.py +11 -0
  237. phoenix/server/grpc_server.py +102 -0
  238. phoenix/server/jwt_store.py +505 -0
  239. phoenix/server/main.py +305 -116
  240. phoenix/server/oauth2.py +52 -0
  241. phoenix/server/openapi/__init__.py +0 -0
  242. phoenix/server/prometheus.py +111 -0
  243. phoenix/server/rate_limiters.py +188 -0
  244. phoenix/server/static/.vite/manifest.json +87 -0
  245. phoenix/server/static/assets/components-Cy9nwIvF.js +2125 -0
  246. phoenix/server/static/assets/index-BKvHIxkk.js +113 -0
  247. phoenix/server/static/assets/pages-CUi2xCVQ.js +4449 -0
  248. phoenix/server/static/assets/vendor-DvC8cT4X.js +894 -0
  249. phoenix/server/static/assets/vendor-DxkFTwjz.css +1 -0
  250. phoenix/server/static/assets/vendor-arizeai-Do1793cv.js +662 -0
  251. phoenix/server/static/assets/vendor-codemirror-BzwZPyJM.js +24 -0
  252. phoenix/server/static/assets/vendor-recharts-_Jb7JjhG.js +59 -0
  253. phoenix/server/static/assets/vendor-shiki-Cl9QBraO.js +5 -0
  254. phoenix/server/static/assets/vendor-three-DwGkEfCM.js +2998 -0
  255. phoenix/server/telemetry.py +68 -0
  256. phoenix/server/templates/index.html +82 -23
  257. phoenix/server/thread_server.py +3 -3
  258. phoenix/server/types.py +275 -0
  259. phoenix/services.py +27 -18
  260. phoenix/session/client.py +743 -68
  261. phoenix/session/data_extractor.py +31 -7
  262. phoenix/session/evaluation.py +3 -9
  263. phoenix/session/session.py +263 -219
  264. phoenix/settings.py +22 -0
  265. phoenix/trace/__init__.py +2 -22
  266. phoenix/trace/attributes.py +338 -0
  267. phoenix/trace/dsl/README.md +116 -0
  268. phoenix/trace/dsl/filter.py +663 -213
  269. phoenix/trace/dsl/helpers.py +73 -21
  270. phoenix/trace/dsl/query.py +574 -201
  271. phoenix/trace/exporter.py +24 -19
  272. phoenix/trace/fixtures.py +368 -32
  273. phoenix/trace/otel.py +71 -219
  274. phoenix/trace/projects.py +3 -2
  275. phoenix/trace/schemas.py +33 -11
  276. phoenix/trace/span_evaluations.py +21 -16
  277. phoenix/trace/span_json_decoder.py +6 -4
  278. phoenix/trace/span_json_encoder.py +2 -2
  279. phoenix/trace/trace_dataset.py +47 -32
  280. phoenix/trace/utils.py +21 -4
  281. phoenix/utilities/__init__.py +0 -26
  282. phoenix/utilities/client.py +132 -0
  283. phoenix/utilities/deprecation.py +31 -0
  284. phoenix/utilities/error_handling.py +3 -2
  285. phoenix/utilities/json.py +109 -0
  286. phoenix/utilities/logging.py +8 -0
  287. phoenix/utilities/project.py +2 -2
  288. phoenix/utilities/re.py +49 -0
  289. phoenix/utilities/span_store.py +0 -23
  290. phoenix/utilities/template_formatters.py +99 -0
  291. phoenix/version.py +1 -1
  292. arize_phoenix-3.16.1.dist-info/METADATA +0 -495
  293. arize_phoenix-3.16.1.dist-info/RECORD +0 -178
  294. phoenix/core/project.py +0 -619
  295. phoenix/core/traces.py +0 -96
  296. phoenix/experimental/evals/__init__.py +0 -73
  297. phoenix/experimental/evals/evaluators.py +0 -413
  298. phoenix/experimental/evals/functions/__init__.py +0 -4
  299. phoenix/experimental/evals/functions/classify.py +0 -453
  300. phoenix/experimental/evals/functions/executor.py +0 -353
  301. phoenix/experimental/evals/functions/generate.py +0 -138
  302. phoenix/experimental/evals/functions/processing.py +0 -76
  303. phoenix/experimental/evals/models/__init__.py +0 -14
  304. phoenix/experimental/evals/models/anthropic.py +0 -175
  305. phoenix/experimental/evals/models/base.py +0 -170
  306. phoenix/experimental/evals/models/bedrock.py +0 -221
  307. phoenix/experimental/evals/models/litellm.py +0 -134
  308. phoenix/experimental/evals/models/openai.py +0 -448
  309. phoenix/experimental/evals/models/rate_limiters.py +0 -246
  310. phoenix/experimental/evals/models/vertex.py +0 -173
  311. phoenix/experimental/evals/models/vertexai.py +0 -186
  312. phoenix/experimental/evals/retrievals.py +0 -96
  313. phoenix/experimental/evals/templates/__init__.py +0 -50
  314. phoenix/experimental/evals/templates/default_templates.py +0 -472
  315. phoenix/experimental/evals/templates/template.py +0 -195
  316. phoenix/experimental/evals/utils/__init__.py +0 -172
  317. phoenix/experimental/evals/utils/threads.py +0 -27
  318. phoenix/server/api/helpers.py +0 -11
  319. phoenix/server/api/routers/evaluation_handler.py +0 -109
  320. phoenix/server/api/routers/span_handler.py +0 -70
  321. phoenix/server/api/routers/trace_handler.py +0 -60
  322. phoenix/server/api/types/DatasetRole.py +0 -23
  323. phoenix/server/static/index.css +0 -6
  324. phoenix/server/static/index.js +0 -7447
  325. phoenix/storage/span_store/__init__.py +0 -23
  326. phoenix/storage/span_store/text_file.py +0 -85
  327. phoenix/trace/dsl/missing.py +0 -60
  328. phoenix/trace/langchain/__init__.py +0 -3
  329. phoenix/trace/langchain/instrumentor.py +0 -35
  330. phoenix/trace/llama_index/__init__.py +0 -3
  331. phoenix/trace/llama_index/callback.py +0 -102
  332. phoenix/trace/openai/__init__.py +0 -3
  333. phoenix/trace/openai/instrumentor.py +0 -30
  334. {arize_phoenix-3.16.1.dist-info → arize_phoenix-7.7.1.dist-info}/licenses/IP_NOTICE +0 -0
  335. {arize_phoenix-3.16.1.dist-info → arize_phoenix-7.7.1.dist-info}/licenses/LICENSE +0 -0
  336. /phoenix/{datasets → db/insertion}/__init__.py +0 -0
  337. /phoenix/{experimental → db/migrations}/__init__.py +0 -0
  338. /phoenix/{storage → db/migrations/data_migration_scripts}/__init__.py +0 -0
phoenix/experiments/utils.py (new file)

@@ -0,0 +1,25 @@
+ import functools
+ from collections.abc import Callable
+ from typing import Any
+
+ from phoenix.config import get_web_base_url
+
+
+ def get_experiment_url(*, dataset_id: str, experiment_id: str) -> str:
+     return f"{get_web_base_url()}datasets/{dataset_id}/compare?experimentId={experiment_id}"
+
+
+ def get_dataset_experiments_url(*, dataset_id: str) -> str:
+     return f"{get_web_base_url()}datasets/{dataset_id}/experiments"
+
+
+ def get_func_name(fn: Callable[..., Any]) -> str:
+     """
+     Makes a best-effort attempt to get the name of the function.
+     """
+
+     if isinstance(fn, functools.partial):
+         return fn.func.__qualname__
+     if hasattr(fn, "__qualname__") and not fn.__qualname__.endswith("<lambda>"):
+         return fn.__qualname__.split(".<locals>.")[-1]
+     return str(fn)
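For orientation, a minimal usage sketch of the helpers added above (the hunk matches phoenix/experiments/utils.py, entry 49 in the file list; the IDs and the localhost URL are invented for illustration):

    import functools
    from phoenix.experiments.utils import get_experiment_url, get_func_name

    # Hypothetical IDs; real values are the node IDs issued by the Phoenix server.
    url = get_experiment_url(dataset_id="RGF0YXNldDox", experiment_id="RXhwZXJpbWVudDox")
    # e.g. "http://localhost:6006/datasets/RGF0YXNldDox/compare?experimentId=RXhwZXJpbWVudDox"

    def my_eval(output): ...

    assert get_func_name(my_eval) == "my_eval"                     # plain function
    assert get_func_name(functools.partial(my_eval)) == "my_eval"  # partials unwrap to the wrapped callable
    assert get_func_name(lambda x: x).startswith("<function")      # lambdas fall back to str(fn)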
phoenix/{datasets → inferences}/errors.py

@@ -1,5 +1,6 @@
  from abc import abstractmethod
- from typing import Any, Iterable, List, Union
+ from collections.abc import Iterable
+ from typing import Any, Union


  class ValidationError(Exception):
@@ -57,8 +58,8 @@ class InvalidSchemaError(ValidationError):
  class DatasetError(Exception):
      """An error raised when the dataset is invalid or incomplete"""

-     def __init__(self, errors: Union[ValidationError, List[ValidationError]]):
-         self.errors: List[ValidationError] = errors if isinstance(errors, list) else [errors]
+     def __init__(self, errors: Union[ValidationError, list[ValidationError]]):
+         self.errors: list[ValidationError] = errors if isinstance(errors, list) else [errors]

      def __str__(self) -> str:
          return "\n".join(map(str, self.errors))
@@ -142,7 +143,7 @@ class EmbeddingVectorSizeMismatch(ValidationError):
      vector lengths"""

      def __init__(
-         self, embedding_feature_name: str, vector_column_name: str, vector_lengths: List[int]
+         self, embedding_feature_name: str, vector_column_name: str, vector_lengths: list[int]
      ) -> None:
          self.embedding_feature_name = embedding_feature_name
          self.vector_column_name = vector_column_name
@@ -238,5 +239,5 @@ class MissingTimestampColumnName(ValidationError):
  class SchemaError(Exception):
      """An error raised when the Schema is invalid or incomplete"""

-     def __init__(self, errors: Union[ValidationError, List[ValidationError]]):
+     def __init__(self, errors: Union[ValidationError, list[ValidationError]]):
          self.errors = errors
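The typing change above is a pattern that recurs throughout this diff: container generics from typing (List, Dict, Set, Tuple) become the PEP 585 built-ins (list, dict, set, tuple), and abstract collection types move to collections.abc imports. A minimal before/after sketch of the convention; the function is illustrative, not from the package:

    # Before (typing generics, as in 3.16.1):
    # from typing import Dict, Iterable, List
    # def positions(items: Iterable[str]) -> Dict[str, List[int]]: ...

    # After (PEP 585 built-in generics, which require Python >= 3.9):
    from collections.abc import Iterable

    def positions(items: Iterable[str]) -> dict[str, list[int]]:
        # Map each distinct item to the indices at which it occurs.
        out: dict[str, list[int]] = {}
        for i, item in enumerate(items):
            out.setdefault(item, []).append(i)
        return out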
phoenix/{datasets → inferences}/fixtures.py

@@ -1,17 +1,18 @@
  import json
  import logging
+ from collections.abc import Iterator
  from dataclasses import dataclass, replace
  from enum import Enum, auto
  from pathlib import Path
- from typing import Iterator, NamedTuple, Optional, Tuple
+ from typing import NamedTuple, Optional
  from urllib import request
  from urllib.parse import quote, urljoin

  from pandas import read_parquet

- from phoenix.config import DATASET_DIR
- from phoenix.datasets.dataset import Dataset
- from phoenix.datasets.schema import (
+ from phoenix.config import INFERENCES_DIR
+ from phoenix.inferences.inferences import Inferences
+ from phoenix.inferences.schema import (
      EmbeddingColumnNames,
      RetrievalEmbeddingColumnNames,
      Schema,
@@ -20,7 +21,7 @@ from phoenix.datasets.schema import (
  logger = logging.getLogger(__name__)


- class DatasetRole(Enum):
+ class InferencesRole(Enum):
      PRIMARY = auto()
      REFERENCE = auto()
      CORPUS = auto()
@@ -39,11 +40,11 @@ class Fixture:
      corpus_file_name: Optional[str] = None
      corpus_schema: Optional[Schema] = None

-     def paths(self) -> Iterator[Tuple[DatasetRole, Path]]:
+     def paths(self) -> Iterator[tuple[InferencesRole, Path]]:
          return (
              (role, Path(self.prefix) / name)
              for role, name in zip(
-                 DatasetRole,
+                 InferencesRole,
                  (
                      self.primary_file_name,
                      self.reference_file_name,
@@ -397,7 +398,7 @@ wikipedia_fixture = Fixture(
      corpus_file_name="corpus.parquet",
  )

- FIXTURES: Tuple[Fixture, ...] = (
+ FIXTURES: tuple[Fixture, ...] = (
      sentiment_classification_language_drift_fixture,
      image_classification_fixture,
      fashion_mnist_fixture,
@@ -413,47 +414,53 @@ FIXTURES: Tuple[Fixture, ...] = (
  NAME_TO_FIXTURE = {fixture.name: fixture for fixture in FIXTURES}


- def get_datasets(
+ def get_inferences(
      fixture_name: str,
      no_internet: bool = False,
- ) -> Tuple[Dataset, Optional[Dataset], Optional[Dataset]]:
+ ) -> tuple[Inferences, Optional[Inferences], Optional[Inferences]]:
      """
-     Downloads primary and reference datasets for a fixture if they are not found
+     Downloads primary and reference inferences for a fixture if they are not found
      locally.
      """
-     fixture = _get_fixture_by_name(fixture_name=fixture_name)
+     fixture = get_fixture_by_name(fixture_name=fixture_name)
      if no_internet:
-         paths = {role: DATASET_DIR / path for role, path in fixture.paths()}
+         paths = {role: INFERENCES_DIR / path for role, path in fixture.paths()}
      else:
-         paths = dict(_download(fixture, DATASET_DIR))
-     primary_dataset = Dataset(
-         read_parquet(paths[DatasetRole.PRIMARY]),
+         paths = dict(_download(fixture, INFERENCES_DIR))
+     primary_inferences = Inferences(
+         read_parquet(paths[InferencesRole.PRIMARY]),
          fixture.primary_schema,
          "production",
      )
-     reference_dataset = None
+     reference_inferences = None
      if fixture.reference_file_name is not None:
-         reference_dataset = Dataset(
-             read_parquet(paths[DatasetRole.REFERENCE]),
-             fixture.reference_schema
-             if fixture.reference_schema is not None
-             else fixture.primary_schema,
+         reference_inferences = Inferences(
+             read_parquet(paths[InferencesRole.REFERENCE]),
+             (
+                 fixture.reference_schema
+                 if fixture.reference_schema is not None
+                 else fixture.primary_schema
+             ),
              "training",
          )
-     corpus_dataset = None
+     corpus_inferences = None
      if fixture.corpus_file_name is not None:
-         corpus_dataset = Dataset(
-             read_parquet(paths[DatasetRole.CORPUS]),
+         corpus_inferences = Inferences(
+             read_parquet(paths[InferencesRole.CORPUS]),
              fixture.corpus_schema,
              "knowledge_base",
          )
-     return primary_dataset, reference_dataset, corpus_dataset
+     return primary_inferences, reference_inferences, corpus_inferences


- def _get_fixture_by_name(fixture_name: str) -> Fixture:
+ def get_fixture_by_name(fixture_name: str) -> Fixture:
      """
-     Returns the fixture whose name matches the input name. Raises a ValueError
-     if the input fixture name does not match any known fixture names.
+     Returns the fixture whose name matches the input name.
+
+     Raises
+     ------
+     ValueError
+         if the input fixture name does not match any known fixture names.
      """
      if fixture_name not in NAME_TO_FIXTURE:
          valid_fixture_names = ", ".join(NAME_TO_FIXTURE.keys())
@@ -462,17 +469,17 @@ def _get_fixture_by_name(fixture_name: str) -> Fixture:


  @dataclass
- class ExampleDatasets:
+ class ExampleInferences:
      """
      A primary and optional reference dataset pair.
      """

-     primary: Dataset
-     reference: Optional[Dataset] = None
-     corpus: Optional[Dataset] = None
+     primary: Inferences
+     reference: Optional[Inferences] = None
+     corpus: Optional[Inferences] = None


- def load_example(use_case: str) -> ExampleDatasets:
+ def load_example(use_case: str) -> ExampleInferences:
      """
      Loads an example primary and reference dataset for a given use-case.

@@ -495,15 +502,15 @@ def load_example(use_case: str) -> ExampleDatasets:
          reference).

      """
-     fixture = _get_fixture_by_name(use_case)
-     primary_dataset, reference_dataset, corpus_dataset = get_datasets(use_case)
+     fixture = get_fixture_by_name(use_case)
+     primary_inferences, reference_inferences, corpus_inferences = get_inferences(use_case)
      print(f"📥 Loaded {use_case} example datasets.")
      print("ℹ️ About this use-case:")
      print(fixture.description)
-     return ExampleDatasets(
-         primary=primary_dataset,
-         reference=reference_dataset,
-         corpus=corpus_dataset,
+     return ExampleInferences(
+         primary=primary_inferences,
+         reference=reference_inferences,
+         corpus=corpus_inferences,
      )


@@ -544,7 +551,7 @@ class GCSAssets(NamedTuple):
      )


- def _download(fixture: Fixture, location: Path) -> Iterator[Tuple[DatasetRole, Path]]:
+ def _download(fixture: Fixture, location: Path) -> Iterator[tuple[InferencesRole, Path]]:
      for role, path in fixture.paths():
          yield role, GCSAssets().metadata(path).save_artifact(location)

@@ -556,5 +563,5 @@ if __name__ == "__main__":
      for fixture in FIXTURES:
          start_time = time.time()
          print(f"getting {fixture.name}", end="...")
-         dict(_download(fixture, DATASET_DIR))
+         dict(_download(fixture, INFERENCES_DIR))
          print(f"done ({time.time() - start_time:.2f}s)")
phoenix/{datasets/dataset.py → inferences/inferences.py}

@@ -5,7 +5,7 @@ from copy import deepcopy
  from dataclasses import dataclass, fields, replace
  from enum import Enum
  from itertools import groupby
- from typing import Any, Dict, List, Optional, Set, Tuple, Union
+ from typing import Any, Optional, Union

  import numpy as np
  import pandas as pd
@@ -15,8 +15,9 @@ from pandas.api.types import (
  )
  from typing_extensions import TypeAlias

- from phoenix.config import DATASET_DIR, GENERATED_DATASET_NAME_PREFIX
+ from phoenix.config import GENERATED_INFERENCES_NAME_PREFIX, INFERENCES_DIR
  from phoenix.datetime_utils import normalize_timestamps
+ from phoenix.utilities.deprecation import deprecated

  from . import errors as err
  from .schema import (
@@ -30,7 +31,7 @@ from .schema import (
      SchemaFieldName,
      SchemaFieldValue,
  )
- from .validation import validate_dataset_inputs
+ from .validation import validate_inferences_inputs

  logger = logging.getLogger(__name__)

@@ -38,10 +39,16 @@ logger = logging.getLogger(__name__)
  SchemaLike: TypeAlias = Any


- class Dataset:
+ class Inferences:
      """
      A dataset to use for analysis using phoenix.
-     Used to construct a phoenix session via px.launch_app
+     Used to construct a phoenix session via px.launch_app.
+
+     Typical usage example::
+
+         primary_inferences = px.Inferences(
+             dataframe=production_dataframe, schema=schema, name="primary"
+         )

      Parameters
      ----------
@@ -61,7 +68,15 @@ class Dataset:

      Examples
      --------
-     >>> primary_dataset = px.Dataset(dataframe=production_dataframe, schema=schema, name="primary")
+     Define inferences ds from a pandas dataframe df and a schema object schema by running::
+
+         ds = px.Inferences(df, schema)
+
+     Alternatively, provide a name for the inferences that will appear in the application::
+
+         ds = px.Inferences(df, schema, name="training")
+
+     ds is then passed as the primary or reference argument to launch_app.
      """

      _data_file_name: str = "data.parquet"
@@ -78,7 +93,7 @@ class Dataset:
          # allow for schema like objects
          if not isinstance(schema, Schema):
              schema = _get_schema_from_unknown_schema_param(schema)
-         errors = validate_dataset_inputs(
+         errors = validate_inferences_inputs(
              dataframe=dataframe,
              schema=schema,
          )
@@ -92,7 +107,7 @@ class Dataset:
          self.__dataframe: DataFrame = dataframe
          self.__schema: Schema = schema
          self.__name: str = (
-             name if name is not None else f"{GENERATED_DATASET_NAME_PREFIX}{str(uuid.uuid4())}"
+             name if name is not None else f"{GENERATED_INFERENCES_NAME_PREFIX}{str(uuid.uuid4())}"
          )
          self._is_empty = self.dataframe.empty
          logger.info(f"""Dataset: {self.__name} initialized""")
@@ -113,19 +128,33 @@ class Dataset:
          return self.__name

      @classmethod
-     def from_name(cls, name: str) -> "Dataset":
+     def from_name(cls, name: str) -> "Inferences":
          """Retrieves a dataset by name from the file system"""
-         directory = DATASET_DIR / name
+         directory = INFERENCES_DIR / name
          df = read_parquet(directory / cls._data_file_name)
          with open(directory / cls._schema_file_name) as schema_file:
              schema_json = schema_file.read()
          schema = Schema.from_json(schema_json)
          return cls(df, schema, name)

+     def to_disc(self) -> None:
+         """writes the data and schema to disc"""
+         directory = INFERENCES_DIR / self.name
+         directory.mkdir(parents=True, exist_ok=True)
+         self.dataframe.to_parquet(
+             directory / self._data_file_name,
+             allow_truncated_timestamps=True,
+             coerce_timestamps="ms",
+         )
+         schema_json_data = self.schema.to_json()
+         with open(directory / self._schema_file_name, "w+") as schema_file:
+             schema_file.write(schema_json_data)
+
      @classmethod
-     def from_open_inference(cls, dataframe: DataFrame) -> "Dataset":
+     @deprecated("Inferences.from_open_inference is deprecated and will be removed.")
+     def from_open_inference(cls, dataframe: DataFrame) -> "Inferences":
          schema = Schema()
-         column_renaming: Dict[str, str] = {}
+         column_renaming: dict[str, str] = {}
          for group_name, group in groupby(
              sorted(
                  map(_parse_open_inference_column_name, dataframe.columns),
@@ -276,21 +305,53 @@ class Dataset:
              schema,
          )

-     def to_disc(self) -> None:
-         """writes the data and schema to disc"""
-         directory = DATASET_DIR / self.name
-         directory.mkdir(parents=True, exist_ok=True)
-         self.dataframe.to_parquet(
-             directory / self._data_file_name,
-             allow_truncated_timestamps=True,
-             coerce_timestamps="ms",
+
+ class OpenInferenceCategory(Enum):
+     id = "id"
+     timestamp = "timestamp"
+     feature = "feature"
+     tag = "tag"
+     prediction = "prediction"
+     actual = "actual"
+
+
+ class OpenInferenceSpecifier(Enum):
+     default = ""
+     score = "score"
+     label = "label"
+     embedding = "embedding"
+     raw_data = "raw_data"
+     link_to_data = "link_to_data"
+     retrieved_document_ids = "retrieved_document_ids"
+     retrieved_document_scores = "retrieved_document_scores"
+
+
+ @dataclass(frozen=True)
+ class _OpenInferenceColumnName:
+     full_name: str
+     category: OpenInferenceCategory
+     data_type: str
+     specifier: OpenInferenceSpecifier = OpenInferenceSpecifier.default
+     name: str = ""
+
+
+ def _parse_open_inference_column_name(column_name: str) -> _OpenInferenceColumnName:
+     pattern = (
+         r"^:(?P<category>\w+)\.(?P<data_type>\[\w+\]|\w+)(\.(?P<specifier>\w+))?:(?P<name>.*)?$"
+     )
+     if match := re.match(pattern, column_name):
+         extract = match.groupdict(default="")
+         return _OpenInferenceColumnName(
+             full_name=column_name,
+             category=OpenInferenceCategory(extract.get("category", "").lower()),
+             data_type=extract.get("data_type", "").lower(),
+             specifier=OpenInferenceSpecifier(extract.get("specifier", "").lower()),
+             name=extract.get("name", ""),
          )
-         schema_json_data = self.schema.to_json()
-         with open(directory / self._schema_file_name, "w+") as schema_file:
-             schema_file.write(schema_json_data)
+     raise ValueError(f"Invalid format for column name: {column_name}")


- def _parse_dataframe_and_schema(dataframe: DataFrame, schema: Schema) -> Tuple[DataFrame, Schema]:
+ def _parse_dataframe_and_schema(dataframe: DataFrame, schema: Schema) -> tuple[DataFrame, Schema]:
      """
      Parses a dataframe according to a schema, infers feature columns names when
      they are not explicitly provided, and removes excluded column names from
@@ -303,12 +364,12 @@ def _parse_dataframe_and_schema(dataframe: DataFrame, schema: Schema) -> Tuple[D
      names present in the dataframe but not included in any other schema fields.
      """

-     unseen_excluded_column_names: Set[str] = (
+     unseen_excluded_column_names: set[str] = (
          set(schema.excluded_column_names) if schema.excluded_column_names is not None else set()
      )
-     unseen_column_names: Set[str] = set(dataframe.columns.to_list())
-     column_name_to_include: Dict[str, bool] = {}
-     schema_patch: Dict[SchemaFieldName, SchemaFieldValue] = {}
+     unseen_column_names: set[str] = set(dataframe.columns.to_list())
+     column_name_to_include: dict[str, bool] = {}
+     schema_patch: dict[SchemaFieldName, SchemaFieldValue] = {}

      for schema_field_name in SINGLE_COLUMN_SCHEMA_FIELD_NAMES:
          _check_single_column_schema_field_for_excluded_columns(
@@ -373,10 +434,10 @@ def _parse_dataframe_and_schema(dataframe: DataFrame, schema: Schema) -> Tuple[D
  def _check_single_column_schema_field_for_excluded_columns(
      schema: Schema,
      schema_field_name: str,
-     unseen_excluded_column_names: Set[str],
-     schema_patch: Dict[SchemaFieldName, SchemaFieldValue],
-     column_name_to_include: Dict[str, bool],
-     unseen_column_names: Set[str],
+     unseen_excluded_column_names: set[str],
+     schema_patch: dict[SchemaFieldName, SchemaFieldValue],
+     column_name_to_include: dict[str, bool],
+     unseen_column_names: set[str],
  ) -> None:
      """
      Checks single-column schema fields for excluded column names.
@@ -394,18 +455,18 @@
  def _check_multi_column_schema_field_for_excluded_columns(
      schema: Schema,
      schema_field_name: str,
-     unseen_excluded_column_names: Set[str],
-     schema_patch: Dict[SchemaFieldName, SchemaFieldValue],
-     column_name_to_include: Dict[str, bool],
-     unseen_column_names: Set[str],
+     unseen_excluded_column_names: set[str],
+     schema_patch: dict[SchemaFieldName, SchemaFieldValue],
+     column_name_to_include: dict[str, bool],
+     unseen_column_names: set[str],
  ) -> None:
      """
      Checks multi-column schema fields for excluded columns names.
      """
-     column_names: Optional[List[str]] = getattr(schema, schema_field_name)
+     column_names: Optional[list[str]] = getattr(schema, schema_field_name)
      if column_names:
-         included_column_names: List[str] = []
-         excluded_column_names: List[str] = []
+         included_column_names: list[str] = []
+         excluded_column_names: list[str] = []
          for column_name in column_names:
              is_included_column = column_name not in unseen_excluded_column_names
              column_name_to_include[column_name] = is_included_column
@@ -421,10 +482,10 @@

  def _check_embedding_features_schema_field_for_excluded_columns(
      embedding_features: EmbeddingFeatures,
-     unseen_excluded_column_names: Set[str],
-     schema_patch: Dict[SchemaFieldName, SchemaFieldValue],
-     column_name_to_include: Dict[str, bool],
-     unseen_column_names: Set[str],
+     unseen_excluded_column_names: set[str],
+     schema_patch: dict[SchemaFieldName, SchemaFieldValue],
+     column_name_to_include: dict[str, bool],
+     unseen_column_names: set[str],
  ) -> None:
      """
      Check embedding features for excluded column names.
@@ -466,8 +527,8 @@

  def _check_embedding_column_names_for_excluded_columns(
      embedding_column_name_mapping: EmbeddingColumnNames,
-     column_name_to_include: Dict[str, bool],
-     unseen_column_names: Set[str],
+     column_name_to_include: dict[str, bool],
+     unseen_column_names: set[str],
  ) -> None:
      """
      Check embedding column names for excluded column names.
@@ -481,10 +542,10 @@

  def _discover_feature_columns(
      dataframe: DataFrame,
-     unseen_excluded_column_names: Set[str],
-     schema_patch: Dict[SchemaFieldName, SchemaFieldValue],
-     column_name_to_include: Dict[str, bool],
-     unseen_column_names: Set[str],
+     unseen_excluded_column_names: set[str],
+     schema_patch: dict[SchemaFieldName, SchemaFieldValue],
+     column_name_to_include: dict[str, bool],
+     unseen_column_names: set[str],
  ) -> None:
      """
      Adds unseen and un-excluded columns as features, with the exception of "prediction_id"
@@ -498,10 +559,10 @@
      else:
          unseen_excluded_column_names.discard(column_name)
          logger.debug(f"excluded feature: {column_name}")
-     original_column_positions: List[int] = dataframe.columns.get_indexer(
+     original_column_positions: list[int] = dataframe.columns.get_indexer(
          discovered_feature_column_names
      )  # type: ignore
-     feature_column_name_to_position: Dict[str, int] = dict(
+     feature_column_name_to_position: dict[str, int] = dict(
          zip(discovered_feature_column_names, original_column_positions)
      )
      discovered_feature_column_names.sort(key=lambda col: feature_column_name_to_position[col])
@@ -514,16 +575,16 @@
  def _create_and_normalize_dataframe_and_schema(
      dataframe: DataFrame,
      schema: Schema,
-     schema_patch: Dict[SchemaFieldName, SchemaFieldValue],
-     column_name_to_include: Dict[str, bool],
- ) -> Tuple[DataFrame, Schema]:
+     schema_patch: dict[SchemaFieldName, SchemaFieldValue],
+     column_name_to_include: dict[str, bool],
+ ) -> tuple[DataFrame, Schema]:
      """
      Creates new dataframe and schema objects to reflect excluded column names
      and discovered features. This also normalizes dataframe columns to ensure a
      standard set of columns (i.e. timestamp and prediction_id) and datatypes for
      those columns.
      """
-     included_column_names: List[str] = []
+     included_column_names: list[str] = []
      for column_name in dataframe.columns:
          if column_name_to_include.get(str(column_name), False):
              included_column_names.append(str(column_name))
@@ -587,7 +648,7 @@ def _normalize_timestamps(
      dataframe: DataFrame,
      schema: Schema,
      default_timestamp: Timestamp,
- ) -> Tuple[DataFrame, Schema]:
+ ) -> tuple[DataFrame, Schema]:
      """
      Ensures that the dataframe has a timestamp column and the schema has a timestamp field. If the
      input dataframe contains a Unix or datetime timestamp or ISO8601 timestamp strings column, it
@@ -614,7 +675,7 @@

  def _get_schema_from_unknown_schema_param(schemaLike: SchemaLike) -> Schema:
      """
-     Compatibility function for converting from arize.utils.types.Schema to phoenix.datasets.Schema
+     Compatibility function for converting from arize.utils.types.Schema to phoenix.inferences.Schema
      """
      try:
          from arize.utils.types import (
@@ -625,7 +686,7 @@
      if not isinstance(schemaLike, ArizeSchema):
          raise ValueError("Unknown schema passed to Dataset. Please pass a phoenix Schema")

-     embedding_feature_column_names: Dict[str, EmbeddingColumnNames] = {}
+     embedding_feature_column_names: dict[str, EmbeddingColumnNames] = {}
      if schemaLike.embedding_feature_column_names is not None:
          for (
              embedding_name,
@@ -673,54 +734,9 @@ def _get_schema_from_unknown_schema_param(schemaLike: SchemaLike) -> Schema:
      )


- def _add_prediction_id(num_rows: int) -> List[str]:
+ def _add_prediction_id(num_rows: int) -> list[str]:
      return [str(uuid.uuid4()) for _ in range(num_rows)]


- class OpenInferenceCategory(Enum):
-     id = "id"
-     timestamp = "timestamp"
-     feature = "feature"
-     tag = "tag"
-     prediction = "prediction"
-     actual = "actual"
-
-
- class OpenInferenceSpecifier(Enum):
-     default = ""
-     score = "score"
-     label = "label"
-     embedding = "embedding"
-     raw_data = "raw_data"
-     link_to_data = "link_to_data"
-     retrieved_document_ids = "retrieved_document_ids"
-     retrieved_document_scores = "retrieved_document_scores"
-
-
- @dataclass(frozen=True)
- class _OpenInferenceColumnName:
-     full_name: str
-     category: OpenInferenceCategory
-     data_type: str
-     specifier: OpenInferenceSpecifier = OpenInferenceSpecifier.default
-     name: str = ""
-
-
- def _parse_open_inference_column_name(column_name: str) -> _OpenInferenceColumnName:
-     pattern = (
-         r"^:(?P<category>\w+)\.(?P<data_type>\[\w+\]|\w+)(\.(?P<specifier>\w+))?:(?P<name>.*)?$"
-     )
-     if match := re.match(pattern, column_name):
-         extract = match.groupdict(default="")
-         return _OpenInferenceColumnName(
-             full_name=column_name,
-             category=OpenInferenceCategory(extract.get("category", "").lower()),
-             data_type=extract.get("data_type", "").lower(),
-             specifier=OpenInferenceSpecifier(extract.get("specifier", "").lower()),
-             name=extract.get("name", ""),
-         )
-     raise ValueError(f"Invalid format for column name: {column_name}")
-
-
  # A dataset with no data. Useful for stubs
- EMPTY_DATASET = Dataset(pd.DataFrame(), schema=Schema())
+ EMPTY_INFERENCES = Inferences(pd.DataFrame(), schema=Schema())
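The OpenInference column-name grammar handled by _parse_open_inference_column_name (moved from the bottom of the module to above _parse_dataframe_and_schema in this diff, otherwise unchanged) is :category.data_type[.specifier]:name. A few illustrative inputs run against the regex above; the column names are invented, only the pattern comes from the source:

    import re

    pattern = r"^:(?P<category>\w+)\.(?P<data_type>\[\w+\]|\w+)(\.(?P<specifier>\w+))?:(?P<name>.*)?$"

    for column_name in (
        ":prediction.str:label",           # category=prediction, data_type=str, name=label
        ":feature.[float].embedding:vec",  # bracketed data_type plus an optional specifier
        ":timestamp.iso8601:",             # an empty name is allowed by the trailing (?P<name>.*)?
    ):
        match = re.match(pattern, column_name)
        assert match is not None
        print(match.groupdict(default=""))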