arize-phoenix 3.16.0__py3-none-any.whl → 7.7.0__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.

Potentially problematic release: this version of arize-phoenix might be problematic.

Files changed (338)
  1. arize_phoenix-7.7.0.dist-info/METADATA +261 -0
  2. arize_phoenix-7.7.0.dist-info/RECORD +345 -0
  3. {arize_phoenix-3.16.0.dist-info → arize_phoenix-7.7.0.dist-info}/WHEEL +1 -1
  4. arize_phoenix-7.7.0.dist-info/entry_points.txt +3 -0
  5. phoenix/__init__.py +86 -14
  6. phoenix/auth.py +309 -0
  7. phoenix/config.py +675 -45
  8. phoenix/core/model.py +32 -30
  9. phoenix/core/model_schema.py +102 -109
  10. phoenix/core/model_schema_adapter.py +48 -45
  11. phoenix/datetime_utils.py +24 -3
  12. phoenix/db/README.md +54 -0
  13. phoenix/db/__init__.py +4 -0
  14. phoenix/db/alembic.ini +85 -0
  15. phoenix/db/bulk_inserter.py +294 -0
  16. phoenix/db/engines.py +208 -0
  17. phoenix/db/enums.py +20 -0
  18. phoenix/db/facilitator.py +113 -0
  19. phoenix/db/helpers.py +159 -0
  20. phoenix/db/insertion/constants.py +2 -0
  21. phoenix/db/insertion/dataset.py +227 -0
  22. phoenix/db/insertion/document_annotation.py +171 -0
  23. phoenix/db/insertion/evaluation.py +191 -0
  24. phoenix/db/insertion/helpers.py +98 -0
  25. phoenix/db/insertion/span.py +193 -0
  26. phoenix/db/insertion/span_annotation.py +158 -0
  27. phoenix/db/insertion/trace_annotation.py +158 -0
  28. phoenix/db/insertion/types.py +256 -0
  29. phoenix/db/migrate.py +86 -0
  30. phoenix/db/migrations/data_migration_scripts/populate_project_sessions.py +199 -0
  31. phoenix/db/migrations/env.py +114 -0
  32. phoenix/db/migrations/script.py.mako +26 -0
  33. phoenix/db/migrations/versions/10460e46d750_datasets.py +317 -0
  34. phoenix/db/migrations/versions/3be8647b87d8_add_token_columns_to_spans_table.py +126 -0
  35. phoenix/db/migrations/versions/4ded9e43755f_create_project_sessions_table.py +66 -0
  36. phoenix/db/migrations/versions/cd164e83824f_users_and_tokens.py +157 -0
  37. phoenix/db/migrations/versions/cf03bd6bae1d_init.py +280 -0
  38. phoenix/db/models.py +807 -0
  39. phoenix/exceptions.py +5 -1
  40. phoenix/experiments/__init__.py +6 -0
  41. phoenix/experiments/evaluators/__init__.py +29 -0
  42. phoenix/experiments/evaluators/base.py +158 -0
  43. phoenix/experiments/evaluators/code_evaluators.py +184 -0
  44. phoenix/experiments/evaluators/llm_evaluators.py +473 -0
  45. phoenix/experiments/evaluators/utils.py +236 -0
  46. phoenix/experiments/functions.py +772 -0
  47. phoenix/experiments/tracing.py +86 -0
  48. phoenix/experiments/types.py +726 -0
  49. phoenix/experiments/utils.py +25 -0
  50. phoenix/inferences/__init__.py +0 -0
  51. phoenix/{datasets → inferences}/errors.py +6 -5
  52. phoenix/{datasets → inferences}/fixtures.py +49 -42
  53. phoenix/{datasets/dataset.py → inferences/inferences.py} +121 -105
  54. phoenix/{datasets → inferences}/schema.py +11 -11
  55. phoenix/{datasets → inferences}/validation.py +13 -14
  56. phoenix/logging/__init__.py +3 -0
  57. phoenix/logging/_config.py +90 -0
  58. phoenix/logging/_filter.py +6 -0
  59. phoenix/logging/_formatter.py +69 -0
  60. phoenix/metrics/__init__.py +5 -4
  61. phoenix/metrics/binning.py +4 -3
  62. phoenix/metrics/metrics.py +2 -1
  63. phoenix/metrics/mixins.py +7 -6
  64. phoenix/metrics/retrieval_metrics.py +2 -1
  65. phoenix/metrics/timeseries.py +5 -4
  66. phoenix/metrics/wrappers.py +9 -3
  67. phoenix/pointcloud/clustering.py +5 -5
  68. phoenix/pointcloud/pointcloud.py +7 -5
  69. phoenix/pointcloud/projectors.py +5 -6
  70. phoenix/pointcloud/umap_parameters.py +53 -52
  71. phoenix/server/api/README.md +28 -0
  72. phoenix/server/api/auth.py +44 -0
  73. phoenix/server/api/context.py +152 -9
  74. phoenix/server/api/dataloaders/__init__.py +91 -0
  75. phoenix/server/api/dataloaders/annotation_summaries.py +139 -0
  76. phoenix/server/api/dataloaders/average_experiment_run_latency.py +54 -0
  77. phoenix/server/api/dataloaders/cache/__init__.py +3 -0
  78. phoenix/server/api/dataloaders/cache/two_tier_cache.py +68 -0
  79. phoenix/server/api/dataloaders/dataset_example_revisions.py +131 -0
  80. phoenix/server/api/dataloaders/dataset_example_spans.py +38 -0
  81. phoenix/server/api/dataloaders/document_evaluation_summaries.py +144 -0
  82. phoenix/server/api/dataloaders/document_evaluations.py +31 -0
  83. phoenix/server/api/dataloaders/document_retrieval_metrics.py +89 -0
  84. phoenix/server/api/dataloaders/experiment_annotation_summaries.py +79 -0
  85. phoenix/server/api/dataloaders/experiment_error_rates.py +58 -0
  86. phoenix/server/api/dataloaders/experiment_run_annotations.py +36 -0
  87. phoenix/server/api/dataloaders/experiment_run_counts.py +49 -0
  88. phoenix/server/api/dataloaders/experiment_sequence_number.py +44 -0
  89. phoenix/server/api/dataloaders/latency_ms_quantile.py +188 -0
  90. phoenix/server/api/dataloaders/min_start_or_max_end_times.py +85 -0
  91. phoenix/server/api/dataloaders/project_by_name.py +31 -0
  92. phoenix/server/api/dataloaders/record_counts.py +116 -0
  93. phoenix/server/api/dataloaders/session_io.py +79 -0
  94. phoenix/server/api/dataloaders/session_num_traces.py +30 -0
  95. phoenix/server/api/dataloaders/session_num_traces_with_error.py +32 -0
  96. phoenix/server/api/dataloaders/session_token_usages.py +41 -0
  97. phoenix/server/api/dataloaders/session_trace_latency_ms_quantile.py +55 -0
  98. phoenix/server/api/dataloaders/span_annotations.py +26 -0
  99. phoenix/server/api/dataloaders/span_dataset_examples.py +31 -0
  100. phoenix/server/api/dataloaders/span_descendants.py +57 -0
  101. phoenix/server/api/dataloaders/span_projects.py +33 -0
  102. phoenix/server/api/dataloaders/token_counts.py +124 -0
  103. phoenix/server/api/dataloaders/trace_by_trace_ids.py +25 -0
  104. phoenix/server/api/dataloaders/trace_root_spans.py +32 -0
  105. phoenix/server/api/dataloaders/user_roles.py +30 -0
  106. phoenix/server/api/dataloaders/users.py +33 -0
  107. phoenix/server/api/exceptions.py +48 -0
  108. phoenix/server/api/helpers/__init__.py +12 -0
  109. phoenix/server/api/helpers/dataset_helpers.py +217 -0
  110. phoenix/server/api/helpers/experiment_run_filters.py +763 -0
  111. phoenix/server/api/helpers/playground_clients.py +948 -0
  112. phoenix/server/api/helpers/playground_registry.py +70 -0
  113. phoenix/server/api/helpers/playground_spans.py +455 -0
  114. phoenix/server/api/input_types/AddExamplesToDatasetInput.py +16 -0
  115. phoenix/server/api/input_types/AddSpansToDatasetInput.py +14 -0
  116. phoenix/server/api/input_types/ChatCompletionInput.py +38 -0
  117. phoenix/server/api/input_types/ChatCompletionMessageInput.py +24 -0
  118. phoenix/server/api/input_types/ClearProjectInput.py +15 -0
  119. phoenix/server/api/input_types/ClusterInput.py +2 -2
  120. phoenix/server/api/input_types/CreateDatasetInput.py +12 -0
  121. phoenix/server/api/input_types/CreateSpanAnnotationInput.py +18 -0
  122. phoenix/server/api/input_types/CreateTraceAnnotationInput.py +18 -0
  123. phoenix/server/api/input_types/DataQualityMetricInput.py +5 -2
  124. phoenix/server/api/input_types/DatasetExampleInput.py +14 -0
  125. phoenix/server/api/input_types/DatasetSort.py +17 -0
  126. phoenix/server/api/input_types/DatasetVersionSort.py +16 -0
  127. phoenix/server/api/input_types/DeleteAnnotationsInput.py +7 -0
  128. phoenix/server/api/input_types/DeleteDatasetExamplesInput.py +13 -0
  129. phoenix/server/api/input_types/DeleteDatasetInput.py +7 -0
  130. phoenix/server/api/input_types/DeleteExperimentsInput.py +7 -0
  131. phoenix/server/api/input_types/DimensionFilter.py +4 -4
  132. phoenix/server/api/input_types/GenerativeModelInput.py +17 -0
  133. phoenix/server/api/input_types/Granularity.py +1 -1
  134. phoenix/server/api/input_types/InvocationParameters.py +162 -0
  135. phoenix/server/api/input_types/PatchAnnotationInput.py +19 -0
  136. phoenix/server/api/input_types/PatchDatasetExamplesInput.py +35 -0
  137. phoenix/server/api/input_types/PatchDatasetInput.py +14 -0
  138. phoenix/server/api/input_types/PerformanceMetricInput.py +5 -2
  139. phoenix/server/api/input_types/ProjectSessionSort.py +29 -0
  140. phoenix/server/api/input_types/SpanAnnotationSort.py +17 -0
  141. phoenix/server/api/input_types/SpanSort.py +134 -69
  142. phoenix/server/api/input_types/TemplateOptions.py +10 -0
  143. phoenix/server/api/input_types/TraceAnnotationSort.py +17 -0
  144. phoenix/server/api/input_types/UserRoleInput.py +9 -0
  145. phoenix/server/api/mutations/__init__.py +28 -0
  146. phoenix/server/api/mutations/api_key_mutations.py +167 -0
  147. phoenix/server/api/mutations/chat_mutations.py +593 -0
  148. phoenix/server/api/mutations/dataset_mutations.py +591 -0
  149. phoenix/server/api/mutations/experiment_mutations.py +75 -0
  150. phoenix/server/api/{types/ExportEventsMutation.py → mutations/export_events_mutations.py} +21 -18
  151. phoenix/server/api/mutations/project_mutations.py +57 -0
  152. phoenix/server/api/mutations/span_annotations_mutations.py +128 -0
  153. phoenix/server/api/mutations/trace_annotations_mutations.py +127 -0
  154. phoenix/server/api/mutations/user_mutations.py +329 -0
  155. phoenix/server/api/openapi/__init__.py +0 -0
  156. phoenix/server/api/openapi/main.py +17 -0
  157. phoenix/server/api/openapi/schema.py +16 -0
  158. phoenix/server/api/queries.py +738 -0
  159. phoenix/server/api/routers/__init__.py +11 -0
  160. phoenix/server/api/routers/auth.py +284 -0
  161. phoenix/server/api/routers/embeddings.py +26 -0
  162. phoenix/server/api/routers/oauth2.py +488 -0
  163. phoenix/server/api/routers/v1/__init__.py +64 -0
  164. phoenix/server/api/routers/v1/datasets.py +1017 -0
  165. phoenix/server/api/routers/v1/evaluations.py +362 -0
  166. phoenix/server/api/routers/v1/experiment_evaluations.py +115 -0
  167. phoenix/server/api/routers/v1/experiment_runs.py +167 -0
  168. phoenix/server/api/routers/v1/experiments.py +308 -0
  169. phoenix/server/api/routers/v1/pydantic_compat.py +78 -0
  170. phoenix/server/api/routers/v1/spans.py +267 -0
  171. phoenix/server/api/routers/v1/traces.py +208 -0
  172. phoenix/server/api/routers/v1/utils.py +95 -0
  173. phoenix/server/api/schema.py +44 -247
  174. phoenix/server/api/subscriptions.py +597 -0
  175. phoenix/server/api/types/Annotation.py +21 -0
  176. phoenix/server/api/types/AnnotationSummary.py +55 -0
  177. phoenix/server/api/types/AnnotatorKind.py +16 -0
  178. phoenix/server/api/types/ApiKey.py +27 -0
  179. phoenix/server/api/types/AuthMethod.py +9 -0
  180. phoenix/server/api/types/ChatCompletionMessageRole.py +11 -0
  181. phoenix/server/api/types/ChatCompletionSubscriptionPayload.py +46 -0
  182. phoenix/server/api/types/Cluster.py +25 -24
  183. phoenix/server/api/types/CreateDatasetPayload.py +8 -0
  184. phoenix/server/api/types/DataQualityMetric.py +31 -13
  185. phoenix/server/api/types/Dataset.py +288 -63
  186. phoenix/server/api/types/DatasetExample.py +85 -0
  187. phoenix/server/api/types/DatasetExampleRevision.py +34 -0
  188. phoenix/server/api/types/DatasetVersion.py +14 -0
  189. phoenix/server/api/types/Dimension.py +32 -31
  190. phoenix/server/api/types/DocumentEvaluationSummary.py +9 -8
  191. phoenix/server/api/types/EmbeddingDimension.py +56 -49
  192. phoenix/server/api/types/Evaluation.py +25 -31
  193. phoenix/server/api/types/EvaluationSummary.py +30 -50
  194. phoenix/server/api/types/Event.py +20 -20
  195. phoenix/server/api/types/ExampleRevisionInterface.py +14 -0
  196. phoenix/server/api/types/Experiment.py +152 -0
  197. phoenix/server/api/types/ExperimentAnnotationSummary.py +13 -0
  198. phoenix/server/api/types/ExperimentComparison.py +17 -0
  199. phoenix/server/api/types/ExperimentRun.py +119 -0
  200. phoenix/server/api/types/ExperimentRunAnnotation.py +56 -0
  201. phoenix/server/api/types/GenerativeModel.py +9 -0
  202. phoenix/server/api/types/GenerativeProvider.py +85 -0
  203. phoenix/server/api/types/Inferences.py +80 -0
  204. phoenix/server/api/types/InferencesRole.py +23 -0
  205. phoenix/server/api/types/LabelFraction.py +7 -0
  206. phoenix/server/api/types/MimeType.py +2 -2
  207. phoenix/server/api/types/Model.py +54 -54
  208. phoenix/server/api/types/PerformanceMetric.py +8 -5
  209. phoenix/server/api/types/Project.py +407 -142
  210. phoenix/server/api/types/ProjectSession.py +139 -0
  211. phoenix/server/api/types/Segments.py +4 -4
  212. phoenix/server/api/types/Span.py +221 -176
  213. phoenix/server/api/types/SpanAnnotation.py +43 -0
  214. phoenix/server/api/types/SpanIOValue.py +15 -0
  215. phoenix/server/api/types/SystemApiKey.py +9 -0
  216. phoenix/server/api/types/TemplateLanguage.py +10 -0
  217. phoenix/server/api/types/TimeSeries.py +19 -15
  218. phoenix/server/api/types/TokenUsage.py +11 -0
  219. phoenix/server/api/types/Trace.py +154 -0
  220. phoenix/server/api/types/TraceAnnotation.py +45 -0
  221. phoenix/server/api/types/UMAPPoints.py +7 -7
  222. phoenix/server/api/types/User.py +60 -0
  223. phoenix/server/api/types/UserApiKey.py +45 -0
  224. phoenix/server/api/types/UserRole.py +15 -0
  225. phoenix/server/api/types/node.py +13 -107
  226. phoenix/server/api/types/pagination.py +156 -57
  227. phoenix/server/api/utils.py +34 -0
  228. phoenix/server/app.py +864 -115
  229. phoenix/server/bearer_auth.py +163 -0
  230. phoenix/server/dml_event.py +136 -0
  231. phoenix/server/dml_event_handler.py +256 -0
  232. phoenix/server/email/__init__.py +0 -0
  233. phoenix/server/email/sender.py +97 -0
  234. phoenix/server/email/templates/__init__.py +0 -0
  235. phoenix/server/email/templates/password_reset.html +19 -0
  236. phoenix/server/email/types.py +11 -0
  237. phoenix/server/grpc_server.py +102 -0
  238. phoenix/server/jwt_store.py +505 -0
  239. phoenix/server/main.py +305 -116
  240. phoenix/server/oauth2.py +52 -0
  241. phoenix/server/openapi/__init__.py +0 -0
  242. phoenix/server/prometheus.py +111 -0
  243. phoenix/server/rate_limiters.py +188 -0
  244. phoenix/server/static/.vite/manifest.json +87 -0
  245. phoenix/server/static/assets/components-Cy9nwIvF.js +2125 -0
  246. phoenix/server/static/assets/index-BKvHIxkk.js +113 -0
  247. phoenix/server/static/assets/pages-CUi2xCVQ.js +4449 -0
  248. phoenix/server/static/assets/vendor-DvC8cT4X.js +894 -0
  249. phoenix/server/static/assets/vendor-DxkFTwjz.css +1 -0
  250. phoenix/server/static/assets/vendor-arizeai-Do1793cv.js +662 -0
  251. phoenix/server/static/assets/vendor-codemirror-BzwZPyJM.js +24 -0
  252. phoenix/server/static/assets/vendor-recharts-_Jb7JjhG.js +59 -0
  253. phoenix/server/static/assets/vendor-shiki-Cl9QBraO.js +5 -0
  254. phoenix/server/static/assets/vendor-three-DwGkEfCM.js +2998 -0
  255. phoenix/server/telemetry.py +68 -0
  256. phoenix/server/templates/index.html +82 -23
  257. phoenix/server/thread_server.py +3 -3
  258. phoenix/server/types.py +275 -0
  259. phoenix/services.py +27 -18
  260. phoenix/session/client.py +743 -68
  261. phoenix/session/data_extractor.py +31 -7
  262. phoenix/session/evaluation.py +3 -9
  263. phoenix/session/session.py +263 -219
  264. phoenix/settings.py +22 -0
  265. phoenix/trace/__init__.py +2 -22
  266. phoenix/trace/attributes.py +338 -0
  267. phoenix/trace/dsl/README.md +116 -0
  268. phoenix/trace/dsl/filter.py +663 -213
  269. phoenix/trace/dsl/helpers.py +73 -21
  270. phoenix/trace/dsl/query.py +574 -201
  271. phoenix/trace/exporter.py +24 -19
  272. phoenix/trace/fixtures.py +368 -32
  273. phoenix/trace/otel.py +71 -219
  274. phoenix/trace/projects.py +3 -2
  275. phoenix/trace/schemas.py +33 -11
  276. phoenix/trace/span_evaluations.py +21 -16
  277. phoenix/trace/span_json_decoder.py +6 -4
  278. phoenix/trace/span_json_encoder.py +2 -2
  279. phoenix/trace/trace_dataset.py +47 -32
  280. phoenix/trace/utils.py +21 -4
  281. phoenix/utilities/__init__.py +0 -26
  282. phoenix/utilities/client.py +132 -0
  283. phoenix/utilities/deprecation.py +31 -0
  284. phoenix/utilities/error_handling.py +3 -2
  285. phoenix/utilities/json.py +109 -0
  286. phoenix/utilities/logging.py +8 -0
  287. phoenix/utilities/project.py +2 -2
  288. phoenix/utilities/re.py +49 -0
  289. phoenix/utilities/span_store.py +0 -23
  290. phoenix/utilities/template_formatters.py +99 -0
  291. phoenix/version.py +1 -1
  292. arize_phoenix-3.16.0.dist-info/METADATA +0 -495
  293. arize_phoenix-3.16.0.dist-info/RECORD +0 -178
  294. phoenix/core/project.py +0 -617
  295. phoenix/core/traces.py +0 -100
  296. phoenix/experimental/evals/__init__.py +0 -73
  297. phoenix/experimental/evals/evaluators.py +0 -413
  298. phoenix/experimental/evals/functions/__init__.py +0 -4
  299. phoenix/experimental/evals/functions/classify.py +0 -453
  300. phoenix/experimental/evals/functions/executor.py +0 -353
  301. phoenix/experimental/evals/functions/generate.py +0 -138
  302. phoenix/experimental/evals/functions/processing.py +0 -76
  303. phoenix/experimental/evals/models/__init__.py +0 -14
  304. phoenix/experimental/evals/models/anthropic.py +0 -175
  305. phoenix/experimental/evals/models/base.py +0 -170
  306. phoenix/experimental/evals/models/bedrock.py +0 -221
  307. phoenix/experimental/evals/models/litellm.py +0 -134
  308. phoenix/experimental/evals/models/openai.py +0 -448
  309. phoenix/experimental/evals/models/rate_limiters.py +0 -246
  310. phoenix/experimental/evals/models/vertex.py +0 -173
  311. phoenix/experimental/evals/models/vertexai.py +0 -186
  312. phoenix/experimental/evals/retrievals.py +0 -96
  313. phoenix/experimental/evals/templates/__init__.py +0 -50
  314. phoenix/experimental/evals/templates/default_templates.py +0 -472
  315. phoenix/experimental/evals/templates/template.py +0 -195
  316. phoenix/experimental/evals/utils/__init__.py +0 -172
  317. phoenix/experimental/evals/utils/threads.py +0 -27
  318. phoenix/server/api/helpers.py +0 -11
  319. phoenix/server/api/routers/evaluation_handler.py +0 -109
  320. phoenix/server/api/routers/span_handler.py +0 -70
  321. phoenix/server/api/routers/trace_handler.py +0 -60
  322. phoenix/server/api/types/DatasetRole.py +0 -23
  323. phoenix/server/static/index.css +0 -6
  324. phoenix/server/static/index.js +0 -7447
  325. phoenix/storage/span_store/__init__.py +0 -23
  326. phoenix/storage/span_store/text_file.py +0 -85
  327. phoenix/trace/dsl/missing.py +0 -60
  328. phoenix/trace/langchain/__init__.py +0 -3
  329. phoenix/trace/langchain/instrumentor.py +0 -35
  330. phoenix/trace/llama_index/__init__.py +0 -3
  331. phoenix/trace/llama_index/callback.py +0 -102
  332. phoenix/trace/openai/__init__.py +0 -3
  333. phoenix/trace/openai/instrumentor.py +0 -30
  334. {arize_phoenix-3.16.0.dist-info → arize_phoenix-7.7.0.dist-info}/licenses/IP_NOTICE +0 -0
  335. {arize_phoenix-3.16.0.dist-info → arize_phoenix-7.7.0.dist-info}/licenses/LICENSE +0 -0
  336. /phoenix/{datasets → db/insertion}/__init__.py +0 -0
  337. /phoenix/{experimental → db/migrations}/__init__.py +0 -0
  338. /phoenix/{storage → db/migrations/data_migration_scripts}/__init__.py +0 -0
phoenix/trace/dsl/query.py (+574 -201)

@@ -1,30 +1,37 @@
-import json
+import warnings
 from collections import defaultdict
-from dataclasses import dataclass, field, fields, replace
-from functools import cached_property, partial
+from collections.abc import Iterable, Mapping, Sequence
+from dataclasses import dataclass, field, replace
+from datetime import datetime
+from functools import cached_property
+from itertools import chain
+from random import randint, random
 from types import MappingProxyType
-from typing import (
-    Any,
-    Callable,
-    ClassVar,
-    Dict,
-    Iterable,
-    Iterator,
-    List,
-    Mapping,
-    Optional,
-    Sequence,
-    Tuple,
-    cast,
-)
+from typing import Any, Optional, cast
 
 import pandas as pd
 from openinference.semconv.trace import SpanAttributes
-
+from sqlalchemy import JSON, Column, Label, Select, SQLColumnExpression, and_, func, select
+from sqlalchemy.dialects.postgresql import aggregate_order_by
+from sqlalchemy.orm import Session, aliased
+from typing_extensions import assert_never
+
+from phoenix.config import DEFAULT_PROJECT_NAME
+from phoenix.db import models
+from phoenix.db.helpers import SupportedSQLDialect
+from phoenix.trace.attributes import (
+    JSON_STRING_ATTRIBUTES,
+    SEMANTIC_CONVENTIONS,
+    flatten,
+    get_attribute_value,
+    load_json_strings,
+    unflatten,
+)
 from phoenix.trace.dsl import SpanFilter
-from phoenix.trace.dsl.filter import SupportsGetSpanEvaluation
-from phoenix.trace.schemas import ATTRIBUTE_PREFIX, CONTEXT_PREFIX, Span
-from phoenix.trace.span_json_encoder import span_to_json
+from phoenix.trace.dsl.filter import Projector
+from phoenix.trace.schemas import ATTRIBUTE_PREFIX
+
+DEFAULT_SPAN_LIMIT = 1000
 
 RETRIEVAL_DOCUMENTS = SpanAttributes.RETRIEVAL_DOCUMENTS
 
@@ -38,129 +45,214 @@ _ALIASES = {
     "trace_id": "context.trace_id",
 }
 
-# Because span_kind is an enum, it needs to be converted to string,
-# so it's serializable by pyarrow.
-_CONVERT_TO_STRING = ("span_kind",)
-
 
 def _unalias(key: str) -> str:
     return _ALIASES.get(key, key)
 
 
 @dataclass(frozen=True)
-class Projection:
-    key: str = ""
-    value: Callable[[Span], Any] = field(init=False, repr=False)
-    span_fields: ClassVar[Tuple[str, ...]] = tuple(f.name for f in fields(Span))
-
-    def __bool__(self) -> bool:
-        return bool(self.key)
+class _Base:
+    """The sole purpose of this class is for `super().__post_init__()` to work"""
 
     def __post_init__(self) -> None:
-        key = _unalias(self.key)
-        object.__setattr__(self, "key", key)
-        if key.startswith(CONTEXT_PREFIX):
-            key = key[len(CONTEXT_PREFIX) :]
-            value = partial(self._from_context, key=key)
-        elif key.startswith(ATTRIBUTE_PREFIX):
-            key = self.key[len(ATTRIBUTE_PREFIX) :]
-            value = partial(self._from_attributes, key=key)
-        elif key in self.span_fields:
-            value = partial(self._from_span, key=key)
-        else:
-            value = partial(self._from_attributes, key=key)
-        if self.key in _CONVERT_TO_STRING:
-            object.__setattr__(
-                self,
-                "value",
-                lambda span: None if (v := value(span)) is None else str(v),
-            )
-        else:
-            object.__setattr__(self, "value", value)
+        pass
+
 
-    def __call__(self, span: Span) -> Any:
-        return self.value(span)
+@dataclass(frozen=True)
+class Projection(_Base):
+    key: str = ""
+    _projector: Projector = field(init=False, repr=False)
 
-    @staticmethod
-    def _from_attributes(span: Span, key: str) -> Any:
-        return span.attributes.get(key)
+    def __post_init__(self) -> None:
+        super().__post_init__()
+        object.__setattr__(self, "key", _unalias(self.key))
+        object.__setattr__(self, "_projector", Projector(self.key))
 
-    @staticmethod
-    def _from_context(span: Span, key: str) -> Any:
-        return getattr(span.context, key, None)
+    def __bool__(self) -> bool:
+        return bool(self.key)
 
-    @staticmethod
-    def _from_span(span: Span, key: str) -> Any:
-        return getattr(span, key, None)
+    def __call__(self) -> SQLColumnExpression[Any]:
+        return self._projector()
 
-    def to_dict(self) -> Dict[str, Any]:
+    def to_dict(self) -> dict[str, Any]:
         return {"key": self.key}
 
     @classmethod
     def from_dict(cls, obj: Mapping[str, Any]) -> "Projection":
-        return cls(
-            **({"key": cast(str, key)} if (key := obj.get("key")) else {}),
-        )
+        return cls(**({"key": cast(str, key)} if (key := obj.get("key")) else {}))
+
+
+@dataclass(frozen=True)
+class _HasTmpSuffix(_Base):
+    _tmp_suffix: str = field(init=False, repr=False)
+    """Ideally every column label should get a temporary random suffix that will
+    be removed at the end. This is necessary during query construction because
+    sqlalchemy is not always foolproof, e.g. we have seen `group_by` clauses that
+    were incorrect or ambiguous. We should actively avoid name collisions, which
+    is increasingly likely as queries get more complex.
+    """
+
+    def __post_init__(self) -> None:
+        super().__post_init__()
+        object.__setattr__(self, "_tmp_suffix", f"{randint(0, 10**6):06d}")
+
+    def _remove_tmp_suffix(self, name: str) -> str:
+        if name.endswith(self._tmp_suffix):
+            return name[: -len(self._tmp_suffix)]
+        return name
+
+    def _add_tmp_suffix(self, name: str) -> str:
+        if name.endswith(self._tmp_suffix):
+            return name
+        return name + self._tmp_suffix
 
 
 @dataclass(frozen=True)
-class Explosion(Projection):
+class Explosion(_HasTmpSuffix, Projection):
     kwargs: Mapping[str, str] = field(default_factory=lambda: MappingProxyType({}))
     primary_index_key: str = "context.span_id"
 
-    position_prefix: str = field(init=False, repr=False)
-    primary_index: Projection = field(init=False, repr=False)
+    _position_prefix: str = field(init=False, repr=False)
+    _primary_index: Projection = field(init=False, repr=False)
+    _array_tmp_col_label: str = field(init=False, repr=False)
+    """For sqlite we need to store the array in a temporary column to be able
+    to explode it later in pandas. `_array_tmp_col_label` is the name of this
+    temporary column. The temporary column will have a unique name
+    per instance.
+    """
 
     def __post_init__(self) -> None:
         super().__post_init__()
         position_prefix = _PRESCRIBED_POSITION_PREFIXES.get(self.key, "")
-        object.__setattr__(self, "position_prefix", position_prefix)
-        object.__setattr__(self, "primary_index", Projection(self.primary_index_key))
+        object.__setattr__(self, "_position_prefix", position_prefix)
+        object.__setattr__(self, "_primary_index", Projection(self.primary_index_key))
+        object.__setattr__(self, "_array_tmp_col_label", f"__array_tmp_col_{random()}")
 
     @cached_property
-    def index_keys(self) -> Tuple[str, str]:
-        return (self.primary_index.key, f"{self.position_prefix}position")
-
-    def with_primary_index_key(self, primary_index_key: str) -> "Explosion":
-        return replace(self, primary_index_key=primary_index_key)
-
-    def __call__(self, span: Span) -> Iterator[Dict[str, Any]]:
-        if not isinstance(seq := self.value(span), Sequence):
-            return
-        has_mapping = False
-        for item in seq:
-            if isinstance(item, Mapping):
-                has_mapping = True
-                break
-        if not has_mapping:
-            for i, item in enumerate(seq):
-                if item is not None:
-                    yield {
-                        self.key: item,
-                        self.primary_index.key: self.primary_index(span),
-                        f"{self.position_prefix}position": i,
-                    }
-            return
-        for i, item in enumerate(seq):
-            if not isinstance(item, Mapping):
-                continue
-            record = (
-                {name: item.get(key) for name, key in self.kwargs.items()}
-                if self.kwargs
-                else dict(item)
+    def index_keys(self) -> list[str]:
+        return [self._primary_index.key, f"{self._position_prefix}position"]
+
+    def with_primary_index_key(self, _: str) -> "Explosion":
+        print("`.with_primary_index_key(...)` is deprecated and will be removed in the future.")
+        return self
+
+    def update_sql(
+        self,
+        stmt: Select[Any],
+        dialect: SupportedSQLDialect,
+    ) -> Select[Any]:
+        array = self()
+        if dialect is SupportedSQLDialect.SQLITE:
+            # Because sqlite doesn't support `WITH ORDINALITY`, the order of
+            # the returned (table) values is not guaranteed. So we resort to
+            # post hoc processing using pandas.
+            stmt = stmt.where(
+                func.json_type(array) == "array",
+            ).add_columns(
+                array.label(self._array_tmp_col_label),
+            )
+            return stmt
+        elif dialect is SupportedSQLDialect.POSTGRESQL:
+            element = (
+                func.jsonb_array_elements(array)
+                .table_valued(
+                    Column("obj", JSON),
+                    with_ordinality="position",
+                    joins_implicitly=True,
+                )
+                .render_derived()
             )
-            for v in record.values():
-                if v is not None:
-                    break
+            obj, position = element.c.obj, element.c.position
+            # Use zero-based indexing for backward-compatibility.
+            position_label = (position - 1).label(f"{self._position_prefix}position")
+            if self.kwargs:
+                columns: Iterable[Label[Any]] = (
+                    obj[key.split(".")].label(self._add_tmp_suffix(name))
+                    for name, key in self.kwargs.items()
+                )
             else:
-                record = {}
-            if not record:
-                continue
-            record[self.primary_index.key] = self.primary_index(span)
-            record[f"{self.position_prefix}position"] = i
-            yield record
-
-    def to_dict(self) -> Dict[str, Any]:
+                columns = (obj.label(self._array_tmp_col_label),)
+            stmt = (
+                stmt.where(func.jsonb_typeof(array) == "array")
+                .where(func.jsonb_typeof(obj) == "object")
+                .add_columns(position_label, *columns)
+            )
+            return stmt
+        else:
+            assert_never(dialect)
+
+    def update_df(
+        self,
+        df: pd.DataFrame,
+        dialect: SupportedSQLDialect,
+    ) -> pd.DataFrame:
+        df = df.rename(self._remove_tmp_suffix, axis=1)
+        if df.empty:
+            columns = list(
+                set(
+                    chain(
+                        self.index_keys,
+                        df.drop(self._array_tmp_col_label, axis=1, errors="ignore").columns,
+                        self.kwargs.keys(),
+                    )
+                )
+            )
+            df = pd.DataFrame(columns=columns).set_index(self.index_keys)
+            return df
+        if dialect != SupportedSQLDialect.SQLITE and self.kwargs:
+            df = df.set_index(self.index_keys)
+            return df
+        if dialect is SupportedSQLDialect.SQLITE:
+            # Because sqlite doesn't support `WITH ORDINALITY`, the order of
+            # the returned (table) values is not guaranteed. So we resort to
+            # post hoc processing using pandas.
+            def _extract_values(array: list[Any]) -> list[dict[str, Any]]:
+                if not isinstance(array, Iterable):
+                    return []
+                if not self.kwargs:
+                    return [
+                        {
+                            **dict(flatten(obj)),
+                            f"{self._position_prefix}position": i,
+                        }
+                        for i, obj in enumerate(array)
+                        if isinstance(obj, Mapping)
+                    ]
+                res: list[dict[str, Any]] = []
+                for i, obj in enumerate(array):
+                    if not isinstance(obj, Mapping):
+                        continue
+                    values: dict[str, Any] = {f"{self._position_prefix}position": i}
+                    for name, key in self.kwargs.items():
+                        if (value := get_attribute_value(obj, key)) is not None:
+                            values[name] = value
+                    res.append(values)
+                return res
+
+            records = df.loc[:, self._array_tmp_col_label].dropna().map(_extract_values).explode()
+        elif dialect is SupportedSQLDialect.POSTGRESQL:
+            records = df.loc[:, self._array_tmp_col_label].dropna().map(flatten).map(dict)
+        else:
+            assert_never(dialect)
+        df = df.drop(self._array_tmp_col_label, axis=1)
+        if records.empty:
+            df = df.set_index(self.index_keys[0])
+            return df
+        not_na = records.notna()
+        df_explode = pd.DataFrame.from_records(
+            records.loc[not_na].to_list(),
+            index=records.index[not_na],
+        )
+        if dialect is SupportedSQLDialect.SQLITE:
+            df = _outer_join(df, df_explode)
+        elif dialect is SupportedSQLDialect.POSTGRESQL:
+            df = pd.concat([df, df_explode], axis=1)
+        else:
+            assert_never(dialect)
+        df = df.set_index(self.index_keys)
+        return df
+
+    def to_dict(self) -> dict[str, Any]:
         return {
             **super().to_dict(),
             **({"kwargs": dict(self.kwargs)} if self.kwargs else {}),
@@ -185,29 +277,128 @@ class Explosion(Projection):
 
 
 @dataclass(frozen=True)
-class Concatenation(Projection):
+class Concatenation(_HasTmpSuffix, Projection):
     kwargs: Mapping[str, str] = field(default_factory=lambda: MappingProxyType({}))
     separator: str = "\n\n"
 
+    _array_tmp_col_label: str = field(init=False, repr=False)
+    """For SQLite we need to store the array in a temporary column to be able
+    to concatenate it later in pandas. `_array_tmp_col_label` is the name of
+    this temporary column. The temporary column will have a unique name
+    per instance.
+    """
+
+    def __post_init__(self) -> None:
+        super().__post_init__()
+        object.__setattr__(self, "_array_tmp_col_label", f"__array_tmp_col_{random()}")
+
     def with_separator(self, separator: str = "\n\n") -> "Concatenation":
         return replace(self, separator=separator)
 
-    def __call__(self, span: Span) -> Iterator[Tuple[str, str]]:
-        if not isinstance(seq := self.value(span), Sequence):
-            return
-        if not self.kwargs:
-            yield self.key, self.separator.join(map(str, seq))
-        record = defaultdict(list)
-        for item in seq:
-            if not isinstance(item, Mapping):
-                continue
-            for k, v in self.kwargs.items():
-                if value := item.get(v):
-                    record[k].append(value)
-        for name, values in record.items():
-            yield name, self.separator.join(map(str, values))
-
-    def to_dict(self) -> Dict[str, Any]:
+    def update_sql(
+        self,
+        stmt: Select[Any],
+        dialect: SupportedSQLDialect,
+    ) -> Select[Any]:
+        array = self()
+        if dialect is SupportedSQLDialect.SQLITE:
+            # Because SQLite doesn't support `WITH ORDINALITY`, the order of
+            # the returned table-values is not guaranteed. So we resort to
+            # post hoc processing using pandas.
+            stmt = stmt.where(
+                func.json_type(array) == "array",
+            ).add_columns(
+                array.label(self._array_tmp_col_label),
+            )
+            return stmt
+        elif dialect is SupportedSQLDialect.POSTGRESQL:
+            element = (
+                (
+                    func.jsonb_array_elements(array)
+                    if self.kwargs
+                    else func.jsonb_array_elements_text(array)
+                )
+                .table_valued(
+                    Column("obj", JSON),
+                    with_ordinality="position",
+                    joins_implicitly=True,
+                )
+                .render_derived()
+            )
+            obj, position = element.c.obj, element.c.position
+            if self.kwargs:
+                columns: Iterable[Label[Any]] = (
+                    func.string_agg(
+                        obj[key.split(".")].as_string(),
+                        aggregate_order_by(self.separator, position),  # type: ignore
+                    ).label(self._add_tmp_suffix(label))
+                    for label, key in self.kwargs.items()
+                )
+            else:
+                columns = (
+                    func.string_agg(
+                        obj,
+                        aggregate_order_by(self.separator, position),  # type: ignore
+                    ).label(self.key),
+                )
+            stmt = (
+                stmt.where(
+                    and_(
+                        func.jsonb_typeof(array) == "array",
+                        *((func.jsonb_typeof(obj) == "object",) if self.kwargs else ()),
+                    )
+                )
+                .add_columns(*columns)
+                .group_by(*stmt.columns.keys())
+            )
+            return stmt
+        else:
+            assert_never(dialect)
+
+    def update_df(
+        self,
+        df: pd.DataFrame,
+        dialect: SupportedSQLDialect,
+    ) -> pd.DataFrame:
+        df = df.rename(self._remove_tmp_suffix, axis=1)
+        if df.empty:
+            columns = list(
+                set(
+                    chain(
+                        df.drop(self._array_tmp_col_label, axis=1, errors="ignore").columns,
+                        self.kwargs.keys(),
+                    )
+                )
+            )
+            return pd.DataFrame(columns=columns, index=df.index)
+        if dialect is SupportedSQLDialect.SQLITE:
+            # Because SQLite doesn't support `WITH ORDINALITY`, the order of
+            # the returned table-values is not guaranteed. So we resort to
+            # post hoc processing using pandas.
+            def _concat_values(array: list[Any]) -> dict[str, Any]:
+                if not isinstance(array, Iterable):
+                    return {}
+                if not self.kwargs:
+                    return {self.key: self.separator.join(str(obj) for obj in array)}
+                values: defaultdict[str, list[str]] = defaultdict(list)
+                for i, obj in enumerate(array):
+                    if not isinstance(obj, Mapping):
+                        continue
+                    for label, key in self.kwargs.items():
+                        if (value := get_attribute_value(obj, key)) is not None:
+                            values[label].append(str(value))
+                return {label: self.separator.join(vs) for label, vs in values.items()}
+
+            records = df.loc[:, self._array_tmp_col_label].map(_concat_values)
+            df_concat = pd.DataFrame.from_records(records.to_list(), index=records.index)
+            return df.drop(self._array_tmp_col_label, axis=1).join(df_concat, how="outer")
+        elif dialect is SupportedSQLDialect.POSTGRESQL:
+            pass
+        else:
+            assert_never(dialect)
+        return df
+
+    def to_dict(self) -> dict[str, Any]:
         return {
             **super().to_dict(),
             **({"kwargs": dict(self.kwargs)} if self.kwargs else {}),
@@ -232,13 +423,24 @@ class Concatenation(Projection):
 
 
 @dataclass(frozen=True)
-class SpanQuery:
+class SpanQuery(_HasTmpSuffix):
     _select: Mapping[str, Projection] = field(default_factory=lambda: MappingProxyType({}))
-    _concat: Concatenation = field(default_factory=Concatenation)
-    _explode: Explosion = field(default_factory=Explosion)
-    _filter: SpanFilter = field(default_factory=SpanFilter)
+    _concat: Optional[Concatenation] = field(default=None)
+    _explode: Optional[Explosion] = field(default=None)
+    _filter: Optional[SpanFilter] = field(default=None)
     _rename: Mapping[str, str] = field(default_factory=lambda: MappingProxyType({}))
     _index: Projection = field(default_factory=lambda: Projection("context.span_id"))
+    _concat_separator: str = field(default="\n\n", repr=False)
+    _pk_tmp_col_label: str = field(init=False, repr=False)
+    """We use `_pk_tmp_col_label` as a temporary column for storing
+    the row id, i.e. the primary key, of the spans table. This will help
+    us with joins without the risk of naming conflicts. The temporary
+    column will have a unique name per instance.
+    """
+
+    def __post_init__(self) -> None:
+        super().__post_init__()
+        object.__setattr__(self, "_pk_tmp_col_label", f"__pk_tmp_col_{random()}")
 
     def __bool__(self) -> bool:
         return bool(self._select) or bool(self._filter) or bool(self._explode) or bool(self._concat)
@@ -254,11 +456,21 @@ class SpanQuery:
         return replace(self, _filter=_filter)
 
     def explode(self, key: str, **kwargs: str) -> "SpanQuery":
+        assert (
+            isinstance(key, str) and key
+        ), "The field name for explosion must be a non-empty string."
         _explode = Explosion(key=key, kwargs=kwargs, primary_index_key=self._index.key)
         return replace(self, _explode=_explode)
 
     def concat(self, key: str, **kwargs: str) -> "SpanQuery":
-        _concat = Concatenation(key=key, kwargs=kwargs)
+        assert (
+            isinstance(key, str) and key
+        ), "The field name for concatenation must be a non-empty string."
+        _concat = (
+            Concatenation(key=key, kwargs=kwargs, separator=self._concat.separator)
+            if self._concat
+            else Concatenation(key=key, kwargs=kwargs, separator=self._concat_separator)
+        )
        return replace(self, _concat=_concat)
 
     def rename(self, **kwargs: str) -> "SpanQuery":
@@ -267,86 +479,147 @@ class SpanQuery:
 
     def with_index(self, key: str = "context.span_id") -> "SpanQuery":
         _index = Projection(key=key)
-        return replace(self, _index=_index)
+        return (
+            replace(self, _index=_index, _explode=replace(self._explode, primary_index_key=key))
+            if self._explode
+            else replace(self, _index=_index)
+        )
 
     def with_concat_separator(self, separator: str = "\n\n") -> "SpanQuery":
+        if not self._concat:
+            return replace(self, _concat_separator=separator)
         _concat = self._concat.with_separator(separator)
         return replace(self, _concat=_concat)
 
-    def with_explode_primary_index_key(self, primary_index_key: str) -> "SpanQuery":
-        _explode = self._explode.with_primary_index_key(primary_index_key)
-        return replace(self, _explode=_explode)
-
-    def __call__(self, spans: Iterable[Span]) -> pd.DataFrame:
-        if self._filter:
-            spans = filter(self._filter, spans)
-        if self._explode:
-            spans = filter(
-                lambda span: (isinstance(seq := self._explode.value(span), Sequence) and len(seq)),
-                spans,
-            )
-        if self._concat:
-            spans = filter(
-                lambda span: (isinstance(seq := self._concat.value(span), Sequence) and len(seq)),
-                spans,
+    def with_explode_primary_index_key(self, _: str) -> "SpanQuery":
+        print(
+            "`.with_explode_primary_index_key(...)` is deprecated and will be "
+            "removed in the future. Use `.with_index(...)` instead."
+        )
+        return self
+
+    def __call__(
+        self,
+        session: Session,
+        project_name: Optional[str] = None,
+        start_time: Optional[datetime] = None,
+        end_time: Optional[datetime] = None,
+        limit: Optional[int] = DEFAULT_SPAN_LIMIT,
+        root_spans_only: Optional[bool] = None,
+        # Deprecated
+        stop_time: Optional[datetime] = None,
+    ) -> pd.DataFrame:
+        if not project_name:
+            project_name = DEFAULT_PROJECT_NAME
+        if stop_time:
+            # Deprecated. Raise a warning
+            warnings.warn(
+                "stop_time is deprecated. Use end_time instead.",
+                DeprecationWarning,
             )
+            end_time = end_time or stop_time
         if not (self._select or self._explode or self._concat):
-            if not (data := [json.loads(span_to_json(span)) for span in spans]):
-                return pd.DataFrame()
-            return (
-                pd.json_normalize(data, max_level=1)
-                .rename(self._rename, axis=1, errors="ignore")
-                .set_index("context.span_id", drop=False)
+            return _get_spans_dataframe(
+                session,
+                project_name,
+                span_filter=self._filter,
+                start_time=start_time,
+                end_time=end_time,
+                limit=limit,
+                root_spans_only=root_spans_only,
             )
-        _selected: List[Dict[str, Any]] = []
-        _exploded: List[Dict[str, Any]] = []
-        for span in spans:
-            if self._select:
-                record = {name: proj(span) for name, proj in self._select.items()}
-                for v in record.values():
-                    if v is not None:
-                        break
-                else:
-                    record = {}
-                if self._concat:
-                    record.update(self._concat(span))
-                if record:
-                    if self._index.key not in record:
-                        record[self._index.key] = self._index(span)
-                    _selected.append(record)
-            elif self._concat:
-                record = {self._index.key: self._index(span)}
-                record.update(self._concat(span))
-                if record:
-                    _selected.append(record)
-            if self._explode:
-                _exploded.extend(self._explode(span))
-        if _selected:
-            select_df = pd.DataFrame(_selected)
-        else:
-            select_df = pd.DataFrame(columns=[self._index.key])
-        select_df = select_df.set_index(self._index.key)
+        assert session.bind is not None
+        dialect = SupportedSQLDialect(session.bind.dialect.name)
+        row_id = models.Span.id.label(self._pk_tmp_col_label)
+        stmt: Select[Any] = (
+            # We do not allow `group_by` anything other than `row_id` because otherwise
+            # it's too complex for the post hoc processing step in pandas.
+            select(row_id)
+            .join(models.Trace)
+            .join(models.Project)
+            .where(models.Project.name == project_name)
+        )
+        if start_time:
+            stmt = stmt.where(start_time <= models.Span.start_time)
+        if end_time:
+            stmt = stmt.where(models.Span.start_time < end_time)
+        if limit is not None:
+            stmt = stmt.limit(limit)
+        if root_spans_only:
+            parent = aliased(models.Span)
+            stmt = stmt.outerjoin(
+                parent,
+                models.Span.parent_id == parent.span_id,
+            ).where(parent.span_id == None)  # noqa E711
+        stmt0_orig: Select[Any] = stmt
+        stmt1_filter: Optional[Select[Any]] = None
+        if self._filter:
+            stmt = stmt1_filter = self._filter(stmt)
+        stmt2_select: Optional[Select[Any]] = None
+        if self._select:
+            columns: Iterable[Label[Any]] = (
+                proj().label(self._add_tmp_suffix(label)) for label, proj in self._select.items()
+            )
+            stmt = stmt2_select = stmt.add_columns(*columns)
+        stmt3_explode: Optional[Select[Any]] = None
         if self._explode:
-            if _exploded:
-                explode_df = pd.DataFrame(_exploded)
+            stmt = stmt3_explode = self._explode.update_sql(stmt, dialect)
+        index: Label[Any] = self._index().label(self._add_tmp_suffix(self._index.key))
+        df: Optional[pd.DataFrame] = None
+        # `concat` is done separately because it has `group_by` but we can't
+        # always join to it as a subquery because it may require post hoc
+        # processing in pandas. It's kept separate for simplicity.
+        df_concat: Optional[pd.DataFrame] = None
+        conn = session.connection()
+        if self._explode or not self._concat:
+            if index.name not in stmt.selected_columns.keys():
+                stmt = stmt.add_columns(index)
+            df = pd.read_sql_query(stmt, conn, self._pk_tmp_col_label)
+        if self._concat:
+            if df is not None:
+                assert stmt3_explode is not None
+                # We can't include stmt3_explode because it may be trying to
+                # explode the same column that we're trying to concatenate,
+                # resulting in duplicated joins.
+                stmt_no_explode = (
+                    stmt2_select
+                    if stmt2_select is not None
+                    else (stmt1_filter if stmt1_filter is not None else stmt0_orig)
+                )
+                stmt4_concat = stmt_no_explode.with_only_columns(row_id)
             else:
-                explode_df = pd.DataFrame(columns=self._explode.index_keys)
-            explode_df = explode_df.set_index(list(self._explode.index_keys))
-            if not self._select:
-                return explode_df.rename(self._rename, axis=1, errors="ignore")
-            select_df = select_df.join(explode_df, how="outer")
-        return select_df.rename(self._rename, axis=1, errors="ignore")
-
-    def to_dict(self) -> Dict[str, Any]:
+                assert stmt3_explode is None
+                stmt4_concat = stmt
+            if (df is None or df.empty) and index.name not in stmt4_concat.selected_columns.keys():
+                stmt4_concat = stmt4_concat.add_columns(index)
+            stmt4_concat = self._concat.update_sql(stmt4_concat, dialect)
+            df_concat = pd.read_sql_query(stmt4_concat, conn, self._pk_tmp_col_label)
+            df_concat = self._concat.update_df(df_concat, dialect)
+        assert df is not None or df_concat is not None
+        if df is None:
+            df = df_concat
+        elif df_concat is not None:
+            df = _outer_join(df, df_concat)
+        assert df is not None and self._pk_tmp_col_label not in df.columns
+        df = df.rename(self._remove_tmp_suffix, axis=1)
+        if self._explode:
+            df = self._explode.update_df(df, dialect)
+        else:
+            df = df.set_index(self._index.key)
+        df = df.rename(_ALIASES, axis=1, errors="ignore")
+        df = df.rename(self._rename, axis=1, errors="ignore")
+        return df
+
+    def to_dict(self) -> dict[str, Any]:
         return {
             **(
                 {"select": {name: proj.to_dict() for name, proj in self._select.items()}}
                 if self._select
                 else {}
             ),
-            "filter": self._filter.to_dict(),
-            "explode": self._explode.to_dict(),
-            "concat": self._concat.to_dict(),
+            **({"filter": self._filter.to_dict()} if self._filter else {}),
+            **({"explode": self._explode.to_dict()} if self._explode else {}),
+            **({"concat": self._concat.to_dict()} if self._concat else {}),
             **({"rename": dict(self._rename)} if self._rename else {}),
             "index": self._index.to_dict(),
        }
@@ -355,7 +628,6 @@ class SpanQuery:
    def from_dict(
        cls,
        obj: Mapping[str, Any],
-        evals: Optional[SupportsGetSpanEvaluation] = None,
        valid_eval_names: Optional[Sequence[str]] = None,
    ) -> "SpanQuery":
        return cls(
@@ -375,7 +647,6 @@ class SpanQuery:
                {
                    "_filter": SpanFilter.from_dict(
                        cast(Mapping[str, Any], filter),
-                        evals=evals,
                        valid_eval_names=valid_eval_names,
                    )
                }  # type: ignore
@@ -385,11 +656,13 @@ class SpanQuery:
            **(
                {"_explode": Explosion.from_dict(cast(Mapping[str, Any], explode))}  # type: ignore
                if (explode := obj.get("explode"))
+                and explode.get("key")  # check `key` for backward-compatible truthiness
                else {}
            ),
            **(
                {"_concat": Concatenation.from_dict(cast(Mapping[str, Any], concat))}  # type: ignore
                if (concat := obj.get("concat"))
+                and concat.get("key")  # check `key` for backward-compatible truthiness
                else {}
            ),
            **(
@@ -403,3 +676,103 @@
                else {}
            ),
        )
+
+
+def _get_spans_dataframe(
+    session: Session,
+    project_name: str,
+    /,
+    *,
+    span_filter: Optional[SpanFilter] = None,
+    start_time: Optional[datetime] = None,
+    end_time: Optional[datetime] = None,
+    limit: Optional[int] = DEFAULT_SPAN_LIMIT,
+    root_spans_only: Optional[bool] = None,
+    # Deprecated
+    stop_time: Optional[datetime] = None,
+) -> pd.DataFrame:
+    # use legacy labels for backward-compatibility
+    span_id_label = "context.span_id"
+    trace_id_label = "context.trace_id"
+    if stop_time:
+        # Deprecated. Raise a warning
+        warnings.warn(
+            "stop_time is deprecated. Use end_time instead.",
+            DeprecationWarning,
+        )
+        end_time = end_time or stop_time
+    stmt: Select[Any] = (
+        select(
+            models.Span.name,
+            models.Span.span_kind,
+            models.Span.parent_id,
+            models.Span.start_time,
+            models.Span.end_time,
+            models.Span.status_code,
+            models.Span.status_message,
+            models.Span.events,
+            models.Span.span_id.label(span_id_label),
+            models.Trace.trace_id.label(trace_id_label),
+            models.Span.attributes,
+        )
+        .join(models.Trace)
+        .join(models.Project)
+        .where(models.Project.name == project_name)
+    )
+    if span_filter:
+        stmt = span_filter(stmt)
+    if start_time:
+        stmt = stmt.where(start_time <= models.Span.start_time)
+    if end_time:
+        stmt = stmt.where(models.Span.start_time < end_time)
+    if limit is not None:
+        stmt = stmt.limit(limit)
+    if root_spans_only:
+        parent = aliased(models.Span)
+        stmt = stmt.outerjoin(
+            parent,
+            models.Span.parent_id == parent.span_id,
+        ).where(parent.span_id == None)  # noqa E711
+    conn = session.connection()
+    # set `drop=False` for backward-compatibility
+    df = pd.read_sql_query(stmt, conn).set_index(span_id_label, drop=False)
+    if df.empty:
+        return df.drop("attributes", axis=1)
+    df_attributes = pd.DataFrame.from_records(
+        df.attributes.map(_flatten_semantic_conventions),
+    ).set_axis(df.index, axis=0)
+    df = pd.concat(
+        [
+            df.drop("attributes", axis=1),
+            df_attributes.add_prefix("attributes" + "."),
+        ],
+        axis=1,
+    )
+    return df
+
+
+def _outer_join(left: pd.DataFrame, right: pd.DataFrame) -> pd.DataFrame:
+    if (columns_intersection := left.columns.intersection(right.columns)).empty:
+        df = left.join(right, how="outer")
+    else:
+        df = left.join(right, how="outer", lsuffix="_L", rsuffix="_R")
+        for col in columns_intersection:
+            df.loc[:, col] = df.loc[:, f"{col}_L"].fillna(df.loc[:, f"{col}_R"])
+            df = df.drop([f"{col}_L", f"{col}_R"], axis=1)
+    return df
+
+
+def _flatten_semantic_conventions(attributes: Mapping[str, Any]) -> dict[str, Any]:
+    # This may be inefficient, but is needed to preserve backward-compatibility.
+    # For example, custom attributes do not get flattened.
+    ans = unflatten(
+        load_json_strings(
+            flatten(
+                attributes,
+                recurse_on_sequence=True,
+                json_string_attributes=JSON_STRING_ATTRIBUTES,
+            ),
+        ),
+        prefix_exclusions=SEMANTIC_CONVENTIONS,
+    )
+    return ans
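

For orientation, here is a minimal sketch of how the rewritten query DSL might be driven after this release. It is based only on the signatures visible in the hunks above; the `engine` object is an assumption, and the import path for SpanQuery mirrors the `from phoenix.trace.dsl import SpanFilter` line in the diff.

from datetime import datetime, timedelta, timezone

from sqlalchemy.orm import Session

from phoenix.trace.dsl import SpanQuery  # assumed import path, mirroring the SpanFilter import above

# Each builder returns a new frozen SpanQuery via dataclasses.replace.
query = (
    SpanQuery()
    .explode(
        "retrieval.documents",         # must be a non-empty string, per the new assert
        reference="document.content",  # kwargs map output column name -> attribute key
        score="document.score",
    )
    .with_index("context.trace_id")    # now also rewires the explosion's primary_index_key
)

# Execution now takes a SQLAlchemy Session bound to the Phoenix database and
# emits dialect-aware SQL (SQLite or PostgreSQL), returning a pandas DataFrame.
with Session(engine) as session:  # `engine` is assumed to be bound to the Phoenix database
    df = query(
        session,
        project_name="default",  # falls back to DEFAULT_PROJECT_NAME when omitted
        start_time=datetime.now(timezone.utc) - timedelta(days=1),
        limit=1000,              # DEFAULT_SPAN_LIMIT
        root_spans_only=False,
    )

Note that the old `stop_time` keyword still works but is folded into `end_time` with a DeprecationWarning, and that `.with_concat_separator(...)` may now be called before `.concat(...)`: the separator is parked on `_concat_separator` and picked up when the `Concatenation` is eventually constructed.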
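One more small illustration: the new `_outer_join` helper coalesces columns that appear on both sides of its join, which the SQLite post-processing paths rely on when re-attaching exploded or concatenated columns. A toy pandas example of that coalescing behavior, using made-up data rather than anything Phoenix-specific:

import pandas as pd

left = pd.DataFrame({"a": [1.0, None]}, index=["x", "y"])
right = pd.DataFrame({"a": [None, 2.0], "b": [3.0, 4.0]}, index=["x", "y"])

# Overlapping columns are joined under _L/_R suffixes, then coalesced
# left-first via fillna, as _outer_join does in the diff above.
df = left.join(right, how="outer", lsuffix="_L", rsuffix="_R")
for col in left.columns.intersection(right.columns):
    df[col] = df[f"{col}_L"].fillna(df[f"{col}_R"])
    df = df.drop([f"{col}_L", f"{col}_R"], axis=1)

print(df)
#      b    a
# x  3.0  1.0
# y  4.0  2.0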