arize-phoenix 3.16.0__py3-none-any.whl → 7.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of arize-phoenix might be problematic. Click here for more details.

Files changed (338) hide show
  1. arize_phoenix-7.7.0.dist-info/METADATA +261 -0
  2. arize_phoenix-7.7.0.dist-info/RECORD +345 -0
  3. {arize_phoenix-3.16.0.dist-info → arize_phoenix-7.7.0.dist-info}/WHEEL +1 -1
  4. arize_phoenix-7.7.0.dist-info/entry_points.txt +3 -0
  5. phoenix/__init__.py +86 -14
  6. phoenix/auth.py +309 -0
  7. phoenix/config.py +675 -45
  8. phoenix/core/model.py +32 -30
  9. phoenix/core/model_schema.py +102 -109
  10. phoenix/core/model_schema_adapter.py +48 -45
  11. phoenix/datetime_utils.py +24 -3
  12. phoenix/db/README.md +54 -0
  13. phoenix/db/__init__.py +4 -0
  14. phoenix/db/alembic.ini +85 -0
  15. phoenix/db/bulk_inserter.py +294 -0
  16. phoenix/db/engines.py +208 -0
  17. phoenix/db/enums.py +20 -0
  18. phoenix/db/facilitator.py +113 -0
  19. phoenix/db/helpers.py +159 -0
  20. phoenix/db/insertion/constants.py +2 -0
  21. phoenix/db/insertion/dataset.py +227 -0
  22. phoenix/db/insertion/document_annotation.py +171 -0
  23. phoenix/db/insertion/evaluation.py +191 -0
  24. phoenix/db/insertion/helpers.py +98 -0
  25. phoenix/db/insertion/span.py +193 -0
  26. phoenix/db/insertion/span_annotation.py +158 -0
  27. phoenix/db/insertion/trace_annotation.py +158 -0
  28. phoenix/db/insertion/types.py +256 -0
  29. phoenix/db/migrate.py +86 -0
  30. phoenix/db/migrations/data_migration_scripts/populate_project_sessions.py +199 -0
  31. phoenix/db/migrations/env.py +114 -0
  32. phoenix/db/migrations/script.py.mako +26 -0
  33. phoenix/db/migrations/versions/10460e46d750_datasets.py +317 -0
  34. phoenix/db/migrations/versions/3be8647b87d8_add_token_columns_to_spans_table.py +126 -0
  35. phoenix/db/migrations/versions/4ded9e43755f_create_project_sessions_table.py +66 -0
  36. phoenix/db/migrations/versions/cd164e83824f_users_and_tokens.py +157 -0
  37. phoenix/db/migrations/versions/cf03bd6bae1d_init.py +280 -0
  38. phoenix/db/models.py +807 -0
  39. phoenix/exceptions.py +5 -1
  40. phoenix/experiments/__init__.py +6 -0
  41. phoenix/experiments/evaluators/__init__.py +29 -0
  42. phoenix/experiments/evaluators/base.py +158 -0
  43. phoenix/experiments/evaluators/code_evaluators.py +184 -0
  44. phoenix/experiments/evaluators/llm_evaluators.py +473 -0
  45. phoenix/experiments/evaluators/utils.py +236 -0
  46. phoenix/experiments/functions.py +772 -0
  47. phoenix/experiments/tracing.py +86 -0
  48. phoenix/experiments/types.py +726 -0
  49. phoenix/experiments/utils.py +25 -0
  50. phoenix/inferences/__init__.py +0 -0
  51. phoenix/{datasets → inferences}/errors.py +6 -5
  52. phoenix/{datasets → inferences}/fixtures.py +49 -42
  53. phoenix/{datasets/dataset.py → inferences/inferences.py} +121 -105
  54. phoenix/{datasets → inferences}/schema.py +11 -11
  55. phoenix/{datasets → inferences}/validation.py +13 -14
  56. phoenix/logging/__init__.py +3 -0
  57. phoenix/logging/_config.py +90 -0
  58. phoenix/logging/_filter.py +6 -0
  59. phoenix/logging/_formatter.py +69 -0
  60. phoenix/metrics/__init__.py +5 -4
  61. phoenix/metrics/binning.py +4 -3
  62. phoenix/metrics/metrics.py +2 -1
  63. phoenix/metrics/mixins.py +7 -6
  64. phoenix/metrics/retrieval_metrics.py +2 -1
  65. phoenix/metrics/timeseries.py +5 -4
  66. phoenix/metrics/wrappers.py +9 -3
  67. phoenix/pointcloud/clustering.py +5 -5
  68. phoenix/pointcloud/pointcloud.py +7 -5
  69. phoenix/pointcloud/projectors.py +5 -6
  70. phoenix/pointcloud/umap_parameters.py +53 -52
  71. phoenix/server/api/README.md +28 -0
  72. phoenix/server/api/auth.py +44 -0
  73. phoenix/server/api/context.py +152 -9
  74. phoenix/server/api/dataloaders/__init__.py +91 -0
  75. phoenix/server/api/dataloaders/annotation_summaries.py +139 -0
  76. phoenix/server/api/dataloaders/average_experiment_run_latency.py +54 -0
  77. phoenix/server/api/dataloaders/cache/__init__.py +3 -0
  78. phoenix/server/api/dataloaders/cache/two_tier_cache.py +68 -0
  79. phoenix/server/api/dataloaders/dataset_example_revisions.py +131 -0
  80. phoenix/server/api/dataloaders/dataset_example_spans.py +38 -0
  81. phoenix/server/api/dataloaders/document_evaluation_summaries.py +144 -0
  82. phoenix/server/api/dataloaders/document_evaluations.py +31 -0
  83. phoenix/server/api/dataloaders/document_retrieval_metrics.py +89 -0
  84. phoenix/server/api/dataloaders/experiment_annotation_summaries.py +79 -0
  85. phoenix/server/api/dataloaders/experiment_error_rates.py +58 -0
  86. phoenix/server/api/dataloaders/experiment_run_annotations.py +36 -0
  87. phoenix/server/api/dataloaders/experiment_run_counts.py +49 -0
  88. phoenix/server/api/dataloaders/experiment_sequence_number.py +44 -0
  89. phoenix/server/api/dataloaders/latency_ms_quantile.py +188 -0
  90. phoenix/server/api/dataloaders/min_start_or_max_end_times.py +85 -0
  91. phoenix/server/api/dataloaders/project_by_name.py +31 -0
  92. phoenix/server/api/dataloaders/record_counts.py +116 -0
  93. phoenix/server/api/dataloaders/session_io.py +79 -0
  94. phoenix/server/api/dataloaders/session_num_traces.py +30 -0
  95. phoenix/server/api/dataloaders/session_num_traces_with_error.py +32 -0
  96. phoenix/server/api/dataloaders/session_token_usages.py +41 -0
  97. phoenix/server/api/dataloaders/session_trace_latency_ms_quantile.py +55 -0
  98. phoenix/server/api/dataloaders/span_annotations.py +26 -0
  99. phoenix/server/api/dataloaders/span_dataset_examples.py +31 -0
  100. phoenix/server/api/dataloaders/span_descendants.py +57 -0
  101. phoenix/server/api/dataloaders/span_projects.py +33 -0
  102. phoenix/server/api/dataloaders/token_counts.py +124 -0
  103. phoenix/server/api/dataloaders/trace_by_trace_ids.py +25 -0
  104. phoenix/server/api/dataloaders/trace_root_spans.py +32 -0
  105. phoenix/server/api/dataloaders/user_roles.py +30 -0
  106. phoenix/server/api/dataloaders/users.py +33 -0
  107. phoenix/server/api/exceptions.py +48 -0
  108. phoenix/server/api/helpers/__init__.py +12 -0
  109. phoenix/server/api/helpers/dataset_helpers.py +217 -0
  110. phoenix/server/api/helpers/experiment_run_filters.py +763 -0
  111. phoenix/server/api/helpers/playground_clients.py +948 -0
  112. phoenix/server/api/helpers/playground_registry.py +70 -0
  113. phoenix/server/api/helpers/playground_spans.py +455 -0
  114. phoenix/server/api/input_types/AddExamplesToDatasetInput.py +16 -0
  115. phoenix/server/api/input_types/AddSpansToDatasetInput.py +14 -0
  116. phoenix/server/api/input_types/ChatCompletionInput.py +38 -0
  117. phoenix/server/api/input_types/ChatCompletionMessageInput.py +24 -0
  118. phoenix/server/api/input_types/ClearProjectInput.py +15 -0
  119. phoenix/server/api/input_types/ClusterInput.py +2 -2
  120. phoenix/server/api/input_types/CreateDatasetInput.py +12 -0
  121. phoenix/server/api/input_types/CreateSpanAnnotationInput.py +18 -0
  122. phoenix/server/api/input_types/CreateTraceAnnotationInput.py +18 -0
  123. phoenix/server/api/input_types/DataQualityMetricInput.py +5 -2
  124. phoenix/server/api/input_types/DatasetExampleInput.py +14 -0
  125. phoenix/server/api/input_types/DatasetSort.py +17 -0
  126. phoenix/server/api/input_types/DatasetVersionSort.py +16 -0
  127. phoenix/server/api/input_types/DeleteAnnotationsInput.py +7 -0
  128. phoenix/server/api/input_types/DeleteDatasetExamplesInput.py +13 -0
  129. phoenix/server/api/input_types/DeleteDatasetInput.py +7 -0
  130. phoenix/server/api/input_types/DeleteExperimentsInput.py +7 -0
  131. phoenix/server/api/input_types/DimensionFilter.py +4 -4
  132. phoenix/server/api/input_types/GenerativeModelInput.py +17 -0
  133. phoenix/server/api/input_types/Granularity.py +1 -1
  134. phoenix/server/api/input_types/InvocationParameters.py +162 -0
  135. phoenix/server/api/input_types/PatchAnnotationInput.py +19 -0
  136. phoenix/server/api/input_types/PatchDatasetExamplesInput.py +35 -0
  137. phoenix/server/api/input_types/PatchDatasetInput.py +14 -0
  138. phoenix/server/api/input_types/PerformanceMetricInput.py +5 -2
  139. phoenix/server/api/input_types/ProjectSessionSort.py +29 -0
  140. phoenix/server/api/input_types/SpanAnnotationSort.py +17 -0
  141. phoenix/server/api/input_types/SpanSort.py +134 -69
  142. phoenix/server/api/input_types/TemplateOptions.py +10 -0
  143. phoenix/server/api/input_types/TraceAnnotationSort.py +17 -0
  144. phoenix/server/api/input_types/UserRoleInput.py +9 -0
  145. phoenix/server/api/mutations/__init__.py +28 -0
  146. phoenix/server/api/mutations/api_key_mutations.py +167 -0
  147. phoenix/server/api/mutations/chat_mutations.py +593 -0
  148. phoenix/server/api/mutations/dataset_mutations.py +591 -0
  149. phoenix/server/api/mutations/experiment_mutations.py +75 -0
  150. phoenix/server/api/{types/ExportEventsMutation.py → mutations/export_events_mutations.py} +21 -18
  151. phoenix/server/api/mutations/project_mutations.py +57 -0
  152. phoenix/server/api/mutations/span_annotations_mutations.py +128 -0
  153. phoenix/server/api/mutations/trace_annotations_mutations.py +127 -0
  154. phoenix/server/api/mutations/user_mutations.py +329 -0
  155. phoenix/server/api/openapi/__init__.py +0 -0
  156. phoenix/server/api/openapi/main.py +17 -0
  157. phoenix/server/api/openapi/schema.py +16 -0
  158. phoenix/server/api/queries.py +738 -0
  159. phoenix/server/api/routers/__init__.py +11 -0
  160. phoenix/server/api/routers/auth.py +284 -0
  161. phoenix/server/api/routers/embeddings.py +26 -0
  162. phoenix/server/api/routers/oauth2.py +488 -0
  163. phoenix/server/api/routers/v1/__init__.py +64 -0
  164. phoenix/server/api/routers/v1/datasets.py +1017 -0
  165. phoenix/server/api/routers/v1/evaluations.py +362 -0
  166. phoenix/server/api/routers/v1/experiment_evaluations.py +115 -0
  167. phoenix/server/api/routers/v1/experiment_runs.py +167 -0
  168. phoenix/server/api/routers/v1/experiments.py +308 -0
  169. phoenix/server/api/routers/v1/pydantic_compat.py +78 -0
  170. phoenix/server/api/routers/v1/spans.py +267 -0
  171. phoenix/server/api/routers/v1/traces.py +208 -0
  172. phoenix/server/api/routers/v1/utils.py +95 -0
  173. phoenix/server/api/schema.py +44 -247
  174. phoenix/server/api/subscriptions.py +597 -0
  175. phoenix/server/api/types/Annotation.py +21 -0
  176. phoenix/server/api/types/AnnotationSummary.py +55 -0
  177. phoenix/server/api/types/AnnotatorKind.py +16 -0
  178. phoenix/server/api/types/ApiKey.py +27 -0
  179. phoenix/server/api/types/AuthMethod.py +9 -0
  180. phoenix/server/api/types/ChatCompletionMessageRole.py +11 -0
  181. phoenix/server/api/types/ChatCompletionSubscriptionPayload.py +46 -0
  182. phoenix/server/api/types/Cluster.py +25 -24
  183. phoenix/server/api/types/CreateDatasetPayload.py +8 -0
  184. phoenix/server/api/types/DataQualityMetric.py +31 -13
  185. phoenix/server/api/types/Dataset.py +288 -63
  186. phoenix/server/api/types/DatasetExample.py +85 -0
  187. phoenix/server/api/types/DatasetExampleRevision.py +34 -0
  188. phoenix/server/api/types/DatasetVersion.py +14 -0
  189. phoenix/server/api/types/Dimension.py +32 -31
  190. phoenix/server/api/types/DocumentEvaluationSummary.py +9 -8
  191. phoenix/server/api/types/EmbeddingDimension.py +56 -49
  192. phoenix/server/api/types/Evaluation.py +25 -31
  193. phoenix/server/api/types/EvaluationSummary.py +30 -50
  194. phoenix/server/api/types/Event.py +20 -20
  195. phoenix/server/api/types/ExampleRevisionInterface.py +14 -0
  196. phoenix/server/api/types/Experiment.py +152 -0
  197. phoenix/server/api/types/ExperimentAnnotationSummary.py +13 -0
  198. phoenix/server/api/types/ExperimentComparison.py +17 -0
  199. phoenix/server/api/types/ExperimentRun.py +119 -0
  200. phoenix/server/api/types/ExperimentRunAnnotation.py +56 -0
  201. phoenix/server/api/types/GenerativeModel.py +9 -0
  202. phoenix/server/api/types/GenerativeProvider.py +85 -0
  203. phoenix/server/api/types/Inferences.py +80 -0
  204. phoenix/server/api/types/InferencesRole.py +23 -0
  205. phoenix/server/api/types/LabelFraction.py +7 -0
  206. phoenix/server/api/types/MimeType.py +2 -2
  207. phoenix/server/api/types/Model.py +54 -54
  208. phoenix/server/api/types/PerformanceMetric.py +8 -5
  209. phoenix/server/api/types/Project.py +407 -142
  210. phoenix/server/api/types/ProjectSession.py +139 -0
  211. phoenix/server/api/types/Segments.py +4 -4
  212. phoenix/server/api/types/Span.py +221 -176
  213. phoenix/server/api/types/SpanAnnotation.py +43 -0
  214. phoenix/server/api/types/SpanIOValue.py +15 -0
  215. phoenix/server/api/types/SystemApiKey.py +9 -0
  216. phoenix/server/api/types/TemplateLanguage.py +10 -0
  217. phoenix/server/api/types/TimeSeries.py +19 -15
  218. phoenix/server/api/types/TokenUsage.py +11 -0
  219. phoenix/server/api/types/Trace.py +154 -0
  220. phoenix/server/api/types/TraceAnnotation.py +45 -0
  221. phoenix/server/api/types/UMAPPoints.py +7 -7
  222. phoenix/server/api/types/User.py +60 -0
  223. phoenix/server/api/types/UserApiKey.py +45 -0
  224. phoenix/server/api/types/UserRole.py +15 -0
  225. phoenix/server/api/types/node.py +13 -107
  226. phoenix/server/api/types/pagination.py +156 -57
  227. phoenix/server/api/utils.py +34 -0
  228. phoenix/server/app.py +864 -115
  229. phoenix/server/bearer_auth.py +163 -0
  230. phoenix/server/dml_event.py +136 -0
  231. phoenix/server/dml_event_handler.py +256 -0
  232. phoenix/server/email/__init__.py +0 -0
  233. phoenix/server/email/sender.py +97 -0
  234. phoenix/server/email/templates/__init__.py +0 -0
  235. phoenix/server/email/templates/password_reset.html +19 -0
  236. phoenix/server/email/types.py +11 -0
  237. phoenix/server/grpc_server.py +102 -0
  238. phoenix/server/jwt_store.py +505 -0
  239. phoenix/server/main.py +305 -116
  240. phoenix/server/oauth2.py +52 -0
  241. phoenix/server/openapi/__init__.py +0 -0
  242. phoenix/server/prometheus.py +111 -0
  243. phoenix/server/rate_limiters.py +188 -0
  244. phoenix/server/static/.vite/manifest.json +87 -0
  245. phoenix/server/static/assets/components-Cy9nwIvF.js +2125 -0
  246. phoenix/server/static/assets/index-BKvHIxkk.js +113 -0
  247. phoenix/server/static/assets/pages-CUi2xCVQ.js +4449 -0
  248. phoenix/server/static/assets/vendor-DvC8cT4X.js +894 -0
  249. phoenix/server/static/assets/vendor-DxkFTwjz.css +1 -0
  250. phoenix/server/static/assets/vendor-arizeai-Do1793cv.js +662 -0
  251. phoenix/server/static/assets/vendor-codemirror-BzwZPyJM.js +24 -0
  252. phoenix/server/static/assets/vendor-recharts-_Jb7JjhG.js +59 -0
  253. phoenix/server/static/assets/vendor-shiki-Cl9QBraO.js +5 -0
  254. phoenix/server/static/assets/vendor-three-DwGkEfCM.js +2998 -0
  255. phoenix/server/telemetry.py +68 -0
  256. phoenix/server/templates/index.html +82 -23
  257. phoenix/server/thread_server.py +3 -3
  258. phoenix/server/types.py +275 -0
  259. phoenix/services.py +27 -18
  260. phoenix/session/client.py +743 -68
  261. phoenix/session/data_extractor.py +31 -7
  262. phoenix/session/evaluation.py +3 -9
  263. phoenix/session/session.py +263 -219
  264. phoenix/settings.py +22 -0
  265. phoenix/trace/__init__.py +2 -22
  266. phoenix/trace/attributes.py +338 -0
  267. phoenix/trace/dsl/README.md +116 -0
  268. phoenix/trace/dsl/filter.py +663 -213
  269. phoenix/trace/dsl/helpers.py +73 -21
  270. phoenix/trace/dsl/query.py +574 -201
  271. phoenix/trace/exporter.py +24 -19
  272. phoenix/trace/fixtures.py +368 -32
  273. phoenix/trace/otel.py +71 -219
  274. phoenix/trace/projects.py +3 -2
  275. phoenix/trace/schemas.py +33 -11
  276. phoenix/trace/span_evaluations.py +21 -16
  277. phoenix/trace/span_json_decoder.py +6 -4
  278. phoenix/trace/span_json_encoder.py +2 -2
  279. phoenix/trace/trace_dataset.py +47 -32
  280. phoenix/trace/utils.py +21 -4
  281. phoenix/utilities/__init__.py +0 -26
  282. phoenix/utilities/client.py +132 -0
  283. phoenix/utilities/deprecation.py +31 -0
  284. phoenix/utilities/error_handling.py +3 -2
  285. phoenix/utilities/json.py +109 -0
  286. phoenix/utilities/logging.py +8 -0
  287. phoenix/utilities/project.py +2 -2
  288. phoenix/utilities/re.py +49 -0
  289. phoenix/utilities/span_store.py +0 -23
  290. phoenix/utilities/template_formatters.py +99 -0
  291. phoenix/version.py +1 -1
  292. arize_phoenix-3.16.0.dist-info/METADATA +0 -495
  293. arize_phoenix-3.16.0.dist-info/RECORD +0 -178
  294. phoenix/core/project.py +0 -617
  295. phoenix/core/traces.py +0 -100
  296. phoenix/experimental/evals/__init__.py +0 -73
  297. phoenix/experimental/evals/evaluators.py +0 -413
  298. phoenix/experimental/evals/functions/__init__.py +0 -4
  299. phoenix/experimental/evals/functions/classify.py +0 -453
  300. phoenix/experimental/evals/functions/executor.py +0 -353
  301. phoenix/experimental/evals/functions/generate.py +0 -138
  302. phoenix/experimental/evals/functions/processing.py +0 -76
  303. phoenix/experimental/evals/models/__init__.py +0 -14
  304. phoenix/experimental/evals/models/anthropic.py +0 -175
  305. phoenix/experimental/evals/models/base.py +0 -170
  306. phoenix/experimental/evals/models/bedrock.py +0 -221
  307. phoenix/experimental/evals/models/litellm.py +0 -134
  308. phoenix/experimental/evals/models/openai.py +0 -448
  309. phoenix/experimental/evals/models/rate_limiters.py +0 -246
  310. phoenix/experimental/evals/models/vertex.py +0 -173
  311. phoenix/experimental/evals/models/vertexai.py +0 -186
  312. phoenix/experimental/evals/retrievals.py +0 -96
  313. phoenix/experimental/evals/templates/__init__.py +0 -50
  314. phoenix/experimental/evals/templates/default_templates.py +0 -472
  315. phoenix/experimental/evals/templates/template.py +0 -195
  316. phoenix/experimental/evals/utils/__init__.py +0 -172
  317. phoenix/experimental/evals/utils/threads.py +0 -27
  318. phoenix/server/api/helpers.py +0 -11
  319. phoenix/server/api/routers/evaluation_handler.py +0 -109
  320. phoenix/server/api/routers/span_handler.py +0 -70
  321. phoenix/server/api/routers/trace_handler.py +0 -60
  322. phoenix/server/api/types/DatasetRole.py +0 -23
  323. phoenix/server/static/index.css +0 -6
  324. phoenix/server/static/index.js +0 -7447
  325. phoenix/storage/span_store/__init__.py +0 -23
  326. phoenix/storage/span_store/text_file.py +0 -85
  327. phoenix/trace/dsl/missing.py +0 -60
  328. phoenix/trace/langchain/__init__.py +0 -3
  329. phoenix/trace/langchain/instrumentor.py +0 -35
  330. phoenix/trace/llama_index/__init__.py +0 -3
  331. phoenix/trace/llama_index/callback.py +0 -102
  332. phoenix/trace/openai/__init__.py +0 -3
  333. phoenix/trace/openai/instrumentor.py +0 -30
  334. {arize_phoenix-3.16.0.dist-info → arize_phoenix-7.7.0.dist-info}/licenses/IP_NOTICE +0 -0
  335. {arize_phoenix-3.16.0.dist-info → arize_phoenix-7.7.0.dist-info}/licenses/LICENSE +0 -0
  336. /phoenix/{datasets → db/insertion}/__init__.py +0 -0
  337. /phoenix/{experimental → db/migrations}/__init__.py +0 -0
  338. /phoenix/{storage → db/migrations/data_migration_scripts}/__init__.py +0 -0
phoenix/core/project.py DELETED
@@ -1,617 +0,0 @@
1
- import logging
2
- from collections import defaultdict
3
- from datetime import datetime, timezone
4
- from threading import RLock
5
- from types import MappingProxyType
6
- from typing import (
7
- Any,
8
- DefaultDict,
9
- Dict,
10
- Iterable,
11
- Iterator,
12
- List,
13
- Mapping,
14
- Optional,
15
- Set,
16
- Tuple,
17
- Union,
18
- cast,
19
- )
20
-
21
- import numpy as np
22
- from ddsketch import DDSketch
23
- from google.protobuf.json_format import MessageToDict
24
- from openinference.semconv.trace import SpanAttributes
25
- from pandas import DataFrame, Index, MultiIndex
26
- from sortedcontainers import SortedKeyList
27
- from typing_extensions import TypeAlias, assert_never
28
- from wrapt import ObjectProxy
29
-
30
- import phoenix.trace.v1 as pb
31
- from phoenix.datetime_utils import right_open_time_range
32
- from phoenix.trace import DocumentEvaluations, Evaluations, SpanEvaluations
33
- from phoenix.trace.schemas import (
34
- ComputedAttributes,
35
- Span,
36
- SpanID,
37
- SpanStatusCode,
38
- TraceID,
39
- )
40
-
41
- logger = logging.getLogger(__name__)
42
- logger.addHandler(logging.NullHandler())
43
-
44
- END_OF_QUEUE = None # sentinel value for queue termination
45
-
46
-
47
- class WrappedSpan(ObjectProxy): # type: ignore
48
- """
49
- A wrapped Span object with __getitem__ and __setitem__ methods for accessing
50
- computed attributes.
51
- """
52
-
53
- def __init__(self, span: Span) -> None:
54
- super().__init__(span)
55
- self._self_computed_values: Dict[ComputedAttributes, Union[float, int]] = {}
56
-
57
- def get_computed_value(self, key: str) -> Optional[Union[float, int]]:
58
- try:
59
- attr = ComputedAttributes(key)
60
- except Exception:
61
- return None
62
- return self._self_computed_values.get(attr)
63
-
64
- def __getitem__(self, key: Union[str, ComputedAttributes]) -> Any:
65
- if isinstance(key, ComputedAttributes):
66
- return self._self_computed_values.get(key)
67
- return self.__wrapped__.attributes.get(key)
68
-
69
- def __setitem__(self, key: ComputedAttributes, value: Any) -> None:
70
- if not isinstance(key, ComputedAttributes):
71
- raise KeyError(f"{key} is not a computed value")
72
- self._self_computed_values[key] = value
73
-
74
- def __eq__(self, other: Any) -> bool:
75
- return self is other
76
-
77
- def __hash__(self) -> int:
78
- return id(self)
79
-
80
-
81
- _ParentSpanID: TypeAlias = SpanID
82
- _ChildSpanID: TypeAlias = SpanID
83
- _ProjectName: TypeAlias = str
84
-
85
-
86
- EvaluationName: TypeAlias = str
87
- DocumentPosition: TypeAlias = int
88
-
89
-
90
- class Project:
91
- def __init__(self) -> None:
92
- self._spans = _Spans()
93
- self._evals = _Evals()
94
- self._is_archived = False
95
-
96
- @property
97
- def last_updated_at(self) -> Optional[datetime]:
98
- spans_last_updated_at = self._spans.last_updated_at
99
- evals_last_updated_at = self._evals.last_updated_at
100
- if (
101
- not spans_last_updated_at
102
- or evals_last_updated_at
103
- and evals_last_updated_at > spans_last_updated_at
104
- ):
105
- return evals_last_updated_at
106
- return spans_last_updated_at
107
-
108
- def add_span(self, span: Span) -> None:
109
- self._spans.add(WrappedSpan(span))
110
-
111
- def add_eval(self, pb_eval: pb.Evaluation) -> None:
112
- self._evals.add(pb_eval)
113
-
114
- def get_trace(self, trace_id: TraceID) -> Iterator[WrappedSpan]:
115
- yield from self._spans.get_trace(trace_id)
116
-
117
- def get_spans(
118
- self,
119
- start_time: Optional[datetime] = None,
120
- stop_time: Optional[datetime] = None,
121
- root_spans_only: Optional[bool] = False,
122
- span_ids: Optional[Iterable[SpanID]] = None,
123
- ) -> Iterator[WrappedSpan]:
124
- yield from self._spans.get_spans(start_time, stop_time, root_spans_only, span_ids)
125
-
126
- def get_num_documents(self, span_id: SpanID) -> int:
127
- return self._spans.get_num_documents(span_id)
128
-
129
- def root_span_latency_ms_quantiles(self, probability: float) -> Optional[float]:
130
- """Root span latency quantiles in milliseconds"""
131
- return self._spans.root_span_latency_ms_quantiles(probability)
132
-
133
- def get_descendant_spans(self, span_id: SpanID) -> Iterator[WrappedSpan]:
134
- yield from self._spans.get_descendant_spans(span_id)
135
-
136
- def span_count(
137
- self,
138
- start_time: Optional[datetime] = None,
139
- stop_time: Optional[datetime] = None,
140
- ) -> int:
141
- return self._spans.span_count(start_time, stop_time)
142
-
143
- def trace_count(
144
- self,
145
- start_time: Optional[datetime] = None,
146
- stop_time: Optional[datetime] = None,
147
- ) -> int:
148
- return self._spans.trace_count(start_time, stop_time)
149
-
150
- @property
151
- def token_count_total(self) -> int:
152
- return self._spans.token_count_total
153
-
154
- @property
155
- def right_open_time_range(self) -> Tuple[Optional[datetime], Optional[datetime]]:
156
- return self._spans.right_open_time_range
157
-
158
- def get_span_evaluation(self, span_id: SpanID, name: str) -> Optional[pb.Evaluation]:
159
- return self._evals.get_span_evaluation(span_id, name)
160
-
161
- def get_span_evaluation_names(self) -> List[EvaluationName]:
162
- return self._evals.get_span_evaluation_names()
163
-
164
- def get_document_evaluation_names(
165
- self,
166
- span_id: Optional[SpanID] = None,
167
- ) -> List[EvaluationName]:
168
- return self._evals.get_document_evaluation_names(span_id)
169
-
170
- def get_span_evaluation_labels(self, name: EvaluationName) -> Tuple[str, ...]:
171
- return self._evals.get_span_evaluation_labels(name)
172
-
173
- def get_span_evaluation_span_ids(self, name: EvaluationName) -> Tuple[SpanID, ...]:
174
- return self._evals.get_span_evaluation_span_ids(name)
175
-
176
- def get_evaluations_by_span_id(self, span_id: SpanID) -> List[pb.Evaluation]:
177
- return self._evals.get_evaluations_by_span_id(span_id)
178
-
179
- def get_document_evaluation_span_ids(self, name: EvaluationName) -> Tuple[SpanID, ...]:
180
- return self._evals.get_document_evaluation_span_ids(name)
181
-
182
- def get_document_evaluations_by_span_id(self, span_id: SpanID) -> List[pb.Evaluation]:
183
- return self._evals.get_document_evaluations_by_span_id(span_id)
184
-
185
- def get_document_evaluation_scores(
186
- self,
187
- span_id: SpanID,
188
- evaluation_name: str,
189
- num_documents: int,
190
- ) -> List[float]:
191
- return self._evals.get_document_evaluation_scores(span_id, evaluation_name, num_documents)
192
-
193
- def export_evaluations(self) -> List[Evaluations]:
194
- return self._evals.export_evaluations()
195
-
196
- def archive(self) -> None:
197
- self._is_archived = True
198
-
199
- @property
200
- def is_archived(self) -> bool:
201
- return self._is_archived
202
-
203
-
204
- class _Spans:
205
- def __init__(self) -> None:
206
- self._lock = RLock()
207
- self._spans: Dict[SpanID, WrappedSpan] = {}
208
- self._parent_span_ids: Dict[SpanID, _ParentSpanID] = {}
209
- self._traces: DefaultDict[TraceID, Set[WrappedSpan]] = defaultdict(set)
210
- self._child_spans: DefaultDict[SpanID, Set[WrappedSpan]] = defaultdict(set)
211
- self._num_documents: DefaultDict[SpanID, int] = defaultdict(int)
212
- self._start_time_sorted_spans: SortedKeyList[WrappedSpan] = SortedKeyList(
213
- key=lambda span: span.start_time,
214
- )
215
- self._start_time_sorted_root_spans: SortedKeyList[WrappedSpan] = SortedKeyList(
216
- key=lambda span: span.start_time,
217
- )
218
- self._latency_sorted_root_spans: SortedKeyList[WrappedSpan] = SortedKeyList(
219
- key=lambda span: span[ComputedAttributes.LATENCY_MS],
220
- )
221
- self._root_span_latency_ms_sketch = DDSketch()
222
- self._token_count_total: int = 0
223
- self._last_updated_at: Optional[datetime] = None
224
-
225
- def get_trace(self, trace_id: TraceID) -> Iterator[WrappedSpan]:
226
- with self._lock:
227
- # make a copy because source data can mutate during iteration
228
- if not (trace := self._traces.get(trace_id)):
229
- return
230
- spans = tuple(trace)
231
- for span in spans:
232
- yield span
233
-
234
- def get_spans(
235
- self,
236
- start_time: Optional[datetime] = None,
237
- stop_time: Optional[datetime] = None,
238
- root_spans_only: Optional[bool] = False,
239
- span_ids: Optional[Iterable[SpanID]] = None,
240
- ) -> Iterator[WrappedSpan]:
241
- if not self._spans:
242
- return
243
- if start_time is None or stop_time is None:
244
- min_start_time, max_stop_time = cast(
245
- Tuple[datetime, datetime],
246
- self.right_open_time_range,
247
- )
248
- start_time = start_time or min_start_time
249
- stop_time = stop_time or max_stop_time
250
- if span_ids is not None:
251
- with self._lock:
252
- spans = tuple(
253
- span
254
- for span_id in span_ids
255
- if (
256
- (span := self._spans.get(span_id))
257
- and start_time <= span.start_time < stop_time
258
- and (not root_spans_only or span.parent_id is None)
259
- )
260
- )
261
- else:
262
- sorted_spans = (
263
- self._start_time_sorted_root_spans
264
- if root_spans_only
265
- else self._start_time_sorted_spans
266
- )
267
- # make a copy because source data can mutate during iteration
268
- with self._lock:
269
- spans = tuple(
270
- sorted_spans.irange_key(
271
- start_time.astimezone(timezone.utc),
272
- stop_time.astimezone(timezone.utc),
273
- inclusive=(True, False),
274
- reverse=True, # most recent spans first
275
- )
276
- )
277
- for span in spans:
278
- yield span
279
-
280
- def get_num_documents(self, span_id: SpanID) -> int:
281
- with self._lock:
282
- return self._num_documents.get(span_id) or 0
283
-
284
- def root_span_latency_ms_quantiles(self, probability: float) -> Optional[float]:
285
- """Root span latency quantiles in milliseconds"""
286
- with self._lock:
287
- return self._root_span_latency_ms_sketch.get_quantile_value(probability)
288
-
289
- def get_descendant_spans(self, span_id: SpanID) -> Iterator[WrappedSpan]:
290
- for span in self._get_descendant_spans(span_id):
291
- yield span
292
-
293
- def _get_descendant_spans(self, span_id: SpanID) -> Iterator[WrappedSpan]:
294
- with self._lock:
295
- # make a copy because source data can mutate during iteration
296
- if not (child_spans := self._child_spans.get(span_id)):
297
- return
298
- spans = tuple(child_spans)
299
- for child_span in spans:
300
- yield child_span
301
- yield from self._get_descendant_spans(child_span.context.span_id)
302
-
303
- @property
304
- def last_updated_at(self) -> Optional[datetime]:
305
- return self._last_updated_at
306
-
307
- def span_count(
308
- self,
309
- start_time: Optional[datetime] = None,
310
- stop_time: Optional[datetime] = None,
311
- ) -> int:
312
- _index = self._start_time_sorted_spans.bisect_key_left
313
- with self._lock:
314
- start: int = _index(start_time) if start_time else 0
315
- stop: int = _index(stop_time) if stop_time else len(self._spans)
316
- return stop - start
317
-
318
- def trace_count(
319
- self,
320
- start_time: Optional[datetime] = None,
321
- stop_time: Optional[datetime] = None,
322
- ) -> int:
323
- _index = self._start_time_sorted_root_spans.bisect_key_left
324
- with self._lock:
325
- start: int = _index(start_time) if start_time else 0
326
- stop: int = _index(stop_time) if stop_time else len(self._traces)
327
- return stop - start
328
-
329
- @property
330
- def token_count_total(self) -> int:
331
- return self._token_count_total
332
-
333
- @property
334
- def right_open_time_range(self) -> Tuple[Optional[datetime], Optional[datetime]]:
335
- with self._lock:
336
- if not self._start_time_sorted_spans:
337
- return None, None
338
- first_span = self._start_time_sorted_spans[0]
339
- last_span = self._start_time_sorted_spans[-1]
340
- min_start_time = first_span.start_time
341
- max_start_time = last_span.start_time
342
- return right_open_time_range(min_start_time, max_start_time)
343
-
344
- def add(self, span: WrappedSpan) -> None:
345
- with self._lock:
346
- self._add_span(span)
347
-
348
- def _add_span(self, span: WrappedSpan) -> None:
349
- span_id = span.context.span_id
350
- if span_id in self._spans:
351
- # Update is not allowed.
352
- return
353
-
354
- parent_span_id = span.parent_id
355
- is_root_span = parent_span_id is None
356
- if not is_root_span:
357
- self._child_spans[parent_span_id].add(span)
358
- self._parent_span_ids[span_id] = parent_span_id
359
-
360
- # Add computed attributes to span
361
- start_time = span.start_time
362
- end_time = span.end_time
363
- span[ComputedAttributes.LATENCY_MS] = latency = (
364
- end_time - start_time
365
- ).total_seconds() * 1000
366
- if is_root_span:
367
- self._root_span_latency_ms_sketch.add(latency)
368
- span[ComputedAttributes.ERROR_COUNT] = int(span.status_code is SpanStatusCode.ERROR)
369
-
370
- # Store the new span (after adding computed attributes)
371
- self._spans[span_id] = span
372
- self._traces[span.context.trace_id].add(span)
373
- self._start_time_sorted_spans.add(span)
374
- if is_root_span:
375
- self._start_time_sorted_root_spans.add(span)
376
- self._latency_sorted_root_spans.add(span)
377
- self._propagate_cumulative_values(span)
378
- self._update_cached_statistics(span)
379
-
380
- # Update last updated timestamp, letting users know
381
- # when they should refresh the page.
382
- self._last_updated_at = datetime.now(timezone.utc)
383
-
384
- def _update_cached_statistics(self, span: WrappedSpan) -> None:
385
- # Update statistics for quick access later
386
- span_id = span.context.span_id
387
- if token_count_update := span.attributes.get(SpanAttributes.LLM_TOKEN_COUNT_TOTAL):
388
- self._token_count_total += token_count_update
389
- if num_documents_update := len(
390
- span.attributes.get(SpanAttributes.RETRIEVAL_DOCUMENTS) or ()
391
- ):
392
- self._num_documents[span_id] += num_documents_update
393
-
394
- def _propagate_cumulative_values(self, span: WrappedSpan) -> None:
395
- child_spans: Iterable[WrappedSpan] = self._child_spans.get(span.context.span_id) or ()
396
- for cumulative_attribute, attribute in _CUMULATIVE_ATTRIBUTES.items():
397
- span[cumulative_attribute] = span[attribute] or 0
398
- for child_span in child_spans:
399
- span[cumulative_attribute] += child_span[cumulative_attribute] or 0
400
- self._update_ancestors(span)
401
-
402
- def _update_ancestors(self, span: WrappedSpan) -> None:
403
- # Add cumulative values to each of the span's ancestors.
404
- span_id = span.context.span_id
405
- for attribute in _CUMULATIVE_ATTRIBUTES.keys():
406
- value = span[attribute] or 0
407
- self._add_value_to_span_ancestors(span_id, attribute, value)
408
-
409
- def _add_value_to_span_ancestors(
410
- self,
411
- span_id: SpanID,
412
- attribute: ComputedAttributes,
413
- value: float,
414
- ) -> None:
415
- while parent_span_id := self._parent_span_ids.get(span_id):
416
- if not (parent_span := self._spans.get(parent_span_id)):
417
- return
418
- cumulative_value = parent_span[attribute] or 0
419
- parent_span[attribute] = cumulative_value + value
420
- span_id = parent_span_id
421
-
422
-
423
- class _Evals:
424
- def __init__(self) -> None:
425
- self._lock = RLock()
426
- self._trace_evaluations_by_name: DefaultDict[
427
- EvaluationName, Dict[TraceID, pb.Evaluation]
428
- ] = defaultdict(dict)
429
- self._evaluations_by_trace_id: DefaultDict[TraceID, Dict[EvaluationName, pb.Evaluation]] = (
430
- defaultdict(dict)
431
- )
432
- self._span_evaluations_by_name: DefaultDict[EvaluationName, Dict[SpanID, pb.Evaluation]] = (
433
- defaultdict(dict)
434
- )
435
- self._evaluations_by_span_id: DefaultDict[SpanID, Dict[EvaluationName, pb.Evaluation]] = (
436
- defaultdict(dict)
437
- )
438
- self._span_evaluation_labels: DefaultDict[EvaluationName, Set[str]] = defaultdict(set)
439
- self._document_evaluations_by_span_id: DefaultDict[
440
- SpanID, DefaultDict[EvaluationName, Dict[DocumentPosition, pb.Evaluation]]
441
- ] = defaultdict(lambda: defaultdict(dict))
442
- self._document_evaluations_by_name: DefaultDict[
443
- EvaluationName, DefaultDict[SpanID, Dict[DocumentPosition, pb.Evaluation]]
444
- ] = defaultdict(lambda: defaultdict(dict))
445
- self._last_updated_at: Optional[datetime] = None
446
-
447
- def add(self, evaluation: pb.Evaluation) -> None:
448
- with self._lock:
449
- self._add(evaluation)
450
-
451
- def _add(self, evaluation: pb.Evaluation) -> None:
452
- subject_id = evaluation.subject_id
453
- name = evaluation.name
454
- subject_id_kind = subject_id.WhichOneof("kind")
455
- if subject_id_kind == "document_retrieval_id":
456
- document_retrieval_id = subject_id.document_retrieval_id
457
- span_id = SpanID(document_retrieval_id.span_id)
458
- document_position = document_retrieval_id.document_position
459
- self._document_evaluations_by_span_id[span_id][name][document_position] = evaluation
460
- self._document_evaluations_by_name[name][span_id][document_position] = evaluation
461
- elif subject_id_kind == "span_id":
462
- span_id = SpanID(subject_id.span_id)
463
- self._evaluations_by_span_id[span_id][name] = evaluation
464
- self._span_evaluations_by_name[name][span_id] = evaluation
465
- if evaluation.result.HasField("label"):
466
- label = evaluation.result.label.value
467
- self._span_evaluation_labels[name].add(label)
468
- elif subject_id_kind == "trace_id":
469
- trace_id = TraceID(subject_id.trace_id)
470
- self._evaluations_by_trace_id[trace_id][name] = evaluation
471
- self._trace_evaluations_by_name[name][trace_id] = evaluation
472
- elif subject_id_kind is None:
473
- logger.warning(
474
- f"discarding evaluation with missing subject_id: {MessageToDict(evaluation)}"
475
- )
476
- else:
477
- assert_never(subject_id_kind)
478
- self._last_updated_at = datetime.now(timezone.utc)
479
-
480
- @property
481
- def last_updated_at(self) -> Optional[datetime]:
482
- return self._last_updated_at
483
-
484
- def get_span_evaluation(self, span_id: SpanID, name: str) -> Optional[pb.Evaluation]:
485
- with self._lock:
486
- span_evaluations = self._evaluations_by_span_id.get(span_id)
487
- return span_evaluations.get(name) if span_evaluations else None
488
-
489
- def get_span_evaluation_names(self) -> List[EvaluationName]:
490
- with self._lock:
491
- return list(self._span_evaluations_by_name)
492
-
493
- def get_document_evaluation_names(
494
- self,
495
- span_id: Optional[SpanID] = None,
496
- ) -> List[EvaluationName]:
497
- with self._lock:
498
- if span_id is None:
499
- return list(self._document_evaluations_by_name)
500
- document_evaluations = self._document_evaluations_by_span_id.get(span_id)
501
- return list(document_evaluations) if document_evaluations else []
502
-
503
- def get_span_evaluation_labels(self, name: EvaluationName) -> Tuple[str, ...]:
504
- with self._lock:
505
- labels = self._span_evaluation_labels.get(name)
506
- return tuple(labels) if labels else ()
507
-
508
- def get_span_evaluation_span_ids(self, name: EvaluationName) -> Tuple[SpanID, ...]:
509
- with self._lock:
510
- span_evaluations = self._span_evaluations_by_name.get(name)
511
- return tuple(span_evaluations.keys()) if span_evaluations else ()
512
-
513
- def get_evaluations_by_span_id(self, span_id: SpanID) -> List[pb.Evaluation]:
514
- with self._lock:
515
- evaluations = self._evaluations_by_span_id.get(span_id)
516
- return list(evaluations.values()) if evaluations else []
517
-
518
- def get_document_evaluation_span_ids(self, name: EvaluationName) -> Tuple[SpanID, ...]:
519
- with self._lock:
520
- document_evaluations = self._document_evaluations_by_name.get(name)
521
- return tuple(document_evaluations.keys()) if document_evaluations else ()
522
-
523
- def get_document_evaluations_by_span_id(self, span_id: SpanID) -> List[pb.Evaluation]:
524
- all_evaluations: List[pb.Evaluation] = []
525
- with self._lock:
526
- document_evaluations = self._document_evaluations_by_span_id.get(span_id)
527
- if not document_evaluations:
528
- return all_evaluations
529
- for evaluations in document_evaluations.values():
530
- all_evaluations.extend(evaluations.values())
531
- return all_evaluations
532
-
533
- def get_document_evaluation_scores(
534
- self,
535
- span_id: SpanID,
536
- evaluation_name: str,
537
- num_documents: int,
538
- ) -> List[float]:
539
- # num_documents is needed as argument because the document position values
540
- # are not checked during ingestion: e.g. if there exists a position value
541
- # of one trillion, we would not want to create a result that large.
542
- scores: List[float] = [np.nan] * num_documents
543
- with self._lock:
544
- document_evaluations = self._document_evaluations_by_span_id.get(span_id)
545
- if not document_evaluations:
546
- return scores
547
- evaluations = document_evaluations.get(evaluation_name)
548
- if not evaluations:
549
- return scores
550
- for document_position, evaluation in evaluations.items():
551
- result = evaluation.result
552
- if result.HasField("score") and document_position < num_documents:
553
- scores[document_position] = result.score.value
554
- return scores
555
-
556
- def export_evaluations(self) -> List[Evaluations]:
557
- evaluations: List[Evaluations] = []
558
- evaluations.extend(self._export_span_evaluations())
559
- evaluations.extend(self._export_document_evaluations())
560
- return evaluations
561
-
562
- def _export_span_evaluations(self) -> List[SpanEvaluations]:
563
- span_evaluations = []
564
- with self._lock:
565
- span_evaluations_by_name = tuple(self._span_evaluations_by_name.items())
566
- for eval_name, _span_evaluations_by_id in span_evaluations_by_name:
567
- span_ids = []
568
- rows = []
569
- with self._lock:
570
- span_evaluations_by_id = tuple(_span_evaluations_by_id.items())
571
- for span_id, pb_eval in span_evaluations_by_id:
572
- span_ids.append(span_id)
573
- rows.append(MessageToDict(pb_eval.result))
574
- dataframe = DataFrame(rows, index=Index(span_ids, name="context.span_id"))
575
- span_evaluations.append(SpanEvaluations(eval_name, dataframe))
576
- return span_evaluations
577
-
578
- def _export_document_evaluations(self) -> List[DocumentEvaluations]:
579
- evaluations = []
580
- with self._lock:
581
- document_evaluations_by_name = tuple(self._document_evaluations_by_name.items())
582
- for eval_name, _document_evaluations_by_id in document_evaluations_by_name:
583
- span_ids = []
584
- document_positions = []
585
- rows = []
586
- with self._lock:
587
- document_evaluations_by_id = tuple(_document_evaluations_by_id.items())
588
- for span_id, _document_evaluations_by_position in document_evaluations_by_id:
589
- with self._lock:
590
- document_evaluations_by_position = sorted(
591
- _document_evaluations_by_position.items()
592
- ) # ensure the evals are sorted by document position
593
- for document_position, pb_eval in document_evaluations_by_position:
594
- span_ids.append(span_id)
595
- document_positions.append(document_position)
596
- rows.append(MessageToDict(pb_eval.result))
597
- dataframe = DataFrame(
598
- rows,
599
- index=MultiIndex.from_arrays(
600
- (span_ids, document_positions),
601
- names=("context.span_id", "document_position"),
602
- ),
603
- )
604
- evaluations.append(DocumentEvaluations(eval_name, dataframe))
605
- return evaluations
606
-
607
-
608
- _CUMULATIVE_ATTRIBUTES: Mapping[ComputedAttributes, Union[str, ComputedAttributes]] = (
609
- MappingProxyType(
610
- {
611
- ComputedAttributes.CUMULATIVE_LLM_TOKEN_COUNT_TOTAL: SpanAttributes.LLM_TOKEN_COUNT_TOTAL, # noqa: E501
612
- ComputedAttributes.CUMULATIVE_LLM_TOKEN_COUNT_PROMPT: SpanAttributes.LLM_TOKEN_COUNT_PROMPT, # noqa: E501
613
- ComputedAttributes.CUMULATIVE_LLM_TOKEN_COUNT_COMPLETION: SpanAttributes.LLM_TOKEN_COUNT_COMPLETION, # noqa: E501
614
- ComputedAttributes.CUMULATIVE_ERROR_COUNT: ComputedAttributes.ERROR_COUNT,
615
- }
616
- )
617
- )