arize-phoenix 3.16.1__py3-none-any.whl → 7.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of arize-phoenix might be problematic. Click here for more details.

Files changed (338) hide show
  1. arize_phoenix-7.7.0.dist-info/METADATA +261 -0
  2. arize_phoenix-7.7.0.dist-info/RECORD +345 -0
  3. {arize_phoenix-3.16.1.dist-info → arize_phoenix-7.7.0.dist-info}/WHEEL +1 -1
  4. arize_phoenix-7.7.0.dist-info/entry_points.txt +3 -0
  5. phoenix/__init__.py +86 -14
  6. phoenix/auth.py +309 -0
  7. phoenix/config.py +675 -45
  8. phoenix/core/model.py +32 -30
  9. phoenix/core/model_schema.py +102 -109
  10. phoenix/core/model_schema_adapter.py +48 -45
  11. phoenix/datetime_utils.py +24 -3
  12. phoenix/db/README.md +54 -0
  13. phoenix/db/__init__.py +4 -0
  14. phoenix/db/alembic.ini +85 -0
  15. phoenix/db/bulk_inserter.py +294 -0
  16. phoenix/db/engines.py +208 -0
  17. phoenix/db/enums.py +20 -0
  18. phoenix/db/facilitator.py +113 -0
  19. phoenix/db/helpers.py +159 -0
  20. phoenix/db/insertion/constants.py +2 -0
  21. phoenix/db/insertion/dataset.py +227 -0
  22. phoenix/db/insertion/document_annotation.py +171 -0
  23. phoenix/db/insertion/evaluation.py +191 -0
  24. phoenix/db/insertion/helpers.py +98 -0
  25. phoenix/db/insertion/span.py +193 -0
  26. phoenix/db/insertion/span_annotation.py +158 -0
  27. phoenix/db/insertion/trace_annotation.py +158 -0
  28. phoenix/db/insertion/types.py +256 -0
  29. phoenix/db/migrate.py +86 -0
  30. phoenix/db/migrations/data_migration_scripts/populate_project_sessions.py +199 -0
  31. phoenix/db/migrations/env.py +114 -0
  32. phoenix/db/migrations/script.py.mako +26 -0
  33. phoenix/db/migrations/versions/10460e46d750_datasets.py +317 -0
  34. phoenix/db/migrations/versions/3be8647b87d8_add_token_columns_to_spans_table.py +126 -0
  35. phoenix/db/migrations/versions/4ded9e43755f_create_project_sessions_table.py +66 -0
  36. phoenix/db/migrations/versions/cd164e83824f_users_and_tokens.py +157 -0
  37. phoenix/db/migrations/versions/cf03bd6bae1d_init.py +280 -0
  38. phoenix/db/models.py +807 -0
  39. phoenix/exceptions.py +5 -1
  40. phoenix/experiments/__init__.py +6 -0
  41. phoenix/experiments/evaluators/__init__.py +29 -0
  42. phoenix/experiments/evaluators/base.py +158 -0
  43. phoenix/experiments/evaluators/code_evaluators.py +184 -0
  44. phoenix/experiments/evaluators/llm_evaluators.py +473 -0
  45. phoenix/experiments/evaluators/utils.py +236 -0
  46. phoenix/experiments/functions.py +772 -0
  47. phoenix/experiments/tracing.py +86 -0
  48. phoenix/experiments/types.py +726 -0
  49. phoenix/experiments/utils.py +25 -0
  50. phoenix/inferences/__init__.py +0 -0
  51. phoenix/{datasets → inferences}/errors.py +6 -5
  52. phoenix/{datasets → inferences}/fixtures.py +49 -42
  53. phoenix/{datasets/dataset.py → inferences/inferences.py} +121 -105
  54. phoenix/{datasets → inferences}/schema.py +11 -11
  55. phoenix/{datasets → inferences}/validation.py +13 -14
  56. phoenix/logging/__init__.py +3 -0
  57. phoenix/logging/_config.py +90 -0
  58. phoenix/logging/_filter.py +6 -0
  59. phoenix/logging/_formatter.py +69 -0
  60. phoenix/metrics/__init__.py +5 -4
  61. phoenix/metrics/binning.py +4 -3
  62. phoenix/metrics/metrics.py +2 -1
  63. phoenix/metrics/mixins.py +7 -6
  64. phoenix/metrics/retrieval_metrics.py +2 -1
  65. phoenix/metrics/timeseries.py +5 -4
  66. phoenix/metrics/wrappers.py +9 -3
  67. phoenix/pointcloud/clustering.py +5 -5
  68. phoenix/pointcloud/pointcloud.py +7 -5
  69. phoenix/pointcloud/projectors.py +5 -6
  70. phoenix/pointcloud/umap_parameters.py +53 -52
  71. phoenix/server/api/README.md +28 -0
  72. phoenix/server/api/auth.py +44 -0
  73. phoenix/server/api/context.py +152 -9
  74. phoenix/server/api/dataloaders/__init__.py +91 -0
  75. phoenix/server/api/dataloaders/annotation_summaries.py +139 -0
  76. phoenix/server/api/dataloaders/average_experiment_run_latency.py +54 -0
  77. phoenix/server/api/dataloaders/cache/__init__.py +3 -0
  78. phoenix/server/api/dataloaders/cache/two_tier_cache.py +68 -0
  79. phoenix/server/api/dataloaders/dataset_example_revisions.py +131 -0
  80. phoenix/server/api/dataloaders/dataset_example_spans.py +38 -0
  81. phoenix/server/api/dataloaders/document_evaluation_summaries.py +144 -0
  82. phoenix/server/api/dataloaders/document_evaluations.py +31 -0
  83. phoenix/server/api/dataloaders/document_retrieval_metrics.py +89 -0
  84. phoenix/server/api/dataloaders/experiment_annotation_summaries.py +79 -0
  85. phoenix/server/api/dataloaders/experiment_error_rates.py +58 -0
  86. phoenix/server/api/dataloaders/experiment_run_annotations.py +36 -0
  87. phoenix/server/api/dataloaders/experiment_run_counts.py +49 -0
  88. phoenix/server/api/dataloaders/experiment_sequence_number.py +44 -0
  89. phoenix/server/api/dataloaders/latency_ms_quantile.py +188 -0
  90. phoenix/server/api/dataloaders/min_start_or_max_end_times.py +85 -0
  91. phoenix/server/api/dataloaders/project_by_name.py +31 -0
  92. phoenix/server/api/dataloaders/record_counts.py +116 -0
  93. phoenix/server/api/dataloaders/session_io.py +79 -0
  94. phoenix/server/api/dataloaders/session_num_traces.py +30 -0
  95. phoenix/server/api/dataloaders/session_num_traces_with_error.py +32 -0
  96. phoenix/server/api/dataloaders/session_token_usages.py +41 -0
  97. phoenix/server/api/dataloaders/session_trace_latency_ms_quantile.py +55 -0
  98. phoenix/server/api/dataloaders/span_annotations.py +26 -0
  99. phoenix/server/api/dataloaders/span_dataset_examples.py +31 -0
  100. phoenix/server/api/dataloaders/span_descendants.py +57 -0
  101. phoenix/server/api/dataloaders/span_projects.py +33 -0
  102. phoenix/server/api/dataloaders/token_counts.py +124 -0
  103. phoenix/server/api/dataloaders/trace_by_trace_ids.py +25 -0
  104. phoenix/server/api/dataloaders/trace_root_spans.py +32 -0
  105. phoenix/server/api/dataloaders/user_roles.py +30 -0
  106. phoenix/server/api/dataloaders/users.py +33 -0
  107. phoenix/server/api/exceptions.py +48 -0
  108. phoenix/server/api/helpers/__init__.py +12 -0
  109. phoenix/server/api/helpers/dataset_helpers.py +217 -0
  110. phoenix/server/api/helpers/experiment_run_filters.py +763 -0
  111. phoenix/server/api/helpers/playground_clients.py +948 -0
  112. phoenix/server/api/helpers/playground_registry.py +70 -0
  113. phoenix/server/api/helpers/playground_spans.py +455 -0
  114. phoenix/server/api/input_types/AddExamplesToDatasetInput.py +16 -0
  115. phoenix/server/api/input_types/AddSpansToDatasetInput.py +14 -0
  116. phoenix/server/api/input_types/ChatCompletionInput.py +38 -0
  117. phoenix/server/api/input_types/ChatCompletionMessageInput.py +24 -0
  118. phoenix/server/api/input_types/ClearProjectInput.py +15 -0
  119. phoenix/server/api/input_types/ClusterInput.py +2 -2
  120. phoenix/server/api/input_types/CreateDatasetInput.py +12 -0
  121. phoenix/server/api/input_types/CreateSpanAnnotationInput.py +18 -0
  122. phoenix/server/api/input_types/CreateTraceAnnotationInput.py +18 -0
  123. phoenix/server/api/input_types/DataQualityMetricInput.py +5 -2
  124. phoenix/server/api/input_types/DatasetExampleInput.py +14 -0
  125. phoenix/server/api/input_types/DatasetSort.py +17 -0
  126. phoenix/server/api/input_types/DatasetVersionSort.py +16 -0
  127. phoenix/server/api/input_types/DeleteAnnotationsInput.py +7 -0
  128. phoenix/server/api/input_types/DeleteDatasetExamplesInput.py +13 -0
  129. phoenix/server/api/input_types/DeleteDatasetInput.py +7 -0
  130. phoenix/server/api/input_types/DeleteExperimentsInput.py +7 -0
  131. phoenix/server/api/input_types/DimensionFilter.py +4 -4
  132. phoenix/server/api/input_types/GenerativeModelInput.py +17 -0
  133. phoenix/server/api/input_types/Granularity.py +1 -1
  134. phoenix/server/api/input_types/InvocationParameters.py +162 -0
  135. phoenix/server/api/input_types/PatchAnnotationInput.py +19 -0
  136. phoenix/server/api/input_types/PatchDatasetExamplesInput.py +35 -0
  137. phoenix/server/api/input_types/PatchDatasetInput.py +14 -0
  138. phoenix/server/api/input_types/PerformanceMetricInput.py +5 -2
  139. phoenix/server/api/input_types/ProjectSessionSort.py +29 -0
  140. phoenix/server/api/input_types/SpanAnnotationSort.py +17 -0
  141. phoenix/server/api/input_types/SpanSort.py +134 -69
  142. phoenix/server/api/input_types/TemplateOptions.py +10 -0
  143. phoenix/server/api/input_types/TraceAnnotationSort.py +17 -0
  144. phoenix/server/api/input_types/UserRoleInput.py +9 -0
  145. phoenix/server/api/mutations/__init__.py +28 -0
  146. phoenix/server/api/mutations/api_key_mutations.py +167 -0
  147. phoenix/server/api/mutations/chat_mutations.py +593 -0
  148. phoenix/server/api/mutations/dataset_mutations.py +591 -0
  149. phoenix/server/api/mutations/experiment_mutations.py +75 -0
  150. phoenix/server/api/{types/ExportEventsMutation.py → mutations/export_events_mutations.py} +21 -18
  151. phoenix/server/api/mutations/project_mutations.py +57 -0
  152. phoenix/server/api/mutations/span_annotations_mutations.py +128 -0
  153. phoenix/server/api/mutations/trace_annotations_mutations.py +127 -0
  154. phoenix/server/api/mutations/user_mutations.py +329 -0
  155. phoenix/server/api/openapi/__init__.py +0 -0
  156. phoenix/server/api/openapi/main.py +17 -0
  157. phoenix/server/api/openapi/schema.py +16 -0
  158. phoenix/server/api/queries.py +738 -0
  159. phoenix/server/api/routers/__init__.py +11 -0
  160. phoenix/server/api/routers/auth.py +284 -0
  161. phoenix/server/api/routers/embeddings.py +26 -0
  162. phoenix/server/api/routers/oauth2.py +488 -0
  163. phoenix/server/api/routers/v1/__init__.py +64 -0
  164. phoenix/server/api/routers/v1/datasets.py +1017 -0
  165. phoenix/server/api/routers/v1/evaluations.py +362 -0
  166. phoenix/server/api/routers/v1/experiment_evaluations.py +115 -0
  167. phoenix/server/api/routers/v1/experiment_runs.py +167 -0
  168. phoenix/server/api/routers/v1/experiments.py +308 -0
  169. phoenix/server/api/routers/v1/pydantic_compat.py +78 -0
  170. phoenix/server/api/routers/v1/spans.py +267 -0
  171. phoenix/server/api/routers/v1/traces.py +208 -0
  172. phoenix/server/api/routers/v1/utils.py +95 -0
  173. phoenix/server/api/schema.py +44 -241
  174. phoenix/server/api/subscriptions.py +597 -0
  175. phoenix/server/api/types/Annotation.py +21 -0
  176. phoenix/server/api/types/AnnotationSummary.py +55 -0
  177. phoenix/server/api/types/AnnotatorKind.py +16 -0
  178. phoenix/server/api/types/ApiKey.py +27 -0
  179. phoenix/server/api/types/AuthMethod.py +9 -0
  180. phoenix/server/api/types/ChatCompletionMessageRole.py +11 -0
  181. phoenix/server/api/types/ChatCompletionSubscriptionPayload.py +46 -0
  182. phoenix/server/api/types/Cluster.py +25 -24
  183. phoenix/server/api/types/CreateDatasetPayload.py +8 -0
  184. phoenix/server/api/types/DataQualityMetric.py +31 -13
  185. phoenix/server/api/types/Dataset.py +288 -63
  186. phoenix/server/api/types/DatasetExample.py +85 -0
  187. phoenix/server/api/types/DatasetExampleRevision.py +34 -0
  188. phoenix/server/api/types/DatasetVersion.py +14 -0
  189. phoenix/server/api/types/Dimension.py +32 -31
  190. phoenix/server/api/types/DocumentEvaluationSummary.py +9 -8
  191. phoenix/server/api/types/EmbeddingDimension.py +56 -49
  192. phoenix/server/api/types/Evaluation.py +25 -31
  193. phoenix/server/api/types/EvaluationSummary.py +30 -50
  194. phoenix/server/api/types/Event.py +20 -20
  195. phoenix/server/api/types/ExampleRevisionInterface.py +14 -0
  196. phoenix/server/api/types/Experiment.py +152 -0
  197. phoenix/server/api/types/ExperimentAnnotationSummary.py +13 -0
  198. phoenix/server/api/types/ExperimentComparison.py +17 -0
  199. phoenix/server/api/types/ExperimentRun.py +119 -0
  200. phoenix/server/api/types/ExperimentRunAnnotation.py +56 -0
  201. phoenix/server/api/types/GenerativeModel.py +9 -0
  202. phoenix/server/api/types/GenerativeProvider.py +85 -0
  203. phoenix/server/api/types/Inferences.py +80 -0
  204. phoenix/server/api/types/InferencesRole.py +23 -0
  205. phoenix/server/api/types/LabelFraction.py +7 -0
  206. phoenix/server/api/types/MimeType.py +2 -2
  207. phoenix/server/api/types/Model.py +54 -54
  208. phoenix/server/api/types/PerformanceMetric.py +8 -5
  209. phoenix/server/api/types/Project.py +407 -142
  210. phoenix/server/api/types/ProjectSession.py +139 -0
  211. phoenix/server/api/types/Segments.py +4 -4
  212. phoenix/server/api/types/Span.py +221 -176
  213. phoenix/server/api/types/SpanAnnotation.py +43 -0
  214. phoenix/server/api/types/SpanIOValue.py +15 -0
  215. phoenix/server/api/types/SystemApiKey.py +9 -0
  216. phoenix/server/api/types/TemplateLanguage.py +10 -0
  217. phoenix/server/api/types/TimeSeries.py +19 -15
  218. phoenix/server/api/types/TokenUsage.py +11 -0
  219. phoenix/server/api/types/Trace.py +154 -0
  220. phoenix/server/api/types/TraceAnnotation.py +45 -0
  221. phoenix/server/api/types/UMAPPoints.py +7 -7
  222. phoenix/server/api/types/User.py +60 -0
  223. phoenix/server/api/types/UserApiKey.py +45 -0
  224. phoenix/server/api/types/UserRole.py +15 -0
  225. phoenix/server/api/types/node.py +4 -112
  226. phoenix/server/api/types/pagination.py +156 -57
  227. phoenix/server/api/utils.py +34 -0
  228. phoenix/server/app.py +864 -115
  229. phoenix/server/bearer_auth.py +163 -0
  230. phoenix/server/dml_event.py +136 -0
  231. phoenix/server/dml_event_handler.py +256 -0
  232. phoenix/server/email/__init__.py +0 -0
  233. phoenix/server/email/sender.py +97 -0
  234. phoenix/server/email/templates/__init__.py +0 -0
  235. phoenix/server/email/templates/password_reset.html +19 -0
  236. phoenix/server/email/types.py +11 -0
  237. phoenix/server/grpc_server.py +102 -0
  238. phoenix/server/jwt_store.py +505 -0
  239. phoenix/server/main.py +305 -116
  240. phoenix/server/oauth2.py +52 -0
  241. phoenix/server/openapi/__init__.py +0 -0
  242. phoenix/server/prometheus.py +111 -0
  243. phoenix/server/rate_limiters.py +188 -0
  244. phoenix/server/static/.vite/manifest.json +87 -0
  245. phoenix/server/static/assets/components-Cy9nwIvF.js +2125 -0
  246. phoenix/server/static/assets/index-BKvHIxkk.js +113 -0
  247. phoenix/server/static/assets/pages-CUi2xCVQ.js +4449 -0
  248. phoenix/server/static/assets/vendor-DvC8cT4X.js +894 -0
  249. phoenix/server/static/assets/vendor-DxkFTwjz.css +1 -0
  250. phoenix/server/static/assets/vendor-arizeai-Do1793cv.js +662 -0
  251. phoenix/server/static/assets/vendor-codemirror-BzwZPyJM.js +24 -0
  252. phoenix/server/static/assets/vendor-recharts-_Jb7JjhG.js +59 -0
  253. phoenix/server/static/assets/vendor-shiki-Cl9QBraO.js +5 -0
  254. phoenix/server/static/assets/vendor-three-DwGkEfCM.js +2998 -0
  255. phoenix/server/telemetry.py +68 -0
  256. phoenix/server/templates/index.html +82 -23
  257. phoenix/server/thread_server.py +3 -3
  258. phoenix/server/types.py +275 -0
  259. phoenix/services.py +27 -18
  260. phoenix/session/client.py +743 -68
  261. phoenix/session/data_extractor.py +31 -7
  262. phoenix/session/evaluation.py +3 -9
  263. phoenix/session/session.py +263 -219
  264. phoenix/settings.py +22 -0
  265. phoenix/trace/__init__.py +2 -22
  266. phoenix/trace/attributes.py +338 -0
  267. phoenix/trace/dsl/README.md +116 -0
  268. phoenix/trace/dsl/filter.py +663 -213
  269. phoenix/trace/dsl/helpers.py +73 -21
  270. phoenix/trace/dsl/query.py +574 -201
  271. phoenix/trace/exporter.py +24 -19
  272. phoenix/trace/fixtures.py +368 -32
  273. phoenix/trace/otel.py +71 -219
  274. phoenix/trace/projects.py +3 -2
  275. phoenix/trace/schemas.py +33 -11
  276. phoenix/trace/span_evaluations.py +21 -16
  277. phoenix/trace/span_json_decoder.py +6 -4
  278. phoenix/trace/span_json_encoder.py +2 -2
  279. phoenix/trace/trace_dataset.py +47 -32
  280. phoenix/trace/utils.py +21 -4
  281. phoenix/utilities/__init__.py +0 -26
  282. phoenix/utilities/client.py +132 -0
  283. phoenix/utilities/deprecation.py +31 -0
  284. phoenix/utilities/error_handling.py +3 -2
  285. phoenix/utilities/json.py +109 -0
  286. phoenix/utilities/logging.py +8 -0
  287. phoenix/utilities/project.py +2 -2
  288. phoenix/utilities/re.py +49 -0
  289. phoenix/utilities/span_store.py +0 -23
  290. phoenix/utilities/template_formatters.py +99 -0
  291. phoenix/version.py +1 -1
  292. arize_phoenix-3.16.1.dist-info/METADATA +0 -495
  293. arize_phoenix-3.16.1.dist-info/RECORD +0 -178
  294. phoenix/core/project.py +0 -619
  295. phoenix/core/traces.py +0 -96
  296. phoenix/experimental/evals/__init__.py +0 -73
  297. phoenix/experimental/evals/evaluators.py +0 -413
  298. phoenix/experimental/evals/functions/__init__.py +0 -4
  299. phoenix/experimental/evals/functions/classify.py +0 -453
  300. phoenix/experimental/evals/functions/executor.py +0 -353
  301. phoenix/experimental/evals/functions/generate.py +0 -138
  302. phoenix/experimental/evals/functions/processing.py +0 -76
  303. phoenix/experimental/evals/models/__init__.py +0 -14
  304. phoenix/experimental/evals/models/anthropic.py +0 -175
  305. phoenix/experimental/evals/models/base.py +0 -170
  306. phoenix/experimental/evals/models/bedrock.py +0 -221
  307. phoenix/experimental/evals/models/litellm.py +0 -134
  308. phoenix/experimental/evals/models/openai.py +0 -448
  309. phoenix/experimental/evals/models/rate_limiters.py +0 -246
  310. phoenix/experimental/evals/models/vertex.py +0 -173
  311. phoenix/experimental/evals/models/vertexai.py +0 -186
  312. phoenix/experimental/evals/retrievals.py +0 -96
  313. phoenix/experimental/evals/templates/__init__.py +0 -50
  314. phoenix/experimental/evals/templates/default_templates.py +0 -472
  315. phoenix/experimental/evals/templates/template.py +0 -195
  316. phoenix/experimental/evals/utils/__init__.py +0 -172
  317. phoenix/experimental/evals/utils/threads.py +0 -27
  318. phoenix/server/api/helpers.py +0 -11
  319. phoenix/server/api/routers/evaluation_handler.py +0 -109
  320. phoenix/server/api/routers/span_handler.py +0 -70
  321. phoenix/server/api/routers/trace_handler.py +0 -60
  322. phoenix/server/api/types/DatasetRole.py +0 -23
  323. phoenix/server/static/index.css +0 -6
  324. phoenix/server/static/index.js +0 -7447
  325. phoenix/storage/span_store/__init__.py +0 -23
  326. phoenix/storage/span_store/text_file.py +0 -85
  327. phoenix/trace/dsl/missing.py +0 -60
  328. phoenix/trace/langchain/__init__.py +0 -3
  329. phoenix/trace/langchain/instrumentor.py +0 -35
  330. phoenix/trace/llama_index/__init__.py +0 -3
  331. phoenix/trace/llama_index/callback.py +0 -102
  332. phoenix/trace/openai/__init__.py +0 -3
  333. phoenix/trace/openai/instrumentor.py +0 -30
  334. {arize_phoenix-3.16.1.dist-info → arize_phoenix-7.7.0.dist-info}/licenses/IP_NOTICE +0 -0
  335. {arize_phoenix-3.16.1.dist-info → arize_phoenix-7.7.0.dist-info}/licenses/LICENSE +0 -0
  336. /phoenix/{datasets → db/insertion}/__init__.py +0 -0
  337. /phoenix/{experimental → db/migrations}/__init__.py +0 -0
  338. /phoenix/{storage → db/migrations/data_migration_scripts}/__init__.py +0 -0
@@ -0,0 +1,593 @@
1
+ import asyncio
2
+ from dataclasses import asdict, field
3
+ from datetime import datetime, timezone
4
+ from itertools import chain, islice
5
+ from traceback import format_exc
6
+ from typing import Any, Iterable, Iterator, List, Optional, TypeVar, Union
7
+
8
+ import strawberry
9
+ from openinference.instrumentation import safe_json_dumps
10
+ from openinference.semconv.trace import (
11
+ MessageAttributes,
12
+ OpenInferenceMimeTypeValues,
13
+ OpenInferenceSpanKindValues,
14
+ SpanAttributes,
15
+ ToolAttributes,
16
+ ToolCallAttributes,
17
+ )
18
+ from opentelemetry.sdk.trace.id_generator import RandomIdGenerator as DefaultOTelIDGenerator
19
+ from opentelemetry.trace import StatusCode
20
+ from sqlalchemy import insert, select
21
+ from strawberry.relay import GlobalID
22
+ from strawberry.types import Info
23
+ from typing_extensions import assert_never
24
+
25
+ from phoenix.datetime_utils import local_now, normalize_datetime
26
+ from phoenix.db import models
27
+ from phoenix.db.helpers import get_dataset_example_revisions
28
+ from phoenix.server.api.auth import IsLocked, IsNotReadOnly
29
+ from phoenix.server.api.context import Context
30
+ from phoenix.server.api.exceptions import BadRequest, CustomGraphQLError, NotFound
31
+ from phoenix.server.api.helpers.dataset_helpers import get_dataset_example_output
32
+ from phoenix.server.api.helpers.playground_clients import (
33
+ PlaygroundStreamingClient,
34
+ initialize_playground_clients,
35
+ )
36
+ from phoenix.server.api.helpers.playground_registry import PLAYGROUND_CLIENT_REGISTRY
37
+ from phoenix.server.api.helpers.playground_spans import (
38
+ input_value_and_mime_type,
39
+ llm_input_messages,
40
+ llm_invocation_parameters,
41
+ llm_model_name,
42
+ llm_span_kind,
43
+ llm_tools,
44
+ )
45
+ from phoenix.server.api.input_types.ChatCompletionInput import (
46
+ ChatCompletionInput,
47
+ ChatCompletionOverDatasetInput,
48
+ )
49
+ from phoenix.server.api.input_types.TemplateOptions import TemplateOptions
50
+ from phoenix.server.api.subscriptions import (
51
+ _default_playground_experiment_description,
52
+ _default_playground_experiment_metadata,
53
+ _default_playground_experiment_name,
54
+ )
55
+ from phoenix.server.api.types.ChatCompletionMessageRole import ChatCompletionMessageRole
56
+ from phoenix.server.api.types.ChatCompletionSubscriptionPayload import (
57
+ TextChunk,
58
+ ToolCallChunk,
59
+ )
60
+ from phoenix.server.api.types.Dataset import Dataset
61
+ from phoenix.server.api.types.DatasetVersion import DatasetVersion
62
+ from phoenix.server.api.types.node import from_global_id_with_expected_type
63
+ from phoenix.server.api.types.Span import Span, to_gql_span
64
+ from phoenix.server.api.types.TemplateLanguage import TemplateLanguage
65
+ from phoenix.server.dml_event import SpanInsertEvent
66
+ from phoenix.trace.attributes import unflatten
67
+ from phoenix.trace.schemas import SpanException
68
+ from phoenix.utilities.json import jsonify
69
+ from phoenix.utilities.template_formatters import (
70
+ FStringTemplateFormatter,
71
+ MustacheTemplateFormatter,
72
+ NoOpFormatter,
73
+ TemplateFormatter,
74
+ )
75
+
76
# Module-level call: this runs once, when the module is first imported.
# NOTE(review): presumably this registers the provider clients that
# PLAYGROUND_CLIENT_REGISTRY hands out to the mutations -- confirm in
# phoenix.server.api.helpers.playground_clients.
initialize_playground_clients()

# A chat message flattened into a tuple, in the order the mutation code builds
# it: (role, content, tool_call_id, tool_calls).
ChatCompletionMessage = tuple[
    ChatCompletionMessageRole,
    str,
    Optional[str],
    Optional[List[Any]],
]
79
+
80
+
81
# GraphQL object type describing the function part of a tool call.
@strawberry.type
class ChatCompletionFunctionCall:
    # Name of the function, taken from the first streamed chunk of the call.
    name: str
    # Function arguments as a string; the chat-completion code appends the
    # argument fragment of each later chunk for the same call to this value.
    arguments: str
85
+
86
+
87
# GraphQL object type for a single tool call: the call id plus the function
# (name and argument string) attached to it. Instances are built from streamed
# ToolCallChunk values and keyed by the chunk id.
@strawberry.type
class ChatCompletionToolCall:
    id: str
    function: ChatCompletionFunctionCall
91
+
92
+
93
# GraphQL payload returned by a single chat completion.
@strawberry.type
class ChatCompletionMutationPayload:
    # Text content of the completion, if any.
    content: Optional[str]
    # Tool calls attached to the completion.
    tool_calls: List[ChatCompletionToolCall]
    # Span for the completion; the dataset mutation reads its trace id,
    # start/end times and underlying database span to build experiment runs.
    span: Span
    # When set, the dataset mutation stores it as the experiment run's error.
    error_message: Optional[str]
99
+
100
+
101
# GraphQL object type carrying an error message. The dataset mutation returns
# it in place of a ChatCompletionMutationPayload for any example whose
# completion did not produce a payload (the message is str() of that result).
@strawberry.type
class ChatCompletionMutationError:
    message: str
104
+
105
+
106
# Per-example entry of the dataset mutation's response: which dataset example
# was run, which experiment run recorded it, and the outcome.
@strawberry.type
class ChatCompletionOverDatasetMutationExamplePayload:
    dataset_example_id: GlobalID
    experiment_run_id: GlobalID
    # Either the successful completion payload or an error wrapper.
    result: Union[ChatCompletionMutationPayload, ChatCompletionMutationError]
111
+
112
+
113
# Response of the dataset mutation: global ids of the dataset, the dataset
# version that was used and the experiment that was created, plus one entry
# per example.
@strawberry.type
class ChatCompletionOverDatasetMutationPayload:
    dataset_id: GlobalID
    dataset_version_id: GlobalID
    experiment_id: GlobalID
    # default_factory gives every payload its own empty list; the mutation
    # appends the per-example entries after the payload is constructed.
    examples: list[ChatCompletionOverDatasetMutationExamplePayload] = field(default_factory=list)
119
+
120
+
121
+ @strawberry.type
122
+ class ChatCompletionMutationMixin:
123
# Run the playground chat completion once per example of a dataset version and
# record the outcome as a new experiment with one experiment run per example.
#
# Steps:
#   1. Build the LLM client for the requested provider/model.
#   2. Resolve the dataset and the dataset version (the version with the
#      highest id when none is given), load the example revisions, and insert
#      the Experiment row.
#   3. Execute the completions in batches; failures are collected, not raised.
#   4. Persist one ExperimentRun per example and assemble the response.
#
# Raises:
#   BadRequest: unknown provider, or the client could not be constructed.
#   NotFound: the dataset, its versions, or its examples could not be found.
@strawberry.mutation(permission_classes=[IsNotReadOnly, IsLocked])  # type: ignore
@classmethod
async def chat_completion_over_dataset(
    cls,
    info: Info[Context, None],
    input: ChatCompletionOverDatasetInput,
) -> ChatCompletionOverDatasetMutationPayload:
    provider_key = input.model.provider_key
    llm_client_class = PLAYGROUND_CLIENT_REGISTRY.get_client(provider_key, input.model.name)
    if llm_client_class is None:
        raise BadRequest(f"Unknown LLM provider: '{provider_key.value}'")
    try:
        llm_client = llm_client_class(
            model=input.model,
            api_key=input.api_key,
        )
    except CustomGraphQLError:
        # Already a GraphQL-facing error; let it through untouched.
        raise
    except Exception as error:
        # Chain explicitly so the underlying failure is kept as __cause__.
        raise BadRequest(
            f"Failed to connect to LLM API for {provider_key.value} {input.model.name}: "
            f"{str(error)}"
        ) from error
    dataset_id = from_global_id_with_expected_type(input.dataset_id, Dataset.__name__)
    dataset_version_id = (
        from_global_id_with_expected_type(
            global_id=input.dataset_version_id, expected_type_name=DatasetVersion.__name__
        )
        if input.dataset_version_id
        else None
    )
    async with info.context.db() as session:
        dataset = await session.scalar(select(models.Dataset).filter_by(id=dataset_id))
        if dataset is None:
            raise NotFound("Dataset not found")
        if dataset_version_id is None:
            # No explicit version requested: use the version with the highest id.
            resolved_version_id = await session.scalar(
                select(models.DatasetVersion.id)
                .filter_by(dataset_id=dataset_id)
                .order_by(models.DatasetVersion.id.desc())
                .limit(1)
            )
            if resolved_version_id is None:
                raise NotFound("No versions found for the given dataset")
        else:
            resolved_version_id = dataset_version_id
        revisions = [
            revision
            async for revision in await session.stream_scalars(
                get_dataset_example_revisions(resolved_version_id).order_by(
                    models.DatasetExampleRevision.id
                )
            )
        ]
        if not revisions:
            raise NotFound("No examples found for the given dataset and version")
        experiment = models.Experiment(
            # Reuse the id decoded above instead of decoding the global id again.
            dataset_id=dataset_id,
            dataset_version_id=resolved_version_id,
            name=input.experiment_name or _default_playground_experiment_name(),
            description=input.experiment_description
            or _default_playground_experiment_description(dataset_name=dataset.name),
            repetitions=1,
            metadata_=input.experiment_metadata
            or _default_playground_experiment_metadata(
                dataset_name=dataset.name,
                dataset_id=input.dataset_id,
                version_id=GlobalID(DatasetVersion.__name__, str(resolved_version_id)),
            ),
            project_name=PLAYGROUND_PROJECT_NAME,
        )
        session.add(experiment)
        # experiment.id is read further down when the runs are built.
        await session.flush()

    results = []
    # NOTE(review): _get_batches presumably yields consecutive chunks of at
    # most batch_size revisions in their original order -- confirm; the zip()
    # calls below rely on results lining up with revisions.
    batch_size = 3
    start_time = datetime.now(timezone.utc)
    for batch in _get_batches(revisions, batch_size):
        batch_results = await asyncio.gather(
            *(
                cls._chat_completion(
                    info,
                    llm_client,
                    ChatCompletionInput(
                        model=input.model,
                        api_key=input.api_key,
                        messages=input.messages,
                        tools=input.tools,
                        invocation_parameters=input.invocation_parameters,
                        template=TemplateOptions(
                            language=input.template_language,
                            variables=revision.input,
                        ),
                    ),
                )
                for revision in batch
            ),
            # A failing example yields its exception object in the results
            # instead of aborting the rest of the batch.
            return_exceptions=True,
        )
        results.extend(batch_results)

    payload = ChatCompletionOverDatasetMutationPayload(
        dataset_id=GlobalID(models.Dataset.__name__, str(dataset.id)),
        dataset_version_id=GlobalID(DatasetVersion.__name__, str(resolved_version_id)),
        experiment_id=GlobalID(models.Experiment.__name__, str(experiment.id)),
    )
    experiment_runs = []
    for revision, result in zip(revisions, results):
        if isinstance(result, BaseException):
            # The completion raised: record an empty-output run whose start
            # and end are both the time captured before the first batch.
            experiment_run = models.ExperimentRun(
                experiment_id=experiment.id,
                dataset_example_id=revision.dataset_example_id,
                output={},
                repetition_number=1,
                start_time=start_time,
                end_time=start_time,
                error=str(result),
            )
        else:
            db_span = result.span.db_span
            experiment_run = models.ExperimentRun(
                experiment_id=experiment.id,
                dataset_example_id=revision.dataset_example_id,
                trace_id=str(result.span.context.trace_id),
                output=models.ExperimentRunOutput(
                    task_output=get_dataset_example_output(db_span),
                ),
                prompt_token_count=db_span.cumulative_llm_token_count_prompt,
                completion_token_count=db_span.cumulative_llm_token_count_completion,
                repetition_number=1,
                start_time=result.span.start_time,
                end_time=result.span.end_time,
                error=str(result.error_message) if result.error_message else None,
            )
        experiment_runs.append(experiment_run)

    async with info.context.db() as session:
        session.add_all(experiment_runs)
        # experiment_run.id is read below to build the global ids.
        await session.flush()

    for revision, experiment_run, result in zip(revisions, experiment_runs, results):
        dataset_example_id = GlobalID(
            models.DatasetExample.__name__, str(revision.dataset_example_id)
        )
        experiment_run_id = GlobalID(models.ExperimentRun.__name__, str(experiment_run.id))
        example_payload = ChatCompletionOverDatasetMutationExamplePayload(
            dataset_example_id=dataset_example_id,
            experiment_run_id=experiment_run_id,
            # Anything that is not a payload (i.e. a collected exception) is
            # reported to the client as an error message.
            result=result
            if isinstance(result, ChatCompletionMutationPayload)
            else ChatCompletionMutationError(message=str(result)),
        )
        payload.examples.append(example_payload)
    return payload
277
+
278
# Run a single playground chat completion: look up the client class for the
# requested provider/model, construct the client, and delegate the actual
# completion to cls._chat_completion.
#
# Raises:
#   BadRequest: unknown provider, or the client could not be constructed.
@strawberry.mutation(permission_classes=[IsNotReadOnly, IsLocked])  # type: ignore
@classmethod
async def chat_completion(
    cls, info: Info[Context, None], input: ChatCompletionInput
) -> ChatCompletionMutationPayload:
    provider_key = input.model.provider_key
    llm_client_class = PLAYGROUND_CLIENT_REGISTRY.get_client(provider_key, input.model.name)
    if llm_client_class is None:
        raise BadRequest(f"Unknown LLM provider: '{provider_key.value}'")
    try:
        llm_client = llm_client_class(
            model=input.model,
            api_key=input.api_key,
        )
    except CustomGraphQLError:
        # Already a GraphQL-facing error; let it through untouched.
        raise
    except Exception as error:
        # Chain explicitly so the underlying failure is kept as __cause__.
        raise BadRequest(
            f"Failed to connect to LLM API for {provider_key.value} {input.model.name}: "
            f"{str(error)}"
        ) from error
    return await cls._chat_completion(info, llm_client, input)
300
+
301
@classmethod
async def _chat_completion(
    cls,
    info: Info[Context, None],
    llm_client: PlaygroundStreamingClient,
    input: ChatCompletionInput,
) -> ChatCompletionMutationPayload:
    """
    Stream one chat completion from ``llm_client`` and persist it as an LLM span.

    The streamed text and tool-call chunks are accumulated, a trace and a span
    are written under the project named ``PLAYGROUND_PROJECT_NAME`` (the project
    row is inserted when it does not exist yet), and a ``SpanInsertEvent`` is put
    on the context's event queue. Any exception raised while streaming is
    recorded on the span as a ``SpanException`` event and reported through the
    payload's ``error_message`` instead of being re-raised.
    """
    attributes: dict[str, Any] = {}

    # Flatten each input message into a (role, content, tool_call_id, tool_calls)
    # tuple, replacing values of an unexpected type with None.
    messages = [
        (
            msg.role,
            msg.content,
            msg.tool_call_id if isinstance(msg.tool_call_id, str) else None,
            msg.tool_calls if isinstance(msg.tool_calls, list) else None,
        )
        for msg in input.messages
    ]
    template_options = input.template
    if template_options:
        messages = list(_formatted_messages(messages, template_options))
        attributes[PROMPT_TEMPLATE_VARIABLES] = safe_json_dumps(template_options.variables)

    invocation_parameters = llm_client.construct_invocation_parameters(
        input.invocation_parameters
    )

    text_content = ""
    tool_calls: dict[str, ChatCompletionToolCall] = {}
    events = []
    attributes.update(
        chain(
            llm_span_kind(),
            llm_model_name(input.model.name),
            llm_tools(input.tools or []),
            llm_input_messages(messages),
            llm_invocation_parameters(invocation_parameters),
            input_value_and_mime_type(input),
        )
    )

    start_time = normalize_datetime(dt=local_now(), tz=timezone.utc)
    status_code = StatusCode.OK
    status_message = ""
    try:
        async for chunk in llm_client.chat_completion_create(
            messages=messages, tools=input.tools or [], **invocation_parameters
        ):
            if isinstance(chunk, TextChunk):
                text_content += chunk.content
                continue
            if isinstance(chunk, ToolCallChunk):
                # The first chunk for an id creates the tool call; later chunks
                # with the same id extend its argument string.
                if chunk.id in tool_calls:
                    tool_calls[chunk.id].function.arguments += chunk.function.arguments
                else:
                    tool_calls[chunk.id] = ChatCompletionToolCall(
                        id=chunk.id,
                        function=ChatCompletionFunctionCall(
                            name=chunk.function.name,
                            arguments=chunk.function.arguments,
                        ),
                    )
                continue
            assert_never(chunk)
    except Exception as exc:
        # A failed completion is still recorded: mark the span as errored and
        # attach the exception as a span event.
        status_code = StatusCode.ERROR
        status_message = str(exc)
        end_time = normalize_datetime(dt=local_now(), tz=timezone.utc)
        assert end_time is not None
        events.append(
            SpanException(
                timestamp=end_time,
                message=status_message,
                exception_type=type(exc).__name__,
                exception_escaped=False,
                exception_stacktrace=format_exc(),
            )
        )
    else:
        end_time = normalize_datetime(dt=local_now(), tz=timezone.utc)

    attributes.update(llm_client.attributes)
    if text_content or tool_calls:
        attributes.update(
            chain(
                _output_value_and_mime_type(text_content, tool_calls),
                _llm_output_messages(text_content, tool_calls),
            )
        )

    # Token counts are read from the collected attributes, defaulting to zero.
    prompt_token_count = attributes.get(LLM_TOKEN_COUNT_PROMPT, 0)
    completion_token_count = attributes.get(LLM_TOKEN_COUNT_COMPLETION, 0)
    errored = status_code is StatusCode.ERROR

    # Now write the span to the database
    trace_id = _generate_trace_id()
    span_id = _generate_span_id()
    async with info.context.db() as session:
        # Get or create the project ID
        project_id = await session.scalar(
            select(models.Project.id).where(models.Project.name == PLAYGROUND_PROJECT_NAME)
        )
        if project_id is None:
            project_id = await session.scalar(
                insert(models.Project)
                .returning(models.Project.id)
                .values(
                    name=PLAYGROUND_PROJECT_NAME,
                    description="Traces from prompt playground",
                )
            )
        trace = models.Trace(
            project_rowid=project_id,
            trace_id=trace_id,
            start_time=start_time,
            end_time=end_time,
        )
        span = models.Span(
            trace_rowid=trace.id,
            span_id=span_id,
            parent_id=None,
            name="ChatCompletion",
            span_kind=LLM,
            start_time=start_time,
            end_time=end_time,
            attributes=unflatten(attributes.items()),
            events=[_serialize_event(event) for event in events],
            status_code=status_code.name,
            status_message=status_message,
            cumulative_error_count=int(errored),
            cumulative_llm_token_count_prompt=prompt_token_count,
            cumulative_llm_token_count_completion=completion_token_count,
            llm_token_count_prompt=prompt_token_count,
            llm_token_count_completion=completion_token_count,
            trace=trace,
        )
        session.add(trace)
        session.add(span)
        await session.flush()

    gql_span = to_gql_span(span)

    info.context.event_queue.put(SpanInsertEvent(ids=(project_id,)))

    if errored:
        return ChatCompletionMutationPayload(
            content=None,
            tool_calls=[],
            span=gql_span,
            error_message=status_message,
        )
    return ChatCompletionMutationPayload(
        content=text_content or None,
        tool_calls=list(tool_calls.values()),
        span=gql_span,
        error_message=None,
    )
458
+
459
+
460
+ def _formatted_messages(
461
+ messages: Iterable[ChatCompletionMessage],
462
+ template_options: TemplateOptions,
463
+ ) -> Iterator[ChatCompletionMessage]:
464
+ """
465
+ Formats the messages using the given template options.
466
+ """
467
+ template_formatter = _template_formatter(template_language=template_options.language)
468
+ (
469
+ roles,
470
+ templates,
471
+ tool_call_id,
472
+ tool_calls,
473
+ ) = zip(*messages)
474
+ formatted_templates = map(
475
+ lambda template: template_formatter.format(template, **template_options.variables),
476
+ templates,
477
+ )
478
+ formatted_messages = zip(roles, formatted_templates, tool_call_id, tool_calls)
479
+ return formatted_messages
480
+
481
+
482
def _template_formatter(template_language: TemplateLanguage) -> TemplateFormatter:
    """
    Builds a new formatter instance matching the requested template language.

    Mustache and f-string templates each get their dedicated formatter, and
    ``TemplateLanguage.NONE`` gets a no-op formatter. Any other value is handed
    to ``assert_never``.
    """
    formatter: TemplateFormatter
    if template_language is TemplateLanguage.MUSTACHE:
        formatter = MustacheTemplateFormatter()
    elif template_language is TemplateLanguage.F_STRING:
        formatter = FStringTemplateFormatter()
    elif template_language is TemplateLanguage.NONE:
        formatter = NoOpFormatter()
    else:
        assert_never(template_language)
    return formatter
493
+
494
+
495
+ def _output_value_and_mime_type(
496
+ text: str, tool_calls: dict[str, ChatCompletionToolCall]
497
+ ) -> Iterator[tuple[str, Any]]:
498
+ if text and tool_calls:
499
+ yield OUTPUT_MIME_TYPE, JSON
500
+ yield (
501
+ OUTPUT_VALUE,
502
+ safe_json_dumps({"content": text, "tool_calls": jsonify(list(tool_calls.values()))}),
503
+ )
504
+ elif tool_calls:
505
+ yield OUTPUT_MIME_TYPE, JSON
506
+ yield OUTPUT_VALUE, safe_json_dumps(jsonify(list(tool_calls.values())))
507
+ elif text:
508
+ yield OUTPUT_MIME_TYPE, TEXT
509
+ yield OUTPUT_VALUE, text
510
+
511
+
512
def _llm_output_messages(
    text_content: str, tool_calls: dict[str, ChatCompletionToolCall]
) -> Iterator[tuple[str, Any]]:
    """
    Yields flattened span attributes describing a single assistant output message.

    The role is always emitted. The content is emitted only when non-empty.
    For each tool call, indexed by its position in ``tool_calls``, the function
    name is always emitted, while the id and the arguments are emitted only
    when truthy.
    """
    message_key = f"{LLM_OUTPUT_MESSAGES}.0"
    yield f"{message_key}.{MESSAGE_ROLE}", "assistant"
    if text_content:
        yield f"{message_key}.{MESSAGE_CONTENT}", text_content
    for position, call in enumerate(tool_calls.values()):
        call_key = f"{message_key}.{MESSAGE_TOOL_CALLS}.{position}"
        call_id = call.id
        if call_id:
            yield f"{call_key}.{TOOL_CALL_ID}", call_id
        yield f"{call_key}.{TOOL_CALL_FUNCTION_NAME}", call.function.name
        call_arguments = call.function.arguments
        if call_arguments:
            yield f"{call_key}.{TOOL_CALL_FUNCTION_ARGUMENTS_JSON}", call_arguments
533
+
534
+
535
def _generate_trace_id() -> str:
    """
    Return a newly generated trace ID as a 32-character lowercase hex string.

    The generator returns an integer, and a plain hex conversion drops leading
    zeros, so the result is left-padded to the fixed width of a 16-byte
    OpenTelemetry trace ID.
    """
    return _hex(DefaultOTelIDGenerator().generate_trace_id()).zfill(32)
537
+
538
+
539
def _generate_span_id() -> str:
    """
    Return a newly generated span ID as a 16-character lowercase hex string.

    The generator returns an integer, and a plain hex conversion drops leading
    zeros, so the result is left-padded to the fixed width of an 8-byte
    OpenTelemetry span ID.
    """
    return _hex(DefaultOTelIDGenerator().generate_span_id()).zfill(16)
541
+
542
+
543
+ def _hex(number: int) -> str:
544
+ return hex(number)[2:]
545
+
546
+
547
+ def _serialize_event(event: SpanException) -> dict[str, Any]:
548
+ return {k: (v.isoformat() if isinstance(v, datetime) else v) for k, v in asdict(event).items()}
549
+
550
+
551
+ _AnyT = TypeVar("_AnyT")
552
+
553
+
554
+ def _get_batches(
555
+ iterable: Iterable[_AnyT],
556
+ batch_size: int,
557
+ ) -> Iterator[list[_AnyT]]:
558
+ """Splits an iterable into batches not exceeding a specified size."""
559
+ iterator = iter(iterable)
560
+ while batch := list(islice(iterator, batch_size)):
561
+ yield batch
562
+
563
+
564
# Short module-level aliases for the OpenInference enum values and attribute
# names referenced throughout this module.

# MIME type and span kind values.
JSON = OpenInferenceMimeTypeValues.JSON.value
TEXT = OpenInferenceMimeTypeValues.TEXT.value
LLM = OpenInferenceSpanKindValues.LLM.value

# Span-level attribute names.
OPENINFERENCE_SPAN_KIND = SpanAttributes.OPENINFERENCE_SPAN_KIND
INPUT_MIME_TYPE = SpanAttributes.INPUT_MIME_TYPE
INPUT_VALUE = SpanAttributes.INPUT_VALUE
OUTPUT_MIME_TYPE = SpanAttributes.OUTPUT_MIME_TYPE
OUTPUT_VALUE = SpanAttributes.OUTPUT_VALUE
LLM_INPUT_MESSAGES = SpanAttributes.LLM_INPUT_MESSAGES
LLM_OUTPUT_MESSAGES = SpanAttributes.LLM_OUTPUT_MESSAGES
LLM_MODEL_NAME = SpanAttributes.LLM_MODEL_NAME
LLM_INVOCATION_PARAMETERS = SpanAttributes.LLM_INVOCATION_PARAMETERS
LLM_TOOLS = SpanAttributes.LLM_TOOLS
LLM_TOKEN_COUNT_PROMPT = SpanAttributes.LLM_TOKEN_COUNT_PROMPT
LLM_TOKEN_COUNT_COMPLETION = SpanAttributes.LLM_TOKEN_COUNT_COMPLETION

# Per-message attribute names.
MESSAGE_CONTENT = MessageAttributes.MESSAGE_CONTENT
MESSAGE_ROLE = MessageAttributes.MESSAGE_ROLE
MESSAGE_TOOL_CALLS = MessageAttributes.MESSAGE_TOOL_CALLS

# Per-tool-call attribute names.
TOOL_CALL_ID = ToolCallAttributes.TOOL_CALL_ID
TOOL_CALL_FUNCTION_NAME = ToolCallAttributes.TOOL_CALL_FUNCTION_NAME
TOOL_CALL_FUNCTION_ARGUMENTS_JSON = ToolCallAttributes.TOOL_CALL_FUNCTION_ARGUMENTS_JSON

# Tool definition and prompt template attribute names.
TOOL_JSON_SCHEMA = ToolAttributes.TOOL_JSON_SCHEMA
PROMPT_TEMPLATE_VARIABLES = SpanAttributes.LLM_PROMPT_TEMPLATE_VARIABLES


# Name of the project that playground spans are recorded under.
PLAYGROUND_PROJECT_NAME = "playground"