arize-phoenix 3.16.1__py3-none-any.whl → 7.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of arize-phoenix might be problematic. Click here for more details.

Files changed (338)
  1. arize_phoenix-7.7.0.dist-info/METADATA +261 -0
  2. arize_phoenix-7.7.0.dist-info/RECORD +345 -0
  3. {arize_phoenix-3.16.1.dist-info → arize_phoenix-7.7.0.dist-info}/WHEEL +1 -1
  4. arize_phoenix-7.7.0.dist-info/entry_points.txt +3 -0
  5. phoenix/__init__.py +86 -14
  6. phoenix/auth.py +309 -0
  7. phoenix/config.py +675 -45
  8. phoenix/core/model.py +32 -30
  9. phoenix/core/model_schema.py +102 -109
  10. phoenix/core/model_schema_adapter.py +48 -45
  11. phoenix/datetime_utils.py +24 -3
  12. phoenix/db/README.md +54 -0
  13. phoenix/db/__init__.py +4 -0
  14. phoenix/db/alembic.ini +85 -0
  15. phoenix/db/bulk_inserter.py +294 -0
  16. phoenix/db/engines.py +208 -0
  17. phoenix/db/enums.py +20 -0
  18. phoenix/db/facilitator.py +113 -0
  19. phoenix/db/helpers.py +159 -0
  20. phoenix/db/insertion/constants.py +2 -0
  21. phoenix/db/insertion/dataset.py +227 -0
  22. phoenix/db/insertion/document_annotation.py +171 -0
  23. phoenix/db/insertion/evaluation.py +191 -0
  24. phoenix/db/insertion/helpers.py +98 -0
  25. phoenix/db/insertion/span.py +193 -0
  26. phoenix/db/insertion/span_annotation.py +158 -0
  27. phoenix/db/insertion/trace_annotation.py +158 -0
  28. phoenix/db/insertion/types.py +256 -0
  29. phoenix/db/migrate.py +86 -0
  30. phoenix/db/migrations/data_migration_scripts/populate_project_sessions.py +199 -0
  31. phoenix/db/migrations/env.py +114 -0
  32. phoenix/db/migrations/script.py.mako +26 -0
  33. phoenix/db/migrations/versions/10460e46d750_datasets.py +317 -0
  34. phoenix/db/migrations/versions/3be8647b87d8_add_token_columns_to_spans_table.py +126 -0
  35. phoenix/db/migrations/versions/4ded9e43755f_create_project_sessions_table.py +66 -0
  36. phoenix/db/migrations/versions/cd164e83824f_users_and_tokens.py +157 -0
  37. phoenix/db/migrations/versions/cf03bd6bae1d_init.py +280 -0
  38. phoenix/db/models.py +807 -0
  39. phoenix/exceptions.py +5 -1
  40. phoenix/experiments/__init__.py +6 -0
  41. phoenix/experiments/evaluators/__init__.py +29 -0
  42. phoenix/experiments/evaluators/base.py +158 -0
  43. phoenix/experiments/evaluators/code_evaluators.py +184 -0
  44. phoenix/experiments/evaluators/llm_evaluators.py +473 -0
  45. phoenix/experiments/evaluators/utils.py +236 -0
  46. phoenix/experiments/functions.py +772 -0
  47. phoenix/experiments/tracing.py +86 -0
  48. phoenix/experiments/types.py +726 -0
  49. phoenix/experiments/utils.py +25 -0
  50. phoenix/inferences/__init__.py +0 -0
  51. phoenix/{datasets → inferences}/errors.py +6 -5
  52. phoenix/{datasets → inferences}/fixtures.py +49 -42
  53. phoenix/{datasets/dataset.py → inferences/inferences.py} +121 -105
  54. phoenix/{datasets → inferences}/schema.py +11 -11
  55. phoenix/{datasets → inferences}/validation.py +13 -14
  56. phoenix/logging/__init__.py +3 -0
  57. phoenix/logging/_config.py +90 -0
  58. phoenix/logging/_filter.py +6 -0
  59. phoenix/logging/_formatter.py +69 -0
  60. phoenix/metrics/__init__.py +5 -4
  61. phoenix/metrics/binning.py +4 -3
  62. phoenix/metrics/metrics.py +2 -1
  63. phoenix/metrics/mixins.py +7 -6
  64. phoenix/metrics/retrieval_metrics.py +2 -1
  65. phoenix/metrics/timeseries.py +5 -4
  66. phoenix/metrics/wrappers.py +9 -3
  67. phoenix/pointcloud/clustering.py +5 -5
  68. phoenix/pointcloud/pointcloud.py +7 -5
  69. phoenix/pointcloud/projectors.py +5 -6
  70. phoenix/pointcloud/umap_parameters.py +53 -52
  71. phoenix/server/api/README.md +28 -0
  72. phoenix/server/api/auth.py +44 -0
  73. phoenix/server/api/context.py +152 -9
  74. phoenix/server/api/dataloaders/__init__.py +91 -0
  75. phoenix/server/api/dataloaders/annotation_summaries.py +139 -0
  76. phoenix/server/api/dataloaders/average_experiment_run_latency.py +54 -0
  77. phoenix/server/api/dataloaders/cache/__init__.py +3 -0
  78. phoenix/server/api/dataloaders/cache/two_tier_cache.py +68 -0
  79. phoenix/server/api/dataloaders/dataset_example_revisions.py +131 -0
  80. phoenix/server/api/dataloaders/dataset_example_spans.py +38 -0
  81. phoenix/server/api/dataloaders/document_evaluation_summaries.py +144 -0
  82. phoenix/server/api/dataloaders/document_evaluations.py +31 -0
  83. phoenix/server/api/dataloaders/document_retrieval_metrics.py +89 -0
  84. phoenix/server/api/dataloaders/experiment_annotation_summaries.py +79 -0
  85. phoenix/server/api/dataloaders/experiment_error_rates.py +58 -0
  86. phoenix/server/api/dataloaders/experiment_run_annotations.py +36 -0
  87. phoenix/server/api/dataloaders/experiment_run_counts.py +49 -0
  88. phoenix/server/api/dataloaders/experiment_sequence_number.py +44 -0
  89. phoenix/server/api/dataloaders/latency_ms_quantile.py +188 -0
  90. phoenix/server/api/dataloaders/min_start_or_max_end_times.py +85 -0
  91. phoenix/server/api/dataloaders/project_by_name.py +31 -0
  92. phoenix/server/api/dataloaders/record_counts.py +116 -0
  93. phoenix/server/api/dataloaders/session_io.py +79 -0
  94. phoenix/server/api/dataloaders/session_num_traces.py +30 -0
  95. phoenix/server/api/dataloaders/session_num_traces_with_error.py +32 -0
  96. phoenix/server/api/dataloaders/session_token_usages.py +41 -0
  97. phoenix/server/api/dataloaders/session_trace_latency_ms_quantile.py +55 -0
  98. phoenix/server/api/dataloaders/span_annotations.py +26 -0
  99. phoenix/server/api/dataloaders/span_dataset_examples.py +31 -0
  100. phoenix/server/api/dataloaders/span_descendants.py +57 -0
  101. phoenix/server/api/dataloaders/span_projects.py +33 -0
  102. phoenix/server/api/dataloaders/token_counts.py +124 -0
  103. phoenix/server/api/dataloaders/trace_by_trace_ids.py +25 -0
  104. phoenix/server/api/dataloaders/trace_root_spans.py +32 -0
  105. phoenix/server/api/dataloaders/user_roles.py +30 -0
  106. phoenix/server/api/dataloaders/users.py +33 -0
  107. phoenix/server/api/exceptions.py +48 -0
  108. phoenix/server/api/helpers/__init__.py +12 -0
  109. phoenix/server/api/helpers/dataset_helpers.py +217 -0
  110. phoenix/server/api/helpers/experiment_run_filters.py +763 -0
  111. phoenix/server/api/helpers/playground_clients.py +948 -0
  112. phoenix/server/api/helpers/playground_registry.py +70 -0
  113. phoenix/server/api/helpers/playground_spans.py +455 -0
  114. phoenix/server/api/input_types/AddExamplesToDatasetInput.py +16 -0
  115. phoenix/server/api/input_types/AddSpansToDatasetInput.py +14 -0
  116. phoenix/server/api/input_types/ChatCompletionInput.py +38 -0
  117. phoenix/server/api/input_types/ChatCompletionMessageInput.py +24 -0
  118. phoenix/server/api/input_types/ClearProjectInput.py +15 -0
  119. phoenix/server/api/input_types/ClusterInput.py +2 -2
  120. phoenix/server/api/input_types/CreateDatasetInput.py +12 -0
  121. phoenix/server/api/input_types/CreateSpanAnnotationInput.py +18 -0
  122. phoenix/server/api/input_types/CreateTraceAnnotationInput.py +18 -0
  123. phoenix/server/api/input_types/DataQualityMetricInput.py +5 -2
  124. phoenix/server/api/input_types/DatasetExampleInput.py +14 -0
  125. phoenix/server/api/input_types/DatasetSort.py +17 -0
  126. phoenix/server/api/input_types/DatasetVersionSort.py +16 -0
  127. phoenix/server/api/input_types/DeleteAnnotationsInput.py +7 -0
  128. phoenix/server/api/input_types/DeleteDatasetExamplesInput.py +13 -0
  129. phoenix/server/api/input_types/DeleteDatasetInput.py +7 -0
  130. phoenix/server/api/input_types/DeleteExperimentsInput.py +7 -0
  131. phoenix/server/api/input_types/DimensionFilter.py +4 -4
  132. phoenix/server/api/input_types/GenerativeModelInput.py +17 -0
  133. phoenix/server/api/input_types/Granularity.py +1 -1
  134. phoenix/server/api/input_types/InvocationParameters.py +162 -0
  135. phoenix/server/api/input_types/PatchAnnotationInput.py +19 -0
  136. phoenix/server/api/input_types/PatchDatasetExamplesInput.py +35 -0
  137. phoenix/server/api/input_types/PatchDatasetInput.py +14 -0
  138. phoenix/server/api/input_types/PerformanceMetricInput.py +5 -2
  139. phoenix/server/api/input_types/ProjectSessionSort.py +29 -0
  140. phoenix/server/api/input_types/SpanAnnotationSort.py +17 -0
  141. phoenix/server/api/input_types/SpanSort.py +134 -69
  142. phoenix/server/api/input_types/TemplateOptions.py +10 -0
  143. phoenix/server/api/input_types/TraceAnnotationSort.py +17 -0
  144. phoenix/server/api/input_types/UserRoleInput.py +9 -0
  145. phoenix/server/api/mutations/__init__.py +28 -0
  146. phoenix/server/api/mutations/api_key_mutations.py +167 -0
  147. phoenix/server/api/mutations/chat_mutations.py +593 -0
  148. phoenix/server/api/mutations/dataset_mutations.py +591 -0
  149. phoenix/server/api/mutations/experiment_mutations.py +75 -0
  150. phoenix/server/api/{types/ExportEventsMutation.py → mutations/export_events_mutations.py} +21 -18
  151. phoenix/server/api/mutations/project_mutations.py +57 -0
  152. phoenix/server/api/mutations/span_annotations_mutations.py +128 -0
  153. phoenix/server/api/mutations/trace_annotations_mutations.py +127 -0
  154. phoenix/server/api/mutations/user_mutations.py +329 -0
  155. phoenix/server/api/openapi/__init__.py +0 -0
  156. phoenix/server/api/openapi/main.py +17 -0
  157. phoenix/server/api/openapi/schema.py +16 -0
  158. phoenix/server/api/queries.py +738 -0
  159. phoenix/server/api/routers/__init__.py +11 -0
  160. phoenix/server/api/routers/auth.py +284 -0
  161. phoenix/server/api/routers/embeddings.py +26 -0
  162. phoenix/server/api/routers/oauth2.py +488 -0
  163. phoenix/server/api/routers/v1/__init__.py +64 -0
  164. phoenix/server/api/routers/v1/datasets.py +1017 -0
  165. phoenix/server/api/routers/v1/evaluations.py +362 -0
  166. phoenix/server/api/routers/v1/experiment_evaluations.py +115 -0
  167. phoenix/server/api/routers/v1/experiment_runs.py +167 -0
  168. phoenix/server/api/routers/v1/experiments.py +308 -0
  169. phoenix/server/api/routers/v1/pydantic_compat.py +78 -0
  170. phoenix/server/api/routers/v1/spans.py +267 -0
  171. phoenix/server/api/routers/v1/traces.py +208 -0
  172. phoenix/server/api/routers/v1/utils.py +95 -0
  173. phoenix/server/api/schema.py +44 -241
  174. phoenix/server/api/subscriptions.py +597 -0
  175. phoenix/server/api/types/Annotation.py +21 -0
  176. phoenix/server/api/types/AnnotationSummary.py +55 -0
  177. phoenix/server/api/types/AnnotatorKind.py +16 -0
  178. phoenix/server/api/types/ApiKey.py +27 -0
  179. phoenix/server/api/types/AuthMethod.py +9 -0
  180. phoenix/server/api/types/ChatCompletionMessageRole.py +11 -0
  181. phoenix/server/api/types/ChatCompletionSubscriptionPayload.py +46 -0
  182. phoenix/server/api/types/Cluster.py +25 -24
  183. phoenix/server/api/types/CreateDatasetPayload.py +8 -0
  184. phoenix/server/api/types/DataQualityMetric.py +31 -13
  185. phoenix/server/api/types/Dataset.py +288 -63
  186. phoenix/server/api/types/DatasetExample.py +85 -0
  187. phoenix/server/api/types/DatasetExampleRevision.py +34 -0
  188. phoenix/server/api/types/DatasetVersion.py +14 -0
  189. phoenix/server/api/types/Dimension.py +32 -31
  190. phoenix/server/api/types/DocumentEvaluationSummary.py +9 -8
  191. phoenix/server/api/types/EmbeddingDimension.py +56 -49
  192. phoenix/server/api/types/Evaluation.py +25 -31
  193. phoenix/server/api/types/EvaluationSummary.py +30 -50
  194. phoenix/server/api/types/Event.py +20 -20
  195. phoenix/server/api/types/ExampleRevisionInterface.py +14 -0
  196. phoenix/server/api/types/Experiment.py +152 -0
  197. phoenix/server/api/types/ExperimentAnnotationSummary.py +13 -0
  198. phoenix/server/api/types/ExperimentComparison.py +17 -0
  199. phoenix/server/api/types/ExperimentRun.py +119 -0
  200. phoenix/server/api/types/ExperimentRunAnnotation.py +56 -0
  201. phoenix/server/api/types/GenerativeModel.py +9 -0
  202. phoenix/server/api/types/GenerativeProvider.py +85 -0
  203. phoenix/server/api/types/Inferences.py +80 -0
  204. phoenix/server/api/types/InferencesRole.py +23 -0
  205. phoenix/server/api/types/LabelFraction.py +7 -0
  206. phoenix/server/api/types/MimeType.py +2 -2
  207. phoenix/server/api/types/Model.py +54 -54
  208. phoenix/server/api/types/PerformanceMetric.py +8 -5
  209. phoenix/server/api/types/Project.py +407 -142
  210. phoenix/server/api/types/ProjectSession.py +139 -0
  211. phoenix/server/api/types/Segments.py +4 -4
  212. phoenix/server/api/types/Span.py +221 -176
  213. phoenix/server/api/types/SpanAnnotation.py +43 -0
  214. phoenix/server/api/types/SpanIOValue.py +15 -0
  215. phoenix/server/api/types/SystemApiKey.py +9 -0
  216. phoenix/server/api/types/TemplateLanguage.py +10 -0
  217. phoenix/server/api/types/TimeSeries.py +19 -15
  218. phoenix/server/api/types/TokenUsage.py +11 -0
  219. phoenix/server/api/types/Trace.py +154 -0
  220. phoenix/server/api/types/TraceAnnotation.py +45 -0
  221. phoenix/server/api/types/UMAPPoints.py +7 -7
  222. phoenix/server/api/types/User.py +60 -0
  223. phoenix/server/api/types/UserApiKey.py +45 -0
  224. phoenix/server/api/types/UserRole.py +15 -0
  225. phoenix/server/api/types/node.py +4 -112
  226. phoenix/server/api/types/pagination.py +156 -57
  227. phoenix/server/api/utils.py +34 -0
  228. phoenix/server/app.py +864 -115
  229. phoenix/server/bearer_auth.py +163 -0
  230. phoenix/server/dml_event.py +136 -0
  231. phoenix/server/dml_event_handler.py +256 -0
  232. phoenix/server/email/__init__.py +0 -0
  233. phoenix/server/email/sender.py +97 -0
  234. phoenix/server/email/templates/__init__.py +0 -0
  235. phoenix/server/email/templates/password_reset.html +19 -0
  236. phoenix/server/email/types.py +11 -0
  237. phoenix/server/grpc_server.py +102 -0
  238. phoenix/server/jwt_store.py +505 -0
  239. phoenix/server/main.py +305 -116
  240. phoenix/server/oauth2.py +52 -0
  241. phoenix/server/openapi/__init__.py +0 -0
  242. phoenix/server/prometheus.py +111 -0
  243. phoenix/server/rate_limiters.py +188 -0
  244. phoenix/server/static/.vite/manifest.json +87 -0
  245. phoenix/server/static/assets/components-Cy9nwIvF.js +2125 -0
  246. phoenix/server/static/assets/index-BKvHIxkk.js +113 -0
  247. phoenix/server/static/assets/pages-CUi2xCVQ.js +4449 -0
  248. phoenix/server/static/assets/vendor-DvC8cT4X.js +894 -0
  249. phoenix/server/static/assets/vendor-DxkFTwjz.css +1 -0
  250. phoenix/server/static/assets/vendor-arizeai-Do1793cv.js +662 -0
  251. phoenix/server/static/assets/vendor-codemirror-BzwZPyJM.js +24 -0
  252. phoenix/server/static/assets/vendor-recharts-_Jb7JjhG.js +59 -0
  253. phoenix/server/static/assets/vendor-shiki-Cl9QBraO.js +5 -0
  254. phoenix/server/static/assets/vendor-three-DwGkEfCM.js +2998 -0
  255. phoenix/server/telemetry.py +68 -0
  256. phoenix/server/templates/index.html +82 -23
  257. phoenix/server/thread_server.py +3 -3
  258. phoenix/server/types.py +275 -0
  259. phoenix/services.py +27 -18
  260. phoenix/session/client.py +743 -68
  261. phoenix/session/data_extractor.py +31 -7
  262. phoenix/session/evaluation.py +3 -9
  263. phoenix/session/session.py +263 -219
  264. phoenix/settings.py +22 -0
  265. phoenix/trace/__init__.py +2 -22
  266. phoenix/trace/attributes.py +338 -0
  267. phoenix/trace/dsl/README.md +116 -0
  268. phoenix/trace/dsl/filter.py +663 -213
  269. phoenix/trace/dsl/helpers.py +73 -21
  270. phoenix/trace/dsl/query.py +574 -201
  271. phoenix/trace/exporter.py +24 -19
  272. phoenix/trace/fixtures.py +368 -32
  273. phoenix/trace/otel.py +71 -219
  274. phoenix/trace/projects.py +3 -2
  275. phoenix/trace/schemas.py +33 -11
  276. phoenix/trace/span_evaluations.py +21 -16
  277. phoenix/trace/span_json_decoder.py +6 -4
  278. phoenix/trace/span_json_encoder.py +2 -2
  279. phoenix/trace/trace_dataset.py +47 -32
  280. phoenix/trace/utils.py +21 -4
  281. phoenix/utilities/__init__.py +0 -26
  282. phoenix/utilities/client.py +132 -0
  283. phoenix/utilities/deprecation.py +31 -0
  284. phoenix/utilities/error_handling.py +3 -2
  285. phoenix/utilities/json.py +109 -0
  286. phoenix/utilities/logging.py +8 -0
  287. phoenix/utilities/project.py +2 -2
  288. phoenix/utilities/re.py +49 -0
  289. phoenix/utilities/span_store.py +0 -23
  290. phoenix/utilities/template_formatters.py +99 -0
  291. phoenix/version.py +1 -1
  292. arize_phoenix-3.16.1.dist-info/METADATA +0 -495
  293. arize_phoenix-3.16.1.dist-info/RECORD +0 -178
  294. phoenix/core/project.py +0 -619
  295. phoenix/core/traces.py +0 -96
  296. phoenix/experimental/evals/__init__.py +0 -73
  297. phoenix/experimental/evals/evaluators.py +0 -413
  298. phoenix/experimental/evals/functions/__init__.py +0 -4
  299. phoenix/experimental/evals/functions/classify.py +0 -453
  300. phoenix/experimental/evals/functions/executor.py +0 -353
  301. phoenix/experimental/evals/functions/generate.py +0 -138
  302. phoenix/experimental/evals/functions/processing.py +0 -76
  303. phoenix/experimental/evals/models/__init__.py +0 -14
  304. phoenix/experimental/evals/models/anthropic.py +0 -175
  305. phoenix/experimental/evals/models/base.py +0 -170
  306. phoenix/experimental/evals/models/bedrock.py +0 -221
  307. phoenix/experimental/evals/models/litellm.py +0 -134
  308. phoenix/experimental/evals/models/openai.py +0 -448
  309. phoenix/experimental/evals/models/rate_limiters.py +0 -246
  310. phoenix/experimental/evals/models/vertex.py +0 -173
  311. phoenix/experimental/evals/models/vertexai.py +0 -186
  312. phoenix/experimental/evals/retrievals.py +0 -96
  313. phoenix/experimental/evals/templates/__init__.py +0 -50
  314. phoenix/experimental/evals/templates/default_templates.py +0 -472
  315. phoenix/experimental/evals/templates/template.py +0 -195
  316. phoenix/experimental/evals/utils/__init__.py +0 -172
  317. phoenix/experimental/evals/utils/threads.py +0 -27
  318. phoenix/server/api/helpers.py +0 -11
  319. phoenix/server/api/routers/evaluation_handler.py +0 -109
  320. phoenix/server/api/routers/span_handler.py +0 -70
  321. phoenix/server/api/routers/trace_handler.py +0 -60
  322. phoenix/server/api/types/DatasetRole.py +0 -23
  323. phoenix/server/static/index.css +0 -6
  324. phoenix/server/static/index.js +0 -7447
  325. phoenix/storage/span_store/__init__.py +0 -23
  326. phoenix/storage/span_store/text_file.py +0 -85
  327. phoenix/trace/dsl/missing.py +0 -60
  328. phoenix/trace/langchain/__init__.py +0 -3
  329. phoenix/trace/langchain/instrumentor.py +0 -35
  330. phoenix/trace/llama_index/__init__.py +0 -3
  331. phoenix/trace/llama_index/callback.py +0 -102
  332. phoenix/trace/openai/__init__.py +0 -3
  333. phoenix/trace/openai/instrumentor.py +0 -30
  334. {arize_phoenix-3.16.1.dist-info → arize_phoenix-7.7.0.dist-info}/licenses/IP_NOTICE +0 -0
  335. {arize_phoenix-3.16.1.dist-info → arize_phoenix-7.7.0.dist-info}/licenses/LICENSE +0 -0
  336. phoenix/{datasets → db/insertion}/__init__.py +0 -0
  337. phoenix/{experimental → db/migrations}/__init__.py +0 -0
  338. phoenix/{storage → db/migrations/data_migration_scripts}/__init__.py +0 -0
phoenix/experimental/evals/functions/executor.py (deleted)
@@ -1,353 +0,0 @@
1
- from __future__ import annotations
2
-
3
- import asyncio
4
- import logging
5
- import signal
6
- import traceback
7
- from typing import Any, Callable, Coroutine, List, Optional, Protocol, Sequence, Tuple, Union
8
-
9
- from tqdm.auto import tqdm
10
-
11
- from phoenix.exceptions import PhoenixException
12
-
13
- logger = logging.getLogger(__name__)
14
-
15
-
16
- class Unset:
17
- pass
18
-
19
-
20
- _unset = Unset()
21
-
22
-
23
- class Executor(Protocol):
24
- def run(self, inputs: Sequence[Any]) -> List[Any]: ...
25
-
26
-
27
- class AsyncExecutor(Executor):
28
- """
29
- A class that provides asynchronous execution of tasks using a producer-consumer pattern.
30
-
31
- An async interface is provided by the `execute` method, which returns a coroutine, and a sync
32
- interface is provided by the `run` method.
33
-
34
- Args:
35
- generation_fn (Callable[[Any], Coroutine[Any, Any, Any]]): A coroutine function that
36
- generates tasks to be executed.
37
-
38
- concurrency (int, optional): The number of concurrent consumers. Defaults to 3.
39
-
40
- tqdm_bar_format (Optional[str], optional): The format string for the progress bar. Defaults
41
- to None.
42
-
43
- max_retries (int, optional): The maximum number of times to retry on exceptions. Defaults to
44
- 10.
45
-
46
- exit_on_error (bool, optional): Whether to exit execution on the first encountered error.
47
- Defaults to True.
48
-
49
- fallback_return_value (Union[Unset, Any], optional): The fallback return value for tasks
50
- that encounter errors. Defaults to _unset.
51
-
52
- termination_signal (signal.Signals, optional): The signal handled to terminate the executor.
53
- """
54
-
55
- def __init__(
56
- self,
57
- generation_fn: Callable[[Any], Coroutine[Any, Any, Any]],
58
- concurrency: int = 3,
59
- tqdm_bar_format: Optional[str] = None,
60
- max_retries: int = 10,
61
- exit_on_error: bool = True,
62
- fallback_return_value: Union[Unset, Any] = _unset,
63
- termination_signal: signal.Signals = signal.SIGINT,
64
- ):
65
- self.generate = generation_fn
66
- self.fallback_return_value = fallback_return_value
67
- self.concurrency = concurrency
68
- self.tqdm_bar_format = tqdm_bar_format
69
- self.max_retries = max_retries
70
- self.exit_on_error = exit_on_error
71
- self.base_priority = 0
72
- self.termination_signal = termination_signal
73
-
74
- async def producer(
75
- self,
76
- inputs: Sequence[Any],
77
- queue: asyncio.PriorityQueue[Tuple[int, Any]],
78
- max_fill: int,
79
- done_producing: asyncio.Event,
80
- termination_signal: asyncio.Event,
81
- ) -> None:
82
- try:
83
- for index, input in enumerate(inputs):
84
- if termination_signal.is_set():
85
- break
86
- while queue.qsize() >= max_fill:
87
- # keep room in the queue for requeues
88
- await asyncio.sleep(1)
89
- await queue.put((self.base_priority, (index, input)))
90
- finally:
91
- done_producing.set()
92
-
93
- async def consumer(
94
- self,
95
- output: List[Any],
96
- queue: asyncio.PriorityQueue[Tuple[int, Any]],
97
- done_producing: asyncio.Event,
98
- termination_event: asyncio.Event,
99
- progress_bar: tqdm[Any],
100
- ) -> None:
101
- termination_event_watcher = None
102
- while True:
103
- marked_done = False
104
- try:
105
- priority, item = await asyncio.wait_for(queue.get(), timeout=1)
106
- except asyncio.TimeoutError:
107
- if done_producing.is_set() and queue.empty():
108
- break
109
- continue
110
- if termination_event.is_set():
111
- # discard any remaining items in the queue
112
- queue.task_done()
113
- marked_done = True
114
- continue
115
-
116
- index, payload = item
117
- try:
118
- generate_task = asyncio.create_task(self.generate(payload))
119
- termination_event_watcher = asyncio.create_task(termination_event.wait())
120
- done, pending = await asyncio.wait(
121
- [generate_task, termination_event_watcher],
122
- timeout=120,
123
- return_when=asyncio.FIRST_COMPLETED,
124
- )
125
- if generate_task in done:
126
- output[index] = generate_task.result()
127
- progress_bar.update()
128
- elif termination_event.is_set():
129
- # discard the pending task and remaining items in the queue
130
- if not generate_task.done():
131
- generate_task.cancel()
132
- try:
133
- # allow any cleanup to finish for the cancelled task
134
- await generate_task
135
- except asyncio.CancelledError:
136
- # Handle the cancellation exception
137
- pass
138
- queue.task_done()
139
- marked_done = True
140
- continue
141
- else:
142
- tqdm.write("Worker timeout, requeuing")
143
- # task timeouts are requeued at base priority
144
- await queue.put((self.base_priority, item))
145
- except Exception as exc:
146
- is_phoenix_exception = isinstance(exc, PhoenixException)
147
- if (retry_count := abs(priority)) <= self.max_retries and not is_phoenix_exception:
148
- tqdm.write(
149
- f"Exception in worker on attempt {retry_count + 1}: raised {repr(exc)}"
150
- )
151
- tqdm.write("Requeuing...")
152
- await queue.put((priority - 1, item))
153
- else:
154
- tqdm.write(f"Exception in worker: {traceback.format_exc()}")
155
- if self.exit_on_error:
156
- termination_event.set()
157
- else:
158
- progress_bar.update()
159
- finally:
160
- if not marked_done:
161
- queue.task_done()
162
- if termination_event_watcher and not termination_event_watcher.done():
163
- termination_event_watcher.cancel()
164
-
165
- async def execute(self, inputs: Sequence[Any]) -> List[Any]:
166
- termination_event = asyncio.Event()
167
-
168
- def termination_handler(signum: int, frame: Any) -> None:
169
- termination_event.set()
170
- tqdm.write("Process was interrupted. The return value will be incomplete...")
171
-
172
- signal.signal(self.termination_signal, termination_handler)
173
- outputs = [self.fallback_return_value] * len(inputs)
174
- progress_bar = tqdm(total=len(inputs), bar_format=self.tqdm_bar_format)
175
-
176
- max_queue_size = 5 * self.concurrency # limit the queue to bound memory usage
177
- max_fill = max_queue_size - (2 * self.concurrency) # ensure there is always room to requeue
178
- queue: asyncio.PriorityQueue[Tuple[int, Any]] = asyncio.PriorityQueue(
179
- maxsize=max_queue_size
180
- )
181
- done_producing = asyncio.Event()
182
-
183
- producer = asyncio.create_task(
184
- self.producer(inputs, queue, max_fill, done_producing, termination_event)
185
- )
186
- consumers = [
187
- asyncio.create_task(
188
- self.consumer(outputs, queue, done_producing, termination_event, progress_bar)
189
- )
190
- for _ in range(self.concurrency)
191
- ]
192
-
193
- await asyncio.gather(producer, *consumers)
194
- join_task = asyncio.create_task(queue.join())
195
- termination_event_watcher = asyncio.create_task(termination_event.wait())
196
- done, pending = await asyncio.wait(
197
- [join_task, termination_event_watcher], return_when=asyncio.FIRST_COMPLETED
198
- )
199
- if termination_event_watcher in done:
200
- # Cancel all tasks
201
- if not join_task.done():
202
- join_task.cancel()
203
- if not producer.done():
204
- producer.cancel()
205
- for task in consumers:
206
- if not task.done():
207
- task.cancel()
208
-
209
- if not termination_event_watcher.done():
210
- termination_event_watcher.cancel()
211
-
212
- # reset the SIGTERM handler
213
- signal.signal(self.termination_signal, signal.SIG_DFL) # reset the SIGTERM handler
214
- return outputs
215
-
216
- def run(self, inputs: Sequence[Any]) -> List[Any]:
217
- return asyncio.run(self.execute(inputs))
218
-
219
-
220
- class SyncExecutor(Executor):
221
- """
222
- Synchronous executor for generating outputs from inputs using a given generation function.
223
-
224
- Args:
225
- generation_fn (Callable[[Any], Any]): The generation function that takes an input and
226
- returns an output.
227
-
228
- tqdm_bar_format (Optional[str], optional): The format string for the progress bar. Defaults
229
- to None.
230
-
231
- max_retries (int, optional): The maximum number of times to retry on exceptions. Defaults to
232
- 10.
233
-
234
- exit_on_error (bool, optional): Whether to exit execution on the first encountered error.
235
- Defaults to True.
236
-
237
- fallback_return_value (Union[Unset, Any], optional): The fallback return value for tasks
238
- that encounter errors. Defaults to _unset.
239
- """
240
-
241
- def __init__(
242
- self,
243
- generation_fn: Callable[[Any], Any],
244
- tqdm_bar_format: Optional[str] = None,
245
- max_retries: int = 10,
246
- exit_on_error: bool = True,
247
- fallback_return_value: Union[Unset, Any] = _unset,
248
- termination_signal: signal.Signals = signal.SIGINT,
249
- ):
250
- self.generate = generation_fn
251
- self.fallback_return_value = fallback_return_value
252
- self.tqdm_bar_format = tqdm_bar_format
253
- self.max_retries = max_retries
254
- self.exit_on_error = exit_on_error
255
- self.termination_signal = termination_signal
256
-
257
- self._TERMINATE = False
258
-
259
- def _signal_handler(self, signum: int, frame: Any) -> None:
260
- tqdm.write("Process was interrupted. The return value will be incomplete...")
261
- self._TERMINATE = True
262
-
263
- def run(self, inputs: Sequence[Any]) -> List[Any]:
264
- signal.signal(self.termination_signal, self._signal_handler)
265
- outputs = [self.fallback_return_value] * len(inputs)
266
- progress_bar = tqdm(total=len(inputs), bar_format=self.tqdm_bar_format)
267
-
268
- for index, input in enumerate(inputs):
269
- try:
270
- for attempt in range(self.max_retries + 1):
271
- if self._TERMINATE:
272
- return outputs
273
- try:
274
- result = self.generate(input)
275
- outputs[index] = result
276
- progress_bar.update()
277
- break
278
- except Exception as exc:
279
- is_phoenix_exception = isinstance(exc, PhoenixException)
280
- if attempt >= self.max_retries or is_phoenix_exception:
281
- raise exc
282
- else:
283
- tqdm.write(f"Exception in worker on attempt {attempt + 1}: {exc}")
284
- tqdm.write("Retrying...")
285
- except Exception as exc:
286
- tqdm.write(f"Exception in worker: {exc}")
287
- if self.exit_on_error:
288
- return outputs
289
- else:
290
- progress_bar.update()
291
- signal.signal(self.termination_signal, signal.SIG_DFL) # reset the SIGTERM handler
292
- return outputs
293
-
294
-
295
- def get_executor_on_sync_context(
296
- sync_fn: Callable[[Any], Any],
297
- async_fn: Callable[[Any], Coroutine[Any, Any, Any]],
298
- run_sync: bool = False,
299
- concurrency: int = 3,
300
- tqdm_bar_format: Optional[str] = None,
301
- exit_on_error: bool = True,
302
- fallback_return_value: Union[Unset, Any] = _unset,
303
- ) -> Executor:
304
- if run_sync:
305
- return SyncExecutor(
306
- sync_fn,
307
- tqdm_bar_format=tqdm_bar_format,
308
- exit_on_error=exit_on_error,
309
- fallback_return_value=fallback_return_value,
310
- )
311
-
312
- if _running_event_loop_exists():
313
- if getattr(asyncio, "_nest_patched", False):
314
- return AsyncExecutor(
315
- async_fn,
316
- concurrency=concurrency,
317
- tqdm_bar_format=tqdm_bar_format,
318
- exit_on_error=exit_on_error,
319
- fallback_return_value=fallback_return_value,
320
- )
321
- else:
322
- logger.warning(
323
- "🐌!! If running llm_classify inside a notebook, patching the event loop with "
324
- "nest_asyncio will allow asynchronous eval submission, and is significantly "
325
- "faster. To patch the event loop, run `nest_asyncio.apply()`."
326
- )
327
- return SyncExecutor(
328
- sync_fn,
329
- tqdm_bar_format=tqdm_bar_format,
330
- exit_on_error=exit_on_error,
331
- fallback_return_value=fallback_return_value,
332
- )
333
- else:
334
- return AsyncExecutor(
335
- async_fn,
336
- concurrency=concurrency,
337
- tqdm_bar_format=tqdm_bar_format,
338
- exit_on_error=exit_on_error,
339
- fallback_return_value=fallback_return_value,
340
- )
341
-
342
-
343
- def _running_event_loop_exists() -> bool:
344
- """Checks for a running event loop.
345
-
346
- Returns:
347
- bool: True if a running event loop exists, False otherwise.
348
- """
349
- try:
350
- asyncio.get_running_loop()
351
- return True
352
- except RuntimeError:
353
- return False
@@ -1,138 +0,0 @@
1
- import logging
2
- from typing import Any, Callable, Dict, Optional, Tuple, Union
3
-
4
- import pandas as pd
5
-
6
- from phoenix.experimental.evals.functions.executor import (
7
- get_executor_on_sync_context,
8
- )
9
- from phoenix.experimental.evals.models import BaseEvalModel, set_verbosity
10
- from phoenix.experimental.evals.templates import (
11
- PromptTemplate,
12
- map_template,
13
- normalize_prompt_template,
14
- )
15
- from phoenix.experimental.evals.utils import get_tqdm_progress_bar_formatter
16
-
17
- logger = logging.getLogger(__name__)
18
-
19
-
20
- def _no_op_parser(response: str, response_index: int) -> Dict[str, str]:
21
- return {"output": response}
22
-
23
-
24
def llm_generate(
    dataframe: pd.DataFrame,
    template: Union[PromptTemplate, str],
    model: BaseEvalModel,
    system_instruction: Optional[str] = None,
    verbose: bool = False,
    output_parser: Optional[Callable[[str, int], Dict[str, Any]]] = None,
    include_prompt: bool = False,
    include_response: bool = False,
    run_sync: bool = False,
    concurrency: Optional[int] = None,
) -> pd.DataFrame:
    """
    Generate text with an LLM by filling a prompt template from each dataframe row.
    Useful for producing synthetic data, such as irrelevant responses.

    Args:
        dataframe (pandas.DataFrame): One row per record to feed into the template.
            Every template variable name must be a column of the dataframe (extra,
            unrelated columns are permitted).

        template (Union[PromptTemplate, str]): The prompt template, either as a
            PromptTemplate instance or a string. For a string, variable names are
            wrapped in curly braces so that `.format` can substitute their values.

        model (BaseEvalModel): An LLM model class.

        system_instruction (Optional[str], optional): An optional system message.

        verbose (bool, optional): If True, prints detailed information to stdout such
            as model invocation parameters and retry info. Default False.

        output_parser (Callable[[str, int], Dict[str, Any]], optional): Function that
            receives each generated response and its index and returns a dictionary
            whose keys become columns of the output dataframe. If None, the output
            dataframe has a single column named "output". Default None.

        include_prompt (bool, default=False): If True, adds a `prompt` column holding
            the prompt used for each generation.

        include_response (bool, default=False): If True, adds a `response` column
            holding the raw LLM response before the output parser is applied.

        run_sync (bool, default=False): If True, forces synchronous request
            submission. Otherwise generations run asynchronously when possible.

        concurrency (Optional[int], default=None): Number of concurrent requests when
            async submission is possible. When falsy, `model.default_concurrency` is
            used.

    Returns:
        generations_dataframe (pandas.DataFrame): A dataframe where each row
            represents the generated output.
    """
    worker_count = concurrency or model.default_concurrency

    # clients need to be reloaded to ensure that async evals work properly
    model.reload_client()

    bar_format = get_tqdm_progress_bar_formatter("llm_generate")
    parse = output_parser or _no_op_parser
    prompt_template = normalize_prompt_template(template)
    logger.info(f"Template: \n{prompt_template.prompt()}\n")
    logger.info(f"Template variables: {prompt_template.variables}")
    prompts = map_template(dataframe, prompt_template)

    def _to_record(index: int, prompt: str, response: str) -> Dict[str, Any]:
        # Shared post-processing for the sync and async workers.
        record = parse(response, index)
        if include_prompt:
            record["prompt"] = prompt
        if include_response:
            record["response"] = response
        return record

    async def _run_llm_generation_async(enumerated_prompt: Tuple[int, str]) -> Dict[str, Any]:
        index, prompt = enumerated_prompt
        with set_verbosity(model, verbose) as verbose_model:
            response = await verbose_model._async_generate(
                prompt,
                instruction=system_instruction,
            )
            return _to_record(index, prompt, response)

    def _run_llm_generation_sync(enumerated_prompt: Tuple[int, str]) -> Dict[str, Any]:
        index, prompt = enumerated_prompt
        with set_verbosity(model, verbose) as verbose_model:
            response = verbose_model._generate(
                prompt,
                instruction=system_instruction,
            )
            return _to_record(index, prompt, response)

    # Row passed to the executor as its fallback value; it carries the same
    # optional columns as a regular record.
    failure_row: Dict[str, Any] = {"output": "generation-failed"}
    if include_prompt:
        failure_row["prompt"] = ""
    if include_response:
        failure_row["response"] = ""

    executor = get_executor_on_sync_context(
        _run_llm_generation_sync,
        _run_llm_generation_async,
        run_sync=run_sync,
        concurrency=worker_count,
        tqdm_bar_format=bar_format,
        exit_on_error=True,
        fallback_return_value=failure_row,
    )
    indexed_prompts = list(enumerate(prompts.tolist()))
    return pd.DataFrame(executor.run(indexed_prompts))
@@ -1,76 +0,0 @@
1
- """
2
- Token processing functions for supported models. This module is being deprecated.
3
- """
4
-
5
- import logging
6
- import sys
7
- from typing import Any, List
8
-
9
- from ..models import BaseEvalModel
10
-
11
- logger = logging.getLogger(__name__)
12
-
13
- _DEPRECATION_WARNING = (
14
- "The processing module is being deprecated. For advanced token processing, please use the "
15
- "encoding approach recommended by the model provider. For example, OpenAI models can use the "
16
- "`tiktoken` library to encode and decode text. For other models, please refer to the model "
17
- "provider's documentation."
18
- )
19
-
20
-
21
def truncate_text_by_model(model: "BaseEvalModel", text: str, token_buffer: int = 0) -> str:
    """Truncates text using a given model token limit.

    Args:
        model (BaseEvalModel): The model to use as reference. Its `max_context_size`,
            `get_tokens_from_text` and `get_text_from_tokens` members are used.
        text (str): The text to be truncated.
        token_buffer (int, optional): The number of tokens to be left as buffer. For example, if the
            `model` has a token limit of 1,000 and we want to leave a buffer of 50, the text will be
            truncated such that the resulting text comprises 950 tokens. Defaults to 0.

    Returns:
        str: The original text when it fits within the token budget; otherwise the
            text decoded from the first `max_context_size - token_buffer` tokens
            followed by "...".
    """
    # Clamp at zero: a negative budget would make `tokens[:budget]` drop tokens
    # from the END of the sequence instead of truncating to nothing, which could
    # return text far longer than the model's context window.
    max_token_count = max(model.max_context_size - token_buffer, 0)
    tokens = model.get_tokens_from_text(text)
    if len(tokens) > max_token_count:
        return model.get_text_from_tokens(tokens[:max_token_count]) + "..."
    return text
39
-
40
-
41
def concatenate_and_truncate_chunks(
    chunks: List[str], model: "BaseEvalModel", token_buffer: int = 0
) -> str:
    """Given a list of `chunks` of text, this function will return the concatenated chunks
    truncated to a token limit given by the `model` and `token_buffer`. See the function
    `truncate_text_by_model` for information on the truncation process.

    Args:
        chunks (List[str]): A list of pieces of text. They are joined with a single space
            before truncation.
        model (BaseEvalModel): The model to use as reference.
        token_buffer (int, optional): The number of tokens to be left as buffer. For example, if
            the `model` has a token limit of 1,000 and we want to leave a buffer of 50, the text
            will be truncated such that the resulting text comprises 950 tokens. Defaults to 0.

    Returns:
        str: The space-joined chunks, truncated by `truncate_text_by_model`.
    """
    return truncate_text_by_model(model=model, text=" ".join(chunks), token_buffer=token_buffer)
60
-
61
-
62
- class _DEPRECATED_MODULE:
63
- __all__ = ("truncate_text_by_model", "concatenate_and_truncate_chunks")
64
-
65
- def __getattr__(self, name: str) -> Any:
66
- if name == "truncate_text_by_model":
67
- logger.warning(_DEPRECATION_WARNING)
68
- return truncate_text_by_model
69
- if name == "concatenate_and_truncate_chunks":
70
- logger.warning(_DEPRECATION_WARNING)
71
- return concatenate_and_truncate_chunks
72
- raise AttributeError(f"module {__name__} has no attribute {name}")
73
-
74
-
75
- # See e.g. https://stackoverflow.com/a/7668273
76
- sys.modules[__name__] = _DEPRECATED_MODULE() # type: ignore
@@ -1,14 +0,0 @@
1
- from .base import BaseEvalModel, set_verbosity
2
- from .bedrock import BedrockModel
3
- from .litellm import LiteLLMModel
4
- from .openai import OpenAIModel
5
- from .vertexai import VertexAIModel
6
-
7
- __all__ = [
8
- "BedrockModel",
9
- "BaseEvalModel",
10
- "LiteLLMModel",
11
- "OpenAIModel",
12
- "VertexAIModel",
13
- "set_verbosity",
14
- ]