arize-phoenix 3.16.1__py3-none-any.whl → 7.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of arize-phoenix might be problematic. Click here for more details.

Files changed (338)
  1. arize_phoenix-7.7.0.dist-info/METADATA +261 -0
  2. arize_phoenix-7.7.0.dist-info/RECORD +345 -0
  3. {arize_phoenix-3.16.1.dist-info → arize_phoenix-7.7.0.dist-info}/WHEEL +1 -1
  4. arize_phoenix-7.7.0.dist-info/entry_points.txt +3 -0
  5. phoenix/__init__.py +86 -14
  6. phoenix/auth.py +309 -0
  7. phoenix/config.py +675 -45
  8. phoenix/core/model.py +32 -30
  9. phoenix/core/model_schema.py +102 -109
  10. phoenix/core/model_schema_adapter.py +48 -45
  11. phoenix/datetime_utils.py +24 -3
  12. phoenix/db/README.md +54 -0
  13. phoenix/db/__init__.py +4 -0
  14. phoenix/db/alembic.ini +85 -0
  15. phoenix/db/bulk_inserter.py +294 -0
  16. phoenix/db/engines.py +208 -0
  17. phoenix/db/enums.py +20 -0
  18. phoenix/db/facilitator.py +113 -0
  19. phoenix/db/helpers.py +159 -0
  20. phoenix/db/insertion/constants.py +2 -0
  21. phoenix/db/insertion/dataset.py +227 -0
  22. phoenix/db/insertion/document_annotation.py +171 -0
  23. phoenix/db/insertion/evaluation.py +191 -0
  24. phoenix/db/insertion/helpers.py +98 -0
  25. phoenix/db/insertion/span.py +193 -0
  26. phoenix/db/insertion/span_annotation.py +158 -0
  27. phoenix/db/insertion/trace_annotation.py +158 -0
  28. phoenix/db/insertion/types.py +256 -0
  29. phoenix/db/migrate.py +86 -0
  30. phoenix/db/migrations/data_migration_scripts/populate_project_sessions.py +199 -0
  31. phoenix/db/migrations/env.py +114 -0
  32. phoenix/db/migrations/script.py.mako +26 -0
  33. phoenix/db/migrations/versions/10460e46d750_datasets.py +317 -0
  34. phoenix/db/migrations/versions/3be8647b87d8_add_token_columns_to_spans_table.py +126 -0
  35. phoenix/db/migrations/versions/4ded9e43755f_create_project_sessions_table.py +66 -0
  36. phoenix/db/migrations/versions/cd164e83824f_users_and_tokens.py +157 -0
  37. phoenix/db/migrations/versions/cf03bd6bae1d_init.py +280 -0
  38. phoenix/db/models.py +807 -0
  39. phoenix/exceptions.py +5 -1
  40. phoenix/experiments/__init__.py +6 -0
  41. phoenix/experiments/evaluators/__init__.py +29 -0
  42. phoenix/experiments/evaluators/base.py +158 -0
  43. phoenix/experiments/evaluators/code_evaluators.py +184 -0
  44. phoenix/experiments/evaluators/llm_evaluators.py +473 -0
  45. phoenix/experiments/evaluators/utils.py +236 -0
  46. phoenix/experiments/functions.py +772 -0
  47. phoenix/experiments/tracing.py +86 -0
  48. phoenix/experiments/types.py +726 -0
  49. phoenix/experiments/utils.py +25 -0
  50. phoenix/inferences/__init__.py +0 -0
  51. phoenix/{datasets → inferences}/errors.py +6 -5
  52. phoenix/{datasets → inferences}/fixtures.py +49 -42
  53. phoenix/{datasets/dataset.py → inferences/inferences.py} +121 -105
  54. phoenix/{datasets → inferences}/schema.py +11 -11
  55. phoenix/{datasets → inferences}/validation.py +13 -14
  56. phoenix/logging/__init__.py +3 -0
  57. phoenix/logging/_config.py +90 -0
  58. phoenix/logging/_filter.py +6 -0
  59. phoenix/logging/_formatter.py +69 -0
  60. phoenix/metrics/__init__.py +5 -4
  61. phoenix/metrics/binning.py +4 -3
  62. phoenix/metrics/metrics.py +2 -1
  63. phoenix/metrics/mixins.py +7 -6
  64. phoenix/metrics/retrieval_metrics.py +2 -1
  65. phoenix/metrics/timeseries.py +5 -4
  66. phoenix/metrics/wrappers.py +9 -3
  67. phoenix/pointcloud/clustering.py +5 -5
  68. phoenix/pointcloud/pointcloud.py +7 -5
  69. phoenix/pointcloud/projectors.py +5 -6
  70. phoenix/pointcloud/umap_parameters.py +53 -52
  71. phoenix/server/api/README.md +28 -0
  72. phoenix/server/api/auth.py +44 -0
  73. phoenix/server/api/context.py +152 -9
  74. phoenix/server/api/dataloaders/__init__.py +91 -0
  75. phoenix/server/api/dataloaders/annotation_summaries.py +139 -0
  76. phoenix/server/api/dataloaders/average_experiment_run_latency.py +54 -0
  77. phoenix/server/api/dataloaders/cache/__init__.py +3 -0
  78. phoenix/server/api/dataloaders/cache/two_tier_cache.py +68 -0
  79. phoenix/server/api/dataloaders/dataset_example_revisions.py +131 -0
  80. phoenix/server/api/dataloaders/dataset_example_spans.py +38 -0
  81. phoenix/server/api/dataloaders/document_evaluation_summaries.py +144 -0
  82. phoenix/server/api/dataloaders/document_evaluations.py +31 -0
  83. phoenix/server/api/dataloaders/document_retrieval_metrics.py +89 -0
  84. phoenix/server/api/dataloaders/experiment_annotation_summaries.py +79 -0
  85. phoenix/server/api/dataloaders/experiment_error_rates.py +58 -0
  86. phoenix/server/api/dataloaders/experiment_run_annotations.py +36 -0
  87. phoenix/server/api/dataloaders/experiment_run_counts.py +49 -0
  88. phoenix/server/api/dataloaders/experiment_sequence_number.py +44 -0
  89. phoenix/server/api/dataloaders/latency_ms_quantile.py +188 -0
  90. phoenix/server/api/dataloaders/min_start_or_max_end_times.py +85 -0
  91. phoenix/server/api/dataloaders/project_by_name.py +31 -0
  92. phoenix/server/api/dataloaders/record_counts.py +116 -0
  93. phoenix/server/api/dataloaders/session_io.py +79 -0
  94. phoenix/server/api/dataloaders/session_num_traces.py +30 -0
  95. phoenix/server/api/dataloaders/session_num_traces_with_error.py +32 -0
  96. phoenix/server/api/dataloaders/session_token_usages.py +41 -0
  97. phoenix/server/api/dataloaders/session_trace_latency_ms_quantile.py +55 -0
  98. phoenix/server/api/dataloaders/span_annotations.py +26 -0
  99. phoenix/server/api/dataloaders/span_dataset_examples.py +31 -0
  100. phoenix/server/api/dataloaders/span_descendants.py +57 -0
  101. phoenix/server/api/dataloaders/span_projects.py +33 -0
  102. phoenix/server/api/dataloaders/token_counts.py +124 -0
  103. phoenix/server/api/dataloaders/trace_by_trace_ids.py +25 -0
  104. phoenix/server/api/dataloaders/trace_root_spans.py +32 -0
  105. phoenix/server/api/dataloaders/user_roles.py +30 -0
  106. phoenix/server/api/dataloaders/users.py +33 -0
  107. phoenix/server/api/exceptions.py +48 -0
  108. phoenix/server/api/helpers/__init__.py +12 -0
  109. phoenix/server/api/helpers/dataset_helpers.py +217 -0
  110. phoenix/server/api/helpers/experiment_run_filters.py +763 -0
  111. phoenix/server/api/helpers/playground_clients.py +948 -0
  112. phoenix/server/api/helpers/playground_registry.py +70 -0
  113. phoenix/server/api/helpers/playground_spans.py +455 -0
  114. phoenix/server/api/input_types/AddExamplesToDatasetInput.py +16 -0
  115. phoenix/server/api/input_types/AddSpansToDatasetInput.py +14 -0
  116. phoenix/server/api/input_types/ChatCompletionInput.py +38 -0
  117. phoenix/server/api/input_types/ChatCompletionMessageInput.py +24 -0
  118. phoenix/server/api/input_types/ClearProjectInput.py +15 -0
  119. phoenix/server/api/input_types/ClusterInput.py +2 -2
  120. phoenix/server/api/input_types/CreateDatasetInput.py +12 -0
  121. phoenix/server/api/input_types/CreateSpanAnnotationInput.py +18 -0
  122. phoenix/server/api/input_types/CreateTraceAnnotationInput.py +18 -0
  123. phoenix/server/api/input_types/DataQualityMetricInput.py +5 -2
  124. phoenix/server/api/input_types/DatasetExampleInput.py +14 -0
  125. phoenix/server/api/input_types/DatasetSort.py +17 -0
  126. phoenix/server/api/input_types/DatasetVersionSort.py +16 -0
  127. phoenix/server/api/input_types/DeleteAnnotationsInput.py +7 -0
  128. phoenix/server/api/input_types/DeleteDatasetExamplesInput.py +13 -0
  129. phoenix/server/api/input_types/DeleteDatasetInput.py +7 -0
  130. phoenix/server/api/input_types/DeleteExperimentsInput.py +7 -0
  131. phoenix/server/api/input_types/DimensionFilter.py +4 -4
  132. phoenix/server/api/input_types/GenerativeModelInput.py +17 -0
  133. phoenix/server/api/input_types/Granularity.py +1 -1
  134. phoenix/server/api/input_types/InvocationParameters.py +162 -0
  135. phoenix/server/api/input_types/PatchAnnotationInput.py +19 -0
  136. phoenix/server/api/input_types/PatchDatasetExamplesInput.py +35 -0
  137. phoenix/server/api/input_types/PatchDatasetInput.py +14 -0
  138. phoenix/server/api/input_types/PerformanceMetricInput.py +5 -2
  139. phoenix/server/api/input_types/ProjectSessionSort.py +29 -0
  140. phoenix/server/api/input_types/SpanAnnotationSort.py +17 -0
  141. phoenix/server/api/input_types/SpanSort.py +134 -69
  142. phoenix/server/api/input_types/TemplateOptions.py +10 -0
  143. phoenix/server/api/input_types/TraceAnnotationSort.py +17 -0
  144. phoenix/server/api/input_types/UserRoleInput.py +9 -0
  145. phoenix/server/api/mutations/__init__.py +28 -0
  146. phoenix/server/api/mutations/api_key_mutations.py +167 -0
  147. phoenix/server/api/mutations/chat_mutations.py +593 -0
  148. phoenix/server/api/mutations/dataset_mutations.py +591 -0
  149. phoenix/server/api/mutations/experiment_mutations.py +75 -0
  150. phoenix/server/api/{types/ExportEventsMutation.py → mutations/export_events_mutations.py} +21 -18
  151. phoenix/server/api/mutations/project_mutations.py +57 -0
  152. phoenix/server/api/mutations/span_annotations_mutations.py +128 -0
  153. phoenix/server/api/mutations/trace_annotations_mutations.py +127 -0
  154. phoenix/server/api/mutations/user_mutations.py +329 -0
  155. phoenix/server/api/openapi/__init__.py +0 -0
  156. phoenix/server/api/openapi/main.py +17 -0
  157. phoenix/server/api/openapi/schema.py +16 -0
  158. phoenix/server/api/queries.py +738 -0
  159. phoenix/server/api/routers/__init__.py +11 -0
  160. phoenix/server/api/routers/auth.py +284 -0
  161. phoenix/server/api/routers/embeddings.py +26 -0
  162. phoenix/server/api/routers/oauth2.py +488 -0
  163. phoenix/server/api/routers/v1/__init__.py +64 -0
  164. phoenix/server/api/routers/v1/datasets.py +1017 -0
  165. phoenix/server/api/routers/v1/evaluations.py +362 -0
  166. phoenix/server/api/routers/v1/experiment_evaluations.py +115 -0
  167. phoenix/server/api/routers/v1/experiment_runs.py +167 -0
  168. phoenix/server/api/routers/v1/experiments.py +308 -0
  169. phoenix/server/api/routers/v1/pydantic_compat.py +78 -0
  170. phoenix/server/api/routers/v1/spans.py +267 -0
  171. phoenix/server/api/routers/v1/traces.py +208 -0
  172. phoenix/server/api/routers/v1/utils.py +95 -0
  173. phoenix/server/api/schema.py +44 -241
  174. phoenix/server/api/subscriptions.py +597 -0
  175. phoenix/server/api/types/Annotation.py +21 -0
  176. phoenix/server/api/types/AnnotationSummary.py +55 -0
  177. phoenix/server/api/types/AnnotatorKind.py +16 -0
  178. phoenix/server/api/types/ApiKey.py +27 -0
  179. phoenix/server/api/types/AuthMethod.py +9 -0
  180. phoenix/server/api/types/ChatCompletionMessageRole.py +11 -0
  181. phoenix/server/api/types/ChatCompletionSubscriptionPayload.py +46 -0
  182. phoenix/server/api/types/Cluster.py +25 -24
  183. phoenix/server/api/types/CreateDatasetPayload.py +8 -0
  184. phoenix/server/api/types/DataQualityMetric.py +31 -13
  185. phoenix/server/api/types/Dataset.py +288 -63
  186. phoenix/server/api/types/DatasetExample.py +85 -0
  187. phoenix/server/api/types/DatasetExampleRevision.py +34 -0
  188. phoenix/server/api/types/DatasetVersion.py +14 -0
  189. phoenix/server/api/types/Dimension.py +32 -31
  190. phoenix/server/api/types/DocumentEvaluationSummary.py +9 -8
  191. phoenix/server/api/types/EmbeddingDimension.py +56 -49
  192. phoenix/server/api/types/Evaluation.py +25 -31
  193. phoenix/server/api/types/EvaluationSummary.py +30 -50
  194. phoenix/server/api/types/Event.py +20 -20
  195. phoenix/server/api/types/ExampleRevisionInterface.py +14 -0
  196. phoenix/server/api/types/Experiment.py +152 -0
  197. phoenix/server/api/types/ExperimentAnnotationSummary.py +13 -0
  198. phoenix/server/api/types/ExperimentComparison.py +17 -0
  199. phoenix/server/api/types/ExperimentRun.py +119 -0
  200. phoenix/server/api/types/ExperimentRunAnnotation.py +56 -0
  201. phoenix/server/api/types/GenerativeModel.py +9 -0
  202. phoenix/server/api/types/GenerativeProvider.py +85 -0
  203. phoenix/server/api/types/Inferences.py +80 -0
  204. phoenix/server/api/types/InferencesRole.py +23 -0
  205. phoenix/server/api/types/LabelFraction.py +7 -0
  206. phoenix/server/api/types/MimeType.py +2 -2
  207. phoenix/server/api/types/Model.py +54 -54
  208. phoenix/server/api/types/PerformanceMetric.py +8 -5
  209. phoenix/server/api/types/Project.py +407 -142
  210. phoenix/server/api/types/ProjectSession.py +139 -0
  211. phoenix/server/api/types/Segments.py +4 -4
  212. phoenix/server/api/types/Span.py +221 -176
  213. phoenix/server/api/types/SpanAnnotation.py +43 -0
  214. phoenix/server/api/types/SpanIOValue.py +15 -0
  215. phoenix/server/api/types/SystemApiKey.py +9 -0
  216. phoenix/server/api/types/TemplateLanguage.py +10 -0
  217. phoenix/server/api/types/TimeSeries.py +19 -15
  218. phoenix/server/api/types/TokenUsage.py +11 -0
  219. phoenix/server/api/types/Trace.py +154 -0
  220. phoenix/server/api/types/TraceAnnotation.py +45 -0
  221. phoenix/server/api/types/UMAPPoints.py +7 -7
  222. phoenix/server/api/types/User.py +60 -0
  223. phoenix/server/api/types/UserApiKey.py +45 -0
  224. phoenix/server/api/types/UserRole.py +15 -0
  225. phoenix/server/api/types/node.py +4 -112
  226. phoenix/server/api/types/pagination.py +156 -57
  227. phoenix/server/api/utils.py +34 -0
  228. phoenix/server/app.py +864 -115
  229. phoenix/server/bearer_auth.py +163 -0
  230. phoenix/server/dml_event.py +136 -0
  231. phoenix/server/dml_event_handler.py +256 -0
  232. phoenix/server/email/__init__.py +0 -0
  233. phoenix/server/email/sender.py +97 -0
  234. phoenix/server/email/templates/__init__.py +0 -0
  235. phoenix/server/email/templates/password_reset.html +19 -0
  236. phoenix/server/email/types.py +11 -0
  237. phoenix/server/grpc_server.py +102 -0
  238. phoenix/server/jwt_store.py +505 -0
  239. phoenix/server/main.py +305 -116
  240. phoenix/server/oauth2.py +52 -0
  241. phoenix/server/openapi/__init__.py +0 -0
  242. phoenix/server/prometheus.py +111 -0
  243. phoenix/server/rate_limiters.py +188 -0
  244. phoenix/server/static/.vite/manifest.json +87 -0
  245. phoenix/server/static/assets/components-Cy9nwIvF.js +2125 -0
  246. phoenix/server/static/assets/index-BKvHIxkk.js +113 -0
  247. phoenix/server/static/assets/pages-CUi2xCVQ.js +4449 -0
  248. phoenix/server/static/assets/vendor-DvC8cT4X.js +894 -0
  249. phoenix/server/static/assets/vendor-DxkFTwjz.css +1 -0
  250. phoenix/server/static/assets/vendor-arizeai-Do1793cv.js +662 -0
  251. phoenix/server/static/assets/vendor-codemirror-BzwZPyJM.js +24 -0
  252. phoenix/server/static/assets/vendor-recharts-_Jb7JjhG.js +59 -0
  253. phoenix/server/static/assets/vendor-shiki-Cl9QBraO.js +5 -0
  254. phoenix/server/static/assets/vendor-three-DwGkEfCM.js +2998 -0
  255. phoenix/server/telemetry.py +68 -0
  256. phoenix/server/templates/index.html +82 -23
  257. phoenix/server/thread_server.py +3 -3
  258. phoenix/server/types.py +275 -0
  259. phoenix/services.py +27 -18
  260. phoenix/session/client.py +743 -68
  261. phoenix/session/data_extractor.py +31 -7
  262. phoenix/session/evaluation.py +3 -9
  263. phoenix/session/session.py +263 -219
  264. phoenix/settings.py +22 -0
  265. phoenix/trace/__init__.py +2 -22
  266. phoenix/trace/attributes.py +338 -0
  267. phoenix/trace/dsl/README.md +116 -0
  268. phoenix/trace/dsl/filter.py +663 -213
  269. phoenix/trace/dsl/helpers.py +73 -21
  270. phoenix/trace/dsl/query.py +574 -201
  271. phoenix/trace/exporter.py +24 -19
  272. phoenix/trace/fixtures.py +368 -32
  273. phoenix/trace/otel.py +71 -219
  274. phoenix/trace/projects.py +3 -2
  275. phoenix/trace/schemas.py +33 -11
  276. phoenix/trace/span_evaluations.py +21 -16
  277. phoenix/trace/span_json_decoder.py +6 -4
  278. phoenix/trace/span_json_encoder.py +2 -2
  279. phoenix/trace/trace_dataset.py +47 -32
  280. phoenix/trace/utils.py +21 -4
  281. phoenix/utilities/__init__.py +0 -26
  282. phoenix/utilities/client.py +132 -0
  283. phoenix/utilities/deprecation.py +31 -0
  284. phoenix/utilities/error_handling.py +3 -2
  285. phoenix/utilities/json.py +109 -0
  286. phoenix/utilities/logging.py +8 -0
  287. phoenix/utilities/project.py +2 -2
  288. phoenix/utilities/re.py +49 -0
  289. phoenix/utilities/span_store.py +0 -23
  290. phoenix/utilities/template_formatters.py +99 -0
  291. phoenix/version.py +1 -1
  292. arize_phoenix-3.16.1.dist-info/METADATA +0 -495
  293. arize_phoenix-3.16.1.dist-info/RECORD +0 -178
  294. phoenix/core/project.py +0 -619
  295. phoenix/core/traces.py +0 -96
  296. phoenix/experimental/evals/__init__.py +0 -73
  297. phoenix/experimental/evals/evaluators.py +0 -413
  298. phoenix/experimental/evals/functions/__init__.py +0 -4
  299. phoenix/experimental/evals/functions/classify.py +0 -453
  300. phoenix/experimental/evals/functions/executor.py +0 -353
  301. phoenix/experimental/evals/functions/generate.py +0 -138
  302. phoenix/experimental/evals/functions/processing.py +0 -76
  303. phoenix/experimental/evals/models/__init__.py +0 -14
  304. phoenix/experimental/evals/models/anthropic.py +0 -175
  305. phoenix/experimental/evals/models/base.py +0 -170
  306. phoenix/experimental/evals/models/bedrock.py +0 -221
  307. phoenix/experimental/evals/models/litellm.py +0 -134
  308. phoenix/experimental/evals/models/openai.py +0 -448
  309. phoenix/experimental/evals/models/rate_limiters.py +0 -246
  310. phoenix/experimental/evals/models/vertex.py +0 -173
  311. phoenix/experimental/evals/models/vertexai.py +0 -186
  312. phoenix/experimental/evals/retrievals.py +0 -96
  313. phoenix/experimental/evals/templates/__init__.py +0 -50
  314. phoenix/experimental/evals/templates/default_templates.py +0 -472
  315. phoenix/experimental/evals/templates/template.py +0 -195
  316. phoenix/experimental/evals/utils/__init__.py +0 -172
  317. phoenix/experimental/evals/utils/threads.py +0 -27
  318. phoenix/server/api/helpers.py +0 -11
  319. phoenix/server/api/routers/evaluation_handler.py +0 -109
  320. phoenix/server/api/routers/span_handler.py +0 -70
  321. phoenix/server/api/routers/trace_handler.py +0 -60
  322. phoenix/server/api/types/DatasetRole.py +0 -23
  323. phoenix/server/static/index.css +0 -6
  324. phoenix/server/static/index.js +0 -7447
  325. phoenix/storage/span_store/__init__.py +0 -23
  326. phoenix/storage/span_store/text_file.py +0 -85
  327. phoenix/trace/dsl/missing.py +0 -60
  328. phoenix/trace/langchain/__init__.py +0 -3
  329. phoenix/trace/langchain/instrumentor.py +0 -35
  330. phoenix/trace/llama_index/__init__.py +0 -3
  331. phoenix/trace/llama_index/callback.py +0 -102
  332. phoenix/trace/openai/__init__.py +0 -3
  333. phoenix/trace/openai/instrumentor.py +0 -30
  334. {arize_phoenix-3.16.1.dist-info → arize_phoenix-7.7.0.dist-info}/licenses/IP_NOTICE +0 -0
  335. {arize_phoenix-3.16.1.dist-info → arize_phoenix-7.7.0.dist-info}/licenses/LICENSE +0 -0
  336. /phoenix/{datasets → db/insertion}/__init__.py +0 -0
  337. /phoenix/{experimental → db/migrations}/__init__.py +0 -0
  338. /phoenix/{storage → db/migrations/data_migration_scripts}/__init__.py +0 -0
phoenix/experimental/evals/models/anthropic.py (removed)
@@ -1,175 +0,0 @@
1
- from dataclasses import dataclass, field
2
- from typing import TYPE_CHECKING, Any, Dict, List, Optional
3
-
4
- from phoenix.exceptions import PhoenixContextLimitExceeded
5
- from phoenix.experimental.evals.models.base import BaseEvalModel
6
- from phoenix.experimental.evals.models.rate_limiters import RateLimiter
7
-
8
- if TYPE_CHECKING:
9
- from tiktoken import Encoding
10
-
# Token limits keyed by Anthropic model name. ``AnthropicModel.max_context_size``
# falls back to this table when no explicit ``max_content_size`` is supplied.
MODEL_TOKEN_LIMIT_MAPPING = {
    "claude-2.1": 200_000,
    "claude-2.0": 100_000,
    "claude-instant-1.2": 100_000,
}
16
-
17
-
@dataclass
class AnthropicModel(BaseEvalModel):
    """Evaluation model that calls Anthropic's text-completions endpoint.

    On construction the instance lazily imports ``tiktoken`` and
    ``anthropic``, builds one synchronous and one asynchronous Anthropic
    client, selects a tiktoken encoding for token counting, and creates a
    ``RateLimiter`` configured with ``anthropic.RateLimitError`` as its
    rate-limit error.

    NOTE(review): ``max_retries``, ``retry_min_seconds``,
    ``retry_max_seconds`` and ``extra_parameters`` are declared below but are
    not read by any method of this class.
    """

    model: str = "claude-2.1"
    """The model name to use."""
    temperature: float = 0.0
    """What sampling temperature to use."""
    max_tokens: int = 256
    """The maximum number of tokens to generate in the completion."""
    top_p: float = 1
    """Total probability mass of tokens to consider at each step."""
    top_k: int = 256
    """The cutoff where the model no longer selects the words"""
    stop_sequences: List[str] = field(default_factory=list)
    """If the model encounters a stop sequence, it stops generating further tokens. """
    max_retries: int = 6
    """Maximum number of retries to make when generating."""
    retry_min_seconds: int = 10
    """Minimum number of seconds to wait when retrying."""
    retry_max_seconds: int = 60
    """Maximum number of seconds to wait when retrying."""
    extra_parameters: Dict[str, Any] = field(default_factory=dict)
    """Any extra parameters to add to the request body (e.g., countPenalty for a21 models)"""
    max_content_size: Optional[int] = None
    """If you're using a fine-tuned model, set this to the maximum content size"""

    def __post_init__(self) -> None:
        """Run the initialisation steps in dependency order.

        The rate limiter is created last because it reads
        ``self._anthropic``, which ``_init_client`` sets.
        """
        self._init_environment()
        self._init_client()
        self._init_tiktoken()
        self._init_rate_limiter()

    def _init_environment(self) -> None:
        """Import ``tiktoken`` lazily and keep the module on the instance.

        Raises:
            ImportError: with installation instructions when ``tiktoken`` is
                not installed.
        """
        try:
            import tiktoken

            self._tiktoken = tiktoken
        except ImportError:
            self._raise_import_error(
                package_name="tiktoken",
            )

    def _init_client(self) -> None:
        """Import ``anthropic`` lazily and build the sync and async clients.

        Raises:
            ImportError: with installation instructions when ``anthropic`` is
                not installed.
        """
        try:
            import anthropic  # type:ignore

            self._anthropic = anthropic
            self.client = self._anthropic.Anthropic()
            self.async_client = self._anthropic.AsyncAnthropic()
        except ImportError:
            self._raise_import_error(
                package_name="anthropic",
            )

    def _init_tiktoken(self) -> None:
        """Select the tiktoken encoding used for token counting.

        Falls back to ``cl100k_base`` when tiktoken raises ``KeyError`` for
        ``self.model``.
        """
        try:
            encoding = self._tiktoken.encoding_for_model(self.model)
        except KeyError:
            encoding = self._tiktoken.get_encoding("cl100k_base")
        self._tiktoken_encoding = encoding

    def _init_rate_limiter(self) -> None:
        """Create the rate limiter that wraps every completion request."""
        self._rate_limiter = RateLimiter(
            rate_limit_error=self._anthropic.RateLimitError,
            max_rate_limit_retries=10,
            initial_per_second_request_rate=1,
            maximum_per_second_request_rate=20,
            enforcement_window_minutes=1,
        )

    def invocation_parameters(self) -> Dict[str, Any]:
        """Return the sampling parameters sent with each completion request."""
        return {
            "max_tokens_to_sample": self.max_tokens,
            "stop_sequences": self.stop_sequences,
            "temperature": self.temperature,
            "top_p": self.top_p,
            "top_k": self.top_k,
        }

    @property
    def encoder(self) -> "Encoding":
        """The tiktoken encoding chosen by ``_init_tiktoken``."""
        return self._tiktoken_encoding

    def get_tokens_from_text(self, text: str) -> List[int]:
        """Encode ``text`` into token ids with ``self.encoder``."""
        return self.encoder.encode(text)

    def get_text_from_tokens(self, tokens: List[int]) -> str:
        """Decode token ids back into text with ``self.encoder``."""
        return self.encoder.decode(tokens)

    @property
    def max_context_size(self) -> int:
        """Token limit for the configured model.

        A truthy ``max_content_size`` wins; otherwise the value is looked up
        in ``MODEL_TOKEN_LIMIT_MAPPING`` by model name.

        Raises:
            ValueError: when neither source yields a limit.
        """
        context_size = self.max_content_size or MODEL_TOKEN_LIMIT_MAPPING.get(self.model)

        if context_size is None:
            raise ValueError(
                "Can't determine maximum context size. An unknown model name was "
                f"used: {self.model}. Please set the `max_content_size` argument "
                "when using fine-tuned models. "
            )

        return context_size

    def _generate(self, prompt: str, **kwargs: Any) -> str:
        """Synchronously complete ``prompt`` and return the completion text.

        Keyword arguments override the defaults from
        ``invocation_parameters``.
        """
        # instruction is an invalid input to Anthropic models, it is passed in by
        # BaseEvalModel.__call__ and needs to be removed
        kwargs.pop("instruction", None)
        invocation_parameters = self.invocation_parameters()
        invocation_parameters.update(kwargs)
        response = self._rate_limited_completion(
            model=self.model,
            prompt=self._format_prompt_for_claude(prompt),
            **invocation_parameters,
        )

        return str(response)

    @staticmethod
    def _raise_if_context_limit_exceeded(error: Exception) -> None:
        """Raise ``PhoenixContextLimitExceeded`` if ``error`` reports an over-long prompt.

        Returns normally for any other error so the caller can re-raise the
        original exception. Tolerates exceptions with empty ``args`` or a
        non-string first argument.
        """
        message = error.args[0] if error.args else None
        if isinstance(message, str) and "prompt is too long" in message:
            raise PhoenixContextLimitExceeded(message) from error

    def _rate_limited_completion(self, **kwargs: Any) -> Any:
        """Issue one synchronous completion request through the rate limiter."""

        @self._rate_limiter.limit
        def _completion(**kwargs: Any) -> Any:
            try:
                response = self.client.completions.create(**kwargs)
                return response.completion
            except self._anthropic.BadRequestError as e:
                self._raise_if_context_limit_exceeded(e)
                # Any other bad request propagates with its original traceback.
                raise

        return _completion(**kwargs)

    async def _async_generate(self, prompt: str, **kwargs: Any) -> str:
        """Asynchronously complete ``prompt`` and return the completion text.

        Keyword arguments override the defaults from
        ``invocation_parameters``.
        """
        # instruction is an invalid input to Anthropic models, it is passed in by
        # BaseEvalModel.__call__ and needs to be removed
        kwargs.pop("instruction", None)
        invocation_parameters = self.invocation_parameters()
        invocation_parameters.update(kwargs)
        response = await self._async_rate_limited_completion(
            model=self.model,
            prompt=self._format_prompt_for_claude(prompt),
            **invocation_parameters,
        )

        return str(response)

    async def _async_rate_limited_completion(self, **kwargs: Any) -> Any:
        """Issue one asynchronous completion request through the rate limiter."""

        @self._rate_limiter.alimit
        async def _async_completion(**kwargs: Any) -> Any:
            try:
                response = await self.async_client.completions.create(**kwargs)
                return response.completion
            except self._anthropic.BadRequestError as e:
                self._raise_if_context_limit_exceeded(e)
                # Any other bad request propagates with its original traceback.
                raise

        return await _async_completion(**kwargs)

    def _format_prompt_for_claude(self, prompt: str) -> str:
        """Wrap ``prompt`` in the Human/Assistant markers from the anthropic SDK."""
        # Claude requires prompt in the format of Human: ... Assistant:
        return f"{self._anthropic.HUMAN_PROMPT} {prompt} {self._anthropic.AI_PROMPT}"
phoenix/experimental/evals/models/base.py (removed)
@@ -1,170 +0,0 @@
1
- import logging
2
- from abc import ABC, abstractmethod, abstractproperty
3
- from contextlib import contextmanager
4
- from dataclasses import dataclass, field
5
- from typing import TYPE_CHECKING, Any, Generator, List, Optional, Sequence
6
-
7
- from phoenix.experimental.evals.models.rate_limiters import RateLimiter
8
-
9
- if TYPE_CHECKING:
10
- from tiktoken import Encoding
11
-
12
- from tqdm.asyncio import tqdm_asyncio
13
- from tqdm.auto import tqdm
14
- from typing_extensions import TypeVar
15
-
16
- from phoenix.experimental.evals.utils.threads import to_thread
17
- from phoenix.utilities.logging import printif
18
-
# Type variable used to annotate the ``tp`` argument of ``is_list_of``;
# it is bound to ``type``.
T = TypeVar("T", bound=type)


def is_list_of(lst: Sequence[object], tp: T) -> bool:
    """Return True when ``lst`` is a ``list`` whose items are all instances of ``tp``.

    Any non-list sequence (tuple, str, ...) yields False; an empty list
    yields True.
    """
    if not isinstance(lst, list):
        return False
    return all(isinstance(item, tp) for item in lst)
24
-
25
-
# Module-level logger named after this module.
logger = logging.getLogger(__name__)

# Progress-bar layout passed as ``bar_format`` to tqdm by the generate helpers.
TQDM_BAR_FORMAT = " ".join(
    (
        "Eta:{eta} |{bar}| {percentage:3.1f}%",
        "({n_fmt}/{total_fmt})",
        "[{elapsed}<{remaining}, {rate_fmt}{postfix}]",
    )
)
33
-
34
-
@contextmanager
def set_verbosity(
    model: "BaseEvalModel", verbose: bool = False
) -> Generator["BaseEvalModel", None, None]:
    """Temporarily set the verbose flag on ``model`` and on its rate limiter.

    Both ``model._verbose`` and ``model._rate_limiter._verbose`` are set to
    ``verbose`` for the duration of the ``with`` block and restored to their
    previous values afterwards, even when the block raises.

    Args:
        model: Object exposing ``_verbose`` and ``_rate_limiter._verbose``.
        verbose: Value assigned to both flags inside the block.

    Yields:
        The same ``model`` instance.

    Raises:
        AttributeError: if ``model`` lacks one of the two flags.
    """
    # Snapshot both flags before entering ``try``: if reading either one
    # fails, the real AttributeError propagates instead of an
    # UnboundLocalError raised from the ``finally`` clause.
    previous_model_verbose = model._verbose
    previous_rate_limiter_verbose = model._rate_limiter._verbose
    try:
        model._verbose = verbose
        model._rate_limiter._verbose = verbose
        yield model
    finally:
        model._verbose = previous_model_verbose
        model._rate_limiter._verbose = previous_rate_limiter_verbose
48
-
49
-
@dataclass
class BaseEvalModel(ABC):
    """Abstract base class for evaluation LLM wrappers.

    Subclasses implement ``_generate`` / ``_async_generate`` plus the
    tokenizer accessors. This class supplies argument validation, batch
    generation with a tqdm progress bar, and an import-error helper.
    """

    default_concurrency: int = 20
    _verbose: bool = False
    _rate_limiter: RateLimiter = field(default_factory=RateLimiter)

    def reload_client(self) -> None:
        """Hook for subclasses; the base implementation does nothing."""
        pass

    @staticmethod
    def _validate_prompt_and_instruction(prompt: Any, instruction: Any) -> None:
        """Raise ``TypeError`` unless ``prompt`` is a str and ``instruction`` is a str or None."""
        if not isinstance(prompt, str):
            raise TypeError(
                "Invalid type for argument `prompt`. Expected a string but found "
                f"{type(prompt)}. If you want to run the LLM on multiple prompts, use "
                "`generate` instead."
            )
        if instruction is not None and not isinstance(instruction, str):
            raise TypeError(
                "Invalid type for argument `instruction`. Expected a string but found "
                f"{type(instruction)}."
            )

    @staticmethod
    def _validate_prompts(prompts: Any) -> None:
        """Raise ``TypeError`` unless ``prompts`` is a list containing only strings."""
        if not is_list_of(prompts, str):
            raise TypeError(
                "Invalid type for argument `prompts`. Expected a list of strings "
                f"but found {type(prompts)}."
            )

    def __call__(self, prompt: str, instruction: Optional[str] = None, **kwargs: Any) -> str:
        """Run the LLM on the given prompt.

        Raises:
            TypeError: if ``prompt`` is not a string, or ``instruction`` is
                neither None nor a string.
        """
        self._validate_prompt_and_instruction(prompt, instruction)
        return self._generate(prompt=prompt, instruction=instruction, **kwargs)

    async def async_call(self, prompt: str, instruction: Optional[str] = None) -> str:
        """Run the LLM on the given prompt via ``agenerate``.

        Raises:
            TypeError: if ``prompt`` is not a string, or ``instruction`` is
                neither None nor a string.
        """
        self._validate_prompt_and_instruction(prompt, instruction)
        response = await self.agenerate(prompts=[prompt], instruction=instruction)
        return response[0]

    def generate(
        self, prompts: List[str], instruction: Optional[str] = None, **kwargs: Any
    ) -> List[str]:
        """Generate one response per prompt, sequentially, with a progress bar.

        Raises:
            TypeError: if ``prompts`` is not a list of strings.
        """
        # Validate first so a non-list argument gets the descriptive error
        # below rather than failing inside ``len()``.
        self._validate_prompts(prompts)
        printif(self._verbose, f"Generating responses for {len(prompts)} prompts...")
        if extra_info := self.verbose_generation_info():
            printif(self._verbose, extra_info)
        outputs = []
        for prompt in tqdm(prompts, bar_format=TQDM_BAR_FORMAT):
            output = self._generate(prompt=prompt, instruction=instruction, **kwargs)
            logger.info("Prompt: %s\nInstruction: %s\nOutput: %s", prompt, instruction, output)
            outputs.append(output)
        return outputs

    async def agenerate(self, prompts: List[str], instruction: Optional[str] = None) -> List[str]:
        """Generate one response per prompt concurrently via ``_agenerate``.

        Raises:
            TypeError: if ``prompts`` is not a list of strings.
        """
        self._validate_prompts(prompts)
        result: List[str] = await tqdm_asyncio.gather(
            *[self._agenerate(prompt=prompt, instruction=instruction) for prompt in prompts],
            bar_format=TQDM_BAR_FORMAT,
            ncols=100,
        )
        return result

    def verbose_generation_info(self) -> str:
        """Return extra model-specific text for verbose ``generate`` runs.

        The base implementation returns an empty string, which ``generate``
        treats as "nothing to print".
        """
        return ""

    @abstractmethod
    async def _async_generate(self, prompt: str, **kwargs: Any) -> str:
        """Asynchronously generate a response for a single prompt."""
        raise NotImplementedError

    @abstractmethod
    def _generate(self, prompt: str, **kwargs: Any) -> str:
        """Synchronously generate a response for a single prompt."""
        raise NotImplementedError

    async def _agenerate(self, prompt: str, instruction: Optional[str]) -> str:
        """Run the synchronous ``_generate`` through ``to_thread`` and return a str."""
        return str(await to_thread(self._generate, prompt=prompt, instruction=instruction))

    @staticmethod
    def _raise_import_error(
        package_name: str, package_display_name: str = "", package_min_version: str = ""
    ) -> None:
        """Raise an ``ImportError`` that tells the user how to install a missing package.

        Args:
            package_name: Name used in the suggested ``pip install`` command.
            package_display_name: Name shown in the message; defaults to
                ``package_name`` when empty.
            package_min_version: When non-empty, appended to the pip command
                as a ``>=`` constraint.

        Raises:
            ImportError: always.
        """
        if not package_display_name:
            package_display_name = package_name
        msg = (
            f"Could not import necessary dependencies to use {package_display_name}. "
            "Please install them with "
        )
        if package_min_version:
            msg += f"`pip install {package_name}>={package_min_version}`."
        else:
            msg += f"`pip install {package_name}`."
        raise ImportError(msg)

    @abstractmethod
    def get_tokens_from_text(self, text: str) -> List[int]:
        """Encode ``text`` into a list of token ids."""
        ...

    @abstractmethod
    def get_text_from_tokens(self, tokens: List[int]) -> str:
        """Decode a list of token ids back into text."""
        ...

    # ``abc.abstractproperty`` is deprecated; stacking ``property`` on
    # ``abstractmethod`` is the documented replacement.
    @property
    @abstractmethod
    def max_context_size(self) -> int:
        """Maximum context size of the model."""
        ...

    @property
    @abstractmethod
    def encoder(self) -> "Encoding":
        """Tokenizer encoding used by the model."""
        ...
@@ -1,221 +0,0 @@
1
- import json
2
- import logging
3
- from dataclasses import dataclass, field
4
- from typing import TYPE_CHECKING, Any, Dict, List, Optional
5
-
6
- from phoenix.exceptions import PhoenixContextLimitExceeded
7
- from phoenix.experimental.evals.models.base import BaseEvalModel
8
- from phoenix.experimental.evals.models.rate_limiters import RateLimiter
9
-
10
- if TYPE_CHECKING:
11
- from tiktoken import Encoding
12
-
13
- logger = logging.getLogger(__name__)
14
-
15
- MINIMUM_BOTO_VERSION = "1.28.58"
16
- MODEL_TOKEN_LIMIT_MAPPING = {
17
- "anthropic.claude-instant-v1": 100 * 1024,
18
- "anthropic.claude-v1": 100 * 1024,
19
- "anthropic.claude-v2": 100 * 1024,
20
- "amazon.titan-text-express-v1": 8 * 1024,
21
- "ai21.j2-mid-v1": 8 * 1024,
22
- "ai21.j2-ultra-v1": 8 * 1024,
23
- }
24
-
25
-
26
- @dataclass
27
- class BedrockModel(BaseEvalModel):
28
- model_id: str = "anthropic.claude-v2"
29
- """The model name to use."""
30
- temperature: float = 0.0
31
- """What sampling temperature to use."""
32
- max_tokens: int = 256
33
- """The maximum number of tokens to generate in the completion."""
34
- top_p: float = 1
35
- """Total probability mass of tokens to consider at each step."""
36
- top_k: int = 256
37
- """The cutoff where the model no longer selects the words"""
38
- stop_sequences: List[str] = field(default_factory=list)
39
- """If the model encounters a stop sequence, it stops generating further tokens. """
40
- max_retries: int = 6
41
- """Maximum number of retries to make when generating."""
42
- retry_min_seconds: int = 10
43
- """Minimum number of seconds to wait when retrying."""
44
- retry_max_seconds: int = 60
45
- """Maximum number of seconds to wait when retrying."""
46
- client: Any = None
47
- """The bedrock session client. If unset, a new one is created with boto3."""
48
- max_content_size: Optional[int] = None
49
- """If you're using a fine-tuned model, set this to the maximum content size"""
50
- extra_parameters: Dict[str, Any] = field(default_factory=dict)
51
- """Any extra parameters to add to the request body (e.g., countPenalty for a21 models)"""
52
-
53
- def __post_init__(self) -> None:
54
- self._init_environment()
55
- self._init_client()
56
- self._init_tiktoken()
57
- self._init_rate_limiter()
58
-
59
- def _init_environment(self) -> None:
60
- try:
61
- import tiktoken
62
-
63
- self._tiktoken = tiktoken
64
- except ImportError:
65
- self._raise_import_error(
66
- package_name="tiktoken",
67
- )
68
-
69
- def _init_client(self) -> None:
70
- if not self.client:
71
- try:
72
- import boto3 # type:ignore
73
-
74
- self.client = boto3.client("bedrock-runtime")
75
- except ImportError:
76
- self._raise_import_error(
77
- package_name="boto3",
78
- package_min_version=MINIMUM_BOTO_VERSION,
79
- )
80
-
81
- def _init_tiktoken(self) -> None:
82
- try:
83
- encoding = self._tiktoken.encoding_for_model(self.model_id)
84
- except KeyError:
85
- encoding = self._tiktoken.get_encoding("cl100k_base")
86
- self._tiktoken_encoding = encoding
87
-
88
- def _init_rate_limiter(self) -> None:
89
- self._rate_limiter = RateLimiter(
90
- rate_limit_error=self.client.exceptions.ThrottlingException,
91
- max_rate_limit_retries=10,
92
- initial_per_second_request_rate=2,
93
- maximum_per_second_request_rate=20,
94
- enforcement_window_minutes=1,
95
- )
96
-
97
- @property
98
- def max_context_size(self) -> int:
99
- context_size = self.max_content_size or MODEL_TOKEN_LIMIT_MAPPING.get(self.model_id, None)
100
-
101
- if context_size is None:
102
- raise ValueError(
103
- "Can't determine maximum context size. An unknown model name was "
104
- + f"used: {self.model_id}. Please set the `max_content_size` argument"
105
- + "when using fine-tuned models. "
106
- )
107
-
108
- return context_size
109
-
110
- @property
111
- def encoder(self) -> "Encoding":
112
- return self._tiktoken_encoding
113
-
114
- def get_tokens_from_text(self, text: str) -> List[int]:
115
- return self.encoder.encode(text)
116
-
117
- def get_text_from_tokens(self, tokens: List[int]) -> str:
118
- return self.encoder.decode(tokens)
119
-
120
- async def _async_generate(self, prompt: str, **kwargs: Dict[str, Any]) -> str:
121
- return self._generate(prompt, **kwargs)
122
-
123
- def _generate(self, prompt: str, **kwargs: Dict[str, Any]) -> str:
124
- body = json.dumps(self._create_request_body(prompt))
125
- accept = "application/json"
126
- contentType = "application/json"
127
-
128
- response = self._rate_limited_completion(
129
- body=body, modelId=self.model_id, accept=accept, contentType=contentType
130
- )
131
-
132
- return self._parse_output(response) or ""
133
-
134
- def _rate_limited_completion(self, **kwargs: Any) -> Any:
135
- """Use tenacity to retry the completion call."""
136
-
137
- @self._rate_limiter.limit
138
- def _completion(**kwargs: Any) -> Any:
139
- try:
140
- return self.client.invoke_model(**kwargs)
141
- except Exception as e:
142
- exception_message = e.args[0]
143
- if not exception_message:
144
- raise e
145
-
146
- if "Input is too long" in exception_message:
147
- # Error from Anthropic models
148
- raise PhoenixContextLimitExceeded(exception_message) from e
149
- elif "expected maxLength" in exception_message:
150
- # Error from Titan models
151
- raise PhoenixContextLimitExceeded(exception_message) from e
152
- elif "Prompt has too many tokens" in exception_message:
153
- # Error from AI21 models
154
- raise PhoenixContextLimitExceeded(exception_message) from e
155
- raise e
156
-
157
- return _completion(**kwargs)
158
-
159
- def _format_prompt_for_claude(self, prompt: str) -> str:
160
- # Claude requires prompt in the format of Human: ... Assisatnt:
161
- if not prompt.strip().lower().startswith("human:"):
162
- prompt = f"\n\nHuman:{prompt}"
163
- if not prompt.strip().lower().startswith("assistant:"):
164
- prompt = f"{prompt}\n\nAssistant:"
165
- return prompt
166
-
167
- def _create_request_body(self, prompt: str) -> Dict[str, Any]:
168
- # The request formats for bedrock models differ
169
- # see https://docs.aws.amazon.com/bedrock/latest/userguide/model-parameters.html
170
- if self.model_id.startswith("ai21"):
171
- return {
172
- **{
173
- "prompt": prompt,
174
- "temperature": self.temperature,
175
- "topP": self.top_p,
176
- "maxTokens": self.max_tokens,
177
- "stopSequences": self.stop_sequences,
178
- },
179
- **self.extra_parameters,
180
- }
181
- elif self.model_id.startswith("anthropic"):
182
- return {
183
- **{
184
- "prompt": self._format_prompt_for_claude(prompt),
185
- "temperature": self.temperature,
186
- "top_p": self.top_p,
187
- "top_k": self.top_k,
188
- "max_tokens_to_sample": self.max_tokens,
189
- "stop_sequences": self.stop_sequences,
190
- },
191
- **self.extra_parameters,
192
- }
193
- else:
194
- if not self.model_id.startswith("amazon"):
195
- logger.warn(f"Unknown format for model {self.model_id}, returning titan format...")
196
- return {
197
- **{
198
- "inputText": prompt,
199
- "textGenerationConfig": {
200
- "temperature": self.temperature,
201
- "topP": self.top_p,
202
- "maxTokenCount": self.max_tokens,
203
- "stopSequences": self.stop_sequences,
204
- },
205
- },
206
- **self.extra_parameters,
207
- }
208
-
209
- def _parse_output(self, response: Any) -> Any:
210
- if self.model_id.startswith("ai21"):
211
- body = json.loads(response.get("body").read())
212
- return body.get("completions")[0].get("data").get("text")
213
- elif self.model_id.startswith("anthropic"):
214
- body = json.loads(response.get("body").read().decode())
215
- return body.get("completion")
216
- elif self.model_id.startswith("amazon"):
217
- body = json.loads(response.get("body").read())
218
- return body.get("results")[0].get("outputText")
219
- else:
220
- body = json.loads(response.get("body").read())
221
- return body.get("results")[0].get("data").get("outputText")