arize-phoenix 10.0.4__py3-none-any.whl → 12.28.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (276) hide show
  1. {arize_phoenix-10.0.4.dist-info → arize_phoenix-12.28.1.dist-info}/METADATA +124 -72
  2. arize_phoenix-12.28.1.dist-info/RECORD +499 -0
  3. {arize_phoenix-10.0.4.dist-info → arize_phoenix-12.28.1.dist-info}/WHEEL +1 -1
  4. {arize_phoenix-10.0.4.dist-info → arize_phoenix-12.28.1.dist-info}/licenses/IP_NOTICE +1 -1
  5. phoenix/__generated__/__init__.py +0 -0
  6. phoenix/__generated__/classification_evaluator_configs/__init__.py +20 -0
  7. phoenix/__generated__/classification_evaluator_configs/_document_relevance_classification_evaluator_config.py +17 -0
  8. phoenix/__generated__/classification_evaluator_configs/_hallucination_classification_evaluator_config.py +17 -0
  9. phoenix/__generated__/classification_evaluator_configs/_models.py +18 -0
  10. phoenix/__generated__/classification_evaluator_configs/_tool_selection_classification_evaluator_config.py +17 -0
  11. phoenix/__init__.py +5 -4
  12. phoenix/auth.py +39 -2
  13. phoenix/config.py +1763 -91
  14. phoenix/datetime_utils.py +120 -2
  15. phoenix/db/README.md +595 -25
  16. phoenix/db/bulk_inserter.py +145 -103
  17. phoenix/db/engines.py +140 -33
  18. phoenix/db/enums.py +3 -12
  19. phoenix/db/facilitator.py +302 -35
  20. phoenix/db/helpers.py +1000 -65
  21. phoenix/db/iam_auth.py +64 -0
  22. phoenix/db/insertion/dataset.py +135 -2
  23. phoenix/db/insertion/document_annotation.py +9 -6
  24. phoenix/db/insertion/evaluation.py +2 -3
  25. phoenix/db/insertion/helpers.py +17 -2
  26. phoenix/db/insertion/session_annotation.py +176 -0
  27. phoenix/db/insertion/span.py +15 -11
  28. phoenix/db/insertion/span_annotation.py +3 -4
  29. phoenix/db/insertion/trace_annotation.py +3 -4
  30. phoenix/db/insertion/types.py +50 -20
  31. phoenix/db/migrations/versions/01a8342c9cdf_add_user_id_on_datasets.py +40 -0
  32. phoenix/db/migrations/versions/0df286449799_add_session_annotations_table.py +105 -0
  33. phoenix/db/migrations/versions/272b66ff50f8_drop_single_indices.py +119 -0
  34. phoenix/db/migrations/versions/58228d933c91_dataset_labels.py +67 -0
  35. phoenix/db/migrations/versions/699f655af132_experiment_tags.py +57 -0
  36. phoenix/db/migrations/versions/735d3d93c33e_add_composite_indices.py +41 -0
  37. phoenix/db/migrations/versions/a20694b15f82_cost.py +196 -0
  38. phoenix/db/migrations/versions/ab513d89518b_add_user_id_on_dataset_versions.py +40 -0
  39. phoenix/db/migrations/versions/d0690a79ea51_users_on_experiments.py +40 -0
  40. phoenix/db/migrations/versions/deb2c81c0bb2_dataset_splits.py +139 -0
  41. phoenix/db/migrations/versions/e76cbd66ffc3_add_experiments_dataset_examples.py +87 -0
  42. phoenix/db/models.py +669 -56
  43. phoenix/db/pg_config.py +10 -0
  44. phoenix/db/types/model_provider.py +4 -0
  45. phoenix/db/types/token_price_customization.py +29 -0
  46. phoenix/db/types/trace_retention.py +23 -15
  47. phoenix/experiments/evaluators/utils.py +3 -3
  48. phoenix/experiments/functions.py +160 -52
  49. phoenix/experiments/tracing.py +2 -2
  50. phoenix/experiments/types.py +1 -1
  51. phoenix/inferences/inferences.py +1 -2
  52. phoenix/server/api/auth.py +38 -7
  53. phoenix/server/api/auth_messages.py +46 -0
  54. phoenix/server/api/context.py +100 -4
  55. phoenix/server/api/dataloaders/__init__.py +79 -5
  56. phoenix/server/api/dataloaders/annotation_configs_by_project.py +31 -0
  57. phoenix/server/api/dataloaders/annotation_summaries.py +60 -8
  58. phoenix/server/api/dataloaders/average_experiment_repeated_run_group_latency.py +50 -0
  59. phoenix/server/api/dataloaders/average_experiment_run_latency.py +17 -24
  60. phoenix/server/api/dataloaders/cache/two_tier_cache.py +1 -2
  61. phoenix/server/api/dataloaders/dataset_dataset_splits.py +52 -0
  62. phoenix/server/api/dataloaders/dataset_example_revisions.py +0 -1
  63. phoenix/server/api/dataloaders/dataset_example_splits.py +40 -0
  64. phoenix/server/api/dataloaders/dataset_examples_and_versions_by_experiment_run.py +47 -0
  65. phoenix/server/api/dataloaders/dataset_labels.py +36 -0
  66. phoenix/server/api/dataloaders/document_evaluation_summaries.py +2 -2
  67. phoenix/server/api/dataloaders/document_evaluations.py +6 -9
  68. phoenix/server/api/dataloaders/experiment_annotation_summaries.py +88 -34
  69. phoenix/server/api/dataloaders/experiment_dataset_splits.py +43 -0
  70. phoenix/server/api/dataloaders/experiment_error_rates.py +21 -28
  71. phoenix/server/api/dataloaders/experiment_repeated_run_group_annotation_summaries.py +77 -0
  72. phoenix/server/api/dataloaders/experiment_repeated_run_groups.py +57 -0
  73. phoenix/server/api/dataloaders/experiment_runs_by_experiment_and_example.py +44 -0
  74. phoenix/server/api/dataloaders/last_used_times_by_generative_model_id.py +35 -0
  75. phoenix/server/api/dataloaders/latency_ms_quantile.py +40 -8
  76. phoenix/server/api/dataloaders/record_counts.py +37 -10
  77. phoenix/server/api/dataloaders/session_annotations_by_session.py +29 -0
  78. phoenix/server/api/dataloaders/span_cost_by_span.py +24 -0
  79. phoenix/server/api/dataloaders/span_cost_detail_summary_entries_by_generative_model.py +56 -0
  80. phoenix/server/api/dataloaders/span_cost_detail_summary_entries_by_project_session.py +57 -0
  81. phoenix/server/api/dataloaders/span_cost_detail_summary_entries_by_span.py +43 -0
  82. phoenix/server/api/dataloaders/span_cost_detail_summary_entries_by_trace.py +56 -0
  83. phoenix/server/api/dataloaders/span_cost_details_by_span_cost.py +27 -0
  84. phoenix/server/api/dataloaders/span_cost_summary_by_experiment.py +57 -0
  85. phoenix/server/api/dataloaders/span_cost_summary_by_experiment_repeated_run_group.py +64 -0
  86. phoenix/server/api/dataloaders/span_cost_summary_by_experiment_run.py +58 -0
  87. phoenix/server/api/dataloaders/span_cost_summary_by_generative_model.py +55 -0
  88. phoenix/server/api/dataloaders/span_cost_summary_by_project.py +152 -0
  89. phoenix/server/api/dataloaders/span_cost_summary_by_project_session.py +56 -0
  90. phoenix/server/api/dataloaders/span_cost_summary_by_trace.py +55 -0
  91. phoenix/server/api/dataloaders/span_costs.py +29 -0
  92. phoenix/server/api/dataloaders/table_fields.py +2 -2
  93. phoenix/server/api/dataloaders/token_prices_by_model.py +30 -0
  94. phoenix/server/api/dataloaders/trace_annotations_by_trace.py +27 -0
  95. phoenix/server/api/dataloaders/types.py +29 -0
  96. phoenix/server/api/exceptions.py +11 -1
  97. phoenix/server/api/helpers/dataset_helpers.py +5 -1
  98. phoenix/server/api/helpers/playground_clients.py +1243 -292
  99. phoenix/server/api/helpers/playground_registry.py +2 -2
  100. phoenix/server/api/helpers/playground_spans.py +8 -4
  101. phoenix/server/api/helpers/playground_users.py +26 -0
  102. phoenix/server/api/helpers/prompts/conversions/aws.py +83 -0
  103. phoenix/server/api/helpers/prompts/conversions/google.py +103 -0
  104. phoenix/server/api/helpers/prompts/models.py +205 -22
  105. phoenix/server/api/input_types/{SpanAnnotationFilter.py → AnnotationFilter.py} +22 -14
  106. phoenix/server/api/input_types/ChatCompletionInput.py +6 -2
  107. phoenix/server/api/input_types/CreateProjectInput.py +27 -0
  108. phoenix/server/api/input_types/CreateProjectSessionAnnotationInput.py +37 -0
  109. phoenix/server/api/input_types/DatasetFilter.py +17 -0
  110. phoenix/server/api/input_types/ExperimentRunSort.py +237 -0
  111. phoenix/server/api/input_types/GenerativeCredentialInput.py +9 -0
  112. phoenix/server/api/input_types/GenerativeModelInput.py +5 -0
  113. phoenix/server/api/input_types/ProjectSessionSort.py +161 -1
  114. phoenix/server/api/input_types/PromptFilter.py +14 -0
  115. phoenix/server/api/input_types/PromptVersionInput.py +52 -1
  116. phoenix/server/api/input_types/SpanSort.py +44 -7
  117. phoenix/server/api/input_types/TimeBinConfig.py +23 -0
  118. phoenix/server/api/input_types/UpdateAnnotationInput.py +34 -0
  119. phoenix/server/api/input_types/UserRoleInput.py +1 -0
  120. phoenix/server/api/mutations/__init__.py +10 -0
  121. phoenix/server/api/mutations/annotation_config_mutations.py +8 -8
  122. phoenix/server/api/mutations/api_key_mutations.py +19 -23
  123. phoenix/server/api/mutations/chat_mutations.py +154 -47
  124. phoenix/server/api/mutations/dataset_label_mutations.py +243 -0
  125. phoenix/server/api/mutations/dataset_mutations.py +21 -16
  126. phoenix/server/api/mutations/dataset_split_mutations.py +351 -0
  127. phoenix/server/api/mutations/experiment_mutations.py +2 -2
  128. phoenix/server/api/mutations/export_events_mutations.py +3 -3
  129. phoenix/server/api/mutations/model_mutations.py +210 -0
  130. phoenix/server/api/mutations/project_mutations.py +49 -10
  131. phoenix/server/api/mutations/project_session_annotations_mutations.py +158 -0
  132. phoenix/server/api/mutations/project_trace_retention_policy_mutations.py +8 -4
  133. phoenix/server/api/mutations/prompt_label_mutations.py +74 -65
  134. phoenix/server/api/mutations/prompt_mutations.py +65 -129
  135. phoenix/server/api/mutations/prompt_version_tag_mutations.py +11 -8
  136. phoenix/server/api/mutations/span_annotations_mutations.py +15 -10
  137. phoenix/server/api/mutations/trace_annotations_mutations.py +14 -10
  138. phoenix/server/api/mutations/trace_mutations.py +47 -3
  139. phoenix/server/api/mutations/user_mutations.py +66 -41
  140. phoenix/server/api/queries.py +768 -293
  141. phoenix/server/api/routers/__init__.py +2 -2
  142. phoenix/server/api/routers/auth.py +154 -88
  143. phoenix/server/api/routers/ldap.py +229 -0
  144. phoenix/server/api/routers/oauth2.py +369 -106
  145. phoenix/server/api/routers/v1/__init__.py +24 -4
  146. phoenix/server/api/routers/v1/annotation_configs.py +23 -31
  147. phoenix/server/api/routers/v1/annotations.py +481 -17
  148. phoenix/server/api/routers/v1/datasets.py +395 -81
  149. phoenix/server/api/routers/v1/documents.py +142 -0
  150. phoenix/server/api/routers/v1/evaluations.py +24 -31
  151. phoenix/server/api/routers/v1/experiment_evaluations.py +19 -8
  152. phoenix/server/api/routers/v1/experiment_runs.py +337 -59
  153. phoenix/server/api/routers/v1/experiments.py +479 -48
  154. phoenix/server/api/routers/v1/models.py +7 -0
  155. phoenix/server/api/routers/v1/projects.py +18 -49
  156. phoenix/server/api/routers/v1/prompts.py +54 -40
  157. phoenix/server/api/routers/v1/sessions.py +108 -0
  158. phoenix/server/api/routers/v1/spans.py +1091 -81
  159. phoenix/server/api/routers/v1/traces.py +132 -78
  160. phoenix/server/api/routers/v1/users.py +389 -0
  161. phoenix/server/api/routers/v1/utils.py +3 -7
  162. phoenix/server/api/subscriptions.py +305 -88
  163. phoenix/server/api/types/Annotation.py +90 -23
  164. phoenix/server/api/types/ApiKey.py +13 -17
  165. phoenix/server/api/types/AuthMethod.py +1 -0
  166. phoenix/server/api/types/ChatCompletionSubscriptionPayload.py +1 -0
  167. phoenix/server/api/types/CostBreakdown.py +12 -0
  168. phoenix/server/api/types/Dataset.py +226 -72
  169. phoenix/server/api/types/DatasetExample.py +88 -18
  170. phoenix/server/api/types/DatasetExperimentAnnotationSummary.py +10 -0
  171. phoenix/server/api/types/DatasetLabel.py +57 -0
  172. phoenix/server/api/types/DatasetSplit.py +98 -0
  173. phoenix/server/api/types/DatasetVersion.py +49 -4
  174. phoenix/server/api/types/DocumentAnnotation.py +212 -0
  175. phoenix/server/api/types/Experiment.py +264 -59
  176. phoenix/server/api/types/ExperimentComparison.py +5 -10
  177. phoenix/server/api/types/ExperimentRepeatedRunGroup.py +155 -0
  178. phoenix/server/api/types/ExperimentRepeatedRunGroupAnnotationSummary.py +9 -0
  179. phoenix/server/api/types/ExperimentRun.py +169 -65
  180. phoenix/server/api/types/ExperimentRunAnnotation.py +158 -39
  181. phoenix/server/api/types/GenerativeModel.py +245 -3
  182. phoenix/server/api/types/GenerativeProvider.py +70 -11
  183. phoenix/server/api/types/{Model.py → InferenceModel.py} +1 -1
  184. phoenix/server/api/types/ModelInterface.py +16 -0
  185. phoenix/server/api/types/PlaygroundModel.py +20 -0
  186. phoenix/server/api/types/Project.py +1278 -216
  187. phoenix/server/api/types/ProjectSession.py +188 -28
  188. phoenix/server/api/types/ProjectSessionAnnotation.py +187 -0
  189. phoenix/server/api/types/ProjectTraceRetentionPolicy.py +1 -1
  190. phoenix/server/api/types/Prompt.py +119 -39
  191. phoenix/server/api/types/PromptLabel.py +42 -25
  192. phoenix/server/api/types/PromptVersion.py +11 -8
  193. phoenix/server/api/types/PromptVersionTag.py +65 -25
  194. phoenix/server/api/types/ServerStatus.py +6 -0
  195. phoenix/server/api/types/Span.py +167 -123
  196. phoenix/server/api/types/SpanAnnotation.py +189 -42
  197. phoenix/server/api/types/SpanCostDetailSummaryEntry.py +10 -0
  198. phoenix/server/api/types/SpanCostSummary.py +10 -0
  199. phoenix/server/api/types/SystemApiKey.py +65 -1
  200. phoenix/server/api/types/TokenPrice.py +16 -0
  201. phoenix/server/api/types/TokenUsage.py +3 -3
  202. phoenix/server/api/types/Trace.py +223 -51
  203. phoenix/server/api/types/TraceAnnotation.py +149 -50
  204. phoenix/server/api/types/User.py +137 -32
  205. phoenix/server/api/types/UserApiKey.py +73 -26
  206. phoenix/server/api/types/node.py +10 -0
  207. phoenix/server/api/types/pagination.py +11 -2
  208. phoenix/server/app.py +290 -45
  209. phoenix/server/authorization.py +38 -3
  210. phoenix/server/bearer_auth.py +34 -24
  211. phoenix/server/cost_tracking/cost_details_calculator.py +196 -0
  212. phoenix/server/cost_tracking/cost_model_lookup.py +179 -0
  213. phoenix/server/cost_tracking/helpers.py +68 -0
  214. phoenix/server/cost_tracking/model_cost_manifest.json +3657 -830
  215. phoenix/server/cost_tracking/regex_specificity.py +397 -0
  216. phoenix/server/cost_tracking/token_cost_calculator.py +57 -0
  217. phoenix/server/daemons/__init__.py +0 -0
  218. phoenix/server/daemons/db_disk_usage_monitor.py +214 -0
  219. phoenix/server/daemons/generative_model_store.py +103 -0
  220. phoenix/server/daemons/span_cost_calculator.py +99 -0
  221. phoenix/server/dml_event.py +17 -0
  222. phoenix/server/dml_event_handler.py +5 -0
  223. phoenix/server/email/sender.py +56 -3
  224. phoenix/server/email/templates/db_disk_usage_notification.html +19 -0
  225. phoenix/server/email/types.py +11 -0
  226. phoenix/server/experiments/__init__.py +0 -0
  227. phoenix/server/experiments/utils.py +14 -0
  228. phoenix/server/grpc_server.py +11 -11
  229. phoenix/server/jwt_store.py +17 -15
  230. phoenix/server/ldap.py +1449 -0
  231. phoenix/server/main.py +26 -10
  232. phoenix/server/oauth2.py +330 -12
  233. phoenix/server/prometheus.py +66 -6
  234. phoenix/server/rate_limiters.py +4 -9
  235. phoenix/server/retention.py +33 -20
  236. phoenix/server/session_filters.py +49 -0
  237. phoenix/server/static/.vite/manifest.json +55 -51
  238. phoenix/server/static/assets/components-BreFUQQa.js +6702 -0
  239. phoenix/server/static/assets/{index-E0M82BdE.js → index-CTQoemZv.js} +140 -56
  240. phoenix/server/static/assets/pages-DBE5iYM3.js +9524 -0
  241. phoenix/server/static/assets/vendor-BGzfc4EU.css +1 -0
  242. phoenix/server/static/assets/vendor-DCE4v-Ot.js +920 -0
  243. phoenix/server/static/assets/vendor-codemirror-D5f205eT.js +25 -0
  244. phoenix/server/static/assets/vendor-recharts-V9cwpXsm.js +37 -0
  245. phoenix/server/static/assets/vendor-shiki-Do--csgv.js +5 -0
  246. phoenix/server/static/assets/vendor-three-CmB8bl_y.js +3840 -0
  247. phoenix/server/templates/index.html +40 -6
  248. phoenix/server/thread_server.py +1 -2
  249. phoenix/server/types.py +14 -4
  250. phoenix/server/utils.py +74 -0
  251. phoenix/session/client.py +56 -3
  252. phoenix/session/data_extractor.py +5 -0
  253. phoenix/session/evaluation.py +14 -5
  254. phoenix/session/session.py +45 -9
  255. phoenix/settings.py +5 -0
  256. phoenix/trace/attributes.py +80 -13
  257. phoenix/trace/dsl/helpers.py +90 -1
  258. phoenix/trace/dsl/query.py +8 -6
  259. phoenix/trace/projects.py +5 -0
  260. phoenix/utilities/template_formatters.py +1 -1
  261. phoenix/version.py +1 -1
  262. arize_phoenix-10.0.4.dist-info/RECORD +0 -405
  263. phoenix/server/api/types/Evaluation.py +0 -39
  264. phoenix/server/cost_tracking/cost_lookup.py +0 -255
  265. phoenix/server/static/assets/components-DULKeDfL.js +0 -4365
  266. phoenix/server/static/assets/pages-Cl0A-0U2.js +0 -7430
  267. phoenix/server/static/assets/vendor-WIZid84E.css +0 -1
  268. phoenix/server/static/assets/vendor-arizeai-Dy-0mSNw.js +0 -649
  269. phoenix/server/static/assets/vendor-codemirror-DBtifKNr.js +0 -33
  270. phoenix/server/static/assets/vendor-oB4u9zuV.js +0 -905
  271. phoenix/server/static/assets/vendor-recharts-D-T4KPz2.js +0 -59
  272. phoenix/server/static/assets/vendor-shiki-BMn4O_9F.js +0 -5
  273. phoenix/server/static/assets/vendor-three-C5WAXd5r.js +0 -2998
  274. phoenix/utilities/deprecation.py +0 -31
  275. {arize_phoenix-10.0.4.dist-info → arize_phoenix-12.28.1.dist-info}/entry_points.txt +0 -0
  276. {arize_phoenix-10.0.4.dist-info → arize_phoenix-12.28.1.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,397 @@
1
+ """
2
+ Regex specificity scorer based on heuristics intended for tie-breaking.
3
+
4
+ This module provides functionality to score regex patterns based on their specificity.
5
+ More specific patterns (like exact matches with anchors) receive higher scores,
6
+ while more general patterns (like wildcards and quantifiers) receive lower scores.
7
+
8
+ Scoring Weights:
9
+ - Full anchors (^pattern$): +10000 points
10
+ - Partial anchors (^pattern or pattern$): +5000 points
11
+ - Literal characters: +1000 points each
12
+ - Escaped characters (\\. \\+ etc): +950 points each
13
+ - Character classes [abc]: +500 points
14
+ - Shorthand classes (\\d \\w \\s): +400 points
15
+ - Negated classes [^abc]: +300 points
16
+ - Negated shorthand (\\D \\W \\S): +250 points
17
+ - Exact quantifiers {n}: -50 points
18
+ - Range quantifiers {n,m}: -100 points
19
+ - Wildcards (.): -200 points
20
+ - Optional (?): -100 points
21
+ - Multiple (+ *): -150 points
22
+ - Alternation (|): -300 points
23
+
24
+ Examples:
25
+ >>> score("^abc$") # Exact match: 13010
26
+ >>> score("abc") # Literal: 3006
27
+ >>> score(".*") # Wildcard: 1 (raw total -346 is clamped to the minimum of 1)
28
+ >>> score("[a-z]+") # Class + multiple: 362
29
+ >>> score("\\d{3}") # Shorthand + exact quantifier: 360
30
+ """
31
+
32
+ import re
33
+ from typing import Union
34
+
35
+ from typing_extensions import assert_never
36
+
37
# --- Scoring weights -------------------------------------------------------
# Positive weights reward constructs that narrow what a pattern can match;
# negative weights penalise constructs that widen it.

# Anchors
FULL_ANCHOR = 10000  # ^pattern$ - anchored at both ends, highest specificity
PARTIAL_ANCHOR = 5000  # ^pattern or pattern$ - anchored at one end only

# Single-position matchers, from most to least specific
LITERAL = 1000  # an exact character
ESCAPED = 950  # an escaped character such as \. or \+ (just below a literal)
CHAR_CLASS = 500  # a set such as [abc] or [0-9]
SHORTHAND = 400  # \d \w \s
NEGATED_CLASS = 300  # a negated set such as [^abc]
NEGATED_SHORTHAND = 250  # \D \W \S

# Constructs that make a pattern more general
QUANTIFIER_EXACT = -50  # {n}
QUANTIFIER_RANGE = -100  # {n,m} and {n,}
WILDCARD = -200  # .
OPTIONAL = -100  # ?
MULTIPLE = -150  # + and *
ALTERNATION = -300  # |

# --- Character sets used to classify pattern characters --------------------
POSITIVE_SHORTHANDS = "dws"  # letters of \d \w \s (digit, word, space)
NEGATIVE_SHORTHANDS = "DWS"  # letters of \D \W \S (non-digit, non-word, non-space)
META_CHARS = "()^$"  # metacharacters that contribute nothing to the score
57
+
58
+
59
# Matches a (possibly empty) run of leading *global* inline-flag groups such as
# "(?i)" or "(?i)(?m)". Other "(?" constructs -- non-capturing groups "(?:...)",
# scoped flags "(?i:...)", lookarounds, named groups -- contain pattern content
# and are deliberately not matched, so their content is still scored.
_LEADING_INLINE_FLAGS = re.compile(r"(?:\(\?[aiLmsux]+\))*")


def score(regex: Union[str, re.Pattern[str]]) -> int:
    r"""
    Score a regex pattern for specificity.

    Calculates a specificity score for a regex pattern where higher scores
    indicate more specific patterns. The scoring considers:

    - Anchors (^ and $) - significantly increase specificity
    - Character types (literal, escaped, classes) - moderate impact
    - Quantifiers and wildcards - reduce specificity
    - Pattern length - slight bonus (2 points per character) for longer patterns

    Args:
        regex: The regex to score, either as a pattern string or as a compiled
            pattern. Strings are validated by compiling them; compiled
            patterns are scored from their ``.pattern`` attribute.

    Returns:
        An integer score, always at least 1. Higher scores indicate more
        specific patterns. Totals that would be zero or negative (the empty
        pattern, or very general patterns such as ``.*``) are clamped to 1.

    Raises:
        ValueError: If a pattern string is not a valid regex.

    Examples:
        >>> score("^abc$")
        13010
        >>> score("abc")
        3006
        >>> score(".*")
        1
        >>> score("")
        1
        >>> score("[a-z]+")
        362
        >>> score(r"\d{3}")
        360

    Note:
        The scoring algorithm is designed for cost tracking scenarios
        where more specific patterns should be prioritized over general ones.
    """
    if isinstance(regex, str):
        pattern = regex
        try:
            re.compile(pattern)  # Validate regex
        except re.error as e:
            raise ValueError(f"Invalid regex pattern: {pattern}") from e
    elif isinstance(regex.pattern, str):
        pattern = regex.pattern
    elif isinstance(regex.pattern, bytes):
        pattern = regex.pattern.decode("utf-8")
    else:
        assert_never(regex.pattern)

    score_value = 0

    # Score anchors - most significant factor
    has_start_anchor = _has_start_anchor(pattern)
    has_end_anchor = _has_end_anchor(pattern)

    if has_start_anchor and has_end_anchor:
        score_value += FULL_ANCHOR
    elif has_start_anchor or has_end_anchor:
        score_value += PARTIAL_ANCHOR

    # Score pattern content
    content = _strip_anchors(pattern)
    score_value += _score_content(content)

    # Length bonus for tie-breaking (longer patterns slightly preferred)
    score_value += len(pattern) * 2

    return max(score_value, 1)


def _leading_flags_end(pattern: str) -> int:
    """Return the index just past any leading global inline flags, e.g. ``(?i)``."""
    match = _LEADING_INLINE_FLAGS.match(pattern)
    return match.end() if match else 0


def _has_end_anchor(pattern: str) -> bool:
    r"""
    Check if pattern ends with an unescaped ``$``.

    A trailing ``\$`` matches a literal dollar sign and is therefore not an
    anchor; the ``$`` is escaped when an odd number of backslashes precede it.
    """
    if not pattern.endswith("$"):
        return False
    before = pattern[:-1]
    preceding_backslashes = len(before) - len(before.rstrip("\\"))
    return preceding_backslashes % 2 == 0


def _has_start_anchor(pattern: str) -> bool:
    """
    Check if pattern has a start anchor (after all leading inline flags).

    Only global inline-flag groups such as ``(?i)`` are skipped. Groups that
    carry content, such as ``(?:...)``, are not treated as flags.
    """
    return pattern.startswith("^", _leading_flags_end(pattern))


def _strip_anchors(pattern: str) -> str:
    r"""
    Remove all leading inline flags and anchors from pattern for content analysis.

    Strips leading global inline flags, one ``^`` that follows them, and one
    trailing unescaped ``$``. Content-bearing groups such as ``(?:...)`` and an
    escaped trailing ``\$`` are kept so that they are scored as content.
    """
    i = _leading_flags_end(pattern)
    # Remove start anchor
    if pattern.startswith("^", i):
        i += 1
    content = pattern[i:]
    # Remove end anchor
    if _has_end_anchor(content):
        content = content[:-1]
    return content
172
+
173
+
174
def _score_content(content: str) -> int:
    r"""
    Sum the scores of every element in an anchor-free pattern.

    Walks the content left to right and dispatches on the current character:

    - a backslash followed by another character is scored as an escape (\d, \., ...)
    - "[" starts a character class ([abc], [^abc])
    - "{" starts a quantifier ({n}, {n,m})
    - anything else is scored as a single character (., ?, +, *, |, literals)

    Args:
        content: Pattern content without anchors

    Returns:
        Cumulative score for all pattern elements
    """
    total = 0
    pos = 0
    length = len(content)

    while pos < length:
        current = content[pos]

        # A backslash only forms an escape when a character follows it; a
        # lone trailing backslash falls through to single-character scoring.
        if current == "\\" and pos + 1 < length:
            total += _score_escape(content[pos + 1])
            pos += 2
            continue

        if current == "[":
            delta, pos = _score_bracket(content, pos)
        elif current == "{":
            delta, pos = _score_quantifier(content, pos)
        else:
            delta, pos = _score_char(current), pos + 1
        total += delta

    return total
217
+
218
+
219
def _score_escape(char: str) -> int:
    r"""
    Return the score of an escape sequence, given the character after the backslash.

    Args:
        char: The character following the backslash

    Returns:
        - SHORTHAND (+400) for \d, \w, \s
        - NEGATED_SHORTHAND (+250) for \D, \W, \S
        - ESCAPED (+950) for every other escape, e.g. \. or \+
    """
    if char in POSITIVE_SHORTHANDS:
        return SHORTHAND
    if char in NEGATIVE_SHORTHANDS:
        return NEGATED_SHORTHAND
    return ESCAPED
238
+
239
+
240
def _score_bracket(content: str, start: int) -> tuple[int, int]:
    """
    Score the character class opening at ``start`` and report where to resume.

    Args:
        content: Pattern content
        start: Starting position of the opening bracket

    Returns:
        Tuple of (score, next_position):
            - score: +500 for [abc], +300 for [^abc]; +1000 (a literal "[")
              when no closing bracket is found
            - next_position: Position after the closing bracket, or just after
              the "[" when the class is unterminated
    """
    closing = _find_bracket_end(content, start)
    if closing == -1:
        # Malformed bracket, treat as literal
        return LITERAL, start + 1

    # The class is negated when its first inner character is "^".
    if content.startswith("^", start + 1, closing):
        return NEGATED_CLASS, closing + 1
    return CHAR_CLASS, closing + 1
262
+
263
+
264
def _score_quantifier(content: str, start: int) -> tuple[int, int]:
    """
    Score the brace quantifier opening at ``start`` and report where to resume.

    Args:
        content: Pattern content
        start: Starting position of the opening brace

    Returns:
        Tuple of (score, next_position):
            - score: -50 for {n}, -100 for a quantifier containing a comma;
              +1000 (a literal "{") when the braces are unterminated or do not
              form a valid quantifier
            - next_position: Position after the closing brace, or just after
              the "{" when it is scored as a literal
    """
    closing = content.find("}", start)
    if closing != -1:
        candidate = content[start : closing + 1]
        if _is_valid_quantifier(candidate):
            weight = QUANTIFIER_RANGE if "," in candidate else QUANTIFIER_EXACT
            return weight, closing + 1

    # Unterminated or malformed quantifier: the "{" is just a literal character.
    return LITERAL, start + 1
293
+
294
+
295
def _is_valid_quantifier(quantifier: str) -> bool:
    """
    Check if a brace expression is a quantifier under Python's ``re`` syntax.

    Accepted forms are ``{n}``, ``{n,m}``, ``{n,}``, ``{,m}`` and ``{,}``
    (an omitted lower bound means 0, an omitted upper bound means unbounded).
    Bounds must be ASCII digits, matching what the ``re`` parser accepts; any
    other brace expression (including ``{}``) is literal text.

    Args:
        quantifier: Candidate string including both braces, e.g. "{2,5}"

    Returns:
        True if the string is a valid quantifier, False otherwise (including
        when both bounds are given and the lower exceeds the upper)
    """
    if len(quantifier) < 3 or not (quantifier.startswith("{") and quantifier.endswith("}")):
        return False

    def is_number(part: str) -> bool:
        # str.isdigit() alone also accepts characters such as "²" that int()
        # rejects and that the regex parser does not treat as digits.
        return part.isascii() and part.isdigit()

    lower, comma, upper = quantifier[1:-1].partition(",")

    if not comma:
        # Exact quantifier: {n}
        return is_number(lower)

    # Range quantifier: either bound may be omitted, but a bound that is
    # present must be numeric. A second comma ends up in ``upper`` and fails.
    if (lower and not is_number(lower)) or (upper and not is_number(upper)):
        return False

    if lower and upper and int(lower) > int(upper):
        return False

    return True
339
+
340
+
341
def _score_char(char: str) -> int:
    """
    Return the score of one unescaped pattern character.

    Args:
        char: Single character to score

    Returns:
        Score for the character:
            - .: -200 (wildcard)
            - ?: -100 (optional)
            - |: -300 (alternation)
            - +, *: -150 (multiple)
            - (, ), ^, $: 0 (metacharacters)
            - Other: +1000 (literal)
    """
    special_weights = {
        ".": WILDCARD,
        "?": OPTIONAL,
        "|": ALTERNATION,
    }

    weight = special_weights.get(char)
    if weight is not None:
        return weight
    if char in "+*":
        return MULTIPLE
    # Grouping and anchor metacharacters neither add nor remove specificity.
    return 0 if char in META_CHARS else LITERAL
371
+
372
+
373
def _find_bracket_end(pattern: str, start: int) -> int:
    r"""
    Find the end of a character class, handling escaped and leading brackets.

    Args:
        pattern: Pattern string
        start: Position of opening bracket

    Returns:
        Position of closing bracket, or -1 if not found

    Note:
        - Escaped closing brackets like [a\]b] are skipped.
        - A "]" placed first in the set, optionally after the negating "^",
          is a literal member rather than the terminator, as in []abc] and
          [^]abc].
    """
    pos = start + 1
    # Step over the negation marker and a literal leading "]", if present.
    if pattern.startswith("^", pos):
        pos += 1
    if pattern.startswith("]", pos):
        pos += 1

    length = len(pattern)
    while pos < length:
        char = pattern[pos]
        if char == "\\":
            # Skip the backslash together with the character it escapes.
            pos += 2
            continue
        if char == "]":
            return pos
        pos += 1
    return -1
@@ -0,0 +1,57 @@
1
+ from dataclasses import dataclass
2
+ from typing import Any, Mapping, Optional
3
+
4
+ from typing_extensions import override
5
+
6
+ from phoenix.db.types.token_price_customization import (
7
+ ThresholdBasedTokenPriceCustomization,
8
+ TokenPriceCustomization,
9
+ )
10
+ from phoenix.trace.attributes import get_attribute_value
11
+
12
+
13
@dataclass(frozen=True)
class TokenCostCalculator:
    """Flat-rate cost calculator: every token is charged at ``base_rate``."""

    # Price applied to each token.
    base_rate: float

    def calculate_cost(
        self,
        attributes: Mapping[str, Any],
        tokens: int,
    ) -> float:
        """
        Return the cost of ``tokens`` tokens at the base rate.

        Args:
            attributes: Unused by this implementation; part of the signature
                so that subclasses can vary the rate based on it.
            tokens: Number of tokens to charge for.

        Returns:
            ``tokens`` multiplied by ``base_rate``.
        """
        rate = self.base_rate
        return tokens * rate
23
+
24
+
25
@dataclass(frozen=True)
class ThresholdBasedTokenCostCalculator(TokenCostCalculator):
    """
    Cost calculator that switches to ``new_rate`` when an attribute exceeds a threshold.

    The value found under ``key`` in the supplied attributes is compared with
    ``threshold``; tokens are charged at ``new_rate`` when it is strictly
    greater, and at ``base_rate`` otherwise.
    """

    # Attribute key whose value is compared against the threshold.
    key: str
    # Value the attribute must strictly exceed for ``new_rate`` to apply.
    threshold: float
    # Per-token rate used once the threshold is exceeded.
    new_rate: float

    @override
    def calculate_cost(
        self,
        attributes: Mapping[str, Any],
        tokens: float,
    ) -> float:
        """
        Return the cost of ``tokens`` at the rate selected by the threshold rule.

        Args:
            attributes: Mapping searched for ``key`` via ``get_attribute_value``.
            tokens: Number of tokens to charge for.

        Returns:
            ``tokens * new_rate`` when the attribute value is truthy and
            greater than ``threshold``; ``tokens * base_rate`` otherwise
            (including when the value is missing or falsy).
        """
        observed = get_attribute_value(attributes, self.key)
        exceeds = observed and observed > self.threshold
        rate = self.new_rate if exceeds else self.base_rate
        return tokens * rate
42
+
43
+
44
def create_token_cost_calculator(
    base_rate: float,
    customization: Optional[TokenPriceCustomization] = None,
) -> TokenCostCalculator:
    """
    Build the cost calculator that matches an optional price customization.

    Args:
        base_rate: Per-token rate used by the returned calculator.
        customization: Optional customization. Only
            ``ThresholdBasedTokenPriceCustomization`` changes the result.

    Returns:
        A ``ThresholdBasedTokenCostCalculator`` configured from the
        customization when it is threshold-based; otherwise a plain
        ``TokenCostCalculator`` charging ``base_rate``.
    """
    is_threshold_based = customization and isinstance(
        customization, ThresholdBasedTokenPriceCustomization
    )
    if is_threshold_based:
        return ThresholdBasedTokenCostCalculator(
            base_rate=base_rate,
            key=customization.key,
            threshold=customization.threshold,
            new_rate=customization.new_rate,
        )
    # No customization, or a kind with no dedicated calculator: flat rate.
    return TokenCostCalculator(base_rate=base_rate)
File without changes
@@ -0,0 +1,214 @@
1
+ from __future__ import annotations
2
+
3
+ import logging
4
+ from asyncio import sleep
5
+ from datetime import datetime, timedelta, timezone
6
+ from typing import Optional
7
+
8
+ import sqlalchemy as sa
9
+ from email_validator import EmailNotValidError, validate_email
10
+ from sqlalchemy import text
11
+ from typing_extensions import assert_never
12
+
13
+ from phoenix.config import (
14
+ ENV_PHOENIX_SQL_DATABASE_SCHEMA,
15
+ get_env_database_allocated_storage_capacity_gibibytes,
16
+ get_env_database_usage_email_warning_threshold_percentage,
17
+ get_env_database_usage_insertion_blocking_threshold_percentage,
18
+ getenv,
19
+ )
20
+ from phoenix.db import models
21
+ from phoenix.db.helpers import SupportedSQLDialect
22
+ from phoenix.server.email.types import DbUsageWarningEmailSender
23
+ from phoenix.server.prometheus import (
24
+ DB_DISK_USAGE_BYTES,
25
+ DB_DISK_USAGE_RATIO,
26
+ DB_DISK_USAGE_WARNING_EMAIL_ERRORS,
27
+ DB_DISK_USAGE_WARNING_EMAILS_SENT,
28
+ DB_INSERTIONS_BLOCKED,
29
+ )
30
+ from phoenix.server.types import DaemonTask, DbSessionFactory
31
+
32
logger = logging.getLogger(__name__)

# Seconds the monitor sleeps between consecutive disk usage checks.
_SLEEP_SECONDS = 60
# Minimum number of hours between two warning emails to the same address.
_EMAIL_FREQUENCY_HOURS = 24
# Number of bytes in one gibibyte (2**30); used to convert byte counts to GiB.
_BYTES_PER_GIBIBYTE = 1024**3
37
+
38
+
39
class DbDiskUsageMonitor(DaemonTask):
    """
    Monitors database disk space usage and triggers warnings/blocking when thresholds are exceeded.

    This daemon:
    - Periodically checks current database size
    - Compares usage against configured thresholds
    - Sends warning emails to admins when warning threshold is reached
    - Toggles insertion blocking when blocking threshold is reached
    """

    def __init__(
        self,
        db: DbSessionFactory,
        email_sender: Optional[DbUsageWarningEmailSender] = None,
    ) -> None:
        """Store the session factory and the optional warning email sender.

        Args:
            db: Session factory used to query the database size and admin emails.
            email_sender: Sender for usage warning emails; when ``None`` no
                warning emails are sent.
        """
        super().__init__()
        self._db = db
        self._email_sender = email_sender
        # Tracks last email send time per admin email address to prevent spam
        self._last_email_sent: dict[str, datetime] = {}

    @property
    def _is_disabled(self) -> bool:
        """Whether monitoring is off.

        Monitoring is enabled only when an allocated storage capacity is
        configured together with at least one of the two thresholds.
        """
        return not bool(
            get_env_database_allocated_storage_capacity_gibibytes()
            and (
                get_env_database_usage_email_warning_threshold_percentage()
                or get_env_database_usage_insertion_blocking_threshold_percentage()
            )
        )

    async def _run(self) -> None:
        """Poll disk usage every ``_SLEEP_SECONDS`` while the task is running.

        Failures in either the size query or the threshold check are logged
        and do not stop the loop.
        """
        if self._is_disabled:
            return

        while self._running:
            try:
                current_usage_bytes = await self._check_disk_usage_bytes()
            except Exception:
                logger.exception("Failed to check disk space")
            else:
                DB_DISK_USAGE_BYTES.set(current_usage_bytes)
                current_usage_gibibytes = current_usage_bytes / _BYTES_PER_GIBIBYTE
                try:
                    await self._check_thresholds(current_usage_gibibytes)
                except Exception:
                    logger.exception("Failed to check database usage thresholds")
            await sleep(_SLEEP_SECONDS)

    async def _check_disk_usage_bytes(self) -> float:
        """Return the current database size in bytes.

        For SQLite the size is ``(page_count - freelist_count) * page_size``.
        For PostgreSQL it is the sum of ``pg_total_relation_size`` over the
        ordinary tables (``relkind = 'r'``) of the configured schema, which
        defaults to ``public``.
        """
        if self._db.dialect is SupportedSQLDialect.SQLITE:
            async with self._db() as session:
                page_count = await session.scalar(text("PRAGMA page_count;"))
                freelist_count = await session.scalar(text("PRAGMA freelist_count;"))
                page_size = await session.scalar(text("PRAGMA page_size;"))
            # Free-list pages are excluded from the usage figure.
            current_usage_bytes = (page_count - freelist_count) * page_size
        elif self._db.dialect is SupportedSQLDialect.POSTGRESQL:
            nspname = getenv(ENV_PHOENIX_SQL_DATABASE_SCHEMA) or "public"
            stmt = text("""\
                SELECT sum(pg_total_relation_size(c.oid))
                FROM pg_class as c
                INNER JOIN pg_namespace as n ON n.oid = c.relnamespace
                WHERE c.relkind = 'r'
                AND n.nspname = :nspname;
            """).bindparams(nspname=nspname)
            async with self._db() as session:
                current_usage_bytes = await session.scalar(stmt)
        else:
            assert_never(self._db.dialect)
        return float(current_usage_bytes)

    async def _check_thresholds(self, current_usage_gibibytes: float) -> None:
        """Compare usage against the configured thresholds and act on them.

        Publishes the usage ratio, updates the insertion-blocking flag on the
        session factory (only when a blocking threshold is configured), and
        triggers warning emails when usage exceeds the warning threshold.

        Args:
            current_usage_gibibytes: Current database size in GiB.
        """
        allocated_capacity_gibibytes = get_env_database_allocated_storage_capacity_gibibytes()
        if not allocated_capacity_gibibytes:
            return

        used_ratio = current_usage_gibibytes / allocated_capacity_gibibytes
        DB_DISK_USAGE_RATIO.set(used_ratio)
        used_percentage = used_ratio * 100

        # Check insertion blocking threshold
        if (
            insertion_blocking_threshold_percentage
            := get_env_database_usage_insertion_blocking_threshold_percentage()
        ):
            should_not_insert_or_update = used_percentage > insertion_blocking_threshold_percentage
            self._db.should_not_insert_or_update = should_not_insert_or_update
            DB_INSERTIONS_BLOCKED.set(int(should_not_insert_or_update))

        # Check warning email threshold
        if (
            notification_threshold_percentage
            := get_env_database_usage_email_warning_threshold_percentage()
        ):
            if used_percentage > notification_threshold_percentage:
                await self._send_warning_emails(
                    used_percentage,
                    allocated_capacity_gibibytes,
                    notification_threshold_percentage,
                )

    async def _send_warning_emails(
        self,
        used_percentage: float,
        allocated_capacity_gibibytes: float,
        notification_threshold_percentage: float,
    ) -> None:
        """Email a usage warning to every admin not notified recently.

        Admin addresses are read from the database and validated (without a
        deliverability check). An address is skipped if a warning was
        successfully sent to it less than ``_EMAIL_FREQUENCY_HOURS`` ago.
        Send failures are logged and counted, and never raised.

        Args:
            used_percentage: Current usage as a percentage of allocated capacity.
            allocated_capacity_gibibytes: Allocated storage capacity in GiB.
            notification_threshold_percentage: Warning threshold that was exceeded.
        """
        if not self._email_sender:
            return

        current_usage_gibibytes = used_percentage / 100 * allocated_capacity_gibibytes
        stmt = (
            sa.select(models.User.email)
            .join(models.UserRole)
            .where(models.UserRole.name == "ADMIN")
        )

        try:
            async with self._db() as session:
                admin_emails = (await session.scalars(stmt)).all()
        except Exception:
            logger.exception(
                "Failed to fetch admin emails from database, skipping database usage warning emails"
            )
            return

        if not admin_emails:
            return

        # Validate email addresses
        valid_emails: list[str] = []

        for email in admin_emails:
            try:
                normalized_email = validate_email(email, check_deliverability=False).normalized
            except EmailNotValidError:
                # Invalid addresses are deliberately skipped (best effort).
                pass
            else:
                valid_emails.append(normalized_email)

        if not valid_emails:
            return

        # Forget addresses that are no longer valid admin emails; a set keeps
        # the membership test O(1) per tracked address.
        valid_email_set = frozenset(valid_emails)
        self._last_email_sent = {
            email: timestamp
            for email, timestamp in self._last_email_sent.items()
            if email in valid_email_set
        }

        now = datetime.now(timezone.utc)
        min_interval = timedelta(hours=_EMAIL_FREQUENCY_HOURS)

        for email in valid_emails:
            if email in self._last_email_sent and now - self._last_email_sent[email] < min_interval:
                continue
            try:
                await self._email_sender.send_db_usage_warning_email(
                    email=email,
                    current_usage_gibibytes=current_usage_gibibytes,
                    allocated_storage_gibibytes=allocated_capacity_gibibytes,
                    notification_threshold_percentage=notification_threshold_percentage,
                )
            except Exception:
                logger.exception(f"Failed to send database usage warning email to {email}")
                # Count email send errors
                DB_DISK_USAGE_WARNING_EMAIL_ERRORS.inc()
            else:
                # Only successful sends start the throttling window.
                self._last_email_sent[email] = now
                # Count successful warning email sends
                DB_DISK_USAGE_WARNING_EMAILS_SENT.inc()