arize-phoenix 10.0.4__py3-none-any.whl → 12.28.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {arize_phoenix-10.0.4.dist-info → arize_phoenix-12.28.1.dist-info}/METADATA +124 -72
- arize_phoenix-12.28.1.dist-info/RECORD +499 -0
- {arize_phoenix-10.0.4.dist-info → arize_phoenix-12.28.1.dist-info}/WHEEL +1 -1
- {arize_phoenix-10.0.4.dist-info → arize_phoenix-12.28.1.dist-info}/licenses/IP_NOTICE +1 -1
- phoenix/__generated__/__init__.py +0 -0
- phoenix/__generated__/classification_evaluator_configs/__init__.py +20 -0
- phoenix/__generated__/classification_evaluator_configs/_document_relevance_classification_evaluator_config.py +17 -0
- phoenix/__generated__/classification_evaluator_configs/_hallucination_classification_evaluator_config.py +17 -0
- phoenix/__generated__/classification_evaluator_configs/_models.py +18 -0
- phoenix/__generated__/classification_evaluator_configs/_tool_selection_classification_evaluator_config.py +17 -0
- phoenix/__init__.py +5 -4
- phoenix/auth.py +39 -2
- phoenix/config.py +1763 -91
- phoenix/datetime_utils.py +120 -2
- phoenix/db/README.md +595 -25
- phoenix/db/bulk_inserter.py +145 -103
- phoenix/db/engines.py +140 -33
- phoenix/db/enums.py +3 -12
- phoenix/db/facilitator.py +302 -35
- phoenix/db/helpers.py +1000 -65
- phoenix/db/iam_auth.py +64 -0
- phoenix/db/insertion/dataset.py +135 -2
- phoenix/db/insertion/document_annotation.py +9 -6
- phoenix/db/insertion/evaluation.py +2 -3
- phoenix/db/insertion/helpers.py +17 -2
- phoenix/db/insertion/session_annotation.py +176 -0
- phoenix/db/insertion/span.py +15 -11
- phoenix/db/insertion/span_annotation.py +3 -4
- phoenix/db/insertion/trace_annotation.py +3 -4
- phoenix/db/insertion/types.py +50 -20
- phoenix/db/migrations/versions/01a8342c9cdf_add_user_id_on_datasets.py +40 -0
- phoenix/db/migrations/versions/0df286449799_add_session_annotations_table.py +105 -0
- phoenix/db/migrations/versions/272b66ff50f8_drop_single_indices.py +119 -0
- phoenix/db/migrations/versions/58228d933c91_dataset_labels.py +67 -0
- phoenix/db/migrations/versions/699f655af132_experiment_tags.py +57 -0
- phoenix/db/migrations/versions/735d3d93c33e_add_composite_indices.py +41 -0
- phoenix/db/migrations/versions/a20694b15f82_cost.py +196 -0
- phoenix/db/migrations/versions/ab513d89518b_add_user_id_on_dataset_versions.py +40 -0
- phoenix/db/migrations/versions/d0690a79ea51_users_on_experiments.py +40 -0
- phoenix/db/migrations/versions/deb2c81c0bb2_dataset_splits.py +139 -0
- phoenix/db/migrations/versions/e76cbd66ffc3_add_experiments_dataset_examples.py +87 -0
- phoenix/db/models.py +669 -56
- phoenix/db/pg_config.py +10 -0
- phoenix/db/types/model_provider.py +4 -0
- phoenix/db/types/token_price_customization.py +29 -0
- phoenix/db/types/trace_retention.py +23 -15
- phoenix/experiments/evaluators/utils.py +3 -3
- phoenix/experiments/functions.py +160 -52
- phoenix/experiments/tracing.py +2 -2
- phoenix/experiments/types.py +1 -1
- phoenix/inferences/inferences.py +1 -2
- phoenix/server/api/auth.py +38 -7
- phoenix/server/api/auth_messages.py +46 -0
- phoenix/server/api/context.py +100 -4
- phoenix/server/api/dataloaders/__init__.py +79 -5
- phoenix/server/api/dataloaders/annotation_configs_by_project.py +31 -0
- phoenix/server/api/dataloaders/annotation_summaries.py +60 -8
- phoenix/server/api/dataloaders/average_experiment_repeated_run_group_latency.py +50 -0
- phoenix/server/api/dataloaders/average_experiment_run_latency.py +17 -24
- phoenix/server/api/dataloaders/cache/two_tier_cache.py +1 -2
- phoenix/server/api/dataloaders/dataset_dataset_splits.py +52 -0
- phoenix/server/api/dataloaders/dataset_example_revisions.py +0 -1
- phoenix/server/api/dataloaders/dataset_example_splits.py +40 -0
- phoenix/server/api/dataloaders/dataset_examples_and_versions_by_experiment_run.py +47 -0
- phoenix/server/api/dataloaders/dataset_labels.py +36 -0
- phoenix/server/api/dataloaders/document_evaluation_summaries.py +2 -2
- phoenix/server/api/dataloaders/document_evaluations.py +6 -9
- phoenix/server/api/dataloaders/experiment_annotation_summaries.py +88 -34
- phoenix/server/api/dataloaders/experiment_dataset_splits.py +43 -0
- phoenix/server/api/dataloaders/experiment_error_rates.py +21 -28
- phoenix/server/api/dataloaders/experiment_repeated_run_group_annotation_summaries.py +77 -0
- phoenix/server/api/dataloaders/experiment_repeated_run_groups.py +57 -0
- phoenix/server/api/dataloaders/experiment_runs_by_experiment_and_example.py +44 -0
- phoenix/server/api/dataloaders/last_used_times_by_generative_model_id.py +35 -0
- phoenix/server/api/dataloaders/latency_ms_quantile.py +40 -8
- phoenix/server/api/dataloaders/record_counts.py +37 -10
- phoenix/server/api/dataloaders/session_annotations_by_session.py +29 -0
- phoenix/server/api/dataloaders/span_cost_by_span.py +24 -0
- phoenix/server/api/dataloaders/span_cost_detail_summary_entries_by_generative_model.py +56 -0
- phoenix/server/api/dataloaders/span_cost_detail_summary_entries_by_project_session.py +57 -0
- phoenix/server/api/dataloaders/span_cost_detail_summary_entries_by_span.py +43 -0
- phoenix/server/api/dataloaders/span_cost_detail_summary_entries_by_trace.py +56 -0
- phoenix/server/api/dataloaders/span_cost_details_by_span_cost.py +27 -0
- phoenix/server/api/dataloaders/span_cost_summary_by_experiment.py +57 -0
- phoenix/server/api/dataloaders/span_cost_summary_by_experiment_repeated_run_group.py +64 -0
- phoenix/server/api/dataloaders/span_cost_summary_by_experiment_run.py +58 -0
- phoenix/server/api/dataloaders/span_cost_summary_by_generative_model.py +55 -0
- phoenix/server/api/dataloaders/span_cost_summary_by_project.py +152 -0
- phoenix/server/api/dataloaders/span_cost_summary_by_project_session.py +56 -0
- phoenix/server/api/dataloaders/span_cost_summary_by_trace.py +55 -0
- phoenix/server/api/dataloaders/span_costs.py +29 -0
- phoenix/server/api/dataloaders/table_fields.py +2 -2
- phoenix/server/api/dataloaders/token_prices_by_model.py +30 -0
- phoenix/server/api/dataloaders/trace_annotations_by_trace.py +27 -0
- phoenix/server/api/dataloaders/types.py +29 -0
- phoenix/server/api/exceptions.py +11 -1
- phoenix/server/api/helpers/dataset_helpers.py +5 -1
- phoenix/server/api/helpers/playground_clients.py +1243 -292
- phoenix/server/api/helpers/playground_registry.py +2 -2
- phoenix/server/api/helpers/playground_spans.py +8 -4
- phoenix/server/api/helpers/playground_users.py +26 -0
- phoenix/server/api/helpers/prompts/conversions/aws.py +83 -0
- phoenix/server/api/helpers/prompts/conversions/google.py +103 -0
- phoenix/server/api/helpers/prompts/models.py +205 -22
- phoenix/server/api/input_types/{SpanAnnotationFilter.py → AnnotationFilter.py} +22 -14
- phoenix/server/api/input_types/ChatCompletionInput.py +6 -2
- phoenix/server/api/input_types/CreateProjectInput.py +27 -0
- phoenix/server/api/input_types/CreateProjectSessionAnnotationInput.py +37 -0
- phoenix/server/api/input_types/DatasetFilter.py +17 -0
- phoenix/server/api/input_types/ExperimentRunSort.py +237 -0
- phoenix/server/api/input_types/GenerativeCredentialInput.py +9 -0
- phoenix/server/api/input_types/GenerativeModelInput.py +5 -0
- phoenix/server/api/input_types/ProjectSessionSort.py +161 -1
- phoenix/server/api/input_types/PromptFilter.py +14 -0
- phoenix/server/api/input_types/PromptVersionInput.py +52 -1
- phoenix/server/api/input_types/SpanSort.py +44 -7
- phoenix/server/api/input_types/TimeBinConfig.py +23 -0
- phoenix/server/api/input_types/UpdateAnnotationInput.py +34 -0
- phoenix/server/api/input_types/UserRoleInput.py +1 -0
- phoenix/server/api/mutations/__init__.py +10 -0
- phoenix/server/api/mutations/annotation_config_mutations.py +8 -8
- phoenix/server/api/mutations/api_key_mutations.py +19 -23
- phoenix/server/api/mutations/chat_mutations.py +154 -47
- phoenix/server/api/mutations/dataset_label_mutations.py +243 -0
- phoenix/server/api/mutations/dataset_mutations.py +21 -16
- phoenix/server/api/mutations/dataset_split_mutations.py +351 -0
- phoenix/server/api/mutations/experiment_mutations.py +2 -2
- phoenix/server/api/mutations/export_events_mutations.py +3 -3
- phoenix/server/api/mutations/model_mutations.py +210 -0
- phoenix/server/api/mutations/project_mutations.py +49 -10
- phoenix/server/api/mutations/project_session_annotations_mutations.py +158 -0
- phoenix/server/api/mutations/project_trace_retention_policy_mutations.py +8 -4
- phoenix/server/api/mutations/prompt_label_mutations.py +74 -65
- phoenix/server/api/mutations/prompt_mutations.py +65 -129
- phoenix/server/api/mutations/prompt_version_tag_mutations.py +11 -8
- phoenix/server/api/mutations/span_annotations_mutations.py +15 -10
- phoenix/server/api/mutations/trace_annotations_mutations.py +14 -10
- phoenix/server/api/mutations/trace_mutations.py +47 -3
- phoenix/server/api/mutations/user_mutations.py +66 -41
- phoenix/server/api/queries.py +768 -293
- phoenix/server/api/routers/__init__.py +2 -2
- phoenix/server/api/routers/auth.py +154 -88
- phoenix/server/api/routers/ldap.py +229 -0
- phoenix/server/api/routers/oauth2.py +369 -106
- phoenix/server/api/routers/v1/__init__.py +24 -4
- phoenix/server/api/routers/v1/annotation_configs.py +23 -31
- phoenix/server/api/routers/v1/annotations.py +481 -17
- phoenix/server/api/routers/v1/datasets.py +395 -81
- phoenix/server/api/routers/v1/documents.py +142 -0
- phoenix/server/api/routers/v1/evaluations.py +24 -31
- phoenix/server/api/routers/v1/experiment_evaluations.py +19 -8
- phoenix/server/api/routers/v1/experiment_runs.py +337 -59
- phoenix/server/api/routers/v1/experiments.py +479 -48
- phoenix/server/api/routers/v1/models.py +7 -0
- phoenix/server/api/routers/v1/projects.py +18 -49
- phoenix/server/api/routers/v1/prompts.py +54 -40
- phoenix/server/api/routers/v1/sessions.py +108 -0
- phoenix/server/api/routers/v1/spans.py +1091 -81
- phoenix/server/api/routers/v1/traces.py +132 -78
- phoenix/server/api/routers/v1/users.py +389 -0
- phoenix/server/api/routers/v1/utils.py +3 -7
- phoenix/server/api/subscriptions.py +305 -88
- phoenix/server/api/types/Annotation.py +90 -23
- phoenix/server/api/types/ApiKey.py +13 -17
- phoenix/server/api/types/AuthMethod.py +1 -0
- phoenix/server/api/types/ChatCompletionSubscriptionPayload.py +1 -0
- phoenix/server/api/types/CostBreakdown.py +12 -0
- phoenix/server/api/types/Dataset.py +226 -72
- phoenix/server/api/types/DatasetExample.py +88 -18
- phoenix/server/api/types/DatasetExperimentAnnotationSummary.py +10 -0
- phoenix/server/api/types/DatasetLabel.py +57 -0
- phoenix/server/api/types/DatasetSplit.py +98 -0
- phoenix/server/api/types/DatasetVersion.py +49 -4
- phoenix/server/api/types/DocumentAnnotation.py +212 -0
- phoenix/server/api/types/Experiment.py +264 -59
- phoenix/server/api/types/ExperimentComparison.py +5 -10
- phoenix/server/api/types/ExperimentRepeatedRunGroup.py +155 -0
- phoenix/server/api/types/ExperimentRepeatedRunGroupAnnotationSummary.py +9 -0
- phoenix/server/api/types/ExperimentRun.py +169 -65
- phoenix/server/api/types/ExperimentRunAnnotation.py +158 -39
- phoenix/server/api/types/GenerativeModel.py +245 -3
- phoenix/server/api/types/GenerativeProvider.py +70 -11
- phoenix/server/api/types/{Model.py → InferenceModel.py} +1 -1
- phoenix/server/api/types/ModelInterface.py +16 -0
- phoenix/server/api/types/PlaygroundModel.py +20 -0
- phoenix/server/api/types/Project.py +1278 -216
- phoenix/server/api/types/ProjectSession.py +188 -28
- phoenix/server/api/types/ProjectSessionAnnotation.py +187 -0
- phoenix/server/api/types/ProjectTraceRetentionPolicy.py +1 -1
- phoenix/server/api/types/Prompt.py +119 -39
- phoenix/server/api/types/PromptLabel.py +42 -25
- phoenix/server/api/types/PromptVersion.py +11 -8
- phoenix/server/api/types/PromptVersionTag.py +65 -25
- phoenix/server/api/types/ServerStatus.py +6 -0
- phoenix/server/api/types/Span.py +167 -123
- phoenix/server/api/types/SpanAnnotation.py +189 -42
- phoenix/server/api/types/SpanCostDetailSummaryEntry.py +10 -0
- phoenix/server/api/types/SpanCostSummary.py +10 -0
- phoenix/server/api/types/SystemApiKey.py +65 -1
- phoenix/server/api/types/TokenPrice.py +16 -0
- phoenix/server/api/types/TokenUsage.py +3 -3
- phoenix/server/api/types/Trace.py +223 -51
- phoenix/server/api/types/TraceAnnotation.py +149 -50
- phoenix/server/api/types/User.py +137 -32
- phoenix/server/api/types/UserApiKey.py +73 -26
- phoenix/server/api/types/node.py +10 -0
- phoenix/server/api/types/pagination.py +11 -2
- phoenix/server/app.py +290 -45
- phoenix/server/authorization.py +38 -3
- phoenix/server/bearer_auth.py +34 -24
- phoenix/server/cost_tracking/cost_details_calculator.py +196 -0
- phoenix/server/cost_tracking/cost_model_lookup.py +179 -0
- phoenix/server/cost_tracking/helpers.py +68 -0
- phoenix/server/cost_tracking/model_cost_manifest.json +3657 -830
- phoenix/server/cost_tracking/regex_specificity.py +397 -0
- phoenix/server/cost_tracking/token_cost_calculator.py +57 -0
- phoenix/server/daemons/__init__.py +0 -0
- phoenix/server/daemons/db_disk_usage_monitor.py +214 -0
- phoenix/server/daemons/generative_model_store.py +103 -0
- phoenix/server/daemons/span_cost_calculator.py +99 -0
- phoenix/server/dml_event.py +17 -0
- phoenix/server/dml_event_handler.py +5 -0
- phoenix/server/email/sender.py +56 -3
- phoenix/server/email/templates/db_disk_usage_notification.html +19 -0
- phoenix/server/email/types.py +11 -0
- phoenix/server/experiments/__init__.py +0 -0
- phoenix/server/experiments/utils.py +14 -0
- phoenix/server/grpc_server.py +11 -11
- phoenix/server/jwt_store.py +17 -15
- phoenix/server/ldap.py +1449 -0
- phoenix/server/main.py +26 -10
- phoenix/server/oauth2.py +330 -12
- phoenix/server/prometheus.py +66 -6
- phoenix/server/rate_limiters.py +4 -9
- phoenix/server/retention.py +33 -20
- phoenix/server/session_filters.py +49 -0
- phoenix/server/static/.vite/manifest.json +55 -51
- phoenix/server/static/assets/components-BreFUQQa.js +6702 -0
- phoenix/server/static/assets/{index-E0M82BdE.js → index-CTQoemZv.js} +140 -56
- phoenix/server/static/assets/pages-DBE5iYM3.js +9524 -0
- phoenix/server/static/assets/vendor-BGzfc4EU.css +1 -0
- phoenix/server/static/assets/vendor-DCE4v-Ot.js +920 -0
- phoenix/server/static/assets/vendor-codemirror-D5f205eT.js +25 -0
- phoenix/server/static/assets/vendor-recharts-V9cwpXsm.js +37 -0
- phoenix/server/static/assets/vendor-shiki-Do--csgv.js +5 -0
- phoenix/server/static/assets/vendor-three-CmB8bl_y.js +3840 -0
- phoenix/server/templates/index.html +40 -6
- phoenix/server/thread_server.py +1 -2
- phoenix/server/types.py +14 -4
- phoenix/server/utils.py +74 -0
- phoenix/session/client.py +56 -3
- phoenix/session/data_extractor.py +5 -0
- phoenix/session/evaluation.py +14 -5
- phoenix/session/session.py +45 -9
- phoenix/settings.py +5 -0
- phoenix/trace/attributes.py +80 -13
- phoenix/trace/dsl/helpers.py +90 -1
- phoenix/trace/dsl/query.py +8 -6
- phoenix/trace/projects.py +5 -0
- phoenix/utilities/template_formatters.py +1 -1
- phoenix/version.py +1 -1
- arize_phoenix-10.0.4.dist-info/RECORD +0 -405
- phoenix/server/api/types/Evaluation.py +0 -39
- phoenix/server/cost_tracking/cost_lookup.py +0 -255
- phoenix/server/static/assets/components-DULKeDfL.js +0 -4365
- phoenix/server/static/assets/pages-Cl0A-0U2.js +0 -7430
- phoenix/server/static/assets/vendor-WIZid84E.css +0 -1
- phoenix/server/static/assets/vendor-arizeai-Dy-0mSNw.js +0 -649
- phoenix/server/static/assets/vendor-codemirror-DBtifKNr.js +0 -33
- phoenix/server/static/assets/vendor-oB4u9zuV.js +0 -905
- phoenix/server/static/assets/vendor-recharts-D-T4KPz2.js +0 -59
- phoenix/server/static/assets/vendor-shiki-BMn4O_9F.js +0 -5
- phoenix/server/static/assets/vendor-three-C5WAXd5r.js +0 -2998
- phoenix/utilities/deprecation.py +0 -31
- {arize_phoenix-10.0.4.dist-info → arize_phoenix-12.28.1.dist-info}/entry_points.txt +0 -0
- {arize_phoenix-10.0.4.dist-info → arize_phoenix-12.28.1.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,397 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Regex specificity scorer based on heuristics intended for tie-breaking.
|
|
3
|
+
|
|
4
|
+
This module provides functionality to score regex patterns based on their specificity.
|
|
5
|
+
More specific patterns (like exact matches with anchors) receive higher scores,
|
|
6
|
+
while more general patterns (like wildcards and quantifiers) receive lower scores.
|
|
7
|
+
|
|
8
|
+
Scoring Weights:
|
|
9
|
+
- Full anchors (^pattern$): +10000 points
|
|
10
|
+
- Partial anchors (^pattern or pattern$): +5000 points
|
|
11
|
+
- Literal characters: +1000 points each
|
|
12
|
+
- Escaped characters (\\. \\+ etc): +950 points each
|
|
13
|
+
- Character classes [abc]: +500 points
|
|
14
|
+
- Shorthand classes (\\d \\w \\s): +400 points
|
|
15
|
+
- Negated classes [^abc]: +300 points
|
|
16
|
+
- Negated shorthand (\\D \\W \\S): +250 points
|
|
17
|
+
- Exact quantifiers {n}: -50 points
|
|
18
|
+
- Range quantifiers {n,m}: -100 points
|
|
19
|
+
- Wildcards (.): -200 points
|
|
20
|
+
- Optional (?): -100 points
|
|
21
|
+
- Multiple (+ *): -150 points
|
|
22
|
+
- Alternation (|): -300 points
|
|
23
|
+
|
|
24
|
+
Examples:
|
|
25
|
+
>>> score("^abc$") # Exact match: 13010 (10000 anchors + 3000 literals + 10 length bonus)
|
|
26
|
+
>>> score("abc") # Literal: 3006 (3000 literals + 6 length bonus)
|
|
27
|
+
>>> score(".*") # Wildcard: 1 (raw score -346 is clamped to the minimum of 1)
|
|
28
|
+
>>> score("[a-z]+") # Class + multiple: 362 (500 - 150 + 12 length bonus)
|
|
29
|
+
>>> score("\\d{3}") # Shorthand + exact quantifier: 360 (400 - 50 + 10 length bonus)
|
|
30
|
+
"""
|
|
31
|
+
|
|
32
|
+
import re
|
|
33
|
+
from typing import Union
|
|
34
|
+
|
|
35
|
+
from typing_extensions import assert_never
|
|
36
|
+
|
|
37
|
+
# Scoring weights for different regex pattern elements
|
|
38
|
+
FULL_ANCHOR = 10000 # ^pattern$ - highest specificity
|
|
39
|
+
PARTIAL_ANCHOR = 5000 # ^pattern or pattern$ - high specificity
|
|
40
|
+
LITERAL = 1000 # exact characters - good specificity
|
|
41
|
+
ESCAPED = 950 # \. \+ etc - slightly less than literal
|
|
42
|
+
CHAR_CLASS = 500 # [abc] [0-9] - moderate specificity
|
|
43
|
+
SHORTHAND = 400 # \d \w \s - moderate specificity
|
|
44
|
+
NEGATED_CLASS = 300 # [^abc] - lower specificity
|
|
45
|
+
NEGATED_SHORTHAND = 250 # \D \W \S - lower specificity
|
|
46
|
+
QUANTIFIER_EXACT = -50 # {n} - reduces specificity
|
|
47
|
+
QUANTIFIER_RANGE = -100 # {n,m} {n,} - reduces specificity more
|
|
48
|
+
WILDCARD = -200 # . - significantly reduces specificity
|
|
49
|
+
OPTIONAL = -100 # ? - reduces specificity
|
|
50
|
+
MULTIPLE = -150 # + * - reduces specificity
|
|
51
|
+
ALTERNATION = -300 # | - significantly reduces specificity
|
|
52
|
+
|
|
53
|
+
# Character sets for classification
|
|
54
|
+
POSITIVE_SHORTHANDS = "dws" # \d \w \s - digit, word, space
|
|
55
|
+
NEGATIVE_SHORTHANDS = "DWS" # \D \W \S - non-digit, non-word, non-space
|
|
56
|
+
META_CHARS = "()^$" # Regex metacharacters that don't affect scoring
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def score(regex: Union[str, re.Pattern[str]]) -> int:
    r"""
    Score a regex pattern for specificity.

    Calculates a specificity score for a regex pattern where higher scores
    indicate more specific patterns. The scoring considers:

    - Anchors (^ and $) - significantly increase specificity
    - Character types (literal, escaped, classes) - moderate impact
    - Quantifiers and wildcards - reduce specificity
    - Pattern length - slight bonus (2 points per character) for tie-breaking

    Args:
        regex: The regex to score, either as a pattern string or as a
            compiled pattern. Only pattern strings are validated here.

    Returns:
        An integer score of at least 1. Higher scores indicate more specific
        patterns. Any pattern whose raw score would be zero or negative
        (for example ".*" or the empty pattern) is clamped to 1.

    Raises:
        ValueError: If ``regex`` is a string that is not a valid regex.

    Examples:
        >>> score("^abc$")
        13010
        >>> score("abc")
        3006
        >>> score(".*")
        1
        >>> score("")
        1
        >>> score("[a-z]+")
        362
        >>> score(r"\d{3}")
        360

    Note:
        The scoring algorithm is designed for cost tracking scenarios
        where more specific patterns should be prioritized over general ones.
    """
    if isinstance(regex, str):
        pattern = regex
        try:
            re.compile(pattern)  # Validate regex
        except re.error as e:
            raise ValueError(f"Invalid regex pattern: {pattern}") from e
    elif isinstance(regex.pattern, str):
        pattern = regex.pattern
    elif isinstance(regex.pattern, bytes):
        pattern = regex.pattern.decode("utf-8")
    else:
        assert_never(regex.pattern)

    score_value = 0

    # Score anchors - most significant factor
    has_start_anchor = _has_start_anchor(pattern)
    # A trailing "$" is an anchor only when it is not escaped, i.e. when it is
    # preceded by an even number of backslashes ("a\$" ends with a literal "$").
    before_last = pattern[:-1]
    trailing_backslashes = len(before_last) - len(before_last.rstrip("\\"))
    has_end_anchor = pattern.endswith("$") and trailing_backslashes % 2 == 0

    if has_start_anchor and has_end_anchor:
        score_value += FULL_ANCHOR
    elif has_start_anchor or has_end_anchor:
        score_value += PARTIAL_ANCHOR

    # Score pattern content
    content = _strip_anchors(pattern)
    score_value += _score_content(content)

    # Length bonus for tie-breaking (longer patterns slightly preferred)
    score_value += len(pattern) * 2

    # Clamp so that callers always receive a positive score
    return max(score_value, 1)
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
def _has_start_anchor(pattern: str) -> bool:
|
|
137
|
+
"""
|
|
138
|
+
Check if pattern has a start anchor (after all leading inline flags).
|
|
139
|
+
Handles multiple inline flags robustly.
|
|
140
|
+
"""
|
|
141
|
+
i = 0
|
|
142
|
+
# Skip all leading inline flags
|
|
143
|
+
while pattern.startswith("(?", i):
|
|
144
|
+
close = pattern.find(")", i)
|
|
145
|
+
if close == -1:
|
|
146
|
+
break
|
|
147
|
+
i = close + 1
|
|
148
|
+
# After all flags, check for ^
|
|
149
|
+
return i < len(pattern) and pattern[i] == "^"
|
|
150
|
+
|
|
151
|
+
|
|
152
|
+
def _strip_anchors(pattern: str) -> str:
|
|
153
|
+
"""
|
|
154
|
+
Remove all leading inline flags and anchors from pattern for content analysis.
|
|
155
|
+
Handles multiple inline flags robustly.
|
|
156
|
+
"""
|
|
157
|
+
i = 0
|
|
158
|
+
# Remove all leading inline flags
|
|
159
|
+
while pattern.startswith("(?", i):
|
|
160
|
+
close = pattern.find(")", i)
|
|
161
|
+
if close == -1:
|
|
162
|
+
break
|
|
163
|
+
i = close + 1
|
|
164
|
+
# Remove start anchor
|
|
165
|
+
if i < len(pattern) and pattern[i] == "^":
|
|
166
|
+
i += 1
|
|
167
|
+
content = pattern[i:]
|
|
168
|
+
# Remove end anchor
|
|
169
|
+
if content.endswith("$"):
|
|
170
|
+
content = content[:-1]
|
|
171
|
+
return content
|
|
172
|
+
|
|
173
|
+
|
|
174
|
+
def _score_content(content: str) -> int:
    r"""
    Walk the anchor-free pattern content and add up the score of each element.

    Elements are recognised in this order at every position:
    - a backslash followed by another character (escape sequence, 2 chars)
    - "[" (character class, consumed by ``_score_bracket``)
    - "{" (quantifier, consumed by ``_score_quantifier``)
    - anything else (single character, scored by ``_score_char``)

    Args:
        content: Pattern content without anchors

    Returns:
        Cumulative score for all pattern elements
    """
    total = 0
    pos = 0
    length = len(content)

    while pos < length:
        current = content[pos]

        # Escape sequence: the backslash and the escaped character form one element.
        # A trailing lone backslash falls through to the single-character case.
        if current == "\\" and pos + 1 < length:
            total += _score_escape(content[pos + 1])
            pos += 2
            continue

        # Character class: the helper reports where scanning should resume.
        if current == "[":
            delta, pos = _score_bracket(content, pos)
            total += delta
            continue

        # Brace quantifier: the helper reports where scanning should resume.
        if current == "{":
            delta, pos = _score_quantifier(content, pos)
            total += delta
            continue

        # Plain single character
        total += _score_char(current)
        pos += 1

    return total
|
|
217
|
+
|
|
218
|
+
|
|
219
|
+
def _score_escape(char: str) -> int:
    r"""
    Return the score of an escape sequence, given the escaped character.

    Args:
        char: The character following the backslash

    Returns:
        ``SHORTHAND`` for \d, \w, \s; ``NEGATED_SHORTHAND`` for \D, \W, \S;
        ``ESCAPED`` for every other escaped character (\., \+, etc).
    """
    if char in POSITIVE_SHORTHANDS:
        return SHORTHAND
    if char in NEGATIVE_SHORTHANDS:
        return NEGATED_SHORTHAND
    return ESCAPED
|
|
238
|
+
|
|
239
|
+
|
|
240
|
+
def _score_bracket(content: str, start: int) -> tuple[int, int]:
    """
    Score the character class opening at ``start`` and locate where it ends.

    Args:
        content: Pattern content
        start: Starting position of the opening bracket

    Returns:
        Tuple of (score, next_position):
        - score: ``CHAR_CLASS`` for [abc], ``NEGATED_CLASS`` for [^abc],
          or ``LITERAL`` when no closing bracket is found
        - next_position: Position after the closing bracket, or ``start + 1``
          when the bracket is unterminated
    """
    closing = _find_bracket_end(content, start)
    if closing == -1:
        # Malformed bracket, treat as literal
        return LITERAL, start + 1

    # The class is negated when its first inner character is "^".
    is_negated = content.startswith("^", start + 1, closing)
    if is_negated:
        return NEGATED_CLASS, closing + 1
    return CHAR_CLASS, closing + 1
|
|
262
|
+
|
|
263
|
+
|
|
264
|
+
def _score_quantifier(content: str, start: int) -> tuple[int, int]:
    """
    Score the brace quantifier opening at ``start`` and locate where it ends.

    Args:
        content: Pattern content
        start: Starting position of the opening brace

    Returns:
        Tuple of (score, next_position):
        - score: ``QUANTIFIER_EXACT`` for {n}, ``QUANTIFIER_RANGE`` when the
          quantifier contains a comma, or ``LITERAL`` when the brace does not
          start a valid quantifier
        - next_position: Position after the closing brace, or ``start + 1``
          when the brace is scored as a literal
    """
    closing = content.find("}", start)
    if closing == -1:
        # Malformed quantifier, treat as literal
        return LITERAL, start + 1

    token = content[start : closing + 1]
    if not _is_valid_quantifier(token):
        # Braces that do not form a quantifier are scored as a literal "{"
        return LITERAL, start + 1

    if "," in token:
        return QUANTIFIER_RANGE, closing + 1
    return QUANTIFIER_EXACT, closing + 1
|
|
293
|
+
|
|
294
|
+
|
|
295
|
+
def _is_valid_quantifier(quantifier: str) -> bool:
|
|
296
|
+
"""
|
|
297
|
+
Check if a quantifier has valid syntax.
|
|
298
|
+
|
|
299
|
+
Args:
|
|
300
|
+
quantifier: Quantifier string like "{n}", "{n,m}", "{n,}"
|
|
301
|
+
|
|
302
|
+
Returns:
|
|
303
|
+
True if quantifier syntax is valid
|
|
304
|
+
"""
|
|
305
|
+
if not quantifier.startswith("{") or not quantifier.endswith("}"):
|
|
306
|
+
return False
|
|
307
|
+
|
|
308
|
+
# Extract content between braces
|
|
309
|
+
content = quantifier[1:-1]
|
|
310
|
+
|
|
311
|
+
if "," in content:
|
|
312
|
+
# Range quantifier: {n,m} or {n,}
|
|
313
|
+
parts = content.split(",")
|
|
314
|
+
if len(parts) != 2:
|
|
315
|
+
return False
|
|
316
|
+
|
|
317
|
+
min_part, max_part = parts
|
|
318
|
+
|
|
319
|
+
# Check minimum part
|
|
320
|
+
if not min_part.isdigit():
|
|
321
|
+
return False
|
|
322
|
+
|
|
323
|
+
# Check maximum part (can be empty for {n,})
|
|
324
|
+
if max_part and not max_part.isdigit():
|
|
325
|
+
return False
|
|
326
|
+
|
|
327
|
+
# Validate range
|
|
328
|
+
if max_part:
|
|
329
|
+
min_val = int(min_part)
|
|
330
|
+
max_val = int(max_part)
|
|
331
|
+
if min_val > max_val:
|
|
332
|
+
return False
|
|
333
|
+
else:
|
|
334
|
+
# Exact quantifier: {n}
|
|
335
|
+
if not content.isdigit():
|
|
336
|
+
return False
|
|
337
|
+
|
|
338
|
+
return True
|
|
339
|
+
|
|
340
|
+
|
|
341
|
+
def _score_char(char: str) -> int:
    """
    Return the score contribution of one unescaped pattern character.

    Args:
        char: Single character to score

    Returns:
        Score for the character:
        - .: ``WILDCARD``
        - ?: ``OPTIONAL``
        - |: ``ALTERNATION``
        - +, *: ``MULTIPLE``
        - characters in ``META_CHARS``: 0
        - Other: ``LITERAL``
    """
    if char == ".":
        return WILDCARD
    if char == "?":
        return OPTIONAL
    if char == "|":
        return ALTERNATION
    if char in "+*":
        return MULTIPLE
    if char in META_CHARS:
        # Metacharacters don't affect scoring
        return 0
    return LITERAL
|
|
371
|
+
|
|
372
|
+
|
|
373
|
+
def _find_bracket_end(pattern: str, start: int) -> int:
|
|
374
|
+
r"""
|
|
375
|
+
Find the end of a character class, handling escaped brackets.
|
|
376
|
+
|
|
377
|
+
Args:
|
|
378
|
+
pattern: Pattern string
|
|
379
|
+
start: Position of opening bracket
|
|
380
|
+
|
|
381
|
+
Returns:
|
|
382
|
+
Position of closing bracket, or -1 if not found
|
|
383
|
+
|
|
384
|
+
Note:
|
|
385
|
+
Handles escaped closing brackets like [a\]b] correctly.
|
|
386
|
+
"""
|
|
387
|
+
for i in range(start + 1, len(pattern)):
|
|
388
|
+
if pattern[i] == "]":
|
|
389
|
+
# Count backslashes to check if this ] is escaped
|
|
390
|
+
backslashes = 0
|
|
391
|
+
j = i - 1
|
|
392
|
+
while j >= 0 and pattern[j] == "\\":
|
|
393
|
+
backslashes += 1
|
|
394
|
+
j -= 1
|
|
395
|
+
if backslashes % 2 == 0: # Not escaped
|
|
396
|
+
return i
|
|
397
|
+
return -1
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
from dataclasses import dataclass
|
|
2
|
+
from typing import Any, Mapping, Optional
|
|
3
|
+
|
|
4
|
+
from typing_extensions import override
|
|
5
|
+
|
|
6
|
+
from phoenix.db.types.token_price_customization import (
|
|
7
|
+
ThresholdBasedTokenPriceCustomization,
|
|
8
|
+
TokenPriceCustomization,
|
|
9
|
+
)
|
|
10
|
+
from phoenix.trace.attributes import get_attribute_value
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
@dataclass(frozen=True)
class TokenCostCalculator:
    """Immutable calculator that prices every token at a single flat rate."""

    # Rate multiplied by the token count to obtain the cost.
    base_rate: float

    def calculate_cost(self, attributes: Mapping[str, Any], tokens: int) -> float:
        """
        Return the cost of ``tokens`` tokens at ``base_rate``.

        Args:
            attributes: Attribute mapping; not consulted by this flat-rate
                implementation (subclasses may use it).
            tokens: Number of tokens to price.

        Returns:
            ``tokens`` multiplied by ``base_rate``.
        """
        cost = tokens * self.base_rate
        return cost
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
@dataclass(frozen=True)
class ThresholdBasedTokenCostCalculator(TokenCostCalculator):
    """
    Calculator that switches from ``base_rate`` to ``new_rate`` when the
    attribute stored under ``key`` exceeds ``threshold``.
    """

    # Attribute key whose value is compared against the threshold.
    key: str
    # The attribute value must be strictly greater than this to use new_rate.
    threshold: float
    # Rate applied instead of base_rate once the threshold is exceeded.
    new_rate: float

    @override
    def calculate_cost(self, attributes: Mapping[str, Any], tokens: float) -> float:
        """
        Return the cost of ``tokens`` tokens at the rate selected by ``attributes``.

        Args:
            attributes: Attribute mapping from which the value for ``key`` is
                looked up via ``get_attribute_value``.
            tokens: Number of tokens to price.

        Returns:
            ``tokens * new_rate`` when the looked-up value is truthy and
            strictly greater than ``threshold``; ``tokens * base_rate``
            otherwise (including when the value is missing or falsy).
        """
        value = get_attribute_value(attributes, self.key)
        # A missing/falsy value short-circuits before the comparison is attempted.
        if value and value > self.threshold:
            return tokens * self.new_rate
        return tokens * self.base_rate
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def create_token_cost_calculator(
    base_rate: float,
    customization: Optional[TokenPriceCustomization] = None,
) -> TokenCostCalculator:
    """Build the calculator that corresponds to an optional price customization.

    Args:
        base_rate: Per-token price used by the returned calculator.
        customization: Optional pricing customization. Only a truthy
            ``ThresholdBasedTokenPriceCustomization`` changes the result.

    Returns:
        A ``ThresholdBasedTokenCostCalculator`` configured from the
        customization's ``key``, ``threshold`` and ``new_rate`` when a
        threshold-based customization is given; otherwise a plain
        ``TokenCostCalculator`` using ``base_rate``.
    """
    if customization and isinstance(customization, ThresholdBasedTokenPriceCustomization):
        return ThresholdBasedTokenCostCalculator(
            base_rate=base_rate,
            key=customization.key,
            threshold=customization.threshold,
            new_rate=customization.new_rate,
        )
    # No customization, or a kind this factory does not specialise on.
    return TokenCostCalculator(base_rate=base_rate)
|
|
File without changes
|
|
@@ -0,0 +1,214 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import logging
|
|
4
|
+
from asyncio import sleep
|
|
5
|
+
from datetime import datetime, timedelta, timezone
|
|
6
|
+
from typing import Optional
|
|
7
|
+
|
|
8
|
+
import sqlalchemy as sa
|
|
9
|
+
from email_validator import EmailNotValidError, validate_email
|
|
10
|
+
from sqlalchemy import text
|
|
11
|
+
from typing_extensions import assert_never
|
|
12
|
+
|
|
13
|
+
from phoenix.config import (
|
|
14
|
+
ENV_PHOENIX_SQL_DATABASE_SCHEMA,
|
|
15
|
+
get_env_database_allocated_storage_capacity_gibibytes,
|
|
16
|
+
get_env_database_usage_email_warning_threshold_percentage,
|
|
17
|
+
get_env_database_usage_insertion_blocking_threshold_percentage,
|
|
18
|
+
getenv,
|
|
19
|
+
)
|
|
20
|
+
from phoenix.db import models
|
|
21
|
+
from phoenix.db.helpers import SupportedSQLDialect
|
|
22
|
+
from phoenix.server.email.types import DbUsageWarningEmailSender
|
|
23
|
+
from phoenix.server.prometheus import (
|
|
24
|
+
DB_DISK_USAGE_BYTES,
|
|
25
|
+
DB_DISK_USAGE_RATIO,
|
|
26
|
+
DB_DISK_USAGE_WARNING_EMAIL_ERRORS,
|
|
27
|
+
DB_DISK_USAGE_WARNING_EMAILS_SENT,
|
|
28
|
+
DB_INSERTIONS_BLOCKED,
|
|
29
|
+
)
|
|
30
|
+
from phoenix.server.types import DaemonTask, DbSessionFactory
|
|
31
|
+
|
|
32
|
+
logger = logging.getLogger(__name__)
|
|
33
|
+
|
|
34
|
+
_SLEEP_SECONDS = 60  # seconds the monitor loop sleeps between disk-usage checks
|
|
35
|
+
_EMAIL_FREQUENCY_HOURS = 24  # minimum hours between warning emails to the same address
|
|
36
|
+
_BYTES_PER_GIBIBYTE = 1024**3  # divisor used to convert a byte count to gibibytes
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
class DbDiskUsageMonitor(DaemonTask):
    """
    Monitors database disk space usage and triggers warnings/blocking when thresholds are exceeded.

    This daemon:
    - Periodically checks current database size
    - Compares usage against configured thresholds
    - Sends warning emails to admins when warning threshold is reached
    - Toggles insertion blocking when blocking threshold is reached
    """

    def __init__(
        self,
        db: DbSessionFactory,
        email_sender: Optional[DbUsageWarningEmailSender] = None,
    ) -> None:
        """
        Args:
            db: Session factory used to query the database; its
                ``should_not_insert_or_update`` flag is toggled by this monitor.
            email_sender: Sender used for usage warning emails. When omitted,
                no warning emails are sent.
        """
        super().__init__()
        self._db = db
        self._email_sender = email_sender
        # Tracks last email send time per admin email address to prevent spam
        self._last_email_sent: dict[str, datetime] = {}

    @property
    def _is_disabled(self) -> bool:
        """
        True unless an allocated storage capacity is configured together with
        at least one of the warning-email or insertion-blocking thresholds.
        """
        return not bool(
            get_env_database_allocated_storage_capacity_gibibytes()
            and (
                get_env_database_usage_email_warning_threshold_percentage()
                or get_env_database_usage_insertion_blocking_threshold_percentage()
            )
        )

    async def _run(self) -> None:
        """
        Monitoring loop: measure disk usage, record it, evaluate thresholds, sleep.

        Returns immediately when the monitor is disabled. Failures while
        measuring usage or evaluating thresholds are logged and do not stop
        the loop.
        """
        if self._is_disabled:
            return

        while self._running:
            try:
                current_usage_bytes = await self._check_disk_usage_bytes()
            except Exception:
                logger.exception("Failed to check disk space")
            else:
                DB_DISK_USAGE_BYTES.set(current_usage_bytes)
                current_usage_gibibytes = current_usage_bytes / _BYTES_PER_GIBIBYTE
                try:
                    await self._check_thresholds(current_usage_gibibytes)
                except Exception:
                    logger.exception("Failed to check database usage thresholds")
            await sleep(_SLEEP_SECONDS)

    async def _check_disk_usage_bytes(self) -> float:
        """
        Return the current database size in bytes.

        SQLite: pages in use (``page_count - freelist_count``) times ``page_size``.
        PostgreSQL: sum of ``pg_total_relation_size`` over ordinary tables
        (``relkind = 'r'``) in the schema named by the
        ``ENV_PHOENIX_SQL_DATABASE_SCHEMA`` setting, defaulting to ``public``.

        Note: if the PostgreSQL query yields NULL (no matching tables), the
        final ``float(...)`` conversion raises; ``_run`` logs that failure.
        """
        if self._db.dialect is SupportedSQLDialect.SQLITE:
            async with self._db() as session:
                page_count = await session.scalar(text("PRAGMA page_count;"))
                freelist_count = await session.scalar(text("PRAGMA freelist_count;"))
                page_size = await session.scalar(text("PRAGMA page_size;"))
            current_usage_bytes = (page_count - freelist_count) * page_size
        elif self._db.dialect is SupportedSQLDialect.POSTGRESQL:
            nspname = getenv(ENV_PHOENIX_SQL_DATABASE_SCHEMA) or "public"
            stmt = text("""\
                SELECT sum(pg_total_relation_size(c.oid))
                FROM pg_class as c
                INNER JOIN pg_namespace as n ON n.oid = c.relnamespace
                WHERE c.relkind = 'r'
                AND n.nspname = :nspname;
            """).bindparams(nspname=nspname)
            async with self._db() as session:
                current_usage_bytes = await session.scalar(stmt)
        else:
            assert_never(self._db.dialect)
        return float(current_usage_bytes)

    async def _check_thresholds(self, current_usage_gibibytes: float) -> None:
        """
        Compare current usage with the configured thresholds and act on them.

        Records the usage ratio, updates the insertion-blocking flag on the
        session factory when a blocking threshold is configured, and sends
        warning emails when usage is above the configured warning threshold.
        Does nothing when no allocated capacity is configured.

        Args:
            current_usage_gibibytes: Current database size in GiB.
        """
        allocated_capacity_gibibytes = get_env_database_allocated_storage_capacity_gibibytes()
        if not allocated_capacity_gibibytes:
            return

        used_ratio = current_usage_gibibytes / allocated_capacity_gibibytes
        DB_DISK_USAGE_RATIO.set(used_ratio)
        used_percentage = used_ratio * 100

        # Check insertion blocking threshold
        if (
            insertion_blocking_threshold_percentage
            := get_env_database_usage_insertion_blocking_threshold_percentage()
        ):
            should_not_insert_or_update = used_percentage > insertion_blocking_threshold_percentage
            self._db.should_not_insert_or_update = should_not_insert_or_update
            DB_INSERTIONS_BLOCKED.set(int(should_not_insert_or_update))

        # Check warning email threshold
        if (
            notification_threshold_percentage
            := get_env_database_usage_email_warning_threshold_percentage()
        ):
            if used_percentage > notification_threshold_percentage:
                await self._send_warning_emails(
                    used_percentage,
                    allocated_capacity_gibibytes,
                    notification_threshold_percentage,
                )

    async def _send_warning_emails(
        self,
        used_percentage: float,
        allocated_capacity_gibibytes: float,
        notification_threshold_percentage: float,
    ) -> None:
        """
        Email a usage warning to every admin who has not been warned recently.

        Admin addresses are read from the database, validated and normalized;
        invalid addresses are skipped. Each address receives at most one email
        per ``_EMAIL_FREQUENCY_HOURS``. A failed send is logged and counted,
        and does not prevent the remaining addresses from being tried.

        Args:
            used_percentage: Current usage as a percentage of allocated capacity.
            allocated_capacity_gibibytes: Allocated storage capacity in GiB.
            notification_threshold_percentage: Warning threshold that was exceeded.
        """
        if not self._email_sender:
            return

        current_usage_gibibytes = used_percentage / 100 * allocated_capacity_gibibytes
        stmt = (
            sa.select(models.User.email)
            .join(models.UserRole)
            .where(models.UserRole.name == "ADMIN")
        )

        try:
            async with self._db() as session:
                admin_emails = (await session.scalars(stmt)).all()
        except Exception:
            logger.exception(
                "Failed to fetch admin emails from database, skipping database usage warning emails"
            )
            return

        if not admin_emails:
            return

        # Validate email addresses
        valid_emails: list[str] = []

        for email in admin_emails:
            try:
                normalized_email = validate_email(email, check_deliverability=False).normalized
            except EmailNotValidError:
                pass
            else:
                valid_emails.append(normalized_email)

        if not valid_emails:
            return

        # Forget send times for addresses that are no longer valid admin emails;
        # a set keeps the membership test constant-time per tracked address.
        current_recipients = set(valid_emails)
        self._last_email_sent = {
            email: timestamp
            for email, timestamp in self._last_email_sent.items()
            if email in current_recipients
        }

        now = datetime.now(timezone.utc)
        resend_interval = timedelta(hours=_EMAIL_FREQUENCY_HOURS)

        for email in valid_emails:
            last_sent = self._last_email_sent.get(email)
            if last_sent is not None and now - last_sent < resend_interval:
                continue
            try:
                await self._email_sender.send_db_usage_warning_email(
                    email=email,
                    current_usage_gibibytes=current_usage_gibibytes,
                    allocated_storage_gibibytes=allocated_capacity_gibibytes,
                    notification_threshold_percentage=notification_threshold_percentage,
                )
            except Exception:
                logger.exception(f"Failed to send database usage warning email to {email}")
                # Count email send errors
                DB_DISK_USAGE_WARNING_EMAIL_ERRORS.inc()
            else:
                self._last_email_sent[email] = now
                # Count successful warning email sends
                DB_DISK_USAGE_WARNING_EMAILS_SENT.inc()
|