litellm-enterprise 0.1.20__py3-none-any.whl → 0.1.21__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (23)
  1. litellm_enterprise/enterprise_callbacks/.pytest_cache/.gitignore +2 -0
  2. litellm_enterprise/enterprise_callbacks/.pytest_cache/CACHEDIR.TAG +4 -0
  3. litellm_enterprise/enterprise_callbacks/.pytest_cache/README.md +8 -0
  4. litellm_enterprise/enterprise_callbacks/.pytest_cache/v/cache/nodeids +1 -0
  5. litellm_enterprise/enterprise_callbacks/.pytest_cache/v/cache/stepwise +1 -0
  6. litellm_enterprise/enterprise_callbacks/generic_api_callback.py +1 -1
  7. litellm_enterprise/enterprise_callbacks/llama_guard.py +2 -10
  8. litellm_enterprise/enterprise_callbacks/llm_guard.py +2 -9
  9. litellm_enterprise/enterprise_callbacks/pagerduty/pagerduty.py +9 -12
  10. litellm_enterprise/enterprise_callbacks/send_emails/base_email.py +61 -1
  11. litellm_enterprise/integrations/custom_guardrail.py +1 -2
  12. litellm_enterprise/proxy/common_utils/check_batch_cost.py +3 -4
  13. litellm_enterprise/proxy/hooks/managed_files.py +6 -24
  14. litellm_enterprise/proxy/management_endpoints/internal_user_endpoints.py +0 -1
  15. litellm_enterprise/proxy/management_endpoints/key_management_endpoints.py +12 -0
  16. litellm_enterprise/proxy/vector_stores/endpoints.py +49 -7
  17. litellm_enterprise/types/enterprise_callbacks/send_emails.py +14 -2
  18. {litellm_enterprise-0.1.20.dist-info → litellm_enterprise-0.1.21.dist-info}/METADATA +1 -1
  19. {litellm_enterprise-0.1.20.dist-info → litellm_enterprise-0.1.21.dist-info}/RECORD +21 -18
  20. litellm_enterprise/integrations/prometheus.py +0 -2361
  21. litellm_enterprise/proxy/guardrails/endpoints.py +0 -41
  22. {litellm_enterprise-0.1.20.dist-info → litellm_enterprise-0.1.21.dist-info}/LICENSE.md +0 -0
  23. {litellm_enterprise-0.1.20.dist-info → litellm_enterprise-0.1.21.dist-info}/WHEEL +0 -0
litellm_enterprise/integrations/prometheus.py (removed in 0.1.21)
@@ -1,2361 +0,0 @@
1
- # used for /metrics endpoint on LiteLLM Proxy
2
- #### What this does ####
3
- # On success, log events to Prometheus
4
- import sys
5
- from datetime import datetime, timedelta
6
- from typing import (
7
- TYPE_CHECKING,
8
- Any,
9
- Awaitable,
10
- Callable,
11
- Dict,
12
- List,
13
- Literal,
14
- Optional,
15
- Tuple,
16
- cast,
17
- )
18
-
19
- import litellm
20
- from litellm._logging import print_verbose, verbose_logger
21
- from litellm.integrations.custom_logger import CustomLogger
22
- from litellm.proxy._types import LiteLLM_TeamTable, UserAPIKeyAuth
23
- from litellm.types.integrations.prometheus import *
24
- from litellm.types.utils import StandardLoggingPayload
25
- from litellm.utils import get_end_user_id_for_cost_tracking
26
-
27
- if TYPE_CHECKING:
28
- from apscheduler.schedulers.asyncio import AsyncIOScheduler
29
- else:
30
- AsyncIOScheduler = Any
31
-
32
-
33
- class PrometheusLogger(CustomLogger):
34
- # Class variables or attributes
35
- def __init__(
36
- self,
37
- **kwargs,
38
- ):
39
- try:
40
- from prometheus_client import Counter, Gauge, Histogram
41
-
42
- from litellm.proxy.proxy_server import CommonProxyErrors, premium_user
43
-
44
- # Always initialize label_filters, even for non-premium users
45
- self.label_filters = self._parse_prometheus_config()
46
-
47
- if premium_user is not True:
48
- verbose_logger.warning(
49
- f"🚨🚨🚨 Prometheus Metrics is on LiteLLM Enterprise\n🚨 {CommonProxyErrors.not_premium_user.value}"
50
- )
51
- self.litellm_not_a_premium_user_metric = Counter(
52
- name="litellm_not_a_premium_user_metric",
53
- documentation=f"🚨🚨🚨 Prometheus Metrics is on LiteLLM Enterprise. 🚨 {CommonProxyErrors.not_premium_user.value}",
54
- )
55
- return
56
-
57
- # Create metric factory functions
58
- self._counter_factory = self._create_metric_factory(Counter)
59
- self._gauge_factory = self._create_metric_factory(Gauge)
60
- self._histogram_factory = self._create_metric_factory(Histogram)
61
-
62
- self.litellm_proxy_failed_requests_metric = self._counter_factory(
63
- name="litellm_proxy_failed_requests_metric",
64
- documentation="Total number of failed responses from proxy - the client did not get a success response from litellm proxy",
65
- labelnames=self.get_labels_for_metric(
66
- "litellm_proxy_failed_requests_metric"
67
- ),
68
- )
69
-
70
- self.litellm_proxy_total_requests_metric = self._counter_factory(
71
- name="litellm_proxy_total_requests_metric",
72
- documentation="Total number of requests made to the proxy server - track number of client side requests",
73
- labelnames=self.get_labels_for_metric(
74
- "litellm_proxy_total_requests_metric"
75
- ),
76
- )
77
-
78
- # request latency metrics
79
- self.litellm_request_total_latency_metric = self._histogram_factory(
80
- "litellm_request_total_latency_metric",
81
- "Total latency (seconds) for a request to LiteLLM",
82
- labelnames=self.get_labels_for_metric(
83
- "litellm_request_total_latency_metric"
84
- ),
85
- buckets=LATENCY_BUCKETS,
86
- )
87
-
88
- self.litellm_llm_api_latency_metric = self._histogram_factory(
89
- "litellm_llm_api_latency_metric",
90
- "Total latency (seconds) for a models LLM API call",
91
- labelnames=self.get_labels_for_metric("litellm_llm_api_latency_metric"),
92
- buckets=LATENCY_BUCKETS,
93
- )
94
-
95
- self.litellm_llm_api_time_to_first_token_metric = self._histogram_factory(
96
- "litellm_llm_api_time_to_first_token_metric",
97
- "Time to first token for a models LLM API call",
98
- # labelnames=[
99
- # "model",
100
- # "hashed_api_key",
101
- # "api_key_alias",
102
- # "team",
103
- # "team_alias",
104
- # ],
105
- labelnames=self.get_labels_for_metric(
106
- "litellm_llm_api_time_to_first_token_metric"
107
- ),
108
- buckets=LATENCY_BUCKETS,
109
- )
110
-
111
- # Counter for spend
112
- self.litellm_spend_metric = self._counter_factory(
113
- "litellm_spend_metric",
114
- "Total spend on LLM requests",
115
- labelnames=self.get_labels_for_metric("litellm_spend_metric"),
116
- )
117
-
118
- # Counter for total_output_tokens
119
- self.litellm_tokens_metric = self._counter_factory(
120
- "litellm_total_tokens_metric",
121
- "Total number of input + output tokens from LLM requests",
122
- labelnames=self.get_labels_for_metric("litellm_total_tokens_metric"),
123
- )
124
-
125
- self.litellm_input_tokens_metric = self._counter_factory(
126
- "litellm_input_tokens_metric",
127
- "Total number of input tokens from LLM requests",
128
- labelnames=self.get_labels_for_metric("litellm_input_tokens_metric"),
129
- )
130
-
131
- self.litellm_output_tokens_metric = self._counter_factory(
132
- "litellm_output_tokens_metric",
133
- "Total number of output tokens from LLM requests",
134
- labelnames=self.get_labels_for_metric("litellm_output_tokens_metric"),
135
- )
136
-
137
- # Remaining Budget for Team
138
- self.litellm_remaining_team_budget_metric = self._gauge_factory(
139
- "litellm_remaining_team_budget_metric",
140
- "Remaining budget for team",
141
- labelnames=self.get_labels_for_metric(
142
- "litellm_remaining_team_budget_metric"
143
- ),
144
- )
145
-
146
- # Max Budget for Team
147
- self.litellm_team_max_budget_metric = self._gauge_factory(
148
- "litellm_team_max_budget_metric",
149
- "Maximum budget set for team",
150
- labelnames=self.get_labels_for_metric("litellm_team_max_budget_metric"),
151
- )
152
-
153
- # Team Budget Reset At
154
- self.litellm_team_budget_remaining_hours_metric = self._gauge_factory(
155
- "litellm_team_budget_remaining_hours_metric",
156
- "Remaining days for team budget to be reset",
157
- labelnames=self.get_labels_for_metric(
158
- "litellm_team_budget_remaining_hours_metric"
159
- ),
160
- )
161
-
162
- # Remaining Budget for API Key
163
- self.litellm_remaining_api_key_budget_metric = self._gauge_factory(
164
- "litellm_remaining_api_key_budget_metric",
165
- "Remaining budget for api key",
166
- labelnames=self.get_labels_for_metric(
167
- "litellm_remaining_api_key_budget_metric"
168
- ),
169
- )
170
-
171
- # Max Budget for API Key
172
- self.litellm_api_key_max_budget_metric = self._gauge_factory(
173
- "litellm_api_key_max_budget_metric",
174
- "Maximum budget set for api key",
175
- labelnames=self.get_labels_for_metric(
176
- "litellm_api_key_max_budget_metric"
177
- ),
178
- )
179
-
180
- self.litellm_api_key_budget_remaining_hours_metric = self._gauge_factory(
181
- "litellm_api_key_budget_remaining_hours_metric",
182
- "Remaining hours for api key budget to be reset",
183
- labelnames=self.get_labels_for_metric(
184
- "litellm_api_key_budget_remaining_hours_metric"
185
- ),
186
- )
187
-
188
- ########################################
189
- # LiteLLM Virtual API KEY metrics
190
- ########################################
191
- # Remaining MODEL RPM limit for API Key
192
- self.litellm_remaining_api_key_requests_for_model = self._gauge_factory(
193
- "litellm_remaining_api_key_requests_for_model",
194
- "Remaining Requests API Key can make for model (model based rpm limit on key)",
195
- labelnames=["hashed_api_key", "api_key_alias", "model"],
196
- )
197
-
198
- # Remaining MODEL TPM limit for API Key
199
- self.litellm_remaining_api_key_tokens_for_model = self._gauge_factory(
200
- "litellm_remaining_api_key_tokens_for_model",
201
- "Remaining Tokens API Key can make for model (model based tpm limit on key)",
202
- labelnames=["hashed_api_key", "api_key_alias", "model"],
203
- )
204
-
205
- ########################################
206
- # LLM API Deployment Metrics / analytics
207
- ########################################
208
-
209
- # Remaining Rate Limit for model
210
- self.litellm_remaining_requests_metric = self._gauge_factory(
211
- "litellm_remaining_requests",
212
- "LLM Deployment Analytics - remaining requests for model, returned from LLM API Provider",
213
- labelnames=self.get_labels_for_metric(
214
- "litellm_remaining_requests_metric"
215
- ),
216
- )
217
-
218
- self.litellm_remaining_tokens_metric = self._gauge_factory(
219
- "litellm_remaining_tokens",
220
- "remaining tokens for model, returned from LLM API Provider",
221
- labelnames=self.get_labels_for_metric(
222
- "litellm_remaining_tokens_metric"
223
- ),
224
- )
225
-
226
- self.litellm_overhead_latency_metric = self._histogram_factory(
227
- "litellm_overhead_latency_metric",
228
- "Latency overhead (milliseconds) added by LiteLLM processing",
229
- labelnames=self.get_labels_for_metric(
230
- "litellm_overhead_latency_metric"
231
- ),
232
- buckets=LATENCY_BUCKETS,
233
- )
234
- # llm api provider budget metrics
235
- self.litellm_provider_remaining_budget_metric = self._gauge_factory(
236
- "litellm_provider_remaining_budget_metric",
237
- "Remaining budget for provider - used when you set provider budget limits",
238
- labelnames=["api_provider"],
239
- )
240
-
241
- # Metric for deployment state
242
- self.litellm_deployment_state = self._gauge_factory(
243
- "litellm_deployment_state",
244
- "LLM Deployment Analytics - The state of the deployment: 0 = healthy, 1 = partial outage, 2 = complete outage",
245
- labelnames=self.get_labels_for_metric("litellm_deployment_state"),
246
- )
247
-
248
- self.litellm_deployment_cooled_down = self._counter_factory(
249
- "litellm_deployment_cooled_down",
250
- "LLM Deployment Analytics - Number of times a deployment has been cooled down by LiteLLM load balancing logic. exception_status is the status of the exception that caused the deployment to be cooled down",
251
- # labelnames=_logged_llm_labels + [EXCEPTION_STATUS],
252
- labelnames=self.get_labels_for_metric("litellm_deployment_cooled_down"),
253
- )
254
-
255
- self.litellm_deployment_success_responses = self._counter_factory(
256
- name="litellm_deployment_success_responses",
257
- documentation="LLM Deployment Analytics - Total number of successful LLM API calls via litellm",
258
- labelnames=self.get_labels_for_metric(
259
- "litellm_deployment_success_responses"
260
- ),
261
- )
262
- self.litellm_deployment_failure_responses = self._counter_factory(
263
- name="litellm_deployment_failure_responses",
264
- documentation="LLM Deployment Analytics - Total number of failed LLM API calls for a specific LLM deploymeny. exception_status is the status of the exception from the llm api",
265
- labelnames=self.get_labels_for_metric(
266
- "litellm_deployment_failure_responses"
267
- ),
268
- )
269
-
270
- self.litellm_deployment_total_requests = self._counter_factory(
271
- name="litellm_deployment_total_requests",
272
- documentation="LLM Deployment Analytics - Total number of LLM API calls via litellm - success + failure",
273
- labelnames=self.get_labels_for_metric(
274
- "litellm_deployment_total_requests"
275
- ),
276
- )
277
-
278
- # Deployment Latency tracking
279
- self.litellm_deployment_latency_per_output_token = self._histogram_factory(
280
- name="litellm_deployment_latency_per_output_token",
281
- documentation="LLM Deployment Analytics - Latency per output token",
282
- labelnames=self.get_labels_for_metric(
283
- "litellm_deployment_latency_per_output_token"
284
- ),
285
- )
286
-
287
- self.litellm_deployment_successful_fallbacks = self._counter_factory(
288
- "litellm_deployment_successful_fallbacks",
289
- "LLM Deployment Analytics - Number of successful fallback requests from primary model -> fallback model",
290
- self.get_labels_for_metric("litellm_deployment_successful_fallbacks"),
291
- )
292
-
293
- self.litellm_deployment_failed_fallbacks = self._counter_factory(
294
- "litellm_deployment_failed_fallbacks",
295
- "LLM Deployment Analytics - Number of failed fallback requests from primary model -> fallback model",
296
- self.get_labels_for_metric("litellm_deployment_failed_fallbacks"),
297
- )
298
-
299
- self.litellm_llm_api_failed_requests_metric = self._counter_factory(
300
- name="litellm_llm_api_failed_requests_metric",
301
- documentation="deprecated - use litellm_proxy_failed_requests_metric",
302
- labelnames=[
303
- "end_user",
304
- "hashed_api_key",
305
- "api_key_alias",
306
- "model",
307
- "team",
308
- "team_alias",
309
- "user",
310
- ],
311
- )
312
-
313
- self.litellm_requests_metric = self._counter_factory(
314
- name="litellm_requests_metric",
315
- documentation="deprecated - use litellm_proxy_total_requests_metric. Total number of LLM calls to litellm - track total per API Key, team, user",
316
- labelnames=self.get_labels_for_metric("litellm_requests_metric"),
317
- )
318
-
319
- except Exception as e:
320
- print_verbose(f"Got exception on init prometheus client {str(e)}")
321
- raise e
322
-
323
- def _parse_prometheus_config(self) -> Dict[str, List[str]]:
324
- """Parse prometheus metrics configuration for label filtering and enabled metrics"""
325
- import litellm
326
- from litellm.types.integrations.prometheus import PrometheusMetricsConfig
327
-
328
- config = litellm.prometheus_metrics_config
329
-
330
- # If no config is provided, return empty dict (no filtering)
331
- if not config:
332
- return {}
333
-
334
- verbose_logger.debug(f"prometheus config: {config}")
335
-
336
- # Parse and validate all configuration groups
337
- parsed_configs = []
338
- self.enabled_metrics = set()
339
-
340
- for group_config in config:
341
- # Validate configuration using Pydantic
342
- if isinstance(group_config, dict):
343
- parsed_config = PrometheusMetricsConfig(**group_config)
344
- else:
345
- parsed_config = group_config
346
-
347
- parsed_configs.append(parsed_config)
348
- self.enabled_metrics.update(parsed_config.metrics)
349
-
350
- # Validate all configurations
351
- validation_results = self._validate_all_configurations(parsed_configs)
352
-
353
- if validation_results.has_errors:
354
- self._pretty_print_validation_errors(validation_results)
355
- error_message = "Configuration validation failed:\n" + "\n".join(
356
- validation_results.all_error_messages
357
- )
358
- raise ValueError(error_message)
359
-
360
- # Build label filters from valid configurations
361
- label_filters = self._build_label_filters(parsed_configs)
362
-
363
- # Pretty print the processed configuration
364
- self._pretty_print_prometheus_config(label_filters)
365
- return label_filters
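# [Illustrative sketch — editor's addition, not part of the diffed file] The config
# parsed above is read from `litellm.prometheus_metrics_config`: a list of groups,
# each with a `metrics` list and an optional `include_labels` list (those two field
# names come from the attribute accesses in this method; the concrete values below
# are an assumed example).
example_prometheus_metrics_config = [
    {
        "metrics": ["litellm_proxy_total_requests_metric"],
        "include_labels": ["team", "team_alias", "hashed_api_key"],
    },
]
# For this input, _parse_prometheus_config() would set
# self.enabled_metrics = {"litellm_proxy_total_requests_metric"} and return
# {"litellm_proxy_total_requests_metric": ["team", "team_alias", "hashed_api_key"]}.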
366
-
367
- def _validate_all_configurations(self, parsed_configs: List) -> ValidationResults:
368
- """Validate all metric configurations and return collected errors"""
369
- metric_errors = []
370
- label_errors = []
371
-
372
- for config in parsed_configs:
373
- for metric_name in config.metrics:
374
- # Validate metric name
375
- metric_error = self._validate_single_metric_name(metric_name)
376
- if metric_error:
377
- metric_errors.append(metric_error)
378
- continue # Skip label validation if metric name is invalid
379
-
380
- # Validate labels if provided
381
- if config.include_labels:
382
- label_error = self._validate_single_metric_labels(
383
- metric_name, config.include_labels
384
- )
385
- if label_error:
386
- label_errors.append(label_error)
387
-
388
- return ValidationResults(metric_errors=metric_errors, label_errors=label_errors)
389
-
390
- def _validate_single_metric_name(
391
- self, metric_name: str
392
- ) -> Optional[MetricValidationError]:
393
- """Validate a single metric name"""
394
- from typing import get_args
395
-
396
- if metric_name not in set(get_args(DEFINED_PROMETHEUS_METRICS)):
397
- return MetricValidationError(
398
- metric_name=metric_name,
399
- valid_metrics=get_args(DEFINED_PROMETHEUS_METRICS),
400
- )
401
- return None
402
-
403
- def _validate_single_metric_labels(
404
- self, metric_name: str, labels: List[str]
405
- ) -> Optional[LabelValidationError]:
406
- """Validate labels for a single metric"""
407
- from typing import cast
408
-
409
- # Get valid labels for this metric from PrometheusMetricLabels
410
- valid_labels = PrometheusMetricLabels.get_labels(
411
- cast(DEFINED_PROMETHEUS_METRICS, metric_name)
412
- )
413
-
414
- # Find invalid labels
415
- invalid_labels = [label for label in labels if label not in valid_labels]
416
-
417
- if invalid_labels:
418
- return LabelValidationError(
419
- metric_name=metric_name,
420
- invalid_labels=invalid_labels,
421
- valid_labels=valid_labels,
422
- )
423
- return None
424
-
425
- def _build_label_filters(self, parsed_configs: List) -> Dict[str, List[str]]:
426
- """Build label filters from validated configurations"""
427
- label_filters = {}
428
-
429
- for config in parsed_configs:
430
- for metric_name in config.metrics:
431
- if config.include_labels:
432
- # Only add if metric name is valid (validation already passed)
433
- if self._validate_single_metric_name(metric_name) is None:
434
- label_filters[metric_name] = config.include_labels
435
-
436
- return label_filters
437
-
438
- def _validate_configured_metric_labels(self, metric_name: str, labels: List[str]):
439
- """
440
- Ensure that all the configured labels are valid for the metric
441
-
442
- Raises ValueError if the metric labels are invalid and pretty prints the error
443
- """
444
- label_error = self._validate_single_metric_labels(metric_name, labels)
445
- if label_error:
446
- self._pretty_print_invalid_labels_error(
447
- metric_name=label_error.metric_name,
448
- invalid_labels=label_error.invalid_labels,
449
- valid_labels=label_error.valid_labels,
450
- )
451
- raise ValueError(label_error.message)
452
-
453
- return True
454
-
455
- #########################################################
456
- # Pretty print functions
457
- #########################################################
458
-
459
- def _pretty_print_validation_errors(
460
- self, validation_results: ValidationResults
461
- ) -> None:
462
- """Pretty print all validation errors using rich"""
463
- try:
464
- from rich.console import Console
465
- from rich.panel import Panel
466
- from rich.table import Table
467
- from rich.text import Text
468
-
469
- console = Console()
470
-
471
- # Create error panel title
472
- title = Text("🚨🚨 Configuration Validation Errors", style="bold red")
473
-
474
- # Print main error panel
475
- console.print("\n")
476
- console.print(Panel(title, border_style="red"))
477
-
478
- # Show invalid metric names if any
479
- if validation_results.metric_errors:
480
- invalid_metrics = [
481
- e.metric_name for e in validation_results.metric_errors
482
- ]
483
- valid_metrics = validation_results.metric_errors[
484
- 0
485
- ].valid_metrics # All should have same valid metrics
486
-
487
- metrics_error_text = Text(
488
- f"Invalid Metric Names: {', '.join(invalid_metrics)}",
489
- style="bold red",
490
- )
491
- console.print(Panel(metrics_error_text, border_style="red"))
492
-
493
- metrics_table = Table(
494
- title="📊 Valid Metric Names",
495
- show_header=True,
496
- header_style="bold green",
497
- title_justify="left",
498
- border_style="green",
499
- )
500
- metrics_table.add_column(
501
- "Available Metrics", style="cyan", no_wrap=True
502
- )
503
-
504
- for metric in sorted(valid_metrics):
505
- metrics_table.add_row(metric)
506
-
507
- console.print(metrics_table)
508
-
509
- # Show invalid labels if any
510
- if validation_results.label_errors:
511
- for error in validation_results.label_errors:
512
- labels_error_text = Text(
513
- f"Invalid Labels for '{error.metric_name}': {', '.join(error.invalid_labels)}",
514
- style="bold red",
515
- )
516
- console.print(Panel(labels_error_text, border_style="red"))
517
-
518
- labels_table = Table(
519
- title=f"🏷️ Valid Labels for '{error.metric_name}'",
520
- show_header=True,
521
- header_style="bold green",
522
- title_justify="left",
523
- border_style="green",
524
- )
525
- labels_table.add_column("Valid Labels", style="cyan", no_wrap=True)
526
-
527
- for label in sorted(error.valid_labels):
528
- labels_table.add_row(label)
529
-
530
- console.print(labels_table)
531
-
532
- console.print("\n")
533
-
534
- except ImportError:
535
- # Fallback to simple logging if rich is not available
536
- for metric_error in validation_results.metric_errors:
537
- verbose_logger.error(metric_error.message)
538
- for label_error in validation_results.label_errors:
539
- verbose_logger.error(label_error.message)
540
-
541
- def _pretty_print_invalid_labels_error(
542
- self, metric_name: str, invalid_labels: List[str], valid_labels: List[str]
543
- ) -> None:
544
- """Pretty print error message for invalid labels using rich"""
545
- try:
546
- from rich.console import Console
547
- from rich.panel import Panel
548
- from rich.table import Table
549
- from rich.text import Text
550
-
551
- console = Console()
552
-
553
- # Create error panel title
554
- title = Text(
555
- f"🚨🚨 Invalid Labels for Metric: '{metric_name}'\nInvalid labels: {', '.join(invalid_labels)}\nPlease specify only valid labels below",
556
- style="bold red",
557
- )
558
-
559
- # Create valid labels table
560
- labels_table = Table(
561
- title="🏷️ Valid Labels for this Metric",
562
- show_header=True,
563
- header_style="bold green",
564
- title_justify="left",
565
- border_style="green",
566
- )
567
- labels_table.add_column("Valid Labels", style="cyan", no_wrap=True)
568
-
569
- for label in sorted(valid_labels):
570
- labels_table.add_row(label)
571
-
572
- # Print everything in a nice panel
573
- console.print("\n")
574
- console.print(Panel(title, border_style="red"))
575
- console.print(labels_table)
576
- console.print("\n")
577
-
578
- except ImportError:
579
- # Fallback to simple logging if rich is not available
580
- verbose_logger.error(
581
- f"Invalid labels for metric '{metric_name}': {invalid_labels}. Valid labels: {sorted(valid_labels)}"
582
- )
583
-
584
- def _pretty_print_invalid_metric_error(
585
- self, invalid_metric_name: str, valid_metrics: tuple
586
- ) -> None:
587
- """Pretty print error message for invalid metric name using rich"""
588
- try:
589
- from rich.console import Console
590
- from rich.panel import Panel
591
- from rich.table import Table
592
- from rich.text import Text
593
-
594
- console = Console()
595
-
596
- # Create error panel title
597
- title = Text(
598
- f"🚨🚨 Invalid Metric Name: '{invalid_metric_name}'\nPlease specify one of the allowed metrics below",
599
- style="bold red",
600
- )
601
-
602
- # Create valid metrics table
603
- metrics_table = Table(
604
- title="📊 Valid Metric Names",
605
- show_header=True,
606
- header_style="bold green",
607
- title_justify="left",
608
- border_style="green",
609
- )
610
- metrics_table.add_column("Available Metrics", style="cyan", no_wrap=True)
611
-
612
- for metric in sorted(valid_metrics):
613
- metrics_table.add_row(metric)
614
-
615
- # Print everything in a nice panel
616
- console.print("\n")
617
- console.print(Panel(title, border_style="red"))
618
- console.print(metrics_table)
619
- console.print("\n")
620
-
621
- except ImportError:
622
- # Fallback to simple logging if rich is not available
623
- verbose_logger.error(
624
- f"Invalid metric name: {invalid_metric_name}. Valid metrics: {sorted(valid_metrics)}"
625
- )
626
-
627
- #########################################################
628
- # End of pretty print functions
629
- #########################################################
630
-
631
- def _valid_metric_name(self, metric_name: str):
632
- """
633
- Raises ValueError if the metric name is invalid and pretty prints the error
634
- """
635
- error = self._validate_single_metric_name(metric_name)
636
- if error:
637
- self._pretty_print_invalid_metric_error(
638
- invalid_metric_name=error.metric_name, valid_metrics=error.valid_metrics
639
- )
640
- raise ValueError(error.message)
641
-
642
- def _pretty_print_prometheus_config(
643
- self, label_filters: Dict[str, List[str]]
644
- ) -> None:
645
- """Pretty print the processed prometheus configuration using rich"""
646
- try:
647
- from rich.console import Console
648
- from rich.panel import Panel
649
- from rich.table import Table
650
- from rich.text import Text
651
-
652
- console = Console()
653
-
654
- # Create main panel title
655
- title = Text("Prometheus Configuration Processed", style="bold blue")
656
-
657
- # Create enabled metrics table
658
- metrics_table = Table(
659
- title="📊 Enabled Metrics",
660
- show_header=True,
661
- header_style="bold magenta",
662
- title_justify="left",
663
- )
664
- metrics_table.add_column("Metric Name", style="cyan", no_wrap=True)
665
-
666
- if hasattr(self, "enabled_metrics") and self.enabled_metrics:
667
- for metric in sorted(self.enabled_metrics):
668
- metrics_table.add_row(metric)
669
- else:
670
- metrics_table.add_row(
671
- "[yellow]All metrics enabled (no filter applied)[/yellow]"
672
- )
673
-
674
- # Create label filters table
675
- labels_table = Table(
676
- title="🏷️ Label Filters",
677
- show_header=True,
678
- header_style="bold green",
679
- title_justify="left",
680
- )
681
- labels_table.add_column("Metric Name", style="cyan", no_wrap=True)
682
- labels_table.add_column("Allowed Labels", style="yellow")
683
-
684
- if label_filters:
685
- for metric_name, labels in sorted(label_filters.items()):
686
- labels_str = (
687
- ", ".join(labels)
688
- if labels
689
- else "[dim]No labels specified[/dim]"
690
- )
691
- labels_table.add_row(metric_name, labels_str)
692
- else:
693
- labels_table.add_row(
694
- "[yellow]No label filtering applied[/yellow]",
695
- "[dim]All default labels will be used[/dim]",
696
- )
697
-
698
- # Print everything in a nice panel
699
- console.print("\n")
700
- console.print(Panel(title, border_style="blue"))
701
- console.print(metrics_table)
702
- console.print(labels_table)
703
- console.print("\n")
704
-
705
- except ImportError:
706
- # Fallback to simple logging if rich is not available
707
- verbose_logger.info(
708
- f"Enabled metrics: {sorted(self.enabled_metrics) if hasattr(self, 'enabled_metrics') else 'All metrics'}"
709
- )
710
- verbose_logger.info(f"Label filters: {label_filters}")
711
-
712
- def _is_metric_enabled(self, metric_name: str) -> bool:
713
- """Check if a metric is enabled based on configuration"""
714
- # If no specific configuration is provided, enable all metrics (default behavior)
715
- if not hasattr(self, "enabled_metrics"):
716
- return True
717
-
718
- # If enabled_metrics is empty, enable all metrics
719
- if not self.enabled_metrics:
720
- return True
721
-
722
- return metric_name in self.enabled_metrics
723
-
724
- def _create_metric_factory(self, metric_class):
725
- """Create a factory function that returns either a real metric or a no-op metric"""
726
-
727
- def factory(*args, **kwargs):
728
- # Extract metric name from the first argument or 'name' keyword argument
729
- metric_name = args[0] if args else kwargs.get("name", "")
730
-
731
- if self._is_metric_enabled(metric_name):
732
- return metric_class(*args, **kwargs)
733
- else:
734
- return NoOpMetric()
735
-
736
- return factory
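# [Illustrative sketch — editor's addition, not part of the diffed file] NoOpMetric
# is pulled in by the wildcard import from litellm.types.integrations.prometheus.
# For the factory above to hand it out as a drop-in replacement, it only has to
# absorb the chained calls made on real prometheus_client metrics
# (.labels(...).inc() / .observe() / .set()). A minimal stand-in could look like
# this (assumption, not the actual class):
class _NoOpMetricSketch:
    def labels(self, *args, **kwargs):
        # return self so .labels(...).inc() / .observe() / .set() chain onto this object
        return self

    def inc(self, amount: float = 1.0) -> None:
        pass

    def observe(self, value: float) -> None:
        pass

    def set(self, value: float) -> None:
        pass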
737
-
738
- def get_labels_for_metric(
739
- self, metric_name: DEFINED_PROMETHEUS_METRICS
740
- ) -> List[str]:
741
- """
742
- Get the labels for a metric, filtered if configured
743
- """
744
- # Get default labels for this metric from PrometheusMetricLabels
745
- default_labels = PrometheusMetricLabels.get_labels(metric_name)
746
-
747
- # If no label filtering is configured for this metric, use default labels
748
- if metric_name not in self.label_filters:
749
- return default_labels
750
-
751
- # Get configured labels for this metric
752
- configured_labels = self.label_filters[metric_name]
753
-
754
- # Return intersection of configured and default labels to ensure we only use valid labels
755
- filtered_labels = [
756
- label for label in default_labels if label in configured_labels
757
- ]
758
-
759
- return filtered_labels
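# [Illustrative example — editor's addition, not part of the diffed file; label
# names are hypothetical] The intersection above keeps the default label order and
# silently drops any configured label that is not valid for the metric:
default_labels_example = ["team", "team_alias", "model", "end_user"]
configured_labels_example = ["model", "team", "not_a_real_label"]
filtered_example = [lbl for lbl in default_labels_example if lbl in configured_labels_example]
assert filtered_example == ["team", "model"]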
760
-
761
- async def async_log_success_event(self, kwargs, response_obj, start_time, end_time):
762
- # Define prometheus client
763
- from litellm.types.utils import StandardLoggingPayload
764
-
765
- verbose_logger.debug(
766
- f"prometheus Logging - Enters success logging function for kwargs {kwargs}"
767
- )
768
-
769
- # unpack kwargs
770
- standard_logging_payload: Optional[StandardLoggingPayload] = kwargs.get(
771
- "standard_logging_object"
772
- )
773
-
774
- if standard_logging_payload is None or not isinstance(
775
- standard_logging_payload, dict
776
- ):
777
- raise ValueError(
778
- f"standard_logging_object is required, got={standard_logging_payload}"
779
- )
780
-
781
- model = kwargs.get("model", "")
782
- litellm_params = kwargs.get("litellm_params", {}) or {}
783
- _metadata = litellm_params.get("metadata", {})
784
- end_user_id = get_end_user_id_for_cost_tracking(
785
- litellm_params, service_type="prometheus"
786
- )
787
- user_id = standard_logging_payload["metadata"]["user_api_key_user_id"]
788
- user_api_key = standard_logging_payload["metadata"]["user_api_key_hash"]
789
- user_api_key_alias = standard_logging_payload["metadata"]["user_api_key_alias"]
790
- user_api_team = standard_logging_payload["metadata"]["user_api_key_team_id"]
791
- user_api_team_alias = standard_logging_payload["metadata"][
792
- "user_api_key_team_alias"
793
- ]
794
- output_tokens = standard_logging_payload["completion_tokens"]
795
- tokens_used = standard_logging_payload["total_tokens"]
796
- response_cost = standard_logging_payload["response_cost"]
797
- _requester_metadata = standard_logging_payload["metadata"].get(
798
- "requester_metadata"
799
- )
800
- if standard_logging_payload is not None and isinstance(
801
- standard_logging_payload, dict
802
- ):
803
- _tags = standard_logging_payload["request_tags"]
804
- else:
805
- _tags = []
806
-
807
- print_verbose(
808
- f"inside track_prometheus_metrics, model {model}, response_cost {response_cost}, tokens_used {tokens_used}, end_user_id {end_user_id}, user_api_key {user_api_key}"
809
- )
810
-
811
- enum_values = UserAPIKeyLabelValues(
812
- end_user=end_user_id,
813
- hashed_api_key=user_api_key,
814
- api_key_alias=user_api_key_alias,
815
- requested_model=standard_logging_payload["model_group"],
816
- model_group=standard_logging_payload["model_group"],
817
- team=user_api_team,
818
- team_alias=user_api_team_alias,
819
- user=user_id,
820
- user_email=standard_logging_payload["metadata"]["user_api_key_user_email"],
821
- status_code="200",
822
- model=model,
823
- litellm_model_name=model,
824
- tags=_tags,
825
- model_id=standard_logging_payload["model_id"],
826
- api_base=standard_logging_payload["api_base"],
827
- api_provider=standard_logging_payload["custom_llm_provider"],
828
- exception_status=None,
829
- exception_class=None,
830
- custom_metadata_labels=get_custom_labels_from_metadata(
831
- metadata=standard_logging_payload["metadata"].get("requester_metadata")
832
- or {}
833
- ),
834
- route=standard_logging_payload["metadata"].get(
835
- "user_api_key_request_route"
836
- ),
837
- )
838
-
839
- if (
840
- user_api_key is not None
841
- and isinstance(user_api_key, str)
842
- and user_api_key.startswith("sk-")
843
- ):
844
- from litellm.proxy.utils import hash_token
845
-
846
- user_api_key = hash_token(user_api_key)
847
-
848
- # increment total LLM requests and spend metric
849
- self._increment_top_level_request_and_spend_metrics(
850
- end_user_id=end_user_id,
851
- user_api_key=user_api_key,
852
- user_api_key_alias=user_api_key_alias,
853
- model=model,
854
- user_api_team=user_api_team,
855
- user_api_team_alias=user_api_team_alias,
856
- user_id=user_id,
857
- response_cost=response_cost,
858
- enum_values=enum_values,
859
- )
860
-
861
- # input, output, total token metrics
862
- self._increment_token_metrics(
863
- # why type ignore below?
864
- # 1. We just checked if isinstance(standard_logging_payload, dict). Pyright complains.
865
- # 2. Pyright does not allow us to run isinstance(standard_logging_payload, StandardLoggingPayload) <- this would be ideal
866
- standard_logging_payload=standard_logging_payload, # type: ignore
867
- end_user_id=end_user_id,
868
- user_api_key=user_api_key,
869
- user_api_key_alias=user_api_key_alias,
870
- model=model,
871
- user_api_team=user_api_team,
872
- user_api_team_alias=user_api_team_alias,
873
- user_id=user_id,
874
- enum_values=enum_values,
875
- )
876
-
877
- # remaining budget metrics
878
- await self._increment_remaining_budget_metrics(
879
- user_api_team=user_api_team,
880
- user_api_team_alias=user_api_team_alias,
881
- user_api_key=user_api_key,
882
- user_api_key_alias=user_api_key_alias,
883
- litellm_params=litellm_params,
884
- response_cost=response_cost,
885
- )
886
-
887
- # set proxy virtual key rpm/tpm metrics
888
- self._set_virtual_key_rate_limit_metrics(
889
- user_api_key=user_api_key,
890
- user_api_key_alias=user_api_key_alias,
891
- kwargs=kwargs,
892
- metadata=_metadata,
893
- )
894
-
895
- # set latency metrics
896
- self._set_latency_metrics(
897
- kwargs=kwargs,
898
- model=model,
899
- user_api_key=user_api_key,
900
- user_api_key_alias=user_api_key_alias,
901
- user_api_team=user_api_team,
902
- user_api_team_alias=user_api_team_alias,
903
- # why type ignore below?
904
- # 1. We just checked if isinstance(standard_logging_payload, dict). Pyright complains.
905
- # 2. Pyright does not allow us to run isinstance(standard_logging_payload, StandardLoggingPayload) <- this would be ideal
906
- enum_values=enum_values,
907
- )
908
-
909
- # set x-ratelimit headers
910
- self.set_llm_deployment_success_metrics(
911
- kwargs, start_time, end_time, enum_values, output_tokens
912
- )
913
-
914
- if (
915
- standard_logging_payload["stream"] is True
916
- ): # log successful streaming requests from logging event hook.
917
- _labels = prometheus_label_factory(
918
- supported_enum_labels=self.get_labels_for_metric(
919
- metric_name="litellm_proxy_total_requests_metric"
920
- ),
921
- enum_values=enum_values,
922
- )
923
- self.litellm_proxy_total_requests_metric.labels(**_labels).inc()
924
-
925
- def _increment_token_metrics(
926
- self,
927
- standard_logging_payload: StandardLoggingPayload,
928
- end_user_id: Optional[str],
929
- user_api_key: Optional[str],
930
- user_api_key_alias: Optional[str],
931
- model: Optional[str],
932
- user_api_team: Optional[str],
933
- user_api_team_alias: Optional[str],
934
- user_id: Optional[str],
935
- enum_values: UserAPIKeyLabelValues,
936
- ):
937
- verbose_logger.debug("prometheus Logging - Enters token metrics function")
938
- # token metrics
939
-
940
- if standard_logging_payload is not None and isinstance(
941
- standard_logging_payload, dict
942
- ):
943
- _tags = standard_logging_payload["request_tags"]
944
-
945
- _labels = prometheus_label_factory(
946
- supported_enum_labels=self.get_labels_for_metric(
947
- metric_name="litellm_proxy_total_requests_metric"
948
- ),
949
- enum_values=enum_values,
950
- )
951
-
952
- _labels = prometheus_label_factory(
953
- supported_enum_labels=self.get_labels_for_metric(
954
- metric_name="litellm_total_tokens_metric"
955
- ),
956
- enum_values=enum_values,
957
- )
958
- self.litellm_tokens_metric.labels(**_labels).inc(
959
- standard_logging_payload["total_tokens"]
960
- )
961
-
962
- _labels = prometheus_label_factory(
963
- supported_enum_labels=self.get_labels_for_metric(
964
- metric_name="litellm_input_tokens_metric"
965
- ),
966
- enum_values=enum_values,
967
- )
968
- self.litellm_input_tokens_metric.labels(**_labels).inc(
969
- standard_logging_payload["prompt_tokens"]
970
- )
971
-
972
- _labels = prometheus_label_factory(
973
- supported_enum_labels=self.get_labels_for_metric(
974
- metric_name="litellm_output_tokens_metric"
975
- ),
976
- enum_values=enum_values,
977
- )
978
-
979
- self.litellm_output_tokens_metric.labels(**_labels).inc(
980
- standard_logging_payload["completion_tokens"]
981
- )
982
-
983
- async def _increment_remaining_budget_metrics(
984
- self,
985
- user_api_team: Optional[str],
986
- user_api_team_alias: Optional[str],
987
- user_api_key: Optional[str],
988
- user_api_key_alias: Optional[str],
989
- litellm_params: dict,
990
- response_cost: float,
991
- ):
992
- _team_spend = litellm_params.get("metadata", {}).get(
993
- "user_api_key_team_spend", None
994
- )
995
- _team_max_budget = litellm_params.get("metadata", {}).get(
996
- "user_api_key_team_max_budget", None
997
- )
998
-
999
- _api_key_spend = litellm_params.get("metadata", {}).get(
1000
- "user_api_key_spend", None
1001
- )
1002
- _api_key_max_budget = litellm_params.get("metadata", {}).get(
1003
- "user_api_key_max_budget", None
1004
- )
1005
- await self._set_api_key_budget_metrics_after_api_request(
1006
- user_api_key=user_api_key,
1007
- user_api_key_alias=user_api_key_alias,
1008
- response_cost=response_cost,
1009
- key_max_budget=_api_key_max_budget,
1010
- key_spend=_api_key_spend,
1011
- )
1012
-
1013
- await self._set_team_budget_metrics_after_api_request(
1014
- user_api_team=user_api_team,
1015
- user_api_team_alias=user_api_team_alias,
1016
- team_spend=_team_spend,
1017
- team_max_budget=_team_max_budget,
1018
- response_cost=response_cost,
1019
- )
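# [Illustrative sketch — editor's addition, not part of the diffed file] The two
# helpers awaited above fall outside this hunk, so the exact computation is not
# shown; presumably the remaining-budget gauges are derived from the spend and
# max-budget values pulled out of litellm_params["metadata"] plus the cost of the
# current request, roughly:
def _remaining_budget_sketch(spend, max_budget, response_cost):
    if max_budget is None:
        # no budget configured on the key/team -> nothing meaningful to report
        return None
    return max_budget - ((spend or 0.0) + response_cost)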
1020
-
1021
- def _increment_top_level_request_and_spend_metrics(
1022
- self,
1023
- end_user_id: Optional[str],
1024
- user_api_key: Optional[str],
1025
- user_api_key_alias: Optional[str],
1026
- model: Optional[str],
1027
- user_api_team: Optional[str],
1028
- user_api_team_alias: Optional[str],
1029
- user_id: Optional[str],
1030
- response_cost: float,
1031
- enum_values: UserAPIKeyLabelValues,
1032
- ):
1033
- _labels = prometheus_label_factory(
1034
- supported_enum_labels=self.get_labels_for_metric(
1035
- metric_name="litellm_requests_metric"
1036
- ),
1037
- enum_values=enum_values,
1038
- )
1039
-
1040
- self.litellm_requests_metric.labels(**_labels).inc()
1041
-
1042
- _labels = prometheus_label_factory(
1043
- supported_enum_labels=self.get_labels_for_metric(
1044
- metric_name="litellm_spend_metric"
1045
- ),
1046
- enum_values=enum_values,
1047
- )
1048
-
1049
- self.litellm_spend_metric.labels(**_labels).inc(response_cost)
1050
-
1051
- def _set_virtual_key_rate_limit_metrics(
1052
- self,
1053
- user_api_key: Optional[str],
1054
- user_api_key_alias: Optional[str],
1055
- kwargs: dict,
1056
- metadata: dict,
1057
- ):
1058
- from litellm.proxy.common_utils.callback_utils import (
1059
- get_model_group_from_litellm_kwargs,
1060
- )
1061
-
1062
- # Set remaining rpm/tpm for API Key + model
1063
- # see parallel_request_limiter.py - variables are set there
1064
- model_group = get_model_group_from_litellm_kwargs(kwargs)
1065
- remaining_requests_variable_name = (
1066
- f"litellm-key-remaining-requests-{model_group}"
1067
- )
1068
- remaining_tokens_variable_name = f"litellm-key-remaining-tokens-{model_group}"
1069
-
1070
- remaining_requests = (
1071
- metadata.get(remaining_requests_variable_name, sys.maxsize) or sys.maxsize
1072
- )
1073
- remaining_tokens = (
1074
- metadata.get(remaining_tokens_variable_name, sys.maxsize) or sys.maxsize
1075
- )
1076
-
1077
- self.litellm_remaining_api_key_requests_for_model.labels(
1078
- user_api_key, user_api_key_alias, model_group
1079
- ).set(remaining_requests)
1080
-
1081
- self.litellm_remaining_api_key_tokens_for_model.labels(
1082
- user_api_key, user_api_key_alias, model_group
1083
- ).set(remaining_tokens)
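# [Illustrative example — editor's addition, not part of the diffed file; values
# are hypothetical] parallel_request_limiter.py stores per-key, per-model-group
# counters in the request metadata under the key pattern used above, e.g. for
# model_group == "gpt-4o":
example_metadata = {
    "litellm-key-remaining-requests-gpt-4o": 42,
    "litellm-key-remaining-tokens-gpt-4o": 15000,
}
# Keys that are missing fall back to sys.maxsize above, so the gauges read as
# "effectively unlimited" when no model-specific rpm/tpm limit is set on the key.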
1084
-
1085
- def _set_latency_metrics(
1086
- self,
1087
- kwargs: dict,
1088
- model: Optional[str],
1089
- user_api_key: Optional[str],
1090
- user_api_key_alias: Optional[str],
1091
- user_api_team: Optional[str],
1092
- user_api_team_alias: Optional[str],
1093
- enum_values: UserAPIKeyLabelValues,
1094
- ):
1095
- # latency metrics
1096
- end_time: datetime = kwargs.get("end_time") or datetime.now()
1097
- start_time: Optional[datetime] = kwargs.get("start_time")
1098
- api_call_start_time = kwargs.get("api_call_start_time", None)
1099
- completion_start_time = kwargs.get("completion_start_time", None)
1100
- time_to_first_token_seconds = self._safe_duration_seconds(
1101
- start_time=api_call_start_time,
1102
- end_time=completion_start_time,
1103
- )
1104
- if (
1105
- time_to_first_token_seconds is not None
1106
- and kwargs.get("stream", False) is True # only emit for streaming requests
1107
- ):
1108
- self.litellm_llm_api_time_to_first_token_metric.labels(
1109
- model,
1110
- user_api_key,
1111
- user_api_key_alias,
1112
- user_api_team,
1113
- user_api_team_alias,
1114
- ).observe(time_to_first_token_seconds)
1115
- else:
1116
- verbose_logger.debug(
1117
- "Time to first token metric not emitted, stream option in model_parameters is not True"
1118
- )
1119
-
1120
- api_call_total_time_seconds = self._safe_duration_seconds(
1121
- start_time=api_call_start_time,
1122
- end_time=end_time,
1123
- )
1124
- if api_call_total_time_seconds is not None:
1125
- _labels = prometheus_label_factory(
1126
- supported_enum_labels=self.get_labels_for_metric(
1127
- metric_name="litellm_llm_api_latency_metric"
1128
- ),
1129
- enum_values=enum_values,
1130
- )
1131
- self.litellm_llm_api_latency_metric.labels(**_labels).observe(
1132
- api_call_total_time_seconds
1133
- )
1134
-
1135
- # total request latency
1136
- total_time_seconds = self._safe_duration_seconds(
1137
- start_time=start_time,
1138
- end_time=end_time,
1139
- )
1140
- if total_time_seconds is not None:
1141
- _labels = prometheus_label_factory(
1142
- supported_enum_labels=self.get_labels_for_metric(
1143
- metric_name="litellm_request_total_latency_metric"
1144
- ),
1145
- enum_values=enum_values,
1146
- )
1147
- self.litellm_request_total_latency_metric.labels(**_labels).observe(
1148
- total_time_seconds
1149
- )
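# [Illustrative timeline — editor's addition, not part of the diffed file;
# timestamps are hypothetical]
# start_time ............ request reaches the proxy
# api_call_start_time ... proxy finishes pre-call work and calls the LLM API
# completion_start_time . first token arrives (streaming only)
# end_time .............. response is complete
#
# e.g. start=0.00s, api_call_start=0.05s, first token=0.45s, end=2.05s gives:
#   time_to_first_token (streaming only) ..... 0.45 - 0.05 = 0.40s
#   litellm_llm_api_latency_metric ........... 2.05 - 0.05 = 2.00s
#   litellm_request_total_latency_metric ..... 2.05 - 0.00 = 2.05s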
1150
-
1151
- async def async_log_failure_event(self, kwargs, response_obj, start_time, end_time):
1152
- from litellm.types.utils import StandardLoggingPayload
1153
-
1154
- verbose_logger.debug(
1155
- f"prometheus Logging - Enters failure logging function for kwargs {kwargs}"
1156
- )
1157
-
1158
- # unpack kwargs
1159
- model = kwargs.get("model", "")
1160
- standard_logging_payload: StandardLoggingPayload = kwargs.get(
1161
- "standard_logging_object", {}
1162
- )
1163
- litellm_params = kwargs.get("litellm_params", {}) or {}
1164
- end_user_id = get_end_user_id_for_cost_tracking(
1165
- litellm_params, service_type="prometheus"
1166
- )
1167
- user_id = standard_logging_payload["metadata"]["user_api_key_user_id"]
1168
- user_api_key = standard_logging_payload["metadata"]["user_api_key_hash"]
1169
- user_api_key_alias = standard_logging_payload["metadata"]["user_api_key_alias"]
1170
- user_api_team = standard_logging_payload["metadata"]["user_api_key_team_id"]
1171
- user_api_team_alias = standard_logging_payload["metadata"][
1172
- "user_api_key_team_alias"
1173
- ]
1174
- kwargs.get("exception", None)
1175
-
1176
- try:
1177
- self.litellm_llm_api_failed_requests_metric.labels(
1178
- end_user_id,
1179
- user_api_key,
1180
- user_api_key_alias,
1181
- model,
1182
- user_api_team,
1183
- user_api_team_alias,
1184
- user_id,
1185
- ).inc()
1186
- self.set_llm_deployment_failure_metrics(kwargs)
1187
- except Exception as e:
1188
- verbose_logger.exception(
1189
- "prometheus Layer Error(): Exception occured - {}".format(str(e))
1190
- )
1191
- pass
1192
- pass
1193
-
1194
- async def async_post_call_failure_hook(
1195
- self,
1196
- request_data: dict,
1197
- original_exception: Exception,
1198
- user_api_key_dict: UserAPIKeyAuth,
1199
- traceback_str: Optional[str] = None,
1200
- ):
1201
- """
1202
- Track client side failures
1203
-
1204
- Proxy level tracking - failed client side requests
1205
-
1206
- labelnames=[
1207
- "end_user",
1208
- "hashed_api_key",
1209
- "api_key_alias",
1210
- REQUESTED_MODEL,
1211
- "team",
1212
- "team_alias",
1213
- ] + EXCEPTION_LABELS,
1214
- """
1215
- from litellm.litellm_core_utils.litellm_logging import (
1216
- StandardLoggingPayloadSetup,
1217
- )
1218
-
1219
- try:
1220
- _tags = StandardLoggingPayloadSetup._get_request_tags(
1221
- request_data.get("metadata", {}),
1222
- request_data.get("proxy_server_request", {}),
1223
- )
1224
- enum_values = UserAPIKeyLabelValues(
1225
- end_user=user_api_key_dict.end_user_id,
1226
- user=user_api_key_dict.user_id,
1227
- user_email=user_api_key_dict.user_email,
1228
- hashed_api_key=user_api_key_dict.api_key,
1229
- api_key_alias=user_api_key_dict.key_alias,
1230
- team=user_api_key_dict.team_id,
1231
- team_alias=user_api_key_dict.team_alias,
1232
- requested_model=request_data.get("model", ""),
1233
- status_code=str(getattr(original_exception, "status_code", None)),
1234
- exception_status=str(getattr(original_exception, "status_code", None)),
1235
- exception_class=self._get_exception_class_name(original_exception),
1236
- tags=_tags,
1237
- route=user_api_key_dict.request_route,
1238
- )
1239
- _labels = prometheus_label_factory(
1240
- supported_enum_labels=self.get_labels_for_metric(
1241
- metric_name="litellm_proxy_failed_requests_metric"
1242
- ),
1243
- enum_values=enum_values,
1244
- )
1245
- self.litellm_proxy_failed_requests_metric.labels(**_labels).inc()
1246
-
1247
- _labels = prometheus_label_factory(
1248
- supported_enum_labels=self.get_labels_for_metric(
1249
- metric_name="litellm_proxy_total_requests_metric"
1250
- ),
1251
- enum_values=enum_values,
1252
- )
1253
- self.litellm_proxy_total_requests_metric.labels(**_labels).inc()
1254
-
1255
- except Exception as e:
1256
- verbose_logger.exception(
1257
- "prometheus Layer Error(): Exception occured - {}".format(str(e))
1258
- )
1259
- pass
1260
-
1261
- async def async_post_call_success_hook(
1262
- self, data: dict, user_api_key_dict: UserAPIKeyAuth, response
1263
- ):
1264
- """
1265
- Proxy level tracking - triggered when the proxy responds with a success response to the client
1266
- """
1267
- try:
1268
- from litellm.litellm_core_utils.litellm_logging import (
1269
- StandardLoggingPayloadSetup,
1270
- )
1271
-
1272
- enum_values = UserAPIKeyLabelValues(
1273
- end_user=user_api_key_dict.end_user_id,
1274
- hashed_api_key=user_api_key_dict.api_key,
1275
- api_key_alias=user_api_key_dict.key_alias,
1276
- requested_model=data.get("model", ""),
1277
- team=user_api_key_dict.team_id,
1278
- team_alias=user_api_key_dict.team_alias,
1279
- user=user_api_key_dict.user_id,
1280
- user_email=user_api_key_dict.user_email,
1281
- status_code="200",
1282
- route=user_api_key_dict.request_route,
1283
- tags=StandardLoggingPayloadSetup._get_request_tags(
1284
- data.get("metadata", {}), data.get("proxy_server_request", {})
1285
- ),
1286
- )
1287
- _labels = prometheus_label_factory(
1288
- supported_enum_labels=self.get_labels_for_metric(
1289
- metric_name="litellm_proxy_total_requests_metric"
1290
- ),
1291
- enum_values=enum_values,
1292
- )
1293
- self.litellm_proxy_total_requests_metric.labels(**_labels).inc()
1294
-
1295
- except Exception as e:
1296
- verbose_logger.exception(
1297
- "prometheus Layer Error(): Exception occured - {}".format(str(e))
1298
- )
1299
- pass
1300
-
1301
- def set_llm_deployment_failure_metrics(self, request_kwargs: dict):
1302
- """
1303
- Sets Failure metrics when an LLM API call fails
1304
-
1305
- - mark the deployment as partial outage
1306
- - increment deployment failure responses metric
1307
- - increment deployment total requests metric
1308
-
1309
- Args:
1310
- request_kwargs: dict
1311
-
1312
- """
1313
- try:
1314
- verbose_logger.debug("setting remaining tokens requests metric")
1315
- standard_logging_payload: StandardLoggingPayload = request_kwargs.get(
1316
- "standard_logging_object", {}
1317
- )
1318
- _litellm_params = request_kwargs.get("litellm_params", {}) or {}
1319
- litellm_model_name = request_kwargs.get("model", None)
1320
- model_group = standard_logging_payload.get("model_group", None)
1321
- api_base = standard_logging_payload.get("api_base", None)
1322
- model_id = standard_logging_payload.get("model_id", None)
1323
- exception = request_kwargs.get("exception", None)
1324
-
1325
- llm_provider = _litellm_params.get("custom_llm_provider", None)
1326
-
1327
- # Create enum_values for the label factory (always create for use in different metrics)
1328
- enum_values = UserAPIKeyLabelValues(
1329
- litellm_model_name=litellm_model_name,
1330
- model_id=model_id,
1331
- api_base=api_base,
1332
- api_provider=llm_provider,
1333
- exception_status=(
1334
- str(getattr(exception, "status_code", None)) if exception else None
1335
- ),
1336
- exception_class=(
1337
- self._get_exception_class_name(exception) if exception else None
1338
- ),
1339
- requested_model=model_group,
1340
- hashed_api_key=standard_logging_payload["metadata"][
1341
- "user_api_key_hash"
1342
- ],
1343
- api_key_alias=standard_logging_payload["metadata"][
1344
- "user_api_key_alias"
1345
- ],
1346
- team=standard_logging_payload["metadata"]["user_api_key_team_id"],
1347
- team_alias=standard_logging_payload["metadata"][
1348
- "user_api_key_team_alias"
1349
- ],
1350
- tags=standard_logging_payload.get("request_tags", []),
1351
- )
1352
-
1353
- """
1354
- log these labels
1355
- ["litellm_model_name", "model_id", "api_base", "api_provider"]
1356
- """
1357
- self.set_deployment_partial_outage(
1358
- litellm_model_name=litellm_model_name or "",
1359
- model_id=model_id,
1360
- api_base=api_base,
1361
- api_provider=llm_provider or "",
1362
- )
1363
- if exception is not None:
1364
-
1365
- _labels = prometheus_label_factory(
1366
- supported_enum_labels=self.get_labels_for_metric(
1367
- metric_name="litellm_deployment_failure_responses"
1368
- ),
1369
- enum_values=enum_values,
1370
- )
1371
- self.litellm_deployment_failure_responses.labels(**_labels).inc()
1372
-
1373
- _labels = prometheus_label_factory(
1374
- supported_enum_labels=self.get_labels_for_metric(
1375
- metric_name="litellm_deployment_total_requests"
1376
- ),
1377
- enum_values=enum_values,
1378
- )
1379
- self.litellm_deployment_total_requests.labels(**_labels).inc()
1380
-
1381
- pass
1382
- except Exception as e:
1383
- verbose_logger.debug(
1384
- "Prometheus Error: set_llm_deployment_failure_metrics. Exception occured - {}".format(
1385
- str(e)
1386
- )
1387
- )
1388
-
1389
- def set_llm_deployment_success_metrics(
1390
- self,
1391
- request_kwargs: dict,
1392
- start_time,
1393
- end_time,
1394
- enum_values: UserAPIKeyLabelValues,
1395
- output_tokens: float = 1.0,
1396
- ):
1397
-
1398
- try:
1399
- verbose_logger.debug("setting remaining tokens requests metric")
1400
- standard_logging_payload: Optional[StandardLoggingPayload] = (
1401
- request_kwargs.get("standard_logging_object")
1402
- )
1403
-
1404
- if standard_logging_payload is None:
1405
- return
1406
-
1407
- api_base = standard_logging_payload["api_base"]
1408
- _litellm_params = request_kwargs.get("litellm_params", {}) or {}
1409
- _metadata = _litellm_params.get("metadata", {})
1410
- litellm_model_name = request_kwargs.get("model", None)
1411
- llm_provider = _litellm_params.get("custom_llm_provider", None)
1412
- _model_info = _metadata.get("model_info") or {}
1413
- model_id = _model_info.get("id", None)
1414
-
1415
- remaining_requests: Optional[int] = None
1416
- remaining_tokens: Optional[int] = None
1417
- if additional_headers := standard_logging_payload["hidden_params"][
1418
- "additional_headers"
1419
- ]:
1420
- # OpenAI / OpenAI Compatible headers
1421
- remaining_requests = additional_headers.get(
1422
- "x_ratelimit_remaining_requests", None
1423
- )
1424
- remaining_tokens = additional_headers.get(
1425
- "x_ratelimit_remaining_tokens", None
1426
- )
1427
-
1428
- if litellm_overhead_time_ms := standard_logging_payload[
1429
- "hidden_params"
1430
- ].get("litellm_overhead_time_ms"):
1431
- _labels = prometheus_label_factory(
1432
- supported_enum_labels=self.get_labels_for_metric(
1433
- metric_name="litellm_overhead_latency_metric"
1434
- ),
1435
- enum_values=enum_values,
1436
- )
1437
- self.litellm_overhead_latency_metric.labels(**_labels).observe(
1438
- litellm_overhead_time_ms / 1000
1439
- ) # set as seconds
1440
-
1441
- if remaining_requests:
1442
- """
1443
- "model_group",
1444
- "api_provider",
1445
- "api_base",
1446
- "litellm_model_name"
1447
- """
1448
- _labels = prometheus_label_factory(
1449
- supported_enum_labels=self.get_labels_for_metric(
1450
- metric_name="litellm_remaining_requests_metric"
1451
- ),
1452
- enum_values=enum_values,
1453
- )
1454
- self.litellm_remaining_requests_metric.labels(**_labels).set(
1455
- remaining_requests
1456
- )
1457
-
1458
- if remaining_tokens:
1459
- _labels = prometheus_label_factory(
1460
- supported_enum_labels=self.get_labels_for_metric(
1461
- metric_name="litellm_remaining_tokens_metric"
1462
- ),
1463
- enum_values=enum_values,
1464
- )
1465
- self.litellm_remaining_tokens_metric.labels(**_labels).set(
1466
- remaining_tokens
1467
- )
1468
-
1469
- """
1470
- log these labels
1471
- ["litellm_model_name", "requested_model", model_id", "api_base", "api_provider"]
1472
- """
1473
- self.set_deployment_healthy(
1474
- litellm_model_name=litellm_model_name or "",
1475
- model_id=model_id or "",
1476
- api_base=api_base or "",
1477
- api_provider=llm_provider or "",
1478
- )
1479
-
1480
- _labels = prometheus_label_factory(
1481
- supported_enum_labels=self.get_labels_for_metric(
1482
- metric_name="litellm_deployment_success_responses"
1483
- ),
1484
- enum_values=enum_values,
1485
- )
1486
- self.litellm_deployment_success_responses.labels(**_labels).inc()
1487
-
1488
- _labels = prometheus_label_factory(
1489
- supported_enum_labels=self.get_labels_for_metric(
1490
- metric_name="litellm_deployment_total_requests"
1491
- ),
1492
- enum_values=enum_values,
1493
- )
1494
- self.litellm_deployment_total_requests.labels(**_labels).inc()
1495
-
1496
- # Track deployment Latency
1497
- response_ms: timedelta = end_time - start_time
1498
- time_to_first_token_response_time: Optional[timedelta] = None
1499
-
1500
- if (
1501
- request_kwargs.get("stream", None) is not None
1502
- and request_kwargs["stream"] is True
1503
- ):
1504
- # only log ttft for streaming request
1505
- time_to_first_token_response_time = (
1506
- request_kwargs.get("completion_start_time", end_time) - start_time
1507
- )
1508
-
1509
- # use the metric that is not None
1510
- # if streaming - use time_to_first_token_response
1511
- # if not streaming - use response_ms
1512
- _latency: timedelta = time_to_first_token_response_time or response_ms
1513
- _latency_seconds = _latency.total_seconds()
1514
-
1515
- # latency per output token
1516
- latency_per_token = None
1517
- if output_tokens is not None and output_tokens > 0:
1518
- latency_per_token = _latency_seconds / output_tokens
1519
- _labels = prometheus_label_factory(
1520
- supported_enum_labels=self.get_labels_for_metric(
1521
- metric_name="litellm_deployment_latency_per_output_token"
1522
- ),
1523
- enum_values=enum_values,
1524
- )
1525
- self.litellm_deployment_latency_per_output_token.labels(
1526
- **_labels
1527
- ).observe(latency_per_token)
1528
-
1529
- except Exception as e:
1530
- verbose_logger.exception(
1531
- "Prometheus Error: set_llm_deployment_success_metrics. Exception occured - {}".format(
1532
- str(e)
1533
- )
1534
- )
1535
- return
1536
-
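The tail of `set_llm_deployment_success_metrics` above records remaining rate-limit headroom, LiteLLM overhead, and deployment latency, preferring time-to-first-token for streaming calls and dividing by output tokens where possible. A minimal standalone sketch of that latency selection; the function name and signature here are illustrative, not part of the removed code:

```python
from datetime import datetime, timedelta
from typing import Optional, Tuple


def pick_latency_seconds(
    start_time: datetime,
    end_time: datetime,
    completion_start_time: Optional[datetime],
    stream: bool,
    output_tokens: Optional[int],
) -> Tuple[float, Optional[float]]:
    """Return (latency_seconds, latency_per_output_token)."""
    response_time: timedelta = end_time - start_time
    ttft: Optional[timedelta] = None
    if stream and completion_start_time is not None:
        # streaming requests report time-to-first-token instead of total latency
        ttft = completion_start_time - start_time

    latency = (ttft or response_time).total_seconds()
    per_token = latency / output_tokens if output_tokens else None
    return latency, per_token
```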
1537
- @staticmethod
1538
- def _get_exception_class_name(exception: Exception) -> str:
1539
- exception_class_name = ""
1540
- if hasattr(exception, "llm_provider"):
1541
- exception_class_name = getattr(exception, "llm_provider") or ""
1542
-
1543
- # pretty print the provider name on prometheus
1544
- # eg. `openai` -> `Openai.`
1545
- if len(exception_class_name) >= 1:
1546
- exception_class_name = (
1547
- exception_class_name[0].upper() + exception_class_name[1:] + "."
1548
- )
1549
-
1550
- exception_class_name += exception.__class__.__name__
1551
- return exception_class_name
1552
-
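`_get_exception_class_name` builds the `exception_class` label by capitalizing the provider, appending a dot, and then the exception's class name. For example, with a made-up exception type:

```python
class RateLimitError(Exception):
    # hypothetical provider-aware exception, only for illustration
    llm_provider = "openai"


exc = RateLimitError("slow down")
provider = getattr(exc, "llm_provider", "") or ""
if provider:
    provider = provider[0].upper() + provider[1:] + "."
label = provider + exc.__class__.__name__
print(label)  # -> "Openai.RateLimitError"
```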
1553
- async def log_success_fallback_event(
1554
- self, original_model_group: str, kwargs: dict, original_exception: Exception
1555
- ):
1556
- """
1557
-
1558
- Logs a successful LLM fallback event on prometheus
1559
-
1560
- """
1561
- from litellm.litellm_core_utils.litellm_logging import (
1562
- StandardLoggingMetadata,
1563
- StandardLoggingPayloadSetup,
1564
- )
1565
-
1566
- verbose_logger.debug(
1567
- "Prometheus: log_success_fallback_event, original_model_group: %s, kwargs: %s",
1568
- original_model_group,
1569
- kwargs,
1570
- )
1571
- _metadata = kwargs.get("metadata", {})
1572
- standard_metadata: StandardLoggingMetadata = (
1573
- StandardLoggingPayloadSetup.get_standard_logging_metadata(
1574
- metadata=_metadata
1575
- )
1576
- )
1577
- _new_model = kwargs.get("model")
1578
- _tags = cast(List[str], kwargs.get("tags") or [])
1579
-
1580
- enum_values = UserAPIKeyLabelValues(
1581
- requested_model=original_model_group,
1582
- fallback_model=_new_model,
1583
- hashed_api_key=standard_metadata["user_api_key_hash"],
1584
- api_key_alias=standard_metadata["user_api_key_alias"],
1585
- team=standard_metadata["user_api_key_team_id"],
1586
- team_alias=standard_metadata["user_api_key_team_alias"],
1587
- exception_status=str(getattr(original_exception, "status_code", None)),
1588
- exception_class=self._get_exception_class_name(original_exception),
1589
- tags=_tags,
1590
- )
1591
- _labels = prometheus_label_factory(
1592
- supported_enum_labels=self.get_labels_for_metric(
1593
- metric_name="litellm_deployment_successful_fallbacks"
1594
- ),
1595
- enum_values=enum_values,
1596
- )
1597
- self.litellm_deployment_successful_fallbacks.labels(**_labels).inc()
1598
-
1599
- async def log_failure_fallback_event(
1600
- self, original_model_group: str, kwargs: dict, original_exception: Exception
1601
- ):
1602
- """
1603
- Logs a failed LLM fallback event on prometheus
1604
- """
1605
- from litellm.litellm_core_utils.litellm_logging import (
1606
- StandardLoggingMetadata,
1607
- StandardLoggingPayloadSetup,
1608
- )
1609
-
1610
- verbose_logger.debug(
1611
- "Prometheus: log_failure_fallback_event, original_model_group: %s, kwargs: %s",
1612
- original_model_group,
1613
- kwargs,
1614
- )
1615
- _new_model = kwargs.get("model")
1616
- _metadata = kwargs.get("metadata", {})
1617
- _tags = cast(List[str], kwargs.get("tags") or [])
1618
- standard_metadata: StandardLoggingMetadata = (
1619
- StandardLoggingPayloadSetup.get_standard_logging_metadata(
1620
- metadata=_metadata
1621
- )
1622
- )
1623
-
1624
- enum_values = UserAPIKeyLabelValues(
1625
- requested_model=original_model_group,
1626
- fallback_model=_new_model,
1627
- hashed_api_key=standard_metadata["user_api_key_hash"],
1628
- api_key_alias=standard_metadata["user_api_key_alias"],
1629
- team=standard_metadata["user_api_key_team_id"],
1630
- team_alias=standard_metadata["user_api_key_team_alias"],
1631
- exception_status=str(getattr(original_exception, "status_code", None)),
1632
- exception_class=self._get_exception_class_name(original_exception),
1633
- tags=_tags,
1634
- )
1635
-
1636
- _labels = prometheus_label_factory(
1637
- supported_enum_labels=self.get_labels_for_metric(
1638
- metric_name="litellm_deployment_failed_fallbacks"
1639
- ),
1640
- enum_values=enum_values,
1641
- )
1642
- self.litellm_deployment_failed_fallbacks.labels(**_labels).inc()
1643
-
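`log_success_fallback_event` and `log_failure_fallback_event` both resolve a label set via `prometheus_label_factory` and increment a Counter. The same counter pattern in isolation, with a reduced label set chosen here for brevity:

```python
from prometheus_client import Counter

# simplified label set; the real metric carries many more labels
successful_fallbacks = Counter(
    "litellm_deployment_successful_fallbacks",
    "Count of successful fallbacks away from a primary model group",
    ["requested_model", "fallback_model", "exception_class"],
)

successful_fallbacks.labels(
    requested_model="gpt-4o",
    fallback_model="claude-3-5-sonnet",
    exception_class="Openai.RateLimitError",
).inc()
```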
1644
- def set_litellm_deployment_state(
1645
- self,
1646
- state: int,
1647
- litellm_model_name: str,
1648
- model_id: Optional[str],
1649
- api_base: Optional[str],
1650
- api_provider: str,
1651
- ):
1652
- self.litellm_deployment_state.labels(
1653
- litellm_model_name, model_id, api_base, api_provider
1654
- ).set(state)
1655
-
1656
- def set_deployment_healthy(
1657
- self,
1658
- litellm_model_name: str,
1659
- model_id: str,
1660
- api_base: str,
1661
- api_provider: str,
1662
- ):
1663
- self.set_litellm_deployment_state(
1664
- 0, litellm_model_name, model_id, api_base, api_provider
1665
- )
1666
-
1667
- def set_deployment_partial_outage(
1668
- self,
1669
- litellm_model_name: str,
1670
- model_id: Optional[str],
1671
- api_base: Optional[str],
1672
- api_provider: str,
1673
- ):
1674
- self.set_litellm_deployment_state(
1675
- 1, litellm_model_name, model_id, api_base, api_provider
1676
- )
1677
-
1678
- def set_deployment_complete_outage(
1679
- self,
1680
- litellm_model_name: str,
1681
- model_id: Optional[str],
1682
- api_base: Optional[str],
1683
- api_provider: str,
1684
- ):
1685
- self.set_litellm_deployment_state(
1686
- 2, litellm_model_name, model_id, api_base, api_provider
1687
- )
1688
-
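The deployment state helpers write a single Gauge whose value encodes health: 0 = healthy, 1 = partial outage, 2 = complete outage. A stripped-down equivalent using `prometheus_client` directly:

```python
from prometheus_client import Gauge

deployment_state = Gauge(
    "litellm_deployment_state",
    "0 = healthy, 1 = partial outage, 2 = complete outage",
    ["litellm_model_name", "model_id", "api_base", "api_provider"],
)

# mark one deployment as partially degraded
deployment_state.labels(
    litellm_model_name="gpt-4o",
    model_id="deployment-1",
    api_base="https://api.openai.com",
    api_provider="openai",
).set(1)
```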
1689
- def increment_deployment_cooled_down(
1690
- self,
1691
- litellm_model_name: str,
1692
- model_id: str,
1693
- api_base: str,
1694
- api_provider: str,
1695
- exception_status: str,
1696
- ):
1697
- """
1698
- increment metric when litellm.Router / load balancing logic places a deployment in cool down
1699
- """
1700
- self.litellm_deployment_cooled_down.labels(
1701
- litellm_model_name, model_id, api_base, api_provider, exception_status
1702
- ).inc()
1703
-
1704
- def track_provider_remaining_budget(
1705
- self, provider: str, spend: float, budget_limit: float
1706
- ):
1707
- """
1708
- Track provider remaining budget in Prometheus
1709
- """
1710
- self.litellm_provider_remaining_budget_metric.labels(provider).set(
1711
- self._safe_get_remaining_budget(
1712
- max_budget=budget_limit,
1713
- spend=spend,
1714
- )
1715
- )
1716
-
1717
- def _safe_get_remaining_budget(
1718
- self, max_budget: Optional[float], spend: Optional[float]
1719
- ) -> float:
1720
- if max_budget is None:
1721
- return float("inf")
1722
-
1723
- if spend is None:
1724
- return max_budget
1725
-
1726
- return max_budget - spend
1727
-
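`_safe_get_remaining_budget` treats a missing max budget as unlimited and missing spend as zero. The same guard logic, with a few asserts showing the edge cases (the function name is illustrative):

```python
from typing import Optional


def remaining_budget(max_budget: Optional[float], spend: Optional[float]) -> float:
    if max_budget is None:
        return float("inf")  # no budget configured -> effectively unlimited
    return max_budget - (spend or 0.0)


assert remaining_budget(None, 12.5) == float("inf")
assert remaining_budget(100.0, None) == 100.0
assert remaining_budget(100.0, 12.5) == 87.5
```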
1728
- async def _initialize_budget_metrics(
1729
- self,
1730
- data_fetch_function: Callable[..., Awaitable[Tuple[List[Any], Optional[int]]]],
1731
- set_metrics_function: Callable[[List[Any]], Awaitable[None]],
1732
- data_type: Literal["teams", "keys"],
1733
- ):
1734
- """
1735
- Generic method to initialize budget metrics for teams or API keys.
1736
-
1737
- Args:
1738
- data_fetch_function: Function to fetch data with pagination.
1739
- set_metrics_function: Function to set metrics for the fetched data.
1740
- data_type: String representing the type of data ("teams" or "keys") for logging purposes.
1741
- """
1742
- from litellm.proxy.proxy_server import prisma_client
1743
-
1744
- if prisma_client is None:
1745
- return
1746
-
1747
- try:
1748
- page = 1
1749
- page_size = 50
1750
- data, total_count = await data_fetch_function(
1751
- page_size=page_size, page=page
1752
- )
1753
-
1754
- if total_count is None:
1755
- total_count = len(data)
1756
-
1757
- # Calculate total pages needed
1758
- total_pages = (total_count + page_size - 1) // page_size
1759
-
1760
- # Set metrics for first page of data
1761
- await set_metrics_function(data)
1762
-
1763
- # Get and set metrics for remaining pages
1764
- for page in range(2, total_pages + 1):
1765
- data, _ = await data_fetch_function(page_size=page_size, page=page)
1766
- await set_metrics_function(data)
1767
-
1768
- except Exception as e:
1769
- verbose_logger.exception(
1770
- f"Error initializing {data_type} budget metrics: {str(e)}"
1771
- )
1772
-
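`_initialize_budget_metrics` is a generic paginate-and-emit loop: fetch page 1, derive the page count from the reported total (falling back to the first page's length), then walk the remaining pages. A self-contained sketch with an in-memory fetcher standing in for the database-backed fetch functions:

```python
import asyncio
from typing import Any, List, Optional, Tuple

ROWS = [f"team-{i}" for i in range(125)]  # pretend database


async def fetch_page(page_size: int, page: int) -> Tuple[List[Any], Optional[int]]:
    start = (page - 1) * page_size
    return ROWS[start : start + page_size], len(ROWS)


async def emit_metrics(rows: List[Any]) -> None:
    print(f"emitting metrics for {len(rows)} rows")


async def initialize(page_size: int = 50) -> None:
    rows, total = await fetch_page(page_size=page_size, page=1)
    total = total if total is not None else len(rows)
    total_pages = (total + page_size - 1) // page_size  # ceiling division
    await emit_metrics(rows)
    for page in range(2, total_pages + 1):
        rows, _ = await fetch_page(page_size=page_size, page=page)
        await emit_metrics(rows)


asyncio.run(initialize())  # -> 50, 50, 25 rows across three pages
```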
1773
- async def _initialize_team_budget_metrics(self):
1774
- """
1775
- Initialize team budget metrics by reusing the generic pagination logic.
1776
- """
1777
- from litellm.proxy.management_endpoints.team_endpoints import (
1778
- get_paginated_teams,
1779
- )
1780
- from litellm.proxy.proxy_server import prisma_client
1781
-
1782
- if prisma_client is None:
1783
- verbose_logger.debug(
1784
- "Prometheus: skipping team metrics initialization, DB not initialized"
1785
- )
1786
- return
1787
-
1788
- async def fetch_teams(
1789
- page_size: int, page: int
1790
- ) -> Tuple[List[LiteLLM_TeamTable], Optional[int]]:
1791
- teams, total_count = await get_paginated_teams(
1792
- prisma_client=prisma_client, page_size=page_size, page=page
1793
- )
1794
- if total_count is None:
1795
- total_count = len(teams)
1796
- return teams, total_count
1797
-
1798
- await self._initialize_budget_metrics(
1799
- data_fetch_function=fetch_teams,
1800
- set_metrics_function=self._set_team_list_budget_metrics,
1801
- data_type="teams",
1802
- )
1803
-
1804
- async def _initialize_api_key_budget_metrics(self):
1805
- """
1806
- Initialize API key budget metrics by reusing the generic pagination logic.
1807
- """
1808
- from typing import Union
1809
-
1810
- from litellm.constants import UI_SESSION_TOKEN_TEAM_ID
1811
- from litellm.proxy.management_endpoints.key_management_endpoints import (
1812
- _list_key_helper,
1813
- )
1814
- from litellm.proxy.proxy_server import prisma_client
1815
-
1816
- if prisma_client is None:
1817
- verbose_logger.debug(
1818
- "Prometheus: skipping key metrics initialization, DB not initialized"
1819
- )
1820
- return
1821
-
1822
- async def fetch_keys(
1823
- page_size: int, page: int
1824
- ) -> Tuple[List[Union[str, UserAPIKeyAuth]], Optional[int]]:
1825
- key_list_response = await _list_key_helper(
1826
- prisma_client=prisma_client,
1827
- page=page,
1828
- size=page_size,
1829
- user_id=None,
1830
- team_id=None,
1831
- key_alias=None,
1832
- key_hash=None,
1833
- exclude_team_id=UI_SESSION_TOKEN_TEAM_ID,
1834
- return_full_object=True,
1835
- organization_id=None,
1836
- )
1837
- keys = key_list_response.get("keys", [])
1838
- total_count = key_list_response.get("total_count")
1839
- if total_count is None:
1840
- total_count = len(keys)
1841
- return keys, total_count
1842
-
1843
- await self._initialize_budget_metrics(
1844
- data_fetch_function=fetch_keys,
1845
- set_metrics_function=self._set_key_list_budget_metrics,
1846
- data_type="keys",
1847
- )
1848
-
1849
- async def initialize_remaining_budget_metrics(self):
1850
- """
1851
- Handler for initializing remaining budget metrics for all teams to avoid metric discrepancies.
1852
-
1853
- Runs when prometheus logger starts up.
1854
-
1855
- - If redis cache is available, we use the pod lock manager to acquire a lock and initialize the metrics.
1856
- - Ensures only one pod emits the metrics at a time.
1857
- - If redis cache is not available, we initialize the metrics directly.
1858
- """
1859
- from litellm.constants import PROMETHEUS_EMIT_BUDGET_METRICS_JOB_NAME
1860
- from litellm.proxy.proxy_server import proxy_logging_obj
1861
-
1862
- pod_lock_manager = proxy_logging_obj.db_spend_update_writer.pod_lock_manager
1863
-
1864
- # if using redis, ensure only one pod emits the metrics at a time
1865
- if pod_lock_manager and pod_lock_manager.redis_cache:
1866
- if await pod_lock_manager.acquire_lock(
1867
- cronjob_id=PROMETHEUS_EMIT_BUDGET_METRICS_JOB_NAME
1868
- ):
1869
- try:
1870
- await self._initialize_remaining_budget_metrics()
1871
- finally:
1872
- await pod_lock_manager.release_lock(
1873
- cronjob_id=PROMETHEUS_EMIT_BUDGET_METRICS_JOB_NAME
1874
- )
1875
- else:
1876
- # if not using redis, initialize the metrics directly
1877
- await self._initialize_remaining_budget_metrics()
1878
-
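`initialize_remaining_budget_metrics` guards the emission with the pod lock manager when Redis is available, so only one pod publishes the budget gauges per interval, and falls back to emitting directly otherwise. A rough sketch of that shape; `lock_manager` here is a stand-in exposing only the `acquire_lock`/`release_lock`/`redis_cache` members used above, not the actual `PodLockManager` class:

```python
async def emit_once_across_pods(lock_manager, job_name: str, emit) -> None:
    # lock_manager: hypothetical object with acquire_lock/release_lock coroutines
    if lock_manager is None or lock_manager.redis_cache is None:
        await emit()  # single pod / no Redis: emit directly
        return

    if await lock_manager.acquire_lock(cronjob_id=job_name):
        try:
            await emit()
        finally:
            await lock_manager.release_lock(cronjob_id=job_name)
```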
1879
- async def _initialize_remaining_budget_metrics(self):
1880
- """
1881
- Helper to initialize remaining budget metrics for all teams and API keys.
1882
- """
1883
- verbose_logger.debug("Emitting key, team budget metrics....")
1884
- await self._initialize_team_budget_metrics()
1885
- await self._initialize_api_key_budget_metrics()
1886
-
1887
- async def _set_key_list_budget_metrics(
1888
- self, keys: List[Union[str, UserAPIKeyAuth]]
1889
- ):
1890
- """Helper function to set budget metrics for a list of keys"""
1891
- for key in keys:
1892
- if isinstance(key, UserAPIKeyAuth):
1893
- self._set_key_budget_metrics(key)
1894
-
1895
- async def _set_team_list_budget_metrics(self, teams: List[LiteLLM_TeamTable]):
1896
- """Helper function to set budget metrics for a list of teams"""
1897
- for team in teams:
1898
- self._set_team_budget_metrics(team)
1899
-
1900
- async def _set_team_budget_metrics_after_api_request(
1901
- self,
1902
- user_api_team: Optional[str],
1903
- user_api_team_alias: Optional[str],
1904
- team_spend: float,
1905
- team_max_budget: float,
1906
- response_cost: float,
1907
- ):
1908
- """
1909
- Set team budget metrics after an LLM API request
1910
-
1911
- - Assemble a LiteLLM_TeamTable object
1912
- - looks up team info from db if not available in metadata
1913
- - Set team budget metrics
1914
- """
1915
- if user_api_team:
1916
- team_object = await self._assemble_team_object(
1917
- team_id=user_api_team,
1918
- team_alias=user_api_team_alias or "",
1919
- spend=team_spend,
1920
- max_budget=team_max_budget,
1921
- response_cost=response_cost,
1922
- )
1923
-
1924
- self._set_team_budget_metrics(team_object)
1925
-
1926
- async def _assemble_team_object(
1927
- self,
1928
- team_id: str,
1929
- team_alias: str,
1930
- spend: Optional[float],
1931
- max_budget: Optional[float],
1932
- response_cost: float,
1933
- ) -> LiteLLM_TeamTable:
1934
- """
1935
- Assemble a LiteLLM_TeamTable object
1936
-
1937
- for fields not available in metadata, we fetch from db
1938
- Fields not available in metadata:
1939
- - `budget_reset_at`
1940
- """
1941
- from litellm.proxy.auth.auth_checks import get_team_object
1942
- from litellm.proxy.proxy_server import prisma_client, user_api_key_cache
1943
-
1944
- _total_team_spend = (spend or 0) + response_cost
1945
- team_object = LiteLLM_TeamTable(
1946
- team_id=team_id,
1947
- team_alias=team_alias,
1948
- spend=_total_team_spend,
1949
- max_budget=max_budget,
1950
- )
1951
- try:
1952
- team_info = await get_team_object(
1953
- team_id=team_id,
1954
- prisma_client=prisma_client,
1955
- user_api_key_cache=user_api_key_cache,
1956
- )
1957
- except Exception as e:
1958
- verbose_logger.debug(
1959
- f"[Non-Blocking] Prometheus: Error getting team info: {str(e)}"
1960
- )
1961
- return team_object
1962
-
1963
- if team_info:
1964
- team_object.budget_reset_at = team_info.budget_reset_at
1965
-
1966
- return team_object
1967
-
1968
- def _set_team_budget_metrics(
1969
- self,
1970
- team: LiteLLM_TeamTable,
1971
- ):
1972
- """
1973
- Set team budget metrics for a single team
1974
-
1975
- - Remaining Budget
1976
- - Max Budget
1977
- - Budget Reset At
1978
- """
1979
- enum_values = UserAPIKeyLabelValues(
1980
- team=team.team_id,
1981
- team_alias=team.team_alias or "",
1982
- )
1983
-
1984
- _labels = prometheus_label_factory(
1985
- supported_enum_labels=self.get_labels_for_metric(
1986
- metric_name="litellm_remaining_team_budget_metric"
1987
- ),
1988
- enum_values=enum_values,
1989
- )
1990
- self.litellm_remaining_team_budget_metric.labels(**_labels).set(
1991
- self._safe_get_remaining_budget(
1992
- max_budget=team.max_budget,
1993
- spend=team.spend,
1994
- )
1995
- )
1996
-
1997
- if team.max_budget is not None:
1998
- _labels = prometheus_label_factory(
1999
- supported_enum_labels=self.get_labels_for_metric(
2000
- metric_name="litellm_team_max_budget_metric"
2001
- ),
2002
- enum_values=enum_values,
2003
- )
2004
- self.litellm_team_max_budget_metric.labels(**_labels).set(team.max_budget)
2005
-
2006
- if team.budget_reset_at is not None:
2007
- _labels = prometheus_label_factory(
2008
- supported_enum_labels=self.get_labels_for_metric(
2009
- metric_name="litellm_team_budget_remaining_hours_metric"
2010
- ),
2011
- enum_values=enum_values,
2012
- )
2013
- self.litellm_team_budget_remaining_hours_metric.labels(**_labels).set(
2014
- self._get_remaining_hours_for_budget_reset(
2015
- budget_reset_at=team.budget_reset_at
2016
- )
2017
- )
2018
-
2019
- def _set_key_budget_metrics(self, user_api_key_dict: UserAPIKeyAuth):
2020
- """
2021
- Set virtual key budget metrics
2022
-
2023
- - Remaining Budget
2024
- - Max Budget
2025
- - Budget Reset At
2026
- """
2027
- enum_values = UserAPIKeyLabelValues(
2028
- hashed_api_key=user_api_key_dict.token,
2029
- api_key_alias=user_api_key_dict.key_alias or "",
2030
- )
2031
- _labels = prometheus_label_factory(
2032
- supported_enum_labels=self.get_labels_for_metric(
2033
- metric_name="litellm_remaining_api_key_budget_metric"
2034
- ),
2035
- enum_values=enum_values,
2036
- )
2037
- self.litellm_remaining_api_key_budget_metric.labels(**_labels).set(
2038
- self._safe_get_remaining_budget(
2039
- max_budget=user_api_key_dict.max_budget,
2040
- spend=user_api_key_dict.spend,
2041
- )
2042
- )
2043
-
2044
- if user_api_key_dict.max_budget is not None:
2045
- _labels = prometheus_label_factory(
2046
- supported_enum_labels=self.get_labels_for_metric(
2047
- metric_name="litellm_api_key_max_budget_metric"
2048
- ),
2049
- enum_values=enum_values,
2050
- )
2051
- self.litellm_api_key_max_budget_metric.labels(**_labels).set(
2052
- user_api_key_dict.max_budget
2053
- )
2054
-
2055
- if user_api_key_dict.budget_reset_at is not None:
2056
- self.litellm_api_key_budget_remaining_hours_metric.labels(**_labels).set(
2057
- self._get_remaining_hours_for_budget_reset(
2058
- budget_reset_at=user_api_key_dict.budget_reset_at
2059
- )
2060
- )
2061
-
2062
- async def _set_api_key_budget_metrics_after_api_request(
2063
- self,
2064
- user_api_key: Optional[str],
2065
- user_api_key_alias: Optional[str],
2066
- response_cost: float,
2067
- key_max_budget: float,
2068
- key_spend: Optional[float],
2069
- ):
2070
- if user_api_key:
2071
- user_api_key_dict = await self._assemble_key_object(
2072
- user_api_key=user_api_key,
2073
- user_api_key_alias=user_api_key_alias or "",
2074
- key_max_budget=key_max_budget,
2075
- key_spend=key_spend,
2076
- response_cost=response_cost,
2077
- )
2078
- self._set_key_budget_metrics(user_api_key_dict)
2079
-
2080
- async def _assemble_key_object(
2081
- self,
2082
- user_api_key: str,
2083
- user_api_key_alias: str,
2084
- key_max_budget: float,
2085
- key_spend: Optional[float],
2086
- response_cost: float,
2087
- ) -> UserAPIKeyAuth:
2088
- """
2089
- Assemble a UserAPIKeyAuth object
2090
- """
2091
- from litellm.proxy.auth.auth_checks import get_key_object
2092
- from litellm.proxy.proxy_server import prisma_client, user_api_key_cache
2093
-
2094
- _total_key_spend = (key_spend or 0) + response_cost
2095
- user_api_key_dict = UserAPIKeyAuth(
2096
- token=user_api_key,
2097
- key_alias=user_api_key_alias,
2098
- max_budget=key_max_budget,
2099
- spend=_total_key_spend,
2100
- )
2101
- try:
2102
- if user_api_key_dict.token:
2103
- key_object = await get_key_object(
2104
- hashed_token=user_api_key_dict.token,
2105
- prisma_client=prisma_client,
2106
- user_api_key_cache=user_api_key_cache,
2107
- )
2108
- if key_object:
2109
- user_api_key_dict.budget_reset_at = key_object.budget_reset_at
2110
- except Exception as e:
2111
- verbose_logger.debug(
2112
- f"[Non-Blocking] Prometheus: Error getting key info: {str(e)}"
2113
- )
2114
-
2115
- return user_api_key_dict
2116
-
2117
- def _get_remaining_hours_for_budget_reset(self, budget_reset_at: datetime) -> float:
2118
- """
2119
- Get remaining hours for budget reset
2120
- """
2121
- return (
2122
- budget_reset_at - datetime.now(budget_reset_at.tzinfo)
2123
- ).total_seconds() / 3600
2124
-
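`_get_remaining_hours_for_budget_reset` is the timezone-aware delta to the reset timestamp, expressed in hours. For example:

```python
from datetime import datetime, timedelta, timezone

budget_reset_at = datetime.now(timezone.utc) + timedelta(days=2)
remaining_hours = (
    budget_reset_at - datetime.now(budget_reset_at.tzinfo)
).total_seconds() / 3600
print(round(remaining_hours))  # -> 48
```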
2125
- def _safe_duration_seconds(
2126
- self,
2127
- start_time: Any,
2128
- end_time: Any,
2129
- ) -> Optional[float]:
2130
- """
2131
- Compute the duration in seconds between two objects.
2132
-
2133
- Returns the duration as a float if both start and end are instances of datetime,
2134
- otherwise returns None.
2135
- """
2136
- if isinstance(start_time, datetime) and isinstance(end_time, datetime):
2137
- return (end_time - start_time).total_seconds()
2138
- return None
2139
-
2140
- @staticmethod
2141
- def initialize_budget_metrics_cron_job(scheduler: AsyncIOScheduler):
2142
- """
2143
- Initialize budget metrics as a cron job. This job runs every `PROMETHEUS_BUDGET_METRICS_REFRESH_INTERVAL_MINUTES` minutes.
2144
-
2145
- It emits the current remaining budget metrics for all Keys and Teams.
2146
- """
2147
- from enterprise.litellm_enterprise.integrations.prometheus import (
2148
- PrometheusLogger,
2149
- )
2150
- from litellm.constants import PROMETHEUS_BUDGET_METRICS_REFRESH_INTERVAL_MINUTES
2151
- from litellm.integrations.custom_logger import CustomLogger
2152
-
2153
- prometheus_loggers: List[CustomLogger] = (
2154
- litellm.logging_callback_manager.get_custom_loggers_for_type(
2155
- callback_type=PrometheusLogger
2156
- )
2157
- )
2158
- # we need to get the initialized prometheus logger instance(s) and call logger.initialize_remaining_budget_metrics() on them
2159
- verbose_logger.debug("found %s prometheus loggers", len(prometheus_loggers))
2160
- if len(prometheus_loggers) > 0:
2161
- prometheus_logger = cast(PrometheusLogger, prometheus_loggers[0])
2162
- verbose_logger.debug(
2163
- "Initializing remaining budget metrics as a cron job executing every %s minutes"
2164
- % PROMETHEUS_BUDGET_METRICS_REFRESH_INTERVAL_MINUTES
2165
- )
2166
- scheduler.add_job(
2167
- prometheus_logger.initialize_remaining_budget_metrics,
2168
- "interval",
2169
- minutes=PROMETHEUS_BUDGET_METRICS_REFRESH_INTERVAL_MINUTES,
2170
- )
2171
-
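`initialize_budget_metrics_cron_job` registers the budget refresh on an APScheduler interval trigger. A minimal standalone version, with a placeholder coroutine and a hard-coded 5-minute interval in place of `PROMETHEUS_BUDGET_METRICS_REFRESH_INTERVAL_MINUTES`:

```python
import asyncio

from apscheduler.schedulers.asyncio import AsyncIOScheduler


async def refresh_budget_metrics() -> None:
    # placeholder for PrometheusLogger.initialize_remaining_budget_metrics
    print("re-emitting key/team budget gauges")


async def main() -> None:
    scheduler = AsyncIOScheduler()
    scheduler.add_job(refresh_budget_metrics, "interval", minutes=5)
    scheduler.start()
    await asyncio.sleep(600)  # keep the loop alive so the job can fire


asyncio.run(main())
```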
2172
- @staticmethod
2173
- def _mount_metrics_endpoint(premium_user: bool):
2174
- """
2175
- Mount the Prometheus metrics endpoint with optional authentication.
2176
-
2177
- Args:
2178
- premium_user (bool): Whether the user is a premium user
2179
- require_auth (bool, optional): Whether to require authentication for the metrics endpoint.
2180
- Defaults to False.
2181
- """
2182
- from prometheus_client import make_asgi_app
2183
-
2184
- from litellm._logging import verbose_proxy_logger
2185
- from litellm.proxy._types import CommonProxyErrors
2186
- from litellm.proxy.proxy_server import app
2187
-
2188
- if premium_user is not True:
2189
- verbose_proxy_logger.warning(
2190
- f"Prometheus metrics are only available for premium users. {CommonProxyErrors.not_premium_user.value}"
2191
- )
2192
-
2193
- # Create metrics ASGI app
2194
- metrics_app = make_asgi_app()
2195
-
2196
- # Mount the metrics app to the app
2197
- app.mount("/metrics", metrics_app)
2198
- verbose_proxy_logger.debug(
2199
- "Starting Prometheus Metrics on /metrics (no authentication)"
2200
- )
2201
-
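`_mount_metrics_endpoint` exposes the default Prometheus registry by mounting `make_asgi_app()` under `/metrics`. The same mount on a fresh FastAPI app (rather than the proxy's `app`) looks like this:

```python
from fastapi import FastAPI
from prometheus_client import Counter, make_asgi_app

app = FastAPI()
requests_served = Counter("demo_requests", "Requests served by the demo app")


@app.get("/ping")
async def ping() -> dict:
    requests_served.inc()
    return {"ok": True}


# expose everything registered with prometheus_client at /metrics
app.mount("/metrics", make_asgi_app())
```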
2202
-
2203
- def prometheus_label_factory(
2204
- supported_enum_labels: List[str],
2205
- enum_values: UserAPIKeyLabelValues,
2206
- tag: Optional[str] = None,
2207
- ) -> dict:
2208
- """
2209
- Returns a dictionary of label + values for prometheus.
2210
-
2211
- Ensures end_user param is not sent to prometheus if it is not supported.
2212
- """
2213
- # Extract dictionary from Pydantic object
2214
- enum_dict = enum_values.model_dump()
2215
-
2216
- # Filter supported labels
2217
- filtered_labels = {
2218
- label: value
2219
- for label, value in enum_dict.items()
2220
- if label in supported_enum_labels
2221
- }
2222
-
2223
- if UserAPIKeyLabelNames.END_USER.value in filtered_labels:
2224
- filtered_labels["end_user"] = get_end_user_id_for_cost_tracking(
2225
- litellm_params={"user_api_key_end_user_id": enum_values.end_user},
2226
- service_type="prometheus",
2227
- )
2228
-
2229
- if enum_values.custom_metadata_labels is not None:
2230
- for key, value in enum_values.custom_metadata_labels.items():
2231
- if key in supported_enum_labels:
2232
- filtered_labels[key] = value
2233
-
2234
- # Add custom tags if configured
2235
- if enum_values.tags is not None:
2236
- custom_tag_labels = get_custom_labels_from_tags(enum_values.tags)
2237
- for key, value in custom_tag_labels.items():
2238
- if key in supported_enum_labels:
2239
- filtered_labels[key] = value
2240
-
2241
- for label in supported_enum_labels:
2242
- if label not in filtered_labels:
2243
- filtered_labels[label] = None
2244
-
2245
- return filtered_labels
2246
-
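`prometheus_label_factory` narrows a full set of candidate values down to exactly the labels a metric was declared with, padding anything missing with `None`. The core filtering step, stripped of the end-user, metadata, and tag handling:

```python
from typing import Dict, List, Optional


def build_labels(
    supported_labels: List[str], values: Dict[str, Optional[str]]
) -> Dict[str, Optional[str]]:
    labels = {k: v for k, v in values.items() if k in supported_labels}
    for label in supported_labels:
        labels.setdefault(label, None)  # every declared label must be present
    return labels


print(
    build_labels(
        ["requested_model", "team", "api_provider"],
        {"requested_model": "gpt-4o", "team": "search", "end_user": "u-123"},
    )
)  # -> {'requested_model': 'gpt-4o', 'team': 'search', 'api_provider': None}
```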
2247
-
2248
- def get_custom_labels_from_metadata(metadata: dict) -> Dict[str, str]:
2249
- """
2250
- Get custom labels from metadata
2251
- """
2252
- keys = litellm.custom_prometheus_metadata_labels
2253
- if keys is None or len(keys) == 0:
2254
- return {}
2255
-
2256
- result: Dict[str, str] = {}
2257
-
2258
- for key in keys:
2259
- # Split the dot notation key into parts
2260
- original_key = key
2261
- key = key.replace("metadata.", "", 1) if key.startswith("metadata.") else key
2262
-
2263
- keys_parts = key.split(".")
2264
- # Traverse through the dictionary using the parts
2265
- value = metadata
2266
- for part in keys_parts:
2267
- value = value.get(part, None) # Get the value, return None if not found
2268
- if value is None:
2269
- break
2270
-
2271
- if value is not None and isinstance(value, str):
2272
- result[original_key.replace(".", "_")] = value
2273
-
2274
- return result
2275
-
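`get_custom_labels_from_metadata` walks each configured dot-notation key through the request metadata and emits a label whose name replaces dots with underscores. For instance, with hypothetical keys and metadata:

```python
from typing import Dict

configured_keys = ["metadata.user.tier", "metadata.region"]
metadata = {"user": {"tier": "enterprise"}, "region": "eu-west-1"}

labels: Dict[str, str] = {}
for original_key in configured_keys:
    # strip a leading "metadata." prefix, mirroring the removed helper
    key = (
        original_key.replace("metadata.", "", 1)
        if original_key.startswith("metadata.")
        else original_key
    )
    value = metadata
    for part in key.split("."):
        value = value.get(part) if isinstance(value, dict) else None
        if value is None:
            break
    if isinstance(value, str):
        labels[original_key.replace(".", "_")] = value

print(labels)  # -> {'metadata_user_tier': 'enterprise', 'metadata_region': 'eu-west-1'}
```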
2276
-
2277
- def _tag_matches_wildcard_configured_pattern(
2278
- tags: List[str], configured_tag: str
2279
- ) -> bool:
2280
- """
2281
- Check if any of the request tags matches a wildcard configured pattern
2282
-
2283
- Args:
2284
- tags: List[str] - The request tags
2285
- configured_tag: str - The configured tag
2286
-
2287
- Returns:
2288
- bool - True if any of the request tags matches the configured tag, False otherwise
2289
-
2290
- e.g.
2291
- tags = ["User-Agent: curl/7.68.0", "User-Agent: python-requests/2.28.1", "prod"]
2292
- configured_tag = "User-Agent: curl/*"
2293
- _tag_matches_wildcard_configured_pattern(tags=tags, configured_tag=configured_tag) # True
2294
-
2295
- configured_tag = "User-Agent: python-requests/*"
2296
- _tag_matches_wildcard_configured_pattern(tags=tags, configured_tag=configured_tag) # True
2297
-
2298
- configured_tag = "gm"
2299
- _tag_matches_wildcard_configured_pattern(tags=tags, configured_tag=configured_tag) # False
2300
- """
2301
- import re
2302
-
2303
- from litellm.router_utils.pattern_match_deployments import PatternMatchRouter
2304
-
2305
- pattern_router = PatternMatchRouter()
2306
- regex_pattern = pattern_router._pattern_to_regex(configured_tag)
2307
- return any(re.match(pattern=regex_pattern, string=tag) for tag in tags)
2308
-
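`_tag_matches_wildcard_configured_pattern` converts the configured tag into a regex via `PatternMatchRouter._pattern_to_regex` and tests every request tag against it. A rough equivalent using `fnmatch` instead of the router (a simplification for illustration, not the removed implementation):

```python
from fnmatch import fnmatch
from typing import List


def tag_matches(tags: List[str], configured_tag: str) -> bool:
    # fnmatch treats "*" as a wildcard, loosely mirroring the router's matching
    return any(fnmatch(tag, configured_tag) for tag in tags)


tags = ["User-Agent: curl/7.68.0", "User-Agent: python-requests/2.28.1", "prod"]
assert tag_matches(tags, "User-Agent: curl/*")
assert tag_matches(tags, "User-Agent: python-requests/*")
assert not tag_matches(tags, "gm")
```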
2309
-
2310
- def get_custom_labels_from_tags(tags: List[str]) -> Dict[str, str]:
2311
- """
2312
- Get custom labels from tags based on admin configuration.
2313
-
2314
- Supports both exact matches and wildcard patterns:
2315
- - Exact match: "prod" matches "prod" exactly
2316
- - Wildcard pattern: "User-Agent: curl/*" matches "User-Agent: curl/7.68.0"
2317
-
2318
- Reuses PatternMatchRouter for wildcard pattern matching.
2319
-
2320
- Returns dict of label_name: "true" if the tag matches the configured tag, "false" otherwise
2321
-
2322
- {
2323
- "tag_User-Agent_curl": "true",
2324
- "tag_User-Agent_python_requests": "false",
2325
- "tag_Environment_prod": "true",
2326
- "tag_Environment_dev": "false",
2327
- "tag_Service_api_gateway_v2": "true",
2328
- "tag_Service_web_app_v1": "false",
2329
- }
2330
- """
2331
- import re
2332
-
2333
- from litellm.router_utils.pattern_match_deployments import PatternMatchRouter
2334
- from litellm.types.integrations.prometheus import _sanitize_prometheus_label_name
2335
-
2336
- configured_tags = litellm.custom_prometheus_tags
2337
- if configured_tags is None or len(configured_tags) == 0:
2338
- return {}
2339
-
2340
- result: Dict[str, str] = {}
2341
- pattern_router = PatternMatchRouter()
2342
-
2343
- for configured_tag in configured_tags:
2344
- label_name = _sanitize_prometheus_label_name(f"tag_{configured_tag}")
2345
-
2346
- # Check for exact match first (backwards compatibility)
2347
- if configured_tag in tags:
2348
- result[label_name] = "true"
2349
- continue
2350
-
2351
- # Use PatternMatchRouter for wildcard pattern matching
2352
- if "*" in configured_tag and _tag_matches_wildcard_configured_pattern(
2353
- tags=tags, configured_tag=configured_tag
2354
- ):
2355
- result[label_name] = "true"
2356
- continue
2357
-
2358
- # No match found
2359
- result[label_name] = "false"
2360
-
2361
- return result