litellm-enterprise 0.1.20__py3-none-any.whl → 0.1.21__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only and reflects the package contents exactly as they appear in the public registry.
- litellm_enterprise/enterprise_callbacks/.pytest_cache/.gitignore +2 -0
- litellm_enterprise/enterprise_callbacks/.pytest_cache/CACHEDIR.TAG +4 -0
- litellm_enterprise/enterprise_callbacks/.pytest_cache/README.md +8 -0
- litellm_enterprise/enterprise_callbacks/.pytest_cache/v/cache/nodeids +1 -0
- litellm_enterprise/enterprise_callbacks/.pytest_cache/v/cache/stepwise +1 -0
- litellm_enterprise/enterprise_callbacks/generic_api_callback.py +1 -1
- litellm_enterprise/enterprise_callbacks/llama_guard.py +2 -10
- litellm_enterprise/enterprise_callbacks/llm_guard.py +2 -9
- litellm_enterprise/enterprise_callbacks/pagerduty/pagerduty.py +9 -12
- litellm_enterprise/enterprise_callbacks/send_emails/base_email.py +61 -1
- litellm_enterprise/integrations/custom_guardrail.py +1 -2
- litellm_enterprise/proxy/common_utils/check_batch_cost.py +3 -4
- litellm_enterprise/proxy/hooks/managed_files.py +6 -24
- litellm_enterprise/proxy/management_endpoints/internal_user_endpoints.py +0 -1
- litellm_enterprise/proxy/management_endpoints/key_management_endpoints.py +12 -0
- litellm_enterprise/proxy/vector_stores/endpoints.py +49 -7
- litellm_enterprise/types/enterprise_callbacks/send_emails.py +14 -2
- {litellm_enterprise-0.1.20.dist-info → litellm_enterprise-0.1.21.dist-info}/METADATA +1 -1
- {litellm_enterprise-0.1.20.dist-info → litellm_enterprise-0.1.21.dist-info}/RECORD +21 -18
- litellm_enterprise/integrations/prometheus.py +0 -2361
- litellm_enterprise/proxy/guardrails/endpoints.py +0 -41
- {litellm_enterprise-0.1.20.dist-info → litellm_enterprise-0.1.21.dist-info}/LICENSE.md +0 -0
- {litellm_enterprise-0.1.20.dist-info → litellm_enterprise-0.1.21.dist-info}/WHEEL +0 -0
litellm_enterprise/integrations/prometheus.py
@@ -1,2361 +0,0 @@
-# used for /metrics endpoint on LiteLLM Proxy
-#### What this does ####
-# On success, log events to Prometheus
-import sys
-from datetime import datetime, timedelta
-from typing import (
-    TYPE_CHECKING,
-    Any,
-    Awaitable,
-    Callable,
-    Dict,
-    List,
-    Literal,
-    Optional,
-    Tuple,
-    cast,
-)
-
-import litellm
-from litellm._logging import print_verbose, verbose_logger
-from litellm.integrations.custom_logger import CustomLogger
-from litellm.proxy._types import LiteLLM_TeamTable, UserAPIKeyAuth
-from litellm.types.integrations.prometheus import *
-from litellm.types.utils import StandardLoggingPayload
-from litellm.utils import get_end_user_id_for_cost_tracking
-
-if TYPE_CHECKING:
-    from apscheduler.schedulers.asyncio import AsyncIOScheduler
-else:
-    AsyncIOScheduler = Any
-
-
-class PrometheusLogger(CustomLogger):
-    # Class variables or attributes
-    def __init__(
-        self,
-        **kwargs,
-    ):
-        try:
-            from prometheus_client import Counter, Gauge, Histogram
-
-            from litellm.proxy.proxy_server import CommonProxyErrors, premium_user
-
-            # Always initialize label_filters, even for non-premium users
-            self.label_filters = self._parse_prometheus_config()
-
-            if premium_user is not True:
-                verbose_logger.warning(
-                    f"🚨🚨🚨 Prometheus Metrics is on LiteLLM Enterprise\n🚨 {CommonProxyErrors.not_premium_user.value}"
-                )
-                self.litellm_not_a_premium_user_metric = Counter(
-                    name="litellm_not_a_premium_user_metric",
-                    documentation=f"🚨🚨🚨 Prometheus Metrics is on LiteLLM Enterprise. 🚨 {CommonProxyErrors.not_premium_user.value}",
-                )
-                return
-
-            # Create metric factory functions
-            self._counter_factory = self._create_metric_factory(Counter)
-            self._gauge_factory = self._create_metric_factory(Gauge)
-            self._histogram_factory = self._create_metric_factory(Histogram)
-
-            self.litellm_proxy_failed_requests_metric = self._counter_factory(
-                name="litellm_proxy_failed_requests_metric",
-                documentation="Total number of failed responses from proxy - the client did not get a success response from litellm proxy",
-                labelnames=self.get_labels_for_metric(
-                    "litellm_proxy_failed_requests_metric"
-                ),
-            )
-
-            self.litellm_proxy_total_requests_metric = self._counter_factory(
-                name="litellm_proxy_total_requests_metric",
-                documentation="Total number of requests made to the proxy server - track number of client side requests",
-                labelnames=self.get_labels_for_metric(
-                    "litellm_proxy_total_requests_metric"
-                ),
-            )
-
-            # request latency metrics
-            self.litellm_request_total_latency_metric = self._histogram_factory(
-                "litellm_request_total_latency_metric",
-                "Total latency (seconds) for a request to LiteLLM",
-                labelnames=self.get_labels_for_metric(
-                    "litellm_request_total_latency_metric"
-                ),
-                buckets=LATENCY_BUCKETS,
-            )
-
-            self.litellm_llm_api_latency_metric = self._histogram_factory(
-                "litellm_llm_api_latency_metric",
-                "Total latency (seconds) for a models LLM API call",
-                labelnames=self.get_labels_for_metric("litellm_llm_api_latency_metric"),
-                buckets=LATENCY_BUCKETS,
-            )
-
-            self.litellm_llm_api_time_to_first_token_metric = self._histogram_factory(
-                "litellm_llm_api_time_to_first_token_metric",
-                "Time to first token for a models LLM API call",
-                # labelnames=[
-                #     "model",
-                #     "hashed_api_key",
-                #     "api_key_alias",
-                #     "team",
-                #     "team_alias",
-                # ],
-                labelnames=self.get_labels_for_metric(
-                    "litellm_llm_api_time_to_first_token_metric"
-                ),
-                buckets=LATENCY_BUCKETS,
-            )
-
-            # Counter for spend
-            self.litellm_spend_metric = self._counter_factory(
-                "litellm_spend_metric",
-                "Total spend on LLM requests",
-                labelnames=self.get_labels_for_metric("litellm_spend_metric"),
-            )
-
-            # Counter for total_output_tokens
-            self.litellm_tokens_metric = self._counter_factory(
-                "litellm_total_tokens_metric",
-                "Total number of input + output tokens from LLM requests",
-                labelnames=self.get_labels_for_metric("litellm_total_tokens_metric"),
-            )
-
-            self.litellm_input_tokens_metric = self._counter_factory(
-                "litellm_input_tokens_metric",
-                "Total number of input tokens from LLM requests",
-                labelnames=self.get_labels_for_metric("litellm_input_tokens_metric"),
-            )
-
-            self.litellm_output_tokens_metric = self._counter_factory(
-                "litellm_output_tokens_metric",
-                "Total number of output tokens from LLM requests",
-                labelnames=self.get_labels_for_metric("litellm_output_tokens_metric"),
-            )
-
-            # Remaining Budget for Team
-            self.litellm_remaining_team_budget_metric = self._gauge_factory(
-                "litellm_remaining_team_budget_metric",
-                "Remaining budget for team",
-                labelnames=self.get_labels_for_metric(
-                    "litellm_remaining_team_budget_metric"
-                ),
-            )
-
-            # Max Budget for Team
-            self.litellm_team_max_budget_metric = self._gauge_factory(
-                "litellm_team_max_budget_metric",
-                "Maximum budget set for team",
-                labelnames=self.get_labels_for_metric("litellm_team_max_budget_metric"),
-            )
-
-            # Team Budget Reset At
-            self.litellm_team_budget_remaining_hours_metric = self._gauge_factory(
-                "litellm_team_budget_remaining_hours_metric",
-                "Remaining days for team budget to be reset",
-                labelnames=self.get_labels_for_metric(
-                    "litellm_team_budget_remaining_hours_metric"
-                ),
-            )
-
-            # Remaining Budget for API Key
-            self.litellm_remaining_api_key_budget_metric = self._gauge_factory(
-                "litellm_remaining_api_key_budget_metric",
-                "Remaining budget for api key",
-                labelnames=self.get_labels_for_metric(
-                    "litellm_remaining_api_key_budget_metric"
-                ),
-            )
-
-            # Max Budget for API Key
-            self.litellm_api_key_max_budget_metric = self._gauge_factory(
-                "litellm_api_key_max_budget_metric",
-                "Maximum budget set for api key",
-                labelnames=self.get_labels_for_metric(
-                    "litellm_api_key_max_budget_metric"
-                ),
-            )
-
-            self.litellm_api_key_budget_remaining_hours_metric = self._gauge_factory(
-                "litellm_api_key_budget_remaining_hours_metric",
-                "Remaining hours for api key budget to be reset",
-                labelnames=self.get_labels_for_metric(
-                    "litellm_api_key_budget_remaining_hours_metric"
-                ),
-            )
-
-            ########################################
-            # LiteLLM Virtual API KEY metrics
-            ########################################
-            # Remaining MODEL RPM limit for API Key
-            self.litellm_remaining_api_key_requests_for_model = self._gauge_factory(
-                "litellm_remaining_api_key_requests_for_model",
-                "Remaining Requests API Key can make for model (model based rpm limit on key)",
-                labelnames=["hashed_api_key", "api_key_alias", "model"],
-            )
-
-            # Remaining MODEL TPM limit for API Key
-            self.litellm_remaining_api_key_tokens_for_model = self._gauge_factory(
-                "litellm_remaining_api_key_tokens_for_model",
-                "Remaining Tokens API Key can make for model (model based tpm limit on key)",
-                labelnames=["hashed_api_key", "api_key_alias", "model"],
-            )
-
-            ########################################
-            # LLM API Deployment Metrics / analytics
-            ########################################
-
-            # Remaining Rate Limit for model
-            self.litellm_remaining_requests_metric = self._gauge_factory(
-                "litellm_remaining_requests",
-                "LLM Deployment Analytics - remaining requests for model, returned from LLM API Provider",
-                labelnames=self.get_labels_for_metric(
-                    "litellm_remaining_requests_metric"
-                ),
-            )
-
-            self.litellm_remaining_tokens_metric = self._gauge_factory(
-                "litellm_remaining_tokens",
-                "remaining tokens for model, returned from LLM API Provider",
-                labelnames=self.get_labels_for_metric(
-                    "litellm_remaining_tokens_metric"
-                ),
-            )
-
-            self.litellm_overhead_latency_metric = self._histogram_factory(
-                "litellm_overhead_latency_metric",
-                "Latency overhead (milliseconds) added by LiteLLM processing",
-                labelnames=self.get_labels_for_metric(
-                    "litellm_overhead_latency_metric"
-                ),
-                buckets=LATENCY_BUCKETS,
-            )
-            # llm api provider budget metrics
-            self.litellm_provider_remaining_budget_metric = self._gauge_factory(
-                "litellm_provider_remaining_budget_metric",
-                "Remaining budget for provider - used when you set provider budget limits",
-                labelnames=["api_provider"],
-            )
-
-            # Metric for deployment state
-            self.litellm_deployment_state = self._gauge_factory(
-                "litellm_deployment_state",
-                "LLM Deployment Analytics - The state of the deployment: 0 = healthy, 1 = partial outage, 2 = complete outage",
-                labelnames=self.get_labels_for_metric("litellm_deployment_state"),
-            )
-
-            self.litellm_deployment_cooled_down = self._counter_factory(
-                "litellm_deployment_cooled_down",
-                "LLM Deployment Analytics - Number of times a deployment has been cooled down by LiteLLM load balancing logic. exception_status is the status of the exception that caused the deployment to be cooled down",
-                # labelnames=_logged_llm_labels + [EXCEPTION_STATUS],
-                labelnames=self.get_labels_for_metric("litellm_deployment_cooled_down"),
-            )
-
-            self.litellm_deployment_success_responses = self._counter_factory(
-                name="litellm_deployment_success_responses",
-                documentation="LLM Deployment Analytics - Total number of successful LLM API calls via litellm",
-                labelnames=self.get_labels_for_metric(
-                    "litellm_deployment_success_responses"
-                ),
-            )
-            self.litellm_deployment_failure_responses = self._counter_factory(
-                name="litellm_deployment_failure_responses",
-                documentation="LLM Deployment Analytics - Total number of failed LLM API calls for a specific LLM deploymeny. exception_status is the status of the exception from the llm api",
-                labelnames=self.get_labels_for_metric(
-                    "litellm_deployment_failure_responses"
-                ),
-            )
-
-            self.litellm_deployment_total_requests = self._counter_factory(
-                name="litellm_deployment_total_requests",
-                documentation="LLM Deployment Analytics - Total number of LLM API calls via litellm - success + failure",
-                labelnames=self.get_labels_for_metric(
-                    "litellm_deployment_total_requests"
-                ),
-            )
-
-            # Deployment Latency tracking
-            self.litellm_deployment_latency_per_output_token = self._histogram_factory(
-                name="litellm_deployment_latency_per_output_token",
-                documentation="LLM Deployment Analytics - Latency per output token",
-                labelnames=self.get_labels_for_metric(
-                    "litellm_deployment_latency_per_output_token"
-                ),
-            )
-
-            self.litellm_deployment_successful_fallbacks = self._counter_factory(
-                "litellm_deployment_successful_fallbacks",
-                "LLM Deployment Analytics - Number of successful fallback requests from primary model -> fallback model",
-                self.get_labels_for_metric("litellm_deployment_successful_fallbacks"),
-            )
-
-            self.litellm_deployment_failed_fallbacks = self._counter_factory(
-                "litellm_deployment_failed_fallbacks",
-                "LLM Deployment Analytics - Number of failed fallback requests from primary model -> fallback model",
-                self.get_labels_for_metric("litellm_deployment_failed_fallbacks"),
-            )
-
-            self.litellm_llm_api_failed_requests_metric = self._counter_factory(
-                name="litellm_llm_api_failed_requests_metric",
-                documentation="deprecated - use litellm_proxy_failed_requests_metric",
-                labelnames=[
-                    "end_user",
-                    "hashed_api_key",
-                    "api_key_alias",
-                    "model",
-                    "team",
-                    "team_alias",
-                    "user",
-                ],
-            )
-
-            self.litellm_requests_metric = self._counter_factory(
-                name="litellm_requests_metric",
-                documentation="deprecated - use litellm_proxy_total_requests_metric. Total number of LLM calls to litellm - track total per API Key, team, user",
-                labelnames=self.get_labels_for_metric("litellm_requests_metric"),
-            )
-
-        except Exception as e:
-            print_verbose(f"Got exception on init prometheus client {str(e)}")
-            raise e
-
-    def _parse_prometheus_config(self) -> Dict[str, List[str]]:
-        """Parse prometheus metrics configuration for label filtering and enabled metrics"""
-        import litellm
-        from litellm.types.integrations.prometheus import PrometheusMetricsConfig
-
-        config = litellm.prometheus_metrics_config
-
-        # If no config is provided, return empty dict (no filtering)
-        if not config:
-            return {}
-
-        verbose_logger.debug(f"prometheus config: {config}")
-
-        # Parse and validate all configuration groups
-        parsed_configs = []
-        self.enabled_metrics = set()
-
-        for group_config in config:
-            # Validate configuration using Pydantic
-            if isinstance(group_config, dict):
-                parsed_config = PrometheusMetricsConfig(**group_config)
-            else:
-                parsed_config = group_config
-
-            parsed_configs.append(parsed_config)
-            self.enabled_metrics.update(parsed_config.metrics)
-
-        # Validate all configurations
-        validation_results = self._validate_all_configurations(parsed_configs)
-
-        if validation_results.has_errors:
-            self._pretty_print_validation_errors(validation_results)
-            error_message = "Configuration validation failed:\n" + "\n".join(
-                validation_results.all_error_messages
-            )
-            raise ValueError(error_message)
-
-        # Build label filters from valid configurations
-        label_filters = self._build_label_filters(parsed_configs)
-
-        # Pretty print the processed configuration
-        self._pretty_print_prometheus_config(label_filters)
-        return label_filters
-
-    def _validate_all_configurations(self, parsed_configs: List) -> ValidationResults:
-        """Validate all metric configurations and return collected errors"""
-        metric_errors = []
-        label_errors = []
-
-        for config in parsed_configs:
-            for metric_name in config.metrics:
-                # Validate metric name
-                metric_error = self._validate_single_metric_name(metric_name)
-                if metric_error:
-                    metric_errors.append(metric_error)
-                    continue  # Skip label validation if metric name is invalid
-
-                # Validate labels if provided
-                if config.include_labels:
-                    label_error = self._validate_single_metric_labels(
-                        metric_name, config.include_labels
-                    )
-                    if label_error:
-                        label_errors.append(label_error)
-
-        return ValidationResults(metric_errors=metric_errors, label_errors=label_errors)
-
-    def _validate_single_metric_name(
-        self, metric_name: str
-    ) -> Optional[MetricValidationError]:
-        """Validate a single metric name"""
-        from typing import get_args
-
-        if metric_name not in set(get_args(DEFINED_PROMETHEUS_METRICS)):
-            return MetricValidationError(
-                metric_name=metric_name,
-                valid_metrics=get_args(DEFINED_PROMETHEUS_METRICS),
-            )
-        return None
-
-    def _validate_single_metric_labels(
-        self, metric_name: str, labels: List[str]
-    ) -> Optional[LabelValidationError]:
-        """Validate labels for a single metric"""
-        from typing import cast
-
-        # Get valid labels for this metric from PrometheusMetricLabels
-        valid_labels = PrometheusMetricLabels.get_labels(
-            cast(DEFINED_PROMETHEUS_METRICS, metric_name)
-        )
-
-        # Find invalid labels
-        invalid_labels = [label for label in labels if label not in valid_labels]
-
-        if invalid_labels:
-            return LabelValidationError(
-                metric_name=metric_name,
-                invalid_labels=invalid_labels,
-                valid_labels=valid_labels,
-            )
-        return None
-
-    def _build_label_filters(self, parsed_configs: List) -> Dict[str, List[str]]:
-        """Build label filters from validated configurations"""
-        label_filters = {}
-
-        for config in parsed_configs:
-            for metric_name in config.metrics:
-                if config.include_labels:
-                    # Only add if metric name is valid (validation already passed)
-                    if self._validate_single_metric_name(metric_name) is None:
-                        label_filters[metric_name] = config.include_labels
-
-        return label_filters
-
-    def _validate_configured_metric_labels(self, metric_name: str, labels: List[str]):
-        """
-        Ensure that all the configured labels are valid for the metric
-
-        Raises ValueError if the metric labels are invalid and pretty prints the error
-        """
-        label_error = self._validate_single_metric_labels(metric_name, labels)
-        if label_error:
-            self._pretty_print_invalid_labels_error(
-                metric_name=label_error.metric_name,
-                invalid_labels=label_error.invalid_labels,
-                valid_labels=label_error.valid_labels,
-            )
-            raise ValueError(label_error.message)
-
-        return True
-
-    #########################################################
-    # Pretty print functions
-    #########################################################
-
-    def _pretty_print_validation_errors(
-        self, validation_results: ValidationResults
-    ) -> None:
-        """Pretty print all validation errors using rich"""
-        try:
-            from rich.console import Console
-            from rich.panel import Panel
-            from rich.table import Table
-            from rich.text import Text
-
-            console = Console()
-
-            # Create error panel title
-            title = Text("🚨🚨 Configuration Validation Errors", style="bold red")
-
-            # Print main error panel
-            console.print("\n")
-            console.print(Panel(title, border_style="red"))
-
-            # Show invalid metric names if any
-            if validation_results.metric_errors:
-                invalid_metrics = [
-                    e.metric_name for e in validation_results.metric_errors
-                ]
-                valid_metrics = validation_results.metric_errors[
-                    0
-                ].valid_metrics  # All should have same valid metrics
-
-                metrics_error_text = Text(
-                    f"Invalid Metric Names: {', '.join(invalid_metrics)}",
-                    style="bold red",
-                )
-                console.print(Panel(metrics_error_text, border_style="red"))
-
-                metrics_table = Table(
-                    title="📊 Valid Metric Names",
-                    show_header=True,
-                    header_style="bold green",
-                    title_justify="left",
-                    border_style="green",
-                )
-                metrics_table.add_column(
-                    "Available Metrics", style="cyan", no_wrap=True
-                )
-
-                for metric in sorted(valid_metrics):
-                    metrics_table.add_row(metric)
-
-                console.print(metrics_table)
-
-            # Show invalid labels if any
-            if validation_results.label_errors:
-                for error in validation_results.label_errors:
-                    labels_error_text = Text(
-                        f"Invalid Labels for '{error.metric_name}': {', '.join(error.invalid_labels)}",
-                        style="bold red",
-                    )
-                    console.print(Panel(labels_error_text, border_style="red"))
-
-                    labels_table = Table(
-                        title=f"🏷️ Valid Labels for '{error.metric_name}'",
-                        show_header=True,
-                        header_style="bold green",
-                        title_justify="left",
-                        border_style="green",
-                    )
-                    labels_table.add_column("Valid Labels", style="cyan", no_wrap=True)
-
-                    for label in sorted(error.valid_labels):
-                        labels_table.add_row(label)
-
-                    console.print(labels_table)
-
-            console.print("\n")
-
-        except ImportError:
-            # Fallback to simple logging if rich is not available
-            for metric_error in validation_results.metric_errors:
-                verbose_logger.error(metric_error.message)
-            for label_error in validation_results.label_errors:
-                verbose_logger.error(label_error.message)
-
-    def _pretty_print_invalid_labels_error(
-        self, metric_name: str, invalid_labels: List[str], valid_labels: List[str]
-    ) -> None:
-        """Pretty print error message for invalid labels using rich"""
-        try:
-            from rich.console import Console
-            from rich.panel import Panel
-            from rich.table import Table
-            from rich.text import Text
-
-            console = Console()
-
-            # Create error panel title
-            title = Text(
-                f"🚨🚨 Invalid Labels for Metric: '{metric_name}'\nInvalid labels: {', '.join(invalid_labels)}\nPlease specify only valid labels below",
-                style="bold red",
-            )
-
-            # Create valid labels table
-            labels_table = Table(
-                title="🏷️ Valid Labels for this Metric",
-                show_header=True,
-                header_style="bold green",
-                title_justify="left",
-                border_style="green",
-            )
-            labels_table.add_column("Valid Labels", style="cyan", no_wrap=True)
-
-            for label in sorted(valid_labels):
-                labels_table.add_row(label)
-
-            # Print everything in a nice panel
-            console.print("\n")
-            console.print(Panel(title, border_style="red"))
-            console.print(labels_table)
-            console.print("\n")
-
-        except ImportError:
-            # Fallback to simple logging if rich is not available
-            verbose_logger.error(
-                f"Invalid labels for metric '{metric_name}': {invalid_labels}. Valid labels: {sorted(valid_labels)}"
-            )
-
-    def _pretty_print_invalid_metric_error(
-        self, invalid_metric_name: str, valid_metrics: tuple
-    ) -> None:
-        """Pretty print error message for invalid metric name using rich"""
-        try:
-            from rich.console import Console
-            from rich.panel import Panel
-            from rich.table import Table
-            from rich.text import Text
-
-            console = Console()
-
-            # Create error panel title
-            title = Text(
-                f"🚨🚨 Invalid Metric Name: '{invalid_metric_name}'\nPlease specify one of the allowed metrics below",
-                style="bold red",
-            )
-
-            # Create valid metrics table
-            metrics_table = Table(
-                title="📊 Valid Metric Names",
-                show_header=True,
-                header_style="bold green",
-                title_justify="left",
-                border_style="green",
-            )
-            metrics_table.add_column("Available Metrics", style="cyan", no_wrap=True)
-
-            for metric in sorted(valid_metrics):
-                metrics_table.add_row(metric)
-
-            # Print everything in a nice panel
-            console.print("\n")
-            console.print(Panel(title, border_style="red"))
-            console.print(metrics_table)
-            console.print("\n")
-
-        except ImportError:
-            # Fallback to simple logging if rich is not available
-            verbose_logger.error(
-                f"Invalid metric name: {invalid_metric_name}. Valid metrics: {sorted(valid_metrics)}"
-            )
-
-    #########################################################
-    # End of pretty print functions
-    #########################################################
-
-    def _valid_metric_name(self, metric_name: str):
-        """
-        Raises ValueError if the metric name is invalid and pretty prints the error
-        """
-        error = self._validate_single_metric_name(metric_name)
-        if error:
-            self._pretty_print_invalid_metric_error(
-                invalid_metric_name=error.metric_name, valid_metrics=error.valid_metrics
-            )
-            raise ValueError(error.message)
-
-    def _pretty_print_prometheus_config(
-        self, label_filters: Dict[str, List[str]]
-    ) -> None:
-        """Pretty print the processed prometheus configuration using rich"""
-        try:
-            from rich.console import Console
-            from rich.panel import Panel
-            from rich.table import Table
-            from rich.text import Text
-
-            console = Console()
-
-            # Create main panel title
-            title = Text("Prometheus Configuration Processed", style="bold blue")
-
-            # Create enabled metrics table
-            metrics_table = Table(
-                title="📊 Enabled Metrics",
-                show_header=True,
-                header_style="bold magenta",
-                title_justify="left",
-            )
-            metrics_table.add_column("Metric Name", style="cyan", no_wrap=True)
-
-            if hasattr(self, "enabled_metrics") and self.enabled_metrics:
-                for metric in sorted(self.enabled_metrics):
-                    metrics_table.add_row(metric)
-            else:
-                metrics_table.add_row(
-                    "[yellow]All metrics enabled (no filter applied)[/yellow]"
-                )
-
-            # Create label filters table
-            labels_table = Table(
-                title="🏷️ Label Filters",
-                show_header=True,
-                header_style="bold green",
-                title_justify="left",
-            )
-            labels_table.add_column("Metric Name", style="cyan", no_wrap=True)
-            labels_table.add_column("Allowed Labels", style="yellow")
-
-            if label_filters:
-                for metric_name, labels in sorted(label_filters.items()):
-                    labels_str = (
-                        ", ".join(labels)
-                        if labels
-                        else "[dim]No labels specified[/dim]"
-                    )
-                    labels_table.add_row(metric_name, labels_str)
-            else:
-                labels_table.add_row(
-                    "[yellow]No label filtering applied[/yellow]",
-                    "[dim]All default labels will be used[/dim]",
-                )
-
-            # Print everything in a nice panel
-            console.print("\n")
-            console.print(Panel(title, border_style="blue"))
-            console.print(metrics_table)
-            console.print(labels_table)
-            console.print("\n")
-
-        except ImportError:
-            # Fallback to simple logging if rich is not available
-            verbose_logger.info(
-                f"Enabled metrics: {sorted(self.enabled_metrics) if hasattr(self, 'enabled_metrics') else 'All metrics'}"
-            )
-            verbose_logger.info(f"Label filters: {label_filters}")
-
-    def _is_metric_enabled(self, metric_name: str) -> bool:
-        """Check if a metric is enabled based on configuration"""
-        # If no specific configuration is provided, enable all metrics (default behavior)
-        if not hasattr(self, "enabled_metrics"):
-            return True
-
-        # If enabled_metrics is empty, enable all metrics
-        if not self.enabled_metrics:
-            return True
-
-        return metric_name in self.enabled_metrics
-
-    def _create_metric_factory(self, metric_class):
-        """Create a factory function that returns either a real metric or a no-op metric"""
-
-        def factory(*args, **kwargs):
-            # Extract metric name from the first argument or 'name' keyword argument
-            metric_name = args[0] if args else kwargs.get("name", "")
-
-            if self._is_metric_enabled(metric_name):
-                return metric_class(*args, **kwargs)
-            else:
-                return NoOpMetric()
-
-        return factory
-
-    def get_labels_for_metric(
-        self, metric_name: DEFINED_PROMETHEUS_METRICS
-    ) -> List[str]:
-        """
-        Get the labels for a metric, filtered if configured
-        """
-        # Get default labels for this metric from PrometheusMetricLabels
-        default_labels = PrometheusMetricLabels.get_labels(metric_name)
-
-        # If no label filtering is configured for this metric, use default labels
-        if metric_name not in self.label_filters:
-            return default_labels
-
-        # Get configured labels for this metric
-        configured_labels = self.label_filters[metric_name]
-
-        # Return intersection of configured and default labels to ensure we only use valid labels
-        filtered_labels = [
-            label for label in default_labels if label in configured_labels
-        ]
-
-        return filtered_labels
-
-    async def async_log_success_event(self, kwargs, response_obj, start_time, end_time):
-        # Define prometheus client
-        from litellm.types.utils import StandardLoggingPayload
-
-        verbose_logger.debug(
-            f"prometheus Logging - Enters success logging function for kwargs {kwargs}"
-        )
-
-        # unpack kwargs
-        standard_logging_payload: Optional[StandardLoggingPayload] = kwargs.get(
-            "standard_logging_object"
-        )
-
-        if standard_logging_payload is None or not isinstance(
-            standard_logging_payload, dict
-        ):
-            raise ValueError(
-                f"standard_logging_object is required, got={standard_logging_payload}"
-            )
-
-        model = kwargs.get("model", "")
-        litellm_params = kwargs.get("litellm_params", {}) or {}
-        _metadata = litellm_params.get("metadata", {})
-        end_user_id = get_end_user_id_for_cost_tracking(
-            litellm_params, service_type="prometheus"
-        )
-        user_id = standard_logging_payload["metadata"]["user_api_key_user_id"]
-        user_api_key = standard_logging_payload["metadata"]["user_api_key_hash"]
-        user_api_key_alias = standard_logging_payload["metadata"]["user_api_key_alias"]
-        user_api_team = standard_logging_payload["metadata"]["user_api_key_team_id"]
-        user_api_team_alias = standard_logging_payload["metadata"][
-            "user_api_key_team_alias"
-        ]
-        output_tokens = standard_logging_payload["completion_tokens"]
-        tokens_used = standard_logging_payload["total_tokens"]
-        response_cost = standard_logging_payload["response_cost"]
-        _requester_metadata = standard_logging_payload["metadata"].get(
-            "requester_metadata"
-        )
-        if standard_logging_payload is not None and isinstance(
-            standard_logging_payload, dict
-        ):
-            _tags = standard_logging_payload["request_tags"]
-        else:
-            _tags = []
-
-        print_verbose(
-            f"inside track_prometheus_metrics, model {model}, response_cost {response_cost}, tokens_used {tokens_used}, end_user_id {end_user_id}, user_api_key {user_api_key}"
-        )
-
-        enum_values = UserAPIKeyLabelValues(
-            end_user=end_user_id,
-            hashed_api_key=user_api_key,
-            api_key_alias=user_api_key_alias,
-            requested_model=standard_logging_payload["model_group"],
-            model_group=standard_logging_payload["model_group"],
-            team=user_api_team,
-            team_alias=user_api_team_alias,
-            user=user_id,
-            user_email=standard_logging_payload["metadata"]["user_api_key_user_email"],
-            status_code="200",
-            model=model,
-            litellm_model_name=model,
-            tags=_tags,
-            model_id=standard_logging_payload["model_id"],
-            api_base=standard_logging_payload["api_base"],
-            api_provider=standard_logging_payload["custom_llm_provider"],
-            exception_status=None,
-            exception_class=None,
-            custom_metadata_labels=get_custom_labels_from_metadata(
-                metadata=standard_logging_payload["metadata"].get("requester_metadata")
-                or {}
-            ),
-            route=standard_logging_payload["metadata"].get(
-                "user_api_key_request_route"
-            ),
-        )
-
-        if (
-            user_api_key is not None
-            and isinstance(user_api_key, str)
-            and user_api_key.startswith("sk-")
-        ):
-            from litellm.proxy.utils import hash_token
-
-            user_api_key = hash_token(user_api_key)
-
-        # increment total LLM requests and spend metric
-        self._increment_top_level_request_and_spend_metrics(
-            end_user_id=end_user_id,
-            user_api_key=user_api_key,
-            user_api_key_alias=user_api_key_alias,
-            model=model,
-            user_api_team=user_api_team,
-            user_api_team_alias=user_api_team_alias,
-            user_id=user_id,
-            response_cost=response_cost,
-            enum_values=enum_values,
-        )
-
-        # input, output, total token metrics
-        self._increment_token_metrics(
-            # why type ignore below?
-            # 1. We just checked if isinstance(standard_logging_payload, dict). Pyright complains.
-            # 2. Pyright does not allow us to run isinstance(standard_logging_payload, StandardLoggingPayload) <- this would be ideal
-            standard_logging_payload=standard_logging_payload,  # type: ignore
-            end_user_id=end_user_id,
-            user_api_key=user_api_key,
-            user_api_key_alias=user_api_key_alias,
-            model=model,
-            user_api_team=user_api_team,
-            user_api_team_alias=user_api_team_alias,
-            user_id=user_id,
-            enum_values=enum_values,
-        )
-
-        # remaining budget metrics
-        await self._increment_remaining_budget_metrics(
-            user_api_team=user_api_team,
-            user_api_team_alias=user_api_team_alias,
-            user_api_key=user_api_key,
-            user_api_key_alias=user_api_key_alias,
-            litellm_params=litellm_params,
-            response_cost=response_cost,
-        )
-
-        # set proxy virtual key rpm/tpm metrics
-        self._set_virtual_key_rate_limit_metrics(
-            user_api_key=user_api_key,
-            user_api_key_alias=user_api_key_alias,
-            kwargs=kwargs,
-            metadata=_metadata,
-        )
-
-        # set latency metrics
-        self._set_latency_metrics(
-            kwargs=kwargs,
-            model=model,
-            user_api_key=user_api_key,
-            user_api_key_alias=user_api_key_alias,
-            user_api_team=user_api_team,
-            user_api_team_alias=user_api_team_alias,
-            # why type ignore below?
-            # 1. We just checked if isinstance(standard_logging_payload, dict). Pyright complains.
-            # 2. Pyright does not allow us to run isinstance(standard_logging_payload, StandardLoggingPayload) <- this would be ideal
-            enum_values=enum_values,
-        )
-
-        # set x-ratelimit headers
-        self.set_llm_deployment_success_metrics(
-            kwargs, start_time, end_time, enum_values, output_tokens
-        )
-
-        if (
-            standard_logging_payload["stream"] is True
-        ):  # log successful streaming requests from logging event hook.
-            _labels = prometheus_label_factory(
-                supported_enum_labels=self.get_labels_for_metric(
-                    metric_name="litellm_proxy_total_requests_metric"
-                ),
-                enum_values=enum_values,
-            )
-            self.litellm_proxy_total_requests_metric.labels(**_labels).inc()
-
-    def _increment_token_metrics(
-        self,
-        standard_logging_payload: StandardLoggingPayload,
-        end_user_id: Optional[str],
-        user_api_key: Optional[str],
-        user_api_key_alias: Optional[str],
-        model: Optional[str],
-        user_api_team: Optional[str],
-        user_api_team_alias: Optional[str],
-        user_id: Optional[str],
-        enum_values: UserAPIKeyLabelValues,
-    ):
-        verbose_logger.debug("prometheus Logging - Enters token metrics function")
-        # token metrics
-
-        if standard_logging_payload is not None and isinstance(
-            standard_logging_payload, dict
-        ):
-            _tags = standard_logging_payload["request_tags"]
-
-        _labels = prometheus_label_factory(
-            supported_enum_labels=self.get_labels_for_metric(
-                metric_name="litellm_proxy_total_requests_metric"
-            ),
-            enum_values=enum_values,
-        )
-
-        _labels = prometheus_label_factory(
-            supported_enum_labels=self.get_labels_for_metric(
-                metric_name="litellm_total_tokens_metric"
-            ),
-            enum_values=enum_values,
-        )
-        self.litellm_tokens_metric.labels(**_labels).inc(
-            standard_logging_payload["total_tokens"]
-        )
-
-        _labels = prometheus_label_factory(
-            supported_enum_labels=self.get_labels_for_metric(
-                metric_name="litellm_input_tokens_metric"
-            ),
-            enum_values=enum_values,
-        )
-        self.litellm_input_tokens_metric.labels(**_labels).inc(
-            standard_logging_payload["prompt_tokens"]
-        )
-
-        _labels = prometheus_label_factory(
-            supported_enum_labels=self.get_labels_for_metric(
-                metric_name="litellm_output_tokens_metric"
-            ),
-            enum_values=enum_values,
-        )
-
-        self.litellm_output_tokens_metric.labels(**_labels).inc(
-            standard_logging_payload["completion_tokens"]
-        )
-
-    async def _increment_remaining_budget_metrics(
-        self,
-        user_api_team: Optional[str],
-        user_api_team_alias: Optional[str],
-        user_api_key: Optional[str],
-        user_api_key_alias: Optional[str],
-        litellm_params: dict,
-        response_cost: float,
-    ):
-        _team_spend = litellm_params.get("metadata", {}).get(
-            "user_api_key_team_spend", None
-        )
-        _team_max_budget = litellm_params.get("metadata", {}).get(
-            "user_api_key_team_max_budget", None
-        )
-
-        _api_key_spend = litellm_params.get("metadata", {}).get(
-            "user_api_key_spend", None
-        )
-        _api_key_max_budget = litellm_params.get("metadata", {}).get(
-            "user_api_key_max_budget", None
-        )
-        await self._set_api_key_budget_metrics_after_api_request(
-            user_api_key=user_api_key,
-            user_api_key_alias=user_api_key_alias,
-            response_cost=response_cost,
-            key_max_budget=_api_key_max_budget,
-            key_spend=_api_key_spend,
-        )
-
-        await self._set_team_budget_metrics_after_api_request(
-            user_api_team=user_api_team,
-            user_api_team_alias=user_api_team_alias,
-            team_spend=_team_spend,
-            team_max_budget=_team_max_budget,
-            response_cost=response_cost,
-        )
-
-    def _increment_top_level_request_and_spend_metrics(
-        self,
-        end_user_id: Optional[str],
-        user_api_key: Optional[str],
-        user_api_key_alias: Optional[str],
-        model: Optional[str],
-        user_api_team: Optional[str],
-        user_api_team_alias: Optional[str],
-        user_id: Optional[str],
-        response_cost: float,
-        enum_values: UserAPIKeyLabelValues,
-    ):
-        _labels = prometheus_label_factory(
-            supported_enum_labels=self.get_labels_for_metric(
-                metric_name="litellm_requests_metric"
-            ),
-            enum_values=enum_values,
-        )
-
-        self.litellm_requests_metric.labels(**_labels).inc()
-
-        _labels = prometheus_label_factory(
-            supported_enum_labels=self.get_labels_for_metric(
-                metric_name="litellm_spend_metric"
-            ),
-            enum_values=enum_values,
-        )
-
-        self.litellm_spend_metric.labels(**_labels).inc(response_cost)
-
-    def _set_virtual_key_rate_limit_metrics(
-        self,
-        user_api_key: Optional[str],
-        user_api_key_alias: Optional[str],
-        kwargs: dict,
-        metadata: dict,
-    ):
-        from litellm.proxy.common_utils.callback_utils import (
-            get_model_group_from_litellm_kwargs,
-        )
-
-        # Set remaining rpm/tpm for API Key + model
-        # see parallel_request_limiter.py - variables are set there
-        model_group = get_model_group_from_litellm_kwargs(kwargs)
-        remaining_requests_variable_name = (
-            f"litellm-key-remaining-requests-{model_group}"
-        )
-        remaining_tokens_variable_name = f"litellm-key-remaining-tokens-{model_group}"
-
-        remaining_requests = (
-            metadata.get(remaining_requests_variable_name, sys.maxsize) or sys.maxsize
-        )
-        remaining_tokens = (
-            metadata.get(remaining_tokens_variable_name, sys.maxsize) or sys.maxsize
-        )
-
-        self.litellm_remaining_api_key_requests_for_model.labels(
-            user_api_key, user_api_key_alias, model_group
-        ).set(remaining_requests)
-
-        self.litellm_remaining_api_key_tokens_for_model.labels(
-            user_api_key, user_api_key_alias, model_group
-        ).set(remaining_tokens)
-
-    def _set_latency_metrics(
-        self,
-        kwargs: dict,
-        model: Optional[str],
-        user_api_key: Optional[str],
-        user_api_key_alias: Optional[str],
-        user_api_team: Optional[str],
-        user_api_team_alias: Optional[str],
-        enum_values: UserAPIKeyLabelValues,
-    ):
-        # latency metrics
-        end_time: datetime = kwargs.get("end_time") or datetime.now()
-        start_time: Optional[datetime] = kwargs.get("start_time")
-        api_call_start_time = kwargs.get("api_call_start_time", None)
-        completion_start_time = kwargs.get("completion_start_time", None)
-        time_to_first_token_seconds = self._safe_duration_seconds(
-            start_time=api_call_start_time,
-            end_time=completion_start_time,
-        )
-        if (
-            time_to_first_token_seconds is not None
-            and kwargs.get("stream", False) is True  # only emit for streaming requests
-        ):
-            self.litellm_llm_api_time_to_first_token_metric.labels(
-                model,
-                user_api_key,
-                user_api_key_alias,
-                user_api_team,
-                user_api_team_alias,
-            ).observe(time_to_first_token_seconds)
-        else:
-            verbose_logger.debug(
-                "Time to first token metric not emitted, stream option in model_parameters is not True"
-            )
-
-        api_call_total_time_seconds = self._safe_duration_seconds(
-            start_time=api_call_start_time,
-            end_time=end_time,
-        )
-        if api_call_total_time_seconds is not None:
-            _labels = prometheus_label_factory(
-                supported_enum_labels=self.get_labels_for_metric(
-                    metric_name="litellm_llm_api_latency_metric"
-                ),
-                enum_values=enum_values,
-            )
-            self.litellm_llm_api_latency_metric.labels(**_labels).observe(
-                api_call_total_time_seconds
-            )
-
-        # total request latency
-        total_time_seconds = self._safe_duration_seconds(
-            start_time=start_time,
-            end_time=end_time,
-        )
-        if total_time_seconds is not None:
-            _labels = prometheus_label_factory(
-                supported_enum_labels=self.get_labels_for_metric(
-                    metric_name="litellm_request_total_latency_metric"
-                ),
-                enum_values=enum_values,
-            )
-            self.litellm_request_total_latency_metric.labels(**_labels).observe(
-                total_time_seconds
-            )
-
-    async def async_log_failure_event(self, kwargs, response_obj, start_time, end_time):
-        from litellm.types.utils import StandardLoggingPayload
-
-        verbose_logger.debug(
-            f"prometheus Logging - Enters failure logging function for kwargs {kwargs}"
-        )
-
-        # unpack kwargs
-        model = kwargs.get("model", "")
-        standard_logging_payload: StandardLoggingPayload = kwargs.get(
-            "standard_logging_object", {}
-        )
-        litellm_params = kwargs.get("litellm_params", {}) or {}
-        end_user_id = get_end_user_id_for_cost_tracking(
-            litellm_params, service_type="prometheus"
-        )
-        user_id = standard_logging_payload["metadata"]["user_api_key_user_id"]
-        user_api_key = standard_logging_payload["metadata"]["user_api_key_hash"]
-        user_api_key_alias = standard_logging_payload["metadata"]["user_api_key_alias"]
-        user_api_team = standard_logging_payload["metadata"]["user_api_key_team_id"]
-        user_api_team_alias = standard_logging_payload["metadata"][
-            "user_api_key_team_alias"
-        ]
-        kwargs.get("exception", None)
-
-        try:
-            self.litellm_llm_api_failed_requests_metric.labels(
-                end_user_id,
-                user_api_key,
-                user_api_key_alias,
-                model,
-                user_api_team,
-                user_api_team_alias,
-                user_id,
-            ).inc()
-            self.set_llm_deployment_failure_metrics(kwargs)
-        except Exception as e:
-            verbose_logger.exception(
-                "prometheus Layer Error(): Exception occured - {}".format(str(e))
-            )
-            pass
-        pass
-
-    async def async_post_call_failure_hook(
-        self,
-        request_data: dict,
-        original_exception: Exception,
-        user_api_key_dict: UserAPIKeyAuth,
-        traceback_str: Optional[str] = None,
-    ):
-        """
-        Track client side failures
-
-        Proxy level tracking - failed client side requests
-
-        labelnames=[
-            "end_user",
-            "hashed_api_key",
-            "api_key_alias",
-            REQUESTED_MODEL,
-            "team",
-            "team_alias",
-        ] + EXCEPTION_LABELS,
-        """
-        from litellm.litellm_core_utils.litellm_logging import (
-            StandardLoggingPayloadSetup,
-        )
-
-        try:
-            _tags = StandardLoggingPayloadSetup._get_request_tags(
-                request_data.get("metadata", {}),
-                request_data.get("proxy_server_request", {}),
-            )
-            enum_values = UserAPIKeyLabelValues(
-                end_user=user_api_key_dict.end_user_id,
-                user=user_api_key_dict.user_id,
-                user_email=user_api_key_dict.user_email,
-                hashed_api_key=user_api_key_dict.api_key,
-                api_key_alias=user_api_key_dict.key_alias,
-                team=user_api_key_dict.team_id,
-                team_alias=user_api_key_dict.team_alias,
-                requested_model=request_data.get("model", ""),
-                status_code=str(getattr(original_exception, "status_code", None)),
-                exception_status=str(getattr(original_exception, "status_code", None)),
-                exception_class=self._get_exception_class_name(original_exception),
-                tags=_tags,
-                route=user_api_key_dict.request_route,
-            )
-            _labels = prometheus_label_factory(
-                supported_enum_labels=self.get_labels_for_metric(
-                    metric_name="litellm_proxy_failed_requests_metric"
-                ),
-                enum_values=enum_values,
-            )
-            self.litellm_proxy_failed_requests_metric.labels(**_labels).inc()
-
-            _labels = prometheus_label_factory(
-                supported_enum_labels=self.get_labels_for_metric(
-                    metric_name="litellm_proxy_total_requests_metric"
-                ),
-                enum_values=enum_values,
-            )
-            self.litellm_proxy_total_requests_metric.labels(**_labels).inc()
-
-        except Exception as e:
-            verbose_logger.exception(
-                "prometheus Layer Error(): Exception occured - {}".format(str(e))
-            )
-            pass
-
-    async def async_post_call_success_hook(
-        self, data: dict, user_api_key_dict: UserAPIKeyAuth, response
-    ):
-        """
-        Proxy level tracking - triggered when the proxy responds with a success response to the client
-        """
-        try:
-            from litellm.litellm_core_utils.litellm_logging import (
-                StandardLoggingPayloadSetup,
-            )
-
-            enum_values = UserAPIKeyLabelValues(
-                end_user=user_api_key_dict.end_user_id,
-                hashed_api_key=user_api_key_dict.api_key,
-                api_key_alias=user_api_key_dict.key_alias,
-                requested_model=data.get("model", ""),
-                team=user_api_key_dict.team_id,
-                team_alias=user_api_key_dict.team_alias,
-                user=user_api_key_dict.user_id,
-                user_email=user_api_key_dict.user_email,
-                status_code="200",
-                route=user_api_key_dict.request_route,
-                tags=StandardLoggingPayloadSetup._get_request_tags(
-                    data.get("metadata", {}), data.get("proxy_server_request", {})
-                ),
-            )
-            _labels = prometheus_label_factory(
-                supported_enum_labels=self.get_labels_for_metric(
-                    metric_name="litellm_proxy_total_requests_metric"
-                ),
-                enum_values=enum_values,
-            )
-            self.litellm_proxy_total_requests_metric.labels(**_labels).inc()
-
-        except Exception as e:
-            verbose_logger.exception(
-                "prometheus Layer Error(): Exception occured - {}".format(str(e))
-            )
-            pass
-
-    def set_llm_deployment_failure_metrics(self, request_kwargs: dict):
-        """
-        Sets Failure metrics when an LLM API call fails
-
-        - mark the deployment as partial outage
-        - increment deployment failure responses metric
-        - increment deployment total requests metric
-
-        Args:
-            request_kwargs: dict
-
-        """
-        try:
-            verbose_logger.debug("setting remaining tokens requests metric")
-            standard_logging_payload: StandardLoggingPayload = request_kwargs.get(
-                "standard_logging_object", {}
-            )
-            _litellm_params = request_kwargs.get("litellm_params", {}) or {}
-            litellm_model_name = request_kwargs.get("model", None)
-            model_group = standard_logging_payload.get("model_group", None)
-            api_base = standard_logging_payload.get("api_base", None)
-            model_id = standard_logging_payload.get("model_id", None)
-            exception = request_kwargs.get("exception", None)
-
-            llm_provider = _litellm_params.get("custom_llm_provider", None)
-
-            # Create enum_values for the label factory (always create for use in different metrics)
-            enum_values = UserAPIKeyLabelValues(
-                litellm_model_name=litellm_model_name,
-                model_id=model_id,
-                api_base=api_base,
-                api_provider=llm_provider,
-                exception_status=(
-                    str(getattr(exception, "status_code", None)) if exception else None
-                ),
-                exception_class=(
-                    self._get_exception_class_name(exception) if exception else None
-                ),
-                requested_model=model_group,
-                hashed_api_key=standard_logging_payload["metadata"][
-                    "user_api_key_hash"
-                ],
-                api_key_alias=standard_logging_payload["metadata"][
-                    "user_api_key_alias"
-                ],
-                team=standard_logging_payload["metadata"]["user_api_key_team_id"],
-                team_alias=standard_logging_payload["metadata"][
-                    "user_api_key_team_alias"
-                ],
-                tags=standard_logging_payload.get("request_tags", []),
-            )
-
-            """
-            log these labels
-            ["litellm_model_name", "model_id", "api_base", "api_provider"]
-            """
-            self.set_deployment_partial_outage(
-                litellm_model_name=litellm_model_name or "",
-                model_id=model_id,
-                api_base=api_base,
-                api_provider=llm_provider or "",
-            )
-            if exception is not None:
-
-                _labels = prometheus_label_factory(
-                    supported_enum_labels=self.get_labels_for_metric(
-                        metric_name="litellm_deployment_failure_responses"
-                    ),
-                    enum_values=enum_values,
-                )
-                self.litellm_deployment_failure_responses.labels(**_labels).inc()
-
-                _labels = prometheus_label_factory(
-                    supported_enum_labels=self.get_labels_for_metric(
-                        metric_name="litellm_deployment_total_requests"
-                    ),
-                    enum_values=enum_values,
-                )
-                self.litellm_deployment_total_requests.labels(**_labels).inc()
-
-            pass
-        except Exception as e:
-            verbose_logger.debug(
-                "Prometheus Error: set_llm_deployment_failure_metrics. Exception occured - {}".format(
-                    str(e)
-                )
-            )
-
-    def set_llm_deployment_success_metrics(
-        self,
-        request_kwargs: dict,
-        start_time,
-        end_time,
|
-
enum_values: UserAPIKeyLabelValues,
|
|
1395
|
-
output_tokens: float = 1.0,
|
|
1396
|
-
):
|
|
1397
|
-
|
|
1398
|
-
try:
|
|
1399
|
-
verbose_logger.debug("setting remaining tokens requests metric")
|
|
1400
|
-
standard_logging_payload: Optional[StandardLoggingPayload] = (
|
|
1401
|
-
request_kwargs.get("standard_logging_object")
|
|
1402
|
-
)
|
|
1403
|
-
|
|
1404
|
-
if standard_logging_payload is None:
|
|
1405
|
-
return
|
|
1406
|
-
|
|
1407
|
-
api_base = standard_logging_payload["api_base"]
|
|
1408
|
-
_litellm_params = request_kwargs.get("litellm_params", {}) or {}
|
|
1409
|
-
_metadata = _litellm_params.get("metadata", {})
|
|
1410
|
-
litellm_model_name = request_kwargs.get("model", None)
|
|
1411
|
-
llm_provider = _litellm_params.get("custom_llm_provider", None)
|
|
1412
|
-
_model_info = _metadata.get("model_info") or {}
|
|
1413
|
-
model_id = _model_info.get("id", None)
|
|
1414
|
-
|
|
1415
|
-
remaining_requests: Optional[int] = None
|
|
1416
|
-
remaining_tokens: Optional[int] = None
|
|
1417
|
-
if additional_headers := standard_logging_payload["hidden_params"][
|
|
1418
|
-
"additional_headers"
|
|
1419
|
-
]:
|
|
1420
|
-
# OpenAI / OpenAI Compatible headers
|
|
1421
|
-
remaining_requests = additional_headers.get(
|
|
1422
|
-
"x_ratelimit_remaining_requests", None
|
|
1423
|
-
)
|
|
1424
|
-
remaining_tokens = additional_headers.get(
|
|
1425
|
-
"x_ratelimit_remaining_tokens", None
|
|
1426
|
-
)
|
|
1427
|
-
|
|
1428
|
-
if litellm_overhead_time_ms := standard_logging_payload[
|
|
1429
|
-
"hidden_params"
|
|
1430
|
-
].get("litellm_overhead_time_ms"):
|
|
1431
|
-
_labels = prometheus_label_factory(
|
|
1432
|
-
supported_enum_labels=self.get_labels_for_metric(
|
|
1433
|
-
metric_name="litellm_overhead_latency_metric"
|
|
1434
|
-
),
|
|
1435
|
-
enum_values=enum_values,
|
|
1436
|
-
)
|
|
1437
|
-
self.litellm_overhead_latency_metric.labels(**_labels).observe(
|
|
1438
|
-
litellm_overhead_time_ms / 1000
|
|
1439
|
-
) # set as seconds
|
|
1440
|
-
|
|
1441
|
-
if remaining_requests:
|
|
1442
|
-
"""
|
|
1443
|
-
"model_group",
|
|
1444
|
-
"api_provider",
|
|
1445
|
-
"api_base",
|
|
1446
|
-
"litellm_model_name"
|
|
1447
|
-
"""
|
|
1448
|
-
_labels = prometheus_label_factory(
|
|
1449
|
-
supported_enum_labels=self.get_labels_for_metric(
|
|
1450
|
-
metric_name="litellm_remaining_requests_metric"
|
|
1451
|
-
),
|
|
1452
|
-
enum_values=enum_values,
|
|
1453
|
-
)
|
|
1454
|
-
self.litellm_remaining_requests_metric.labels(**_labels).set(
|
|
1455
|
-
remaining_requests
|
|
1456
|
-
)
|
|
1457
|
-
|
|
1458
|
-
if remaining_tokens:
|
|
1459
|
-
_labels = prometheus_label_factory(
|
|
1460
|
-
supported_enum_labels=self.get_labels_for_metric(
|
|
1461
|
-
metric_name="litellm_remaining_tokens_metric"
|
|
1462
|
-
),
|
|
1463
|
-
enum_values=enum_values,
|
|
1464
|
-
)
|
|
1465
|
-
self.litellm_remaining_tokens_metric.labels(**_labels).set(
|
|
1466
|
-
remaining_tokens
|
|
1467
|
-
)
|
|
1468
|
-
|
|
1469
|
-
"""
|
|
1470
|
-
log these labels
|
|
1471
|
-
["litellm_model_name", "requested_model", model_id", "api_base", "api_provider"]
|
|
1472
|
-
"""
|
|
1473
|
-
self.set_deployment_healthy(
|
|
1474
|
-
litellm_model_name=litellm_model_name or "",
|
|
1475
|
-
model_id=model_id or "",
|
|
1476
|
-
api_base=api_base or "",
|
|
1477
|
-
api_provider=llm_provider or "",
|
|
1478
|
-
)
|
|
1479
|
-
|
|
1480
|
-
_labels = prometheus_label_factory(
|
|
1481
|
-
supported_enum_labels=self.get_labels_for_metric(
|
|
1482
|
-
metric_name="litellm_deployment_success_responses"
|
|
1483
|
-
),
|
|
1484
|
-
enum_values=enum_values,
|
|
1485
|
-
)
|
|
1486
|
-
self.litellm_deployment_success_responses.labels(**_labels).inc()
|
|
1487
|
-
|
|
1488
|
-
_labels = prometheus_label_factory(
|
|
1489
|
-
supported_enum_labels=self.get_labels_for_metric(
|
|
1490
|
-
metric_name="litellm_deployment_total_requests"
|
|
1491
|
-
),
|
|
1492
|
-
enum_values=enum_values,
|
|
1493
|
-
)
|
|
1494
|
-
self.litellm_deployment_total_requests.labels(**_labels).inc()
|
|
1495
|
-
|
|
1496
|
-
# Track deployment Latency
|
|
1497
|
-
response_ms: timedelta = end_time - start_time
|
|
1498
|
-
time_to_first_token_response_time: Optional[timedelta] = None
|
|
1499
|
-
|
|
1500
|
-
if (
|
|
1501
|
-
request_kwargs.get("stream", None) is not None
|
|
1502
|
-
and request_kwargs["stream"] is True
|
|
1503
|
-
):
|
|
1504
|
-
# only log ttft for streaming request
|
|
1505
|
-
time_to_first_token_response_time = (
|
|
1506
|
-
request_kwargs.get("completion_start_time", end_time) - start_time
|
|
1507
|
-
)
|
|
1508
|
-
|
|
1509
|
-
# use the metric that is not None
|
|
1510
|
-
# if streaming - use time_to_first_token_response
|
|
1511
|
-
# if not streaming - use response_ms
|
|
1512
|
-
_latency: timedelta = time_to_first_token_response_time or response_ms
|
|
1513
|
-
_latency_seconds = _latency.total_seconds()
|
|
1514
|
-
|
|
1515
|
-
# latency per output token
|
|
1516
|
-
latency_per_token = None
|
|
1517
|
-
if output_tokens is not None and output_tokens > 0:
|
|
1518
|
-
latency_per_token = _latency_seconds / output_tokens
|
|
1519
|
-
_labels = prometheus_label_factory(
|
|
1520
|
-
supported_enum_labels=self.get_labels_for_metric(
|
|
1521
|
-
metric_name="litellm_deployment_latency_per_output_token"
|
|
1522
|
-
),
|
|
1523
|
-
enum_values=enum_values,
|
|
1524
|
-
)
|
|
1525
|
-
self.litellm_deployment_latency_per_output_token.labels(
|
|
1526
|
-
**_labels
|
|
1527
|
-
).observe(latency_per_token)
|
|
1528
|
-
|
|
1529
|
-
except Exception as e:
|
|
1530
|
-
verbose_logger.exception(
|
|
1531
|
-
"Prometheus Error: set_llm_deployment_success_metrics. Exception occured - {}".format(
|
|
1532
|
-
str(e)
|
|
1533
|
-
)
|
|
1534
|
-
)
|
|
1535
|
-
return
|
|
1536
|
-
|
|
1537
|
-
@staticmethod
|
|
1538
|
-
def _get_exception_class_name(exception: Exception) -> str:
|
|
1539
|
-
exception_class_name = ""
|
|
1540
|
-
if hasattr(exception, "llm_provider"):
|
|
1541
|
-
exception_class_name = getattr(exception, "llm_provider") or ""
|
|
1542
|
-
|
|
1543
|
-
# pretty print the provider name on prometheus
|
|
1544
|
-
# eg. `openai` -> `Openai.`
|
|
1545
|
-
if len(exception_class_name) >= 1:
|
|
1546
|
-
exception_class_name = (
|
|
1547
|
-
exception_class_name[0].upper() + exception_class_name[1:] + "."
|
|
1548
|
-
)
|
|
1549
|
-
|
|
1550
|
-
exception_class_name += exception.__class__.__name__
|
|
1551
|
-
return exception_class_name
|
|
1552
|
-
|
|
1553
|
-
async def log_success_fallback_event(
|
|
1554
|
-
self, original_model_group: str, kwargs: dict, original_exception: Exception
|
|
1555
|
-
):
|
|
1556
|
-
"""
|
|
1557
|
-
|
|
1558
|
-
Logs a successful LLM fallback event on prometheus
|
|
1559
|
-
|
|
1560
|
-
"""
|
|
1561
|
-
from litellm.litellm_core_utils.litellm_logging import (
|
|
1562
|
-
StandardLoggingMetadata,
|
|
1563
|
-
StandardLoggingPayloadSetup,
|
|
1564
|
-
)
|
|
1565
|
-
|
|
1566
|
-
verbose_logger.debug(
|
|
1567
|
-
"Prometheus: log_success_fallback_event, original_model_group: %s, kwargs: %s",
|
|
1568
|
-
original_model_group,
|
|
1569
|
-
kwargs,
|
|
1570
|
-
)
|
|
1571
|
-
_metadata = kwargs.get("metadata", {})
|
|
1572
|
-
standard_metadata: StandardLoggingMetadata = (
|
|
1573
|
-
StandardLoggingPayloadSetup.get_standard_logging_metadata(
|
|
1574
|
-
metadata=_metadata
|
|
1575
|
-
)
|
|
1576
|
-
)
|
|
1577
|
-
_new_model = kwargs.get("model")
|
|
1578
|
-
_tags = cast(List[str], kwargs.get("tags") or [])
|
|
1579
|
-
|
|
1580
|
-
enum_values = UserAPIKeyLabelValues(
|
|
1581
|
-
requested_model=original_model_group,
|
|
1582
|
-
fallback_model=_new_model,
|
|
1583
|
-
hashed_api_key=standard_metadata["user_api_key_hash"],
|
|
1584
|
-
api_key_alias=standard_metadata["user_api_key_alias"],
|
|
1585
|
-
team=standard_metadata["user_api_key_team_id"],
|
|
1586
|
-
team_alias=standard_metadata["user_api_key_team_alias"],
|
|
1587
|
-
exception_status=str(getattr(original_exception, "status_code", None)),
|
|
1588
|
-
exception_class=self._get_exception_class_name(original_exception),
|
|
1589
|
-
tags=_tags,
|
|
1590
|
-
)
|
|
1591
|
-
_labels = prometheus_label_factory(
|
|
1592
|
-
supported_enum_labels=self.get_labels_for_metric(
|
|
1593
|
-
metric_name="litellm_deployment_successful_fallbacks"
|
|
1594
|
-
),
|
|
1595
|
-
enum_values=enum_values,
|
|
1596
|
-
)
|
|
1597
|
-
self.litellm_deployment_successful_fallbacks.labels(**_labels).inc()
|
|
1598
|
-
|
|
1599
|
-
async def log_failure_fallback_event(
|
|
1600
|
-
self, original_model_group: str, kwargs: dict, original_exception: Exception
|
|
1601
|
-
):
|
|
1602
|
-
"""
|
|
1603
|
-
Logs a failed LLM fallback event on prometheus
|
|
1604
|
-
"""
|
|
1605
|
-
from litellm.litellm_core_utils.litellm_logging import (
|
|
1606
|
-
StandardLoggingMetadata,
|
|
1607
|
-
StandardLoggingPayloadSetup,
|
|
1608
|
-
)
|
|
1609
|
-
|
|
1610
|
-
verbose_logger.debug(
|
|
1611
|
-
"Prometheus: log_failure_fallback_event, original_model_group: %s, kwargs: %s",
|
|
1612
|
-
original_model_group,
|
|
1613
|
-
kwargs,
|
|
1614
|
-
)
|
|
1615
|
-
_new_model = kwargs.get("model")
|
|
1616
|
-
_metadata = kwargs.get("metadata", {})
|
|
1617
|
-
_tags = cast(List[str], kwargs.get("tags") or [])
|
|
1618
|
-
standard_metadata: StandardLoggingMetadata = (
|
|
1619
|
-
StandardLoggingPayloadSetup.get_standard_logging_metadata(
|
|
1620
|
-
metadata=_metadata
|
|
1621
|
-
)
|
|
1622
|
-
)
|
|
1623
|
-
|
|
1624
|
-
enum_values = UserAPIKeyLabelValues(
|
|
1625
|
-
requested_model=original_model_group,
|
|
1626
|
-
fallback_model=_new_model,
|
|
1627
|
-
hashed_api_key=standard_metadata["user_api_key_hash"],
|
|
1628
|
-
api_key_alias=standard_metadata["user_api_key_alias"],
|
|
1629
|
-
team=standard_metadata["user_api_key_team_id"],
|
|
1630
|
-
team_alias=standard_metadata["user_api_key_team_alias"],
|
|
1631
|
-
exception_status=str(getattr(original_exception, "status_code", None)),
|
|
1632
|
-
exception_class=self._get_exception_class_name(original_exception),
|
|
1633
|
-
tags=_tags,
|
|
1634
|
-
)
|
|
1635
|
-
|
|
1636
|
-
_labels = prometheus_label_factory(
|
|
1637
|
-
supported_enum_labels=self.get_labels_for_metric(
|
|
1638
|
-
metric_name="litellm_deployment_failed_fallbacks"
|
|
1639
|
-
),
|
|
1640
|
-
enum_values=enum_values,
|
|
1641
|
-
)
|
|
1642
|
-
self.litellm_deployment_failed_fallbacks.labels(**_labels).inc()
|
|
1643
|
-
|
|
1644
|
-
def set_litellm_deployment_state(
|
|
1645
|
-
self,
|
|
1646
|
-
state: int,
|
|
1647
|
-
litellm_model_name: str,
|
|
1648
|
-
model_id: Optional[str],
|
|
1649
|
-
api_base: Optional[str],
|
|
1650
|
-
api_provider: str,
|
|
1651
|
-
):
|
|
1652
|
-
self.litellm_deployment_state.labels(
|
|
1653
|
-
litellm_model_name, model_id, api_base, api_provider
|
|
1654
|
-
).set(state)
|
|
1655
|
-
|
|
1656
|
-
def set_deployment_healthy(
|
|
1657
|
-
self,
|
|
1658
|
-
litellm_model_name: str,
|
|
1659
|
-
model_id: str,
|
|
1660
|
-
api_base: str,
|
|
1661
|
-
api_provider: str,
|
|
1662
|
-
):
|
|
1663
|
-
self.set_litellm_deployment_state(
|
|
1664
|
-
0, litellm_model_name, model_id, api_base, api_provider
|
|
1665
|
-
)
|
|
1666
|
-
|
|
1667
|
-
def set_deployment_partial_outage(
|
|
1668
|
-
self,
|
|
1669
|
-
litellm_model_name: str,
|
|
1670
|
-
model_id: Optional[str],
|
|
1671
|
-
api_base: Optional[str],
|
|
1672
|
-
api_provider: str,
|
|
1673
|
-
):
|
|
1674
|
-
self.set_litellm_deployment_state(
|
|
1675
|
-
1, litellm_model_name, model_id, api_base, api_provider
|
|
1676
|
-
)
|
|
1677
|
-
|
|
1678
|
-
def set_deployment_complete_outage(
|
|
1679
|
-
self,
|
|
1680
|
-
litellm_model_name: str,
|
|
1681
|
-
model_id: Optional[str],
|
|
1682
|
-
api_base: Optional[str],
|
|
1683
|
-
api_provider: str,
|
|
1684
|
-
):
|
|
1685
|
-
self.set_litellm_deployment_state(
|
|
1686
|
-
2, litellm_model_name, model_id, api_base, api_provider
|
|
1687
|
-
)
|
|
1688
|
-
|
|
1689
|
-
def increment_deployment_cooled_down(
|
|
1690
|
-
self,
|
|
1691
|
-
litellm_model_name: str,
|
|
1692
|
-
model_id: str,
|
|
1693
|
-
api_base: str,
|
|
1694
|
-
api_provider: str,
|
|
1695
|
-
exception_status: str,
|
|
1696
|
-
):
|
|
1697
|
-
"""
|
|
1698
|
-
increment metric when litellm.Router / load balancing logic places a deployment in cool down
|
|
1699
|
-
"""
|
|
1700
|
-
self.litellm_deployment_cooled_down.labels(
|
|
1701
|
-
litellm_model_name, model_id, api_base, api_provider, exception_status
|
|
1702
|
-
).inc()
|
|
1703
|
-
|
|
1704
|
-
def track_provider_remaining_budget(
|
|
1705
|
-
self, provider: str, spend: float, budget_limit: float
|
|
1706
|
-
):
|
|
1707
|
-
"""
|
|
1708
|
-
Track provider remaining budget in Prometheus
|
|
1709
|
-
"""
|
|
1710
|
-
self.litellm_provider_remaining_budget_metric.labels(provider).set(
|
|
1711
|
-
self._safe_get_remaining_budget(
|
|
1712
|
-
max_budget=budget_limit,
|
|
1713
|
-
spend=spend,
|
|
1714
|
-
)
|
|
1715
|
-
)
|
|
1716
|
-
|
|
1717
|
-
def _safe_get_remaining_budget(
|
|
1718
|
-
self, max_budget: Optional[float], spend: Optional[float]
|
|
1719
|
-
) -> float:
|
|
1720
|
-
if max_budget is None:
|
|
1721
|
-
return float("inf")
|
|
1722
|
-
|
|
1723
|
-
if spend is None:
|
|
1724
|
-
return max_budget
|
|
1725
|
-
|
|
1726
|
-
return max_budget - spend
|
|
1727
|
-
|
|
1728
|
-
async def _initialize_budget_metrics(
|
|
1729
|
-
self,
|
|
1730
|
-
data_fetch_function: Callable[..., Awaitable[Tuple[List[Any], Optional[int]]]],
|
|
1731
|
-
set_metrics_function: Callable[[List[Any]], Awaitable[None]],
|
|
1732
|
-
data_type: Literal["teams", "keys"],
|
|
1733
|
-
):
|
|
1734
|
-
"""
|
|
1735
|
-
Generic method to initialize budget metrics for teams or API keys.
|
|
1736
|
-
|
|
1737
|
-
Args:
|
|
1738
|
-
data_fetch_function: Function to fetch data with pagination.
|
|
1739
|
-
set_metrics_function: Function to set metrics for the fetched data.
|
|
1740
|
-
data_type: String representing the type of data ("teams" or "keys") for logging purposes.
|
|
1741
|
-
"""
|
|
1742
|
-
from litellm.proxy.proxy_server import prisma_client
|
|
1743
|
-
|
|
1744
|
-
if prisma_client is None:
|
|
1745
|
-
return
|
|
1746
|
-
|
|
1747
|
-
try:
|
|
1748
|
-
page = 1
|
|
1749
|
-
page_size = 50
|
|
1750
|
-
data, total_count = await data_fetch_function(
|
|
1751
|
-
page_size=page_size, page=page
|
|
1752
|
-
)
|
|
1753
|
-
|
|
1754
|
-
if total_count is None:
|
|
1755
|
-
total_count = len(data)
|
|
1756
|
-
|
|
1757
|
-
# Calculate total pages needed
|
|
1758
|
-
total_pages = (total_count + page_size - 1) // page_size
|
|
1759
|
-
|
|
1760
|
-
# Set metrics for first page of data
|
|
1761
|
-
await set_metrics_function(data)
|
|
1762
|
-
|
|
1763
|
-
# Get and set metrics for remaining pages
|
|
1764
|
-
for page in range(2, total_pages + 1):
|
|
1765
|
-
data, _ = await data_fetch_function(page_size=page_size, page=page)
|
|
1766
|
-
await set_metrics_function(data)
|
|
1767
|
-
|
|
1768
|
-
except Exception as e:
|
|
1769
|
-
verbose_logger.exception(
|
|
1770
|
-
f"Error initializing {data_type} budget metrics: {str(e)}"
|
|
1771
|
-
)
|
|
1772
|
-
|
|
1773
|
-
async def _initialize_team_budget_metrics(self):
|
|
1774
|
-
"""
|
|
1775
|
-
Initialize team budget metrics by reusing the generic pagination logic.
|
|
1776
|
-
"""
|
|
1777
|
-
from litellm.proxy.management_endpoints.team_endpoints import (
|
|
1778
|
-
get_paginated_teams,
|
|
1779
|
-
)
|
|
1780
|
-
from litellm.proxy.proxy_server import prisma_client
|
|
1781
|
-
|
|
1782
|
-
if prisma_client is None:
|
|
1783
|
-
verbose_logger.debug(
|
|
1784
|
-
"Prometheus: skipping team metrics initialization, DB not initialized"
|
|
1785
|
-
)
|
|
1786
|
-
return
|
|
1787
|
-
|
|
1788
|
-
async def fetch_teams(
|
|
1789
|
-
page_size: int, page: int
|
|
1790
|
-
) -> Tuple[List[LiteLLM_TeamTable], Optional[int]]:
|
|
1791
|
-
teams, total_count = await get_paginated_teams(
|
|
1792
|
-
prisma_client=prisma_client, page_size=page_size, page=page
|
|
1793
|
-
)
|
|
1794
|
-
if total_count is None:
|
|
1795
|
-
total_count = len(teams)
|
|
1796
|
-
return teams, total_count
|
|
1797
|
-
|
|
1798
|
-
await self._initialize_budget_metrics(
|
|
1799
|
-
data_fetch_function=fetch_teams,
|
|
1800
|
-
set_metrics_function=self._set_team_list_budget_metrics,
|
|
1801
|
-
data_type="teams",
|
|
1802
|
-
)
|
|
1803
|
-
|
|
1804
|
-
async def _initialize_api_key_budget_metrics(self):
|
|
1805
|
-
"""
|
|
1806
|
-
Initialize API key budget metrics by reusing the generic pagination logic.
|
|
1807
|
-
"""
|
|
1808
|
-
from typing import Union
|
|
1809
|
-
|
|
1810
|
-
from litellm.constants import UI_SESSION_TOKEN_TEAM_ID
|
|
1811
|
-
from litellm.proxy.management_endpoints.key_management_endpoints import (
|
|
1812
|
-
_list_key_helper,
|
|
1813
|
-
)
|
|
1814
|
-
from litellm.proxy.proxy_server import prisma_client
|
|
1815
|
-
|
|
1816
|
-
if prisma_client is None:
|
|
1817
|
-
verbose_logger.debug(
|
|
1818
|
-
"Prometheus: skipping key metrics initialization, DB not initialized"
|
|
1819
|
-
)
|
|
1820
|
-
return
|
|
1821
|
-
|
|
1822
|
-
async def fetch_keys(
|
|
1823
|
-
page_size: int, page: int
|
|
1824
|
-
) -> Tuple[List[Union[str, UserAPIKeyAuth]], Optional[int]]:
|
|
1825
|
-
key_list_response = await _list_key_helper(
|
|
1826
|
-
prisma_client=prisma_client,
|
|
1827
|
-
page=page,
|
|
1828
|
-
size=page_size,
|
|
1829
|
-
user_id=None,
|
|
1830
|
-
team_id=None,
|
|
1831
|
-
key_alias=None,
|
|
1832
|
-
key_hash=None,
|
|
1833
|
-
exclude_team_id=UI_SESSION_TOKEN_TEAM_ID,
|
|
1834
|
-
return_full_object=True,
|
|
1835
|
-
organization_id=None,
|
|
1836
|
-
)
|
|
1837
|
-
keys = key_list_response.get("keys", [])
|
|
1838
|
-
total_count = key_list_response.get("total_count")
|
|
1839
|
-
if total_count is None:
|
|
1840
|
-
total_count = len(keys)
|
|
1841
|
-
return keys, total_count
|
|
1842
|
-
|
|
1843
|
-
await self._initialize_budget_metrics(
|
|
1844
|
-
data_fetch_function=fetch_keys,
|
|
1845
|
-
set_metrics_function=self._set_key_list_budget_metrics,
|
|
1846
|
-
data_type="keys",
|
|
1847
|
-
)
|
|
1848
|
-
|
|
1849
|
-
async def initialize_remaining_budget_metrics(self):
|
|
1850
|
-
"""
|
|
1851
|
-
Handler for initializing remaining budget metrics for all teams to avoid metric discrepancies.
|
|
1852
|
-
|
|
1853
|
-
Runs when prometheus logger starts up.
|
|
1854
|
-
|
|
1855
|
-
- If redis cache is available, we use the pod lock manager to acquire a lock and initialize the metrics.
|
|
1856
|
-
- Ensures only one pod emits the metrics at a time.
|
|
1857
|
-
- If redis cache is not available, we initialize the metrics directly.
|
|
1858
|
-
"""
|
|
1859
|
-
from litellm.constants import PROMETHEUS_EMIT_BUDGET_METRICS_JOB_NAME
|
|
1860
|
-
from litellm.proxy.proxy_server import proxy_logging_obj
|
|
1861
|
-
|
|
1862
|
-
pod_lock_manager = proxy_logging_obj.db_spend_update_writer.pod_lock_manager
|
|
1863
|
-
|
|
1864
|
-
# if using redis, ensure only one pod emits the metrics at a time
|
|
1865
|
-
if pod_lock_manager and pod_lock_manager.redis_cache:
|
|
1866
|
-
if await pod_lock_manager.acquire_lock(
|
|
1867
|
-
cronjob_id=PROMETHEUS_EMIT_BUDGET_METRICS_JOB_NAME
|
|
1868
|
-
):
|
|
1869
|
-
try:
|
|
1870
|
-
await self._initialize_remaining_budget_metrics()
|
|
1871
|
-
finally:
|
|
1872
|
-
await pod_lock_manager.release_lock(
|
|
1873
|
-
cronjob_id=PROMETHEUS_EMIT_BUDGET_METRICS_JOB_NAME
|
|
1874
|
-
)
|
|
1875
|
-
else:
|
|
1876
|
-
# if not using redis, initialize the metrics directly
|
|
1877
|
-
await self._initialize_remaining_budget_metrics()
|
|
1878
|
-
|
|
1879
|
-
async def _initialize_remaining_budget_metrics(self):
|
|
1880
|
-
"""
|
|
1881
|
-
Helper to initialize remaining budget metrics for all teams and API keys.
|
|
1882
|
-
"""
|
|
1883
|
-
verbose_logger.debug("Emitting key, team budget metrics....")
|
|
1884
|
-
await self._initialize_team_budget_metrics()
|
|
1885
|
-
await self._initialize_api_key_budget_metrics()
|
|
1886
|
-
|
|
1887
|
-
async def _set_key_list_budget_metrics(
|
|
1888
|
-
self, keys: List[Union[str, UserAPIKeyAuth]]
|
|
1889
|
-
):
|
|
1890
|
-
"""Helper function to set budget metrics for a list of keys"""
|
|
1891
|
-
for key in keys:
|
|
1892
|
-
if isinstance(key, UserAPIKeyAuth):
|
|
1893
|
-
self._set_key_budget_metrics(key)
|
|
1894
|
-
|
|
1895
|
-
async def _set_team_list_budget_metrics(self, teams: List[LiteLLM_TeamTable]):
|
|
1896
|
-
"""Helper function to set budget metrics for a list of teams"""
|
|
1897
|
-
for team in teams:
|
|
1898
|
-
self._set_team_budget_metrics(team)
|
|
1899
|
-
|
|
1900
|
-
async def _set_team_budget_metrics_after_api_request(
|
|
1901
|
-
self,
|
|
1902
|
-
user_api_team: Optional[str],
|
|
1903
|
-
user_api_team_alias: Optional[str],
|
|
1904
|
-
team_spend: float,
|
|
1905
|
-
team_max_budget: float,
|
|
1906
|
-
response_cost: float,
|
|
1907
|
-
):
|
|
1908
|
-
"""
|
|
1909
|
-
Set team budget metrics after an LLM API request
|
|
1910
|
-
|
|
1911
|
-
- Assemble a LiteLLM_TeamTable object
|
|
1912
|
-
- looks up team info from db if not available in metadata
|
|
1913
|
-
- Set team budget metrics
|
|
1914
|
-
"""
|
|
1915
|
-
if user_api_team:
|
|
1916
|
-
team_object = await self._assemble_team_object(
|
|
1917
|
-
team_id=user_api_team,
|
|
1918
|
-
team_alias=user_api_team_alias or "",
|
|
1919
|
-
spend=team_spend,
|
|
1920
|
-
max_budget=team_max_budget,
|
|
1921
|
-
response_cost=response_cost,
|
|
1922
|
-
)
|
|
1923
|
-
|
|
1924
|
-
self._set_team_budget_metrics(team_object)
|
|
1925
|
-
|
|
1926
|
-
async def _assemble_team_object(
|
|
1927
|
-
self,
|
|
1928
|
-
team_id: str,
|
|
1929
|
-
team_alias: str,
|
|
1930
|
-
spend: Optional[float],
|
|
1931
|
-
max_budget: Optional[float],
|
|
1932
|
-
response_cost: float,
|
|
1933
|
-
) -> LiteLLM_TeamTable:
|
|
1934
|
-
"""
|
|
1935
|
-
Assemble a LiteLLM_TeamTable object
|
|
1936
|
-
|
|
1937
|
-
for fields not available in metadata, we fetch from db
|
|
1938
|
-
Fields not available in metadata:
|
|
1939
|
-
- `budget_reset_at`
|
|
1940
|
-
"""
|
|
1941
|
-
from litellm.proxy.auth.auth_checks import get_team_object
|
|
1942
|
-
from litellm.proxy.proxy_server import prisma_client, user_api_key_cache
|
|
1943
|
-
|
|
1944
|
-
_total_team_spend = (spend or 0) + response_cost
|
|
1945
|
-
team_object = LiteLLM_TeamTable(
|
|
1946
|
-
team_id=team_id,
|
|
1947
|
-
team_alias=team_alias,
|
|
1948
|
-
spend=_total_team_spend,
|
|
1949
|
-
max_budget=max_budget,
|
|
1950
|
-
)
|
|
1951
|
-
try:
|
|
1952
|
-
team_info = await get_team_object(
|
|
1953
|
-
team_id=team_id,
|
|
1954
|
-
prisma_client=prisma_client,
|
|
1955
|
-
user_api_key_cache=user_api_key_cache,
|
|
1956
|
-
)
|
|
1957
|
-
except Exception as e:
|
|
1958
|
-
verbose_logger.debug(
|
|
1959
|
-
f"[Non-Blocking] Prometheus: Error getting team info: {str(e)}"
|
|
1960
|
-
)
|
|
1961
|
-
return team_object
|
|
1962
|
-
|
|
1963
|
-
if team_info:
|
|
1964
|
-
team_object.budget_reset_at = team_info.budget_reset_at
|
|
1965
|
-
|
|
1966
|
-
return team_object
|
|
1967
|
-
|
|
1968
|
-
def _set_team_budget_metrics(
|
|
1969
|
-
self,
|
|
1970
|
-
team: LiteLLM_TeamTable,
|
|
1971
|
-
):
|
|
1972
|
-
"""
|
|
1973
|
-
Set team budget metrics for a single team
|
|
1974
|
-
|
|
1975
|
-
- Remaining Budget
|
|
1976
|
-
- Max Budget
|
|
1977
|
-
- Budget Reset At
|
|
1978
|
-
"""
|
|
1979
|
-
enum_values = UserAPIKeyLabelValues(
|
|
1980
|
-
team=team.team_id,
|
|
1981
|
-
team_alias=team.team_alias or "",
|
|
1982
|
-
)
|
|
1983
|
-
|
|
1984
|
-
_labels = prometheus_label_factory(
|
|
1985
|
-
supported_enum_labels=self.get_labels_for_metric(
|
|
1986
|
-
metric_name="litellm_remaining_team_budget_metric"
|
|
1987
|
-
),
|
|
1988
|
-
enum_values=enum_values,
|
|
1989
|
-
)
|
|
1990
|
-
self.litellm_remaining_team_budget_metric.labels(**_labels).set(
|
|
1991
|
-
self._safe_get_remaining_budget(
|
|
1992
|
-
max_budget=team.max_budget,
|
|
1993
|
-
spend=team.spend,
|
|
1994
|
-
)
|
|
1995
|
-
)
|
|
1996
|
-
|
|
1997
|
-
if team.max_budget is not None:
|
|
1998
|
-
_labels = prometheus_label_factory(
|
|
1999
|
-
supported_enum_labels=self.get_labels_for_metric(
|
|
2000
|
-
metric_name="litellm_team_max_budget_metric"
|
|
2001
|
-
),
|
|
2002
|
-
enum_values=enum_values,
|
|
2003
|
-
)
|
|
2004
|
-
self.litellm_team_max_budget_metric.labels(**_labels).set(team.max_budget)
|
|
2005
|
-
|
|
2006
|
-
if team.budget_reset_at is not None:
|
|
2007
|
-
_labels = prometheus_label_factory(
|
|
2008
|
-
supported_enum_labels=self.get_labels_for_metric(
|
|
2009
|
-
metric_name="litellm_team_budget_remaining_hours_metric"
|
|
2010
|
-
),
|
|
2011
|
-
enum_values=enum_values,
|
|
2012
|
-
)
|
|
2013
|
-
self.litellm_team_budget_remaining_hours_metric.labels(**_labels).set(
|
|
2014
|
-
self._get_remaining_hours_for_budget_reset(
|
|
2015
|
-
budget_reset_at=team.budget_reset_at
|
|
2016
|
-
)
|
|
2017
|
-
)
|
|
2018
|
-
|
|
2019
|
-
def _set_key_budget_metrics(self, user_api_key_dict: UserAPIKeyAuth):
|
|
2020
|
-
"""
|
|
2021
|
-
Set virtual key budget metrics
|
|
2022
|
-
|
|
2023
|
-
- Remaining Budget
|
|
2024
|
-
- Max Budget
|
|
2025
|
-
- Budget Reset At
|
|
2026
|
-
"""
|
|
2027
|
-
enum_values = UserAPIKeyLabelValues(
|
|
2028
|
-
hashed_api_key=user_api_key_dict.token,
|
|
2029
|
-
api_key_alias=user_api_key_dict.key_alias or "",
|
|
2030
|
-
)
|
|
2031
|
-
_labels = prometheus_label_factory(
|
|
2032
|
-
supported_enum_labels=self.get_labels_for_metric(
|
|
2033
|
-
metric_name="litellm_remaining_api_key_budget_metric"
|
|
2034
|
-
),
|
|
2035
|
-
enum_values=enum_values,
|
|
2036
|
-
)
|
|
2037
|
-
self.litellm_remaining_api_key_budget_metric.labels(**_labels).set(
|
|
2038
|
-
self._safe_get_remaining_budget(
|
|
2039
|
-
max_budget=user_api_key_dict.max_budget,
|
|
2040
|
-
spend=user_api_key_dict.spend,
|
|
2041
|
-
)
|
|
2042
|
-
)
|
|
2043
|
-
|
|
2044
|
-
if user_api_key_dict.max_budget is not None:
|
|
2045
|
-
_labels = prometheus_label_factory(
|
|
2046
|
-
supported_enum_labels=self.get_labels_for_metric(
|
|
2047
|
-
metric_name="litellm_api_key_max_budget_metric"
|
|
2048
|
-
),
|
|
2049
|
-
enum_values=enum_values,
|
|
2050
|
-
)
|
|
2051
|
-
self.litellm_api_key_max_budget_metric.labels(**_labels).set(
|
|
2052
|
-
user_api_key_dict.max_budget
|
|
2053
|
-
)
|
|
2054
|
-
|
|
2055
|
-
if user_api_key_dict.budget_reset_at is not None:
|
|
2056
|
-
self.litellm_api_key_budget_remaining_hours_metric.labels(**_labels).set(
|
|
2057
|
-
self._get_remaining_hours_for_budget_reset(
|
|
2058
|
-
budget_reset_at=user_api_key_dict.budget_reset_at
|
|
2059
|
-
)
|
|
2060
|
-
)
|
|
2061
|
-
|
|
2062
|
-
async def _set_api_key_budget_metrics_after_api_request(
|
|
2063
|
-
self,
|
|
2064
|
-
user_api_key: Optional[str],
|
|
2065
|
-
user_api_key_alias: Optional[str],
|
|
2066
|
-
response_cost: float,
|
|
2067
|
-
key_max_budget: float,
|
|
2068
|
-
key_spend: Optional[float],
|
|
2069
|
-
):
|
|
2070
|
-
if user_api_key:
|
|
2071
|
-
user_api_key_dict = await self._assemble_key_object(
|
|
2072
|
-
user_api_key=user_api_key,
|
|
2073
|
-
user_api_key_alias=user_api_key_alias or "",
|
|
2074
|
-
key_max_budget=key_max_budget,
|
|
2075
|
-
key_spend=key_spend,
|
|
2076
|
-
response_cost=response_cost,
|
|
2077
|
-
)
|
|
2078
|
-
self._set_key_budget_metrics(user_api_key_dict)
|
|
2079
|
-
|
|
2080
|
-
async def _assemble_key_object(
|
|
2081
|
-
self,
|
|
2082
|
-
user_api_key: str,
|
|
2083
|
-
user_api_key_alias: str,
|
|
2084
|
-
key_max_budget: float,
|
|
2085
|
-
key_spend: Optional[float],
|
|
2086
|
-
response_cost: float,
|
|
2087
|
-
) -> UserAPIKeyAuth:
|
|
2088
|
-
"""
|
|
2089
|
-
Assemble a UserAPIKeyAuth object
|
|
2090
|
-
"""
|
|
2091
|
-
from litellm.proxy.auth.auth_checks import get_key_object
|
|
2092
|
-
from litellm.proxy.proxy_server import prisma_client, user_api_key_cache
|
|
2093
|
-
|
|
2094
|
-
_total_key_spend = (key_spend or 0) + response_cost
|
|
2095
|
-
user_api_key_dict = UserAPIKeyAuth(
|
|
2096
|
-
token=user_api_key,
|
|
2097
|
-
key_alias=user_api_key_alias,
|
|
2098
|
-
max_budget=key_max_budget,
|
|
2099
|
-
spend=_total_key_spend,
|
|
2100
|
-
)
|
|
2101
|
-
try:
|
|
2102
|
-
if user_api_key_dict.token:
|
|
2103
|
-
key_object = await get_key_object(
|
|
2104
|
-
hashed_token=user_api_key_dict.token,
|
|
2105
|
-
prisma_client=prisma_client,
|
|
2106
|
-
user_api_key_cache=user_api_key_cache,
|
|
2107
|
-
)
|
|
2108
|
-
if key_object:
|
|
2109
|
-
user_api_key_dict.budget_reset_at = key_object.budget_reset_at
|
|
2110
|
-
except Exception as e:
|
|
2111
|
-
verbose_logger.debug(
|
|
2112
|
-
f"[Non-Blocking] Prometheus: Error getting key info: {str(e)}"
|
|
2113
|
-
)
|
|
2114
|
-
|
|
2115
|
-
return user_api_key_dict
|
|
2116
|
-
|
|
2117
|
-
def _get_remaining_hours_for_budget_reset(self, budget_reset_at: datetime) -> float:
|
|
2118
|
-
"""
|
|
2119
|
-
Get remaining hours for budget reset
|
|
2120
|
-
"""
|
|
2121
|
-
return (
|
|
2122
|
-
budget_reset_at - datetime.now(budget_reset_at.tzinfo)
|
|
2123
|
-
).total_seconds() / 3600
|
|
2124
|
-
|
|
2125
|
-
def _safe_duration_seconds(
|
|
2126
|
-
self,
|
|
2127
|
-
start_time: Any,
|
|
2128
|
-
end_time: Any,
|
|
2129
|
-
) -> Optional[float]:
|
|
2130
|
-
"""
|
|
2131
|
-
Compute the duration in seconds between two objects.
|
|
2132
|
-
|
|
2133
|
-
Returns the duration as a float if both start and end are instances of datetime,
|
|
2134
|
-
otherwise returns None.
|
|
2135
|
-
"""
|
|
2136
|
-
if isinstance(start_time, datetime) and isinstance(end_time, datetime):
|
|
2137
|
-
return (end_time - start_time).total_seconds()
|
|
2138
|
-
return None
|
|
2139
|
-
|
|
2140
|
-
@staticmethod
|
|
2141
|
-
def initialize_budget_metrics_cron_job(scheduler: AsyncIOScheduler):
|
|
2142
|
-
"""
|
|
2143
|
-
Initialize budget metrics as a cron job. This job runs every `PROMETHEUS_BUDGET_METRICS_REFRESH_INTERVAL_MINUTES` minutes.
|
|
2144
|
-
|
|
2145
|
-
It emits the current remaining budget metrics for all Keys and Teams.
|
|
2146
|
-
"""
|
|
2147
|
-
from enterprise.litellm_enterprise.integrations.prometheus import (
|
|
2148
|
-
PrometheusLogger,
|
|
2149
|
-
)
|
|
2150
|
-
from litellm.constants import PROMETHEUS_BUDGET_METRICS_REFRESH_INTERVAL_MINUTES
|
|
2151
|
-
from litellm.integrations.custom_logger import CustomLogger
|
|
2152
|
-
|
|
2153
|
-
prometheus_loggers: List[CustomLogger] = (
|
|
2154
|
-
litellm.logging_callback_manager.get_custom_loggers_for_type(
|
|
2155
|
-
callback_type=PrometheusLogger
|
|
2156
|
-
)
|
|
2157
|
-
)
|
|
2158
|
-
# we need to get the initialized prometheus logger instance(s) and call logger.initialize_remaining_budget_metrics() on them
|
|
2159
|
-
verbose_logger.debug("found %s prometheus loggers", len(prometheus_loggers))
|
|
2160
|
-
if len(prometheus_loggers) > 0:
|
|
2161
|
-
prometheus_logger = cast(PrometheusLogger, prometheus_loggers[0])
|
|
2162
|
-
verbose_logger.debug(
|
|
2163
|
-
"Initializing remaining budget metrics as a cron job executing every %s minutes"
|
|
2164
|
-
% PROMETHEUS_BUDGET_METRICS_REFRESH_INTERVAL_MINUTES
|
|
2165
|
-
)
|
|
2166
|
-
scheduler.add_job(
|
|
2167
|
-
prometheus_logger.initialize_remaining_budget_metrics,
|
|
2168
|
-
"interval",
|
|
2169
|
-
minutes=PROMETHEUS_BUDGET_METRICS_REFRESH_INTERVAL_MINUTES,
|
|
2170
|
-
)
|
|
2171
|
-
|
|
2172
|
-
@staticmethod
|
|
2173
|
-
def _mount_metrics_endpoint(premium_user: bool):
|
|
2174
|
-
"""
|
|
2175
|
-
Mount the Prometheus metrics endpoint with optional authentication.
|
|
2176
|
-
|
|
2177
|
-
Args:
|
|
2178
|
-
premium_user (bool): Whether the user is a premium user
|
|
2179
|
-
require_auth (bool, optional): Whether to require authentication for the metrics endpoint.
|
|
2180
|
-
Defaults to False.
|
|
2181
|
-
"""
|
|
2182
|
-
from prometheus_client import make_asgi_app
|
|
2183
|
-
|
|
2184
|
-
from litellm._logging import verbose_proxy_logger
|
|
2185
|
-
from litellm.proxy._types import CommonProxyErrors
|
|
2186
|
-
from litellm.proxy.proxy_server import app
|
|
2187
|
-
|
|
2188
|
-
if premium_user is not True:
|
|
2189
|
-
verbose_proxy_logger.warning(
|
|
2190
|
-
f"Prometheus metrics are only available for premium users. {CommonProxyErrors.not_premium_user.value}"
|
|
2191
|
-
)
|
|
2192
|
-
|
|
2193
|
-
# Create metrics ASGI app
|
|
2194
|
-
metrics_app = make_asgi_app()
|
|
2195
|
-
|
|
2196
|
-
# Mount the metrics app to the app
|
|
2197
|
-
app.mount("/metrics", metrics_app)
|
|
2198
|
-
verbose_proxy_logger.debug(
|
|
2199
|
-
"Starting Prometheus Metrics on /metrics (no authentication)"
|
|
2200
|
-
)
|
|
2201
|
-
|
|
2202
|
-
|
|
2203
|
-
def prometheus_label_factory(
|
|
2204
|
-
supported_enum_labels: List[str],
|
|
2205
|
-
enum_values: UserAPIKeyLabelValues,
|
|
2206
|
-
tag: Optional[str] = None,
|
|
2207
|
-
) -> dict:
|
|
2208
|
-
"""
|
|
2209
|
-
Returns a dictionary of label + values for prometheus.
|
|
2210
|
-
|
|
2211
|
-
Ensures end_user param is not sent to prometheus if it is not supported.
|
|
2212
|
-
"""
|
|
2213
|
-
# Extract dictionary from Pydantic object
|
|
2214
|
-
enum_dict = enum_values.model_dump()
|
|
2215
|
-
|
|
2216
|
-
# Filter supported labels
|
|
2217
|
-
filtered_labels = {
|
|
2218
|
-
label: value
|
|
2219
|
-
for label, value in enum_dict.items()
|
|
2220
|
-
if label in supported_enum_labels
|
|
2221
|
-
}
|
|
2222
|
-
|
|
2223
|
-
if UserAPIKeyLabelNames.END_USER.value in filtered_labels:
|
|
2224
|
-
filtered_labels["end_user"] = get_end_user_id_for_cost_tracking(
|
|
2225
|
-
litellm_params={"user_api_key_end_user_id": enum_values.end_user},
|
|
2226
|
-
service_type="prometheus",
|
|
2227
|
-
)
|
|
2228
|
-
|
|
2229
|
-
if enum_values.custom_metadata_labels is not None:
|
|
2230
|
-
for key, value in enum_values.custom_metadata_labels.items():
|
|
2231
|
-
if key in supported_enum_labels:
|
|
2232
|
-
filtered_labels[key] = value
|
|
2233
|
-
|
|
2234
|
-
# Add custom tags if configured
|
|
2235
|
-
if enum_values.tags is not None:
|
|
2236
|
-
custom_tag_labels = get_custom_labels_from_tags(enum_values.tags)
|
|
2237
|
-
for key, value in custom_tag_labels.items():
|
|
2238
|
-
if key in supported_enum_labels:
|
|
2239
|
-
filtered_labels[key] = value
|
|
2240
|
-
|
|
2241
|
-
for label in supported_enum_labels:
|
|
2242
|
-
if label not in filtered_labels:
|
|
2243
|
-
filtered_labels[label] = None
|
|
2244
|
-
|
|
2245
|
-
return filtered_labels
|
|
2246
|
-
|
|
2247
|
-
|
|
2248
|
-
def get_custom_labels_from_metadata(metadata: dict) -> Dict[str, str]:
|
|
2249
|
-
"""
|
|
2250
|
-
Get custom labels from metadata
|
|
2251
|
-
"""
|
|
2252
|
-
keys = litellm.custom_prometheus_metadata_labels
|
|
2253
|
-
if keys is None or len(keys) == 0:
|
|
2254
|
-
return {}
|
|
2255
|
-
|
|
2256
|
-
result: Dict[str, str] = {}
|
|
2257
|
-
|
|
2258
|
-
for key in keys:
|
|
2259
|
-
# Split the dot notation key into parts
|
|
2260
|
-
original_key = key
|
|
2261
|
-
key = key.replace("metadata.", "", 1) if key.startswith("metadata.") else key
|
|
2262
|
-
|
|
2263
|
-
keys_parts = key.split(".")
|
|
2264
|
-
# Traverse through the dictionary using the parts
|
|
2265
|
-
value = metadata
|
|
2266
|
-
for part in keys_parts:
|
|
2267
|
-
value = value.get(part, None) # Get the value, return None if not found
|
|
2268
|
-
if value is None:
|
|
2269
|
-
break
|
|
2270
|
-
|
|
2271
|
-
if value is not None and isinstance(value, str):
|
|
2272
|
-
result[original_key.replace(".", "_")] = value
|
|
2273
|
-
|
|
2274
|
-
return result
|
|
2275
|
-
|
|
2276
|
-
|
|
2277
|
-
def _tag_matches_wildcard_configured_pattern(
|
|
2278
|
-
tags: List[str], configured_tag: str
|
|
2279
|
-
) -> bool:
|
|
2280
|
-
"""
|
|
2281
|
-
Check if any of the request tags matches a wildcard configured pattern
|
|
2282
|
-
|
|
2283
|
-
Args:
|
|
2284
|
-
tags: List[str] - The request tags
|
|
2285
|
-
configured_tag: str - The configured tag
|
|
2286
|
-
|
|
2287
|
-
Returns:
|
|
2288
|
-
bool - True if any of the request tags matches the configured tag, False otherwise
|
|
2289
|
-
|
|
2290
|
-
e.g.
|
|
2291
|
-
tags = ["User-Agent: curl/7.68.0", "User-Agent: python-requests/2.28.1", "prod"]
|
|
2292
|
-
configured_tag = "User-Agent: curl/*"
|
|
2293
|
-
_tag_matches_wildcard_configured_pattern(tags=tags, configured_tag=configured_tag) # True
|
|
2294
|
-
|
|
2295
|
-
configured_tag = "User-Agent: python-requests/*"
|
|
2296
|
-
_tag_matches_wildcard_configured_pattern(tags=tags, configured_tag=configured_tag) # True
|
|
2297
|
-
|
|
2298
|
-
configured_tag = "gm"
|
|
2299
|
-
_tag_matches_wildcard_configured_pattern(tags=tags, configured_tag=configured_tag) # False
|
|
2300
|
-
"""
|
|
2301
|
-
import re
|
|
2302
|
-
|
|
2303
|
-
from litellm.router_utils.pattern_match_deployments import PatternMatchRouter
|
|
2304
|
-
|
|
2305
|
-
pattern_router = PatternMatchRouter()
|
|
2306
|
-
regex_pattern = pattern_router._pattern_to_regex(configured_tag)
|
|
2307
|
-
return any(re.match(pattern=regex_pattern, string=tag) for tag in tags)
|
|
2308
|
-
|
|
2309
|
-
|
|
2310
|
-
def get_custom_labels_from_tags(tags: List[str]) -> Dict[str, str]:
|
|
2311
|
-
"""
|
|
2312
|
-
Get custom labels from tags based on admin configuration.
|
|
2313
|
-
|
|
2314
|
-
Supports both exact matches and wildcard patterns:
|
|
2315
|
-
- Exact match: "prod" matches "prod" exactly
|
|
2316
|
-
- Wildcard pattern: "User-Agent: curl/*" matches "User-Agent: curl/7.68.0"
|
|
2317
|
-
|
|
2318
|
-
Reuses PatternMatchRouter for wildcard pattern matching.
|
|
2319
|
-
|
|
2320
|
-
Returns dict of label_name: "true" if the tag matches the configured tag, "false" otherwise
|
|
2321
|
-
|
|
2322
|
-
{
|
|
2323
|
-
"tag_User-Agent_curl": "true",
|
|
2324
|
-
"tag_User-Agent_python_requests": "false",
|
|
2325
|
-
"tag_Environment_prod": "true",
|
|
2326
|
-
"tag_Environment_dev": "false",
|
|
2327
|
-
"tag_Service_api_gateway_v2": "true",
|
|
2328
|
-
"tag_Service_web_app_v1": "false",
|
|
2329
|
-
}
|
|
2330
|
-
"""
|
|
2331
|
-
import re
|
|
2332
|
-
|
|
2333
|
-
from litellm.router_utils.pattern_match_deployments import PatternMatchRouter
|
|
2334
|
-
from litellm.types.integrations.prometheus import _sanitize_prometheus_label_name
|
|
2335
|
-
|
|
2336
|
-
configured_tags = litellm.custom_prometheus_tags
|
|
2337
|
-
if configured_tags is None or len(configured_tags) == 0:
|
|
2338
|
-
return {}
|
|
2339
|
-
|
|
2340
|
-
result: Dict[str, str] = {}
|
|
2341
|
-
pattern_router = PatternMatchRouter()
|
|
2342
|
-
|
|
2343
|
-
for configured_tag in configured_tags:
|
|
2344
|
-
label_name = _sanitize_prometheus_label_name(f"tag_{configured_tag}")
|
|
2345
|
-
|
|
2346
|
-
# Check for exact match first (backwards compatibility)
|
|
2347
|
-
if configured_tag in tags:
|
|
2348
|
-
result[label_name] = "true"
|
|
2349
|
-
continue
|
|
2350
|
-
|
|
2351
|
-
# Use PatternMatchRouter for wildcard pattern matching
|
|
2352
|
-
if "*" in configured_tag and _tag_matches_wildcard_configured_pattern(
|
|
2353
|
-
tags=tags, configured_tag=configured_tag
|
|
2354
|
-
):
|
|
2355
|
-
result[label_name] = "true"
|
|
2356
|
-
continue
|
|
2357
|
-
|
|
2358
|
-
# No match found
|
|
2359
|
-
result[label_name] = "false"
|
|
2360
|
-
|
|
2361
|
-
return result
|