litellm-pulse 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- litellm_pulse/__init__.py +1 -0
- litellm_pulse/app.py +389 -0
- litellm_pulse/db.py +179 -0
- litellm_pulse/parser.py +39 -0
- litellm_pulse-0.2.0.dist-info/METADATA +500 -0
- litellm_pulse-0.2.0.dist-info/RECORD +9 -0
- litellm_pulse-0.2.0.dist-info/WHEEL +4 -0
- litellm_pulse-0.2.0.dist-info/entry_points.txt +2 -0
- litellm_pulse-0.2.0.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
# Package version string; the distribution METADATA declares the same value.
__version__ = "0.2.0"
|
litellm_pulse/app.py
ADDED
|
@@ -0,0 +1,389 @@
|
|
|
1
|
+
"""LiteLLM Pulse — a lightweight LiteLLM metrics exporter with SQLite time-series storage."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import asyncio
|
|
6
|
+
import logging
|
|
7
|
+
import os
|
|
8
|
+
from collections import deque
|
|
9
|
+
from contextlib import asynccontextmanager, suppress
|
|
10
|
+
from datetime import UTC, datetime, timedelta, tzinfo
|
|
11
|
+
from typing import Any
|
|
12
|
+
from zoneinfo import ZoneInfo, ZoneInfoNotFoundError
|
|
13
|
+
|
|
14
|
+
import httpx
|
|
15
|
+
import uvicorn
|
|
16
|
+
from fastapi import FastAPI
|
|
17
|
+
from fastapi.responses import JSONResponse
|
|
18
|
+
|
|
19
|
+
from .db import (
|
|
20
|
+
METRIC_KEYS,
|
|
21
|
+
get_history,
|
|
22
|
+
get_latest,
|
|
23
|
+
get_window_aggregate,
|
|
24
|
+
open_db,
|
|
25
|
+
purge_old,
|
|
26
|
+
store_snapshot,
|
|
27
|
+
)
|
|
28
|
+
from .parser import parse_prometheus_text
|
|
29
|
+
|
|
30
|
+
logger = logging.getLogger("litellm-pulse")

# ---------------------------------------------------------------------------
# Configuration (all env-var driven, prefixed with LITELLM_PULSE_)
# ---------------------------------------------------------------------------
# Every value is read once at import time. The int()/float() conversions raise
# ValueError during import when a variable holds a non-numeric string.

# Prometheus text endpoint fetched by the scraper.
METRICS_URL = os.environ.get("LITELLM_PULSE_METRICS_URL", "http://litellm:4000/metrics/")
# Seconds slept between two scrapes.
SCRAPE_INTERVAL = int(os.environ.get("LITELLM_PULSE_SCRAPE_INTERVAL", "60"))
# Port and bind address handed to uvicorn.
PORT = int(os.environ.get("LITELLM_PULSE_PORT", "8000"))
HOST = os.environ.get("LITELLM_PULSE_HOST", "0.0.0.0")
# NOTE(review): TLS verification is only enabled when the variable equals
# "true" (case-insensitive); the default leaves it off — confirm this is intended.
VERIFY_SSL = os.environ.get("LITELLM_PULSE_VERIFY_SSL", "false").lower() == "true"
# Per-request timeout (seconds) passed to the HTTP client on each scrape.
SCRAPE_TIMEOUT = float(os.environ.get("LITELLM_PULSE_SCRAPE_TIMEOUT", "30"))
# Upper-cased here for logging.basicConfig; lower-cased again for uvicorn.
LOG_LEVEL = os.environ.get("LITELLM_PULSE_LOG_LEVEL", "info").upper()
# SQLite file location and how many days of scrapes the hourly purge keeps.
DB_PATH = os.environ.get("LITELLM_PULSE_DB_PATH", "./data/litellm_pulse.db")
DB_RETENTION_DAYS = int(os.environ.get("LITELLM_PULSE_DB_RETENTION_DAYS", "90"))
# Capacity of the in-memory history deque; a value <= 0 disables it.
HISTORY_SIZE = int(os.environ.get("LITELLM_PULSE_HISTORY_SIZE", "168"))
# Optional bearer token for the metrics endpoint; empty/blank means no auth header.
METRICS_API_KEY = os.environ.get("LITELLM_PULSE_METRICS_API_KEY", "")
|
|
47
|
+
|
|
48
|
+
# Timezone used for API timestamps and for day/week/month window boundaries.
# Stored timestamps are always UTC; any failure to resolve the configured
# zone is logged and UTC is used instead.
_tz_name = os.environ.get("LITELLM_PULSE_TIMEZONE", "UTC")
_TZ: tzinfo
try:
    _TZ = ZoneInfo(_tz_name)
except ZoneInfoNotFoundError:
    logger.warning("Unknown timezone %r — falling back to UTC", _tz_name)
    _TZ = UTC
except Exception:
    logger.exception("Failed to load timezone %r — falling back to UTC", _tz_name)
    _TZ = UTC
|
|
57
|
+
|
|
58
|
+
# Friendly name -> default LiteLLM Prometheus metric name.
# Any entry can be replaced through LITELLM_PULSE_METRIC_<FRIENDLY_NAME>.
DEFAULT_METRIC_MAP = dict(
    requests="litellm_proxy_total_requests_metric_total",
    failed_requests="litellm_proxy_failed_requests_metric_total",
    tokens="litellm_total_tokens_metric_total",
    input_tokens="litellm_input_tokens_metric_total",
    output_tokens="litellm_output_tokens_metric_total",
    reasoning_tokens="litellm_output_reasoning_tokens_metric_total",
    cost="litellm_spend_metric_total",
    in_flight_requests="litellm_in_flight_requests",
)

# Effective mapping after applying the per-metric environment overrides.
METRIC_MAP: dict[str, str] = {
    name: os.environ.get(f"LITELLM_PULSE_METRIC_{name.upper()}", default_prom)
    for name, default_prom in DEFAULT_METRIC_MAP.items()
}
|
|
74
|
+
|
|
75
|
+
# ---------------------------------------------------------------------------
# State
# ---------------------------------------------------------------------------

# Latest parse result: Prometheus metric name -> summed value.
_raw_metrics: dict[str, float] = {}
# Baseline for delta computation: the previous stored snapshot's raw values
# (seeded from the database at startup when a row exists).
_previous_raw: dict[str, float] = {}
# UTC time of the last successful fetch/parse; None until one succeeds.
_last_scrape: datetime | None = None
# Text of the most recent scrape failure; reset to None after a successful fetch.
_last_error: str | None = None
# Bounded in-memory snapshot buffer, or None when HISTORY_SIZE <= 0.
_history: deque[dict[str, Any]] | None = deque(maxlen=HISTORY_SIZE) if HISTORY_SIZE > 0 else None
_db: Any = None  # sqlite3.Connection
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
# ---------------------------------------------------------------------------
|
|
88
|
+
# Reset detection & delta computation
|
|
89
|
+
# ---------------------------------------------------------------------------
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
# Friendly names of metrics that are gauges (values that legitimately go up
# and down) rather than monotonically increasing counters.
_GAUGE_METRICS = frozenset({"in_flight_requests"})


def _detect_reset(prev: dict[str, float], curr: dict[str, float]) -> bool:
    """Return True if any tracked counter appears to have reset (dropped >50%).

    Gauges are skipped: a gauge such as the in-flight request count falls
    during normal operation (e.g. 1 -> 0), and treating that as a reset would
    make every counter's delta equal to its full cumulative value.

    Args:
        prev: Prometheus metric name -> value from the previous snapshot.
        curr: Prometheus metric name -> value from the current scrape.

    Returns:
        False when there is no previous snapshot or no counter dropped below
        half of its previous positive value; True otherwise.
    """
    if not prev:
        return False
    for friendly, prom_name in METRIC_MAP.items():
        if friendly in _GAUGE_METRICS:
            continue
        old_val = prev.get(prom_name)
        new_val = curr.get(prom_name)
        if old_val is None or new_val is None:
            # A metric missing on either side gives no evidence of a reset.
            continue
        if old_val > 0 and new_val < old_val * 0.5:
            return True
    return False
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
def _compute_deltas(
    prev: dict[str, float], curr: dict[str, float], is_reset: bool
) -> dict[str, float]:
    """Return the change of every mapped metric, keyed by friendly name.

    When ``is_reset`` is true the current value itself is the delta (the
    counter is assumed to have restarted from 0); otherwise it is the current
    value minus the previous one. Metrics missing on either side count as 0.0.
    """

    def _change(prom_name: str) -> float:
        current = curr.get(prom_name, 0.0)
        return current if is_reset else current - prev.get(prom_name, 0.0)

    return {friendly: _change(prom_name) for friendly, prom_name in METRIC_MAP.items()}
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
# ---------------------------------------------------------------------------
|
|
120
|
+
# Window boundaries
|
|
121
|
+
# ---------------------------------------------------------------------------
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
def _format_ts(ts: int | float) -> str:
    """Render a Unix timestamp as an ISO 8601 string in the configured timezone."""
    localized = datetime.fromtimestamp(ts, tz=_TZ)
    return localized.isoformat()
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
def _start_of_day() -> int:
    """Return the Unix timestamp of today's 00:00 in the configured timezone."""
    midnight = datetime.now(_TZ).replace(hour=0, minute=0, second=0, microsecond=0)
    return int(midnight.timestamp())
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
def _start_of_week() -> int:
    """Return the Unix timestamp of this week's Monday 00:00 in the configured timezone."""
    today = datetime.now(_TZ).replace(hour=0, minute=0, second=0, microsecond=0)
    # weekday() is 0 on Monday, so stepping back that many days lands on Monday.
    monday = today - timedelta(days=today.weekday())
    return int(monday.timestamp())
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
def _start_of_month() -> int:
    """Return the Unix timestamp of 00:00 on the 1st of the current month (configured timezone)."""
    first_of_month = datetime.now(_TZ).replace(
        day=1, hour=0, minute=0, second=0, microsecond=0
    )
    return int(first_of_month.timestamp())
|
|
146
|
+
|
|
147
|
+
|
|
148
|
+
# ---------------------------------------------------------------------------
|
|
149
|
+
# Scraper
|
|
150
|
+
# ---------------------------------------------------------------------------
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
async def _scrape(client: httpx.AsyncClient) -> None:
    """Fetch the metrics endpoint once and record the resulting snapshot.

    On success the parsed values replace ``_raw_metrics``, deltas against
    ``_previous_raw`` are computed, and the snapshot is written to SQLite
    and/or the in-memory history (whichever is enabled). Any exception is
    caught, logged as a warning and stored in ``_last_error``; in that case
    ``_previous_raw`` is left untouched.
    """
    global _raw_metrics, _previous_raw, _last_scrape, _last_error

    try:
        response = await client.get(METRICS_URL, timeout=SCRAPE_TIMEOUT)
        response.raise_for_status()
        _raw_metrics = parse_prometheus_text(response.text)
        scraped_at = datetime.now(UTC)
        _last_scrape, _last_error = scraped_at, None

        reset_detected = _detect_reset(_previous_raw, _raw_metrics)
        deltas = _compute_deltas(_previous_raw, _raw_metrics, reset_detected)
        if reset_detected:
            logger.warning("Counter reset detected — treating as fresh LiteLLM session")

        epoch = int(scraped_at.timestamp())

        if _db is not None:
            friendly_values = {
                friendly: _raw_metrics.get(prom_name, 0.0)
                for friendly, prom_name in METRIC_MAP.items()
            }
            store_snapshot(_db, epoch, friendly_values, deltas, reset_detected)

        if _history is not None:
            snapshot: dict[str, Any] = {"ts": epoch, "is_reset": reset_detected}
            for friendly, prom_name in METRIC_MAP.items():
                snapshot[friendly] = _raw_metrics.get(prom_name, 0.0)
                snapshot[f"{friendly}_delta"] = deltas.get(friendly, 0.0)
            _history.append(snapshot)

        # Only advance the baseline once the snapshot has been recorded.
        _previous_raw = dict(_raw_metrics)

        logger.debug(
            "Scraped %s — %d metric families, reset=%s",
            METRICS_URL,
            len(_raw_metrics),
            reset_detected,
        )
    except Exception as exc:
        _last_error = str(exc)
        logger.warning("Scrape failed: %s", exc)
|
|
201
|
+
|
|
202
|
+
|
|
203
|
+
def _build_auth_headers() -> dict[str, str] | None:
    """Return a bearer ``Authorization`` header for the configured API key.

    Returns None when the key is empty or consists only of whitespace.
    """
    token = (METRICS_API_KEY or "").strip()
    if not token:
        return None
    return {"Authorization": f"Bearer {token}"}
|
|
207
|
+
|
|
208
|
+
|
|
209
|
+
async def _scraper_loop() -> None:
    """Scrape forever, sleeping ``SCRAPE_INTERVAL`` seconds after each attempt.

    A single HTTP client (with the optional auth header) is shared by all
    scrapes and closed when the loop is cancelled.
    """
    auth_headers = _build_auth_headers()
    async with httpx.AsyncClient(verify=VERIFY_SSL, headers=auth_headers) as client:
        while True:
            await _scrape(client)
            await asyncio.sleep(SCRAPE_INTERVAL)
|
|
214
|
+
|
|
215
|
+
|
|
216
|
+
async def _purge_loop() -> None:
    """Once an hour, delete scrapes older than ``DB_RETENTION_DAYS`` days.

    The first purge happens after the first hour of sleep. Nothing is done
    while persistence is disabled, and a failing purge is only logged.
    """
    while True:
        await asyncio.sleep(3600)  # Run hourly
        if _db is None:
            continue
        try:
            purge_old(_db, DB_RETENTION_DAYS)
        except Exception as exc:
            logger.warning("Purge failed: %s", exc)
|
|
224
|
+
|
|
225
|
+
|
|
226
|
+
# ---------------------------------------------------------------------------
|
|
227
|
+
# FastAPI
|
|
228
|
+
# ---------------------------------------------------------------------------
|
|
229
|
+
|
|
230
|
+
|
|
231
|
+
@asynccontextmanager
async def lifespan(_app: FastAPI):
    """Application lifespan: open the DB, run the background loops, clean up.

    Startup configures logging, opens the SQLite database and, when a previous
    snapshot exists, restores it as the delta baseline. If the database cannot
    be opened or read the service keeps running without persistence.

    Shutdown (also when the application exits with an error) cancels both
    background tasks, waits for them, and closes the database.
    """
    global _db, _previous_raw
    logging.basicConfig(
        level=LOG_LEVEL,
        format="%(asctime)s %(levelname)-8s %(name)s — %(message)s",
    )

    try:
        _db = open_db(DB_PATH)
        latest = get_latest(_db)
        if latest:
            _previous_raw = {METRIC_MAP[k]: latest.get(k, 0.0) for k in METRIC_KEYS}
            logger.info("Recovered state from DB — %d metrics loaded", len(_previous_raw))
        else:
            logger.info("DB empty — starting fresh")
    except Exception as exc:
        logger.error("Failed to open DB: %s — continuing without persistence", exc)
        # open_db may have succeeded before a later step failed; do not leak
        # the connection in that case.
        if _db is not None:
            with suppress(Exception):
                _db.close()
        _db = None

    scrape_task = asyncio.create_task(_scraper_loop())
    purge_task = asyncio.create_task(_purge_loop())
    logger.info(
        "LiteLLM Pulse started — scraping %s every %ds, DB: %s, timezone: %s, auth: %s",
        METRICS_URL,
        SCRAPE_INTERVAL,
        DB_PATH if _db else "disabled",
        str(_TZ),
        "enabled" if METRICS_API_KEY and METRICS_API_KEY.strip() else "disabled",
    )
    try:
        yield
    finally:
        scrape_task.cancel()
        purge_task.cancel()
        for task in (scrape_task, purge_task):
            try:
                await task
            except asyncio.CancelledError:
                pass
            except Exception as exc:
                # A task that already crashed re-raises here; log it so the
                # remaining cleanup (second task, DB close) still runs.
                logger.error("Background task failed: %s", exc)
        if _db is not None:
            _db.close()
            _db = None
|
|
270
|
+
|
|
271
|
+
|
|
272
|
+
# Imported next to its only use so the advertised API version always follows
# the package version instead of a hard-coded placeholder.
from . import __version__

app = FastAPI(
    title="LiteLLM Pulse",
    description="A lightweight metrics exporter for LiteLLM with SQLite time-series storage.",
    version=__version__,
    lifespan=lifespan,
)
|
|
278
|
+
|
|
279
|
+
|
|
280
|
+
def _summary() -> dict:
    """Build the JSON summary served by the root and metrics endpoints.

    Contains, per friendly metric name, the latest raw value plus
    ``<name>_daily``, ``<name>_weekly`` and ``<name>_monthly`` delta sums
    (all 0.0 when persistence is disabled), followed by ``last_scrape``,
    ``source`` and, if the last scrape failed, ``error``.
    """
    data: dict[str, float | None | str] = {
        friendly: _raw_metrics.get(prom_name, 0.0)
        for friendly, prom_name in METRIC_MAP.items()
    }

    windows = ("daily", "weekly", "monthly")
    if _db is not None:
        totals = {
            "daily": get_window_aggregate(_db, _start_of_day()),
            "weekly": get_window_aggregate(_db, _start_of_week()),
            "monthly": get_window_aggregate(_db, _start_of_month()),
        }
    else:
        totals = {window: {} for window in windows}

    for friendly in METRIC_MAP:
        for window in windows:
            data[f"{friendly}_{window}"] = totals[window].get(friendly, 0.0)

    if _last_scrape:
        data["last_scrape"] = _format_ts(_last_scrape.timestamp())
    else:
        data["last_scrape"] = None
    data["source"] = METRICS_URL
    if _last_error:
        data["error"] = _last_error
    return data
|
|
305
|
+
|
|
306
|
+
|
|
307
|
+
@app.get("/")
async def root():
    """Serve the full metrics summary at the site root."""
    summary = _summary()
    return summary
|
|
310
|
+
|
|
311
|
+
|
|
312
|
+
@app.get("/api/v1/metrics")
async def all_metrics():
    """Serve the full metrics summary under the versioned API path."""
    summary = _summary()
    return summary
|
|
315
|
+
|
|
316
|
+
|
|
317
|
+
@app.get("/api/v1/metrics/{name}")
async def get_metric(name: str):
    """Return a single metric by friendly name.

    ``<metric>_daily`` / ``_weekly`` / ``_monthly`` names resolve to the
    windowed value from the summary; a bare friendly name resolves to the
    latest raw value. Anything else yields a 404 listing the known metrics.
    """

    def _stamp() -> str | None:
        return _format_ts(_last_scrape.timestamp()) if _last_scrape else None

    base, separator, suffix = name.rpartition("_")
    is_windowed = (
        bool(separator)
        and suffix in {"daily", "weekly", "monthly"}
        and base in METRIC_MAP
    )
    if is_windowed:
        return {
            "name": name,
            "value": _summary().get(name, 0.0),
            "last_scrape": _stamp(),
        }

    if name in METRIC_MAP:
        return {
            "name": name,
            "value": _raw_metrics.get(METRIC_MAP[name], 0.0),
            "last_scrape": _stamp(),
        }

    not_found = {
        "error": f"Unknown metric: {name}",
        "available": list(METRIC_MAP.keys()),
    }
    return JSONResponse(status_code=404, content=not_found)
|
|
344
|
+
|
|
345
|
+
|
|
346
|
+
@app.get("/api/v1/history")
async def history(limit: int = 168):
    """Return up to ``limit`` of the most recent snapshots, oldest first.

    Snapshots come from SQLite when persistence is enabled, otherwise from the
    in-memory buffer; ``source`` reports which one was used ("sqlite",
    "memory" or "disabled").

    A non-positive ``limit`` returns no snapshots. Without this guard a limit
    of 0 would return the whole in-memory buffer (``[-0:]`` slices everything)
    and a negative limit would return every SQLite row.
    """
    snapshots: list = []
    if _db is not None:
        source = "sqlite"
        if limit > 0:
            snapshots = get_history(_db, limit=limit, tz=_TZ)
    elif _history is not None:
        source = "memory"
        recent = list(_history)[-limit:] if limit > 0 else []
        for entry in recent:
            # Replace the internal epoch field with a formatted timestamp.
            out = {k: v for k, v in entry.items() if k != "ts"}
            out["timestamp"] = _format_ts(entry["ts"])
            snapshots.append(out)
    else:
        source = "disabled"
    return {
        "snapshots": snapshots,
        "count": len(snapshots),
        "source": source,
    }
|
|
367
|
+
|
|
368
|
+
|
|
369
|
+
@app.get("/raw")
async def raw_metrics():
    """Return the latest parsed metrics, keyed by Prometheus metric name."""
    latest = _raw_metrics
    return latest
|
|
372
|
+
|
|
373
|
+
|
|
374
|
+
@app.get("/health")
async def health():
    """Report "ok" once at least one scrape has succeeded, "starting" before that."""
    if _last_scrape:
        status = "ok"
    else:
        status = "starting"
    return {"status": status}
|
|
377
|
+
|
|
378
|
+
|
|
379
|
+
# ---------------------------------------------------------------------------
|
|
380
|
+
# Entry point
|
|
381
|
+
# ---------------------------------------------------------------------------
|
|
382
|
+
|
|
383
|
+
|
|
384
|
+
def main():
    """Console entry point: serve the FastAPI app with uvicorn."""
    server_options = {
        "host": HOST,
        "port": PORT,
        "log_level": LOG_LEVEL.lower(),
    }
    uvicorn.run(app, **server_options)


if __name__ == "__main__":
    main()
|
litellm_pulse/db.py
ADDED
|
@@ -0,0 +1,179 @@
|
|
|
1
|
+
"""SQLite-backed time-series storage for scrape snapshots."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import logging
|
|
6
|
+
import sqlite3
|
|
7
|
+
from datetime import UTC, datetime, tzinfo
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
|
|
10
|
+
logger = logging.getLogger("litellm-pulse")

# All friendly metric names — defines the columns in the scrapes table.
# Each key produces one ``raw_<key>`` and one ``delta_<key>`` column, and the
# list order fixes the column order used when creating and inserting rows.
METRIC_KEYS = [
    "requests",
    "failed_requests",
    "tokens",
    "input_tokens",
    "output_tokens",
    "reasoning_tokens",
    "cost",
    "in_flight_requests",
]
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def open_db(path: str) -> sqlite3.Connection:
    """Open (or create) the SQLite database with WAL mode and the scrapes table.

    An existing ``scrapes`` table that lacks some of the current metric
    columns is upgraded in place by adding them, so inserts keep working when
    the metric list has grown since the file was created.

    Args:
        path: Filesystem path to the .db file. Parent directories are created.

    Returns:
        A sqlite3 Connection with row factory set, WAL mode enabled.

    Raises:
        sqlite3.Error: If the database cannot be opened or initialised; the
            partially set-up connection is closed before re-raising.
    """
    db_path = Path(path)
    db_path.parent.mkdir(parents=True, exist_ok=True)

    conn = sqlite3.connect(str(db_path), check_same_thread=False)
    try:
        conn.row_factory = sqlite3.Row
        conn.execute("PRAGMA journal_mode=WAL")
        conn.execute("PRAGMA synchronous=NORMAL")

        # All raw_* columns first, then all delta_* columns.
        metric_columns = [
            f"{prefix}_{key}" for prefix in ("raw", "delta") for key in METRIC_KEYS
        ]
        columns = [
            "id INTEGER PRIMARY KEY AUTOINCREMENT",
            "ts INTEGER NOT NULL",
            "is_reset INTEGER DEFAULT 0",
            *(f"{column} REAL DEFAULT 0" for column in metric_columns),
        ]
        conn.execute(f"CREATE TABLE IF NOT EXISTS scrapes ({', '.join(columns)})")

        # CREATE TABLE IF NOT EXISTS leaves an older table untouched, so add
        # any metric column that the existing schema does not have yet.
        existing = {row["name"] for row in conn.execute("PRAGMA table_info(scrapes)")}
        for column in metric_columns:
            if column not in existing:
                conn.execute(f"ALTER TABLE scrapes ADD COLUMN {column} REAL DEFAULT 0")
                logger.info("Added missing column %s to scrapes table", column)

        conn.execute("CREATE INDEX IF NOT EXISTS idx_scrapes_ts ON scrapes(ts)")
        conn.commit()
    except Exception:
        conn.close()
        raise

    logger.info("SQLite database opened at %s", db_path)
    return conn
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def store_snapshot(
    conn: sqlite3.Connection,
    ts: int,
    raw: dict[str, float],
    deltas: dict[str, float],
    is_reset: bool,
) -> None:
    """Store one scrape snapshot with raw cumulative values and precomputed deltas.

    All values are passed as bound parameters rather than being formatted
    into the SQL text. Metrics missing from ``raw`` or ``deltas`` are stored
    as 0.0.

    Args:
        conn: SQLite connection.
        ts: Unix epoch timestamp (seconds).
        raw: Mapping of friendly metric names to raw cumulative values.
        deltas: Mapping of friendly metric names to delta values.
        is_reset: Whether a counter reset was detected this scrape.

    Raises:
        ValueError: If any value is NaN or infinite; nothing is written.
        sqlite3.Error: If the insert or commit fails.
    """
    import math

    metric_values = [
        float(source.get(key, 0.0)) for source in (raw, deltas) for key in METRIC_KEYS
    ]
    # Reject non-finite numbers explicitly so they can never end up in the
    # table (a NaN parameter would be stored as NULL).
    if not all(math.isfinite(value) for value in metric_values):
        raise ValueError("snapshot contains a non-finite metric value")

    column_names = [
        "ts",
        "is_reset",
        *(f"raw_{key}" for key in METRIC_KEYS),
        *(f"delta_{key}" for key in METRIC_KEYS),
    ]
    placeholders = ", ".join("?" * len(column_names))

    conn.execute(
        f"INSERT INTO scrapes ({', '.join(column_names)}) VALUES ({placeholders})",
        [ts, 1 if is_reset else 0, *metric_values],
    )
    conn.commit()
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
def get_latest(conn: sqlite3.Connection) -> dict[str, float] | None:
    """Fetch the raw cumulative values of the newest scrape.

    Args:
        conn: SQLite connection.

    Returns:
        Friendly metric name -> raw cumulative value for the row with the
        highest timestamp, or None when the table has no rows.
    """
    newest = conn.execute("SELECT * FROM scrapes ORDER BY ts DESC LIMIT 1").fetchone()
    if newest is None:
        return None

    snapshot: dict[str, float] = {}
    for key in METRIC_KEYS:
        snapshot[key] = newest[f"raw_{key}"]
    return snapshot
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
def get_latest_ts(conn: sqlite3.Connection) -> int | None:
    """Return the highest scrape timestamp (Unix seconds), or None for an empty table."""
    newest = conn.execute("SELECT ts FROM scrapes ORDER BY ts DESC LIMIT 1").fetchone()
    if not newest:
        return None
    return newest["ts"]
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
def get_window_aggregate(conn: sqlite3.Connection, start_ts: int) -> dict[str, float]:
    """Sum the stored deltas of every scrape taken at or after ``start_ts``.

    Args:
        conn: SQLite connection.
        start_ts: Unix epoch timestamp (seconds) — start of the aggregation window.

    Returns:
        Friendly metric name -> summed delta over the window; 0 for every
        metric when no scrape falls inside it.
    """
    select_list = ", ".join(
        [f"COALESCE(SUM(delta_{key}), 0) AS {key}" for key in METRIC_KEYS]
    )
    query = f"SELECT {select_list} FROM scrapes WHERE ts >= ?"
    totals = conn.execute(query, (start_ts,)).fetchone()
    # The select list follows METRIC_KEYS order, so values pair up positionally.
    return dict(zip(METRIC_KEYS, totals))
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
def get_history(conn: sqlite3.Connection, limit: int = 168, tz: tzinfo = UTC) -> list[dict]:
    """Return the newest ``limit`` scrape snapshots in chronological order.

    Args:
        conn: SQLite connection.
        limit: Maximum number of snapshots to return.
        tz: Timezone used to format the ``timestamp`` field; stored
            timestamps are Unix seconds and are converted here.

    Returns:
        List of dicts (oldest first), each with ``timestamp`` (ISO string in
        ``tz``), ``is_reset``, and per metric the raw value under its
        friendly name and the delta under ``<name>_delta``.
    """
    raw_columns = ", ".join([f"raw_{key}" for key in METRIC_KEYS])
    delta_columns = ", ".join([f"delta_{key}" for key in METRIC_KEYS])
    query = (
        f"SELECT ts, is_reset, {raw_columns}, {delta_columns} "
        "FROM scrapes ORDER BY ts DESC LIMIT ?"
    )
    newest_first = conn.execute(query, (limit,)).fetchall()

    snapshots = []
    # The query returns newest first; walk it backwards to emit oldest first.
    for row in reversed(newest_first):
        snapshot: dict[str, float | int | str] = {
            "timestamp": datetime.fromtimestamp(row["ts"], tz=tz).isoformat(),
            "is_reset": bool(row["is_reset"]),
        }
        for key in METRIC_KEYS:
            snapshot[key] = row[f"raw_{key}"]
            snapshot[f"{key}_delta"] = row[f"delta_{key}"]
        snapshots.append(snapshot)
    return snapshots
|
|
159
|
+
|
|
160
|
+
|
|
161
|
+
def purge_old(conn: sqlite3.Connection, retention_days: int) -> int:
    """Remove scrapes whose timestamp is more than ``retention_days`` days old.

    Args:
        conn: SQLite connection.
        retention_days: Number of days of data to retain.

    Returns:
        Number of rows deleted.
    """
    # SQLite computes the cutoff itself: the current time shifted back by the
    # retention period, expressed as Unix seconds.
    cutoff_modifier = f"-{retention_days} days"
    cursor = conn.execute(
        "DELETE FROM scrapes WHERE ts < strftime('%s', 'now', ?)",
        (cutoff_modifier,),
    )
    conn.commit()

    removed = cursor.rowcount
    if removed:
        logger.info("Purged %d old scrapes (older than %d days)", removed, retention_days)
    return removed
|
litellm_pulse/parser.py
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
"""Prometheus text format parser."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import re
|
|
6
|
+
from collections import defaultdict
|
|
7
|
+
|
|
8
|
+
# One sample line: metric name, optional {label set}, whitespace, numeric value.
# Inside the label set, quoted label values are consumed as whole units
# (honouring backslash escapes) so that a "}" inside a value — for example
# route="/items/{id}" — does not end the label set early.
_LINE_RE = re.compile(
    r"^(?P<name>[a-zA-Z_:][a-zA-Z0-9_:]*)"
    r'(?:\{(?:[^"}]|"(?:[^"\\]|\\.)*")*\})?'
    r"\s+"
    r"(?P<value>[-+]?\d*\.?\d+(?:[eE][-+]?\d+)?)"
)


def parse_prometheus_text(text: str) -> dict[str, float]:
    """Parse Prometheus text exposition format and sum values per metric family.

    Labels are ignored — all samples sharing the same metric name are summed.
    This is useful for counter metrics where you want the grand total across
    all label combinations. Blank lines, ``#`` comment lines and lines whose
    value is not a plain decimal number (e.g. ``NaN``) are skipped; anything
    after the value, such as a timestamp, is ignored.

    Args:
        text: Raw Prometheus text exposition format string.

    Returns:
        Dict mapping metric names to their summed float values.
    """
    totals: dict[str, float] = defaultdict(float)

    for line in text.splitlines():
        line = line.strip()
        if not line or line.startswith("#"):
            continue
        match = _LINE_RE.match(line)
        if match:
            totals[match.group("name")] += float(match.group("value"))

    return dict(totals)
|
|
@@ -0,0 +1,500 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: litellm-pulse
|
|
3
|
+
Version: 0.2.0
|
|
4
|
+
Summary: A lightweight metrics exporter for LiteLLM — scrapes Prometheus metrics and serves JSON for dashboards like Homepage and Home Assistant.
|
|
5
|
+
Project-URL: Homepage, https://github.com/jakepenzak/litellm-pulse
|
|
6
|
+
Project-URL: Repository, https://github.com/jakepenzak/litellm-pulse
|
|
7
|
+
Project-URL: Issues, https://github.com/jakepenzak/litellm-pulse/issues
|
|
8
|
+
Author: Jake Pieniazek
|
|
9
|
+
License: MIT
|
|
10
|
+
License-File: LICENSE
|
|
11
|
+
Keywords: home-assistant,homepage,litellm,metrics,prometheus
|
|
12
|
+
Classifier: Development Status :: 4 - Beta
|
|
13
|
+
Classifier: Framework :: FastAPI
|
|
14
|
+
Classifier: Intended Audience :: Developers
|
|
15
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
16
|
+
Classifier: Programming Language :: Python :: 3
|
|
17
|
+
Classifier: Topic :: System :: Monitoring
|
|
18
|
+
Requires-Python: >=3.11
|
|
19
|
+
Requires-Dist: fastapi>=0.115.0
|
|
20
|
+
Requires-Dist: httpx>=0.28.0
|
|
21
|
+
Requires-Dist: uvicorn[standard]>=0.34.0
|
|
22
|
+
Description-Content-Type: text/markdown
|
|
23
|
+
|
|
24
|
+
<p align="center">
|
|
25
|
+
<img src="https://raw.githubusercontent.com/jakepenzak/litellm-pulse/main/assets/litellm-pulse.svg" alt="LiteLLM Pulse" width="320">
|
|
26
|
+
</p>
|
|
27
|
+
|
|
28
|
+
<p align="center">
|
|
29
|
+
<a href="https://github.com/jakepenzak/litellm-pulse/releases"><img src="https://img.shields.io/github/v/release/jakepenzak/litellm-pulse" alt="GitHub release"></a>
|
|
30
|
+
<a href="https://pypi.org/project/litellm-pulse/"><img src="https://img.shields.io/pypi/v/litellm-pulse" alt="PyPI version"></a>
|
|
31
|
+
<a href="https://www.python.org/downloads/"><img src="https://img.shields.io/badge/python-3.11+-blue" alt="Python 3.11+"></a>
|
|
32
|
+
<a href="https://github.com/jakepenzak/litellm-pulse/blob/main/LICENSE"><img src="https://img.shields.io/github/license/jakepenzak/litellm-pulse" alt="License: MIT"></a>
|
|
33
|
+
<a href="https://github.com/jakepenzak/litellm-pulse"><img src="https://img.shields.io/badge/status-beta-yellow" alt="Development Status"></a>
|
|
34
|
+
<br>
|
|
35
|
+
<a href="https://github.com/jakepenzak/litellm-pulse/actions/workflows/ci.yml"><img src="https://github.com/jakepenzak/litellm-pulse/actions/workflows/ci.yml/badge.svg" alt="CI"></a>
|
|
36
|
+
<a href="https://github.com/jakepenzak/litellm-pulse/actions/workflows/release.yml"><img src="https://github.com/jakepenzak/litellm-pulse/actions/workflows/release.yml/badge.svg" alt="Release"></a>
|
|
37
|
+
</p>
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
A lightweight metrics exporter for [LiteLLM](https://github.com/BerriAI/litellm) — scrapes Prometheus metrics, stores them in SQLite, and serves JSON for dashboards like [Homepage](https://gethomepage.dev) and home automation systems like [Home Assistant](https://www.home-assistant.io).
|
|
42
|
+
|
|
43
|
+
## Why
|
|
44
|
+
|
|
45
|
+
LiteLLM exposes usage metrics in Prometheus format, but consuming them typically means standing up Prometheus, Grafana, and Alertmanager — a stack that's overkill if you just want to see "how much did I spend today?" on a dashboard. For homelab enthusiasts running LiteLLM alongside services like Homepage and Home Assistant, that's a lot of overhead for very simple needs.
|
|
46
|
+
|
|
47
|
+
LiteLLM Pulse is a lightweight observability layer that sits between LiteLLM's `/metrics` endpoint and a JSON-based REST API. It scrapes Prometheus text format on a schedule, stores time-series snapshots in SQLite, and serves clean JSON that any HTTP client can consume — no Prometheus server, no Grafana dashboards, no query language to learn.
|
|
48
|
+
|
|
49
|
+
It is **not** designed to replace Prometheus or Grafana. If you need multi-source metrics, complex alerting rules, or rich visual dashboards, use those tools. LiteLLM Pulse is for the 90% case: you have a single LiteLLM instance, you want today's token spend on your Homepage dashboard, and you don't want to run three more containers to get it.
|
|
50
|
+
|
|
51
|
+
## What It Does
|
|
52
|
+
|
|
53
|
+
LiteLLM exposes usage metrics (requests, tokens, spend) in Prometheus text format as cumulative counters. LiteLLM Pulse scrapes that endpoint on a schedule, parses the metrics, stores snapshots in SQLite, and serves them as clean JSON over a REST API.
|
|
54
|
+
|
|
55
|
+
Beyond raw cumulative totals, LiteLLM Pulse computes **deltas** (change since the last scrape) and **daily/weekly/monthly aggregates** (sum of deltas since the start of the current day/week/month) — all backed by SQLite for persistence across restarts.
|
|
56
|
+
|
|
57
|
+
```
|
|
58
|
+
LiteLLM /metrics ──scrape──▶ LiteLLM Pulse ──JSON──▶ Homepage / Home Assistant / anything
|
|
59
|
+
│
|
|
60
|
+
▼
|
|
61
|
+
SQLite
|
|
62
|
+
(time-series storage)
|
|
63
|
+
```
|
|
64
|
+
|
|
65
|
+
## LiteLLM Setup
|
|
66
|
+
|
|
67
|
+
> **The LiteLLM `/metrics` endpoint is not enabled by default.** You must configure LiteLLM to publish Prometheus metrics before LiteLLM Pulse can scrape them.
|
|
68
|
+
|
|
69
|
+
Add the `prometheus` callback to your LiteLLM proxy config (`config.yaml`):
|
|
70
|
+
|
|
71
|
+
```yaml
|
|
72
|
+
litellm_settings:
|
|
73
|
+
callbacks:
|
|
74
|
+
- prometheus
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
Start LiteLLM and verify the endpoint:
|
|
78
|
+
|
|
79
|
+
```bash
|
|
80
|
+
curl http://localhost:4000/metrics/
|
|
81
|
+
```
|
|
82
|
+
|
|
83
|
+
If you see Prometheus-formatted text, LiteLLM is publishing metrics and you're ready to set up LiteLLM Pulse.
|
|
84
|
+
|
|
85
|
+
See the [LiteLLM Prometheus docs](https://docs.litellm.ai/docs/proxy/prometheus) for advanced configuration options.
|
|
86
|
+
|
|
87
|
+
## Quick Start
|
|
88
|
+
|
|
89
|
+
### Docker Compose
|
|
90
|
+
|
|
91
|
+
```yaml
|
|
92
|
+
services:
|
|
93
|
+
litellm-pulse:
|
|
94
|
+
image: ghcr.io/jakepenzak/litellm-pulse:latest
|
|
95
|
+
container_name: litellm-pulse
|
|
96
|
+
restart: unless-stopped
|
|
97
|
+
environment:
|
|
98
|
+
LITELLM_PULSE_METRICS_URL: "http://litellm:4000/metrics/"
|
|
99
|
+
LITELLM_PULSE_SCRAPE_INTERVAL: "60"
|
|
100
|
+
LITELLM_PULSE_PORT: "8000"
|
|
101
|
+
LITELLM_PULSE_TIMEZONE: "America/New_York"
|
|
102
|
+
# LITELLM_PULSE_METRICS_API_KEY: "sk-your-litellm-api-key"
|
|
103
|
+
ports:
|
|
104
|
+
- "8000:8000"
|
|
105
|
+
volumes:
|
|
106
|
+
- litellm-pulse-data:/app/data
|
|
107
|
+
|
|
108
|
+
volumes:
|
|
109
|
+
litellm-pulse-data:
|
|
110
|
+
```
|
|
111
|
+
|
|
112
|
+
### Docker Run
|
|
113
|
+
|
|
114
|
+
```bash
|
|
115
|
+
docker run -d \
|
|
116
|
+
--name litellm-pulse \
|
|
117
|
+
-p 8000:8000 \
|
|
118
|
+
-e LITELLM_PULSE_METRICS_URL=http://litellm:4000/metrics/ \
|
|
119
|
+
-e LITELLM_PULSE_SCRAPE_INTERVAL=60 \
|
|
120
|
+
-e LITELLM_PULSE_TIMEZONE=America/New_York \
|
|
121
|
+
-v litellm-pulse-data:/app/data \
|
|
122
|
+
ghcr.io/jakepenzak/litellm-pulse:latest
|
|
123
|
+
```
|
|
124
|
+
|
|
125
|
+
### Running Locally (with uv)
|
|
126
|
+
|
|
127
|
+
```bash
|
|
128
|
+
uv sync
|
|
129
|
+
uv run litellm-pulse
|
|
130
|
+
```
|
|
131
|
+
|
|
132
|
+
### Running from PyPI
|
|
133
|
+
|
|
134
|
+
```bash
|
|
135
|
+
uvx litellm-pulse # run directly
|
|
136
|
+
uv tool install litellm-pulse && litellm-pulse # install permanently
|
|
137
|
+
pip install litellm-pulse && litellm-pulse # with pip
|
|
138
|
+
```
|
|
139
|
+
|
|
140
|
+
## Configuration
|
|
141
|
+
|
|
142
|
+
All configuration is via environment variables prefixed with `LITELLM_PULSE_`. No config files required.
|
|
143
|
+
|
|
144
|
+
### Core Settings
|
|
145
|
+
|
|
146
|
+
| Variable | Default | Description |
|
|
147
|
+
|---|---|---|
|
|
148
|
+
| `LITELLM_PULSE_METRICS_URL` | `http://litellm:4000/metrics/` | Prometheus metrics endpoint to scrape |
|
|
149
|
+
| `LITELLM_PULSE_SCRAPE_INTERVAL` | `60` | Seconds between scrapes |
|
|
150
|
+
| `LITELLM_PULSE_PORT` | `8000` | Port to serve the API on |
|
|
151
|
+
| `LITELLM_PULSE_HOST` | `0.0.0.0` | Address to bind to |
|
|
152
|
+
| `LITELLM_PULSE_VERIFY_SSL` | `false` | Whether to verify TLS certificates when scraping |
|
|
153
|
+
| `LITELLM_PULSE_SCRAPE_TIMEOUT` | `30` | Request timeout in seconds |
|
|
154
|
+
| `LITELLM_PULSE_LOG_LEVEL` | `info` | Log level (`debug`, `info`, `warning`, `error`) |
|
|
155
|
+
| `LITELLM_PULSE_TIMEZONE` | `UTC` | Timezone for API timestamps and day/week/month boundaries (IANA name, e.g. `America/New_York`) |
|
|
156
|
+
| `LITELLM_PULSE_METRICS_API_KEY` | _(empty)_ | LiteLLM API key for authenticated `/metrics` endpoints. Only needed if your LiteLLM proxy has [`require_auth_for_metrics_endpoint`](https://docs.litellm.ai/docs/proxy/prometheus#add-authentication-on-metrics-endpoint) set to `true`. |
|
|
157
|
+
|
|
158
|
+
> **When to use `LITELLM_PULSE_METRICS_API_KEY`:** If your LiteLLM proxy config includes `require_auth_for_metrics_endpoint: true` under `litellm_settings`, the `/metrics` endpoint requires authentication via a `Bearer` token. Set `LITELLM_PULSE_METRICS_API_KEY` to a valid LiteLLM API key so LiteLLM Pulse can authenticate. If this variable is left empty (the default), no `Authorization` header is sent — matching the default unauthenticated LiteLLM behavior.
|
|
159
|
+
|
|
160
|
+
### SQLite / Time-Series Settings
|
|
161
|
+
|
|
162
|
+
| Variable | Default | Description |
|
|
163
|
+
|---|---|---|
|
|
164
|
+
| `LITELLM_PULSE_DB_PATH` | `./data/litellm_pulse.db` | Path to the SQLite database file |
|
|
165
|
+
| `LITELLM_PULSE_DB_RETENTION_DAYS` | `90` | Auto-purge data older than N days (hourly purge cycle) |
|
|
166
|
+
| `LITELLM_PULSE_HISTORY_SIZE` | `168` | Max snapshots in the in-memory ring buffer (used as fallback if DB is unavailable) |
|
|
167
|
+
|
|
168
|
+
> **Timezone note:** The database always stores timestamps as UTC. The `LITELLM_PULSE_TIMEZONE` setting only affects API output (timestamps are converted to the configured timezone) and aggregate window boundaries (daily/weekly/monthly resets are computed against the configured timezone's midnight/Monday/1st). Set it to any valid [IANA timezone name](https://en.wikipedia.org/wiki/List_of_tz_database_time_zones) (e.g. `America/New_York`, `Europe/London`). Invalid values fall back to UTC with a warning.
|
|
169
|
+
|
|
170
|
+
### Metric Mappings
|
|
171
|
+
|
|
172
|
+
Each tracked metric maps a friendly name to a Prometheus metric name. Override any of them by setting the corresponding `LITELLM_PULSE_METRIC_*` env var.
|
|
173
|
+
|
|
174
|
+
| Variable | Default |
|
|
175
|
+
|---|---|
|
|
176
|
+
| `LITELLM_PULSE_METRIC_REQUESTS` | `litellm_proxy_total_requests_metric_total` |
|
|
177
|
+
| `LITELLM_PULSE_METRIC_FAILED_REQUESTS` | `litellm_proxy_failed_requests_metric_total` |
|
|
178
|
+
| `LITELLM_PULSE_METRIC_TOKENS` | `litellm_total_tokens_metric_total` |
|
|
179
|
+
| `LITELLM_PULSE_METRIC_INPUT_TOKENS` | `litellm_input_tokens_metric_total` |
|
|
180
|
+
| `LITELLM_PULSE_METRIC_OUTPUT_TOKENS` | `litellm_output_tokens_metric_total` |
|
|
181
|
+
| `LITELLM_PULSE_METRIC_REASONING_TOKENS` | `litellm_output_reasoning_tokens_metric_total` |
|
|
182
|
+
| `LITELLM_PULSE_METRIC_COST` | `litellm_spend_metric_total` |
|
|
183
|
+
| `LITELLM_PULSE_METRIC_IN_FLIGHT_REQUESTS` | `litellm_in_flight_requests` |
|
|
184
|
+
|
|
185
|
+
## API Endpoints
|
|
186
|
+
|
|
187
|
+
### `GET /` or `GET /api/v1/metrics`
|
|
188
|
+
|
|
189
|
+
Returns all tracked metrics: cumulative totals, daily/weekly/monthly aggregates, and metadata.
|
|
190
|
+
|
|
191
|
+
```json
|
|
192
|
+
{
|
|
193
|
+
"requests": 1234,
|
|
194
|
+
"failed_requests": 5,
|
|
195
|
+
"tokens": 567890,
|
|
196
|
+
"input_tokens": 300000,
|
|
197
|
+
"output_tokens": 267890,
|
|
198
|
+
"reasoning_tokens": 0,
|
|
199
|
+
"cost": 12.345678,
|
|
200
|
+
"in_flight_requests": 2,
|
|
201
|
+
"requests_daily": 215,
|
|
202
|
+
"requests_weekly": 1200,
|
|
203
|
+
"requests_monthly": 3400,
|
|
204
|
+
"tokens_daily": 45000,
|
|
205
|
+
"tokens_weekly": 310000,
|
|
206
|
+
"tokens_monthly": 780000,
|
|
207
|
+
"cost_daily": 0.02,
|
|
208
|
+
"cost_weekly": 0.15,
|
|
209
|
+
"cost_monthly": 0.52,
|
|
210
|
+
"last_scrape": "2025-06-21T12:00:00+00:00",
|
|
211
|
+
"source": "http://litellm:4000/metrics/"
|
|
212
|
+
}
|
|
213
|
+
```
|
|
214
|
+
|
|
215
|
+
Every tracked metric gets `_daily`, `_weekly`, and `_monthly` suffixes:
|
|
216
|
+
|
|
217
|
+
| Suffix | Meaning |
|
|
218
|
+
|---|---|
|
|
219
|
+
| _(none)_ | Cumulative total since LiteLLM started (raw counter value) |
|
|
220
|
+
| `_daily` | Sum of deltas since start of today (midnight in the configured timezone) |
|
|
221
|
+
| `_weekly` | Sum of deltas since start of this week (Monday in the configured timezone) |
|
|
222
|
+
| `_monthly` | Sum of deltas since start of this month (1st in the configured timezone) |
|
|
223
|
+
|
|
224
|
+
### `GET /api/v1/metrics/{name}`
|
|
225
|
+
|
|
226
|
+
Returns a single metric by friendly name. Also supports `_daily`, `_weekly`, `_monthly` suffixes.
|
|
227
|
+
|
|
228
|
+
```
|
|
229
|
+
GET /api/v1/metrics/cost
|
|
230
|
+
GET /api/v1/metrics/cost_daily
|
|
231
|
+
GET /api/v1/metrics/tokens_weekly
|
|
232
|
+
```
|
|
233
|
+
|
|
234
|
+
```json
|
|
235
|
+
{
|
|
236
|
+
"name": "cost_daily",
|
|
237
|
+
"value": 0.02,
|
|
238
|
+
"last_scrape": "2025-06-21T12:00:00+00:00"
|
|
239
|
+
}
|
|
240
|
+
```
|
|
241
|
+
|
|
242
|
+
### `GET /api/v1/history?limit=168`
|
|
243
|
+
|
|
244
|
+
Returns a JSON object whose `snapshots` array holds the most recent scrape snapshots (newest last), together with a `count` and the data `source`. Draws from SQLite if available, falling back to the in-memory ring buffer.
|
|
245
|
+
|
|
246
|
+
```json
|
|
247
|
+
{
|
|
248
|
+
"snapshots": [
|
|
249
|
+
{
|
|
250
|
+
"timestamp": "2025-06-21T12:00:00+00:00",
|
|
251
|
+
"is_reset": false,
|
|
252
|
+
"requests": 1234,
|
|
253
|
+
"requests_delta": 3,
|
|
254
|
+
"tokens": 567890,
|
|
255
|
+
"tokens_delta": 24500,
|
|
256
|
+
"cost": 12.3456,
|
|
257
|
+
"cost_delta": 0.0231
|
|
258
|
+
}
|
|
259
|
+
],
|
|
260
|
+
"count": 168,
|
|
261
|
+
"source": "sqlite"
|
|
262
|
+
}
|
|
263
|
+
```
|
|
264
|
+
|
|
265
|
+
### `GET /raw`
|
|
266
|
+
|
|
267
|
+
Returns all raw parsed Prometheus metrics (every metric family found, summed). Useful for debugging.
|
|
268
|
+
|
|
269
|
+
### `GET /health`
|
|
270
|
+
|
|
271
|
+
Returns `{"status": "ok"}` once the first successful scrape has completed.
|
|
272
|
+
|
|
273
|
+
## How Deltas & Aggregates Work
|
|
274
|
+
|
|
275
|
+
LiteLLM's Prometheus metrics are **counters** — they grow cumulatively and only reset when the LiteLLM process restarts. LiteLLM Pulse handles this as follows:
|
|
276
|
+
|
|
277
|
+
1. **Each scrape** stores the raw cumulative value and a computed delta (change since the previous scrape).
|
|
278
|
+
2. **Daily/weekly/monthly** values are computed as `SUM(delta)` for all scrapes within the time window.
|
|
279
|
+
3. **Counter reset detection**: If any counter drops by more than 50%, LiteLLM Pulse assumes LiteLLM restarted. The delta for that scrape is set to the current value (treating it as starting from 0), and `is_reset=true` is recorded in the database. This ensures daily/weekly/monthly sums remain correct even across LiteLLM restarts.
|
|
280
|
+
|
|
281
|
+
## State Recovery
|
|
282
|
+
|
|
283
|
+
| Scenario | Behavior |
|
|
284
|
+
|---|---|
|
|
285
|
+
| **Fresh start** | DB empty → first scrape has no deltas, second scrape onward has valid deltas |
|
|
286
|
+
| **App restart** | Reads last row from DB → restores last-known raw counters → seamless continuation |
|
|
287
|
+
| **LiteLLM restart** | Counters drop → reset detected → delta computed from 0, `is_reset=1` stored → daily sums remain correct |
|
|
288
|
+
| **DB corrupted** | `open_db()` catches SQLite errors, starts fresh with a warning log |
|
|
289
|
+
| **Disk full** | Writes fail → `error` field set in API response → recovers when disk space returns |
|
|
290
|
+
|
|
291
|
+
## Integrations
|
|
292
|
+
|
|
293
|
+
### Homepage (Custom API Widget)
|
|
294
|
+
|
|
295
|
+
Add a service entry in `services.yaml` with a `customapi` widget:
|
|
296
|
+
|
|
297
|
+
```yaml
|
|
298
|
+
- LiteLLM:
|
|
299
|
+
icon: https://cdn.jsdelivr.net/gh/selfhst/icons/png/litellm.png
|
|
300
|
+
href: https://litellm.home.lan
|
|
301
|
+
description: LLM proxy and management
|
|
302
|
+
widget:
|
|
303
|
+
type: customapi
|
|
304
|
+
url: http://litellm-pulse:8000/api/v1/metrics
|
|
305
|
+
refreshInterval: 60000
|
|
306
|
+
mappings:
|
|
307
|
+
- field: requests
|
|
308
|
+
label: Total Requests
|
|
309
|
+
format: number
|
|
310
|
+
- field: cost_daily
|
|
311
|
+
label: Spend Today
|
|
312
|
+
format: float
|
|
313
|
+
prefix: "$"
|
|
314
|
+
- field: cost_monthly
|
|
315
|
+
label: Spend This Month
|
|
316
|
+
format: float
|
|
317
|
+
prefix: "$"
|
|
318
|
+
- field: tokens_daily
|
|
319
|
+
label: Tokens Today
|
|
320
|
+
format: number
|
|
321
|
+
```
|
|
322
|
+
|
|
323
|
+
### Home Assistant (REST Sensors)
|
|
324
|
+
|
|
325
|
+
Add RESTful sensors to `configuration.yaml`. The [`rest`](https://www.home-assistant.io/integrations/rest) integration lets you define multiple sensors from a single HTTP request, which avoids polling the LiteLLM Pulse endpoint more than necessary:
|
|
326
|
+
|
|
327
|
+
```yaml
|
|
328
|
+
rest:
|
|
329
|
+
- resource: http://litellm-pulse:8000/api/v1/metrics
|
|
330
|
+
scan_interval: 60 # seconds between polls (default: 30)
|
|
331
|
+
timeout: 10 # seconds before the sensor is marked unavailable
|
|
332
|
+
verify_ssl: true
|
|
333
|
+
sensor:
|
|
334
|
+
- name: LiteLLM Requests
|
|
335
|
+
unique_id: litellm_requests
|
|
336
|
+
value_template: "{{ value_json.requests }}"
|
|
337
|
+
unit_of_measurement: "req"
|
|
338
|
+
# no device_class: a request count is not a duration, and "req" is not a valid unit for that device class
|
|
339
|
+
state_class: total_increasing
|
|
340
|
+
- name: LiteLLM Tokens
|
|
341
|
+
unique_id: litellm_tokens
|
|
342
|
+
value_template: "{{ value_json.tokens }}"
|
|
343
|
+
unit_of_measurement: "tokens"
|
|
344
|
+
state_class: total_increasing
|
|
345
|
+
- name: LiteLLM Spend
|
|
346
|
+
unique_id: litellm_spend
|
|
347
|
+
value_template: "{{ value_json.cost }}"
|
|
348
|
+
unit_of_measurement: "USD"
|
|
349
|
+
state_class: total_increasing
|
|
350
|
+
- name: LiteLLM Spend Today
|
|
351
|
+
unique_id: litellm_spend_today
|
|
352
|
+
value_template: "{{ value_json.cost_daily }}"
|
|
353
|
+
unit_of_measurement: "USD"
|
|
354
|
+
state_class: measurement
|
|
355
|
+
force_update: true
|
|
356
|
+
- name: LiteLLM Spend This Month
|
|
357
|
+
unique_id: litellm_spend_this_month
|
|
358
|
+
value_template: "{{ value_json.cost_monthly }}"
|
|
359
|
+
unit_of_measurement: "USD"
|
|
360
|
+
state_class: measurement
|
|
361
|
+
force_update: true
|
|
362
|
+
- name: LiteLLM Tokens Today
|
|
363
|
+
unique_id: litellm_tokens_today
|
|
364
|
+
value_template: "{{ value_json.tokens_daily }}"
|
|
365
|
+
unit_of_measurement: "tokens"
|
|
366
|
+
state_class: measurement
|
|
367
|
+
force_update: true
|
|
368
|
+
```
|
|
369
|
+
|
|
370
|
+
If you only need a single metric, you can use the [`sensor.rest`](https://www.home-assistant.io/integrations/sensor.rest/) platform instead, which polls the endpoint once per sensor:
|
|
371
|
+
|
|
372
|
+
```yaml
|
|
373
|
+
sensor:
|
|
374
|
+
- platform: rest
|
|
375
|
+
resource: http://litellm-pulse:8000/api/v1/metrics/cost_daily
|
|
376
|
+
name: LiteLLM Spend Today
|
|
377
|
+
unique_id: litellm_spend_today
|
|
378
|
+
value_template: "{{ value_json.value }}"
|
|
379
|
+
unit_of_measurement: "USD"
|
|
380
|
+
state_class: measurement
|
|
381
|
+
force_update: true
|
|
382
|
+
```
|
|
383
|
+
|
|
384
|
+
> **Tip:** To refresh a sensor on demand (outside the polling schedule), call the `homeassistant.update_entity` action targeting the sensor entity.
|
|
385
|
+
|
|
386
|
+
|
|
387
|
+
## Contributing
|
|
388
|
+
|
|
389
|
+
Contributions are welcome! Please read the guidelines below before opening a pull request.
|
|
390
|
+
|
|
391
|
+
### Pull Request Process
|
|
392
|
+
|
|
393
|
+
1. Fork the repository and create a feature branch from `main`
|
|
394
|
+
2. Run `uv run pre-commit install` to set up local git hooks
|
|
395
|
+
3. Make your changes, ensuring `pre-commit run --all-files` passes
|
|
396
|
+
4. Add or update tests as appropriate
|
|
397
|
+
5. Open a pull request with a clear description of the changes
|
|
398
|
+
|
|
399
|
+
### Conventional Commits
|
|
400
|
+
|
|
401
|
+
**Pull request titles must follow the [Conventional Commits](https://www.conventionalcommits.org/) specification.** This is enforced by branch protection rules and is required for the release automation to work correctly.
|
|
402
|
+
|
|
403
|
+
The format is:
|
|
404
|
+
|
|
405
|
+
```
|
|
406
|
+
<type>(<scope>): <description>
|
|
407
|
+
```
|
|
408
|
+
|
|
409
|
+
#### Allowed Types
|
|
410
|
+
|
|
411
|
+
| Type | Description |
|
|
412
|
+
|---|---|
|
|
413
|
+
| `feat` | A new feature |
|
|
414
|
+
| `fix` | A bug fix |
|
|
415
|
+
| `docs` | Documentation only changes |
|
|
416
|
+
| `style` | Changes that do not affect the meaning of the code (formatting, etc.) |
|
|
417
|
+
| `refactor` | A code change that neither fixes a bug nor adds a feature |
|
|
418
|
+
| `perf` | A code change that improves performance |
|
|
419
|
+
| `test` | Adding or correcting tests |
|
|
420
|
+
| `ci` | Changes to CI configuration files and scripts |
|
|
421
|
+
| `chore` | Other changes that don't modify src or test files |
|
|
422
|
+
| `build` | Changes that affect the build system or dependencies |
|
|
423
|
+
|
|
424
|
+
#### Examples
|
|
425
|
+
|
|
426
|
+
- `feat: add Prometheus push gateway support`
|
|
427
|
+
- `fix(db): handle negative deltas on counter reset`
|
|
428
|
+
- `docs: update Home Assistant integration examples`
|
|
429
|
+
- `ci: add Python 3.13 to test matrix`
|
|
430
|
+
- `refactor(parser): simplify metric extraction logic`
|
|
431
|
+
|
|
432
|
+
#### Scopes (optional)
|
|
433
|
+
|
|
434
|
+
Common scopes: `parser`, `db`, `app`, `ci`, `docker`, `deps`
|
|
435
|
+
|
|
436
|
+
### Releases
|
|
437
|
+
|
|
438
|
+
Releases are managed automatically by [release-please](https://github.com/googleapis/release-please) using the [manifest-driven](https://github.com/googleapis/release-please/blob/main/docs/manifest-releaser.md) approach. Configuration lives in [`.github/release-please-config.json`](.github/release-please-config.json) and version tracking in [`.github/.release-please-manifest.json`](.github/.release-please-manifest.json).
|
|
439
|
+
|
|
440
|
+
When PRs with conventional commit titles are merged to `main`:
|
|
441
|
+
|
|
442
|
+
1. release-please maintains a "release PR" that accumulates changes and updates `CHANGELOG.md`
|
|
443
|
+
2. When the release PR is merged, a new GitHub Release is created with an auto-generated changelog (with emoji section headers)
|
|
444
|
+
3. release-please bumps the version in `pyproject.toml` and `litellm_pulse/__init__.py`
|
|
445
|
+
4. The Docker build & publish workflow is triggered by the `release: published` event
|
|
446
|
+
5. Images are tagged with the full semantic version (e.g., `v1.2.3`), major/minor aliases (e.g., `1.2`, `1`), and `latest`
|
|
447
|
+
|
|
448
|
+
### Setup
|
|
449
|
+
|
|
450
|
+
```bash
|
|
451
|
+
make venv # sync deps + install pre-commit hooks
|
|
452
|
+
# or: uv sync --all-extras --all-groups --frozen && uv run pre-commit install
|
|
453
|
+
```
|
|
454
|
+
|
|
455
|
+
### Running
|
|
456
|
+
|
|
457
|
+
```bash
|
|
458
|
+
uv run litellm-pulse # run the server locally
|
|
459
|
+
# or: make run
|
|
460
|
+
```
|
|
461
|
+
|
|
462
|
+
### Linting & Formatting
|
|
463
|
+
|
|
464
|
+
Linting and formatting are enforced via [pre-commit](https://pre-commit.com) with [ruff](https://docs.astral.sh/ruff):
|
|
465
|
+
|
|
466
|
+
```bash
|
|
467
|
+
uv run pre-commit install # install git hooks (run once)
|
|
468
|
+
uv run pre-commit run --all-files # run all checks manually
|
|
469
|
+
```
|
|
470
|
+
|
|
471
|
+
This runs `ruff check --fix` and `ruff format` across the codebase. The same checks run in CI on every push and pull request.
|
|
472
|
+
|
|
473
|
+
### Testing
|
|
474
|
+
|
|
475
|
+
```bash
|
|
476
|
+
uv run pytest -v # run tests
|
|
477
|
+
# or: make tests # runs pytest
|
|
478
|
+
# or: make coverage # serve HTML coverage report at http://localhost:8080
|
|
479
|
+
```
|
|
480
|
+
|
|
481
|
+
> Run `make help` to see all available targets.
|
|
482
|
+
|
|
483
|
+
### CI/CD
|
|
484
|
+
|
|
485
|
+
| Workflow | Trigger | What it does |
|
|
486
|
+
|---|---|---|
|
|
487
|
+
| **CI** ([ci.yml](.github/workflows/ci.yml)) | Push to `main`, PRs | Runs pre-commit (ruff lint + format) and pytest on Python 3.11 & 3.12 |
|
|
488
|
+
| **Release** ([release.yml](.github/workflows/release.yml)) | Push to `main` | Runs `release-please`. When `release-please` creates a release, builds the Docker image and publishes it to `ghcr.io/jakepenzak/litellm-pulse` with semantic version tags, and publishes the package to PyPI |
|
|
489
|
+
|
|
490
|
+
## License
|
|
491
|
+
|
|
492
|
+
MIT — see [LICENSE](LICENSE).
|
|
493
|
+
|
|
494
|
+
## Disclaimer
|
|
495
|
+
|
|
496
|
+
LiteLLM Pulse is an independent, community-developed project created to provide monitoring and analytics for LiteLLM deployments.
|
|
497
|
+
|
|
498
|
+
This project is **not affiliated with, endorsed by, sponsored by, or maintained by** LiteLLM or Berri AI.
|
|
499
|
+
|
|
500
|
+
"LiteLLM" and any associated trademarks, service marks, logos, or trade names are the property of their respective owners and are used here solely to identify compatibility with the LiteLLM ecosystem.
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
litellm_pulse/__init__.py,sha256=Zn1KFblwuFHiDRdRAiRnDBRkbPttWh44jKa5zG2ov0E,22
|
|
2
|
+
litellm_pulse/app.py,sha256=z6JaKXkEEgQVT6Skh-9hvNyoZdMkFDScCAA3G4alFVw,13026
|
|
3
|
+
litellm_pulse/db.py,sha256=2Ffv88S95cp-klGHo1ghV2bWHC9C_knOdEjSanChe7U,5825
|
|
4
|
+
litellm_pulse/parser.py,sha256=Depph4QMG19NUqLt5L7gDCbbaFZyyCYo707fDgsImVU,1087
|
|
5
|
+
litellm_pulse-0.2.0.dist-info/METADATA,sha256=9Ou73d8kXNWYBm0Q-ThwaQjUu4l_EjG47r7ne-CFxZc,21015
|
|
6
|
+
litellm_pulse-0.2.0.dist-info/WHEEL,sha256=mffPy8wBnZQn2VnJUU5jE99KsxaSfiyMHV9Yt0aLVxs,87
|
|
7
|
+
litellm_pulse-0.2.0.dist-info/entry_points.txt,sha256=-S5Z4mFATM9MtooC3yYvCqGEfe0FU5eMA_AbmoO8haQ,57
|
|
8
|
+
litellm_pulse-0.2.0.dist-info/licenses/LICENSE,sha256=Tb9yMSN4l7RG0TSrXnzo1YE1zHUb53l4RLeVYaSPzZ0,1071
|
|
9
|
+
litellm_pulse-0.2.0.dist-info/RECORD,,
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Jake Pieniazek
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|