headroom-ai 0.2.13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- headroom/__init__.py +212 -0
- headroom/cache/__init__.py +76 -0
- headroom/cache/anthropic.py +517 -0
- headroom/cache/base.py +342 -0
- headroom/cache/compression_feedback.py +613 -0
- headroom/cache/compression_store.py +814 -0
- headroom/cache/dynamic_detector.py +1026 -0
- headroom/cache/google.py +884 -0
- headroom/cache/openai.py +584 -0
- headroom/cache/registry.py +175 -0
- headroom/cache/semantic.py +451 -0
- headroom/ccr/__init__.py +77 -0
- headroom/ccr/context_tracker.py +582 -0
- headroom/ccr/mcp_server.py +319 -0
- headroom/ccr/response_handler.py +772 -0
- headroom/ccr/tool_injection.py +415 -0
- headroom/cli.py +219 -0
- headroom/client.py +977 -0
- headroom/compression/__init__.py +42 -0
- headroom/compression/detector.py +424 -0
- headroom/compression/handlers/__init__.py +22 -0
- headroom/compression/handlers/base.py +219 -0
- headroom/compression/handlers/code_handler.py +506 -0
- headroom/compression/handlers/json_handler.py +418 -0
- headroom/compression/masks.py +345 -0
- headroom/compression/universal.py +465 -0
- headroom/config.py +474 -0
- headroom/exceptions.py +192 -0
- headroom/integrations/__init__.py +159 -0
- headroom/integrations/agno/__init__.py +53 -0
- headroom/integrations/agno/hooks.py +345 -0
- headroom/integrations/agno/model.py +625 -0
- headroom/integrations/agno/providers.py +154 -0
- headroom/integrations/langchain/__init__.py +106 -0
- headroom/integrations/langchain/agents.py +326 -0
- headroom/integrations/langchain/chat_model.py +1002 -0
- headroom/integrations/langchain/langsmith.py +324 -0
- headroom/integrations/langchain/memory.py +319 -0
- headroom/integrations/langchain/providers.py +200 -0
- headroom/integrations/langchain/retriever.py +371 -0
- headroom/integrations/langchain/streaming.py +341 -0
- headroom/integrations/mcp/__init__.py +37 -0
- headroom/integrations/mcp/server.py +533 -0
- headroom/memory/__init__.py +37 -0
- headroom/memory/extractor.py +390 -0
- headroom/memory/fast_store.py +621 -0
- headroom/memory/fast_wrapper.py +311 -0
- headroom/memory/inline_extractor.py +229 -0
- headroom/memory/store.py +434 -0
- headroom/memory/worker.py +260 -0
- headroom/memory/wrapper.py +321 -0
- headroom/models/__init__.py +39 -0
- headroom/models/registry.py +687 -0
- headroom/parser.py +293 -0
- headroom/pricing/__init__.py +51 -0
- headroom/pricing/anthropic_prices.py +81 -0
- headroom/pricing/litellm_pricing.py +113 -0
- headroom/pricing/openai_prices.py +91 -0
- headroom/pricing/registry.py +188 -0
- headroom/providers/__init__.py +61 -0
- headroom/providers/anthropic.py +621 -0
- headroom/providers/base.py +131 -0
- headroom/providers/cohere.py +362 -0
- headroom/providers/google.py +427 -0
- headroom/providers/litellm.py +297 -0
- headroom/providers/openai.py +566 -0
- headroom/providers/openai_compatible.py +521 -0
- headroom/proxy/__init__.py +19 -0
- headroom/proxy/server.py +2683 -0
- headroom/py.typed +0 -0
- headroom/relevance/__init__.py +124 -0
- headroom/relevance/base.py +106 -0
- headroom/relevance/bm25.py +255 -0
- headroom/relevance/embedding.py +255 -0
- headroom/relevance/hybrid.py +259 -0
- headroom/reporting/__init__.py +5 -0
- headroom/reporting/generator.py +549 -0
- headroom/storage/__init__.py +41 -0
- headroom/storage/base.py +125 -0
- headroom/storage/jsonl.py +220 -0
- headroom/storage/sqlite.py +289 -0
- headroom/telemetry/__init__.py +91 -0
- headroom/telemetry/collector.py +764 -0
- headroom/telemetry/models.py +880 -0
- headroom/telemetry/toin.py +1579 -0
- headroom/tokenizer.py +80 -0
- headroom/tokenizers/__init__.py +75 -0
- headroom/tokenizers/base.py +210 -0
- headroom/tokenizers/estimator.py +198 -0
- headroom/tokenizers/huggingface.py +317 -0
- headroom/tokenizers/mistral.py +245 -0
- headroom/tokenizers/registry.py +398 -0
- headroom/tokenizers/tiktoken_counter.py +248 -0
- headroom/transforms/__init__.py +106 -0
- headroom/transforms/base.py +57 -0
- headroom/transforms/cache_aligner.py +357 -0
- headroom/transforms/code_compressor.py +1313 -0
- headroom/transforms/content_detector.py +335 -0
- headroom/transforms/content_router.py +1158 -0
- headroom/transforms/llmlingua_compressor.py +638 -0
- headroom/transforms/log_compressor.py +529 -0
- headroom/transforms/pipeline.py +297 -0
- headroom/transforms/rolling_window.py +350 -0
- headroom/transforms/search_compressor.py +365 -0
- headroom/transforms/smart_crusher.py +2682 -0
- headroom/transforms/text_compressor.py +259 -0
- headroom/transforms/tool_crusher.py +338 -0
- headroom/utils.py +215 -0
- headroom_ai-0.2.13.dist-info/METADATA +315 -0
- headroom_ai-0.2.13.dist-info/RECORD +114 -0
- headroom_ai-0.2.13.dist-info/WHEEL +4 -0
- headroom_ai-0.2.13.dist-info/entry_points.txt +2 -0
- headroom_ai-0.2.13.dist-info/licenses/LICENSE +190 -0
- headroom_ai-0.2.13.dist-info/licenses/NOTICE +43 -0
headroom/storage/base.py
ADDED
|
@@ -0,0 +1,125 @@
|
|
|
1
|
+
"""Base storage interface for Headroom SDK."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from abc import ABC, abstractmethod
|
|
6
|
+
from collections.abc import Iterator
|
|
7
|
+
from datetime import datetime
|
|
8
|
+
from typing import Any
|
|
9
|
+
|
|
10
|
+
from ..config import RequestMetrics
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class Storage(ABC):
    """Abstract base class for metrics storage backends.

    Concrete implementations persist ``RequestMetrics`` records and expose
    point lookup, filtered queries, counting, full iteration, and aggregate
    statistics. A ``Storage`` can also be used as a context manager; any
    underlying resources are released on exit via :meth:`close`.
    """

    @abstractmethod
    def save(self, metrics: RequestMetrics) -> None:
        """
        Persist a single request-metrics record.

        Args:
            metrics: RequestMetrics to save.
        """

    @abstractmethod
    def get(self, request_id: str) -> RequestMetrics | None:
        """
        Look up a record by its request ID.

        Args:
            request_id: The request ID.

        Returns:
            The matching RequestMetrics, or None when no record exists.
        """

    @abstractmethod
    def query(
        self,
        start_time: datetime | None = None,
        end_time: datetime | None = None,
        model: str | None = None,
        mode: str | None = None,
        limit: int = 100,
        offset: int = 0,
    ) -> list[RequestMetrics]:
        """
        Return records matching the given filters.

        Args:
            start_time: Keep records with timestamp >= start_time.
            end_time: Keep records with timestamp <= end_time.
            model: Keep records for this model name only.
            mode: Keep records for this mode (audit/optimize) only.
            limit: Maximum number of records to return.
            offset: Number of matching records to skip.

        Returns:
            List of matching RequestMetrics.
        """

    @abstractmethod
    def count(
        self,
        start_time: datetime | None = None,
        end_time: datetime | None = None,
        model: str | None = None,
        mode: str | None = None,
    ) -> int:
        """
        Return how many records match the given filters.

        Args:
            start_time: Keep records with timestamp >= start_time.
            end_time: Keep records with timestamp <= end_time.
            model: Keep records for this model name only.
            mode: Keep records for this mode only.

        Returns:
            Count of matching records.
        """

    @abstractmethod
    def iter_all(self) -> Iterator[RequestMetrics]:
        """
        Yield every stored record.

        Yields:
            RequestMetrics objects, one per stored record.
        """

    @abstractmethod
    def get_summary_stats(
        self,
        start_time: datetime | None = None,
        end_time: datetime | None = None,
    ) -> dict[str, Any]:
        """
        Return aggregate statistics over the stored records.

        Args:
            start_time: Keep records with timestamp >= start_time.
            end_time: Keep records with timestamp <= end_time.

        Returns:
            Dict of summary stats (total_requests, tokens_saved, etc.)
        """

    def close(self) -> None:  # noqa: B027
        """Release any underlying resources; no-op by default."""

    def __enter__(self) -> Storage:
        """Enter the context manager, returning this storage instance."""
        return self

    def __exit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
        """Exit the context manager, closing the storage."""
        self.close()
|
|
@@ -0,0 +1,220 @@
|
|
|
1
|
+
"""JSONL file storage implementation for Headroom SDK."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
from collections.abc import Iterator
|
|
7
|
+
from datetime import datetime
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
from typing import Any
|
|
10
|
+
|
|
11
|
+
from ..config import RequestMetrics
|
|
12
|
+
from ..utils import format_timestamp, parse_timestamp
|
|
13
|
+
from .base import Storage
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class JSONLStorage(Storage):
    """Append-only JSONL file-based metrics storage.

    Each record is serialized as one JSON object per line. All read
    operations scan the entire file, so this backend suits modest data
    volumes; use SQLiteStorage for larger datasets.
    """

    def __init__(self, file_path: str):
        """
        Initialize JSONL storage.

        Args:
            file_path: Path to JSONL file (created, along with any missing
                parent directories, if it does not exist).
        """
        self.file_path = file_path
        self._ensure_file_exists()

    def _ensure_file_exists(self) -> None:
        """Create file and parent directories if they don't exist."""
        path = Path(self.file_path)
        path.parent.mkdir(parents=True, exist_ok=True)
        if not path.exists():
            path.touch()

    def _metrics_to_dict(self, metrics: RequestMetrics) -> dict[str, Any]:
        """Convert RequestMetrics to a JSON-serializable dict."""
        return {
            "id": metrics.request_id,
            "timestamp": format_timestamp(metrics.timestamp),
            "model": metrics.model,
            "stream": metrics.stream,
            "mode": metrics.mode,
            "tokens_input_before": metrics.tokens_input_before,
            "tokens_input_after": metrics.tokens_input_after,
            "tokens_output": metrics.tokens_output,
            "block_breakdown": metrics.block_breakdown,
            "waste_signals": metrics.waste_signals,
            "stable_prefix_hash": metrics.stable_prefix_hash,
            "cache_alignment_score": metrics.cache_alignment_score,
            "cached_tokens": metrics.cached_tokens,
            "transforms_applied": metrics.transforms_applied,
            "tool_units_dropped": metrics.tool_units_dropped,
            "turns_dropped": metrics.turns_dropped,
            "messages_hash": metrics.messages_hash,
            "error": metrics.error,
        }

    def _dict_to_metrics(self, data: dict[str, Any]) -> RequestMetrics:
        """Convert a stored dict back to RequestMetrics.

        Optional fields use .get() with defaults so records written by
        older versions (with fewer fields) still load.
        """
        return RequestMetrics(
            request_id=data["id"],
            timestamp=parse_timestamp(data["timestamp"]),
            model=data["model"],
            stream=data["stream"],
            mode=data["mode"],
            tokens_input_before=data["tokens_input_before"],
            tokens_input_after=data["tokens_input_after"],
            tokens_output=data.get("tokens_output"),
            block_breakdown=data.get("block_breakdown", {}),
            waste_signals=data.get("waste_signals", {}),
            stable_prefix_hash=data.get("stable_prefix_hash", ""),
            cache_alignment_score=data.get("cache_alignment_score", 0.0),
            cached_tokens=data.get("cached_tokens"),
            transforms_applied=data.get("transforms_applied", []),
            tool_units_dropped=data.get("tool_units_dropped", 0),
            turns_dropped=data.get("turns_dropped", 0),
            messages_hash=data.get("messages_hash", ""),
            error=data.get("error"),
        )

    @staticmethod
    def _matches(
        metrics: RequestMetrics,
        start_time: datetime | None,
        end_time: datetime | None,
        model: str | None,
        mode: str | None,
    ) -> bool:
        """Return True when *metrics* passes every provided filter."""
        if start_time is not None and metrics.timestamp < start_time:
            return False
        if end_time is not None and metrics.timestamp > end_time:
            return False
        if model is not None and metrics.model != model:
            return False
        if mode is not None and metrics.mode != mode:
            return False
        return True

    def save(self, metrics: RequestMetrics) -> None:
        """Append request metrics as a single JSON line."""
        data = self._metrics_to_dict(metrics)
        # Explicit encoding so records round-trip identically across platforms.
        with open(self.file_path, "a", encoding="utf-8") as f:
            f.write(json.dumps(data) + "\n")

    def get(self, request_id: str) -> RequestMetrics | None:
        """Get metrics by request ID (linear scan), or None if absent."""
        for metrics in self.iter_all():
            if metrics.request_id == request_id:
                return metrics
        return None

    def query(
        self,
        start_time: datetime | None = None,
        end_time: datetime | None = None,
        model: str | None = None,
        mode: str | None = None,
        limit: int = 100,
        offset: int = 0,
    ) -> list[RequestMetrics]:
        """Query metrics with filters.

        Results are sorted by timestamp descending (newest first); offset
        and limit paginate the *sorted* results, matching SQLiteStorage's
        ORDER BY ... LIMIT ... OFFSET semantics.

        Args:
            start_time: Filter by timestamp >= start_time.
            end_time: Filter by timestamp <= end_time.
            model: Filter by model name.
            mode: Filter by mode (audit/optimize).
            limit: Maximum results to return.
            offset: Number of results to skip.

        Returns:
            List of matching RequestMetrics, newest first.
        """
        # Collect all matches first, then sort, then paginate. Previously
        # offset/limit were applied in file (insertion) order during the
        # scan and the slice was only sorted afterwards, which produced
        # pages inconsistent with the newest-first ordering promised here
        # and with the SQLite backend (and returned one record for limit=0).
        matched = [
            m
            for m in self.iter_all()
            if self._matches(m, start_time, end_time, model, mode)
        ]
        matched.sort(key=lambda m: m.timestamp, reverse=True)
        return matched[offset : offset + limit]

    def count(
        self,
        start_time: datetime | None = None,
        end_time: datetime | None = None,
        model: str | None = None,
        mode: str | None = None,
    ) -> int:
        """Count metrics matching filters."""
        return sum(
            1
            for m in self.iter_all()
            if self._matches(m, start_time, end_time, model, mode)
        )

    def iter_all(self) -> Iterator[RequestMetrics]:
        """Iterate over all stored metrics in file (insertion) order."""
        if not Path(self.file_path).exists():
            return

        with open(self.file_path, encoding="utf-8") as f:
            for line in f:
                line = line.strip()
                if not line:
                    continue
                try:
                    data = json.loads(line)
                except json.JSONDecodeError:
                    # Skip malformed lines (e.g. a truncated final write).
                    continue
                yield self._dict_to_metrics(data)

    def get_summary_stats(
        self,
        start_time: datetime | None = None,
        end_time: datetime | None = None,
    ) -> dict[str, Any]:
        """Get summary statistics.

        Args:
            start_time: Filter by timestamp >= start_time.
            end_time: Filter by timestamp <= end_time.

        Returns:
            Dict with total_requests, token totals/savings, average cache
            alignment, and per-mode counts.
        """
        total_requests = 0
        total_tokens_before = 0
        total_tokens_after = 0
        total_cache_alignment = 0.0
        audit_count = 0
        optimize_count = 0

        for metrics in self.iter_all():
            if not self._matches(metrics, start_time, end_time, None, None):
                continue

            total_requests += 1
            total_tokens_before += metrics.tokens_input_before
            total_tokens_after += metrics.tokens_input_after
            total_cache_alignment += metrics.cache_alignment_score

            if metrics.mode == "audit":
                audit_count += 1
            elif metrics.mode == "optimize":
                optimize_count += 1

        total_tokens_saved = total_tokens_before - total_tokens_after
        avg_tokens_saved = total_tokens_saved / total_requests if total_requests > 0 else 0
        avg_cache_alignment = total_cache_alignment / total_requests if total_requests > 0 else 0

        return {
            "total_requests": total_requests,
            "total_tokens_before": total_tokens_before,
            "total_tokens_after": total_tokens_after,
            "total_tokens_saved": total_tokens_saved,
            "avg_tokens_saved": avg_tokens_saved,
            "avg_cache_alignment": avg_cache_alignment,
            "audit_count": audit_count,
            "optimize_count": optimize_count,
        }

    def close(self) -> None:
        """No-op: each operation opens and closes the file itself."""
|
|
@@ -0,0 +1,289 @@
|
|
|
1
|
+
"""SQLite storage implementation for Headroom SDK."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
import sqlite3
|
|
7
|
+
from collections.abc import Iterator
|
|
8
|
+
from datetime import datetime
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
from typing import Any
|
|
11
|
+
|
|
12
|
+
from ..config import RequestMetrics
|
|
13
|
+
from ..utils import format_timestamp, parse_timestamp
|
|
14
|
+
from .base import Storage
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class SQLiteStorage(Storage):
    """SQLite-based metrics storage.

    One connection is opened lazily on first use and shared by every
    operation until close() is called.
    """

    def __init__(self, db_path: str):
        """
        Initialize SQLite storage.

        Args:
            db_path: Path to SQLite database file.
        """
        self.db_path = db_path
        self._ensure_db_exists()
        self._conn: sqlite3.Connection | None = None

    def _ensure_db_exists(self) -> None:
        """Create the database file, schema, and indices if missing."""
        Path(self.db_path).parent.mkdir(parents=True, exist_ok=True)

        setup = sqlite3.connect(self.db_path)
        try:
            cur = setup.cursor()
            cur.execute("""
                CREATE TABLE IF NOT EXISTS requests (
                    id TEXT PRIMARY KEY,
                    timestamp TEXT NOT NULL,
                    model TEXT NOT NULL,
                    stream INTEGER NOT NULL,
                    mode TEXT NOT NULL,
                    tokens_input_before INTEGER NOT NULL,
                    tokens_input_after INTEGER NOT NULL,
                    tokens_output INTEGER,
                    block_breakdown TEXT NOT NULL,
                    waste_signals TEXT NOT NULL,
                    stable_prefix_hash TEXT,
                    cache_alignment_score REAL,
                    cached_tokens INTEGER,
                    transforms_applied TEXT NOT NULL,
                    tool_units_dropped INTEGER DEFAULT 0,
                    turns_dropped INTEGER DEFAULT 0,
                    messages_hash TEXT,
                    error TEXT
                )
            """)

            # Indices for the columns used by query()/count() filters.
            cur.execute("CREATE INDEX IF NOT EXISTS idx_timestamp ON requests(timestamp)")
            cur.execute("CREATE INDEX IF NOT EXISTS idx_model ON requests(model)")
            cur.execute("CREATE INDEX IF NOT EXISTS idx_mode ON requests(mode)")

            setup.commit()
        finally:
            setup.close()

    def _get_conn(self) -> sqlite3.Connection:
        """Return the shared connection, opening it on first use."""
        if self._conn is None:
            conn = sqlite3.connect(self.db_path)
            conn.row_factory = sqlite3.Row
            self._conn = conn
        return self._conn

    def _where_filters(
        self,
        start_time: datetime | None,
        end_time: datetime | None,
        model: str | None = None,
        mode: str | None = None,
    ) -> tuple[str, list[Any]]:
        """Build the AND-clause suffix and bind parameters for the filters."""
        suffix = ""
        binds: list[Any] = []
        if start_time is not None:
            suffix += " AND timestamp >= ?"
            binds.append(format_timestamp(start_time))
        if end_time is not None:
            suffix += " AND timestamp <= ?"
            binds.append(format_timestamp(end_time))
        if model is not None:
            suffix += " AND model = ?"
            binds.append(model)
        if mode is not None:
            suffix += " AND mode = ?"
            binds.append(mode)
        return suffix, binds

    def save(self, metrics: RequestMetrics) -> None:
        """Insert the record, replacing any existing row with the same id."""
        values = (
            metrics.request_id,
            format_timestamp(metrics.timestamp),
            metrics.model,
            1 if metrics.stream else 0,
            metrics.mode,
            metrics.tokens_input_before,
            metrics.tokens_input_after,
            metrics.tokens_output,
            json.dumps(metrics.block_breakdown),
            json.dumps(metrics.waste_signals),
            metrics.stable_prefix_hash,
            metrics.cache_alignment_score,
            metrics.cached_tokens,
            json.dumps(metrics.transforms_applied),
            metrics.tool_units_dropped,
            metrics.turns_dropped,
            metrics.messages_hash,
            metrics.error,
        )
        conn = self._get_conn()
        conn.execute(
            """
            INSERT OR REPLACE INTO requests (
                id, timestamp, model, stream, mode,
                tokens_input_before, tokens_input_after, tokens_output,
                block_breakdown, waste_signals,
                stable_prefix_hash, cache_alignment_score, cached_tokens,
                transforms_applied, tool_units_dropped, turns_dropped,
                messages_hash, error
            ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
            """,
            values,
        )
        conn.commit()

    def get(self, request_id: str) -> RequestMetrics | None:
        """Fetch a single record by request ID, or None when absent."""
        cur = self._get_conn().execute(
            "SELECT * FROM requests WHERE id = ?", (request_id,)
        )
        row = cur.fetchone()
        return None if row is None else self._row_to_metrics(row)

    def query(
        self,
        start_time: datetime | None = None,
        end_time: datetime | None = None,
        model: str | None = None,
        mode: str | None = None,
        limit: int = 100,
        offset: int = 0,
    ) -> list[RequestMetrics]:
        """Query metrics with filters, newest first, paginated in SQL."""
        suffix, binds = self._where_filters(start_time, end_time, model, mode)
        sql = (
            "SELECT * FROM requests WHERE 1=1"
            + suffix
            + " ORDER BY timestamp DESC LIMIT ? OFFSET ?"
        )
        cur = self._get_conn().execute(sql, binds + [limit, offset])
        return [self._row_to_metrics(record) for record in cur.fetchall()]

    def count(
        self,
        start_time: datetime | None = None,
        end_time: datetime | None = None,
        model: str | None = None,
        mode: str | None = None,
    ) -> int:
        """Count metrics matching filters."""
        suffix, binds = self._where_filters(start_time, end_time, model, mode)
        cur = self._get_conn().execute(
            "SELECT COUNT(*) FROM requests WHERE 1=1" + suffix, binds
        )
        (total,) = cur.fetchone()
        return int(total) if total is not None else 0

    def iter_all(self) -> Iterator[RequestMetrics]:
        """Yield every stored record, ordered by timestamp ascending."""
        cur = self._get_conn().execute("SELECT * FROM requests ORDER BY timestamp")
        for record in cur:
            yield self._row_to_metrics(record)

    def get_summary_stats(
        self,
        start_time: datetime | None = None,
        end_time: datetime | None = None,
    ) -> dict[str, Any]:
        """Aggregate summary statistics for records in the time window."""
        suffix, binds = self._where_filters(start_time, end_time)
        where_clause = "WHERE 1=1" + suffix

        cur = self._get_conn().execute(
            f"""
            SELECT
                COUNT(*) as total_requests,
                SUM(tokens_input_before) as total_tokens_before,
                SUM(tokens_input_after) as total_tokens_after,
                SUM(tokens_input_before - tokens_input_after) as total_tokens_saved,
                AVG(tokens_input_before - tokens_input_after) as avg_tokens_saved,
                AVG(cache_alignment_score) as avg_cache_alignment,
                SUM(CASE WHEN mode = 'audit' THEN 1 ELSE 0 END) as audit_count,
                SUM(CASE WHEN mode = 'optimize' THEN 1 ELSE 0 END) as optimize_count
            FROM requests
            {where_clause}
            """,
            binds,
        )
        stats_row = cur.fetchone()

        # SQL aggregates over an empty set yield NULL; normalize to 0.
        keys = (
            "total_requests",
            "total_tokens_before",
            "total_tokens_after",
            "total_tokens_saved",
            "avg_tokens_saved",
            "avg_cache_alignment",
            "audit_count",
            "optimize_count",
        )
        return {name: (value or 0) for name, value in zip(keys, stats_row)}

    def _row_to_metrics(self, row: sqlite3.Row) -> RequestMetrics:
        """Rebuild a RequestMetrics from a database row.

        NULLs in optional columns are normalized to the empty/zero
        defaults used by the JSONL backend.
        """
        return RequestMetrics(
            request_id=row["id"],
            timestamp=parse_timestamp(row["timestamp"]),
            model=row["model"],
            stream=bool(row["stream"]),
            mode=row["mode"],
            tokens_input_before=row["tokens_input_before"],
            tokens_input_after=row["tokens_input_after"],
            tokens_output=row["tokens_output"],
            block_breakdown=json.loads(row["block_breakdown"]),
            waste_signals=json.loads(row["waste_signals"]),
            stable_prefix_hash=row["stable_prefix_hash"] or "",
            cache_alignment_score=row["cache_alignment_score"] or 0.0,
            cached_tokens=row["cached_tokens"],
            transforms_applied=json.loads(row["transforms_applied"]),
            tool_units_dropped=row["tool_units_dropped"] or 0,
            turns_dropped=row["turns_dropped"] or 0,
            messages_hash=row["messages_hash"] or "",
            error=row["error"],
        )

    def close(self) -> None:
        """Close the database connection, if one was opened."""
        conn, self._conn = self._conn, None
        if conn is not None:
            conn.close()
|