headroom-ai 0.2.13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- headroom/__init__.py +212 -0
- headroom/cache/__init__.py +76 -0
- headroom/cache/anthropic.py +517 -0
- headroom/cache/base.py +342 -0
- headroom/cache/compression_feedback.py +613 -0
- headroom/cache/compression_store.py +814 -0
- headroom/cache/dynamic_detector.py +1026 -0
- headroom/cache/google.py +884 -0
- headroom/cache/openai.py +584 -0
- headroom/cache/registry.py +175 -0
- headroom/cache/semantic.py +451 -0
- headroom/ccr/__init__.py +77 -0
- headroom/ccr/context_tracker.py +582 -0
- headroom/ccr/mcp_server.py +319 -0
- headroom/ccr/response_handler.py +772 -0
- headroom/ccr/tool_injection.py +415 -0
- headroom/cli.py +219 -0
- headroom/client.py +977 -0
- headroom/compression/__init__.py +42 -0
- headroom/compression/detector.py +424 -0
- headroom/compression/handlers/__init__.py +22 -0
- headroom/compression/handlers/base.py +219 -0
- headroom/compression/handlers/code_handler.py +506 -0
- headroom/compression/handlers/json_handler.py +418 -0
- headroom/compression/masks.py +345 -0
- headroom/compression/universal.py +465 -0
- headroom/config.py +474 -0
- headroom/exceptions.py +192 -0
- headroom/integrations/__init__.py +159 -0
- headroom/integrations/agno/__init__.py +53 -0
- headroom/integrations/agno/hooks.py +345 -0
- headroom/integrations/agno/model.py +625 -0
- headroom/integrations/agno/providers.py +154 -0
- headroom/integrations/langchain/__init__.py +106 -0
- headroom/integrations/langchain/agents.py +326 -0
- headroom/integrations/langchain/chat_model.py +1002 -0
- headroom/integrations/langchain/langsmith.py +324 -0
- headroom/integrations/langchain/memory.py +319 -0
- headroom/integrations/langchain/providers.py +200 -0
- headroom/integrations/langchain/retriever.py +371 -0
- headroom/integrations/langchain/streaming.py +341 -0
- headroom/integrations/mcp/__init__.py +37 -0
- headroom/integrations/mcp/server.py +533 -0
- headroom/memory/__init__.py +37 -0
- headroom/memory/extractor.py +390 -0
- headroom/memory/fast_store.py +621 -0
- headroom/memory/fast_wrapper.py +311 -0
- headroom/memory/inline_extractor.py +229 -0
- headroom/memory/store.py +434 -0
- headroom/memory/worker.py +260 -0
- headroom/memory/wrapper.py +321 -0
- headroom/models/__init__.py +39 -0
- headroom/models/registry.py +687 -0
- headroom/parser.py +293 -0
- headroom/pricing/__init__.py +51 -0
- headroom/pricing/anthropic_prices.py +81 -0
- headroom/pricing/litellm_pricing.py +113 -0
- headroom/pricing/openai_prices.py +91 -0
- headroom/pricing/registry.py +188 -0
- headroom/providers/__init__.py +61 -0
- headroom/providers/anthropic.py +621 -0
- headroom/providers/base.py +131 -0
- headroom/providers/cohere.py +362 -0
- headroom/providers/google.py +427 -0
- headroom/providers/litellm.py +297 -0
- headroom/providers/openai.py +566 -0
- headroom/providers/openai_compatible.py +521 -0
- headroom/proxy/__init__.py +19 -0
- headroom/proxy/server.py +2683 -0
- headroom/py.typed +0 -0
- headroom/relevance/__init__.py +124 -0
- headroom/relevance/base.py +106 -0
- headroom/relevance/bm25.py +255 -0
- headroom/relevance/embedding.py +255 -0
- headroom/relevance/hybrid.py +259 -0
- headroom/reporting/__init__.py +5 -0
- headroom/reporting/generator.py +549 -0
- headroom/storage/__init__.py +41 -0
- headroom/storage/base.py +125 -0
- headroom/storage/jsonl.py +220 -0
- headroom/storage/sqlite.py +289 -0
- headroom/telemetry/__init__.py +91 -0
- headroom/telemetry/collector.py +764 -0
- headroom/telemetry/models.py +880 -0
- headroom/telemetry/toin.py +1579 -0
- headroom/tokenizer.py +80 -0
- headroom/tokenizers/__init__.py +75 -0
- headroom/tokenizers/base.py +210 -0
- headroom/tokenizers/estimator.py +198 -0
- headroom/tokenizers/huggingface.py +317 -0
- headroom/tokenizers/mistral.py +245 -0
- headroom/tokenizers/registry.py +398 -0
- headroom/tokenizers/tiktoken_counter.py +248 -0
- headroom/transforms/__init__.py +106 -0
- headroom/transforms/base.py +57 -0
- headroom/transforms/cache_aligner.py +357 -0
- headroom/transforms/code_compressor.py +1313 -0
- headroom/transforms/content_detector.py +335 -0
- headroom/transforms/content_router.py +1158 -0
- headroom/transforms/llmlingua_compressor.py +638 -0
- headroom/transforms/log_compressor.py +529 -0
- headroom/transforms/pipeline.py +297 -0
- headroom/transforms/rolling_window.py +350 -0
- headroom/transforms/search_compressor.py +365 -0
- headroom/transforms/smart_crusher.py +2682 -0
- headroom/transforms/text_compressor.py +259 -0
- headroom/transforms/tool_crusher.py +338 -0
- headroom/utils.py +215 -0
- headroom_ai-0.2.13.dist-info/METADATA +315 -0
- headroom_ai-0.2.13.dist-info/RECORD +114 -0
- headroom_ai-0.2.13.dist-info/WHEEL +4 -0
- headroom_ai-0.2.13.dist-info/entry_points.txt +2 -0
- headroom_ai-0.2.13.dist-info/licenses/LICENSE +190 -0
- headroom_ai-0.2.13.dist-info/licenses/NOTICE +43 -0
|
@@ -0,0 +1,549 @@
|
|
|
1
|
+
"""HTML report generator for Headroom SDK."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from datetime import datetime
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from typing import TYPE_CHECKING, Any
|
|
8
|
+
|
|
9
|
+
from ..storage import create_storage
|
|
10
|
+
from ..utils import estimate_cost, format_cost
|
|
11
|
+
|
|
12
|
+
if TYPE_CHECKING:
|
|
13
|
+
pass
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def _get_jinja2_template(template_str: str):
    """Compile ``template_str`` into a Jinja2 template, importing jinja2 lazily.

    jinja2 is imported only when this function is called, so the module can
    be imported without it.  The template is compiled with autoescaping
    enabled: the report is HTML and interpolates values read back from
    storage (model names, modes, request IDs), which must not be able to
    inject markup into the page.

    Args:
        template_str: Jinja2 template source.

    Returns:
        The compiled ``jinja2.Template``.

    Raises:
        ImportError: If jinja2 is not installed.  The original import error
            is chained as the cause.
    """
    # Keep the try body to the import itself so that only a missing
    # dependency is translated into the installation hint.
    try:
        from jinja2 import Template
    except ImportError as e:
        raise ImportError(
            "jinja2 is required for report generation. "
            "Install with: pip install headroom-ai[reports]"
        ) from e

    return Template(template_str, autoescape=True)
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
# HTML template embedded as string.
#
# Context variables read by the template:
#   generated_at, period  - header strings
#   stats                 - total_requests, total_tokens_saved, avg_tokens_saved,
#                           estimated_savings, avg_cache_alignment, tpm_multiplier
#   waste_histogram       - items with label, percentage, tokens
#   top_requests          - items with request_id, model, mode, tokens_before,
#                           tokens_saved, cache_alignment
#   recommendations       - items with title, description
#   version               - optional; when empty or missing the footer omits
#                           the version number instead of showing a stale one
REPORT_TEMPLATE = """
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Headroom Report - {{ generated_at }}</title>
    <style>
        * {
            box-sizing: border-box;
            margin: 0;
            padding: 0;
        }
        body {
            font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
            line-height: 1.6;
            color: #333;
            background: #f5f5f5;
            padding: 20px;
        }
        .container {
            max-width: 1200px;
            margin: 0 auto;
        }
        header {
            background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
            color: white;
            padding: 30px;
            border-radius: 10px;
            margin-bottom: 20px;
        }
        header h1 {
            font-size: 2em;
            margin-bottom: 10px;
        }
        header p {
            opacity: 0.9;
        }
        .stats-grid {
            display: grid;
            grid-template-columns: repeat(auto-fit, minmax(200px, 1fr));
            gap: 20px;
            margin-bottom: 30px;
        }
        .stat-card {
            background: white;
            padding: 20px;
            border-radius: 10px;
            box-shadow: 0 2px 10px rgba(0,0,0,0.1);
        }
        .stat-card h3 {
            font-size: 0.9em;
            color: #666;
            margin-bottom: 5px;
        }
        .stat-card .value {
            font-size: 2em;
            font-weight: bold;
            color: #333;
        }
        .stat-card .value.positive {
            color: #22c55e;
        }
        .stat-card .value.warning {
            color: #f59e0b;
        }
        .section {
            background: white;
            padding: 25px;
            border-radius: 10px;
            box-shadow: 0 2px 10px rgba(0,0,0,0.1);
            margin-bottom: 20px;
        }
        .section h2 {
            font-size: 1.3em;
            margin-bottom: 20px;
            padding-bottom: 10px;
            border-bottom: 2px solid #eee;
        }
        .histogram {
            display: flex;
            flex-direction: column;
            gap: 10px;
        }
        .bar-row {
            display: flex;
            align-items: center;
            gap: 10px;
        }
        .bar-label {
            width: 120px;
            font-size: 0.9em;
            color: #666;
        }
        .bar-container {
            flex: 1;
            background: #eee;
            border-radius: 4px;
            height: 24px;
            overflow: hidden;
        }
        .bar {
            height: 100%;
            background: linear-gradient(90deg, #667eea, #764ba2);
            border-radius: 4px;
            display: flex;
            align-items: center;
            padding: 0 10px;
            color: white;
            font-size: 0.8em;
            min-width: fit-content;
        }
        table {
            width: 100%;
            border-collapse: collapse;
        }
        th, td {
            padding: 12px;
            text-align: left;
            border-bottom: 1px solid #eee;
        }
        th {
            background: #f9f9f9;
            font-weight: 600;
        }
        tr:hover {
            background: #f9f9f9;
        }
        .tag {
            display: inline-block;
            padding: 2px 8px;
            border-radius: 4px;
            font-size: 0.8em;
            font-weight: 500;
        }
        .tag.audit {
            background: #dbeafe;
            color: #1d4ed8;
        }
        .tag.optimize {
            background: #dcfce7;
            color: #16a34a;
        }
        .recommendations {
            list-style: none;
        }
        .recommendations li {
            padding: 15px;
            background: #f9f9f9;
            border-radius: 8px;
            margin-bottom: 10px;
            border-left: 4px solid #667eea;
        }
        .recommendations li strong {
            display: block;
            margin-bottom: 5px;
        }
        footer {
            text-align: center;
            padding: 20px;
            color: #666;
            font-size: 0.9em;
        }
    </style>
</head>
<body>
    <div class="container">
        <header>
            <h1>Headroom Context Analysis Report</h1>
            <p>Generated: {{ generated_at }} | Period: {{ period }}</p>
        </header>

        <div class="stats-grid">
            <div class="stat-card">
                <h3>Total Requests</h3>
                <div class="value">{{ stats.total_requests }}</div>
            </div>
            <div class="stat-card">
                <h3>Tokens Saved</h3>
                <div class="value positive">{{ "{:,}".format(stats.total_tokens_saved) }}</div>
            </div>
            <div class="stat-card">
                <h3>Avg Saved/Request</h3>
                <div class="value positive">{{ "{:,.0f}".format(stats.avg_tokens_saved) }}</div>
            </div>
            <div class="stat-card">
                <h3>Est. Cost Savings</h3>
                <div class="value positive">{{ stats.estimated_savings }}</div>
            </div>
            <div class="stat-card">
                <h3>Cache Alignment</h3>
                <div class="value {% if stats.avg_cache_alignment > 80 %}positive{% elif stats.avg_cache_alignment > 50 %}warning{% endif %}">{{ "{:.0f}%".format(stats.avg_cache_alignment) }}</div>
            </div>
            <div class="stat-card">
                <h3>TPM Headroom</h3>
                <div class="value positive">{{ "{:.1f}x".format(stats.tpm_multiplier) }}</div>
            </div>
        </div>

        <div class="section">
            <h2>Waste Histogram</h2>
            <div class="histogram">
                {% for item in waste_histogram %}
                <div class="bar-row">
                    <span class="bar-label">{{ item.label }}</span>
                    <div class="bar-container">
                        <div class="bar" style="width: {{ item.percentage }}%;">
                            {{ "{:,}".format(item.tokens) }} tokens
                        </div>
                    </div>
                </div>
                {% endfor %}
            </div>
        </div>

        <div class="section">
            <h2>Top 10 High-Waste Requests</h2>
            <table>
                <thead>
                    <tr>
                        <th>Request ID</th>
                        <th>Model</th>
                        <th>Mode</th>
                        <th>Tokens Before</th>
                        <th>Tokens Saved</th>
                        <th>Cache Align</th>
                    </tr>
                </thead>
                <tbody>
                    {% for req in top_requests %}
                    <tr>
                        <td><code>{{ req.request_id[:8] }}...</code></td>
                        <td>{{ req.model }}</td>
                        <td><span class="tag {{ req.mode }}">{{ req.mode }}</span></td>
                        <td>{{ "{:,}".format(req.tokens_before) }}</td>
                        <td>{{ "{:,}".format(req.tokens_saved) }}</td>
                        <td>{{ "{:.0f}%".format(req.cache_alignment) }}</td>
                    </tr>
                    {% endfor %}
                </tbody>
            </table>
        </div>

        <div class="section">
            <h2>Cache Alignment Analysis</h2>
            <p style="margin-bottom: 15px;">
                Average cache alignment score: <strong>{{ "{:.1f}%".format(stats.avg_cache_alignment) }}</strong>
            </p>
            <p style="color: #666;">
                {% if stats.avg_cache_alignment > 80 %}
                Excellent! Your prompts are well-aligned for provider caching.
                {% elif stats.avg_cache_alignment > 50 %}
                Good alignment, but there's room for improvement. Consider stabilizing dynamic content in system prompts.
                {% else %}
                Low cache alignment detected. Review system prompts for dynamic content (dates, timestamps, variable data).
                {% endif %}
            </p>
        </div>

        <div class="section">
            <h2>Recommendations</h2>
            <ul class="recommendations">
                {% for rec in recommendations %}
                <li>
                    <strong>{{ rec.title }}</strong>
                    {{ rec.description }}
                </li>
                {% endfor %}
            </ul>
        </div>

        <footer>
            Generated by Headroom SDK{% if version %} v{{ version }}{% endif %}
        </footer>
    </div>
</body>
</html>
"""
|
|
307
|
+
|
|
308
|
+
|
|
309
|
+
def generate_report(
    store_url: str,
    output_path: str = "report.html",
    start_time: datetime | None = None,
    end_time: datetime | None = None,
) -> str:
    """
    Generate HTML report from stored metrics.

    Opens the storage behind ``store_url``, reads the summary statistics,
    derives the headline figures (token multiplier, estimated cost savings),
    builds the waste histogram, top-request table and recommendations,
    renders ``REPORT_TEMPLATE`` and writes the HTML to ``output_path``.
    The storage is always closed, even when rendering or writing fails.

    Args:
        store_url: Storage URL (sqlite:// or jsonl://).
        output_path: Path for output HTML file.
        start_time: Filter by timestamp >= start_time.
        end_time: Filter by timestamp <= end_time.

    Returns:
        Path to generated report.

    Raises:
        ImportError: If jinja2 is not installed.
    """
    # Function-scope import: only this function needs the installed version.
    from importlib import metadata

    storage = create_storage(store_url)

    try:
        # Get summary stats
        stats = storage.get_summary_stats(start_time, end_time)

        # Ratio of input tokens before vs. after optimization; max(..., 1)
        # guards against dividing by zero when nothing remains afterwards.
        if stats["total_tokens_before"] > 0:
            tpm_multiplier = stats["total_tokens_before"] / max(stats["total_tokens_after"], 1)
        else:
            tpm_multiplier = 1.0

        # Estimate cost savings (using gpt-4o pricing); `or 0.0` covers a
        # falsy result from estimate_cost.
        cost_before = estimate_cost(stats["total_tokens_before"], 0, "gpt-4o") or 0.0
        cost_after = estimate_cost(stats["total_tokens_after"], 0, "gpt-4o") or 0.0
        estimated_savings = format_cost(cost_before - cost_after)

        # The template and the recommendations read these from `stats`.
        stats["tpm_multiplier"] = tpm_multiplier
        stats["estimated_savings"] = estimated_savings

        # Build waste histogram
        waste_histogram = _build_waste_histogram(storage, start_time, end_time)

        # Get top requests by waste
        top_requests = _get_top_waste_requests(storage, start_time, end_time, limit=10)

        # Generate recommendations
        recommendations = _generate_recommendations(stats, waste_histogram, top_requests)

        # Format period string
        if start_time and end_time:
            period = f"{start_time.date()} to {end_time.date()}"
        elif start_time:
            period = f"Since {start_time.date()}"
        elif end_time:
            period = f"Until {end_time.date()}"
        else:
            period = "All time"

        # Report the version of the installed distribution rather than a
        # hard-coded number; fall back to an empty string when the package
        # metadata is unavailable (e.g. running from a source checkout).
        try:
            sdk_version = metadata.version("headroom-ai")
        except metadata.PackageNotFoundError:
            sdk_version = ""

        # Render template
        template = _get_jinja2_template(REPORT_TEMPLATE)
        html = template.render(
            generated_at=datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
            period=period,
            stats=stats,
            waste_histogram=waste_histogram,
            top_requests=top_requests,
            recommendations=recommendations,
            version=sdk_version,
        )

        # Write output as UTF-8 to match the template's <meta charset>;
        # the platform default encoding is not UTF-8 everywhere.
        Path(output_path).write_text(html, encoding="utf-8")

        return output_path

    finally:
        storage.close()
|
|
384
|
+
|
|
385
|
+
|
|
386
|
+
def _build_waste_histogram(
    storage: Any,
    start_time: datetime | None,
    end_time: datetime | None,
) -> list[dict[str, Any]]:
    """Aggregate waste signals into histogram rows for the report.

    Every record from ``storage.iter_all()`` whose timestamp lies inside the
    optional ``[start_time, end_time]`` window contributes its
    ``waste_signals`` counts to six fixed buckets.  Savings that the recorded
    signals do not account for are attributed to the history-bloat bucket.

    Returns:
        One dict per bucket with ``label``, ``tokens`` and ``percentage``
        (relative to the largest bucket), ordered by tokens descending.
    """
    # Bucket key -> display label; insertion order also breaks ties in the
    # final ordering because the sort below is stable.
    display_names = {
        "json_bloat": "Tool JSON Bloat",
        "html_noise": "HTML Noise",
        "base64": "Base64 Blobs",
        "whitespace": "Whitespace",
        "dynamic_date": "Dynamic Dates",
        "history_bloat": "History Bloat",
    }
    bucket_totals = dict.fromkeys(display_names, 0)

    for record in storage.iter_all():
        # Skip records outside the requested time window.
        if (start_time and record.timestamp < start_time) or (
            end_time and record.timestamp > end_time
        ):
            continue

        signals = record.waste_signals
        for bucket in bucket_totals:
            bucket_totals[bucket] += signals.get(bucket, 0)

        # Whatever was saved beyond the recorded signals is counted as
        # history bloat (never negative).
        if record.tokens_input_before > record.tokens_input_after:
            saved = record.tokens_input_before - record.tokens_input_after
            bucket_totals["history_bloat"] += max(0, saved - sum(signals.values()))

    # Largest bucket is the 100% reference for the bar widths.
    peak = max(bucket_totals.values())

    return [
        {
            "label": display_names.get(bucket, bucket),
            "tokens": bucket_totals[bucket],
            "percentage": (bucket_totals[bucket] / peak * 100) if peak > 0 else 0,
        }
        for bucket in sorted(bucket_totals, key=bucket_totals.get, reverse=True)
    ]
|
|
443
|
+
|
|
444
|
+
|
|
445
|
+
def _get_top_waste_requests(
    storage: Any,
    start_time: datetime | None,
    end_time: datetime | None,
    limit: int = 10,
) -> list[dict[str, Any]]:
    """Return the ``limit`` largest requests inside the time window.

    Records come from ``storage.iter_all()`` and are filtered to the optional
    ``[start_time, end_time]`` window.  Requests are ranked by their input
    token count *before* optimization (``tokens_before``), largest first;
    ties keep storage order.

    Args:
        storage: Object exposing ``iter_all()``.
        start_time: Lower timestamp bound, or ``None`` for no bound.
        end_time: Upper timestamp bound, or ``None`` for no bound.
        limit: Maximum number of rows to return.

    Returns:
        Dicts with ``request_id``, ``model``, ``mode``, ``tokens_before``,
        ``tokens_saved`` and ``cache_alignment``.
    """
    # Function-scope import keeps the file's import block untouched.
    import heapq

    # Stream the rows so only `limit` candidates are held at a time instead
    # of materializing and sorting every stored request.
    rows = (
        {
            "request_id": metrics.request_id,
            "model": metrics.model,
            "mode": metrics.mode,
            "tokens_before": metrics.tokens_input_before,
            "tokens_saved": metrics.tokens_input_before - metrics.tokens_input_after,
            "cache_alignment": metrics.cache_alignment_score,
        }
        for metrics in storage.iter_all()
        if not (start_time and metrics.timestamp < start_time)
        and not (end_time and metrics.timestamp > end_time)
    )

    # Ranked by tokens_before, not tokens_saved.
    # NOTE(review): the previous comment said "sort by tokens saved" while the
    # code sorted by tokens_before; the existing ordering is kept - confirm
    # which ranking is intended.
    return heapq.nlargest(limit, rows, key=lambda row: row["tokens_before"])
|
|
477
|
+
|
|
478
|
+
|
|
479
|
+
def _generate_recommendations(
    stats: dict[str, Any],
    waste_histogram: list[dict[str, Any]],
    top_requests: list[dict[str, Any]],
) -> list[dict[str, str]]:
    """Derive the report's recommendation list from the aggregated figures.

    Each entry is a dict with ``title`` and ``description``.  Entries are
    produced, in order, for: low cache alignment, large tool-JSON bloat,
    large history bloat, a heavy skew towards audit mode, and finally one
    closing entry that depends on whether any tokens were saved.
    ``top_requests`` is accepted but not consulted.
    """

    def first_bucket_over(label: str, threshold: int) -> dict[str, Any] | None:
        # First histogram row with this label whose token count exceeds the threshold.
        return next(
            (row for row in waste_histogram if row["label"] == label and row["tokens"] > threshold),
            None,
        )

    advice: list[dict[str, str]] = []

    # Cache alignment below 50 is treated as low.
    if stats["avg_cache_alignment"] < 50:
        advice.append(
            dict(
                title="Improve Cache Alignment",
                description="Your cache alignment score is low. Consider moving dynamic content "
                "(dates, timestamps, session IDs) out of system prompts into user messages.",
            )
        )

    # Tool JSON bloat above 10,000 tokens.
    json_row = first_bucket_over("Tool JSON Bloat", 10000)
    if json_row is not None:
        advice.append(
            dict(
                title="Enable Tool Output Compression",
                description=f"Detected {json_row['tokens']:,} tokens of tool JSON bloat. "
                "Switch to 'optimize' mode and configure tool profiles to compress large tool outputs.",
            )
        )

    # History bloat above 50,000 tokens.
    history_row = first_bucket_over("History Bloat", 50000)
    if history_row is not None:
        advice.append(
            dict(
                title="Review Rolling Window Settings",
                description=f"Detected {history_row['tokens']:,} tokens of history bloat. "
                "Consider reducing keep_last_turns or increasing output_buffer_tokens.",
            )
        )

    # More than twice as many audit requests as optimize requests.
    if stats["audit_count"] > stats["optimize_count"] * 2:
        advice.append(
            dict(
                title="Switch to Optimize Mode",
                description=f"{stats['audit_count']} requests in audit mode vs {stats['optimize_count']} in optimize. "
                "Consider switching default_mode to 'optimize' to realize token savings.",
            )
        )

    # Closing entry: encouragement when savings exist, onboarding hint otherwise.
    if stats["total_tokens_saved"] > 0:
        closing = dict(
            title="Continue Monitoring",
            description=f"You've saved {stats['total_tokens_saved']:,} tokens so far. "
            f"Estimated cost savings: {stats['estimated_savings']}. Keep up the good work!",
        )
    else:
        closing = dict(
            title="Get Started",
            description="No optimizations applied yet. Try setting headroom_mode='optimize' "
            "on your next request to start seeing token savings.",
        )
    advice.append(closing)

    return advice
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
"""Storage modules for Headroom SDK."""
|
|
2
|
+
|
|
3
|
+
from .base import Storage
|
|
4
|
+
from .jsonl import JSONLStorage
|
|
5
|
+
from .sqlite import SQLiteStorage
|
|
6
|
+
|
|
7
|
+
# Public names of the storage package.  The create_storage factory defined
# in this module is listed too, so star-imports and API tooling expose it
# alongside the storage classes.
__all__ = [
    "Storage",
    "SQLiteStorage",
    "JSONLStorage",
    "create_storage",
]
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def create_storage(store_url: str) -> Storage:
    """
    Create a storage instance from URL.

    Supported URLs:
    - sqlite:///path/to/file.db
    - jsonl:///path/to/file.jsonl

    Only the leading ``scheme://`` is removed, so three slashes leave an
    absolute path (``/path/to/file.db``) and two slashes a relative one.
    A URL without a recognized scheme is passed unchanged to SQLite.

    Args:
        store_url: Storage URL.

    Returns:
        Storage instance.
    """
    backends = (
        ("sqlite://", SQLiteStorage),
        ("jsonl://", JSONLStorage),
    )
    for scheme, backend in backends:
        if store_url.startswith(scheme):
            # Slice off the prefix only; str.replace would also delete any
            # later occurrence of the scheme text inside the path.
            return backend(store_url[len(scheme):])

    # Default to SQLite
    return SQLiteStorage(store_url)
|