headroom-ai 0.2.13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- headroom/__init__.py +212 -0
- headroom/cache/__init__.py +76 -0
- headroom/cache/anthropic.py +517 -0
- headroom/cache/base.py +342 -0
- headroom/cache/compression_feedback.py +613 -0
- headroom/cache/compression_store.py +814 -0
- headroom/cache/dynamic_detector.py +1026 -0
- headroom/cache/google.py +884 -0
- headroom/cache/openai.py +584 -0
- headroom/cache/registry.py +175 -0
- headroom/cache/semantic.py +451 -0
- headroom/ccr/__init__.py +77 -0
- headroom/ccr/context_tracker.py +582 -0
- headroom/ccr/mcp_server.py +319 -0
- headroom/ccr/response_handler.py +772 -0
- headroom/ccr/tool_injection.py +415 -0
- headroom/cli.py +219 -0
- headroom/client.py +977 -0
- headroom/compression/__init__.py +42 -0
- headroom/compression/detector.py +424 -0
- headroom/compression/handlers/__init__.py +22 -0
- headroom/compression/handlers/base.py +219 -0
- headroom/compression/handlers/code_handler.py +506 -0
- headroom/compression/handlers/json_handler.py +418 -0
- headroom/compression/masks.py +345 -0
- headroom/compression/universal.py +465 -0
- headroom/config.py +474 -0
- headroom/exceptions.py +192 -0
- headroom/integrations/__init__.py +159 -0
- headroom/integrations/agno/__init__.py +53 -0
- headroom/integrations/agno/hooks.py +345 -0
- headroom/integrations/agno/model.py +625 -0
- headroom/integrations/agno/providers.py +154 -0
- headroom/integrations/langchain/__init__.py +106 -0
- headroom/integrations/langchain/agents.py +326 -0
- headroom/integrations/langchain/chat_model.py +1002 -0
- headroom/integrations/langchain/langsmith.py +324 -0
- headroom/integrations/langchain/memory.py +319 -0
- headroom/integrations/langchain/providers.py +200 -0
- headroom/integrations/langchain/retriever.py +371 -0
- headroom/integrations/langchain/streaming.py +341 -0
- headroom/integrations/mcp/__init__.py +37 -0
- headroom/integrations/mcp/server.py +533 -0
- headroom/memory/__init__.py +37 -0
- headroom/memory/extractor.py +390 -0
- headroom/memory/fast_store.py +621 -0
- headroom/memory/fast_wrapper.py +311 -0
- headroom/memory/inline_extractor.py +229 -0
- headroom/memory/store.py +434 -0
- headroom/memory/worker.py +260 -0
- headroom/memory/wrapper.py +321 -0
- headroom/models/__init__.py +39 -0
- headroom/models/registry.py +687 -0
- headroom/parser.py +293 -0
- headroom/pricing/__init__.py +51 -0
- headroom/pricing/anthropic_prices.py +81 -0
- headroom/pricing/litellm_pricing.py +113 -0
- headroom/pricing/openai_prices.py +91 -0
- headroom/pricing/registry.py +188 -0
- headroom/providers/__init__.py +61 -0
- headroom/providers/anthropic.py +621 -0
- headroom/providers/base.py +131 -0
- headroom/providers/cohere.py +362 -0
- headroom/providers/google.py +427 -0
- headroom/providers/litellm.py +297 -0
- headroom/providers/openai.py +566 -0
- headroom/providers/openai_compatible.py +521 -0
- headroom/proxy/__init__.py +19 -0
- headroom/proxy/server.py +2683 -0
- headroom/py.typed +0 -0
- headroom/relevance/__init__.py +124 -0
- headroom/relevance/base.py +106 -0
- headroom/relevance/bm25.py +255 -0
- headroom/relevance/embedding.py +255 -0
- headroom/relevance/hybrid.py +259 -0
- headroom/reporting/__init__.py +5 -0
- headroom/reporting/generator.py +549 -0
- headroom/storage/__init__.py +41 -0
- headroom/storage/base.py +125 -0
- headroom/storage/jsonl.py +220 -0
- headroom/storage/sqlite.py +289 -0
- headroom/telemetry/__init__.py +91 -0
- headroom/telemetry/collector.py +764 -0
- headroom/telemetry/models.py +880 -0
- headroom/telemetry/toin.py +1579 -0
- headroom/tokenizer.py +80 -0
- headroom/tokenizers/__init__.py +75 -0
- headroom/tokenizers/base.py +210 -0
- headroom/tokenizers/estimator.py +198 -0
- headroom/tokenizers/huggingface.py +317 -0
- headroom/tokenizers/mistral.py +245 -0
- headroom/tokenizers/registry.py +398 -0
- headroom/tokenizers/tiktoken_counter.py +248 -0
- headroom/transforms/__init__.py +106 -0
- headroom/transforms/base.py +57 -0
- headroom/transforms/cache_aligner.py +357 -0
- headroom/transforms/code_compressor.py +1313 -0
- headroom/transforms/content_detector.py +335 -0
- headroom/transforms/content_router.py +1158 -0
- headroom/transforms/llmlingua_compressor.py +638 -0
- headroom/transforms/log_compressor.py +529 -0
- headroom/transforms/pipeline.py +297 -0
- headroom/transforms/rolling_window.py +350 -0
- headroom/transforms/search_compressor.py +365 -0
- headroom/transforms/smart_crusher.py +2682 -0
- headroom/transforms/text_compressor.py +259 -0
- headroom/transforms/tool_crusher.py +338 -0
- headroom/utils.py +215 -0
- headroom_ai-0.2.13.dist-info/METADATA +315 -0
- headroom_ai-0.2.13.dist-info/RECORD +114 -0
- headroom_ai-0.2.13.dist-info/WHEEL +4 -0
- headroom_ai-0.2.13.dist-info/entry_points.txt +2 -0
- headroom_ai-0.2.13.dist-info/licenses/LICENSE +190 -0
- headroom_ai-0.2.13.dist-info/licenses/NOTICE +43 -0
|
@@ -0,0 +1,415 @@
|
|
|
1
|
+
"""Tool injection for CCR (Compress-Cache-Retrieve).
|
|
2
|
+
|
|
3
|
+
This module provides the retrieval tool definition that gets injected into
|
|
4
|
+
LLM requests when compression occurs. The tool allows the LLM to retrieve
|
|
5
|
+
original uncompressed content if needed.
|
|
6
|
+
|
|
7
|
+
Two injection modes:
|
|
8
|
+
1. Tool Definition Injection: Adds a function tool to the tools array
|
|
9
|
+
2. System Message Injection: Adds instructions to the system message
|
|
10
|
+
|
|
11
|
+
The LLM can then call the tool or follow instructions to retrieve more data.
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
from __future__ import annotations
|
|
15
|
+
|
|
16
|
+
import json
|
|
17
|
+
import re
|
|
18
|
+
from dataclasses import dataclass, field
|
|
19
|
+
from typing import Any
|
|
20
|
+
|
|
21
|
+
# Tool name constant - used for matching tool calls
CCR_TOOL_NAME = "headroom_retrieve"


# Full tool description shared verbatim by the OpenAI and Anthropic formats.
# (The Google format uses a shorter description without the hash example.)
_CCR_TOOL_DESCRIPTION = (
    "Retrieve original uncompressed content that was compressed to save tokens. "
    "Use this when you need more data than what's shown in compressed tool results. "
    "The hash is provided in compression markers like [N items compressed... hash=abc123]."
)


def _ccr_parameters_schema() -> dict[str, Any]:
    """Build the JSON-schema parameter object shared by OpenAI and Anthropic.

    Returned fresh on each call so callers may mutate their copy safely.
    """
    return {
        "type": "object",
        "properties": {
            "hash": {
                "type": "string",
                "description": "Hash key from the compression marker (e.g., 'abc123' from hash=abc123)",
            },
            "query": {
                "type": "string",
                "description": (
                    "Optional search query to filter results. "
                    "If provided, only returns items matching the query. "
                    "If omitted, returns all original items."
                ),
            },
        },
        "required": ["hash"],
    }


def create_ccr_tool_definition(
    provider: str = "anthropic",
) -> dict[str, Any]:
    """Create the CCR retrieval tool definition.

    This tool definition is injected into the request's tools array when
    compression occurs. The LLM can call this tool to retrieve original
    uncompressed content.

    Args:
        provider: The provider type ("anthropic", "openai", "google").
            Affects the tool definition format. Unknown providers fall
            back to the OpenAI format.

    Returns:
        Tool definition dict in the appropriate format.
    """
    if provider == "anthropic":
        # Anthropic nests the schema under "input_schema" with no wrapper.
        return {
            "name": CCR_TOOL_NAME,
            "description": _CCR_TOOL_DESCRIPTION,
            "input_schema": _ccr_parameters_schema(),
        }

    if provider == "google":
        # Google/Gemini format: flat definition with abbreviated descriptions.
        return {
            "name": CCR_TOOL_NAME,
            "description": (
                "Retrieve original uncompressed content that was compressed to save tokens. "
                "Use this when you need more data than what's shown in compressed tool results."
            ),
            "parameters": {
                "type": "object",
                "properties": {
                    "hash": {
                        "type": "string",
                        "description": "Hash key from the compression marker",
                    },
                    "query": {
                        "type": "string",
                        "description": "Optional search query to filter results",
                    },
                },
                "required": ["hash"],
            },
        }

    # OpenAI format; also the default for unknown providers.
    return {
        "type": "function",
        "function": {
            "name": CCR_TOOL_NAME,
            "description": _CCR_TOOL_DESCRIPTION,
            "parameters": _ccr_parameters_schema(),
        },
    }
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
def create_system_instructions(
    hashes: list[str],
    retrieval_endpoint: str = "/v1/retrieve",
) -> str:
    """Build system-message text describing CCR retrieval.

    Alternative to tool injection: the returned text is appended to the
    system message so the LLM knows how to call the retrieval tool.

    Args:
        hashes: Hash keys for compressed content present in this context.
        retrieval_endpoint: Endpoint path for retrieval (accepted for
            compatibility; currently not referenced in the generated text).

    Returns:
        Instruction text to append to the system message.
    """
    # Show at most five hashes; mark truncation with a trailing ellipsis.
    if len(hashes) > 5:
        shown = ", ".join(hashes[:5]) + " ..."
    else:
        shown = ", ".join(hashes)

    return f"""
## Compressed Context Available

Some tool outputs have been compressed to reduce context size. If you need
the full uncompressed data, you can retrieve it using the `{CCR_TOOL_NAME}` tool.

**How to retrieve:**
- Call `{CCR_TOOL_NAME}(hash="<hash>")` to get all original items
- Call `{CCR_TOOL_NAME}(hash="<hash>", query="search terms")` to search within

**Available hashes:** {shown}

Look for markers like `[N items compressed to M. Retrieve more: hash=abc123]`
in tool results to find the hash for each compressed output.
"""
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
@dataclass
class CCRToolInjector:
    """Manages CCR tool injection into LLM requests.

    This class handles:
    1. Detecting compression markers in messages
    2. Injecting the retrieval tool definition
    3. Adding system message instructions
    4. Tracking which hashes are available

    NOTE: the boolean fields ``inject_tool`` and ``inject_system_instructions``
    are configuration switches only; the actions are performed by the methods
    ``inject_tool_definition`` and ``inject_into_system_message``.

    Usage:
        injector = CCRToolInjector(provider="anthropic")

        # Process messages to detect compression markers
        injector.scan_for_markers(messages)

        # Inject tool if compression was detected
        if injector.has_compressed_content:
            tools, was_injected = injector.inject_tool_definition(tools)
            messages = injector.inject_into_system_message(messages)
    """

    # Provider whose tool-definition format to inject ("anthropic", "openai", "google").
    provider: str = "anthropic"
    # Config switch: when False, inject_tool_definition() returns the tools unchanged.
    inject_tool: bool = True
    # Config switch: when False, inject_into_system_message() returns messages unchanged.
    inject_system_instructions: bool = True
    # Forwarded to create_system_instructions().
    retrieval_endpoint: str = "/v1/retrieve"

    # Hash keys found by the most recent scan_for_markers() call.
    _detected_hashes: list[str] = field(default_factory=list)
    # Matches "[N items compressed to M. Retrieve more: hash=<hex>]";
    # groups are (original_count, compressed_count, hash_key).
    _marker_pattern: re.Pattern[str] = field(
        default_factory=lambda: re.compile(
            r"\[(\d+) items compressed to (\d+)\. Retrieve more: hash=([a-f0-9]+)\]"
        )
    )

    def __post_init__(self) -> None:
        # Start every injector with a clean hash list, regardless of what
        # (if anything) was passed to the dataclass constructor.
        self._detected_hashes = []

    @property
    def has_compressed_content(self) -> bool:
        """Check if any compressed content was detected."""
        return len(self._detected_hashes) > 0

    @property
    def detected_hashes(self) -> list[str]:
        """Get list of detected compression hashes (a defensive copy)."""
        return self._detected_hashes.copy()

    def scan_for_markers(self, messages: list[dict[str, Any]]) -> list[str]:
        """Scan messages for compression markers and extract hashes.

        Resets any previously detected hashes, then walks every message's
        content — plain strings, Anthropic-style content-block lists, and
        tool_result blocks (whose content may itself be a string or a list
        of text blocks).

        Args:
            messages: List of messages to scan.

        Returns:
            List of detected hash keys (the internal list, not a copy).
        """
        self._detected_hashes = []

        for message in messages:
            content = message.get("content", "")

            # Handle string content
            if isinstance(content, str):
                self._scan_text(content)

            # Handle list content (Anthropic format with content blocks)
            elif isinstance(content, list):
                for block in content:
                    if isinstance(block, dict):
                        # Text blocks
                        if block.get("type") == "text":
                            self._scan_text(block.get("text", ""))
                        # Tool result blocks
                        elif block.get("type") == "tool_result":
                            tool_content = block.get("content", "")
                            if isinstance(tool_content, str):
                                self._scan_text(tool_content)
                            elif isinstance(tool_content, list):
                                for item in tool_content:
                                    if isinstance(item, dict) and item.get("type") == "text":
                                        self._scan_text(item.get("text", ""))

        return self._detected_hashes

    def _scan_text(self, text: str) -> None:
        """Scan text for compression markers, de-duplicating hash keys."""
        matches = self._marker_pattern.findall(text)
        for _original, _compressed, hash_key in matches:
            if hash_key not in self._detected_hashes:
                self._detected_hashes.append(hash_key)

    def inject_tool_definition(
        self,
        tools: list[dict[str, Any]] | None,
    ) -> tuple[list[dict[str, Any]], bool]:
        """Inject CCR retrieval tool into tools list.

        No-op when injection is disabled via ``inject_tool`` or when no
        compression markers were detected by the last scan.

        Args:
            tools: Existing tools list (may be None or empty).

        Returns:
            Tuple of (updated_tools, was_injected).
            was_injected is False if tool was already present (e.g., from MCP).
        """
        if not self.inject_tool or not self.has_compressed_content:
            return tools or [], False

        tools = tools or []

        # Check if already present (e.g., from MCP server); handles both the
        # Anthropic shape (top-level "name") and the OpenAI shape
        # ("function" -> "name").
        for tool in tools:
            tool_name = tool.get("name") or tool.get("function", {}).get("name")
            if tool_name == CCR_TOOL_NAME:
                return tools, False  # Already present, skip injection

        # Add CCR tool (new list; the caller's list is not mutated)
        ccr_tool = create_ccr_tool_definition(self.provider)
        return tools + [ccr_tool], True

    def inject_into_system_message(
        self,
        messages: list[dict[str, Any]],
    ) -> list[dict[str, Any]]:
        """Inject retrieval instructions into system message.

        Appends instructions to the first system message's string content,
        or prepends a new system message when none exists. System messages
        with structured (list) content are left unchanged.

        Args:
            messages: List of messages.

        Returns:
            Updated messages with instructions added to system message.
        """
        if not self.inject_system_instructions or not self.has_compressed_content:
            return messages

        instructions = create_system_instructions(
            self._detected_hashes,
            self.retrieval_endpoint,
        )

        # Find and update system message
        updated_messages = []
        system_found = False

        for message in messages:
            if message.get("role") == "system" and not system_found:
                system_found = True
                content = message.get("content", "")

                # Don't add if already present. (For list content this `in`
                # test checks element membership, so structured content falls
                # through to the unchanged-append branch below.)
                if "Compressed Context Available" in content:
                    updated_messages.append(message)
                else:
                    # Append instructions
                    if isinstance(content, str):
                        updated_messages.append(
                            {
                                **message,
                                "content": content + instructions,
                            }
                        )
                    else:
                        # Handle structured content: left as-is (no injection)
                        updated_messages.append(message)
            else:
                updated_messages.append(message)

        # If no system message, prepend one
        if not system_found:
            updated_messages.insert(
                0,
                {
                    "role": "system",
                    "content": instructions.strip(),
                },
            )

        return updated_messages

    def process_request(
        self,
        messages: list[dict[str, Any]],
        tools: list[dict[str, Any]] | None = None,
    ) -> tuple[list[dict[str, Any]], list[dict[str, Any]] | None, bool]:
        """Process a request, scanning for markers and injecting as needed.

        This is a convenience method that does:
        1. Scan messages for compression markers
        2. Inject tool definition if enabled (skipped if already present from MCP)
        3. Inject system instructions if enabled

        Args:
            messages: Request messages.
            tools: Request tools (may be None).

        Returns:
            Tuple of (updated_messages, updated_tools, tool_was_injected).
            tool_was_injected is False if tool was already present (e.g., from MCP).
            An empty updated tools list is normalized back to None.
        """
        self.scan_for_markers(messages)

        if not self.has_compressed_content:
            return messages, tools, False

        updated_tools, was_injected = self.inject_tool_definition(tools)
        updated_messages = self.inject_into_system_message(messages)

        return updated_messages, updated_tools if updated_tools else None, was_injected
|
|
377
|
+
|
|
378
|
+
|
|
379
|
+
def parse_tool_call(
    tool_call: dict[str, Any],
    provider: str = "anthropic",
) -> tuple[str | None, str | None]:
    """Parse a CCR tool call to extract hash and query.

    Args:
        tool_call: The tool call object from the LLM response.
        provider: The provider type for format detection. Unknown providers
            are treated like Anthropic, with ``"args"`` as a fallback key.

    Returns:
        Tuple of (hash, query) or (None, None) if not a CCR tool call
        or if the call's arguments are malformed.
    """
    # Extract tool name and arguments in the provider-specific shape.
    if provider == "anthropic":
        name = tool_call.get("name")
        input_data = tool_call.get("input", {})
    elif provider == "openai":
        function = tool_call.get("function", {})
        name = function.get("name")
        # OpenAI passes args as JSON string
        args_str = function.get("arguments", "{}")
        try:
            input_data = json.loads(args_str)
        except json.JSONDecodeError:
            input_data = {}
    else:
        name = tool_call.get("name")
        input_data = tool_call.get("input", tool_call.get("args", {}))

    if name != CCR_TOOL_NAME:
        return None, None

    # Guard against malformed payloads: "arguments" can legally decode to a
    # non-dict (e.g. JSON "null" or a list), and provider "input" fields are
    # untrusted — calling .get() on those would raise AttributeError.
    if not isinstance(input_data, dict):
        return None, None

    hash_key = input_data.get("hash")
    query = input_data.get("query")

    return hash_key, query
|
headroom/cli.py
ADDED
|
@@ -0,0 +1,219 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""Headroom CLI - The Context Optimization Layer for LLM Applications.
|
|
3
|
+
|
|
4
|
+
Usage:
|
|
5
|
+
headroom proxy [OPTIONS] Start the optimization proxy server
|
|
6
|
+
headroom --version Show version
|
|
7
|
+
headroom --help Show this help message
|
|
8
|
+
|
|
9
|
+
Examples:
|
|
10
|
+
# Start proxy on default port (8787)
|
|
11
|
+
headroom proxy
|
|
12
|
+
|
|
13
|
+
# Start proxy on custom port
|
|
14
|
+
headroom proxy --port 8080
|
|
15
|
+
|
|
16
|
+
# Start with optimization disabled (passthrough mode)
|
|
17
|
+
headroom proxy --no-optimize
|
|
18
|
+
|
|
19
|
+
# Use with Claude Code
|
|
20
|
+
ANTHROPIC_BASE_URL=http://localhost:8787 claude
|
|
21
|
+
"""
|
|
22
|
+
|
|
23
|
+
from __future__ import annotations
|
|
24
|
+
|
|
25
|
+
import argparse
|
|
26
|
+
import sys
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def get_version() -> str:
    """Return the installed headroom version, or ``"unknown"`` if the
    package cannot be imported."""
    try:
        from headroom import __version__ as version
    except ImportError:
        return "unknown"
    return version
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def cmd_proxy(args: argparse.Namespace) -> int:
    """Start the proxy server.

    Args:
        args: Parsed arguments from the ``proxy`` subparser (host, port,
            feature toggles, log file, budget).

    Returns:
        Process exit code: 0 on clean shutdown (including Ctrl+C),
        1 if the proxy dependencies are not installed.
    """
    try:
        from headroom.proxy.server import ProxyConfig, run_server
    except ImportError as e:
        # Diagnostics go to stderr so stdout stays clean for tooling/pipes.
        print(
            "Error: Proxy dependencies not installed. Run: pip install headroom[proxy]",
            file=sys.stderr,
        )
        print(f"Details: {e}", file=sys.stderr)
        return 1

    config = ProxyConfig(
        host=args.host,
        port=args.port,
        optimize=not args.no_optimize,
        cache_enabled=not args.no_cache,
        rate_limit_enabled=not args.no_rate_limit,
        log_file=args.log_file,
        budget_limit_usd=args.budget,
        # LLMLingua: ON by default (use --no-llmlingua to disable)
        llmlingua_enabled=not args.no_llmlingua,
        llmlingua_device=args.llmlingua_device,
        llmlingua_target_rate=args.llmlingua_rate,
        # Code-aware: ON by default (use --no-code-aware to disable)
        code_aware_enabled=not args.no_code_aware,
    )

    print(f"""
╔═══════════════════════════════════════════════════════════════════════╗
║                            HEADROOM PROXY                             ║
║          The Context Optimization Layer for LLM Applications          ║
╚═══════════════════════════════════════════════════════════════════════╝

Starting proxy server...

  URL:          http://{config.host}:{config.port}
  Optimization: {"ENABLED" if config.optimize else "DISABLED"}
  Caching:      {"ENABLED" if config.cache_enabled else "DISABLED"}
  Rate Limit:   {"ENABLED" if config.rate_limit_enabled else "DISABLED"}

Usage with Claude Code:
  ANTHROPIC_BASE_URL=http://{config.host}:{config.port} claude

Usage with OpenAI-compatible clients:
  OPENAI_BASE_URL=http://{config.host}:{config.port}/v1 your-app

Endpoints:
  GET  /health                Health check
  GET  /stats                 Detailed statistics
  GET  /metrics               Prometheus metrics
  POST /v1/messages           Anthropic API
  POST /v1/chat/completions   OpenAI API

Press Ctrl+C to stop.
""")

    try:
        run_server(config)
    except KeyboardInterrupt:
        print("\nShutting down...")
        return 0

    return 0
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
def cmd_version(args: argparse.Namespace) -> int:
    """Handle the version command: print the version string and exit 0."""
    version = get_version()
    print(f"headroom {version}")
    return 0
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
def main(argv: list[str] | None = None) -> int:
    """Main CLI entry point.

    Builds the argument parser, dispatches to the selected subcommand,
    and returns the process exit code.
    """
    parser = argparse.ArgumentParser(
        prog="headroom",
        description="The Context Optimization Layer for LLM Applications",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  headroom proxy                  Start proxy on port 8787
  headroom proxy --port 8080      Start proxy on port 8080
  headroom proxy --no-optimize    Passthrough mode (no optimization)

Environment Variables:
  ANTHROPIC_API_KEY               Your Anthropic API key (for proxying)
  OPENAI_API_KEY                  Your OpenAI API key (for proxying)

Documentation: https://github.com/headroom-sdk/headroom
""",
    )
    parser.add_argument(
        "--version",
        "-V",
        action="store_true",
        help="Show version and exit",
    )

    subparsers = parser.add_subparsers(dest="command", help="Commands")

    # Proxy command
    proxy_parser = subparsers.add_parser(
        "proxy",
        help="Start the optimization proxy server",
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    # Register the proxy flags from a table; registration order is preserved
    # so --help output matches the declared order.
    proxy_options = [
        (
            ("--host",),
            {"default": "127.0.0.1", "help": "Host to bind to (default: 127.0.0.1)"},
        ),
        (
            ("--port", "-p"),
            {"type": int, "default": 8787, "help": "Port to bind to (default: 8787)"},
        ),
        (
            ("--no-optimize",),
            {"action": "store_true", "help": "Disable optimization (passthrough mode)"},
        ),
        (
            ("--no-cache",),
            {"action": "store_true", "help": "Disable semantic caching"},
        ),
        (
            ("--no-rate-limit",),
            {"action": "store_true", "help": "Disable rate limiting"},
        ),
        (
            ("--log-file",),
            {"help": "Path to JSONL log file"},
        ),
        (
            ("--budget",),
            {"type": float, "help": "Daily budget limit in USD"},
        ),
        # LLMLingua ML-based compression (ON by default if installed)
        (
            ("--no-llmlingua",),
            {"action": "store_true", "help": "Disable LLMLingua-2 ML-based compression"},
        ),
        (
            ("--llmlingua-device",),
            {
                "choices": ["auto", "cuda", "cpu", "mps"],
                "default": "auto",
                "help": "Device for LLMLingua model (default: auto)",
            },
        ),
        (
            ("--llmlingua-rate",),
            {
                "type": float,
                "default": 0.3,
                "help": "LLMLingua compression rate 0.0-1.0 (default: 0.3 = keep 30%%)",
            },
        ),
        # Code-aware compression (ON by default if installed)
        (
            ("--no-code-aware",),
            {"action": "store_true", "help": "Disable AST-based code compression"},
        ),
    ]
    for flags, options in proxy_options:
        proxy_parser.add_argument(*flags, **options)
    proxy_parser.set_defaults(func=cmd_proxy)

    args = parser.parse_args(argv)

    if args.version:
        return cmd_version(args)

    if args.command is None:
        parser.print_help()
        return 0

    result = args.func(args)
    return 0 if result is None else int(result)
|
|
216
|
+
|
|
217
|
+
|
|
218
|
+
if __name__ == "__main__":
|
|
219
|
+
sys.exit(main())
|