headroom-ai 0.2.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (114) hide show
  1. headroom/__init__.py +212 -0
  2. headroom/cache/__init__.py +76 -0
  3. headroom/cache/anthropic.py +517 -0
  4. headroom/cache/base.py +342 -0
  5. headroom/cache/compression_feedback.py +613 -0
  6. headroom/cache/compression_store.py +814 -0
  7. headroom/cache/dynamic_detector.py +1026 -0
  8. headroom/cache/google.py +884 -0
  9. headroom/cache/openai.py +584 -0
  10. headroom/cache/registry.py +175 -0
  11. headroom/cache/semantic.py +451 -0
  12. headroom/ccr/__init__.py +77 -0
  13. headroom/ccr/context_tracker.py +582 -0
  14. headroom/ccr/mcp_server.py +319 -0
  15. headroom/ccr/response_handler.py +772 -0
  16. headroom/ccr/tool_injection.py +415 -0
  17. headroom/cli.py +219 -0
  18. headroom/client.py +977 -0
  19. headroom/compression/__init__.py +42 -0
  20. headroom/compression/detector.py +424 -0
  21. headroom/compression/handlers/__init__.py +22 -0
  22. headroom/compression/handlers/base.py +219 -0
  23. headroom/compression/handlers/code_handler.py +506 -0
  24. headroom/compression/handlers/json_handler.py +418 -0
  25. headroom/compression/masks.py +345 -0
  26. headroom/compression/universal.py +465 -0
  27. headroom/config.py +474 -0
  28. headroom/exceptions.py +192 -0
  29. headroom/integrations/__init__.py +159 -0
  30. headroom/integrations/agno/__init__.py +53 -0
  31. headroom/integrations/agno/hooks.py +345 -0
  32. headroom/integrations/agno/model.py +625 -0
  33. headroom/integrations/agno/providers.py +154 -0
  34. headroom/integrations/langchain/__init__.py +106 -0
  35. headroom/integrations/langchain/agents.py +326 -0
  36. headroom/integrations/langchain/chat_model.py +1002 -0
  37. headroom/integrations/langchain/langsmith.py +324 -0
  38. headroom/integrations/langchain/memory.py +319 -0
  39. headroom/integrations/langchain/providers.py +200 -0
  40. headroom/integrations/langchain/retriever.py +371 -0
  41. headroom/integrations/langchain/streaming.py +341 -0
  42. headroom/integrations/mcp/__init__.py +37 -0
  43. headroom/integrations/mcp/server.py +533 -0
  44. headroom/memory/__init__.py +37 -0
  45. headroom/memory/extractor.py +390 -0
  46. headroom/memory/fast_store.py +621 -0
  47. headroom/memory/fast_wrapper.py +311 -0
  48. headroom/memory/inline_extractor.py +229 -0
  49. headroom/memory/store.py +434 -0
  50. headroom/memory/worker.py +260 -0
  51. headroom/memory/wrapper.py +321 -0
  52. headroom/models/__init__.py +39 -0
  53. headroom/models/registry.py +687 -0
  54. headroom/parser.py +293 -0
  55. headroom/pricing/__init__.py +51 -0
  56. headroom/pricing/anthropic_prices.py +81 -0
  57. headroom/pricing/litellm_pricing.py +113 -0
  58. headroom/pricing/openai_prices.py +91 -0
  59. headroom/pricing/registry.py +188 -0
  60. headroom/providers/__init__.py +61 -0
  61. headroom/providers/anthropic.py +621 -0
  62. headroom/providers/base.py +131 -0
  63. headroom/providers/cohere.py +362 -0
  64. headroom/providers/google.py +427 -0
  65. headroom/providers/litellm.py +297 -0
  66. headroom/providers/openai.py +566 -0
  67. headroom/providers/openai_compatible.py +521 -0
  68. headroom/proxy/__init__.py +19 -0
  69. headroom/proxy/server.py +2683 -0
  70. headroom/py.typed +0 -0
  71. headroom/relevance/__init__.py +124 -0
  72. headroom/relevance/base.py +106 -0
  73. headroom/relevance/bm25.py +255 -0
  74. headroom/relevance/embedding.py +255 -0
  75. headroom/relevance/hybrid.py +259 -0
  76. headroom/reporting/__init__.py +5 -0
  77. headroom/reporting/generator.py +549 -0
  78. headroom/storage/__init__.py +41 -0
  79. headroom/storage/base.py +125 -0
  80. headroom/storage/jsonl.py +220 -0
  81. headroom/storage/sqlite.py +289 -0
  82. headroom/telemetry/__init__.py +91 -0
  83. headroom/telemetry/collector.py +764 -0
  84. headroom/telemetry/models.py +880 -0
  85. headroom/telemetry/toin.py +1579 -0
  86. headroom/tokenizer.py +80 -0
  87. headroom/tokenizers/__init__.py +75 -0
  88. headroom/tokenizers/base.py +210 -0
  89. headroom/tokenizers/estimator.py +198 -0
  90. headroom/tokenizers/huggingface.py +317 -0
  91. headroom/tokenizers/mistral.py +245 -0
  92. headroom/tokenizers/registry.py +398 -0
  93. headroom/tokenizers/tiktoken_counter.py +248 -0
  94. headroom/transforms/__init__.py +106 -0
  95. headroom/transforms/base.py +57 -0
  96. headroom/transforms/cache_aligner.py +357 -0
  97. headroom/transforms/code_compressor.py +1313 -0
  98. headroom/transforms/content_detector.py +335 -0
  99. headroom/transforms/content_router.py +1158 -0
  100. headroom/transforms/llmlingua_compressor.py +638 -0
  101. headroom/transforms/log_compressor.py +529 -0
  102. headroom/transforms/pipeline.py +297 -0
  103. headroom/transforms/rolling_window.py +350 -0
  104. headroom/transforms/search_compressor.py +365 -0
  105. headroom/transforms/smart_crusher.py +2682 -0
  106. headroom/transforms/text_compressor.py +259 -0
  107. headroom/transforms/tool_crusher.py +338 -0
  108. headroom/utils.py +215 -0
  109. headroom_ai-0.2.13.dist-info/METADATA +315 -0
  110. headroom_ai-0.2.13.dist-info/RECORD +114 -0
  111. headroom_ai-0.2.13.dist-info/WHEEL +4 -0
  112. headroom_ai-0.2.13.dist-info/entry_points.txt +2 -0
  113. headroom_ai-0.2.13.dist-info/licenses/LICENSE +190 -0
  114. headroom_ai-0.2.13.dist-info/licenses/NOTICE +43 -0
@@ -0,0 +1,415 @@
1
+ """Tool injection for CCR (Compress-Cache-Retrieve).
2
+
3
+ This module provides the retrieval tool definition that gets injected into
4
+ LLM requests when compression occurs. The tool allows the LLM to retrieve
5
+ original uncompressed content if needed.
6
+
7
+ Two injection modes:
8
+ 1. Tool Definition Injection: Adds a function tool to the tools array
9
+ 2. System Message Injection: Adds instructions to the system message
10
+
11
+ The LLM can then call the tool or follow instructions to retrieve more data.
12
+ """
13
+
14
+ from __future__ import annotations
15
+
16
+ import json
17
+ import re
18
+ from dataclasses import dataclass, field
19
+ from typing import Any
20
+
21
# Tool name constant - used for matching tool calls
CCR_TOOL_NAME = "headroom_retrieve"


def create_ccr_tool_definition(
    provider: str = "anthropic",
) -> dict[str, Any]:
    """Create the CCR retrieval tool definition.

    This tool definition is injected into the request's tools array when
    compression occurs. The LLM can call this tool to retrieve original
    uncompressed content.

    Args:
        provider: The provider type ("anthropic", "openai", "google").
            Affects the tool definition format. Any unknown provider
            falls back to the OpenAI function-tool format.

    Returns:
        Tool definition dict in the appropriate format.
    """
    # Description and JSON schema shared by the OpenAI and Anthropic
    # formats (previously duplicated verbatim in both branches). Built
    # fresh per call so callers may mutate the returned structure.
    description = (
        "Retrieve original uncompressed content that was compressed to save tokens. "
        "Use this when you need more data than what's shown in compressed tool results. "
        "The hash is provided in compression markers like [N items compressed... hash=abc123]."
    )
    schema = {
        "type": "object",
        "properties": {
            "hash": {
                "type": "string",
                "description": "Hash key from the compression marker (e.g., 'abc123' from hash=abc123)",
            },
            "query": {
                "type": "string",
                "description": (
                    "Optional search query to filter results. "
                    "If provided, only returns items matching the query. "
                    "If omitted, returns all original items."
                ),
            },
        },
        "required": ["hash"],
    }

    if provider == "anthropic":
        # Anthropic uses a slightly different format: no {"type": "function"}
        # wrapper, and the schema key is "input_schema".
        return {
            "name": CCR_TOOL_NAME,
            "description": description,
            "input_schema": schema,
        }

    if provider == "google":
        # Google/Gemini format: shorter descriptions, "parameters" key.
        return {
            "name": CCR_TOOL_NAME,
            "description": (
                "Retrieve original uncompressed content that was compressed to save tokens. "
                "Use this when you need more data than what's shown in compressed tool results."
            ),
            "parameters": {
                "type": "object",
                "properties": {
                    "hash": {
                        "type": "string",
                        "description": "Hash key from the compression marker",
                    },
                    "query": {
                        "type": "string",
                        "description": "Optional search query to filter results",
                    },
                },
                "required": ["hash"],
            },
        }

    # "openai" and any unrecognized provider: OpenAI function-tool format.
    return {
        "type": "function",
        "function": {
            "name": CCR_TOOL_NAME,
            "description": description,
            "parameters": schema,
        },
    }
131
+
132
+
133
def create_system_instructions(
    hashes: list[str],
    retrieval_endpoint: str = "/v1/retrieve",
) -> str:
    """Build system-message instructions for CCR retrieval.

    Alternative to tool injection: the returned text is appended to the
    system message and tells the LLM how to fetch compressed data back.

    Args:
        hashes: Hash keys for compressed content in this context. Only
            the first five are listed verbatim; more are elided as "...".
        retrieval_endpoint: The endpoint path for retrieval.
            NOTE(review): accepted but not referenced anywhere in this
            function body — confirm whether it should appear in the text.

    Returns:
        Instruction text to append to the system message.
    """
    if len(hashes) <= 5:
        hash_list = ", ".join(hashes)
    else:
        hash_list = f"{', '.join(hashes[:5])} ..."

    return f"""
## Compressed Context Available

Some tool outputs have been compressed to reduce context size. If you need
the full uncompressed data, you can retrieve it using the `{CCR_TOOL_NAME}` tool.

**How to retrieve:**
- Call `{CCR_TOOL_NAME}(hash="<hash>")` to get all original items
- Call `{CCR_TOOL_NAME}(hash="<hash>", query="search terms")` to search within

**Available hashes:** {hash_list}

Look for markers like `[N items compressed to M. Retrieve more: hash=abc123]`
in tool results to find the hash for each compressed output.
"""
166
+
167
+
168
@dataclass
class CCRToolInjector:
    """Manages CCR tool injection into LLM requests.

    This class handles:
    1. Detecting compression markers in messages
    2. Injecting the retrieval tool definition
    3. Adding system message instructions
    4. Tracking which hashes are available

    Usage:
        injector = CCRToolInjector(provider="anthropic")

        # Process messages to detect compression markers
        injector.scan_for_markers(messages)

        # Inject tool if compression was detected
        if injector.has_compressed_content:
            tools, was_injected = injector.inject_tool_definition(tools)
            messages = injector.inject_into_system_message(messages)

    Or call process_request() to do all three steps at once.
    """

    # Provider format used when building the injected tool definition.
    provider: str = "anthropic"
    # When False, inject_tool_definition() is a no-op.
    inject_tool: bool = True
    # When False, inject_into_system_message() is a no-op.
    inject_system_instructions: bool = True
    # Forwarded to create_system_instructions().
    retrieval_endpoint: str = "/v1/retrieve"

    # Detected compression markers (hash keys, first-seen order, deduplicated).
    _detected_hashes: list[str] = field(default_factory=list)
    # Matches markers like "[12 items compressed to 3. Retrieve more: hash=abc123]";
    # capture groups: original count, compressed count, lowercase-hex hash key.
    _marker_pattern: re.Pattern[str] = field(
        default_factory=lambda: re.compile(
            r"\[(\d+) items compressed to (\d+)\. Retrieve more: hash=([a-f0-9]+)\]"
        )
    )

    def __post_init__(self) -> None:
        # Reset detected hashes so every new injector starts with clean state.
        self._detected_hashes = []

    @property
    def has_compressed_content(self) -> bool:
        """Check if any compressed content was detected."""
        return len(self._detected_hashes) > 0

    @property
    def detected_hashes(self) -> list[str]:
        """Get list of detected compression hashes (a copy; safe to mutate)."""
        return self._detected_hashes.copy()

    def scan_for_markers(self, messages: list[dict[str, Any]]) -> list[str]:
        """Scan messages for compression markers and extract hashes.

        Resets any previously detected hashes before scanning.

        Args:
            messages: List of messages to scan.

        Returns:
            List of detected hash keys.
        """
        self._detected_hashes = []

        for message in messages:
            content = message.get("content", "")

            # Handle string content
            if isinstance(content, str):
                self._scan_text(content)

            # Handle list content (Anthropic format with content blocks)
            elif isinstance(content, list):
                for block in content:
                    if isinstance(block, dict):
                        # Text blocks
                        if block.get("type") == "text":
                            self._scan_text(block.get("text", ""))
                        # Tool result blocks
                        elif block.get("type") == "tool_result":
                            tool_content = block.get("content", "")
                            if isinstance(tool_content, str):
                                self._scan_text(tool_content)
                            elif isinstance(tool_content, list):
                                # Tool results may themselves hold text blocks.
                                for item in tool_content:
                                    if isinstance(item, dict) and item.get("type") == "text":
                                        self._scan_text(item.get("text", ""))

        return self._detected_hashes

    def _scan_text(self, text: str) -> None:
        """Scan text for compression markers, appending new hashes in order."""
        matches = self._marker_pattern.findall(text)
        # Counts from the marker are ignored; only the hash key is tracked.
        for _original, _compressed, hash_key in matches:
            if hash_key not in self._detected_hashes:
                self._detected_hashes.append(hash_key)

    def inject_tool_definition(
        self,
        tools: list[dict[str, Any]] | None,
    ) -> tuple[list[dict[str, Any]], bool]:
        """Inject CCR retrieval tool into tools list.

        Requires a prior scan_for_markers() call to have found markers;
        otherwise the tools list is returned unchanged.

        Args:
            tools: Existing tools list (may be None or empty).

        Returns:
            Tuple of (updated_tools, was_injected).
            was_injected is False if tool was already present (e.g., from MCP).
        """
        if not self.inject_tool or not self.has_compressed_content:
            return tools or [], False

        tools = tools or []

        # Check if already present (e.g., from MCP server).
        # Handles both Anthropic-style ("name") and OpenAI-style
        # ("function": {"name": ...}) tool entries.
        for tool in tools:
            tool_name = tool.get("name") or tool.get("function", {}).get("name")
            if tool_name == CCR_TOOL_NAME:
                return tools, False  # Already present, skip injection

        # Add CCR tool (input list is not mutated; a new list is returned).
        ccr_tool = create_ccr_tool_definition(self.provider)
        return tools + [ccr_tool], True

    def inject_into_system_message(
        self,
        messages: list[dict[str, Any]],
    ) -> list[dict[str, Any]]:
        """Inject retrieval instructions into system message.

        Appends instructions to the first system message found, or prepends
        a new system message when none exists. The original list is not
        mutated; a new list is returned.

        Args:
            messages: List of messages.

        Returns:
            Updated messages with instructions added to system message.
        """
        if not self.inject_system_instructions or not self.has_compressed_content:
            return messages

        instructions = create_system_instructions(
            self._detected_hashes,
            self.retrieval_endpoint,
        )

        # Find and update system message
        updated_messages = []
        system_found = False

        for message in messages:
            if message.get("role") == "system" and not system_found:
                system_found = True
                content = message.get("content", "")

                # Don't add if already present (heading text doubles as a
                # dedup sentinel; for list content this membership test is
                # effectively always False).
                if "Compressed Context Available" in content:
                    updated_messages.append(message)
                else:
                    # Append instructions
                    if isinstance(content, str):
                        updated_messages.append(
                            {
                                **message,
                                "content": content + instructions,
                            }
                        )
                    else:
                        # Handle structured content
                        # NOTE(review): list-form system content is passed
                        # through unmodified — instructions are not injected.
                        updated_messages.append(message)
            else:
                updated_messages.append(message)

        # If no system message, prepend one
        if not system_found:
            updated_messages.insert(
                0,
                {
                    "role": "system",
                    "content": instructions.strip(),
                },
            )

        return updated_messages

    def process_request(
        self,
        messages: list[dict[str, Any]],
        tools: list[dict[str, Any]] | None = None,
    ) -> tuple[list[dict[str, Any]], list[dict[str, Any]] | None, bool]:
        """Process a request, scanning for markers and injecting as needed.

        This is a convenience method that does:
        1. Scan messages for compression markers
        2. Inject tool definition if enabled (skipped if already present from MCP)
        3. Inject system instructions if enabled

        Args:
            messages: Request messages.
            tools: Request tools (may be None).

        Returns:
            Tuple of (updated_messages, updated_tools, tool_was_injected).
            tool_was_injected is False if tool was already present (e.g., from MCP).
            updated_tools is None when the resulting tools list is empty.
        """
        self.scan_for_markers(messages)

        if not self.has_compressed_content:
            return messages, tools, False

        updated_tools, was_injected = self.inject_tool_definition(tools)
        updated_messages = self.inject_into_system_message(messages)

        # Normalize an empty tools list back to None for the caller.
        return updated_messages, updated_tools if updated_tools else None, was_injected
377
+
378
+
379
def parse_tool_call(
    tool_call: dict[str, Any],
    provider: str = "anthropic",
) -> tuple[str | None, str | None]:
    """Extract (hash, query) from a CCR retrieval tool call.

    Args:
        tool_call: The tool call object from the LLM response.
        provider: The provider type for format detection.

    Returns:
        ``(hash, query)``, or ``(None, None)`` when the call is not the
        CCR retrieval tool.
    """
    if provider == "openai":
        # OpenAI nests the call under "function" and encodes the
        # arguments as a JSON string.
        function = tool_call.get("function", {})
        name = function.get("name")
        try:
            input_data = json.loads(function.get("arguments", "{}"))
        except json.JSONDecodeError:
            input_data = {}
    elif provider == "anthropic":
        name = tool_call.get("name")
        input_data = tool_call.get("input", {})
    else:
        # Other providers: prefer "input", fall back to "args".
        name = tool_call.get("name")
        input_data = tool_call.get("input", tool_call.get("args", {}))

    if name != CCR_TOOL_NAME:
        return None, None

    return input_data.get("hash"), input_data.get("query")
headroom/cli.py ADDED
@@ -0,0 +1,219 @@
1
+ #!/usr/bin/env python3
2
+ """Headroom CLI - The Context Optimization Layer for LLM Applications.
3
+
4
+ Usage:
5
+ headroom proxy [OPTIONS] Start the optimization proxy server
6
+ headroom --version Show version
7
+ headroom --help Show this help message
8
+
9
+ Examples:
10
+ # Start proxy on default port (8787)
11
+ headroom proxy
12
+
13
+ # Start proxy on custom port
14
+ headroom proxy --port 8080
15
+
16
+ # Start with optimization disabled (passthrough mode)
17
+ headroom proxy --no-optimize
18
+
19
+ # Use with Claude Code
20
+ ANTHROPIC_BASE_URL=http://localhost:8787 claude
21
+ """
22
+
23
+ from __future__ import annotations
24
+
25
+ import argparse
26
+ import sys
27
+
28
+
29
def get_version() -> str:
    """Return the installed headroom version, or "unknown" if unavailable."""
    try:
        from headroom import __version__ as version
    except ImportError:
        return "unknown"
    return version
37
+
38
+
39
def cmd_proxy(args: argparse.Namespace) -> int:
    """Start the proxy server.

    Args:
        args: Parsed arguments from the ``proxy`` subcommand.

    Returns:
        Process exit code: 0 on clean shutdown (including Ctrl+C),
        1 when proxy dependencies are not installed.
    """
    try:
        from headroom.proxy.server import ProxyConfig, run_server
    except ImportError as e:
        # Fix: diagnostics belong on stderr so stdout stays clean for
        # scripting/piping.
        print(
            "Error: Proxy dependencies not installed. Run: pip install headroom[proxy]",
            file=sys.stderr,
        )
        print(f"Details: {e}", file=sys.stderr)
        return 1

    config = ProxyConfig(
        host=args.host,
        port=args.port,
        optimize=not args.no_optimize,
        cache_enabled=not args.no_cache,
        rate_limit_enabled=not args.no_rate_limit,
        log_file=args.log_file,
        budget_limit_usd=args.budget,
        # LLMLingua: ON by default (use --no-llmlingua to disable)
        llmlingua_enabled=not args.no_llmlingua,
        llmlingua_device=args.llmlingua_device,
        llmlingua_target_rate=args.llmlingua_rate,
        # Code-aware: ON by default (use --no-code-aware to disable)
        code_aware_enabled=not args.no_code_aware,
    )

    # Startup banner: intentional stdout output for the operator.
    print(f"""
╔═══════════════════════════════════════════════════════════════════════╗
║ HEADROOM PROXY ║
║ The Context Optimization Layer for LLM Applications ║
╚═══════════════════════════════════════════════════════════════════════╝

Starting proxy server...

  URL: http://{config.host}:{config.port}
  Optimization: {"ENABLED" if config.optimize else "DISABLED"}
  Caching: {"ENABLED" if config.cache_enabled else "DISABLED"}
  Rate Limit: {"ENABLED" if config.rate_limit_enabled else "DISABLED"}

Usage with Claude Code:
  ANTHROPIC_BASE_URL=http://{config.host}:{config.port} claude

Usage with OpenAI-compatible clients:
  OPENAI_BASE_URL=http://{config.host}:{config.port}/v1 your-app

Endpoints:
  GET /health Health check
  GET /stats Detailed statistics
  GET /metrics Prometheus metrics
  POST /v1/messages Anthropic API
  POST /v1/chat/completions OpenAI API

Press Ctrl+C to stop.
""")

    try:
        run_server(config)
    except KeyboardInterrupt:
        # Graceful shutdown on Ctrl+C is a success, not an error.
        print("\nShutting down...")
        return 0

    return 0
100
+
101
+
102
def cmd_version(args: argparse.Namespace) -> int:
    """Print version information to stdout and return success."""
    version_line = f"headroom {get_version()}"
    print(version_line)
    return 0
106
+
107
+
108
def _build_parser() -> argparse.ArgumentParser:
    """Construct the top-level CLI parser and its subcommands."""
    parser = argparse.ArgumentParser(
        prog="headroom",
        description="The Context Optimization Layer for LLM Applications",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  headroom proxy                 Start proxy on port 8787
  headroom proxy --port 8080     Start proxy on port 8080
  headroom proxy --no-optimize   Passthrough mode (no optimization)

Environment Variables:
  ANTHROPIC_API_KEY              Your Anthropic API key (for proxying)
  OPENAI_API_KEY                 Your OpenAI API key (for proxying)

Documentation: https://github.com/headroom-sdk/headroom
""",
    )

    parser.add_argument(
        "--version",
        "-V",
        action="store_true",
        help="Show version and exit",
    )

    subparsers = parser.add_subparsers(dest="command", help="Commands")

    # Proxy command
    proxy_parser = subparsers.add_parser(
        "proxy",
        help="Start the optimization proxy server",
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    proxy_parser.add_argument(
        "--host",
        default="127.0.0.1",
        help="Host to bind to (default: 127.0.0.1)",
    )
    proxy_parser.add_argument(
        "--port",
        "-p",
        type=int,
        default=8787,
        help="Port to bind to (default: 8787)",
    )
    proxy_parser.add_argument(
        "--no-optimize",
        action="store_true",
        help="Disable optimization (passthrough mode)",
    )
    proxy_parser.add_argument(
        "--no-cache",
        action="store_true",
        help="Disable semantic caching",
    )
    proxy_parser.add_argument(
        "--no-rate-limit",
        action="store_true",
        help="Disable rate limiting",
    )
    proxy_parser.add_argument(
        "--log-file",
        help="Path to JSONL log file",
    )
    proxy_parser.add_argument(
        "--budget",
        type=float,
        help="Daily budget limit in USD",
    )
    # LLMLingua ML-based compression (ON by default if installed)
    proxy_parser.add_argument(
        "--no-llmlingua",
        action="store_true",
        help="Disable LLMLingua-2 ML-based compression",
    )
    proxy_parser.add_argument(
        "--llmlingua-device",
        choices=["auto", "cuda", "cpu", "mps"],
        default="auto",
        help="Device for LLMLingua model (default: auto)",
    )
    proxy_parser.add_argument(
        "--llmlingua-rate",
        type=float,
        default=0.3,
        help="LLMLingua compression rate 0.0-1.0 (default: 0.3 = keep 30%%)",
    )
    # Code-aware compression (ON by default if installed)
    proxy_parser.add_argument(
        "--no-code-aware",
        action="store_true",
        help="Disable AST-based code compression",
    )
    proxy_parser.set_defaults(func=cmd_proxy)

    return parser


def main(argv: list[str] | None = None) -> int:
    """Main CLI entry point.

    Args:
        argv: Argument list to parse; defaults to ``sys.argv[1:]``.

    Returns:
        Process exit code.
    """
    parser = _build_parser()
    args = parser.parse_args(argv)

    # --version short-circuits any subcommand.
    if args.version:
        return cmd_version(args)

    # No subcommand given: show help and exit successfully.
    if args.command is None:
        parser.print_help()
        return 0

    # Dispatch to the subcommand handler wired via set_defaults(func=...).
    result = args.func(args)
    return int(result) if result is not None else 0
216
+
217
+
218
if __name__ == "__main__":
    # Script entry point: propagate the CLI's return value as the exit code.
    sys.exit(main())