headroom_ai-0.2.13-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (114)
  1. headroom/__init__.py +212 -0
  2. headroom/cache/__init__.py +76 -0
  3. headroom/cache/anthropic.py +517 -0
  4. headroom/cache/base.py +342 -0
  5. headroom/cache/compression_feedback.py +613 -0
  6. headroom/cache/compression_store.py +814 -0
  7. headroom/cache/dynamic_detector.py +1026 -0
  8. headroom/cache/google.py +884 -0
  9. headroom/cache/openai.py +584 -0
  10. headroom/cache/registry.py +175 -0
  11. headroom/cache/semantic.py +451 -0
  12. headroom/ccr/__init__.py +77 -0
  13. headroom/ccr/context_tracker.py +582 -0
  14. headroom/ccr/mcp_server.py +319 -0
  15. headroom/ccr/response_handler.py +772 -0
  16. headroom/ccr/tool_injection.py +415 -0
  17. headroom/cli.py +219 -0
  18. headroom/client.py +977 -0
  19. headroom/compression/__init__.py +42 -0
  20. headroom/compression/detector.py +424 -0
  21. headroom/compression/handlers/__init__.py +22 -0
  22. headroom/compression/handlers/base.py +219 -0
  23. headroom/compression/handlers/code_handler.py +506 -0
  24. headroom/compression/handlers/json_handler.py +418 -0
  25. headroom/compression/masks.py +345 -0
  26. headroom/compression/universal.py +465 -0
  27. headroom/config.py +474 -0
  28. headroom/exceptions.py +192 -0
  29. headroom/integrations/__init__.py +159 -0
  30. headroom/integrations/agno/__init__.py +53 -0
  31. headroom/integrations/agno/hooks.py +345 -0
  32. headroom/integrations/agno/model.py +625 -0
  33. headroom/integrations/agno/providers.py +154 -0
  34. headroom/integrations/langchain/__init__.py +106 -0
  35. headroom/integrations/langchain/agents.py +326 -0
  36. headroom/integrations/langchain/chat_model.py +1002 -0
  37. headroom/integrations/langchain/langsmith.py +324 -0
  38. headroom/integrations/langchain/memory.py +319 -0
  39. headroom/integrations/langchain/providers.py +200 -0
  40. headroom/integrations/langchain/retriever.py +371 -0
  41. headroom/integrations/langchain/streaming.py +341 -0
  42. headroom/integrations/mcp/__init__.py +37 -0
  43. headroom/integrations/mcp/server.py +533 -0
  44. headroom/memory/__init__.py +37 -0
  45. headroom/memory/extractor.py +390 -0
  46. headroom/memory/fast_store.py +621 -0
  47. headroom/memory/fast_wrapper.py +311 -0
  48. headroom/memory/inline_extractor.py +229 -0
  49. headroom/memory/store.py +434 -0
  50. headroom/memory/worker.py +260 -0
  51. headroom/memory/wrapper.py +321 -0
  52. headroom/models/__init__.py +39 -0
  53. headroom/models/registry.py +687 -0
  54. headroom/parser.py +293 -0
  55. headroom/pricing/__init__.py +51 -0
  56. headroom/pricing/anthropic_prices.py +81 -0
  57. headroom/pricing/litellm_pricing.py +113 -0
  58. headroom/pricing/openai_prices.py +91 -0
  59. headroom/pricing/registry.py +188 -0
  60. headroom/providers/__init__.py +61 -0
  61. headroom/providers/anthropic.py +621 -0
  62. headroom/providers/base.py +131 -0
  63. headroom/providers/cohere.py +362 -0
  64. headroom/providers/google.py +427 -0
  65. headroom/providers/litellm.py +297 -0
  66. headroom/providers/openai.py +566 -0
  67. headroom/providers/openai_compatible.py +521 -0
  68. headroom/proxy/__init__.py +19 -0
  69. headroom/proxy/server.py +2683 -0
  70. headroom/py.typed +0 -0
  71. headroom/relevance/__init__.py +124 -0
  72. headroom/relevance/base.py +106 -0
  73. headroom/relevance/bm25.py +255 -0
  74. headroom/relevance/embedding.py +255 -0
  75. headroom/relevance/hybrid.py +259 -0
  76. headroom/reporting/__init__.py +5 -0
  77. headroom/reporting/generator.py +549 -0
  78. headroom/storage/__init__.py +41 -0
  79. headroom/storage/base.py +125 -0
  80. headroom/storage/jsonl.py +220 -0
  81. headroom/storage/sqlite.py +289 -0
  82. headroom/telemetry/__init__.py +91 -0
  83. headroom/telemetry/collector.py +764 -0
  84. headroom/telemetry/models.py +880 -0
  85. headroom/telemetry/toin.py +1579 -0
  86. headroom/tokenizer.py +80 -0
  87. headroom/tokenizers/__init__.py +75 -0
  88. headroom/tokenizers/base.py +210 -0
  89. headroom/tokenizers/estimator.py +198 -0
  90. headroom/tokenizers/huggingface.py +317 -0
  91. headroom/tokenizers/mistral.py +245 -0
  92. headroom/tokenizers/registry.py +398 -0
  93. headroom/tokenizers/tiktoken_counter.py +248 -0
  94. headroom/transforms/__init__.py +106 -0
  95. headroom/transforms/base.py +57 -0
  96. headroom/transforms/cache_aligner.py +357 -0
  97. headroom/transforms/code_compressor.py +1313 -0
  98. headroom/transforms/content_detector.py +335 -0
  99. headroom/transforms/content_router.py +1158 -0
  100. headroom/transforms/llmlingua_compressor.py +638 -0
  101. headroom/transforms/log_compressor.py +529 -0
  102. headroom/transforms/pipeline.py +297 -0
  103. headroom/transforms/rolling_window.py +350 -0
  104. headroom/transforms/search_compressor.py +365 -0
  105. headroom/transforms/smart_crusher.py +2682 -0
  106. headroom/transforms/text_compressor.py +259 -0
  107. headroom/transforms/tool_crusher.py +338 -0
  108. headroom/utils.py +215 -0
  109. headroom_ai-0.2.13.dist-info/METADATA +315 -0
  110. headroom_ai-0.2.13.dist-info/RECORD +114 -0
  111. headroom_ai-0.2.13.dist-info/WHEEL +4 -0
  112. headroom_ai-0.2.13.dist-info/entry_points.txt +2 -0
  113. headroom_ai-0.2.13.dist-info/licenses/LICENSE +190 -0
  114. headroom_ai-0.2.13.dist-info/licenses/NOTICE +43 -0
headroom/providers/openai_compatible.py
@@ -0,0 +1,521 @@
+"""OpenAI-compatible provider for universal LLM support.
+
+This provider supports any LLM service that implements the OpenAI API format:
+- Ollama (local)
+- vLLM (local/cloud)
+- Together AI
+- Groq
+- Fireworks AI
+- Anyscale
+- LM Studio
+- LocalAI
+- Hugging Face Inference Endpoints
+- Azure OpenAI
+- And many more...
+
+The key insight: 70%+ of LLM providers use OpenAI-compatible APIs,
+so supporting this format gives near-universal coverage.
+"""
+
+from __future__ import annotations
+
+import logging
+from dataclasses import dataclass
+from typing import Any
+
+from headroom.tokenizers import get_tokenizer
+
+from .base import Provider
+
+logger = logging.getLogger(__name__)
+
+
+@dataclass
+class ModelCapabilities:
+    """Model capability metadata.
+
+    Stores information about a model's capabilities and constraints
+    that the provider needs for token counting and cost estimation.
+    """
+
+    model: str
+    context_window: int = 128000  # Default to 128K
+    max_output_tokens: int = 4096
+    supports_tools: bool = True
+    supports_vision: bool = False
+    supports_streaming: bool = True
+    tokenizer_backend: str | None = None  # Force specific tokenizer
+    input_cost_per_1m: float | None = None  # Cost per 1M input tokens
+    output_cost_per_1m: float | None = None  # Cost per 1M output tokens
+
+
+# Default context limits for common open models
+# These are reasonable defaults; users can override
+_DEFAULT_CONTEXT_LIMITS: dict[str, int] = {
+    # Llama 3 family
+    "llama-3": 8192,
+    "llama-3-8b": 8192,
+    "llama-3-70b": 8192,
+    "llama-3.1": 128000,
+    "llama-3.1-8b": 128000,
+    "llama-3.1-70b": 128000,
+    "llama-3.1-405b": 128000,
+    "llama-3.2": 128000,
+    "llama-3.3": 128000,
+    # Llama 2 family
+    "llama-2": 4096,
+    "llama-2-7b": 4096,
+    "llama-2-13b": 4096,
+    "llama-2-70b": 4096,
+    "codellama": 16384,
+    # Mistral family
+    "mistral": 32768,
+    "mistral-7b": 32768,
+    "mistral-nemo": 128000,
+    "mistral-small": 32768,
+    "mistral-large": 128000,
+    "mixtral": 32768,
+    "mixtral-8x7b": 32768,
+    "mixtral-8x22b": 65536,
+    # Qwen family
+    "qwen": 32768,
+    "qwen2": 32768,
+    "qwen2-7b": 32768,
+    "qwen2-72b": 32768,
+    "qwen2.5": 131072,
+    # DeepSeek
+    "deepseek": 32768,
+    "deepseek-coder": 16384,
+    "deepseek-v2": 128000,
+    "deepseek-v3": 128000,
+    # Yi
+    "yi": 32768,
+    "yi-34b": 32768,
+    # Phi
+    "phi-2": 2048,
+    "phi-3": 4096,
+    "phi-3-mini": 4096,
+    "phi-3-medium": 4096,
+    # Others
+    "falcon": 2048,
+    "falcon-40b": 2048,
+    "falcon-180b": 2048,
+    "gemma": 8192,
+    "gemma-2": 8192,
+    "starcoder": 8192,
+    "starcoder2": 16384,
+}
+
+
+class OpenAICompatibleTokenCounter:
+    """Token counter for OpenAI-compatible providers.
+
+    Uses the TokenizerRegistry to get the appropriate tokenizer
+    for the model, falling back to estimation if needed.
+    """
+
+    def __init__(
+        self,
+        model: str,
+        tokenizer_backend: str | None = None,
+    ):
+        """Initialize token counter.
+
+        Args:
+            model: Model name.
+            tokenizer_backend: Force specific tokenizer backend.
+        """
+        self.model = model
+        self._tokenizer = get_tokenizer(model, backend=tokenizer_backend)
+
+    def count_text(self, text: str) -> int:
+        """Count tokens in text."""
+        return self._tokenizer.count_text(text)
+
+    def count_message(self, message: dict[str, Any]) -> int:
+        """Count tokens in a single message."""
+        # Use OpenAI-style message overhead
+        tokens = 4  # Base overhead
+
+        role = message.get("role", "")
+        tokens += self.count_text(role)
+
+        content = message.get("content")
+        if content:
+            if isinstance(content, str):
+                tokens += self.count_text(content)
+            elif isinstance(content, list):
+                for part in content:
+                    if isinstance(part, dict):
+                        if part.get("type") == "text":
+                            tokens += self.count_text(part.get("text", ""))
+                    elif isinstance(part, str):
+                        tokens += self.count_text(part)
+
+        name = message.get("name")
+        if name:
+            tokens += self.count_text(name) + 1
+
+        tool_calls = message.get("tool_calls")
+        if tool_calls:
+            for tc in tool_calls:
+                func = tc.get("function", {})
+                tokens += self.count_text(func.get("name", ""))
+                tokens += self.count_text(func.get("arguments", ""))
+                tokens += 10
+
+        tool_call_id = message.get("tool_call_id")
+        if tool_call_id:
+            tokens += self.count_text(tool_call_id) + 2
+
+        return tokens
+
+    def count_messages(self, messages: list[dict[str, Any]]) -> int:
+        """Count tokens in a list of messages."""
+        total = sum(self.count_message(msg) for msg in messages)
+        total += 3  # Priming tokens
+        return total
+
+
+class OpenAICompatibleProvider(Provider):
+    """Provider for OpenAI-compatible LLM services.
+
+    Works with any service implementing the OpenAI chat completions API:
+    - Ollama (local)
+    - vLLM (local/cloud)
+    - Together AI
+    - Groq
+    - Fireworks AI
+    - LM Studio
+    - LocalAI
+    - And many more...
+
+    Example:
+        # For Ollama
+        provider = OpenAICompatibleProvider(
+            name="ollama",
+            base_url="http://localhost:11434/v1",
+            default_model="llama3.1",
+        )
+
+        # For Together AI
+        provider = OpenAICompatibleProvider(
+            name="together",
+            base_url="https://api.together.xyz/v1",
+        )
+
+        # Get token counter for a specific model
+        counter = provider.get_token_counter("llama-3.1-8b")
+    """
+
+    def __init__(
+        self,
+        name: str = "openai_compatible",
+        base_url: str | None = None,
+        api_key: str | None = None,
+        default_model: str | None = None,
+        models: dict[str, ModelCapabilities] | None = None,
+    ):
+        """Initialize OpenAI-compatible provider.
+
+        Args:
+            name: Provider name for identification.
+            base_url: API base URL (e.g., 'http://localhost:11434/v1').
+            api_key: API key (if required).
+            default_model: Default model for operations.
+            models: Custom model configurations.
+        """
+        self._name = name
+        self.base_url = base_url
+        self.api_key = api_key
+        self.default_model = default_model
+        self._models: dict[str, ModelCapabilities] = models or {}
+
+    @property
+    def name(self) -> str:
+        return self._name
+
+    def register_model(
+        self,
+        model: str,
+        capabilities: ModelCapabilities | None = None,
+        **kwargs: Any,
+    ) -> None:
+        """Register a model with its capabilities.
+
+        Args:
+            model: Model name.
+            capabilities: Model capabilities object.
+            **kwargs: Alternative way to specify capabilities.
+        """
+        if capabilities is not None:
+            self._models[model] = capabilities
+        else:
+            self._models[model] = ModelCapabilities(model=model, **kwargs)
+
+    def supports_model(self, model: str) -> bool:
+        """Check if model is supported.
+
+        OpenAI-compatible providers support any model by default,
+        using estimation for token counting.
+        """
+        return True  # Always return True - we can estimate
+
+    def get_token_counter(self, model: str) -> OpenAICompatibleTokenCounter:
+        """Get token counter for a model.
+
+        Uses the TokenizerRegistry to find the best tokenizer,
+        with fallback to estimation.
+        """
+        tokenizer_backend = None
+
+        # Check for registered model with specific tokenizer
+        if model in self._models:
+            tokenizer_backend = self._models[model].tokenizer_backend
+
+        return OpenAICompatibleTokenCounter(model, tokenizer_backend)
+
+    def get_context_limit(self, model: str) -> int:
+        """Get context limit for a model.
+
+        Priority:
+        1. Registered model capabilities
+        2. Default limits for known models
+        3. Prefix matching
+        4. Default 128K
+        """
+        # Check registered models
+        if model in self._models:
+            return self._models[model].context_window
+
+        model_lower = model.lower()
+
+        # Check default limits
+        if model_lower in _DEFAULT_CONTEXT_LIMITS:
+            return _DEFAULT_CONTEXT_LIMITS[model_lower]
+
+        # Prefix match
+        for prefix, limit in _DEFAULT_CONTEXT_LIMITS.items():
+            if model_lower.startswith(prefix):
+                return limit
+
+        # Default to 128K for modern models
+        return 128000
+
+    def get_output_buffer(self, model: str, default: int = 4000) -> int:
+        """Get recommended output buffer."""
+        if model in self._models:
+            return min(self._models[model].max_output_tokens, default)
+        return default
+
+    def estimate_cost(
+        self,
+        input_tokens: int,
+        output_tokens: int,
+        model: str,
+        cached_tokens: int = 0,
+    ) -> float | None:
+        """Estimate cost if pricing is configured.
+
+        Args:
+            input_tokens: Number of input tokens.
+            output_tokens: Number of output tokens.
+            model: Model name.
+            cached_tokens: Number of cached tokens.
+
+        Returns:
+            Estimated cost in USD, or None if pricing unknown.
+        """
+        if model not in self._models:
+            return None
+
+        caps = self._models[model]
+        if caps.input_cost_per_1m is None or caps.output_cost_per_1m is None:
+            return None
+
+        input_cost = (input_tokens / 1_000_000) * caps.input_cost_per_1m
+        output_cost = (output_tokens / 1_000_000) * caps.output_cost_per_1m
+
+        return input_cost + output_cost
+
+
+# Pre-configured provider factories for common services
+
+
+def create_ollama_provider(
+    base_url: str = "http://localhost:11434/v1",
+) -> OpenAICompatibleProvider:
+    """Create provider for Ollama.
+
+    Ollama is a popular local LLM runner that supports many open models.
+
+    Args:
+        base_url: Ollama API URL (default: http://localhost:11434/v1).
+
+    Returns:
+        Configured provider.
+    """
+    return OpenAICompatibleProvider(
+        name="ollama",
+        base_url=base_url,
+    )
+
+
+def create_together_provider(
+    api_key: str | None = None,
+) -> OpenAICompatibleProvider:
+    """Create provider for Together AI.
+
+    Together AI offers high-performance inference for open models.
+
+    Args:
+        api_key: Together AI API key.
+
+    Returns:
+        Configured provider with Together AI pricing.
+    """
+    provider = OpenAICompatibleProvider(
+        name="together",
+        base_url="https://api.together.xyz/v1",
+        api_key=api_key,
+    )
+
+    # Register common Together models with pricing
+    # Pricing as of Jan 2025 (verify current rates)
+    provider.register_model(
+        "meta-llama/Llama-3.1-8B-Instruct-Turbo",
+        context_window=128000,
+        input_cost_per_1m=0.18,
+        output_cost_per_1m=0.18,
+    )
+    provider.register_model(
+        "meta-llama/Llama-3.1-70B-Instruct-Turbo",
+        context_window=128000,
+        input_cost_per_1m=0.88,
+        output_cost_per_1m=0.88,
+    )
+    provider.register_model(
+        "meta-llama/Llama-3.1-405B-Instruct-Turbo",
+        context_window=128000,
+        input_cost_per_1m=3.50,
+        output_cost_per_1m=3.50,
+    )
+
+    return provider
+
+
+def create_groq_provider(
+    api_key: str | None = None,
+) -> OpenAICompatibleProvider:
+    """Create provider for Groq.
+
+    Groq offers ultra-fast inference on custom hardware.
+
+    Args:
+        api_key: Groq API key.
+
+    Returns:
+        Configured provider with Groq pricing.
+    """
+    provider = OpenAICompatibleProvider(
+        name="groq",
+        base_url="https://api.groq.com/openai/v1",
+        api_key=api_key,
+    )
+
+    # Register common Groq models with pricing
+    # Pricing as of Jan 2025 (verify current rates)
+    provider.register_model(
+        "llama-3.1-8b-instant",
+        context_window=128000,
+        input_cost_per_1m=0.05,
+        output_cost_per_1m=0.08,
+    )
+    provider.register_model(
+        "llama-3.1-70b-versatile",
+        context_window=128000,
+        input_cost_per_1m=0.59,
+        output_cost_per_1m=0.79,
+    )
+    provider.register_model(
+        "mixtral-8x7b-32768",
+        context_window=32768,
+        input_cost_per_1m=0.24,
+        output_cost_per_1m=0.24,
+    )
+
+    return provider
+
+
+def create_fireworks_provider(
+    api_key: str | None = None,
+) -> OpenAICompatibleProvider:
+    """Create provider for Fireworks AI.
+
+    Args:
+        api_key: Fireworks API key.
+
+    Returns:
+        Configured provider.
+    """
+    return OpenAICompatibleProvider(
+        name="fireworks",
+        base_url="https://api.fireworks.ai/inference/v1",
+        api_key=api_key,
+    )
+
+
+def create_anyscale_provider(
+    api_key: str | None = None,
+) -> OpenAICompatibleProvider:
+    """Create provider for Anyscale Endpoints.
+
+    Args:
+        api_key: Anyscale API key.
+
+    Returns:
+        Configured provider.
+    """
+    return OpenAICompatibleProvider(
+        name="anyscale",
+        base_url="https://api.endpoints.anyscale.com/v1",
+        api_key=api_key,
+    )
+
+
+def create_vllm_provider(
+    base_url: str,
+) -> OpenAICompatibleProvider:
+    """Create provider for vLLM server.
+
+    vLLM is a high-performance inference engine.
+
+    Args:
+        base_url: vLLM server URL (e.g., 'http://localhost:8000/v1').
+
+    Returns:
+        Configured provider.
+    """
+    return OpenAICompatibleProvider(
+        name="vllm",
+        base_url=base_url,
+    )
+
+
+def create_lmstudio_provider(
+    base_url: str = "http://localhost:1234/v1",
+) -> OpenAICompatibleProvider:
+    """Create provider for LM Studio.
+
+    LM Studio is a desktop app for running local LLMs.
+
+    Args:
+        base_url: LM Studio API URL.
+
+    Returns:
+        Configured provider.
+    """
+    return OpenAICompatibleProvider(
+        name="lmstudio",
+        base_url=base_url,
+    )
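
For orientation, a minimal usage sketch built only from the API added above (create_ollama_provider, register_model, get_token_counter, get_context_limit, estimate_cost). It is not part of the package diff; the model name and the per-million-token prices are illustrative placeholders, not values shipped in the wheel.

from headroom.providers.openai_compatible import create_ollama_provider

# Point at a local Ollama server (the factory's default base_url).
provider = create_ollama_provider()

# Register a model so context-limit and pricing lookups have explicit values.
# The prices below are illustrative, not taken from the package.
provider.register_model(
    "llama-3.1-8b",
    context_window=128000,
    input_cost_per_1m=0.10,
    output_cost_per_1m=0.10,
)

counter = provider.get_token_counter("llama-3.1-8b")
prompt_tokens = counter.count_messages(
    [{"role": "user", "content": "Summarize the release notes."}]
)

print(provider.get_context_limit("llama-3.1-8b"))   # 128000, from the registered capabilities
print(provider.estimate_cost(prompt_tokens, 200, "llama-3.1-8b"))  # estimated cost in USD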
headroom/proxy/__init__.py
@@ -0,0 +1,19 @@
+"""Headroom Proxy Server.
+
+A transparent proxy that sits between LLM clients (Claude Code, Cursor, etc.)
+and LLM APIs (Anthropic, OpenAI), applying Headroom optimizations.
+
+Usage:
+    # Start the proxy
+    python -m headroom.proxy.server
+
+    # Use with Claude Code
+    ANTHROPIC_BASE_URL=http://localhost:8787 claude
+
+    # Use with Cursor (if using Anthropic)
+    Set base URL in Cursor settings to http://localhost:8787
+"""
+
+from .server import create_app, run_server
+
+__all__ = ["create_app", "run_server"]
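
A minimal sketch of launching the proxy programmatically, based only on the names this diff exports. The exact signatures of create_app and run_server are not visible in the diff, so the no-argument call and the default port 8787 (taken from the docstring) are assumptions.

# Sketch only: run_server's signature is not shown in this diff; calling it with
# no arguments and expecting it to listen on http://localhost:8787 (per the
# docstring above) is an assumption.
from headroom.proxy import run_server

if __name__ == "__main__":
    run_server()
    # Then point a client at the proxy, e.g.:
    #   ANTHROPIC_BASE_URL=http://localhost:8787 claude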