headroom-ai 0.2.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (114) hide show
  1. headroom/__init__.py +212 -0
  2. headroom/cache/__init__.py +76 -0
  3. headroom/cache/anthropic.py +517 -0
  4. headroom/cache/base.py +342 -0
  5. headroom/cache/compression_feedback.py +613 -0
  6. headroom/cache/compression_store.py +814 -0
  7. headroom/cache/dynamic_detector.py +1026 -0
  8. headroom/cache/google.py +884 -0
  9. headroom/cache/openai.py +584 -0
  10. headroom/cache/registry.py +175 -0
  11. headroom/cache/semantic.py +451 -0
  12. headroom/ccr/__init__.py +77 -0
  13. headroom/ccr/context_tracker.py +582 -0
  14. headroom/ccr/mcp_server.py +319 -0
  15. headroom/ccr/response_handler.py +772 -0
  16. headroom/ccr/tool_injection.py +415 -0
  17. headroom/cli.py +219 -0
  18. headroom/client.py +977 -0
  19. headroom/compression/__init__.py +42 -0
  20. headroom/compression/detector.py +424 -0
  21. headroom/compression/handlers/__init__.py +22 -0
  22. headroom/compression/handlers/base.py +219 -0
  23. headroom/compression/handlers/code_handler.py +506 -0
  24. headroom/compression/handlers/json_handler.py +418 -0
  25. headroom/compression/masks.py +345 -0
  26. headroom/compression/universal.py +465 -0
  27. headroom/config.py +474 -0
  28. headroom/exceptions.py +192 -0
  29. headroom/integrations/__init__.py +159 -0
  30. headroom/integrations/agno/__init__.py +53 -0
  31. headroom/integrations/agno/hooks.py +345 -0
  32. headroom/integrations/agno/model.py +625 -0
  33. headroom/integrations/agno/providers.py +154 -0
  34. headroom/integrations/langchain/__init__.py +106 -0
  35. headroom/integrations/langchain/agents.py +326 -0
  36. headroom/integrations/langchain/chat_model.py +1002 -0
  37. headroom/integrations/langchain/langsmith.py +324 -0
  38. headroom/integrations/langchain/memory.py +319 -0
  39. headroom/integrations/langchain/providers.py +200 -0
  40. headroom/integrations/langchain/retriever.py +371 -0
  41. headroom/integrations/langchain/streaming.py +341 -0
  42. headroom/integrations/mcp/__init__.py +37 -0
  43. headroom/integrations/mcp/server.py +533 -0
  44. headroom/memory/__init__.py +37 -0
  45. headroom/memory/extractor.py +390 -0
  46. headroom/memory/fast_store.py +621 -0
  47. headroom/memory/fast_wrapper.py +311 -0
  48. headroom/memory/inline_extractor.py +229 -0
  49. headroom/memory/store.py +434 -0
  50. headroom/memory/worker.py +260 -0
  51. headroom/memory/wrapper.py +321 -0
  52. headroom/models/__init__.py +39 -0
  53. headroom/models/registry.py +687 -0
  54. headroom/parser.py +293 -0
  55. headroom/pricing/__init__.py +51 -0
  56. headroom/pricing/anthropic_prices.py +81 -0
  57. headroom/pricing/litellm_pricing.py +113 -0
  58. headroom/pricing/openai_prices.py +91 -0
  59. headroom/pricing/registry.py +188 -0
  60. headroom/providers/__init__.py +61 -0
  61. headroom/providers/anthropic.py +621 -0
  62. headroom/providers/base.py +131 -0
  63. headroom/providers/cohere.py +362 -0
  64. headroom/providers/google.py +427 -0
  65. headroom/providers/litellm.py +297 -0
  66. headroom/providers/openai.py +566 -0
  67. headroom/providers/openai_compatible.py +521 -0
  68. headroom/proxy/__init__.py +19 -0
  69. headroom/proxy/server.py +2683 -0
  70. headroom/py.typed +0 -0
  71. headroom/relevance/__init__.py +124 -0
  72. headroom/relevance/base.py +106 -0
  73. headroom/relevance/bm25.py +255 -0
  74. headroom/relevance/embedding.py +255 -0
  75. headroom/relevance/hybrid.py +259 -0
  76. headroom/reporting/__init__.py +5 -0
  77. headroom/reporting/generator.py +549 -0
  78. headroom/storage/__init__.py +41 -0
  79. headroom/storage/base.py +125 -0
  80. headroom/storage/jsonl.py +220 -0
  81. headroom/storage/sqlite.py +289 -0
  82. headroom/telemetry/__init__.py +91 -0
  83. headroom/telemetry/collector.py +764 -0
  84. headroom/telemetry/models.py +880 -0
  85. headroom/telemetry/toin.py +1579 -0
  86. headroom/tokenizer.py +80 -0
  87. headroom/tokenizers/__init__.py +75 -0
  88. headroom/tokenizers/base.py +210 -0
  89. headroom/tokenizers/estimator.py +198 -0
  90. headroom/tokenizers/huggingface.py +317 -0
  91. headroom/tokenizers/mistral.py +245 -0
  92. headroom/tokenizers/registry.py +398 -0
  93. headroom/tokenizers/tiktoken_counter.py +248 -0
  94. headroom/transforms/__init__.py +106 -0
  95. headroom/transforms/base.py +57 -0
  96. headroom/transforms/cache_aligner.py +357 -0
  97. headroom/transforms/code_compressor.py +1313 -0
  98. headroom/transforms/content_detector.py +335 -0
  99. headroom/transforms/content_router.py +1158 -0
  100. headroom/transforms/llmlingua_compressor.py +638 -0
  101. headroom/transforms/log_compressor.py +529 -0
  102. headroom/transforms/pipeline.py +297 -0
  103. headroom/transforms/rolling_window.py +350 -0
  104. headroom/transforms/search_compressor.py +365 -0
  105. headroom/transforms/smart_crusher.py +2682 -0
  106. headroom/transforms/text_compressor.py +259 -0
  107. headroom/transforms/tool_crusher.py +338 -0
  108. headroom/utils.py +215 -0
  109. headroom_ai-0.2.13.dist-info/METADATA +315 -0
  110. headroom_ai-0.2.13.dist-info/RECORD +114 -0
  111. headroom_ai-0.2.13.dist-info/WHEEL +4 -0
  112. headroom_ai-0.2.13.dist-info/entry_points.txt +2 -0
  113. headroom_ai-0.2.13.dist-info/licenses/LICENSE +190 -0
  114. headroom_ai-0.2.13.dist-info/licenses/NOTICE +43 -0
@@ -0,0 +1,321 @@
1
+ """Memory wrapper - the main API for Headroom Memory.
2
+
3
+ One-line integration:
4
+ from headroom import with_memory
5
+ client = with_memory(OpenAI(), user_id="alice")
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import copy
11
+ from pathlib import Path
12
+ from typing import Any
13
+
14
+ from headroom.memory.extractor import MemoryExtractor
15
+ from headroom.memory.store import Memory, SQLiteMemoryStore
16
+ from headroom.memory.worker import ExtractionWorker
17
+
18
+
19
class MemoryWrapper:
    """Wraps an LLM client to add automatic memory.

    Intercepts chat completions to:
    1. BEFORE: Inject relevant memories into a user message
    2. AFTER: Queue the conversation for background memory extraction

    The system prompt is left unchanged to preserve prompt caching.

    Usage:
        client = MemoryWrapper(OpenAI(), user_id="alice")
        response = client.chat.completions.create(...)
    """

    def __init__(
        self,
        client: Any,
        user_id: str,
        db_path: str | Path = "headroom_memory.db",
        extraction_model: str | None = None,
        top_k: int = 5,
        _extractor: Any = None,  # For testing - inject mock
        _store: SQLiteMemoryStore | None = None,  # For testing
    ):
        """Initialize the memory wrapper.

        Args:
            client: LLM client (OpenAI, Anthropic, etc.)
            user_id: User identifier for memory isolation
            db_path: Path to SQLite database
            extraction_model: Override extraction model (auto-detect if None)
            top_k: Number of memories to inject
            _extractor: Override extractor (for testing)
            _store: Override store (for testing)
        """
        self._client = client
        self._user_id = user_id
        self._top_k = top_k

        # Initialize store
        self._store = _store or SQLiteMemoryStore(db_path)

        # Initialize extractor
        self._extractor = _extractor or MemoryExtractor(client, model=extraction_model)

        # Initialize background worker with shorter wait for responsiveness
        self._worker = ExtractionWorker(
            store=self._store,
            extractor=self._extractor,
            max_wait_seconds=5.0,  # Process partial batches after 5s
        )
        self._worker.start()

        # Create wrapped chat interface
        self.chat = _WrappedChat(self)

    def flush_extractions(self, timeout: float = 60.0) -> bool:
        """Force immediate processing of all queued extractions.

        Useful for testing or when you need to ensure memories are saved.

        Args:
            timeout: Max time to wait in seconds

        Returns:
            True if all extractions completed, False if timed out
        """
        return self._worker.flush(timeout=timeout)

    @property
    def memory(self) -> _MemoryAPI:
        """Direct access to memory operations."""
        return _MemoryAPI(self._store, self._user_id)

    @staticmethod
    def _content_text(content: Any) -> str:
        """Reduce a message ``content`` value to plain text.

        Args:
            content: ``None``, a string, a list of content-part dicts, or
                any other value.

        Returns:
            ``""`` for ``None``; the string itself for ``str``; the
            newline-joined ``text`` of every ``{"type": "text"}`` part for a
            list; ``str(content)`` for anything else.
        """
        if content is None:
            return ""
        if isinstance(content, str):
            return content
        if isinstance(content, list):
            texts = [
                part.get("text") or ""
                for part in content
                if isinstance(part, dict) and part.get("type") == "text"
            ]
            return "\n".join(text for text in texts if text)
        return str(content)

    def _inject_memories(self, messages: list[dict]) -> list[dict]:
        """Inject relevant memories into messages.

        The LAST user message supplies the search query; the resulting
        context block is prepended to the FIRST user message so that the
        system prompt stays untouched.

        String content gets the block prepended as text. List content
        (multi-part messages) gets a new leading ``{"type": "text"}`` part
        instead, so the existing parts are preserved rather than being
        stringified.

        Args:
            messages: Original messages list

        Returns:
            The same ``messages`` object when there is no usable user text
            or no memory matches; otherwise a deep copy with the context
            block injected (the input is never mutated).
        """
        # Use the most recent user turn as the search query.
        query_text = ""
        for msg in reversed(messages):
            if msg.get("role") == "user":
                query_text = self._content_text(msg.get("content"))
                break

        if not query_text:
            return messages

        # Search for relevant memories
        memories = self._store.search(
            self._user_id,
            query_text,
            top_k=self._top_k,
        )

        if not memories:
            return messages

        # Build context block
        context_block = "\n".join(
            ["<context>", *(f"- {mem.content}" for mem in memories), "</context>"]
        )

        # Work on a copy so the caller's messages are left as they were.
        new_messages = copy.deepcopy(messages)
        for msg in new_messages:
            if msg.get("role") != "user":
                continue
            original = msg.get("content")
            if isinstance(original, list):
                # Multi-part content: add the context as its own text part.
                msg["content"] = [{"type": "text", "text": context_block}, *original]
            else:
                # A missing or None content is treated as empty text.
                body = "" if original is None else original
                msg["content"] = f"{context_block}\n\n{body}"
            break

        return new_messages

    def _queue_extraction(self, query: str, response: str) -> None:
        """Queue conversation for background memory extraction.

        Args:
            query: User's message
            response: Assistant's response
        """
        self._worker.schedule(self._user_id, query, response)
150
+
151
+
152
class _WrappedChat:
    """Stand-in for ``client.chat`` whose ``completions`` attribute is memory-aware."""

    def __init__(self, wrapper: MemoryWrapper):
        # Expose the intercepting completions object under the usual name.
        self.completions = _WrappedCompletions(wrapper)
        # Keep a handle on the owning wrapper.
        self._wrapper = wrapper
158
+
159
+
160
class _WrappedCompletions:
    """Wrapped completions that add memory to requests."""

    def __init__(self, wrapper: MemoryWrapper):
        self._wrapper = wrapper

    def create(self, **kwargs: Any) -> Any:
        """Create a chat completion with memory injection.

        This intercepts the request to:
        1. Inject relevant memories into a user message
        2. Forward to the real client
        3. Queue the response for background extraction

        All kwargs are passed through to the underlying client. The
        ``messages`` entry is replaced only when memories were actually
        injected, so a call that omitted ``messages`` is forwarded as-is
        and the underlying client reports the problem itself.

        Returns:
            Whatever the underlying client's ``chat.completions.create``
            returns.
        """
        # A missing or None ``messages`` is treated as empty for injection.
        messages = kwargs.get("messages") or []

        # 1. Inject memories into user message
        enhanced_messages = self._wrapper._inject_memories(messages)
        if enhanced_messages is not messages:
            # The injector hands back a new list only when it added context.
            kwargs["messages"] = enhanced_messages

        # 2. Forward to real client
        response = self._wrapper._client.chat.completions.create(**kwargs)

        # 3. Queue for extraction, using the messages WITHOUT injected context
        self._extract_and_queue(messages, response)

        return response

    @staticmethod
    def _as_text(content: Any) -> str:
        """Reduce a message ``content`` value to plain text.

        ``None`` becomes ``""``; a string is returned unchanged; a list
        yields the newline-joined ``text`` of its ``{"type": "text"}``
        parts; any other value is passed through ``str()``.
        """
        if content is None:
            return ""
        if isinstance(content, str):
            return content
        if isinstance(content, list):
            texts = [
                part.get("text") or ""
                for part in content
                if isinstance(part, dict) and part.get("type") == "text"
            ]
            return "\n".join(text for text in texts if text)
        return str(content)

    def _extract_and_queue(self, original_messages: list[dict], response: Any) -> None:
        """Extract query and response, queue for extraction.

        Nothing is queued when there is no user text, when the response
        does not expose ``choices[0].message.content``, or when that content
        is empty.

        Args:
            original_messages: Messages as supplied by the caller (before
                memory injection)
            response: Object returned by the underlying client
        """
        # Get the last user message (without context injection)
        user_query = ""
        for msg in reversed(original_messages):
            if msg.get("role") == "user":
                user_query = self._as_text(msg.get("content"))
                break

        if not user_query:
            return

        # Get assistant response; extraction is best-effort, so a response
        # of an unexpected shape is skipped instead of raising.
        try:
            assistant_response = response.choices[0].message.content
        except (AttributeError, IndexError, TypeError):
            return

        if assistant_response:
            self._wrapper._queue_extraction(user_query, assistant_response)
210
+
211
+
212
class _MemoryAPI:
    """User-scoped facade over the memory store.

    Every method forwards to the store with this API's ``user_id``.
    """

    def __init__(self, store: SQLiteMemoryStore, user_id: str):
        self._user_id = user_id
        self._store = store

    def search(self, query: str, top_k: int = 5) -> list[Memory]:
        """Look up memories matching a query.

        Args:
            query: Search query
            top_k: Max results

        Returns:
            Matching memories as returned by the store
        """
        owner = self._user_id
        return self._store.search(owner, query, top_k)

    def add(
        self,
        content: str,
        category: str = "fact",
        importance: float = 0.5,
    ) -> Memory:
        """Create a memory by hand and save it for this user.

        Args:
            content: Memory content
            category: preference, fact, or context
            importance: 0.0-1.0

        Returns:
            The newly created memory object
        """
        record = Memory(
            content=content,
            category=category,  # type: ignore
            importance=importance,
        )
        self._store.save(self._user_id, record)
        return record

    def get_all(self) -> list[Memory]:
        """Return every memory the store holds for this user."""
        owner = self._user_id
        return self._store.get_all(owner)

    def delete(self, memory_id: str) -> bool:
        """Delete one memory by id; returns the store's result."""
        owner = self._user_id
        return self._store.delete(owner, memory_id)

    def clear(self) -> int:
        """Remove all of this user's memories; returns the store's result."""
        owner = self._user_id
        return self._store.clear(owner)

    def stats(self) -> dict:
        """Return the store's statistics for this user."""
        owner = self._user_id
        return self._store.stats(owner)
270
+
271
+
272
def with_memory(
    client: Any,
    user_id: str,
    db_path: str | Path = "headroom_memory.db",
    extraction_model: str | None = None,
    top_k: int = 5,
    **kwargs: Any,
) -> MemoryWrapper:
    """Build a MemoryWrapper around an LLM client in a single call.

    Convenience constructor: every argument is handed to ``MemoryWrapper``
    unchanged.

    Args:
        client: LLM client (OpenAI, Anthropic, Mistral, Groq, etc.)
        user_id: User identifier for memory isolation
        db_path: Path to SQLite database (default: headroom_memory.db)
        extraction_model: Override extraction model (auto-detects by default)
        top_k: Number of memories to inject per request (default: 5)
        **kwargs: Additional arguments passed to MemoryWrapper

    Returns:
        The MemoryWrapper wrapping ``client``

    Example:
        from openai import OpenAI
        from headroom import with_memory

        client = with_memory(OpenAI(), user_id="alice")

        response = client.chat.completions.create(
            model="gpt-4o",
            messages=[{"role": "user", "content": "I prefer Python"}]
        )
        # Memory automatically extracted in background

        # Later...
        response = client.chat.completions.create(
            model="gpt-4o",
            messages=[{"role": "user", "content": "What language should I use?"}]
        )
        # Memory about Python preference automatically injected!
    """
    wrapped = MemoryWrapper(
        client,
        user_id,
        db_path=db_path,
        extraction_model=extraction_model,
        top_k=top_k,
        **kwargs,
    )
    return wrapped
@@ -0,0 +1,39 @@
1
+ """Model registry and capabilities database.
2
+
3
+ Provides a centralized registry of LLM models with their capabilities,
4
+ context limits, pricing, and provider information.
5
+
6
+ Usage:
7
+ from headroom.models import ModelRegistry, get_model_info
8
+
9
+ # Get info about a model
10
+ info = get_model_info("gpt-4o")
11
+ print(f"Context: {info.context_window}")
12
+ print(f"Provider: {info.provider}")
13
+
14
+ # List all models from a provider
15
+ models = ModelRegistry.list_models(provider="openai")
16
+
17
+ # Register a custom model
18
+ ModelRegistry.register(
19
+ "my-custom-model",
20
+ provider="custom",
21
+ context_window=32000,
22
+ )
23
+ """
24
+
25
+ from .registry import (
26
+ ModelInfo,
27
+ ModelRegistry,
28
+ get_model_info,
29
+ list_models,
30
+ register_model,
31
+ )
32
+
33
# Public API of this package; each name listed is imported from .registry above.
__all__ = [
    "ModelRegistry",
    "ModelInfo",
    "get_model_info",
    "list_models",
    "register_model",
]