headroom-ai 0.2.13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- headroom/__init__.py +212 -0
- headroom/cache/__init__.py +76 -0
- headroom/cache/anthropic.py +517 -0
- headroom/cache/base.py +342 -0
- headroom/cache/compression_feedback.py +613 -0
- headroom/cache/compression_store.py +814 -0
- headroom/cache/dynamic_detector.py +1026 -0
- headroom/cache/google.py +884 -0
- headroom/cache/openai.py +584 -0
- headroom/cache/registry.py +175 -0
- headroom/cache/semantic.py +451 -0
- headroom/ccr/__init__.py +77 -0
- headroom/ccr/context_tracker.py +582 -0
- headroom/ccr/mcp_server.py +319 -0
- headroom/ccr/response_handler.py +772 -0
- headroom/ccr/tool_injection.py +415 -0
- headroom/cli.py +219 -0
- headroom/client.py +977 -0
- headroom/compression/__init__.py +42 -0
- headroom/compression/detector.py +424 -0
- headroom/compression/handlers/__init__.py +22 -0
- headroom/compression/handlers/base.py +219 -0
- headroom/compression/handlers/code_handler.py +506 -0
- headroom/compression/handlers/json_handler.py +418 -0
- headroom/compression/masks.py +345 -0
- headroom/compression/universal.py +465 -0
- headroom/config.py +474 -0
- headroom/exceptions.py +192 -0
- headroom/integrations/__init__.py +159 -0
- headroom/integrations/agno/__init__.py +53 -0
- headroom/integrations/agno/hooks.py +345 -0
- headroom/integrations/agno/model.py +625 -0
- headroom/integrations/agno/providers.py +154 -0
- headroom/integrations/langchain/__init__.py +106 -0
- headroom/integrations/langchain/agents.py +326 -0
- headroom/integrations/langchain/chat_model.py +1002 -0
- headroom/integrations/langchain/langsmith.py +324 -0
- headroom/integrations/langchain/memory.py +319 -0
- headroom/integrations/langchain/providers.py +200 -0
- headroom/integrations/langchain/retriever.py +371 -0
- headroom/integrations/langchain/streaming.py +341 -0
- headroom/integrations/mcp/__init__.py +37 -0
- headroom/integrations/mcp/server.py +533 -0
- headroom/memory/__init__.py +37 -0
- headroom/memory/extractor.py +390 -0
- headroom/memory/fast_store.py +621 -0
- headroom/memory/fast_wrapper.py +311 -0
- headroom/memory/inline_extractor.py +229 -0
- headroom/memory/store.py +434 -0
- headroom/memory/worker.py +260 -0
- headroom/memory/wrapper.py +321 -0
- headroom/models/__init__.py +39 -0
- headroom/models/registry.py +687 -0
- headroom/parser.py +293 -0
- headroom/pricing/__init__.py +51 -0
- headroom/pricing/anthropic_prices.py +81 -0
- headroom/pricing/litellm_pricing.py +113 -0
- headroom/pricing/openai_prices.py +91 -0
- headroom/pricing/registry.py +188 -0
- headroom/providers/__init__.py +61 -0
- headroom/providers/anthropic.py +621 -0
- headroom/providers/base.py +131 -0
- headroom/providers/cohere.py +362 -0
- headroom/providers/google.py +427 -0
- headroom/providers/litellm.py +297 -0
- headroom/providers/openai.py +566 -0
- headroom/providers/openai_compatible.py +521 -0
- headroom/proxy/__init__.py +19 -0
- headroom/proxy/server.py +2683 -0
- headroom/py.typed +0 -0
- headroom/relevance/__init__.py +124 -0
- headroom/relevance/base.py +106 -0
- headroom/relevance/bm25.py +255 -0
- headroom/relevance/embedding.py +255 -0
- headroom/relevance/hybrid.py +259 -0
- headroom/reporting/__init__.py +5 -0
- headroom/reporting/generator.py +549 -0
- headroom/storage/__init__.py +41 -0
- headroom/storage/base.py +125 -0
- headroom/storage/jsonl.py +220 -0
- headroom/storage/sqlite.py +289 -0
- headroom/telemetry/__init__.py +91 -0
- headroom/telemetry/collector.py +764 -0
- headroom/telemetry/models.py +880 -0
- headroom/telemetry/toin.py +1579 -0
- headroom/tokenizer.py +80 -0
- headroom/tokenizers/__init__.py +75 -0
- headroom/tokenizers/base.py +210 -0
- headroom/tokenizers/estimator.py +198 -0
- headroom/tokenizers/huggingface.py +317 -0
- headroom/tokenizers/mistral.py +245 -0
- headroom/tokenizers/registry.py +398 -0
- headroom/tokenizers/tiktoken_counter.py +248 -0
- headroom/transforms/__init__.py +106 -0
- headroom/transforms/base.py +57 -0
- headroom/transforms/cache_aligner.py +357 -0
- headroom/transforms/code_compressor.py +1313 -0
- headroom/transforms/content_detector.py +335 -0
- headroom/transforms/content_router.py +1158 -0
- headroom/transforms/llmlingua_compressor.py +638 -0
- headroom/transforms/log_compressor.py +529 -0
- headroom/transforms/pipeline.py +297 -0
- headroom/transforms/rolling_window.py +350 -0
- headroom/transforms/search_compressor.py +365 -0
- headroom/transforms/smart_crusher.py +2682 -0
- headroom/transforms/text_compressor.py +259 -0
- headroom/transforms/tool_crusher.py +338 -0
- headroom/utils.py +215 -0
- headroom_ai-0.2.13.dist-info/METADATA +315 -0
- headroom_ai-0.2.13.dist-info/RECORD +114 -0
- headroom_ai-0.2.13.dist-info/WHEEL +4 -0
- headroom_ai-0.2.13.dist-info/entry_points.txt +2 -0
- headroom_ai-0.2.13.dist-info/licenses/LICENSE +190 -0
- headroom_ai-0.2.13.dist-info/licenses/NOTICE +43 -0
|
@@ -0,0 +1,321 @@
|
|
|
1
|
+
"""Memory wrapper - the main API for Headroom Memory.
|
|
2
|
+
|
|
3
|
+
One-line integration:
|
|
4
|
+
from headroom import with_memory
|
|
5
|
+
client = with_memory(OpenAI(), user_id="alice")
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import copy
|
|
11
|
+
from pathlib import Path
|
|
12
|
+
from typing import Any
|
|
13
|
+
|
|
14
|
+
from headroom.memory.extractor import MemoryExtractor
|
|
15
|
+
from headroom.memory.store import Memory, SQLiteMemoryStore
|
|
16
|
+
from headroom.memory.worker import ExtractionWorker
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class MemoryWrapper:
    """Wraps an LLM client to add automatic memory.

    Intercepts chat completions to:
    1. BEFORE: Inject relevant memories into a user message
    2. AFTER: Queue the conversation for background memory extraction

    The system prompt is left unchanged to preserve prompt caching.

    Usage:
        client = MemoryWrapper(OpenAI(), user_id="alice")
        response = client.chat.completions.create(...)
    """

    def __init__(
        self,
        client: Any,
        user_id: str,
        db_path: str | Path = "headroom_memory.db",
        extraction_model: str | None = None,
        top_k: int = 5,
        _extractor: Any = None,  # For testing - inject mock
        _store: SQLiteMemoryStore | None = None,  # For testing
    ):
        """Initialize the memory wrapper.

        Args:
            client: LLM client (OpenAI, Anthropic, etc.)
            user_id: User identifier for memory isolation
            db_path: Path to SQLite database
            extraction_model: Override extraction model (auto-detect if None)
            top_k: Number of memories to inject
            _extractor: Override extractor (for testing)
            _store: Override store (for testing)
        """
        self._client = client
        self._user_id = user_id
        self._top_k = top_k

        # Initialize store
        self._store = _store or SQLiteMemoryStore(db_path)

        # Initialize extractor
        self._extractor = _extractor or MemoryExtractor(client, model=extraction_model)

        # Initialize background worker with shorter wait for responsiveness
        self._worker = ExtractionWorker(
            store=self._store,
            extractor=self._extractor,
            max_wait_seconds=5.0,  # Process partial batches after 5s
        )
        self._worker.start()

        # Create wrapped chat interface
        self.chat = _WrappedChat(self)

    def flush_extractions(self, timeout: float = 60.0) -> bool:
        """Force immediate processing of all queued extractions.

        Useful for testing or when you need to ensure memories are saved.

        Args:
            timeout: Max time to wait in seconds

        Returns:
            True if all extractions completed, False if timed out
        """
        return self._worker.flush(timeout=timeout)

    @property
    def memory(self) -> _MemoryAPI:
        """Direct access to memory operations."""
        return _MemoryAPI(self._store, self._user_id)

    @staticmethod
    def _content_to_text(content: Any) -> str:
        """Flatten a message's ``content`` into plain text for memory search.

        Args:
            content: A string, a list of content parts (dicts such as
                ``{"type": "text", "text": ...}``), ``None``, or any other
                object.

        Returns:
            The string itself; the text parts of a list joined by newlines
            (non-text parts such as images are skipped); ``""`` for ``None``;
            ``str(content)`` for anything else.
        """
        if content is None:
            return ""
        if isinstance(content, str):
            return content
        if isinstance(content, list):
            return "\n".join(
                part["text"]
                for part in content
                if isinstance(part, dict)
                and part.get("type") == "text"
                and isinstance(part.get("text"), str)
            )
        return str(content)

    def _inject_memories(self, messages: list[dict]) -> list[dict]:
        """Inject relevant memories into messages.

        The LAST user message is used as the search query, while the
        resulting ``<context>`` block is prepended to the FIRST user message
        so the system prompt stays untouched (preserving prompt caching).

        String content gets the block prepended as text. List-style
        (multimodal) content gets the block inserted as a leading text part,
        so the remaining parts are kept intact instead of being stringified.

        Args:
            messages: Original messages list

        Returns:
            New messages list with memories injected, or the original list
            unchanged when there is no user text to search with or no
            memory matches.
        """
        # Find the last user message; it drives the memory search.
        user_content = None
        for msg in reversed(messages):
            if msg.get("role") == "user":
                user_content = msg.get("content", "")
                break

        query_text = self._content_to_text(user_content)
        if not query_text:
            return messages

        # Search for relevant memories
        memories = self._store.search(
            self._user_id,
            query_text,
            top_k=self._top_k,
        )

        if not memories:
            return messages

        # Build context block
        context_lines = ["<context>"]
        context_lines.extend(f"- {mem.content}" for mem in memories)
        context_lines.append("</context>")
        context_block = "\n".join(context_lines)

        # Work on a deep copy so the caller's messages are never mutated.
        new_messages = copy.deepcopy(messages)
        for msg in new_messages:
            if msg.get("role") != "user":
                continue
            original = msg.get("content", "")
            if isinstance(original, list):
                # Multimodal content: add the context as its own text part.
                msg["content"] = [{"type": "text", "text": context_block}, *original]
            else:
                msg["content"] = f"{context_block}\n\n{original}"
            break

        return new_messages

    def _queue_extraction(self, query: str, response: str) -> None:
        """Queue conversation for background memory extraction.

        Args:
            query: User's message
            response: Assistant's response
        """
        self._worker.schedule(self._user_id, query, response)
|
|
150
|
+
|
|
151
|
+
|
|
152
|
+
class _WrappedChat:
    """Chat namespace of the memory wrapper.

    Holds the owning wrapper and exposes ``completions``, the object whose
    ``create`` call is intercepted.
    """

    def __init__(self, wrapper: MemoryWrapper):
        """Build the completions interface for ``wrapper`` and keep a reference to it."""
        completions_api = _WrappedCompletions(wrapper)
        self.completions = completions_api
        self._wrapper = wrapper
|
|
158
|
+
|
|
159
|
+
|
|
160
|
+
class _WrappedCompletions:
|
|
161
|
+
"""Wrapped completions that add memory to requests."""
|
|
162
|
+
|
|
163
|
+
def __init__(self, wrapper: MemoryWrapper):
|
|
164
|
+
self._wrapper = wrapper
|
|
165
|
+
|
|
166
|
+
def create(self, **kwargs: Any) -> Any:
|
|
167
|
+
"""Create a chat completion with memory injection.
|
|
168
|
+
|
|
169
|
+
This intercepts the request to:
|
|
170
|
+
1. Inject relevant memories into user message
|
|
171
|
+
2. Forward to the real client
|
|
172
|
+
3. Queue response for background extraction
|
|
173
|
+
|
|
174
|
+
All kwargs are passed through to the underlying client.
|
|
175
|
+
"""
|
|
176
|
+
messages = kwargs.get("messages", [])
|
|
177
|
+
|
|
178
|
+
# 1. Inject memories into user message
|
|
179
|
+
enhanced_messages = self._wrapper._inject_memories(messages)
|
|
180
|
+
kwargs["messages"] = enhanced_messages
|
|
181
|
+
|
|
182
|
+
# 2. Forward to real client
|
|
183
|
+
response = self._wrapper._client.chat.completions.create(**kwargs)
|
|
184
|
+
|
|
185
|
+
# 3. Queue for extraction (non-blocking)
|
|
186
|
+
self._extract_and_queue(messages, response)
|
|
187
|
+
|
|
188
|
+
return response
|
|
189
|
+
|
|
190
|
+
def _extract_and_queue(self, original_messages: list[dict], response: Any) -> None:
|
|
191
|
+
"""Extract query and response, queue for extraction."""
|
|
192
|
+
# Get the last user message (without context injection)
|
|
193
|
+
user_query = None
|
|
194
|
+
for msg in reversed(original_messages):
|
|
195
|
+
if msg.get("role") == "user":
|
|
196
|
+
user_query = msg.get("content", "")
|
|
197
|
+
break
|
|
198
|
+
|
|
199
|
+
if not user_query:
|
|
200
|
+
return
|
|
201
|
+
|
|
202
|
+
# Get assistant response
|
|
203
|
+
try:
|
|
204
|
+
assistant_response = response.choices[0].message.content
|
|
205
|
+
except (AttributeError, IndexError):
|
|
206
|
+
return
|
|
207
|
+
|
|
208
|
+
if assistant_response:
|
|
209
|
+
self._wrapper._queue_extraction(user_query, assistant_response)
|
|
210
|
+
|
|
211
|
+
|
|
212
|
+
class _MemoryAPI:
|
|
213
|
+
"""Direct API for memory operations."""
|
|
214
|
+
|
|
215
|
+
def __init__(self, store: SQLiteMemoryStore, user_id: str):
|
|
216
|
+
self._store = store
|
|
217
|
+
self._user_id = user_id
|
|
218
|
+
|
|
219
|
+
def search(self, query: str, top_k: int = 5) -> list[Memory]:
|
|
220
|
+
"""Search memories.
|
|
221
|
+
|
|
222
|
+
Args:
|
|
223
|
+
query: Search query
|
|
224
|
+
top_k: Max results
|
|
225
|
+
|
|
226
|
+
Returns:
|
|
227
|
+
Matching memories
|
|
228
|
+
"""
|
|
229
|
+
return self._store.search(self._user_id, query, top_k)
|
|
230
|
+
|
|
231
|
+
def add(
|
|
232
|
+
self,
|
|
233
|
+
content: str,
|
|
234
|
+
category: str = "fact",
|
|
235
|
+
importance: float = 0.5,
|
|
236
|
+
) -> Memory:
|
|
237
|
+
"""Manually add a memory.
|
|
238
|
+
|
|
239
|
+
Args:
|
|
240
|
+
content: Memory content
|
|
241
|
+
category: preference, fact, or context
|
|
242
|
+
importance: 0.0-1.0
|
|
243
|
+
|
|
244
|
+
Returns:
|
|
245
|
+
The created memory
|
|
246
|
+
"""
|
|
247
|
+
memory = Memory(
|
|
248
|
+
content=content,
|
|
249
|
+
category=category, # type: ignore
|
|
250
|
+
importance=importance,
|
|
251
|
+
)
|
|
252
|
+
self._store.save(self._user_id, memory)
|
|
253
|
+
return memory
|
|
254
|
+
|
|
255
|
+
def get_all(self) -> list[Memory]:
|
|
256
|
+
"""Get all memories for this user."""
|
|
257
|
+
return self._store.get_all(self._user_id)
|
|
258
|
+
|
|
259
|
+
def delete(self, memory_id: str) -> bool:
|
|
260
|
+
"""Delete a specific memory."""
|
|
261
|
+
return self._store.delete(self._user_id, memory_id)
|
|
262
|
+
|
|
263
|
+
def clear(self) -> int:
|
|
264
|
+
"""Clear all memories for this user."""
|
|
265
|
+
return self._store.clear(self._user_id)
|
|
266
|
+
|
|
267
|
+
def stats(self) -> dict:
|
|
268
|
+
"""Get memory statistics."""
|
|
269
|
+
return self._store.stats(self._user_id)
|
|
270
|
+
|
|
271
|
+
|
|
272
|
+
def with_memory(
    client: Any,
    user_id: str,
    db_path: str | Path = "headroom_memory.db",
    extraction_model: str | None = None,
    top_k: int = 5,
    **kwargs: Any,
) -> MemoryWrapper:
    """Return ``client`` wrapped in a ``MemoryWrapper``.

    Convenience entry point: every argument is handed to ``MemoryWrapper``
    by keyword, together with any extra keyword arguments.

    Args:
        client: LLM client (OpenAI, Anthropic, Mistral, Groq, etc.)
        user_id: User identifier for memory isolation
        db_path: Path to SQLite database (default: headroom_memory.db)
        extraction_model: Override extraction model (auto-detects by default)
        top_k: Number of memories to inject per request (default: 5)
        **kwargs: Additional arguments passed to MemoryWrapper

    Returns:
        The ``MemoryWrapper`` built around ``client``

    Example:
        from openai import OpenAI
        from headroom import with_memory

        client = with_memory(OpenAI(), user_id="alice")

        response = client.chat.completions.create(
            model="gpt-4o",
            messages=[{"role": "user", "content": "I prefer Python"}]
        )
        # Memory automatically extracted in background

        # Later...
        response = client.chat.completions.create(
            model="gpt-4o",
            messages=[{"role": "user", "content": "What language should I use?"}]
        )
        # Memory about Python preference automatically injected!
    """
    # The named parameters can never also appear in **kwargs, so merging the
    # two mappings cannot shadow any of them.
    wrapper_options: dict[str, Any] = {
        "client": client,
        "user_id": user_id,
        "db_path": db_path,
        "extraction_model": extraction_model,
        "top_k": top_k,
    }
    wrapper_options.update(kwargs)
    return MemoryWrapper(**wrapper_options)
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
"""Model registry and capabilities database.
|
|
2
|
+
|
|
3
|
+
Provides a centralized registry of LLM models with their capabilities,
|
|
4
|
+
context limits, pricing, and provider information.
|
|
5
|
+
|
|
6
|
+
Usage:
|
|
7
|
+
from headroom.models import ModelRegistry, get_model_info
|
|
8
|
+
|
|
9
|
+
# Get info about a model
|
|
10
|
+
info = get_model_info("gpt-4o")
|
|
11
|
+
print(f"Context: {info.context_window}")
|
|
12
|
+
print(f"Provider: {info.provider}")
|
|
13
|
+
|
|
14
|
+
# List all models from a provider
|
|
15
|
+
models = ModelRegistry.list_models(provider="openai")
|
|
16
|
+
|
|
17
|
+
# Register a custom model
|
|
18
|
+
ModelRegistry.register(
|
|
19
|
+
"my-custom-model",
|
|
20
|
+
provider="custom",
|
|
21
|
+
context_window=32000,
|
|
22
|
+
)
|
|
23
|
+
"""
|
|
24
|
+
|
|
25
|
+
from .registry import (
|
|
26
|
+
ModelInfo,
|
|
27
|
+
ModelRegistry,
|
|
28
|
+
get_model_info,
|
|
29
|
+
list_models,
|
|
30
|
+
register_model,
|
|
31
|
+
)
|
|
32
|
+
|
|
33
|
+
__all__ = [
|
|
34
|
+
"ModelRegistry",
|
|
35
|
+
"ModelInfo",
|
|
36
|
+
"get_model_info",
|
|
37
|
+
"list_models",
|
|
38
|
+
"register_model",
|
|
39
|
+
]
|