headroom-ai 0.2.13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- headroom/__init__.py +212 -0
- headroom/cache/__init__.py +76 -0
- headroom/cache/anthropic.py +517 -0
- headroom/cache/base.py +342 -0
- headroom/cache/compression_feedback.py +613 -0
- headroom/cache/compression_store.py +814 -0
- headroom/cache/dynamic_detector.py +1026 -0
- headroom/cache/google.py +884 -0
- headroom/cache/openai.py +584 -0
- headroom/cache/registry.py +175 -0
- headroom/cache/semantic.py +451 -0
- headroom/ccr/__init__.py +77 -0
- headroom/ccr/context_tracker.py +582 -0
- headroom/ccr/mcp_server.py +319 -0
- headroom/ccr/response_handler.py +772 -0
- headroom/ccr/tool_injection.py +415 -0
- headroom/cli.py +219 -0
- headroom/client.py +977 -0
- headroom/compression/__init__.py +42 -0
- headroom/compression/detector.py +424 -0
- headroom/compression/handlers/__init__.py +22 -0
- headroom/compression/handlers/base.py +219 -0
- headroom/compression/handlers/code_handler.py +506 -0
- headroom/compression/handlers/json_handler.py +418 -0
- headroom/compression/masks.py +345 -0
- headroom/compression/universal.py +465 -0
- headroom/config.py +474 -0
- headroom/exceptions.py +192 -0
- headroom/integrations/__init__.py +159 -0
- headroom/integrations/agno/__init__.py +53 -0
- headroom/integrations/agno/hooks.py +345 -0
- headroom/integrations/agno/model.py +625 -0
- headroom/integrations/agno/providers.py +154 -0
- headroom/integrations/langchain/__init__.py +106 -0
- headroom/integrations/langchain/agents.py +326 -0
- headroom/integrations/langchain/chat_model.py +1002 -0
- headroom/integrations/langchain/langsmith.py +324 -0
- headroom/integrations/langchain/memory.py +319 -0
- headroom/integrations/langchain/providers.py +200 -0
- headroom/integrations/langchain/retriever.py +371 -0
- headroom/integrations/langchain/streaming.py +341 -0
- headroom/integrations/mcp/__init__.py +37 -0
- headroom/integrations/mcp/server.py +533 -0
- headroom/memory/__init__.py +37 -0
- headroom/memory/extractor.py +390 -0
- headroom/memory/fast_store.py +621 -0
- headroom/memory/fast_wrapper.py +311 -0
- headroom/memory/inline_extractor.py +229 -0
- headroom/memory/store.py +434 -0
- headroom/memory/worker.py +260 -0
- headroom/memory/wrapper.py +321 -0
- headroom/models/__init__.py +39 -0
- headroom/models/registry.py +687 -0
- headroom/parser.py +293 -0
- headroom/pricing/__init__.py +51 -0
- headroom/pricing/anthropic_prices.py +81 -0
- headroom/pricing/litellm_pricing.py +113 -0
- headroom/pricing/openai_prices.py +91 -0
- headroom/pricing/registry.py +188 -0
- headroom/providers/__init__.py +61 -0
- headroom/providers/anthropic.py +621 -0
- headroom/providers/base.py +131 -0
- headroom/providers/cohere.py +362 -0
- headroom/providers/google.py +427 -0
- headroom/providers/litellm.py +297 -0
- headroom/providers/openai.py +566 -0
- headroom/providers/openai_compatible.py +521 -0
- headroom/proxy/__init__.py +19 -0
- headroom/proxy/server.py +2683 -0
- headroom/py.typed +0 -0
- headroom/relevance/__init__.py +124 -0
- headroom/relevance/base.py +106 -0
- headroom/relevance/bm25.py +255 -0
- headroom/relevance/embedding.py +255 -0
- headroom/relevance/hybrid.py +259 -0
- headroom/reporting/__init__.py +5 -0
- headroom/reporting/generator.py +549 -0
- headroom/storage/__init__.py +41 -0
- headroom/storage/base.py +125 -0
- headroom/storage/jsonl.py +220 -0
- headroom/storage/sqlite.py +289 -0
- headroom/telemetry/__init__.py +91 -0
- headroom/telemetry/collector.py +764 -0
- headroom/telemetry/models.py +880 -0
- headroom/telemetry/toin.py +1579 -0
- headroom/tokenizer.py +80 -0
- headroom/tokenizers/__init__.py +75 -0
- headroom/tokenizers/base.py +210 -0
- headroom/tokenizers/estimator.py +198 -0
- headroom/tokenizers/huggingface.py +317 -0
- headroom/tokenizers/mistral.py +245 -0
- headroom/tokenizers/registry.py +398 -0
- headroom/tokenizers/tiktoken_counter.py +248 -0
- headroom/transforms/__init__.py +106 -0
- headroom/transforms/base.py +57 -0
- headroom/transforms/cache_aligner.py +357 -0
- headroom/transforms/code_compressor.py +1313 -0
- headroom/transforms/content_detector.py +335 -0
- headroom/transforms/content_router.py +1158 -0
- headroom/transforms/llmlingua_compressor.py +638 -0
- headroom/transforms/log_compressor.py +529 -0
- headroom/transforms/pipeline.py +297 -0
- headroom/transforms/rolling_window.py +350 -0
- headroom/transforms/search_compressor.py +365 -0
- headroom/transforms/smart_crusher.py +2682 -0
- headroom/transforms/text_compressor.py +259 -0
- headroom/transforms/tool_crusher.py +338 -0
- headroom/utils.py +215 -0
- headroom_ai-0.2.13.dist-info/METADATA +315 -0
- headroom_ai-0.2.13.dist-info/RECORD +114 -0
- headroom_ai-0.2.13.dist-info/WHEEL +4 -0
- headroom_ai-0.2.13.dist-info/entry_points.txt +2 -0
- headroom_ai-0.2.13.dist-info/licenses/LICENSE +190 -0
- headroom_ai-0.2.13.dist-info/licenses/NOTICE +43 -0
headroom/memory/extractor.py
@@ -0,0 +1,390 @@
"""Memory extraction using LLMs.

Supports multiple providers by reusing the wrapped client with a cheap model.
Auto-detects provider from client class and selects appropriate cheap model.
Uses structured JSON output where available for reliable parsing.
"""

from __future__ import annotations

import json
import logging
import re
from typing import Any, Protocol

from headroom.memory.store import Memory

logger = logging.getLogger(__name__)


# Provider → Cheap Model mapping (verified January 2026)
# These are the most cost-effective models for simple extraction tasks
CHEAP_MODELS: dict[str, str] = {
    "openai": "gpt-4o-mini",  # $0.15/1M input, $0.60/1M output
    "anthropic": "claude-3-5-haiku-latest",  # $0.80/1M input, $4/1M output
    "mistralai": "mistral-small-latest",  # $0.10/1M input, $0.30/1M output
    "groq": "llama-3.3-70b-versatile",  # Free tier available
    "together": "meta-llama/Llama-3.3-70B-Instruct-Turbo",  # $0.88/1M
    "fireworks": "accounts/fireworks/models/llama-v3p1-8b-instruct",  # $0.20/1M
    "google": "gemini-2.0-flash-lite",  # $0.075/1M input, $0.30/1M output
    "cohere": "command-r7b-12-2024",  # $0.0375/1M input, $0.15/1M output
}

# Providers that support structured JSON output via response_format
SUPPORTS_JSON_MODE: set[str] = {"openai", "mistralai", "groq", "together", "fireworks"}


# Entity-agnostic prompt - works for users, agents, or any conversational entity
EXTRACTION_PROMPT = """Analyze this conversation and extract any facts worth remembering.

Focus on:
- Preferences (language, tools, frameworks, style, configuration)
- Facts (identity, role, capabilities, constraints, environment)
- Context (goals, ongoing tasks, relationships, history)

Conversation:
Speaker A: {query}
Speaker B: {response}

Return a JSON object with this structure:
{{
    "memories": [
        {{"content": "Prefers Python for backend development", "category": "preference", "importance": 0.8}},
        {{"content": "Works on distributed systems", "category": "fact", "importance": 0.7}}
    ],
    "should_remember": true
}}

Categories: "preference", "fact", "context"
Importance: 0.0-1.0 (higher = more important to remember long-term)

If there's nothing worth remembering (greetings, generic questions, transient info), return:
{{"memories": [], "should_remember": false}}

Return ONLY valid JSON."""


class ChatClient(Protocol):
    """Protocol for chat clients (OpenAI, Anthropic, etc.)."""

    class Chat:
        class Completions:
            def create(self, **kwargs: Any) -> Any: ...

        completions: Completions

    chat: Chat


def detect_provider(client: Any) -> str | None:
    """Detect the provider from client class path.

    Args:
        client: The LLM client instance

    Returns:
        Provider name or None if unknown
    """
    module = type(client).__module__.lower()

    # Check for known providers
    providers = [
        "openai",
        "anthropic",
        "mistralai",
        "groq",
        "together",
        "fireworks",
        "google",
        "cohere",
    ]

    for provider in providers:
        if provider in module:
            return provider

    return None


def get_cheap_model(provider: str) -> str | None:
    """Get the cheap model for a provider.

    Args:
        provider: Provider name

    Returns:
        Cheap model ID or None if unknown
    """
    return CHEAP_MODELS.get(provider)


class MemoryExtractor:
    """Extracts memories from conversations using LLMs.

    Supports multiple providers by reusing the wrapped client.
    Auto-detects provider and selects appropriate cheap model.

    Usage:
        extractor = MemoryExtractor(openai_client)
        memories = extractor.extract("I prefer Python", "Great choice!")
    """

    def __init__(
        self,
        client: Any,
        model: str | None = None,
    ):
        """Initialize the extractor.

        Args:
            client: LLM client (OpenAI, Anthropic, etc.)
            model: Override the extraction model (auto-detects if None)
        """
        self.client = client
        self._provider = detect_provider(client)
        self._model: str | None = None

        if model:
            self._model = model
        elif self._provider:
            self._model = get_cheap_model(self._provider)

        if not self._model:
            logger.warning(
                f"Could not detect cheap model for provider. "
                f"Client type: {type(client).__module__}.{type(client).__name__}. "
                f"Memory extraction may fail."
            )

    @property
    def provider(self) -> str | None:
        """Get the detected provider."""
        return self._provider

    @property
    def model(self) -> str | None:
        """Get the extraction model."""
        return self._model

    def extract(self, query: str, response: str) -> list[Memory]:
        """Extract memories from a conversation turn.

        Args:
            query: User's message
            response: Assistant's response

        Returns:
            List of extracted memories (may be empty)
        """
        if not self._model:
            logger.warning("No extraction model configured, skipping extraction")
            return []

        prompt = EXTRACTION_PROMPT.format(query=query, response=response)

        try:
            result = self._call_llm(prompt)
            return self._parse_response(result)
        except Exception as e:
            logger.error(f"Extraction failed: {e}")
            return []

    def extract_batch(self, conversations: list[tuple[str, str, str]]) -> dict[str, list[Memory]]:
        """Extract memories from multiple conversations.

        Args:
            conversations: List of (user_id, query, response) tuples

        Returns:
            Dict mapping user_id to list of memories
        """
        if not conversations:
            return {}

        # Build batch prompt
        batch_prompt = self._build_batch_prompt(conversations)

        try:
            result = self._call_llm(batch_prompt)
            return self._parse_batch_response(result, conversations)
        except Exception as e:
            logger.error(f"Batch extraction failed: {e}")
            return {}

    def _call_llm(self, prompt: str) -> str:
        """Call the LLM with the given prompt.

        Uses structured JSON output (response_format) where available
        to ensure reliable JSON parsing.

        Args:
            prompt: The prompt to send

        Returns:
            The LLM's response text
        """
        if self._provider == "anthropic":
            # Anthropic uses different API - no native JSON mode yet
            response = self.client.messages.create(
                model=self._model,
                max_tokens=1024,
                messages=[{"role": "user", "content": prompt}],
            )
            return str(response.content[0].text)
        elif self._provider == "cohere":
            # Cohere uses different API
            response = self.client.chat(
                model=self._model,
                message=prompt,
            )
            return str(response.text)
        elif self._provider == "google":
            # Google Gemini - use JSON response mime type
            model = self.client.GenerativeModel(
                self._model,
                generation_config={"response_mime_type": "application/json"},
            )
            response = model.generate_content(prompt)
            return str(response.text)
        else:
            # OpenAI-compatible API (OpenAI, Groq, Together, Fireworks, Mistral)
            # Use JSON mode for structured output
            kwargs: dict[str, Any] = {
                "model": self._model,
                "messages": [{"role": "user", "content": prompt}],
                "temperature": 0.0,  # Deterministic for extraction
            }

            # Add response_format for providers that support it
            if self._provider in SUPPORTS_JSON_MODE:
                kwargs["response_format"] = {"type": "json_object"}

            response = self.client.chat.completions.create(**kwargs)
            return str(response.choices[0].message.content)

    def _parse_response(self, text: str) -> list[Memory]:
        """Parse LLM response into memories.

        Args:
            text: Raw LLM response

        Returns:
            List of Memory objects
        """
        try:
            # Extract JSON from response (handle markdown code blocks)
            json_match = re.search(r"```(?:json)?\s*(.*?)\s*```", text, re.DOTALL)
            if json_match:
                text = json_match.group(1)

            data = json.loads(text.strip())

            if not data.get("should_remember", False):
                return []

            memories = []
            for item in data.get("memories", []):
                memories.append(
                    Memory(
                        content=item["content"],
                        category=item.get("category", "fact"),
                        importance=item.get("importance", 0.5),
                    )
                )

            return memories

        except (json.JSONDecodeError, KeyError) as e:
            logger.warning(f"Failed to parse extraction response: {e}")
            return []

    def _build_batch_prompt(self, conversations: list[tuple[str, str, str]]) -> str:
        """Build a batch extraction prompt.

        Args:
            conversations: List of (entity_id, query, response) tuples

        Returns:
            Batch prompt string
        """
        lines = [
            "Analyze these conversations and extract facts worth remembering about each entity.",
            "",
            "Focus on: preferences, facts, context that helps future interactions.",
            "",
        ]

        for i, (entity_id, query, response) in enumerate(conversations):
            lines.extend(
                [
                    f"--- Conversation {i + 1} (Entity: {entity_id}) ---",
                    f"Speaker A: {query}",
                    f"Speaker B: {response}",
                    "",
                ]
            )

        lines.extend(
            [
                "Return a JSON object mapping entity_id to their memories:",
                "{",
                '  "entity_123": {',
                '    "memories": [{"content": "...", "category": "preference", "importance": 0.8}],',
                '    "should_remember": true',
                "  }",
                "}",
                "",
                "Categories: preference, fact, context",
                "Importance: 0.0-1.0",
                "",
                "Return ONLY valid JSON.",
            ]
        )

        return "\n".join(lines)

    def _parse_batch_response(
        self,
        text: str,
        conversations: list[tuple[str, str, str]],
    ) -> dict[str, list[Memory]]:
        """Parse batch extraction response.

        Args:
            text: Raw LLM response
            conversations: Original conversations for fallback

        Returns:
            Dict mapping user_id to list of memories
        """
        try:
            # Extract JSON from response
            json_match = re.search(r"```(?:json)?\s*(.*?)\s*```", text, re.DOTALL)
            if json_match:
                text = json_match.group(1)

            data = json.loads(text.strip())
            result: dict[str, list[Memory]] = {}

            for user_id, user_data in data.items():
                if not user_data.get("should_remember", False):
                    continue

                memories = []
                for item in user_data.get("memories", []):
                    memories.append(
                        Memory(
                            content=item["content"],
                            category=item.get("category", "fact"),
                            importance=item.get("importance", 0.5),
                        )
                    )

                if memories:
                    result[user_id] = memories

            return result

        except (json.JSONDecodeError, KeyError, AttributeError) as e:
            logger.warning(f"Failed to parse batch response: {e}")
            return {}
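
For context, a minimal sketch of how this added extractor module might be driven, assuming the standard openai Python SDK is installed and OPENAI_API_KEY is set in the environment. The OpenAI() client construction is the SDK's usual entry point; the Memory attributes (content, category, importance) are assumed from the constructor calls above rather than verified against headroom/memory/store.py.

# Usage sketch (assumptions: openai SDK installed, OPENAI_API_KEY set,
# Memory exposes content/category/importance as attributes).
from openai import OpenAI

from headroom.memory.extractor import MemoryExtractor

client = OpenAI()
extractor = MemoryExtractor(client)  # detect_provider() -> "openai", cheap model "gpt-4o-mini"
print(extractor.provider, extractor.model)

# Single-turn extraction: returns [] on LLM or parse errors instead of raising.
memories = extractor.extract(
    "I prefer Python for backend work and deploy on Kubernetes.",
    "Noted, Python plus Kubernetes is a solid stack.",
)
for memory in memories:
    print(memory.content, memory.category, memory.importance)

# Batch extraction: one LLM call covering several (entity_id, query, response) turns.
by_entity = extractor.extract_batch(
    [
        ("user_1", "Call me Sam; I maintain the billing service.", "Got it, Sam."),
        ("user_2", "hi", "Hello!"),  # likely yields no memories
    ]
)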