headroom_ai-0.2.13-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (114)
  1. headroom/__init__.py +212 -0
  2. headroom/cache/__init__.py +76 -0
  3. headroom/cache/anthropic.py +517 -0
  4. headroom/cache/base.py +342 -0
  5. headroom/cache/compression_feedback.py +613 -0
  6. headroom/cache/compression_store.py +814 -0
  7. headroom/cache/dynamic_detector.py +1026 -0
  8. headroom/cache/google.py +884 -0
  9. headroom/cache/openai.py +584 -0
  10. headroom/cache/registry.py +175 -0
  11. headroom/cache/semantic.py +451 -0
  12. headroom/ccr/__init__.py +77 -0
  13. headroom/ccr/context_tracker.py +582 -0
  14. headroom/ccr/mcp_server.py +319 -0
  15. headroom/ccr/response_handler.py +772 -0
  16. headroom/ccr/tool_injection.py +415 -0
  17. headroom/cli.py +219 -0
  18. headroom/client.py +977 -0
  19. headroom/compression/__init__.py +42 -0
  20. headroom/compression/detector.py +424 -0
  21. headroom/compression/handlers/__init__.py +22 -0
  22. headroom/compression/handlers/base.py +219 -0
  23. headroom/compression/handlers/code_handler.py +506 -0
  24. headroom/compression/handlers/json_handler.py +418 -0
  25. headroom/compression/masks.py +345 -0
  26. headroom/compression/universal.py +465 -0
  27. headroom/config.py +474 -0
  28. headroom/exceptions.py +192 -0
  29. headroom/integrations/__init__.py +159 -0
  30. headroom/integrations/agno/__init__.py +53 -0
  31. headroom/integrations/agno/hooks.py +345 -0
  32. headroom/integrations/agno/model.py +625 -0
  33. headroom/integrations/agno/providers.py +154 -0
  34. headroom/integrations/langchain/__init__.py +106 -0
  35. headroom/integrations/langchain/agents.py +326 -0
  36. headroom/integrations/langchain/chat_model.py +1002 -0
  37. headroom/integrations/langchain/langsmith.py +324 -0
  38. headroom/integrations/langchain/memory.py +319 -0
  39. headroom/integrations/langchain/providers.py +200 -0
  40. headroom/integrations/langchain/retriever.py +371 -0
  41. headroom/integrations/langchain/streaming.py +341 -0
  42. headroom/integrations/mcp/__init__.py +37 -0
  43. headroom/integrations/mcp/server.py +533 -0
  44. headroom/memory/__init__.py +37 -0
  45. headroom/memory/extractor.py +390 -0
  46. headroom/memory/fast_store.py +621 -0
  47. headroom/memory/fast_wrapper.py +311 -0
  48. headroom/memory/inline_extractor.py +229 -0
  49. headroom/memory/store.py +434 -0
  50. headroom/memory/worker.py +260 -0
  51. headroom/memory/wrapper.py +321 -0
  52. headroom/models/__init__.py +39 -0
  53. headroom/models/registry.py +687 -0
  54. headroom/parser.py +293 -0
  55. headroom/pricing/__init__.py +51 -0
  56. headroom/pricing/anthropic_prices.py +81 -0
  57. headroom/pricing/litellm_pricing.py +113 -0
  58. headroom/pricing/openai_prices.py +91 -0
  59. headroom/pricing/registry.py +188 -0
  60. headroom/providers/__init__.py +61 -0
  61. headroom/providers/anthropic.py +621 -0
  62. headroom/providers/base.py +131 -0
  63. headroom/providers/cohere.py +362 -0
  64. headroom/providers/google.py +427 -0
  65. headroom/providers/litellm.py +297 -0
  66. headroom/providers/openai.py +566 -0
  67. headroom/providers/openai_compatible.py +521 -0
  68. headroom/proxy/__init__.py +19 -0
  69. headroom/proxy/server.py +2683 -0
  70. headroom/py.typed +0 -0
  71. headroom/relevance/__init__.py +124 -0
  72. headroom/relevance/base.py +106 -0
  73. headroom/relevance/bm25.py +255 -0
  74. headroom/relevance/embedding.py +255 -0
  75. headroom/relevance/hybrid.py +259 -0
  76. headroom/reporting/__init__.py +5 -0
  77. headroom/reporting/generator.py +549 -0
  78. headroom/storage/__init__.py +41 -0
  79. headroom/storage/base.py +125 -0
  80. headroom/storage/jsonl.py +220 -0
  81. headroom/storage/sqlite.py +289 -0
  82. headroom/telemetry/__init__.py +91 -0
  83. headroom/telemetry/collector.py +764 -0
  84. headroom/telemetry/models.py +880 -0
  85. headroom/telemetry/toin.py +1579 -0
  86. headroom/tokenizer.py +80 -0
  87. headroom/tokenizers/__init__.py +75 -0
  88. headroom/tokenizers/base.py +210 -0
  89. headroom/tokenizers/estimator.py +198 -0
  90. headroom/tokenizers/huggingface.py +317 -0
  91. headroom/tokenizers/mistral.py +245 -0
  92. headroom/tokenizers/registry.py +398 -0
  93. headroom/tokenizers/tiktoken_counter.py +248 -0
  94. headroom/transforms/__init__.py +106 -0
  95. headroom/transforms/base.py +57 -0
  96. headroom/transforms/cache_aligner.py +357 -0
  97. headroom/transforms/code_compressor.py +1313 -0
  98. headroom/transforms/content_detector.py +335 -0
  99. headroom/transforms/content_router.py +1158 -0
  100. headroom/transforms/llmlingua_compressor.py +638 -0
  101. headroom/transforms/log_compressor.py +529 -0
  102. headroom/transforms/pipeline.py +297 -0
  103. headroom/transforms/rolling_window.py +350 -0
  104. headroom/transforms/search_compressor.py +365 -0
  105. headroom/transforms/smart_crusher.py +2682 -0
  106. headroom/transforms/text_compressor.py +259 -0
  107. headroom/transforms/tool_crusher.py +338 -0
  108. headroom/utils.py +215 -0
  109. headroom_ai-0.2.13.dist-info/METADATA +315 -0
  110. headroom_ai-0.2.13.dist-info/RECORD +114 -0
  111. headroom_ai-0.2.13.dist-info/WHEEL +4 -0
  112. headroom_ai-0.2.13.dist-info/entry_points.txt +2 -0
  113. headroom_ai-0.2.13.dist-info/licenses/LICENSE +190 -0
  114. headroom_ai-0.2.13.dist-info/licenses/NOTICE +43 -0
headroom/memory/extractor.py
@@ -0,0 +1,390 @@
+ """Memory extraction using LLMs.
+
+ Supports multiple providers by reusing the wrapped client with a cheap model.
+ Auto-detects provider from client class and selects appropriate cheap model.
+ Uses structured JSON output where available for reliable parsing.
+ """
+
+ from __future__ import annotations
+
+ import json
+ import logging
+ import re
+ from typing import Any, Protocol
+
+ from headroom.memory.store import Memory
+
+ logger = logging.getLogger(__name__)
+
+
+ # Provider → Cheap Model mapping (verified January 2026)
+ # These are the most cost-effective models for simple extraction tasks
+ CHEAP_MODELS: dict[str, str] = {
+     "openai": "gpt-4o-mini",  # $0.15/1M input, $0.60/1M output
+     "anthropic": "claude-3-5-haiku-latest",  # $0.80/1M input, $4/1M output
+     "mistralai": "mistral-small-latest",  # $0.10/1M input, $0.30/1M output
+     "groq": "llama-3.3-70b-versatile",  # Free tier available
+     "together": "meta-llama/Llama-3.3-70B-Instruct-Turbo",  # $0.88/1M
+     "fireworks": "accounts/fireworks/models/llama-v3p1-8b-instruct",  # $0.20/1M
+     "google": "gemini-2.0-flash-lite",  # $0.075/1M input, $0.30/1M output
+     "cohere": "command-r7b-12-2024",  # $0.0375/1M input, $0.15/1M output
+ }
+
+ # Providers that support structured JSON output via response_format
+ SUPPORTS_JSON_MODE: set[str] = {"openai", "mistralai", "groq", "together", "fireworks"}
+
+
+ # Entity-agnostic prompt - works for users, agents, or any conversational entity
+ EXTRACTION_PROMPT = """Analyze this conversation and extract any facts worth remembering.
+
+ Focus on:
+ - Preferences (language, tools, frameworks, style, configuration)
+ - Facts (identity, role, capabilities, constraints, environment)
+ - Context (goals, ongoing tasks, relationships, history)
+
+ Conversation:
+ Speaker A: {query}
+ Speaker B: {response}
+
+ Return a JSON object with this structure:
+ {{
+   "memories": [
+     {{"content": "Prefers Python for backend development", "category": "preference", "importance": 0.8}},
+     {{"content": "Works on distributed systems", "category": "fact", "importance": 0.7}}
+   ],
+   "should_remember": true
+ }}
+
+ Categories: "preference", "fact", "context"
+ Importance: 0.0-1.0 (higher = more important to remember long-term)
+
+ If there's nothing worth remembering (greetings, generic questions, transient info), return:
+ {{"memories": [], "should_remember": false}}
+
+ Return ONLY valid JSON."""
+
+
+ class ChatClient(Protocol):
+     """Protocol for chat clients (OpenAI, Anthropic, etc.)."""
+
+     class Chat:
+         class Completions:
+             def create(self, **kwargs: Any) -> Any: ...
+
+         completions: Completions
+
+     chat: Chat
+
+
+ def detect_provider(client: Any) -> str | None:
+     """Detect the provider from client class path.
+
+     Args:
+         client: The LLM client instance
+
+     Returns:
+         Provider name or None if unknown
+     """
+     module = type(client).__module__.lower()
+
+     # Check for known providers
+     providers = [
+         "openai",
+         "anthropic",
+         "mistralai",
+         "groq",
+         "together",
+         "fireworks",
+         "google",
+         "cohere",
+     ]
+
+     for provider in providers:
+         if provider in module:
+             return provider
+
+     return None
+
+
+ def get_cheap_model(provider: str) -> str | None:
+     """Get the cheap model for a provider.
+
+     Args:
+         provider: Provider name
+
+     Returns:
+         Cheap model ID or None if unknown
+     """
+     return CHEAP_MODELS.get(provider)
+
+
+ class MemoryExtractor:
+     """Extracts memories from conversations using LLMs.
+
+     Supports multiple providers by reusing the wrapped client.
+     Auto-detects provider and selects appropriate cheap model.
+
+     Usage:
+         extractor = MemoryExtractor(openai_client)
+         memories = extractor.extract("I prefer Python", "Great choice!")
+     """
+
+     def __init__(
+         self,
+         client: Any,
+         model: str | None = None,
+     ):
+         """Initialize the extractor.
+
+         Args:
+             client: LLM client (OpenAI, Anthropic, etc.)
+             model: Override the extraction model (auto-detects if None)
+         """
+         self.client = client
+         self._provider = detect_provider(client)
+         self._model: str | None = None
+
+         if model:
+             self._model = model
+         elif self._provider:
+             self._model = get_cheap_model(self._provider)
+
+         if not self._model:
+             logger.warning(
+                 f"Could not detect cheap model for provider. "
+                 f"Client type: {type(client).__module__}.{type(client).__name__}. "
+                 f"Memory extraction may fail."
+             )
+
+     @property
+     def provider(self) -> str | None:
+         """Get the detected provider."""
+         return self._provider
+
+     @property
+     def model(self) -> str | None:
+         """Get the extraction model."""
+         return self._model
+
+     def extract(self, query: str, response: str) -> list[Memory]:
+         """Extract memories from a conversation turn.
+
+         Args:
+             query: User's message
+             response: Assistant's response
+
+         Returns:
+             List of extracted memories (may be empty)
+         """
+         if not self._model:
+             logger.warning("No extraction model configured, skipping extraction")
+             return []
+
+         prompt = EXTRACTION_PROMPT.format(query=query, response=response)
+
+         try:
+             result = self._call_llm(prompt)
+             return self._parse_response(result)
+         except Exception as e:
+             logger.error(f"Extraction failed: {e}")
+             return []
+
+     def extract_batch(self, conversations: list[tuple[str, str, str]]) -> dict[str, list[Memory]]:
+         """Extract memories from multiple conversations.
+
+         Args:
+             conversations: List of (user_id, query, response) tuples
+
+         Returns:
+             Dict mapping user_id to list of memories
+         """
+         if not conversations:
+             return {}
+
+         # Build batch prompt
+         batch_prompt = self._build_batch_prompt(conversations)
+
+         try:
+             result = self._call_llm(batch_prompt)
+             return self._parse_batch_response(result, conversations)
+         except Exception as e:
+             logger.error(f"Batch extraction failed: {e}")
+             return {}
+
+     def _call_llm(self, prompt: str) -> str:
+         """Call the LLM with the given prompt.
+
+         Uses structured JSON output (response_format) where available
+         to ensure reliable JSON parsing.
+
+         Args:
+             prompt: The prompt to send
+
+         Returns:
+             The LLM's response text
+         """
+         if self._provider == "anthropic":
+             # Anthropic uses different API - no native JSON mode yet
+             response = self.client.messages.create(
+                 model=self._model,
+                 max_tokens=1024,
+                 messages=[{"role": "user", "content": prompt}],
+             )
+             return str(response.content[0].text)
+         elif self._provider == "cohere":
+             # Cohere uses different API
+             response = self.client.chat(
+                 model=self._model,
+                 message=prompt,
+             )
+             return str(response.text)
+         elif self._provider == "google":
+             # Google Gemini - use JSON response mime type
+             model = self.client.GenerativeModel(
+                 self._model,
+                 generation_config={"response_mime_type": "application/json"},
+             )
+             response = model.generate_content(prompt)
+             return str(response.text)
+         else:
+             # OpenAI-compatible API (OpenAI, Groq, Together, Fireworks, Mistral)
+             # Use JSON mode for structured output
+             kwargs: dict[str, Any] = {
+                 "model": self._model,
+                 "messages": [{"role": "user", "content": prompt}],
+                 "temperature": 0.0,  # Deterministic for extraction
+             }
+
+             # Add response_format for providers that support it
+             if self._provider in SUPPORTS_JSON_MODE:
+                 kwargs["response_format"] = {"type": "json_object"}
+
+             response = self.client.chat.completions.create(**kwargs)
+             return str(response.choices[0].message.content)
+
+     def _parse_response(self, text: str) -> list[Memory]:
+         """Parse LLM response into memories.
+
+         Args:
+             text: Raw LLM response
+
+         Returns:
+             List of Memory objects
+         """
+         try:
+             # Extract JSON from response (handle markdown code blocks)
+             json_match = re.search(r"```(?:json)?\s*(.*?)\s*```", text, re.DOTALL)
+             if json_match:
+                 text = json_match.group(1)
+
+             data = json.loads(text.strip())
+
+             if not data.get("should_remember", False):
+                 return []
+
+             memories = []
+             for item in data.get("memories", []):
+                 memories.append(
+                     Memory(
+                         content=item["content"],
+                         category=item.get("category", "fact"),
+                         importance=item.get("importance", 0.5),
+                     )
+                 )
+
+             return memories
+
+         except (json.JSONDecodeError, KeyError) as e:
+             logger.warning(f"Failed to parse extraction response: {e}")
+             return []
+
+     def _build_batch_prompt(self, conversations: list[tuple[str, str, str]]) -> str:
+         """Build a batch extraction prompt.
+
+         Args:
+             conversations: List of (entity_id, query, response) tuples
+
+         Returns:
+             Batch prompt string
+         """
+         lines = [
+             "Analyze these conversations and extract facts worth remembering about each entity.",
+             "",
+             "Focus on: preferences, facts, context that helps future interactions.",
+             "",
+         ]
+
+         for i, (entity_id, query, response) in enumerate(conversations):
+             lines.extend(
+                 [
+                     f"--- Conversation {i + 1} (Entity: {entity_id}) ---",
+                     f"Speaker A: {query}",
+                     f"Speaker B: {response}",
+                     "",
+                 ]
+             )
+
+         lines.extend(
+             [
+                 "Return a JSON object mapping entity_id to their memories:",
+                 "{",
+                 '  "entity_123": {',
+                 '    "memories": [{"content": "...", "category": "preference", "importance": 0.8}],',
+                 '    "should_remember": true',
+                 "  }",
+                 "}",
+                 "",
+                 "Categories: preference, fact, context",
+                 "Importance: 0.0-1.0",
+                 "",
+                 "Return ONLY valid JSON.",
+             ]
+         )
+
+         return "\n".join(lines)
+
+     def _parse_batch_response(
+         self,
+         text: str,
+         conversations: list[tuple[str, str, str]],
+     ) -> dict[str, list[Memory]]:
+         """Parse batch extraction response.
+
+         Args:
+             text: Raw LLM response
+             conversations: Original conversations for fallback
+
+         Returns:
+             Dict mapping user_id to list of memories
+         """
+         try:
+             # Extract JSON from response
+             json_match = re.search(r"```(?:json)?\s*(.*?)\s*```", text, re.DOTALL)
+             if json_match:
+                 text = json_match.group(1)
+
+             data = json.loads(text.strip())
+             result: dict[str, list[Memory]] = {}
+
+             for user_id, user_data in data.items():
+                 if not user_data.get("should_remember", False):
+                     continue
+
+                 memories = []
+                 for item in user_data.get("memories", []):
+                     memories.append(
+                         Memory(
+                             content=item["content"],
+                             category=item.get("category", "fact"),
+                             importance=item.get("importance", 0.5),
+                         )
+                     )
+
+                 if memories:
+                     result[user_id] = memories
+
+             return result
+
+         except (json.JSONDecodeError, KeyError, AttributeError) as e:
+             logger.warning(f"Failed to parse batch response: {e}")
+             return {}
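
Usage note: a minimal sketch of how the new extractor is driven, based only on the docstrings and API in the hunk above. It assumes the openai package is installed and OPENAI_API_KEY is set; the conversation strings and entity IDs are illustrative.

from openai import OpenAI

from headroom.memory.extractor import MemoryExtractor

# Wrap an existing client; the provider is detected from the client's
# module path and a cheap extraction model is chosen automatically
# (gpt-4o-mini for OpenAI, per CHEAP_MODELS above).
client = OpenAI()  # reads OPENAI_API_KEY from the environment
extractor = MemoryExtractor(client)
print(extractor.provider, extractor.model)  # "openai" "gpt-4o-mini"

# Single turn: returns a list of Memory objects (empty if nothing is
# worth remembering, or if the LLM call or JSON parsing fails).
memories = extractor.extract(
    "I prefer Python for backend work and we deploy on Kubernetes",
    "Good to know - I'll keep answers Python- and Kubernetes-oriented.",
)
for memory in memories:
    print(memory.content, memory.category, memory.importance)

# Batch: one LLM call covering several (entity_id, query, response)
# tuples, returning {entity_id: [Memory, ...]} for entities with
# extractable facts.
by_entity = extractor.extract_batch(
    [
        ("user_1", "I use PostgreSQL for everything", "Noted."),
        ("user_2", "hi", "Hello!"),  # greeting: yields no memories
    ]
)

Any provider in CHEAP_MODELS can stand in for the OpenAI client; a client the module cannot recognize logs a warning at construction time, and extract() then returns an empty list rather than raising.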