headroom_ai-0.2.13-py3-none-any.whl
This diff shows the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- headroom/__init__.py +212 -0
- headroom/cache/__init__.py +76 -0
- headroom/cache/anthropic.py +517 -0
- headroom/cache/base.py +342 -0
- headroom/cache/compression_feedback.py +613 -0
- headroom/cache/compression_store.py +814 -0
- headroom/cache/dynamic_detector.py +1026 -0
- headroom/cache/google.py +884 -0
- headroom/cache/openai.py +584 -0
- headroom/cache/registry.py +175 -0
- headroom/cache/semantic.py +451 -0
- headroom/ccr/__init__.py +77 -0
- headroom/ccr/context_tracker.py +582 -0
- headroom/ccr/mcp_server.py +319 -0
- headroom/ccr/response_handler.py +772 -0
- headroom/ccr/tool_injection.py +415 -0
- headroom/cli.py +219 -0
- headroom/client.py +977 -0
- headroom/compression/__init__.py +42 -0
- headroom/compression/detector.py +424 -0
- headroom/compression/handlers/__init__.py +22 -0
- headroom/compression/handlers/base.py +219 -0
- headroom/compression/handlers/code_handler.py +506 -0
- headroom/compression/handlers/json_handler.py +418 -0
- headroom/compression/masks.py +345 -0
- headroom/compression/universal.py +465 -0
- headroom/config.py +474 -0
- headroom/exceptions.py +192 -0
- headroom/integrations/__init__.py +159 -0
- headroom/integrations/agno/__init__.py +53 -0
- headroom/integrations/agno/hooks.py +345 -0
- headroom/integrations/agno/model.py +625 -0
- headroom/integrations/agno/providers.py +154 -0
- headroom/integrations/langchain/__init__.py +106 -0
- headroom/integrations/langchain/agents.py +326 -0
- headroom/integrations/langchain/chat_model.py +1002 -0
- headroom/integrations/langchain/langsmith.py +324 -0
- headroom/integrations/langchain/memory.py +319 -0
- headroom/integrations/langchain/providers.py +200 -0
- headroom/integrations/langchain/retriever.py +371 -0
- headroom/integrations/langchain/streaming.py +341 -0
- headroom/integrations/mcp/__init__.py +37 -0
- headroom/integrations/mcp/server.py +533 -0
- headroom/memory/__init__.py +37 -0
- headroom/memory/extractor.py +390 -0
- headroom/memory/fast_store.py +621 -0
- headroom/memory/fast_wrapper.py +311 -0
- headroom/memory/inline_extractor.py +229 -0
- headroom/memory/store.py +434 -0
- headroom/memory/worker.py +260 -0
- headroom/memory/wrapper.py +321 -0
- headroom/models/__init__.py +39 -0
- headroom/models/registry.py +687 -0
- headroom/parser.py +293 -0
- headroom/pricing/__init__.py +51 -0
- headroom/pricing/anthropic_prices.py +81 -0
- headroom/pricing/litellm_pricing.py +113 -0
- headroom/pricing/openai_prices.py +91 -0
- headroom/pricing/registry.py +188 -0
- headroom/providers/__init__.py +61 -0
- headroom/providers/anthropic.py +621 -0
- headroom/providers/base.py +131 -0
- headroom/providers/cohere.py +362 -0
- headroom/providers/google.py +427 -0
- headroom/providers/litellm.py +297 -0
- headroom/providers/openai.py +566 -0
- headroom/providers/openai_compatible.py +521 -0
- headroom/proxy/__init__.py +19 -0
- headroom/proxy/server.py +2683 -0
- headroom/py.typed +0 -0
- headroom/relevance/__init__.py +124 -0
- headroom/relevance/base.py +106 -0
- headroom/relevance/bm25.py +255 -0
- headroom/relevance/embedding.py +255 -0
- headroom/relevance/hybrid.py +259 -0
- headroom/reporting/__init__.py +5 -0
- headroom/reporting/generator.py +549 -0
- headroom/storage/__init__.py +41 -0
- headroom/storage/base.py +125 -0
- headroom/storage/jsonl.py +220 -0
- headroom/storage/sqlite.py +289 -0
- headroom/telemetry/__init__.py +91 -0
- headroom/telemetry/collector.py +764 -0
- headroom/telemetry/models.py +880 -0
- headroom/telemetry/toin.py +1579 -0
- headroom/tokenizer.py +80 -0
- headroom/tokenizers/__init__.py +75 -0
- headroom/tokenizers/base.py +210 -0
- headroom/tokenizers/estimator.py +198 -0
- headroom/tokenizers/huggingface.py +317 -0
- headroom/tokenizers/mistral.py +245 -0
- headroom/tokenizers/registry.py +398 -0
- headroom/tokenizers/tiktoken_counter.py +248 -0
- headroom/transforms/__init__.py +106 -0
- headroom/transforms/base.py +57 -0
- headroom/transforms/cache_aligner.py +357 -0
- headroom/transforms/code_compressor.py +1313 -0
- headroom/transforms/content_detector.py +335 -0
- headroom/transforms/content_router.py +1158 -0
- headroom/transforms/llmlingua_compressor.py +638 -0
- headroom/transforms/log_compressor.py +529 -0
- headroom/transforms/pipeline.py +297 -0
- headroom/transforms/rolling_window.py +350 -0
- headroom/transforms/search_compressor.py +365 -0
- headroom/transforms/smart_crusher.py +2682 -0
- headroom/transforms/text_compressor.py +259 -0
- headroom/transforms/tool_crusher.py +338 -0
- headroom/utils.py +215 -0
- headroom_ai-0.2.13.dist-info/METADATA +315 -0
- headroom_ai-0.2.13.dist-info/RECORD +114 -0
- headroom_ai-0.2.13.dist-info/WHEEL +4 -0
- headroom_ai-0.2.13.dist-info/entry_points.txt +2 -0
- headroom_ai-0.2.13.dist-info/licenses/LICENSE +190 -0
- headroom_ai-0.2.13.dist-info/licenses/NOTICE +43 -0
headroom/integrations/mcp/server.py
@@ -0,0 +1,533 @@
"""Headroom MCP Integration: Compress tool outputs from MCP servers.

This module provides multiple ways to integrate Headroom with MCP:

1. HeadroomMCPProxy - A proxy server that wraps upstream MCP servers
2. compress_tool_result() - Standalone function for host applications
3. HeadroomMCPMiddleware - Transport-level middleware

The key insight: MCP tool outputs are the PERFECT use case for Headroom.
They're often large (100s-1000s of items), structured (JSON), and contain
mostly low-relevance data with a few critical items (errors, matches).

Example - Proxy Server:
    ```python
    # Configure proxy to wrap your MCP servers
    proxy = HeadroomMCPProxy(
        upstream_servers=["slack", "database", "github"],
        config=HeadroomConfig(),
    )

    # Run as MCP server - clients connect to this instead
    proxy.run()
    ```

Example - Standalone Function:
    ```python
    # In your MCP host application
    result = await mcp_client.call_tool("search_logs", {"service": "api"})

    # Compress before adding to context
    compressed = compress_tool_result(
        content=result,
        tool_name="search_logs",
        tool_args={"service": "api"},
        user_query="find errors in api service",
    )
    ```

Example - Middleware (for MCP client libraries):
    ```python
    # Wrap your MCP client's transport
    middleware = HeadroomMCPMiddleware(config)
    client = MCPClient(transport=middleware.wrap(base_transport))
    ```
"""

from __future__ import annotations

import json
import re
from collections.abc import Callable
from dataclasses import dataclass, field
from typing import Any

from headroom.config import HeadroomConfig, SmartCrusherConfig
from headroom.providers import OpenAIProvider
from headroom.transforms import SmartCrusher


@dataclass
class MCPCompressionResult:
    """Result of compressing an MCP tool output."""

    original_content: str
    compressed_content: str
    original_tokens: int
    compressed_tokens: int
    tokens_saved: int
    compression_ratio: float
    items_before: int | None
    items_after: int | None
    errors_preserved: int
    was_compressed: bool
    tool_name: str
    context_used: str


@dataclass
class MCPToolProfile:
    """Configuration profile for a specific MCP tool.

    Different tools may need different compression strategies:
    - Slack search: High error preservation, relevance to query
    - Database query: Schema detection, anomaly preservation
    - File listing: Minimal compression (paths are important)
    """

    tool_name_pattern: str  # Regex pattern to match tool names
    enabled: bool = True
    max_items: int = 20
    min_tokens_to_compress: int = 500
    preserve_error_keywords: set[str] = field(
        default_factory=lambda: {"error", "failed", "exception", "critical", "fatal"}
    )
    always_keep_fields: set[str] = field(default_factory=set)  # Fields to never drop


# Default profiles for common MCP servers
DEFAULT_MCP_PROFILES: list[MCPToolProfile] = [
    # Slack - preserve errors and messages matching query
    MCPToolProfile(
        tool_name_pattern=r".*slack.*",
        max_items=25,
        preserve_error_keywords={"error", "failed", "exception", "bug", "issue", "broken"},
    ),
    # Database - preserve errors and anomalies
    MCPToolProfile(
        tool_name_pattern=r".*database.*|.*sql.*|.*query.*",
        max_items=30,
        preserve_error_keywords={"error", "null", "failed", "exception", "violation"},
    ),
    # GitHub - preserve errors and high-priority issues
    MCPToolProfile(
        tool_name_pattern=r".*github.*|.*git.*",
        max_items=20,
        preserve_error_keywords={"error", "bug", "critical", "urgent", "blocker"},
    ),
    # Logs - preserve ALL errors
    MCPToolProfile(
        tool_name_pattern=r".*log.*|.*trace.*",
        max_items=40,  # Keep more for logs
        preserve_error_keywords={"error", "fatal", "critical", "exception", "failed", "panic"},
    ),
    # File system - minimal compression (paths matter)
    MCPToolProfile(
        tool_name_pattern=r".*file.*|.*fs.*|.*directory.*",
        max_items=50,
        min_tokens_to_compress=1000,  # Higher threshold
    ),
    # Generic fallback
    MCPToolProfile(
        tool_name_pattern=r".*",
        max_items=20,
    ),
]


class HeadroomMCPCompressor:
    """Core compression logic for MCP tool outputs.

    This class handles the actual compression of MCP tool results.
    It's used by both the proxy server and standalone functions.
    """

    def __init__(
        self,
        config: HeadroomConfig | None = None,
        profiles: list[MCPToolProfile] | None = None,
        token_counter: Callable[[str], int] | None = None,
    ):
        """Initialize MCP compressor.

        Args:
            config: Headroom configuration.
            profiles: Tool-specific compression profiles.
            token_counter: Function to count tokens. Uses tiktoken if None.
        """
        self.config = config or HeadroomConfig()
        self.profiles = profiles or DEFAULT_MCP_PROFILES

        # Initialize token counter
        if token_counter:
            self._count_tokens = token_counter
        else:
            provider = OpenAIProvider()
            counter = provider.get_token_counter("gpt-4o")
            self._count_tokens = counter.count_text

    def get_profile(self, tool_name: str) -> MCPToolProfile:
        """Get the compression profile for a tool."""
        for profile in self.profiles:
            if re.match(profile.tool_name_pattern, tool_name, re.IGNORECASE):
                return profile
        # Return last profile (generic fallback)
        return self.profiles[-1]

    def compress(
        self,
        content: str,
        tool_name: str,
        tool_args: dict[str, Any] | None = None,
        user_query: str = "",
    ) -> MCPCompressionResult:
        """Compress MCP tool output.

        Args:
            content: Raw tool output (usually JSON string).
            tool_name: Name of the MCP tool (e.g., "mcp__slack__search").
            tool_args: Arguments passed to the tool (used for context).
            user_query: User's original query (for relevance scoring).

        Returns:
            MCPCompressionResult with compressed content and metrics.
        """
        profile = self.get_profile(tool_name)
        original_tokens = self._count_tokens(content)

        # Build context for relevance scoring
        context_parts = []
        if user_query:
            context_parts.append(user_query)
        if tool_args:
            context_parts.append(json.dumps(tool_args))
        context = " ".join(context_parts)

        # Check if compression is needed
        if not profile.enabled or original_tokens < profile.min_tokens_to_compress:
            return MCPCompressionResult(
                original_content=content,
                compressed_content=content,
                original_tokens=original_tokens,
                compressed_tokens=original_tokens,
                tokens_saved=0,
                compression_ratio=0.0,
                items_before=None,
                items_after=None,
                errors_preserved=0,
                was_compressed=False,
                tool_name=tool_name,
                context_used=context,
            )

        # Try to parse as JSON
        try:
            json.loads(content)
        except json.JSONDecodeError:
            # Not JSON, return as-is
            return MCPCompressionResult(
                original_content=content,
                compressed_content=content,
                original_tokens=original_tokens,
                compressed_tokens=original_tokens,
                tokens_saved=0,
                compression_ratio=0.0,
                items_before=None,
                items_after=None,
                errors_preserved=0,
                was_compressed=False,
                tool_name=tool_name,
                context_used=context,
            )

        # Find arrays to compress
        items_before = 0
        items_after = 0
        errors_preserved = 0

        # Create SmartCrusher with profile settings
        smart_config = SmartCrusherConfig(
            enabled=True,
            min_tokens_to_crush=profile.min_tokens_to_compress,
            max_items_after_crush=profile.max_items,
        )
        crusher = SmartCrusher(config=smart_config)  # type: ignore[arg-type]

        # Build messages for SmartCrusher (it expects conversation format)
        messages = [
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": context or f"Process {tool_name} results"},
            {
                "role": "assistant",
                "content": None,
                "tool_calls": [
                    {
                        "id": "call_1",
                        "function": {"name": tool_name, "arguments": json.dumps(tool_args or {})},
                    }
                ],
            },
            {"role": "tool", "content": content, "tool_call_id": "call_1"},
        ]

        # Create tokenizer wrapper
        class TokenizerWrapper:
            def __init__(self, count_fn: Any) -> None:
                self._count = count_fn

            def count_text(self, text: str) -> int:
                result = self._count(text)
                return int(result) if result is not None else 0

            def count_messages(self, messages: list[dict[str, Any]]) -> int:
                total = 0
                for msg in messages:
                    if msg.get("content"):
                        total += self._count(str(msg["content"]))
                return total

        tokenizer = TokenizerWrapper(self._count_tokens)

        # Apply SmartCrusher
        result = crusher.apply(messages, tokenizer=tokenizer)  # type: ignore[arg-type]
        compressed_content = result.messages[-1]["content"]

        # Remove any Headroom markers for clean output
        compressed_content = re.sub(r"\n<headroom:[^>]+>", "", compressed_content)

        # Count items and errors
        try:
            original_data = json.loads(content)
            compressed_data = json.loads(compressed_content)

            # Find the array in original
            for _key, value in original_data.items():
                if isinstance(value, list):
                    items_before = len(value)
                    break

            # Find the array in compressed
            for _key, value in compressed_data.items():
                if isinstance(value, list):
                    items_after = len(value)
                    # Count errors preserved
                    for item in value:
                        item_str = str(item).lower()
                        if any(kw in item_str for kw in profile.preserve_error_keywords):
                            errors_preserved += 1
                    break
        except (json.JSONDecodeError, AttributeError):
            pass

        compressed_tokens = self._count_tokens(compressed_content)
        tokens_saved = original_tokens - compressed_tokens
        compression_ratio = tokens_saved / original_tokens if original_tokens > 0 else 0.0

        return MCPCompressionResult(
            original_content=content,
            compressed_content=compressed_content,
            original_tokens=original_tokens,
            compressed_tokens=compressed_tokens,
            tokens_saved=tokens_saved,
            compression_ratio=compression_ratio,
            items_before=items_before,
            items_after=items_after,
            errors_preserved=errors_preserved,
            was_compressed=True,
            tool_name=tool_name,
            context_used=context,
        )


def compress_tool_result(
    content: str,
    tool_name: str,
    tool_args: dict[str, Any] | None = None,
    user_query: str = "",
    config: HeadroomConfig | None = None,
) -> str:
    """Compress an MCP tool result (standalone function).

    This is the simplest way to use Headroom with MCP in your host application.

    Args:
        content: Raw tool output.
        tool_name: Name of the MCP tool.
        tool_args: Arguments passed to the tool.
        user_query: User's query for relevance scoring.
        config: Optional Headroom configuration.

    Returns:
        Compressed content string.

    Example:
        ```python
        # In your MCP host application
        result = await client.call_tool("search_logs", {"service": "api"})

        compressed = compress_tool_result(
            content=result,
            tool_name="search_logs",
            tool_args={"service": "api"},
            user_query="find errors",
        )

        messages.append({"role": "tool", "content": compressed})
        ```
    """
    compressor = HeadroomMCPCompressor(config=config)
    result = compressor.compress(
        content=content,
        tool_name=tool_name,
        tool_args=tool_args,
        user_query=user_query,
    )
    return result.compressed_content


def compress_tool_result_with_metrics(
    content: str,
    tool_name: str,
    tool_args: dict[str, Any] | None = None,
    user_query: str = "",
    config: HeadroomConfig | None = None,
) -> MCPCompressionResult:
    """Compress an MCP tool result and return full metrics.

    Same as compress_tool_result but returns detailed metrics.

    Returns:
        MCPCompressionResult with all compression metrics.
    """
    compressor = HeadroomMCPCompressor(config=config)
    return compressor.compress(
        content=content,
        tool_name=tool_name,
        tool_args=tool_args,
        user_query=user_query,
    )


class HeadroomMCPClientWrapper:
    """Wrapper for MCP clients that automatically compresses tool results.

    This wraps an MCP client to transparently compress all tool outputs.

    Example:
        ```python
        from mcp import Client
        from headroom.integrations.mcp import HeadroomMCPClientWrapper

        # Wrap your MCP client
        base_client = Client(transport)
        client = HeadroomMCPClientWrapper(base_client)

        # Use normally - compression is automatic
        result = await client.call_tool("search", {"query": "errors"})
        ```
    """

    def __init__(
        self,
        client: Any,
        config: HeadroomConfig | None = None,
        user_query_extractor: Callable[[dict], str] | None = None,
    ):
        """Initialize wrapper.

        Args:
            client: The MCP client to wrap.
            config: Headroom configuration.
            user_query_extractor: Function to extract user query from context.
        """
        self._client = client
        self._compressor = HeadroomMCPCompressor(config=config)
        self._query_extractor = user_query_extractor or (lambda x: "")
        self._metrics: list[MCPCompressionResult] = []

    async def call_tool(
        self,
        name: str,
        arguments: dict[str, Any] | None = None,
        context: dict[str, Any] | None = None,
    ) -> str:
        """Call an MCP tool and compress the result.

        Args:
            name: Tool name.
            arguments: Tool arguments.
            context: Optional context (for query extraction).

        Returns:
            Compressed tool result.
        """
        # Call the underlying client
        result = await self._client.call_tool(name, arguments)

        # Extract user query from context if available
        user_query = ""
        if context and self._query_extractor is not None:
            user_query = self._query_extractor(context)

        # Compress
        compression_result = self._compressor.compress(
            content=result,
            tool_name=name,
            tool_args=arguments,
            user_query=user_query,
        )

        self._metrics.append(compression_result)
        return compression_result.compressed_content

    def get_metrics(self) -> list[MCPCompressionResult]:
        """Get compression metrics for all tool calls."""
        return self._metrics.copy()

    def get_total_tokens_saved(self) -> int:
        """Get total tokens saved across all tool calls."""
        return sum(m.tokens_saved for m in self._metrics)

    def __getattr__(self, name: str) -> Any:
        """Forward all other attributes to the wrapped client."""
        return getattr(self._client, name)


# Type alias for MCP Server (will be properly typed when mcp package is used)
MCPServer = Any


def create_headroom_mcp_proxy(
    upstream_servers: list[tuple[str, MCPServer]],
    config: HeadroomConfig | None = None,
) -> dict[str, Any]:
    """Create configuration for a Headroom MCP proxy server.

    This returns a configuration dict that can be used to set up
    a proxy server that wraps upstream MCP servers.

    Args:
        upstream_servers: List of (name, server) tuples.
        config: Headroom configuration.

    Returns:
        Configuration dict for proxy server.

    Example:
        ```python
        # In your MCP server setup
        proxy_config = create_headroom_mcp_proxy(
            upstream_servers=[
                ("slack", slack_server),
                ("database", db_server),
            ]
        )

        # Use proxy_config to initialize your proxy server
        ```
    """
    return {
        "upstream_servers": dict(upstream_servers),
        "compressor": HeadroomMCPCompressor(config=config),
        "config": config or HeadroomConfig(),
    }
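Taken together, the module composes as follows. This is a minimal usage sketch, not code from the package: the tool name `metrics_search`, the custom profile, and the synthetic payload are invented for illustration, and a crude character-based token counter is passed in so the sketch does not depend on a provider tokenizer (the `token_counter` hook in `HeadroomMCPCompressor.__init__` exists for exactly that).

```python
# Illustrative sketch only -- "metrics_search" and the payload are invented;
# whether SmartCrusher trims this particular array depends on its heuristics.
import json

from headroom.integrations.mcp.server import HeadroomMCPCompressor, MCPToolProfile

# Custom profile: match hypothetical metrics tools, keep more items,
# and treat "timeout" as a must-preserve keyword.
profile = MCPToolProfile(
    tool_name_pattern=r".*metrics.*",
    max_items=30,
    preserve_error_keywords={"error", "timeout", "failed"},
)

# Rough ~4-chars-per-token estimate so the sketch needs no tiktoken setup.
compressor = HeadroomMCPCompressor(
    profiles=[profile],
    token_counter=lambda text: len(text) // 4,
)

# Synthetic tool output: one large JSON array, the shape compress() scans
# for when it reports items_before/items_after.
payload = json.dumps(
    {"results": [{"id": i, "status": "timeout" if i == 7 else "ok"} for i in range(500)]}
)

result = compressor.compress(
    content=payload,
    tool_name="metrics_search",
    user_query="which requests timed out?",
)
print(result.was_compressed, result.tokens_saved, result.errors_preserved)
```

Outputs shorter than the profile's `min_tokens_to_compress` (500 tokens by default), or outputs that are not valid JSON, skip compression entirely and come back unchanged with `was_compressed=False`, per the early-return branches in `compress`.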
headroom/memory/__init__.py
@@ -0,0 +1,37 @@
"""Headroom Memory - Simple, LLM-driven memory for AI applications.

Two approaches available:

1. Background extraction (original):
    from headroom import with_memory
    client = with_memory(OpenAI(), user_id="alice")

2. Zero-latency inline extraction (recommended):
    from headroom.memory import with_fast_memory
    client = with_fast_memory(OpenAI(), user_id="alice")
"""

from headroom.memory.fast_store import FastMemoryStore, MemoryChunk
from headroom.memory.fast_wrapper import with_fast_memory
from headroom.memory.inline_extractor import (
    InlineMemoryWrapper,
    inject_memory_instruction,
    parse_response_with_memory,
)
from headroom.memory.store import Memory, SQLiteMemoryStore
from headroom.memory.wrapper import with_memory

__all__ = [
    # Original approach (background extraction)
    "with_memory",
    "Memory",
    "SQLiteMemoryStore",
    # Fast approach (inline extraction - recommended)
    "with_fast_memory",
    "FastMemoryStore",
    "MemoryChunk",
    # Low-level inline extraction
    "InlineMemoryWrapper",
    "inject_memory_instruction",
    "parse_response_with_memory",
]
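A minimal sketch of the recommended fast path named in the docstring above, under stated assumptions: the `with_fast_memory(OpenAI(), user_id=...)` call follows the docstring, while the chat call shape and the assumption that the wrapper forwards the underlying client's normal interface are illustrative.

```python
# Sketch under assumptions: the wrap call follows the module docstring;
# the chat call assumes the wrapper proxies the standard OpenAI client surface.
from openai import OpenAI

from headroom.memory import with_fast_memory

# Inline ("zero-latency") extraction: memories are captured in the same
# round-trip rather than by a background worker.
client = with_fast_memory(OpenAI(), user_id="alice")

response = client.chat.completions.create(
    model="gpt-4o-mini",  # hypothetical model choice
    messages=[{"role": "user", "content": "Remember: I prefer metric units."}],
)
print(response.choices[0].message.content)
```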