headroom-ai 0.2.13 (headroom_ai-0.2.13-py3-none-any.whl)
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- headroom/__init__.py +212 -0
- headroom/cache/__init__.py +76 -0
- headroom/cache/anthropic.py +517 -0
- headroom/cache/base.py +342 -0
- headroom/cache/compression_feedback.py +613 -0
- headroom/cache/compression_store.py +814 -0
- headroom/cache/dynamic_detector.py +1026 -0
- headroom/cache/google.py +884 -0
- headroom/cache/openai.py +584 -0
- headroom/cache/registry.py +175 -0
- headroom/cache/semantic.py +451 -0
- headroom/ccr/__init__.py +77 -0
- headroom/ccr/context_tracker.py +582 -0
- headroom/ccr/mcp_server.py +319 -0
- headroom/ccr/response_handler.py +772 -0
- headroom/ccr/tool_injection.py +415 -0
- headroom/cli.py +219 -0
- headroom/client.py +977 -0
- headroom/compression/__init__.py +42 -0
- headroom/compression/detector.py +424 -0
- headroom/compression/handlers/__init__.py +22 -0
- headroom/compression/handlers/base.py +219 -0
- headroom/compression/handlers/code_handler.py +506 -0
- headroom/compression/handlers/json_handler.py +418 -0
- headroom/compression/masks.py +345 -0
- headroom/compression/universal.py +465 -0
- headroom/config.py +474 -0
- headroom/exceptions.py +192 -0
- headroom/integrations/__init__.py +159 -0
- headroom/integrations/agno/__init__.py +53 -0
- headroom/integrations/agno/hooks.py +345 -0
- headroom/integrations/agno/model.py +625 -0
- headroom/integrations/agno/providers.py +154 -0
- headroom/integrations/langchain/__init__.py +106 -0
- headroom/integrations/langchain/agents.py +326 -0
- headroom/integrations/langchain/chat_model.py +1002 -0
- headroom/integrations/langchain/langsmith.py +324 -0
- headroom/integrations/langchain/memory.py +319 -0
- headroom/integrations/langchain/providers.py +200 -0
- headroom/integrations/langchain/retriever.py +371 -0
- headroom/integrations/langchain/streaming.py +341 -0
- headroom/integrations/mcp/__init__.py +37 -0
- headroom/integrations/mcp/server.py +533 -0
- headroom/memory/__init__.py +37 -0
- headroom/memory/extractor.py +390 -0
- headroom/memory/fast_store.py +621 -0
- headroom/memory/fast_wrapper.py +311 -0
- headroom/memory/inline_extractor.py +229 -0
- headroom/memory/store.py +434 -0
- headroom/memory/worker.py +260 -0
- headroom/memory/wrapper.py +321 -0
- headroom/models/__init__.py +39 -0
- headroom/models/registry.py +687 -0
- headroom/parser.py +293 -0
- headroom/pricing/__init__.py +51 -0
- headroom/pricing/anthropic_prices.py +81 -0
- headroom/pricing/litellm_pricing.py +113 -0
- headroom/pricing/openai_prices.py +91 -0
- headroom/pricing/registry.py +188 -0
- headroom/providers/__init__.py +61 -0
- headroom/providers/anthropic.py +621 -0
- headroom/providers/base.py +131 -0
- headroom/providers/cohere.py +362 -0
- headroom/providers/google.py +427 -0
- headroom/providers/litellm.py +297 -0
- headroom/providers/openai.py +566 -0
- headroom/providers/openai_compatible.py +521 -0
- headroom/proxy/__init__.py +19 -0
- headroom/proxy/server.py +2683 -0
- headroom/py.typed +0 -0
- headroom/relevance/__init__.py +124 -0
- headroom/relevance/base.py +106 -0
- headroom/relevance/bm25.py +255 -0
- headroom/relevance/embedding.py +255 -0
- headroom/relevance/hybrid.py +259 -0
- headroom/reporting/__init__.py +5 -0
- headroom/reporting/generator.py +549 -0
- headroom/storage/__init__.py +41 -0
- headroom/storage/base.py +125 -0
- headroom/storage/jsonl.py +220 -0
- headroom/storage/sqlite.py +289 -0
- headroom/telemetry/__init__.py +91 -0
- headroom/telemetry/collector.py +764 -0
- headroom/telemetry/models.py +880 -0
- headroom/telemetry/toin.py +1579 -0
- headroom/tokenizer.py +80 -0
- headroom/tokenizers/__init__.py +75 -0
- headroom/tokenizers/base.py +210 -0
- headroom/tokenizers/estimator.py +198 -0
- headroom/tokenizers/huggingface.py +317 -0
- headroom/tokenizers/mistral.py +245 -0
- headroom/tokenizers/registry.py +398 -0
- headroom/tokenizers/tiktoken_counter.py +248 -0
- headroom/transforms/__init__.py +106 -0
- headroom/transforms/base.py +57 -0
- headroom/transforms/cache_aligner.py +357 -0
- headroom/transforms/code_compressor.py +1313 -0
- headroom/transforms/content_detector.py +335 -0
- headroom/transforms/content_router.py +1158 -0
- headroom/transforms/llmlingua_compressor.py +638 -0
- headroom/transforms/log_compressor.py +529 -0
- headroom/transforms/pipeline.py +297 -0
- headroom/transforms/rolling_window.py +350 -0
- headroom/transforms/search_compressor.py +365 -0
- headroom/transforms/smart_crusher.py +2682 -0
- headroom/transforms/text_compressor.py +259 -0
- headroom/transforms/tool_crusher.py +338 -0
- headroom/utils.py +215 -0
- headroom_ai-0.2.13.dist-info/METADATA +315 -0
- headroom_ai-0.2.13.dist-info/RECORD +114 -0
- headroom_ai-0.2.13.dist-info/WHEEL +4 -0
- headroom_ai-0.2.13.dist-info/entry_points.txt +2 -0
- headroom_ai-0.2.13.dist-info/licenses/LICENSE +190 -0
- headroom_ai-0.2.13.dist-info/licenses/NOTICE +43 -0

headroom/integrations/langchain/streaming.py
@@ -0,0 +1,341 @@
"""Streaming metrics tracking for LangChain.

This module provides StreamingMetricsTracker for tracking output tokens
during streaming responses from LangChain models.

Example:
    from langchain_openai import ChatOpenAI
    from headroom.integrations import HeadroomChatModel, StreamingMetricsTracker

    llm = HeadroomChatModel(ChatOpenAI(model="gpt-4o"))
    tracker = StreamingMetricsTracker(model="gpt-4o")

    for chunk in llm.stream("Tell me a story"):
        tracker.add_chunk(chunk)
        print(chunk.content, end="", flush=True)

    print(f"\\nOutput tokens: {tracker.output_tokens}")
"""

from __future__ import annotations

import logging
from dataclasses import dataclass
from datetime import datetime
from typing import Any

# LangChain imports - these are optional dependencies
try:
    from langchain_core.messages import AIMessageChunk
    from langchain_core.outputs import ChatGenerationChunk

    LANGCHAIN_AVAILABLE = True
except ImportError:
    LANGCHAIN_AVAILABLE = False
    AIMessageChunk = object  # type: ignore[misc,assignment]
    ChatGenerationChunk = object  # type: ignore[misc,assignment]

from headroom.providers import OpenAIProvider

logger = logging.getLogger(__name__)


def _check_langchain_available() -> None:
    """Raise ImportError if LangChain is not installed."""
    if not LANGCHAIN_AVAILABLE:
        raise ImportError(
            "LangChain is required for this integration. "
            "Install with: pip install headroom[langchain] "
            "or: pip install langchain-core"
        )


@dataclass
class StreamingMetrics:
    """Metrics from a streaming response."""

    output_tokens: int
    chunk_count: int
    content_length: int
    start_time: datetime
    end_time: datetime | None
    duration_ms: float | None

    def to_dict(self) -> dict[str, Any]:
        """Convert to dictionary."""
        return {
            "output_tokens": self.output_tokens,
            "chunk_count": self.chunk_count,
            "content_length": self.content_length,
            "start_time": self.start_time.isoformat(),
            "end_time": self.end_time.isoformat() if self.end_time else None,
            "duration_ms": self.duration_ms,
        }


class StreamingMetricsTracker:
    """Tracks output tokens and metrics during streaming.

    Accumulates content from streaming chunks and provides accurate
    token counting for the streamed output.

    Example:
        tracker = StreamingMetricsTracker(model="gpt-4o")

        async for chunk in llm.astream(messages):
            tracker.add_chunk(chunk)
            print(chunk.content, end="")

        print(f"\\nTokens: {tracker.output_tokens}")
        print(f"Duration: {tracker.duration_ms}ms")

    Attributes:
        model: Model name for token counting
        content: Accumulated content from all chunks
        output_tokens: Estimated token count for output
        chunk_count: Number of chunks received
    """

    def __init__(
        self,
        model: str = "gpt-4o",
        provider: Any = None,
    ):
        """Initialize StreamingMetricsTracker.

        Args:
            model: Model name for token counting. Default "gpt-4o".
            provider: Headroom provider for token counting. Uses
                OpenAIProvider if not specified.
        """
        _check_langchain_available()

        self._model = model
        self._provider = provider or OpenAIProvider()
        self._content = ""
        self._chunk_count = 0
        self._start_time: datetime | None = None
        self._end_time: datetime | None = None

    def add_chunk(self, chunk: Any) -> None:
        """Add a streaming chunk to the tracker.

        Extracts content from various chunk types:
        - AIMessageChunk
        - ChatGenerationChunk
        - dict with 'content' key
        - string

        Args:
            chunk: Streaming chunk from LangChain.
        """
        if self._start_time is None:
            self._start_time = datetime.now()

        self._chunk_count += 1

        # Extract content from various chunk types
        content = self._extract_content(chunk)
        if content:
            self._content += content

    def _extract_content(self, chunk: Any) -> str:
        """Extract string content from a chunk.

        Args:
            chunk: Streaming chunk of various types.

        Returns:
            Extracted content string.
        """
        # AIMessageChunk
        if hasattr(chunk, "content"):
            content = chunk.content
            if isinstance(content, str):
                return content
            return str(content) if content else ""

        # ChatGenerationChunk
        if hasattr(chunk, "message") and hasattr(chunk.message, "content"):
            content = chunk.message.content
            if isinstance(content, str):
                return content
            return str(content) if content else ""

        # dict
        if isinstance(chunk, dict):
            return str(chunk.get("content", ""))

        # string
        if isinstance(chunk, str):
            return chunk

        return ""

    def finish(self) -> StreamingMetrics:
        """Mark streaming as complete and return final metrics.

        Returns:
            StreamingMetrics with final values.
        """
        self._end_time = datetime.now()

        duration_ms = None
        if self._start_time:
            duration_ms = (self._end_time - self._start_time).total_seconds() * 1000

        return StreamingMetrics(
            output_tokens=self.output_tokens,
            chunk_count=self._chunk_count,
            content_length=len(self._content),
            start_time=self._start_time or self._end_time,
            end_time=self._end_time,
            duration_ms=duration_ms,
        )

    @property
    def content(self) -> str:
        """Get accumulated content."""
        return self._content

    @property
    def output_tokens(self) -> int:
        """Get estimated output token count."""
        if not self._content:
            return 0
        token_counter = self._provider.get_token_counter(self._model)
        return token_counter.count_text(self._content)

    @property
    def chunk_count(self) -> int:
        """Get number of chunks received."""
        return self._chunk_count

    @property
    def duration_ms(self) -> float | None:
        """Get duration in milliseconds (after finish())."""
        if self._start_time is None or self._end_time is None:
            return None
        return (self._end_time - self._start_time).total_seconds() * 1000

    def reset(self) -> None:
        """Reset tracker for reuse."""
        self._content = ""
        self._chunk_count = 0
        self._start_time = None
        self._end_time = None


class StreamingMetricsCallback:
    """Context manager for tracking streaming metrics.

    Provides a clean interface for tracking a complete streaming
    response with automatic timing.

    Example:
        callback = StreamingMetricsCallback(model="gpt-4o")
        with callback as tracker:
            for chunk in llm.stream(messages):
                tracker.add_chunk(chunk)
                print(chunk.content, end="")
        print(f"\\nMetrics: {callback.metrics}")

    Attributes:
        tracker: The underlying StreamingMetricsTracker
        metrics: Final metrics after context exit
    """

    def __init__(self, model: str = "gpt-4o", provider: Any = None):
        """Initialize StreamingMetricsCallback.

        Args:
            model: Model name for token counting.
            provider: Headroom provider for token counting.
        """
        self._tracker = StreamingMetricsTracker(model=model, provider=provider)
        self._metrics: StreamingMetrics | None = None

    def __enter__(self) -> StreamingMetricsTracker:
        """Enter context, return tracker."""
        return self._tracker

    def __exit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
        """Exit context, finalize metrics."""
        self._metrics = self._tracker.finish()

    @property
    def tracker(self) -> StreamingMetricsTracker:
        """Get the tracker."""
        return self._tracker

    @property
    def metrics(self) -> StreamingMetrics | None:
        """Get final metrics (after context exit)."""
        return self._metrics


def track_streaming_response(
    stream: Any,
    model: str = "gpt-4o",
    provider: Any = None,
) -> tuple[str, StreamingMetrics]:
    """Track a complete streaming response.

    Convenience function that consumes a stream and returns the
    accumulated content and metrics.

    Args:
        stream: Iterable of streaming chunks.
        model: Model name for token counting.
        provider: Headroom provider for token counting.

    Returns:
        Tuple of (accumulated_content, metrics).

    Example:
        content, metrics = track_streaming_response(
            llm.stream(messages),
            model="gpt-4o"
        )
        print(f"Content: {content}")
        print(f"Tokens: {metrics.output_tokens}")
    """
    tracker = StreamingMetricsTracker(model=model, provider=provider)

    for chunk in stream:
        tracker.add_chunk(chunk)

    metrics = tracker.finish()
    return tracker.content, metrics


async def track_async_streaming_response(
    stream: Any,
    model: str = "gpt-4o",
    provider: Any = None,
) -> tuple[str, StreamingMetrics]:
    """Track a complete async streaming response.

    Async version of track_streaming_response.

    Args:
        stream: Async iterable of streaming chunks.
        model: Model name for token counting.
        provider: Headroom provider for token counting.

    Returns:
        Tuple of (accumulated_content, metrics).

    Example:
        content, metrics = await track_async_streaming_response(
            llm.astream(messages),
            model="gpt-4o"
        )
    """
    tracker = StreamingMetricsTracker(model=model, provider=provider)

    async for chunk in stream:
        tracker.add_chunk(chunk)

    metrics = tracker.finish()
    return tracker.content, metrics
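
The helpers above can be exercised without a live model call, since add_chunk accepts plain strings and dicts as well as LangChain chunk objects. A minimal sketch of the two entry points (assuming headroom[langchain] and a tokenizer backend are installed; fake_stream and its strings are illustrative, not part of the package):

    from headroom.integrations.langchain.streaming import (
        StreamingMetricsCallback,
        track_streaming_response,
    )

    def fake_stream():
        # Stand-in for llm.stream(...); add_chunk() accepts plain strings too.
        yield from ["Once upon ", "a time, ", "the stream ended."]

    # One-shot helper: consume the stream, get (content, metrics) back.
    content, metrics = track_streaming_response(fake_stream(), model="gpt-4o")
    print(metrics.to_dict())  # output_tokens, chunk_count, duration_ms, ...

    # Context-manager form: timing is finalized on __exit__, metrics live on the callback.
    callback = StreamingMetricsCallback(model="gpt-4o")
    with callback as tracker:
        for chunk in fake_stream():
            tracker.add_chunk(chunk)
    print(callback.metrics.to_dict() if callback.metrics else None)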

headroom/integrations/mcp/__init__.py
@@ -0,0 +1,37 @@
"""MCP (Model Context Protocol) integration for Headroom.

This package provides compression utilities for MCP tool results,
helping reduce context usage when tools return large outputs.

Example:
    from headroom.integrations.mcp import compress_tool_result

    # Compress large tool output
    result = compress_tool_result(
        tool_name="search",
        result=large_json_result,
        max_chars=5000,
    )
"""

from .server import (
    DEFAULT_MCP_PROFILES,
    HeadroomMCPClientWrapper,
    HeadroomMCPCompressor,
    MCPCompressionResult,
    MCPToolProfile,
    compress_tool_result,
    compress_tool_result_with_metrics,
    create_headroom_mcp_proxy,
)

__all__ = [
    "HeadroomMCPCompressor",
    "HeadroomMCPClientWrapper",
    "MCPCompressionResult",
    "MCPToolProfile",
    "compress_tool_result",
    "compress_tool_result_with_metrics",
    "create_headroom_mcp_proxy",
    "DEFAULT_MCP_PROFILES",
]
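
The only call signature documented in this __init__ docstring is compress_tool_result(tool_name=..., result=..., max_chars=...); the remaining names are re-exported from headroom.integrations.mcp.server, which is listed in the file inventory above but not shown here. A hedged sketch using just that documented call (large_json_result is an illustrative payload, and the return value is assumed to be the compressed tool result, as the docstring example implies):

    import json

    from headroom.integrations.mcp import compress_tool_result

    # Illustrative oversized tool output; any large string payload works here.
    large_json_result = json.dumps(
        {"hits": [{"id": i, "snippet": "lorem ipsum " * 40} for i in range(200)]}
    )

    compressed = compress_tool_result(
        tool_name="search",
        result=large_json_result,
        max_chars=5000,
    )
    # Assumption: the return value is the compressed payload, which would then
    # replace the original tool result before it is sent back to the model.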