headroom-ai 0.2.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (114)
  1. headroom/__init__.py +212 -0
  2. headroom/cache/__init__.py +76 -0
  3. headroom/cache/anthropic.py +517 -0
  4. headroom/cache/base.py +342 -0
  5. headroom/cache/compression_feedback.py +613 -0
  6. headroom/cache/compression_store.py +814 -0
  7. headroom/cache/dynamic_detector.py +1026 -0
  8. headroom/cache/google.py +884 -0
  9. headroom/cache/openai.py +584 -0
  10. headroom/cache/registry.py +175 -0
  11. headroom/cache/semantic.py +451 -0
  12. headroom/ccr/__init__.py +77 -0
  13. headroom/ccr/context_tracker.py +582 -0
  14. headroom/ccr/mcp_server.py +319 -0
  15. headroom/ccr/response_handler.py +772 -0
  16. headroom/ccr/tool_injection.py +415 -0
  17. headroom/cli.py +219 -0
  18. headroom/client.py +977 -0
  19. headroom/compression/__init__.py +42 -0
  20. headroom/compression/detector.py +424 -0
  21. headroom/compression/handlers/__init__.py +22 -0
  22. headroom/compression/handlers/base.py +219 -0
  23. headroom/compression/handlers/code_handler.py +506 -0
  24. headroom/compression/handlers/json_handler.py +418 -0
  25. headroom/compression/masks.py +345 -0
  26. headroom/compression/universal.py +465 -0
  27. headroom/config.py +474 -0
  28. headroom/exceptions.py +192 -0
  29. headroom/integrations/__init__.py +159 -0
  30. headroom/integrations/agno/__init__.py +53 -0
  31. headroom/integrations/agno/hooks.py +345 -0
  32. headroom/integrations/agno/model.py +625 -0
  33. headroom/integrations/agno/providers.py +154 -0
  34. headroom/integrations/langchain/__init__.py +106 -0
  35. headroom/integrations/langchain/agents.py +326 -0
  36. headroom/integrations/langchain/chat_model.py +1002 -0
  37. headroom/integrations/langchain/langsmith.py +324 -0
  38. headroom/integrations/langchain/memory.py +319 -0
  39. headroom/integrations/langchain/providers.py +200 -0
  40. headroom/integrations/langchain/retriever.py +371 -0
  41. headroom/integrations/langchain/streaming.py +341 -0
  42. headroom/integrations/mcp/__init__.py +37 -0
  43. headroom/integrations/mcp/server.py +533 -0
  44. headroom/memory/__init__.py +37 -0
  45. headroom/memory/extractor.py +390 -0
  46. headroom/memory/fast_store.py +621 -0
  47. headroom/memory/fast_wrapper.py +311 -0
  48. headroom/memory/inline_extractor.py +229 -0
  49. headroom/memory/store.py +434 -0
  50. headroom/memory/worker.py +260 -0
  51. headroom/memory/wrapper.py +321 -0
  52. headroom/models/__init__.py +39 -0
  53. headroom/models/registry.py +687 -0
  54. headroom/parser.py +293 -0
  55. headroom/pricing/__init__.py +51 -0
  56. headroom/pricing/anthropic_prices.py +81 -0
  57. headroom/pricing/litellm_pricing.py +113 -0
  58. headroom/pricing/openai_prices.py +91 -0
  59. headroom/pricing/registry.py +188 -0
  60. headroom/providers/__init__.py +61 -0
  61. headroom/providers/anthropic.py +621 -0
  62. headroom/providers/base.py +131 -0
  63. headroom/providers/cohere.py +362 -0
  64. headroom/providers/google.py +427 -0
  65. headroom/providers/litellm.py +297 -0
  66. headroom/providers/openai.py +566 -0
  67. headroom/providers/openai_compatible.py +521 -0
  68. headroom/proxy/__init__.py +19 -0
  69. headroom/proxy/server.py +2683 -0
  70. headroom/py.typed +0 -0
  71. headroom/relevance/__init__.py +124 -0
  72. headroom/relevance/base.py +106 -0
  73. headroom/relevance/bm25.py +255 -0
  74. headroom/relevance/embedding.py +255 -0
  75. headroom/relevance/hybrid.py +259 -0
  76. headroom/reporting/__init__.py +5 -0
  77. headroom/reporting/generator.py +549 -0
  78. headroom/storage/__init__.py +41 -0
  79. headroom/storage/base.py +125 -0
  80. headroom/storage/jsonl.py +220 -0
  81. headroom/storage/sqlite.py +289 -0
  82. headroom/telemetry/__init__.py +91 -0
  83. headroom/telemetry/collector.py +764 -0
  84. headroom/telemetry/models.py +880 -0
  85. headroom/telemetry/toin.py +1579 -0
  86. headroom/tokenizer.py +80 -0
  87. headroom/tokenizers/__init__.py +75 -0
  88. headroom/tokenizers/base.py +210 -0
  89. headroom/tokenizers/estimator.py +198 -0
  90. headroom/tokenizers/huggingface.py +317 -0
  91. headroom/tokenizers/mistral.py +245 -0
  92. headroom/tokenizers/registry.py +398 -0
  93. headroom/tokenizers/tiktoken_counter.py +248 -0
  94. headroom/transforms/__init__.py +106 -0
  95. headroom/transforms/base.py +57 -0
  96. headroom/transforms/cache_aligner.py +357 -0
  97. headroom/transforms/code_compressor.py +1313 -0
  98. headroom/transforms/content_detector.py +335 -0
  99. headroom/transforms/content_router.py +1158 -0
  100. headroom/transforms/llmlingua_compressor.py +638 -0
  101. headroom/transforms/log_compressor.py +529 -0
  102. headroom/transforms/pipeline.py +297 -0
  103. headroom/transforms/rolling_window.py +350 -0
  104. headroom/transforms/search_compressor.py +365 -0
  105. headroom/transforms/smart_crusher.py +2682 -0
  106. headroom/transforms/text_compressor.py +259 -0
  107. headroom/transforms/tool_crusher.py +338 -0
  108. headroom/utils.py +215 -0
  109. headroom_ai-0.2.13.dist-info/METADATA +315 -0
  110. headroom_ai-0.2.13.dist-info/RECORD +114 -0
  111. headroom_ai-0.2.13.dist-info/WHEEL +4 -0
  112. headroom_ai-0.2.13.dist-info/entry_points.txt +2 -0
  113. headroom_ai-0.2.13.dist-info/licenses/LICENSE +190 -0
  114. headroom_ai-0.2.13.dist-info/licenses/NOTICE +43 -0
@@ -0,0 +1,341 @@
+ """Streaming metrics tracking for LangChain.
+
+ This module provides StreamingMetricsTracker for tracking output tokens
+ during streaming responses from LangChain models.
+
+ Example:
+     from langchain_openai import ChatOpenAI
+     from headroom.integrations import HeadroomChatModel, StreamingMetricsTracker
+
+     llm = HeadroomChatModel(ChatOpenAI(model="gpt-4o"))
+     tracker = StreamingMetricsTracker(model="gpt-4o")
+
+     for chunk in llm.stream("Tell me a story"):
+         tracker.add_chunk(chunk)
+         print(chunk.content, end="", flush=True)
+
+     print(f"\\nOutput tokens: {tracker.output_tokens}")
+ """
+
+ from __future__ import annotations
+
+ import logging
+ from dataclasses import dataclass
+ from datetime import datetime
+ from typing import Any
+
+ # LangChain imports - these are optional dependencies
+ try:
+     from langchain_core.messages import AIMessageChunk
+     from langchain_core.outputs import ChatGenerationChunk
+
+     LANGCHAIN_AVAILABLE = True
+ except ImportError:
+     LANGCHAIN_AVAILABLE = False
+     AIMessageChunk = object  # type: ignore[misc,assignment]
+     ChatGenerationChunk = object  # type: ignore[misc,assignment]
+
+ from headroom.providers import OpenAIProvider
+
+ logger = logging.getLogger(__name__)
+
+
+ def _check_langchain_available() -> None:
+     """Raise ImportError if LangChain is not installed."""
+     if not LANGCHAIN_AVAILABLE:
+         raise ImportError(
+             "LangChain is required for this integration. "
+             "Install with: pip install headroom[langchain] "
+             "or: pip install langchain-core"
+         )
+
+
+ @dataclass
+ class StreamingMetrics:
+     """Metrics from a streaming response."""
+
+     output_tokens: int
+     chunk_count: int
+     content_length: int
+     start_time: datetime
+     end_time: datetime | None
+     duration_ms: float | None
+
+     def to_dict(self) -> dict[str, Any]:
+         """Convert to dictionary."""
+         return {
+             "output_tokens": self.output_tokens,
+             "chunk_count": self.chunk_count,
+             "content_length": self.content_length,
+             "start_time": self.start_time.isoformat(),
+             "end_time": self.end_time.isoformat() if self.end_time else None,
+             "duration_ms": self.duration_ms,
+         }
+
+
+ class StreamingMetricsTracker:
+     """Tracks output tokens and metrics during streaming.
+
+     Accumulates content from streaming chunks and provides accurate
+     token counting for the streamed output.
+
+     Example:
+         tracker = StreamingMetricsTracker(model="gpt-4o")
+
+         async for chunk in llm.astream(messages):
+             tracker.add_chunk(chunk)
+             print(chunk.content, end="")
+
+         print(f"\\nTokens: {tracker.output_tokens}")
+         print(f"Duration: {tracker.duration_ms}ms")
+
+     Attributes:
+         model: Model name for token counting
+         content: Accumulated content from all chunks
+         output_tokens: Estimated token count for output
+         chunk_count: Number of chunks received
+     """
+
+     def __init__(
+         self,
+         model: str = "gpt-4o",
+         provider: Any = None,
+     ):
+         """Initialize StreamingMetricsTracker.
+
+         Args:
+             model: Model name for token counting. Default "gpt-4o".
+             provider: Headroom provider for token counting. Uses
+                 OpenAIProvider if not specified.
+         """
+         _check_langchain_available()
+
+         self._model = model
+         self._provider = provider or OpenAIProvider()
+         self._content = ""
+         self._chunk_count = 0
+         self._start_time: datetime | None = None
+         self._end_time: datetime | None = None
+
+     def add_chunk(self, chunk: Any) -> None:
+         """Add a streaming chunk to the tracker.
+
+         Extracts content from various chunk types:
+         - AIMessageChunk
+         - ChatGenerationChunk
+         - dict with 'content' key
+         - string
+
+         Args:
+             chunk: Streaming chunk from LangChain.
+         """
+         if self._start_time is None:
+             self._start_time = datetime.now()
+
+         self._chunk_count += 1
+
+         # Extract content from various chunk types
+         content = self._extract_content(chunk)
+         if content:
+             self._content += content
+
+     def _extract_content(self, chunk: Any) -> str:
+         """Extract string content from a chunk.
+
+         Args:
+             chunk: Streaming chunk of various types.
+
+         Returns:
+             Extracted content string.
+         """
+         # AIMessageChunk
+         if hasattr(chunk, "content"):
+             content = chunk.content
+             if isinstance(content, str):
+                 return content
+             return str(content) if content else ""
+
+         # ChatGenerationChunk
+         if hasattr(chunk, "message") and hasattr(chunk.message, "content"):
+             content = chunk.message.content
+             if isinstance(content, str):
+                 return content
+             return str(content) if content else ""
+
+         # dict
+         if isinstance(chunk, dict):
+             return str(chunk.get("content", ""))
+
+         # string
+         if isinstance(chunk, str):
+             return chunk
+
+         return ""
+
+     def finish(self) -> StreamingMetrics:
+         """Mark streaming as complete and return final metrics.
+
+         Returns:
+             StreamingMetrics with final values.
+         """
+         self._end_time = datetime.now()
+
+         duration_ms = None
+         if self._start_time:
+             duration_ms = (self._end_time - self._start_time).total_seconds() * 1000
+
+         return StreamingMetrics(
+             output_tokens=self.output_tokens,
+             chunk_count=self._chunk_count,
+             content_length=len(self._content),
+             start_time=self._start_time or self._end_time,
+             end_time=self._end_time,
+             duration_ms=duration_ms,
+         )
+
+     @property
+     def content(self) -> str:
+         """Get accumulated content."""
+         return self._content
+
+     @property
+     def output_tokens(self) -> int:
+         """Get estimated output token count."""
+         if not self._content:
+             return 0
+         token_counter = self._provider.get_token_counter(self._model)
+         return token_counter.count_text(self._content)
+
+     @property
+     def chunk_count(self) -> int:
+         """Get number of chunks received."""
+         return self._chunk_count
+
+     @property
+     def duration_ms(self) -> float | None:
+         """Get duration in milliseconds (after finish())."""
+         if self._start_time is None or self._end_time is None:
+             return None
+         return (self._end_time - self._start_time).total_seconds() * 1000
+
+     def reset(self) -> None:
+         """Reset tracker for reuse."""
+         self._content = ""
+         self._chunk_count = 0
+         self._start_time = None
+         self._end_time = None
+
+
+ class StreamingMetricsCallback:
+     """Context manager for tracking streaming metrics.
+
+     Provides a clean interface for tracking a complete streaming
+     response with automatic timing.
+
+     Example:
+         callback = StreamingMetricsCallback(model="gpt-4o")
+         with callback as tracker:
+             for chunk in llm.stream(messages):
+                 tracker.add_chunk(chunk)
+                 print(chunk.content, end="")
+
+         print(f"\\nMetrics: {callback.metrics}")
+
+     Attributes:
+         tracker: The underlying StreamingMetricsTracker
+         metrics: Final metrics after context exit
+     """
+
+     def __init__(self, model: str = "gpt-4o", provider: Any = None):
+         """Initialize StreamingMetricsCallback.
+
+         Args:
+             model: Model name for token counting.
+             provider: Headroom provider for token counting.
+         """
+         self._tracker = StreamingMetricsTracker(model=model, provider=provider)
+         self._metrics: StreamingMetrics | None = None
+
+     def __enter__(self) -> StreamingMetricsTracker:
+         """Enter context, return tracker."""
+         return self._tracker
+
+     def __exit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
+         """Exit context, finalize metrics."""
+         self._metrics = self._tracker.finish()
+
+     @property
+     def tracker(self) -> StreamingMetricsTracker:
+         """Get the tracker."""
+         return self._tracker
+
+     @property
+     def metrics(self) -> StreamingMetrics | None:
+         """Get final metrics (after context exit)."""
+         return self._metrics
+
+
+ def track_streaming_response(
+     stream: Any,
+     model: str = "gpt-4o",
+     provider: Any = None,
+ ) -> tuple[str, StreamingMetrics]:
+     """Track a complete streaming response.
+
+     Convenience function that consumes a stream and returns the
+     accumulated content and metrics.
+
+     Args:
+         stream: Iterable of streaming chunks.
+         model: Model name for token counting.
+         provider: Headroom provider for token counting.
+
+     Returns:
+         Tuple of (accumulated_content, metrics).
+
+     Example:
+         content, metrics = track_streaming_response(
+             llm.stream(messages),
+             model="gpt-4o"
+         )
+         print(f"Content: {content}")
+         print(f"Tokens: {metrics.output_tokens}")
+     """
+     tracker = StreamingMetricsTracker(model=model, provider=provider)
+
+     for chunk in stream:
+         tracker.add_chunk(chunk)
+
+     metrics = tracker.finish()
+     return tracker.content, metrics
+
+
+ async def track_async_streaming_response(
+     stream: Any,
+     model: str = "gpt-4o",
+     provider: Any = None,
+ ) -> tuple[str, StreamingMetrics]:
+     """Track a complete async streaming response.
+
+     Async version of track_streaming_response.
+
+     Args:
+         stream: Async iterable of streaming chunks.
+         model: Model name for token counting.
+         provider: Headroom provider for token counting.
+
+     Returns:
+         Tuple of (accumulated_content, metrics).
+
+     Example:
+         content, metrics = await track_async_streaming_response(
+             llm.astream(messages),
+             model="gpt-4o"
+         )
+     """
+     tracker = StreamingMetricsTracker(model=model, provider=provider)
+
+     async for chunk in stream:
+         tracker.add_chunk(chunk)
+
+     metrics = tracker.finish()
+     return tracker.content, metrics
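
The module above exposes three entry points: the raw tracker, the context manager, and the consume-everything helpers. A minimal end-to-end sketch tying them together, assuming langchain-openai is installed and an OpenAI API key is configured (ChatOpenAI and the "gpt-4o" model name are illustrative choices, not requirements of this module):

    from langchain_openai import ChatOpenAI

    from headroom.integrations.langchain.streaming import (
        StreamingMetricsCallback,
        track_streaming_response,
    )

    llm = ChatOpenAI(model="gpt-4o")

    # Context-manager style: __exit__ calls finish(), so the final
    # StreamingMetrics lands on the callback, not on the tracker.
    callback = StreamingMetricsCallback(model="gpt-4o")
    with callback as tracker:
        for chunk in llm.stream("Tell me a story"):
            tracker.add_chunk(chunk)
            print(chunk.content, end="", flush=True)

    if callback.metrics is not None:
        print(callback.metrics.to_dict())

    # One-shot style: the helper consumes the whole stream and returns
    # (accumulated_content, StreamingMetrics).
    content, metrics = track_streaming_response(
        llm.stream("Tell me another story"), model="gpt-4o"
    )
    print(metrics.output_tokens, metrics.duration_ms)

Note the token counts are estimates derived from the provider's tokenizer over the accumulated text, not usage figures reported by the API.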
@@ -0,0 +1,37 @@
+ """MCP (Model Context Protocol) integration for Headroom.
+
+ This package provides compression utilities for MCP tool results,
+ helping reduce context usage when tools return large outputs.
+
+ Example:
+     from headroom.integrations.mcp import compress_tool_result
+
+     # Compress large tool output
+     result = compress_tool_result(
+         tool_name="search",
+         result=large_json_result,
+         max_chars=5000,
+     )
+ """
+
+ from .server import (
+     DEFAULT_MCP_PROFILES,
+     HeadroomMCPClientWrapper,
+     HeadroomMCPCompressor,
+     MCPCompressionResult,
+     MCPToolProfile,
+     compress_tool_result,
+     compress_tool_result_with_metrics,
+     create_headroom_mcp_proxy,
+ )
+
+ __all__ = [
+     "HeadroomMCPCompressor",
+     "HeadroomMCPClientWrapper",
+     "MCPCompressionResult",
+     "MCPToolProfile",
+     "compress_tool_result",
+     "compress_tool_result_with_metrics",
+     "create_headroom_mcp_proxy",
+     "DEFAULT_MCP_PROFILES",
+ ]
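
A self-contained sketch of the docstring example above, hedged to the three parameters the package's own docstring shows (tool_name, result, max_chars); the large_json_result payload is purely illustrative:

    import json

    from headroom.integrations.mcp import compress_tool_result

    # Illustrative oversized tool output, e.g. a search tool's raw JSON.
    large_json_result = json.dumps(
        {"hits": [{"id": i, "snippet": "lorem ipsum " * 20} for i in range(500)]}
    )

    # Cap how much of the tool result is fed back into the model context.
    result = compress_tool_result(
        tool_name="search",
        result=large_json_result,
        max_chars=5000,
    )

For compression statistics alongside the compressed payload, the exports suggest compress_tool_result_with_metrics, whose exact signature is defined in headroom/integrations/mcp/server.py.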