headroom-ai 0.2.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (114)
  1. headroom/__init__.py +212 -0
  2. headroom/cache/__init__.py +76 -0
  3. headroom/cache/anthropic.py +517 -0
  4. headroom/cache/base.py +342 -0
  5. headroom/cache/compression_feedback.py +613 -0
  6. headroom/cache/compression_store.py +814 -0
  7. headroom/cache/dynamic_detector.py +1026 -0
  8. headroom/cache/google.py +884 -0
  9. headroom/cache/openai.py +584 -0
  10. headroom/cache/registry.py +175 -0
  11. headroom/cache/semantic.py +451 -0
  12. headroom/ccr/__init__.py +77 -0
  13. headroom/ccr/context_tracker.py +582 -0
  14. headroom/ccr/mcp_server.py +319 -0
  15. headroom/ccr/response_handler.py +772 -0
  16. headroom/ccr/tool_injection.py +415 -0
  17. headroom/cli.py +219 -0
  18. headroom/client.py +977 -0
  19. headroom/compression/__init__.py +42 -0
  20. headroom/compression/detector.py +424 -0
  21. headroom/compression/handlers/__init__.py +22 -0
  22. headroom/compression/handlers/base.py +219 -0
  23. headroom/compression/handlers/code_handler.py +506 -0
  24. headroom/compression/handlers/json_handler.py +418 -0
  25. headroom/compression/masks.py +345 -0
  26. headroom/compression/universal.py +465 -0
  27. headroom/config.py +474 -0
  28. headroom/exceptions.py +192 -0
  29. headroom/integrations/__init__.py +159 -0
  30. headroom/integrations/agno/__init__.py +53 -0
  31. headroom/integrations/agno/hooks.py +345 -0
  32. headroom/integrations/agno/model.py +625 -0
  33. headroom/integrations/agno/providers.py +154 -0
  34. headroom/integrations/langchain/__init__.py +106 -0
  35. headroom/integrations/langchain/agents.py +326 -0
  36. headroom/integrations/langchain/chat_model.py +1002 -0
  37. headroom/integrations/langchain/langsmith.py +324 -0
  38. headroom/integrations/langchain/memory.py +319 -0
  39. headroom/integrations/langchain/providers.py +200 -0
  40. headroom/integrations/langchain/retriever.py +371 -0
  41. headroom/integrations/langchain/streaming.py +341 -0
  42. headroom/integrations/mcp/__init__.py +37 -0
  43. headroom/integrations/mcp/server.py +533 -0
  44. headroom/memory/__init__.py +37 -0
  45. headroom/memory/extractor.py +390 -0
  46. headroom/memory/fast_store.py +621 -0
  47. headroom/memory/fast_wrapper.py +311 -0
  48. headroom/memory/inline_extractor.py +229 -0
  49. headroom/memory/store.py +434 -0
  50. headroom/memory/worker.py +260 -0
  51. headroom/memory/wrapper.py +321 -0
  52. headroom/models/__init__.py +39 -0
  53. headroom/models/registry.py +687 -0
  54. headroom/parser.py +293 -0
  55. headroom/pricing/__init__.py +51 -0
  56. headroom/pricing/anthropic_prices.py +81 -0
  57. headroom/pricing/litellm_pricing.py +113 -0
  58. headroom/pricing/openai_prices.py +91 -0
  59. headroom/pricing/registry.py +188 -0
  60. headroom/providers/__init__.py +61 -0
  61. headroom/providers/anthropic.py +621 -0
  62. headroom/providers/base.py +131 -0
  63. headroom/providers/cohere.py +362 -0
  64. headroom/providers/google.py +427 -0
  65. headroom/providers/litellm.py +297 -0
  66. headroom/providers/openai.py +566 -0
  67. headroom/providers/openai_compatible.py +521 -0
  68. headroom/proxy/__init__.py +19 -0
  69. headroom/proxy/server.py +2683 -0
  70. headroom/py.typed +0 -0
  71. headroom/relevance/__init__.py +124 -0
  72. headroom/relevance/base.py +106 -0
  73. headroom/relevance/bm25.py +255 -0
  74. headroom/relevance/embedding.py +255 -0
  75. headroom/relevance/hybrid.py +259 -0
  76. headroom/reporting/__init__.py +5 -0
  77. headroom/reporting/generator.py +549 -0
  78. headroom/storage/__init__.py +41 -0
  79. headroom/storage/base.py +125 -0
  80. headroom/storage/jsonl.py +220 -0
  81. headroom/storage/sqlite.py +289 -0
  82. headroom/telemetry/__init__.py +91 -0
  83. headroom/telemetry/collector.py +764 -0
  84. headroom/telemetry/models.py +880 -0
  85. headroom/telemetry/toin.py +1579 -0
  86. headroom/tokenizer.py +80 -0
  87. headroom/tokenizers/__init__.py +75 -0
  88. headroom/tokenizers/base.py +210 -0
  89. headroom/tokenizers/estimator.py +198 -0
  90. headroom/tokenizers/huggingface.py +317 -0
  91. headroom/tokenizers/mistral.py +245 -0
  92. headroom/tokenizers/registry.py +398 -0
  93. headroom/tokenizers/tiktoken_counter.py +248 -0
  94. headroom/transforms/__init__.py +106 -0
  95. headroom/transforms/base.py +57 -0
  96. headroom/transforms/cache_aligner.py +357 -0
  97. headroom/transforms/code_compressor.py +1313 -0
  98. headroom/transforms/content_detector.py +335 -0
  99. headroom/transforms/content_router.py +1158 -0
  100. headroom/transforms/llmlingua_compressor.py +638 -0
  101. headroom/transforms/log_compressor.py +529 -0
  102. headroom/transforms/pipeline.py +297 -0
  103. headroom/transforms/rolling_window.py +350 -0
  104. headroom/transforms/search_compressor.py +365 -0
  105. headroom/transforms/smart_crusher.py +2682 -0
  106. headroom/transforms/text_compressor.py +259 -0
  107. headroom/transforms/tool_crusher.py +338 -0
  108. headroom/utils.py +215 -0
  109. headroom_ai-0.2.13.dist-info/METADATA +315 -0
  110. headroom_ai-0.2.13.dist-info/RECORD +114 -0
  111. headroom_ai-0.2.13.dist-info/WHEEL +4 -0
  112. headroom_ai-0.2.13.dist-info/entry_points.txt +2 -0
  113. headroom_ai-0.2.13.dist-info/licenses/LICENSE +190 -0
  114. headroom_ai-0.2.13.dist-info/licenses/NOTICE +43 -0
@@ -0,0 +1,159 @@
1
"""Headroom integrations with popular LLM frameworks.

Available integrations:

LangChain (pip install headroom[langchain]):
    - HeadroomChatModel: Drop-in wrapper for any LangChain chat model
    - HeadroomChatMessageHistory: Automatic conversation compression
    - HeadroomDocumentCompressor: Relevance-based document filtering
    - HeadroomToolWrapper: Tool output compression for agents
    - StreamingMetricsTracker: Token counting during streaming
    - HeadroomLangSmithCallbackHandler: LangSmith trace enrichment

Agno (pip install agno):
    - HeadroomAgnoModel: Drop-in wrapper for any Agno model
    - HeadroomPreHook/HeadroomPostHook: Agent-level hooks for tracking
    - create_headroom_hooks: Convenience function to create hook pairs

MCP (Model Context Protocol):
    - HeadroomMCPCompressor: Compress MCP tool results
    - compress_tool_result: Simple function for tool compression

Example:
    # LangChain integration
    from headroom.integrations import HeadroomChatModel
    # or explicitly:
    from headroom.integrations.langchain import HeadroomChatModel

    # Agno integration
    from headroom.integrations import HeadroomAgnoModel
    # or explicitly:
    from headroom.integrations.agno import HeadroomAgnoModel

    # MCP integration
    from headroom.integrations import compress_tool_result
    # or explicitly:
    from headroom.integrations.mcp import compress_tool_result
"""

# Re-export from langchain subpackage for backwards compatibility
from .langchain import (
    # Retrievers
    CompressionMetrics,
    # Core
    HeadroomCallbackHandler,
    # Memory
    HeadroomChatMessageHistory,
    HeadroomChatModel,
    HeadroomDocumentCompressor,
    # LangSmith
    HeadroomLangSmithCallbackHandler,
    HeadroomRunnable,
    # Agents
    HeadroomToolWrapper,
    OptimizationMetrics,
    # Streaming
    StreamingMetrics,
    StreamingMetricsCallback,
    StreamingMetricsTracker,
    ToolCompressionMetrics,
    ToolMetricsCollector,
    # Provider Detection
    detect_provider,
    get_headroom_provider,
    get_model_name_from_langchain,
    get_tool_metrics,
    is_langsmith_available,
    is_langsmith_tracing_enabled,
    langchain_available,
    optimize_messages,
    reset_tool_metrics,
    track_async_streaming_response,
    track_streaming_response,
    wrap_tools_with_headroom,
)

# Re-export from mcp subpackage for backwards compatibility
from .mcp import (
    DEFAULT_MCP_PROFILES,
    HeadroomMCPClientWrapper,
    HeadroomMCPCompressor,
    MCPCompressionResult,
    MCPToolProfile,
    compress_tool_result,
    compress_tool_result_with_metrics,
    create_headroom_mcp_proxy,
)

# Re-export from agno subpackage (optional dependency)
try:
    from .agno import (
        HeadroomAgnoModel,
        HeadroomPostHook,
        HeadroomPreHook,
        agno_available,
        create_headroom_hooks,
        get_model_name_from_agno,
    )
    from .agno import OptimizationMetrics as AgnoOptimizationMetrics
    from .agno import get_headroom_provider as get_agno_provider
    from .agno import optimize_messages as optimize_agno_messages

    _AGNO_AVAILABLE = True
except ImportError:
    _AGNO_AVAILABLE = False

__all__ = [
    # LangChain Core
    "HeadroomChatModel",
    "HeadroomCallbackHandler",
    "HeadroomRunnable",
    "OptimizationMetrics",
    "optimize_messages",
    "langchain_available",
    # Provider Detection
    "detect_provider",
    "get_headroom_provider",
    "get_model_name_from_langchain",
    # Memory
    "HeadroomChatMessageHistory",
    # Retrievers
    "HeadroomDocumentCompressor",
    "CompressionMetrics",
    # Agents
    "HeadroomToolWrapper",
    "ToolCompressionMetrics",
    "ToolMetricsCollector",
    "wrap_tools_with_headroom",
    "get_tool_metrics",
    "reset_tool_metrics",
    # LangSmith
    "HeadroomLangSmithCallbackHandler",
    "is_langsmith_available",
    "is_langsmith_tracing_enabled",
    # Streaming
    "StreamingMetricsTracker",
    "StreamingMetricsCallback",
    "StreamingMetrics",
    "track_streaming_response",
    "track_async_streaming_response",
    # MCP
    "HeadroomMCPCompressor",
    "HeadroomMCPClientWrapper",
    "MCPCompressionResult",
    "MCPToolProfile",
    "compress_tool_result",
    "compress_tool_result_with_metrics",
    "create_headroom_mcp_proxy",
    "DEFAULT_MCP_PROFILES",
]

# Only advertise the Agno names when the optional dependency imported
# successfully; otherwise `from headroom.integrations import *` would raise
# AttributeError for the missing re-exports.
if _AGNO_AVAILABLE:
    __all__ += [
        "HeadroomAgnoModel",
        "HeadroomPreHook",
        "HeadroomPostHook",
        "agno_available",
        "create_headroom_hooks",
        "get_agno_provider",
        "get_model_name_from_agno",
        "AgnoOptimizationMetrics",
        "optimize_agno_messages",
    ]
@@ -0,0 +1,53 @@
1
"""Agno integration for Headroom SDK.

This module provides seamless integration with Agno (formerly Phidata),
enabling automatic context optimization for Agno agents.

Components:
    1. HeadroomAgnoModel - Wraps any Agno model to apply Headroom transforms
    2. create_headroom_hooks - Creates pre/post hooks for Agno agents
    3. optimize_messages - Standalone function for manual optimization

Example:
    from agno.agent import Agent
    from agno.models.openai import OpenAIChat
    from headroom.integrations.agno import HeadroomAgnoModel

    # Wrap any Agno model
    model = OpenAIChat(id="gpt-4o")
    optimized_model = HeadroomAgnoModel(model)

    # Use with agent
    agent = Agent(model=optimized_model)
    response = agent.run("Hello!")
"""

# Agent-level hooks: request/response tracking only (no token optimization).
from .hooks import (
    HeadroomPostHook,
    HeadroomPreHook,
    HookMetrics,
    create_headroom_hooks,
)

# Model wrapper: where the actual Headroom transforms are applied.
from .model import (
    HeadroomAgnoModel,
    OptimizationMetrics,
    agno_available,
    optimize_messages,
)

# Helpers to map an Agno model to a Headroom provider/model name.
from .providers import get_headroom_provider, get_model_name_from_agno

__all__ = [
    # Model wrapper
    "HeadroomAgnoModel",
    "OptimizationMetrics",
    "agno_available",
    "optimize_messages",
    # Hooks
    "create_headroom_hooks",
    "HeadroomPreHook",
    "HeadroomPostHook",
    "HookMetrics",
    # Provider detection
    "get_headroom_provider",
    "get_model_name_from_agno",
]
@@ -0,0 +1,345 @@
1
+ """Agno hooks for Headroom integration.
2
+
3
+ This module provides pre_hooks and post_hooks that can be used with
4
+ Agno agents to apply Headroom optimization at the agent level.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import logging
10
+ import threading
11
+ from dataclasses import dataclass, field
12
+ from datetime import datetime, timezone
13
+ from typing import Any
14
+ from uuid import uuid4
15
+
16
+ from headroom import HeadroomConfig, HeadroomMode
17
+
18
+ logger = logging.getLogger(__name__)
19
+
20
+
21
@dataclass
class HookMetrics:
    """Metrics collected by Headroom pre-hooks.

    Note: These metrics track request counts and timing, not token savings.
    For actual token optimization metrics, use HeadroomAgnoModel which
    wraps the model and provides detailed compression statistics.
    """

    # Unique identifier for the tracked request (a uuid4 string).
    request_id: str
    # Time (UTC) at which the pre-hook observed the request.
    timestamp: datetime
    # These fields are kept for API compatibility but are always 0
    # Use HeadroomAgnoModel for actual token optimization
    tokens_before: int = 0
    tokens_after: int = 0
    tokens_saved: int = 0
    savings_percent: float = 0.0
    transforms_applied: list[str] = field(default_factory=list)
39
+
40
+
41
class HeadroomPreHook:
    """Pre-hook for Agno agents that records per-request tracking metrics.

    The hook fires before the agent hands messages to the LLM and appends a
    timestamped HookMetrics entry for observability into request patterns.
    It never modifies the input: Agno pre_hooks only receive the user input
    string, not the full message history, so real token optimization belongs
    at the model level via HeadroomAgnoModel.

    Example:
        from agno.agent import Agent
        from agno.models.openai import OpenAIChat
        from headroom.integrations.agno import HeadroomPreHook, HeadroomAgnoModel

        # For request tracking only
        pre_hook = HeadroomPreHook()

        # For actual optimization, wrap the model
        model = HeadroomAgnoModel(OpenAIChat(id="gpt-4o"))

        agent = Agent(
            model=model,
            pre_hooks=[pre_hook],
        )

        response = agent.run("Hello!")
        print(f"Requests tracked: {len(pre_hook.metrics_history)}")
        print(f"Tokens saved: {model.total_tokens_saved}")
    """

    def __init__(
        self,
        config: HeadroomConfig | None = None,
        mode: HeadroomMode = HeadroomMode.OPTIMIZE,
        model: str = "gpt-4o",
    ) -> None:
        """Initialize the pre-hook.

        Args:
            config: HeadroomConfig for optimization settings (stored for future use)
            mode: HeadroomMode (stored for future use)
            model: Default model name for token estimation (stored for future use)
        """
        self.config = config or HeadroomConfig()
        self.mode = mode
        self.model = model

        self._metrics_history: list[HookMetrics] = []
        self._total_tokens_saved: int = 0
        # Guards all mutable tracking state against concurrent agent runs.
        self._lock = threading.Lock()

    @property
    def total_tokens_saved(self) -> int:
        """Total tokens saved across all calls (thread-safe; 0 for this hook)."""
        with self._lock:
            return self._total_tokens_saved

    @property
    def metrics_history(self) -> list[HookMetrics]:
        """Snapshot copy of the recorded metrics (thread-safe)."""
        with self._lock:
            return self._metrics_history.copy()

    def __call__(self, run_input: Any, **kwargs: Any) -> Any:
        """Record the request and pass ``run_input`` through unchanged.

        Called by Agno before the LLM processes the input; the hook only
        logs and records the request.

        Args:
            run_input: The input from the agent
            **kwargs: Additional arguments (for forward compatibility with Agno)

        Returns:
            The unchanged run_input
        """
        request_id = str(uuid4())
        logger.debug(f"HeadroomPreHook tracking request {request_id}")

        # Timing/tracking entry only; token fields stay at their 0 defaults.
        entry = HookMetrics(
            request_id=request_id,
            timestamp=datetime.now(timezone.utc),
        )

        with self._lock:
            self._metrics_history.append(entry)
            # Bound memory by retaining only the 100 most recent entries.
            del self._metrics_history[:-100]

        # Use HeadroomAgnoModel for actual optimization.
        return run_input

    def get_savings_summary(self) -> dict[str, Any]:
        """Return aggregate request/savings counters (thread-safe)."""
        with self._lock:
            count = len(self._metrics_history)
            if count == 0:
                return {
                    "total_requests": 0,
                    "total_tokens_saved": 0,
                    "average_savings_percent": 0,
                }

            average = sum(m.savings_percent for m in self._metrics_history) / count
            return {
                "total_requests": count,
                "total_tokens_saved": self._total_tokens_saved,
                "average_savings_percent": average,
            }
159
+
160
+
161
class HeadroomPostHook:
    """Post-hook for Agno agents that tracks response metrics.

    This hook runs after the agent generates a response, recording token
    usage (when the Agno response exposes a ``metrics`` object), content
    length, and optional token-threshold alerts.

    Example:
        from agno.agent import Agent
        from agno.models.openai import OpenAIChat
        from headroom.integrations.agno import HeadroomPostHook

        post_hook = HeadroomPostHook()

        agent = Agent(
            model=OpenAIChat(id="gpt-4o"),
            post_hooks=[post_hook],
        )

        response = agent.run("Hello!")
        print(f"Requests tracked: {post_hook.total_requests}")
    """

    def __init__(
        self,
        log_level: str = "INFO",
        token_alert_threshold: int | None = None,
    ) -> None:
        """Initialize HeadroomPostHook.

        Args:
            log_level: Logging level ("DEBUG", "INFO", "WARNING")
            token_alert_threshold: Alert if response exceeds this many tokens
        """
        self.log_level = log_level
        self.token_alert_threshold = token_alert_threshold

        self._requests: list[dict[str, Any]] = []
        self._alerts: list[str] = []
        self._lock = threading.Lock()  # Thread safety for requests/alerts

    @property
    def total_requests(self) -> int:
        """Total number of requests tracked."""
        with self._lock:
            return len(self._requests)

    @property
    def alerts(self) -> list[str]:
        """List of alerts triggered (thread-safe copy)."""
        with self._lock:
            return self._alerts.copy()

    def __call__(self, run_output: Any, **kwargs: Any) -> Any:
        """Track the run output.

        This is called by Agno after the LLM generates a response.

        Args:
            run_output: The output from the agent
            **kwargs: Additional arguments (for forward compatibility with Agno)

        Returns:
            The unchanged run_output
        """
        request_info: dict[str, Any] = {
            "timestamp": datetime.now(timezone.utc),
            "output_type": type(run_output).__name__,
        }

        # Extract token usage if the response exposes it.
        alert_to_add: str | None = None
        if hasattr(run_output, "metrics"):
            metrics = run_output.metrics
            for attr in ("input_tokens", "output_tokens", "total_tokens"):
                if hasattr(metrics, attr):
                    request_info[attr] = getattr(metrics, attr)

            # Fix: only compare against the threshold when total_tokens is
            # actually present. Previously metrics.total_tokens was read
            # unconditionally, raising AttributeError for metrics objects
            # that lack it.
            total_tokens = request_info.get("total_tokens")
            if (
                self.token_alert_threshold
                and total_tokens is not None
                and total_tokens > self.token_alert_threshold
            ):
                alert_to_add = (
                    f"Token alert: {total_tokens} tokens exceeds "
                    f"threshold {self.token_alert_threshold}"
                )

        # Record content length when a non-empty content attribute exists.
        if hasattr(run_output, "content") and run_output.content:
            request_info["content_length"] = len(run_output.content)

        # Thread-safe update of requests and alerts.
        with self._lock:
            self._requests.append(request_info)

            # Keep only last 1000 requests to bound memory.
            if len(self._requests) > 1000:
                self._requests = self._requests[-1000:]

            if alert_to_add:
                self._alerts.append(alert_to_add)

        # Log outside the lock to avoid holding it during I/O.
        if alert_to_add:
            logging.getLogger(__name__).warning(alert_to_add)

        if self.log_level in ("DEBUG", "INFO"):
            logging.getLogger(__name__).log(
                logging.DEBUG if self.log_level == "DEBUG" else logging.INFO,
                f"Agno request completed: {request_info}",
            )

        # Return output unchanged.
        return run_output

    def get_summary(self) -> dict[str, Any]:
        """Get summary of tracked requests (thread-safe)."""
        with self._lock:
            if not self._requests:
                return {
                    "total_requests": 0,
                    "total_tokens": 0,
                    "alerts": len(self._alerts),
                }

            total_tokens = sum(r.get("total_tokens", 0) for r in self._requests)

            return {
                "total_requests": len(self._requests),
                "total_tokens": total_tokens,
                "average_tokens": total_tokens / len(self._requests) if self._requests else 0,
                "alerts": len(self._alerts),
            }

    def reset(self) -> None:
        """Reset all tracked metrics (thread-safe)."""
        with self._lock:
            self._requests = []
            self._alerts = []
301
+
302
+
303
def create_headroom_hooks(
    config: HeadroomConfig | None = None,
    mode: HeadroomMode = HeadroomMode.OPTIMIZE,
    model: str = "gpt-4o",
    log_level: str = "INFO",
    token_alert_threshold: int | None = None,
) -> tuple[HeadroomPreHook, HeadroomPostHook]:
    """Build a matched (pre_hook, post_hook) pair for Agno agents.

    Convenience helper that constructs both hooks with a consistent
    configuration in one call.

    Args:
        config: HeadroomConfig for optimization settings
        mode: HeadroomMode (AUDIT, OPTIMIZE, or SIMULATE)
        model: Default model name for token estimation
        log_level: Logging level for post-hook
        token_alert_threshold: Alert threshold for post-hook

    Returns:
        Tuple of (pre_hook, post_hook)

    Example:
        from agno.agent import Agent
        from agno.models.openai import OpenAIChat
        from headroom.integrations.agno import create_headroom_hooks

        pre_hook, post_hook = create_headroom_hooks(
            token_alert_threshold=10000,
        )

        agent = Agent(
            model=OpenAIChat(id="gpt-4o"),
            pre_hooks=[pre_hook],
            post_hooks=[post_hook],
        )
    """
    return (
        HeadroomPreHook(config=config, mode=mode, model=model),
        HeadroomPostHook(
            log_level=log_level,
            token_alert_threshold=token_alert_threshold,
        ),
    )