headroom-ai 0.2.13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- headroom/__init__.py +212 -0
- headroom/cache/__init__.py +76 -0
- headroom/cache/anthropic.py +517 -0
- headroom/cache/base.py +342 -0
- headroom/cache/compression_feedback.py +613 -0
- headroom/cache/compression_store.py +814 -0
- headroom/cache/dynamic_detector.py +1026 -0
- headroom/cache/google.py +884 -0
- headroom/cache/openai.py +584 -0
- headroom/cache/registry.py +175 -0
- headroom/cache/semantic.py +451 -0
- headroom/ccr/__init__.py +77 -0
- headroom/ccr/context_tracker.py +582 -0
- headroom/ccr/mcp_server.py +319 -0
- headroom/ccr/response_handler.py +772 -0
- headroom/ccr/tool_injection.py +415 -0
- headroom/cli.py +219 -0
- headroom/client.py +977 -0
- headroom/compression/__init__.py +42 -0
- headroom/compression/detector.py +424 -0
- headroom/compression/handlers/__init__.py +22 -0
- headroom/compression/handlers/base.py +219 -0
- headroom/compression/handlers/code_handler.py +506 -0
- headroom/compression/handlers/json_handler.py +418 -0
- headroom/compression/masks.py +345 -0
- headroom/compression/universal.py +465 -0
- headroom/config.py +474 -0
- headroom/exceptions.py +192 -0
- headroom/integrations/__init__.py +159 -0
- headroom/integrations/agno/__init__.py +53 -0
- headroom/integrations/agno/hooks.py +345 -0
- headroom/integrations/agno/model.py +625 -0
- headroom/integrations/agno/providers.py +154 -0
- headroom/integrations/langchain/__init__.py +106 -0
- headroom/integrations/langchain/agents.py +326 -0
- headroom/integrations/langchain/chat_model.py +1002 -0
- headroom/integrations/langchain/langsmith.py +324 -0
- headroom/integrations/langchain/memory.py +319 -0
- headroom/integrations/langchain/providers.py +200 -0
- headroom/integrations/langchain/retriever.py +371 -0
- headroom/integrations/langchain/streaming.py +341 -0
- headroom/integrations/mcp/__init__.py +37 -0
- headroom/integrations/mcp/server.py +533 -0
- headroom/memory/__init__.py +37 -0
- headroom/memory/extractor.py +390 -0
- headroom/memory/fast_store.py +621 -0
- headroom/memory/fast_wrapper.py +311 -0
- headroom/memory/inline_extractor.py +229 -0
- headroom/memory/store.py +434 -0
- headroom/memory/worker.py +260 -0
- headroom/memory/wrapper.py +321 -0
- headroom/models/__init__.py +39 -0
- headroom/models/registry.py +687 -0
- headroom/parser.py +293 -0
- headroom/pricing/__init__.py +51 -0
- headroom/pricing/anthropic_prices.py +81 -0
- headroom/pricing/litellm_pricing.py +113 -0
- headroom/pricing/openai_prices.py +91 -0
- headroom/pricing/registry.py +188 -0
- headroom/providers/__init__.py +61 -0
- headroom/providers/anthropic.py +621 -0
- headroom/providers/base.py +131 -0
- headroom/providers/cohere.py +362 -0
- headroom/providers/google.py +427 -0
- headroom/providers/litellm.py +297 -0
- headroom/providers/openai.py +566 -0
- headroom/providers/openai_compatible.py +521 -0
- headroom/proxy/__init__.py +19 -0
- headroom/proxy/server.py +2683 -0
- headroom/py.typed +0 -0
- headroom/relevance/__init__.py +124 -0
- headroom/relevance/base.py +106 -0
- headroom/relevance/bm25.py +255 -0
- headroom/relevance/embedding.py +255 -0
- headroom/relevance/hybrid.py +259 -0
- headroom/reporting/__init__.py +5 -0
- headroom/reporting/generator.py +549 -0
- headroom/storage/__init__.py +41 -0
- headroom/storage/base.py +125 -0
- headroom/storage/jsonl.py +220 -0
- headroom/storage/sqlite.py +289 -0
- headroom/telemetry/__init__.py +91 -0
- headroom/telemetry/collector.py +764 -0
- headroom/telemetry/models.py +880 -0
- headroom/telemetry/toin.py +1579 -0
- headroom/tokenizer.py +80 -0
- headroom/tokenizers/__init__.py +75 -0
- headroom/tokenizers/base.py +210 -0
- headroom/tokenizers/estimator.py +198 -0
- headroom/tokenizers/huggingface.py +317 -0
- headroom/tokenizers/mistral.py +245 -0
- headroom/tokenizers/registry.py +398 -0
- headroom/tokenizers/tiktoken_counter.py +248 -0
- headroom/transforms/__init__.py +106 -0
- headroom/transforms/base.py +57 -0
- headroom/transforms/cache_aligner.py +357 -0
- headroom/transforms/code_compressor.py +1313 -0
- headroom/transforms/content_detector.py +335 -0
- headroom/transforms/content_router.py +1158 -0
- headroom/transforms/llmlingua_compressor.py +638 -0
- headroom/transforms/log_compressor.py +529 -0
- headroom/transforms/pipeline.py +297 -0
- headroom/transforms/rolling_window.py +350 -0
- headroom/transforms/search_compressor.py +365 -0
- headroom/transforms/smart_crusher.py +2682 -0
- headroom/transforms/text_compressor.py +259 -0
- headroom/transforms/tool_crusher.py +338 -0
- headroom/utils.py +215 -0
- headroom_ai-0.2.13.dist-info/METADATA +315 -0
- headroom_ai-0.2.13.dist-info/RECORD +114 -0
- headroom_ai-0.2.13.dist-info/WHEEL +4 -0
- headroom_ai-0.2.13.dist-info/entry_points.txt +2 -0
- headroom_ai-0.2.13.dist-info/licenses/LICENSE +190 -0
- headroom_ai-0.2.13.dist-info/licenses/NOTICE +43 -0
|
@@ -0,0 +1,159 @@
|
|
|
1
|
+
"""Headroom integrations with popular LLM frameworks.
|
|
2
|
+
|
|
3
|
+
Available integrations:
|
|
4
|
+
|
|
5
|
+
LangChain (pip install headroom[langchain]):
|
|
6
|
+
- HeadroomChatModel: Drop-in wrapper for any LangChain chat model
|
|
7
|
+
- HeadroomChatMessageHistory: Automatic conversation compression
|
|
8
|
+
- HeadroomDocumentCompressor: Relevance-based document filtering
|
|
9
|
+
- HeadroomToolWrapper: Tool output compression for agents
|
|
10
|
+
- StreamingMetricsTracker: Token counting during streaming
|
|
11
|
+
- HeadroomLangSmithCallbackHandler: LangSmith trace enrichment
|
|
12
|
+
|
|
13
|
+
Agno (pip install agno):
|
|
14
|
+
- HeadroomAgnoModel: Drop-in wrapper for any Agno model
|
|
15
|
+
- HeadroomPreHook/HeadroomPostHook: Agent-level hooks for tracking
|
|
16
|
+
- create_headroom_hooks: Convenience function to create hook pairs
|
|
17
|
+
|
|
18
|
+
MCP (Model Context Protocol):
|
|
19
|
+
- HeadroomMCPCompressor: Compress MCP tool results
|
|
20
|
+
- compress_tool_result: Simple function for tool compression
|
|
21
|
+
|
|
22
|
+
Example:
|
|
23
|
+
# LangChain integration
|
|
24
|
+
from headroom.integrations import HeadroomChatModel
|
|
25
|
+
# or explicitly:
|
|
26
|
+
from headroom.integrations.langchain import HeadroomChatModel
|
|
27
|
+
|
|
28
|
+
# Agno integration
|
|
29
|
+
from headroom.integrations.agno import HeadroomAgnoModel
|
|
30
|
+
# or explicitly:
|
|
31
|
+
from headroom.integrations.agno import HeadroomAgnoModel
|
|
32
|
+
|
|
33
|
+
# MCP integration
|
|
34
|
+
from headroom.integrations import compress_tool_result
|
|
35
|
+
# or explicitly:
|
|
36
|
+
from headroom.integrations.mcp import compress_tool_result
|
|
37
|
+
"""
|
|
38
|
+
|
|
39
|
+
# Re-export from langchain subpackage for backwards compatibility
|
|
40
|
+
from .langchain import (
|
|
41
|
+
# Retrievers
|
|
42
|
+
CompressionMetrics,
|
|
43
|
+
# Core
|
|
44
|
+
HeadroomCallbackHandler,
|
|
45
|
+
# Memory
|
|
46
|
+
HeadroomChatMessageHistory,
|
|
47
|
+
HeadroomChatModel,
|
|
48
|
+
HeadroomDocumentCompressor,
|
|
49
|
+
# LangSmith
|
|
50
|
+
HeadroomLangSmithCallbackHandler,
|
|
51
|
+
HeadroomRunnable,
|
|
52
|
+
# Agents
|
|
53
|
+
HeadroomToolWrapper,
|
|
54
|
+
OptimizationMetrics,
|
|
55
|
+
# Streaming
|
|
56
|
+
StreamingMetrics,
|
|
57
|
+
StreamingMetricsCallback,
|
|
58
|
+
StreamingMetricsTracker,
|
|
59
|
+
ToolCompressionMetrics,
|
|
60
|
+
ToolMetricsCollector,
|
|
61
|
+
# Provider Detection
|
|
62
|
+
detect_provider,
|
|
63
|
+
get_headroom_provider,
|
|
64
|
+
get_model_name_from_langchain,
|
|
65
|
+
get_tool_metrics,
|
|
66
|
+
is_langsmith_available,
|
|
67
|
+
is_langsmith_tracing_enabled,
|
|
68
|
+
langchain_available,
|
|
69
|
+
optimize_messages,
|
|
70
|
+
reset_tool_metrics,
|
|
71
|
+
track_async_streaming_response,
|
|
72
|
+
track_streaming_response,
|
|
73
|
+
wrap_tools_with_headroom,
|
|
74
|
+
)
|
|
75
|
+
|
|
76
|
+
# Re-export from mcp subpackage for backwards compatibility
|
|
77
|
+
from .mcp import (
|
|
78
|
+
DEFAULT_MCP_PROFILES,
|
|
79
|
+
HeadroomMCPClientWrapper,
|
|
80
|
+
HeadroomMCPCompressor,
|
|
81
|
+
MCPCompressionResult,
|
|
82
|
+
MCPToolProfile,
|
|
83
|
+
compress_tool_result,
|
|
84
|
+
compress_tool_result_with_metrics,
|
|
85
|
+
create_headroom_mcp_proxy,
|
|
86
|
+
)
|
|
87
|
+
|
|
88
|
+
# Re-export from agno subpackage (optional dependency)
|
|
89
|
+
try:
|
|
90
|
+
from .agno import (
|
|
91
|
+
HeadroomAgnoModel,
|
|
92
|
+
HeadroomPostHook,
|
|
93
|
+
HeadroomPreHook,
|
|
94
|
+
agno_available,
|
|
95
|
+
create_headroom_hooks,
|
|
96
|
+
get_model_name_from_agno,
|
|
97
|
+
)
|
|
98
|
+
from .agno import OptimizationMetrics as AgnoOptimizationMetrics
|
|
99
|
+
from .agno import get_headroom_provider as get_agno_provider
|
|
100
|
+
from .agno import optimize_messages as optimize_agno_messages
|
|
101
|
+
|
|
102
|
+
_AGNO_AVAILABLE = True
|
|
103
|
+
except ImportError:
|
|
104
|
+
_AGNO_AVAILABLE = False
|
|
105
|
+
|
|
106
|
+
__all__ = [
|
|
107
|
+
# LangChain Core
|
|
108
|
+
"HeadroomChatModel",
|
|
109
|
+
"HeadroomCallbackHandler",
|
|
110
|
+
"HeadroomRunnable",
|
|
111
|
+
"OptimizationMetrics",
|
|
112
|
+
"optimize_messages",
|
|
113
|
+
"langchain_available",
|
|
114
|
+
# Provider Detection
|
|
115
|
+
"detect_provider",
|
|
116
|
+
"get_headroom_provider",
|
|
117
|
+
"get_model_name_from_langchain",
|
|
118
|
+
# Memory
|
|
119
|
+
"HeadroomChatMessageHistory",
|
|
120
|
+
# Retrievers
|
|
121
|
+
"HeadroomDocumentCompressor",
|
|
122
|
+
"CompressionMetrics",
|
|
123
|
+
# Agents
|
|
124
|
+
"HeadroomToolWrapper",
|
|
125
|
+
"ToolCompressionMetrics",
|
|
126
|
+
"ToolMetricsCollector",
|
|
127
|
+
"wrap_tools_with_headroom",
|
|
128
|
+
"get_tool_metrics",
|
|
129
|
+
"reset_tool_metrics",
|
|
130
|
+
# LangSmith
|
|
131
|
+
"HeadroomLangSmithCallbackHandler",
|
|
132
|
+
"is_langsmith_available",
|
|
133
|
+
"is_langsmith_tracing_enabled",
|
|
134
|
+
# Streaming
|
|
135
|
+
"StreamingMetricsTracker",
|
|
136
|
+
"StreamingMetricsCallback",
|
|
137
|
+
"StreamingMetrics",
|
|
138
|
+
"track_streaming_response",
|
|
139
|
+
"track_async_streaming_response",
|
|
140
|
+
# MCP
|
|
141
|
+
"HeadroomMCPCompressor",
|
|
142
|
+
"HeadroomMCPClientWrapper",
|
|
143
|
+
"MCPCompressionResult",
|
|
144
|
+
"MCPToolProfile",
|
|
145
|
+
"compress_tool_result",
|
|
146
|
+
"compress_tool_result_with_metrics",
|
|
147
|
+
"create_headroom_mcp_proxy",
|
|
148
|
+
"DEFAULT_MCP_PROFILES",
|
|
149
|
+
# Agno
|
|
150
|
+
"HeadroomAgnoModel",
|
|
151
|
+
"HeadroomPreHook",
|
|
152
|
+
"HeadroomPostHook",
|
|
153
|
+
"agno_available",
|
|
154
|
+
"create_headroom_hooks",
|
|
155
|
+
"get_agno_provider",
|
|
156
|
+
"get_model_name_from_agno",
|
|
157
|
+
"AgnoOptimizationMetrics",
|
|
158
|
+
"optimize_agno_messages",
|
|
159
|
+
]
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
"""Agno integration for Headroom SDK.
|
|
2
|
+
|
|
3
|
+
This module provides seamless integration with Agno (formerly Phidata),
|
|
4
|
+
enabling automatic context optimization for Agno agents.
|
|
5
|
+
|
|
6
|
+
Components:
|
|
7
|
+
1. HeadroomAgnoModel - Wraps any Agno model to apply Headroom transforms
|
|
8
|
+
2. create_headroom_hooks - Creates pre/post hooks for Agno agents
|
|
9
|
+
3. optimize_messages - Standalone function for manual optimization
|
|
10
|
+
|
|
11
|
+
Example:
|
|
12
|
+
from agno.agent import Agent
|
|
13
|
+
from agno.models.openai import OpenAIChat
|
|
14
|
+
from headroom.integrations.agno import HeadroomAgnoModel
|
|
15
|
+
|
|
16
|
+
# Wrap any Agno model
|
|
17
|
+
model = OpenAIChat(id="gpt-4o")
|
|
18
|
+
optimized_model = HeadroomAgnoModel(model)
|
|
19
|
+
|
|
20
|
+
# Use with agent
|
|
21
|
+
agent = Agent(model=optimized_model)
|
|
22
|
+
response = agent.run("Hello!")
|
|
23
|
+
"""
|
|
24
|
+
|
|
25
|
+
from .hooks import (
|
|
26
|
+
HeadroomPostHook,
|
|
27
|
+
HeadroomPreHook,
|
|
28
|
+
HookMetrics,
|
|
29
|
+
create_headroom_hooks,
|
|
30
|
+
)
|
|
31
|
+
from .model import (
|
|
32
|
+
HeadroomAgnoModel,
|
|
33
|
+
OptimizationMetrics,
|
|
34
|
+
agno_available,
|
|
35
|
+
optimize_messages,
|
|
36
|
+
)
|
|
37
|
+
from .providers import get_headroom_provider, get_model_name_from_agno
|
|
38
|
+
|
|
39
|
+
__all__ = [
|
|
40
|
+
# Model wrapper
|
|
41
|
+
"HeadroomAgnoModel",
|
|
42
|
+
"OptimizationMetrics",
|
|
43
|
+
"agno_available",
|
|
44
|
+
"optimize_messages",
|
|
45
|
+
# Hooks
|
|
46
|
+
"create_headroom_hooks",
|
|
47
|
+
"HeadroomPreHook",
|
|
48
|
+
"HeadroomPostHook",
|
|
49
|
+
"HookMetrics",
|
|
50
|
+
# Provider detection
|
|
51
|
+
"get_headroom_provider",
|
|
52
|
+
"get_model_name_from_agno",
|
|
53
|
+
]
|
|
@@ -0,0 +1,345 @@
|
|
|
1
|
+
"""Agno hooks for Headroom integration.
|
|
2
|
+
|
|
3
|
+
This module provides pre_hooks and post_hooks that can be used with
|
|
4
|
+
Agno agents to apply Headroom optimization at the agent level.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import logging
|
|
10
|
+
import threading
|
|
11
|
+
from dataclasses import dataclass, field
|
|
12
|
+
from datetime import datetime, timezone
|
|
13
|
+
from typing import Any
|
|
14
|
+
from uuid import uuid4
|
|
15
|
+
|
|
16
|
+
from headroom import HeadroomConfig, HeadroomMode
|
|
17
|
+
|
|
18
|
+
logger = logging.getLogger(__name__)
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
@dataclass
class HookMetrics:
    """Metrics collected by Headroom pre-hooks.

    Note: These metrics track request counts and timing, not token savings.
    For actual token optimization metrics, use HeadroomAgnoModel which
    wraps the model and provides detailed compression statistics.
    """

    # Unique identifier assigned by HeadroomPreHook (a uuid4 string).
    request_id: str
    # UTC timestamp recorded when the pre-hook observed the request.
    timestamp: datetime
    # These fields are kept for API compatibility but are always 0
    # Use HeadroomAgnoModel for actual token optimization
    tokens_before: int = 0
    tokens_after: int = 0
    tokens_saved: int = 0
    savings_percent: float = 0.0
    transforms_applied: list[str] = field(default_factory=list)
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
class HeadroomPreHook:
    """Pre-hook for Agno agents that records per-request tracking metrics.

    Runs before the agent sends messages to the LLM and logs each request
    for observability. The input is returned untouched: Agno pre_hooks only
    receive the user input string, not the full message history, so actual
    token optimization belongs at the model level via HeadroomAgnoModel.

    Example:
        from agno.agent import Agent
        from agno.models.openai import OpenAIChat
        from headroom.integrations.agno import HeadroomPreHook, HeadroomAgnoModel

        # Request tracking only
        pre_hook = HeadroomPreHook()

        # Actual optimization happens in the wrapped model
        model = HeadroomAgnoModel(OpenAIChat(id="gpt-4o"))

        agent = Agent(
            model=model,
            pre_hooks=[pre_hook],
        )

        response = agent.run("Hello!")
        print(f"Requests tracked: {len(pre_hook.metrics_history)}")
        print(f"Tokens saved: {model.total_tokens_saved}")
    """

    def __init__(
        self,
        config: HeadroomConfig | None = None,
        mode: HeadroomMode = HeadroomMode.OPTIMIZE,
        model: str = "gpt-4o",
    ) -> None:
        """Initialize the pre-hook.

        Args:
            config: HeadroomConfig for optimization settings (stored for future use)
            mode: HeadroomMode (stored for future use)
            model: Default model name for token estimation (stored for future use)
        """
        self.config = config or HeadroomConfig()
        self.mode = mode
        self.model = model

        # All mutable state below is guarded by _lock; agents may invoke
        # hooks concurrently from multiple threads.
        self._metrics_history: list[HookMetrics] = []
        self._total_tokens_saved: int = 0
        self._lock = threading.Lock()

    @property
    def total_tokens_saved(self) -> int:
        """Total tokens saved across all calls (thread-safe).

        Always 0 here: this hook never performs optimization itself.
        """
        with self._lock:
            return self._total_tokens_saved

    @property
    def metrics_history(self) -> list[HookMetrics]:
        """Thread-safe snapshot copy of the recorded metrics."""
        with self._lock:
            return list(self._metrics_history)

    def __call__(self, run_input: Any, **kwargs: Any) -> Any:
        """Record the request and pass the input through unchanged.

        Called by Agno before the LLM processes the input; only logs and
        tracks the request.

        Args:
            run_input: The input from the agent
            **kwargs: Additional arguments (for forward compatibility with Agno)

        Returns:
            The unchanged run_input
        """
        req_id = str(uuid4())
        logger.debug(f"HeadroomPreHook tracking request {req_id}")

        # Timing/tracking only — no token accounting happens here.
        entry = HookMetrics(
            request_id=req_id,
            timestamp=datetime.now(timezone.utc),
        )

        with self._lock:
            self._metrics_history.append(entry)

            # Bound memory by retaining only the 100 most recent entries.
            overflow = len(self._metrics_history) - 100
            if overflow > 0:
                self._metrics_history = self._metrics_history[overflow:]

        # Use HeadroomAgnoModel for actual optimization.
        return run_input

    def get_savings_summary(self) -> dict[str, Any]:
        """Return a thread-safe summary of token savings."""
        with self._lock:
            count = len(self._metrics_history)
            if count == 0:
                return {
                    "total_requests": 0,
                    "total_tokens_saved": 0,
                    "average_savings_percent": 0,
                }

            average = sum(m.savings_percent for m in self._metrics_history) / count
            return {
                "total_requests": count,
                "total_tokens_saved": self._total_tokens_saved,
                "average_savings_percent": average,
            }
|
|
159
|
+
|
|
160
|
+
|
|
161
|
+
class HeadroomPostHook:
|
|
162
|
+
"""Post-hook for Agno agents that tracks optimization results.
|
|
163
|
+
|
|
164
|
+
This hook runs after the agent generates a response,
|
|
165
|
+
tracking metrics and providing observability.
|
|
166
|
+
|
|
167
|
+
Example:
|
|
168
|
+
from agno.agent import Agent
|
|
169
|
+
from agno.models.openai import OpenAIChat
|
|
170
|
+
from headroom.integrations.agno import HeadroomPostHook
|
|
171
|
+
|
|
172
|
+
post_hook = HeadroomPostHook()
|
|
173
|
+
|
|
174
|
+
agent = Agent(
|
|
175
|
+
model=OpenAIChat(id="gpt-4o"),
|
|
176
|
+
post_hooks=[post_hook],
|
|
177
|
+
)
|
|
178
|
+
|
|
179
|
+
response = agent.run("Hello!")
|
|
180
|
+
print(f"Requests tracked: {post_hook.total_requests}")
|
|
181
|
+
"""
|
|
182
|
+
|
|
183
|
+
def __init__(
|
|
184
|
+
self,
|
|
185
|
+
log_level: str = "INFO",
|
|
186
|
+
token_alert_threshold: int | None = None,
|
|
187
|
+
) -> None:
|
|
188
|
+
"""Initialize HeadroomPostHook.
|
|
189
|
+
|
|
190
|
+
Args:
|
|
191
|
+
log_level: Logging level ("DEBUG", "INFO", "WARNING")
|
|
192
|
+
token_alert_threshold: Alert if response exceeds this many tokens
|
|
193
|
+
"""
|
|
194
|
+
self.log_level = log_level
|
|
195
|
+
self.token_alert_threshold = token_alert_threshold
|
|
196
|
+
|
|
197
|
+
self._requests: list[dict[str, Any]] = []
|
|
198
|
+
self._alerts: list[str] = []
|
|
199
|
+
self._lock = threading.Lock() # Thread safety for requests/alerts
|
|
200
|
+
|
|
201
|
+
@property
|
|
202
|
+
def total_requests(self) -> int:
|
|
203
|
+
"""Total number of requests tracked."""
|
|
204
|
+
with self._lock:
|
|
205
|
+
return len(self._requests)
|
|
206
|
+
|
|
207
|
+
@property
|
|
208
|
+
def alerts(self) -> list[str]:
|
|
209
|
+
"""List of alerts triggered (thread-safe copy)."""
|
|
210
|
+
with self._lock:
|
|
211
|
+
return self._alerts.copy()
|
|
212
|
+
|
|
213
|
+
def __call__(self, run_output: Any, **kwargs: Any) -> Any:
|
|
214
|
+
"""Track the run output.
|
|
215
|
+
|
|
216
|
+
This is called by Agno after the LLM generates a response.
|
|
217
|
+
|
|
218
|
+
Args:
|
|
219
|
+
run_output: The output from the agent
|
|
220
|
+
**kwargs: Additional arguments (for forward compatibility with Agno)
|
|
221
|
+
|
|
222
|
+
Returns:
|
|
223
|
+
The unchanged run_output
|
|
224
|
+
"""
|
|
225
|
+
# Record request
|
|
226
|
+
request_info: dict[str, Any] = {
|
|
227
|
+
"timestamp": datetime.now(timezone.utc),
|
|
228
|
+
"output_type": type(run_output).__name__,
|
|
229
|
+
}
|
|
230
|
+
|
|
231
|
+
# Try to extract token usage if available
|
|
232
|
+
alert_to_add: str | None = None
|
|
233
|
+
if hasattr(run_output, "metrics"):
|
|
234
|
+
metrics = run_output.metrics
|
|
235
|
+
if hasattr(metrics, "input_tokens"):
|
|
236
|
+
request_info["input_tokens"] = metrics.input_tokens
|
|
237
|
+
if hasattr(metrics, "output_tokens"):
|
|
238
|
+
request_info["output_tokens"] = metrics.output_tokens
|
|
239
|
+
if hasattr(metrics, "total_tokens"):
|
|
240
|
+
request_info["total_tokens"] = metrics.total_tokens
|
|
241
|
+
|
|
242
|
+
# Check alert threshold
|
|
243
|
+
if self.token_alert_threshold and metrics.total_tokens > self.token_alert_threshold:
|
|
244
|
+
alert_to_add = (
|
|
245
|
+
f"Token alert: {metrics.total_tokens} tokens exceeds "
|
|
246
|
+
f"threshold {self.token_alert_threshold}"
|
|
247
|
+
)
|
|
248
|
+
|
|
249
|
+
# Try to get content length
|
|
250
|
+
if hasattr(run_output, "content") and run_output.content:
|
|
251
|
+
request_info["content_length"] = len(run_output.content)
|
|
252
|
+
|
|
253
|
+
# Thread-safe update of requests and alerts
|
|
254
|
+
with self._lock:
|
|
255
|
+
self._requests.append(request_info)
|
|
256
|
+
|
|
257
|
+
# Keep only last 1000 requests
|
|
258
|
+
if len(self._requests) > 1000:
|
|
259
|
+
self._requests = self._requests[-1000:]
|
|
260
|
+
|
|
261
|
+
if alert_to_add:
|
|
262
|
+
self._alerts.append(alert_to_add)
|
|
263
|
+
|
|
264
|
+
# Log alert outside of lock to avoid holding lock during I/O
|
|
265
|
+
if alert_to_add:
|
|
266
|
+
logger.warning(alert_to_add)
|
|
267
|
+
|
|
268
|
+
if self.log_level in ("DEBUG", "INFO"):
|
|
269
|
+
logger.log(
|
|
270
|
+
logging.DEBUG if self.log_level == "DEBUG" else logging.INFO,
|
|
271
|
+
f"Agno request completed: {request_info}",
|
|
272
|
+
)
|
|
273
|
+
|
|
274
|
+
# Return output unchanged
|
|
275
|
+
return run_output
|
|
276
|
+
|
|
277
|
+
def get_summary(self) -> dict[str, Any]:
|
|
278
|
+
"""Get summary of tracked requests (thread-safe)."""
|
|
279
|
+
with self._lock:
|
|
280
|
+
if not self._requests:
|
|
281
|
+
return {
|
|
282
|
+
"total_requests": 0,
|
|
283
|
+
"total_tokens": 0,
|
|
284
|
+
"alerts": len(self._alerts),
|
|
285
|
+
}
|
|
286
|
+
|
|
287
|
+
total_tokens = sum(r.get("total_tokens", 0) for r in self._requests)
|
|
288
|
+
|
|
289
|
+
return {
|
|
290
|
+
"total_requests": len(self._requests),
|
|
291
|
+
"total_tokens": total_tokens,
|
|
292
|
+
"average_tokens": total_tokens / len(self._requests) if self._requests else 0,
|
|
293
|
+
"alerts": len(self._alerts),
|
|
294
|
+
}
|
|
295
|
+
|
|
296
|
+
def reset(self) -> None:
|
|
297
|
+
"""Reset all tracked metrics (thread-safe)."""
|
|
298
|
+
with self._lock:
|
|
299
|
+
self._requests = []
|
|
300
|
+
self._alerts = []
|
|
301
|
+
|
|
302
|
+
|
|
303
|
+
def create_headroom_hooks(
    config: HeadroomConfig | None = None,
    mode: HeadroomMode = HeadroomMode.OPTIMIZE,
    model: str = "gpt-4o",
    log_level: str = "INFO",
    token_alert_threshold: int | None = None,
) -> tuple[HeadroomPreHook, HeadroomPostHook]:
    """Build a matched pre/post hook pair for Agno agents.

    Convenience factory so both hooks are constructed with a single,
    consistent configuration.

    Args:
        config: HeadroomConfig for optimization settings
        mode: HeadroomMode (AUDIT, OPTIMIZE, or SIMULATE)
        model: Default model name for token estimation
        log_level: Logging level for post-hook
        token_alert_threshold: Alert threshold for post-hook

    Returns:
        Tuple of (pre_hook, post_hook)

    Example:
        from agno.agent import Agent
        from agno.models.openai import OpenAIChat
        from headroom.integrations.agno import create_headroom_hooks

        pre_hook, post_hook = create_headroom_hooks(
            token_alert_threshold=10000,
        )

        agent = Agent(
            model=OpenAIChat(id="gpt-4o"),
            pre_hooks=[pre_hook],
            post_hooks=[post_hook],
        )
    """
    return (
        HeadroomPreHook(config=config, mode=mode, model=model),
        HeadroomPostHook(
            log_level=log_level,
            token_alert_threshold=token_alert_threshold,
        ),
    )
|