headroom-ai 0.2.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (114)
  1. headroom/__init__.py +212 -0
  2. headroom/cache/__init__.py +76 -0
  3. headroom/cache/anthropic.py +517 -0
  4. headroom/cache/base.py +342 -0
  5. headroom/cache/compression_feedback.py +613 -0
  6. headroom/cache/compression_store.py +814 -0
  7. headroom/cache/dynamic_detector.py +1026 -0
  8. headroom/cache/google.py +884 -0
  9. headroom/cache/openai.py +584 -0
  10. headroom/cache/registry.py +175 -0
  11. headroom/cache/semantic.py +451 -0
  12. headroom/ccr/__init__.py +77 -0
  13. headroom/ccr/context_tracker.py +582 -0
  14. headroom/ccr/mcp_server.py +319 -0
  15. headroom/ccr/response_handler.py +772 -0
  16. headroom/ccr/tool_injection.py +415 -0
  17. headroom/cli.py +219 -0
  18. headroom/client.py +977 -0
  19. headroom/compression/__init__.py +42 -0
  20. headroom/compression/detector.py +424 -0
  21. headroom/compression/handlers/__init__.py +22 -0
  22. headroom/compression/handlers/base.py +219 -0
  23. headroom/compression/handlers/code_handler.py +506 -0
  24. headroom/compression/handlers/json_handler.py +418 -0
  25. headroom/compression/masks.py +345 -0
  26. headroom/compression/universal.py +465 -0
  27. headroom/config.py +474 -0
  28. headroom/exceptions.py +192 -0
  29. headroom/integrations/__init__.py +159 -0
  30. headroom/integrations/agno/__init__.py +53 -0
  31. headroom/integrations/agno/hooks.py +345 -0
  32. headroom/integrations/agno/model.py +625 -0
  33. headroom/integrations/agno/providers.py +154 -0
  34. headroom/integrations/langchain/__init__.py +106 -0
  35. headroom/integrations/langchain/agents.py +326 -0
  36. headroom/integrations/langchain/chat_model.py +1002 -0
  37. headroom/integrations/langchain/langsmith.py +324 -0
  38. headroom/integrations/langchain/memory.py +319 -0
  39. headroom/integrations/langchain/providers.py +200 -0
  40. headroom/integrations/langchain/retriever.py +371 -0
  41. headroom/integrations/langchain/streaming.py +341 -0
  42. headroom/integrations/mcp/__init__.py +37 -0
  43. headroom/integrations/mcp/server.py +533 -0
  44. headroom/memory/__init__.py +37 -0
  45. headroom/memory/extractor.py +390 -0
  46. headroom/memory/fast_store.py +621 -0
  47. headroom/memory/fast_wrapper.py +311 -0
  48. headroom/memory/inline_extractor.py +229 -0
  49. headroom/memory/store.py +434 -0
  50. headroom/memory/worker.py +260 -0
  51. headroom/memory/wrapper.py +321 -0
  52. headroom/models/__init__.py +39 -0
  53. headroom/models/registry.py +687 -0
  54. headroom/parser.py +293 -0
  55. headroom/pricing/__init__.py +51 -0
  56. headroom/pricing/anthropic_prices.py +81 -0
  57. headroom/pricing/litellm_pricing.py +113 -0
  58. headroom/pricing/openai_prices.py +91 -0
  59. headroom/pricing/registry.py +188 -0
  60. headroom/providers/__init__.py +61 -0
  61. headroom/providers/anthropic.py +621 -0
  62. headroom/providers/base.py +131 -0
  63. headroom/providers/cohere.py +362 -0
  64. headroom/providers/google.py +427 -0
  65. headroom/providers/litellm.py +297 -0
  66. headroom/providers/openai.py +566 -0
  67. headroom/providers/openai_compatible.py +521 -0
  68. headroom/proxy/__init__.py +19 -0
  69. headroom/proxy/server.py +2683 -0
  70. headroom/py.typed +0 -0
  71. headroom/relevance/__init__.py +124 -0
  72. headroom/relevance/base.py +106 -0
  73. headroom/relevance/bm25.py +255 -0
  74. headroom/relevance/embedding.py +255 -0
  75. headroom/relevance/hybrid.py +259 -0
  76. headroom/reporting/__init__.py +5 -0
  77. headroom/reporting/generator.py +549 -0
  78. headroom/storage/__init__.py +41 -0
  79. headroom/storage/base.py +125 -0
  80. headroom/storage/jsonl.py +220 -0
  81. headroom/storage/sqlite.py +289 -0
  82. headroom/telemetry/__init__.py +91 -0
  83. headroom/telemetry/collector.py +764 -0
  84. headroom/telemetry/models.py +880 -0
  85. headroom/telemetry/toin.py +1579 -0
  86. headroom/tokenizer.py +80 -0
  87. headroom/tokenizers/__init__.py +75 -0
  88. headroom/tokenizers/base.py +210 -0
  89. headroom/tokenizers/estimator.py +198 -0
  90. headroom/tokenizers/huggingface.py +317 -0
  91. headroom/tokenizers/mistral.py +245 -0
  92. headroom/tokenizers/registry.py +398 -0
  93. headroom/tokenizers/tiktoken_counter.py +248 -0
  94. headroom/transforms/__init__.py +106 -0
  95. headroom/transforms/base.py +57 -0
  96. headroom/transforms/cache_aligner.py +357 -0
  97. headroom/transforms/code_compressor.py +1313 -0
  98. headroom/transforms/content_detector.py +335 -0
  99. headroom/transforms/content_router.py +1158 -0
  100. headroom/transforms/llmlingua_compressor.py +638 -0
  101. headroom/transforms/log_compressor.py +529 -0
  102. headroom/transforms/pipeline.py +297 -0
  103. headroom/transforms/rolling_window.py +350 -0
  104. headroom/transforms/search_compressor.py +365 -0
  105. headroom/transforms/smart_crusher.py +2682 -0
  106. headroom/transforms/text_compressor.py +259 -0
  107. headroom/transforms/tool_crusher.py +338 -0
  108. headroom/utils.py +215 -0
  109. headroom_ai-0.2.13.dist-info/METADATA +315 -0
  110. headroom_ai-0.2.13.dist-info/RECORD +114 -0
  111. headroom_ai-0.2.13.dist-info/WHEEL +4 -0
  112. headroom_ai-0.2.13.dist-info/entry_points.txt +2 -0
  113. headroom_ai-0.2.13.dist-info/licenses/LICENSE +190 -0
  114. headroom_ai-0.2.13.dist-info/licenses/NOTICE +43 -0
headroom/integrations/agno/model.py
@@ -0,0 +1,625 @@
+ """Agno model wrapper for Headroom optimization.
+
+ This module provides HeadroomAgnoModel, which wraps any Agno model
+ to apply Headroom context optimization before API calls.
+ """
+
+ from __future__ import annotations
+
+ import asyncio
+ import logging
+ import threading
+ import warnings
+ from collections.abc import AsyncIterator, Iterator
+ from dataclasses import dataclass, field
+ from datetime import datetime, timezone
+ from typing import Any
+ from uuid import uuid4
+
+ # Agno imports - these are optional dependencies
+ try:
+     from agno.models.base import Model
+     from agno.models.message import Message
+     from agno.models.response import ModelResponse
+
+     AGNO_AVAILABLE = True
+ except ImportError:
+     AGNO_AVAILABLE = False
+     Model = object  # type: ignore[misc,assignment]
+     Message = dict  # type: ignore[misc,assignment]
+     ModelResponse = dict  # type: ignore[misc,assignment]
+
+ from headroom import HeadroomConfig, HeadroomMode
+ from headroom.providers import OpenAIProvider
+ from headroom.transforms import TransformPipeline
+
+ from .providers import get_headroom_provider, get_model_name_from_agno
+
+ logger = logging.getLogger(__name__)
+
+
+ def _check_agno_available() -> None:
+     """Raise ImportError if Agno is not installed."""
+     if not AGNO_AVAILABLE:
+         raise ImportError("Agno is required for this integration. Install with: pip install agno")
+
+
+ def agno_available() -> bool:
+     """Check if Agno is installed."""
+     return AGNO_AVAILABLE
+
+
+ @dataclass
+ class OptimizationMetrics:
+     """Metrics from a single optimization pass."""
+
+     request_id: str
+     timestamp: datetime
+     tokens_before: int
+     tokens_after: int
+     tokens_saved: int
+     savings_percent: float
+     transforms_applied: list[str]
+     model: str
+
+
+ @dataclass
+ class HeadroomAgnoModel(Model):  # type: ignore[misc]
+     """Agno model wrapper that applies Headroom optimizations.
+
+     Extends agno.models.base.Model to be fully compatible with Agno Agent.
+     Wraps any Agno Model and automatically optimizes the context
+     before each API call. Works with OpenAIChat, Claude, Gemini, and
+     other Agno model types.
+
+     Example:
+         from agno.agent import Agent
+         from agno.models.openai import OpenAIChat
+         from headroom.integrations.agno import HeadroomAgnoModel
+
+         # Basic usage
+         model = OpenAIChat(id="gpt-4o")
+         optimized = HeadroomAgnoModel(wrapped_model=model)
+
+         # Use with agent
+         agent = Agent(model=optimized)
+         response = agent.run("Hello!")
+
+         # Access metrics
+         print(f"Saved {optimized.total_tokens_saved} tokens")
+
+         # With custom config
+         from headroom import HeadroomConfig, HeadroomMode
+         config = HeadroomConfig(default_mode=HeadroomMode.OPTIMIZE)
+         optimized = HeadroomAgnoModel(wrapped_model=model, headroom_config=config)
+
+     Attributes:
+         wrapped_model: The underlying Agno model
+         total_tokens_saved: Running total of tokens saved
+         metrics_history: List of OptimizationMetrics from recent calls
+     """
+
+     # Required by Model base class - we'll derive from wrapped model
+     id: str = field(default="headroom-wrapper")
+     name: str | None = field(default=None)
+     provider: str | None = field(default=None)
+
+     # HeadroomAgnoModel specific fields
+     wrapped_model: Any = field(default=None)
+     headroom_config: HeadroomConfig | None = field(default=None)
+     headroom_mode: HeadroomMode | None = field(default=None)
+     auto_detect_provider: bool = field(default=True)
+
+     # Internal state (not part of dataclass comparison)
+     _metrics_history: list[OptimizationMetrics] = field(
+         default_factory=list, repr=False, compare=False
+     )
+     _total_tokens_saved: int = field(default=0, repr=False, compare=False)
+     _pipeline: TransformPipeline | None = field(default=None, repr=False, compare=False)
+     _headroom_provider: Any = field(default=None, repr=False, compare=False)
+     _lock: threading.Lock = field(default_factory=threading.Lock, repr=False, compare=False)
+     _initialized: bool = field(default=False, repr=False, compare=False)
+
+     def __post_init__(self) -> None:
+         """Initialize HeadroomAgnoModel after dataclass construction."""
+         _check_agno_available()
+
+         if self.wrapped_model is None:
+             raise ValueError("wrapped_model cannot be None")
+
+         # Set id from wrapped model
+         if hasattr(self.wrapped_model, "id"):
+             self.id = f"headroom:{self.wrapped_model.id}"
+
+         # Set name and provider from wrapped model for compatibility
+         if self.name is None and hasattr(self.wrapped_model, "name"):
+             self.name = self.wrapped_model.name
+         if self.provider is None and hasattr(self.wrapped_model, "provider"):
+             self.provider = self.wrapped_model.provider
+
+         # Initialize config
+         if self.headroom_config is None:
+             self.headroom_config = HeadroomConfig()
+
+         # Handle deprecated mode parameter
+         if self.headroom_mode is not None:
+             warnings.warn(
+                 "The 'headroom_mode' parameter is deprecated. Use HeadroomConfig(default_mode=...) instead.",
+                 DeprecationWarning,
+                 stacklevel=2,
+             )
+
+         self._initialized = True
+
+         # Call parent __post_init__ if it exists
+         if hasattr(super(), "__post_init__"):
+             super().__post_init__()
+
+     # Forward attribute access to wrapped model for compatibility
+     def __getattr__(self, name: str) -> Any:
+         """Forward attribute access to wrapped model."""
+         # Avoid infinite recursion during initialization
+         if name.startswith("_") or not self.__dict__.get("_initialized", False):
+             raise AttributeError(f"'{type(self).__name__}' has no attribute '{name}'")
+         if name in (
+             "wrapped_model",
+             "headroom_config",
+             "headroom_mode",
+             "auto_detect_provider",
+             "pipeline",
+             "total_tokens_saved",
+             "metrics_history",
+             "id",
+             "name",
+             "provider",
+         ):
+             raise AttributeError(f"'{type(self).__name__}' has no attribute '{name}'")
+         return getattr(self.wrapped_model, name)
+
+     @property
+     def pipeline(self) -> TransformPipeline:
+         """Lazily initialize TransformPipeline (thread-safe)."""
+         if self._pipeline is None:
+             with self._lock:
+                 # Double-check after acquiring lock
+                 if self._pipeline is None:
+                     if self.auto_detect_provider:
+                         self._headroom_provider = get_headroom_provider(self.wrapped_model)
+                         logger.debug(
+                             f"Auto-detected provider: {self._headroom_provider.__class__.__name__}"
+                         )
+                     else:
+                         self._headroom_provider = OpenAIProvider()
+                     self._pipeline = TransformPipeline(
+                         config=self.headroom_config,
+                         provider=self._headroom_provider,
+                     )
+         return self._pipeline
+
+     @property
+     def total_tokens_saved(self) -> int:
+         """Total tokens saved across all calls."""
+         return self._total_tokens_saved
+
+     @property
+     def metrics_history(self) -> list[OptimizationMetrics]:
+         """History of optimization metrics."""
+         return self._metrics_history.copy()
+
+     def _convert_messages_to_openai(self, messages: list[Any]) -> list[dict[str, Any]]:
+         """Convert Agno messages to OpenAI format for Headroom."""
+         result = []
+         for msg in messages:
+             # Handle Agno Message objects
+             if hasattr(msg, "role") and hasattr(msg, "content"):
+                 entry: dict[str, Any] = {
+                     "role": msg.role,
+                     "content": msg.content if msg.content is not None else "",
+                 }
+                 # Handle tool calls
+                 if hasattr(msg, "tool_calls") and msg.tool_calls:
+                     entry["tool_calls"] = msg.tool_calls
+                 # Handle tool call ID for tool responses
+                 if hasattr(msg, "tool_call_id") and msg.tool_call_id:
+                     entry["tool_call_id"] = msg.tool_call_id
+                 result.append(entry)
+             # Handle dict format
+             elif isinstance(msg, dict):
+                 result.append(msg.copy())
+             else:
+                 # Try to extract content
+                 content = str(msg) if msg is not None else ""
+                 result.append({"role": "user", "content": content})
+         return result
+
+     def _convert_messages_from_openai(self, messages: list[dict[str, Any]]) -> list[Any]:
+         """Convert OpenAI format messages back to Agno format.
+
+         Note: Agno typically accepts OpenAI-format dicts directly,
+         so we may not need full conversion.
+         """
+         # Agno models generally accept OpenAI-format messages
+         # Return as-is for compatibility
+         return messages
+
+     def _optimize_messages(self, messages: list[Any]) -> tuple[list[Any], OptimizationMetrics]:
+         """Apply Headroom optimization to messages.
+
+         Thread-safe with fallback on pipeline errors.
+         """
+         request_id = str(uuid4())
+
+         # Convert to OpenAI format
+         openai_messages = self._convert_messages_to_openai(messages)
+
+         # Handle empty messages gracefully
+         if not openai_messages:
+             metrics = OptimizationMetrics(
+                 request_id=request_id,
+                 timestamp=datetime.now(timezone.utc),
+                 tokens_before=0,
+                 tokens_after=0,
+                 tokens_saved=0,
+                 savings_percent=0,
+                 transforms_applied=[],
+                 model=get_model_name_from_agno(self.wrapped_model),
+             )
+             return openai_messages, metrics
+
+         # Get model name from wrapped model
+         model = get_model_name_from_agno(self.wrapped_model)
+
+         # Ensure pipeline is initialized
+         _ = self.pipeline
+
+         # Get model context limit
+         model_limit = (
+             self._headroom_provider.get_context_limit(model) if self._headroom_provider else 128000
+         )
+
+         try:
+             # Apply Headroom transforms via pipeline
+             result = self.pipeline.apply(
+                 messages=openai_messages,
+                 model=model,
+                 model_limit=model_limit,
+             )
+             optimized = result.messages
+             tokens_before = result.tokens_before
+             tokens_after = result.tokens_after
+             transforms_applied = result.transforms_applied
+         except (
+             ValueError,
+             TypeError,
+             AttributeError,
+             RuntimeError,
+             KeyError,
+             IndexError,
+             ImportError,
+             OSError,
+         ) as e:
+             # Fallback to original messages on pipeline error
+             # Log at warning level (degraded behavior, not critical failure)
+             logger.warning(
+                 f"Headroom optimization failed, using original messages: {type(e).__name__}: {e}"
+             )
+             optimized = openai_messages
+             # Estimate token count for unoptimized messages (rough approximation)
+             # Note: This uses ~4 chars/token which is approximate for English text
+             tokens_before = sum(len(str(m.get("content", ""))) // 4 for m in openai_messages)
+             tokens_after = tokens_before  # No optimization occurred
+             transforms_applied = ["fallback:error"]
+
+         # Create metrics
+         tokens_saved = max(0, tokens_before - tokens_after)  # Never negative
+         metrics = OptimizationMetrics(
+             request_id=request_id,
+             timestamp=datetime.now(timezone.utc),
+             tokens_before=tokens_before,
+             tokens_after=tokens_after,
+             tokens_saved=tokens_saved,
+             savings_percent=(tokens_saved / tokens_before * 100 if tokens_before > 0 else 0),
+             transforms_applied=transforms_applied,
+             model=model,
+         )
+
+         # Track metrics (thread-safe)
+         with self._lock:
+             self._metrics_history.append(metrics)
+             self._total_tokens_saved += metrics.tokens_saved
+
+             # Keep only last 100 metrics
+             if len(self._metrics_history) > 100:
+                 self._metrics_history = self._metrics_history[-100:]
+
+         # Convert back (Agno accepts OpenAI format)
+         optimized_messages = self._convert_messages_from_openai(optimized)
+
+         return optimized_messages, metrics
+
+     def response(self, messages: list[Any], **kwargs: Any) -> Any:  # type: ignore[override]
+         """Generate response with Headroom optimization.
+
+         This is the core method that Agno agents call.
+         """
+         # Optimize messages
+         optimized_messages, metrics = self._optimize_messages(messages)
+
+         logger.info(
+             f"Headroom optimized: {metrics.tokens_before} -> {metrics.tokens_after} tokens "
+             f"({metrics.savings_percent:.1f}% saved)"
+         )
+
+         # Call wrapped model with optimized messages
+         return self.wrapped_model.response(optimized_messages, **kwargs)
+
+     def response_stream(self, messages: list[Any], **kwargs: Any) -> Iterator[Any]:  # type: ignore[override]
+         """Stream response with Headroom optimization."""
+         # Optimize messages
+         optimized_messages, metrics = self._optimize_messages(messages)
+
+         logger.info(
+             f"Headroom optimized (streaming): {metrics.tokens_before} -> "
+             f"{metrics.tokens_after} tokens"
+         )
+
+         # Stream from wrapped model
+         yield from self.wrapped_model.response_stream(optimized_messages, **kwargs)
+
+     async def aresponse(self, messages: list[Any], **kwargs: Any) -> Any:  # type: ignore[override]
+         """Async generate response with Headroom optimization."""
+         # Run optimization in executor (CPU-bound)
+         loop = asyncio.get_running_loop()
+         optimized_messages, metrics = await loop.run_in_executor(
+             None, self._optimize_messages, messages
+         )
+
+         logger.info(
+             f"Headroom optimized (async): {metrics.tokens_before} -> {metrics.tokens_after} tokens "
+             f"({metrics.savings_percent:.1f}% saved)"
+         )
+
+         # Call wrapped model's async method
+         if hasattr(self.wrapped_model, "aresponse"):
+             return await self.wrapped_model.aresponse(optimized_messages, **kwargs)
+         else:
+             # Fallback to sync in executor (non-blocking)
+             return await loop.run_in_executor(
+                 None, lambda: self.wrapped_model.response(optimized_messages, **kwargs)
+             )
+
+     async def aresponse_stream(self, messages: list[Any], **kwargs: Any) -> AsyncIterator[Any]:  # type: ignore[override]
+         """Async stream response with Headroom optimization."""
+         # Run optimization in executor (CPU-bound)
+         loop = asyncio.get_running_loop()
+         optimized_messages, metrics = await loop.run_in_executor(
+             None, self._optimize_messages, messages
+         )
+
+         logger.info(
+             f"Headroom optimized (async streaming): {metrics.tokens_before} -> "
+             f"{metrics.tokens_after} tokens"
+         )
+
+         # Async stream from wrapped model
+         if hasattr(self.wrapped_model, "aresponse_stream"):
+             async for chunk in self.wrapped_model.aresponse_stream(optimized_messages, **kwargs):
+                 yield chunk
+         else:
+             # Fallback: wrap sync streaming in async iterator (non-blocking)
+             # Run the entire sync iteration in executor to avoid blocking event loop
+             def _sync_stream() -> list[Any]:
+                 return list(self.wrapped_model.response_stream(optimized_messages, **kwargs))
+
+             chunks = await loop.run_in_executor(None, _sync_stream)
+             for chunk in chunks:
+                 yield chunk
+
+     def get_savings_summary(self) -> dict[str, Any]:
+         """Get summary of token savings."""
+         if not self._metrics_history:
+             return {
+                 "total_requests": 0,
+                 "total_tokens_saved": 0,
+                 "average_savings_percent": 0,
+             }
+
+         return {
+             "total_requests": len(self._metrics_history),
+             "total_tokens_saved": self._total_tokens_saved,
+             "average_savings_percent": sum(m.savings_percent for m in self._metrics_history)
+             / len(self._metrics_history),
+             "total_tokens_before": sum(m.tokens_before for m in self._metrics_history),
+             "total_tokens_after": sum(m.tokens_after for m in self._metrics_history),
+         }
+
+     def reset(self) -> None:
+         """Reset all tracked metrics (thread-safe).
+
+         Clears the metrics history and resets the total tokens saved counter.
+         Useful for starting fresh measurements or between test runs.
+         """
+         with self._lock:
+             self._metrics_history = []
+             self._total_tokens_saved = 0
+
+     # =========================================================================
+     # Abstract method implementations required by agno.models.base.Model
+     # These delegate to the wrapped model after applying Headroom optimization
+     # =========================================================================
+
+     def invoke(self, messages: list[Any], **kwargs: Any) -> Any:
+         """Invoke the wrapped model with optimized messages.
+
+         This is required by agno.models.base.Model abstract interface.
+         """
+         # Optimize messages before invoking
+         optimized_messages, metrics = self._optimize_messages(messages)
+
+         logger.info(
+             f"Headroom optimized (invoke): {metrics.tokens_before} -> {metrics.tokens_after} tokens "
+             f"({metrics.savings_percent:.1f}% saved)"
+         )
+
+         # Delegate to wrapped model
+         return self.wrapped_model.invoke(optimized_messages, **kwargs)
+
+     async def ainvoke(self, messages: list[Any], **kwargs: Any) -> Any:
+         """Async invoke the wrapped model with optimized messages.
+
+         This is required by agno.models.base.Model abstract interface.
+         """
+         # Run optimization in executor (CPU-bound)
+         loop = asyncio.get_running_loop()
+         optimized_messages, metrics = await loop.run_in_executor(
+             None, self._optimize_messages, messages
+         )
+
+         logger.info(
+             f"Headroom optimized (ainvoke): {metrics.tokens_before} -> {metrics.tokens_after} tokens "
+             f"({metrics.savings_percent:.1f}% saved)"
+         )
+
+         # Delegate to wrapped model
+         if hasattr(self.wrapped_model, "ainvoke"):
+             return await self.wrapped_model.ainvoke(optimized_messages, **kwargs)
+         else:
+             # Fallback to sync in executor
+             return await loop.run_in_executor(
+                 None, lambda: self.wrapped_model.invoke(optimized_messages, **kwargs)
+             )
+
+     def invoke_stream(self, messages: list[Any], **kwargs: Any) -> Iterator[Any]:
+         """Stream invoke the wrapped model with optimized messages.
+
+         This is required by agno.models.base.Model abstract interface.
+         """
+         # Optimize messages before streaming
+         optimized_messages, metrics = self._optimize_messages(messages)
+
+         logger.info(
+             f"Headroom optimized (invoke_stream): {metrics.tokens_before} -> {metrics.tokens_after} tokens "
+             f"({metrics.savings_percent:.1f}% saved)"
+         )
+
+         # Delegate to wrapped model
+         yield from self.wrapped_model.invoke_stream(optimized_messages, **kwargs)
+
+     async def ainvoke_stream(self, messages: list[Any], **kwargs: Any) -> AsyncIterator[Any]:
+         """Async stream invoke the wrapped model with optimized messages.
+
+         This is required by agno.models.base.Model abstract interface.
+         """
+         # Run optimization in executor (CPU-bound)
+         loop = asyncio.get_running_loop()
+         optimized_messages, metrics = await loop.run_in_executor(
+             None, self._optimize_messages, messages
+         )
+
+         logger.info(
+             f"Headroom optimized (ainvoke_stream): {metrics.tokens_before} -> {metrics.tokens_after} tokens "
+             f"({metrics.savings_percent:.1f}% saved)"
+         )
+
+         # Delegate to wrapped model
+         if hasattr(self.wrapped_model, "ainvoke_stream"):
+             async for chunk in self.wrapped_model.ainvoke_stream(optimized_messages, **kwargs):
+                 yield chunk
+         else:
+             # Fallback: wrap sync streaming
+             def _sync_stream() -> list[Any]:
+                 return list(self.wrapped_model.invoke_stream(optimized_messages, **kwargs))
+
+             chunks = await loop.run_in_executor(None, _sync_stream)
+             for chunk in chunks:
+                 yield chunk
+
+     def _parse_provider_response(self, response: Any, **kwargs: Any) -> Any:
+         """Parse provider response - delegates to wrapped model.
+
+         This is required by agno.models.base.Model abstract interface.
+         """
+         return self.wrapped_model._parse_provider_response(response, **kwargs)
+
+     def _parse_provider_response_delta(self, response: Any) -> Any:
+         """Parse streaming response delta - delegates to wrapped model.
+
+         This is required by agno.models.base.Model abstract interface.
+         """
+         return self.wrapped_model._parse_provider_response_delta(response)
+
+
+ def optimize_messages(
+     messages: list[Any],
+     config: HeadroomConfig | None = None,
+     mode: HeadroomMode = HeadroomMode.OPTIMIZE,
+     model: str = "gpt-4o",
+ ) -> tuple[list[dict[str, Any]], dict[str, Any]]:
+     """Standalone function to optimize Agno messages.
+
+     Use this for manual optimization when you need fine-grained control.
+
+     Args:
+         messages: List of Agno Message objects or dicts
+         config: HeadroomConfig for optimization settings
+         mode: HeadroomMode (AUDIT, OPTIMIZE, or SIMULATE)
+         model: Model name for token estimation
+
+     Returns:
+         Tuple of (optimized_messages, metrics_dict)
+
+     Example:
+         from headroom.integrations.agno import optimize_messages
+
+         messages = [
+             {"role": "system", "content": "You are helpful."},
+             {"role": "user", "content": "What is 2+2?"},
+         ]
+
+         optimized, metrics = optimize_messages(messages)
+         print(f"Saved {metrics['tokens_saved']} tokens")
+     """
+     _check_agno_available()
+
+     config = config or HeadroomConfig()
+     provider = OpenAIProvider()
+     pipeline = TransformPipeline(config=config, provider=provider)
+
+     # Convert to OpenAI format
+     openai_messages = []
+     for msg in messages:
+         if hasattr(msg, "role") and hasattr(msg, "content"):
+             entry: dict[str, Any] = {"role": msg.role, "content": msg.content or ""}
+             if hasattr(msg, "tool_calls") and msg.tool_calls:
+                 entry["tool_calls"] = msg.tool_calls
+             if hasattr(msg, "tool_call_id") and msg.tool_call_id:
+                 entry["tool_call_id"] = msg.tool_call_id
+             openai_messages.append(entry)
+         elif isinstance(msg, dict):
+             openai_messages.append(msg.copy())
+         else:
+             openai_messages.append({"role": "user", "content": str(msg)})
+
+     # Get model context limit
+     model_limit = provider.get_context_limit(model)
+
+     # Apply transforms
+     result = pipeline.apply(
+         messages=openai_messages,
+         model=model,
+         model_limit=model_limit,
+     )
+
+     metrics = {
+         "tokens_before": result.tokens_before,
+         "tokens_after": result.tokens_after,
+         "tokens_saved": result.tokens_before - result.tokens_after,
+         "savings_percent": (
+             (result.tokens_before - result.tokens_after) / result.tokens_before * 100
+             if result.tokens_before > 0
+             else 0
+         ),
+         "transforms_applied": result.transforms_applied,
+     }
+
+     return result.messages, metrics
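
For reference, a minimal end-to-end sketch assembled from the class docstring and the public methods in the diff above (Agent and OpenAIChat come from agno, as in the docstring example; this assumes agno and headroom-ai are installed and provider credentials are configured):

from agno.agent import Agent
from agno.models.openai import OpenAIChat

from headroom import HeadroomConfig, HeadroomMode
from headroom.integrations.agno import HeadroomAgnoModel

# Wrap any Agno model; the Headroom provider is auto-detected by default.
model = HeadroomAgnoModel(
    wrapped_model=OpenAIChat(id="gpt-4o"),
    headroom_config=HeadroomConfig(default_mode=HeadroomMode.OPTIMIZE),
)

agent = Agent(model=model)
agent.run("Hello!")

# Metrics accumulate per call; history is capped at the last 100 entries.
summary = model.get_savings_summary()
print(summary["total_requests"], summary["total_tokens_saved"])

# Clear counters between measurement runs.
model.reset()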
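
The async entry points (aresponse, ainvoke, and their streaming variants) offload _optimize_messages to the default thread-pool executor because the transform pipeline is CPU-bound, then delegate to the wrapped model's async method when it has one. A sketch of driving the wrapper directly from asyncio; passing plain OpenAI-format dicts is an assumption based on _convert_messages_to_openai, which copies dicts through unchanged:

import asyncio

from agno.models.openai import OpenAIChat
from headroom.integrations.agno import HeadroomAgnoModel

async def main() -> None:
    model = HeadroomAgnoModel(wrapped_model=OpenAIChat(id="gpt-4o"))

    # Optimization runs in the executor; the call then awaits the wrapped
    # model's aresponse(), falling back to sync response() in the executor
    # when the wrapped model exposes no async API.
    reply = await model.aresponse([{"role": "user", "content": "Hello!"}])
    print(reply)

asyncio.run(main())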