headroom-ai 0.2.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (114) hide show
  1. headroom/__init__.py +212 -0
  2. headroom/cache/__init__.py +76 -0
  3. headroom/cache/anthropic.py +517 -0
  4. headroom/cache/base.py +342 -0
  5. headroom/cache/compression_feedback.py +613 -0
  6. headroom/cache/compression_store.py +814 -0
  7. headroom/cache/dynamic_detector.py +1026 -0
  8. headroom/cache/google.py +884 -0
  9. headroom/cache/openai.py +584 -0
  10. headroom/cache/registry.py +175 -0
  11. headroom/cache/semantic.py +451 -0
  12. headroom/ccr/__init__.py +77 -0
  13. headroom/ccr/context_tracker.py +582 -0
  14. headroom/ccr/mcp_server.py +319 -0
  15. headroom/ccr/response_handler.py +772 -0
  16. headroom/ccr/tool_injection.py +415 -0
  17. headroom/cli.py +219 -0
  18. headroom/client.py +977 -0
  19. headroom/compression/__init__.py +42 -0
  20. headroom/compression/detector.py +424 -0
  21. headroom/compression/handlers/__init__.py +22 -0
  22. headroom/compression/handlers/base.py +219 -0
  23. headroom/compression/handlers/code_handler.py +506 -0
  24. headroom/compression/handlers/json_handler.py +418 -0
  25. headroom/compression/masks.py +345 -0
  26. headroom/compression/universal.py +465 -0
  27. headroom/config.py +474 -0
  28. headroom/exceptions.py +192 -0
  29. headroom/integrations/__init__.py +159 -0
  30. headroom/integrations/agno/__init__.py +53 -0
  31. headroom/integrations/agno/hooks.py +345 -0
  32. headroom/integrations/agno/model.py +625 -0
  33. headroom/integrations/agno/providers.py +154 -0
  34. headroom/integrations/langchain/__init__.py +106 -0
  35. headroom/integrations/langchain/agents.py +326 -0
  36. headroom/integrations/langchain/chat_model.py +1002 -0
  37. headroom/integrations/langchain/langsmith.py +324 -0
  38. headroom/integrations/langchain/memory.py +319 -0
  39. headroom/integrations/langchain/providers.py +200 -0
  40. headroom/integrations/langchain/retriever.py +371 -0
  41. headroom/integrations/langchain/streaming.py +341 -0
  42. headroom/integrations/mcp/__init__.py +37 -0
  43. headroom/integrations/mcp/server.py +533 -0
  44. headroom/memory/__init__.py +37 -0
  45. headroom/memory/extractor.py +390 -0
  46. headroom/memory/fast_store.py +621 -0
  47. headroom/memory/fast_wrapper.py +311 -0
  48. headroom/memory/inline_extractor.py +229 -0
  49. headroom/memory/store.py +434 -0
  50. headroom/memory/worker.py +260 -0
  51. headroom/memory/wrapper.py +321 -0
  52. headroom/models/__init__.py +39 -0
  53. headroom/models/registry.py +687 -0
  54. headroom/parser.py +293 -0
  55. headroom/pricing/__init__.py +51 -0
  56. headroom/pricing/anthropic_prices.py +81 -0
  57. headroom/pricing/litellm_pricing.py +113 -0
  58. headroom/pricing/openai_prices.py +91 -0
  59. headroom/pricing/registry.py +188 -0
  60. headroom/providers/__init__.py +61 -0
  61. headroom/providers/anthropic.py +621 -0
  62. headroom/providers/base.py +131 -0
  63. headroom/providers/cohere.py +362 -0
  64. headroom/providers/google.py +427 -0
  65. headroom/providers/litellm.py +297 -0
  66. headroom/providers/openai.py +566 -0
  67. headroom/providers/openai_compatible.py +521 -0
  68. headroom/proxy/__init__.py +19 -0
  69. headroom/proxy/server.py +2683 -0
  70. headroom/py.typed +0 -0
  71. headroom/relevance/__init__.py +124 -0
  72. headroom/relevance/base.py +106 -0
  73. headroom/relevance/bm25.py +255 -0
  74. headroom/relevance/embedding.py +255 -0
  75. headroom/relevance/hybrid.py +259 -0
  76. headroom/reporting/__init__.py +5 -0
  77. headroom/reporting/generator.py +549 -0
  78. headroom/storage/__init__.py +41 -0
  79. headroom/storage/base.py +125 -0
  80. headroom/storage/jsonl.py +220 -0
  81. headroom/storage/sqlite.py +289 -0
  82. headroom/telemetry/__init__.py +91 -0
  83. headroom/telemetry/collector.py +764 -0
  84. headroom/telemetry/models.py +880 -0
  85. headroom/telemetry/toin.py +1579 -0
  86. headroom/tokenizer.py +80 -0
  87. headroom/tokenizers/__init__.py +75 -0
  88. headroom/tokenizers/base.py +210 -0
  89. headroom/tokenizers/estimator.py +198 -0
  90. headroom/tokenizers/huggingface.py +317 -0
  91. headroom/tokenizers/mistral.py +245 -0
  92. headroom/tokenizers/registry.py +398 -0
  93. headroom/tokenizers/tiktoken_counter.py +248 -0
  94. headroom/transforms/__init__.py +106 -0
  95. headroom/transforms/base.py +57 -0
  96. headroom/transforms/cache_aligner.py +357 -0
  97. headroom/transforms/code_compressor.py +1313 -0
  98. headroom/transforms/content_detector.py +335 -0
  99. headroom/transforms/content_router.py +1158 -0
  100. headroom/transforms/llmlingua_compressor.py +638 -0
  101. headroom/transforms/log_compressor.py +529 -0
  102. headroom/transforms/pipeline.py +297 -0
  103. headroom/transforms/rolling_window.py +350 -0
  104. headroom/transforms/search_compressor.py +365 -0
  105. headroom/transforms/smart_crusher.py +2682 -0
  106. headroom/transforms/text_compressor.py +259 -0
  107. headroom/transforms/tool_crusher.py +338 -0
  108. headroom/utils.py +215 -0
  109. headroom_ai-0.2.13.dist-info/METADATA +315 -0
  110. headroom_ai-0.2.13.dist-info/RECORD +114 -0
  111. headroom_ai-0.2.13.dist-info/WHEEL +4 -0
  112. headroom_ai-0.2.13.dist-info/entry_points.txt +2 -0
  113. headroom_ai-0.2.13.dist-info/licenses/LICENSE +190 -0
  114. headroom_ai-0.2.13.dist-info/licenses/NOTICE +43 -0
@@ -0,0 +1,259 @@
1
+ """Generic text compressor for plain text content.
2
+
3
+ This module provides a fallback compressor for plain text that doesn't match
4
+ any specialized format (search results, logs, code, diffs). Uses line-based
5
+ sampling with anchor preservation.
6
+
7
+ Compression Strategy:
8
+ 1. Identify anchor lines (contain context keywords)
9
+ 2. Keep first N and last M lines
10
+ 3. Sample from middle based on line importance
11
+ 4. Add summary of omitted content
12
+ """
13
+
14
+ from __future__ import annotations
15
+
16
+ import re
17
+ from dataclasses import dataclass, field
18
+
19
+
20
@dataclass
class TextCompressorConfig:
    """Settings that control how ``TextCompressor`` trims plain text.

    The field order below is also the positional constructor order.
    """

    # --- Line budget -------------------------------------------------------
    # Leading lines that are always retained.
    keep_first_lines: int = 10
    # Trailing lines that are always retained.
    keep_last_lines: int = 10
    # Inputs with at most this many lines are returned untouched; it is also
    # the budget used when picking extra lines from the middle.
    max_total_lines: int = 50

    # --- Sampling ----------------------------------------------------------
    # Stride used when sampling the middle section to fill leftover budget.
    sample_every_n_lines: int = 10

    # --- Anchor detection --------------------------------------------------
    # Keywords (matched case-insensitively as substrings) that raise a
    # line's importance score.
    anchor_keywords: list[str] = field(default_factory=list)
    # When true, lines matching the compressor's built-in "important"
    # patterns receive a score boost.
    boost_pattern_lines: bool = True

    # --- CCR integration ---------------------------------------------------
    # Whether the original text may be stored in the compression store.
    enable_ccr: bool = True
    # Minimum number of input lines before storing is attempted.
    min_lines_for_ccr: int = 100
39
+
40
+
41
class TextCompressor:
    """Compress generic plain text by keeping only its most relevant lines.

    Every line is scored for importance; the head and tail of the text are
    always kept, and any leftover line budget is filled first with
    high-scoring lines and then with evenly sampled middle lines.  Runs of
    dropped lines are replaced by ``[... N lines omitted ...]`` markers.

    Example:
        >>> compressor = TextCompressor()
        >>> result = compressor.compress(large_text, context="find errors")
        >>> print(result.compressed)
    """

    # Patterns that indicate important lines
    _IMPORTANT_PATTERNS = [
        re.compile(r"\b(error|exception|fail|warning)\b", re.IGNORECASE),
        re.compile(r"\b(important|note|todo|fixme)\b", re.IGNORECASE),
        re.compile(r"^#+\s"),  # Markdown headers
        re.compile(r"^\*\*"),  # Bold text
        re.compile(r"^>\s"),  # Quotes
    ]

    def __init__(self, config: TextCompressorConfig | None = None):
        """Initialize text compressor.

        Args:
            config: Compression configuration; defaults are used when omitted.
        """
        self.config = config or TextCompressorConfig()

    def compress(self, content: str, context: str = "") -> TextCompressionResult:
        """Compress text content.

        Args:
            content: Raw text content.
            context: User query context; its words boost matching lines.

        Returns:
            TextCompressionResult with compressed output.  Inputs that have at
            most ``max_total_lines`` lines are returned unchanged with a
            ratio of 1.0.
        """
        lines = content.split("\n")
        line_count = len(lines)

        if line_count <= self.config.max_total_lines:
            return TextCompressionResult(
                compressed=content,
                original=content,
                original_line_count=line_count,
                compressed_line_count=line_count,
                compression_ratio=1.0,
            )

        scored_lines = self._score_lines(lines, context)
        selected = self._select_lines(scored_lines, lines)
        compressed = self._format_output(selected, line_count)

        # Character-based ratio, measured before any CCR footer is appended.
        ratio = len(compressed) / max(len(content), 1)

        # Store in CCR only when the input is large and the saving significant.
        cache_key = None
        if (
            self.config.enable_ccr
            and line_count >= self.config.min_lines_for_ccr
            and ratio < 0.7
        ):
            cache_key = self._store_in_ccr(content, compressed, line_count)
            if cache_key:
                compressed += f"\n[{line_count} lines compressed. hash={cache_key}]"

        return TextCompressionResult(
            compressed=compressed,
            original=content,
            original_line_count=line_count,
            compressed_line_count=len(selected),
            compression_ratio=ratio,
            cache_key=cache_key,
        )

    def _score_lines(self, lines: list[str], context: str) -> list[tuple[int, str, float]]:
        """Score lines by importance.

        Args:
            lines: The input split into lines.
            context: Query text; whitespace-separated words longer than two
                characters are matched as case-insensitive substrings.

        Returns:
            One ``(index, line, score)`` tuple per input line, with the score
            capped at 1.0.
        """
        # Short words ("a", "to", ...) never contribute, so drop them once
        # here instead of re-testing their length for every line.
        context_words = {word for word in context.lower().split() if len(word) > 2}
        anchor_keywords = {k.lower() for k in self.config.anchor_keywords}
        boost_patterns = self.config.boost_pattern_lines

        scored: list[tuple[int, str, float]] = []

        for i, line in enumerate(lines):
            score = 0.0
            line_lower = line.lower()

            # Boost if contains context words
            for word in context_words:
                if word in line_lower:
                    score += 0.3

            # Boost if contains anchor keywords
            for keyword in anchor_keywords:
                if keyword in line_lower:
                    score += 0.4

            # Boost once if the line matches any important pattern
            if boost_patterns and any(p.search(line) for p in self._IMPORTANT_PATTERNS):
                score += 0.2

            # Small boost for non-empty lines
            if line.strip():
                score += 0.1

            scored.append((i, line, min(1.0, score)))

        return scored

    def _select_lines(
        self, scored_lines: list[tuple[int, str, float]], original_lines: list[str]
    ) -> list[tuple[int, str]]:
        """Select lines to keep.

        The first ``keep_first_lines`` and last ``keep_last_lines`` lines are
        always kept.  Whatever is left of the ``max_total_lines`` budget is
        filled with lines scoring at least 0.3 (best first), then with every
        ``sample_every_n_lines``-th line of the middle section.

        Returns:
            ``(index, line)`` pairs sorted by line index.
        """
        cfg = self.config
        total = len(scored_lines)

        # Head and tail are unconditional.
        selected_indices: set[int] = set(range(min(cfg.keep_first_lines, total)))
        selected_indices.update(range(max(0, total - cfg.keep_last_lines), total))

        # Clamp at zero: head + tail may already exceed the budget, and a
        # negative count used as a slice bound would mean "all but the last
        # N" rather than "none".
        remaining_slots = max(0, cfg.max_total_lines - len(selected_indices))

        # Add high-scoring lines
        if remaining_slots > 0:
            high_score_lines = [
                (idx, score)
                for idx, _line, score in scored_lines
                if score >= 0.3 and idx not in selected_indices
            ]
            # Stable sort: ties keep their original (top-to-bottom) order.
            high_score_lines.sort(key=lambda item: item[1], reverse=True)
            for idx, _score in high_score_lines[:remaining_slots]:
                selected_indices.add(idx)
                remaining_slots -= 1

        # Sample from remaining middle lines.  A non-positive stride disables
        # sampling instead of making range() raise or walk backwards.
        step = cfg.sample_every_n_lines
        if remaining_slots > 0 and step > 0:
            middle_start = max(0, cfg.keep_first_lines)
            middle_end = min(total, total - cfg.keep_last_lines)

            for i in range(middle_start, middle_end, step):
                if i not in selected_indices:
                    selected_indices.add(i)
                    remaining_slots -= 1
                    if remaining_slots <= 0:
                        break

        # Sort by line number and return
        return [(i, original_lines[i]) for i in sorted(selected_indices)]

    def _format_output(self, selected: list[tuple[int, str]], total_lines: int) -> str:
        """Format selected lines with ellipsis markers.

        Args:
            selected: ``(index, line)`` pairs in ascending index order.
            total_lines: Number of lines in the original text.

        Returns:
            The kept lines joined by newlines, with an
            ``[... N lines omitted ...]`` marker wherever lines were dropped:
            before the first kept line, between kept lines, and after the
            last one.
        """
        if not selected:
            return f"[{total_lines} lines omitted]"

        output_lines: list[str] = []
        # Starting at -1 makes a gap before the first kept line show up the
        # same way as any interior gap.
        prev_idx = -1

        for idx, line in selected:
            gap = idx - prev_idx - 1
            if gap > 0:
                output_lines.append(f"[... {gap} lines omitted ...]")

            output_lines.append(line)
            prev_idx = idx

        # Add trailing ellipsis if needed
        trailing_gap = total_lines - prev_idx - 1
        if trailing_gap > 0:
            output_lines.append(f"[... {trailing_gap} lines omitted ...]")

        return "\n".join(output_lines)

    def _store_in_ccr(self, original: str, compressed: str, original_count: int) -> str | None:
        """Store original in CCR for later retrieval.

        Returns:
            Whatever the compression store's ``store`` call returns (used by
            ``compress`` as the cache key), or ``None`` when the store cannot
            be imported or storing fails.
        """
        try:
            # Imported lazily so the compressor still works without the store.
            from ..cache.compression_store import get_compression_store

            store = get_compression_store()
            return store.store(
                original,
                compressed,
                original_item_count=original_count,
            )
        except Exception:
            # Deliberately best-effort (ImportError included): compression
            # must succeed even if the original cannot be stored.
            return None
237
+
238
+
239
@dataclass
class TextCompressionResult:
    """Outcome of a single text compression pass."""

    # Text after compression (equal to ``original`` when nothing was dropped).
    compressed: str
    # The untouched input text.
    original: str
    # Number of lines in ``original``.
    original_line_count: int
    # Number of original lines that were kept.
    compressed_line_count: int
    # Compressed length divided by original length.
    compression_ratio: float
    # Key under which the original was stored, if it was stored.
    cache_key: str | None = None

    @property
    def tokens_saved_estimate(self) -> int:
        """Rough token saving, assuming four characters per token; never negative."""
        saved_tokens = (len(self.original) - len(self.compressed)) // 4
        return saved_tokens if saved_tokens > 0 else 0

    @property
    def lines_omitted(self) -> int:
        """How many original lines are absent from the compressed text."""
        kept = self.compressed_line_count
        return self.original_line_count - kept
@@ -0,0 +1,338 @@
1
+ """Tool output compression transform for Headroom SDK."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import logging
6
+ from typing import Any
7
+
8
+ from ..config import ToolCrusherConfig, TransformResult
9
+ from ..tokenizer import Tokenizer
10
+ from ..utils import (
11
+ compute_short_hash,
12
+ create_tool_digest_marker,
13
+ deep_copy_messages,
14
+ safe_json_dumps,
15
+ safe_json_loads,
16
+ )
17
+ from .base import Transform
18
+
19
+ logger = logging.getLogger(__name__)
20
+
21
+
22
class ToolCrusher(Transform):
    """
    Compress tool output to reduce token usage.

    This transform applies conservative compression:
    - Only compresses tool outputs whose token count exceeds
      ``min_tokens_to_crush``
    - Keeps every dict key above the depth limit
    - Truncates arrays to ``max_array_items`` (plus a truncation marker)
    - Truncates long strings
    - Replaces dicts/lists at the depth limit with a size summary

    Both OpenAI-style messages (``role="tool"``) and Anthropic-style
    ``tool_result`` content blocks are handled.

    Safety: If JSON parsing fails, content is returned unchanged.
    """

    name = "tool_crusher"

    def __init__(self, config: ToolCrusherConfig | None = None):
        """
        Initialize tool crusher.

        Args:
            config: Configuration for compression behavior; defaults are used
                when omitted.
        """
        self.config = config or ToolCrusherConfig()

    def should_apply(
        self,
        messages: list[dict[str, Any]],
        tokenizer: Tokenizer,
        **kwargs: Any,
    ) -> bool:
        """Check if any tool messages exceed threshold.

        Returns:
            True when the transform is enabled and at least one string tool
            output is larger than ``min_tokens_to_crush`` tokens.
        """
        if not self.config.enabled:
            return False

        for msg in messages:
            # OpenAI style: role="tool"
            if msg.get("role") == "tool" and self._exceeds_threshold(
                msg.get("content", ""), tokenizer
            ):
                return True

            # Anthropic style: role="user" with tool_result content blocks
            content = msg.get("content")
            if isinstance(content, list):
                for block in content:
                    if (
                        isinstance(block, dict)
                        and block.get("type") == "tool_result"
                        and self._exceeds_threshold(block.get("content", ""), tokenizer)
                    ):
                        return True

        return False

    def apply(
        self,
        messages: list[dict[str, Any]],
        tokenizer: Tokenizer,
        **kwargs: Any,
    ) -> TransformResult:
        """
        Apply tool crushing to messages.

        The input list is not mutated; a deep copy is crushed and returned.

        Args:
            messages: List of messages.
            tokenizer: Tokenizer for counting.
            **kwargs: May include 'tool_profiles' for per-tool config.

        Returns:
            TransformResult with crushed messages.
        """
        tool_profiles = kwargs.get("tool_profiles", self.config.tool_profiles)

        tokens_before = tokenizer.count_messages(messages)
        result_messages = deep_copy_messages(messages)
        transforms_applied: list[str] = []
        markers_inserted: list[str] = []
        warnings: list[str] = []

        for msg in result_messages:
            # OpenAI style: role="tool"
            if msg.get("role") == "tool":
                replacement = self._crush_tool_text(
                    msg.get("content", ""),
                    msg.get("tool_call_id", ""),
                    tool_profiles,
                    tokenizer,
                )
                if replacement is not None:
                    new_content, marker = replacement
                    msg["content"] = new_content
                    markers_inserted.append(marker)
                # Tool-role messages are never scanned for content blocks.
                continue

            # Anthropic style: role="user" with tool_result content blocks
            content = msg.get("content")
            if not isinstance(content, list):
                continue

            for block in content:
                if not isinstance(block, dict) or block.get("type") != "tool_result":
                    continue

                replacement = self._crush_tool_text(
                    block.get("content", ""),
                    block.get("tool_use_id", ""),
                    tool_profiles,
                    tokenizer,
                )
                if replacement is not None:
                    new_content, marker = replacement
                    # Update the content block in place (it lives in the copy).
                    block["content"] = new_content
                    markers_inserted.append(marker)

        # Exactly one marker is recorded per crushed output.
        crushed_count = len(markers_inserted)

        # Count once and reuse for both the log line and the result.
        tokens_after = tokenizer.count_messages(result_messages)

        if crushed_count > 0:
            transforms_applied.append(f"tool_crush:{crushed_count}")
            logger.info(
                "ToolCrusher: compressed %d tool outputs, %d -> %d tokens",
                crushed_count,
                tokens_before,
                tokens_after,
            )

        return TransformResult(
            messages=result_messages,
            tokens_before=tokens_before,
            tokens_after=tokens_after,
            transforms_applied=transforms_applied,
            markers_inserted=markers_inserted,
            warnings=warnings,
        )

    def _exceeds_threshold(self, text: Any, tokenizer: Tokenizer) -> bool:
        """Return True if *text* is a string longer than the crush threshold."""
        return (
            isinstance(text, str)
            and tokenizer.count_text(text) > self.config.min_tokens_to_crush
        )

    def _crush_tool_text(
        self,
        text: Any,
        call_id: str,
        tool_profiles: dict[str, dict[str, Any]],
        tokenizer: Tokenizer,
    ) -> tuple[str, str] | None:
        """Crush one tool output and build its digest marker.

        Returns:
            ``(new_content, marker)`` when the output was crushed, where
            ``new_content`` is the crushed text followed by the marker on its
            own line; ``None`` when the output is not a string, is at or
            below the token threshold, or crushing changed nothing.
        """
        if not self._exceeds_threshold(text, tokenizer):
            return None

        profile = self._get_profile(call_id, tool_profiles)
        crushed, was_modified = self._crush_content(text, profile)
        if not was_modified:
            return None

        # The marker carries a short hash of the ORIGINAL text.
        marker = create_tool_digest_marker(compute_short_hash(text))
        return crushed + "\n" + marker, marker

    def _get_profile(
        self,
        tool_call_id: str,
        tool_profiles: dict[str, dict[str, Any]],
    ) -> dict[str, Any]:
        """Get compression profile for a tool.

        Both arguments are currently ignored and the limits from
        ``self.config`` are always returned.
        """
        # Tool profiles are keyed by tool name, not call ID
        # For now, use default config
        # In a real implementation, you'd map call_id -> tool_name
        return {
            "max_array_items": self.config.max_array_items,
            "max_string_length": self.config.max_string_length,
            "max_depth": self.config.max_depth,
            "preserve_keys": self.config.preserve_keys,
        }

    def _crush_content(
        self,
        content: str,
        profile: dict[str, Any],
    ) -> tuple[str, bool]:
        """
        Crush content according to profile.

        Args:
            content: Tool output text, expected to be JSON.
            profile: Limits to apply; missing entries fall back to
                ``max_depth=5``, ``max_array_items=10`` and
                ``max_string_length=1000``.

        Returns:
            Tuple of (crushed_content, was_modified).
            If parsing fails, returns (original_content, False).
        """
        # Try JSON parse
        parsed, success = safe_json_loads(content)
        if not success:
            # Safety: don't modify unparseable content
            return content, False

        # Apply crushing
        crushed = self._crush_value(
            parsed,
            depth=0,
            max_depth=profile.get("max_depth", 5),
            max_array_items=profile.get("max_array_items", 10),
            max_string_length=profile.get("max_string_length", 1000),
        )

        # Serialize back
        result = safe_json_dumps(crushed, indent=None)

        # "Modified" is decided on the serialized text, so a pure formatting
        # difference from the input also counts as a modification.
        was_modified = result != content.strip()

        return result, was_modified

    def _crush_value(
        self,
        value: Any,
        depth: int,
        max_depth: int,
        max_array_items: int,
        max_string_length: int,
    ) -> Any:
        """Recursively crush a value.

        Strings longer than ``max_string_length`` are cut and suffixed with a
        note of how many characters were removed.  Dicts and lists found at
        ``depth >= max_depth`` are replaced by
        ``{"__headroom_depth_exceeded": <size>}``.  Lists longer than
        ``max_array_items`` keep their first items followed by
        ``{"__headroom_truncated": <dropped count>}``.  All other values
        (numbers, bools, None) pass through.
        """
        # Strings are treated the same at every depth.
        if isinstance(value, str):
            if len(value) > max_string_length:
                return (
                    value[:max_string_length]
                    + f"...[truncated {len(value) - max_string_length} chars]"
                )
            return value

        # Numbers, bools, None - pass through
        if not isinstance(value, (dict, list)):
            return value

        # At max depth, summarize the container instead of descending.
        if depth >= max_depth:
            return {"__headroom_depth_exceeded": len(value)}

        if isinstance(value, dict):
            return {
                k: self._crush_value(
                    v,
                    depth + 1,
                    max_depth,
                    max_array_items,
                    max_string_length,
                )
                for k, v in value.items()
            }

        # List: crush the kept prefix, then note how many items were dropped.
        kept = [
            self._crush_value(
                item,
                depth + 1,
                max_depth,
                max_array_items,
                max_string_length,
            )
            for item in value[:max_array_items]
        ]
        if len(value) > max_array_items:
            kept.append({"__headroom_truncated": len(value) - max_array_items})
        return kept
312
+
313
+
314
def crush_tool_output(
    content: str,
    config: ToolCrusherConfig | None = None,
) -> tuple[str, bool]:
    """
    Convenience function to crush a single tool output.

    Args:
        content: The tool output content.
        config: Optional configuration; defaults are used when omitted.

    Returns:
        Tuple of (crushed_content, was_modified).
    """
    cfg = config or ToolCrusherConfig()
    crusher = ToolCrusher(cfg)

    # Build the profile through the crusher itself so this helper always uses
    # the same limits as ToolCrusher.apply.  There is no tool call here, so
    # an empty call id is passed.
    profile = crusher._get_profile("", cfg.tool_profiles)

    return crusher._crush_content(content, profile)