ripperdoc-0.2.6-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (107)
  1. ripperdoc/__init__.py +3 -0
  2. ripperdoc/__main__.py +20 -0
  3. ripperdoc/cli/__init__.py +1 -0
  4. ripperdoc/cli/cli.py +405 -0
  5. ripperdoc/cli/commands/__init__.py +82 -0
  6. ripperdoc/cli/commands/agents_cmd.py +263 -0
  7. ripperdoc/cli/commands/base.py +19 -0
  8. ripperdoc/cli/commands/clear_cmd.py +18 -0
  9. ripperdoc/cli/commands/compact_cmd.py +23 -0
  10. ripperdoc/cli/commands/config_cmd.py +31 -0
  11. ripperdoc/cli/commands/context_cmd.py +144 -0
  12. ripperdoc/cli/commands/cost_cmd.py +82 -0
  13. ripperdoc/cli/commands/doctor_cmd.py +221 -0
  14. ripperdoc/cli/commands/exit_cmd.py +19 -0
  15. ripperdoc/cli/commands/help_cmd.py +20 -0
  16. ripperdoc/cli/commands/mcp_cmd.py +70 -0
  17. ripperdoc/cli/commands/memory_cmd.py +202 -0
  18. ripperdoc/cli/commands/models_cmd.py +413 -0
  19. ripperdoc/cli/commands/permissions_cmd.py +302 -0
  20. ripperdoc/cli/commands/resume_cmd.py +98 -0
  21. ripperdoc/cli/commands/status_cmd.py +167 -0
  22. ripperdoc/cli/commands/tasks_cmd.py +278 -0
  23. ripperdoc/cli/commands/todos_cmd.py +69 -0
  24. ripperdoc/cli/commands/tools_cmd.py +19 -0
  25. ripperdoc/cli/ui/__init__.py +1 -0
  26. ripperdoc/cli/ui/context_display.py +298 -0
  27. ripperdoc/cli/ui/helpers.py +22 -0
  28. ripperdoc/cli/ui/rich_ui.py +1557 -0
  29. ripperdoc/cli/ui/spinner.py +49 -0
  30. ripperdoc/cli/ui/thinking_spinner.py +128 -0
  31. ripperdoc/cli/ui/tool_renderers.py +298 -0
  32. ripperdoc/core/__init__.py +1 -0
  33. ripperdoc/core/agents.py +486 -0
  34. ripperdoc/core/commands.py +33 -0
  35. ripperdoc/core/config.py +559 -0
  36. ripperdoc/core/default_tools.py +88 -0
  37. ripperdoc/core/permissions.py +252 -0
  38. ripperdoc/core/providers/__init__.py +47 -0
  39. ripperdoc/core/providers/anthropic.py +250 -0
  40. ripperdoc/core/providers/base.py +265 -0
  41. ripperdoc/core/providers/gemini.py +615 -0
  42. ripperdoc/core/providers/openai.py +487 -0
  43. ripperdoc/core/query.py +1058 -0
  44. ripperdoc/core/query_utils.py +622 -0
  45. ripperdoc/core/skills.py +295 -0
  46. ripperdoc/core/system_prompt.py +431 -0
  47. ripperdoc/core/tool.py +240 -0
  48. ripperdoc/sdk/__init__.py +9 -0
  49. ripperdoc/sdk/client.py +333 -0
  50. ripperdoc/tools/__init__.py +1 -0
  51. ripperdoc/tools/ask_user_question_tool.py +431 -0
  52. ripperdoc/tools/background_shell.py +389 -0
  53. ripperdoc/tools/bash_output_tool.py +98 -0
  54. ripperdoc/tools/bash_tool.py +1016 -0
  55. ripperdoc/tools/dynamic_mcp_tool.py +428 -0
  56. ripperdoc/tools/enter_plan_mode_tool.py +226 -0
  57. ripperdoc/tools/exit_plan_mode_tool.py +153 -0
  58. ripperdoc/tools/file_edit_tool.py +346 -0
  59. ripperdoc/tools/file_read_tool.py +203 -0
  60. ripperdoc/tools/file_write_tool.py +205 -0
  61. ripperdoc/tools/glob_tool.py +179 -0
  62. ripperdoc/tools/grep_tool.py +370 -0
  63. ripperdoc/tools/kill_bash_tool.py +136 -0
  64. ripperdoc/tools/ls_tool.py +471 -0
  65. ripperdoc/tools/mcp_tools.py +591 -0
  66. ripperdoc/tools/multi_edit_tool.py +456 -0
  67. ripperdoc/tools/notebook_edit_tool.py +386 -0
  68. ripperdoc/tools/skill_tool.py +205 -0
  69. ripperdoc/tools/task_tool.py +379 -0
  70. ripperdoc/tools/todo_tool.py +494 -0
  71. ripperdoc/tools/tool_search_tool.py +380 -0
  72. ripperdoc/utils/__init__.py +1 -0
  73. ripperdoc/utils/bash_constants.py +51 -0
  74. ripperdoc/utils/bash_output_utils.py +43 -0
  75. ripperdoc/utils/coerce.py +34 -0
  76. ripperdoc/utils/context_length_errors.py +252 -0
  77. ripperdoc/utils/exit_code_handlers.py +241 -0
  78. ripperdoc/utils/file_watch.py +135 -0
  79. ripperdoc/utils/git_utils.py +274 -0
  80. ripperdoc/utils/json_utils.py +27 -0
  81. ripperdoc/utils/log.py +176 -0
  82. ripperdoc/utils/mcp.py +560 -0
  83. ripperdoc/utils/memory.py +253 -0
  84. ripperdoc/utils/message_compaction.py +676 -0
  85. ripperdoc/utils/messages.py +519 -0
  86. ripperdoc/utils/output_utils.py +258 -0
  87. ripperdoc/utils/path_ignore.py +677 -0
  88. ripperdoc/utils/path_utils.py +46 -0
  89. ripperdoc/utils/permissions/__init__.py +27 -0
  90. ripperdoc/utils/permissions/path_validation_utils.py +174 -0
  91. ripperdoc/utils/permissions/shell_command_validation.py +552 -0
  92. ripperdoc/utils/permissions/tool_permission_utils.py +279 -0
  93. ripperdoc/utils/prompt.py +17 -0
  94. ripperdoc/utils/safe_get_cwd.py +31 -0
  95. ripperdoc/utils/sandbox_utils.py +38 -0
  96. ripperdoc/utils/session_history.py +260 -0
  97. ripperdoc/utils/session_usage.py +117 -0
  98. ripperdoc/utils/shell_token_utils.py +95 -0
  99. ripperdoc/utils/shell_utils.py +159 -0
  100. ripperdoc/utils/todo.py +203 -0
  101. ripperdoc/utils/token_estimation.py +34 -0
  102. ripperdoc-0.2.6.dist-info/METADATA +193 -0
  103. ripperdoc-0.2.6.dist-info/RECORD +107 -0
  104. ripperdoc-0.2.6.dist-info/WHEEL +5 -0
  105. ripperdoc-0.2.6.dist-info/entry_points.txt +3 -0
  106. ripperdoc-0.2.6.dist-info/licenses/LICENSE +53 -0
  107. ripperdoc-0.2.6.dist-info/top_level.txt +1 -0
@@ -0,0 +1,676 @@
+ """Utilities for compacting conversation history when context grows too large."""
+
+ from __future__ import annotations
+
+ import json
+ import os
+ from dataclasses import dataclass
+ from typing import Any, Callable, Dict, List, Optional, Sequence, Set, Union
+
+ from ripperdoc.core.config import GlobalConfig, ModelProfile, get_global_config
+ from ripperdoc.utils.log import get_logger
+ from ripperdoc.utils.token_estimation import estimate_tokens
+ from ripperdoc.utils.messages import (
+     AssistantMessage,
+     MessageContent,
+     ProgressMessage,
+     UserMessage,
+     normalize_messages_for_api,
+ )
+
+ logger = get_logger()
+
+ ConversationMessage = Union[UserMessage, AssistantMessage, ProgressMessage]
+
+ # Compaction thresholds.
+ MAX_TOKENS_SOFT = 20_000
+ MAX_TOKENS_HARD = 40_000
+ MAX_TOOL_USES_TO_PRESERVE = 3
+ IMAGE_TOKEN_COST = 2_000
+ AUTO_COMPACT_BUFFER = 13_000
+ WARNING_THRESHOLD = 20_000
+ ERROR_THRESHOLD = 20_000
+ COMPACT_PLACEHOLDER = "[Old tool result content cleared]"
+ TOOL_COMMANDS: Set[str] = {"Read", "Bash", "Grep", "Glob", "LS", "WebSearch", "WebFetch"}
+
+ # Defaults roughly match modern 200k context windows while still working for smaller models.
+ DEFAULT_CONTEXT_TOKENS = 200_000
+ MIN_CONTEXT_TOKENS = 20_000
+
+ # Track tool results we've already compacted so we don't reprocess them.
+ _processed_tool_use_ids: Set[str] = set()
+ _token_cache: Dict[str, int] = {}
+ _cleanup_callbacks: List[Callable[[], None]] = []
+ _is_compacting: bool = False
+
+
+ @dataclass
+ class ContextUsageStatus:
+     """Snapshot of the current context usage."""
+
+     used_tokens: int
+     max_context_tokens: int
+     tokens_left: int
+     percent_left: float
+     percent_used: float
+     is_above_warning_threshold: bool
+     is_above_error_threshold: bool
+     is_above_auto_compact_threshold: bool
+
+     @property
+     def total_tokens(self) -> int:
+         """Alias for backward compatibility."""
+         return self.used_tokens
+
+     @property
+     def is_above_warning(self) -> bool:
+         return self.is_above_warning_threshold
+
+     @property
+     def is_above_error(self) -> bool:
+         return self.is_above_error_threshold
+
+     @property
+     def should_auto_compact(self) -> bool:
+         return self.is_above_auto_compact_threshold
+
+
+ @dataclass
+ class CompactionResult:
+     """Result of a compaction run."""
+
+     messages: List[ConversationMessage]
+     tokens_before: int
+     tokens_after: int
+     tokens_saved: int
+     cleared_tool_ids: Set[str]
+     was_compacted: bool
+
+
+ @dataclass
+ class ContextBreakdown:
+     """Detailed breakdown of context usage for display."""
+
+     max_context_tokens: int
+     system_prompt_tokens: int
+     mcp_tokens: int
+     tool_schema_tokens: int
+     memory_tokens: int
+     message_tokens: int
+     reserved_tokens: int
+     message_count: int
+
+     @property
+     def reported_tokens(self) -> int:
+         return (
+             self.system_prompt_tokens
+             + self.mcp_tokens
+             + self.tool_schema_tokens
+             + self.memory_tokens
+             + self.message_tokens
+         )
+
+     @property
+     def effective_tokens(self) -> int:
+         """Tokens that count against the limit including any reserved buffer."""
+         return min(self.max_context_tokens, self.reported_tokens + self.reserved_tokens)
+
+     @property
+     def free_tokens(self) -> int:
+         return max(self.max_context_tokens - self.effective_tokens, 0)
+
+     @property
+     def percent_used(self) -> float:
+         if self.max_context_tokens <= 0:
+             return 0.0
+         return min(100.0, (self.effective_tokens / self.max_context_tokens) * 100)
+
+     def percent_of_limit(self, tokens: int) -> float:
+         if self.max_context_tokens <= 0:
+             return 0.0
+         return min(100.0, (tokens / self.max_context_tokens) * 100)
+
+
+ def _parse_truthy_env_value(value: Optional[str]) -> bool:
+     """Interpret common truthy environment variable values."""
+     if value is None:
+         return False
+     normalized = value.strip().lower()
+     return normalized in {"1", "true", "yes", "on"}
+
+
+ def estimate_tokens_from_text(text: str) -> int:
+     """Estimate token count using shared token estimation helper."""
+     return estimate_tokens(text)
+
+
+ def _stringify_content(content: Union[str, List[MessageContent], None]) -> str:
+     """Convert normalized content into plain text for estimation."""
+     if content is None:
+         return ""
+     if isinstance(content, str):
+         return content
+     parts: List[str] = []
+     for part in content:
+         if isinstance(part, dict):
+             block_type = part.get("type")
+             text_val = part.get("text")
+             if text_val:
+                 parts.append(str(text_val))
+
+             # Capture nested text for tool_result content blocks
+             nested_content = part.get("content")
+             if isinstance(nested_content, list):
+                 nested_text = _stringify_content(nested_content)
+                 if nested_text:
+                     parts.append(nested_text)
+
+             # Include tool payloads that otherwise don't have "text"
+             if block_type == "tool_use" and part.get("input") is not None:
+                 try:
+                     parts.append(json.dumps(part.get("input"), ensure_ascii=False))
+                 except (TypeError, ValueError) as exc:
+                     logger.warning(
+                         "[message_compaction] Failed to serialize tool_use input for token estimate: %s: %s",
+                         type(exc).__name__, exc,
+                     )
+                     parts.append(str(part.get("input")))
+
+             # OpenAI-style arguments blocks
+             if part.get("arguments"):
+                 parts.append(str(part.get("arguments")))
+         elif hasattr(part, "text"):
+             text_val = getattr(part, "text", "")
+             if text_val:
+                 parts.append(str(text_val))
+         else:
+             parts.append(str(part))
+     # Filter out empty strings to avoid over-counting separators
+     return "\n".join([p for p in parts if p])
+
+
+ def estimate_conversation_tokens(
+     messages: Sequence[ConversationMessage], *, protocol: str = "anthropic"
+ ) -> int:
+     """Estimate tokens for a conversation after normalization."""
+     normalized = normalize_messages_for_api(list(messages), protocol=protocol)
+     total = 0
+     for message in normalized:
+         total += estimate_tokens_from_text(_stringify_content(message.get("content")))
+
+         # Account for OpenAI-style tool_calls payloads (arguments + name)
+         tool_calls = message.get("tool_calls")
+         if isinstance(tool_calls, list):
+             for call in tool_calls:
+                 if not isinstance(call, dict):
+                     total += estimate_tokens_from_text(str(call))
+                     continue
+                 func = call.get("function")
+                 if isinstance(func, dict):
+                     arguments = func.get("arguments")
+                     if arguments:
+                         total += estimate_tokens_from_text(str(arguments))
+                     name = func.get("name")
+                     if name:
+                         total += estimate_tokens_from_text(str(name))
+                 else:
+                     total += estimate_tokens_from_text(str(func))
+     return total
+
+
+ def _estimate_tool_schema_tokens(tools: Sequence[Any]) -> int:
+     """Estimate tokens consumed by tool schemas."""
+     total = 0
+     for tool in tools:
+         try:
+             schema = tool.input_schema.model_json_schema()
+             schema_text = json.dumps(schema, sort_keys=True)
+             total += estimate_tokens_from_text(schema_text)
+         except (AttributeError, TypeError, KeyError, ValueError) as exc:
+             logger.warning(
+                 "Failed to estimate tokens for tool schema: %s: %s",
+                 type(exc).__name__, exc,
+                 extra={"tool": getattr(tool, "name", None)},
+             )
+             continue
+     return total
+
+
+ def get_model_context_limit(
+     model_profile: Optional[ModelProfile], explicit_limit: Optional[int] = None
+ ) -> int:
+     """Best-effort guess of the model context window."""
+     env_override = os.getenv("RIPPERDOC_CONTEXT_TOKENS")
+     if env_override:
+         try:
+             parsed = int(env_override)
+             if parsed > 0:
+                 return parsed
+         except ValueError:
+             pass
+
+     if explicit_limit and explicit_limit > 0:
+         return explicit_limit
+
+     if model_profile and getattr(model_profile, "context_window", None):
+         try:
+             configured = int(model_profile.context_window)  # type: ignore[arg-type]
+             if configured > 0:
+                 return configured
+         except (TypeError, ValueError):
+             pass
+
+     if model_profile and model_profile.model:
+         name = model_profile.model.lower()
+         if "claude" in name:
+             # Claude 4.5 defaults and beta 1M thinking window.
+             if "4.5" in name or "sonnet" in name or "haiku" in name:
+                 return 1_000_000 if "1m" in name or "beta" in name else 200_000
+             if "opus" in name or "4.1" in name:
+                 return 200_000
+             return 200_000
+         if "gpt-4o" in name or "gpt-4.1" in name or "gpt-4-turbo" in name:
+             return 128_000
+         if "gpt-4" in name:
+             return 32_000
+         if "gpt-3.5" in name:
+             return 16_000
+         if "deepseek" in name:
+             return 128_000
+
+     return DEFAULT_CONTEXT_TOKENS
+
+
+ def get_remaining_context_tokens(
+     model_profile: Optional[ModelProfile], explicit_limit: Optional[int] = None
+ ) -> int:
+     """Return the context window minus the model's configured output tokens."""
+     context_limit = max(get_model_context_limit(model_profile, explicit_limit), MIN_CONTEXT_TOKENS)
+     try:
+         max_output_tokens = (
+             int(getattr(model_profile, "max_tokens", 0) or 0) if model_profile else 0
+         )
+     except (TypeError, ValueError):
+         max_output_tokens = 0
+     return max(MIN_CONTEXT_TOKENS, context_limit - max(0, max_output_tokens))
+
+
+ def resolve_auto_compact_enabled(config: GlobalConfig) -> bool:
+     """Return whether auto-compaction is enabled, honoring an env override."""
+     env_override = os.getenv("RIPPERDOC_AUTO_COMPACT")
+     if env_override is not None:
+         normalized = env_override.strip().lower()
+         return normalized not in {"0", "false", "no", "off"}
+     return bool(config.auto_compact_enabled)
+
+
+ def get_context_usage_status(
+     used_tokens: int,
+     max_context_tokens: Optional[int],
+     auto_compact_enabled: bool,
+ ) -> ContextUsageStatus:
+     """Compute context usage thresholds using the compaction heuristics."""
+     context_limit = max(max_context_tokens or DEFAULT_CONTEXT_TOKENS, MIN_CONTEXT_TOKENS)
+     effective_limit = (
+         max(MIN_CONTEXT_TOKENS, context_limit - AUTO_COMPACT_BUFFER)
+         if auto_compact_enabled
+         else context_limit
+     )
+
+     tokens_left = max(effective_limit - used_tokens, 0)
+     percent_left = (
+         0.0 if effective_limit <= 0 else min(100.0, (tokens_left / effective_limit) * 100)
+     )
+     percent_used = 100.0 - percent_left
+
+     warning_limit = max(0, effective_limit - WARNING_THRESHOLD)
+     error_limit = max(0, effective_limit - ERROR_THRESHOLD)
+     auto_compact_limit = max(MIN_CONTEXT_TOKENS, context_limit - AUTO_COMPACT_BUFFER)
+
+     return ContextUsageStatus(
+         used_tokens=used_tokens,
+         max_context_tokens=context_limit,
+         tokens_left=tokens_left,
+         percent_left=percent_left,
+         percent_used=percent_used,
+         is_above_warning_threshold=used_tokens >= warning_limit,
+         is_above_error_threshold=used_tokens >= error_limit,
+         is_above_auto_compact_threshold=auto_compact_enabled and used_tokens >= auto_compact_limit,
+     )
+
+
+ def summarize_context_usage(
+     messages: Sequence[ConversationMessage],
+     tools: Sequence[Any],
+     system_prompt: str,
+     max_context_tokens: int,
+     auto_compact_enabled: bool,
+     memory_tokens: int = 0,
+     mcp_tokens: int = 0,
+     *,
+     protocol: str = "anthropic",
+ ) -> ContextBreakdown:
+     """Return a detailed breakdown of context usage."""
+     max_context_tokens = max(max_context_tokens, MIN_CONTEXT_TOKENS)
+     raw_system_tokens = estimate_tokens_from_text(system_prompt)
+     base_prompt_tokens = max(0, raw_system_tokens - max(0, mcp_tokens))
+     tool_schema_tokens = _estimate_tool_schema_tokens(tools)
+     message_tokens = estimate_conversation_tokens(messages, protocol=protocol)
+     message_count = len([m for m in messages if getattr(m, "type", "") != "progress"])
+     reserved_tokens = AUTO_COMPACT_BUFFER if auto_compact_enabled else 0
+
+     return ContextBreakdown(
+         max_context_tokens=max_context_tokens,
+         system_prompt_tokens=base_prompt_tokens,
+         mcp_tokens=max(0, mcp_tokens),
+         tool_schema_tokens=tool_schema_tokens,
+         memory_tokens=max(0, memory_tokens),
+         message_tokens=message_tokens,
+         reserved_tokens=reserved_tokens,
+         message_count=message_count,
+     )
+
+
+ def find_latest_assistant_usage_tokens(
+     messages: Sequence[ConversationMessage],
+ ) -> int:
+     """Best-effort extraction of usage tokens from the latest assistant message."""
+     for message in reversed(messages):
+         if getattr(message, "type", "") != "assistant":
+             continue
+         payload = getattr(message, "message", None) or getattr(message, "content", None)
+         usage = getattr(payload, "usage", None)
+         if usage is None and isinstance(payload, dict):
+             usage = payload.get("usage")
+         if not usage:
+             continue
+         try:
+             tokens = 0
+             for field in (
+                 "input_tokens",
+                 "cache_creation_input_tokens",
+                 "cache_read_input_tokens",
+                 "output_tokens",
+                 "prompt_tokens",
+                 "completion_tokens",
+             ):
+                 value = getattr(usage, field, None)
+                 if value is None and isinstance(usage, dict):
+                     value = usage.get(field)
+                 if value is not None:
+                     tokens += int(value)
+             if tokens > 0:
+                 return tokens
+         except (TypeError, ValueError, AttributeError):
+             logger.debug("[message_compaction] Failed to parse usage tokens")
+             continue
+     return 0
+
+
+ def estimate_used_tokens(
+     messages: Sequence[ConversationMessage],
+     *,
+     protocol: str = "anthropic",
+     precomputed_total_tokens: Optional[int] = None,
+ ) -> int:
+     """Return usage tokens if present; otherwise fall back to an estimated total."""
+     usage_tokens = find_latest_assistant_usage_tokens(messages)
+     if usage_tokens > 0:
+         return usage_tokens
+     if precomputed_total_tokens is not None:
+         return precomputed_total_tokens
+     return estimate_conversation_tokens(messages, protocol=protocol)
+
+
+ def register_cleanup_callback(callback: Callable[[], None]) -> Callable[[], None]:
+     """Register a callback that will run after a compaction pass."""
+     _cleanup_callbacks.append(callback)
+
+     def _unregister() -> None:
+         nonlocal callback
+         _cleanup_callbacks[:] = [cb for cb in _cleanup_callbacks if cb is not callback]
+
+     return _unregister
+
+
+ def _run_cleanup_callbacks() -> None:
+     callbacks = list(_cleanup_callbacks)
+     for callback in callbacks:
+         try:
+             callback()
+         except (RuntimeError, TypeError, ValueError, AttributeError) as exc:
+             logger.debug(
+                 "[message_compaction] Cleanup callback failed: %s: %s",
+                 type(exc).__name__, exc,
+             )
+
+
+ def _normalize_tool_use_id(block: Any) -> str:
+     if block is None:
+         return ""
+     if isinstance(block, dict):
+         return str(block.get("tool_use_id") or block.get("id") or "")
+     return str(getattr(block, "tool_use_id", None) or getattr(block, "id", None) or "")
+
+
+ def _estimate_message_tokens(content_block: Any) -> int:
+     """Estimate tokens for a single content block."""
+     if content_block is None:
+         return 0
+
+     content = getattr(content_block, "content", None)
+     if isinstance(content_block, dict) and content is None:
+         content = content_block.get("content")
+
+     if isinstance(content, str):
+         return estimate_tokens_from_text(content)
+     if isinstance(content, list):
+         total = 0
+         for part in content:
+             part_type = getattr(part, "type", None) or (
+                 part.get("type") if isinstance(part, dict) else None
+             )
+             if part_type == "text":
+                 text_val = getattr(part, "text", None) if hasattr(part, "text") else None
+                 if text_val is None and isinstance(part, dict):
+                     text_val = part.get("text")
+                 total += estimate_tokens_from_text(text_val or "")
+             elif part_type == "image":
+                 total += IMAGE_TOKEN_COST
+         return total
+
+     text_val = getattr(content_block, "text", None)
+     if text_val is None and isinstance(content_block, dict):
+         text_val = content_block.get("text") or content_block.get("content")
+     return estimate_tokens_from_text(text_val or "")
+
+
+ def _get_cached_token_count(cache_key: str, content_block: Any) -> int:
+     estimated = _token_cache.get(cache_key)
+     if estimated is None:
+         estimated = _estimate_message_tokens(content_block)
+         _token_cache[cache_key] = estimated
+     return estimated
+
+
+ def compact_messages(
+     messages: Sequence[ConversationMessage],
+     max_tokens: Optional[int] = None,
+     *,
+     protocol: str = "anthropic",
+ ) -> CompactionResult:
+     """Compact tool results by replacing older outputs with placeholders."""
+     global _is_compacting
+     _is_compacting = False
+
+     tokens_before = estimate_conversation_tokens(messages, protocol=protocol)
+
+     if _parse_truthy_env_value(os.getenv("DISABLE_MICROCOMPACT")):
+         return CompactionResult(
+             messages=list(messages),
+             tokens_before=tokens_before,
+             tokens_after=tokens_before,
+             tokens_saved=0,
+             cleared_tool_ids=set(),
+             was_compacted=False,
+         )
+
+     # Presence of this flag mirrors the upstream implementation even though we don't act on it.
+     _parse_truthy_env_value(os.getenv("USE_API_CONTEXT_MANAGEMENT"))
+
+     is_max_tokens_specified = max_tokens is not None
+     try:
+         base_max_tokens = int(max_tokens) if max_tokens is not None else MAX_TOKENS_HARD
+     except (TypeError, ValueError):
+         base_max_tokens = MAX_TOKENS_HARD
+     effective_max_tokens = max(base_max_tokens, MIN_CONTEXT_TOKENS)
+
+     tool_use_ids_to_compact: List[str] = []
+     token_counts_by_tool_use_id: Dict[str, int] = {}
+
+     for message in messages:
+         msg_type = getattr(message, "type", "")
+         content = getattr(getattr(message, "message", None), "content", None)
+         if msg_type not in {"user", "assistant"} or not isinstance(content, list):
+             continue
+         for content_block in content:
+             block_type = getattr(content_block, "type", None) or (
+                 content_block.get("type") if isinstance(content_block, dict) else None
+             )
+             tool_use_id = _normalize_tool_use_id(content_block)
+             tool_name = getattr(content_block, "name", None)
+             if tool_name is None and isinstance(content_block, dict):
+                 tool_name = content_block.get("name")
+             if block_type == "tool_use" and tool_name in TOOL_COMMANDS:
+                 if tool_use_id and tool_use_id not in _processed_tool_use_ids:
+                     tool_use_ids_to_compact.append(tool_use_id)
+             elif block_type == "tool_result" and tool_use_id in tool_use_ids_to_compact:
+                 token_count = _get_cached_token_count(tool_use_id, content_block)
+                 token_counts_by_tool_use_id[tool_use_id] = token_count
+
+     latest_tool_use_ids = (
+         tool_use_ids_to_compact[-MAX_TOOL_USES_TO_PRESERVE:]
+         if MAX_TOOL_USES_TO_PRESERVE > 0
+         else []
+     )
+     total_token_count = sum(token_counts_by_tool_use_id.values())
+
+     total_tokens_removed = 0
+     ids_to_remove: Set[str] = set()
+
+     for tool_use_id in tool_use_ids_to_compact:
+         if tool_use_id in latest_tool_use_ids:
+             continue
+         if total_token_count - total_tokens_removed > effective_max_tokens:
+             ids_to_remove.add(tool_use_id)
+             total_tokens_removed += token_counts_by_tool_use_id.get(tool_use_id, 0)
+
+     if not is_max_tokens_specified:
+         auto_compact_enabled = resolve_auto_compact_enabled(get_global_config())
+         usage_tokens = estimate_used_tokens(
+             messages, protocol=protocol, precomputed_total_tokens=tokens_before
+         )
+         status = get_context_usage_status(
+             usage_tokens,
+             max_context_tokens=max_tokens,
+             auto_compact_enabled=auto_compact_enabled,
+         )
+         if not status.is_above_warning_threshold or total_tokens_removed < MAX_TOKENS_SOFT:
+             ids_to_remove.clear()
+             total_tokens_removed = 0
+
+     def _should_remove(tool_use_id: str) -> bool:
+         return tool_use_id in ids_to_remove or tool_use_id in _processed_tool_use_ids
+
+     compacted_messages: List[ConversationMessage] = []
+
+     for message in messages:
+         msg_type = getattr(message, "type", "")
+         content = getattr(getattr(message, "message", None), "content", None)
+         if msg_type not in {"user", "assistant"} or not isinstance(content, list):
+             compacted_messages.append(message)
+             continue
+
+         if msg_type == "assistant" and isinstance(message, AssistantMessage):
+             # Copy content list to avoid mutating the original message.
+             compacted_messages.append(
+                 AssistantMessage(
+                     message=message.message.model_copy(update={"content": list(content)}),
+                     cost_usd=getattr(message, "cost_usd", 0.0),
+                     duration_ms=getattr(message, "duration_ms", 0.0),
+                     uuid=getattr(message, "uuid", None),
+                     is_api_error_message=getattr(message, "is_api_error_message", False),
+                 )
+             )
+             continue
+
+         filtered_content: List[MessageContent] = []
+         modified = False
+         for content_item in content:
+             block_type = getattr(content_item, "type", None) or (
+                 content_item.get("type") if isinstance(content_item, dict) else None
+             )
+             tool_use_id = _normalize_tool_use_id(content_item)
+             if block_type == "tool_result" and _should_remove(tool_use_id):
+                 modified = True
+                 if hasattr(content_item, "model_copy"):
+                     new_block = content_item.model_copy()
+                     new_block.text = COMPACT_PLACEHOLDER
+                 else:
+                     block_dict = (
+                         dict(content_item)
+                         if isinstance(content_item, dict)
+                         else {"type": "tool_result"}
+                     )
+                     block_dict["text"] = COMPACT_PLACEHOLDER
+                     block_dict["tool_use_id"] = tool_use_id
+                     new_block = MessageContent(**block_dict)
+                 filtered_content.append(new_block)
+             else:
+                 if isinstance(content_item, MessageContent):
+                     filtered_content.append(content_item)
+                 elif isinstance(content_item, dict):
+                     filtered_content.append(MessageContent(**content_item))
+                 else:
+                     filtered_content.append(
+                         MessageContent(type=str(block_type or "text"), text=str(content_item))
+                     )
+
+         if modified and isinstance(message, UserMessage):
+             compacted_messages.append(
+                 UserMessage(
+                     message=message.message.model_copy(update={"content": filtered_content}),
+                     tool_use_result=getattr(message, "tool_use_result", None),
+                     uuid=getattr(message, "uuid", None),
+                 )
+             )
+         else:
+             compacted_messages.append(message)
+
+     for id_to_remove in ids_to_remove:
+         _processed_tool_use_ids.add(id_to_remove)
+
+     tokens_after = estimate_conversation_tokens(compacted_messages, protocol=protocol)
+     tokens_saved = max(0, tokens_before - tokens_after)
+
+     if ids_to_remove:
+         _is_compacting = True
+         _run_cleanup_callbacks()
+         logger.debug(
+             "[message_compaction] Compacted conversation",
+             extra={
+                 "tokens_before": tokens_before,
+                 "tokens_after": tokens_after,
+                 "tokens_saved": tokens_saved,
+                 "cleared_tool_ids": list(ids_to_remove),
+             },
+         )
+
+     return CompactionResult(
+         messages=compacted_messages,
+         tokens_before=tokens_before,
+         tokens_after=tokens_after,
+         tokens_saved=tokens_saved,
+         cleared_tool_ids=ids_to_remove,
+         was_compacted=bool(ids_to_remove),
+     )
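
For orientation, the pieces in this file compose into a simple check-then-compact loop. The sketch below is illustrative only and not part of the package: maybe_compact is a hypothetical helper, and conversation stands in for a list of ripperdoc message objects (whose models live in ripperdoc/utils/messages.py, outside this diff). Working through the defaults above: with a 200,000-token window and auto-compaction on, the effective limit is 200,000 - 13,000 (AUTO_COMPACT_BUFFER) = 187,000 tokens, the warning threshold trips at 187,000 - 20,000 = 167,000 used tokens, and auto-compaction triggers at 187,000.

    from typing import List

    from ripperdoc.utils.message_compaction import (
        ConversationMessage,
        compact_messages,
        estimate_used_tokens,
        get_context_usage_status,
    )

    def maybe_compact(
        conversation: List[ConversationMessage],
        max_context_tokens: int = 200_000,
    ) -> List[ConversationMessage]:
        """Hypothetical driver: clear old tool results once usage crosses the threshold."""
        # Prefer real usage figures from the latest assistant message; fall back to estimates.
        used = estimate_used_tokens(conversation, protocol="anthropic")
        status = get_context_usage_status(
            used,
            max_context_tokens=max_context_tokens,
            auto_compact_enabled=True,
        )
        if not status.should_auto_compact:
            # Still below (context_limit - AUTO_COMPACT_BUFFER); leave history untouched.
            return conversation
        result = compact_messages(conversation, protocol="anthropic")
        if result.was_compacted:
            print(
                f"Compacted: {result.tokens_before} -> {result.tokens_after} tokens "
                f"(saved {result.tokens_saved}, cleared {len(result.cleared_tool_ids)} tool results)"
            )
        return result.messages

Note that compact_messages re-applies the same warning-threshold check internally when max_tokens is omitted, so the explicit status check above mainly avoids an unnecessary compaction pass.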