ripperdoc 0.2.6__py3-none-any.whl → 0.2.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44)
  1. ripperdoc/__init__.py +1 -1
  2. ripperdoc/cli/cli.py +5 -0
  3. ripperdoc/cli/commands/__init__.py +71 -6
  4. ripperdoc/cli/commands/clear_cmd.py +1 -0
  5. ripperdoc/cli/commands/exit_cmd.py +1 -1
  6. ripperdoc/cli/commands/help_cmd.py +11 -1
  7. ripperdoc/cli/commands/hooks_cmd.py +636 -0
  8. ripperdoc/cli/commands/permissions_cmd.py +36 -34
  9. ripperdoc/cli/commands/resume_cmd.py +71 -37
  10. ripperdoc/cli/ui/file_mention_completer.py +276 -0
  11. ripperdoc/cli/ui/helpers.py +100 -3
  12. ripperdoc/cli/ui/interrupt_handler.py +175 -0
  13. ripperdoc/cli/ui/message_display.py +249 -0
  14. ripperdoc/cli/ui/panels.py +63 -0
  15. ripperdoc/cli/ui/rich_ui.py +233 -648
  16. ripperdoc/cli/ui/tool_renderers.py +2 -2
  17. ripperdoc/core/agents.py +4 -4
  18. ripperdoc/core/custom_commands.py +411 -0
  19. ripperdoc/core/hooks/__init__.py +99 -0
  20. ripperdoc/core/hooks/config.py +303 -0
  21. ripperdoc/core/hooks/events.py +540 -0
  22. ripperdoc/core/hooks/executor.py +498 -0
  23. ripperdoc/core/hooks/integration.py +353 -0
  24. ripperdoc/core/hooks/manager.py +720 -0
  25. ripperdoc/core/providers/anthropic.py +476 -69
  26. ripperdoc/core/query.py +61 -4
  27. ripperdoc/core/query_utils.py +1 -1
  28. ripperdoc/core/tool.py +1 -1
  29. ripperdoc/tools/bash_tool.py +5 -5
  30. ripperdoc/tools/file_edit_tool.py +2 -2
  31. ripperdoc/tools/file_read_tool.py +2 -2
  32. ripperdoc/tools/multi_edit_tool.py +1 -1
  33. ripperdoc/utils/conversation_compaction.py +476 -0
  34. ripperdoc/utils/message_compaction.py +109 -154
  35. ripperdoc/utils/message_formatting.py +216 -0
  36. ripperdoc/utils/messages.py +31 -9
  37. ripperdoc/utils/path_ignore.py +3 -4
  38. ripperdoc/utils/session_history.py +19 -7
  39. {ripperdoc-0.2.6.dist-info → ripperdoc-0.2.8.dist-info}/METADATA +24 -3
  40. {ripperdoc-0.2.6.dist-info → ripperdoc-0.2.8.dist-info}/RECORD +44 -30
  41. {ripperdoc-0.2.6.dist-info → ripperdoc-0.2.8.dist-info}/WHEEL +0 -0
  42. {ripperdoc-0.2.6.dist-info → ripperdoc-0.2.8.dist-info}/entry_points.txt +0 -0
  43. {ripperdoc-0.2.6.dist-info → ripperdoc-0.2.8.dist-info}/licenses/LICENSE +0 -0
  44. {ripperdoc-0.2.6.dist-info → ripperdoc-0.2.8.dist-info}/top_level.txt +0 -0
ripperdoc/utils/message_compaction.py
@@ -1,11 +1,11 @@
- """Utilities for compacting conversation history when context grows too large."""
+ """Context compaction utilities"""

  from __future__ import annotations

  import json
  import os
  from dataclasses import dataclass
- from typing import Any, Callable, Dict, List, Optional, Sequence, Set, Union
+ from typing import Any, Dict, List, Optional, Sequence, Set, Union

  from ripperdoc.core.config import GlobalConfig, ModelProfile, get_global_config
  from ripperdoc.utils.log import get_logger
@@ -22,7 +22,7 @@ logger = get_logger()

  ConversationMessage = Union[UserMessage, AssistantMessage, ProgressMessage]

- # Compaction thresholds.
+ # Thresholds.
  MAX_TOKENS_SOFT = 20_000
  MAX_TOKENS_HARD = 40_000
  MAX_TOOL_USES_TO_PRESERVE = 3
@@ -30,23 +30,39 @@ IMAGE_TOKEN_COST = 2_000
  AUTO_COMPACT_BUFFER = 13_000
  WARNING_THRESHOLD = 20_000
  ERROR_THRESHOLD = 20_000
- COMPACT_PLACEHOLDER = "[Old tool result content cleared]"
- TOOL_COMMANDS: Set[str] = {"Read", "Bash", "Grep", "Glob", "LS", "WebSearch", "WebFetch"}
+ MICRO_PLACEHOLDER = "[Old tool result content cleared]"

- # Defaults roughly match modern 200k context windows while still working for smaller models.
+ # Context sizing.
  DEFAULT_CONTEXT_TOKENS = 200_000
  MIN_CONTEXT_TOKENS = 20_000

- # Track tool results we've already compacted so we don't reprocess them.
+ # Tools likely to generate large payloads.
+ TOOL_COMMANDS: Set[str] = {
+     "Read",
+     "Bash",
+     "Grep",
+     "Glob",
+     "LS",
+     "WebSearch",
+     "WebFetch",
+     "BashOutput",
+     "ListMcpServers",
+     "ListMcpResources",
+     "ReadMcpResource",
+     # "FileEdit",
+     # "MultiEdit",
+     # "NotebookEdit",
+     # "FileWrite",
+ }
+
+ # State to avoid re-compacting the same tool results.
  _processed_tool_use_ids: Set[str] = set()
  _token_cache: Dict[str, int] = {}
- _cleanup_callbacks: List[Callable[[], None]] = []
- _is_compacting: bool = False


  @dataclass
  class ContextUsageStatus:
-     """Snapshot of the current context usage."""
+     """Snapshot of current context usage."""

      used_tokens: int
      max_context_tokens: int
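
Note: TOOL_COMMANDS is the eligibility filter that the compaction pass (renamed micro_compact_messages later in this diff) applies to tool_use blocks, and 0.2.8 widens it to background-shell and MCP tools while deliberately leaving the edit/write tools commented out. A minimal sketch of the membership check, with hypothetical block dicts rather than the package's actual types:

    # Sketch of the gate micro_compact_messages applies per content block;
    # the dict shapes here are illustrative assumptions.
    from typing import Any, Dict, Set

    TOOL_COMMANDS: Set[str] = {
        "Read", "Bash", "Grep", "Glob", "LS", "WebSearch", "WebFetch",
        "BashOutput", "ListMcpServers", "ListMcpResources", "ReadMcpResource",
    }

    def is_compaction_candidate(block: Dict[str, Any]) -> bool:
        # Only large-output tool results qualify; edit/write results are
        # never cleared because their names stay out of the set.
        return block.get("type") == "tool_use" and block.get("name") in TOOL_COMMANDS

    assert is_compaction_candidate({"type": "tool_use", "name": "Bash"})
    assert not is_compaction_candidate({"type": "tool_use", "name": "FileEdit"})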
@@ -59,7 +75,6 @@ class ContextUsageStatus:

      @property
      def total_tokens(self) -> int:
-         """Alias for backward compatibility."""
          return self.used_tokens

      @property
@@ -75,21 +90,9 @@ class ContextUsageStatus:
          return self.is_above_auto_compact_threshold


- @dataclass
- class CompactionResult:
-     """Result of a compaction run."""
-
-     messages: List[ConversationMessage]
-     tokens_before: int
-     tokens_after: int
-     tokens_saved: int
-     cleared_tool_ids: Set[str]
-     was_compacted: bool
-
-
  @dataclass
  class ContextBreakdown:
-     """Detailed breakdown of context usage for display."""
+     """Detailed breakdown for UI display."""

      max_context_tokens: int
      system_prompt_tokens: int
@@ -112,7 +115,6 @@ class ContextBreakdown:

      @property
      def effective_tokens(self) -> int:
-         """Tokens that count against the limit including any reserved buffer."""
          return min(self.max_context_tokens, self.reported_tokens + self.reserved_tokens)

      @property
@@ -131,8 +133,20 @@ class ContextBreakdown:
          return min(100.0, (tokens / self.max_context_tokens) * 100)


+ @dataclass
+ class MicroCompactionResult:
+     """Result of a micro-compaction pass."""
+
+     messages: List[ConversationMessage]
+     tokens_before: int
+     tokens_after: int
+     tokens_saved: int
+     tools_compacted: int
+     trigger_type: str
+     was_compacted: bool
+
+
  def _parse_truthy_env_value(value: Optional[str]) -> bool:
-     """Interpret common truthy environment variable values."""
      if value is None:
          return False
      normalized = value.strip().lower()
@@ -140,12 +154,10 @@ def _parse_truthy_env_value(value: Optional[str]) -> bool:


  def estimate_tokens_from_text(text: str) -> int:
-     """Estimate token count using shared token estimation helper."""
-     return estimate_tokens(text)
+     return estimate_tokens(text or "")


  def _stringify_content(content: Union[str, List[MessageContent], None]) -> str:
-     """Convert normalized content into plain text for estimation."""
      if content is None:
          return ""
      if isinstance(content, str):
@@ -153,39 +165,22 @@ def _stringify_content(content: Union[str, List[MessageContent], None]) -> str:
      parts: List[str] = []
      for part in content:
          if isinstance(part, dict):
-             block_type = part.get("type")
-             text_val = part.get("text")
+             text_val = part.get("text") or part.get("content") or ""
              if text_val:
                  parts.append(str(text_val))
-
-             # Capture nested text for tool_result content blocks
-             nested_content = part.get("content")
-             if isinstance(nested_content, list):
-                 nested_text = _stringify_content(nested_content)
+             nested = part.get("content")
+             if isinstance(nested, list):
+                 nested_text = _stringify_content(nested)
                  if nested_text:
                      parts.append(nested_text)
-
-             # Include tool payloads that otherwise don't have "text"
-             if block_type == "tool_use" and part.get("input") is not None:
-                 try:
-                     parts.append(json.dumps(part.get("input"), ensure_ascii=False))
-                 except (TypeError, ValueError) as exc:
-                     logger.warning(
-                         "[message_compaction] Failed to serialize tool_use input for token estimate: %s: %s",
-                         type(exc).__name__, exc,
-                     )
-                     parts.append(str(part.get("input")))
-
-             # OpenAI-style arguments blocks
              if part.get("arguments"):
                  parts.append(str(part.get("arguments")))
          elif hasattr(part, "text"):
-             text_val = getattr(part, "text", "")
+             text_val = getattr(part, "text", "") or ""
              if text_val:
-                 parts.append(str(text_val))
+                 parts.append(text_val)
          else:
              parts.append(str(part))
-     # Filter out empty strings to avoid over-counting separators
      return "\n".join([p for p in parts if p])


@@ -198,7 +193,6 @@ def estimate_conversation_tokens(
      for message in normalized:
          total += estimate_tokens_from_text(_stringify_content(message.get("content")))

-         # Account for OpenAI-style tool_calls payloads (arguments + name)
          tool_calls = message.get("tool_calls")
          if isinstance(tool_calls, list):
              for call in tool_calls:
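
Note: the rewritten _stringify_content drops the old json.dumps handling of tool_use inputs, so those payloads no longer count toward token estimates; nested tool_result text and OpenAI-style "arguments" still do. A rough illustration of what the new version flattens, assuming plain-dict content blocks:

    # Illustrative input for the new _stringify_content (dict shapes assumed).
    content = [
        {"type": "text", "text": "Ran the command."},
        {"type": "tool_use", "name": "Bash", "input": {"command": "ls -la"}},
        {"type": "tool_result", "content": "file_a\nfile_b"},
    ]
    # Expected result: "Ran the command.\nfile_a\nfile_b"
    # The tool_use "input" contributes nothing in 0.2.8, so estimates for
    # tool-heavy turns come out slightly lower than under 0.2.6.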
@@ -219,7 +213,6 @@ def estimate_conversation_tokens(


  def _estimate_tool_schema_tokens(tools: Sequence[Any]) -> int:
-     """Estimate tokens consumed by tool schemas."""
      total = 0
      for tool in tools:
          try:
@@ -229,7 +222,8 @@ def _estimate_tool_schema_tokens(tools: Sequence[Any]) -> int:
          except (AttributeError, TypeError, KeyError, ValueError) as exc:
              logger.warning(
                  "Failed to estimate tokens for tool schema: %s: %s",
-                 type(exc).__name__, exc,
+                 type(exc).__name__,
+                 exc,
                  extra={"tool": getattr(tool, "name", None)},
              )
              continue
@@ -252,51 +246,37 @@ def get_model_context_limit(
      if explicit_limit and explicit_limit > 0:
          return explicit_limit

-     if model_profile and getattr(model_profile, "context_window", None):
-         try:
-             configured = int(model_profile.context_window)  # type: ignore[arg-type]
-             if configured > 0:
-                 return configured
-         except (TypeError, ValueError):
-             pass
-
-     if model_profile and model_profile.model:
-         name = model_profile.model.lower()
-         if "claude" in name:
-             # Claude 4.5 defaults and beta 1M thinking window.
-             if "4.5" in name or "sonnet" in name or "haiku" in name:
-                 return 1_000_000 if "1m" in name or "beta" in name else 200_000
-             if "opus" in name or "4.1" in name:
-                 return 200_000
-             return 200_000
-         if "gpt-4o" in name or "gpt-4.1" in name or "gpt-4-turbo" in name:
-             return 128_000
-         if "gpt-4" in name:
-             return 32_000
-         if "gpt-3.5" in name:
-             return 16_000
-         if "deepseek" in name:
-             return 128_000
-
+     try:
+         model = getattr(model_profile, "model", None) or ""
+     except Exception:
+         model = ""
+
+     # Fallback mapping; tuned for common providers.
+     model = model.lower()
+     if "1000k" in model or "1m" in model:
+         return 1_000_000
+     if "gpt-4o" in model or "gpt4o" in model:
+         return 128_000
+     if "gpt-4" in model:
+         return 32_000
+     if "deepseek" in model:
+         return 128_000
      return DEFAULT_CONTEXT_TOKENS


  def get_remaining_context_tokens(
      model_profile: Optional[ModelProfile], explicit_limit: Optional[int] = None
  ) -> int:
-     """Return the context window minus the model's configured output tokens."""
+     """Context window minus configured output tokens."""
      context_limit = max(get_model_context_limit(model_profile, explicit_limit), MIN_CONTEXT_TOKENS)
      try:
-         max_output_tokens = (
-             int(getattr(model_profile, "max_tokens", 0) or 0) if model_profile else 0
-         )
+         max_output_tokens = int(getattr(model_profile, "max_tokens", 0) or 0) if model_profile else 0
      except (TypeError, ValueError):
          max_output_tokens = 0
      return max(MIN_CONTEXT_TOKENS, context_limit - max(0, max_output_tokens))


  def resolve_auto_compact_enabled(config: GlobalConfig) -> bool:
-     """Return whether auto-compaction is enabled, honoring an env override."""
      env_override = os.getenv("RIPPERDOC_AUTO_COMPACT")
      if env_override is not None:
          normalized = env_override.strip().lower()
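
Note: get_model_context_limit loses both the profile's context_window override and the Claude-specific branch; resolution is now explicit_limit first, then a substring match on the model name, else DEFAULT_CONTEXT_TOKENS, so callers that relied on context_window must pass explicit_limit instead. A self-contained restatement of the fallback (mirroring the diff above, not importing the package) to make the ordering concrete:

    DEFAULT_CONTEXT_TOKENS = 200_000

    def fallback_limit(model_name: str) -> int:
        # Same rules and ordering as the new get_model_context_limit fallback.
        model = model_name.lower()
        if "1000k" in model or "1m" in model:
            return 1_000_000
        if "gpt-4o" in model or "gpt4o" in model:
            return 128_000
        if "gpt-4" in model:
            return 32_000
        if "deepseek" in model:
            return 128_000
        return DEFAULT_CONTEXT_TOKENS

    assert fallback_limit("gpt-4o-mini") == 128_000        # "gpt-4o" matches before bare "gpt-4"
    assert fallback_limit("claude-sonnet-4.5") == 200_000  # claude branch removed; default applies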
@@ -309,7 +289,7 @@ def get_context_usage_status(
      max_context_tokens: Optional[int],
      auto_compact_enabled: bool,
  ) -> ContextUsageStatus:
-     """Compute context usage thresholds using the compaction heuristics."""
+     """Compute usage thresholds."""
      context_limit = max(max_context_tokens or DEFAULT_CONTEXT_TOKENS, MIN_CONTEXT_TOKENS)
      effective_limit = (
          max(MIN_CONTEXT_TOKENS, context_limit - AUTO_COMPACT_BUFFER)
@@ -318,9 +298,7 @@ def get_context_usage_status(
      )

      tokens_left = max(effective_limit - used_tokens, 0)
-     percent_left = (
-         0.0 if effective_limit <= 0 else min(100.0, (tokens_left / effective_limit) * 100)
-     )
+     percent_left = 0.0 if effective_limit <= 0 else min(100.0, (tokens_left / effective_limit) * 100)
      percent_used = 100.0 - percent_left

      warning_limit = max(0, effective_limit - WARNING_THRESHOLD)
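
Note: the arithmetic here is unchanged apart from formatting; a worked example with the module constants makes the thresholds concrete:

    # Worked example (constants from this file; 200k window, auto-compact on).
    context_limit = 200_000
    effective_limit = max(20_000, context_limit - 13_000)  # 187_000 (AUTO_COMPACT_BUFFER)
    used_tokens = 150_000
    tokens_left = max(effective_limit - used_tokens, 0)    # 37_000
    percent_left = min(100.0, tokens_left / effective_limit * 100)  # ~19.8
    warning_limit = max(0, effective_limit - 20_000)       # 167_000 (WARNING_THRESHOLD)
    # 150_000 used is below 167_000, so this conversation would not yet
    # register as above the warning threshold.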
@@ -371,9 +349,7 @@ def summarize_context_usage(
      )


- def find_latest_assistant_usage_tokens(
-     messages: Sequence[ConversationMessage],
- ) -> int:
+ def find_latest_assistant_usage_tokens(messages: Sequence[ConversationMessage]) -> int:
      """Best-effort extraction of usage tokens from the latest assistant message."""
      for message in reversed(messages):
          if getattr(message, "type", "") != "assistant":
@@ -413,7 +389,6 @@ def estimate_used_tokens(
      protocol: str = "anthropic",
      precomputed_total_tokens: Optional[int] = None,
  ) -> int:
-     """Return usage tokens if present; otherwise fall back to an estimated total."""
      usage_tokens = find_latest_assistant_usage_tokens(messages)
      if usage_tokens > 0:
          return usage_tokens
@@ -422,29 +397,6 @@ def estimate_used_tokens(
      return estimate_conversation_tokens(messages, protocol=protocol)


- def register_cleanup_callback(callback: Callable[[], None]) -> Callable[[], None]:
-     """Register a callback that will run after a compaction pass."""
-     _cleanup_callbacks.append(callback)
-
-     def _unregister() -> None:
-         nonlocal callback
-         _cleanup_callbacks[:] = [cb for cb in _cleanup_callbacks if cb is not callback]
-
-     return _unregister
-
-
- def _run_cleanup_callbacks() -> None:
-     callbacks = list(_cleanup_callbacks)
-     for callback in callbacks:
-         try:
-             callback()
-         except (RuntimeError, TypeError, ValueError, AttributeError) as exc:
-             logger.debug(
-                 "[message_compaction] Cleanup callback failed: %s: %s",
-                 type(exc).__name__, exc,
-             )
-
-
  def _normalize_tool_use_id(block: Any) -> str:
      if block is None:
          return ""
@@ -454,7 +406,7 @@ def _normalize_tool_use_id(block: Any) -> str:


  def _estimate_message_tokens(content_block: Any) -> int:
-     """Estimate tokens for a single content block."""
+     """Estimate tokens for a single content block (text/image only)."""
      if content_block is None:
          return 0

@@ -467,9 +419,7 @@ def _estimate_message_tokens(content_block: Any) -> int:
      if isinstance(content, list):
          total = 0
          for part in content:
-             part_type = getattr(part, "type", None) or (
-                 part.get("type") if isinstance(part, dict) else None
-             )
+             part_type = getattr(part, "type", None) or (part.get("type") if isinstance(part, dict) else None)
              if part_type == "text":
                  text_val = getattr(part, "text", None) if hasattr(part, "text") else None
                  if text_val is None and isinstance(part, dict):
@@ -493,37 +443,37 @@ def _get_cached_token_count(cache_key: str, content_block: Any) -> int:
      return estimated


- def compact_messages(
+ def micro_compact_messages(
      messages: Sequence[ConversationMessage],
-     max_tokens: Optional[int] = None,
      *,
+     max_tokens: Optional[int] = None,
+     context_limit: Optional[int] = None,
+     auto_compact_enabled: Optional[bool] = None,
      protocol: str = "anthropic",
- ) -> CompactionResult:
-     """Compact tool results by replacing older outputs with placeholders."""
-     global _is_compacting
-     _is_compacting = False
-
+     trigger_type: str = "auto",
+ ) -> MicroCompactionResult:
+     """Micro-compaction: strip older tool_result payloads to keep context lean."""
      tokens_before = estimate_conversation_tokens(messages, protocol=protocol)

      if _parse_truthy_env_value(os.getenv("DISABLE_MICROCOMPACT")):
-         return CompactionResult(
+         return MicroCompactionResult(
              messages=list(messages),
              tokens_before=tokens_before,
              tokens_after=tokens_before,
              tokens_saved=0,
-             cleared_tool_ids=set(),
+             tools_compacted=0,
+             trigger_type=trigger_type,
              was_compacted=False,
          )

-     # Presence of this flag mirrors the upstream implementation even though we don't act on it.
+     # Legacy flag kept for parity with upstream behavior.
      _parse_truthy_env_value(os.getenv("USE_API_CONTEXT_MANAGEMENT"))

      is_max_tokens_specified = max_tokens is not None
      try:
-         base_max_tokens = int(max_tokens) if max_tokens is not None else MAX_TOKENS_HARD
+         effective_max_tokens = int(max_tokens) if max_tokens is not None else MAX_TOKENS_HARD
      except (TypeError, ValueError):
-         base_max_tokens = MAX_TOKENS_HARD
-     effective_max_tokens = max(base_max_tokens, MIN_CONTEXT_TOKENS)
+         effective_max_tokens = MAX_TOKENS_HARD

      tool_use_ids_to_compact: List[str] = []
      token_counts_by_tool_use_id: Dict[str, int] = {}
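
Note: the rename also makes every option keyword-only, so an old positional call like compact_messages(messages, 30_000) must become micro_compact_messages(messages, max_tokens=30_000). A hypothetical call under the new signature, where `conversation` stands in for a real message list:

    # Hypothetical invocation; `conversation` is assumed to be a list of
    # UserMessage/AssistantMessage objects as used elsewhere in this module.
    result = micro_compact_messages(
        conversation,
        context_limit=200_000,
        auto_compact_enabled=True,  # skips the global-config lookup below
        protocol="anthropic",
    )
    if result.was_compacted:
        print(f"Cleared {result.tools_compacted} tool results, "
              f"saved {result.tokens_saved} tokens ({result.trigger_type}).")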
@@ -533,6 +483,7 @@ def compact_messages(
          content = getattr(getattr(message, "message", None), "content", None)
          if msg_type not in {"user", "assistant"} or not isinstance(content, list):
              continue
+
          for content_block in content:
              block_type = getattr(content_block, "type", None) or (
                  content_block.get("type") if isinstance(content_block, dict) else None
@@ -541,6 +492,7 @@ def compact_messages(
              tool_name = getattr(content_block, "name", None)
              if tool_name is None and isinstance(content_block, dict):
                  tool_name = content_block.get("name")
+
              if block_type == "tool_use" and tool_name in TOOL_COMMANDS:
                  if tool_use_id and tool_use_id not in _processed_tool_use_ids:
                      tool_use_ids_to_compact.append(tool_use_id)
@@ -549,9 +501,7 @@ def compact_messages(
                      token_counts_by_tool_use_id[tool_use_id] = token_count

      latest_tool_use_ids = (
-         tool_use_ids_to_compact[-MAX_TOOL_USES_TO_PRESERVE:]
-         if MAX_TOOL_USES_TO_PRESERVE > 0
-         else []
+         tool_use_ids_to_compact[-MAX_TOOL_USES_TO_PRESERVE:] if MAX_TOOL_USES_TO_PRESERVE > 0 else []
      )
      total_token_count = sum(token_counts_by_tool_use_id.values())

@@ -566,14 +516,16 @@ def compact_messages(
          total_tokens_removed += token_counts_by_tool_use_id.get(tool_use_id, 0)

      if not is_max_tokens_specified:
-         auto_compact_enabled = resolve_auto_compact_enabled(get_global_config())
+         resolved_auto_compact = (
+             auto_compact_enabled
+             if auto_compact_enabled is not None
+             else resolve_auto_compact_enabled(get_global_config())
+         )
          usage_tokens = estimate_used_tokens(
              messages, protocol=protocol, precomputed_total_tokens=tokens_before
          )
          status = get_context_usage_status(
-             usage_tokens,
-             max_context_tokens=max_tokens,
-             auto_compact_enabled=auto_compact_enabled,
+             usage_tokens, max_context_tokens=context_limit, auto_compact_enabled=resolved_auto_compact
          )
          if not status.is_above_warning_threshold or total_tokens_removed < MAX_TOKENS_SOFT:
              ids_to_remove.clear()
@@ -587,12 +539,12 @@ def compact_messages(
      for message in messages:
          msg_type = getattr(message, "type", "")
          content = getattr(getattr(message, "message", None), "content", None)
+
          if msg_type not in {"user", "assistant"} or not isinstance(content, list):
              compacted_messages.append(message)
              continue

          if msg_type == "assistant" and isinstance(message, AssistantMessage):
-             # Copy content list to avoid mutating the original message.
              compacted_messages.append(
                  AssistantMessage(
                      message=message.message.model_copy(update={"content": list(content)}),
@@ -606,23 +558,21 @@ def compact_messages(

          filtered_content: List[MessageContent] = []
          modified = False
+
          for content_item in content:
              block_type = getattr(content_item, "type", None) or (
                  content_item.get("type") if isinstance(content_item, dict) else None
              )
              tool_use_id = _normalize_tool_use_id(content_item)
+
              if block_type == "tool_result" and _should_remove(tool_use_id):
                  modified = True
                  if hasattr(content_item, "model_copy"):
                      new_block = content_item.model_copy()
-                     new_block.text = COMPACT_PLACEHOLDER
+                     new_block.text = MICRO_PLACEHOLDER
                  else:
-                     block_dict = (
-                         dict(content_item)
-                         if isinstance(content_item, dict)
-                         else {"type": "tool_result"}
-                     )
-                     block_dict["text"] = COMPACT_PLACEHOLDER
+                     block_dict = dict(content_item) if isinstance(content_item, dict) else {"type": "tool_result"}
+                     block_dict["text"] = MICRO_PLACEHOLDER
                      block_dict["tool_use_id"] = tool_use_id
                      new_block = MessageContent(**block_dict)
                  filtered_content.append(new_block)
@@ -654,10 +604,8 @@ def compact_messages(
      tokens_saved = max(0, tokens_before - tokens_after)

      if ids_to_remove:
-         _is_compacting = True
-         _run_cleanup_callbacks()
          logger.debug(
-             "[message_compaction] Compacted conversation",
+             "[message_compaction] Micro-compacted conversation",
              extra={
                  "tokens_before": tokens_before,
                  "tokens_after": tokens_after,
@@ -666,11 +614,18 @@ def compact_messages(
              },
          )

-     return CompactionResult(
+     return MicroCompactionResult(
          messages=compacted_messages,
          tokens_before=tokens_before,
          tokens_after=tokens_after,
          tokens_saved=tokens_saved,
-         cleared_tool_ids=ids_to_remove,
+         tools_compacted=len(ids_to_remove),
+         trigger_type="manual" if is_max_tokens_specified else trigger_type,
          was_compacted=bool(ids_to_remove),
      )
+
+
+ def reset_micro_compaction_state() -> None:
+     """Clear caches and processed IDs (useful for tests)."""
+     _processed_tool_use_ids.clear()
+     _token_cache.clear()
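
Note: _processed_tool_use_ids and _token_cache persist at module scope, so repeated passes skip tool results that were already cleared, and the slicing logic above keeps the newest MAX_TOOL_USES_TO_PRESERVE (3) tool results out of the cleared set. reset_micro_compaction_state is the new public way to drop that state between test cases; a sketch of the likely usage (pytest-style, fixture name ours, assuming this file is ripperdoc/utils/message_compaction.py):

    # Sketch of a test fixture using the new reset helper.
    import pytest
    from ripperdoc.utils.message_compaction import reset_micro_compaction_state

    @pytest.fixture(autouse=True)
    def fresh_compaction_state():
        reset_micro_compaction_state()  # start each test with empty caches
        yield
        reset_micro_compaction_state()  # and leave no state behind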