ripperdoc 0.2.6__py3-none-any.whl → 0.2.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ripperdoc/__init__.py +1 -1
- ripperdoc/cli/commands/clear_cmd.py +1 -0
- ripperdoc/cli/commands/exit_cmd.py +1 -1
- ripperdoc/cli/commands/resume_cmd.py +71 -37
- ripperdoc/cli/ui/file_mention_completer.py +221 -0
- ripperdoc/cli/ui/helpers.py +100 -3
- ripperdoc/cli/ui/interrupt_handler.py +175 -0
- ripperdoc/cli/ui/message_display.py +249 -0
- ripperdoc/cli/ui/panels.py +60 -0
- ripperdoc/cli/ui/rich_ui.py +147 -630
- ripperdoc/cli/ui/tool_renderers.py +2 -2
- ripperdoc/core/agents.py +4 -4
- ripperdoc/core/query_utils.py +1 -1
- ripperdoc/core/tool.py +1 -1
- ripperdoc/tools/bash_tool.py +1 -1
- ripperdoc/tools/file_edit_tool.py +2 -2
- ripperdoc/tools/file_read_tool.py +1 -1
- ripperdoc/tools/multi_edit_tool.py +1 -1
- ripperdoc/utils/conversation_compaction.py +476 -0
- ripperdoc/utils/message_compaction.py +109 -154
- ripperdoc/utils/message_formatting.py +216 -0
- ripperdoc/utils/messages.py +31 -9
- ripperdoc/utils/session_history.py +19 -7
- {ripperdoc-0.2.6.dist-info → ripperdoc-0.2.7.dist-info}/METADATA +1 -1
- {ripperdoc-0.2.6.dist-info → ripperdoc-0.2.7.dist-info}/RECORD +29 -23
- {ripperdoc-0.2.6.dist-info → ripperdoc-0.2.7.dist-info}/WHEEL +0 -0
- {ripperdoc-0.2.6.dist-info → ripperdoc-0.2.7.dist-info}/entry_points.txt +0 -0
- {ripperdoc-0.2.6.dist-info → ripperdoc-0.2.7.dist-info}/licenses/LICENSE +0 -0
- {ripperdoc-0.2.6.dist-info → ripperdoc-0.2.7.dist-info}/top_level.txt +0 -0
ripperdoc/utils/message_compaction.py

```diff
@@ -1,11 +1,11 @@
-"""
+"""Context compaction utilities"""

 from __future__ import annotations

 import json
 import os
 from dataclasses import dataclass
-from typing import Any,
+from typing import Any, Dict, List, Optional, Sequence, Set, Union

 from ripperdoc.core.config import GlobalConfig, ModelProfile, get_global_config
 from ripperdoc.utils.log import get_logger
@@ -22,7 +22,7 @@ logger = get_logger()

 ConversationMessage = Union[UserMessage, AssistantMessage, ProgressMessage]

-#
+# Thresholds.
 MAX_TOKENS_SOFT = 20_000
 MAX_TOKENS_HARD = 40_000
 MAX_TOOL_USES_TO_PRESERVE = 3
@@ -30,23 +30,39 @@ IMAGE_TOKEN_COST = 2_000
 AUTO_COMPACT_BUFFER = 13_000
 WARNING_THRESHOLD = 20_000
 ERROR_THRESHOLD = 20_000
-
-TOOL_COMMANDS: Set[str] = {"Read", "Bash", "Grep", "Glob", "LS", "WebSearch", "WebFetch"}
+MICRO_PLACEHOLDER = "[Old tool result content cleared]"

-#
+# Context sizing.
 DEFAULT_CONTEXT_TOKENS = 200_000
 MIN_CONTEXT_TOKENS = 20_000

-#
+# Tools likely to generate large payloads.
+TOOL_COMMANDS: Set[str] = {
+    "Read",
+    "Bash",
+    "Grep",
+    "Glob",
+    "LS",
+    "WebSearch",
+    "WebFetch",
+    "BashOutput",
+    "ListMcpServers",
+    "ListMcpResources",
+    "ReadMcpResource",
+    # "FileEdit",
+    # "MultiEdit",
+    # "NotebookEdit",
+    # "FileWrite",
+}
+
+# State to avoid re-compacting the same tool results.
 _processed_tool_use_ids: Set[str] = set()
 _token_cache: Dict[str, int] = {}
-_cleanup_callbacks: List[Callable[[], None]] = []
-_is_compacting: bool = False


 @dataclass
 class ContextUsageStatus:
-    """Snapshot of
+    """Snapshot of current context usage."""

     used_tokens: int
     max_context_tokens: int
```
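The constants above drive the new micro-compaction pass: only tools in `TOOL_COMMANDS` are candidates, and `_processed_tool_use_ids` ensures each result is cleared at most once. A minimal sketch of that eligibility check, assuming dict-shaped `tool_use` blocks (the helper name `eligible_for_micro_compaction` is hypothetical, not from the diff):

```python
from typing import Any, Dict, Set

TOOL_COMMANDS: Set[str] = {"Read", "Bash", "Grep", "Glob", "LS", "WebSearch", "WebFetch"}
_processed_tool_use_ids: Set[str] = set()


def eligible_for_micro_compaction(block: Dict[str, Any]) -> bool:
    """Hypothetical guard: large-payload tools only, each tool_use id at most once."""
    return (
        block.get("type") == "tool_use"
        and block.get("name") in TOOL_COMMANDS
        and bool(block.get("id"))
        and block.get("id") not in _processed_tool_use_ids
    )


# A second pass over the same id is a no-op.
block = {"type": "tool_use", "name": "Read", "id": "toolu_01"}
assert eligible_for_micro_compaction(block)
_processed_tool_use_ids.add(block["id"])
assert not eligible_for_micro_compaction(block)
```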
```diff
@@ -59,7 +75,6 @@ class ContextUsageStatus:

     @property
     def total_tokens(self) -> int:
-        """Alias for backward compatibility."""
         return self.used_tokens

     @property
@@ -75,21 +90,9 @@ class ContextUsageStatus:
         return self.is_above_auto_compact_threshold


-@dataclass
-class CompactionResult:
-    """Result of a compaction run."""
-
-    messages: List[ConversationMessage]
-    tokens_before: int
-    tokens_after: int
-    tokens_saved: int
-    cleared_tool_ids: Set[str]
-    was_compacted: bool
-
-
 @dataclass
 class ContextBreakdown:
-    """Detailed breakdown
+    """Detailed breakdown for UI display."""

     max_context_tokens: int
     system_prompt_tokens: int
@@ -112,7 +115,6 @@ class ContextBreakdown:

     @property
     def effective_tokens(self) -> int:
-        """Tokens that count against the limit including any reserved buffer."""
         return min(self.max_context_tokens, self.reported_tokens + self.reserved_tokens)

     @property
@@ -131,8 +133,20 @@ class ContextBreakdown:
         return min(100.0, (tokens / self.max_context_tokens) * 100)


+@dataclass
+class MicroCompactionResult:
+    """Result of a micro-compaction pass."""
+
+    messages: List[ConversationMessage]
+    tokens_before: int
+    tokens_after: int
+    tokens_saved: int
+    tools_compacted: int
+    trigger_type: str
+    was_compacted: bool
+
+
 def _parse_truthy_env_value(value: Optional[str]) -> bool:
-    """Interpret common truthy environment variable values."""
     if value is None:
         return False
     normalized = value.strip().lower()
```
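`MicroCompactionResult` replaces the deleted `CompactionResult` and adds `tools_compacted` plus `trigger_type`. A hedged sketch of how a caller might report one (the `log_compaction` helper is illustrative, not part of the package):

```python
def log_compaction(result: "MicroCompactionResult") -> None:
    # Illustrative consumer of the dataclass defined in the hunk above.
    if not result.was_compacted:
        return
    pct = 100.0 * result.tokens_saved / max(result.tokens_before, 1)
    print(
        f"micro-compaction ({result.trigger_type}): {result.tools_compacted} tool results cleared, "
        f"{result.tokens_before} -> {result.tokens_after} tokens ({pct:.1f}% saved)"
    )
```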
```diff
@@ -140,12 +154,10 @@ def _parse_truthy_env_value(value: Optional[str]) -> bool:


 def estimate_tokens_from_text(text: str) -> int:
-
-    return estimate_tokens(text)
+    return estimate_tokens(text or "")


 def _stringify_content(content: Union[str, List[MessageContent], None]) -> str:
-    """Convert normalized content into plain text for estimation."""
     if content is None:
         return ""
     if isinstance(content, str):
@@ -153,39 +165,22 @@ def _stringify_content(content: Union[str, List[MessageContent], None]) -> str:
     parts: List[str] = []
     for part in content:
         if isinstance(part, dict):
-
-            text_val = part.get("text")
+            text_val = part.get("text") or part.get("content") or ""
             if text_val:
                 parts.append(str(text_val))
-
-
-
-            if isinstance(nested_content, list):
-                nested_text = _stringify_content(nested_content)
+            nested = part.get("content")
+            if isinstance(nested, list):
+                nested_text = _stringify_content(nested)
                 if nested_text:
                     parts.append(nested_text)
-
-            # Include tool payloads that otherwise don't have "text"
-            if block_type == "tool_use" and part.get("input") is not None:
-                try:
-                    parts.append(json.dumps(part.get("input"), ensure_ascii=False))
-                except (TypeError, ValueError) as exc:
-                    logger.warning(
-                        "[message_compaction] Failed to serialize tool_use input for token estimate: %s: %s",
-                        type(exc).__name__, exc,
-                    )
-                    parts.append(str(part.get("input")))
-
-            # OpenAI-style arguments blocks
             if part.get("arguments"):
                 parts.append(str(part.get("arguments")))
         elif hasattr(part, "text"):
-            text_val = getattr(part, "text", "")
+            text_val = getattr(part, "text", "") or ""
             if text_val:
-                parts.append(
+                parts.append(text_val)
         else:
             parts.append(str(part))
-    # Filter out empty strings to avoid over-counting separators
     return "\n".join([p for p in parts if p])


```
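The rewritten `_stringify_content` flattens mixed content in a fixed order: `text`, then `content`, then nested lists, then OpenAI-style `arguments`. A standalone sketch of that order, assuming dict-shaped blocks only:

```python
from typing import Any, List, Union


def flatten_content(content: Union[str, List[Any], None]) -> str:
    """Sketch of the flattening order: text -> content -> nested list -> arguments."""
    if content is None:
        return ""
    if isinstance(content, str):
        return content
    parts: List[str] = []
    for part in content:
        if not isinstance(part, dict):
            parts.append(str(part))
            continue
        text_val = part.get("text") or part.get("content") or ""
        if isinstance(text_val, str) and text_val:
            parts.append(text_val)
        nested = part.get("content")
        if isinstance(nested, list):
            parts.append(flatten_content(nested))
        if part.get("arguments"):
            parts.append(str(part["arguments"]))
    return "\n".join(p for p in parts if p)


print(flatten_content([{"type": "text", "text": "hello"}, {"content": [{"text": "nested"}]}]))
# hello
# nested
```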
```diff
@@ -198,7 +193,6 @@ def estimate_conversation_tokens(
     for message in normalized:
         total += estimate_tokens_from_text(_stringify_content(message.get("content")))

-        # Account for OpenAI-style tool_calls payloads (arguments + name)
         tool_calls = message.get("tool_calls")
         if isinstance(tool_calls, list):
             for call in tool_calls:
@@ -219,7 +213,6 @@ def estimate_conversation_tokens(


 def _estimate_tool_schema_tokens(tools: Sequence[Any]) -> int:
-    """Estimate tokens consumed by tool schemas."""
     total = 0
     for tool in tools:
         try:
@@ -229,7 +222,8 @@ def _estimate_tool_schema_tokens(tools: Sequence[Any]) -> int:
         except (AttributeError, TypeError, KeyError, ValueError) as exc:
             logger.warning(
                 "Failed to estimate tokens for tool schema: %s: %s",
-                type(exc).__name__,
+                type(exc).__name__,
+                exc,
                 extra={"tool": getattr(tool, "name", None)},
             )
             continue
@@ -252,51 +246,37 @@ def get_model_context_limit(
     if explicit_limit and explicit_limit > 0:
         return explicit_limit

-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-        return 200_000
-    return 200_000
-    if "gpt-4o" in name or "gpt-4.1" in name or "gpt-4-turbo" in name:
-        return 128_000
-    if "gpt-4" in name:
-        return 32_000
-    if "gpt-3.5" in name:
-        return 16_000
-    if "deepseek" in name:
-        return 128_000
-
+    try:
+        model = getattr(model_profile, "model", None) or ""
+    except Exception:
+        model = ""
+
+    # Fallback mapping; tuned for common providers.
+    model = model.lower()
+    if "1000k" in model or "1m" in model:
+        return 1_000_000
+    if "gpt-4o" in model or "gpt4o" in model:
+        return 128_000
+    if "gpt-4" in model:
+        return 32_000
+    if "deepseek" in model:
+        return 128_000
     return DEFAULT_CONTEXT_TOKENS


 def get_remaining_context_tokens(
     model_profile: Optional[ModelProfile], explicit_limit: Optional[int] = None
 ) -> int:
-    """
+    """Context window minus configured output tokens."""
     context_limit = max(get_model_context_limit(model_profile, explicit_limit), MIN_CONTEXT_TOKENS)
     try:
-        max_output_tokens = (
-            int(getattr(model_profile, "max_tokens", 0) or 0) if model_profile else 0
-        )
+        max_output_tokens = int(getattr(model_profile, "max_tokens", 0) or 0) if model_profile else 0
     except (TypeError, ValueError):
         max_output_tokens = 0
     return max(MIN_CONTEXT_TOKENS, context_limit - max(0, max_output_tokens))


 def resolve_auto_compact_enabled(config: GlobalConfig) -> bool:
-    """Return whether auto-compaction is enabled, honoring an env override."""
     env_override = os.getenv("RIPPERDOC_AUTO_COMPACT")
     if env_override is not None:
         normalized = env_override.strip().lower()
```
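The rewritten fallback in `get_model_context_limit` infers the window from substrings of the model name, checking the most specific patterns first. A small sketch of that lookup using the mapping from the hunk above (the model names in the asserts are examples, not an exhaustive list):

```python
DEFAULT_CONTEXT_TOKENS = 200_000


def fallback_context_limit(model: str) -> int:
    # Order matters: "1m"/"1000k" variants win before the gpt-4 family.
    model = model.lower()
    if "1000k" in model or "1m" in model:
        return 1_000_000
    if "gpt-4o" in model or "gpt4o" in model:
        return 128_000
    if "gpt-4" in model:
        return 32_000
    if "deepseek" in model:
        return 128_000
    return DEFAULT_CONTEXT_TOKENS


assert fallback_context_limit("gpt-4o-mini") == 128_000
assert fallback_context_limit("deepseek-chat") == 128_000
assert fallback_context_limit("claude-sonnet-4") == 200_000  # falls through to the default
```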
```diff
@@ -309,7 +289,7 @@ def get_context_usage_status(
     max_context_tokens: Optional[int],
     auto_compact_enabled: bool,
 ) -> ContextUsageStatus:
-    """Compute
+    """Compute usage thresholds."""
     context_limit = max(max_context_tokens or DEFAULT_CONTEXT_TOKENS, MIN_CONTEXT_TOKENS)
     effective_limit = (
         max(MIN_CONTEXT_TOKENS, context_limit - AUTO_COMPACT_BUFFER)
@@ -318,9 +298,7 @@ def get_context_usage_status(
     )

     tokens_left = max(effective_limit - used_tokens, 0)
-    percent_left = (
-        0.0 if effective_limit <= 0 else min(100.0, (tokens_left / effective_limit) * 100)
-    )
+    percent_left = 0.0 if effective_limit <= 0 else min(100.0, (tokens_left / effective_limit) * 100)
     percent_used = 100.0 - percent_left

     warning_limit = max(0, effective_limit - WARNING_THRESHOLD)
```
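Worked numbers for these thresholds, using the constants from the top of the module (the `used_tokens` figure is illustrative):

```python
# Constants from the module; auto-compact enabled.
DEFAULT_CONTEXT_TOKENS = 200_000
AUTO_COMPACT_BUFFER = 13_000
WARNING_THRESHOLD = 20_000

context_limit = DEFAULT_CONTEXT_TOKENS
effective_limit = max(20_000, context_limit - AUTO_COMPACT_BUFFER)  # 187_000
used_tokens = 170_000

tokens_left = max(effective_limit - used_tokens, 0)               # 17_000
percent_left = min(100.0, (tokens_left / effective_limit) * 100)  # ~9.1%
warning_limit = max(0, effective_limit - WARNING_THRESHOLD)       # 167_000
assert used_tokens > warning_limit  # this conversation sits inside the warning band
```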
```diff
@@ -371,9 +349,7 @@ def summarize_context_usage(
     )


-def find_latest_assistant_usage_tokens(
-    messages: Sequence[ConversationMessage],
-) -> int:
+def find_latest_assistant_usage_tokens(messages: Sequence[ConversationMessage]) -> int:
     """Best-effort extraction of usage tokens from the latest assistant message."""
     for message in reversed(messages):
         if getattr(message, "type", "") != "assistant":
@@ -413,7 +389,6 @@ def estimate_used_tokens(
     protocol: str = "anthropic",
     precomputed_total_tokens: Optional[int] = None,
 ) -> int:
-    """Return usage tokens if present; otherwise fall back to an estimated total."""
     usage_tokens = find_latest_assistant_usage_tokens(messages)
     if usage_tokens > 0:
         return usage_tokens
@@ -422,29 +397,6 @@
     return estimate_conversation_tokens(messages, protocol=protocol)


-def register_cleanup_callback(callback: Callable[[], None]) -> Callable[[], None]:
-    """Register a callback that will run after a compaction pass."""
-    _cleanup_callbacks.append(callback)
-
-    def _unregister() -> None:
-        nonlocal callback
-        _cleanup_callbacks[:] = [cb for cb in _cleanup_callbacks if cb is not callback]
-
-    return _unregister
-
-
-def _run_cleanup_callbacks() -> None:
-    callbacks = list(_cleanup_callbacks)
-    for callback in callbacks:
-        try:
-            callback()
-        except (RuntimeError, TypeError, ValueError, AttributeError) as exc:
-            logger.debug(
-                "[message_compaction] Cleanup callback failed: %s: %s",
-                type(exc).__name__, exc,
-            )
-
-
 def _normalize_tool_use_id(block: Any) -> str:
     if block is None:
         return ""
@@ -454,7 +406,7 @@ def _normalize_tool_use_id(block: Any) -> str:


 def _estimate_message_tokens(content_block: Any) -> int:
-    """Estimate tokens for a single content block."""
+    """Estimate tokens for a single content block (text/image only)."""
     if content_block is None:
         return 0

@@ -467,9 +419,7 @@ def _estimate_message_tokens(content_block: Any) -> int:
     if isinstance(content, list):
         total = 0
         for part in content:
-            part_type = getattr(part, "type", None) or (
-                part.get("type") if isinstance(part, dict) else None
-            )
+            part_type = getattr(part, "type", None) or (part.get("type") if isinstance(part, dict) else None)
             if part_type == "text":
                 text_val = getattr(part, "text", None) if hasattr(part, "text") else None
                 if text_val is None and isinstance(part, dict):
@@ -493,37 +443,37 @@ def _get_cached_token_count(cache_key: str, content_block: Any) -> int:
     return estimated


-def
+def micro_compact_messages(
     messages: Sequence[ConversationMessage],
-    max_tokens: Optional[int] = None,
     *,
+    max_tokens: Optional[int] = None,
+    context_limit: Optional[int] = None,
+    auto_compact_enabled: Optional[bool] = None,
     protocol: str = "anthropic",
-
-
-
-    _is_compacting = False
-
+    trigger_type: str = "auto",
+) -> MicroCompactionResult:
+    """Micro-compaction: strip older tool_result payloads to keep context lean."""
     tokens_before = estimate_conversation_tokens(messages, protocol=protocol)

     if _parse_truthy_env_value(os.getenv("DISABLE_MICROCOMPACT")):
-        return
+        return MicroCompactionResult(
             messages=list(messages),
             tokens_before=tokens_before,
             tokens_after=tokens_before,
             tokens_saved=0,
-
+            tools_compacted=0,
+            trigger_type=trigger_type,
             was_compacted=False,
         )

-    #
+    # Legacy flag kept for parity with upstream behavior.
     _parse_truthy_env_value(os.getenv("USE_API_CONTEXT_MANAGEMENT"))

     is_max_tokens_specified = max_tokens is not None
     try:
-
+        effective_max_tokens = int(max_tokens) if max_tokens is not None else MAX_TOKENS_HARD
     except (TypeError, ValueError):
-
-        effective_max_tokens = max(base_max_tokens, MIN_CONTEXT_TOKENS)
+        effective_max_tokens = MAX_TOKENS_HARD

     tool_use_ids_to_compact: List[str] = []
     token_counts_by_tool_use_id: Dict[str, int] = {}
```
```diff
@@ -533,6 +483,7 @@ def compact_messages(
         content = getattr(getattr(message, "message", None), "content", None)
         if msg_type not in {"user", "assistant"} or not isinstance(content, list):
             continue
+
         for content_block in content:
             block_type = getattr(content_block, "type", None) or (
                 content_block.get("type") if isinstance(content_block, dict) else None
@@ -541,6 +492,7 @@ def compact_messages(
             tool_name = getattr(content_block, "name", None)
             if tool_name is None and isinstance(content_block, dict):
                 tool_name = content_block.get("name")
+
             if block_type == "tool_use" and tool_name in TOOL_COMMANDS:
                 if tool_use_id and tool_use_id not in _processed_tool_use_ids:
                     tool_use_ids_to_compact.append(tool_use_id)
@@ -549,9 +501,7 @@ def compact_messages(
                 token_counts_by_tool_use_id[tool_use_id] = token_count

     latest_tool_use_ids = (
-        tool_use_ids_to_compact[-MAX_TOOL_USES_TO_PRESERVE:]
-        if MAX_TOOL_USES_TO_PRESERVE > 0
-        else []
+        tool_use_ids_to_compact[-MAX_TOOL_USES_TO_PRESERVE:] if MAX_TOOL_USES_TO_PRESERVE > 0 else []
     )
     total_token_count = sum(token_counts_by_tool_use_id.values())

@@ -566,14 +516,16 @@ def compact_messages(
         total_tokens_removed += token_counts_by_tool_use_id.get(tool_use_id, 0)

     if not is_max_tokens_specified:
-
+        resolved_auto_compact = (
+            auto_compact_enabled
+            if auto_compact_enabled is not None
+            else resolve_auto_compact_enabled(get_global_config())
+        )
         usage_tokens = estimate_used_tokens(
             messages, protocol=protocol, precomputed_total_tokens=tokens_before
         )
         status = get_context_usage_status(
-            usage_tokens,
-            max_context_tokens=max_tokens,
-            auto_compact_enabled=auto_compact_enabled,
+            usage_tokens, max_context_tokens=context_limit, auto_compact_enabled=resolved_auto_compact
         )
         if not status.is_above_warning_threshold or total_tokens_removed < MAX_TOKENS_SOFT:
             ids_to_remove.clear()
@@ -587,12 +539,12 @@ def compact_messages(
     for message in messages:
         msg_type = getattr(message, "type", "")
         content = getattr(getattr(message, "message", None), "content", None)
+
         if msg_type not in {"user", "assistant"} or not isinstance(content, list):
             compacted_messages.append(message)
             continue

         if msg_type == "assistant" and isinstance(message, AssistantMessage):
-            # Copy content list to avoid mutating the original message.
             compacted_messages.append(
                 AssistantMessage(
                     message=message.message.model_copy(update={"content": list(content)}),
@@ -606,23 +558,21 @@ def compact_messages(

         filtered_content: List[MessageContent] = []
         modified = False
+
         for content_item in content:
             block_type = getattr(content_item, "type", None) or (
                 content_item.get("type") if isinstance(content_item, dict) else None
             )
             tool_use_id = _normalize_tool_use_id(content_item)
+
             if block_type == "tool_result" and _should_remove(tool_use_id):
                 modified = True
                 if hasattr(content_item, "model_copy"):
                     new_block = content_item.model_copy()
-                    new_block.text =
+                    new_block.text = MICRO_PLACEHOLDER
                 else:
-                    block_dict = (
-
-                        if isinstance(content_item, dict)
-                        else {"type": "tool_result"}
-                    )
-                    block_dict["text"] = COMPACT_PLACEHOLDER
+                    block_dict = dict(content_item) if isinstance(content_item, dict) else {"type": "tool_result"}
+                    block_dict["text"] = MICRO_PLACEHOLDER
                     block_dict["tool_use_id"] = tool_use_id
                     new_block = MessageContent(**block_dict)
                 filtered_content.append(new_block)
```
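Cleared `tool_result` blocks keep their `tool_use_id` but lose their payload, so the transcript stays structurally valid for the API. A dict-only sketch of the substitution (the pydantic `model_copy` branch in the hunk is the equivalent for typed blocks):

```python
MICRO_PLACEHOLDER = "[Old tool result content cleared]"


def clear_tool_result(block: dict) -> dict:
    """Return a copy of a tool_result block with its payload replaced by the placeholder."""
    cleared = dict(block)
    cleared["text"] = MICRO_PLACEHOLDER
    return cleared


old = {"type": "tool_result", "tool_use_id": "toolu_01", "text": "...thousands of tokens..."}
new = clear_tool_result(old)
assert new["tool_use_id"] == "toolu_01" and new["text"] == MICRO_PLACEHOLDER
```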
```diff
@@ -654,10 +604,8 @@ def compact_messages(
     tokens_saved = max(0, tokens_before - tokens_after)

     if ids_to_remove:
-        _is_compacting = True
-        _run_cleanup_callbacks()
         logger.debug(
-            "[message_compaction]
+            "[message_compaction] Micro-compacted conversation",
             extra={
                 "tokens_before": tokens_before,
                 "tokens_after": tokens_after,
@@ -666,11 +614,18 @@ def compact_messages(
             },
         )

-    return
+    return MicroCompactionResult(
         messages=compacted_messages,
         tokens_before=tokens_before,
         tokens_after=tokens_after,
         tokens_saved=tokens_saved,
-
+        tools_compacted=len(ids_to_remove),
+        trigger_type="manual" if is_max_tokens_specified else trigger_type,
         was_compacted=bool(ids_to_remove),
     )
+
+
+def reset_micro_compaction_state() -> None:
+    """Clear caches and processed IDs (useful for tests)."""
+    _processed_tool_use_ids.clear()
+    _token_cache.clear()
```