ripperdoc-0.2.6-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ripperdoc/__init__.py +3 -0
- ripperdoc/__main__.py +20 -0
- ripperdoc/cli/__init__.py +1 -0
- ripperdoc/cli/cli.py +405 -0
- ripperdoc/cli/commands/__init__.py +82 -0
- ripperdoc/cli/commands/agents_cmd.py +263 -0
- ripperdoc/cli/commands/base.py +19 -0
- ripperdoc/cli/commands/clear_cmd.py +18 -0
- ripperdoc/cli/commands/compact_cmd.py +23 -0
- ripperdoc/cli/commands/config_cmd.py +31 -0
- ripperdoc/cli/commands/context_cmd.py +144 -0
- ripperdoc/cli/commands/cost_cmd.py +82 -0
- ripperdoc/cli/commands/doctor_cmd.py +221 -0
- ripperdoc/cli/commands/exit_cmd.py +19 -0
- ripperdoc/cli/commands/help_cmd.py +20 -0
- ripperdoc/cli/commands/mcp_cmd.py +70 -0
- ripperdoc/cli/commands/memory_cmd.py +202 -0
- ripperdoc/cli/commands/models_cmd.py +413 -0
- ripperdoc/cli/commands/permissions_cmd.py +302 -0
- ripperdoc/cli/commands/resume_cmd.py +98 -0
- ripperdoc/cli/commands/status_cmd.py +167 -0
- ripperdoc/cli/commands/tasks_cmd.py +278 -0
- ripperdoc/cli/commands/todos_cmd.py +69 -0
- ripperdoc/cli/commands/tools_cmd.py +19 -0
- ripperdoc/cli/ui/__init__.py +1 -0
- ripperdoc/cli/ui/context_display.py +298 -0
- ripperdoc/cli/ui/helpers.py +22 -0
- ripperdoc/cli/ui/rich_ui.py +1557 -0
- ripperdoc/cli/ui/spinner.py +49 -0
- ripperdoc/cli/ui/thinking_spinner.py +128 -0
- ripperdoc/cli/ui/tool_renderers.py +298 -0
- ripperdoc/core/__init__.py +1 -0
- ripperdoc/core/agents.py +486 -0
- ripperdoc/core/commands.py +33 -0
- ripperdoc/core/config.py +559 -0
- ripperdoc/core/default_tools.py +88 -0
- ripperdoc/core/permissions.py +252 -0
- ripperdoc/core/providers/__init__.py +47 -0
- ripperdoc/core/providers/anthropic.py +250 -0
- ripperdoc/core/providers/base.py +265 -0
- ripperdoc/core/providers/gemini.py +615 -0
- ripperdoc/core/providers/openai.py +487 -0
- ripperdoc/core/query.py +1058 -0
- ripperdoc/core/query_utils.py +622 -0
- ripperdoc/core/skills.py +295 -0
- ripperdoc/core/system_prompt.py +431 -0
- ripperdoc/core/tool.py +240 -0
- ripperdoc/sdk/__init__.py +9 -0
- ripperdoc/sdk/client.py +333 -0
- ripperdoc/tools/__init__.py +1 -0
- ripperdoc/tools/ask_user_question_tool.py +431 -0
- ripperdoc/tools/background_shell.py +389 -0
- ripperdoc/tools/bash_output_tool.py +98 -0
- ripperdoc/tools/bash_tool.py +1016 -0
- ripperdoc/tools/dynamic_mcp_tool.py +428 -0
- ripperdoc/tools/enter_plan_mode_tool.py +226 -0
- ripperdoc/tools/exit_plan_mode_tool.py +153 -0
- ripperdoc/tools/file_edit_tool.py +346 -0
- ripperdoc/tools/file_read_tool.py +203 -0
- ripperdoc/tools/file_write_tool.py +205 -0
- ripperdoc/tools/glob_tool.py +179 -0
- ripperdoc/tools/grep_tool.py +370 -0
- ripperdoc/tools/kill_bash_tool.py +136 -0
- ripperdoc/tools/ls_tool.py +471 -0
- ripperdoc/tools/mcp_tools.py +591 -0
- ripperdoc/tools/multi_edit_tool.py +456 -0
- ripperdoc/tools/notebook_edit_tool.py +386 -0
- ripperdoc/tools/skill_tool.py +205 -0
- ripperdoc/tools/task_tool.py +379 -0
- ripperdoc/tools/todo_tool.py +494 -0
- ripperdoc/tools/tool_search_tool.py +380 -0
- ripperdoc/utils/__init__.py +1 -0
- ripperdoc/utils/bash_constants.py +51 -0
- ripperdoc/utils/bash_output_utils.py +43 -0
- ripperdoc/utils/coerce.py +34 -0
- ripperdoc/utils/context_length_errors.py +252 -0
- ripperdoc/utils/exit_code_handlers.py +241 -0
- ripperdoc/utils/file_watch.py +135 -0
- ripperdoc/utils/git_utils.py +274 -0
- ripperdoc/utils/json_utils.py +27 -0
- ripperdoc/utils/log.py +176 -0
- ripperdoc/utils/mcp.py +560 -0
- ripperdoc/utils/memory.py +253 -0
- ripperdoc/utils/message_compaction.py +676 -0
- ripperdoc/utils/messages.py +519 -0
- ripperdoc/utils/output_utils.py +258 -0
- ripperdoc/utils/path_ignore.py +677 -0
- ripperdoc/utils/path_utils.py +46 -0
- ripperdoc/utils/permissions/__init__.py +27 -0
- ripperdoc/utils/permissions/path_validation_utils.py +174 -0
- ripperdoc/utils/permissions/shell_command_validation.py +552 -0
- ripperdoc/utils/permissions/tool_permission_utils.py +279 -0
- ripperdoc/utils/prompt.py +17 -0
- ripperdoc/utils/safe_get_cwd.py +31 -0
- ripperdoc/utils/sandbox_utils.py +38 -0
- ripperdoc/utils/session_history.py +260 -0
- ripperdoc/utils/session_usage.py +117 -0
- ripperdoc/utils/shell_token_utils.py +95 -0
- ripperdoc/utils/shell_utils.py +159 -0
- ripperdoc/utils/todo.py +203 -0
- ripperdoc/utils/token_estimation.py +34 -0
- ripperdoc-0.2.6.dist-info/METADATA +193 -0
- ripperdoc-0.2.6.dist-info/RECORD +107 -0
- ripperdoc-0.2.6.dist-info/WHEEL +5 -0
- ripperdoc-0.2.6.dist-info/entry_points.txt +3 -0
- ripperdoc-0.2.6.dist-info/licenses/LICENSE +53 -0
- ripperdoc-0.2.6.dist-info/top_level.txt +1 -0
ripperdoc/utils/message_compaction.py

@@ -0,0 +1,676 @@

"""Utilities for compacting conversation history when context grows too large."""

from __future__ import annotations

import json
import os
from dataclasses import dataclass
from typing import Any, Callable, Dict, List, Optional, Sequence, Set, Union

from ripperdoc.core.config import GlobalConfig, ModelProfile, get_global_config
from ripperdoc.utils.log import get_logger
from ripperdoc.utils.token_estimation import estimate_tokens
from ripperdoc.utils.messages import (
    AssistantMessage,
    MessageContent,
    ProgressMessage,
    UserMessage,
    normalize_messages_for_api,
)

logger = get_logger()

ConversationMessage = Union[UserMessage, AssistantMessage, ProgressMessage]

# Compaction thresholds.
MAX_TOKENS_SOFT = 20_000
MAX_TOKENS_HARD = 40_000
MAX_TOOL_USES_TO_PRESERVE = 3
IMAGE_TOKEN_COST = 2_000
AUTO_COMPACT_BUFFER = 13_000
WARNING_THRESHOLD = 20_000
ERROR_THRESHOLD = 20_000
COMPACT_PLACEHOLDER = "[Old tool result content cleared]"
TOOL_COMMANDS: Set[str] = {"Read", "Bash", "Grep", "Glob", "LS", "WebSearch", "WebFetch"}

# Defaults roughly match modern 200k context windows while still working for smaller models.
DEFAULT_CONTEXT_TOKENS = 200_000
MIN_CONTEXT_TOKENS = 20_000

# Track tool results we've already compacted so we don't reprocess them.
_processed_tool_use_ids: Set[str] = set()
_token_cache: Dict[str, int] = {}
_cleanup_callbacks: List[Callable[[], None]] = []
_is_compacting: bool = False

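The thresholds above interact through `get_context_usage_status` further down: with auto-compaction on, the window is shrunk by `AUTO_COMPACT_BUFFER` before the warning and error offsets are applied. A worked sketch of the arithmetic (not part of the package source):

```python
# Illustrative arithmetic only, assuming a 200k-token window with auto-compact on.
context_limit = 200_000
effective_limit = context_limit - 13_000   # AUTO_COMPACT_BUFFER
warning_limit = effective_limit - 20_000   # WARNING_THRESHOLD
print(effective_limit, warning_limit)      # 187000 167000
```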
@dataclass
class ContextUsageStatus:
    """Snapshot of the current context usage."""

    used_tokens: int
    max_context_tokens: int
    tokens_left: int
    percent_left: float
    percent_used: float
    is_above_warning_threshold: bool
    is_above_error_threshold: bool
    is_above_auto_compact_threshold: bool

    @property
    def total_tokens(self) -> int:
        """Alias for backward compatibility."""
        return self.used_tokens

    @property
    def is_above_warning(self) -> bool:
        return self.is_above_warning_threshold

    @property
    def is_above_error(self) -> bool:
        return self.is_above_error_threshold

    @property
    def should_auto_compact(self) -> bool:
        return self.is_above_auto_compact_threshold

@dataclass
class CompactionResult:
    """Result of a compaction run."""

    messages: List[ConversationMessage]
    tokens_before: int
    tokens_after: int
    tokens_saved: int
    cleared_tool_ids: Set[str]
    was_compacted: bool


@dataclass
class ContextBreakdown:
    """Detailed breakdown of context usage for display."""

    max_context_tokens: int
    system_prompt_tokens: int
    mcp_tokens: int
    tool_schema_tokens: int
    memory_tokens: int
    message_tokens: int
    reserved_tokens: int
    message_count: int

    @property
    def reported_tokens(self) -> int:
        return (
            self.system_prompt_tokens
            + self.mcp_tokens
            + self.tool_schema_tokens
            + self.memory_tokens
            + self.message_tokens
        )

    @property
    def effective_tokens(self) -> int:
        """Tokens that count against the limit including any reserved buffer."""
        return min(self.max_context_tokens, self.reported_tokens + self.reserved_tokens)

    @property
    def free_tokens(self) -> int:
        return max(self.max_context_tokens - self.effective_tokens, 0)

    @property
    def percent_used(self) -> float:
        if self.max_context_tokens <= 0:
            return 0.0
        return min(100.0, (self.effective_tokens / self.max_context_tokens) * 100)

    def percent_of_limit(self, tokens: int) -> float:
        if self.max_context_tokens <= 0:
            return 0.0
        return min(100.0, (tokens / self.max_context_tokens) * 100)

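A quick sketch of how `ContextBreakdown` derives its properties; the numbers below are hypothetical and assume the class is in scope:

```python
breakdown = ContextBreakdown(
    max_context_tokens=200_000,
    system_prompt_tokens=3_000,
    mcp_tokens=1_200,
    tool_schema_tokens=4_800,
    memory_tokens=500,
    message_tokens=42_000,
    reserved_tokens=13_000,  # AUTO_COMPACT_BUFFER when auto-compact is on
    message_count=24,
)
assert breakdown.reported_tokens == 51_500
assert breakdown.effective_tokens == 64_500  # reported + reserved, capped at the window
assert breakdown.free_tokens == 135_500
```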
def _parse_truthy_env_value(value: Optional[str]) -> bool:
    """Interpret common truthy environment variable values."""
    if value is None:
        return False
    normalized = value.strip().lower()
    return normalized in {"1", "true", "yes", "on"}


def estimate_tokens_from_text(text: str) -> int:
    """Estimate token count using shared token estimation helper."""
    return estimate_tokens(text)


def _stringify_content(content: Union[str, List[MessageContent], None]) -> str:
    """Convert normalized content into plain text for estimation."""
    if content is None:
        return ""
    if isinstance(content, str):
        return content
    parts: List[str] = []
    for part in content:
        if isinstance(part, dict):
            block_type = part.get("type")
            text_val = part.get("text")
            if text_val:
                parts.append(str(text_val))

            # Capture nested text for tool_result content blocks
            nested_content = part.get("content")
            if isinstance(nested_content, list):
                nested_text = _stringify_content(nested_content)
                if nested_text:
                    parts.append(nested_text)

            # Include tool payloads that otherwise don't have "text"
            if block_type == "tool_use" and part.get("input") is not None:
                try:
                    parts.append(json.dumps(part.get("input"), ensure_ascii=False))
                except (TypeError, ValueError) as exc:
                    logger.warning(
                        "[message_compaction] Failed to serialize tool_use input for token estimate: %s: %s",
                        type(exc).__name__, exc,
                    )
                    parts.append(str(part.get("input")))

            # OpenAI-style arguments blocks
            if part.get("arguments"):
                parts.append(str(part.get("arguments")))
        elif hasattr(part, "text"):
            text_val = getattr(part, "text", "")
            if text_val:
                parts.append(str(text_val))
        else:
            parts.append(str(part))
    # Filter out empty strings to avoid over-counting separators
    return "\n".join([p for p in parts if p])

def estimate_conversation_tokens(
    messages: Sequence[ConversationMessage], *, protocol: str = "anthropic"
) -> int:
    """Estimate tokens for a conversation after normalization."""
    normalized = normalize_messages_for_api(list(messages), protocol=protocol)
    total = 0
    for message in normalized:
        total += estimate_tokens_from_text(_stringify_content(message.get("content")))

        # Account for OpenAI-style tool_calls payloads (arguments + name)
        tool_calls = message.get("tool_calls")
        if isinstance(tool_calls, list):
            for call in tool_calls:
                if not isinstance(call, dict):
                    total += estimate_tokens_from_text(str(call))
                    continue
                func = call.get("function")
                if isinstance(func, dict):
                    arguments = func.get("arguments")
                    if arguments:
                        total += estimate_tokens_from_text(str(arguments))
                    name = func.get("name")
                    if name:
                        total += estimate_tokens_from_text(str(name))
                else:
                    total += estimate_tokens_from_text(str(func))
    return total


def _estimate_tool_schema_tokens(tools: Sequence[Any]) -> int:
    """Estimate tokens consumed by tool schemas."""
    total = 0
    for tool in tools:
        try:
            schema = tool.input_schema.model_json_schema()
            schema_text = json.dumps(schema, sort_keys=True)
            total += estimate_tokens_from_text(schema_text)
        except (AttributeError, TypeError, KeyError, ValueError) as exc:
            logger.warning(
                "Failed to estimate tokens for tool schema: %s: %s",
                type(exc).__name__, exc,
                extra={"tool": getattr(tool, "name", None)},
            )
            continue
    return total

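To see what the `_stringify_content` helper above actually flattens, here is a small sketch using dict-shaped blocks (a private helper, shown for illustration only, assuming it is in scope):

```python
blocks = [
    {"type": "text", "text": "Run the tests"},
    {"type": "tool_use", "name": "Bash", "input": {"command": "pytest -q"}},
    {"type": "tool_result", "content": [{"type": "text", "text": "3 passed"}]},
]
print(_stringify_content(blocks))
# Run the tests
# {"command": "pytest -q"}
# 3 passed
```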
def get_model_context_limit(
    model_profile: Optional[ModelProfile], explicit_limit: Optional[int] = None
) -> int:
    """Best-effort guess of the model context window."""
    env_override = os.getenv("RIPPERDOC_CONTEXT_TOKENS")
    if env_override:
        try:
            parsed = int(env_override)
            if parsed > 0:
                return parsed
        except ValueError:
            pass

    if explicit_limit and explicit_limit > 0:
        return explicit_limit

    if model_profile and getattr(model_profile, "context_window", None):
        try:
            configured = int(model_profile.context_window)  # type: ignore[arg-type]
            if configured > 0:
                return configured
        except (TypeError, ValueError):
            pass

    if model_profile and model_profile.model:
        name = model_profile.model.lower()
        if "claude" in name:
            # Claude 4.5 defaults and beta 1M thinking window.
            if "4.5" in name or "sonnet" in name or "haiku" in name:
                return 1_000_000 if "1m" in name or "beta" in name else 200_000
            if "opus" in name or "4.1" in name:
                return 200_000
            return 200_000
        if "gpt-4o" in name or "gpt-4.1" in name or "gpt-4-turbo" in name:
            return 128_000
        if "gpt-4" in name:
            return 32_000
        if "gpt-3.5" in name:
            return 16_000
        if "deepseek" in name:
            return 128_000

    return DEFAULT_CONTEXT_TOKENS


def get_remaining_context_tokens(
    model_profile: Optional[ModelProfile], explicit_limit: Optional[int] = None
) -> int:
    """Return the context window minus the model's configured output tokens."""
    context_limit = max(get_model_context_limit(model_profile, explicit_limit), MIN_CONTEXT_TOKENS)
    try:
        max_output_tokens = (
            int(getattr(model_profile, "max_tokens", 0) or 0) if model_profile else 0
        )
    except (TypeError, ValueError):
        max_output_tokens = 0
    return max(MIN_CONTEXT_TOKENS, context_limit - max(0, max_output_tokens))


def resolve_auto_compact_enabled(config: GlobalConfig) -> bool:
    """Return whether auto-compaction is enabled, honoring an env override."""
    env_override = os.getenv("RIPPERDOC_AUTO_COMPACT")
    if env_override is not None:
        normalized = env_override.strip().lower()
        return normalized not in {"0", "false", "no", "off"}
    return bool(config.auto_compact_enabled)

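The resolution order in `get_model_context_limit` is: the `RIPPERDOC_CONTEXT_TOKENS` env override, then the explicit argument, then the profile's `context_window`, then the model-name heuristic, and finally `DEFAULT_CONTEXT_TOKENS`. A minimal sketch, assuming the module's names are in scope:

```python
import os

os.environ["RIPPERDOC_CONTEXT_TOKENS"] = "150000"
assert get_model_context_limit(None) == 150_000  # env override wins
del os.environ["RIPPERDOC_CONTEXT_TOKENS"]

assert get_model_context_limit(None, explicit_limit=64_000) == 64_000
assert get_model_context_limit(None) == DEFAULT_CONTEXT_TOKENS  # nothing else to go on
```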
def get_context_usage_status(
    used_tokens: int,
    max_context_tokens: Optional[int],
    auto_compact_enabled: bool,
) -> ContextUsageStatus:
    """Compute context usage thresholds using the compaction heuristics."""
    context_limit = max(max_context_tokens or DEFAULT_CONTEXT_TOKENS, MIN_CONTEXT_TOKENS)
    effective_limit = (
        max(MIN_CONTEXT_TOKENS, context_limit - AUTO_COMPACT_BUFFER)
        if auto_compact_enabled
        else context_limit
    )

    tokens_left = max(effective_limit - used_tokens, 0)
    percent_left = (
        0.0 if effective_limit <= 0 else min(100.0, (tokens_left / effective_limit) * 100)
    )
    percent_used = 100.0 - percent_left

    warning_limit = max(0, effective_limit - WARNING_THRESHOLD)
    error_limit = max(0, effective_limit - ERROR_THRESHOLD)
    auto_compact_limit = max(MIN_CONTEXT_TOKENS, context_limit - AUTO_COMPACT_BUFFER)

    return ContextUsageStatus(
        used_tokens=used_tokens,
        max_context_tokens=context_limit,
        tokens_left=tokens_left,
        percent_left=percent_left,
        percent_used=percent_used,
        is_above_warning_threshold=used_tokens >= warning_limit,
        is_above_error_threshold=used_tokens >= error_limit,
        is_above_auto_compact_threshold=auto_compact_enabled and used_tokens >= auto_compact_limit,
    )


def summarize_context_usage(
    messages: Sequence[ConversationMessage],
    tools: Sequence[Any],
    system_prompt: str,
    max_context_tokens: int,
    auto_compact_enabled: bool,
    memory_tokens: int = 0,
    mcp_tokens: int = 0,
    *,
    protocol: str = "anthropic",
) -> ContextBreakdown:
    """Return a detailed breakdown of context usage."""
    max_context_tokens = max(max_context_tokens, MIN_CONTEXT_TOKENS)
    raw_system_tokens = estimate_tokens_from_text(system_prompt)
    base_prompt_tokens = max(0, raw_system_tokens - max(0, mcp_tokens))
    tool_schema_tokens = _estimate_tool_schema_tokens(tools)
    message_tokens = estimate_conversation_tokens(messages, protocol=protocol)
    message_count = len([m for m in messages if getattr(m, "type", "") != "progress"])
    reserved_tokens = AUTO_COMPACT_BUFFER if auto_compact_enabled else 0

    return ContextBreakdown(
        max_context_tokens=max_context_tokens,
        system_prompt_tokens=base_prompt_tokens,
        mcp_tokens=max(0, mcp_tokens),
        tool_schema_tokens=tool_schema_tokens,
        memory_tokens=max(0, memory_tokens),
        message_tokens=message_tokens,
        reserved_tokens=reserved_tokens,
        message_count=message_count,
    )

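A usage sketch for `get_context_usage_status` with made-up numbers: 190k tokens used against a 200k window with auto-compact on, so the effective limit is 187k:

```python
status = get_context_usage_status(
    used_tokens=190_000,
    max_context_tokens=200_000,
    auto_compact_enabled=True,
)
assert status.tokens_left == 0            # 190k already exceeds the 187k effective limit
assert status.is_above_warning_threshold  # past 187_000 - 20_000 = 167_000
assert status.should_auto_compact         # past the 187_000 auto-compact line
```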
def find_latest_assistant_usage_tokens(
    messages: Sequence[ConversationMessage],
) -> int:
    """Best-effort extraction of usage tokens from the latest assistant message."""
    for message in reversed(messages):
        if getattr(message, "type", "") != "assistant":
            continue
        payload = getattr(message, "message", None) or getattr(message, "content", None)
        usage = getattr(payload, "usage", None)
        if usage is None and isinstance(payload, dict):
            usage = payload.get("usage")
        if not usage:
            continue
        try:
            tokens = 0
            for field in (
                "input_tokens",
                "cache_creation_input_tokens",
                "cache_read_input_tokens",
                "output_tokens",
                "prompt_tokens",
                "completion_tokens",
            ):
                value = getattr(usage, field, None)
                if value is None and isinstance(usage, dict):
                    value = usage.get(field)
                if value is not None:
                    tokens += int(value)
            if tokens > 0:
                return tokens
        except (TypeError, ValueError, AttributeError):
            logger.debug("[message_compaction] Failed to parse usage tokens")
            continue
    return 0


def estimate_used_tokens(
    messages: Sequence[ConversationMessage],
    *,
    protocol: str = "anthropic",
    precomputed_total_tokens: Optional[int] = None,
) -> int:
    """Return usage tokens if present; otherwise fall back to an estimated total."""
    usage_tokens = find_latest_assistant_usage_tokens(messages)
    if usage_tokens > 0:
        return usage_tokens
    if precomputed_total_tokens is not None:
        return precomputed_total_tokens
    return estimate_conversation_tokens(messages, protocol=protocol)

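The fallback order in `estimate_used_tokens` is: a usage payload on the latest assistant message, then the caller's precomputed hint, then a fresh estimate. A minimal sketch:

```python
# An empty history carries no usage payload, so the precomputed hint wins.
assert estimate_used_tokens([], precomputed_total_tokens=12_345) == 12_345
```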
def register_cleanup_callback(callback: Callable[[], None]) -> Callable[[], None]:
    """Register a callback that will run after a compaction pass."""
    _cleanup_callbacks.append(callback)

    def _unregister() -> None:
        nonlocal callback
        _cleanup_callbacks[:] = [cb for cb in _cleanup_callbacks if cb is not callback]

    return _unregister


def _run_cleanup_callbacks() -> None:
    callbacks = list(_cleanup_callbacks)
    for callback in callbacks:
        try:
            callback()
        except (RuntimeError, TypeError, ValueError, AttributeError) as exc:
            logger.debug(
                "[message_compaction] Cleanup callback failed: %s: %s",
                type(exc).__name__, exc,
            )

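`register_cleanup_callback` returns its own unsubscribe handle, so callers don't need to hold a second reference. A usage sketch (the callback body is hypothetical):

```python
unregister = register_cleanup_callback(lambda: print("compaction ran; invalidate caches"))
# ... later, when the caller no longer cares about compaction passes:
unregister()
```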
def _normalize_tool_use_id(block: Any) -> str:
    if block is None:
        return ""
    if isinstance(block, dict):
        return str(block.get("tool_use_id") or block.get("id") or "")
    return str(getattr(block, "tool_use_id", None) or getattr(block, "id", None) or "")


def _estimate_message_tokens(content_block: Any) -> int:
    """Estimate tokens for a single content block."""
    if content_block is None:
        return 0

    content = getattr(content_block, "content", None)
    if isinstance(content_block, dict) and content is None:
        content = content_block.get("content")

    if isinstance(content, str):
        return estimate_tokens_from_text(content)
    if isinstance(content, list):
        total = 0
        for part in content:
            part_type = getattr(part, "type", None) or (
                part.get("type") if isinstance(part, dict) else None
            )
            if part_type == "text":
                text_val = getattr(part, "text", None) if hasattr(part, "text") else None
                if text_val is None and isinstance(part, dict):
                    text_val = part.get("text")
                total += estimate_tokens_from_text(text_val or "")
            elif part_type == "image":
                total += IMAGE_TOKEN_COST
        return total

    text_val = getattr(content_block, "text", None)
    if text_val is None and isinstance(content_block, dict):
        text_val = content_block.get("text") or content_block.get("content")
    return estimate_tokens_from_text(text_val or "")


def _get_cached_token_count(cache_key: str, content_block: Any) -> int:
    estimated = _token_cache.get(cache_key)
    if estimated is None:
        estimated = _estimate_message_tokens(content_block)
        _token_cache[cache_key] = estimated
    return estimated

def compact_messages(
    messages: Sequence[ConversationMessage],
    max_tokens: Optional[int] = None,
    *,
    protocol: str = "anthropic",
) -> CompactionResult:
    """Compact tool results by replacing older outputs with placeholders."""
    global _is_compacting
    _is_compacting = False

    tokens_before = estimate_conversation_tokens(messages, protocol=protocol)

    if _parse_truthy_env_value(os.getenv("DISABLE_MICROCOMPACT")):
        return CompactionResult(
            messages=list(messages),
            tokens_before=tokens_before,
            tokens_after=tokens_before,
            tokens_saved=0,
            cleared_tool_ids=set(),
            was_compacted=False,
        )

    # Presence of this flag mirrors the upstream implementation even though we don't act on it.
    _parse_truthy_env_value(os.getenv("USE_API_CONTEXT_MANAGEMENT"))

    is_max_tokens_specified = max_tokens is not None
    try:
        base_max_tokens = int(max_tokens) if max_tokens is not None else MAX_TOKENS_HARD
    except (TypeError, ValueError):
        base_max_tokens = MAX_TOKENS_HARD
    effective_max_tokens = max(base_max_tokens, MIN_CONTEXT_TOKENS)

    tool_use_ids_to_compact: List[str] = []
    token_counts_by_tool_use_id: Dict[str, int] = {}

    for message in messages:
        msg_type = getattr(message, "type", "")
        content = getattr(getattr(message, "message", None), "content", None)
        if msg_type not in {"user", "assistant"} or not isinstance(content, list):
            continue
        for content_block in content:
            block_type = getattr(content_block, "type", None) or (
                content_block.get("type") if isinstance(content_block, dict) else None
            )
            tool_use_id = _normalize_tool_use_id(content_block)
            tool_name = getattr(content_block, "name", None)
            if tool_name is None and isinstance(content_block, dict):
                tool_name = content_block.get("name")
            if block_type == "tool_use" and tool_name in TOOL_COMMANDS:
                if tool_use_id and tool_use_id not in _processed_tool_use_ids:
                    tool_use_ids_to_compact.append(tool_use_id)
            elif block_type == "tool_result" and tool_use_id in tool_use_ids_to_compact:
                token_count = _get_cached_token_count(tool_use_id, content_block)
                token_counts_by_tool_use_id[tool_use_id] = token_count

    latest_tool_use_ids = (
        tool_use_ids_to_compact[-MAX_TOOL_USES_TO_PRESERVE:]
        if MAX_TOOL_USES_TO_PRESERVE > 0
        else []
    )
    total_token_count = sum(token_counts_by_tool_use_id.values())

    total_tokens_removed = 0
    ids_to_remove: Set[str] = set()

    for tool_use_id in tool_use_ids_to_compact:
        if tool_use_id in latest_tool_use_ids:
            continue
        if total_token_count - total_tokens_removed > effective_max_tokens:
            ids_to_remove.add(tool_use_id)
            total_tokens_removed += token_counts_by_tool_use_id.get(tool_use_id, 0)

    if not is_max_tokens_specified:
        auto_compact_enabled = resolve_auto_compact_enabled(get_global_config())
        usage_tokens = estimate_used_tokens(
            messages, protocol=protocol, precomputed_total_tokens=tokens_before
        )
        status = get_context_usage_status(
            usage_tokens,
            max_context_tokens=max_tokens,
            auto_compact_enabled=auto_compact_enabled,
        )
        if not status.is_above_warning_threshold or total_tokens_removed < MAX_TOKENS_SOFT:
            ids_to_remove.clear()
            total_tokens_removed = 0

    def _should_remove(tool_use_id: str) -> bool:
        return tool_use_id in ids_to_remove or tool_use_id in _processed_tool_use_ids

    compacted_messages: List[ConversationMessage] = []

    for message in messages:
        msg_type = getattr(message, "type", "")
        content = getattr(getattr(message, "message", None), "content", None)
        if msg_type not in {"user", "assistant"} or not isinstance(content, list):
            compacted_messages.append(message)
            continue

        if msg_type == "assistant" and isinstance(message, AssistantMessage):
            # Copy content list to avoid mutating the original message.
            compacted_messages.append(
                AssistantMessage(
                    message=message.message.model_copy(update={"content": list(content)}),
                    cost_usd=getattr(message, "cost_usd", 0.0),
                    duration_ms=getattr(message, "duration_ms", 0.0),
                    uuid=getattr(message, "uuid", None),
                    is_api_error_message=getattr(message, "is_api_error_message", False),
                )
            )
            continue

        filtered_content: List[MessageContent] = []
        modified = False
        for content_item in content:
            block_type = getattr(content_item, "type", None) or (
                content_item.get("type") if isinstance(content_item, dict) else None
            )
            tool_use_id = _normalize_tool_use_id(content_item)
            if block_type == "tool_result" and _should_remove(tool_use_id):
                modified = True
                if hasattr(content_item, "model_copy"):
                    new_block = content_item.model_copy()
                    new_block.text = COMPACT_PLACEHOLDER
                else:
                    block_dict = (
                        dict(content_item)
                        if isinstance(content_item, dict)
                        else {"type": "tool_result"}
                    )
                    block_dict["text"] = COMPACT_PLACEHOLDER
                    block_dict["tool_use_id"] = tool_use_id
                    new_block = MessageContent(**block_dict)
                filtered_content.append(new_block)
            else:
                if isinstance(content_item, MessageContent):
                    filtered_content.append(content_item)
                elif isinstance(content_item, dict):
                    filtered_content.append(MessageContent(**content_item))
                else:
                    filtered_content.append(
                        MessageContent(type=str(block_type or "text"), text=str(content_item))
                    )

        if modified and isinstance(message, UserMessage):
            compacted_messages.append(
                UserMessage(
                    message=message.message.model_copy(update={"content": filtered_content}),
                    tool_use_result=getattr(message, "tool_use_result", None),
                    uuid=getattr(message, "uuid", None),
                )
            )
        else:
            compacted_messages.append(message)

    for id_to_remove in ids_to_remove:
        _processed_tool_use_ids.add(id_to_remove)

    tokens_after = estimate_conversation_tokens(compacted_messages, protocol=protocol)
    tokens_saved = max(0, tokens_before - tokens_after)

    if ids_to_remove:
        _is_compacting = True
        _run_cleanup_callbacks()
        logger.debug(
            "[message_compaction] Compacted conversation",
            extra={
                "tokens_before": tokens_before,
                "tokens_after": tokens_after,
                "tokens_saved": tokens_saved,
                "cleared_tool_ids": list(ids_to_remove),
            },
        )

    return CompactionResult(
        messages=compacted_messages,
        tokens_before=tokens_before,
        tokens_after=tokens_after,
        tokens_saved=tokens_saved,
        cleared_tool_ids=ids_to_remove,
        was_compacted=bool(ids_to_remove),
    )
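A usage sketch for the entry point, where `history` stands in for a real conversation list:

```python
result = compact_messages(history, max_tokens=30_000)
if result.was_compacted:
    history = result.messages
    print(f"saved {result.tokens_saved} tokens, "
          f"cleared {len(result.cleared_tool_ids)} old tool results")
```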