cade-cli 0.3.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cade_cli-0.3.3.dist-info/METADATA +151 -0
- cade_cli-0.3.3.dist-info/RECORD +44 -0
- cade_cli-0.3.3.dist-info/WHEEL +4 -0
- cade_cli-0.3.3.dist-info/entry_points.txt +2 -0
- cadecoder/__init__.py +1 -0
- cadecoder/ai/__init__.py +6 -0
- cadecoder/ai/prompts.py +572 -0
- cadecoder/cli/__init__.py +0 -0
- cadecoder/cli/app.py +147 -0
- cadecoder/cli/auth.py +483 -0
- cadecoder/cli/commands/__init__.py +5 -0
- cadecoder/cli/commands/auth.py +143 -0
- cadecoder/cli/commands/chat.py +264 -0
- cadecoder/cli/commands/mcp.py +477 -0
- cadecoder/cli/commands/tools.py +226 -0
- cadecoder/core/__init__.py +12 -0
- cadecoder/core/config.py +380 -0
- cadecoder/core/constants.py +281 -0
- cadecoder/core/errors.py +145 -0
- cadecoder/core/logging.py +148 -0
- cadecoder/core/types.py +235 -0
- cadecoder/core/utils.py +279 -0
- cadecoder/execution/__init__.py +46 -0
- cadecoder/execution/context_window.py +521 -0
- cadecoder/execution/orchestrator.py +562 -0
- cadecoder/execution/parallel.py +287 -0
- cadecoder/providers/__init__.py +60 -0
- cadecoder/providers/base.py +294 -0
- cadecoder/providers/openai.py +251 -0
- cadecoder/storage/__init__.py +0 -0
- cadecoder/storage/threads.py +489 -0
- cadecoder/templates/login_failed.html +21 -0
- cadecoder/templates/login_success.html +21 -0
- cadecoder/templates/styles.css +87 -0
- cadecoder/tools/__init__.py +19 -0
- cadecoder/tools/builtin.py +644 -0
- cadecoder/tools/filesystem.py +315 -0
- cadecoder/tools/git.py +221 -0
- cadecoder/tools/manager.py +1635 -0
- cadecoder/ui/__init__.py +7 -0
- cadecoder/ui/display.py +338 -0
- cadecoder/ui/input.py +145 -0
- cadecoder/ui/session.py +455 -0
- cadecoder/ui/state.py +20 -0
|
@@ -0,0 +1,521 @@
|
|
|
1
|
+
"""Context window management for the agent.
|
|
2
|
+
|
|
3
|
+
Provides token tracking, context compaction, and backup functionality
|
|
4
|
+
to manage the LLM context window effectively.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import json
|
|
8
|
+
import logging
|
|
9
|
+
from dataclasses import dataclass, field
|
|
10
|
+
from datetime import datetime
|
|
11
|
+
from enum import Enum
|
|
12
|
+
from pathlib import Path
|
|
13
|
+
from typing import Any
|
|
14
|
+
|
|
15
|
+
# Module-level logger, obtained by the fixed name "cadecoder" rather than
# imported from another module of the package (avoids a circular import).
log = logging.getLogger("cadecoder")
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class CompactionStrategy(str, Enum):
    """Available ways of shrinking the context once the window fills up.

    Members are also plain strings, so they compare equal to their values.
    """

    # Keep most recent messages
    KEEP_RECENT = "keep_recent"

    # Summarize early messages, keep recent
    SUMMARIZE_EARLY = "summarize_early"

    # Only keep final tool results
    KEEP_TOOL_RESULTS_FINAL = "keep_tool_results_final"

    # Remove tool outputs, keep structure
    DROP_TOOL_OUTPUTS = "drop_tool_outputs"
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
@dataclass
class TokenEstimate:
    """Character-ratio token estimate for a piece of text or a chat message.

    The estimate is a heuristic: a character count divided by
    ``chars_per_token`` and truncated to an integer. It is not a tokenizer.
    """

    # Tokens attributed to the message text / content.
    text_tokens: int
    # Tokens attributed to tool-call names and arguments.
    tool_call_tokens: int
    # Sum of text_tokens and tool_call_tokens.
    total: int

    @staticmethod
    def _char_length(value: Any) -> int:
        """Return how many characters ``value`` contributes to an estimate.

        Strings count their own length and ``None`` counts as zero. Any other
        value (for example a list of content parts or an already-parsed
        arguments dict) is measured by its JSON serialization, because
        ``len()`` of a container would count items instead of characters.
        """
        if value is None:
            return 0
        if isinstance(value, str):
            return len(value)
        try:
            return len(json.dumps(value, default=str))
        except (TypeError, ValueError):
            # Not JSON-serializable even with the str fallback (e.g. circular
            # references); fall back to the plain string form.
            return len(str(value))

    @classmethod
    def from_text(cls, text: str, chars_per_token: float = 4.0) -> "TokenEstimate":
        """Estimate tokens from text using character ratio.

        Args:
            text: Text to measure.
            chars_per_token: Assumed average number of characters per token.

        Returns:
            Estimate whose ``tool_call_tokens`` is zero.
        """
        text_tokens = int(len(text) / chars_per_token)
        return cls(text_tokens=text_tokens, tool_call_tokens=0, total=text_tokens)

    @classmethod
    def from_message(cls, message: dict[str, Any], chars_per_token: float = 4.0) -> "TokenEstimate":
        """Estimate tokens from a message dict.

        Reads ``message["content"]`` and, for every entry of
        ``message["tool_calls"]``, the ``function.name`` and
        ``function.arguments`` values. Keys that are missing or explicitly
        ``None`` contribute nothing.

        Args:
            message: Chat message dictionary.
            chars_per_token: Assumed average number of characters per token.

        Returns:
            Estimate with text and tool-call tokens counted separately.
        """
        text_tokens = int(cls._char_length(message.get("content")) / chars_per_token)

        # ``.get("tool_calls", [])`` would still return None when the key is
        # present with a None value, so normalize with ``or``.
        tool_tokens = 0
        for tc in message.get("tool_calls") or []:
            func = tc.get("function") or {}
            chars = cls._char_length(func.get("name")) + cls._char_length(func.get("arguments"))
            # Truncate per tool call, as each call is estimated independently.
            tool_tokens += int(chars / chars_per_token)

        return cls(
            text_tokens=text_tokens,
            tool_call_tokens=tool_tokens,
            total=text_tokens + tool_tokens,
        )
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
@dataclass
class ContextBackup:
    """Snapshot of the conversation taken right before a compaction."""

    # When the snapshot was taken.
    timestamp: datetime
    # The messages as they were before compaction.
    messages: list[dict[str, Any]]
    # Estimated token count of ``messages``.
    token_count: int
    # Free-form description of why the compaction happened.
    compaction_reason: str

    def to_dict(self) -> dict[str, Any]:
        """Return a JSON-serializable dict; the timestamp becomes ISO-8601 text."""
        payload: dict[str, Any] = {}
        payload["timestamp"] = self.timestamp.isoformat()
        payload["messages"] = self.messages
        payload["token_count"] = self.token_count
        payload["compaction_reason"] = self.compaction_reason
        return payload

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "ContextBackup":
        """Rebuild a backup from the dict produced by ``to_dict``.

        Raises:
            KeyError: If one of the four expected keys is missing.
        """
        taken_at = datetime.fromisoformat(data["timestamp"])
        return cls(
            taken_at,
            data["messages"],
            data["token_count"],
            data["compaction_reason"],
        )
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
@dataclass
class ToolOutputCollection:
    """Accumulates tool outputs, tracking both the full history and the latest per tool."""

    # Every recorded output, in insertion order.
    all_outputs: list[dict[str, Any]] = field(default_factory=list)
    # Most recent output text keyed by tool name.
    final_outputs: dict[str, str] = field(default_factory=dict)

    def add_output(self, tool_name: str, output: str, tool_call_id: str) -> None:
        """Record one tool output and make it the latest output for ``tool_name``."""
        record = {
            "tool_name": tool_name,
            "output": output,
            "tool_call_id": tool_call_id,
            "timestamp": datetime.now().isoformat(),
        }
        self.all_outputs.append(record)
        # The newest output always replaces the previous "final" one.
        self.final_outputs[tool_name] = output

    def get_all_outputs(self) -> list[dict[str, Any]]:
        """Return a shallow copy of the full output history."""
        return list(self.all_outputs)

    def get_final_outputs(self) -> dict[str, str]:
        """Return a shallow copy of the latest output per tool."""
        return dict(self.final_outputs)

    def get_total_size(self) -> int:
        """Return the combined character length of every recorded output."""
        size = 0
        for entry in self.all_outputs:
            size += len(entry.get("output", ""))
        return size

    def clear(self) -> None:
        """Empty both the history and the per-tool latest outputs in place."""
        del self.all_outputs[:]
        self.final_outputs.clear()
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
class ContextWindowManager:
    """Manages the LLM context window with compaction and backup support.

    Token usage is estimated from character counts (``chars_per_token``).
    ``check_context_status`` reports how full the window is, and
    ``compact_context`` shrinks a message list using one of the
    ``CompactionStrategy`` values after writing a JSON backup of the original
    messages to ``backup_dir``.
    """

    # Model context limits (updated Jan 2026)
    # OpenAI: https://platform.openai.com/docs/models
    # Anthropic: https://docs.anthropic.com/en/docs/about-claude/models
    MODEL_CONTEXT_LIMITS = {
        # OpenAI GPT-4.1 family (1M context via API, max output 32,768)
        "gpt-4.1": 1_000_000,
        "gpt-4.1-mini": 1_000_000,
        "gpt-4.1-nano": 1_000_000,
        # OpenAI GPT-4o family
        "gpt-4o": 128_000,
        "gpt-4o-mini": 128_000,
        # OpenAI legacy models
        "gpt-4-turbo": 128_000,
        "gpt-4": 8_192,
        "gpt-3.5-turbo": 16_385,
        # Anthropic Claude 4 family (200K standard, 1M beta for Sonnet 4)
        "claude-opus-4.5": 200_000,
        "claude-sonnet-4": 200_000,
        # Anthropic Claude 3 family
        "claude-3-opus": 200_000,
        "claude-3-sonnet": 200_000,
        "claude-3-haiku": 200_000,
        "claude-3.5-sonnet": 200_000,
        "claude-3.5-haiku": 200_000,
    }

    # Max output tokens per model (for reserving response space)
    MODEL_MAX_OUTPUT = {
        "gpt-4.1": 32_768,
        "gpt-4.1-mini": 32_768,
        "gpt-4.1-nano": 32_768,
        "gpt-4o": 16_384,
        "gpt-4o-mini": 16_384,
        "gpt-4-turbo": 4_096,
        "gpt-4": 4_096,  # Legacy GPT-4 has smaller output
        "gpt-3.5-turbo": 4_096,
        "claude-opus-4.5": 8_192,
        "claude-sonnet-4": 8_192,
        "claude-3-opus": 4_096,
        "claude-3-sonnet": 4_096,
        "claude-3-haiku": 4_096,
        "claude-3.5-sonnet": 8_192,
        "claude-3.5-haiku": 8_192,
    }

    # Tokens reserved for the response when the model is not listed in
    # MODEL_MAX_OUTPUT. This is a fixed fallback value.
    RESPONSE_RESERVE = 4_096

    # Compaction threshold (fraction of the effective window used before
    # compaction is recommended)
    COMPACTION_THRESHOLD = 0.85

    # Content written in place of a tool result that a later result from the
    # same tool has superseded (KEEP_TOOL_RESULTS_FINAL strategy).
    SUPERSEDED_TOOL_OUTPUT = "[Earlier tool output removed for context management]"

    def __init__(
        self,
        model: str = "gpt-4.1",
        backup_dir: Path | None = None,
        chars_per_token: float = 4.0,
    ) -> None:
        """Initialize context window manager.

        Creates ``backup_dir`` (including parents) if it does not exist.

        Args:
            model: Model name for context limit lookup
            backup_dir: Directory for storing context backups; defaults to
                ``~/.cadecoder/context_backups``
            chars_per_token: Character to token ratio for estimation
        """
        self.model = model
        self.chars_per_token = chars_per_token

        # Set context limit based on model (default to 128K for unknown models)
        self.context_limit = self.MODEL_CONTEXT_LIMITS.get(model, 128_000)

        # Set response reserve based on model's max output tokens
        self.response_reserve = self.MODEL_MAX_OUTPUT.get(model, self.RESPONSE_RESERVE)
        self.effective_limit = self.context_limit - self.response_reserve

        # Backup directory
        if backup_dir is None:
            backup_dir = Path.home() / ".cadecoder" / "context_backups"
        self.backup_dir = backup_dir
        self.backup_dir.mkdir(parents=True, exist_ok=True)

        # Tool output collection
        self.tool_outputs = ToolOutputCollection()

        # Current context tracking
        self._current_token_count = 0
        self._message_count = 0
        self._backups: list[ContextBackup] = []

        log.info(
            f"ContextWindowManager initialized: model={model}, "
            f"limit={self.context_limit:,}, effective={self.effective_limit:,}"
        )

    def estimate_tokens(self, messages: list[dict[str, Any]]) -> int:
        """Estimate total tokens for a list of messages."""
        return sum(
            TokenEstimate.from_message(msg, self.chars_per_token).total for msg in messages
        )

    def estimate_message_tokens(self, message: dict[str, Any]) -> TokenEstimate:
        """Estimate tokens for a single message."""
        return TokenEstimate.from_message(message, self.chars_per_token)

    def check_context_status(self, messages: list[dict[str, Any]]) -> dict[str, Any]:
        """Check current context window status.

        Args:
            messages: Current message list

        Returns:
            Dict with token_count, percentage_used (0-100 scale, one decimal),
            needs_compaction, available_tokens, effective_limit, message_count
        """
        token_count = self.estimate_tokens(messages)
        percentage_used = token_count / self.effective_limit
        needs_compaction = percentage_used >= self.COMPACTION_THRESHOLD

        return {
            "token_count": token_count,
            "percentage_used": round(percentage_used * 100, 1),
            "needs_compaction": needs_compaction,
            "available_tokens": self.effective_limit - token_count,
            "effective_limit": self.effective_limit,
            "message_count": len(messages),
        }

    def compact_context(
        self,
        messages: list[dict[str, Any]],
        strategy: CompactionStrategy = CompactionStrategy.KEEP_RECENT,
        target_percentage: float = 0.6,
    ) -> tuple[list[dict[str, Any]], ContextBackup]:
        """Compact context to reduce token usage.

        A backup of ``messages`` is written to disk (best effort) and kept in
        memory before any compaction happens.

        Args:
            messages: Current message list
            strategy: Compaction strategy to use
            target_percentage: Target fraction of the effective limit after
                compaction

        Returns:
            Tuple of (compacted_messages, backup)
        """
        # Create backup before compaction
        current_tokens = self.estimate_tokens(messages)
        backup = ContextBackup(
            timestamp=datetime.now(),
            messages=messages.copy(),
            token_count=current_tokens,
            compaction_reason=f"strategy={strategy.value}, threshold={self.COMPACTION_THRESHOLD}",
        )

        # Save backup to disk
        self._save_backup(backup)
        self._backups.append(backup)

        target_tokens = int(self.effective_limit * target_percentage)

        log.info(
            f"Compacting context: {current_tokens:,} -> {target_tokens:,} tokens "
            f"(strategy={strategy.value})"
        )

        compactors = {
            CompactionStrategy.KEEP_RECENT: self._compact_keep_recent,
            CompactionStrategy.SUMMARIZE_EARLY: self._compact_summarize_early,
            CompactionStrategy.KEEP_TOOL_RESULTS_FINAL: self._compact_keep_final_tool_results,
            CompactionStrategy.DROP_TOOL_OUTPUTS: self._compact_drop_tool_outputs,
        }
        # Unknown strategies fall back to keeping the most recent messages.
        compactor = compactors.get(strategy, self._compact_keep_recent)
        compacted = compactor(messages, target_tokens)

        new_token_count = self.estimate_tokens(compacted)
        log.info(
            f"Compaction complete: {len(messages)} -> {len(compacted)} messages, "
            f"{current_tokens:,} -> {new_token_count:,} tokens"
        )

        return compacted, backup

    def _compact_keep_recent(
        self, messages: list[dict[str, Any]], target_tokens: int
    ) -> list[dict[str, Any]]:
        """Keep only the most recent messages within token budget.

        The first system message is always kept and placed at the front, even
        if it alone exceeds the budget. The rest of the result is the longest
        suffix of the remaining messages that fits into ``target_tokens``.
        """
        # Pin the FIRST system message only. Later system messages (such as
        # the placeholder added by the summarize strategy) are treated like
        # any other message, so a repeated compaction cannot replace the
        # original system prompt with a placeholder.
        system_msg = None
        other_msgs = []
        for msg in messages:
            if system_msg is None and msg.get("role") == "system":
                system_msg = msg
            else:
                other_msgs.append(msg)

        current_tokens = 0
        if system_msg is not None:
            current_tokens += self.estimate_message_tokens(system_msg).total

        # Walk backwards from the most recent message until the budget is hit.
        newest_first = []
        for msg in reversed(other_msgs):
            msg_tokens = self.estimate_message_tokens(msg).total
            if current_tokens + msg_tokens > target_tokens:
                break
            newest_first.append(msg)
            current_tokens += msg_tokens
        compacted = newest_first[::-1]

        # A tool result at the start of the window has lost the assistant
        # message that requested it; chat APIs reject such orphaned results,
        # so drop them.
        first_valid = 0
        while first_valid < len(compacted) and compacted[first_valid].get("role") == "tool":
            first_valid += 1
        compacted = compacted[first_valid:]

        # Prepend system message
        if system_msg is not None:
            compacted.insert(0, system_msg)

        return compacted

    def _compact_summarize_early(
        self, messages: list[dict[str, Any]], target_tokens: int
    ) -> list[dict[str, Any]]:
        """Summarize early messages, keep recent ones in full.

        No real summary is produced yet: this applies the keep-recent
        compaction and inserts a system placeholder stating how many messages
        were dropped.
        """
        # For now, use keep_recent with a summary placeholder
        # Full implementation would call LLM to summarize
        compacted = self._compact_keep_recent(messages, target_tokens)

        # Add summary placeholder for dropped messages
        dropped_count = len(messages) - len(compacted)
        if dropped_count > 0 and compacted:
            # Place the note right after the leading system message, if any
            insert_idx = 1 if compacted[0].get("role") == "system" else 0
            summary_msg = {
                "role": "system",
                "content": f"[Context compacted: {dropped_count} earlier messages summarized. "
                "Continue from here.]",
            }
            compacted.insert(insert_idx, summary_msg)

        return compacted

    def _compact_keep_final_tool_results(
        self, messages: list[dict[str, Any]], target_tokens: int
    ) -> list[dict[str, Any]]:
        """Keep only final tool results, replacing intermediate ones with placeholders.

        For each tool name (``tool_name`` key, then ``name``, else
        ``"unknown"``) the last tool message keeps its content. Earlier tool
        messages for the same name stay in position, so message order and the
        pairing of tool results with their requests are preserved, but their
        content is replaced by ``SUPERSEDED_TOOL_OUTPUT``.
        """
        # First pass: find the position of the final result for each tool
        final_index_by_tool: dict[str, int] = {}
        for idx, msg in enumerate(messages):
            if msg.get("role") == "tool":
                tool_name = msg.get("tool_name", msg.get("name", "unknown"))
                final_index_by_tool[tool_name] = idx
        final_indices = set(final_index_by_tool.values())

        # Second pass: rebuild in the original order
        compacted = []
        for idx, msg in enumerate(messages):
            if msg.get("role") == "tool" and idx not in final_indices:
                compacted.append({**msg, "content": self.SUPERSEDED_TOOL_OUTPUT})
            else:
                compacted.append(msg)

        # If still over budget, apply keep_recent
        if self.estimate_tokens(compacted) > target_tokens:
            compacted = self._compact_keep_recent(compacted, target_tokens)

        return compacted

    def _compact_drop_tool_outputs(
        self, messages: list[dict[str, Any]], target_tokens: int
    ) -> list[dict[str, Any]]:
        """Drop tool output content but keep structure.

        Every tool message is copied with its content replaced by a fixed
        placeholder; all other messages are passed through unchanged.
        """
        compacted = [
            {**msg, "content": "[Tool output truncated for context management]"}
            if msg.get("role") == "tool"
            else msg
            for msg in messages
        ]

        # If still over budget, apply keep_recent
        if self.estimate_tokens(compacted) > target_tokens:
            compacted = self._compact_keep_recent(compacted, target_tokens)

        return compacted

    def _save_backup(self, backup: ContextBackup) -> None:
        """Save backup to disk (best effort; failures are logged, not raised)."""
        try:
            # Microseconds keep two compactions within the same second from
            # overwriting each other's backup file.
            stamp = backup.timestamp.strftime("%Y%m%d_%H%M%S_%f")
            filename = f"context_backup_{stamp}.json"
            backup_path = self.backup_dir / filename

            with open(backup_path, "w", encoding="utf-8") as f:
                json.dump(backup.to_dict(), f, indent=2)

            log.debug(f"Context backup saved: {backup_path}")

            # Clean old backups (keep last 10)
            self._cleanup_old_backups(keep_count=10)

        except Exception as e:
            log.warning(f"Failed to save context backup: {e}")

    def _cleanup_old_backups(self, keep_count: int = 10) -> None:
        """Remove old backup files, keeping only the most recent.

        Files are ranked by modification time; failures are logged, not raised.
        """
        try:
            backup_files = sorted(
                self.backup_dir.glob("context_backup_*.json"),
                key=lambda p: p.stat().st_mtime,
                reverse=True,
            )

            for old_file in backup_files[keep_count:]:
                old_file.unlink()
                log.debug(f"Removed old context backup: {old_file.name}")

        except Exception as e:
            log.warning(f"Failed to cleanup old backups: {e}")

    def load_backup(self, backup_path: Path) -> ContextBackup | None:
        """Load a backup from disk.

        Returns:
            The parsed backup, or None if the file could not be read or parsed
            (the failure is logged as a warning).
        """
        try:
            with open(backup_path, encoding="utf-8") as f:
                data = json.load(f)
            return ContextBackup.from_dict(data)
        except Exception as e:
            log.warning(f"Failed to load backup {backup_path}: {e}")
            return None

    def list_backups(self) -> list[Path]:
        """List available backup files, most recently modified first."""
        return sorted(
            self.backup_dir.glob("context_backup_*.json"),
            key=lambda p: p.stat().st_mtime,
            reverse=True,
        )

    def add_tool_output(self, tool_name: str, output: str, tool_call_id: str) -> None:
        """Add a tool output to the collection."""
        self.tool_outputs.add_output(tool_name, output, tool_call_id)

    def get_tool_outputs_summary(self) -> dict[str, Any]:
        """Get summary of collected tool outputs.

        Returns:
            Dict with total_outputs, unique_tools, total_size_chars and
            estimated_tokens (characters divided by ``chars_per_token``).
        """
        total_size = self.tool_outputs.get_total_size()
        return {
            "total_outputs": len(self.tool_outputs.all_outputs),
            "unique_tools": len(self.tool_outputs.final_outputs),
            "total_size_chars": total_size,
            "estimated_tokens": int(total_size / self.chars_per_token),
        }

    def clear_tool_outputs(self) -> None:
        """Clear collected tool outputs."""
        self.tool_outputs.clear()
|
|
496
|
+
|
|
497
|
+
|
|
498
|
+
def create_context_manager(
    model: str = "gpt-4.1",
    backup_dir: Path | None = None,
) -> ContextWindowManager:
    """Build a ``ContextWindowManager`` for the given model.

    Args:
        model: Model name used to look up the context limit
        backup_dir: Where context backups are stored; ``None`` lets the
            manager pick its own default

    Returns:
        A ready-to-use ContextWindowManager
    """
    manager = ContextWindowManager(backup_dir=backup_dir, model=model)
    return manager
|
|
512
|
+
|
|
513
|
+
|
|
514
|
+
# Public API of this module: the names bound by ``from <module> import *``.
__all__ = [
    "ContextWindowManager",
    "CompactionStrategy",
    "TokenEstimate",
    "ContextBackup",
    "ToolOutputCollection",
    "create_context_manager",
]
|