aloop 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of aloop might be problematic. Click here for more details.
- agent/__init__.py +0 -0
- agent/agent.py +182 -0
- agent/base.py +406 -0
- agent/context.py +126 -0
- agent/todo.py +149 -0
- agent/tool_executor.py +54 -0
- agent/verification.py +135 -0
- aloop-0.1.0.dist-info/METADATA +246 -0
- aloop-0.1.0.dist-info/RECORD +62 -0
- aloop-0.1.0.dist-info/WHEEL +5 -0
- aloop-0.1.0.dist-info/entry_points.txt +2 -0
- aloop-0.1.0.dist-info/licenses/LICENSE +21 -0
- aloop-0.1.0.dist-info/top_level.txt +9 -0
- cli.py +19 -0
- config.py +146 -0
- interactive.py +865 -0
- llm/__init__.py +51 -0
- llm/base.py +26 -0
- llm/compat.py +226 -0
- llm/content_utils.py +309 -0
- llm/litellm_adapter.py +450 -0
- llm/message_types.py +245 -0
- llm/model_manager.py +265 -0
- llm/retry.py +95 -0
- main.py +246 -0
- memory/__init__.py +20 -0
- memory/compressor.py +554 -0
- memory/manager.py +538 -0
- memory/serialization.py +82 -0
- memory/short_term.py +88 -0
- memory/token_tracker.py +203 -0
- memory/types.py +51 -0
- tools/__init__.py +6 -0
- tools/advanced_file_ops.py +557 -0
- tools/base.py +51 -0
- tools/calculator.py +50 -0
- tools/code_navigator.py +975 -0
- tools/explore.py +254 -0
- tools/file_ops.py +150 -0
- tools/git_tools.py +791 -0
- tools/notify.py +69 -0
- tools/parallel_execute.py +420 -0
- tools/session_manager.py +205 -0
- tools/shell.py +147 -0
- tools/shell_background.py +470 -0
- tools/smart_edit.py +491 -0
- tools/todo.py +130 -0
- tools/web_fetch.py +673 -0
- tools/web_search.py +61 -0
- utils/__init__.py +15 -0
- utils/logger.py +105 -0
- utils/model_pricing.py +49 -0
- utils/runtime.py +75 -0
- utils/terminal_ui.py +422 -0
- utils/tui/__init__.py +39 -0
- utils/tui/command_registry.py +49 -0
- utils/tui/components.py +306 -0
- utils/tui/input_handler.py +393 -0
- utils/tui/model_ui.py +204 -0
- utils/tui/progress.py +292 -0
- utils/tui/status_bar.py +178 -0
- utils/tui/theme.py +165 -0
memory/compressor.py
ADDED
|
@@ -0,0 +1,554 @@
|
|
|
1
|
+
"""Memory compression using LLM-based summarization."""
|
|
2
|
+
|
|
3
|
+
import logging
|
|
4
|
+
from typing import TYPE_CHECKING, List, Optional, Tuple
|
|
5
|
+
|
|
6
|
+
from config import Config
|
|
7
|
+
from llm.content_utils import extract_text
|
|
8
|
+
from llm.message_types import LLMMessage
|
|
9
|
+
|
|
10
|
+
from .types import CompressedMemory, CompressionStrategy
|
|
11
|
+
|
|
12
|
+
logger = logging.getLogger(__name__)
|
|
13
|
+
|
|
14
|
+
if TYPE_CHECKING:
|
|
15
|
+
from llm import LiteLLMAdapter
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class WorkingMemoryCompressor:
    """Compresses conversation history using LLM summarization."""

    # Tools that should NEVER be compressed - their state must be preserved
    # Note: manage_todo_list is NOT protected because its state is managed externally
    # by TodoList object. Instead, we inject current todo state into the summary.
    # Checked by _find_protected_tool_pairs(); currently empty, so no pair is
    # protected by tool name alone.
    PROTECTED_TOOLS: set[str] = set()

    # Prefix for summary messages to identify them
    # (prepended to every generated summary so later passes can recognize it).
    SUMMARY_PREFIX = "[Previous conversation summary]\n"

    # str.format template used for the summarization request. Placeholders:
    # {count}, {tokens}, {messages}, {target_tokens} — all supplied by the
    # _compress_* methods via COMPRESSION_PROMPT.format(...).
    COMPRESSION_PROMPT = """You are a memory compression system. Summarize the following conversation messages while preserving:
1. Key decisions and outcomes
2. Important facts, data, and findings
3. Tool usage patterns and results
4. User intent and goals
5. Critical context needed for future interactions

Original messages ({count} messages, ~{tokens} tokens):

{messages}

Provide a concise but comprehensive summary that captures the essential information. Be specific and include concrete details. Target length: {target_tokens} tokens."""

    def __init__(self, llm: "LiteLLMAdapter"):
        """Initialize compressor.

        Args:
            llm: LLM instance to use for summarization
                (must expose ``call_async`` and ``extract_text``,
                which are the only members used here)
        """
        self.llm = llm
|
|
49
|
+
|
|
50
|
+
async def compress(
|
|
51
|
+
self,
|
|
52
|
+
messages: List[LLMMessage],
|
|
53
|
+
strategy: str = CompressionStrategy.SLIDING_WINDOW,
|
|
54
|
+
target_tokens: Optional[int] = None,
|
|
55
|
+
todo_context: Optional[str] = None,
|
|
56
|
+
) -> CompressedMemory:
|
|
57
|
+
"""Compress messages using specified strategy.
|
|
58
|
+
|
|
59
|
+
Args:
|
|
60
|
+
messages: List of messages to compress
|
|
61
|
+
strategy: Compression strategy to use
|
|
62
|
+
target_tokens: Target token count for compressed output
|
|
63
|
+
todo_context: Optional current todo list state to inject into summary
|
|
64
|
+
|
|
65
|
+
Returns:
|
|
66
|
+
CompressedMemory object
|
|
67
|
+
"""
|
|
68
|
+
if not messages:
|
|
69
|
+
return CompressedMemory(messages=[])
|
|
70
|
+
|
|
71
|
+
if target_tokens is None:
|
|
72
|
+
# Calculate target based on config compression ratio
|
|
73
|
+
original_tokens = self._estimate_tokens(messages)
|
|
74
|
+
target_tokens = int(original_tokens * Config.MEMORY_COMPRESSION_RATIO)
|
|
75
|
+
|
|
76
|
+
# Select and apply compression strategy
|
|
77
|
+
if strategy == CompressionStrategy.SLIDING_WINDOW:
|
|
78
|
+
return await self._compress_sliding_window(messages, target_tokens, todo_context)
|
|
79
|
+
elif strategy == CompressionStrategy.SELECTIVE:
|
|
80
|
+
return await self._compress_selective(messages, target_tokens, todo_context)
|
|
81
|
+
elif strategy == CompressionStrategy.DELETION:
|
|
82
|
+
return self._compress_deletion(messages)
|
|
83
|
+
else:
|
|
84
|
+
logger.warning(f"Unknown strategy {strategy}, using sliding window")
|
|
85
|
+
return await self._compress_sliding_window(messages, target_tokens, todo_context)
|
|
86
|
+
|
|
87
|
+
async def _compress_sliding_window(
|
|
88
|
+
self,
|
|
89
|
+
messages: List[LLMMessage],
|
|
90
|
+
target_tokens: int,
|
|
91
|
+
todo_context: Optional[str] = None,
|
|
92
|
+
) -> CompressedMemory:
|
|
93
|
+
"""Compress using sliding window strategy.
|
|
94
|
+
|
|
95
|
+
Summarizes all messages into a single summary. If todo_context is provided,
|
|
96
|
+
it will be appended to the summary to preserve current task state.
|
|
97
|
+
|
|
98
|
+
Args:
|
|
99
|
+
messages: Messages to compress
|
|
100
|
+
target_tokens: Target token count
|
|
101
|
+
todo_context: Optional current todo list state to inject
|
|
102
|
+
|
|
103
|
+
Returns:
|
|
104
|
+
CompressedMemory object
|
|
105
|
+
"""
|
|
106
|
+
# Format messages for summarization
|
|
107
|
+
formatted = self._format_messages_for_summary(messages)
|
|
108
|
+
original_tokens = self._estimate_tokens(messages)
|
|
109
|
+
|
|
110
|
+
# Create summarization prompt
|
|
111
|
+
prompt_text = self.COMPRESSION_PROMPT.format(
|
|
112
|
+
count=len(messages),
|
|
113
|
+
tokens=original_tokens,
|
|
114
|
+
messages=formatted,
|
|
115
|
+
target_tokens=target_tokens,
|
|
116
|
+
)
|
|
117
|
+
|
|
118
|
+
# Extract system messages to preserve them
|
|
119
|
+
system_msgs = [m for m in messages if m.role == "system"]
|
|
120
|
+
|
|
121
|
+
# Call LLM to generate summary
|
|
122
|
+
try:
|
|
123
|
+
prompt = LLMMessage(role="user", content=prompt_text)
|
|
124
|
+
response = await self.llm.call_async(messages=[prompt], max_tokens=target_tokens * 2)
|
|
125
|
+
summary_text = self.llm.extract_text(response)
|
|
126
|
+
|
|
127
|
+
# Append todo context if available
|
|
128
|
+
if todo_context:
|
|
129
|
+
summary_text = f"{summary_text}\n\n[Current Tasks]\n{todo_context}"
|
|
130
|
+
|
|
131
|
+
# Convert summary to a user message
|
|
132
|
+
summary_message = LLMMessage(
|
|
133
|
+
role="user",
|
|
134
|
+
content=f"{self.SUMMARY_PREFIX}{summary_text}",
|
|
135
|
+
)
|
|
136
|
+
|
|
137
|
+
# System messages first, then summary
|
|
138
|
+
result_messages = system_msgs + [summary_message]
|
|
139
|
+
|
|
140
|
+
# Calculate compression metrics
|
|
141
|
+
compressed_tokens = self._estimate_tokens(result_messages)
|
|
142
|
+
compression_ratio = compressed_tokens / original_tokens if original_tokens > 0 else 0
|
|
143
|
+
|
|
144
|
+
return CompressedMemory(
|
|
145
|
+
messages=result_messages,
|
|
146
|
+
original_message_count=len(messages),
|
|
147
|
+
compressed_tokens=compressed_tokens,
|
|
148
|
+
original_tokens=original_tokens,
|
|
149
|
+
compression_ratio=compression_ratio,
|
|
150
|
+
metadata={"strategy": "sliding_window"},
|
|
151
|
+
)
|
|
152
|
+
except Exception as e:
|
|
153
|
+
logger.error(f"Error during compression: {e}")
|
|
154
|
+
# Fallback: keep system messages + first and last non-system message
|
|
155
|
+
non_system = [m for m in messages if m.role != "system"]
|
|
156
|
+
fallback_other = [non_system[0], non_system[-1]] if len(non_system) > 1 else non_system
|
|
157
|
+
fallback_messages = system_msgs + fallback_other
|
|
158
|
+
return CompressedMemory(
|
|
159
|
+
messages=fallback_messages,
|
|
160
|
+
original_message_count=len(messages),
|
|
161
|
+
compressed_tokens=self._estimate_tokens(fallback_messages),
|
|
162
|
+
original_tokens=original_tokens,
|
|
163
|
+
compression_ratio=0.5,
|
|
164
|
+
metadata={"strategy": "sliding_window", "error": str(e)},
|
|
165
|
+
)
|
|
166
|
+
|
|
167
|
+
    async def _compress_selective(
        self,
        messages: List[LLMMessage],
        target_tokens: int,
        todo_context: Optional[str] = None,
    ) -> CompressedMemory:
        """Compress using selective preservation strategy.

        Preserves important messages (tool calls, system prompts) and
        summarizes the rest. If todo_context is provided, it will be
        appended to the summary to preserve current task state.

        Args:
            messages: Messages to compress
            target_tokens: Target token count
            todo_context: Optional current todo list state to inject

        Returns:
            CompressedMemory object
        """
        # Separate preserved vs compressible messages
        preserved, to_compress = self._separate_messages(messages)

        if not to_compress:
            # Nothing to compress, just return preserved messages
            # Ensure system messages are first
            system_msgs = [m for m in preserved if m.role == "system"]
            other_msgs = [m for m in preserved if m.role != "system"]
            result_messages = system_msgs + other_msgs
            return CompressedMemory(
                messages=result_messages,
                original_message_count=len(messages),
                compressed_tokens=self._estimate_tokens(result_messages),
                original_tokens=self._estimate_tokens(messages),
                # ratio 1.0: nothing was actually compressed on this path
                compression_ratio=1.0,
                metadata={"strategy": "selective"},
            )

        # Compress the compressible messages
        original_tokens = self._estimate_tokens(messages)
        preserved_tokens = self._estimate_tokens(preserved)
        # Token budget left for the summary after the preserved messages.
        available_for_summary = target_tokens - preserved_tokens

        if available_for_summary > 0:
            # Generate summary for compressible messages
            formatted = self._format_messages_for_summary(to_compress)
            prompt_text = self.COMPRESSION_PROMPT.format(
                count=len(to_compress),
                tokens=self._estimate_tokens(to_compress),
                messages=formatted,
                target_tokens=available_for_summary,
            )

            try:
                prompt = LLMMessage(role="user", content=prompt_text)
                # 2x headroom on max_tokens for the generated summary.
                response = await self.llm.call_async(
                    messages=[prompt], max_tokens=available_for_summary * 2
                )
                summary_text = self.llm.extract_text(response)

                # Append todo context if available
                if todo_context:
                    summary_text = f"{summary_text}\n\n[Current Tasks]\n{todo_context}"

                # Convert summary to user message
                summary_message = LLMMessage(
                    role="user",
                    content=f"{self.SUMMARY_PREFIX}{summary_text}",
                )
                # Ensure system messages come first, then summary, then other preserved
                system_msgs = [m for m in preserved if m.role == "system"]
                other_msgs = [m for m in preserved if m.role != "system"]
                result_messages = system_msgs + [summary_message] + other_msgs

                summary_tokens = self._estimate_tokens([summary_message])
                compressed_tokens = preserved_tokens + summary_tokens
                compression_ratio = (
                    compressed_tokens / original_tokens if original_tokens > 0 else 0
                )

                return CompressedMemory(
                    messages=result_messages,
                    original_message_count=len(messages),
                    compressed_tokens=compressed_tokens,
                    original_tokens=original_tokens,
                    compression_ratio=compression_ratio,
                    metadata={"strategy": "selective", "preserved_count": len(preserved)},
                )
            except Exception as e:
                # Intentionally no return here: fall through to the
                # preserve-only fallback below.
                logger.error(f"Error during selective compression: {e}")

        # Fallback: just preserve the important messages (no summary)
        # NOTE(review): this path serves both cases — summary budget <= 0
        # and LLM failure above; confirm indentation matches upstream intent.
        # Ensure system messages are first
        system_msgs = [m for m in preserved if m.role == "system"]
        other_msgs = [m for m in preserved if m.role != "system"]
        result_messages = system_msgs + other_msgs
        return CompressedMemory(
            messages=result_messages,
            original_message_count=len(messages),
            compressed_tokens=preserved_tokens,
            original_tokens=original_tokens,
            compression_ratio=preserved_tokens / original_tokens if original_tokens > 0 else 1.0,
            metadata={"strategy": "selective", "preserved_count": len(preserved)},
        )
|
|
271
|
+
|
|
272
|
+
def _compress_deletion(self, messages: List[LLMMessage]) -> CompressedMemory:
|
|
273
|
+
"""Simple deletion strategy - no compression, just drop old messages.
|
|
274
|
+
|
|
275
|
+
Args:
|
|
276
|
+
messages: Messages (will be deleted)
|
|
277
|
+
|
|
278
|
+
Returns:
|
|
279
|
+
CompressedMemory with empty messages list
|
|
280
|
+
"""
|
|
281
|
+
original_tokens = self._estimate_tokens(messages)
|
|
282
|
+
|
|
283
|
+
return CompressedMemory(
|
|
284
|
+
messages=[],
|
|
285
|
+
original_message_count=len(messages),
|
|
286
|
+
compressed_tokens=0,
|
|
287
|
+
original_tokens=original_tokens,
|
|
288
|
+
compression_ratio=0.0,
|
|
289
|
+
metadata={"strategy": "deletion"},
|
|
290
|
+
)
|
|
291
|
+
|
|
292
|
+
def _separate_messages(
|
|
293
|
+
self, messages: List[LLMMessage]
|
|
294
|
+
) -> Tuple[List[LLMMessage], List[LLMMessage]]:
|
|
295
|
+
"""Separate messages into preserved and compressible.
|
|
296
|
+
|
|
297
|
+
Strategy:
|
|
298
|
+
1. Preserve system messages (if configured)
|
|
299
|
+
2. Preserve orphaned tool_use (waiting for tool_result)
|
|
300
|
+
3. Preserve protected tools (todo list, etc.) - NEVER compress these
|
|
301
|
+
4. Preserve the most recent N messages (MEMORY_SHORT_TERM_MIN_SIZE)
|
|
302
|
+
5. **Critical rule**: Tool pairs (tool_use + tool_result) must stay together
|
|
303
|
+
- If one is preserved, the other must be preserved too
|
|
304
|
+
- If one is compressed, the other must be compressed too
|
|
305
|
+
|
|
306
|
+
Args:
|
|
307
|
+
messages: All messages
|
|
308
|
+
|
|
309
|
+
Returns:
|
|
310
|
+
Tuple of (preserved, to_compress)
|
|
311
|
+
"""
|
|
312
|
+
preserve_indices = set()
|
|
313
|
+
|
|
314
|
+
# Step 1: Mark system messages for preservation
|
|
315
|
+
for i, msg in enumerate(messages):
|
|
316
|
+
if Config.MEMORY_PRESERVE_SYSTEM_PROMPTS and msg.role == "system":
|
|
317
|
+
preserve_indices.add(i)
|
|
318
|
+
|
|
319
|
+
# Step 2: Find tool pairs and orphaned tool_use messages
|
|
320
|
+
tool_pairs, orphaned_tool_use_indices = self._find_tool_pairs(messages)
|
|
321
|
+
|
|
322
|
+
# Step 2a: CRITICAL - Preserve orphaned tool_use (waiting for tool_result)
|
|
323
|
+
# These must NEVER be compressed, or we'll lose the tool_use without its result
|
|
324
|
+
for orphan_idx in orphaned_tool_use_indices:
|
|
325
|
+
preserve_indices.add(orphan_idx)
|
|
326
|
+
|
|
327
|
+
# Step 2b: Mark protected tools for preservation (CRITICAL for stateful tools)
|
|
328
|
+
protected_pairs = self._find_protected_tool_pairs(messages, tool_pairs)
|
|
329
|
+
for assistant_idx, user_idx in protected_pairs:
|
|
330
|
+
preserve_indices.add(assistant_idx)
|
|
331
|
+
preserve_indices.add(user_idx)
|
|
332
|
+
|
|
333
|
+
# Step 3: Preserve the most recent N messages to maintain conversation continuity
|
|
334
|
+
preserve_count = min(Config.MEMORY_SHORT_TERM_MIN_SIZE, len(messages))
|
|
335
|
+
for i in range(len(messages) - preserve_count, len(messages)):
|
|
336
|
+
if i >= 0:
|
|
337
|
+
preserve_indices.add(i)
|
|
338
|
+
|
|
339
|
+
# Step 4: Ensure tool pairs stay together (iterate until stable)
|
|
340
|
+
# A single pass can miss pairs: e.g. pair [A, T1] is skipped because
|
|
341
|
+
# neither is preserved, then pair [A, T2] preserves A because T2 is in
|
|
342
|
+
# the recent window — but T1 was already skipped. Fixed-point loop
|
|
343
|
+
# ensures all pairs containing a preserved index are fully preserved.
|
|
344
|
+
changed = True
|
|
345
|
+
while changed:
|
|
346
|
+
changed = False
|
|
347
|
+
for assistant_idx, user_idx in tool_pairs:
|
|
348
|
+
if assistant_idx in preserve_indices or user_idx in preserve_indices:
|
|
349
|
+
if assistant_idx not in preserve_indices:
|
|
350
|
+
preserve_indices.add(assistant_idx)
|
|
351
|
+
changed = True
|
|
352
|
+
if user_idx not in preserve_indices:
|
|
353
|
+
preserve_indices.add(user_idx)
|
|
354
|
+
changed = True
|
|
355
|
+
|
|
356
|
+
# Step 5: Build preserved and to_compress lists
|
|
357
|
+
preserved = []
|
|
358
|
+
to_compress = []
|
|
359
|
+
for i, msg in enumerate(messages):
|
|
360
|
+
if i in preserve_indices:
|
|
361
|
+
preserved.append(msg)
|
|
362
|
+
else:
|
|
363
|
+
to_compress.append(msg)
|
|
364
|
+
|
|
365
|
+
logger.info(
|
|
366
|
+
f"Separated: {len(preserved)} preserved, {len(to_compress)} to compress "
|
|
367
|
+
f"({len(tool_pairs)} tool pairs, {len(protected_pairs)} protected, "
|
|
368
|
+
f"{len(orphaned_tool_use_indices)} orphaned tool_use, "
|
|
369
|
+
f"{preserve_count} recent)"
|
|
370
|
+
)
|
|
371
|
+
return preserved, to_compress
|
|
372
|
+
|
|
373
|
+
def _find_tool_pairs(self, messages: List[LLMMessage]) -> tuple[List[List[int]], List[int]]:
|
|
374
|
+
"""Find tool_use/tool_result pairs in messages.
|
|
375
|
+
|
|
376
|
+
Handles both:
|
|
377
|
+
- New format: assistant.tool_calls + tool role messages
|
|
378
|
+
- Legacy format: tool_use blocks in assistant content + tool_result blocks in user content
|
|
379
|
+
|
|
380
|
+
Returns:
|
|
381
|
+
Tuple of (pairs, orphaned_tool_use_indices)
|
|
382
|
+
- pairs: List of [assistant_index, tool_response_index] for matched pairs
|
|
383
|
+
- orphaned_tool_use_indices: List of message indices with unmatched tool_use
|
|
384
|
+
"""
|
|
385
|
+
pairs = []
|
|
386
|
+
pending_tool_uses = {} # tool_id -> message_index
|
|
387
|
+
|
|
388
|
+
for i, msg in enumerate(messages):
|
|
389
|
+
# New format: assistant with tool_calls field
|
|
390
|
+
if msg.role == "assistant" and hasattr(msg, "tool_calls") and msg.tool_calls:
|
|
391
|
+
for tc in msg.tool_calls:
|
|
392
|
+
tool_id = tc.get("id") if isinstance(tc, dict) else getattr(tc, "id", None)
|
|
393
|
+
if tool_id:
|
|
394
|
+
pending_tool_uses[tool_id] = i
|
|
395
|
+
|
|
396
|
+
# Legacy format: assistant with tool_use blocks in content
|
|
397
|
+
elif msg.role == "assistant" and isinstance(msg.content, list):
|
|
398
|
+
for block in msg.content:
|
|
399
|
+
btype = self._get_block_attr(block, "type")
|
|
400
|
+
if btype == "tool_use":
|
|
401
|
+
tool_id = self._get_block_attr(block, "id")
|
|
402
|
+
if tool_id:
|
|
403
|
+
pending_tool_uses[tool_id] = i
|
|
404
|
+
|
|
405
|
+
# New format: tool role message
|
|
406
|
+
elif msg.role == "tool" and hasattr(msg, "tool_call_id") and msg.tool_call_id:
|
|
407
|
+
tool_call_id = msg.tool_call_id
|
|
408
|
+
if tool_call_id in pending_tool_uses:
|
|
409
|
+
assistant_idx = pending_tool_uses[tool_call_id]
|
|
410
|
+
pairs.append([assistant_idx, i])
|
|
411
|
+
del pending_tool_uses[tool_call_id]
|
|
412
|
+
|
|
413
|
+
# Legacy format: user with tool_result blocks in content
|
|
414
|
+
elif msg.role == "user" and isinstance(msg.content, list):
|
|
415
|
+
for block in msg.content:
|
|
416
|
+
btype = self._get_block_attr(block, "type")
|
|
417
|
+
if btype == "tool_result":
|
|
418
|
+
tool_use_id = self._get_block_attr(block, "tool_use_id")
|
|
419
|
+
if tool_use_id in pending_tool_uses:
|
|
420
|
+
assistant_idx = pending_tool_uses[tool_use_id]
|
|
421
|
+
pairs.append([assistant_idx, i])
|
|
422
|
+
del pending_tool_uses[tool_use_id]
|
|
423
|
+
|
|
424
|
+
# Remaining items in pending_tool_uses are orphaned (no matching result yet)
|
|
425
|
+
orphaned_indices = list(pending_tool_uses.values())
|
|
426
|
+
|
|
427
|
+
if orphaned_indices:
|
|
428
|
+
logger.debug(
|
|
429
|
+
f"Found {len(orphaned_indices)} orphaned tool_use without matching tool_result - "
|
|
430
|
+
f"these will be preserved to wait for results"
|
|
431
|
+
)
|
|
432
|
+
|
|
433
|
+
return pairs, orphaned_indices
|
|
434
|
+
|
|
435
|
+
def _find_protected_tool_pairs(
|
|
436
|
+
self, messages: List[LLMMessage], tool_pairs: List[List[int]]
|
|
437
|
+
) -> List[List[int]]:
|
|
438
|
+
"""Find tool pairs that use protected tools (must never be compressed).
|
|
439
|
+
|
|
440
|
+
Handles both new format (tool_calls field) and legacy format (tool_use blocks).
|
|
441
|
+
|
|
442
|
+
Args:
|
|
443
|
+
messages: All messages
|
|
444
|
+
tool_pairs: All tool_use/tool_result pairs
|
|
445
|
+
|
|
446
|
+
Returns:
|
|
447
|
+
List of protected tool pairs [assistant_index, tool_response_index]
|
|
448
|
+
"""
|
|
449
|
+
protected_pairs = []
|
|
450
|
+
|
|
451
|
+
for assistant_idx, response_idx in tool_pairs:
|
|
452
|
+
msg = messages[assistant_idx]
|
|
453
|
+
|
|
454
|
+
# New format: check tool_calls field
|
|
455
|
+
if msg.role == "assistant" and hasattr(msg, "tool_calls") and msg.tool_calls:
|
|
456
|
+
for tc in msg.tool_calls:
|
|
457
|
+
if isinstance(tc, dict):
|
|
458
|
+
tool_name = tc.get("function", {}).get("name", "")
|
|
459
|
+
else:
|
|
460
|
+
tool_name = (
|
|
461
|
+
getattr(tc.function, "name", "") if hasattr(tc, "function") else ""
|
|
462
|
+
)
|
|
463
|
+
if tool_name in self.PROTECTED_TOOLS:
|
|
464
|
+
protected_pairs.append([assistant_idx, response_idx])
|
|
465
|
+
logger.debug(
|
|
466
|
+
f"Protected tool '{tool_name}' at indices [{assistant_idx}, {response_idx}] - will be preserved"
|
|
467
|
+
)
|
|
468
|
+
break
|
|
469
|
+
|
|
470
|
+
# Legacy format: check tool_use blocks in content
|
|
471
|
+
elif msg.role == "assistant" and isinstance(msg.content, list):
|
|
472
|
+
for block in msg.content:
|
|
473
|
+
btype = self._get_block_attr(block, "type")
|
|
474
|
+
if btype == "tool_use":
|
|
475
|
+
tool_name = self._get_block_attr(block, "name")
|
|
476
|
+
if tool_name in self.PROTECTED_TOOLS:
|
|
477
|
+
protected_pairs.append([assistant_idx, response_idx])
|
|
478
|
+
logger.debug(
|
|
479
|
+
f"Protected tool '{tool_name}' at indices [{assistant_idx}, {response_idx}] - will be preserved"
|
|
480
|
+
)
|
|
481
|
+
break
|
|
482
|
+
|
|
483
|
+
return protected_pairs
|
|
484
|
+
|
|
485
|
+
def _get_block_attr(self, block, attr: str):
|
|
486
|
+
"""Get attribute from block (supports dict and object)."""
|
|
487
|
+
if isinstance(block, dict):
|
|
488
|
+
return block.get(attr)
|
|
489
|
+
return getattr(block, attr, None)
|
|
490
|
+
|
|
491
|
+
def _format_messages_for_summary(self, messages: List[LLMMessage]) -> str:
|
|
492
|
+
"""Format messages for inclusion in summary prompt.
|
|
493
|
+
|
|
494
|
+
Args:
|
|
495
|
+
messages: Messages to format
|
|
496
|
+
|
|
497
|
+
Returns:
|
|
498
|
+
Formatted string
|
|
499
|
+
"""
|
|
500
|
+
formatted = []
|
|
501
|
+
for i, msg in enumerate(messages, 1):
|
|
502
|
+
role = msg.role.upper()
|
|
503
|
+
content = self._extract_text_content(msg)
|
|
504
|
+
formatted.append(f"[{i}] {role}: {content}")
|
|
505
|
+
|
|
506
|
+
return "\n\n".join(formatted)
|
|
507
|
+
|
|
508
|
+
def _extract_text_content(self, message: LLMMessage) -> str:
|
|
509
|
+
"""Extract text content from message for token estimation.
|
|
510
|
+
|
|
511
|
+
Uses centralized extract_text from content_utils.
|
|
512
|
+
|
|
513
|
+
Args:
|
|
514
|
+
message: Message to extract from
|
|
515
|
+
|
|
516
|
+
Returns:
|
|
517
|
+
Text content
|
|
518
|
+
"""
|
|
519
|
+
# Use centralized extraction
|
|
520
|
+
text = extract_text(message.content)
|
|
521
|
+
|
|
522
|
+
# For token estimation, also include tool call info as string representation
|
|
523
|
+
if hasattr(message, "tool_calls") and message.tool_calls:
|
|
524
|
+
text += " " + str(message.tool_calls)
|
|
525
|
+
|
|
526
|
+
return text if text else str(message.content)
|
|
527
|
+
|
|
528
|
+
def _estimate_tokens(self, messages: List[LLMMessage]) -> int:
|
|
529
|
+
"""Estimate token count for messages.
|
|
530
|
+
|
|
531
|
+
Args:
|
|
532
|
+
messages: Messages to count
|
|
533
|
+
|
|
534
|
+
Returns:
|
|
535
|
+
Estimated token count
|
|
536
|
+
"""
|
|
537
|
+
# Improved estimation: account for message structure and content
|
|
538
|
+
total_chars = 0
|
|
539
|
+
for msg in messages:
|
|
540
|
+
# Add overhead for message structure (role, type fields, etc.)
|
|
541
|
+
total_chars += 20 # ~5 tokens for structure
|
|
542
|
+
|
|
543
|
+
# Extract and count content
|
|
544
|
+
content = self._extract_text_content(msg)
|
|
545
|
+
total_chars += len(content)
|
|
546
|
+
|
|
547
|
+
# For complex content (lists), add overhead for JSON structure
|
|
548
|
+
if isinstance(msg.content, list):
|
|
549
|
+
# Each block has type, id, etc. fields
|
|
550
|
+
total_chars += len(msg.content) * 30 # ~7 tokens per block overhead
|
|
551
|
+
|
|
552
|
+
# More accurate ratio: ~3.5 characters per token for mixed content
|
|
553
|
+
# (English text is ~4 chars/token, code/JSON is ~3 chars/token)
|
|
554
|
+
return int(total_chars / 3.5)
|