illusion-code 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- illusion/__init__.py +24 -0
- illusion/__main__.py +15 -0
- illusion/_frontend/dist/index.mjs +39208 -0
- illusion/_frontend/package.json +27 -0
- illusion/_frontend/src/App.tsx +624 -0
- illusion/_frontend/src/components/CommandPicker.tsx +98 -0
- illusion/_frontend/src/components/Composer.tsx +55 -0
- illusion/_frontend/src/components/ComposerController.tsx +128 -0
- illusion/_frontend/src/components/ConversationView.tsx +750 -0
- illusion/_frontend/src/components/Footer.tsx +25 -0
- illusion/_frontend/src/components/MarkdownContent.tsx +537 -0
- illusion/_frontend/src/components/MarkdownTable.tsx +245 -0
- illusion/_frontend/src/components/ModalHost.tsx +425 -0
- illusion/_frontend/src/components/MultilineTextInput.tsx +250 -0
- illusion/_frontend/src/components/PromptInput.tsx +64 -0
- illusion/_frontend/src/components/SelectModal.tsx +78 -0
- illusion/_frontend/src/components/SidePanel.tsx +175 -0
- illusion/_frontend/src/components/Spinner.tsx +77 -0
- illusion/_frontend/src/components/StatusBar.tsx +142 -0
- illusion/_frontend/src/components/SwarmPanel.tsx +141 -0
- illusion/_frontend/src/components/TodoPanel.tsx +126 -0
- illusion/_frontend/src/components/ToolCallDisplay.tsx +202 -0
- illusion/_frontend/src/components/TranscriptPane.tsx +79 -0
- illusion/_frontend/src/components/WelcomeBanner.tsx +37 -0
- illusion/_frontend/src/hooks/useBackendSession.ts +468 -0
- illusion/_frontend/src/hooks/useTerminalSize.ts +9 -0
- illusion/_frontend/src/i18n.ts +78 -0
- illusion/_frontend/src/index.tsx +42 -0
- illusion/_frontend/src/theme/ThemeContext.tsx +19 -0
- illusion/_frontend/src/theme/builtinThemes.ts +89 -0
- illusion/_frontend/src/types.ts +110 -0
- illusion/_frontend/src/utils/markdown.ts +33 -0
- illusion/_frontend/src/utils/thinking.ts +191 -0
- illusion/_frontend/tsconfig.json +13 -0
- illusion/_web_dist/assets/index-BseIw-ik.css +10 -0
- illusion/_web_dist/assets/index-C_0ZWMuW.js +82 -0
- illusion/_web_dist/index.html +16 -0
- illusion/api/__init__.py +36 -0
- illusion/api/client.py +568 -0
- illusion/api/codex_client.py +563 -0
- illusion/api/compat.py +138 -0
- illusion/api/effort.py +128 -0
- illusion/api/errors.py +57 -0
- illusion/api/openai_client.py +819 -0
- illusion/api/provider.py +148 -0
- illusion/api/registry.py +479 -0
- illusion/api/usage.py +45 -0
- illusion/auth/__init__.py +50 -0
- illusion/auth/copilot.py +419 -0
- illusion/auth/external.py +612 -0
- illusion/auth/flows.py +58 -0
- illusion/auth/manager.py +214 -0
- illusion/auth/storage.py +372 -0
- illusion/bridge/__init__.py +38 -0
- illusion/bridge/manager.py +190 -0
- illusion/bridge/session_runner.py +84 -0
- illusion/bridge/types.py +113 -0
- illusion/bridge/work_secret.py +131 -0
- illusion/cli.py +1228 -0
- illusion/commands/__init__.py +32 -0
- illusion/commands/registry.py +1934 -0
- illusion/config/__init__.py +39 -0
- illusion/config/i18n.py +522 -0
- illusion/config/paths.py +259 -0
- illusion/config/settings.py +564 -0
- illusion/coordinator/__init__.py +41 -0
- illusion/coordinator/agent_definitions.py +1093 -0
- illusion/coordinator/coordinator_mode.py +127 -0
- illusion/engine/__init__.py +95 -0
- illusion/engine/cost_tracker.py +55 -0
- illusion/engine/messages.py +369 -0
- illusion/engine/query.py +632 -0
- illusion/engine/query_engine.py +343 -0
- illusion/engine/stream_events.py +169 -0
- illusion/hooks/__init__.py +67 -0
- illusion/hooks/events.py +43 -0
- illusion/hooks/executor.py +397 -0
- illusion/hooks/hot_reload.py +74 -0
- illusion/hooks/loader.py +133 -0
- illusion/hooks/schemas.py +121 -0
- illusion/hooks/types.py +86 -0
- illusion/mcp/__init__.py +104 -0
- illusion/mcp/client.py +377 -0
- illusion/mcp/config.py +140 -0
- illusion/mcp/types.py +175 -0
- illusion/memory/__init__.py +36 -0
- illusion/memory/manager.py +94 -0
- illusion/memory/memdir.py +58 -0
- illusion/memory/paths.py +57 -0
- illusion/memory/scan.py +120 -0
- illusion/memory/search.py +83 -0
- illusion/memory/types.py +43 -0
- illusion/output_styles/__init__.py +15 -0
- illusion/output_styles/loader.py +64 -0
- illusion/permissions/__init__.py +39 -0
- illusion/permissions/checker.py +174 -0
- illusion/permissions/modes.py +38 -0
- illusion/platforms.py +148 -0
- illusion/plugins/__init__.py +71 -0
- illusion/plugins/bundled/__init__.py +0 -0
- illusion/plugins/installer.py +59 -0
- illusion/plugins/loader.py +301 -0
- illusion/plugins/schemas.py +51 -0
- illusion/plugins/types.py +56 -0
- illusion/prompts/__init__.py +29 -0
- illusion/prompts/claudemd.py +74 -0
- illusion/prompts/context.py +187 -0
- illusion/prompts/environment.py +189 -0
- illusion/prompts/system_prompt.py +155 -0
- illusion/py.typed +0 -0
- illusion/sandbox/__init__.py +29 -0
- illusion/sandbox/adapter.py +174 -0
- illusion/services/__init__.py +59 -0
- illusion/services/compact/__init__.py +1015 -0
- illusion/services/cron.py +338 -0
- illusion/services/cron_scheduler.py +715 -0
- illusion/services/file_history.py +258 -0
- illusion/services/lsp/__init__.py +455 -0
- illusion/services/session_storage.py +237 -0
- illusion/services/token_estimation.py +72 -0
- illusion/skills/__init__.py +60 -0
- illusion/skills/bundled/__init__.py +110 -0
- illusion/skills/bundled/content/batch.md +86 -0
- illusion/skills/bundled/content/coding-guidelines.md +70 -0
- illusion/skills/bundled/content/debug.md +38 -0
- illusion/skills/bundled/content/loop.md +82 -0
- illusion/skills/bundled/content/remember.md +105 -0
- illusion/skills/bundled/content/simplify.md +53 -0
- illusion/skills/bundled/content/skillify.md +113 -0
- illusion/skills/bundled/content/stuck.md +54 -0
- illusion/skills/bundled/content/update-config.md +329 -0
- illusion/skills/bundled/content/verify.md +74 -0
- illusion/skills/loader.py +219 -0
- illusion/skills/registry.py +40 -0
- illusion/skills/types.py +24 -0
- illusion/state/__init__.py +18 -0
- illusion/state/app_state.py +67 -0
- illusion/state/store.py +93 -0
- illusion/swarm/__init__.py +71 -0
- illusion/swarm/agent_executor.py +857 -0
- illusion/swarm/in_process.py +259 -0
- illusion/swarm/subprocess_backend.py +136 -0
- illusion/swarm/team_helpers.py +123 -0
- illusion/swarm/types.py +159 -0
- illusion/swarm/worktree.py +347 -0
- illusion/tasks/__init__.py +33 -0
- illusion/tasks/local_agent_task.py +42 -0
- illusion/tasks/local_shell_task.py +27 -0
- illusion/tasks/manager.py +377 -0
- illusion/tasks/stop_task.py +21 -0
- illusion/tasks/types.py +88 -0
- illusion/tools/__init__.py +126 -0
- illusion/tools/agent_tool.py +388 -0
- illusion/tools/ask_user_question_tool.py +186 -0
- illusion/tools/base.py +149 -0
- illusion/tools/bash_tool.py +413 -0
- illusion/tools/config_tool.py +90 -0
- illusion/tools/cron_tool.py +473 -0
- illusion/tools/enter_plan_mode_tool.py +147 -0
- illusion/tools/enter_worktree_tool.py +188 -0
- illusion/tools/exit_plan_mode_tool.py +69 -0
- illusion/tools/exit_worktree_tool.py +225 -0
- illusion/tools/file_edit_tool.py +283 -0
- illusion/tools/file_read_tool.py +294 -0
- illusion/tools/file_write_tool.py +184 -0
- illusion/tools/glob_tool.py +165 -0
- illusion/tools/grep_tool.py +190 -0
- illusion/tools/list_mcp_resources_tool.py +80 -0
- illusion/tools/lsp_tool.py +333 -0
- illusion/tools/mcp_auth_tool.py +100 -0
- illusion/tools/mcp_tool.py +75 -0
- illusion/tools/notebook_edit_tool.py +242 -0
- illusion/tools/powershell_tool.py +334 -0
- illusion/tools/read_mcp_resource_tool.py +63 -0
- illusion/tools/repl_tool.py +100 -0
- illusion/tools/send_message_tool.py +112 -0
- illusion/tools/shell_common.py +187 -0
- illusion/tools/skill_tool.py +86 -0
- illusion/tools/sleep_tool.py +62 -0
- illusion/tools/structured_output_tool.py +58 -0
- illusion/tools/task_create_tool.py +98 -0
- illusion/tools/task_get_tool.py +94 -0
- illusion/tools/task_list_tool.py +94 -0
- illusion/tools/task_output_tool.py +55 -0
- illusion/tools/task_stop_tool.py +52 -0
- illusion/tools/task_update_tool.py +224 -0
- illusion/tools/team_create_tool.py +236 -0
- illusion/tools/team_delete_tool.py +104 -0
- illusion/tools/todo_write_tool.py +198 -0
- illusion/tools/tool_search_tool.py +156 -0
- illusion/tools/web_fetch_tool.py +264 -0
- illusion/tools/web_search_tool.py +186 -0
- illusion/ui/__init__.py +23 -0
- illusion/ui/app.py +258 -0
- illusion/ui/backend_host.py +1180 -0
- illusion/ui/input.py +86 -0
- illusion/ui/output.py +363 -0
- illusion/ui/permission_dialog.py +47 -0
- illusion/ui/permission_store.py +99 -0
- illusion/ui/protocol.py +384 -0
- illusion/ui/react_launcher.py +280 -0
- illusion/ui/runtime.py +787 -0
- illusion/ui/textual_app.py +603 -0
- illusion/ui/web/__init__.py +10 -0
- illusion/ui/web/server.py +87 -0
- illusion/ui/web/ws_host.py +1197 -0
- illusion/utils/__init__.py +0 -0
- illusion/utils/ripgrep.py +299 -0
- illusion/utils/shell.py +248 -0
- illusion_code-0.1.0.dist-info/METADATA +1159 -0
- illusion_code-0.1.0.dist-info/RECORD +214 -0
- illusion_code-0.1.0.dist-info/WHEEL +4 -0
- illusion_code-0.1.0.dist-info/entry_points.txt +2 -0
- illusion_code-0.1.0.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,1015 @@
|
|
|
1
|
+
"""
|
|
2
|
+
会话压缩模块 — 微压缩、LLM 摘要和自动压缩
|
|
3
|
+
=============================================
|
|
4
|
+
|
|
5
|
+
本模块实现会话压缩功能,参考 Claude Code 的压缩系统:
|
|
6
|
+
- 微压缩(Microcompact):清除旧工具结果内容以廉价方式减少 Token 数量
|
|
7
|
+
- 完整压缩(Full Compact):调用 LLM 生成早期消息的结构化摘要
|
|
8
|
+
- 自动压缩(Auto-compact):当 Token 数量超过阈值时自动触发压缩
|
|
9
|
+
- 响应式压缩(Reactive Compact):API 返回 prompt-too-long 时触发压缩
|
|
10
|
+
- 上下文警告:接近阈值时通知用户
|
|
11
|
+
|
|
12
|
+
主要修复:
|
|
13
|
+
- 修复压缩后消息结构混乱(连续 user 消息导致 API 报错)
|
|
14
|
+
- 修复日志格式 bug(~d → ~%d)
|
|
15
|
+
- 添加压缩边界标记(Compact Boundary Marker)
|
|
16
|
+
- 添加图片剥离(压缩前移除图片数据)
|
|
17
|
+
- 添加 PTL 重试(prompt-too-long 时截断重试)
|
|
18
|
+
- 添加响应式压缩
|
|
19
|
+
- 添加上下文警告系统
|
|
20
|
+
"""
|
|
21
|
+
|
|
22
|
+
from __future__ import annotations
|
|
23
|
+
|
|
24
|
+
import logging
|
|
25
|
+
import re
|
|
26
|
+
from dataclasses import dataclass
|
|
27
|
+
from typing import Any
|
|
28
|
+
|
|
29
|
+
from illusion.engine.messages import (
|
|
30
|
+
ConversationMessage,
|
|
31
|
+
ContentBlock,
|
|
32
|
+
MediaBlock,
|
|
33
|
+
TextBlock,
|
|
34
|
+
ThinkingBlock,
|
|
35
|
+
ToolResultBlock,
|
|
36
|
+
ToolUseBlock,
|
|
37
|
+
)
|
|
38
|
+
from illusion.services.token_estimation import estimate_tokens
|
|
39
|
+
|
|
40
|
+
# Module-level logger.
log = logging.getLogger(__name__)

# ---------------------------------------------------------------------------
# Constants (from Claude Code microCompact.ts / autoCompact.ts)
# ---------------------------------------------------------------------------

# Names of the tools whose results are eligible for microcompaction.
COMPACTABLE_TOOLS: frozenset[str] = frozenset({
    "read_file",
    "bash",
    "grep",
    "glob",
    "web_search",
    "web_fetch",
    "edit_file",
    "write_file",
})

# Placeholder text that replaces a tool result cleared by microcompaction.
TIME_BASED_MC_CLEARED_MESSAGE = "[Old tool result content cleared]"

# Auto-compaction thresholds.
AUTOCOMPACT_BUFFER_TOKENS = 13_000  # buffer token count
WARNING_THRESHOLD_BUFFER_TOKENS = 20_000  # buffer for the warning threshold
MAX_OUTPUT_TOKENS_FOR_SUMMARY = 20_000  # maximum output tokens for a summary
MAX_CONSECUTIVE_AUTOCOMPACT_FAILURES = 3  # maximum consecutive failures
MANUAL_COMPACT_BUFFER_TOKENS = 3_000  # buffer for manual compaction

# Microcompaction defaults.
DEFAULT_KEEP_RECENT = 5  # number of most recent tool results to keep
DEFAULT_GAP_THRESHOLD_MINUTES = 60  # time-gap threshold (minutes)
DEFAULT_PRESERVE_RECENT = 6  # default number of recent messages to preserve

# Padding applied to token estimates (conservative estimate).
TOKEN_ESTIMATION_PADDING = 4 / 3

# Default context window size (by model family).
_DEFAULT_CONTEXT_WINDOW = 200_000

# Maximum number of prompt-too-long (PTL) retries.
MAX_PTL_RETRIES = 3

# Prefix text of the compact boundary marker.
COMPACT_BOUNDARY_PREFIX = "[COMPACT_BOUNDARY]"
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
# ---------------------------------------------------------------------------
|
|
88
|
+
# Token 估算
|
|
89
|
+
# ---------------------------------------------------------------------------
|
|
90
|
+
|
|
91
|
+
def estimate_message_tokens(messages: list[ConversationMessage]) -> int:
    """Estimate the total token count of a conversation, padded by 4/3.

    Args:
        messages: Conversation messages whose content blocks are measured.

    Returns:
        The summed per-block estimate multiplied by
        ``TOKEN_ESTIMATION_PADDING`` and truncated to an int.
    """
    image_cost = 2000  # every image is charged a flat 2000 tokens

    def cost_of(block) -> int:
        # Text-like blocks are measured with estimate_tokens; block kinds not
        # listed here contribute nothing.
        if isinstance(block, TextBlock):
            return estimate_tokens(block.text)
        if isinstance(block, ToolResultBlock):
            payload = block.content
            if isinstance(payload, str):
                return estimate_tokens(payload)
            if isinstance(payload, list):
                subtotal = 0
                for item in payload:
                    if isinstance(item, TextBlock):
                        subtotal += estimate_tokens(item.text)
                    elif isinstance(item, MediaBlock):
                        subtotal += image_cost
                return subtotal
            return 0
        if isinstance(block, ToolUseBlock):
            return estimate_tokens(block.name) + estimate_tokens(str(block.input))
        if isinstance(block, ThinkingBlock):
            cost = estimate_tokens(block.thinking)
            if block.signature:
                cost += estimate_tokens(block.signature)
            return cost
        if isinstance(block, MediaBlock):
            return image_cost
        return 0

    raw_total = sum(cost_of(block) for msg in messages for block in msg.content)
    return int(raw_total * TOKEN_ESTIMATION_PADDING)
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
def estimate_conversation_tokens(messages: list[ConversationMessage]) -> int:
    """Backward-compatible alias that delegates to ``estimate_message_tokens``."""
    token_count = estimate_message_tokens(messages)
    return token_count
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
# ---------------------------------------------------------------------------
|
|
125
|
+
# 图片剥离 — 压缩前移除图片数据以减少 Token
|
|
126
|
+
# ---------------------------------------------------------------------------
|
|
127
|
+
|
|
128
|
+
def strip_images_from_messages(
    messages: list[ConversationMessage],
) -> list[ConversationMessage]:
    """Replace media blocks in messages with short text placeholders.

    Intended to run before the conversation is sent to the summarising LLM,
    so tokens are not spent on embedded image/document data.

    Args:
        messages: The original message list.

    Returns:
        A new list of new messages; the input messages are not modified.
    """

    def placeholder(media):
        # Same placeholder text for top-level and nested media blocks.
        return TextBlock(text=f"[image: {media.file_path}, {media.media_type}]")

    def scrub(block):
        if isinstance(block, MediaBlock):
            return placeholder(block)
        if isinstance(block, ToolResultBlock) and isinstance(block.content, list):
            # Rebuild the tool result with nested media replaced.
            return ToolResultBlock(
                tool_use_id=block.tool_use_id,
                content=[
                    placeholder(inner) if isinstance(inner, MediaBlock) else inner
                    for inner in block.content
                ],
                is_error=block.is_error,
            )
        return block

    return [
        ConversationMessage(role=msg.role, content=[scrub(block) for block in msg.content])
        for msg in messages
    ]
|
|
170
|
+
|
|
171
|
+
|
|
172
|
+
# ---------------------------------------------------------------------------
|
|
173
|
+
# 微压缩 — 清除旧工具结果以廉价方式减少 Token
|
|
174
|
+
# ---------------------------------------------------------------------------
|
|
175
|
+
|
|
176
|
+
def _collect_compactable_tool_ids(messages: list[ConversationMessage]) -> list[str]:
|
|
177
|
+
"""遍历消息并收集可压缩的工具使用 ID。"""
|
|
178
|
+
ids: list[str] = []
|
|
179
|
+
for msg in messages:
|
|
180
|
+
if msg.role != "assistant":
|
|
181
|
+
continue
|
|
182
|
+
for block in msg.content:
|
|
183
|
+
if isinstance(block, ToolUseBlock) and block.name in COMPACTABLE_TOOLS:
|
|
184
|
+
ids.append(block.id)
|
|
185
|
+
return ids
|
|
186
|
+
|
|
187
|
+
|
|
188
|
+
def microcompact_messages(
    messages: list[ConversationMessage],
    *,
    keep_recent: int = DEFAULT_KEEP_RECENT,
) -> tuple[list[ConversationMessage], int]:
    """Clear old compactable tool results, keeping the most recent ones.

    This is the cheap first compaction pass and needs no LLM call. The content
    of each cleared tool result is replaced with
    ``TIME_BASED_MC_CLEARED_MESSAGE``.

    Args:
        messages: Conversation messages. The ``content`` attribute of user
            messages is reassigned in place.
        keep_recent: Number of most recent compactable tool results to keep
            untouched; values below 1 are treated as 1.

    Returns:
        ``(messages, tokens_saved)`` where ``messages`` is the same list object
        that was passed in and ``tokens_saved`` is the estimated size of the
        content that was cleared.
    """
    keep_recent = max(1, keep_recent)  # never clear every result
    all_ids = _collect_compactable_tool_ids(messages)

    if len(all_ids) <= keep_recent:
        return messages, 0

    # Everything except the last `keep_recent` tool uses is a clearing candidate.
    clear_set = set(all_ids) - set(all_ids[-keep_recent:])

    tokens_saved = 0
    cleared_count = 0  # results actually replaced in this pass
    for msg in messages:
        if msg.role != "user":
            continue
        new_content: list[ContentBlock] = []
        for block in msg.content:
            if not (
                isinstance(block, ToolResultBlock)
                and block.tool_use_id in clear_set
            ):
                new_content.append(block)
                continue

            old_content = block.content
            if isinstance(old_content, str) and old_content == TIME_BASED_MC_CLEARED_MESSAGE:
                # Already cleared by an earlier pass; nothing to save.
                new_content.append(block)
                continue

            # Tally the estimated size of the content being dropped.
            if isinstance(old_content, str):
                tokens_saved += estimate_tokens(old_content)
            elif isinstance(old_content, list):
                for inner in old_content:
                    if isinstance(inner, TextBlock):
                        tokens_saved += estimate_tokens(inner.text)
                    elif isinstance(inner, MediaBlock):
                        tokens_saved += 2000

            cleared_count += 1
            new_content.append(
                ToolResultBlock(
                    tool_use_id=block.tool_use_id,
                    content=TIME_BASED_MC_CLEARED_MESSAGE,
                    is_error=block.is_error,
                )
            )
        msg.content = new_content

    if cleared_count:
        # Report what was really cleared, not the number of candidate IDs.
        log.info(
            "Microcompact cleared %d tool results, saved ~%d tokens",
            cleared_count,
            tokens_saved,
        )

    return messages, tokens_saved
|
|
249
|
+
|
|
250
|
+
|
|
251
|
+
# ---------------------------------------------------------------------------
|
|
252
|
+
# 消息分组 — 按 API 轮次分组(assistant + 对应的 user tool_result)
|
|
253
|
+
# ---------------------------------------------------------------------------
|
|
254
|
+
|
|
255
|
+
def _group_messages_by_turn(
|
|
256
|
+
messages: list[ConversationMessage],
|
|
257
|
+
) -> list[list[ConversationMessage]]:
|
|
258
|
+
"""将消息按 API 轮次分组。
|
|
259
|
+
|
|
260
|
+
每组包含一条 assistant 消息和紧随其后的 user 消息(工具结果)。
|
|
261
|
+
开头的 user 消息(无前置 assistant)单独成组。
|
|
262
|
+
|
|
263
|
+
Returns:
|
|
264
|
+
消息组的列表
|
|
265
|
+
"""
|
|
266
|
+
groups: list[list[ConversationMessage]] = []
|
|
267
|
+
current_group: list[ConversationMessage] = []
|
|
268
|
+
|
|
269
|
+
for msg in messages:
|
|
270
|
+
if msg.role == "assistant" and current_group:
|
|
271
|
+
# 新的 assistant 消息开始新的一组
|
|
272
|
+
groups.append(current_group)
|
|
273
|
+
current_group = [msg]
|
|
274
|
+
else:
|
|
275
|
+
current_group.append(msg)
|
|
276
|
+
|
|
277
|
+
if current_group:
|
|
278
|
+
groups.append(current_group)
|
|
279
|
+
|
|
280
|
+
return groups
|
|
281
|
+
|
|
282
|
+
|
|
283
|
+
# ---------------------------------------------------------------------------
|
|
284
|
+
# 安全分割 — 确保 tool_use/tool_result 对不被切断
|
|
285
|
+
# ---------------------------------------------------------------------------
|
|
286
|
+
|
|
287
|
+
def _find_safe_split_index(
|
|
288
|
+
messages: list[ConversationMessage],
|
|
289
|
+
preserve_recent: int,
|
|
290
|
+
) -> int:
|
|
291
|
+
"""找到安全的分割索引,确保 tool_use/tool_result 对不被切断。
|
|
292
|
+
|
|
293
|
+
从 preserve_recent 位置向前搜索,找到一个不切断工具调用对的分割点。
|
|
294
|
+
如果 newer 部分的 user 消息包含 tool_result,则其对应的 assistant
|
|
295
|
+
消息(含 tool_use)也必须包含在 newer 部分。
|
|
296
|
+
|
|
297
|
+
Args:
|
|
298
|
+
messages: 完整消息列表
|
|
299
|
+
preserve_recent: 期望保留的最近消息数量
|
|
300
|
+
|
|
301
|
+
Returns:
|
|
302
|
+
安全的分割索引(older = messages[:split], newer = messages[split:])
|
|
303
|
+
"""
|
|
304
|
+
n = len(messages)
|
|
305
|
+
if n <= preserve_recent:
|
|
306
|
+
return 0
|
|
307
|
+
|
|
308
|
+
split = n - preserve_recent
|
|
309
|
+
|
|
310
|
+
# 收集 newer 部分中所有 tool_result 的 tool_use_id
|
|
311
|
+
newer_tool_result_ids: set[str] = set()
|
|
312
|
+
for msg in messages[split:]:
|
|
313
|
+
if msg.role == "user":
|
|
314
|
+
for block in msg.content:
|
|
315
|
+
if isinstance(block, ToolResultBlock):
|
|
316
|
+
newer_tool_result_ids.add(block.tool_use_id)
|
|
317
|
+
|
|
318
|
+
if not newer_tool_result_ids:
|
|
319
|
+
# newer 中没有 tool_result,直接分割即可
|
|
320
|
+
return split
|
|
321
|
+
|
|
322
|
+
# 向前搜索,找到所有对应的 tool_use 所在的 assistant 消息
|
|
323
|
+
# 确保这些 assistant 消息也在 newer 部分
|
|
324
|
+
for i in range(split - 1, -1, -1):
|
|
325
|
+
msg = messages[i]
|
|
326
|
+
if msg.role == "assistant":
|
|
327
|
+
for block in msg.content:
|
|
328
|
+
if isinstance(block, ToolUseBlock) and block.id in newer_tool_result_ids:
|
|
329
|
+
# 这个 tool_use 在 older 部分,需要将其纳入 newer
|
|
330
|
+
newer_tool_result_ids.discard(block.id)
|
|
331
|
+
if not newer_tool_result_ids:
|
|
332
|
+
# 所有 tool_use 都已找到
|
|
333
|
+
# split 应该包含这条 assistant 消息
|
|
334
|
+
return i
|
|
335
|
+
|
|
336
|
+
# 如果还有未找到的 tool_use_id(不应该发生),保守返回 0
|
|
337
|
+
return 0
|
|
338
|
+
|
|
339
|
+
|
|
340
|
+
def _remove_orphaned_tool_results(
|
|
341
|
+
messages: list[ConversationMessage],
|
|
342
|
+
) -> list[ConversationMessage]:
|
|
343
|
+
"""移除没有对应 tool_use 的孤立 tool_result 块。
|
|
344
|
+
|
|
345
|
+
压缩后可能存在 tool_result 但其对应的 tool_use 已被摘要移除,
|
|
346
|
+
这会导致 API 报错 "Message has tool role, but there was no previous
|
|
347
|
+
assistant message with a tool call!"。
|
|
348
|
+
|
|
349
|
+
Args:
|
|
350
|
+
messages: 消息列表
|
|
351
|
+
|
|
352
|
+
Returns:
|
|
353
|
+
清理后的消息列表
|
|
354
|
+
"""
|
|
355
|
+
# 收集所有 tool_use 的 ID
|
|
356
|
+
tool_use_ids: set[str] = set()
|
|
357
|
+
for msg in messages:
|
|
358
|
+
if msg.role == "assistant":
|
|
359
|
+
for block in msg.content:
|
|
360
|
+
if isinstance(block, ToolUseBlock):
|
|
361
|
+
tool_use_ids.add(block.id)
|
|
362
|
+
|
|
363
|
+
# 检查每个 tool_result 是否有对应的 tool_use
|
|
364
|
+
result: list[ConversationMessage] = []
|
|
365
|
+
for msg in messages:
|
|
366
|
+
if msg.role != "user":
|
|
367
|
+
result.append(msg)
|
|
368
|
+
continue
|
|
369
|
+
|
|
370
|
+
# 检查 user 消息中的 tool_result
|
|
371
|
+
has_orphan = False
|
|
372
|
+
for block in msg.content:
|
|
373
|
+
if isinstance(block, ToolResultBlock) and block.tool_use_id not in tool_use_ids:
|
|
374
|
+
has_orphan = True
|
|
375
|
+
break
|
|
376
|
+
|
|
377
|
+
if not has_orphan:
|
|
378
|
+
result.append(msg)
|
|
379
|
+
continue
|
|
380
|
+
|
|
381
|
+
# 过滤掉孤立的 tool_result
|
|
382
|
+
new_blocks: list[ContentBlock] = []
|
|
383
|
+
for block in msg.content:
|
|
384
|
+
if isinstance(block, ToolResultBlock) and block.tool_use_id not in tool_use_ids:
|
|
385
|
+
log.warning(
|
|
386
|
+
"Removing orphaned tool_result (tool_use_id=%s) — "
|
|
387
|
+
"corresponding tool_use was compacted away",
|
|
388
|
+
block.tool_use_id,
|
|
389
|
+
)
|
|
390
|
+
continue
|
|
391
|
+
new_blocks.append(block)
|
|
392
|
+
|
|
393
|
+
if new_blocks:
|
|
394
|
+
result.append(ConversationMessage(role=msg.role, content=new_blocks))
|
|
395
|
+
else:
|
|
396
|
+
# 整条消息都是孤立的 tool_result,跳过
|
|
397
|
+
log.warning("Dropping user message that contained only orphaned tool_results")
|
|
398
|
+
|
|
399
|
+
return result
|
|
400
|
+
|
|
401
|
+
|
|
402
|
+
# ---------------------------------------------------------------------------
|
|
403
|
+
# 完整压缩 — 基于 LLM 的摘要
|
|
404
|
+
# ---------------------------------------------------------------------------
|
|
405
|
+
|
|
406
|
+
# Preamble telling the model not to use tools.
NO_TOOLS_PREAMBLE = """\
CRITICAL: Respond with TEXT ONLY. Do NOT call any tools.

- Do NOT use read_file, bash, grep, glob, edit_file, write_file, or ANY other tool.
- You already have all the context you need in the conversation above.
- Tool calls will be REJECTED and will waste your only turn — you will fail the task.
- Your entire response must be plain text: an <analysis> block followed by a <summary> block.

"""

# Base compaction prompt.
BASE_COMPACT_PROMPT = """\
Your task is to create a detailed summary of the conversation so far. This summary will replace the earlier messages, so it must capture all important information.

First, draft your analysis inside <analysis> tags. Walk through the conversation chronologically and extract:
- Every user request and intent (explicit and implicit)
- The approach taken and technical decisions made
- Specific code, files, and configurations discussed (with paths and line numbers where available)
- All errors encountered and how they were fixed
- Any user feedback or corrections

Then, produce a structured summary inside <summary> tags with these sections:

1. **Primary Request and Intent**: All user requests in full detail, including nuances and constraints.
2. **Key Technical Concepts**: Technologies, frameworks, patterns, and conventions discussed.
3. **Files and Code Sections**: Every file examined or modified, with specific code snippets and line numbers.
4. **Errors and Fixes**: Every error encountered, its cause, and how it was resolved.
5. **Problem Solving**: Problems solved and approaches that worked vs. didn't work.
6. **All User Messages**: Non-tool-result user messages (preserve exact wording for context).
7. **Pending Tasks**: Explicitly requested work that hasn't been completed yet.
8. **Current Work**: Detailed description of the last task being worked on before compaction.
9. **Optional Next Step**: The single most logical next step, directly aligned with the user's recent request.
"""

# Trailer repeating the no-tools instruction.
NO_TOOLS_TRAILER = """
REMINDER: Do NOT call any tools. Respond with plain text only — an <analysis> block followed by a <summary> block. Tool calls will be rejected and you will fail the task."""
|
|
444
|
+
|
|
445
|
+
|
|
446
|
+
def get_compact_prompt(custom_instructions: str | None = None) -> str:
    """Assemble the full compaction prompt sent to the model.

    Args:
        custom_instructions: Optional extra instructions. They are appended
            verbatim under an "Additional Instructions" heading when they
            contain non-whitespace text.

    Returns:
        The preamble, base prompt, optional instructions and trailer joined
        into one string.
    """
    sections = [NO_TOOLS_PREAMBLE, BASE_COMPACT_PROMPT]
    has_custom = bool(custom_instructions and custom_instructions.strip())
    if has_custom:
        sections.append(f"\n\nAdditional Instructions:\n{custom_instructions}")
    sections.append(NO_TOOLS_TRAILER)
    return "".join(sections)
|
|
453
|
+
|
|
454
|
+
|
|
455
|
+
def format_compact_summary(raw_summary: str) -> str:
    """Strip the ``<analysis>`` draft and unwrap the ``<summary>`` block.

    Args:
        raw_summary: Raw model output, expected to hold an ``<analysis>``
            block followed by a ``<summary>`` block.

    Returns:
        The text with analysis blocks removed, the first summary block
        rewritten as ``"Summary:\\n<content>"``, runs of blank lines collapsed
        to a single blank line, and outer whitespace stripped.
    """
    without_draft = re.sub(r"<analysis>[\s\S]*?</analysis>", "", raw_summary)
    summary_match = re.search(r"<summary>([\s\S]*?)</summary>", without_draft)
    if summary_match is None:
        rendered = without_draft
    else:
        heading_and_body = f"Summary:\n{summary_match.group(1).strip()}"
        rendered = without_draft.replace(summary_match.group(0), heading_and_body)
    # Collapse runs of blank lines.
    collapsed = re.sub(r"\n\n+", "\n\n", rendered)
    return collapsed.strip()
|
|
464
|
+
|
|
465
|
+
|
|
466
|
+
def build_compact_summary_message(
    summary: str,
    *,
    suppress_follow_up: bool = False,
    recent_preserved: bool = False,
) -> str:
    """Build the message text that replaces the compacted history.

    Args:
        summary: Raw summary produced by the model; it is passed through
            ``format_compact_summary`` first.
        suppress_follow_up: When true, append the translated
            ``compact_suppress_followup`` text directly, with no separator.
        recent_preserved: When true, append the translated
            ``compact_recent_preserved`` note after a blank line.

    Returns:
        The translated prefix, a blank line, the formatted summary, and any
        requested notes.
    """
    from illusion.config.i18n import t

    formatted = format_compact_summary(summary)
    header = f"{t('compact_summary_prefix')}\n\n{formatted}"
    recent_note = f"\n\n{t('compact_recent_preserved')}" if recent_preserved else ""
    follow_up_note = t("compact_suppress_followup") if suppress_follow_up else ""
    return header + recent_note + follow_up_note
|
|
482
|
+
|
|
483
|
+
|
|
484
|
+
# ---------------------------------------------------------------------------
|
|
485
|
+
# 压缩边界标记
|
|
486
|
+
# ---------------------------------------------------------------------------
|
|
487
|
+
|
|
488
|
+
def create_compact_boundary_marker() -> ConversationMessage:
    """Create the compact boundary marker message.

    The marker is an assistant message whose only block is a text block
    holding ``COMPACT_BOUNDARY_PREFIX``. It marks where compaction happened and
    keeps the compacted list from starting with two consecutive user messages.

    Returns:
        The boundary marker message.
    """
    marker_block = TextBlock(text=COMPACT_BOUNDARY_PREFIX)
    return ConversationMessage(role="assistant", content=[marker_block])
|
|
501
|
+
|
|
502
|
+
|
|
503
|
+
def is_compact_boundary_marker(msg: ConversationMessage) -> bool:
    """Return True when ``msg`` is a compact boundary marker.

    A marker is an assistant message with exactly one block, that block is a
    text block, and its stripped text equals ``COMPACT_BOUNDARY_PREFIX``.
    """
    if msg.role != "assistant" or len(msg.content) != 1:
        return False
    only_block = msg.content[0]
    if not isinstance(only_block, TextBlock):
        return False
    return only_block.text.strip() == COMPACT_BOUNDARY_PREFIX
|
|
511
|
+
|
|
512
|
+
|
|
513
|
+
def get_messages_after_compact_boundary(
    messages: list[ConversationMessage],
) -> list[ConversationMessage]:
    """Return the messages that follow the last compact boundary marker.

    Returns:
        A slice holding everything after the last marker, or the original
        list itself when it contains no marker.
    """
    # Scan from the end so the first hit is the last marker.
    for position in range(len(messages) - 1, -1, -1):
        if is_compact_boundary_marker(messages[position]):
            return messages[position + 1:]
    return messages
|
|
530
|
+
|
|
531
|
+
|
|
532
|
+
# ---------------------------------------------------------------------------
|
|
533
|
+
# 消息结构修复 — 确保压缩后消息角色交替正确
|
|
534
|
+
# ---------------------------------------------------------------------------
|
|
535
|
+
|
|
536
|
+
def _ensure_message_alternation(
|
|
537
|
+
messages: list[ConversationMessage],
|
|
538
|
+
) -> list[ConversationMessage]:
|
|
539
|
+
"""确保消息列表中 user/assistant 角色正确交替。
|
|
540
|
+
|
|
541
|
+
修复以下问题:
|
|
542
|
+
- 连续两条 user 消息之间插入空的 assistant 消息
|
|
543
|
+
- 连续两条 assistant 消息之间插入空的 user 消息
|
|
544
|
+
- 开头不是 user 消息时插入空的 user 消息
|
|
545
|
+
|
|
546
|
+
Args:
|
|
547
|
+
messages: 原始消息列表
|
|
548
|
+
|
|
549
|
+
Returns:
|
|
550
|
+
修复后的消息列表
|
|
551
|
+
"""
|
|
552
|
+
if not messages:
|
|
553
|
+
return messages
|
|
554
|
+
|
|
555
|
+
result: list[ConversationMessage] = []
|
|
556
|
+
|
|
557
|
+
# 确保第一条消息是 user 角色
|
|
558
|
+
if messages[0].role != "user":
|
|
559
|
+
from illusion.config.i18n import t
|
|
560
|
+
result.append(ConversationMessage.from_user_text(t("compact_conversation_start")))
|
|
561
|
+
|
|
562
|
+
for i, msg in enumerate(messages):
|
|
563
|
+
if not result:
|
|
564
|
+
result.append(msg)
|
|
565
|
+
continue
|
|
566
|
+
|
|
567
|
+
last_role = result[-1].role
|
|
568
|
+
current_role = msg.role
|
|
569
|
+
|
|
570
|
+
if last_role == current_role:
|
|
571
|
+
# 连续相同角色,需要插入间隔消息
|
|
572
|
+
if current_role == "user":
|
|
573
|
+
# 两条连续 user 消息之间插入空 assistant
|
|
574
|
+
result.append(ConversationMessage(
|
|
575
|
+
role="assistant",
|
|
576
|
+
content=[TextBlock(text="")],
|
|
577
|
+
))
|
|
578
|
+
else:
|
|
579
|
+
# 两条连续 assistant 消息之间插入空 user
|
|
580
|
+
result.append(ConversationMessage.from_user_text(""))
|
|
581
|
+
elif last_role == "assistant" and current_role == "user":
|
|
582
|
+
# 正常交替,无需修复
|
|
583
|
+
pass
|
|
584
|
+
|
|
585
|
+
result.append(msg)
|
|
586
|
+
|
|
587
|
+
return result
|
|
588
|
+
|
|
589
|
+
|
|
590
|
+
# ---------------------------------------------------------------------------
|
|
591
|
+
# 自动压缩跟踪
|
|
592
|
+
# ---------------------------------------------------------------------------
|
|
593
|
+
|
|
594
|
+
@dataclass
class AutoCompactState:
    """Mutable state that persists across turns of the query loop."""

    compacted: bool = False
    turn_counter: int = 0
    consecutive_failures: int = 0
    # Turn number at which the last compaction happened.
    last_compacted_at_turn: int = 0
    # Temporarily suppress warnings after a compaction.
    warning_suppressed: bool = False
|
|
603
|
+
|
|
604
|
+
|
|
605
|
+
# ---------------------------------------------------------------------------
|
|
606
|
+
# 上下文警告系统
|
|
607
|
+
# ---------------------------------------------------------------------------
|
|
608
|
+
|
|
609
|
+
@dataclass
class TokenWarningState:
    """Warning state describing how much of the context is in use."""

    # Usage is approaching the auto-compact threshold.
    is_above_warning_threshold: bool = False
    # Usage has reached or exceeded the auto-compact threshold.
    is_above_autocompact_threshold: bool = False
    # Usage has reached the blocking limit.
    is_at_blocking_limit: bool = False
    # Current estimated token count.
    estimated_tokens: int = 0
    # Auto-compact threshold.
    threshold: int = 0
    # Context window size.
    context_window: int = 0
|
|
619
|
+
|
|
620
|
+
|
|
621
|
+
def calculate_token_warning_state(
    messages: list[ConversationMessage],
    model: str,
    *,
    auto_compact_enabled: bool = True,
) -> TokenWarningState:
    """Compute the warning state for the current context usage.

    Args:
        messages: The current message list.
        model: The model name.
        auto_compact_enabled: Whether auto-compaction is switched on.

    Returns:
        A ``TokenWarningState`` holding the estimated token count, the
        auto-compact threshold, the context window and the derived flags.
    """
    used_tokens = estimate_message_tokens(messages)
    window = get_context_window(model)
    compact_at = get_autocompact_threshold(model)

    # The warning fires a fixed buffer of tokens before the auto-compact threshold.
    warn_at = compact_at - WARNING_THRESHOLD_BUFFER_TOKENS

    # The blocking limit is only checked while auto-compaction is disabled.
    if auto_compact_enabled:
        blocking = False
    else:
        blocking = used_tokens >= (window - MANUAL_COMPACT_BUFFER_TOKENS)

    return TokenWarningState(
        is_above_warning_threshold=used_tokens >= warn_at,
        is_above_autocompact_threshold=used_tokens >= compact_at,
        is_at_blocking_limit=blocking,
        estimated_tokens=used_tokens,
        threshold=compact_at,
        context_window=window,
    )
|
|
657
|
+
|
|
658
|
+
|
|
659
|
+
# ---------------------------------------------------------------------------
|
|
660
|
+
# 上下文窗口辅助函数
|
|
661
|
+
# ---------------------------------------------------------------------------
|
|
662
|
+
|
|
663
|
+
def get_context_window(model: str) -> int:
    """Return the context window size to use.

    The value of ``settings.context_window`` wins when it is configured and
    positive; otherwise ``_DEFAULT_CONTEXT_WINDOW`` is returned.

    Args:
        model: The model name. It is accepted for interface symmetry; the
            lookup below does not consult it.

    Returns:
        The context window size in tokens.
    """
    from illusion.config.settings import load_settings

    configured = load_settings().context_window
    # Missing, zero and negative values all fall back to the default.
    if configured and configured > 0:
        return configured
    return _DEFAULT_CONTEXT_WINDOW
|
|
674
|
+
|
|
675
|
+
|
|
676
|
+
def get_autocompact_threshold(model: str) -> int:
    """Compute the token count at which auto-compaction is triggered.

    The threshold is the context window minus the output budget reserved for
    the summary (capped at 20,000 tokens) minus ``AUTOCOMPACT_BUFFER_TOKENS``.
    """
    window = get_context_window(model)
    summary_reserve = min(MAX_OUTPUT_TOKENS_FOR_SUMMARY, 20_000)
    return window - summary_reserve - AUTOCOMPACT_BUFFER_TOKENS
|
|
682
|
+
|
|
683
|
+
|
|
684
|
+
def should_autocompact(
    messages: list[ConversationMessage],
    model: str,
    state: AutoCompactState,
) -> bool:
    """Return whether the conversation should be auto-compacted.

    Auto-compaction is refused once ``state.consecutive_failures`` has reached
    ``MAX_CONSECUTIVE_AUTOCOMPACT_FAILURES``; otherwise it is requested when
    the estimated token count reaches the auto-compact threshold.
    """
    too_many_failures = state.consecutive_failures >= MAX_CONSECUTIVE_AUTOCOMPACT_FAILURES
    if too_many_failures:
        return False
    return estimate_message_tokens(messages) >= get_autocompact_threshold(model)
|
|
695
|
+
|
|
696
|
+
|
|
697
|
+
# ---------------------------------------------------------------------------
|
|
698
|
+
# 完整压缩执行(调用 LLM)
|
|
699
|
+
# ---------------------------------------------------------------------------
|
|
700
|
+
|
|
701
|
+
async def compact_conversation(
    messages: list[ConversationMessage],
    *,
    api_client: Any,
    model: str,
    system_prompt: str = "",
    preserve_recent: int = DEFAULT_PRESERVE_RECENT,
    custom_instructions: str | None = None,
    suppress_follow_up: bool = True,
) -> list[ConversationMessage]:
    """Compact the conversation by asking the LLM for a summary.

    Steps:
        1. Run micro-compaction first (cheap token reduction).
        2. Strip image data.
        3. Split into older messages to summarize and newer messages to keep.
        4. Call the LLM for a summary, retrying on prompt-too-long (PTL)
           errors by dropping the oldest turn group.
        5. Replace the older messages with summary message + boundary marker,
           followed by the preserved newer messages.
        6. Remove tool_result entries that lost their matching tool_use.
        7. Make sure message roles alternate correctly.

    Args:
        messages: The full conversation history.
        api_client: ApiClient (or compatible client) used for the summary call.
        model: The model ID to use.
        system_prompt: System prompt for the summary call; a generic
            summarizer prompt is used when this is empty.
        preserve_recent: Number of most recent messages to keep verbatim.
        custom_instructions: Optional extra instructions for the summary prompt.
        suppress_follow_up: When True, instructs the model not to ask
            follow-up questions.

    Returns:
        The new, compacted message list. When there are no more than
        ``preserve_recent`` messages a copy of the input is returned; when the
        summary comes back empty the micro-compacted, image-stripped messages
        are returned instead.

    Raises:
        Exception: Any error from the summary call that is not a
            prompt-too-long error, or a prompt-too-long error once the retry
            budget is used up, is re-raised.
    """
    from illusion.api.client import ApiMessageCompleteEvent, ApiMessageRequest

    if len(messages) <= preserve_recent:
        return list(messages)

    # Step 1: micro-compaction (the freed-token count is not needed here).
    messages, _freed = microcompact_messages(messages, keep_recent=DEFAULT_KEEP_RECENT)

    # Step 2: strip image data.
    messages = strip_images_from_messages(messages)

    pre_compact_tokens = estimate_message_tokens(messages)
    log.info("Compacting conversation: %d messages, ~%d tokens", len(messages), pre_compact_tokens)

    # Step 3: split without cutting a tool_use/tool_result pair apart.
    split_index = _find_safe_split_index(messages, preserve_recent)
    older = messages[:split_index]
    newer = messages[split_index:]

    # Step 4: the request is the older messages followed by the compact prompt.
    compact_prompt = get_compact_prompt(custom_instructions)
    request_messages = [*older, ConversationMessage.from_user_text(compact_prompt)]

    summary_text = ""
    ptl_retries = 0

    while ptl_retries <= MAX_PTL_RETRIES:
        try:
            request = ApiMessageRequest(
                model=model,
                messages=request_messages,
                system_prompt=system_prompt or "You are a conversation summarizer.",
                max_tokens=MAX_OUTPUT_TOKENS_FOR_SUMMARY,
                tools=[],  # the compaction call uses no tools
            )
            async for event in api_client.stream_message(request):
                if isinstance(event, ApiMessageCompleteEvent):
                    summary_text = event.message.text
        except Exception as exc:
            lowered = str(exc).lower()
            prompt_too_long = "prompt" in lowered and "long" in lowered
            if not prompt_too_long or ptl_retries >= MAX_PTL_RETRIES:
                # Not a PTL error, or the retry budget is exhausted: re-raise.
                raise

            ptl_retries += 1
            log.warning(
                "Compact summary hit prompt-too-long, truncating head (retry %d/%d)",
                ptl_retries, MAX_PTL_RETRIES,
            )
            # Drop the oldest turn group to shrink the request.
            groups = _group_messages_by_turn(request_messages)
            if len(groups) <= 2:
                # Nothing left to truncate; give up.
                log.error("Cannot truncate further for PTL retry")
                break
            # The trailing group holding the compact prompt is kept.
            request_messages = [item for group in groups[1:] for item in group]
        else:
            # The stream finished without an error; leave the retry loop.
            break

    if not summary_text:
        log.warning("Compact summary was empty — returning original messages")
        return messages

    # Step 5: assemble the new message list.
    summary_content = build_compact_summary_message(
        summary_text,
        suppress_follow_up=suppress_follow_up,
        recent_preserved=len(newer) > 0,
    )
    summary_msg = ConversationMessage.from_user_text(summary_content)
    boundary_marker = create_compact_boundary_marker()
    result = [summary_msg, boundary_marker, *newer]

    # Step 6: drop tool_result entries without a matching tool_use.
    result = _remove_orphaned_tool_results(result)

    # Step 7: repair role alternation.
    result = _ensure_message_alternation(result)

    post_compact_tokens = estimate_message_tokens(result)
    log.info(
        "Compaction done: %d -> %d messages, ~%d -> ~%d tokens (saved ~%d)",
        len(messages), len(result),
        pre_compact_tokens, post_compact_tokens,
        max(0, pre_compact_tokens - post_compact_tokens),
    )
    return result
|
|
827
|
+
|
|
828
|
+
|
|
829
|
+
# ---------------------------------------------------------------------------
|
|
830
|
+
# 响应式压缩 — API 返回 prompt-too-long 时触发
|
|
831
|
+
# ---------------------------------------------------------------------------
|
|
832
|
+
|
|
833
|
+
async def reactive_compact(
    messages: list[ConversationMessage],
    *,
    api_client: Any,
    model: str,
    system_prompt: str = "",
    preserve_recent: int = DEFAULT_PRESERVE_RECENT,
) -> tuple[list[ConversationMessage], bool]:
    """Compact the conversation after the API reported a prompt-too-long error.

    This is the last line of defense, used when auto-compaction did not
    prevent the overflow. Micro-compaction is tried first; if it frees any
    tokens its result is returned right away, otherwise a full LLM-based
    compaction is attempted.

    Args:
        messages: The current message list.
        api_client: The API client.
        model: The model name.
        system_prompt: The system prompt.
        preserve_recent: Number of recent messages to preserve.

    Returns:
        ``(messages, was_compacted)``: the resulting messages and whether a
        compaction was performed. A failed full compaction is logged and
        reported as ``False`` rather than raised.
    """
    log.info("Reactive compact triggered due to prompt-too-long error")

    # Cheap attempt first.
    messages, freed = microcompact_messages(messages, keep_recent=DEFAULT_KEEP_RECENT)
    if freed > 0:
        log.info("Reactive microcompact freed ~%d tokens", freed)
        return messages, True

    # Micro-compaction freed nothing: fall back to the full compaction.
    try:
        compacted = await compact_conversation(
            messages,
            api_client=api_client,
            model=model,
            system_prompt=system_prompt,
            preserve_recent=preserve_recent,
            suppress_follow_up=True,
        )
    except Exception as exc:
        log.error("Reactive compact failed: %s", exc)
        return messages, False
    return compacted, True
|
|
877
|
+
|
|
878
|
+
|
|
879
|
+
# ---------------------------------------------------------------------------
|
|
880
|
+
# 自动压缩集成(从查询循环调用)
|
|
881
|
+
# ---------------------------------------------------------------------------
|
|
882
|
+
|
|
883
|
+
async def auto_compact_if_needed(
    messages: list[ConversationMessage],
    *,
    api_client: Any,
    model: str,
    system_prompt: str = "",
    state: AutoCompactState,
    preserve_recent: int = DEFAULT_PRESERVE_RECENT,
) -> tuple[list[ConversationMessage], bool]:
    """Check whether auto-compaction is due and perform it if so.

    Meant to be called at the start of every query-loop turn.

    Args:
        messages: The current message list.
        api_client: The API client used for the summary call.
        model: The model name.
        system_prompt: The system prompt passed on to the summary call.
        state: Auto-compact bookkeeping; it is updated in place on success
            and on failure.
        preserve_recent: Number of recent messages to preserve.

    Returns:
        ``(messages, was_compacted)``: when compacted, ``messages`` is the
        new list. A failed full compaction is logged, counted in
        ``state.consecutive_failures`` and reported as ``False``.
    """
    if not should_autocompact(messages, model, state):
        return messages, False

    log.info("Auto-compact triggered (failures=%d)", state.consecutive_failures)

    # Try micro-compaction first; it may already be enough.
    messages, freed = microcompact_messages(messages)
    if freed > 0 and not should_autocompact(messages, model, state):
        log.info("Microcompact freed ~%d tokens, auto-compact no longer needed", freed)
        state.warning_suppressed = True
        return messages, True

    # A full compaction is required.
    try:
        compacted = await compact_conversation(
            messages,
            api_client=api_client,
            model=model,
            system_prompt=system_prompt,
            preserve_recent=preserve_recent,
            suppress_follow_up=True,
        )
    except Exception as exc:
        state.consecutive_failures += 1
        log.error(
            "Auto-compact failed (attempt %d/%d): %s",
            state.consecutive_failures,
            MAX_CONSECUTIVE_AUTOCOMPACT_FAILURES,
            exc,
        )
        return messages, False
    else:
        # Record the successful compaction and reset the failure counter.
        state.compacted = True
        state.turn_counter += 1
        state.last_compacted_at_turn = state.turn_counter
        state.consecutive_failures = 0
        state.warning_suppressed = True
        return compacted, True
|
|
936
|
+
|
|
937
|
+
|
|
938
|
+
# ---------------------------------------------------------------------------
|
|
939
|
+
# 向后兼容
|
|
940
|
+
# ---------------------------------------------------------------------------
|
|
941
|
+
|
|
942
|
+
def summarize_messages(
    messages: list[ConversationMessage],
    *,
    max_messages: int = 8,
) -> str:
    """Produce a compact plain-text summary of the most recent messages.

    This is the legacy, non-LLM approach (the original notes it serves the
    /summary command).

    Args:
        messages: The conversation messages.
        max_messages: Maximum number of trailing messages to include. A value
            of zero or less yields an empty summary.

    Returns:
        One ``"<role>: <text>"`` line per message that has non-blank text,
        with each text truncated to 300 characters, joined by newlines.
    """
    # Guard first: ``messages[-0:]`` is the WHOLE list, and a negative limit
    # would slice from the front, so non-positive limits need explicit handling.
    if max_messages <= 0:
        return ""

    lines: list[str] = []
    for message in messages[-max_messages:]:
        text = message.text.strip()
        if text:
            lines.append(f"{message.role}: {text[:300]}")
    return "\n".join(lines)
|
|
956
|
+
|
|
957
|
+
|
|
958
|
+
def compact_messages(
    messages: list[ConversationMessage],
    *,
    preserve_recent: int = DEFAULT_PRESERVE_RECENT,
) -> list[ConversationMessage]:
    """Replace older conversation history with a synthetic summary.

    This is the legacy fallback: it does not call the LLM, so the summary
    quality is low. Prefer ``compact_conversation()`` for a high-quality
    summary.

    Args:
        messages: The full conversation history.
        preserve_recent: Number of most recent messages to keep verbatim.

    Returns:
        A new message list. With no more than ``preserve_recent`` messages a
        plain copy is returned. Otherwise the result is the summary message
        and boundary marker (when a summary could be produced) followed by the
        preserved recent messages, with orphaned tool results removed and
        role alternation repaired.
    """
    if len(messages) <= preserve_recent:
        return list(messages)

    # Split safely so that no tool_use/tool_result pair is cut apart.
    split_index = _find_safe_split_index(messages, preserve_recent)
    older = messages[:split_index]
    newer = messages[split_index:]

    summary = summarize_messages(older)
    if summary:
        result = [
            ConversationMessage(
                role="user",
                content=[TextBlock(text=f"[conversation summary]\n{summary}")],
            ),
            create_compact_boundary_marker(),
            *newer,
        ]
    else:
        # No text to summarize: keep only the recent messages.
        result = list(newer)

    # Normalize both paths identically. Without this the summary-less result
    # could start with a non-user message or keep a dangling tool_result.
    result = _remove_orphaned_tool_results(result)
    return _ensure_message_alternation(result)
|
|
987
|
+
|
|
988
|
+
|
|
989
|
+
# Public API of this module: constants and state classes first, then functions.
__all__ = [
    "AUTOCOMPACT_BUFFER_TOKENS",
    "AutoCompactState",
    "COMPACTABLE_TOOLS",
    "COMPACT_BOUNDARY_PREFIX",
    "TIME_BASED_MC_CLEARED_MESSAGE",
    "TokenWarningState",
    "auto_compact_if_needed",
    "build_compact_summary_message",
    "calculate_token_warning_state",
    "compact_conversation",
    "compact_messages",
    "create_compact_boundary_marker",
    "estimate_conversation_tokens",
    "estimate_message_tokens",
    "format_compact_summary",
    "get_autocompact_threshold",
    "get_compact_prompt",
    "get_context_window",
    "get_messages_after_compact_boundary",
    "is_compact_boundary_marker",
    "microcompact_messages",
    "reactive_compact",
    "should_autocompact",
    "strip_images_from_messages",
    "summarize_messages",
]
|