myagent-ai 1.10.8 → 1.10.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1137,6 +1137,10 @@ class MainAgent(BaseAgent):
1137
1137
  code_lang = params.get("language", "python")
1138
1138
  code_text = params.get("code", parms_str)
1139
1139
  if self.executor:
1140
+ # 注入权限检查器(V1 路径在 api_server 中设置,V2 路径需要在此设置)
1141
+ self.executor.set_permission_checker(
1142
+ self.check_permission, self.name
1143
+ )
1140
1144
  exec_result = await self.executor.execute(
1141
1145
  language=code_lang,
1142
1146
  code=code_text,
@@ -1149,6 +1153,10 @@ class MainAgent(BaseAgent):
1149
1153
  elif tool_name == "command" or tool_name == "command_run":
1150
1154
  code_text = params.get("command", parms_str)
1151
1155
  if self.executor:
1156
+ # 注入权限检查器(V1 路径在 api_server 中设置,V2 路径需要在此设置)
1157
+ self.executor.set_permission_checker(
1158
+ self.check_permission, self.name
1159
+ )
1152
1160
  exec_result = await self.executor.execute(
1153
1161
  language="shell",
1154
1162
  code=code_text,
package/main.py CHANGED
@@ -218,6 +218,10 @@ class MyAgentApp:
218
218
  max_retries=exe_cfg.max_retries,
219
219
  auto_fix=exe_cfg.auto_fix,
220
220
  max_output_length=exe_cfg.max_output_length,
221
+ execution_mode=exe_cfg.execution_mode,
222
+ sandbox_image=exe_cfg.sandbox_image,
223
+ sandbox_network=exe_cfg.sandbox_network,
224
+ sandbox_memory=exe_cfg.sandbox_memory,
221
225
  )
222
226
  self.logger.info(f"执行引擎: timeout={exe_cfg.timeout}s, auto_fix={exe_cfg.auto_fix}")
223
227
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "myagent-ai",
3
- "version": "1.10.8",
3
+ "version": "1.10.9",
4
4
  "description": "本地桌面端执行型AI助手 - Open Interpreter 风格 | Local Desktop Execution-Oriented AI Assistant",
5
5
  "main": "main.py",
6
6
  "bin": {
package/web/api_server.py CHANGED
@@ -2358,7 +2358,7 @@ class ApiServer:
2358
2358
  return web.json_response({**agent_info, "sessions": sessions})
2359
2359
 
2360
2360
  # Internal keys that should not appear in chat history UI
2361
- _HIDDEN_KEYS = {"llm_output", "tool_call", "tool_result"}
2361
+ _HIDDEN_KEYS = {"llm_output"}
2362
2362
 
2363
2363
  async def handle_get_messages(self, request):
2364
2364
  sid = request.match_info["sid"]
@@ -3280,6 +3280,15 @@ class ApiServer:
3280
3280
  context.metadata["agent_override_path"] = agent_path
3281
3281
  context.metadata["chat_mode"] = chat_mode
3282
3282
 
3283
+ # ── 根据 Agent 配置设置执行引擎参数(execution_mode 等)──
3284
+ agent_cfg_for_exec = self._read_agent_config(agent_path)
3285
+ _original_exec_mode = None
3286
+ if agent_cfg_for_exec and agent.executor:
3287
+ _exec_mode = agent_cfg_for_exec.get("execution_mode")
3288
+ if _exec_mode:
3289
+ _original_exec_mode = agent.executor.execution_mode
3290
+ agent.executor.set_execution_mode(_exec_mode)
3291
+
3283
3292
  # Clear execution events from previous runs
3284
3293
  agent.clear_execution_events()
3285
3294
 
@@ -3358,6 +3367,9 @@ class ApiServer:
3358
3367
  finally:
3359
3368
  # 无论成功或异常,都清理 active_contexts
3360
3369
  agent.active_contexts.pop(session_id, None)
3370
+ # 恢复执行引擎原始模式(防止影响后续 Agent 请求)
3371
+ if _original_exec_mode is not None and agent.executor:
3372
+ agent.executor.set_execution_mode(_original_exec_mode)
3361
3373
 
3362
3374
  # V2 结束后:如果 task_list_store 中有任务,确保最终推送一次
3363
3375
  if chat_mode == "exec" and session_id in self._task_list_store:
@@ -242,6 +242,9 @@ input,textarea,select{font:inherit}
242
242
  border-bottom-left-radius:4px;
243
243
  max-width:95%;
244
244
  }
245
+ /* Unified bubble wrapper for timeline content — always full width */
246
+ .msg-bubble-wrapper{max-width:95%!important;width:100%}
247
+ .msg-bubble-wrapper>.msg-timeline{gap:10px}
245
248
  .message-bubble p{margin-bottom:8px}
246
249
  .message-bubble p:last-child{margin-bottom:0}
247
250
  .message-bubble code{
@@ -466,7 +469,7 @@ input,textarea,select{font:inherit}
466
469
  }
467
470
 
468
471
  /* ── Thought Block (Agent Thinking) ── */
469
- .thought-block{margin:0 0 10px 0;border:1px solid var(--border-light);border-radius:var(--radius-sm);overflow:hidden;background:linear-gradient(135deg,var(--accent-light),var(--bg2));animation:thoughtFadeIn .4s ease-out}
472
+ .thought-block{width:100%;display:block;margin:0 0 10px 0;border:1px solid var(--border-light);border-radius:var(--radius-sm);overflow:hidden;background:linear-gradient(135deg,var(--accent-light),var(--bg2));animation:thoughtFadeIn .4s ease-out}
470
473
  .thought-block.streaming{border-color:var(--accent);box-shadow:0 0 12px rgba(99,102,241,.15)}
471
474
  @keyframes thoughtFadeIn{from{opacity:0;transform:translateY(-6px)}to{opacity:1;transform:translateY(0)}}
472
475
  .thought-block summary{display:flex;align-items:center;gap:8px;padding:8px 14px;cursor:pointer;font-size:12px;font-weight:600;color:var(--text2);user-select:none;transition:var(--transition);text-transform:uppercase;letter-spacing:.3px}
@@ -478,7 +481,7 @@ input,textarea,select{font:inherit}
478
481
  .thought-block summary .thought-badge{font-size:10px;padding:2px 8px;border-radius:10px;background:var(--accent);color:#fff;font-weight:500;animation:badgePulse 1.2s ease-in-out infinite}
479
482
  @keyframes badgePulse{0%,100%{opacity:1}50%{opacity:.6}}
480
483
  .thought-block:not(.streaming) summary .thought-badge{background:var(--bg4);color:var(--text3);animation:none}
481
- .thought-content{padding:10px 14px 14px;font-size:13px;line-height:1.7;color:var(--text2);border-top:1px solid var(--border-light);max-height:300px;overflow-y:auto;overflow-x:hidden;word-break:break-word;overflow-wrap:break-word}
484
+ .thought-content{width:100%;padding:10px 14px 14px;font-size:13px;line-height:1.7;color:var(--text2);border-top:1px solid var(--border-light);max-height:300px;overflow-y:auto;overflow-x:hidden;word-break:break-word;overflow-wrap:break-word}
482
485
  .thought-content p{margin:4px 0}
483
486
  .thought-content p:first-child{margin-top:0}
484
487
  .thought-content p:last-child{margin-bottom:0}
@@ -2050,7 +2053,22 @@ input,textarea,select{font:inherit}
2050
2053
  .exec-event-result-btn svg{width:12px;height:12px}
2051
2054
 
2052
2055
  /* ── Inline Exec Events (Timeline Interleaved) ── */
2053
- .msg-timeline{display:flex;flex-direction:column;gap:6px;overflow:hidden}
2056
+ .msg-timeline{display:flex;flex-direction:column;gap:10px;overflow:hidden}
2057
+ /* Text segment inside the unified bubble */
2058
+ .timeline-segment{word-break:break-word;overflow-wrap:break-word}
2059
+ .timeline-segment p{margin-bottom:8px}
2060
+ .timeline-segment p:last-child{margin-bottom:0}
2061
+ .timeline-segment code{background:rgba(0,0,0,.06);padding:2px 6px;border-radius:4px;font-family:'SF Mono','Fira Code','Cascadia Code',monospace;font-size:12.5px}
2062
+ .timeline-segment pre{background:#1e1e2e;color:#cdd6f4;padding:14px 16px;border-radius:var(--radius-sm);overflow-x:auto;margin:8px 0;font-size:12.5px;line-height:1.5;max-width:100%;white-space:pre-wrap;word-break:break-all}
2063
+ .timeline-segment strong{font-weight:600}
2064
+ .timeline-segment em{font-style:italic}
2065
+ .timeline-segment ul,.timeline-segment ol{padding-left:20px;margin:6px 0}
2066
+ .timeline-segment li{margin:3px 0}
2067
+ .timeline-segment blockquote{border-left:3px solid var(--accent);padding-left:12px;color:var(--text2);margin:6px 0}
2068
+ /* Exec events inside a message-bubble need distinct background for contrast */
2069
+ .message-bubble > .msg-timeline > .inline-exec-event{background:var(--bg);border-left-color:var(--accent)}
2070
+ [data-theme="dark"] .message-bubble > .msg-timeline > .inline-exec-event{background:var(--bg)}
2071
+ [data-theme="dark"] .message-bubble > .msg-timeline > .inline-exec-code{background:var(--bg2)}
2054
2072
  .inline-exec-event{margin:2px 0;padding:8px 12px;background:var(--bg2);border-left:3px solid var(--border);border-radius:6px;font-size:13px;animation:execEventSlide .3s ease-out}
2055
2073
  .inline-exec-header{display:flex;align-items:center;gap:6px;margin-bottom:4px}
2056
2074
  .inline-exec-icon{font-size:14px}
@@ -1777,7 +1777,9 @@ async function selectSession(id) {
1777
1777
  key: m.key || '',
1778
1778
  };
1779
1779
  });
1780
- state.messages = loaded;
1780
+ // Group consecutive non-user messages into single assistant messages with parts[]
1781
+ // This creates the interleaved speak→tool→speak pattern matching streaming display
1782
+ state.messages = groupHistoryMessages(loaded);
1781
1783
  state._msgLoadOffset = loaded.length;
1782
1784
  state._msgLoadTotal = loaded.length;
1783
1785
  } catch (e) {
@@ -1838,11 +1840,14 @@ async function loadMoreMessages() {
1838
1840
  role: m.role || 'assistant',
1839
1841
  content: content,
1840
1842
  time: m.time || m.created_at || '',
1843
+ key: m.key || '',
1841
1844
  };
1842
1845
  });
1843
1846
 
1847
+ // Group consecutive non-user messages for the loaded batch
1848
+ const grouped = groupHistoryMessages(loaded);
1844
1849
  // 追加到现有消息前面(保持滚动位置)
1845
- state.messages = loaded.concat(state.messages);
1850
+ state.messages = grouped.concat(state.messages);
1846
1851
  state._msgLoadOffset += loaded.length;
1847
1852
  state._msgLoadTotal = state.messages.length;
1848
1853
 
@@ -1995,6 +2000,165 @@ async function clearCurrentChat() {
1995
2000
  }
1996
2001
  }
1997
2002
 
2003
+ // ── Group History Messages ──
2004
+ // Groups consecutive non-user messages (assistant + tool) into single assistant messages
2005
+ // with parts[] for timeline rendering, matching the streaming display format.
2006
+ // This creates: user → [assistant (speak → tool → speak → tool)] → user → ...
2007
/**
 * Group flat history rows into user messages and assistant "turns".
 *
 * Every run of consecutive assistant/tool rows that follows the pattern
 * text → tool → text → tool ... is folded into ONE assistant message whose
 * `parts[]` interleaves `{type:'text'}` and `{type:'exec'}` entries.
 * A run may start with either an assistant row or a tool row (the latter
 * happens when no assistant row precedes the tool row in `messages`); both
 * cases share the same collection loop, so tool rows are never dropped or
 * split into separate groups.
 *
 * @param {Array<{role:string, content:*, time?:string, key?:string}>} messages
 *        Flat message list. Returned unchanged when it is not a non-empty array.
 * @returns {Array<Object>} New array of `{role:'user', content, time}` and
 *          `{role:'assistant', content, time, parts, exec_events}` objects.
 *          Rows with any other role are skipped.
 */
function groupHistoryMessages(messages) {
  if (!Array.isArray(messages) || messages.length === 0) return messages;

  const grouped = [];
  let i = 0;

  while (i < messages.length) {
    const msg = messages[i];

    if (msg.role === 'user') {
      // User message: pass through with only the fields the renderer reads.
      grouped.push({ role: 'user', content: msg.content, time: msg.time || '' });
      i++;
      continue;
    }

    if (msg.role !== 'assistant' && msg.role !== 'tool') {
      // Skip unknown roles.
      i++;
      continue;
    }

    // Start of an agent group (opened by an assistant row or an orphan tool row).
    const parts = [];
    let groupTime = msg.time || '';

    if (msg.role === 'assistant') {
      const openingText = _historyVisibleText(msg);
      if (openingText) parts.push({ type: 'text', content: openingText });
      i++;
    }
    // When the group opens with a tool row, `i` still points at it and the
    // loop below consumes it like any other tool row.

    while (i < messages.length && messages[i].role === 'tool') {
      parts.push(_historyToolPart(messages[i], i));
      i++;

      // An assistant row directly after a tool row belongs to the same turn.
      if (i < messages.length && messages[i].role === 'assistant') {
        const followUp = messages[i];
        const followUpText = _historyVisibleText(followUp);
        if (followUpText) {
          parts.push({ type: 'text', content: followUpText });
          groupTime = followUp.time || groupTime;
        }
        i++;
      }
    }

    grouped.push({
      role: 'assistant',
      // Flat content assembled from the text parts, for code paths that
      // render `content` instead of `parts`.
      content: parts.filter(p => p.type === 'text').map(p => p.content).join('\n\n'),
      time: groupTime,
      parts: parts.length > 0 ? parts : undefined,
      exec_events: parts.filter(p => p.type === 'exec').map(p => p.data),
    });
  }

  return grouped;
}

/**
 * Return the displayable text of an assistant row, or '' when the row has no
 * string content, only whitespace, or the '(无回复)' placeholder.
 */
function _historyVisibleText(msg) {
  const text = msg.content;
  if (typeof text !== 'string') return '';
  return (text.trim() && text !== '(无回复)') ? text : '';
}

/**
 * Convert one stored tool row into a timeline `exec` part.
 * `index` is the row's position in the list being grouped and is used to
 * build the part id ('hist_tool_' + index).
 */
function _historyToolPart(toolMsg, index) {
  // Coerce defensively: a null/non-string content must not throw in
  // match/includes/substring.
  const text = toolMsg.content == null ? '' : String(toolMsg.content);
  const id = 'hist_tool_' + index;

  if (toolMsg.key === 'tool_call') {
    // Tool name is the first token after the "调用工具:" prefix, if present.
    const toolName = (text.match(/^调用工具:\s*(\S+)/) || [])[1] || '';
    return {
      type: 'exec',
      data: {
        id: id,
        type: 'tool_call',
        title: text.substring(0, 100) || ('调用工具: ' + toolName),
        tool_name: toolName,
        status: 'done',
      }
    };
  }

  if (toolMsg.key === 'tool_result') {
    return {
      type: 'exec',
      data: {
        id: id,
        type: 'tool_result',
        title: text.substring(0, 80) || '工具执行结果',
        // A result counts as failed when its text contains '失败'.
        success: !text.includes('失败'),
        summary: text.substring(0, 500),
      }
    };
  }

  // Generic tool row (no recognised key): shown as a finished tool call.
  return {
    type: 'exec',
    data: {
      id: id,
      type: 'tool_call',
      title: text.substring(0, 100) || '工具调用',
      status: 'done',
    }
  };
}
2161
+
1998
2162
  // ── Messages ──
1999
2163
  function renderMessages() {
2000
2164
  try {
@@ -2072,31 +2236,9 @@ function _renderMessagesInner() {
2072
2236
  for (let i = 0; i < state.messages.length; i++) {
2073
2237
  const msg = state.messages[i];
2074
2238
  const isUser = msg.role === 'user';
2075
- const isTool = msg.role === 'tool';
2076
2239
 
2077
- // ── 工具消息:根据 key 区分 tool_call / tool_result ──
2078
- if (isTool) {
2079
- const isResult = msg.key === 'tool_result';
2080
- const isCall = msg.key === 'tool_call';
2081
- const icon = isResult ? '📋' : (isCall ? '⚙️' : '🔧');
2082
- const label = isResult ? '工具执行结果' : (isCall ? '工具调用' : '工具调用过程');
2083
- // 提取工具名称
2084
- const toolName = msg.content.match(/^调用工具:\s*(\S+)/) ? msg.content.match(/^调用工具:\s*(\S+)/)[1]
2085
- : msg.content.match(/^\[([^\]]+)\]/) ? msg.content.match(/^\[([^\]]+)\]/)[1] : '';
2086
- const titleExtra = toolName ? ' — ' + escapeHtml(toolName) : '';
2087
- // 判断成功/失败
2088
- const isOk = isResult && !msg.content.includes('失败');
2089
- const badge = isResult ? `<span class="thought-badge" style="${isOk ? 'background:var(--ok)' : 'background:var(--danger)'}">${isOk ? '成功' : '失败'}</span>` : '';
2090
- html += `<details class="thought-block">
2091
- <summary>
2092
- <span class="thought-icon">${icon}</span>
2093
- <span class="thought-label">${label}${titleExtra}</span>
2094
- ${badge}
2095
- </summary>
2096
- <div class="thought-content"><pre style="white-space:pre-wrap;word-break:break-word;margin:0;font-size:12px;line-height:1.6">${escapeHtml(msg.content)}</pre></div>
2097
- </details>`;
2098
- continue;
2099
- }
2240
+ // Skip standalone tool messages (now grouped into assistant parts via groupHistoryMessages)
2241
+ if (msg.role === 'tool') continue;
2100
2242
 
2101
2243
  const avatar = isUser ? '👤' : botEmoji;
2102
2244
  const content = renderMarkdown(msg.content);
@@ -2147,29 +2289,34 @@ function _renderMessagesInner() {
2147
2289
  <span style="font-weight:500">Agent 正在思考...</span>
2148
2290
  </div>` : '';
2149
2291
 
2150
- // ── Timeline rendering for interleaved text + exec events ──
2292
+ // ── Timeline rendering: all parts in ONE unified bubble ──
2151
2293
  let timelineHtml = '';
2152
2294
  if (hasParts || hasStreamingText) {
2153
- let partsHtml = '';
2295
+ let partsInner = '';
2154
2296
  for (const part of (msg.parts || [])) {
2155
2297
  if (part.type === 'text' && part.content.trim()) {
2156
- partsHtml += '<div class="message-bubble">' + renderMarkdown(part.content) + '</div>';
2298
+ partsInner += '<div class="timeline-segment">' + renderMarkdown(part.content) + '</div>';
2157
2299
  } else if (part.type === 'exec') {
2158
- partsHtml += renderInlineExecEvent(part.data, i);
2300
+ partsInner += renderInlineExecEvent(part.data, i);
2301
+ } else if (part.type === 'v2_tool') {
2302
+ partsInner += renderInlineExecEvent(part, i);
2303
+ } else if (part.type === 'v2_ask') {
2304
+ partsInner += '<div class="v2-ask-user"><div class="v2-ask-icon">❓</div><div class="v2-ask-content">' + renderMarkdown(part.data.question) + '</div></div>';
2159
2305
  }
2160
2306
  }
2161
2307
  if (hasStreamingText) {
2162
2308
  const _cursor = msg.streaming ? '<span class="streaming-cursor"></span>' : '';
2163
- partsHtml += '<div class="message-bubble">' + renderMarkdown(msg._streamingText) + _cursor + '</div>';
2309
+ partsInner += '<div class="timeline-segment">' + renderMarkdown(msg._streamingText) + _cursor + '</div>';
2164
2310
  }
2165
- if (partsHtml) {
2166
- timelineHtml = '<div class="msg-timeline">' + partsHtml + '</div>';
2311
+ if (partsInner) {
2312
+ // All parts (text segments + tool calls) wrapped in ONE message-bubble
2313
+ timelineHtml = '<div class="message-bubble"><div class="msg-timeline">' + partsInner + '</div></div>';
2167
2314
  }
2168
2315
  }
2169
2316
 
2170
2317
  // Backward compat: single bubble for messages without parts
2171
2318
  const singleBubbleHtml = (!hasParts && !hasStreamingText)
2172
- ? ((content || streamingIndicator) ? `<div class="message-bubble">${content}${ttsIndicator}</div>` : '')
2319
+ ? (content ? `<div class="message-bubble">${content}${ttsIndicator}</div>` : '')
2173
2320
  : '';
2174
2321
 
2175
2322
  // Exec events panel: only for backward compat (messages without parts loaded from DB)
@@ -494,114 +494,116 @@ function updateStreamingMessage(msgIdx) {
494
494
  // Update content - timeline (interleaved text + exec events) or single bubble (backward compat)
495
495
  const hasParts = Array.isArray(msg.parts);
496
496
  if (hasParts) {
497
- // ── Timeline rendering for interleaved text + exec events ──
498
- let timeline = contentArea.querySelector('.msg-timeline');
499
- if (!timeline) {
497
+ // ── Timeline rendering: all parts in ONE unified bubble ──
498
+ // Structure: .message-bubble > .msg-timeline > (.timeline-segment | .inline-exec-event | .streaming-segment)
499
+ let bubbleWrapper = contentArea.querySelector(':scope > .msg-bubble-wrapper');
500
+ let timeline = bubbleWrapper ? bubbleWrapper.querySelector('.msg-timeline') : null;
501
+
502
+ if (!bubbleWrapper || !timeline) {
500
503
  // Remove old single bubble if exists
501
- const oldBubble = contentArea.querySelector(':scope > .message-bubble');
504
+ const oldBubble = contentArea.querySelector(':scope > .message-bubble:not(.msg-bubble-wrapper .message-bubble)');
502
505
  if (oldBubble) oldBubble.remove();
503
- // Create timeline container
506
+ // Remove old standalone timeline if exists
507
+ const oldTimeline = contentArea.querySelector(':scope > .msg-timeline');
508
+ if (oldTimeline) oldTimeline.remove();
509
+ // Create unified bubble wrapper
510
+ bubbleWrapper = document.createElement('div');
511
+ bubbleWrapper.className = 'message-bubble msg-bubble-wrapper';
504
512
  timeline = document.createElement('div');
505
513
  timeline.className = 'msg-timeline';
514
+ bubbleWrapper.appendChild(timeline);
506
515
  // Insert after thought blocks or at beginning
507
516
  const allThoughts = contentArea.querySelectorAll(':scope > .thought-block');
508
517
  if (allThoughts.length > 0) {
509
- allThoughts[allThoughts.length - 1].insertAdjacentElement('afterend', timeline);
518
+ allThoughts[allThoughts.length - 1].insertAdjacentElement('afterend', bubbleWrapper);
510
519
  } else {
511
- contentArea.appendChild(timeline);
520
+ contentArea.appendChild(bubbleWrapper);
512
521
  }
513
522
  }
514
523
 
515
524
  // Update completed parts only when count changes (avoid rebuilding DOM)
516
525
  const partsCount = msg.parts.length;
517
526
  if (!msg._lastPartsCount || msg._lastPartsCount !== partsCount) {
518
- if (!timeline) {
519
- timeline = contentArea.querySelector('.msg-timeline');
520
- }
521
- if (timeline) {
522
- // Only append new parts (don't rebuild existing)
523
- const prevCount = msg._lastPartsCount || 0;
524
- for (let pi = prevCount; pi < msg.parts.length; pi++) {
525
- const part = msg.parts[pi];
526
- let partHtml = '';
527
- if (part.type === 'text' && part.content.trim()) {
528
- partHtml = '<div class="message-bubble">' + renderMarkdown(part.content) + '</div>';
529
- } else if (part.type === 'exec') {
530
- partHtml = renderInlineExecEvent(part.data, msgIdx);
531
- } else if (part.type === 'v2_tool') {
532
- partHtml = renderInlineExecEvent(part, msgIdx);
533
- } else if (part.type === 'v2_ask') {
534
- partHtml = '<div class="v2-ask-user"><div class="v2-ask-icon">❓</div><div class="v2-ask-content">' + renderMarkdown(part.data.question) + '</div></div>';
535
- }
536
- if (partHtml) {
537
- // Insert before streaming bubble if it exists
538
- const existingStreaming = timeline.querySelector('.streaming-bubble');
539
- if (existingStreaming) {
540
- existingStreaming.insertAdjacentHTML('beforebegin', partHtml);
541
- } else {
542
- timeline.insertAdjacentHTML('beforeend', partHtml);
543
- }
527
+ // Only append new parts (don't rebuild existing)
528
+ const prevCount = msg._lastPartsCount || 0;
529
+ for (let pi = prevCount; pi < msg.parts.length; pi++) {
530
+ const part = msg.parts[pi];
531
+ let partHtml = '';
532
+ if (part.type === 'text' && part.content.trim()) {
533
+ partHtml = '<div class="timeline-segment">' + renderMarkdown(part.content) + '</div>';
534
+ } else if (part.type === 'exec') {
535
+ partHtml = renderInlineExecEvent(part.data, msgIdx);
536
+ } else if (part.type === 'v2_tool') {
537
+ partHtml = renderInlineExecEvent(part, msgIdx);
538
+ } else if (part.type === 'v2_ask') {
539
+ partHtml = '<div class="v2-ask-user"><div class="v2-ask-icon">❓</div><div class="v2-ask-content">' + renderMarkdown(part.data.question) + '</div></div>';
540
+ }
541
+ if (partHtml) {
542
+ // Insert before streaming segment if it exists
543
+ const existingStreaming = timeline.querySelector('.streaming-segment');
544
+ if (existingStreaming) {
545
+ existingStreaming.insertAdjacentHTML('beforebegin', partHtml);
546
+ } else {
547
+ timeline.insertAdjacentHTML('beforeend', partHtml);
544
548
  }
545
549
  }
546
- msg._lastPartsCount = partsCount;
547
550
  }
551
+ msg._lastPartsCount = partsCount;
548
552
  }
549
553
 
550
- // Incrementally update streaming bubble only (not entire timeline)
554
+ // Incrementally update streaming segment (inside the same bubble)
551
555
  const streamingText = msg._streamingText || '';
552
556
  const cursorHtml = msg.streaming ? '<span class="streaming-cursor"></span>' : '';
553
- let streamingBubble = timeline.querySelector('.streaming-bubble');
557
+ let streamingSeg = timeline.querySelector('.streaming-segment');
554
558
  if (streamingText.trim()) {
555
- if (!streamingBubble) {
556
- // Append streaming bubble at the end of timeline
557
- streamingBubble = document.createElement('div');
558
- streamingBubble.className = 'message-bubble streaming-bubble';
559
- timeline.appendChild(streamingBubble);
559
+ if (!streamingSeg) {
560
+ // Append streaming segment at the end of timeline (inside the unified bubble)
561
+ streamingSeg = document.createElement('div');
562
+ streamingSeg.className = 'timeline-segment streaming-segment';
563
+ timeline.appendChild(streamingSeg);
560
564
  }
561
565
  // Lightweight incremental text update during streaming to avoid flicker
562
566
  const prevStreamLen = msg._lastStreamRenderedLen || 0;
563
567
  const nowMs = performance.now();
564
568
  if (msg.streaming && streamingText.length > prevStreamLen && prevStreamLen > 0) {
565
569
  const newChars = streamingText.length - prevStreamLen;
566
- // Always use fast path (text node append) to avoid innerHTML rebuilds
567
- if (newChars < 200 && streamingBubble._lastFullHtml) {
570
+ if (newChars < 200 && streamingSeg._lastFullHtml) {
568
571
  // Fast path: append new text node (smooth, no reflow)
569
572
  const textNode = document.createTextNode(streamingText.substring(prevStreamLen));
570
- const oldCursor = streamingBubble.querySelector('.streaming-cursor');
573
+ const oldCursor = streamingSeg.querySelector('.streaming-cursor');
571
574
  if (oldCursor) oldCursor.remove();
572
- streamingBubble.appendChild(textNode);
575
+ streamingSeg.appendChild(textNode);
573
576
  const cursor = document.createElement('span');
574
577
  cursor.className = 'streaming-cursor';
575
- streamingBubble.appendChild(cursor);
578
+ streamingSeg.appendChild(cursor);
576
579
  msg._lastStreamRenderedLen = streamingText.length;
577
580
  } else {
578
581
  // Full markdown render — only when newChars >= 200 or every 1.5s
579
582
  const timeSinceFullRender = nowMs - (_fullMdTimer.last || 0);
580
583
  if (newChars >= 200 || timeSinceFullRender > 1500) {
581
- streamingBubble.innerHTML = renderMarkdown(streamingText) + cursorHtml;
582
- streamingBubble._lastFullHtml = streamingBubble.innerHTML;
584
+ streamingSeg.innerHTML = renderMarkdown(streamingText) + cursorHtml;
585
+ streamingSeg._lastFullHtml = streamingSeg.innerHTML;
583
586
  msg._lastStreamRenderedLen = streamingText.length;
584
587
  _fullMdTimer.last = nowMs;
585
588
  } else {
586
- // Still use fast path even for larger chunks if within 2s cooldown
587
589
  const textNode = document.createTextNode(streamingText.substring(prevStreamLen));
588
- const oldCursor = streamingBubble.querySelector('.streaming-cursor');
590
+ const oldCursor = streamingSeg.querySelector('.streaming-cursor');
589
591
  if (oldCursor) oldCursor.remove();
590
- streamingBubble.appendChild(textNode);
592
+ streamingSeg.appendChild(textNode);
591
593
  const cursor = document.createElement('span');
592
594
  cursor.className = 'streaming-cursor';
593
- streamingBubble.appendChild(cursor);
595
+ streamingSeg.appendChild(cursor);
594
596
  msg._lastStreamRenderedLen = streamingText.length;
595
597
  }
596
598
  }
597
599
  } else {
598
600
  // Full render
599
- streamingBubble.innerHTML = renderMarkdown(streamingText) + cursorHtml;
600
- streamingBubble._lastFullHtml = streamingBubble.innerHTML;
601
+ streamingSeg.innerHTML = renderMarkdown(streamingText) + cursorHtml;
602
+ streamingSeg._lastFullHtml = streamingSeg.innerHTML;
601
603
  msg._lastStreamRenderedLen = streamingText.length;
602
604
  }
603
- } else if (streamingBubble) {
604
- streamingBubble.remove();
605
+ } else if (streamingSeg) {
606
+ streamingSeg.remove();
605
607
  msg._lastStreamRenderedLen = 0;
606
608
  }
607
609
 
@@ -1064,29 +1066,32 @@ function _stripXmlTags(xml) {
1064
1066
  // ══════════════════════════════════════════════════════
1065
1067
 
1066
1068
  function _assembleV2Content(msg, msgParts) {
1067
- // Priority 1: V2 reasoning text (user-facing response from v2_reasoning events)
1069
+ // Priority 1: Text parts from msgParts (now includes flushed V2 reasoning segments)
1070
+ // In the new interleaved model, reasoning text is flushed into text parts at tool boundaries
1071
+ if (msgParts && Array.isArray(msgParts) && msgParts.length > 0) {
1072
+ var textParts = msgParts.filter(function(p) { return p.type === 'text'; });
1073
+ if (textParts.length > 0) {
1074
+ // Return only the LAST text part as content (the final response)
1075
+ // This avoids showing intermediate reasoning text as the message content
1076
+ var lastText = textParts[textParts.length - 1].content;
1077
+ if (lastText && lastText.trim()) return lastText.trim();
1078
+ }
1079
+ }
1080
+ // Priority 2: V2 reasoning text (may still be present during streaming before flush)
1068
1081
  if (msg._v2Reasoning && msg._v2Reasoning.trim()) {
1069
1082
  return msg._v2Reasoning.trim();
1070
1083
  }
1071
- // Priority 2: V2 ask user text
1084
+ // Priority 3: V2 ask user text
1072
1085
  if (msg._askUser && msg._askUser.trim()) {
1073
1086
  return msg._askUser.trim();
1074
1087
  }
1075
- // Priority 3: V2 raw XML stripped of tags (fallback when v2_reasoning not sent)
1088
+ // Priority 4: V2 raw XML stripped of tags (fallback when v2_reasoning not sent)
1076
1089
  if (msg._v2RawXml && msg._v2RawXml.trim()) {
1077
1090
  var strippedText = _stripXmlTags(msg._v2RawXml);
1078
1091
  if (strippedText && strippedText.trim()) {
1079
1092
  return strippedText.trim();
1080
1093
  }
1081
1094
  }
1082
- // Priority 4: V1 text parts (backward compat — non-V2 mode)
1083
- // Guard: msgParts may be undefined after page refresh (only role/content/time persisted)
1084
- if (msgParts && Array.isArray(msgParts) && msgParts.length > 0) {
1085
- var textParts = msgParts.filter(function(p) { return p.type === 'text'; });
1086
- if (textParts.length > 0) {
1087
- return textParts.map(function(p) { return p.content; }).join('\n\n');
1088
- }
1089
- }
1090
1095
  // Priority 5: raw content from message (server-stored response)
1091
1096
  if (msg.content && msg.content.trim() && msg.content !== '(无回复)') {
1092
1097
  return msg.content.trim();
@@ -1199,6 +1204,15 @@ async function sendMessage() {
1199
1204
  }
1200
1205
  currentText = '';
1201
1206
  }
1207
+ // Flush accumulated V2 reasoning text as a text part into msgParts
1208
+ // This creates the interleaved speak→tool→speak pattern
1209
function flushV2Reasoning() {
  // Move any buffered V2 reasoning text into msgParts as a text part, then
  // clear both the local buffer and the copy stored on the message.
  // Does nothing when the buffer is empty or whitespace-only.
  var pending = _v2ReasoningText ? _v2ReasoningText.trim() : '';
  if (!pending) return;

  msgParts.push({type: 'text', content: pending});
  _v2ReasoningText = '';
  state.messages[msgIdx]._v2Reasoning = '';
}
1202
1216
 
1203
1217
  // Add placeholder for streaming response
1204
1218
  state.messages.push({ role: 'assistant', content: '', thought: '', parts: [], time: new Date().toISOString(), streaming: true });
@@ -1297,6 +1311,8 @@ async function sendMessage() {
1297
1311
  scrollToBottom(true); // Force scroll for new message
1298
1312
  } else if (evt.type === 'clear_text') {
1299
1313
  // Clear intermediate text from previous agent loop iterations
1314
+ // Flush V2 reasoning as text part before clearing
1315
+ flushV2Reasoning();
1300
1316
  flushCurrentText();
1301
1317
  state.messages[msgIdx].parts = [...msgParts];
1302
1318
  state.messages[msgIdx]._streamingText = '';
@@ -1388,10 +1404,13 @@ async function sendMessage() {
1388
1404
  callback: evt.tool.callback
1389
1405
  });
1390
1406
  }
1407
+ // Flush reasoning text BEFORE tool call to create speak→tool pattern
1408
+ flushV2Reasoning();
1391
1409
  flushCurrentText();
1392
1410
  var toolEvent = {
1393
1411
  type: 'v2_tool',
1394
1412
  data: {
1413
+ id: 'v2tool_' + Date.now() + '_' + allExecEvents.length,
1395
1414
  type: 'tool_start',
1396
1415
  title: (evt.tool.beforecalltext || '调用工具: ' + (evt.tool.toolname || '')),
1397
1416
  tool_name: evt.tool.toolname,
@@ -1430,6 +1449,7 @@ async function sendMessage() {
1430
1449
  var resultEvent = {
1431
1450
  type: 'v2_tool',
1432
1451
  data: {
1452
+ id: 'v2tool_' + Date.now() + '_' + allExecEvents.length,
1433
1453
  type: 'tool_result',
1434
1454
  title: (evt.tool.toolname || '工具') + ' 执行完成',
1435
1455
  tool_name: evt.tool.toolname,
@@ -1504,6 +1524,8 @@ async function sendMessage() {
1504
1524
  ttsManager.streamDelta(evt.content);
1505
1525
  }
1506
1526
  } else if (evt.type === 'done') {
1527
+ // Flush remaining V2 reasoning as final text part
1528
+ flushV2Reasoning();
1507
1529
  flushCurrentText();
1508
1530
  // 记录到调试控制台
1509
1531
  if (window.addDebugLog) {
@@ -1547,6 +1569,7 @@ async function sendMessage() {
1547
1569
  }
1548
1570
 
1549
1571
  // Finalize message
1572
+ flushV2Reasoning();
1550
1573
  flushCurrentText();
1551
1574
  // Stop all running tool timers
1552
1575
  for (var tid in _toolTimers) {