myagent-ai 1.7.1 → 1.7.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "myagent-ai",
3
- "version": "1.7.1",
3
+ "version": "1.7.3",
4
4
  "description": "本地桌面端执行型AI助手 - Open Interpreter 风格 | Local Desktop Execution-Oriented AI Assistant",
5
5
  "main": "main.py",
6
6
  "bin": {
package/web/api_server.py CHANGED
@@ -746,29 +746,41 @@ class ApiServer:
746
746
  return ""
747
747
 
748
748
  base_instruction = (
749
- "你当前处于【执行模式】(Execution Mode)。\n"
750
- "1. **复杂度分析**:首先评估任务复杂度。对于简单问候或常见问题,直接回答;对于多步骤任务,【必须】先制定计划。\n"
751
- "2. **强制规则 - 任务列表**:每次回复【必须】包含 ```tasklist``` 代码块,输出 JSON 格式的任务进度列表。先写纯文本分析,再写 tasklist,最后写 action(如有)。\n"
752
- "3. **强制规则 - 单步执行**:每次回复【只能执行一个操作】(一个工具调用或一个代码块)。执行完后等待结果反馈。\n"
749
+ "你当前处于【执行模式】(Execution Mode)。\n\n"
750
+ "## 核心规则\n"
751
+ "1. **任务列表(强制)**:每次回复【必须】包含一个 ```tasklist``` 代码块,内含 JSON 数组格式的任务进度。\n"
752
+ " - 格式:```tasklist\\n[{\"text\": \"步骤描述\", \"status\": \"pending\"}]\\n```\n"
753
+ " - status 可选值:pending(待执行)、running(进行中)、done(已完成)、blocked(受阻)\n"
754
+ " - 首次收到任务时,拆分为多个步骤,全部标记为 pending\n"
755
+ " - 每次执行完一个步骤后,更新对应步骤状态为 done,下一个为 running\n"
756
+ "2. **单步执行(强制)**:每次回复【只能执行一个操作】(一个工具调用、一个代码块或一个技能调用)。\n"
757
+ " - 执行完一个操作后停下来,等待结果反馈后再决定下一步\n"
758
+ " - 不要一次性执行多个操作\n"
759
+ "3. **回复格式**:先写纯文本分析/总结 → 再写 ```tasklist``` 更新进度 → 最后写 ```action``` 执行操作(如有)\n"
753
760
  )
754
761
 
755
762
  # 从内存读取当前任务列表
756
763
  tasks = self._task_list_store.get(agent_path, [])
757
764
  if not tasks:
758
- return base_instruction + "请分析需求后制定计划。"
765
+ return base_instruction + "\n## 当前状态\n暂无任务计划。请先分析用户需求,拆分为具体步骤,然后用 ```tasklist``` 输出计划。"
759
766
 
760
767
  pending = [f" - ⏳ {t['text']}" for t in tasks if t.get("status") in ("pending", "running", "blocked")]
761
768
  done = [f" - ✅ {t['text']}" for t in tasks if t.get("status") == "done"]
762
769
  running = [f" - 🔄 {t['text']}" for t in tasks if t.get("status") == "running"]
763
770
 
764
- context = base_instruction + "\n当前任务进度:\n"
771
+ context = base_instruction + "\n## 当前任务进度\n"
765
772
  if done:
766
773
  context += "已完成:\n" + "\n".join(done) + "\n"
767
774
  if running:
768
775
  context += "进行中:\n" + "\n".join(running) + "\n"
769
776
  if pending:
770
777
  context += "待执行:\n" + "\n".join(pending) + "\n"
771
- context += "\n请在回复中用 ```tasklist``` 更新任务进度(先写文本分析,再写 tasklist,最后写 action)。记住:【每次只能执行一个操作】。"
778
+ context += (
779
+ "\n## 下一步\n"
780
+ "1. 用纯文本简要分析当前进展\n"
781
+ "2. 用 ```tasklist``` 更新任务进度(标记已完成的步骤为 done,标记当前步骤为 running)\n"
782
+ "3. 用 ```action``` 执行下一个待执行步骤(每次只执行一个操作)\n"
783
+ )
772
784
  return context
773
785
 
774
786
  async def handle_chat_page(self, request):
@@ -2613,7 +2625,9 @@ class ApiServer:
2613
2625
  iteration = 0
2614
2626
  # 追踪连续无 action 迭代次数,防止无限重新提示
2615
2627
  _consecutive_no_action = 0
2616
- _MAX_NO_ACTION_RETRIES = 3
2628
+ _MAX_NO_ACTION_RETRIES = 5 # 提高重试次数,给 LLM 更多机会完成剩余任务
2629
+ # ── 追踪所有流式推送的纯文本(用于刷新后恢复) ──
2630
+ _all_streamed_text_parts = [] # 每轮迭代推送的纯文本片段
2617
2631
 
2618
2632
  while iteration < max_iter:
2619
2633
  iteration += 1
@@ -2682,6 +2696,7 @@ class ApiServer:
2682
2696
  text_before = remaining[:marker_pos]
2683
2697
  if text_before.strip():
2684
2698
  await _write_sse({"type": "text_delta", "content": text_before})
2699
+ _all_streamed_text_parts.append(text_before)
2685
2700
  # 跳过整个开始标记(```action 或 ```tasklist),不要只跳到 ```
2686
2701
  st["processed_pos"] += marker_pos + len(f"```{block_type}")
2687
2702
  if block_type == "tasklist":
@@ -2718,7 +2733,9 @@ class ApiServer:
2718
2733
  # 没有找到标记,流式推送(保留末尾可能的部分标记)
2719
2734
  safe_end = len(remaining) - _MAX_HOLD
2720
2735
  if safe_end > 0:
2721
- await _write_sse({"type": "text_delta", "content": remaining[:safe_end]})
2736
+ chunk = remaining[:safe_end]
2737
+ await _write_sse({"type": "text_delta", "content": chunk})
2738
+ _all_streamed_text_parts.append(chunk)
2722
2739
  st["processed_pos"] += safe_end
2723
2740
  remaining = full_text_so_far[st["processed_pos"]:]
2724
2741
  else:
@@ -2790,6 +2807,7 @@ class ApiServer:
2790
2807
  await _stream_text_chunked(remaining, _write_sse, chunk_size=3, delay=0.01)
2791
2808
  else:
2792
2809
  await _write_sse({"type": "text_delta", "content": remaining})
2810
+ _all_streamed_text_parts.append(remaining)
2793
2811
  st["processed_pos"] = len(full_text)
2794
2812
 
2795
2813
  # Call LLM with streaming — tokens are filtered through _text_delta_callback
@@ -3054,10 +3072,19 @@ class ApiServer:
3054
3072
  break
3055
3073
 
3056
3074
  # Save assistant response to memory
3057
- if agent.memory and final_response:
3058
- agent.memory.add_short_term(session_id=session_id, role="assistant", content=final_response)
3059
-
3060
- return final_response
3075
+ # ── 优先使用流式累积文本(包含所有迭代的纯文本),回退到 final_response ──
3076
+ saved_response = final_response
3077
+ if not saved_response and _all_streamed_text_parts:
3078
+ saved_response = "\n\n".join(p for p in _all_streamed_text_parts if p.strip())
3079
+ if not saved_response and content:
3080
+ saved_response = content # 兜底:使用最后一轮的完整输出
3081
+ if agent.memory and saved_response:
3082
+ agent.memory.add_short_term(session_id=session_id, role="assistant", content=saved_response)
3083
+ elif agent.memory:
3084
+ # 即使为空也保存一条,防止刷新后消息丢失
3085
+ agent.memory.add_short_term(session_id=session_id, role="assistant", content="(执行完成,无文本回复)")
3086
+
3087
+ return saved_response or final_response or content or ""
3061
3088
 
3062
3089
  async def _execute_actions_streaming(
3063
3090
  self, agent, action_data: dict, context, write_sse
@@ -1569,6 +1569,20 @@ input,textarea,select{font:inherit}
1569
1569
  .exec-event-result-btn:hover{background:var(--accent-light);color:var(--accent-dark)}
1570
1570
  .exec-event-result-btn svg{width:12px;height:12px}
1571
1571
 
1572
+ /* ── Inline Exec Events (Timeline Interleaved) ── */
1573
+ .msg-timeline{display:flex;flex-direction:column;gap:6px}
1574
+ .inline-exec-event{margin:2px 0;padding:8px 12px;background:var(--bg2);border-left:3px solid var(--border);border-radius:6px;font-size:13px;animation:execEventSlide .3s ease-out}
1575
+ .inline-exec-header{display:flex;align-items:center;gap:6px;margin-bottom:4px}
1576
+ .inline-exec-icon{font-size:14px}
1577
+ .inline-exec-title{font-weight:500;color:var(--text);font-size:12px}
1578
+ .inline-exec-meta{color:var(--text3);font-size:11px;margin-left:auto}
1579
+ .inline-exec-code{background:var(--bg);padding:6px 8px;border-radius:4px;font-family:'SF Mono','Fira Code','Cascadia Code',monospace;font-size:12px;color:var(--text2);margin:4px 0;max-height:100px;overflow:hidden;cursor:pointer;transition:var(--transition);white-space:pre-wrap;word-break:break-all}
1580
+ .inline-exec-code:hover{background:var(--bg3)}
1581
+ .inline-exec-code.expanded{max-height:none}
1582
+ .inline-exec-summary{color:var(--text2);font-size:12px;margin:4px 0}
1583
+ .inline-exec-result-btn{background:none;border:1px solid var(--border);color:var(--text2);font-size:11px;padding:2px 8px;border-radius:4px;cursor:pointer;margin-top:4px;transition:var(--transition)}
1584
+ .inline-exec-result-btn:hover{background:var(--bg2);border-color:var(--accent);color:var(--accent)}
1585
+
1572
1586
  /* ── Execution Result Modal ── */
1573
1587
  .exec-result-modal-overlay{position:fixed;inset:0;background:rgba(0,0,0,.5);z-index:1000;display:flex;align-items:center;justify-content:center;animation:fadeIn .15s ease}
1574
1588
  .exec-result-modal{background:var(--bg);border:1px solid var(--border);border-radius:12px;width:min(680px,90vw);max-height:80vh;display:flex;flex-direction:column;box-shadow:0 20px 60px rgba(0,0,0,.25);animation:slideUp .2s ease}
@@ -1605,6 +1619,9 @@ input,textarea,select{font:inherit}
1605
1619
  [data-theme="dark"] .exec-result-modal{background:var(--bg2);border-color:var(--border)}
1606
1620
  [data-theme="dark"] .exec-result-modal-body pre{background:#0a0c10;color:#cdd6f4}
1607
1621
  [data-theme="dark"] .exec-result-info-item{background:var(--bg3)}
1622
+ [data-theme="dark"] .inline-exec-event{background:var(--bg3);border-left-color:var(--border)}
1623
+ [data-theme="dark"] .inline-exec-code{background:var(--bg)}
1624
+ [data-theme="dark"] .inline-exec-result-btn:hover{background:var(--bg4)}
1608
1625
 
1609
1626
  .thought-block {
1610
1627
  background: rgba(0, 0, 0, 0.03);
@@ -1680,9 +1680,12 @@ function renderMessages() {
1680
1680
  </div>` : '';
1681
1681
  const ttsIndicator = ttsManager && ttsManager.isPlaying && ttsManager.currentMsgIndex === i ?
1682
1682
  ' <span class="tts-playing-icon"><svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2"><polygon points="11 5 6 9 2 9 2 15 6 15 11 19 11 5"/><path d="M15.54 8.46a5 5 0 0 1 0 7.07"/></svg></span>' : '';
1683
- const execEventsHtml = (!isUser && msg.exec_events && msg.exec_events.length > 0)
1684
- ? renderExecEvents(msg.exec_events, i) : '';
1685
- const streamingIndicator = msg.streaming && !msg.content && !msg.thought ? `
1683
+
1684
+ // ── Determine rendering mode and streaming indicator ──
1685
+ const hasParts = Array.isArray(msg.parts) && msg.parts.length > 0;
1686
+ const hasStreamingText = msg._streamingText && msg._streamingText.trim();
1687
+ const anyContent = msg.content || msg._streamingText || hasParts;
1688
+ const streamingIndicator = msg.streaming && !anyContent && !msg.thought ? `
1686
1689
  <div class="streaming-indicator">
1687
1690
  <div class="spinner"></div>
1688
1691
  <div class="streaming-dots">
@@ -1690,13 +1693,42 @@ function renderMessages() {
1690
1693
  </div>
1691
1694
  <span style="font-weight:500">Agent 正在思考...</span>
1692
1695
  </div>` : '';
1696
+
1697
+ // ── Timeline rendering for interleaved text + exec events ──
1698
+ let timelineHtml = '';
1699
+ if (hasParts || hasStreamingText) {
1700
+ let partsHtml = '';
1701
+ for (const part of (msg.parts || [])) {
1702
+ if (part.type === 'text' && part.content.trim()) {
1703
+ partsHtml += '<div class="message-bubble">' + renderMarkdown(part.content) + '</div>';
1704
+ } else if (part.type === 'exec') {
1705
+ partsHtml += renderInlineExecEvent(part.data, i);
1706
+ }
1707
+ }
1708
+ if (hasStreamingText) {
1709
+ partsHtml += '<div class="message-bubble">' + renderMarkdown(msg._streamingText) + '</div>';
1710
+ }
1711
+ if (partsHtml) {
1712
+ timelineHtml = '<div class="msg-timeline">' + partsHtml + '</div>';
1713
+ }
1714
+ }
1715
+
1716
+ // Backward compat: single bubble for messages without parts
1717
+ const singleBubbleHtml = (!hasParts && !hasStreamingText)
1718
+ ? ((content || streamingIndicator) ? `<div class="message-bubble">${content}${ttsIndicator}</div>` : '')
1719
+ : '';
1720
+
1721
+ // Exec events panel: only for backward compat (messages without parts loaded from DB)
1722
+ const execEventsHtml = (!isUser && !hasParts && msg.exec_events && msg.exec_events.length > 0)
1723
+ ? renderExecEvents(msg.exec_events, i) : '';
1693
1724
  html += `
1694
1725
  <div class="message-row ${msg.role}">
1695
1726
  <div class="message-avatar">${avatar}</div>
1696
1727
  <div style="flex:1;min-width:0">
1697
1728
  ${reasoningHtml}
1698
1729
  ${thoughtHtml}
1699
- ${content || streamingIndicator ? `<div class="message-bubble">${content}${ttsIndicator}</div>` : ''}
1730
+ ${timelineHtml}
1731
+ ${singleBubbleHtml}
1700
1732
  ${streamingIndicator}
1701
1733
  ${execEventsHtml}
1702
1734
  ${msg.time ? `<div class="message-time">${formatTime(msg.time)}</div>` : ''}
@@ -2582,6 +2614,7 @@ function insertQuick(text) {
2582
2614
  // ══════════════════════════════════════════════════════
2583
2615
  // ── TTS (Text-to-Speech) Manager ──
2584
2616
  // ══════════════════════════════════════════════════════
2617
+ // 支持分段流式播放:文本边生成边朗读,遇到句子边界立即合成播放
2585
2618
 
2586
2619
  // Simple hash function for text caching
2587
2620
  function simpleHash(str) {
@@ -2603,6 +2636,13 @@ const ttsManager = {
2603
2636
  cache: new Map(), // textHash -> blobUrl
2604
2637
  voice: 'zh-CN-XiaoxiaoNeural',
2605
2638
  speed: '+0%',
2639
+ // ── 分段流式状态 ──
2640
+ _streamActive: false, // 是否正在流式模式
2641
+ _streamBuffer: '', // 当前缓冲区(积累到句子边界前)
2642
+ _audioQueue: [], // 待播放的音频 blobUrl 队列
2643
+ _audioPlaying: false, // 队列是否正在播放
2644
+ _stopRequested: false, // 是否已请求停止
2645
+ _streamMsgIndex: -1, // 流式模式对应的消息索引
2606
2646
 
2607
2647
  init() {
2608
2648
  // Load TTS enabled state from localStorage
@@ -2613,15 +2653,25 @@ const ttsManager = {
2613
2653
  this.updateButtonUI();
2614
2654
  // Audio event handlers
2615
2655
  this.audio.addEventListener('ended', () => {
2616
- this.isPlaying = false;
2617
- this.currentMsgIndex = -1;
2618
- this.updatePlayingIndicator();
2656
+ if (this._streamActive) {
2657
+ // 流式模式:播放队列下一段
2658
+ this._playNextInQueue();
2659
+ } else {
2660
+ this.isPlaying = false;
2661
+ this.currentMsgIndex = -1;
2662
+ this.updatePlayingIndicator();
2663
+ }
2619
2664
  });
2620
2665
  this.audio.addEventListener('error', (e) => {
2621
2666
  console.error('TTS audio error:', e);
2622
- this.isPlaying = false;
2623
- this.currentMsgIndex = -1;
2624
- this.updatePlayingIndicator();
2667
+ if (this._streamActive) {
2668
+ // 流式模式:跳过错误段,播放下一段
2669
+ this._playNextInQueue();
2670
+ } else {
2671
+ this.isPlaying = false;
2672
+ this.currentMsgIndex = -1;
2673
+ this.updatePlayingIndicator();
2674
+ }
2625
2675
  });
2626
2676
  },
2627
2677
 
@@ -2652,10 +2702,16 @@ const ttsManager = {
2652
2702
  },
2653
2703
 
2654
2704
  stop() {
2705
+ this._stopRequested = true;
2655
2706
  this.audio.pause();
2656
2707
  this.audio.currentTime = 0;
2657
2708
  this.isPlaying = false;
2658
2709
  this.currentMsgIndex = -1;
2710
+ this._streamActive = false;
2711
+ this._streamBuffer = '';
2712
+ this._audioQueue = [];
2713
+ this._audioPlaying = false;
2714
+ this._streamMsgIndex = -1;
2659
2715
  this.updatePlayingIndicator();
2660
2716
  },
2661
2717
 
@@ -2666,10 +2722,216 @@ const ttsManager = {
2666
2722
  }
2667
2723
  },
2668
2724
 
2725
+ // ════════════════════════════════════════════
2726
+ // ── 分段流式 TTS:text_delta 回调 ──
2727
+ // ════════════════════════════════════════════
2728
+
2729
+ /**
2730
+ * 开始流式 TTS 会话
2731
+ * @param {number} msgIndex - 消息索引
2732
+ */
2733
+ _startStream(msgIndex) {
2734
+ this._stopRequested = false;
2735
+ this._streamActive = true;
2736
+ this._streamBuffer = '';
2737
+ this._audioQueue = [];
2738
+ this._audioPlaying = false;
2739
+ this._streamMsgIndex = msgIndex;
2740
+ this.currentMsgIndex = msgIndex;
2741
+ this.isPlaying = true;
2742
+ },
2743
+
2744
+ /**
2745
+ * 流式推送文本增量
2746
+ * 在 flow_engine.js 的 text_delta 处理中调用
2747
+ * 积累到句子边界时自动触发 TTS 合成
2748
+ * @param {string} delta - 新增文本片段
2749
+ */
2750
+ streamDelta(delta) {
2751
+ if (!this.enabled || !this._streamActive || this._stopRequested) return;
2752
+ if (!delta || !delta.trim()) return;
2753
+
2754
+ this._streamBuffer += delta;
2755
+
2756
+ // 检测句子边界:中文句号/感叹号/问号,英文句号+空格,或换行
2757
+ var boundaryPattern = /[。!?]|\.(?:\s|$)|\n/;
2758
+ var boundaryIdx = -1;
2759
+ for (var i = 0; i < this._streamBuffer.length; i++) {
2760
+ if (boundaryPattern.test(this._streamBuffer[i])) {
2761
+ boundaryIdx = i;
2762
+ break;
2763
+ }
2764
+ }
2765
+
2766
+ // 还没到句子边界,但如果缓冲区已经很长(>200字),强制切分
2767
+ if (boundaryIdx === -1 && this._streamBuffer.length > 200) {
2768
+ // 在最后一个逗号或空格处切分
2769
+ var lastComma = -1;
2770
+ for (var j = 0; j < this._streamBuffer.length; j++) {
2771
+ var ch = this._streamBuffer[j];
2772
+ if (ch === ',' || ch === ',' || ch === ';' || ch === ';' || ch === ' ' || ch === ':') {
2773
+ lastComma = j;
2774
+ }
2775
+ }
2776
+ if (lastComma > 0) {
2777
+ boundaryIdx = lastComma;
2778
+ } else {
2779
+ boundaryIdx = this._streamBuffer.length;
2780
+ }
2781
+ }
2782
+
2783
+ if (boundaryIdx !== -1) {
2784
+ // 提取到边界的文本
2785
+ var sentence = this._streamBuffer.substring(0, boundaryIdx + 1).trim();
2786
+ this._streamBuffer = this._streamBuffer.substring(boundaryIdx + 1);
2787
+
2788
+ if (sentence) {
2789
+ var cleanSentence = this._cleanForStreamTTS(sentence);
2790
+ if (cleanSentence) {
2791
+ this._enqueueTTS(cleanSentence);
2792
+ }
2793
+ }
2794
+ }
2795
+ },
2796
+
2797
+ /**
2798
+ * 刷新剩余缓冲区(流结束时调用)
2799
+ * 将 buffer 中剩余的文本立即合成
2800
+ */
2801
+ streamFlush() {
2802
+ if (!this.enabled || !this._streamActive || this._stopRequested) return;
2803
+ var remaining = this._streamBuffer.trim();
2804
+ this._streamBuffer = '';
2805
+ if (remaining) {
2806
+ var cleanText = this._cleanForStreamTTS(remaining);
2807
+ if (cleanText) {
2808
+ this._enqueueTTS(cleanText);
2809
+ }
2810
+ }
2811
+ // 标记流式阶段结束(队列播完后自动清理状态)
2812
+ this._streamActive = false;
2813
+ },
2814
+
2815
+ /**
2816
+ * 清理文本用于流式 TTS(去 HTML/代码块/执行结果等)
2817
+ */
2818
+ _cleanForStreamTTS(text) {
2819
+ // 去除代码块
2820
+ text = text.replace(/```[\s\S]*?```/g, '');
2821
+ // 去除执行结果标记
2822
+ text = text.replace(/^\s*[✅❌⏰]\s*\[执行结果\].*/gm, '');
2823
+ // 去除 HTML 标签
2824
+ text = text.replace(/<svg[^>]*>[\s\S]*?<\/svg>/gi, '');
2825
+ text = text.replace(/<img[^>]*>/gi, '');
2826
+ text = text.replace(/<br\s*\/?>/gi, '\n');
2827
+ text = text.replace(/<[^>]+>/g, '');
2828
+ // 去除 emoji
2829
+ text = text.replace(/[\u{1F300}-\u{1FAFF}]/gu, '');
2830
+ text = text.replace(/[\u{2600}-\u{27BF}]/gu, '');
2831
+ text = text.replace(/[\u{FE00}-\u{FE0F}]/gu, '');
2832
+ text = text.replace(/[\u{200D}]/gu, '');
2833
+ text = text.replace(/[\u{20E3}]/gu, '');
2834
+ text = text.replace(/[\u{2300}-\u{23FF}]/gu, '');
2835
+ text = text.replace(/[\u{2B50}-\u{2B55}]/gu, '');
2836
+ text = text.replace(/[\u{203C}-\u{3299}]/gu, '');
2837
+ text = text.replace(/[\u{E0020}-\u{E007F}]/gu, '');
2838
+ text = text.replace(/[✅❌⚠️🔄⏰🔒💻🔍📁🧠🌐🛠👋🤖🎯💡🚀👍🎯📊📝🔊🔍💬📌✨✓✗→←↓↑⏹⬇⬆↩]/g, '');
2839
+ // 去除多余换行
2840
+ text = text.replace(/\n{2,}/g, '\n');
2841
+ text = text.trim();
2842
+ return text || null;
2843
+ },
2844
+
2845
+ /**
2846
+ * 将文本加入 TTS 合成队列(异步,不阻塞)
2847
+ */
2848
+ _enqueueTTS(text) {
2849
+ if (this._stopRequested) return;
2850
+ var self = this;
2851
+
2852
+ (async function() {
2853
+ try {
2854
+ var hash = simpleHash(text);
2855
+ var blobUrl = self.cache.get(hash);
2856
+
2857
+ if (!blobUrl) {
2858
+ var resp = await fetch('/api/tts', {
2859
+ method: 'POST',
2860
+ headers: { 'Content-Type': 'application/json' },
2861
+ body: JSON.stringify({
2862
+ text: text,
2863
+ voice: self.voice,
2864
+ speed: self.speed,
2865
+ }),
2866
+ });
2867
+
2868
+ if (!resp.ok) {
2869
+ var errData = await resp.json().catch(function() { return {}; });
2870
+ throw new Error(errData.error || 'TTS 请求失败');
2871
+ }
2872
+
2873
+ var blob = await resp.blob();
2874
+ blobUrl = URL.createObjectURL(blob);
2875
+ self.cache.set(hash, blobUrl);
2876
+ }
2877
+
2878
+ if (!self._stopRequested) {
2879
+ self._audioQueue.push(blobUrl);
2880
+ // 如果还没开始播放队列,立即开始
2881
+ if (!self._audioPlaying) {
2882
+ self._playNextInQueue();
2883
+ }
2884
+ }
2885
+ } catch (e) {
2886
+ console.error('TTS stream chunk error:', e);
2887
+ }
2888
+ })();
2889
+ },
2890
+
2891
+ /**
2892
+ * 播放队列中的下一段音频
2893
+ */
2894
+ _playNextInQueue() {
2895
+ if (this._stopRequested) {
2896
+ this.isPlaying = false;
2897
+ this._audioPlaying = false;
2898
+ this.currentMsgIndex = -1;
2899
+ this.updatePlayingIndicator();
2900
+ return;
2901
+ }
2902
+
2903
+ if (this._audioQueue.length === 0) {
2904
+ // 队列空了,检查流式是否已结束
2905
+ if (!this._streamActive) {
2906
+ // 流结束且队列为空 → 播放完成
2907
+ this.isPlaying = false;
2908
+ this._audioPlaying = false;
2909
+ this.currentMsgIndex = -1;
2910
+ this.updatePlayingIndicator();
2911
+ }
2912
+ // 如果流还在继续,等待新的音频入队
2913
+ return;
2914
+ }
2915
+
2916
+ var blobUrl = this._audioQueue.shift();
2917
+ this.audio.src = blobUrl;
2918
+ this._audioPlaying = true;
2919
+
2920
+ var self = this;
2921
+ this.audio.play().catch(function(e) {
2922
+ console.error('TTS play queue error:', e);
2923
+ self._playNextInQueue();
2924
+ });
2925
+ },
2926
+
2927
+ // ════════════════════════════════════════════
2928
+ // ── 完整消息 TTS(非流式,兼容手动点击) ──
2929
+ // ════════════════════════════════════════════
2930
+
2669
2931
  async speak(msgIndex) {
2670
2932
  if (msgIndex < 0 || msgIndex >= state.messages.length) return;
2671
2933
  const msg = state.messages[msgIndex];
2672
- if (!msg || msg.role !== 'user' && !msg.content) return;
2934
+ if (!msg || msg.role !== 'assistant' && !msg.content) return;
2673
2935
 
2674
2936
  // 跳过命令执行结果(以 [执行结果] 开头的消息)
2675
2937
  var rawText = msg.content.replace(/<[^>]+>/g, '');
@@ -2677,24 +2939,23 @@ const ttsManager = {
2677
2939
 
2678
2940
  // 去除 HTML 标签(msg.content 是 HTML 格式,SVG 图标等会被朗读)
2679
2941
  let text = msg.content
2680
- .replace(/<svg[^>]*>[\s\S]*?<\/svg>/gi, '') // 移除 SVG 图标
2681
- .replace(/<img[^>]*>/gi, '[图片]') // 图片替换为文字
2682
- .replace(/<br\s*\/?>/gi, '\n') // <br> 转换为换行
2683
- .replace(/<[^>]+>/g, '') // 移除所有 HTML 标签
2684
- // emoji 和特殊符号过滤
2685
- .replace(/[\u{1F300}-\u{1FAFF}]/gu, '') // 全部 Emoji 范围
2686
- .replace(/[\u{2600}-\u{27BF}]/gu, '') // 杂项/装饰符号
2687
- .replace(/[\u{FE00}-\u{FE0F}]/gu, '') // 变体选择符
2688
- .replace(/[\u{200D}]/gu, '') // ZWJ 零宽连接符
2689
- .replace(/[\u{20E3}]/gu, '') // 组合符号
2690
- .replace(/[\u{2300}-\u{23FF}]/gu, '') // 技术符号
2691
- .replace(/[\u{2B50}-\u{2B55}]/gu, '') // 星星等
2692
- .replace(/[\u{203C}-\u{3299}]/gu, '') // CJK 符号
2693
- .replace(/[\u{E0020}-\u{E007F}]/gu, '') // 标签
2694
- .replace(/[✅❌⚠️🔄⏰🔒💻🔍📁🧠🌐🛠️👋🤖🎯💡🚀👍🎯📊📝🔊🔍💬📌✨✓✗→←↓↑⏹⬇⬆↩]/g, '') // 常用图标
2695
- .replace(/```[\s\S]*?```/g, '代码块') // 代码块替换为文字
2696
- .replace(/`[^`]+`/g, function(m) { return m.slice(1,-1); }) // 保留内联代码文字但去引号
2697
- .replace(/\n{2,}/g, '\n') // 多余换行
2942
+ .replace(/<svg[^>]*>[\s\S]*?<\/svg>/gi, '')
2943
+ .replace(/<img[^>]*>/gi, '[图片]')
2944
+ .replace(/<br\s*\/?>/gi, '\n')
2945
+ .replace(/<[^>]+>/g, '')
2946
+ .replace(/[\u{1F300}-\u{1FAFF}]/gu, '')
2947
+ .replace(/[\u{2600}-\u{27BF}]/gu, '')
2948
+ .replace(/[\u{FE00}-\u{FE0F}]/gu, '')
2949
+ .replace(/[\u{200D}]/gu, '')
2950
+ .replace(/[\u{20E3}]/gu, '')
2951
+ .replace(/[\u{2300}-\u{23FF}]/gu, '')
2952
+ .replace(/[\u{2B50}-\u{2B55}]/gu, '')
2953
+ .replace(/[\u{203C}-\u{3299}]/gu, '')
2954
+ .replace(/[\u{E0020}-\u{E007F}]/gu, '')
2955
+ .replace(/[✅❌⚠️🔄⏰🔒💻🔍📁🧠🌐🛠👋🤖🎯💡🚀👍🎯📊📝🔊🔍💬📌✨✓✗→←↓↑⏹⬇⬆↩]/g, '')
2956
+ .replace(/```[\s\S]*?```/g, '代码块')
2957
+ .replace(/`[^`]+`/g, function(m) { return m.slice(1,-1); })
2958
+ .replace(/\n{2,}/g, '\n')
2698
2959
  .trim();
2699
2960
 
2700
2961
  if (!text) return;
@@ -350,22 +350,87 @@ function updateStreamingMessage(msgIdx) {
350
350
  }
351
351
  }
352
352
 
353
- // Update content bubble
354
- let bubble = contentArea.querySelector('.message-bubble');
355
- const content = renderMarkdown(msg.content);
356
- if (content && !bubble) {
357
- // Create bubble
358
- bubble = document.createElement('div');
359
- bubble.className = 'message-bubble';
360
- contentArea.appendChild(bubble);
361
- }
362
- if (bubble && content) {
363
- bubble.innerHTML = content;
353
+ // Update content - timeline (interleaved text + exec events) or single bubble (backward compat)
354
+ const hasParts = Array.isArray(msg.parts);
355
+ if (hasParts) {
356
+ // ── Timeline rendering for interleaved text + exec events ──
357
+ let timeline = contentArea.querySelector('.msg-timeline');
358
+ if (!timeline) {
359
+ // Remove old single bubble if exists
360
+ const oldBubble = contentArea.querySelector(':scope > .message-bubble');
361
+ if (oldBubble) oldBubble.remove();
362
+ // Create timeline container
363
+ timeline = document.createElement('div');
364
+ timeline.className = 'msg-timeline';
365
+ // Insert after thought blocks or at beginning
366
+ const allThoughts = contentArea.querySelectorAll(':scope > .thought-block');
367
+ if (allThoughts.length > 0) {
368
+ allThoughts[allThoughts.length - 1].insertAdjacentElement('afterend', timeline);
369
+ } else {
370
+ contentArea.appendChild(timeline);
371
+ }
372
+ }
373
+
374
+ // Cache completed parts rendering (only re-render when parts count changes)
375
+ const partsCount = msg.parts.length;
376
+ if (!msg._renderedPartsHtml || msg._lastPartsCount !== partsCount) {
377
+ let html = '';
378
+ for (const part of msg.parts) {
379
+ if (part.type === 'text' && part.content.trim()) {
380
+ html += '<div class="message-bubble">' + renderMarkdown(part.content) + '</div>';
381
+ } else if (part.type === 'exec') {
382
+ html += renderInlineExecEvent(part.data, msgIdx);
383
+ }
384
+ }
385
+ msg._renderedPartsHtml = html;
386
+ msg._lastPartsCount = partsCount;
387
+ }
388
+
389
+ // Build streaming bubble for current in-progress text
390
+ const streamingText = msg._streamingText || '';
391
+ const streamingBubbleHtml = streamingText.trim()
392
+ ? '<div class="message-bubble">' + renderMarkdown(streamingText) + '</div>'
393
+ : '';
394
+
395
+ timeline.innerHTML = msg._renderedPartsHtml + streamingBubbleHtml;
396
+
397
+ // Remove exec events panel if present (events are now inline in timeline)
398
+ const execPanel = contentArea.querySelector('.exec-events-panel');
399
+ if (execPanel) execPanel.remove();
400
+ } else {
401
+ // ── Backward compat: single content bubble + exec events panel ──
402
+ let bubble = contentArea.querySelector('.message-bubble');
403
+ const content = renderMarkdown(msg.content);
404
+ if (content && !bubble) {
405
+ bubble = document.createElement('div');
406
+ bubble.className = 'message-bubble';
407
+ contentArea.appendChild(bubble);
408
+ }
409
+ if (bubble && content) {
410
+ bubble.innerHTML = content;
411
+ }
412
+
413
+ // Exec events panel (only for backward compat messages without parts)
414
+ if (msg.exec_events && msg.exec_events.length > 0) {
415
+ let execPanel = contentArea.querySelector('.exec-events-panel');
416
+ const newExecHtml = renderExecEvents(msg.exec_events, msgIdx);
417
+ if (execPanel) {
418
+ execPanel.outerHTML = newExecHtml;
419
+ } else {
420
+ const timeEl = contentArea.querySelector('.message-time');
421
+ if (timeEl) {
422
+ timeEl.insertAdjacentHTML('beforebegin', newExecHtml);
423
+ } else {
424
+ contentArea.insertAdjacentHTML('beforeend', newExecHtml);
425
+ }
426
+ }
427
+ }
364
428
  }
365
429
 
366
430
  // Update streaming indicator
367
431
  let indicator = contentArea.querySelector('.streaming-indicator');
368
- const streamingIndicator = msg.streaming && !msg.content && !msg.thought ? `
432
+ const anyContent = msg.content || msg._streamingText || (msg.parts && msg.parts.length > 0);
433
+ const streamingIndicator = msg.streaming && !anyContent && !msg.thought ? `
369
434
  <div class="streaming-indicator">
370
435
  <div class="streaming-dots">
371
436
  <span class="dot"></span><span class="dot"></span><span class="dot"></span>
@@ -380,23 +445,6 @@ function updateStreamingMessage(msgIdx) {
380
445
  indicator.remove();
381
446
  }
382
447
 
383
- // Update exec events panel
384
- if (msg.exec_events && msg.exec_events.length > 0) {
385
- let execPanel = contentArea.querySelector('.exec-events-panel');
386
- const newExecHtml = renderExecEvents(msg.exec_events, msgIdx);
387
- if (execPanel) {
388
- execPanel.outerHTML = newExecHtml;
389
- } else {
390
- // Insert before time element or at end
391
- const timeEl = contentArea.querySelector('.message-time');
392
- if (timeEl) {
393
- timeEl.insertAdjacentHTML('beforebegin', newExecHtml);
394
- } else {
395
- contentArea.insertAdjacentHTML('beforeend', newExecHtml);
396
- }
397
- }
398
- }
399
-
400
448
  // Auto-scroll
401
449
  scrollToBottom();
402
450
  }
@@ -507,6 +555,52 @@ function toggleExecEventsPanel(header) {
507
555
  body.classList.toggle('expanded');
508
556
  }
509
557
 
558
+ // ══════════════════════════════════════════════════════
559
+ // ── Inline Exec Event (Timeline Card) ──
560
+ // ══════════════════════════════════════════════════════
561
+
562
+ function renderInlineExecEvent(data, msgIdx) {
563
+ const iconEmoji = getEventIconEmoji(data);
564
+ const title = data.title || (data.tool_name || data.skill_name || '执行事件');
565
+
566
+ // Build meta text
567
+ let metaParts = [];
568
+ if (data.execution_time !== undefined) metaParts.push('耗时 ' + data.execution_time + 's');
569
+ if (data.language) metaParts.push(escapeHtml(data.language));
570
+ if (data.tool_name || data.skill_name) metaParts.push(escapeHtml(data.tool_name || data.skill_name));
571
+ if (data.timed_out) metaParts.push('超时');
572
+ if (data.exit_code !== undefined) metaParts.push('exit: ' + data.exit_code);
573
+ const metaText = metaParts.join(' · ');
574
+
575
+ // Build body content
576
+ let bodyHtml = '';
577
+ // Code preview for code_exec/code_result
578
+ if (data.code_preview && (data.type === 'code_exec' || data.type === 'code_result')) {
579
+ bodyHtml += '<div class="inline-exec-code" onclick="showExecResultModal(' + msgIdx + ', ' + data.id + ')" title="点击查看完整结果">' + escapeHtml(data.code_preview) + '</div>';
580
+ }
581
+ // Summary for tool_result/skill_result
582
+ if (data.summary && (data.type === 'tool_result' || data.type === 'skill_result')) {
583
+ bodyHtml += '<div class="inline-exec-summary">' + escapeHtml(data.summary) + '</div>';
584
+ }
585
+ // Result button for code_result
586
+ if (data.type === 'code_result' && (data.stdout || data.stderr || data.error)) {
587
+ bodyHtml += '<button class="inline-exec-result-btn" onclick="showExecResultModal(' + msgIdx + ', ' + data.id + ')">查看详情</button>';
588
+ }
589
+ // Result button for tool_result/skill_result
590
+ if ((data.type === 'tool_result' || data.type === 'skill_result') && data.result) {
591
+ bodyHtml += '<button class="inline-exec-result-btn" onclick="showToolResultModal(' + msgIdx + ', ' + data.id + ')">查看详情</button>';
592
+ }
593
+
594
+ return '<div class="inline-exec-event">' +
595
+ '<div class="inline-exec-header">' +
596
+ '<span class="inline-exec-icon">' + iconEmoji + '</span>' +
597
+ '<span class="inline-exec-title">' + escapeHtml(title) + '</span>' +
598
+ (metaText ? '<span class="inline-exec-meta">' + metaText + '</span>' : '') +
599
+ '</div>' +
600
+ bodyHtml +
601
+ '</div>';
602
+ }
603
+
510
604
  // ══════════════════════════════════════════════════════
511
605
  // ── Execution Result Modal (执行结果弹窗) ──
512
606
  // ══════════════════════════════════════════════════════
@@ -702,14 +796,22 @@ async function sendMessage() {
702
796
  const reader = resp.body.getReader();
703
797
  const decoder = new TextDecoder();
704
798
  let buffer = '';
705
- let fullResponse = '';
799
+ let msgParts = []; // Timeline: [{type:'text', content:'...'}, {type:'exec', data:{...}}]
800
+ let currentText = ''; // Accumulator for current streaming text segment
801
+ let allExecEvents = []; // All exec events (for summary panel at bottom)
706
802
  let msgIdx = state.messages.length;
707
803
  let sessionIdReceived = sessionId;
708
- let execEventsReceived = [];
709
804
  let fullThought = '';
710
-
805
+
806
+ function flushCurrentText() {
807
+ if (currentText.trim()) {
808
+ msgParts.push({type: 'text', content: currentText});
809
+ }
810
+ currentText = '';
811
+ }
812
+
711
813
  // Add placeholder for streaming response
712
- state.messages.push({ role: 'assistant', content: '', thought: '', time: new Date().toISOString(), streaming: true });
814
+ state.messages.push({ role: 'assistant', content: '', thought: '', parts: [], time: new Date().toISOString(), streaming: true });
713
815
  renderMessages();
714
816
 
715
817
  while (true) {
@@ -730,14 +832,28 @@ async function sendMessage() {
730
832
  // Sync the actual session ID (backend may prefix with agent_path)
731
833
  state.activeSessionId = evt.session_id;
732
834
  } else if (evt.type === 'text') {
733
- fullResponse = evt.content;
835
+ // Full text event (non-streaming replacement)
836
+ flushCurrentText();
837
+ msgParts.push({type: 'text', content: evt.content});
838
+ state.messages[msgIdx].parts = [...msgParts];
839
+ state.messages[msgIdx]._streamingText = '';
734
840
  state.messages[msgIdx].content = evt.content;
735
841
  renderMessages();
736
842
  } else if (evt.type === 'text_delta') {
737
843
  // Incremental streaming token
738
- fullResponse += evt.content;
739
- state.messages[msgIdx].content = fullResponse;
844
+ currentText += evt.content;
845
+ // Build backward-compat content from all parts + streaming text
846
+ const allText = msgParts.filter(p => p.type === 'text').map(p => p.content).join('\n\n')
847
+ + (currentText.trim() ? '\n\n' + currentText : '');
848
+ state.messages[msgIdx].parts = [...msgParts];
849
+ state.messages[msgIdx]._streamingText = currentText;
850
+ state.messages[msgIdx].content = allText;
740
851
  throttledStreamUpdate(msgIdx);
852
+ // ── 分段流式 TTS:推送增量文本 ──
853
+ if (ttsManager.enabled && !ttsManager._streamActive) {
854
+ ttsManager._startStream(msgIdx);
855
+ }
856
+ ttsManager.streamDelta(evt.content);
741
857
  } else if (evt.type === 'thought_delta') {
742
858
  // Agent 思考过程增量文本(流式推送,单独显示)
743
859
  fullThought += evt.content;
@@ -753,28 +869,39 @@ async function sendMessage() {
753
869
  state.messages[msgIdx].thought = fullThought;
754
870
  throttledStreamUpdate(msgIdx);
755
871
  } else if (evt.type === 'queue_start') {
756
- // New message starting from queue
872
+ // Finalize previous message
873
+ flushCurrentText();
757
874
  if (state.messages[msgIdx]) {
758
875
  state.messages[msgIdx].streaming = false;
759
- if (execEventsReceived.length > 0) state.messages[msgIdx].exec_events = [...execEventsReceived];
876
+ state.messages[msgIdx].parts = [...msgParts];
877
+ state.messages[msgIdx].content = msgParts.filter(p => p.type === 'text').map(p => p.content).join('\n\n') || '(无回复)';
878
+ state.messages[msgIdx]._streamingText = '';
879
+ if (allExecEvents.length > 0) state.messages[msgIdx].exec_events = [...allExecEvents];
760
880
  }
881
+ // Start new message
761
882
  state.messages.push({ role: 'user', content: evt.message, time: new Date().toISOString() });
762
883
  msgIdx = state.messages.length;
763
- fullResponse = '';
884
+ msgParts = [];
885
+ currentText = '';
886
+ allExecEvents = [];
764
887
  fullThought = '';
765
- execEventsReceived = [];
766
- state.messages.push({ role: 'assistant', content: '', thought: '', time: new Date().toISOString(), streaming: true });
888
+ state.messages.push({ role: 'assistant', content: '', thought: '', parts: [], time: new Date().toISOString(), streaming: true });
767
889
  renderMessages();
768
890
  } else if (evt.type === 'clear_text') {
769
891
  // Clear intermediate text from previous agent loop iterations
770
- fullResponse = '';
771
- state.messages[msgIdx].content = '';
892
+ flushCurrentText();
893
+ state.messages[msgIdx].parts = [...msgParts];
894
+ state.messages[msgIdx]._streamingText = '';
895
+ state.messages[msgIdx].content = msgParts.filter(p => p.type === 'text').map(p => p.content).join('\n\n') || '';
772
896
  throttledStreamUpdate(msgIdx);
773
897
  } else if (evt.type === 'exec_event') {
774
898
  // Real-time execution event (tool call, code exec, skill result, etc.)
775
- execEventsReceived.push(evt.data);
776
- // 立即更新消息的 exec_events 并渲染
777
- state.messages[msgIdx].exec_events = [...execEventsReceived];
899
+ flushCurrentText();
900
+ msgParts.push({type: 'exec', data: evt.data});
901
+ allExecEvents.push(evt.data);
902
+ state.messages[msgIdx].parts = [...msgParts];
903
+ state.messages[msgIdx]._streamingText = '';
904
+ state.messages[msgIdx].exec_events = [...allExecEvents];
778
905
  throttledStreamUpdate(msgIdx);
779
906
  } else if (evt.type === 'task_list_update') {
780
907
  // 任务列表 JSON 直推更新(exec 模式)
@@ -797,11 +924,15 @@ async function sendMessage() {
797
924
  }
798
925
  }
799
926
  } else if (evt.type === 'done') {
927
+ flushCurrentText();
800
928
  // done 事件提供最终事件列表(可能有去重/合并)
801
929
  if (evt.exec_events && evt.exec_events.length > 0) {
802
- execEventsReceived = evt.exec_events;
803
- state.messages[msgIdx].exec_events = [...execEventsReceived];
930
+ allExecEvents = evt.exec_events;
804
931
  }
932
+ state.messages[msgIdx].parts = [...msgParts];
933
+ state.messages[msgIdx]._streamingText = '';
934
+ state.messages[msgIdx].exec_events = [...allExecEvents];
935
+ state.messages[msgIdx].content = msgParts.filter(p => p.type === 'text').map(p => p.content).join('\n\n') || '(无回复)';
805
936
  } else if (evt.type === 'reasoning_delta') {
806
937
  // 模型推理过程增量文本(OpenAI o1/o3/DeepSeek-R1 等推理模型)
807
938
  if (!state.messages[msgIdx].reasoning) state.messages[msgIdx].reasoning = '';
@@ -812,22 +943,25 @@ async function sendMessage() {
812
943
  state.messages[msgIdx].reasoning = evt.content;
813
944
  throttledStreamUpdate(msgIdx);
814
945
  } else if (evt.type === 'error') {
815
- fullResponse = '❌ ' + evt.error;
816
- state.messages[msgIdx].content = fullResponse;
946
+ flushCurrentText();
947
+ currentText = '❌ ' + evt.error;
948
+ msgParts.push({type: 'text', content: currentText});
949
+ state.messages[msgIdx].parts = [...msgParts];
950
+ state.messages[msgIdx]._streamingText = '';
951
+ state.messages[msgIdx].content = msgParts.filter(p => p.type === 'text').map(p => p.content).join('\n\n');
817
952
  }
818
953
  } catch (e) { /* skip malformed */ }
819
954
  }
820
955
  }
821
956
 
822
957
  // Finalize message
958
+ flushCurrentText();
823
959
  if (state.messages[msgIdx]) {
824
960
  state.messages[msgIdx].streaming = false;
825
- if (execEventsReceived.length > 0) {
826
- state.messages[msgIdx].exec_events = execEventsReceived;
827
- }
828
- if (!state.messages[msgIdx].content) {
829
- state.messages[msgIdx].content = '(无回复)';
830
- }
961
+ state.messages[msgIdx].parts = [...msgParts];
962
+ state.messages[msgIdx]._streamingText = '';
963
+ state.messages[msgIdx].exec_events = allExecEvents;
964
+ state.messages[msgIdx].content = msgParts.filter(p => p.type === 'text').map(p => p.content).join('\n\n') || '(无回复)';
831
965
  }
832
966
 
833
967
  // Task list 已通过 SSE task_list_update 事件实时推送,无需再轮询
@@ -843,10 +977,9 @@ async function sendMessage() {
843
977
  state.agentSessions[state.activeAgent] = [...state.sessions];
844
978
  renderSessions();
845
979
 
846
- // Auto-play TTS if enabled (skip command execution results)
847
- if (ttsManager.enabled && fullResponse && !fullResponse.match(/^\s*[✅❌⏰]\s*\[执行结果\]/m)) {
848
- const idx = state.messages.length - 1;
849
- ttsManager.speak(idx);
980
+ // ── 分段流式 TTS:刷新剩余缓冲区 ──
981
+ if (ttsManager.enabled && ttsManager._streamActive) {
982
+ ttsManager.streamFlush();
850
983
  }
851
984
  } catch (e) {
852
985
  if (e.name === 'AbortError') {