myagent-ai 1.6.0 → 1.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/agents/base.py CHANGED
@@ -110,7 +110,11 @@ class BaseAgent(ABC):
         return response
 
     async def _call_llm_stream(self, messages, tools=None, stream_response=None, **kwargs):
-        """Call the LLM and stream tokens to the SSE response"""
+        """Call the LLM and stream tokens to the SSE response.
+
+        When stream_response is provided, content is written to the SSE stream token by token.
+        Tool-call deltas are accumulated, and a complete LLMResponse is returned when the stream ends.
+        """
         if not self.llm:
             return LLMResponse(success=False, error="LLM not initialized")
 
@@ -118,6 +122,8 @@ class BaseAgent(ABC):
         if not stream_response:
             return await self._call_llm(messages, tools=tools, **kwargs)
 
+        import asyncio as _asyncio
+
         self.llm._ensure_client()
         msg_dicts = [m.to_dict() if hasattr(m, 'to_dict') else m for m in messages]
         request_kwargs = {
@@ -125,27 +131,209 @@ class BaseAgent(ABC):
             "messages": msg_dicts,
             "temperature": self.llm.temperature,
             "max_tokens": self.llm.max_tokens,
-            "stream": False,  # We'll handle streaming ourselves
+            "stream": True,
         }
         if tools:
             request_kwargs["tools"] = tools
             request_kwargs["tool_choice"] = "auto"
         request_kwargs.update(kwargs)
 
+        full_text = ""
+        tool_calls_acc: Dict[int, Dict] = {}  # index -> {id, name, arguments_str}
+        finish_reason = ""
+
+        async def _write_sse(data: dict):
+            """Write one event to the SSE stream, ignoring client-disconnect errors"""
+            try:
+                await stream_response.write(
+                    ("data: " + json.dumps(data, ensure_ascii=False) + "\n\n").encode()
+                )
+            except Exception:
+                pass  # Client disconnected
+
         try:
             if self.llm.provider in self.llm._OPENAI_COMPATIBLE_PROVIDERS or self.llm.provider == "zhipu":
-                response = await self.llm._run_with_retry(self.llm._chat_openai, request_kwargs)
+                loop = _asyncio.get_running_loop()
+
+                def _create_stream():
+                    return self.llm._client.chat.completions.create(**request_kwargs)
+
+                stream = await loop.run_in_executor(None, _create_stream)
+
+                def _next_chunk(it):
+                    try:
+                        return next(it)
+                    except StopIteration:
+                        return None
+
+                iterator = iter(stream)
+                while True:
+                    chunk = await loop.run_in_executor(None, _next_chunk, iterator)
+                    if chunk is None:
+                        break
+                    if not chunk.choices:
+                        if hasattr(chunk, 'usage') and chunk.usage:
+                            self.llm._record_usage(
+                                {"prompt_tokens": chunk.usage.prompt_tokens,
+                                 "completion_tokens": chunk.usage.completion_tokens,
+                                 "total_tokens": chunk.usage.total_tokens},
+                                request_kwargs["model"],
+                            )
+                        continue
+
+                    delta = chunk.choices[0].delta
+                    if chunk.choices[0].finish_reason:
+                        finish_reason = chunk.choices[0].finish_reason
+
+                    # Handle content delta (stream to client)
+                    if delta.content:
+                        full_text += delta.content
+                        await _write_sse({"type": "text_delta", "content": delta.content})
+
+                    # Handle tool_call deltas (accumulate)
+                    if hasattr(delta, 'tool_calls') and delta.tool_calls:
+                        for tc_delta in delta.tool_calls:
+                            idx = tc_delta.index if hasattr(tc_delta, 'index') else 0
+                            if idx not in tool_calls_acc:
+                                tool_calls_acc[idx] = {"id": "", "name": "", "arguments": ""}
+                            if tc_delta.id:
+                                tool_calls_acc[idx]["id"] = tc_delta.id
+                            if hasattr(tc_delta, 'function') and tc_delta.function:
+                                if tc_delta.function.name:
+                                    tool_calls_acc[idx]["name"] = tc_delta.function.name
+                                if tc_delta.function.arguments:
+                                    tool_calls_acc[idx]["arguments"] += tc_delta.function.arguments
+
+                    # Handle usage in final chunk
+                    if hasattr(chunk, 'usage') and chunk.usage:
+                        self.llm._record_usage(
+                            {"prompt_tokens": chunk.usage.prompt_tokens,
+                             "completion_tokens": chunk.usage.completion_tokens,
+                             "total_tokens": chunk.usage.total_tokens},
+                            request_kwargs["model"],
+                        )
+
             elif self.llm.provider == "anthropic":
-                response = await self.llm._run_with_retry(self.llm._chat_anthropic, messages, request_kwargs)
+                loop = _asyncio.get_running_loop()
+
+                system_msg = ""
+                anth_messages = []
+                for m in messages:
+                    role = m.role if hasattr(m, 'role') else m.get("role", "user")
+                    content = m.content if hasattr(m, 'content') else m.get("content", "")
+                    if role == "system":
+                        system_msg = content
+                        continue
+                    anth_messages.append({"role": role, "content": content})
+
+                create_kwargs = {
+                    "model": self.llm.model,
+                    "messages": anth_messages,
+                    "max_tokens": self.llm.max_tokens,
+                    "stream": True,
+                }
+                if system_msg:
+                    create_kwargs["system"] = system_msg
+
+                def _create_stream():
+                    return self.llm._client.messages.create(**create_kwargs)
+
+                stream = await loop.run_in_executor(None, _create_stream)
+
+                def _next_event(it):
+                    try:
+                        return next(it)
+                    except StopIteration:
+                        return None
+
+                iterator = iter(stream)
+                while True:
+                    event = await loop.run_in_executor(None, _next_event, iterator)
+                    if event is None:
+                        break
+                    if event.type == "content_block_delta":
+                        if hasattr(event.delta, "text"):
+                            full_text += event.delta.text
+                            await _write_sse({"type": "text_delta", "content": event.delta.text})
+                    elif event.type == "message_stop":
+                        finish_reason = "stop"
+
             elif self.llm.provider == "ollama":
-                response = await self.llm._run_with_retry(self.llm._chat_ollama, request_kwargs)
+                loop = _asyncio.get_running_loop()
+                import requests as req_lib
+
+                url = f"{self.llm.base_url}/api/chat"
+                payload = {
+                    "model": self.llm.model,
+                    "messages": msg_dicts,
+                    "stream": True,
+                    "options": {
+                        "temperature": self.llm.temperature,
+                        "num_predict": self.llm.max_tokens,
+                    },
+                }
+
+                def _request():
+                    r = req_lib.post(url, json=payload, stream=True, timeout=self.llm.timeout)
+                    r.raise_for_status()
+                    return r.iter_lines()
+
+                lines_iter = await loop.run_in_executor(None, _request)
+
+                def _next_line(it):
+                    try:
+                        return next(it)
+                    except StopIteration:
+                        return None
+
+                iterator = iter(lines_iter)
+                while True:
+                    line = await loop.run_in_executor(None, _next_line, iterator)
+                    if line is None:
+                        break
+                    try:
+                        data = json.loads(line.decode('utf-8') if isinstance(line, bytes) else line)
+                        content = data.get("message", {}).get("content", "")
+                        if content:
+                            full_text += content
+                            await _write_sse({"type": "text_delta", "content": content})
+                        if data.get("done"):
+                            finish_reason = "stop"
+                            # Record usage from Ollama
+                            usage = data.get("prompt_eval_count") or data.get("eval_count")
+                            if data.get("prompt_eval_count"):
+                                self.llm._record_usage(
+                                    {"prompt_tokens": data.get("prompt_eval_count", 0),
+                                     "completion_tokens": data.get("eval_count", 0),
+                                     "total_tokens": data.get("prompt_eval_count", 0) + data.get("eval_count", 0)},
+                                    self.llm.model,
+                                )
+                    except Exception:
+                        continue
             else:
-                return LLMResponse(success=False, error="Unknown provider")
-
-            if response.usage:
-                self.llm._record_usage(response.usage, response.model)
-
-            return response
+                return LLMResponse(success=False, error="Unknown provider; streaming not supported")
+
+            # Build tool_calls list from accumulated deltas
+            final_tool_calls = []
+            for idx in sorted(tool_calls_acc.keys()):
+                tc = tool_calls_acc[idx]
+                try:
+                    args = json.loads(tc["arguments"]) if tc["arguments"] else {}
+                except json.JSONDecodeError:
+                    args = {}
+                final_tool_calls.append({
+                    "id": tc["id"],
+                    "name": tc["name"],
+                    "arguments": args,
+                })
+
+            return LLMResponse(
+                success=True,
+                content=full_text,
+                tool_calls=final_tool_calls,
+                finish_reason=finish_reason,
+                model=request_kwargs.get("model", self.llm.model),
+            )
        except Exception as e:
             logger.error(f"LLM streaming call failed: {e}")
             return LLMResponse(success=False, error=str(e))
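The heart of the new OpenAI-compatible branch is the tool-call delta accumulator: streamed function-call arguments arrive as string fragments keyed by index and only parse as valid JSON once the stream ends. Below is a minimal, self-contained sketch of that accumulation; the SimpleNamespace objects stand in for the SDK's chunk deltas, and the tool name and arguments are hypothetical.

```python
import json
from types import SimpleNamespace

def delta(index, id=None, name=None, arguments=None):
    # Stand-in for one chunk.choices[0].delta.tool_calls[i] from the SDK.
    return SimpleNamespace(index=index, id=id,
                           function=SimpleNamespace(name=name, arguments=arguments))

# One tool call whose JSON arguments arrive split across three deltas.
stream_deltas = [
    delta(0, id="call_1", name="run_shell"),
    delta(0, arguments='{"command": '),
    delta(0, arguments='"ls -la"}'),
]

tool_calls_acc = {}  # index -> {"id", "name", "arguments"} (string fragments)
for d in stream_deltas:
    acc = tool_calls_acc.setdefault(d.index, {"id": "", "name": "", "arguments": ""})
    if d.id:
        acc["id"] = d.id
    if d.function.name:
        acc["name"] = d.function.name
    if d.function.arguments:
        acc["arguments"] += d.function.arguments

# Only after the stream ends do the fragments form valid JSON.
final_tool_calls = [
    {"id": tc["id"], "name": tc["name"],
     "arguments": json.loads(tc["arguments"]) if tc["arguments"] else {}}
    for tc in (tool_calls_acc[i] for i in sorted(tool_calls_acc))
]
print(final_tool_calls)
# [{'id': 'call_1', 'name': 'run_shell', 'arguments': {'command': 'ls -la'}}]
```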
package/package.json CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "myagent-ai",
-  "version": "1.6.0",
+  "version": "1.6.1",
   "description": "本地桌面端执行型AI助手 - Open Interpreter 风格 | Local Desktop Execution-Oriented AI Assistant",
   "main": "main.py",
   "bin": {
package/web/api_server.py CHANGED
@@ -394,18 +394,23 @@ class ApiServer:
         chat_mode = data.get("mode", "")  # "exec" = execution mode
         escalated = data.get("escalated", False)  # temporary escalation to local
 
-        # ── Global execution lock check ──
+        # ── Global execution lock check + acquire (atomic: no await between check and set) ──
         agent_cfg_early = self._read_agent_config(agent_path)
         execution_mode = agent_cfg_early.get("execution_mode", "sandbox") if agent_cfg_early else "sandbox"
         needs_lock_check = (execution_mode == "local") or escalated
-        if needs_lock_check and self._execution_lock["locked"]:
-            locked_by = self._execution_lock["locked_by"]
-            if locked_by and locked_by != agent_path:
-                return web.json_response({
-                    "error": f"This agent cannot run in local mode because the global lock is held by {locked_by}. Release the lock or switch to sandbox mode first.",
-                    "locked_by": locked_by,
-                    "locked_at": self._execution_lock.get("locked_at"),
-                }, status=423)
+        if needs_lock_check:
+            if self._execution_lock["locked"]:
+                locked_by = self._execution_lock["locked_by"]
+                if locked_by and locked_by != agent_path:
+                    return web.json_response({
+                        "error": f"This agent cannot run in local mode because the global lock is held by {locked_by}. Release the lock or switch to sandbox mode first.",
+                        "locked_by": locked_by,
+                        "locked_at": self._execution_lock.get("locked_at"),
+                    }, status=423)
+            # Acquire lock atomically (no await between check and set)
+            self._execution_lock["locked"] = True
+            self._execution_lock["locked_by"] = agent_path
+            self._execution_lock["locked_at"] = time.time()
 
         # ── Exec mode: inject task-planning context ──
         task_plan_context = ""
@@ -467,6 +472,12 @@
         except Exception as e:
             logger.error(f"Chat error: {e}", exc_info=True)
             return web.json_response({"error": str(e)}, status=500)
+        finally:
+            # Release execution lock if we acquired it
+            if needs_lock_check and self._execution_lock["locked_by"] == agent_path:
+                self._execution_lock["locked"] = False
+                self._execution_lock["locked_by"] = None
+                self._execution_lock["locked_at"] = None
 
     async def handle_chat_stream(self, request):
         """POST /api/chat/stream - SSE streaming chat"""
@@ -2369,7 +2380,11 @@ class ApiServer:
 
     async def _stream_process_message(self, user_message, session_id, stream_response,
                                       agent_path=None, agent_system_prompt=None, chat_mode=""):
-        """Process a message with a streaming LLM call, pushing tokens to SSE in real time"""
+        """Process a message with the full agent loop (tool calls / action execution) plus real-time streaming output.
+
+        Implements the same plan-execute-reflect loop as MainAgent._process_inner(),
+        but streams the LLM's text response to SSE token by token.
+        """
         if not self.core.main_agent or not self.core.llm:
             result = await self.core.process_message(user_message, session_id)
             await stream_response.write(("data: " + json.dumps({"type": "text", "content": result}) + "\n\n").encode())
@@ -2377,6 +2392,7 @@
 
         agent = self.core.main_agent
         from agents.base import AgentContext
+        from core.utils import safe_json_parse, truncate_str
         context = AgentContext(session_id=session_id, user_message=user_message)
 
         # Set agent context through context metadata instead of instance attributes
@@ -2384,6 +2400,9 @@
             context.metadata["agent_override_path"] = agent_path
         context.metadata["chat_mode"] = chat_mode
 
+        # Clear execution events from previous runs
+        agent.clear_execution_events()
+
         # Load memory
         if agent.memory_agent:
             mem_ctx = AgentContext(task_id="", session_id=session_id, user_message=user_message,
@@ -2396,30 +2415,185 @@
         if agent.memory:
             agent.memory.add_short_term(session_id=session_id, role="user", content=user_message)
 
-        # Build messages
-        messages = agent._build_messages(context)
-        tools = agent._get_tools()
+        async def _write_sse(data: dict):
+            """Write SSE event, ignoring client disconnect errors"""
+            try:
+                await stream_response.write(
+                    ("data: " + json.dumps(data, ensure_ascii=False) + "\n\n").encode()
+                )
+            except Exception:
+                pass  # Client disconnected
 
-        # Call LLM with streaming
-        full_text = ""
-        try:
+        # Full agent loop (plan-execute-reflect) — mirrors MainAgent._process_inner
+        max_iter = agent.config.agent.max_iterations if agent.config else 30
+        final_response = ""
+        iteration = 0
+
+        while iteration < max_iter:
+            iteration += 1
+
+            # Clear any intermediate text from previous iterations on the frontend
+            if iteration > 1:
+                await _write_sse({"type": "clear_text"})
+
+            # Build messages
+            messages = agent._build_messages(context)
+            tools = agent._get_tools()
+
+            # Call LLM with streaming — tokens are pushed to SSE in real-time
             response = await agent._call_llm_stream(messages, tools=tools, stream_response=stream_response)
             if not response.success:
-                await stream_response.write(("data: " + json.dumps({"type": "text", "content": f"⚠️ LLM call failed: {response.error}"}) + "\n\n").encode())
+                await _write_sse({"type": "text", "content": f"⚠️ LLM call failed: {response.error}"})
                 return f"⚠️ LLM call failed: {response.error}"
 
-            full_text = response.content or ""
-            await stream_response.write(("data: " + json.dumps({"type": "text", "content": full_text}) + "\n\n").encode())
-        except Exception as e:
-            logger.error(f"Stream LLM error: {e}")
-            await stream_response.write(("data: " + json.dumps({"type": "text", "content": f"❌ Processing failed: {str(e)}"}) + "\n\n").encode())
-            return f"❌ Processing failed: {str(e)}"
+            content = response.content or ""
 
-        # Save assistant response
-        if agent.memory and full_text:
-            agent.memory.add_short_term(session_id=session_id, role="assistant", content=full_text)
+            # ── Check for tool calls (OpenAI function calling) ──
+            if response.tool_calls:
+                # Send tool_call event to frontend
+                agent._add_exec_event("tool_call", {
+                    "title": f"Calling {len(response.tool_calls)} tool(s)",
+                    "tool_names": [tc["name"] for tc in response.tool_calls],
+                })
+                await _write_sse({"type": "exec_event", "data": {
+                    "type": "tool_call",
+                    "title": f"Calling {len(response.tool_calls)} tool(s)",
+                }})
+
+                # Add assistant tool_calls message to history (OpenAI format requirement)
+                context.conversation_history.append(
+                    Message(role="assistant", content=response.content or "",
+                            tool_calls=response.tool_calls)
+                )
 
-        return full_text
+                # Execute tool calls
+                tool_results = await agent._handle_tool_calls(response.tool_calls, context, "")
+
+                # Send tool_result events to frontend
+                for tc, result in tool_results:
+                    success = result.get("success", False)
+                    agent._add_exec_event("tool_result", {
+                        "title": f"Tool result: {tc['name']}",
+                        "tool_name": tc["name"],
+                        "success": success,
+                        "summary": truncate_str(result.get("output", result.get("error", "")), 500),
+                    })
+                    await _write_sse({"type": "exec_event", "data": {
+                        "type": "tool_result",
+                        "title": f"Tool result: {tc['name']}",
+                        "success": success,
+                    }})
+
+                # Add tool results to history
+                for tc, result in tool_results:
+                    context.conversation_history.append(
+                        Message(role="tool", content=json.dumps(result, ensure_ascii=False),
+                                tool_call_id=tc["id"], name=tc["name"])
+                    )
+                continue  # Next iteration — let the LLM process tool results
+
+            # ── Try parsing JSON action instructions ──
+            action_data = safe_json_parse(content)
+
+            if action_data and isinstance(action_data, dict):
+                # Has structured action instructions
+                if "actions" in action_data:
+                    # Execute action list
+                    results = await agent._execute_actions(action_data, context, "")
+
+                    # Send execution events accumulated by _execute_actions
+                    # _execute_actions calls _add_exec_event internally; relay them
+                    recent_events = agent.get_execution_events()
+                    # Only send events from this round (events added since last check)
+                    # Since we cleared at start, send all new events
+                    for evt in recent_events:
+                        await _write_sse({"type": "exec_event", "data": evt})
+
+                    # Add assistant action message to conversation history
+                    context.conversation_history.append(Message(role="assistant", content=content))
+
+                    result_summary = agent._summarize_action_results(results)
+
+                    # Handle timeout diagnostics (same as _process_inner)
+                    has_timeout = any(r.get("timed_out") for r in results)
+                    timeout_detail = ""
+                    if has_timeout:
+                        timeout_details = []
+                        for i, r in enumerate(results, 1):
+                            if r.get("timed_out"):
+                                diag = r.get("timeout_diagnosis", {})
+                                timeout_details.append(
+                                    f"### Timeout diagnosis for command {i}\n"
+                                    f"- Cause: {diag.get('diagnosis', 'unknown')}\n"
+                                    f"- Progress: {diag.get('progress', 'unknown')}\n"
+                                    f"- Retry recommended: {'yes' if diag.get('should_retry') else 'no'}\n"
+                                    f"- Retry strategy: {diag.get('retry_strategy', 'none')}\n"
+                                )
+                        timeout_detail = "\n\n## ⏰ Timeout diagnosis details\n" + "\n".join(timeout_details)
+
+                    feedback_msg = f"[Execution results]\n{result_summary}\n\nPlease continue based on the results above."
+                    if timeout_detail:
+                        feedback_msg += timeout_detail + "\n\nDecide the next step based on the diagnosis above."
+
+                    context.conversation_history.append(
+                        Message(role="user", content=feedback_msg)
+                    )
+
+                    # Check if all actions succeeded
+                    all_success = all(r.get("success", False) for r in results)
+                    if all_success and results:
+                        final_response = action_data.get("thought", "")
+                        if "plan" in action_data and action_data["plan"]:
+                            final_response += "\n\nCompleted: " + " → ".join(action_data["plan"])
+                        break
+
+                    # Check if timeout diagnosis suggests aborting
+                    if has_timeout:
+                        should_abort = False
+                        abort_reasons = []
+                        for i, r in enumerate(results, 1):
+                            if r.get("timed_out"):
+                                diag = r.get("timeout_diagnosis", {})
+                                if diag.get("should_retry") is False:
+                                    should_abort = True
+                                    abort_reasons.append(
+                                        f"Command {i}: {diag.get('diagnosis', 'unrecoverable timeout')}"
+                                    )
+                        if should_abort:
+                            abort_msg = (
+                                "[System notice] The following commands were terminated due to timeouts, and the diagnosis indicates they should not be retried:\n"
+                            )
+                            for reason in abort_reasons:
+                                abort_msg += f"- {reason}\n"
+                            abort_msg += (
+                                "\nReply directly in plain text or in the {\"type\": \"final_answer\", \"content\": \"...\"} "
+                                "format, telling the user why the task cannot be completed and suggesting alternatives."
+                            )
+                            context.conversation_history.append(
+                                Message(role="user", content=abort_msg)
+                            )
+                            continue  # Let LLM generate final_answer
+
+                    continue  # Next iteration
+
+                # Single action — final_answer
+                if action_data.get("type") == "final_answer":
+                    final_response = action_data.get("content", content)
+                    # Stream the final answer text if not already streamed
+                    if final_response and final_response != content:
+                        await _write_sse({"type": "text_delta", "content": final_response})
+                    break
+
+            # ── Pure text response (no actions/tool calls) — this is the final answer ──
+            # Content was already streamed token-by-token via _call_llm_stream
+            final_response = content
+            break
+
+        # Save assistant response to memory
+        if agent.memory and final_response:
+            agent.memory.add_short_term(session_id=session_id, role="assistant", content=final_response)
+
+        return final_response
 
     async def handle_reload_config(self, request):
         """POST /api/config/reload - Hot reload from the config file (no restart needed)
@@ -3228,11 +3402,14 @@ class ApiServer:
         )
 
         # 3. Broadcast to all non-muted member agents, processed in parallel
-        responses = []
         active_members = [m for m in group.members if not m.muted]
 
         import asyncio
+        # Build a member_order map for deterministic sorting after gather
+        member_order = {m.agent_path: i for i, m in enumerate(active_members)}
+
         async def process_agent_member(member):
+            """Process a single member's response (DO NOT save messages here)"""
             try:
                 agent_path = member.agent_path
                 agent_cfg = self._read_agent_config(agent_path)
@@ -3257,17 +3434,6 @@
             avatar = agent_cfg.get("avatar_emoji", "🤖") or "🤖"
             display_name = agent_cfg.get("name", agent_path)
 
-            # Save the agent's reply to the group messages
-            agent_msg = GroupMessage(
-                group_id=gid,
-                sender="agent",
-                sender_name=display_name,
-                sender_avatar=avatar,
-                content=response,
-                agent_path=agent_path,
-            )
-            mgr.add_message(agent_msg)
-
             return {
                 "ok": True,
                 "agent_path": agent_path,
@@ -3288,23 +3454,41 @@
         # Call all member agents in parallel
         tasks = [process_agent_member(m) for m in active_members]
         try:
-            responses = await asyncio.gather(*tasks, return_exceptions=True)
+            gather_results = await asyncio.gather(*tasks, return_exceptions=True)
         except Exception as e:
             logger.error(f"Group message broadcast error: {e}")
             tp.update_task_status(task_id, "failed", last_message=f"Broadcast error: {str(e)}")
             return web.json_response({"error": f"Group message broadcast error: {str(e)}"}, status=500)
 
-        # Handle exception results
-        final_responses = []
-        for r in responses:
+        # Handle exception results and sort by original member order
+        raw_responses = []
+        for r in gather_results:
             if isinstance(r, Exception):
-                final_responses.append({
+                raw_responses.append({
                     "ok": False, "agent_path": "unknown",
                     "name": "unknown", "avatar": "❌",
                     "response": f"Exception: {str(r)}",
                 })
             else:
-                final_responses.append(r)
+                raw_responses.append(r)
+
+        # Sort by original member order to ensure deterministic message ordering
+        final_responses = sorted(
+            raw_responses,
+            key=lambda r: member_order.get(r.get("agent_path", ""), 999999)
+        )
+
+        # Save agent messages sequentially in sorted order
+        for resp in final_responses:
+            agent_msg = GroupMessage(
+                group_id=gid,
+                sender="agent",
+                sender_name=resp["name"],
+                sender_avatar=resp["avatar"],
+                content=resp["response"],
+                agent_path=resp["agent_path"],
+            )
+            mgr.add_message(agent_msg)
 
         # Update task status
         has_failure = any(not r.get("ok") for r in final_responses)
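The group-broadcast change moves message persistence out of the per-member coroutines: replies used to be saved as each coroutine finished, i.e. in completion order, whereas saving after an explicit sort keyed on the original member list makes ordering deterministic even when some results are exception placeholders. A small sketch of the pattern, with hypothetical member paths and sleeps standing in for uneven agent latency:

```python
import asyncio

async def reply(agent_path: str, delay: float) -> dict:
    await asyncio.sleep(delay)  # fast agents finish first
    return {"agent_path": agent_path, "response": f"hi from {agent_path}"}

async def main():
    members = ["agents/alpha", "agents/beta", "agents/gamma"]
    member_order = {p: i for i, p in enumerate(members)}
    delays = [0.03, 0.01, 0.02]  # beta finishes first, alpha last
    results = await asyncio.gather(
        *(reply(p, d) for p, d in zip(members, delays)),
        return_exceptions=True,
    )
    # Exceptions become placeholder entries, then everything is sorted
    # by the original member order before any side effects (saving) run.
    cleaned = [
        r if not isinstance(r, Exception) else {"agent_path": "unknown", "response": str(r)}
        for r in results
    ]
    ordered = sorted(cleaned, key=lambda r: member_order.get(r["agent_path"], 999999))
    for r in ordered:
        print(r["response"])  # always alpha, beta, gamma order

asyncio.run(main())
```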
package/web/ui/chat.html CHANGED
@@ -3608,6 +3608,19 @@ async function sendMessage() {
           fullResponse = evt.content;
           state.messages[msgIdx].content = evt.content;
           renderMessages();
+        } else if (evt.type === 'text_delta') {
+          // Incremental streaming token
+          fullResponse += evt.content;
+          state.messages[msgIdx].content = fullResponse;
+          renderMessages();
+        } else if (evt.type === 'clear_text') {
+          // Clear intermediate text from previous agent loop iterations
+          fullResponse = '';
+          state.messages[msgIdx].content = '';
+          renderMessages();
+        } else if (evt.type === 'exec_event') {
+          // Real-time execution event (tool call, code exec, skill result, etc.)
+          execEventsReceived.push(evt.data);
         } else if (evt.type === 'done') {
           execEventsReceived = evt.exec_events || [];
         } else if (evt.type === 'error') {
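The three new chat.html branches mirror the server-side event vocabulary: `text_delta` appends one token, `clear_text` resets the message bubble when a new agent-loop iteration starts, and `exec_event` feeds the activity panel. Replayed in Python for clarity, with hand-written sample frames in the shape the server emits:

```python
import json

frames = [
    'data: {"type": "text_delta", "content": "Let me check"}\n\n',
    'data: {"type": "exec_event", "data": {"type": "tool_call", "title": "Calling 1 tool(s)"}}\n\n',
    'data: {"type": "clear_text"}\n\n',  # a new loop iteration started
    'data: {"type": "text_delta", "content": "Found "}\n\n',
    'data: {"type": "text_delta", "content": "3 files."}\n\n',
]

full_response, exec_events_received = "", []
for frame in frames:
    evt = json.loads(frame.removeprefix("data: ").strip())
    if evt["type"] == "text_delta":
        full_response += evt["content"]     # incremental token: append
    elif evt["type"] == "clear_text":
        full_response = ""                  # new agent-loop iteration: reset
    elif evt["type"] == "exec_event":
        exec_events_received.append(evt["data"])  # real-time activity event

print(full_response)         # Found 3 files.
print(exec_events_received)  # [{'type': 'tool_call', 'title': 'Calling 1 tool(s)'}]
```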