autoglm-gui 1.4.0__py3-none-any.whl → 1.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (120) hide show
  1. AutoGLM_GUI/__init__.py +11 -0
  2. AutoGLM_GUI/__main__.py +26 -8
  3. AutoGLM_GUI/actions/__init__.py +6 -0
  4. AutoGLM_GUI/actions/handler.py +196 -0
  5. AutoGLM_GUI/actions/types.py +15 -0
  6. AutoGLM_GUI/adb/__init__.py +53 -0
  7. AutoGLM_GUI/adb/apps.py +227 -0
  8. AutoGLM_GUI/adb/connection.py +323 -0
  9. AutoGLM_GUI/adb/device.py +171 -0
  10. AutoGLM_GUI/adb/input.py +67 -0
  11. AutoGLM_GUI/adb/screenshot.py +11 -0
  12. AutoGLM_GUI/adb/timing.py +167 -0
  13. AutoGLM_GUI/adb_plus/keyboard_installer.py +4 -2
  14. AutoGLM_GUI/adb_plus/qr_pair.py +8 -8
  15. AutoGLM_GUI/adb_plus/screenshot.py +22 -1
  16. AutoGLM_GUI/adb_plus/serial.py +38 -20
  17. AutoGLM_GUI/adb_plus/touch.py +4 -9
  18. AutoGLM_GUI/agents/__init__.py +51 -0
  19. AutoGLM_GUI/agents/events.py +19 -0
  20. AutoGLM_GUI/agents/factory.py +153 -0
  21. AutoGLM_GUI/agents/glm/__init__.py +7 -0
  22. AutoGLM_GUI/agents/glm/agent.py +292 -0
  23. AutoGLM_GUI/agents/glm/message_builder.py +81 -0
  24. AutoGLM_GUI/agents/glm/parser.py +110 -0
  25. AutoGLM_GUI/agents/glm/prompts_en.py +77 -0
  26. AutoGLM_GUI/agents/glm/prompts_zh.py +75 -0
  27. AutoGLM_GUI/agents/mai/__init__.py +28 -0
  28. AutoGLM_GUI/agents/mai/agent.py +405 -0
  29. AutoGLM_GUI/agents/mai/parser.py +254 -0
  30. AutoGLM_GUI/agents/mai/prompts.py +103 -0
  31. AutoGLM_GUI/agents/mai/traj_memory.py +91 -0
  32. AutoGLM_GUI/agents/protocols.py +27 -0
  33. AutoGLM_GUI/agents/stream_runner.py +188 -0
  34. AutoGLM_GUI/api/__init__.py +71 -11
  35. AutoGLM_GUI/api/agents.py +190 -229
  36. AutoGLM_GUI/api/control.py +9 -6
  37. AutoGLM_GUI/api/devices.py +112 -28
  38. AutoGLM_GUI/api/health.py +13 -0
  39. AutoGLM_GUI/api/history.py +78 -0
  40. AutoGLM_GUI/api/layered_agent.py +306 -181
  41. AutoGLM_GUI/api/mcp.py +11 -10
  42. AutoGLM_GUI/api/media.py +64 -1
  43. AutoGLM_GUI/api/scheduled_tasks.py +98 -0
  44. AutoGLM_GUI/api/version.py +23 -10
  45. AutoGLM_GUI/api/workflows.py +2 -1
  46. AutoGLM_GUI/config.py +72 -14
  47. AutoGLM_GUI/config_manager.py +98 -27
  48. AutoGLM_GUI/device_adapter.py +263 -0
  49. AutoGLM_GUI/device_manager.py +248 -29
  50. AutoGLM_GUI/device_protocol.py +266 -0
  51. AutoGLM_GUI/devices/__init__.py +49 -0
  52. AutoGLM_GUI/devices/adb_device.py +200 -0
  53. AutoGLM_GUI/devices/mock_device.py +185 -0
  54. AutoGLM_GUI/devices/remote_device.py +177 -0
  55. AutoGLM_GUI/exceptions.py +3 -3
  56. AutoGLM_GUI/history_manager.py +164 -0
  57. AutoGLM_GUI/i18n.py +81 -0
  58. AutoGLM_GUI/metrics.py +13 -20
  59. AutoGLM_GUI/model/__init__.py +5 -0
  60. AutoGLM_GUI/model/message_builder.py +69 -0
  61. AutoGLM_GUI/model/types.py +24 -0
  62. AutoGLM_GUI/models/__init__.py +10 -0
  63. AutoGLM_GUI/models/history.py +96 -0
  64. AutoGLM_GUI/models/scheduled_task.py +71 -0
  65. AutoGLM_GUI/parsers/__init__.py +22 -0
  66. AutoGLM_GUI/parsers/base.py +50 -0
  67. AutoGLM_GUI/parsers/phone_parser.py +58 -0
  68. AutoGLM_GUI/phone_agent_manager.py +118 -367
  69. AutoGLM_GUI/platform_utils.py +31 -2
  70. AutoGLM_GUI/prompt_config.py +15 -0
  71. AutoGLM_GUI/prompts/__init__.py +32 -0
  72. AutoGLM_GUI/scheduler_manager.py +304 -0
  73. AutoGLM_GUI/schemas.py +272 -63
  74. AutoGLM_GUI/scrcpy_stream.py +159 -37
  75. AutoGLM_GUI/server.py +3 -1
  76. AutoGLM_GUI/socketio_server.py +114 -29
  77. AutoGLM_GUI/state.py +10 -30
  78. AutoGLM_GUI/static/assets/{about-DeclntHg.js → about-BQm96DAl.js} +1 -1
  79. AutoGLM_GUI/static/assets/alert-dialog-B42XxGPR.js +1 -0
  80. AutoGLM_GUI/static/assets/chat-C0L2gQYG.js +129 -0
  81. AutoGLM_GUI/static/assets/circle-alert-D4rSJh37.js +1 -0
  82. AutoGLM_GUI/static/assets/dialog-DZ78cEcj.js +45 -0
  83. AutoGLM_GUI/static/assets/history-DFBv7TGc.js +1 -0
  84. AutoGLM_GUI/static/assets/index-Bzyv2yQ2.css +1 -0
  85. AutoGLM_GUI/static/assets/{index-zQ4KKDHt.js → index-CmZSnDqc.js} +1 -1
  86. AutoGLM_GUI/static/assets/index-CssG-3TH.js +11 -0
  87. AutoGLM_GUI/static/assets/label-BCUzE_nm.js +1 -0
  88. AutoGLM_GUI/static/assets/logs-eoFxn5of.js +1 -0
  89. AutoGLM_GUI/static/assets/popover-DLsuV5Sx.js +1 -0
  90. AutoGLM_GUI/static/assets/scheduled-tasks-MyqGJvy_.js +1 -0
  91. AutoGLM_GUI/static/assets/square-pen-zGWYrdfj.js +1 -0
  92. AutoGLM_GUI/static/assets/textarea-BX6y7uM5.js +1 -0
  93. AutoGLM_GUI/static/assets/workflows-CYFs6ssC.js +1 -0
  94. AutoGLM_GUI/static/index.html +2 -2
  95. AutoGLM_GUI/types.py +142 -0
  96. {autoglm_gui-1.4.0.dist-info → autoglm_gui-1.5.0.dist-info}/METADATA +178 -92
  97. autoglm_gui-1.5.0.dist-info/RECORD +157 -0
  98. mai_agent/base.py +137 -0
  99. mai_agent/mai_grounding_agent.py +263 -0
  100. mai_agent/mai_naivigation_agent.py +526 -0
  101. mai_agent/prompt.py +148 -0
  102. mai_agent/unified_memory.py +67 -0
  103. mai_agent/utils.py +73 -0
  104. AutoGLM_GUI/api/dual_model.py +0 -311
  105. AutoGLM_GUI/dual_model/__init__.py +0 -53
  106. AutoGLM_GUI/dual_model/decision_model.py +0 -664
  107. AutoGLM_GUI/dual_model/dual_agent.py +0 -917
  108. AutoGLM_GUI/dual_model/protocols.py +0 -354
  109. AutoGLM_GUI/dual_model/vision_model.py +0 -442
  110. AutoGLM_GUI/mai_ui_adapter/agent_wrapper.py +0 -291
  111. AutoGLM_GUI/phone_agent_patches.py +0 -146
  112. AutoGLM_GUI/static/assets/chat-Iut2yhSw.js +0 -125
  113. AutoGLM_GUI/static/assets/dialog-BfdcBs1x.js +0 -45
  114. AutoGLM_GUI/static/assets/index-5hCCwHA7.css +0 -1
  115. AutoGLM_GUI/static/assets/index-DHF1NZh0.js +0 -12
  116. AutoGLM_GUI/static/assets/workflows-xiplap-r.js +0 -1
  117. autoglm_gui-1.4.0.dist-info/RECORD +0 -100
  118. {autoglm_gui-1.4.0.dist-info → autoglm_gui-1.5.0.dist-info}/WHEEL +0 -0
  119. {autoglm_gui-1.4.0.dist-info → autoglm_gui-1.5.0.dist-info}/entry_points.txt +0 -0
  120. {autoglm_gui-1.4.0.dist-info → autoglm_gui-1.5.0.dist-info}/licenses/LICENSE +0 -0
@@ -6,9 +6,13 @@ a decision model for planning and autoglm-phone for execution.
6
6
 
7
7
  import asyncio
8
8
  import json
9
- from typing import Any
9
+ import threading
10
+ from typing import TYPE_CHECKING, Any
10
11
 
11
12
  from agents import Agent, Runner, SQLiteSession, function_tool
13
+
14
+ if TYPE_CHECKING:
15
+ from agents.result import RunResultStreaming
12
16
  from agents.models.openai_chatcompletions import OpenAIChatCompletionsModel
13
17
  from fastapi import APIRouter
14
18
  from fastapi.responses import StreamingResponse
@@ -24,6 +28,11 @@ router = APIRouter()
24
28
  # 存储每个 session_id 对应的 SQLiteSession(内存模式)
25
29
  _sessions: dict[str, SQLiteSession] = {}
26
30
 
31
+ # ==================== 活跃运行管理 ====================
32
+ # 存储每个 session_id 对应的活跃 RunResultStreaming 实例,用于 abort
33
+ _active_runs: dict[str, "RunResultStreaming"] = {}
34
+ _active_runs_lock = threading.Lock()
35
+
27
36
 
28
37
  def _get_or_create_session(session_id: str) -> SQLiteSession:
29
38
  """获取或创建指定 session_id 的内存 session."""
@@ -44,9 +53,20 @@ def _clear_session(session_id: str) -> bool:
44
53
 
45
54
 
46
55
  def get_planner_model() -> str:
47
- """获取规划层使用的模型名称,从配置读取."""
48
- config = config_manager.get_effective_config()
49
- return config.decision_model_name or "glm-4.7"
56
+ """获取规划层使用的模型名称."""
57
+ config_manager.load_file_config()
58
+ effective_config = config_manager.get_effective_config()
59
+
60
+ model_name = effective_config.decision_model_name
61
+
62
+ if not model_name:
63
+ raise ValueError(
64
+ "决策模型未配置。使用分层代理模式需要配置决策模型。\n"
65
+ "请在全局配置中设置决策模型的 Base URL、模型名称和 API Key。"
66
+ )
67
+
68
+ logger.info(f"[LayeredAgent] Using decision model: {model_name}")
69
+ return model_name
50
70
 
51
71
 
52
72
  PLANNER_INSTRUCTIONS = """## 核心目标
@@ -146,7 +166,9 @@ def _sync_list_devices() -> str:
146
166
  _build_device_response_with_agent(d, agent_manager) for d in managed_devices
147
167
  ]
148
168
 
149
- return json.dumps(devices_with_agents, ensure_ascii=False, indent=2)
169
+ # Convert DeviceResponse Pydantic models to dicts before JSON serialization
170
+ devices_dict = [device.model_dump() for device in devices_with_agents]
171
+ return json.dumps(devices_dict, ensure_ascii=False, indent=2)
150
172
 
151
173
 
152
174
  @function_tool
@@ -271,20 +293,31 @@ async def chat(device_id: str, message: str) -> str:
271
293
 
272
294
 
273
295
  def _setup_openai_client() -> AsyncOpenAI:
274
- """设置 OpenAI 客户端,使用 AutoGLM 的配置"""
296
+ """设置 OpenAI 客户端,使用决策模型配置"""
275
297
  config_manager.load_file_config()
276
298
  effective_config = config_manager.get_effective_config()
277
299
 
278
- if not effective_config.base_url:
279
- raise ValueError("base_url not configured")
300
+ # 检查决策模型配置
301
+ decision_base_url = effective_config.decision_base_url
302
+ decision_api_key = effective_config.decision_api_key
280
303
 
281
- planner_model = get_planner_model()
282
- logger.info(f"[LayeredAgent] API Base URL: {effective_config.base_url}")
283
- logger.info(f"[LayeredAgent] Planner Model: {planner_model}")
304
+ if not decision_base_url:
305
+ raise ValueError(
306
+ "决策模型 Base URL 未配置。使用分层代理模式需要配置决策模型。\n"
307
+ "请在全局配置中设置决策模型的 Base URL、模型名称和 API Key。"
308
+ )
309
+
310
+ # decision_api_key 可以为 None(某些本地模型不需要)
311
+ planner_model = get_planner_model() # 这里会再次检查 model_name
312
+
313
+ logger.info("[LayeredAgent] Decision model config:")
314
+ logger.info(f" - Base URL: {decision_base_url}")
315
+ logger.info(f" - Model: {planner_model}")
316
+ logger.info(f" - API Key: {'***' if decision_api_key else 'None'}")
284
317
 
285
318
  return AsyncOpenAI(
286
- base_url=effective_config.base_url,
287
- api_key=effective_config.api_key,
319
+ base_url=decision_base_url,
320
+ api_key=decision_api_key or "EMPTY", # 某些本地模型需要非空字符串
288
321
  )
289
322
 
290
323
 
@@ -307,14 +340,35 @@ def _create_planner_agent(client: AsyncOpenAI) -> Agent[Any]:
307
340
  # Global agent instance (lazy initialized)
308
341
  _client: AsyncOpenAI | None = None
309
342
  _agent: Agent[Any] | None = None
343
+ _cached_config_hash: str | None = None
344
+
345
+
346
+ def _compute_config_hash() -> str:
347
+ import hashlib
348
+
349
+ config = config_manager.get_effective_config()
350
+ config_str = config.model_dump_json()
351
+ return hashlib.md5(config_str.encode()).hexdigest()
310
352
 
311
353
 
312
354
  def _ensure_agent() -> Agent[Any]:
313
- """Ensure the planner agent is initialized."""
314
- global _client, _agent
315
- if _agent is None:
355
+ global _client, _agent, _cached_config_hash
356
+
357
+ current_hash = _compute_config_hash()
358
+
359
+ if _agent is None or _cached_config_hash != current_hash:
360
+ if _agent is not None and _cached_config_hash != current_hash:
361
+ logger.info(
362
+ f"[LayeredAgent] Config changed (hash: {_cached_config_hash} -> {current_hash}), reloading agent..."
363
+ )
364
+
316
365
  _client = _setup_openai_client()
317
366
  _agent = _create_planner_agent(_client)
367
+ _cached_config_hash = current_hash
368
+ logger.info(
369
+ f"[LayeredAgent] Agent initialized/reloaded with config hash: {current_hash}"
370
+ )
371
+
318
372
  return _agent
319
373
 
320
374
 
@@ -343,18 +397,24 @@ async def layered_agent_chat(request: LayeredAgentRequest):
343
397
  - done: Final response
344
398
  - error: Error occurred
345
399
  """
400
+ from datetime import datetime
401
+
346
402
  from agents.stream_events import (
347
403
  RawResponsesStreamEvent,
348
404
  RunItemStreamEvent,
349
405
  )
350
406
 
407
+ from AutoGLM_GUI.history_manager import history_manager
408
+ from AutoGLM_GUI.models.history import ConversationRecord
409
+
351
410
  async def event_generator():
411
+ start_time = datetime.now()
412
+ final_output = ""
413
+ final_success = False
414
+
352
415
  try:
353
- # Ensure agent is initialized
354
416
  agent = _ensure_agent()
355
417
 
356
- # 获取或创建 session 以保持对话上下文
357
- # 优先使用 session_id,其次使用 device_id,最后使用默认值
358
418
  session_id = request.session_id or request.device_id or "default"
359
419
  session = _get_or_create_session(session_id)
360
420
 
@@ -366,179 +426,186 @@ async def layered_agent_chat(request: LayeredAgentRequest):
366
426
  session=session,
367
427
  )
368
428
 
429
+ # 保存活跃运行实例,用于 abort
430
+ with _active_runs_lock:
431
+ _active_runs[session_id] = result
432
+
369
433
  current_tool_call: dict[str, Any] | None = None
370
434
 
371
- async for event in result.stream_events():
372
- if isinstance(event, RawResponsesStreamEvent):
373
- # Raw response chunk - could contain thinking
374
- pass
375
-
376
- elif isinstance(event, RunItemStreamEvent):
377
- item = event.item
378
-
379
- # Handle different item types
380
- item_type = getattr(item, "type", None)
381
-
382
- if item_type == "tool_call_item":
383
- # Tool call started - extract name from raw_item
384
- tool_name = "unknown"
385
- tool_args: dict[str, Any] = {}
386
-
387
- # Try to get from raw_item
388
- if hasattr(item, "raw_item") and item.raw_item:
389
- raw = item.raw_item
390
-
391
- # Handle dict format (sometimes returned as dict)
392
- if isinstance(raw, dict):
393
- tool_name = raw.get(
394
- "name",
395
- raw.get("function", {}).get("name", "unknown"),
396
- )
397
- args_str = raw.get(
398
- "arguments",
399
- raw.get("function", {}).get("arguments", "{}"),
400
- )
401
- try:
402
- tool_args = (
403
- json.loads(args_str)
404
- if isinstance(args_str, str)
405
- else args_str
435
+ try:
436
+ async for event in result.stream_events():
437
+ if isinstance(event, RawResponsesStreamEvent):
438
+ # Raw response chunk - could contain thinking
439
+ pass
440
+
441
+ elif isinstance(event, RunItemStreamEvent):
442
+ item = event.item
443
+
444
+ # Handle different item types
445
+ item_type = getattr(item, "type", None)
446
+
447
+ if item_type == "tool_call_item":
448
+ # Tool call started - extract name from raw_item
449
+ tool_name = "unknown"
450
+ tool_args: dict[str, Any] = {}
451
+
452
+ # Try to get from raw_item
453
+ if hasattr(item, "raw_item") and item.raw_item:
454
+ raw = item.raw_item
455
+
456
+ # Handle dict format (sometimes returned as dict)
457
+ if isinstance(raw, dict):
458
+ tool_name = raw.get(
459
+ "name",
460
+ raw.get("function", {}).get("name", "unknown"),
406
461
  )
407
- except Exception:
408
- tool_args = {"raw": str(args_str)}
409
- else:
410
- # Chat Completions API format: raw_item.function.name
411
- if hasattr(raw, "function") and raw.function:
412
- func = raw.function
413
- if hasattr(func, "name"):
414
- tool_name = func.name
415
- if hasattr(func, "arguments"):
416
- try:
417
- tool_args = (
418
- json.loads(func.arguments)
419
- if isinstance(func.arguments, str)
420
- else func.arguments
421
- )
422
- except Exception:
423
- tool_args = {"raw": str(func.arguments)}
424
- # Responses API format: raw_item.name directly
425
- elif hasattr(raw, "name") and raw.name:
426
- tool_name = raw.name
427
- if hasattr(raw, "arguments"):
428
- try:
429
- tool_args = (
430
- json.loads(raw.arguments)
431
- if isinstance(raw.arguments, str)
432
- else raw.arguments
433
- )
434
- except Exception:
435
- tool_args = {"raw": str(raw.arguments)}
436
-
437
- # Fallback to direct item attributes
438
- if tool_name == "unknown":
439
- if hasattr(item, "name") and item.name:
440
- tool_name = item.name
441
- elif hasattr(item, "call") and item.call:
442
- call = item.call
443
- if hasattr(call, "function") and call.function:
444
- if hasattr(call.function, "name"):
445
- tool_name = call.function.name
446
- if hasattr(call.function, "arguments"):
447
- try:
448
- tool_args = (
449
- json.loads(call.function.arguments)
450
- if isinstance(
451
- call.function.arguments, str
462
+ args_str = raw.get(
463
+ "arguments",
464
+ raw.get("function", {}).get("arguments", "{}"),
465
+ )
466
+ try:
467
+ tool_args = (
468
+ json.loads(args_str)
469
+ if isinstance(args_str, str)
470
+ else args_str
471
+ )
472
+ except Exception:
473
+ tool_args = {"raw": str(args_str)}
474
+ else:
475
+ func = getattr(raw, "function", None)
476
+ if func:
477
+ tool_name = getattr(func, "name", "unknown")
478
+ args_val = getattr(func, "arguments", None)
479
+ if args_val:
480
+ try:
481
+ tool_args = (
482
+ json.loads(args_val)
483
+ if isinstance(args_val, str)
484
+ else args_val
485
+ )
486
+ except Exception:
487
+ tool_args = {"raw": str(args_val)}
488
+ else:
489
+ name_val = getattr(raw, "name", None)
490
+ if name_val:
491
+ tool_name = name_val
492
+ args_val = getattr(raw, "arguments", None)
493
+ if args_val:
494
+ try:
495
+ tool_args = (
496
+ json.loads(args_val)
497
+ if isinstance(args_val, str)
498
+ else args_val
499
+ )
500
+ except Exception:
501
+ tool_args = {"raw": str(args_val)}
502
+
503
+ # Fallback to direct item attributes
504
+ if tool_name == "unknown":
505
+ if hasattr(item, "name") and item.name:
506
+ tool_name = item.name
507
+ elif hasattr(item, "call") and item.call:
508
+ call = item.call
509
+ if hasattr(call, "function") and call.function:
510
+ if hasattr(call.function, "name"):
511
+ tool_name = call.function.name
512
+ if hasattr(call.function, "arguments"):
513
+ try:
514
+ tool_args = (
515
+ json.loads(call.function.arguments)
516
+ if isinstance(
517
+ call.function.arguments, str
518
+ )
519
+ else call.function.arguments
520
+ )
521
+ except Exception:
522
+ tool_args = {
523
+ "raw": str(call.function.arguments)
524
+ }
525
+ elif hasattr(call, "name"):
526
+ tool_name = call.name
527
+ if hasattr(call, "arguments"):
528
+ try:
529
+ tool_args = (
530
+ json.loads(call.arguments)
531
+ if isinstance(call.arguments, str)
532
+ else call.arguments
452
533
  )
453
- else call.function.arguments
454
- )
455
- except Exception:
456
- tool_args = {
457
- "raw": str(call.function.arguments)
458
- }
459
- elif hasattr(call, "name"):
460
- tool_name = call.name
461
- if hasattr(call, "arguments"):
462
- try:
463
- tool_args = (
464
- json.loads(call.arguments)
465
- if isinstance(call.arguments, str)
466
- else call.arguments
467
- )
468
- except Exception:
469
- tool_args = {"raw": str(call.arguments)}
470
-
471
- logger.info(
472
- f"[LayeredAgent] Tool call: {tool_name}, args keys: {list(tool_args.keys()) if isinstance(tool_args, dict) else 'not dict'}"
473
- )
474
-
475
- current_tool_call = {
476
- "name": tool_name,
477
- "args": tool_args,
478
- }
479
-
480
- event_data = {
481
- "type": "tool_call",
482
- "tool_name": tool_name,
483
- "tool_args": tool_args,
484
- }
485
- yield f"data: {json.dumps(event_data, ensure_ascii=False)}\n\n"
486
-
487
- elif item_type == "tool_call_output_item":
488
- # Tool call result
489
- output = getattr(item, "output", "")
490
-
491
- # Get tool name from current_tool_call or try to extract from item
492
- tool_name = (
493
- current_tool_call["name"]
494
- if current_tool_call
495
- else "unknown"
496
- )
497
-
498
- # Try to get tool name from raw_item if available
499
- if (
500
- tool_name == "unknown"
501
- and hasattr(item, "raw_item")
502
- and item.raw_item
503
- ):
504
- if hasattr(item.raw_item, "name"):
505
- tool_name = item.raw_item.name
506
-
507
- logger.info(
508
- f"[LayeredAgent] Tool result for {tool_name}: {str(output)[:100] if output else 'empty'}..."
509
- )
510
-
511
- event_data = {
512
- "type": "tool_result",
513
- "tool_name": tool_name,
514
- "result": output,
515
- }
516
- yield f"data: {json.dumps(event_data, ensure_ascii=False)}\n\n"
517
- current_tool_call = None
518
-
519
- elif item_type == "message_output_item":
520
- # Final message
521
- content = ""
522
- if hasattr(item, "raw_item") and item.raw_item:
523
- if (
524
- hasattr(item.raw_item, "content")
525
- and item.raw_item.content
526
- ):
527
- for c in item.raw_item.content:
528
- if hasattr(c, "text"):
529
- content += c.text
530
-
531
- if content:
534
+ except Exception:
535
+ tool_args = {"raw": str(call.arguments)}
536
+
537
+ logger.info(
538
+ f"[LayeredAgent] Tool call: {tool_name}, args keys: {list(tool_args.keys()) if isinstance(tool_args, dict) else 'not dict'}"
539
+ )
540
+
541
+ current_tool_call = {
542
+ "name": tool_name,
543
+ "args": tool_args,
544
+ }
545
+
532
546
  event_data = {
533
- "type": "message",
534
- "content": content,
547
+ "type": "tool_call",
548
+ "tool_name": tool_name,
549
+ "tool_args": tool_args,
535
550
  }
536
551
  yield f"data: {json.dumps(event_data, ensure_ascii=False)}\n\n"
537
552
 
538
- # Final result
553
+ elif item_type == "tool_call_output_item":
554
+ # Tool call result
555
+ output = getattr(item, "output", "")
556
+
557
+ # Get tool name from current_tool_call or try to extract from item
558
+ tool_name = (
559
+ current_tool_call["name"]
560
+ if current_tool_call
561
+ else "unknown"
562
+ )
563
+
564
+ raw_item = getattr(item, "raw_item", None)
565
+ if tool_name == "unknown" and raw_item:
566
+ name_val = getattr(raw_item, "name", None)
567
+ if name_val:
568
+ tool_name = name_val
569
+
570
+ logger.info(
571
+ f"[LayeredAgent] Tool result for {tool_name}: {str(output)[:100] if output else 'empty'}..."
572
+ )
573
+
574
+ event_data = {
575
+ "type": "tool_result",
576
+ "tool_name": tool_name,
577
+ "result": output,
578
+ }
579
+ yield f"data: {json.dumps(event_data, ensure_ascii=False)}\n\n"
580
+ current_tool_call = None
581
+
582
+ elif item_type == "message_output_item":
583
+ content = ""
584
+ raw_item = getattr(item, "raw_item", None)
585
+ if raw_item:
586
+ raw_content = getattr(raw_item, "content", None)
587
+ if raw_content:
588
+ for c in raw_content:
589
+ text_val = getattr(c, "text", None)
590
+ if text_val:
591
+ content += text_val
592
+
593
+ if content:
594
+ event_data = {
595
+ "type": "message",
596
+ "content": content,
597
+ }
598
+ yield f"data: {json.dumps(event_data, ensure_ascii=False)}\n\n"
599
+
600
+ finally:
601
+ # 清理活跃运行实例
602
+ with _active_runs_lock:
603
+ _active_runs.pop(session_id, None)
604
+
539
605
  final_output = (
540
606
  result.final_output if hasattr(result, "final_output") else ""
541
607
  )
608
+ final_success = True
542
609
  event_data = {
543
610
  "type": "done",
544
611
  "content": final_output,
@@ -548,12 +615,36 @@ async def layered_agent_chat(request: LayeredAgentRequest):
548
615
 
549
616
  except Exception as e:
550
617
  logger.exception(f"[LayeredAgent] Error: {e}")
618
+ final_output = str(e)
619
+ final_success = False
551
620
  event_data = {
552
621
  "type": "error",
553
622
  "message": str(e),
554
623
  }
555
624
  yield f"data: {json.dumps(event_data, ensure_ascii=False)}\n\n"
556
625
 
626
+ finally:
627
+ if request.device_id and final_output:
628
+ from AutoGLM_GUI.device_manager import DeviceManager
629
+
630
+ device_manager = DeviceManager.get_instance()
631
+ serialno = device_manager.get_serial_by_device_id(request.device_id)
632
+ if serialno:
633
+ end_time = datetime.now()
634
+ record = ConversationRecord(
635
+ task_text=request.message,
636
+ final_message=final_output,
637
+ success=final_success,
638
+ steps=0,
639
+ start_time=start_time,
640
+ end_time=end_time,
641
+ duration_ms=int((end_time - start_time).total_seconds() * 1000),
642
+ source="layered",
643
+ source_detail=request.session_id or "",
644
+ error_message=None if final_success else final_output,
645
+ )
646
+ history_manager.add_record(serialno, record)
647
+
557
648
  return StreamingResponse(
558
649
  event_generator(),
559
650
  media_type="text/event-stream",
@@ -565,6 +656,40 @@ async def layered_agent_chat(request: LayeredAgentRequest):
565
656
  )
566
657
 
567
658
 
659
+ class AbortSessionRequest(BaseModel):
660
+ """Request for aborting a running session."""
661
+
662
+ session_id: str
663
+
664
+
665
+ @router.post("/api/layered-agent/abort")
666
+ def abort_session(request: AbortSessionRequest):
667
+ """
668
+ Abort a running layered agent session.
669
+
670
+ Uses the OpenAI agents SDK's native cancel() method to stop execution.
671
+ """
672
+ session_id = request.session_id
673
+
674
+ with _active_runs_lock:
675
+ if session_id in _active_runs:
676
+ result = _active_runs[session_id]
677
+ result.cancel(mode="immediate")
678
+ logger.info(f"[LayeredAgent] Aborted session: {session_id}")
679
+ return {
680
+ "success": True,
681
+ "message": f"Session {session_id} abort signal sent",
682
+ }
683
+ else:
684
+ logger.warning(
685
+ f"[LayeredAgent] No active run found for session: {session_id}"
686
+ )
687
+ return {
688
+ "success": False,
689
+ "message": f"No active run found for session {session_id}",
690
+ }
691
+
692
+
568
693
  class ResetSessionRequest(BaseModel):
569
694
  """Request for resetting a session."""
570
695
 
AutoGLM_GUI/api/mcp.py CHANGED
@@ -1,11 +1,19 @@
1
1
  """MCP (Model Context Protocol) tools for AutoGLM-GUI."""
2
2
 
3
- from typing import Any, Dict, List
3
+ from typing_extensions import TypedDict
4
4
 
5
5
  from fastmcp import FastMCP
6
6
 
7
7
  from AutoGLM_GUI.logger import logger
8
8
  from AutoGLM_GUI.prompts import MCP_SYSTEM_PROMPT_ZH
9
+ from AutoGLM_GUI.schemas import DeviceResponse
10
+
11
+
12
+ class ChatResult(TypedDict):
13
+ result: str
14
+ steps: int
15
+ success: bool
16
+
9
17
 
10
18
  # 创建 MCP 服务器实例
11
19
  mcp = FastMCP("AutoGLM-GUI MCP Server")
@@ -15,7 +23,7 @@ MCP_MAX_STEPS = 5
15
23
 
16
24
 
17
25
  @mcp.tool()
18
- def chat(device_id: str, message: str) -> Dict[str, Any]:
26
+ def chat(device_id: str, message: str) -> ChatResult:
19
27
  """
20
28
  Send a task to the AutoGLM Phone Agent for execution.
21
29
 
@@ -26,13 +34,6 @@ def chat(device_id: str, message: str) -> Dict[str, Any]:
26
34
  Args:
27
35
  device_id: Device identifier (e.g., "192.168.1.100:5555" or serial)
28
36
  message: Natural language task (e.g., "打开微信", "发送消息")
29
-
30
- Returns:
31
- {
32
- "result": str, # Task execution result
33
- "steps": int, # Number of steps taken
34
- "success": bool # Success flag
35
- }
36
37
  """
37
38
  from AutoGLM_GUI.exceptions import DeviceBusyError
38
39
  from AutoGLM_GUI.phone_agent_manager import PhoneAgentManager
@@ -84,7 +85,7 @@ def chat(device_id: str, message: str) -> Dict[str, Any]:
84
85
 
85
86
 
86
87
  @mcp.tool()
87
- def list_devices() -> List[Dict[str, Any]]:
88
+ def list_devices() -> list[DeviceResponse]:
88
89
  """
89
90
  List all connected ADB devices and their agent status.
90
91