PyPI - autoglm-gui - Versions diffs - 1.4.1__py3-none-any.whl → 1.5.1__py3-none-any.whl - Mend

autoglm-gui 1.4.1py3-none-any.whl → 1.5.1py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (135) hide show

AutoGLM_GUI/__init__.py +11 -0
AutoGLM_GUI/__main__.py +26 -4
AutoGLM_GUI/actions/__init__.py +6 -0
phone_agent/actions/handler_ios.py → AutoGLM_GUI/actions/handler.py +30 -112
AutoGLM_GUI/actions/types.py +15 -0
{phone_agent → AutoGLM_GUI}/adb/__init__.py +25 -23
{phone_agent → AutoGLM_GUI}/adb/connection.py +5 -40
{phone_agent → AutoGLM_GUI}/adb/device.py +12 -94
{phone_agent → AutoGLM_GUI}/adb/input.py +6 -47
AutoGLM_GUI/adb/screenshot.py +11 -0
{phone_agent/config → AutoGLM_GUI/adb}/timing.py +1 -1
AutoGLM_GUI/adb_plus/keyboard_installer.py +4 -2
AutoGLM_GUI/adb_plus/screenshot.py +22 -1
AutoGLM_GUI/adb_plus/serial.py +38 -20
AutoGLM_GUI/adb_plus/touch.py +4 -9
AutoGLM_GUI/agents/__init__.py +43 -12
AutoGLM_GUI/agents/events.py +19 -0
AutoGLM_GUI/agents/factory.py +31 -38
AutoGLM_GUI/agents/glm/__init__.py +7 -0
AutoGLM_GUI/agents/glm/agent.py +297 -0
AutoGLM_GUI/agents/glm/message_builder.py +81 -0
AutoGLM_GUI/agents/glm/parser.py +110 -0
{phone_agent/config → AutoGLM_GUI/agents/glm}/prompts_en.py +7 -9
{phone_agent/config → AutoGLM_GUI/agents/glm}/prompts_zh.py +18 -25
AutoGLM_GUI/agents/mai/__init__.py +28 -0
AutoGLM_GUI/agents/mai/agent.py +408 -0
AutoGLM_GUI/agents/mai/parser.py +254 -0
AutoGLM_GUI/agents/mai/prompts.py +103 -0
AutoGLM_GUI/agents/mai/traj_memory.py +91 -0
AutoGLM_GUI/agents/protocols.py +12 -8
AutoGLM_GUI/agents/stream_runner.py +193 -0
AutoGLM_GUI/api/__init__.py +40 -21
AutoGLM_GUI/api/agents.py +181 -239
AutoGLM_GUI/api/control.py +9 -6
AutoGLM_GUI/api/devices.py +102 -12
AutoGLM_GUI/api/history.py +104 -0
AutoGLM_GUI/api/layered_agent.py +67 -15
AutoGLM_GUI/api/media.py +64 -1
AutoGLM_GUI/api/scheduled_tasks.py +98 -0
AutoGLM_GUI/config.py +81 -0
AutoGLM_GUI/config_manager.py +68 -51
AutoGLM_GUI/device_manager.py +248 -29
AutoGLM_GUI/device_protocol.py +1 -1
AutoGLM_GUI/devices/adb_device.py +5 -10
AutoGLM_GUI/devices/mock_device.py +4 -2
AutoGLM_GUI/devices/remote_device.py +8 -3
AutoGLM_GUI/history_manager.py +164 -0
AutoGLM_GUI/model/__init__.py +5 -0
AutoGLM_GUI/model/message_builder.py +69 -0
AutoGLM_GUI/model/types.py +24 -0
AutoGLM_GUI/models/__init__.py +10 -0
AutoGLM_GUI/models/history.py +140 -0
AutoGLM_GUI/models/scheduled_task.py +71 -0
AutoGLM_GUI/parsers/__init__.py +22 -0
AutoGLM_GUI/parsers/base.py +50 -0
AutoGLM_GUI/parsers/phone_parser.py +58 -0
AutoGLM_GUI/phone_agent_manager.py +62 -396
AutoGLM_GUI/platform_utils.py +26 -0
AutoGLM_GUI/prompt_config.py +15 -0
AutoGLM_GUI/prompts/__init__.py +32 -0
AutoGLM_GUI/scheduler_manager.py +350 -0
AutoGLM_GUI/schemas.py +246 -72
AutoGLM_GUI/scrcpy_stream.py +142 -24
AutoGLM_GUI/socketio_server.py +100 -27
AutoGLM_GUI/static/assets/{about-_XNhzQZX.js → about-CfwX1Cmc.js} +1 -1
AutoGLM_GUI/static/assets/alert-dialog-CtGlN2IJ.js +1 -0
AutoGLM_GUI/static/assets/chat-BYa-foUI.js +129 -0
AutoGLM_GUI/static/assets/circle-alert-t08bEMPO.js +1 -0
AutoGLM_GUI/static/assets/dialog-FNwZJFwk.js +45 -0
AutoGLM_GUI/static/assets/eye-D0UPWCWC.js +1 -0
AutoGLM_GUI/static/assets/history-CRo95B7i.js +1 -0
AutoGLM_GUI/static/assets/{index-Cy8TmmHV.js → index-BaLMSqd3.js} +1 -1
AutoGLM_GUI/static/assets/index-CTHbFvKl.js +11 -0
AutoGLM_GUI/static/assets/index-CV7jGxGm.css +1 -0
AutoGLM_GUI/static/assets/label-DJFevVmr.js +1 -0
AutoGLM_GUI/static/assets/logs-RW09DyYY.js +1 -0
AutoGLM_GUI/static/assets/popover--JTJrE5v.js +1 -0
AutoGLM_GUI/static/assets/scheduled-tasks-DTRKsQXF.js +1 -0
AutoGLM_GUI/static/assets/square-pen-CPK_K680.js +1 -0
AutoGLM_GUI/static/assets/textarea-PRmVnWq5.js +1 -0
AutoGLM_GUI/static/assets/workflows-CdcsAoaT.js +1 -0
AutoGLM_GUI/static/index.html +2 -2
AutoGLM_GUI/types.py +17 -0
{autoglm_gui-1.4.1.dist-info → autoglm_gui-1.5.1.dist-info}/METADATA +179 -130
autoglm_gui-1.5.1.dist-info/RECORD +118 -0
AutoGLM_GUI/agents/mai_adapter.py +0 -627
AutoGLM_GUI/api/dual_model.py +0 -317
AutoGLM_GUI/device_adapter.py +0 -263
AutoGLM_GUI/dual_model/__init__.py +0 -53
AutoGLM_GUI/dual_model/decision_model.py +0 -664
AutoGLM_GUI/dual_model/dual_agent.py +0 -917
AutoGLM_GUI/dual_model/protocols.py +0 -354
AutoGLM_GUI/dual_model/vision_model.py +0 -442
AutoGLM_GUI/mai_ui_adapter/agent_wrapper.py +0 -291
AutoGLM_GUI/phone_agent_patches.py +0 -147
AutoGLM_GUI/static/assets/chat-DwJpiAWf.js +0 -126
AutoGLM_GUI/static/assets/dialog-B3uW4T8V.js +0 -45
AutoGLM_GUI/static/assets/index-Cpv2gSF1.css +0 -1
AutoGLM_GUI/static/assets/index-UYYauTly.js +0 -12
AutoGLM_GUI/static/assets/workflows-Du_de-dt.js +0 -1
autoglm_gui-1.4.1.dist-info/RECORD +0 -117
mai_agent/base.py +0 -137
mai_agent/mai_grounding_agent.py +0 -263
mai_agent/mai_naivigation_agent.py +0 -526
mai_agent/prompt.py +0 -148
mai_agent/unified_memory.py +0 -67
mai_agent/utils.py +0 -73
phone_agent/__init__.py +0 -12
phone_agent/actions/__init__.py +0 -5
phone_agent/actions/handler.py +0 -400
phone_agent/adb/screenshot.py +0 -108
phone_agent/agent.py +0 -253
phone_agent/agent_ios.py +0 -277
phone_agent/config/__init__.py +0 -53
phone_agent/config/apps_harmonyos.py +0 -256
phone_agent/config/apps_ios.py +0 -339
phone_agent/config/prompts.py +0 -80
phone_agent/device_factory.py +0 -166
phone_agent/hdc/__init__.py +0 -53
phone_agent/hdc/connection.py +0 -384
phone_agent/hdc/device.py +0 -269
phone_agent/hdc/input.py +0 -145
phone_agent/hdc/screenshot.py +0 -127
phone_agent/model/__init__.py +0 -5
phone_agent/model/client.py +0 -290
phone_agent/xctest/__init__.py +0 -47
phone_agent/xctest/connection.py +0 -379
phone_agent/xctest/device.py +0 -472
phone_agent/xctest/input.py +0 -311
phone_agent/xctest/screenshot.py +0 -226
{phone_agent/config → AutoGLM_GUI/adb}/apps.py +0 -0
{phone_agent/config → AutoGLM_GUI}/i18n.py +0 -0
{autoglm_gui-1.4.1.dist-info → autoglm_gui-1.5.1.dist-info}/WHEEL +0 -0
{autoglm_gui-1.4.1.dist-info → autoglm_gui-1.5.1.dist-info}/entry_points.txt +0 -0
{autoglm_gui-1.4.1.dist-info → autoglm_gui-1.5.1.dist-info}/licenses/LICENSE +0 -0

AutoGLM_GUI/agents/mai/parser.py ADDED Viewed

@@ -0,0 +1,254 @@
+"""MAI Agent parser using XML tags and JSON.
+从 mai_agent 的 XML 格式中提取 thinking 和 action，并转换为
+AutoGLM_GUI 的标准格式。
+迁移说明：基于原有实现增强，添加 parse_with_thinking 方法。
+"""
+import json
+import re
+from typing import Any
+SCALE_FACTOR = 999
+class MAIParseError(ValueError):
+    pass
+class MAIParser:
+    """Parse MAI Agent XML + JSON format outputs.
+    Handles format like:
+        <thinking>Reasoning process</thinking>
+        <tool_call>{"name": "mobile_use", "arguments": {...}}</tool_call>
+    Converts MAI-specific actions to standard ActionHandler format.
+    Coordinate scale: 0-999 (automatically converted to 0-1000)
+    """
+    @property
+    def coordinate_scale(self) -> int:
+        return 999
+    def parse_with_thinking(self, raw_response: str) -> dict[str, Any]:
+        text = raw_response.strip()
+        if "</think>" in text and "</thinking>" not in text:
+            text = text.replace("</think>", "</thinking>")
+            text = "<thinking>" + text
+        pattern = r"<thinking>(.*?)</thinking>.*?<tool_call>(.*?)</tool_call>"
+        match = re.search(pattern, text, re.DOTALL)
+        if not match:
+            raise MAIParseError("Failed to find <thinking> and <tool_call> tags")
+        thinking = match.group(1).strip().strip('"')
+        tool_call_str = match.group(2).strip().strip('"')
+        try:
+            tool_call = json.loads(tool_call_str)
+        except json.JSONDecodeError as e:
+            raise MAIParseError(f"Invalid JSON in tool_call: {e}") from e
+        mai_action = tool_call.get("arguments", {})
+        if "coordinate" in mai_action:
+            mai_action["coordinate"] = self._normalize_coordinate_to_0_1(
+                mai_action["coordinate"]
+            )
+        return {
+            "thinking": thinking,
+            "raw_action": mai_action,
+            "converted_action": self._convert_action(mai_action),
+        }
+    def _normalize_coordinate_to_0_1(
+        self, coordinate: list[int | float]
+    ) -> list[float]:
+        if len(coordinate) == 2:
+            x, y = coordinate
+        elif len(coordinate) == 4:
+            x1, y1, x2, y2 = coordinate
+            x = (x1 + x2) / 2
+            y = (y1 + y2) / 2
+        else:
+            raise MAIParseError(
+                f"Invalid coordinate format: expected 2 or 4 values, got {len(coordinate)}"
+            )
+        return [x / SCALE_FACTOR, y / SCALE_FACTOR]
+    def parse(self, raw_response: str) -> dict[str, Any]:
+        """Parse MAI agent XML+JSON output.
+        Args:
+            raw_response: Model output containing <thinking> and <tool_call> tags.
+        Returns:
+            Standardized action dictionary with coordinates converted to 0-1000 scale.
+        Raises:
+            ValueError: If parsing fails or content is invalid JSON.
+        """
+        text = raw_response.strip()
+        if "</think>" in text and "</thinking>" not in text:
+            text = text.replace("</think>", "</thinking>")
+            text = "<thinking>" + text
+        pattern = r"<thinking>(.*?)</thinking>.*?<tool_call>(.*?)</tool_call>"
+        match = re.search(pattern, text, re.DOTALL)
+        if not match:
+            raise ValueError("Failed to find <thinking> and <tool_call> tags")
+        tool_call_str = match.group(2).strip().strip('"')
+        try:
+            tool_call = json.loads(tool_call_str)
+        except json.JSONDecodeError as e:
+            raise ValueError(f"Invalid JSON in tool_call: {e}") from e
+        mai_action = tool_call.get("arguments", {})
+        return self._convert_action(mai_action)
+    def _convert_action(self, mai_action: dict[str, Any]) -> dict[str, Any]:
+        """Convert MAI action format to standard ActionHandler format.
+        MAI format: {"action": "click", "coordinate": [x, y]}
+        Standard format: {"_metadata": "do", "action": "Tap", "element": [x, y]}
+        """
+        action_type = mai_action.get("action")
+        if action_type == "terminate":
+            status = mai_action.get("status", "success")
+            return {
+                "_metadata": "finish",
+                "message": "Task completed" if status == "success" else "Task failed",
+            }
+        if action_type == "answer":
+            return {
+                "_metadata": "finish",
+                "message": mai_action.get("text", ""),
+            }
+        if action_type == "wait":
+            return {
+                "_metadata": "do",
+                "action": "Wait",
+                "duration": "1 seconds",
+            }
+        if action_type == "system_button":
+            button_name = mai_action.get("button", "")
+            action_map = {
+                "back": "Back",
+                "home": "Home",
+                "enter": "Enter",
+            }
+            return {
+                "_metadata": "do",
+                "action": action_map.get(button_name, "Back"),
+            }
+        coordinate = mai_action.get("coordinate")
+        if coordinate:
+            x = self._convert_coordinate(coordinate[0])
+            y = self._convert_coordinate(coordinate[1])
+            if action_type == "click":
+                return {
+                    "_metadata": "do",
+                    "action": "Tap",
+                    "element": [x, y],
+                }
+            elif action_type == "long_press":
+                return {
+                    "_metadata": "do",
+                    "action": "Long Press",
+                    "element": [x, y],
+                }
+            elif action_type == "double_click":
+                return {
+                    "_metadata": "do",
+                    "action": "Double Tap",
+                    "element": [x, y],
+                }
+        if action_type == "swipe":
+            direction = mai_action.get("direction", "up")
+            coordinate = mai_action.get("coordinate") or [0.5, 0.5]
+            x = self._convert_coordinate(coordinate[0])
+            y = self._convert_coordinate(coordinate[1])
+            start, end = self._calculate_swipe_coordinates(direction, x, y)
+            return {
+                "_metadata": "do",
+                "action": "Swipe",
+                "start": start,
+                "end": end,
+            }
+        if action_type == "drag":
+            start_coord = mai_action.get("start_coordinate", [0, 0])
+            end_coord = mai_action.get("end_coordinate", [0, 0])
+            start = [
+                self._convert_coordinate_from_scale_factor(start_coord[0]),
+                self._convert_coordinate_from_scale_factor(start_coord[1]),
+            ]
+            end = [
+                self._convert_coordinate_from_scale_factor(end_coord[0]),
+                self._convert_coordinate_from_scale_factor(end_coord[1]),
+            ]
+            return {
+                "_metadata": "do",
+                "action": "Swipe",
+                "start": start,
+                "end": end,
+            }
+        if action_type == "type":
+            return {
+                "_metadata": "do",
+                "action": "Type",
+                "text": mai_action.get("text", ""),
+            }
+        if action_type == "open":
+            return {
+                "_metadata": "do",
+                "action": "Launch",
+                "app": mai_action.get("app", ""),
+            }
+        raise ValueError(f"Unknown MAI action type: {action_type}")
+    def _convert_coordinate(self, value: float) -> int:
+        """Convert MAI normalized coordinate [0, 1] to standard scale [0, 1000]."""
+        return int(value * 1000)
+    def _convert_coordinate_from_scale_factor(self, value: float) -> int:
+        """Convert MAI scale factor coordinate [0, 999] to standard scale [0, 1000]."""
+        return int((value / SCALE_FACTOR) * 1000)
+    def _calculate_swipe_coordinates(
+        self, direction: str, x: int, y: int
+    ) -> tuple[list[int], list[int]]:
+        """Calculate start and end coordinates for swipe based on direction."""
+        swipe_distance = 300
+        direction_map = {
+            "up": ([x, y + swipe_distance], [x, y - swipe_distance]),
+            "down": ([x, y - swipe_distance], [x, y + swipe_distance]),
+            "left": ([x + swipe_distance, y], [x - swipe_distance, y]),
+            "right": ([x - swipe_distance, y], [x + swipe_distance, y]),
+        }
+        return direction_map.get(direction, ([x, y], [x, y]))

AutoGLM_GUI/agents/mai/prompts.py ADDED Viewed

@@ -0,0 +1,103 @@
+"""MAI Agent 系统提示模板
+基于 mai_agent/prompt.py 迁移，针对中文环境和国内应用优化。
+"""
+# System prompt (written in Chinese) for the MAI mobile GUI agent. It tells the
+# model to answer with a <thinking> block followed by a <tool_call> JSON block,
+# lists the allowed actions and their fields, and fixes the coordinate grid at
+# [0, 999]. The trailing .strip() removes leading/trailing whitespace.
+# NOTE(review): the action schema described here (e.g. "open" carrying the app
+# name in "text", "menu" as a system button) presumably has to stay in sync
+# with the parser that consumes the model output — confirm when editing.
+MAI_MOBILE_SYSTEM_PROMPT = """你是一个 GUI 自动化助手。你会收到一个任务和历史操作记录（包含多张截图），你需要分析当前屏幕状态，执行下一步操作来完成任务。
+## 输出格式
+每次操作必须包含两部分：
+1. **思考过程**：在 <thinking></thinking> 标签中详细说明你的分析和决策
+2. **动作指令**：在 <tool_call></tool_call> 标签中返回 JSON 格式的函数调用
+示例：
+```
+<thinking>
+当前屏幕显示美团首页。我需要点击顶部搜索框输入"霸王茶姬"。搜索框位于屏幕上方中央，坐标大约在 [500, 100]。
+下一步操作：点击搜索框。
+</thinking>
+<tool_call>
+{"name": "mobile_use", "arguments": {"action": "click", "coordinate": [500, 100]}}
+</tool_call>
+```
+## 动作空间（严格遵守）
+### 基础操作
+- **点击**：`{"action": "click", "coordinate": [x, y]}`
+  用于点击按钮、链接、输入框等可点击元素
+- **长按**：`{"action": "long_press", "coordinate": [x, y]}`
+  用于触发长按菜单或特殊功能
+- **输入文本**：`{"action": "type", "text": "要输入的文字"}`
+  必须先点击输入框聚焦，再使用此动作输入文本
+  注意：文本中的特殊字符需要转义（\\'、\\"、\\n）
+### 滑动操作
+- **滑动**：`{"action": "swipe", "direction": "up|down|left|right", "coordinate": [x, y]}`
+  direction 可选值：up（向上滑）、down（向下滑）、left（向左滑）、right（向右滑）
+  coordinate 可选：指定滑动起点坐标（用于滑动特定 UI 元素）
+- **拖动**：`{"action": "drag", "start_coordinate": [x1, y1], "end_coordinate": [x2, y2]}`
+  用于拖拽元素到新位置
+### 系统操作
+- **打开应用**：`{"action": "open", "text": "应用名称"}`
+  推荐优先使用此方式打开应用（比手动点击更快）
+- **系统按键**：`{"action": "system_button", "button": "back|home|menu|enter"}`
+  可选值：back（返回）、home（主页）、menu（菜单）、enter（确认）
+### 任务控制
+- **等待**：`{"action": "wait"}`
+  用于等待页面加载或动画完成（建议谨慎使用，大多数情况不需要）
+- **结束任务**：`{"action": "terminate", "status": "success|fail"}`
+  任务完成或失败时必须调用此动作
+- **回答问题**：`{"action": "answer", "text": "答案内容"}`
+  当用户要求你查找信息或回答问题时使用
+## 坐标系统
+- **范围**：x 和 y 都在 [0, 999] 之间
+- **原点**：(0, 0) 是屏幕左上角
+- **边界**：(999, 999) 是屏幕右下角
+- **精度**：坐标是归一化的，会自动映射到实际屏幕分辨率
+## 操作指南
+### 思考过程建议
+在 <thinking> 部分应包含：
+1. **观察**：当前屏幕显示的内容和状态
+2. **分析**：识别目标元素的位置和特征
+3. **决策**：选择最合适的操作和参数
+4. **总结**：用一句话明确说明下一步要做什么
+### 常见应用操作技巧
+**国内常用应用**：
+- 外卖应用（美团、饿了么）：优先使用顶部搜索框查找商家
+- 打车应用（滴滴、高德）：注意起点/终点输入框的位置区分
+- 电商应用（淘宝、京东）：搜索框通常在顶部，商品列表需要向下滑动浏览
+- 社交应用（微信、QQ）：注意顶部/底部导航栏的切换
+**通用技巧**：
+- 如果页面内容未完全显示，使用 swipe 滚动查看
+- 输入文本前必须先 click 输入框获得焦点
+- 遇到加载动画可以 wait 一次，但不要连续 wait
+- 无法找到目标元素时，尝试返回上一级（system_button back）重新导航
+### 常见错误避免
+- ❌ 不要在未点击输入框的情况下直接 type
+- ❌ 不要使用超出 [0, 999] 范围的坐标
+- ❌ 不要遗漏 <thinking> 或 <tool_call> 标签
+- ❌ 不要在 JSON 中使用注释或多余的字段
+- ❌ 不要连续执行多个相同的无效操作
+## 注意事项
+- 必须严格遵循动作空间，所有动作参数必须符合上述格式
+- 坐标必须是整数，范围在 [0, 999]
+- 文本输入中的引号、换行等特殊字符必须转义
+- 每次只返回一个动作，不要尝试批量操作
+- 仔细观察截图中的 UI 元素位置，准确估算坐标
+""".strip()

AutoGLM_GUI/agents/mai/traj_memory.py ADDED Viewed

@@ -0,0 +1,91 @@
+"""轨迹记忆数据结构 - MAI Agent 内部实现
+本模块定义了 MAI Agent 的轨迹记忆系统，用于存储和管理 Agent 执行过程中的历史信息。
+设计说明：
+- 从 mai_agent/unified_memory.py 迁移而来
+- 适配 Python 3.10+ 类型注解
+- 与 AutoGLM_GUI 架构集成
+"""
+from dataclasses import dataclass, field
+from typing import Any
+from PIL import Image
+@dataclass
+class TrajStep:
+    """A single step in a trajectory.
+    Records the agent's state at one step: observation, reasoning, action and
+    outcome.
+    Attributes:
+        screenshot: Screenshot for this step (PIL Image).
+        accessibility_tree: Accessibility tree data (optional, to aid UI understanding).
+        prediction: Raw model response text (contains <thinking> and <tool_call>).
+        action: Parsed action dict (e.g. {"action": "click", "coordinate": [0.5, 0.8]}).
+        conclusion: Conclusion or summary of this step.
+        thought: The model's reasoning (extracted from the <thinking> tag).
+        step_index: Step index (starting from 0).
+        agent_type: Type of agent that produced this step (e.g. "InternalMAIAgent").
+        model_name: Name of the model used (e.g. "qwen2-vl-7b").
+        screenshot_bytes: Screenshot as raw bytes (optional, for serialization).
+        structured_action: Structured action data (optional, carries extra metadata).
+    """
+    screenshot: Image.Image
+    accessibility_tree: dict[str, Any] | None
+    prediction: str
+    action: dict[str, Any]
+    conclusion: str
+    thought: str
+    step_index: int
+    agent_type: str
+    model_name: str
+    screenshot_bytes: bytes | None = None
+    structured_action: dict[str, Any] | None = None
+@dataclass
+class TrajMemory:
+    """完整任务的轨迹记忆容器
+    存储一个完整任务的所有步骤，提供历史查询和状态管理功能。
+    Attributes:
+        task_goal: 任务目标描述（用户的原始指令）
+        task_id: 任务唯一标识符
+        steps: 步骤列表（按执行顺序）
+    """
+    task_goal: str
+    task_id: str
+    steps: list[TrajStep] = field(default_factory=list)
+    def add_step(self, step: TrajStep) -> None:
+        self.steps.append(step)
+    def get_history_images(self, n: int = -1) -> list[bytes]:
+        images = [step.screenshot_bytes for step in self.steps if step.screenshot_bytes]
+        if n > 0:
+            return images[-n:]
+        return images
+    def get_history_thoughts(self, n: int = -1) -> list[str]:
+        thoughts = [step.thought for step in self.steps if step.thought]
+        if n > 0:
+            return thoughts[-n:]
+        return thoughts
+    def get_history_actions(self, n: int = -1) -> list[dict[str, Any]]:
+        actions = [step.action for step in self.steps]
+        if n > 0:
+            return actions[-n:]
+        return actions
+    def clear(self) -> None:
+        self.steps.clear()
+    def __len__(self) -> int:
+        return len(self.steps)

AutoGLM_GUI/agents/protocols.py CHANGED Viewed

@@ -1,23 +1,27 @@
 from __future__ import annotations
-from typing import TYPE_CHECKING, Any, Protocol
+from typing import Any, Protocol
-if TYPE_CHECKING:
-    from phone_agent.agent import AgentConfig, StepResult
-    from phone_agent.model import ModelConfig
+from AutoGLM_GUI.config import AgentConfig, ModelConfig, StepResult
 class BaseAgent(Protocol):
-    agent_config: "AgentConfig"
-    model_config: "ModelConfig"
+    model_config: ModelConfig
+    agent_config: AgentConfig
     def run(self, task: str) -> str: ...
-    def step(self, task: str | None = None) -> "StepResult": ...
+    def step(self, task: str | None = None) -> StepResult: ...
     def reset(self) -> None: ...
+    def abort(self) -> None: ...
     @property
     def step_count(self) -> int: ...
     @property
     def context(self) -> list[dict[str, Any]]: ...
+    @property
+    def is_running(self) -> bool: ...

AutoGLM_GUI/agents/stream_runner.py ADDED Viewed

@@ -0,0 +1,193 @@
+import queue
+import threading
+import typing
+from contextlib import contextmanager
+from typing import Any, Callable, Iterator, Optional
+from AutoGLM_GUI.agents.events import AgentEvent, AgentEventType
+if typing.TYPE_CHECKING:
+    from AutoGLM_GUI.agents.protocols import BaseAgent
+class AgentStepStreamer:
+    """
+    Streaming agent executor (reusable logic extracted into one place).
+    Responsibilities:
+    - manage the event queue
+    - coordinate the worker thread
+    - turn StepResult objects into events
+    Iterating the instance lazily starts a background thread that repeatedly
+    calls ``agent.step`` and yields the resulting events.
+    """
+    def __init__(
+        self,
+        agent: "BaseAgent",
+        task: str,
+    ) -> None:
+        self._agent = agent
+        self._task = task
+        # Items are (event_type, data) tuples; None is the end-of-stream sentinel.
+        # NOTE(review): the queue is bounded (100) and the worker uses blocking
+        # put(), so a consumer that stops iterating can stall the worker once the
+        # queue is full — confirm that this is acceptable.
+        self._event_queue: queue.Queue[Optional[tuple[str, dict[str, Any]]]] = (
+            queue.Queue(maxsize=100)
+        )
+        self._stop_event = threading.Event()
+        self._worker_thread: Optional[threading.Thread] = None
+    def __iter__(self) -> Iterator[AgentEvent]:
+        """Return the iterator (the instance itself)."""
+        return self  # type: ignore
+    def __next__(self) -> AgentEvent:
+        """Fetch the next event from the queue.
+        Starts the worker on the first call and waits up to 0.1 s for an item.
+        Raises:
+            StopIteration: When the sentinel is received, or when the queue is
+                empty and the worker thread is no longer alive.
+        """
+        try:
+            if self._worker_thread is None:
+                self._start_worker()
+            item = self._event_queue.get(timeout=0.1)
+            if item is None:
+                raise StopIteration
+            event_type, event_data = item
+            return AgentEvent(type=event_type, data=event_data)
+        except queue.Empty:
+            if self._worker_thread and self._worker_thread.is_alive():
+                # Nothing arrived in time but the worker is still running:
+                # hand back a placeholder STEP event (step == -1).
+                return AgentEvent(
+                    type=AgentEventType.STEP.value,
+                    data={
+                        "step": -1,
+                        "thinking": "",
+                        "action": None,
+                        "success": True,
+                        "finished": False,
+                    },
+                )
+            else:
+                raise StopIteration
+        except StopIteration:
+            # Re-raise explicitly so the generic handler below does not turn
+            # end-of-stream into an ERROR event.
+            raise
+        except Exception as e:
+            self._stop_event.set()
+            return AgentEvent(type=AgentEventType.ERROR.value, data={"message": str(e)})
+    def _start_worker(self) -> None:
+        """Start the worker thread that drives the agent step loop."""
+        def worker():
+            try:
+                # Bail out early if a stop was already requested.
+                if self._stop_event.is_set():
+                    return
+                # Inject the thinking callback.
+                # This is a hack, kept to meet the "Zero Agent Change" goal;
+                # it assumes the agent exposes a _thinking_callback attribute.
+                original_callback = getattr(self._agent, "_thinking_callback", None)
+                def on_thinking(chunk: str):
+                    self._event_queue.put(
+                        (AgentEventType.THINKING.value, {"chunk": chunk})
+                    )
+                    if original_callback:
+                        original_callback(chunk)
+                # Monkey-patch thinking callback
+                setattr(self._agent, "_thinking_callback", on_thinking)
+                try:
+                    # Run the step loop.
+                    # A session-level flag is used instead of agent.step_count,
+                    # so the first step of every new conversation receives the task.
+                    is_first_in_session = True
+                    while not self._stop_event.is_set():
+                        result = self._agent.step(
+                            self._task if is_first_in_session else None
+                        )
+                        is_first_in_session = False
+                        # Emit the step event.
+                        self._event_queue.put(
+                            (
+                                AgentEventType.STEP.value,
+                                {
+                                    "step": self._agent.step_count,
+                                    "thinking": result.thinking,
+                                    "action": result.action,
+                                    "success": result.success,
+                                    "finished": result.finished,
+                                },
+                            )
+                        )
+                        # Check whether the agent reports completion.
+                        if result.finished:
+                            # Emit the done event.
+                            self._event_queue.put(
+                                (
+                                    AgentEventType.DONE.value,
+                                    {
+                                        "message": result.message,
+                                        "steps": self._agent.step_count,
+                                        "success": result.success,
+                                    },
+                                )
+                            )
+                            break
+                        # Enforce the step limit.
+                        if self._agent.step_count >= self._agent.agent_config.max_steps:
+                            self._event_queue.put(
+                                (
+                                    AgentEventType.DONE.value,
+                                    {
+                                        "message": "Max steps reached",
+                                        "steps": self._agent.step_count,
+                                        "success": result.success,
+                                    },
+                                )
+                            )
+                            break
+                finally:
+                    # Restore the original callback (None if the agent had none).
+                    setattr(self._agent, "_thinking_callback", original_callback)
+            except Exception as e:
+                # Emit an error event.
+                self._event_queue.put((AgentEventType.ERROR.value, {"message": str(e)}))
+            finally:
+                # Mark the stream as finished with the None sentinel.
+                self._event_queue.put(None)
+        self._worker_thread = threading.Thread(target=worker, daemon=True)
+        self._worker_thread.start()
+    @contextmanager
+    def stream_context(self) -> Iterator[Callable[[], None]]:
+        """
+        Context manager that handles cleanup automatically.
+        Clears the stop flag on entry and yields ``self.abort``. On exit it sets
+        the stop flag, waits up to 5 seconds for the worker thread and then
+        drains whatever is left in the event queue.
+        """
+        self._stop_event.clear()
+        try:
+            yield self.abort
+        finally:
+            self._stop_event.set()
+            # Wait for the worker to finish.
+            if self._worker_thread and self._worker_thread.is_alive():
+                self._worker_thread.join(timeout=5.0)
+            # Drain the queue.
+            while not self._event_queue.empty():
+                try:
+                    self._event_queue.get_nowait()
+                except queue.Empty:
+                    break
+    def abort(self) -> None:
+        """Abort the streaming run.
+        Sets the stop flag and, if the agent has an ``abort`` method, calls it.
+        """
+        self._stop_event.set()
+        if hasattr(self._agent, "abort"):
+            self._agent.abort()

autoglm-gui 1.4.1__py3-none-any.whl → 1.5.1__py3-none-any.whl

autoglm-gui 1.4.1py3-none-any.whl → 1.5.1py3-none-any.whl