PyPI - autoglm-gui - Versions diffs - 1.3.1__py3-none-any.whl → 1.4.1__py3-none-any.whl - Mend

autoglm-gui 1.3.1__py3-none-any.whl → 1.4.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (61) hide show

AutoGLM_GUI/__main__.py +0 -4
AutoGLM_GUI/adb_plus/qr_pair.py +8 -8
AutoGLM_GUI/agents/__init__.py +20 -0
AutoGLM_GUI/agents/factory.py +160 -0
AutoGLM_GUI/agents/mai_adapter.py +627 -0
AutoGLM_GUI/agents/protocols.py +23 -0
AutoGLM_GUI/api/__init__.py +50 -7
AutoGLM_GUI/api/agents.py +61 -19
AutoGLM_GUI/api/devices.py +12 -18
AutoGLM_GUI/api/dual_model.py +24 -17
AutoGLM_GUI/api/health.py +13 -0
AutoGLM_GUI/api/layered_agent.py +659 -0
AutoGLM_GUI/api/mcp.py +11 -10
AutoGLM_GUI/api/version.py +23 -10
AutoGLM_GUI/api/workflows.py +2 -1
AutoGLM_GUI/config_manager.py +56 -24
AutoGLM_GUI/device_adapter.py +263 -0
AutoGLM_GUI/device_protocol.py +266 -0
AutoGLM_GUI/devices/__init__.py +49 -0
AutoGLM_GUI/devices/adb_device.py +205 -0
AutoGLM_GUI/devices/mock_device.py +183 -0
AutoGLM_GUI/devices/remote_device.py +172 -0
AutoGLM_GUI/dual_model/decision_model.py +4 -4
AutoGLM_GUI/dual_model/protocols.py +3 -3
AutoGLM_GUI/exceptions.py +3 -3
AutoGLM_GUI/mai_ui_adapter/agent_wrapper.py +291 -0
AutoGLM_GUI/metrics.py +13 -20
AutoGLM_GUI/phone_agent_manager.py +219 -134
AutoGLM_GUI/phone_agent_patches.py +2 -1
AutoGLM_GUI/platform_utils.py +5 -2
AutoGLM_GUI/prompts.py +6 -1
AutoGLM_GUI/schemas.py +45 -14
AutoGLM_GUI/scrcpy_stream.py +17 -13
AutoGLM_GUI/server.py +3 -1
AutoGLM_GUI/socketio_server.py +16 -4
AutoGLM_GUI/state.py +10 -30
AutoGLM_GUI/static/assets/{about-Cj6QXqMf.js → about-_XNhzQZX.js} +1 -1
AutoGLM_GUI/static/assets/chat-DwJpiAWf.js +126 -0
AutoGLM_GUI/static/assets/{dialog-CxJlnjzH.js → dialog-B3uW4T8V.js} +3 -3
AutoGLM_GUI/static/assets/index-Cpv2gSF1.css +1 -0
AutoGLM_GUI/static/assets/{index-C_B-Arvf.js → index-Cy8TmmHV.js} +1 -1
AutoGLM_GUI/static/assets/{index-CxJQuE4y.js → index-UYYauTly.js} +6 -6
AutoGLM_GUI/static/assets/{workflows-BTiGCNI0.js → workflows-Du_de-dt.js} +1 -1
AutoGLM_GUI/static/index.html +2 -2
AutoGLM_GUI/types.py +125 -0
{autoglm_gui-1.3.1.dist-info → autoglm_gui-1.4.1.dist-info}/METADATA +147 -65
{autoglm_gui-1.3.1.dist-info → autoglm_gui-1.4.1.dist-info}/RECORD +58 -39
mai_agent/base.py +137 -0
mai_agent/mai_grounding_agent.py +263 -0
mai_agent/mai_naivigation_agent.py +526 -0
mai_agent/prompt.py +148 -0
mai_agent/unified_memory.py +67 -0
mai_agent/utils.py +73 -0
phone_agent/config/prompts.py +6 -1
phone_agent/config/prompts_zh.py +6 -1
AutoGLM_GUI/config.py +0 -23
AutoGLM_GUI/static/assets/chat-BJeomZgh.js +0 -124
AutoGLM_GUI/static/assets/index-Z0uYCPOO.css +0 -1
{autoglm_gui-1.3.1.dist-info → autoglm_gui-1.4.1.dist-info}/WHEEL +0 -0
{autoglm_gui-1.3.1.dist-info → autoglm_gui-1.4.1.dist-info}/entry_points.txt +0 -0
{autoglm_gui-1.3.1.dist-info → autoglm_gui-1.4.1.dist-info}/licenses/LICENSE +0 -0

AutoGLM_GUI/api/mcp.py CHANGED Viewed

@@ -1,11 +1,19 @@
 """MCP (Model Context Protocol) tools for AutoGLM-GUI."""
-from typing import Any, Dict, List
+from typing_extensions import TypedDict
 from fastmcp import FastMCP
 from AutoGLM_GUI.logger import logger
 from AutoGLM_GUI.prompts import MCP_SYSTEM_PROMPT_ZH
+from AutoGLM_GUI.schemas import DeviceResponse
+class ChatResult(TypedDict):
+    result: str
+    steps: int
+    success: bool
 # 创建 MCP 服务器实例
 mcp = FastMCP("AutoGLM-GUI MCP Server")
@@ -15,7 +23,7 @@ MCP_MAX_STEPS = 5
 @mcp.tool()
-def chat(device_id: str, message: str) -> Dict[str, Any]:
+def chat(device_id: str, message: str) -> ChatResult:
     """
     Send a task to the AutoGLM Phone Agent for execution.
@@ -26,13 +34,6 @@ def chat(device_id: str, message: str) -> Dict[str, Any]:
     Args:
         device_id: Device identifier (e.g., "192.168.1.100:5555" or serial)
         message: Natural language task (e.g., "打开微信", "发送消息")
-    Returns:
-        {
-            "result": str,    # Task execution result
-            "steps": int,     # Number of steps taken
-            "success": bool   # Success flag
-        }
     """
     from AutoGLM_GUI.exceptions import DeviceBusyError
     from AutoGLM_GUI.phone_agent_manager import PhoneAgentManager
@@ -84,7 +85,7 @@ def chat(device_id: str, message: str) -> Dict[str, Any]:
 @mcp.tool()
-def list_devices() -> List[Dict[str, Any]]:
+def list_devices() -> list[DeviceResponse]:
     """
     List all connected ADB devices and their agent status.

AutoGLM_GUI/api/version.py CHANGED Viewed

@@ -3,8 +3,9 @@
 import json
 import re
 import time
+import urllib.error
 import urllib.request
-from typing import Any
+from typing_extensions import TypedDict
 from fastapi import APIRouter
@@ -12,13 +13,30 @@ from AutoGLM_GUI.logger import logger
 from AutoGLM_GUI.schemas import VersionCheckResponse
 from AutoGLM_GUI.version import APP_VERSION
+class GitHubRelease(TypedDict, total=False):
+    """GitHub Release API response structure."""
+    tag_name: str
+    html_url: str
+    published_at: str
+class _VersionCache(TypedDict):
+    """Internal cache structure for version checking."""
+    data: VersionCheckResponse | None
+    timestamp: float
+    ttl: int
 router = APIRouter()
 # In-memory cache for version check results
-_version_cache: dict[str, Any] = {
+_version_cache: _VersionCache = {
     "data": None,
     "timestamp": 0,
-    "ttl": 3600,  # 1 hour cache TTL
+    "ttl": 3600,
 }
 # GitHub repository information
@@ -74,13 +92,8 @@ def compare_versions(current: str, latest: str) -> bool:
     return latest_tuple > current_tuple
-def fetch_latest_release() -> dict[str, Any] | None:
-    """
-    Fetch latest release information from GitHub API.
-    Returns:
-        Release data dict with 'tag_name', 'html_url', 'published_at' or None on error
-    """
+def fetch_latest_release() -> GitHubRelease | None:
+    """Fetch latest release information from GitHub API."""
     try:
         # Create request with User-Agent header (required by GitHub API)
         req = urllib.request.Request(

AutoGLM_GUI/api/workflows.py CHANGED Viewed

@@ -17,7 +17,8 @@ def list_workflows() -> WorkflowListResponse:
     """获取所有 workflows."""
     from AutoGLM_GUI.workflow_manager import workflow_manager
-    workflows = workflow_manager.list_workflows()
+    workflow_dicts = workflow_manager.list_workflows()
+    workflows = [WorkflowResponse(**wf) for wf in workflow_dicts]
     return WorkflowListResponse(workflows=workflows)

AutoGLM_GUI/config_manager.py CHANGED Viewed

@@ -54,12 +54,26 @@ class ConfigModel(BaseModel):
     # 双模型配置
     dual_model_enabled: bool = False
-    decision_base_url: str = "https://api-inference.modelscope.cn/v1"
-    decision_model_name: str = "ZhipuAI/GLM-4.7"
+    decision_base_url: str = ""
+    decision_model_name: str = ""
     decision_api_key: str = ""
-    # 思考模式配置
-    thinking_mode: str = "deep"  # "fast" 或 "deep"
+    # Agent 类型配置
+    agent_type: str = "glm"  # Agent type (e.g., "glm", "mai")
+    agent_config_params: dict | None = None  # Agent-specific configuration
+    # Agent 执行配置
+    default_max_steps: int = 100  # 单次任务最大执行步数
+    @field_validator("default_max_steps")
+    @classmethod
+    def validate_default_max_steps(cls, v: int) -> int:
+        """验证 default_max_steps 范围."""
+        if v <= 0:
+            raise ValueError("default_max_steps must be positive")
+        if v > 1000:
+            raise ValueError("default_max_steps must be <= 1000")
+        return v
     @field_validator("base_url")
     @classmethod
@@ -85,14 +99,6 @@ class ConfigModel(BaseModel):
             raise ValueError("decision_base_url must start with http:// or https://")
         return v.rstrip("/")  # 去除尾部斜杠
-    @field_validator("thinking_mode")
-    @classmethod
-    def validate_thinking_mode(cls, v: str) -> str:
-        """验证思考模式."""
-        if v not in ("fast", "deep"):
-            raise ValueError("thinking_mode must be 'fast' or 'deep'")
-        return v
 # ==================== 配置层数据类 ====================
@@ -109,8 +115,11 @@ class ConfigLayer:
     decision_base_url: Optional[str] = None
     decision_model_name: Optional[str] = None
     decision_api_key: Optional[str] = None
-    # 思考模式配置
-    thinking_mode: Optional[str] = None
+    # Agent 类型配置
+    agent_type: Optional[str] = None
+    agent_config_params: Optional[dict] = None
+    # Agent 执行配置
+    default_max_steps: Optional[int] = None
     source: ConfigSource = ConfigSource.DEFAULT
@@ -142,7 +151,9 @@ class ConfigLayer:
                 "decision_base_url": self.decision_base_url,
                 "decision_model_name": self.decision_model_name,
                 "decision_api_key": self.decision_api_key,
-                "thinking_mode": self.thinking_mode,
+                "agent_type": self.agent_type,
+                "agent_config_params": self.agent_config_params,
+                "default_max_steps": self.default_max_steps,
             }.items()
             if v is not None
         }
@@ -202,6 +213,9 @@ class UnifiedConfigManager:
             base_url="",
             model_name="autoglm-phone-9b",
             api_key="EMPTY",
+            agent_type="glm",
+            agent_config_params=None,
+            default_max_steps=100,
             source=ConfigSource.DEFAULT,
         )
@@ -314,7 +328,11 @@ class UnifiedConfigManager:
                 decision_base_url=config_data.get("decision_base_url"),
                 decision_model_name=config_data.get("decision_model_name"),
                 decision_api_key=config_data.get("decision_api_key"),
-                thinking_mode=config_data.get("thinking_mode"),
+                agent_type=config_data.get(
+                    "agent_type", "glm"
+                ),  # 默认 'glm'，兼容旧配置
+                agent_config_params=config_data.get("agent_config_params"),
+                default_max_steps=config_data.get("default_max_steps"),
                 source=ConfigSource.FILE,
             )
             self._effective_config = None  # 清除缓存
@@ -346,7 +364,9 @@ class UnifiedConfigManager:
         decision_base_url: Optional[str] = None,
         decision_model_name: Optional[str] = None,
         decision_api_key: Optional[str] = None,
-        thinking_mode: Optional[str] = None,
+        agent_type: Optional[str] = None,
+        agent_config_params: Optional[dict] = None,
+        default_max_steps: Optional[int] = None,
         merge_mode: bool = True,
     ) -> bool:
         """
@@ -360,7 +380,9 @@ class UnifiedConfigManager:
             decision_base_url: 决策模型 Base URL
             decision_model_name: 决策模型名称
             decision_api_key: 决策模型 API key
-            thinking_mode: 思考模式 (fast/deep)
+            agent_type: Agent 类型（可选，如 "glm", "mai"）
+            agent_config_params: Agent 特定配置参数（可选）
+            default_max_steps: 默认最大执行步数（可选）
             merge_mode: 是否合并现有配置（True: 保留未提供的字段）
         Returns:
@@ -371,7 +393,7 @@ class UnifiedConfigManager:
             self._config_path.parent.mkdir(parents=True, exist_ok=True)
             # 准备新配置
-            new_config = {
+            new_config: dict[str, str | bool | int | dict | None] = {
                 "base_url": base_url,
                 "model_name": model_name,
             }
@@ -386,8 +408,12 @@ class UnifiedConfigManager:
                 new_config["decision_model_name"] = decision_model_name
             if decision_api_key:
                 new_config["decision_api_key"] = decision_api_key
-            if thinking_mode:
-                new_config["thinking_mode"] = thinking_mode
+            if agent_type is not None:
+                new_config["agent_type"] = agent_type
+            if agent_config_params is not None:
+                new_config["agent_config_params"] = agent_config_params
+            if default_max_steps is not None:
+                new_config["default_max_steps"] = default_max_steps
             # 合并模式：保留现有文件中未提供的字段
             if merge_mode and self._config_path.exists():
@@ -402,7 +428,9 @@ class UnifiedConfigManager:
                         "decision_base_url",
                         "decision_model_name",
                         "decision_api_key",
-                        "thinking_mode",
+                        "agent_type",
+                        "agent_config_params",
+                        "default_max_steps",
                     ]
                     for key in preserve_keys:
                         if key not in new_config and key in existing:
@@ -491,7 +519,9 @@ class UnifiedConfigManager:
             "decision_base_url",
             "decision_model_name",
             "decision_api_key",
-            "thinking_mode",
+            "agent_type",
+            "agent_config_params",
+            "default_max_steps",
         ]
         for key in config_keys:
@@ -658,7 +688,9 @@ class UnifiedConfigManager:
             "decision_base_url": config.decision_base_url,
             "decision_model_name": config.decision_model_name,
             "decision_api_key": config.decision_api_key,
-            "thinking_mode": config.thinking_mode,
+            "agent_type": config.agent_type,
+            "agent_config_params": config.agent_config_params,
+            "default_max_steps": config.default_max_steps,
         }

AutoGLM_GUI/device_adapter.py ADDED Viewed

@@ -0,0 +1,263 @@
+"""Device Protocol Adapter for phone_agent integration.
+This module provides an adapter that bridges DeviceProtocol implementations
+to the interface expected by phone_agent's DeviceFactory.
+The adapter allows injecting any DeviceProtocol implementation (ADB, Mock, Remote)
+into phone_agent without modifying the third-party code.
+Example:
+    >>> from AutoGLM_GUI.device_adapter import inject_device_protocol
+    >>> from AutoGLM_GUI.devices import MockDevice, ADBDevice
+    >>>
+    >>> # For testing: inject mock device
+    >>> mock = MockDevice("mock_001", state_machine)
+    >>> inject_device_protocol(lambda _: mock)
+    >>>
+    >>> # For production: inject ADB device
+    >>> devices = {"phone_1": ADBDevice("emulator-5554")}
+    >>> inject_device_protocol(lambda device_id: devices[device_id])
+"""
+from typing import Callable
+import phone_agent.device_factory as device_factory_module
+from AutoGLM_GUI.device_protocol import DeviceProtocol, Screenshot
+class DeviceProtocolAdapter:
+    """
+    Adapter that bridges DeviceProtocol to phone_agent's DeviceFactory interface.
+    This adapter wraps a DeviceProtocol getter function and exposes the same
+    interface as phone_agent's DeviceFactory, allowing seamless injection.
+    The adapter handles:
+    - Routing device operations to the correct DeviceProtocol instance
+    - Converting between DeviceProtocol and DeviceFactory method signatures
+    - Managing device_id parameters (phone_agent passes device_id to each method)
+    """
+    def __init__(
+        self,
+        get_device: Callable[[str | None], DeviceProtocol],
+        default_device_id: str | None = None,
+    ):
+        """
+        Initialize the adapter.
+        Args:
+            get_device: Function that returns a DeviceProtocol given a device_id.
+                       If device_id is None, should return a default device.
+            default_device_id: Default device ID to use when None is passed.
+        """
+        self._get_device = get_device
+        self._default_device_id = default_device_id
+        # For compatibility with code that checks device_type
+        self.device_type = "protocol_adapter"
+    def _device(self, device_id: str | None) -> DeviceProtocol:
+        """Get device for the given ID."""
+        effective_id = device_id or self._default_device_id
+        return self._get_device(effective_id)
+    # === Screenshot ===
+    def get_screenshot(
+        self, device_id: str | None = None, timeout: int = 10
+    ) -> Screenshot:
+        """Get screenshot from device."""
+        return self._device(device_id).get_screenshot(timeout)
+    # === Input Operations ===
+    def tap(
+        self, x: int, y: int, device_id: str | None = None, delay: float | None = None
+    ) -> None:
+        """Tap at coordinates."""
+        self._device(device_id).tap(x, y, delay)
+    def double_tap(
+        self, x: int, y: int, device_id: str | None = None, delay: float | None = None
+    ) -> None:
+        """Double tap at coordinates."""
+        self._device(device_id).double_tap(x, y, delay)
+    def long_press(
+        self,
+        x: int,
+        y: int,
+        duration_ms: int = 3000,
+        device_id: str | None = None,
+        delay: float | None = None,
+    ) -> None:
+        """Long press at coordinates."""
+        self._device(device_id).long_press(x, y, duration_ms, delay)
+    def swipe(
+        self,
+        start_x: int,
+        start_y: int,
+        end_x: int,
+        end_y: int,
+        duration_ms: int | None = None,
+        device_id: str | None = None,
+        delay: float | None = None,
+    ) -> None:
+        """Swipe from start to end."""
+        self._device(device_id).swipe(
+            start_x, start_y, end_x, end_y, duration_ms, delay
+        )
+    def type_text(self, text: str, device_id: str | None = None) -> None:
+        """Type text."""
+        self._device(device_id).type_text(text)
+    def clear_text(self, device_id: str | None = None) -> None:
+        """Clear text."""
+        self._device(device_id).clear_text()
+    # === Navigation ===
+    def back(self, device_id: str | None = None, delay: float | None = None) -> None:
+        """Press back button."""
+        self._device(device_id).back(delay)
+    def home(self, device_id: str | None = None, delay: float | None = None) -> None:
+        """Press home button."""
+        self._device(device_id).home(delay)
+    def launch_app(
+        self, app_name: str, device_id: str | None = None, delay: float | None = None
+    ) -> bool:
+        """Launch an app."""
+        return self._device(device_id).launch_app(app_name, delay)
+    # === State Query ===
+    def get_current_app(self, device_id: str | None = None) -> str:
+        """Get current app name."""
+        return self._device(device_id).get_current_app()
+    # === Keyboard Management ===
+    def detect_and_set_adb_keyboard(self, device_id: str | None = None) -> str:
+        """Detect and set keyboard."""
+        return self._device(device_id).detect_and_set_adb_keyboard()
+    def restore_keyboard(self, ime: str, device_id: str | None = None) -> None:
+        """Restore keyboard."""
+        self._device(device_id).restore_keyboard(ime)
+    # === Device Management ===
+    def list_devices(self) -> list[str]:
+        """
+        List connected devices.
+        Note: This is a simplified implementation. For full device listing,
+        use ADBDeviceManager.list_devices() directly.
+        """
+        # This is called by some parts of phone_agent
+        # Return the default device if available
+        if self._default_device_id:
+            return [self._default_device_id]
+        return []
+    def get_connection_class(self):
+        """Not applicable for protocol adapter."""
+        raise NotImplementedError(
+            "Protocol adapter does not support get_connection_class. "
+            "Use ADBDeviceManager for connection management."
+        )
+# Store original factory for restoration
+_original_factory = None
+def inject_device_protocol(
+    get_device: Callable[[str | None], DeviceProtocol],
+    default_device_id: str | None = None,
+) -> DeviceProtocolAdapter:
+    """
+    Inject a DeviceProtocol implementation into phone_agent.
+    This replaces phone_agent's global _device_factory with an adapter
+    that routes all device operations through the provided DeviceProtocol.
+    Args:
+        get_device: Function that returns a DeviceProtocol given a device_id.
+        default_device_id: Default device ID when None is passed.
+    Returns:
+        The adapter instance (for inspection or further configuration).
+    Example:
+        >>> # Single mock device
+        >>> mock = MockDevice("mock_001", state_machine)
+        >>> inject_device_protocol(lambda _: mock)
+        >>>
+        >>> # Multiple devices
+        >>> devices = {
+        ...     "phone_1": ADBDevice("emulator-5554"),
+        ...     "phone_2": RemoteDevice("phone_2", "http://remote:8080"),
+        ... }
+        >>> inject_device_protocol(lambda did: devices.get(did, devices["phone_1"]))
+    """
+    # TODO： 不应该依赖这种全部变量
+    global _original_factory
+    # Save original factory if not already saved
+    if _original_factory is None:
+        _original_factory = device_factory_module._device_factory
+    # Create and inject adapter
+    adapter = DeviceProtocolAdapter(get_device, default_device_id)
+    device_factory_module._device_factory = adapter
+    return adapter
+def restore_device_factory() -> None:
+    """
+    Restore the original device factory.
+    Call this after testing to restore normal operation.
+    """
+    global _original_factory
+    if _original_factory is not None:
+        device_factory_module._device_factory = _original_factory
+        _original_factory = None
+class DeviceProtocolContext:
+    """
+    Context manager for temporarily injecting a DeviceProtocol.
+    Example:
+        >>> with DeviceProtocolContext(lambda _: mock_device):
+        ...     agent.run("test instruction")
+        >>> # Original factory is automatically restored
+    """
+    def __init__(
+        self,
+        get_device: Callable[[str | None], DeviceProtocol],
+        default_device_id: str | None = None,
+    ):
+        """
+        Initialize context.
+        Args:
+            get_device: Function that returns a DeviceProtocol given a device_id.
+            default_device_id: Default device ID when None is passed.
+        """
+        self._get_device = get_device
+        self._default_device_id = default_device_id
+        self._original_factory = None
+    def __enter__(self) -> DeviceProtocolAdapter:
+        """Enter context and inject adapter."""
+        self._original_factory = device_factory_module._device_factory
+        return inject_device_protocol(self._get_device, self._default_device_id)
+    def __exit__(self, exc_type, exc_val, exc_tb) -> None:
+        """Exit context and restore original factory."""
+        device_factory_module._device_factory = self._original_factory
+        return None

autoglm-gui 1.3.1__py3-none-any.whl → 1.4.1__py3-none-any.whl

autoglm-gui 1.3.1__py3-none-any.whl → 1.4.1__py3-none-any.whl