PyPI - autoglm-gui - Versions diffs - 1.3.1__py3-none-any.whl → 1.4.1__py3-none-any.whl - Mend

autoglm-gui 1.3.1__py3-none-any.whl → 1.4.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (61) hide show

AutoGLM_GUI/__main__.py +0 -4
AutoGLM_GUI/adb_plus/qr_pair.py +8 -8
AutoGLM_GUI/agents/__init__.py +20 -0
AutoGLM_GUI/agents/factory.py +160 -0
AutoGLM_GUI/agents/mai_adapter.py +627 -0
AutoGLM_GUI/agents/protocols.py +23 -0
AutoGLM_GUI/api/__init__.py +50 -7
AutoGLM_GUI/api/agents.py +61 -19
AutoGLM_GUI/api/devices.py +12 -18
AutoGLM_GUI/api/dual_model.py +24 -17
AutoGLM_GUI/api/health.py +13 -0
AutoGLM_GUI/api/layered_agent.py +659 -0
AutoGLM_GUI/api/mcp.py +11 -10
AutoGLM_GUI/api/version.py +23 -10
AutoGLM_GUI/api/workflows.py +2 -1
AutoGLM_GUI/config_manager.py +56 -24
AutoGLM_GUI/device_adapter.py +263 -0
AutoGLM_GUI/device_protocol.py +266 -0
AutoGLM_GUI/devices/__init__.py +49 -0
AutoGLM_GUI/devices/adb_device.py +205 -0
AutoGLM_GUI/devices/mock_device.py +183 -0
AutoGLM_GUI/devices/remote_device.py +172 -0
AutoGLM_GUI/dual_model/decision_model.py +4 -4
AutoGLM_GUI/dual_model/protocols.py +3 -3
AutoGLM_GUI/exceptions.py +3 -3
AutoGLM_GUI/mai_ui_adapter/agent_wrapper.py +291 -0
AutoGLM_GUI/metrics.py +13 -20
AutoGLM_GUI/phone_agent_manager.py +219 -134
AutoGLM_GUI/phone_agent_patches.py +2 -1
AutoGLM_GUI/platform_utils.py +5 -2
AutoGLM_GUI/prompts.py +6 -1
AutoGLM_GUI/schemas.py +45 -14
AutoGLM_GUI/scrcpy_stream.py +17 -13
AutoGLM_GUI/server.py +3 -1
AutoGLM_GUI/socketio_server.py +16 -4
AutoGLM_GUI/state.py +10 -30
AutoGLM_GUI/static/assets/{about-Cj6QXqMf.js → about-_XNhzQZX.js} +1 -1
AutoGLM_GUI/static/assets/chat-DwJpiAWf.js +126 -0
AutoGLM_GUI/static/assets/{dialog-CxJlnjzH.js → dialog-B3uW4T8V.js} +3 -3
AutoGLM_GUI/static/assets/index-Cpv2gSF1.css +1 -0
AutoGLM_GUI/static/assets/{index-C_B-Arvf.js → index-Cy8TmmHV.js} +1 -1
AutoGLM_GUI/static/assets/{index-CxJQuE4y.js → index-UYYauTly.js} +6 -6
AutoGLM_GUI/static/assets/{workflows-BTiGCNI0.js → workflows-Du_de-dt.js} +1 -1
AutoGLM_GUI/static/index.html +2 -2
AutoGLM_GUI/types.py +125 -0
{autoglm_gui-1.3.1.dist-info → autoglm_gui-1.4.1.dist-info}/METADATA +147 -65
{autoglm_gui-1.3.1.dist-info → autoglm_gui-1.4.1.dist-info}/RECORD +58 -39
mai_agent/base.py +137 -0
mai_agent/mai_grounding_agent.py +263 -0
mai_agent/mai_naivigation_agent.py +526 -0
mai_agent/prompt.py +148 -0
mai_agent/unified_memory.py +67 -0
mai_agent/utils.py +73 -0
phone_agent/config/prompts.py +6 -1
phone_agent/config/prompts_zh.py +6 -1
AutoGLM_GUI/config.py +0 -23
AutoGLM_GUI/static/assets/chat-BJeomZgh.js +0 -124
AutoGLM_GUI/static/assets/index-Z0uYCPOO.css +0 -1
{autoglm_gui-1.3.1.dist-info → autoglm_gui-1.4.1.dist-info}/WHEEL +0 -0
{autoglm_gui-1.3.1.dist-info → autoglm_gui-1.4.1.dist-info}/entry_points.txt +0 -0
{autoglm_gui-1.3.1.dist-info → autoglm_gui-1.4.1.dist-info}/licenses/LICENSE +0 -0

AutoGLM_GUI/devices/remote_device.py ADDED Viewed

@@ -0,0 +1,172 @@
+"""Remote Device implementation using HTTP.
+This module provides a RemoteDevice that connects to a Device Agent
+via HTTP, allowing remote control of devices.
+"""
+import httpx
+from AutoGLM_GUI.device_protocol import DeviceInfo, Screenshot
+class RemoteDevice:
+    """
+    Device proxy that forwards every operation to a Device Agent over HTTP.
+    The Device Agent server performs the real work; which backend it uses
+    (ADB, Accessibility, Mock, etc.) is the server's decision.
+    Example:
+        >>> device = RemoteDevice("phone_001", "http://localhost:8001")
+        >>> screenshot = device.get_screenshot()
+        >>> device.tap(100, 200)
+    """
+    def __init__(self, device_id: str, base_url: str, timeout: float = 30.0):
+        # Trailing slashes are dropped so endpoint paths can be appended as-is.
+        self._base_url = base_url.rstrip("/")
+        self._device_id = device_id
+        self._client = httpx.Client(timeout=timeout)
+    @property
+    def device_id(self) -> str:
+        """Identifier of the device this proxy talks to."""
+        return self._device_id
+    def _url(self, endpoint: str) -> str:
+        """Build the absolute URL of a per-device endpoint."""
+        return f"{self._base_url}/device/{self._device_id}{endpoint}"
+    def _post(self, endpoint: str, json: dict | None = None) -> dict:
+        """POST ``json`` (an empty object when omitted) and return the decoded body.
+        Raises whatever ``raise_for_status()`` raises for an error response.
+        """
+        payload = json or {}
+        response = self._client.post(self._url(endpoint), json=payload)
+        response.raise_for_status()
+        return response.json()
+    def _get(self, endpoint: str) -> dict:
+        """GET a per-device endpoint and return the decoded body.
+        Raises whatever ``raise_for_status()`` raises for an error response.
+        """
+        response = self._client.get(self._url(endpoint))
+        response.raise_for_status()
+        return response.json()
+    def get_screenshot(self, timeout: int = 10) -> Screenshot:
+        """Request a screenshot and wrap the reply in a ``Screenshot``.
+        ``is_sensitive`` defaults to False when the server omits it.
+        """
+        reply = self._post("/screenshot", dict(timeout=timeout))
+        return Screenshot(
+            base64_data=reply["base64_data"],
+            width=reply["width"],
+            height=reply["height"],
+            is_sensitive=reply.get("is_sensitive", False),
+        )
+    def tap(self, x: int, y: int, delay: float | None = None) -> None:
+        """Ask the server to tap at (x, y)."""
+        self._post("/tap", dict(x=x, y=y, delay=delay))
+    def double_tap(self, x: int, y: int, delay: float | None = None) -> None:
+        """Ask the server to double-tap at (x, y)."""
+        self._post("/double_tap", dict(x=x, y=y, delay=delay))
+    def long_press(
+        self, x: int, y: int, duration_ms: int = 3000, delay: float | None = None
+    ) -> None:
+        """Ask the server to long-press at (x, y) for ``duration_ms``."""
+        body = dict(x=x, y=y, duration_ms=duration_ms, delay=delay)
+        self._post("/long_press", body)
+    def swipe(
+        self,
+        start_x: int,
+        start_y: int,
+        end_x: int,
+        end_y: int,
+        duration_ms: int | None = None,
+        delay: float | None = None,
+    ) -> None:
+        """Ask the server to swipe from the start point to the end point."""
+        body = dict(
+            start_x=start_x,
+            start_y=start_y,
+            end_x=end_x,
+            end_y=end_y,
+            duration_ms=duration_ms,
+            delay=delay,
+        )
+        self._post("/swipe", body)
+    def type_text(self, text: str) -> None:
+        """Ask the server to type ``text``."""
+        self._post("/type_text", dict(text=text))
+    def clear_text(self) -> None:
+        """Ask the server to clear the current text input."""
+        self._post("/clear_text")
+    def back(self, delay: float | None = None) -> None:
+        """Ask the server to perform the back action."""
+        self._post("/back", dict(delay=delay))
+    def home(self, delay: float | None = None) -> None:
+        """Ask the server to perform the home action."""
+        self._post("/home", dict(delay=delay))
+    def launch_app(self, app_name: str, delay: float | None = None) -> bool:
+        """Ask the server to launch ``app_name``.
+        Returns the reply's ``success`` flag, or True when the flag is absent.
+        """
+        reply = self._post("/launch_app", dict(app_name=app_name, delay=delay))
+        return reply.get("success", True)
+    def get_current_app(self) -> str:
+        """Return the ``app_name`` reported by the server."""
+        return self._get("/current_app")["app_name"]
+    def detect_and_set_adb_keyboard(self) -> str:
+        """Call the keyboard-detection endpoint.
+        Returns the reply's ``original_ime``, or an empty string when absent.
+        """
+        return self._post("/detect_keyboard").get("original_ime", "")
+    def restore_keyboard(self, ime: str) -> None:
+        """Ask the server to restore the input method ``ime``."""
+        self._post("/restore_keyboard", dict(ime=ime))
+    def close(self) -> None:
+        """Close the underlying HTTP client."""
+        self._client.close()
+    def __enter__(self):
+        return self
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        # Always release the HTTP client when leaving the ``with`` block.
+        self.close()
+class RemoteDeviceManager:
+    """
+    Remote device manager using HTTP.
+    Talks to a Device Agent server and hands out one cached ``RemoteDevice``
+    per device id. Every cached device owns its own HTTP client, so the
+    manager closes a device whenever it drops it from the cache.
+    Usable as a context manager; leaving the block calls ``close()``.
+    """
+    def __init__(self, base_url: str, timeout: float = 30.0):
+        # Trailing slashes are dropped so endpoint paths can be appended as-is.
+        self._base_url = base_url.rstrip("/")
+        self._timeout = timeout
+        self._client = httpx.Client(timeout=timeout)
+        self._devices: dict[str, RemoteDevice] = {}
+    def list_devices(self) -> list[DeviceInfo]:
+        """Fetch ``/devices`` and build one ``DeviceInfo`` per returned entry.
+        Raises whatever ``raise_for_status()`` raises for an error response.
+        """
+        resp = self._client.get(f"{self._base_url}/devices")
+        resp.raise_for_status()
+        return [DeviceInfo(**d) for d in resp.json()]
+    def get_device(self, device_id: str) -> RemoteDevice:
+        """Return the cached ``RemoteDevice`` for ``device_id``, creating it on first use."""
+        if device_id not in self._devices:
+            self._devices[device_id] = RemoteDevice(
+                device_id, self._base_url, self._timeout
+            )
+        return self._devices[device_id]
+    def connect(self, address: str, timeout: int = 10) -> tuple[bool, str]:
+        """Ask the server to connect to ``address``.
+        Returns ``(success, message)`` taken from the reply body; a missing
+        ``success`` counts as False and a missing ``message`` as "".
+        The HTTP status is not checked here; only the body is read.
+        """
+        resp = self._client.post(
+            f"{self._base_url}/connect", json={"address": address, "timeout": timeout}
+        )
+        data = resp.json()
+        return data.get("success", False), data.get("message", "")
+    def disconnect(self, device_id: str) -> tuple[bool, str]:
+        """Drop the cached device and ask the server to disconnect it.
+        Returns ``(success, message)`` taken from the reply body; a missing
+        ``success`` counts as True and a missing ``message`` as "Disconnected".
+        The HTTP status is not checked here; only the body is read.
+        """
+        device = self._devices.pop(device_id, None)
+        if device is not None:
+            # The cached device owns its own HTTP client; close it so the
+            # client is not leaked once the device leaves the cache.
+            device.close()
+        resp = self._client.post(
+            f"{self._base_url}/disconnect", json={"device_id": device_id}
+        )
+        data = resp.json()
+        return data.get("success", True), data.get("message", "Disconnected")
+    def close(self) -> None:
+        """Close every cached device, empty the cache and close the manager's client."""
+        try:
+            for device in self._devices.values():
+                device.close()
+        finally:
+            # Forget the closed devices so get_device() never returns one,
+            # and release the manager's client even if a device close failed.
+            self._devices.clear()
+            self._client.close()
+    def __enter__(self):
+        return self
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        self.close()

AutoGLM_GUI/dual_model/decision_model.py CHANGED Viewed

@@ -49,8 +49,8 @@ class ActionStep:
     need_generate: bool = False
     direction: Optional[str] = None
-    def to_dict(self) -> dict:
-        result = {"action": self.action, "target": self.target}
+    def to_dict(self) -> dict[str, str | bool]:
+        result: dict[str, str | bool] = {"action": self.action, "target": self.target}
         if self.content:
             result["content"] = self.content
         if self.need_generate:
@@ -127,7 +127,7 @@ class DecisionModel:
         self.client = OpenAI(
             base_url=config.base_url,
             api_key=config.api_key,
-        )
+        )  # type: ignore[call-arg]
         self.model_name = config.model_name
         self.conversation_history: list[dict] = []
         self.current_task: str = ""
@@ -159,7 +159,7 @@ class DecisionModel:
         try:
             response = self.client.chat.completions.create(
                 model=self.model_name,
-                messages=messages,
+                messages=messages,  # type: ignore[arg-type]
                 max_tokens=self.config.max_tokens,
                 temperature=self.config.temperature,
                 stream=True,

AutoGLM_GUI/dual_model/protocols.py CHANGED Viewed

@@ -21,9 +21,9 @@ class ThinkingMode(str, Enum):
 class DecisionModelConfig(BaseModel):
     """决策大模型配置"""
-    base_url: str = "https://api-inference.modelscope.cn/v1"
+    base_url: str
     api_key: str = ""
-    model_name: str = "ZhipuAI/GLM-4.7"
+    model_name: str
     max_tokens: int = 4096
     temperature: float = 0.7
     thinking_mode: ThinkingMode = ThinkingMode.DEEP
@@ -33,7 +33,7 @@ class DualModelConfig(BaseModel):
     """双模型协作配置"""
     enabled: bool = False
-    decision_model: DecisionModelConfig = DecisionModelConfig()
+    decision_model: Optional[DecisionModelConfig] = None
 class ModelRole(str, Enum):

AutoGLM_GUI/exceptions.py CHANGED Viewed

@@ -79,9 +79,9 @@ class AgentInitializationError(Exception):
     How to fix:
         1. Check configuration:
             >>> from AutoGLM_GUI.config_manager import config_manager
-            >>> config = config_manager.get_effective_config()
-            >>> print(f"base_url: {config.base_url}")
-            >>> print(f"model_name: {config.model_name}")
+            >>> effective_config = config_manager.get_effective_config()
+            >>> print(f"base_url: {effective_config.base_url}")
+            >>> print(f"model_name: {effective_config.model_name}")
         2. Set configuration:
             >>> via API: POST /api/config {"base_url": "...", "model_name": "...", "api_key": "..."}

AutoGLM_GUI/mai_ui_adapter/agent_wrapper.py ADDED Viewed

@@ -0,0 +1,291 @@
+"""MAI-UI PhoneAgent wrapper for compatibility with AutoGLM-GUI interface."""
+from dataclasses import dataclass
+from typing import Any, Callable, Optional
+from phone_agent.agent import AgentConfig, StepResult
+from phone_agent.actions.handler import ActionHandler
+from phone_agent.model import ModelConfig
+from AutoGLM_GUI.logger import logger
+from AutoGLM_GUI.mai_ui.mai_navigation_agent import MAIUINaivigationAgent  # type: ignore[import-not-found]
+from AutoGLM_GUI.mai_ui_adapter.action_adapter import MAIUIActionAdapter  # type: ignore[import-not-found]
+@dataclass
+class MAIUIConfig:
+    """MAI-UI specific configuration.
+    MAIUIPhoneAgent.__init__ copies every field unchanged into the
+    ``runtime_conf`` dict handed to the MAI-UI navigation agent.
+    """
+    # presumably how many previous steps are kept as history — TODO confirm
+    history_n: int = 3
+    # presumably the sampling temperature sent to the model — TODO confirm
+    temperature: float = 0.0
+    # NOTE(review): -1 looks like a "disabled" sentinel for top-k — confirm
+    top_k: int = -1
+    # presumably the nucleus-sampling threshold — TODO confirm
+    top_p: float = 1.0
+    # presumably the cap on generated tokens per model call — TODO confirm
+    max_tokens: int = 2048
+class MAIUIPhoneAgent:
+    """
+    MAI-UI Agent wrapper that implements the PhoneAgent interface.
+    This wrapper allows MAI-UI agents to be used transparently in place of
+    the standard PhoneAgent, providing compatibility with the existing
+    PhoneAgentManager and API infrastructure.
+    Usage:
+        agent = MAIUIPhoneAgent(
+            model_config=model_config,
+            agent_config=agent_config,
+        )
+        result = agent.run("Open WeChat")
+    """
+    def __init__(
+        self,
+        model_config: ModelConfig,
+        agent_config: AgentConfig,
+        mai_config: Optional[MAIUIConfig] = None,
+        takeover_callback: Optional[Callable[[str], None]] = None,
+    ):
+        """
+        Initialize MAI-UI PhoneAgent wrapper.
+        Args:
+            model_config: Model configuration (base_url, api_key, model_name).
+            agent_config: Agent configuration (device_id, max_steps, etc.).
+            mai_config: MAI-UI specific configuration; defaults to MAIUIConfig().
+            takeover_callback: Callback for takeover requests.
+        """
+        self.model_config = model_config
+        self.agent_config = agent_config
+        self.mai_config = mai_config or MAIUIConfig()
+        # Create MAI-UI navigation agent
+        self._mai_agent = MAIUINaivigationAgent(
+            llm_base_url=model_config.base_url,
+            model_name=model_config.model_name,
+            api_key=model_config.api_key,
+            runtime_conf={
+                "history_n": self.mai_config.history_n,
+                "temperature": self.mai_config.temperature,
+                "top_k": self.mai_config.top_k,
+                "top_p": self.mai_config.top_p,
+                "max_tokens": self.mai_config.max_tokens,
+            },
+        )
+        # Action adapter and handler
+        self._action_adapter = MAIUIActionAdapter()
+        self.action_handler = ActionHandler(
+            device_id=agent_config.device_id,
+            takeover_callback=takeover_callback,
+        )
+        # PhoneAgent-compatible state
+        self._context: list[dict[str, Any]] = []
+        self._step_count = 0
+        self._current_task: str = ""
+        # For model_client compatibility (used by streaming patches)
+        self.model_client = _DummyModelClient()
+        # Debug: Print model configuration for troubleshooting
+        logger.info("=" * 60)
+        logger.info("[MAI-UI Agent] Initialization")
+        logger.info(f"  Device ID: {agent_config.device_id}")
+        logger.info(f"  Base URL:  {model_config.base_url}")
+        logger.info(f"  Model:     {model_config.model_name}")
+        logger.info("=" * 60)
+    def run(self, task: str) -> str:
+        """
+        Execute a complete task.
+        Resets the agent, runs one step, then keeps stepping until a step
+        reports finished or the step count reaches agent_config.max_steps.
+        Args:
+            task: Natural language task description.
+        Returns:
+            Final message from the agent, "Task completed" when the finishing
+            step carries no message, or "Max steps reached".
+        """
+        self.reset()
+        self._current_task = task
+        # First step
+        result = self._execute_step(task, is_first=True)
+        if result.finished:
+            return result.message or "Task completed"
+        # Continue until finished or max steps reached
+        while self._step_count < self.agent_config.max_steps:
+            result = self._execute_step(is_first=False)
+            if result.finished:
+                return result.message or "Task completed"
+        return "Max steps reached"
+    def step(self, task: Optional[str] = None) -> StepResult:
+        """
+        Execute a single step.
+        Args:
+            task: Task description (required for first step).
+        Returns:
+            StepResult with step details.
+        Raises:
+            ValueError: If no task is given and no task is in progress.
+        """
+        is_first = len(self._context) == 0
+        if is_first and task:
+            self._current_task = task
+        elif is_first and not self._current_task:
+            raise ValueError("Task is required for the first step")
+        # An empty context does not always mean "no task yet": a step whose
+        # action execution raised returns finished=False without recording
+        # context. In that case the stored task is reused instead of
+        # rejecting the follow-up call.
+        return self._execute_step(task, is_first)
+    def _execute_step(
+        self, user_prompt: Optional[str] = None, is_first: bool = False
+    ) -> StepResult:
+        """Execute a single step of the agent loop.
+        Takes a screenshot, asks the MAI-UI agent for an action, converts it
+        and executes it through the action handler.
+        Args:
+            user_prompt: Instruction for this step; falls back to the stored task.
+            is_first: Accepted for PhoneAgent compatibility; not read here.
+        Returns:
+            StepResult for the step. A prediction error or an action without
+            an "action" value ends the task (finished=True, success=False);
+            an action-execution error reports success=False, finished=False.
+        """
+        import base64
+        from io import BytesIO
+        from phone_agent.device_factory import get_device_factory
+        from PIL import Image
+        self._step_count += 1
+        logger.info(f"[MAI-UI] Executing step {self._step_count}")
+        # Get screenshot
+        device_factory = get_device_factory()
+        screenshot = device_factory.get_screenshot(self.agent_config.device_id)
+        # Convert base64 to PIL Image
+        image_bytes = base64.b64decode(screenshot.base64_data)
+        pil_image = Image.open(BytesIO(image_bytes))
+        # Build observation
+        obs = {
+            "screenshot": pil_image,
+            "accessibility_tree": None,
+        }
+        # Get instruction
+        instruction = user_prompt or self._current_task
+        # Call MAI-UI predict; only the parsed action is used, the raw
+        # response text is discarded.
+        try:
+            _, action_json = self._mai_agent.predict(
+                instruction=instruction,
+                obs=obs,
+            )
+        except Exception as e:
+            logger.error(f"[MAI-UI] Predict failed: {e}")
+            return StepResult(
+                success=False,
+                finished=True,
+                action=None,
+                thinking="",
+                message=f"Prediction failed: {e}",
+            )
+        # Check for error
+        if action_json.get("action") is None:
+            logger.error("[MAI-UI] Invalid action returned")
+            return StepResult(
+                success=False,
+                finished=True,
+                action=None,
+                thinking="",
+                message="Invalid action from model",
+            )
+        # Get thinking from trajectory
+        thinking = ""
+        if self._mai_agent.traj_memory.steps:
+            last_step = self._mai_agent.traj_memory.steps[-1]
+            thinking = last_step.thought or ""
+        # Convert action to AutoGLM-GUI format
+        converted_action = self._action_adapter.convert(action_json)
+        logger.debug(f"[MAI-UI] Converted action: {converted_action}")
+        # Check if finished (terminate action)
+        if converted_action.get("_metadata") == "finish":
+            return StepResult(
+                success=True,
+                finished=True,
+                action=converted_action,
+                thinking=thinking,
+                message=converted_action.get("message", "Task completed"),
+            )
+        # Execute action
+        try:
+            result = self.action_handler.execute(
+                converted_action,
+                screenshot.width,
+                screenshot.height,
+            )
+        except Exception as e:
+            logger.error(f"[MAI-UI] Action execution failed: {e}")
+            # Not finished: the caller may keep stepping after a failed action.
+            return StepResult(
+                success=False,
+                finished=False,
+                action=converted_action,
+                thinking=thinking,
+                message=f"Action failed: {e}",
+            )
+        # Update context for compatibility
+        self._context.append(
+            {
+                "step": self._step_count,
+                "action": action_json,
+                "converted_action": converted_action,
+                "result": result.success,
+                "thinking": thinking,
+            }
+        )
+        return StepResult(
+            success=result.success,
+            finished=result.should_finish,
+            action=converted_action,
+            thinking=thinking,
+            message=result.message,
+        )
+    def reset(self) -> None:
+        """Reset agent state for a new task."""
+        self._context = []
+        self._step_count = 0
+        self._current_task = ""
+        self._mai_agent.reset()
+        logger.debug("[MAI-UI] Agent reset")
+    @property
+    def step_count(self) -> int:
+        """Get current step count."""
+        return self._step_count
+    @property
+    def context(self) -> list[dict[str, Any]]:
+        """Get conversation context (for compatibility).
+        Returns a shallow copy, so changes to the returned list do not affect the agent.
+        """
+        return self._context.copy()
+class _DummyModelClient:
+    """
+    Stand-in exposed as ``model_client`` for the streaming patches.
+    MAI-UI performs its own model calls, so this object exists only so that
+    code expecting a ``model_client`` attribute finds one.
+    """
+    def request(self, messages: list, **kwargs) -> Any:
+        """Always raise NotImplementedError; this stub must never be invoked."""
+        reason = "MAIUIPhoneAgent handles model calls internally. "
+        advice = "Do not call model_client.request() directly."
+        raise NotImplementedError(reason + advice)

AutoGLM_GUI/metrics.py CHANGED Viewed

@@ -87,14 +87,12 @@ class AutoGLMMetricsCollector(Collector):
         busy_count = 0
         with manager._manager_lock:
-            # Get snapshots (shallow copy to minimize lock time)
+            # Get snapshot (shallow copy to minimize lock time)
             metadata_snapshot = dict(manager._metadata)
-            states_snapshot = dict(manager._states)
-        # Iterate over _states (not _metadata) to capture failed agents
-        for device_id, state in states_snapshot.items():
-            # Get metadata if exists (will be None for failed initialization)
-            metadata = metadata_snapshot.get(device_id)
+        # Iterate over _metadata (state is stored in AgentMetadata.state)
+        for device_id, metadata in metadata_snapshot.items():
+            state = metadata.state
             # Get serial from DeviceManager
             with device_manager._devices_lock:
@@ -113,20 +111,15 @@ class AutoGLMMetricsCollector(Collector):
             if state == AgentState.BUSY:
                 busy_count += 1
-            # Timestamps (0 if metadata doesn't exist, e.g., failed init)
-            if metadata:
-                last_used_gauge.add_metric(
-                    [device_id, serial],
-                    metadata.last_used,
-                )
-                created_gauge.add_metric(
-                    [device_id, serial],
-                    metadata.created_at,
-                )
-            else:
-                # Failed initialization: report 0 timestamps
-                last_used_gauge.add_metric([device_id, serial], 0)
-                created_gauge.add_metric([device_id, serial], 0)
+            # Timestamps from metadata
+            last_used_gauge.add_metric(
+                [device_id, serial],
+                metadata.last_used,
+            )
+            created_gauge.add_metric(
+                [device_id, serial],
+                metadata.created_at,
+            )
         metrics.extend([agents_gauge, last_used_gauge, created_gauge])

autoglm-gui 1.3.1__py3-none-any.whl → 1.4.1__py3-none-any.whl

autoglm-gui 1.3.1__py3-none-any.whl → 1.4.1__py3-none-any.whl