PyPI - autoglm-gui - Versions diffs - 1.5.0__py3-none-any.whl → 1.5.1__py3-none-any.whl - Mend

autoglm-gui 1.5.0__py3-none-any.whl → 1.5.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (74) hide show

AutoGLM_GUI/agents/glm/agent.py +6 -1
AutoGLM_GUI/agents/mai/agent.py +3 -0
AutoGLM_GUI/agents/stream_runner.py +7 -2
AutoGLM_GUI/api/agents.py +26 -1
AutoGLM_GUI/api/history.py +27 -1
AutoGLM_GUI/models/history.py +45 -1
AutoGLM_GUI/scheduler_manager.py +52 -6
AutoGLM_GUI/schemas.py +12 -0
AutoGLM_GUI/static/assets/{about-BQm96DAl.js → about-CfwX1Cmc.js} +1 -1
AutoGLM_GUI/static/assets/{alert-dialog-B42XxGPR.js → alert-dialog-CtGlN2IJ.js} +1 -1
AutoGLM_GUI/static/assets/chat-BYa-foUI.js +129 -0
AutoGLM_GUI/static/assets/{circle-alert-D4rSJh37.js → circle-alert-t08bEMPO.js} +1 -1
AutoGLM_GUI/static/assets/{dialog-DZ78cEcj.js → dialog-FNwZJFwk.js} +1 -1
AutoGLM_GUI/static/assets/eye-D0UPWCWC.js +1 -0
AutoGLM_GUI/static/assets/history-CRo95B7i.js +1 -0
AutoGLM_GUI/static/assets/{index-CmZSnDqc.js → index-BaLMSqd3.js} +1 -1
AutoGLM_GUI/static/assets/{index-CssG-3TH.js → index-CTHbFvKl.js} +5 -5
AutoGLM_GUI/static/assets/index-CV7jGxGm.css +1 -0
AutoGLM_GUI/static/assets/{label-BCUzE_nm.js → label-DJFevVmr.js} +1 -1
AutoGLM_GUI/static/assets/{logs-eoFxn5of.js → logs-RW09DyYY.js} +1 -1
AutoGLM_GUI/static/assets/{popover-DLsuV5Sx.js → popover--JTJrE5v.js} +1 -1
AutoGLM_GUI/static/assets/{scheduled-tasks-MyqGJvy_.js → scheduled-tasks-DTRKsQXF.js} +1 -1
AutoGLM_GUI/static/assets/{square-pen-zGWYrdfj.js → square-pen-CPK_K680.js} +1 -1
AutoGLM_GUI/static/assets/{textarea-BX6y7uM5.js → textarea-PRmVnWq5.js} +1 -1
AutoGLM_GUI/static/assets/{workflows-CYFs6ssC.js → workflows-CdcsAoaT.js} +1 -1
AutoGLM_GUI/static/index.html +2 -2
{autoglm_gui-1.5.0.dist-info → autoglm_gui-1.5.1.dist-info}/METADATA +49 -7
{autoglm_gui-1.5.0.dist-info → autoglm_gui-1.5.1.dist-info}/RECORD +31 -70
AutoGLM_GUI/device_adapter.py +0 -263
AutoGLM_GUI/static/assets/chat-C0L2gQYG.js +0 -129
AutoGLM_GUI/static/assets/history-DFBv7TGc.js +0 -1
AutoGLM_GUI/static/assets/index-Bzyv2yQ2.css +0 -1
mai_agent/base.py +0 -137
mai_agent/mai_grounding_agent.py +0 -263
mai_agent/mai_naivigation_agent.py +0 -526
mai_agent/prompt.py +0 -148
mai_agent/unified_memory.py +0 -67
mai_agent/utils.py +0 -73
phone_agent/__init__.py +0 -12
phone_agent/actions/__init__.py +0 -5
phone_agent/actions/handler.py +0 -400
phone_agent/actions/handler_ios.py +0 -278
phone_agent/adb/__init__.py +0 -51
phone_agent/adb/connection.py +0 -358
phone_agent/adb/device.py +0 -253
phone_agent/adb/input.py +0 -108
phone_agent/adb/screenshot.py +0 -108
phone_agent/agent.py +0 -253
phone_agent/agent_ios.py +0 -277
phone_agent/config/__init__.py +0 -53
phone_agent/config/apps.py +0 -227
phone_agent/config/apps_harmonyos.py +0 -256
phone_agent/config/apps_ios.py +0 -339
phone_agent/config/i18n.py +0 -81
phone_agent/config/prompts.py +0 -80
phone_agent/config/prompts_en.py +0 -79
phone_agent/config/prompts_zh.py +0 -82
phone_agent/config/timing.py +0 -167
phone_agent/device_factory.py +0 -166
phone_agent/hdc/__init__.py +0 -53
phone_agent/hdc/connection.py +0 -384
phone_agent/hdc/device.py +0 -269
phone_agent/hdc/input.py +0 -145
phone_agent/hdc/screenshot.py +0 -127
phone_agent/model/__init__.py +0 -5
phone_agent/model/client.py +0 -290
phone_agent/xctest/__init__.py +0 -47
phone_agent/xctest/connection.py +0 -379
phone_agent/xctest/device.py +0 -472
phone_agent/xctest/input.py +0 -311
phone_agent/xctest/screenshot.py +0 -226
{autoglm_gui-1.5.0.dist-info → autoglm_gui-1.5.1.dist-info}/WHEEL +0 -0
{autoglm_gui-1.5.0.dist-info → autoglm_gui-1.5.1.dist-info}/entry_points.txt +0 -0
{autoglm_gui-1.5.0.dist-info → autoglm_gui-1.5.1.dist-info}/licenses/LICENSE +0 -0

mai_agent/unified_memory.py DELETED Viewed

@@ -1,67 +0,0 @@
-# Copyright (c) 2025, Alibaba Cloud and its affiliates;
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-"""Unified memory structures for trajectory tracking."""
-from dataclasses import dataclass, field
-from typing import Any, Dict, List, Optional
-from PIL import Image
-@dataclass
-class TrajStep:
-    """
-    Represents a single step in an agent's trajectory.
-    The first nine fields have no default and must all be supplied; the last
-    two fields default to ``None``.
-    Attributes:
-        screenshot: PIL Image of the screen at this step.
-        accessibility_tree: Accessibility tree data for the screen.
-        prediction: Raw model prediction/response.
-        action: Parsed action dictionary.
-        conclusion: Conclusion or summary of the step.
-        thought: Model's reasoning/thinking process.
-        step_index: Index of this step in the trajectory.
-        agent_type: Type of agent that produced this step.
-        model_name: Name of the model used.
-        screenshot_bytes: Original screenshot as bytes (for compatibility).
-        structured_action: Structured action with metadata.
-    """
-    # Required fields (no defaults).
-    screenshot: Image.Image
-    accessibility_tree: Optional[Dict[str, Any]]
-    prediction: str
-    action: Dict[str, Any]
-    conclusion: str
-    thought: str
-    step_index: int
-    agent_type: str
-    model_name: str
-    # Optional fields; both default to None.
-    screenshot_bytes: Optional[bytes] = None
-    structured_action: Optional[Dict[str, Any]] = None
-@dataclass
-class TrajMemory:
-    """
-    Container for a complete trajectory of agent steps.
-    Attributes:
-        task_goal: The goal/instruction for this trajectory.
-        task_id: Unique identifier for the task.
-        steps: List of trajectory steps; starts as a new empty list for
-            each instance.
-    """
-    task_goal: str
-    task_id: str
-    # default_factory gives every instance its own list instead of a shared one.
-    steps: List[TrajStep] = field(default_factory=list)

mai_agent/utils.py DELETED Viewed

@@ -1,73 +0,0 @@
-# Copyright (c) 2025, Alibaba Cloud and its affiliates;
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-"""Utility functions for image processing and conversion."""
-import base64
-from io import BytesIO
-from typing import Union, Optional, Tuple, Dict, Any
-from PIL import Image
-from PIL import ImageDraw
-def safe_pil_to_bytes(image: Union[Image.Image, bytes]) -> bytes:
-    """Return *image* as bytes, PNG-encoding it first when it is a PIL image.
-    Args:
-        image: A PIL image, or bytes that are passed through unchanged.
-    Returns:
-        The PNG-encoded image, or the input bytes as given.
-    Raises:
-        TypeError: If *image* is neither a PIL image nor bytes.
-    """
-    if isinstance(image, Image.Image):
-        png_stream = BytesIO()
-        image.save(png_stream, format="PNG")
-        return png_stream.getvalue()
-    if isinstance(image, bytes):
-        return image
-    raise TypeError(f"Expected PIL Image or bytes, got {type(image)}")
-def pil_to_base64(image: Image.Image) -> str:
-    """Encode *image* as PNG and return the base64 text of those bytes."""
-    png_stream = BytesIO()
-    image.save(png_stream, format="PNG")
-    encoded = base64.b64encode(png_stream.getvalue())
-    return encoded.decode("utf-8")
-def save_screenshot(screenshot: Image.Image, path: str) -> None:
-    """Save *screenshot* to *path* and print a confirmation line."""
-    screenshot.save(path)
-    confirmation = f"Screenshot saved in {path}"
-    print(confirmation)
-def extract_click_coordinates(action: Dict[str, Any]) -> Tuple[float, float]:
-    """Return the first two entries of ``action["coordinate"]`` as ``(x, y)``.
-    Args:
-        action: Action dictionary holding a ``"coordinate"`` sequence.
-    Returns:
-        A tuple of the first and second coordinate entries.
-    """
-    coordinate = action.get("coordinate")
-    # Index rather than unpack so that longer sequences are still accepted.
-    return (coordinate[0], coordinate[1])
-# Function to draw a click marker on an image
-def draw_clicks_on_image(
-    image_path: str,
-    click_coords: Tuple[float, float],
-    output_path: Optional[str] = None,
-) -> Image.Image:
-    """Open an image and mark a click position on it with a filled red circle.
-    Args:
-        image_path: Path of the image file to open.
-        click_coords: ``(x, y)`` position of the click; the marker is skipped
-            when either value is ``None``.
-        output_path: If given (and non-empty), the annotated image is also
-            saved there via ``save_screenshot``.
-    Returns:
-        The opened image, annotated when coordinates were available.
-    """
-    image = Image.open(image_path)
-    draw = ImageDraw.Draw(image)
-    (x, y) = click_coords
-    radius = 20
-    # Test against None instead of truthiness: 0 is a valid coordinate, and a
-    # click on the left or top edge must still be drawn.
-    if x is not None and y is not None:
-        draw.ellipse(
-            (x - radius, y - radius, x + radius, y + radius), fill="red", outline="red"
-        )
-    # Save the modified image
-    if output_path:
-        save_screenshot(image, output_path)
-    return image

phone_agent/__init__.py DELETED Viewed

@@ -1,12 +0,0 @@
-"""
-Phone Agent - An AI-powered phone automation framework.
-This package provides tools for automating Android and iOS phone interactions
-using AI models for visual understanding and decision making.
-"""
-from phone_agent.agent import PhoneAgent
-from phone_agent.agent_ios import IOSPhoneAgent
-# Package version string.
-__version__ = "0.1.0"
-# Public API: the two agent classes imported above.
-__all__ = ["PhoneAgent", "IOSPhoneAgent"]

phone_agent/actions/__init__.py DELETED Viewed

@@ -1,5 +0,0 @@
-"""Action handling module for Phone Agent."""
-from phone_agent.actions.handler import ActionHandler, ActionResult
-# Public API: re-export the handler class and its result type.
-__all__ = ["ActionHandler", "ActionResult"]

phone_agent/actions/handler.py DELETED Viewed

@@ -1,400 +0,0 @@
-"""Action handler for processing AI model outputs."""
-import ast
-import subprocess
-import time
-from dataclasses import dataclass
-from typing import Any, Callable
-from phone_agent.config.timing import TIMING_CONFIG
-from phone_agent.device_factory import get_device_factory
-@dataclass
-class ActionResult:
-    """Result of an action execution.
-    Attributes:
-        success: Whether the action was carried out without a reported failure.
-        should_finish: Finish flag for the caller. ActionHandler sets it to
-            True for ``finish`` actions, unknown action types and cancelled
-            sensitive operations.
-        message: Optional human-readable detail (error text, finish message).
-        requires_confirmation: Defaults to False.
-            NOTE(review): no code visible in this file sets it - confirm its
-            intended use with callers.
-    """
-    success: bool
-    should_finish: bool
-    message: str | None = None
-    requires_confirmation: bool = False
-class ActionHandler:
-    """
-    Handles execution of actions from AI model output.
-    An action is a dictionary whose ``_metadata`` value is ``"finish"`` or
-    ``"do"``; ``"do"`` actions are dispatched by their ``action`` name to the
-    matching ``_handle_*`` method.
-    Args:
-        device_id: Optional ADB device ID for multi-device setups.
-        confirmation_callback: Optional callback for sensitive action confirmation.
-            Should return True to proceed, False to cancel.
-        takeover_callback: Optional callback for takeover requests (login, captcha).
-    """
-    def __init__(
-        self,
-        device_id: str | None = None,
-        confirmation_callback: Callable[[str], bool] | None = None,
-        takeover_callback: Callable[[str], None] | None = None,
-    ):
-        self.device_id = device_id
-        # Fall back to the console-based callbacks when none are supplied.
-        self.confirmation_callback = confirmation_callback or self._default_confirmation
-        self.takeover_callback = takeover_callback or self._default_takeover
-    def execute(
-        self, action: dict[str, Any], screen_width: int, screen_height: int
-    ) -> ActionResult:
-        """
-        Execute an action from the AI model.
-        Args:
-            action: The action dictionary from the model.
-            screen_width: Current screen width in pixels.
-            screen_height: Current screen height in pixels.
-        Returns:
-            ActionResult indicating success and whether to finish. Exceptions
-            raised by a handler are reported as a failed, non-finishing result
-            rather than propagated.
-        """
-        action_type = action.get("_metadata")
-        if action_type == "finish":
-            return ActionResult(
-                success=True, should_finish=True, message=action.get("message")
-            )
-        if action_type != "do":
-            return ActionResult(
-                success=False,
-                should_finish=True,
-                message=f"Unknown action type: {action_type}",
-            )
-        action_name = action.get("action")
-        handler_method = self._get_handler(action_name)
-        if handler_method is None:
-            return ActionResult(
-                success=False,
-                should_finish=False,
-                message=f"Unknown action: {action_name}",
-            )
-        try:
-            return handler_method(action, screen_width, screen_height)
-        except Exception as e:
-            # Boundary: a failing device call must not abort the caller.
-            return ActionResult(
-                success=False, should_finish=False, message=f"Action failed: {e}"
-            )
-    def _get_handler(self, action_name: str | None) -> Callable | None:
-        """Get the handler method for an action, or None if the name is unknown."""
-        handlers = {
-            "Launch": self._handle_launch,
-            "Tap": self._handle_tap,
-            "Type": self._handle_type,
-            "Type_Name": self._handle_type,
-            "Swipe": self._handle_swipe,
-            "Back": self._handle_back,
-            "Home": self._handle_home,
-            "Double Tap": self._handle_double_tap,
-            "Long Press": self._handle_long_press,
-            "Wait": self._handle_wait,
-            "Take_over": self._handle_takeover,
-            "Note": self._handle_note,
-            "Call_API": self._handle_call_api,
-            "Interact": self._handle_interact,
-        }
-        return handlers.get(action_name)
-    def _convert_relative_to_absolute(
-        self, element: list[int], screen_width: int, screen_height: int
-    ) -> tuple[int, int]:
-        """Convert relative coordinates (0-1000) to absolute pixels."""
-        x = int(element[0] / 1000 * screen_width)
-        y = int(element[1] / 1000 * screen_height)
-        return x, y
-    def _handle_launch(self, action: dict, width: int, height: int) -> ActionResult:
-        """Handle app launch action."""
-        app_name = action.get("app")
-        if not app_name:
-            return ActionResult(False, False, "No app name specified")
-        device_factory = get_device_factory()
-        if device_factory.launch_app(app_name, self.device_id):
-            return ActionResult(True, False)
-        return ActionResult(False, False, f"App not found: {app_name}")
-    def _handle_tap(self, action: dict, width: int, height: int) -> ActionResult:
-        """Handle tap action, asking for confirmation when a message is attached."""
-        element = action.get("element")
-        if not element:
-            return ActionResult(False, False, "No element coordinates")
-        x, y = self._convert_relative_to_absolute(element, width, height)
-        # A "message" entry marks the tap as a sensitive operation.
-        if "message" in action and not self.confirmation_callback(action["message"]):
-            return ActionResult(
-                success=False,
-                should_finish=True,
-                message="User cancelled sensitive operation",
-            )
-        device_factory = get_device_factory()
-        device_factory.tap(x, y, self.device_id)
-        return ActionResult(True, False)
-    def _handle_type(self, action: dict, width: int, height: int) -> ActionResult:
-        """Handle text input action.
-        The original keyboard is restored even when clearing or typing fails,
-        so an error does not leave the device on the ADB keyboard.
-        """
-        text = action.get("text", "")
-        device_factory = get_device_factory()
-        # Switch to ADB keyboard
-        original_ime = device_factory.detect_and_set_adb_keyboard(self.device_id)
-        try:
-            time.sleep(TIMING_CONFIG.action.keyboard_switch_delay)
-            # Clear existing text and type new text
-            device_factory.clear_text(self.device_id)
-            time.sleep(TIMING_CONFIG.action.text_clear_delay)
-            # The text is passed through as-is, newlines included.
-            device_factory.type_text(text, self.device_id)
-            time.sleep(TIMING_CONFIG.action.text_input_delay)
-        finally:
-            # Restore original keyboard
-            device_factory.restore_keyboard(original_ime, self.device_id)
-            time.sleep(TIMING_CONFIG.action.keyboard_restore_delay)
-        return ActionResult(True, False)
-    def _handle_swipe(self, action: dict, width: int, height: int) -> ActionResult:
-        """Handle swipe action."""
-        start = action.get("start")
-        end = action.get("end")
-        if not start or not end:
-            return ActionResult(False, False, "Missing swipe coordinates")
-        start_x, start_y = self._convert_relative_to_absolute(start, width, height)
-        end_x, end_y = self._convert_relative_to_absolute(end, width, height)
-        device_factory = get_device_factory()
-        device_factory.swipe(start_x, start_y, end_x, end_y, device_id=self.device_id)
-        return ActionResult(True, False)
-    def _handle_back(self, action: dict, width: int, height: int) -> ActionResult:
-        """Handle back button action."""
-        device_factory = get_device_factory()
-        device_factory.back(self.device_id)
-        return ActionResult(True, False)
-    def _handle_home(self, action: dict, width: int, height: int) -> ActionResult:
-        """Handle home button action."""
-        device_factory = get_device_factory()
-        device_factory.home(self.device_id)
-        return ActionResult(True, False)
-    def _handle_double_tap(self, action: dict, width: int, height: int) -> ActionResult:
-        """Handle double tap action."""
-        element = action.get("element")
-        if not element:
-            return ActionResult(False, False, "No element coordinates")
-        x, y = self._convert_relative_to_absolute(element, width, height)
-        device_factory = get_device_factory()
-        device_factory.double_tap(x, y, self.device_id)
-        return ActionResult(True, False)
-    def _handle_long_press(self, action: dict, width: int, height: int) -> ActionResult:
-        """Handle long press action."""
-        element = action.get("element")
-        if not element:
-            return ActionResult(False, False, "No element coordinates")
-        x, y = self._convert_relative_to_absolute(element, width, height)
-        device_factory = get_device_factory()
-        device_factory.long_press(x, y, device_id=self.device_id)
-        return ActionResult(True, False)
-    def _handle_wait(self, action: dict, width: int, height: int) -> ActionResult:
-        """Handle wait action.
-        ``duration`` may be a string such as ``"2 seconds"`` or a plain number;
-        unparsable values fall back to one second and negative values are
-        treated as zero.
-        """
-        raw_duration = action.get("duration", "1 seconds")
-        try:
-            # str() lets numeric durations through the same parsing path.
-            duration = float(str(raw_duration).replace("seconds", "").strip())
-        except ValueError:
-            duration = 1.0
-        # time.sleep() rejects negative values, so clamp them.
-        time.sleep(max(duration, 0.0))
-        return ActionResult(True, False)
-    def _handle_takeover(self, action: dict, width: int, height: int) -> ActionResult:
-        """Handle takeover request (login, captcha, etc.)."""
-        message = action.get("message", "User intervention required")
-        self.takeover_callback(message)
-        return ActionResult(True, False)
-    def _handle_note(self, action: dict, width: int, height: int) -> ActionResult:
-        """Handle note action (placeholder for content recording)."""
-        # This action is typically used for recording page content
-        # Implementation depends on specific requirements
-        return ActionResult(True, False)
-    def _handle_call_api(self, action: dict, width: int, height: int) -> ActionResult:
-        """Handle API call action (placeholder for summarization)."""
-        # This action is typically used for content summarization
-        # Implementation depends on specific requirements
-        return ActionResult(True, False)
-    def _handle_interact(self, action: dict, width: int, height: int) -> ActionResult:
-        """Handle interaction request (user choice needed)."""
-        # This action signals that user input is needed
-        return ActionResult(True, False, message="User interaction required")
-    def _send_keyevent(self, keycode: str) -> None:
-        """Send a keyevent to the device.
-        ADB devices receive ``input keyevent <keycode>``. HDC devices receive
-        ``uitest uiInput keyEvent`` with code 2054 for Enter keys or with the
-        given code when it is not a ``KEYCODE_*`` name; other ``KEYCODE_*``
-        names, and any failure of those HDC calls, fall back to the
-        ``input keyevent`` form.
-        """
-        from phone_agent.device_factory import DeviceType, get_device_factory
-        from phone_agent.hdc.connection import _run_hdc_command
-        device_factory = get_device_factory()
-        if device_factory.device_type != DeviceType.HDC:
-            # ADB devices use standard input keyevent command
-            cmd_prefix = ["adb", "-s", self.device_id] if self.device_id else ["adb"]
-            subprocess.run(
-                cmd_prefix + ["shell", "input", "keyevent", keycode],
-                capture_output=True,
-                text=True,
-            )
-            return
-        # Handle HDC devices with HarmonyOS-specific keyEvent command
-        hdc_prefix = ["hdc", "-t", self.device_id] if self.device_id else ["hdc"]
-        def ui_input_key_event(code: str) -> None:
-            # HarmonyOS uitest key injection.
-            _run_hdc_command(
-                hdc_prefix + ["shell", "uitest", "uiInput", "keyEvent", code],
-                capture_output=True,
-                text=True,
-            )
-        def input_keyevent_fallback() -> None:
-            # ADB-style command, used for unsupported keys and as a last resort.
-            subprocess.run(
-                hdc_prefix + ["shell", "input", "keyevent", keycode],
-                capture_output=True,
-                text=True,
-            )
-        # KEYCODE_ENTER (66) -> 2054 (HarmonyOS Enter key code)
-        if keycode in ("KEYCODE_ENTER", "66"):
-            ui_input_key_event("2054")
-            return
-        try:
-            if not keycode.startswith("KEYCODE_"):
-                # Assume it's a numeric code
-                ui_input_key_event(str(keycode))
-            elif "ENTER" in keycode:
-                # For now, only ENTER variants are mapped; other keys may need mapping
-                ui_input_key_event("2054")
-            else:
-                input_keyevent_fallback()
-        except Exception:
-            # Deliberate best effort: retry with the ADB-style command.
-            input_keyevent_fallback()
-    @staticmethod
-    def _default_confirmation(message: str) -> bool:
-        """Default confirmation callback using console input.
-        Returns True only for an answer of ``Y``/``y``; surrounding whitespace
-        is ignored.
-        """
-        response = input(f"Sensitive operation: {message}\nConfirm? (Y/N): ")
-        return response.strip().upper() == "Y"
-    @staticmethod
-    def _default_takeover(message: str) -> None:
-        """Default takeover callback using console input."""
-        input(f"{message}\nPress Enter after completing manual operation...")
-def parse_action(response: str) -> dict[str, Any]:
-    """
-    Parse action from model response.
-    Three shapes are recognised after stripping whitespace:
-    ``do(action="Type"...)`` / ``do(action="Type_Name"...)`` (the text after
-    ``text=`` is sliced out verbatim), any other ``do(...)`` call (keyword
-    arguments are read with ``ast.literal_eval``), and ``finish(message=...)``.
-    The raw response is also printed to stdout.
-    Args:
-        response: Raw response string from the model.
-    Returns:
-        Parsed action dictionary.
-    Raises:
-        ValueError: If the response cannot be parsed. The underlying error is
-            attached as ``__cause__``.
-    """
-    print(f"Parsing action: {response}")
-    try:
-        response = response.strip()
-        if response.startswith(('do(action="Type"', 'do(action="Type_Name"')):
-            # Slice off the opening quote and the trailing '")' instead of
-            # parsing, so arbitrary text does not need to be valid Python.
-            text = response.split("text=", 1)[1][1:-2]
-            return {"_metadata": "do", "action": "Type", "text": text}
-        if response.startswith("do"):
-            # Use AST parsing instead of eval for safety
-            try:
-                # Escape special characters (newlines, tabs, etc.) for valid Python syntax
-                escaped = (
-                    response.replace("\n", "\\n")
-                    .replace("\r", "\\r")
-                    .replace("\t", "\\t")
-                )
-                tree = ast.parse(escaped, mode="eval")
-                if not isinstance(tree.body, ast.Call):
-                    raise ValueError("Expected a function call")
-                # Extract keyword arguments safely
-                action = {"_metadata": "do"}
-                for keyword in tree.body.keywords:
-                    action[keyword.arg] = ast.literal_eval(keyword.value)
-                return action
-            except (SyntaxError, ValueError) as e:
-                raise ValueError(f"Failed to parse do() action: {e}") from e
-        if response.startswith("finish"):
-            # removeprefix() strips only the leading marker; replace() would
-            # also delete any "finish(message=" that occurs inside the message.
-            return {
-                "_metadata": "finish",
-                "message": response.removeprefix("finish(message=")[1:-2],
-            }
-        raise ValueError(f"Failed to parse action: {response}")
-    except Exception as e:
-        raise ValueError(f"Failed to parse action: {e}") from e
-def do(**kwargs) -> dict[str, Any]:
-    """Build a 'do' action: the keyword arguments plus ``_metadata="do"``."""
-    return {**kwargs, "_metadata": "do"}
-def finish(**kwargs) -> dict[str, Any]:
-    """Build a 'finish' action: the keyword arguments plus ``_metadata="finish"``."""
-    return {**kwargs, "_metadata": "finish"}

autoglm-gui 1.5.0__py3-none-any.whl → 1.5.1__py3-none-any.whl

autoglm-gui 1.5.0__py3-none-any.whl → 1.5.1__py3-none-any.whl