PyPI - autoglm-gui - Versions diffs - 1.0.0__py3-none-any.whl → 1.0.2__py3-none-any.whl - Mend

autoglm-gui 1.0.0__py3-none-any.whl → 1.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (41) hide show

AutoGLM_GUI/api/devices.py +49 -0
AutoGLM_GUI/schemas.py +16 -0
AutoGLM_GUI/static/assets/{about-29B5FDM8.js → about-BOnRPlKQ.js} +1 -1
AutoGLM_GUI/static/assets/chat-CGW6uMKB.js +149 -0
AutoGLM_GUI/static/assets/{index-mVNV0VwM.js → index-CRFVU0eu.js} +1 -1
AutoGLM_GUI/static/assets/{index-wu8Wjf12.js → index-DH-Dl4tK.js} +5 -5
AutoGLM_GUI/static/assets/index-DzUQ89YC.css +1 -0
AutoGLM_GUI/static/index.html +2 -2
{autoglm_gui-1.0.0.dist-info → autoglm_gui-1.0.2.dist-info}/METADATA +9 -4
autoglm_gui-1.0.2.dist-info/RECORD +73 -0
phone_agent/__init__.py +3 -2
phone_agent/actions/handler.py +124 -31
phone_agent/actions/handler_ios.py +278 -0
phone_agent/adb/connection.py +14 -5
phone_agent/adb/device.py +47 -16
phone_agent/agent.py +8 -8
phone_agent/agent_ios.py +277 -0
phone_agent/config/__init__.py +18 -0
phone_agent/config/apps.py +1 -1
phone_agent/config/apps_harmonyos.py +256 -0
phone_agent/config/apps_ios.py +339 -0
phone_agent/config/i18n.py +8 -0
phone_agent/config/timing.py +167 -0
phone_agent/device_factory.py +166 -0
phone_agent/hdc/__init__.py +53 -0
phone_agent/hdc/connection.py +384 -0
phone_agent/hdc/device.py +269 -0
phone_agent/hdc/input.py +145 -0
phone_agent/hdc/screenshot.py +127 -0
phone_agent/model/client.py +104 -4
phone_agent/xctest/__init__.py +47 -0
phone_agent/xctest/connection.py +379 -0
phone_agent/xctest/device.py +472 -0
phone_agent/xctest/input.py +311 -0
phone_agent/xctest/screenshot.py +226 -0
AutoGLM_GUI/static/assets/chat-DTN2oKtA.js +0 -149
AutoGLM_GUI/static/assets/index-Dy550Qqg.css +0 -1
autoglm_gui-1.0.0.dist-info/RECORD +0 -57
{autoglm_gui-1.0.0.dist-info → autoglm_gui-1.0.2.dist-info}/WHEEL +0 -0
{autoglm_gui-1.0.0.dist-info → autoglm_gui-1.0.2.dist-info}/entry_points.txt +0 -0
{autoglm_gui-1.0.0.dist-info → autoglm_gui-1.0.2.dist-info}/licenses/LICENSE +0 -0

phone_agent/hdc/input.py ADDED Viewed

@@ -0,0 +1,145 @@
+"""Input utilities for HarmonyOS device text input."""
+from phone_agent.hdc.connection import _run_hdc_command
+def type_text(text: str, device_id: str | None = None) -> None:
+    """
+    Type text into the currently focused input field.
+    Args:
+        text: The text to type. Supports multi-line text with newline characters.
+        device_id: Optional HDC device ID for multi-device setups.
+    Note:
+        HarmonyOS uses: hdc shell uitest uiInput text "文本内容"
+        This command works without coordinates when input field is focused.
+        For multi-line text, the function splits by newlines and sends ENTER keyEvents.
+        ENTER key code in HarmonyOS: 2054
+        Recommendation: Click on the input field first to focus it, then use this function.
+    """
+    hdc_prefix = _get_hdc_prefix(device_id)
+    # Handle multi-line text by splitting on newlines
+    if "\n" in text:
+        lines = text.split("\n")
+        for i, line in enumerate(lines):
+            if line:  # Only process non-empty lines
+                # Escape special characters for shell
+                escaped_line = line.replace('"', '\\"').replace("$", "\\$")
+                _run_hdc_command(
+                    hdc_prefix + ["shell", "uitest", "uiInput", "text", escaped_line],
+                    capture_output=True,
+                    text=True,
+                )
+            # Send ENTER key event after each line except the last one
+            if i < len(lines) - 1:
+                try:
+                    _run_hdc_command(
+                        hdc_prefix + ["shell", "uitest", "uiInput", "keyEvent", "2054"],
+                        capture_output=True,
+                        text=True,
+                    )
+                except Exception as e:
+                    print(f"[HDC] ENTER keyEvent failed: {e}")
+    else:
+        # Single line text - original logic
+        # Escape special characters for shell (keep quotes for proper text handling)
+        # The text will be wrapped in quotes in the command
+        escaped_text = text.replace('"', '\\"').replace("$", "\\$")
+        # HarmonyOS uitest uiInput text command
+        # Format: hdc shell uitest uiInput text "文本内容"
+        _run_hdc_command(
+            hdc_prefix + ["shell", "uitest", "uiInput", "text", escaped_text],
+            capture_output=True,
+            text=True,
+        )
+def clear_text(device_id: str | None = None) -> None:
+    """
+    Clear text in the currently focused input field.
+    Args:
+        device_id: Optional HDC device ID for multi-device setups.
+    Note:
+        Two uitest keyEvent commands are issued: a select-all key combination
+        followed by a single delete key event. The command results are not
+        inspected, and exceptions raised by the HDC helper are not caught here.
+    """
+    hdc_prefix = _get_hdc_prefix(device_id)
+    # Select everything with a two-key combination
+    # (key code 2072 for Ctrl, 2017 for A), then delete the selection.
+    _run_hdc_command(
+        hdc_prefix + ["shell", "uitest", "uiInput", "keyEvent", "2072", "2017"],
+        capture_output=True,
+        text=True,
+    )
+    _run_hdc_command(
+        hdc_prefix + ["shell", "uitest", "uiInput", "keyEvent", "2055"],  # Delete key
+        capture_output=True,
+        text=True,
+    )
+def detect_and_set_adb_keyboard(device_id: str | None = None) -> str:
+    """
+    Detect current keyboard and switch to ADB Keyboard if available.
+    Args:
+        device_id: Optional HDC device ID for multi-device setups.
+    Returns:
+        The original keyboard IME identifier for later restoration.
+    Note:
+        This is a placeholder. HarmonyOS may not support ADB Keyboard.
+        If there's a similar tool for HarmonyOS, integrate it here.
+    """
+    hdc_prefix = _get_hdc_prefix(device_id)
+    # Get current IME (if HarmonyOS supports this)
+    try:
+        result = _run_hdc_command(
+            hdc_prefix + ["shell", "settings", "get", "secure", "default_input_method"],
+            capture_output=True,
+            text=True,
+        )
+        current_ime = (result.stdout + result.stderr).strip()
+        # If ADB Keyboard equivalent exists for HarmonyOS, switch to it
+        # For now, we'll just return the current IME
+        return current_ime
+    except Exception:
+        return ""
+def restore_keyboard(ime: str, device_id: str | None = None) -> None:
+    """
+    Restore the original keyboard IME.
+    Args:
+        ime: The IME identifier to restore.
+        device_id: Optional HDC device ID for multi-device setups.
+    """
+    if not ime:
+        return
+    hdc_prefix = _get_hdc_prefix(device_id)
+    try:
+        _run_hdc_command(
+            hdc_prefix + ["shell", "ime", "set", ime], capture_output=True, text=True
+        )
+    except Exception:
+        pass
+def _get_hdc_prefix(device_id: str | None) -> list:
+    """Get HDC command prefix with optional device specifier."""
+    if device_id:
+        return ["hdc", "-t", device_id]
+    return ["hdc"]

phone_agent/hdc/screenshot.py ADDED Viewed

@@ -0,0 +1,127 @@
+"""Screenshot utilities for capturing HarmonyOS device screen."""
+import base64
+import os
+import tempfile
+import uuid
+from dataclasses import dataclass
+from io import BytesIO
+from PIL import Image
+from phone_agent.hdc.connection import _run_hdc_command
+@dataclass
+class Screenshot:
+    """Represents a captured screenshot."""
+    # Base64 text of the image bytes; get_screenshot in this module stores PNG data here.
+    base64_data: str
+    # Image width in pixels.
+    width: int
+    # Image height in pixels.
+    height: int
+    # True when the capture commands reported failure and a black placeholder
+    # was returned instead (e.g. on sensitive screens such as payment pages).
+    is_sensitive: bool = False
+def get_screenshot(device_id: str | None = None, timeout: int = 10) -> Screenshot:
+    """
+    Capture a screenshot from the connected HarmonyOS device.
+    Args:
+        device_id: Optional HDC device ID for multi-device setups.
+        timeout: Timeout in seconds for screenshot operations.
+    Returns:
+        Screenshot object containing base64 data and dimensions.
+    Note:
+        If the screenshot fails (e.g., on sensitive screens like payment pages),
+        a black fallback image is returned with is_sensitive=True.
+    """
+    temp_path = os.path.join(tempfile.gettempdir(), f"screenshot_{uuid.uuid4()}.png")
+    hdc_prefix = _get_hdc_prefix(device_id)
+    try:
+        # Execute screenshot command
+        # HarmonyOS HDC only supports JPEG format
+        remote_path = "/data/local/tmp/tmp_screenshot.jpeg"
+        # Try method 1: hdc shell screenshot (newer HarmonyOS versions)
+        result = _run_hdc_command(
+            hdc_prefix + ["shell", "screenshot", remote_path],
+            capture_output=True,
+            text=True,
+            timeout=timeout,
+        )
+        # Check for screenshot failure (sensitive screen)
+        output = result.stdout + result.stderr
+        if (
+            "fail" in output.lower()
+            or "error" in output.lower()
+            or "not found" in output.lower()
+        ):
+            # Try method 2: snapshot_display (older versions or different devices)
+            result = _run_hdc_command(
+                hdc_prefix + ["shell", "snapshot_display", "-f", remote_path],
+                capture_output=True,
+                text=True,
+                timeout=timeout,
+            )
+            output = result.stdout + result.stderr
+            if "fail" in output.lower() or "error" in output.lower():
+                return _create_fallback_screenshot(is_sensitive=True)
+        # Pull screenshot to local temp path
+        # Note: remote file is JPEG, but PIL can open it regardless of local extension
+        _run_hdc_command(
+            hdc_prefix + ["file", "recv", remote_path, temp_path],
+            capture_output=True,
+            text=True,
+            timeout=5,
+        )
+        if not os.path.exists(temp_path):
+            return _create_fallback_screenshot(is_sensitive=False)
+        # Read JPEG image and convert to PNG for model inference
+        # PIL automatically detects the image format from file content
+        img = Image.open(temp_path)
+        width, height = img.size
+        buffered = BytesIO()
+        img.save(buffered, format="PNG")
+        base64_data = base64.b64encode(buffered.getvalue()).decode("utf-8")
+        # Cleanup
+        os.remove(temp_path)
+        return Screenshot(
+            base64_data=base64_data, width=width, height=height, is_sensitive=False
+        )
+    except Exception as e:
+        print(f"Screenshot error: {e}")
+        return _create_fallback_screenshot(is_sensitive=False)
+def _get_hdc_prefix(device_id: str | None) -> list:
+    """Get HDC command prefix with optional device specifier."""
+    if device_id:
+        return ["hdc", "-t", device_id]
+    return ["hdc"]
+def _create_fallback_screenshot(is_sensitive: bool) -> Screenshot:
+    """Create a black fallback image when screenshot fails."""
+    default_width, default_height = 1080, 2400
+    black_img = Image.new("RGB", (default_width, default_height), color="black")
+    buffered = BytesIO()
+    black_img.save(buffered, format="PNG")
+    base64_data = base64.b64encode(buffered.getvalue()).decode("utf-8")
+    return Screenshot(
+        base64_data=base64_data,
+        width=default_width,
+        height=default_height,
+        is_sensitive=is_sensitive,
+    )

phone_agent/model/client.py CHANGED Viewed

@@ -1,11 +1,14 @@
 """Model client for AI inference using OpenAI-compatible API."""
 import json
+import time
 from dataclasses import dataclass, field
 from typing import Any
 from openai import OpenAI
+from phone_agent.config.i18n import get_message
 @dataclass
 class ModelConfig:
@@ -19,6 +22,7 @@ class ModelConfig:
     top_p: float = 0.85
     frequency_penalty: float = 0.2
     extra_body: dict[str, Any] = field(default_factory=dict)
+    lang: str = "cn"  # Language for UI messages: 'cn' or 'en'
 @dataclass
@@ -28,6 +32,10 @@ class ModelResponse:
     thinking: str
     action: str
     raw_content: str
+    # Performance metrics
+    time_to_first_token: float | None = None  # Time to first token (seconds)
+    time_to_thinking_end: float | None = None  # Time to thinking end (seconds)
+    total_time: float | None = None  # Total inference time (seconds)
 class ModelClient:
@@ -55,7 +63,12 @@ class ModelClient:
         Raises:
             ValueError: If the response cannot be parsed.
         """
-        response = self.client.chat.completions.create(
+        # Start timing
+        start_time = time.time()
+        time_to_first_token = None
+        time_to_thinking_end = None
+        stream = self.client.chat.completions.create(
             messages=messages,
             model=self.config.model_name,
             max_tokens=self.config.max_tokens,
@@ -63,15 +76,102 @@ class ModelClient:
             top_p=self.config.top_p,
             frequency_penalty=self.config.frequency_penalty,
             extra_body=self.config.extra_body,
-            stream=False,
+            stream=True,
         )
-        raw_content = response.choices[0].message.content
+        raw_content = ""
+        buffer = ""  # Buffer to hold content that might be part of a marker
+        action_markers = ["finish(message=", "do(action="]
+        in_action_phase = False  # Track if we've entered the action phase
+        first_token_received = False
+        for chunk in stream:
+            if len(chunk.choices) == 0:
+                continue
+            if chunk.choices[0].delta.content is not None:
+                content = chunk.choices[0].delta.content
+                raw_content += content
+                # Record time to first token
+                if not first_token_received:
+                    time_to_first_token = time.time() - start_time
+                    first_token_received = True
+                if in_action_phase:
+                    # Already in action phase, just accumulate content without printing
+                    continue
+                buffer += content
+                # Check if any marker is fully present in buffer
+                marker_found = False
+                for marker in action_markers:
+                    if marker in buffer:
+                        # Marker found, print everything before it
+                        thinking_part = buffer.split(marker, 1)[0]
+                        print(thinking_part, end="", flush=True)
+                        print()  # Print newline after thinking is complete
+                        in_action_phase = True
+                        marker_found = True
+                        # Record time to thinking end
+                        if time_to_thinking_end is None:
+                            time_to_thinking_end = time.time() - start_time
+                        break
+                if marker_found:
+                    continue  # Continue to collect remaining content
+                # Check if buffer ends with a prefix of any marker
+                # If so, don't print yet (wait for more content)
+                is_potential_marker = False
+                for marker in action_markers:
+                    for i in range(1, len(marker)):
+                        if buffer.endswith(marker[:i]):
+                            is_potential_marker = True
+                            break
+                    if is_potential_marker:
+                        break
+                if not is_potential_marker:
+                    # Safe to print the buffer
+                    print(buffer, end="", flush=True)
+                    buffer = ""
+        # Calculate total time
+        total_time = time.time() - start_time
         # Parse thinking and action from response
         thinking, action = self._parse_response(raw_content)
-        return ModelResponse(thinking=thinking, action=action, raw_content=raw_content)
+        # Print performance metrics
+        lang = self.config.lang
+        print()
+        print("=" * 50)
+        print(f"⏱️  {get_message('performance_metrics', lang)}:")
+        print("-" * 50)
+        if time_to_first_token is not None:
+            print(
+                f"{get_message('time_to_first_token', lang)}: {time_to_first_token:.3f}s"
+            )
+        if time_to_thinking_end is not None:
+            print(
+                f"{get_message('time_to_thinking_end', lang)}:        {time_to_thinking_end:.3f}s"
+            )
+        print(
+            f"{get_message('total_inference_time', lang)}:          {total_time:.3f}s"
+        )
+        print("=" * 50)
+        return ModelResponse(
+            thinking=thinking,
+            action=action,
+            raw_content=raw_content,
+            time_to_first_token=time_to_first_token,
+            time_to_thinking_end=time_to_thinking_end,
+            total_time=total_time,
+        )
     def _parse_response(self, content: str) -> tuple[str, str]:
         """

phone_agent/xctest/__init__.py ADDED Viewed

@@ -0,0 +1,47 @@
+"""XCTest utilities for iOS device interaction via WebDriverAgent/XCUITest."""
+from phone_agent.xctest.connection import (
+    ConnectionType,
+    DeviceInfo,
+    XCTestConnection,
+    list_devices,
+    quick_connect,
+)
+from phone_agent.xctest.device import (
+    back,
+    double_tap,
+    get_current_app,
+    home,
+    launch_app,
+    long_press,
+    swipe,
+    tap,
+)
+from phone_agent.xctest.input import (
+    clear_text,
+    type_text,
+)
+from phone_agent.xctest.screenshot import get_screenshot
+# Public API of the package: the names re-exported from the connection, device,
+# input and screenshot submodules imported above.
+__all__ = [
+    # Screenshot
+    "get_screenshot",
+    # Input
+    "type_text",
+    "clear_text",
+    # Device control
+    "get_current_app",
+    "tap",
+    "swipe",
+    "back",
+    "home",
+    "double_tap",
+    "long_press",
+    "launch_app",
+    # Connection management
+    "XCTestConnection",
+    "DeviceInfo",
+    "ConnectionType",
+    "quick_connect",
+    "list_devices",
+]

autoglm-gui 1.0.0__py3-none-any.whl → 1.0.2__py3-none-any.whl

autoglm-gui 1.0.0__py3-none-any.whl → 1.0.2__py3-none-any.whl