PyPI - autoglm-gui - Versions diffs - 1.5.0__py3-none-any.whl → 1.5.1__py3-none-any.whl - Mend

autoglm-gui 1.5.0__py3-none-any.whl → 1.5.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (74) hide show

AutoGLM_GUI/agents/glm/agent.py +6 -1
AutoGLM_GUI/agents/mai/agent.py +3 -0
AutoGLM_GUI/agents/stream_runner.py +7 -2
AutoGLM_GUI/api/agents.py +26 -1
AutoGLM_GUI/api/history.py +27 -1
AutoGLM_GUI/models/history.py +45 -1
AutoGLM_GUI/scheduler_manager.py +52 -6
AutoGLM_GUI/schemas.py +12 -0
AutoGLM_GUI/static/assets/{about-BQm96DAl.js → about-CfwX1Cmc.js} +1 -1
AutoGLM_GUI/static/assets/{alert-dialog-B42XxGPR.js → alert-dialog-CtGlN2IJ.js} +1 -1
AutoGLM_GUI/static/assets/chat-BYa-foUI.js +129 -0
AutoGLM_GUI/static/assets/{circle-alert-D4rSJh37.js → circle-alert-t08bEMPO.js} +1 -1
AutoGLM_GUI/static/assets/{dialog-DZ78cEcj.js → dialog-FNwZJFwk.js} +1 -1
AutoGLM_GUI/static/assets/eye-D0UPWCWC.js +1 -0
AutoGLM_GUI/static/assets/history-CRo95B7i.js +1 -0
AutoGLM_GUI/static/assets/{index-CmZSnDqc.js → index-BaLMSqd3.js} +1 -1
AutoGLM_GUI/static/assets/{index-CssG-3TH.js → index-CTHbFvKl.js} +5 -5
AutoGLM_GUI/static/assets/index-CV7jGxGm.css +1 -0
AutoGLM_GUI/static/assets/{label-BCUzE_nm.js → label-DJFevVmr.js} +1 -1
AutoGLM_GUI/static/assets/{logs-eoFxn5of.js → logs-RW09DyYY.js} +1 -1
AutoGLM_GUI/static/assets/{popover-DLsuV5Sx.js → popover--JTJrE5v.js} +1 -1
AutoGLM_GUI/static/assets/{scheduled-tasks-MyqGJvy_.js → scheduled-tasks-DTRKsQXF.js} +1 -1
AutoGLM_GUI/static/assets/{square-pen-zGWYrdfj.js → square-pen-CPK_K680.js} +1 -1
AutoGLM_GUI/static/assets/{textarea-BX6y7uM5.js → textarea-PRmVnWq5.js} +1 -1
AutoGLM_GUI/static/assets/{workflows-CYFs6ssC.js → workflows-CdcsAoaT.js} +1 -1
AutoGLM_GUI/static/index.html +2 -2
{autoglm_gui-1.5.0.dist-info → autoglm_gui-1.5.1.dist-info}/METADATA +49 -7
{autoglm_gui-1.5.0.dist-info → autoglm_gui-1.5.1.dist-info}/RECORD +31 -70
AutoGLM_GUI/device_adapter.py +0 -263
AutoGLM_GUI/static/assets/chat-C0L2gQYG.js +0 -129
AutoGLM_GUI/static/assets/history-DFBv7TGc.js +0 -1
AutoGLM_GUI/static/assets/index-Bzyv2yQ2.css +0 -1
mai_agent/base.py +0 -137
mai_agent/mai_grounding_agent.py +0 -263
mai_agent/mai_naivigation_agent.py +0 -526
mai_agent/prompt.py +0 -148
mai_agent/unified_memory.py +0 -67
mai_agent/utils.py +0 -73
phone_agent/__init__.py +0 -12
phone_agent/actions/__init__.py +0 -5
phone_agent/actions/handler.py +0 -400
phone_agent/actions/handler_ios.py +0 -278
phone_agent/adb/__init__.py +0 -51
phone_agent/adb/connection.py +0 -358
phone_agent/adb/device.py +0 -253
phone_agent/adb/input.py +0 -108
phone_agent/adb/screenshot.py +0 -108
phone_agent/agent.py +0 -253
phone_agent/agent_ios.py +0 -277
phone_agent/config/__init__.py +0 -53
phone_agent/config/apps.py +0 -227
phone_agent/config/apps_harmonyos.py +0 -256
phone_agent/config/apps_ios.py +0 -339
phone_agent/config/i18n.py +0 -81
phone_agent/config/prompts.py +0 -80
phone_agent/config/prompts_en.py +0 -79
phone_agent/config/prompts_zh.py +0 -82
phone_agent/config/timing.py +0 -167
phone_agent/device_factory.py +0 -166
phone_agent/hdc/__init__.py +0 -53
phone_agent/hdc/connection.py +0 -384
phone_agent/hdc/device.py +0 -269
phone_agent/hdc/input.py +0 -145
phone_agent/hdc/screenshot.py +0 -127
phone_agent/model/__init__.py +0 -5
phone_agent/model/client.py +0 -290
phone_agent/xctest/__init__.py +0 -47
phone_agent/xctest/connection.py +0 -379
phone_agent/xctest/device.py +0 -472
phone_agent/xctest/input.py +0 -311
phone_agent/xctest/screenshot.py +0 -226
{autoglm_gui-1.5.0.dist-info → autoglm_gui-1.5.1.dist-info}/WHEEL +0 -0
{autoglm_gui-1.5.0.dist-info → autoglm_gui-1.5.1.dist-info}/entry_points.txt +0 -0
{autoglm_gui-1.5.0.dist-info → autoglm_gui-1.5.1.dist-info}/licenses/LICENSE +0 -0

phone_agent/agent.py DELETED Viewed

@@ -1,253 +0,0 @@
-"""Main PhoneAgent class for orchestrating phone automation."""
-import json
-import traceback
-from dataclasses import dataclass
-from typing import Any, Callable
-from phone_agent.actions import ActionHandler
-from phone_agent.actions.handler import finish, parse_action
-from phone_agent.config import get_messages, get_system_prompt
-from phone_agent.device_factory import get_device_factory
-from phone_agent.model import ModelClient, ModelConfig
-from phone_agent.model.client import MessageBuilder
-@dataclass
-class AgentConfig:
-    """Configuration for the PhoneAgent."""
-    max_steps: int = 100
-    device_id: str | None = None
-    lang: str = "cn"
-    system_prompt: str | None = None
-    verbose: bool = True
-    def __post_init__(self):
-        if self.system_prompt is None:
-            self.system_prompt = get_system_prompt(self.lang)
-@dataclass
-class StepResult:
-    """Result of a single agent step."""
-    success: bool
-    finished: bool
-    action: dict[str, Any] | None
-    thinking: str
-    message: str | None = None
-class PhoneAgent:
-    """
-    AI-powered agent for automating Android phone interactions.
-    The agent uses a vision-language model to understand screen content
-    and decide on actions to complete user tasks.
-    Args:
-        model_config: Configuration for the AI model.
-        agent_config: Configuration for the agent behavior.
-        confirmation_callback: Optional callback for sensitive action confirmation.
-        takeover_callback: Optional callback for takeover requests.
-    Example:
-        >>> from phone_agent import PhoneAgent
-        >>> from phone_agent.model import ModelConfig
-        >>>
-        >>> model_config = ModelConfig(base_url="http://localhost:8000/v1")
-        >>> agent = PhoneAgent(model_config)
-        >>> agent.run("Open WeChat and send a message to John")
-    """
-    def __init__(
-        self,
-        model_config: ModelConfig | None = None,
-        agent_config: AgentConfig | None = None,
-        confirmation_callback: Callable[[str], bool] | None = None,
-        takeover_callback: Callable[[str], None] | None = None,
-    ):
-        self.model_config = model_config or ModelConfig()
-        self.agent_config = agent_config or AgentConfig()
-        self.model_client = ModelClient(self.model_config)
-        self.action_handler = ActionHandler(
-            device_id=self.agent_config.device_id,
-            confirmation_callback=confirmation_callback,
-            takeover_callback=takeover_callback,
-        )
-        self._context: list[dict[str, Any]] = []
-        self._step_count = 0
-    def run(self, task: str) -> str:
-        """
-        Run the agent to complete a task.
-        Args:
-            task: Natural language description of the task.
-        Returns:
-            Final message from the agent.
-        """
-        self._context = []
-        self._step_count = 0
-        # First step with user prompt
-        result = self._execute_step(task, is_first=True)
-        if result.finished:
-            return result.message or "Task completed"
-        # Continue until finished or max steps reached
-        while self._step_count < self.agent_config.max_steps:
-            result = self._execute_step(is_first=False)
-            if result.finished:
-                return result.message or "Task completed"
-        return "Max steps reached"
-    def step(self, task: str | None = None) -> StepResult:
-        """
-        Execute a single step of the agent.
-        Useful for manual control or debugging.
-        Args:
-            task: Task description (only needed for first step).
-        Returns:
-            StepResult with step details.
-        """
-        is_first = len(self._context) == 0
-        if is_first and not task:
-            raise ValueError("Task is required for the first step")
-        return self._execute_step(task, is_first)
-    def reset(self) -> None:
-        """Reset the agent state for a new task."""
-        self._context = []
-        self._step_count = 0
-    def _execute_step(
-        self, user_prompt: str | None = None, is_first: bool = False
-    ) -> StepResult:
-        """Execute a single step of the agent loop."""
-        self._step_count += 1
-        # Capture current screen state
-        device_factory = get_device_factory()
-        screenshot = device_factory.get_screenshot(self.agent_config.device_id)
-        current_app = device_factory.get_current_app(self.agent_config.device_id)
-        # Build messages
-        if is_first:
-            self._context.append(
-                MessageBuilder.create_system_message(self.agent_config.system_prompt)
-            )
-            screen_info = MessageBuilder.build_screen_info(current_app)
-            text_content = f"{user_prompt}\n\n{screen_info}"
-            self._context.append(
-                MessageBuilder.create_user_message(
-                    text=text_content, image_base64=screenshot.base64_data
-                )
-            )
-        else:
-            screen_info = MessageBuilder.build_screen_info(current_app)
-            text_content = f"** Screen Info **\n\n{screen_info}"
-            self._context.append(
-                MessageBuilder.create_user_message(
-                    text=text_content, image_base64=screenshot.base64_data
-                )
-            )
-        # Get model response
-        try:
-            msgs = get_messages(self.agent_config.lang)
-            print("\n" + "=" * 50)
-            print(f"💭 {msgs['thinking']}:")
-            print("-" * 50)
-            response = self.model_client.request(self._context)
-        except Exception as e:
-            if self.agent_config.verbose:
-                traceback.print_exc()
-            return StepResult(
-                success=False,
-                finished=True,
-                action=None,
-                thinking="",
-                message=f"Model error: {e}",
-            )
-        # Parse action from response
-        try:
-            action = parse_action(response.action)
-        except ValueError:
-            if self.agent_config.verbose:
-                traceback.print_exc()
-            action = finish(message=response.action)
-        if self.agent_config.verbose:
-            # Print thinking process
-            print("-" * 50)
-            print(f"🎯 {msgs['action']}:")
-            print(json.dumps(action, ensure_ascii=False, indent=2))
-            print("=" * 50 + "\n")
-        # Remove image from context to save space
-        self._context[-1] = MessageBuilder.remove_images_from_message(self._context[-1])
-        # Execute action
-        try:
-            result = self.action_handler.execute(
-                action, screenshot.width, screenshot.height
-            )
-        except Exception as e:
-            if self.agent_config.verbose:
-                traceback.print_exc()
-            result = self.action_handler.execute(
-                finish(message=str(e)), screenshot.width, screenshot.height
-            )
-        # Add assistant response to context
-        self._context.append(
-            MessageBuilder.create_assistant_message(
-                f"<think>{response.thinking}</think><answer>{response.action}</answer>"
-            )
-        )
-        # Check if finished
-        finished = action.get("_metadata") == "finish" or result.should_finish
-        if finished and self.agent_config.verbose:
-            msgs = get_messages(self.agent_config.lang)
-            print("\n" + "🎉 " + "=" * 48)
-            print(
-                f"✅ {msgs['task_completed']}: {result.message or action.get('message', msgs['done'])}"
-            )
-            print("=" * 50 + "\n")
-        return StepResult(
-            success=result.success,
-            finished=finished,
-            action=action,
-            thinking=response.thinking,
-            message=result.message or action.get("message"),
-        )
-    @property
-    def context(self) -> list[dict[str, Any]]:
-        """Get the current conversation context."""
-        return self._context.copy()
-    @property
-    def step_count(self) -> int:
-        """Get the current step count."""
-        return self._step_count

phone_agent/agent_ios.py DELETED Viewed

@@ -1,277 +0,0 @@
-"""iOS PhoneAgent class for orchestrating iOS phone automation."""
-import json
-import traceback
-from dataclasses import dataclass
-from typing import Any, Callable
-from phone_agent.actions.handler import finish, parse_action
-from phone_agent.actions.handler_ios import IOSActionHandler
-from phone_agent.config import get_messages, get_system_prompt
-from phone_agent.model import ModelClient, ModelConfig
-from phone_agent.model.client import MessageBuilder
-from phone_agent.xctest import XCTestConnection, get_current_app, get_screenshot
-@dataclass
-class IOSAgentConfig:
-    """Configuration for the iOS PhoneAgent."""
-    max_steps: int = 100
-    wda_url: str = "http://localhost:8100"
-    session_id: str | None = None
-    device_id: str | None = None  # iOS device UDID
-    lang: str = "cn"
-    system_prompt: str | None = None
-    verbose: bool = True
-    def __post_init__(self):
-        if self.system_prompt is None:
-            self.system_prompt = get_system_prompt(self.lang)
-@dataclass
-class StepResult:
-    """Result of a single agent step."""
-    success: bool
-    finished: bool
-    action: dict[str, Any] | None
-    thinking: str
-    message: str | None = None
-class IOSPhoneAgent:
-    """
-    AI-powered agent for automating iOS phone interactions.
-    The agent uses a vision-language model to understand screen content
-    and decide on actions to complete user tasks via WebDriverAgent.
-    Args:
-        model_config: Configuration for the AI model.
-        agent_config: Configuration for the iOS agent behavior.
-        confirmation_callback: Optional callback for sensitive action confirmation.
-        takeover_callback: Optional callback for takeover requests.
-    Example:
-        >>> from phone_agent.agent_ios import IOSPhoneAgent, IOSAgentConfig
-        >>> from phone_agent.model import ModelConfig
-        >>>
-        >>> model_config = ModelConfig(base_url="http://localhost:8000/v1")
-        >>> agent_config = IOSAgentConfig(wda_url="http://localhost:8100")
-        >>> agent = IOSPhoneAgent(model_config, agent_config)
-        >>> agent.run("Open Safari and search for Apple")
-    """
-    def __init__(
-        self,
-        model_config: ModelConfig | None = None,
-        agent_config: IOSAgentConfig | None = None,
-        confirmation_callback: Callable[[str], bool] | None = None,
-        takeover_callback: Callable[[str], None] | None = None,
-    ):
-        self.model_config = model_config or ModelConfig()
-        self.agent_config = agent_config or IOSAgentConfig()
-        self.model_client = ModelClient(self.model_config)
-        # Initialize WDA connection and create session if needed
-        self.wda_connection = XCTestConnection(wda_url=self.agent_config.wda_url)
-        # Auto-create session if not provided
-        if self.agent_config.session_id is None:
-            success, session_id = self.wda_connection.start_wda_session()
-            if success and session_id != "session_started":
-                self.agent_config.session_id = session_id
-                if self.agent_config.verbose:
-                    print(f"✅ Created WDA session: {session_id}")
-            elif self.agent_config.verbose:
-                print("⚠️  Using default WDA session (no explicit session ID)")
-        self.action_handler = IOSActionHandler(
-            wda_url=self.agent_config.wda_url,
-            session_id=self.agent_config.session_id,
-            confirmation_callback=confirmation_callback,
-            takeover_callback=takeover_callback,
-        )
-        self._context: list[dict[str, Any]] = []
-        self._step_count = 0
-    def run(self, task: str) -> str:
-        """
-        Run the agent to complete a task.
-        Args:
-            task: Natural language description of the task.
-        Returns:
-            Final message from the agent.
-        """
-        self._context = []
-        self._step_count = 0
-        # First step with user prompt
-        result = self._execute_step(task, is_first=True)
-        if result.finished:
-            return result.message or "Task completed"
-        # Continue until finished or max steps reached
-        while self._step_count < self.agent_config.max_steps:
-            result = self._execute_step(is_first=False)
-            if result.finished:
-                return result.message or "Task completed"
-        return "Max steps reached"
-    def step(self, task: str | None = None) -> StepResult:
-        """
-        Execute a single step of the agent.
-        Useful for manual control or debugging.
-        Args:
-            task: Task description (only needed for first step).
-        Returns:
-            StepResult with step details.
-        """
-        is_first = len(self._context) == 0
-        if is_first and not task:
-            raise ValueError("Task is required for the first step")
-        return self._execute_step(task, is_first)
-    def reset(self) -> None:
-        """Reset the agent state for a new task."""
-        self._context = []
-        self._step_count = 0
-    def _execute_step(
-        self, user_prompt: str | None = None, is_first: bool = False
-    ) -> StepResult:
-        """Execute a single step of the agent loop."""
-        self._step_count += 1
-        # Capture current screen state
-        screenshot = get_screenshot(
-            wda_url=self.agent_config.wda_url,
-            session_id=self.agent_config.session_id,
-            device_id=self.agent_config.device_id,
-        )
-        current_app = get_current_app(
-            wda_url=self.agent_config.wda_url, session_id=self.agent_config.session_id
-        )
-        # Build messages
-        if is_first:
-            self._context.append(
-                MessageBuilder.create_system_message(self.agent_config.system_prompt)
-            )
-            screen_info = MessageBuilder.build_screen_info(current_app)
-            text_content = f"{user_prompt}\n\n{screen_info}"
-            self._context.append(
-                MessageBuilder.create_user_message(
-                    text=text_content, image_base64=screenshot.base64_data
-                )
-            )
-        else:
-            screen_info = MessageBuilder.build_screen_info(current_app)
-            text_content = f"** Screen Info **\n\n{screen_info}"
-            self._context.append(
-                MessageBuilder.create_user_message(
-                    text=text_content, image_base64=screenshot.base64_data
-                )
-            )
-        # Get model response
-        try:
-            response = self.model_client.request(self._context)
-        except Exception as e:
-            if self.agent_config.verbose:
-                traceback.print_exc()
-            return StepResult(
-                success=False,
-                finished=True,
-                action=None,
-                thinking="",
-                message=f"Model error: {e}",
-            )
-        # Parse action from response
-        try:
-            action = parse_action(response.action)
-        except ValueError:
-            if self.agent_config.verbose:
-                traceback.print_exc()
-            action = finish(message=response.action)
-        if self.agent_config.verbose:
-            # Print thinking process
-            msgs = get_messages(self.agent_config.lang)
-            print("\n" + "=" * 50)
-            print(f"💭 {msgs['thinking']}:")
-            print("-" * 50)
-            print(response.thinking)
-            print("-" * 50)
-            print(f"🎯 {msgs['action']}:")
-            print(json.dumps(action, ensure_ascii=False, indent=2))
-            print("=" * 50 + "\n")
-        # Remove image from context to save space
-        self._context[-1] = MessageBuilder.remove_images_from_message(self._context[-1])
-        # Execute action
-        try:
-            result = self.action_handler.execute(
-                action, screenshot.width, screenshot.height
-            )
-        except Exception as e:
-            if self.agent_config.verbose:
-                traceback.print_exc()
-            result = self.action_handler.execute(
-                finish(message=str(e)), screenshot.width, screenshot.height
-            )
-        # Add assistant response to context
-        self._context.append(
-            MessageBuilder.create_assistant_message(
-                f"<think>{response.thinking}</think><answer>{response.action}</answer>"
-            )
-        )
-        # Check if finished
-        finished = action.get("_metadata") == "finish" or result.should_finish
-        if finished and self.agent_config.verbose:
-            msgs = get_messages(self.agent_config.lang)
-            print("\n" + "🎉 " + "=" * 48)
-            print(
-                f"✅ {msgs['task_completed']}: {result.message or action.get('message', msgs['done'])}"
-            )
-            print("=" * 50 + "\n")
-        return StepResult(
-            success=result.success,
-            finished=finished,
-            action=action,
-            thinking=response.thinking,
-            message=result.message or action.get("message"),
-        )
-    @property
-    def context(self) -> list[dict[str, Any]]:
-        """Get the current conversation context."""
-        return self._context.copy()
-    @property
-    def step_count(self) -> int:
-        """Get the current step count."""
-        return self._step_count

phone_agent/config/__init__.py DELETED Viewed

@@ -1,53 +0,0 @@
-"""Configuration module for Phone Agent."""
-from phone_agent.config.apps import APP_PACKAGES
-from phone_agent.config.apps_ios import APP_PACKAGES_IOS
-from phone_agent.config.i18n import get_message, get_messages
-from phone_agent.config.prompts_en import SYSTEM_PROMPT as SYSTEM_PROMPT_EN
-from phone_agent.config.prompts_zh import SYSTEM_PROMPT as SYSTEM_PROMPT_ZH
-from phone_agent.config.timing import (
-    TIMING_CONFIG,
-    ActionTimingConfig,
-    ConnectionTimingConfig,
-    DeviceTimingConfig,
-    TimingConfig,
-    get_timing_config,
-    update_timing_config,
-)
-def get_system_prompt(lang: str = "cn") -> str:
-    """
-    Get system prompt by language.
-    Args:
-        lang: Language code, 'cn' for Chinese, 'en' for English.
-    Returns:
-        System prompt string.
-    """
-    if lang == "en":
-        return SYSTEM_PROMPT_EN
-    return SYSTEM_PROMPT_ZH
-# Default to Chinese for backward compatibility
-SYSTEM_PROMPT = SYSTEM_PROMPT_ZH
-__all__ = [
-    "APP_PACKAGES",
-    "APP_PACKAGES_IOS",
-    "SYSTEM_PROMPT",
-    "SYSTEM_PROMPT_ZH",
-    "SYSTEM_PROMPT_EN",
-    "get_system_prompt",
-    "get_messages",
-    "get_message",
-    "TIMING_CONFIG",
-    "TimingConfig",
-    "ActionTimingConfig",
-    "DeviceTimingConfig",
-    "ConnectionTimingConfig",
-    "get_timing_config",
-    "update_timing_config",
-]

autoglm-gui 1.5.0__py3-none-any.whl → 1.5.1__py3-none-any.whl

autoglm-gui 1.5.0__py3-none-any.whl → 1.5.1__py3-none-any.whl