PyPI - autoglm-gui - Versions diffs - 1.4.0__py3-none-any.whl → 1.5.0__py3-none-any.whl - Mend

autoglm-gui 1.4.0__py3-none-any.whl → 1.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (120) hide show

AutoGLM_GUI/__init__.py +11 -0
AutoGLM_GUI/__main__.py +26 -8
AutoGLM_GUI/actions/__init__.py +6 -0
AutoGLM_GUI/actions/handler.py +196 -0
AutoGLM_GUI/actions/types.py +15 -0
AutoGLM_GUI/adb/__init__.py +53 -0
AutoGLM_GUI/adb/apps.py +227 -0
AutoGLM_GUI/adb/connection.py +323 -0
AutoGLM_GUI/adb/device.py +171 -0
AutoGLM_GUI/adb/input.py +67 -0
AutoGLM_GUI/adb/screenshot.py +11 -0
AutoGLM_GUI/adb/timing.py +167 -0
AutoGLM_GUI/adb_plus/keyboard_installer.py +4 -2
AutoGLM_GUI/adb_plus/qr_pair.py +8 -8
AutoGLM_GUI/adb_plus/screenshot.py +22 -1
AutoGLM_GUI/adb_plus/serial.py +38 -20
AutoGLM_GUI/adb_plus/touch.py +4 -9
AutoGLM_GUI/agents/__init__.py +51 -0
AutoGLM_GUI/agents/events.py +19 -0
AutoGLM_GUI/agents/factory.py +153 -0
AutoGLM_GUI/agents/glm/__init__.py +7 -0
AutoGLM_GUI/agents/glm/agent.py +292 -0
AutoGLM_GUI/agents/glm/message_builder.py +81 -0
AutoGLM_GUI/agents/glm/parser.py +110 -0
AutoGLM_GUI/agents/glm/prompts_en.py +77 -0
AutoGLM_GUI/agents/glm/prompts_zh.py +75 -0
AutoGLM_GUI/agents/mai/__init__.py +28 -0
AutoGLM_GUI/agents/mai/agent.py +405 -0
AutoGLM_GUI/agents/mai/parser.py +254 -0
AutoGLM_GUI/agents/mai/prompts.py +103 -0
AutoGLM_GUI/agents/mai/traj_memory.py +91 -0
AutoGLM_GUI/agents/protocols.py +27 -0
AutoGLM_GUI/agents/stream_runner.py +188 -0
AutoGLM_GUI/api/__init__.py +71 -11
AutoGLM_GUI/api/agents.py +190 -229
AutoGLM_GUI/api/control.py +9 -6
AutoGLM_GUI/api/devices.py +112 -28
AutoGLM_GUI/api/health.py +13 -0
AutoGLM_GUI/api/history.py +78 -0
AutoGLM_GUI/api/layered_agent.py +306 -181
AutoGLM_GUI/api/mcp.py +11 -10
AutoGLM_GUI/api/media.py +64 -1
AutoGLM_GUI/api/scheduled_tasks.py +98 -0
AutoGLM_GUI/api/version.py +23 -10
AutoGLM_GUI/api/workflows.py +2 -1
AutoGLM_GUI/config.py +72 -14
AutoGLM_GUI/config_manager.py +98 -27
AutoGLM_GUI/device_adapter.py +263 -0
AutoGLM_GUI/device_manager.py +248 -29
AutoGLM_GUI/device_protocol.py +266 -0
AutoGLM_GUI/devices/__init__.py +49 -0
AutoGLM_GUI/devices/adb_device.py +200 -0
AutoGLM_GUI/devices/mock_device.py +185 -0
AutoGLM_GUI/devices/remote_device.py +177 -0
AutoGLM_GUI/exceptions.py +3 -3
AutoGLM_GUI/history_manager.py +164 -0
AutoGLM_GUI/i18n.py +81 -0
AutoGLM_GUI/metrics.py +13 -20
AutoGLM_GUI/model/__init__.py +5 -0
AutoGLM_GUI/model/message_builder.py +69 -0
AutoGLM_GUI/model/types.py +24 -0
AutoGLM_GUI/models/__init__.py +10 -0
AutoGLM_GUI/models/history.py +96 -0
AutoGLM_GUI/models/scheduled_task.py +71 -0
AutoGLM_GUI/parsers/__init__.py +22 -0
AutoGLM_GUI/parsers/base.py +50 -0
AutoGLM_GUI/parsers/phone_parser.py +58 -0
AutoGLM_GUI/phone_agent_manager.py +118 -367
AutoGLM_GUI/platform_utils.py +31 -2
AutoGLM_GUI/prompt_config.py +15 -0
AutoGLM_GUI/prompts/__init__.py +32 -0
AutoGLM_GUI/scheduler_manager.py +304 -0
AutoGLM_GUI/schemas.py +272 -63
AutoGLM_GUI/scrcpy_stream.py +159 -37
AutoGLM_GUI/server.py +3 -1
AutoGLM_GUI/socketio_server.py +114 -29
AutoGLM_GUI/state.py +10 -30
AutoGLM_GUI/static/assets/{about-DeclntHg.js → about-BQm96DAl.js} +1 -1
AutoGLM_GUI/static/assets/alert-dialog-B42XxGPR.js +1 -0
AutoGLM_GUI/static/assets/chat-C0L2gQYG.js +129 -0
AutoGLM_GUI/static/assets/circle-alert-D4rSJh37.js +1 -0
AutoGLM_GUI/static/assets/dialog-DZ78cEcj.js +45 -0
AutoGLM_GUI/static/assets/history-DFBv7TGc.js +1 -0
AutoGLM_GUI/static/assets/index-Bzyv2yQ2.css +1 -0
AutoGLM_GUI/static/assets/{index-zQ4KKDHt.js → index-CmZSnDqc.js} +1 -1
AutoGLM_GUI/static/assets/index-CssG-3TH.js +11 -0
AutoGLM_GUI/static/assets/label-BCUzE_nm.js +1 -0
AutoGLM_GUI/static/assets/logs-eoFxn5of.js +1 -0
AutoGLM_GUI/static/assets/popover-DLsuV5Sx.js +1 -0
AutoGLM_GUI/static/assets/scheduled-tasks-MyqGJvy_.js +1 -0
AutoGLM_GUI/static/assets/square-pen-zGWYrdfj.js +1 -0
AutoGLM_GUI/static/assets/textarea-BX6y7uM5.js +1 -0
AutoGLM_GUI/static/assets/workflows-CYFs6ssC.js +1 -0
AutoGLM_GUI/static/index.html +2 -2
AutoGLM_GUI/types.py +142 -0
{autoglm_gui-1.4.0.dist-info → autoglm_gui-1.5.0.dist-info}/METADATA +178 -92
autoglm_gui-1.5.0.dist-info/RECORD +157 -0
mai_agent/base.py +137 -0
mai_agent/mai_grounding_agent.py +263 -0
mai_agent/mai_naivigation_agent.py +526 -0
mai_agent/prompt.py +148 -0
mai_agent/unified_memory.py +67 -0
mai_agent/utils.py +73 -0
AutoGLM_GUI/api/dual_model.py +0 -311
AutoGLM_GUI/dual_model/__init__.py +0 -53
AutoGLM_GUI/dual_model/decision_model.py +0 -664
AutoGLM_GUI/dual_model/dual_agent.py +0 -917
AutoGLM_GUI/dual_model/protocols.py +0 -354
AutoGLM_GUI/dual_model/vision_model.py +0 -442
AutoGLM_GUI/mai_ui_adapter/agent_wrapper.py +0 -291
AutoGLM_GUI/phone_agent_patches.py +0 -146
AutoGLM_GUI/static/assets/chat-Iut2yhSw.js +0 -125
AutoGLM_GUI/static/assets/dialog-BfdcBs1x.js +0 -45
AutoGLM_GUI/static/assets/index-5hCCwHA7.css +0 -1
AutoGLM_GUI/static/assets/index-DHF1NZh0.js +0 -12
AutoGLM_GUI/static/assets/workflows-xiplap-r.js +0 -1
autoglm_gui-1.4.0.dist-info/RECORD +0 -100
{autoglm_gui-1.4.0.dist-info → autoglm_gui-1.5.0.dist-info}/WHEEL +0 -0
{autoglm_gui-1.4.0.dist-info → autoglm_gui-1.5.0.dist-info}/entry_points.txt +0 -0
{autoglm_gui-1.4.0.dist-info → autoglm_gui-1.5.0.dist-info}/licenses/LICENSE +0 -0

mai_agent/unified_memory.py ADDED Viewed

@@ -0,0 +1,67 @@
+# Copyright (c) 2025, Alibaba Cloud and its affiliates;
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Unified memory structures for trajectory tracking."""
+from dataclasses import dataclass, field
+from typing import Any, Dict, List, Optional
+from PIL import Image
+@dataclass
+class TrajStep:
+    """
+    Represents a single step in an agent's trajectory.
+    Attributes:
+        screenshot: PIL Image of the screen at this step.
+        accessibility_tree: Accessibility tree data for the screen.
+        prediction: Raw model prediction/response.
+        action: Parsed action dictionary.
+        conclusion: Conclusion or summary of the step.
+        thought: Model's reasoning/thinking process.
+        step_index: Index of this step in the trajectory.
+        agent_type: Type of agent that produced this step.
+        model_name: Name of the model used.
+        screenshot_bytes: Original screenshot as bytes (for compatibility).
+        structured_action: Structured action with metadata.
+    """
+    screenshot: Image.Image
+    accessibility_tree: Optional[Dict[str, Any]]
+    prediction: str
+    action: Dict[str, Any]
+    conclusion: str
+    thought: str
+    step_index: int
+    agent_type: str
+    model_name: str
+    screenshot_bytes: Optional[bytes] = None
+    structured_action: Optional[Dict[str, Any]] = None
+@dataclass
+class TrajMemory:
+    """
+    Container for a complete trajectory of agent steps.
+    Attributes:
+        task_goal: The goal/instruction for this trajectory.
+        task_id: Unique identifier for the task.
+        steps: List of trajectory steps.
+    """
+    task_goal: str
+    task_id: str
+    steps: List[TrajStep] = field(default_factory=list)

mai_agent/utils.py ADDED Viewed

@@ -0,0 +1,73 @@
+# Copyright (c) 2025, Alibaba Cloud and its affiliates;
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Utility functions for image processing and conversion."""
+import base64
+from io import BytesIO
+from typing import Union, Optional, Tuple, Dict, Any
+from PIL import Image
+from PIL import ImageDraw
+def safe_pil_to_bytes(image: Union[Image.Image, bytes]) -> bytes:
+    if isinstance(image, Image.Image):
+        img_byte_arr = BytesIO()
+        image.save(img_byte_arr, format="PNG")
+        return img_byte_arr.getvalue()
+    elif isinstance(image, bytes):
+        return image
+    else:
+        raise TypeError(f"Expected PIL Image or bytes, got {type(image)}")
+def pil_to_base64(image: Image.Image) -> str:
+    buffer = BytesIO()
+    image.save(buffer, format="PNG")
+    return base64.b64encode(buffer.getvalue()).decode("utf-8")
+def save_screenshot(screenshot: Image.Image, path: str) -> None:
+    screenshot.save(path)
+    print(f"Screenshot saved in {path}")
+def extract_click_coordinates(action: Dict[str, Any]) -> Tuple[float, float]:
+    x = action.get("coordinate")[0]
+    y = action.get("coordinate")[1]
+    action_corr = (x, y)
+    return action_corr
+# Function to draw points on an image
+def draw_clicks_on_image(
+    image_path: str,
+    click_coords: Tuple[float, float],
+    output_path: Optional[str] = None,
+) -> Image.Image:
+    image = Image.open(image_path)
+    draw = ImageDraw.Draw(image)
+    # Draw each click coordinate as a red circle
+    (x, y) = click_coords
+    radius = 20
+    if x and y:  # if get the coordinate, draw a circle
+        draw.ellipse(
+            (x - radius, y - radius, x + radius, y + radius), fill="red", outline="red"
+        )
+    # Save the modified image
+    if output_path:
+        save_screenshot(image, output_path)
+    return image

AutoGLM_GUI/api/dual_model.py DELETED Viewed

@@ -1,311 +0,0 @@
-"""双模型协作API端点"""
-import threading
-from typing import Any, Optional
-from fastapi import APIRouter, HTTPException
-from fastapi.responses import StreamingResponse
-from pydantic import BaseModel
-from AutoGLM_GUI.logger import logger
-from AutoGLM_GUI.dual_model import (
-    DecisionModelConfig,
-    DualModelAgent,
-    DualModelEvent,
-    DualModelEventType,
-)
-from AutoGLM_GUI.dual_model.protocols import ThinkingMode
-from phone_agent.model import ModelConfig
-router = APIRouter(prefix="/api/dual", tags=["dual-model"])
-# 活跃的双模型会话 (device_id -> (agent, stop_event))
-_active_dual_sessions: dict[str, tuple[DualModelAgent, threading.Event]] = {}
-_active_dual_sessions_lock = threading.Lock()
-class DualModelInitRequest(BaseModel):
-    """双模型初始化请求"""
-    device_id: str
-    # 决策大模型配置
-    decision_base_url: str
-    decision_api_key: str
-    decision_model_name: str
-    # 视觉小模型配置(复用现有配置)
-    vision_base_url: Optional[str] = None
-    vision_api_key: Optional[str] = None
-    vision_model_name: Optional[str] = None
-    max_steps: int = 50
-    thinking_mode: str = "deep"  # fast, deep, turbo
-class DualModelChatRequest(BaseModel):
-    """双模型聊天请求"""
-    device_id: str
-    message: str
-class DualModelAbortRequest(BaseModel):
-    """中止请求"""
-    device_id: str
-class DualModelStatusResponse(BaseModel):
-    """状态响应"""
-    active: bool
-    device_id: Optional[str] = None
-    state: Optional[dict] = None
-@router.post("/init")
-def init_dual_model(request: DualModelInitRequest) -> dict:
-    """初始化双模型Agent"""
-    from AutoGLM_GUI.config import config
-    from AutoGLM_GUI.phone_agent_manager import PhoneAgentManager
-    device_id = request.device_id
-    thinking_mode_map = {
-        "fast": ThinkingMode.FAST,
-        "deep": ThinkingMode.DEEP,
-        "turbo": ThinkingMode.TURBO,
-    }
-    thinking_mode = thinking_mode_map.get(request.thinking_mode, ThinkingMode.DEEP)
-    logger.info(f"初始化双模型Agent: {device_id}, 模式: {thinking_mode.value}")
-    # 检查设备是否已有单模型Agent初始化
-    manager = PhoneAgentManager.get_instance()
-    if not manager.is_initialized(device_id):
-        raise HTTPException(
-            status_code=400, detail="设备尚未初始化单模型Agent，请先调用 /api/init"
-        )
-    # 获取视觉模型配置
-    vision_base_url = request.vision_base_url or config.base_url
-    vision_api_key = request.vision_api_key or config.api_key
-    vision_model_name = request.vision_model_name or config.model_name
-    if not vision_base_url:
-        raise HTTPException(status_code=400, detail="视觉模型base_url未配置")
-    # 创建配置
-    decision_config = DecisionModelConfig(
-        base_url=request.decision_base_url,
-        api_key=request.decision_api_key,
-        model_name=request.decision_model_name,
-        thinking_mode=thinking_mode,
-    )
-    vision_config = ModelConfig(
-        base_url=vision_base_url,
-        api_key=vision_api_key,
-        model_name=vision_model_name,
-    )
-    # 创建双模型Agent
-    try:
-        agent = DualModelAgent(
-            decision_config=decision_config,
-            vision_config=vision_config,
-            device_id=device_id,
-            max_steps=request.max_steps,
-            thinking_mode=thinking_mode,
-        )
-        # 存储到活跃会话
-        with _active_dual_sessions_lock:
-            # 清理旧会话
-            if device_id in _active_dual_sessions:
-                old_agent, old_event = _active_dual_sessions[device_id]
-                old_event.set()
-            _active_dual_sessions[device_id] = (agent, threading.Event())
-        logger.info(f"双模型Agent初始化成功: {device_id}")
-        return {
-            "success": True,
-            "device_id": device_id,
-            "message": "双模型Agent初始化成功",
-            "decision_model": request.decision_model_name,
-            "vision_model": vision_model_name,
-            "thinking_mode": thinking_mode.value,
-        }
-    except Exception as e:
-        logger.error(f"双模型Agent初始化失败: {e}")
-        raise HTTPException(status_code=500, detail=str(e))
-@router.post("/chat/stream")
-def dual_model_chat_stream(request: DualModelChatRequest):
-    """双模型聊天(SSE流式)"""
-    device_id = request.device_id
-    with _active_dual_sessions_lock:
-        if device_id not in _active_dual_sessions:
-            raise HTTPException(
-                status_code=400, detail="双模型Agent未初始化，请先调用 /api/dual/init"
-            )
-        agent, stop_event = _active_dual_sessions[device_id]
-    # 重置停止事件
-    stop_event.clear()
-    def event_generator():
-        """SSE事件生成器"""
-        try:
-            logger.info(f"开始双模型任务: {request.message[:50]}...")
-            # 在后台线程运行Agent
-            result_holder: list[Any] = [None]
-            error_holder: list[Any] = [None]
-            def run_agent():
-                try:
-                    result = agent.run(request.message)
-                    result_holder[0] = result
-                except Exception as e:
-                    error_holder[0] = e
-            thread = threading.Thread(target=run_agent, daemon=True)
-            thread.start()
-            # 持续发送事件
-            while thread.is_alive() or not agent.event_queue.empty():
-                if stop_event.is_set():
-                    agent.abort()
-                    yield "event: aborted\n"
-                    yield 'data: {"type": "aborted", "message": "任务被用户中断"}\n\n'
-                    break
-                # 获取事件
-                try:
-                    events = agent.get_events(timeout=0.1)
-                    for event in events:
-                        yield event.to_sse()
-                        # 如果是完成或错误事件，结束循环
-                        if event.type in [
-                            DualModelEventType.TASK_COMPLETE,
-                            DualModelEventType.ERROR,
-                        ]:
-                            return
-                except Exception:
-                    continue
-            # 等待线程完成
-            thread.join(timeout=5)
-            # 检查错误
-            if error_holder[0]:
-                error_event = DualModelEvent(
-                    type=DualModelEventType.ERROR,
-                    data={"message": str(error_holder[0])},
-                )
-                yield error_event.to_sse()
-            # 如果没有发送完成事件，发送一个
-            if result_holder[0] and not stop_event.is_set():
-                result = result_holder[0]
-                if isinstance(result, dict):
-                    done_event = DualModelEvent(
-                        type=DualModelEventType.TASK_COMPLETE,
-                        data={
-                            "success": result.get("success", False),
-                            "message": result.get("message", ""),
-                            "steps": result.get("steps", 0),
-                        },
-                    )
-                    yield done_event.to_sse()
-        except Exception as e:
-            logger.exception(f"双模型任务异常: {e}")
-            error_event = DualModelEvent(
-                type=DualModelEventType.ERROR,
-                data={"message": str(e)},
-            )
-            yield error_event.to_sse()
-    return StreamingResponse(
-        event_generator(),
-        media_type="text/event-stream",
-        headers={
-            "Cache-Control": "no-cache",
-            "Connection": "keep-alive",
-            "X-Accel-Buffering": "no",
-        },
-    )
-@router.post("/chat/abort")
-def abort_dual_model_chat(request: DualModelAbortRequest) -> dict:
-    """中止双模型聊天"""
-    device_id = request.device_id
-    with _active_dual_sessions_lock:
-        if device_id in _active_dual_sessions:
-            agent, stop_event = _active_dual_sessions[device_id]
-            stop_event.set()
-            agent.abort()
-            logger.info(f"双模型任务已中止: {device_id}")
-            return {"success": True, "message": "已发送中止信号"}
-        else:
-            return {"success": False, "message": "未找到活跃的双模型会话"}
-@router.get("/status")
-def get_dual_model_status(device_id: Optional[str] = None) -> DualModelStatusResponse:
-    """获取双模型状态"""
-    with _active_dual_sessions_lock:
-        if device_id:
-            if device_id in _active_dual_sessions:
-                agent, _ = _active_dual_sessions[device_id]
-                return DualModelStatusResponse(
-                    active=True,
-                    device_id=device_id,
-                    state=agent.get_state(),
-                )
-            else:
-                return DualModelStatusResponse(active=False, device_id=device_id)
-        else:
-            # 返回所有活跃会话
-            return DualModelStatusResponse(
-                active=len(_active_dual_sessions) > 0,
-                state={"active_devices": list(_active_dual_sessions.keys())},
-            )
-@router.post("/reset")
-def reset_dual_model(request: DualModelAbortRequest) -> dict:
-    """重置双模型Agent"""
-    device_id = request.device_id
-    with _active_dual_sessions_lock:
-        if device_id in _active_dual_sessions:
-            agent, stop_event = _active_dual_sessions[device_id]
-            stop_event.set()
-            agent.reset()
-            logger.info(f"双模型Agent已重置: {device_id}")
-            return {"success": True, "message": "双模型Agent已重置"}
-        else:
-            return {"success": False, "message": "未找到双模型会话"}
-@router.delete("/session/{device_id}")
-def delete_dual_model_session(device_id: str) -> dict:
-    """删除双模型会话"""
-    with _active_dual_sessions_lock:
-        if device_id in _active_dual_sessions:
-            agent, stop_event = _active_dual_sessions.pop(device_id)
-            stop_event.set()
-            logger.info(f"双模型会话已删除: {device_id}")
-            return {"success": True, "message": "双模型会话已删除"}
-        else:
-            return {"success": False, "message": "未找到双模型会话"}

AutoGLM_GUI/dual_model/__init__.py DELETED Viewed

@@ -1,53 +0,0 @@
-"""
-双模型协作模块
-大模型(GLM-4.7): 负责任务分析、决策制定、内容生成
-小模型(autoglm-phone): 负责屏幕识别、动作执行
-"""
-from .decision_model import (
-    DecisionModel,
-    Decision,
-    TaskPlan,
-    ActionSequence,
-    ActionStep,
-)
-from .vision_model import VisionModel, ScreenDescription, ExecutionResult
-from .dual_agent import DualModelAgent, DualModelCallbacks
-from .protocols import (
-    DualModelConfig,
-    DecisionModelConfig,
-    DualModelState,
-    DualModelEvent,
-    DualModelEventType,
-    ModelRole,
-    ModelStage,
-    ThinkingMode,
-    DECISION_SYSTEM_PROMPT,
-    DECISION_SYSTEM_PROMPT_TURBO,
-    VISION_DESCRIBE_PROMPT,
-)
-__all__ = [
-    "DecisionModel",
-    "Decision",
-    "TaskPlan",
-    "ActionSequence",
-    "ActionStep",
-    "VisionModel",
-    "ScreenDescription",
-    "ExecutionResult",
-    "DualModelAgent",
-    "DualModelCallbacks",
-    "DualModelConfig",
-    "DecisionModelConfig",
-    "DualModelState",
-    "DualModelEvent",
-    "DualModelEventType",
-    "ModelRole",
-    "ModelStage",
-    "ThinkingMode",
-    "DECISION_SYSTEM_PROMPT",
-    "DECISION_SYSTEM_PROMPT_TURBO",
-    "VISION_DESCRIBE_PROMPT",
-]

autoglm-gui 1.4.0__py3-none-any.whl → 1.5.0__py3-none-any.whl

autoglm-gui 1.4.0__py3-none-any.whl → 1.5.0__py3-none-any.whl