autoglm-gui 1.3.1__py3-none-any.whl → 1.4.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- AutoGLM_GUI/__main__.py +0 -4
- AutoGLM_GUI/adb_plus/qr_pair.py +8 -8
- AutoGLM_GUI/agents/__init__.py +20 -0
- AutoGLM_GUI/agents/factory.py +160 -0
- AutoGLM_GUI/agents/mai_adapter.py +627 -0
- AutoGLM_GUI/agents/protocols.py +23 -0
- AutoGLM_GUI/api/__init__.py +50 -7
- AutoGLM_GUI/api/agents.py +61 -19
- AutoGLM_GUI/api/devices.py +12 -18
- AutoGLM_GUI/api/dual_model.py +24 -17
- AutoGLM_GUI/api/health.py +13 -0
- AutoGLM_GUI/api/layered_agent.py +659 -0
- AutoGLM_GUI/api/mcp.py +11 -10
- AutoGLM_GUI/api/version.py +23 -10
- AutoGLM_GUI/api/workflows.py +2 -1
- AutoGLM_GUI/config_manager.py +56 -24
- AutoGLM_GUI/device_adapter.py +263 -0
- AutoGLM_GUI/device_protocol.py +266 -0
- AutoGLM_GUI/devices/__init__.py +49 -0
- AutoGLM_GUI/devices/adb_device.py +205 -0
- AutoGLM_GUI/devices/mock_device.py +183 -0
- AutoGLM_GUI/devices/remote_device.py +172 -0
- AutoGLM_GUI/dual_model/decision_model.py +4 -4
- AutoGLM_GUI/dual_model/protocols.py +3 -3
- AutoGLM_GUI/exceptions.py +3 -3
- AutoGLM_GUI/mai_ui_adapter/agent_wrapper.py +291 -0
- AutoGLM_GUI/metrics.py +13 -20
- AutoGLM_GUI/phone_agent_manager.py +219 -134
- AutoGLM_GUI/phone_agent_patches.py +2 -1
- AutoGLM_GUI/platform_utils.py +5 -2
- AutoGLM_GUI/prompts.py +6 -1
- AutoGLM_GUI/schemas.py +45 -14
- AutoGLM_GUI/scrcpy_stream.py +17 -13
- AutoGLM_GUI/server.py +3 -1
- AutoGLM_GUI/socketio_server.py +16 -4
- AutoGLM_GUI/state.py +10 -30
- AutoGLM_GUI/static/assets/{about-Cj6QXqMf.js → about-_XNhzQZX.js} +1 -1
- AutoGLM_GUI/static/assets/chat-DwJpiAWf.js +126 -0
- AutoGLM_GUI/static/assets/{dialog-CxJlnjzH.js → dialog-B3uW4T8V.js} +3 -3
- AutoGLM_GUI/static/assets/index-Cpv2gSF1.css +1 -0
- AutoGLM_GUI/static/assets/{index-C_B-Arvf.js → index-Cy8TmmHV.js} +1 -1
- AutoGLM_GUI/static/assets/{index-CxJQuE4y.js → index-UYYauTly.js} +6 -6
- AutoGLM_GUI/static/assets/{workflows-BTiGCNI0.js → workflows-Du_de-dt.js} +1 -1
- AutoGLM_GUI/static/index.html +2 -2
- AutoGLM_GUI/types.py +125 -0
- {autoglm_gui-1.3.1.dist-info → autoglm_gui-1.4.1.dist-info}/METADATA +147 -65
- {autoglm_gui-1.3.1.dist-info → autoglm_gui-1.4.1.dist-info}/RECORD +58 -39
- mai_agent/base.py +137 -0
- mai_agent/mai_grounding_agent.py +263 -0
- mai_agent/mai_naivigation_agent.py +526 -0
- mai_agent/prompt.py +148 -0
- mai_agent/unified_memory.py +67 -0
- mai_agent/utils.py +73 -0
- phone_agent/config/prompts.py +6 -1
- phone_agent/config/prompts_zh.py +6 -1
- AutoGLM_GUI/config.py +0 -23
- AutoGLM_GUI/static/assets/chat-BJeomZgh.js +0 -124
- AutoGLM_GUI/static/assets/index-Z0uYCPOO.css +0 -1
- {autoglm_gui-1.3.1.dist-info → autoglm_gui-1.4.1.dist-info}/WHEEL +0 -0
- {autoglm_gui-1.3.1.dist-info → autoglm_gui-1.4.1.dist-info}/entry_points.txt +0 -0
- {autoglm_gui-1.3.1.dist-info → autoglm_gui-1.4.1.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,172 @@
|
|
|
1
|
+
"""Remote Device implementation using HTTP.
|
|
2
|
+
|
|
3
|
+
This module provides a RemoteDevice that connects to a Device Agent
|
|
4
|
+
via HTTP, allowing remote control of devices.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import httpx
|
|
8
|
+
|
|
9
|
+
from AutoGLM_GUI.device_protocol import DeviceInfo, Screenshot
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class RemoteDevice:
    """
    Device handle that forwards every operation to a Device Agent over HTTP.

    The Device Agent server performs the actual device work and decides the
    implementation (ADB, Accessibility, Mock, etc.). This class only builds
    ``{base_url}/device/{device_id}{endpoint}`` requests, raises on HTTP error
    statuses, and unpacks the JSON replies.

    Example:
        >>> device = RemoteDevice("phone_001", "http://localhost:8001")
        >>> screenshot = device.get_screenshot()
        >>> device.tap(100, 200)
    """

    def __init__(self, device_id: str, base_url: str, timeout: float = 30.0):
        """Store the target device and open a dedicated HTTP client.

        Args:
            device_id: Identifier placed in the request path.
            base_url: Root URL of the Device Agent server.
            timeout: Timeout handed to ``httpx.Client``.
        """
        self._device_id = device_id
        # Trailing slashes are dropped so endpoint paths can be appended as-is.
        self._base_url = base_url.rstrip("/")
        self._client = httpx.Client(timeout=timeout)

    @property
    def device_id(self) -> str:
        """Identifier of the device this handle talks to."""
        return self._device_id

    def _url_for(self, endpoint: str) -> str:
        """Build the absolute URL of a per-device endpoint."""
        return f"{self._base_url}/device/{self._device_id}{endpoint}"

    def _post(self, endpoint: str, json: dict | None = None) -> dict:
        """POST ``json`` (an empty object when omitted) and return the decoded reply.

        Raises whatever ``raise_for_status()`` raises for an error status.
        """
        payload = json or {}
        response = self._client.post(self._url_for(endpoint), json=payload)
        response.raise_for_status()
        return response.json()

    def _get(self, endpoint: str) -> dict:
        """GET a per-device endpoint and return the decoded reply.

        Raises whatever ``raise_for_status()`` raises for an error status.
        """
        response = self._client.get(self._url_for(endpoint))
        response.raise_for_status()
        return response.json()

    def get_screenshot(self, timeout: int = 10) -> Screenshot:
        """Request a screenshot and wrap the reply in a ``Screenshot``.

        ``is_sensitive`` falls back to ``False`` when the reply omits it; the
        other three fields are required (``KeyError`` otherwise).
        """
        reply = self._post("/screenshot", {"timeout": timeout})
        sensitive = reply.get("is_sensitive", False)
        return Screenshot(
            base64_data=reply["base64_data"],
            width=reply["width"],
            height=reply["height"],
            is_sensitive=sensitive,
        )

    def tap(self, x: int, y: int, delay: float | None = None) -> None:
        """Send a tap at ``(x, y)``."""
        payload = {"x": x, "y": y, "delay": delay}
        self._post("/tap", payload)

    def double_tap(self, x: int, y: int, delay: float | None = None) -> None:
        """Send a double tap at ``(x, y)``."""
        payload = {"x": x, "y": y, "delay": delay}
        self._post("/double_tap", payload)

    def long_press(
        self, x: int, y: int, duration_ms: int = 3000, delay: float | None = None
    ) -> None:
        """Send a long press at ``(x, y)`` lasting ``duration_ms``."""
        payload = {"x": x, "y": y, "duration_ms": duration_ms, "delay": delay}
        self._post("/long_press", payload)

    def swipe(
        self,
        start_x: int,
        start_y: int,
        end_x: int,
        end_y: int,
        duration_ms: int | None = None,
        delay: float | None = None,
    ) -> None:
        """Send a swipe from the start point to the end point."""
        payload = {
            "start_x": start_x,
            "start_y": start_y,
            "end_x": end_x,
            "end_y": end_y,
            "duration_ms": duration_ms,
            "delay": delay,
        }
        self._post("/swipe", payload)

    def type_text(self, text: str) -> None:
        """Send ``text`` to the ``/type_text`` endpoint."""
        self._post("/type_text", {"text": text})

    def clear_text(self) -> None:
        """Call the ``/clear_text`` endpoint with an empty body."""
        self._post("/clear_text")

    def back(self, delay: float | None = None) -> None:
        """Call the ``/back`` endpoint."""
        self._post("/back", {"delay": delay})

    def home(self, delay: float | None = None) -> None:
        """Call the ``/home`` endpoint."""
        self._post("/home", {"delay": delay})

    def launch_app(self, app_name: str, delay: float | None = None) -> bool:
        """Ask the server to launch ``app_name``.

        Returns the reply's ``success`` field, or ``True`` when it is absent.
        """
        reply = self._post("/launch_app", {"app_name": app_name, "delay": delay})
        return reply.get("success", True)

    def get_current_app(self) -> str:
        """Return the ``app_name`` field reported by ``/current_app``."""
        reply = self._get("/current_app")
        return reply["app_name"]

    def detect_and_set_adb_keyboard(self) -> str:
        """Call ``/detect_keyboard`` and return ``original_ime`` (``""`` if absent)."""
        reply = self._post("/detect_keyboard")
        return reply.get("original_ime", "")

    def restore_keyboard(self, ime: str) -> None:
        """Send ``ime`` to the ``/restore_keyboard`` endpoint."""
        self._post("/restore_keyboard", {"ime": ime})

    def close(self) -> None:
        """Close the HTTP client."""
        self._client.close()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Leaving the ``with`` block always releases the HTTP client.
        self.close()
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
class RemoteDeviceManager:
    """
    Remote device manager using HTTP.

    Manages connections to a Device Agent server: lists devices, asks the
    server to connect/disconnect them, and caches one ``RemoteDevice`` handle
    per device id. Each cached handle owns its own HTTP client, so handles are
    closed when they are dropped from the cache.

    The manager can be used as a context manager; leaving the ``with`` block
    calls ``close()``.
    """

    def __init__(self, base_url: str, timeout: float = 30.0):
        """Open the manager's HTTP client.

        Args:
            base_url: Root URL of the Device Agent server.
            timeout: Timeout for the manager's client and for every
                ``RemoteDevice`` it creates.
        """
        # Trailing slashes are dropped so endpoint paths can be appended as-is.
        self._base_url = base_url.rstrip("/")
        self._timeout = timeout
        self._client = httpx.Client(timeout=timeout)
        self._devices: dict[str, RemoteDevice] = {}

    def __enter__(self) -> "RemoteDeviceManager":
        return self

    def __exit__(self, exc_type, exc_val, exc_tb) -> None:
        self.close()

    @staticmethod
    def _result_from_response(
        resp, default_success: bool, default_message: str
    ) -> tuple[bool, str]:
        """Turn a connect/disconnect reply into a ``(success, message)`` pair.

        An HTTP error status is always reported as a failure, so a JSON error
        body that lacks a ``success`` field is never mistaken for success.
        The message comes from the body's ``message`` or ``detail`` field when
        available, otherwise it is ``"HTTP <status>"``.
        """
        if resp.status_code >= 400:
            try:
                body = resp.json()
            except ValueError:
                # Error pages are not guaranteed to be JSON.
                body = None
            reason = ""
            if isinstance(body, dict):
                reason = body.get("message") or body.get("detail") or ""
            return False, str(reason) if reason else f"HTTP {resp.status_code}"

        data = resp.json()
        return data.get("success", default_success), data.get("message", default_message)

    def list_devices(self) -> list[DeviceInfo]:
        """Fetch ``/devices`` and build one ``DeviceInfo`` per returned entry.

        Raises whatever ``raise_for_status()`` raises for an error status.
        """
        resp = self._client.get(f"{self._base_url}/devices")
        resp.raise_for_status()
        return [DeviceInfo(**d) for d in resp.json()]

    def get_device(self, device_id: str) -> RemoteDevice:
        """Return the cached handle for ``device_id``, creating it on first use."""
        if device_id not in self._devices:
            self._devices[device_id] = RemoteDevice(
                device_id, self._base_url, self._timeout
            )
        return self._devices[device_id]

    def connect(self, address: str, timeout: int = 10) -> tuple[bool, str]:
        """Ask the server to connect to ``address``.

        Returns:
            ``(success, message)``. Missing fields default to ``False`` and
            ``""``; an HTTP error status yields ``(False, <reason>)``.
        """
        resp = self._client.post(
            f"{self._base_url}/connect", json={"address": address, "timeout": timeout}
        )
        return self._result_from_response(resp, False, "")

    def disconnect(self, device_id: str) -> tuple[bool, str]:
        """Drop the cached handle for ``device_id`` and ask the server to disconnect it.

        Returns:
            ``(success, message)``. Missing fields default to ``True`` and
            ``"Disconnected"``; an HTTP error status yields ``(False, <reason>)``.
        """
        dropped = self._devices.pop(device_id, None)
        if dropped is not None:
            # The handle owns its own HTTP client; release it instead of
            # leaving it to the garbage collector.
            dropped.close()
        resp = self._client.post(
            f"{self._base_url}/disconnect", json={"device_id": device_id}
        )
        return self._result_from_response(resp, True, "Disconnected")

    def close(self) -> None:
        """Close every cached device handle and the manager's own client."""
        try:
            for device in self._devices.values():
                device.close()
        finally:
            # Forget the closed handles so a later get_device() cannot return
            # one, and release the manager's client even if a handle failed
            # to close.
            self._devices.clear()
            self._client.close()
|
|
@@ -49,8 +49,8 @@ class ActionStep:
|
|
|
49
49
|
need_generate: bool = False
|
|
50
50
|
direction: Optional[str] = None
|
|
51
51
|
|
|
52
|
-
def to_dict(self) -> dict:
|
|
53
|
-
result = {"action": self.action, "target": self.target}
|
|
52
|
+
def to_dict(self) -> dict[str, str | bool]:
|
|
53
|
+
result: dict[str, str | bool] = {"action": self.action, "target": self.target}
|
|
54
54
|
if self.content:
|
|
55
55
|
result["content"] = self.content
|
|
56
56
|
if self.need_generate:
|
|
@@ -127,7 +127,7 @@ class DecisionModel:
|
|
|
127
127
|
self.client = OpenAI(
|
|
128
128
|
base_url=config.base_url,
|
|
129
129
|
api_key=config.api_key,
|
|
130
|
-
)
|
|
130
|
+
) # type: ignore[call-arg]
|
|
131
131
|
self.model_name = config.model_name
|
|
132
132
|
self.conversation_history: list[dict] = []
|
|
133
133
|
self.current_task: str = ""
|
|
@@ -159,7 +159,7 @@ class DecisionModel:
|
|
|
159
159
|
try:
|
|
160
160
|
response = self.client.chat.completions.create(
|
|
161
161
|
model=self.model_name,
|
|
162
|
-
messages=messages,
|
|
162
|
+
messages=messages, # type: ignore[arg-type]
|
|
163
163
|
max_tokens=self.config.max_tokens,
|
|
164
164
|
temperature=self.config.temperature,
|
|
165
165
|
stream=True,
|
|
@@ -21,9 +21,9 @@ class ThinkingMode(str, Enum):
|
|
|
21
21
|
class DecisionModelConfig(BaseModel):
|
|
22
22
|
"""决策大模型配置"""
|
|
23
23
|
|
|
24
|
-
base_url: str
|
|
24
|
+
base_url: str
|
|
25
25
|
api_key: str = ""
|
|
26
|
-
model_name: str
|
|
26
|
+
model_name: str
|
|
27
27
|
max_tokens: int = 4096
|
|
28
28
|
temperature: float = 0.7
|
|
29
29
|
thinking_mode: ThinkingMode = ThinkingMode.DEEP
|
|
@@ -33,7 +33,7 @@ class DualModelConfig(BaseModel):
|
|
|
33
33
|
"""双模型协作配置"""
|
|
34
34
|
|
|
35
35
|
enabled: bool = False
|
|
36
|
-
decision_model: DecisionModelConfig =
|
|
36
|
+
decision_model: Optional[DecisionModelConfig] = None
|
|
37
37
|
|
|
38
38
|
|
|
39
39
|
class ModelRole(str, Enum):
|
AutoGLM_GUI/exceptions.py
CHANGED
|
@@ -79,9 +79,9 @@ class AgentInitializationError(Exception):
|
|
|
79
79
|
How to fix:
|
|
80
80
|
1. Check configuration:
|
|
81
81
|
>>> from AutoGLM_GUI.config_manager import config_manager
|
|
82
|
-
>>>
|
|
83
|
-
>>> print(f"base_url: {
|
|
84
|
-
>>> print(f"model_name: {
|
|
82
|
+
>>> effective_config = config_manager.get_effective_config()
|
|
83
|
+
>>> print(f"base_url: {effective_config.base_url}")
|
|
84
|
+
>>> print(f"model_name: {effective_config.model_name}")
|
|
85
85
|
|
|
86
86
|
2. Set configuration:
|
|
87
87
|
>>> via API: POST /api/config {"base_url": "...", "model_name": "...", "api_key": "..."}
|
|
@@ -0,0 +1,291 @@
|
|
|
1
|
+
"""MAI-UI PhoneAgent wrapper for compatibility with AutoGLM-GUI interface."""
|
|
2
|
+
|
|
3
|
+
from dataclasses import dataclass
|
|
4
|
+
from typing import Any, Callable, Optional
|
|
5
|
+
|
|
6
|
+
from phone_agent.agent import AgentConfig, StepResult
|
|
7
|
+
from phone_agent.actions.handler import ActionHandler
|
|
8
|
+
from phone_agent.model import ModelConfig
|
|
9
|
+
|
|
10
|
+
from AutoGLM_GUI.logger import logger
|
|
11
|
+
from AutoGLM_GUI.mai_ui.mai_navigation_agent import MAIUINaivigationAgent # type: ignore[import-not-found]
|
|
12
|
+
from AutoGLM_GUI.mai_ui_adapter.action_adapter import MAIUIActionAdapter # type: ignore[import-not-found]
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
@dataclass
class MAIUIConfig:
    """Tuning knobs specific to the MAI-UI agent.

    MAIUIPhoneAgent copies every field into the ``runtime_conf`` mapping it
    passes to the MAI-UI navigation agent.
    """

    # Passed through as runtime_conf["history_n"].
    history_n: int = 3
    # Passed through as runtime_conf["temperature"].
    temperature: float = 0.0
    # Passed through as runtime_conf["top_k"].
    # NOTE(review): -1 presumably disables top-k filtering — confirm.
    top_k: int = -1
    # Passed through as runtime_conf["top_p"].
    top_p: float = 1.0
    # Passed through as runtime_conf["max_tokens"].
    max_tokens: int = 2048
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class MAIUIPhoneAgent:
    """
    MAI-UI Agent wrapper that implements the PhoneAgent interface.

    This wrapper allows MAI-UI agents to be used transparently in place of
    the standard PhoneAgent, providing compatibility with the existing
    PhoneAgentManager and API infrastructure.

    Each step takes a screenshot, asks the MAI-UI navigation agent for the
    next action, converts that action with ``MAIUIActionAdapter`` and executes
    it through ``ActionHandler``.

    Usage:
        agent = MAIUIPhoneAgent(
            model_config=model_config,
            agent_config=agent_config,
        )
        result = agent.run("Open WeChat")
    """

    def __init__(
        self,
        model_config: ModelConfig,
        agent_config: AgentConfig,
        mai_config: Optional[MAIUIConfig] = None,
        takeover_callback: Optional[Callable[[str], None]] = None,
    ):
        """
        Initialize MAI-UI PhoneAgent wrapper.

        Args:
            model_config: Model configuration (base_url, api_key, model_name).
            agent_config: Agent configuration (device_id, max_steps, etc.).
            mai_config: MAI-UI specific configuration; defaults to ``MAIUIConfig()``.
            takeover_callback: Callback for takeover requests.
        """
        self.model_config = model_config
        self.agent_config = agent_config
        self.mai_config = mai_config or MAIUIConfig()

        # Create MAI-UI navigation agent
        self._mai_agent = MAIUINaivigationAgent(
            llm_base_url=model_config.base_url,
            model_name=model_config.model_name,
            api_key=model_config.api_key,
            runtime_conf={
                "history_n": self.mai_config.history_n,
                "temperature": self.mai_config.temperature,
                "top_k": self.mai_config.top_k,
                "top_p": self.mai_config.top_p,
                "max_tokens": self.mai_config.max_tokens,
            },
        )

        # Action adapter and handler
        self._action_adapter = MAIUIActionAdapter()
        self.action_handler = ActionHandler(
            device_id=agent_config.device_id,
            takeover_callback=takeover_callback,
        )

        # PhoneAgent-compatible state
        self._context: list[dict[str, Any]] = []
        self._step_count = 0
        self._current_task: str = ""

        # For model_client compatibility (used by streaming patches)
        self.model_client = _DummyModelClient()

        # Debug: Print model configuration for troubleshooting
        logger.info("=" * 60)
        logger.info("[MAI-UI Agent] Initialization")
        logger.info(f"  Device ID: {agent_config.device_id}")
        logger.info(f"  Base URL: {model_config.base_url}")
        logger.info(f"  Model: {model_config.model_name}")
        logger.info("=" * 60)

    def run(self, task: str) -> str:
        """
        Execute a complete task.

        Resets the agent, then executes steps until one reports ``finished``
        or ``agent_config.max_steps`` steps have run. The first step always
        runs, whatever ``max_steps`` is.

        Args:
            task: Natural language task description.

        Returns:
            The finishing step's message (``"Task completed"`` when it is
            empty), or ``"Max steps reached"``.
        """
        self.reset()
        self._current_task = task

        # First step
        result = self._execute_step(task, is_first=True)

        if result.finished:
            return result.message or "Task completed"

        # Continue until finished or max steps reached
        while self._step_count < self.agent_config.max_steps:
            result = self._execute_step(is_first=False)

            if result.finished:
                return result.message or "Task completed"

        return "Max steps reached"

    def step(self, task: Optional[str] = None) -> StepResult:
        """
        Execute a single step.

        Args:
            task: Task description. Required until a task is known, i.e. on
                the first step after construction or ``reset()``.

        Returns:
            StepResult with step details.

        Raises:
            ValueError: If no action has been recorded yet and no task was
                given now or on an earlier step.
        """
        is_first = self._step_count == 0

        if not self._context:
            # ``_context`` only grows after an action was executed
            # successfully, so it is still empty when an earlier step failed.
            # In that case the task remembered from that step is reused
            # instead of demanding it again.
            if task:
                self._current_task = task
            elif not self._current_task:
                raise ValueError("Task is required for the first step")

        return self._execute_step(task, is_first)

    def _execute_step(
        self, user_prompt: Optional[str] = None, is_first: bool = False
    ) -> StepResult:
        """Execute a single step of the agent loop.

        Args:
            user_prompt: Instruction for this step; falls back to the current
                task when empty.
            is_first: Accepted for PhoneAgent compatibility; not used here.

        Returns:
            A StepResult. Prediction failures and invalid actions are reported
            with ``finished=True``; a failed action execution is reported with
            ``finished=False``.
        """
        # Imported lazily, as in the original implementation.
        import base64
        from io import BytesIO

        from phone_agent.device_factory import get_device_factory
        from PIL import Image

        self._step_count += 1
        logger.info(f"[MAI-UI] Executing step {self._step_count}")

        # Get screenshot
        device_factory = get_device_factory()
        screenshot = device_factory.get_screenshot(self.agent_config.device_id)

        # Convert base64 to PIL Image
        image_bytes = base64.b64decode(screenshot.base64_data)
        pil_image = Image.open(BytesIO(image_bytes))

        # Build observation
        obs = {
            "screenshot": pil_image,
            "accessibility_tree": None,
        }

        # Get instruction
        instruction = user_prompt or self._current_task

        # Call MAI-UI predict
        try:
            raw_response, action_json = self._mai_agent.predict(
                instruction=instruction,
                obs=obs,
            )
        except Exception as e:
            logger.error(f"[MAI-UI] Predict failed: {e}")
            return StepResult(
                success=False,
                finished=True,
                action=None,
                thinking="",
                message=f"Prediction failed: {e}",
            )

        # Check for error; a missing action payload is treated like a payload
        # without an "action" entry instead of raising AttributeError.
        if not action_json or action_json.get("action") is None:
            logger.error("[MAI-UI] Invalid action returned")
            return StepResult(
                success=False,
                finished=True,
                action=None,
                thinking="",
                message="Invalid action from model",
            )

        # Get thinking from trajectory
        thinking = ""
        if self._mai_agent.traj_memory.steps:
            last_step = self._mai_agent.traj_memory.steps[-1]
            thinking = last_step.thought or ""

        # Convert action to AutoGLM-GUI format
        converted_action = self._action_adapter.convert(action_json)
        logger.debug(f"[MAI-UI] Converted action: {converted_action}")

        # Check if finished (terminate action)
        if converted_action.get("_metadata") == "finish":
            return StepResult(
                success=True,
                finished=True,
                action=converted_action,
                thinking=thinking,
                message=converted_action.get("message", "Task completed"),
            )

        # Execute action
        try:
            result = self.action_handler.execute(
                converted_action,
                screenshot.width,
                screenshot.height,
            )
        except Exception as e:
            logger.error(f"[MAI-UI] Action execution failed: {e}")
            return StepResult(
                success=False,
                finished=False,
                action=converted_action,
                thinking=thinking,
                message=f"Action failed: {e}",
            )

        # Update context for compatibility
        self._context.append(
            {
                "step": self._step_count,
                "action": action_json,
                "converted_action": converted_action,
                "result": result.success,
                "thinking": thinking,
            }
        )

        return StepResult(
            success=result.success,
            finished=result.should_finish,
            action=converted_action,
            thinking=thinking,
            message=result.message,
        )

    def reset(self) -> None:
        """Reset agent state for a new task."""
        self._context = []
        self._step_count = 0
        self._current_task = ""
        self._mai_agent.reset()
        logger.debug("[MAI-UI] Agent reset")

    @property
    def step_count(self) -> int:
        """Get current step count."""
        return self._step_count

    @property
    def context(self) -> list[dict[str, Any]]:
        """Get a shallow copy of the per-step context (for compatibility)."""
        return self._context.copy()
|
|
276
|
+
|
|
277
|
+
|
|
278
|
+
class _DummyModelClient:
    """
    Placeholder ``model_client`` for compatibility with streaming patches.

    MAIUIPhoneAgent lets the MAI-UI agent perform the real model calls; this
    object only exists so that code expecting a ``model_client`` attribute
    finds one.
    """

    def request(self, messages: list, **kwargs) -> Any:
        """Always raise ``NotImplementedError``; this client never calls a model."""
        reason = (
            "MAIUIPhoneAgent handles model calls internally. "
            "Do not call model_client.request() directly."
        )
        raise NotImplementedError(reason)
|
AutoGLM_GUI/metrics.py
CHANGED
|
@@ -87,14 +87,12 @@ class AutoGLMMetricsCollector(Collector):
|
|
|
87
87
|
busy_count = 0
|
|
88
88
|
|
|
89
89
|
with manager._manager_lock:
|
|
90
|
-
# Get
|
|
90
|
+
# Get snapshot (shallow copy to minimize lock time)
|
|
91
91
|
metadata_snapshot = dict(manager._metadata)
|
|
92
|
-
states_snapshot = dict(manager._states)
|
|
93
92
|
|
|
94
|
-
# Iterate over
|
|
95
|
-
for device_id,
|
|
96
|
-
|
|
97
|
-
metadata = metadata_snapshot.get(device_id)
|
|
93
|
+
# Iterate over _metadata (state is stored in AgentMetadata.state)
|
|
94
|
+
for device_id, metadata in metadata_snapshot.items():
|
|
95
|
+
state = metadata.state
|
|
98
96
|
|
|
99
97
|
# Get serial from DeviceManager
|
|
100
98
|
with device_manager._devices_lock:
|
|
@@ -113,20 +111,15 @@ class AutoGLMMetricsCollector(Collector):
|
|
|
113
111
|
if state == AgentState.BUSY:
|
|
114
112
|
busy_count += 1
|
|
115
113
|
|
|
116
|
-
# Timestamps
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
)
|
|
126
|
-
else:
|
|
127
|
-
# Failed initialization: report 0 timestamps
|
|
128
|
-
last_used_gauge.add_metric([device_id, serial], 0)
|
|
129
|
-
created_gauge.add_metric([device_id, serial], 0)
|
|
114
|
+
# Timestamps from metadata
|
|
115
|
+
last_used_gauge.add_metric(
|
|
116
|
+
[device_id, serial],
|
|
117
|
+
metadata.last_used,
|
|
118
|
+
)
|
|
119
|
+
created_gauge.add_metric(
|
|
120
|
+
[device_id, serial],
|
|
121
|
+
metadata.created_at,
|
|
122
|
+
)
|
|
130
123
|
|
|
131
124
|
metrics.extend([agents_gauge, last_used_gauge, created_gauge])
|
|
132
125
|
|