autoglm-gui 0.4.14__py3-none-any.whl → 1.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- AutoGLM_GUI/api/devices.py +49 -0
- AutoGLM_GUI/schemas.py +16 -0
- AutoGLM_GUI/static/assets/{about-29B5FDM8.js → about-BOnRPlKQ.js} +1 -1
- AutoGLM_GUI/static/assets/chat-CGW6uMKB.js +149 -0
- AutoGLM_GUI/static/assets/{index-mVNV0VwM.js → index-CRFVU0eu.js} +1 -1
- AutoGLM_GUI/static/assets/{index-wu8Wjf12.js → index-DH-Dl4tK.js} +5 -5
- AutoGLM_GUI/static/assets/index-DzUQ89YC.css +1 -0
- AutoGLM_GUI/static/index.html +2 -2
- {autoglm_gui-0.4.14.dist-info → autoglm_gui-1.0.1.dist-info}/METADATA +3 -3
- autoglm_gui-1.0.1.dist-info/RECORD +73 -0
- phone_agent/__init__.py +3 -2
- phone_agent/actions/handler.py +124 -31
- phone_agent/actions/handler_ios.py +278 -0
- phone_agent/adb/connection.py +14 -5
- phone_agent/adb/device.py +47 -16
- phone_agent/agent.py +8 -8
- phone_agent/agent_ios.py +277 -0
- phone_agent/config/__init__.py +18 -0
- phone_agent/config/apps.py +1 -1
- phone_agent/config/apps_harmonyos.py +256 -0
- phone_agent/config/apps_ios.py +339 -0
- phone_agent/config/i18n.py +8 -0
- phone_agent/config/timing.py +167 -0
- phone_agent/device_factory.py +166 -0
- phone_agent/hdc/__init__.py +53 -0
- phone_agent/hdc/connection.py +384 -0
- phone_agent/hdc/device.py +269 -0
- phone_agent/hdc/input.py +145 -0
- phone_agent/hdc/screenshot.py +127 -0
- phone_agent/model/client.py +104 -4
- phone_agent/xctest/__init__.py +47 -0
- phone_agent/xctest/connection.py +379 -0
- phone_agent/xctest/device.py +472 -0
- phone_agent/xctest/input.py +311 -0
- phone_agent/xctest/screenshot.py +226 -0
- AutoGLM_GUI/static/assets/chat-DTN2oKtA.js +0 -149
- AutoGLM_GUI/static/assets/index-Dy550Qqg.css +0 -1
- autoglm_gui-0.4.14.dist-info/RECORD +0 -57
- {autoglm_gui-0.4.14.dist-info → autoglm_gui-1.0.1.dist-info}/WHEEL +0 -0
- {autoglm_gui-0.4.14.dist-info → autoglm_gui-1.0.1.dist-info}/entry_points.txt +0 -0
- {autoglm_gui-0.4.14.dist-info → autoglm_gui-1.0.1.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,278 @@
|
|
|
1
|
+
"""Action handler for iOS automation using WebDriverAgent."""
|
|
2
|
+
|
|
3
|
+
import time
|
|
4
|
+
from dataclasses import dataclass
|
|
5
|
+
from typing import Any, Callable
|
|
6
|
+
|
|
7
|
+
from phone_agent.xctest import (
|
|
8
|
+
back,
|
|
9
|
+
double_tap,
|
|
10
|
+
home,
|
|
11
|
+
launch_app,
|
|
12
|
+
long_press,
|
|
13
|
+
swipe,
|
|
14
|
+
tap,
|
|
15
|
+
)
|
|
16
|
+
from phone_agent.xctest.input import clear_text, hide_keyboard, type_text
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
@dataclass
|
|
20
|
+
class ActionResult:
|
|
21
|
+
"""Result of an action execution."""
|
|
22
|
+
|
|
23
|
+
success: bool
|
|
24
|
+
should_finish: bool
|
|
25
|
+
message: str | None = None
|
|
26
|
+
requires_confirmation: bool = False
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class IOSActionHandler:
|
|
30
|
+
"""
|
|
31
|
+
Handles execution of actions from AI model output for iOS devices.
|
|
32
|
+
|
|
33
|
+
Args:
|
|
34
|
+
wda_url: WebDriverAgent URL.
|
|
35
|
+
session_id: Optional WDA session ID.
|
|
36
|
+
confirmation_callback: Optional callback for sensitive action confirmation.
|
|
37
|
+
Should return True to proceed, False to cancel.
|
|
38
|
+
takeover_callback: Optional callback for takeover requests (login, captcha).
|
|
39
|
+
"""
|
|
40
|
+
|
|
41
|
+
def __init__(
|
|
42
|
+
self,
|
|
43
|
+
wda_url: str = "http://localhost:8100",
|
|
44
|
+
session_id: str | None = None,
|
|
45
|
+
confirmation_callback: Callable[[str], bool] | None = None,
|
|
46
|
+
takeover_callback: Callable[[str], None] | None = None,
|
|
47
|
+
):
|
|
48
|
+
self.wda_url = wda_url
|
|
49
|
+
self.session_id = session_id
|
|
50
|
+
self.confirmation_callback = confirmation_callback or self._default_confirmation
|
|
51
|
+
self.takeover_callback = takeover_callback or self._default_takeover
|
|
52
|
+
|
|
53
|
+
def execute(
|
|
54
|
+
self, action: dict[str, Any], screen_width: int, screen_height: int
|
|
55
|
+
) -> ActionResult:
|
|
56
|
+
"""
|
|
57
|
+
Execute an action from the AI model.
|
|
58
|
+
|
|
59
|
+
Args:
|
|
60
|
+
action: The action dictionary from the model.
|
|
61
|
+
screen_width: Current screen width in pixels.
|
|
62
|
+
screen_height: Current screen height in pixels.
|
|
63
|
+
|
|
64
|
+
Returns:
|
|
65
|
+
ActionResult indicating success and whether to finish.
|
|
66
|
+
"""
|
|
67
|
+
action_type = action.get("_metadata")
|
|
68
|
+
|
|
69
|
+
if action_type == "finish":
|
|
70
|
+
return ActionResult(
|
|
71
|
+
success=True, should_finish=True, message=action.get("message")
|
|
72
|
+
)
|
|
73
|
+
|
|
74
|
+
if action_type != "do":
|
|
75
|
+
return ActionResult(
|
|
76
|
+
success=False,
|
|
77
|
+
should_finish=True,
|
|
78
|
+
message=f"Unknown action type: {action_type}",
|
|
79
|
+
)
|
|
80
|
+
|
|
81
|
+
action_name = action.get("action")
|
|
82
|
+
handler_method = self._get_handler(action_name)
|
|
83
|
+
|
|
84
|
+
if handler_method is None:
|
|
85
|
+
return ActionResult(
|
|
86
|
+
success=False,
|
|
87
|
+
should_finish=False,
|
|
88
|
+
message=f"Unknown action: {action_name}",
|
|
89
|
+
)
|
|
90
|
+
|
|
91
|
+
try:
|
|
92
|
+
return handler_method(action, screen_width, screen_height)
|
|
93
|
+
except Exception as e:
|
|
94
|
+
return ActionResult(
|
|
95
|
+
success=False, should_finish=False, message=f"Action failed: {e}"
|
|
96
|
+
)
|
|
97
|
+
|
|
98
|
+
def _get_handler(self, action_name: str) -> Callable | None:
|
|
99
|
+
"""Get the handler method for an action."""
|
|
100
|
+
handlers = {
|
|
101
|
+
"Launch": self._handle_launch,
|
|
102
|
+
"Tap": self._handle_tap,
|
|
103
|
+
"Type": self._handle_type,
|
|
104
|
+
"Type_Name": self._handle_type,
|
|
105
|
+
"Swipe": self._handle_swipe,
|
|
106
|
+
"Back": self._handle_back,
|
|
107
|
+
"Home": self._handle_home,
|
|
108
|
+
"Double Tap": self._handle_double_tap,
|
|
109
|
+
"Long Press": self._handle_long_press,
|
|
110
|
+
"Wait": self._handle_wait,
|
|
111
|
+
"Take_over": self._handle_takeover,
|
|
112
|
+
"Note": self._handle_note,
|
|
113
|
+
"Call_API": self._handle_call_api,
|
|
114
|
+
"Interact": self._handle_interact,
|
|
115
|
+
}
|
|
116
|
+
return handlers.get(action_name)
|
|
117
|
+
|
|
118
|
+
def _convert_relative_to_absolute(
|
|
119
|
+
self, element: list[int], screen_width: int, screen_height: int
|
|
120
|
+
) -> tuple[int, int]:
|
|
121
|
+
"""Convert relative coordinates (0-1000) to absolute pixels."""
|
|
122
|
+
x = int(element[0] / 1000 * screen_width)
|
|
123
|
+
y = int(element[1] / 1000 * screen_height)
|
|
124
|
+
return x, y
|
|
125
|
+
|
|
126
|
+
def _handle_launch(self, action: dict, width: int, height: int) -> ActionResult:
|
|
127
|
+
"""Handle app launch action."""
|
|
128
|
+
app_name = action.get("app")
|
|
129
|
+
if not app_name:
|
|
130
|
+
return ActionResult(False, False, "No app name specified")
|
|
131
|
+
|
|
132
|
+
success = launch_app(app_name, wda_url=self.wda_url, session_id=self.session_id)
|
|
133
|
+
if success:
|
|
134
|
+
return ActionResult(True, False)
|
|
135
|
+
return ActionResult(False, False, f"App not found: {app_name}")
|
|
136
|
+
|
|
137
|
+
def _handle_tap(self, action: dict, width: int, height: int) -> ActionResult:
|
|
138
|
+
"""Handle tap action."""
|
|
139
|
+
element = action.get("element")
|
|
140
|
+
if not element:
|
|
141
|
+
return ActionResult(False, False, "No element coordinates")
|
|
142
|
+
|
|
143
|
+
x, y = self._convert_relative_to_absolute(element, width, height)
|
|
144
|
+
|
|
145
|
+
print(f"Physically tap on ({x}, {y})")
|
|
146
|
+
|
|
147
|
+
# Check for sensitive operation
|
|
148
|
+
if "message" in action:
|
|
149
|
+
if not self.confirmation_callback(action["message"]):
|
|
150
|
+
return ActionResult(
|
|
151
|
+
success=False,
|
|
152
|
+
should_finish=True,
|
|
153
|
+
message="User cancelled sensitive operation",
|
|
154
|
+
)
|
|
155
|
+
|
|
156
|
+
tap(x, y, wda_url=self.wda_url, session_id=self.session_id)
|
|
157
|
+
return ActionResult(True, False)
|
|
158
|
+
|
|
159
|
+
def _handle_type(self, action: dict, width: int, height: int) -> ActionResult:
|
|
160
|
+
"""Handle text input action."""
|
|
161
|
+
text = action.get("text", "")
|
|
162
|
+
|
|
163
|
+
# Clear existing text and type new text
|
|
164
|
+
clear_text(wda_url=self.wda_url, session_id=self.session_id)
|
|
165
|
+
time.sleep(0.5)
|
|
166
|
+
|
|
167
|
+
type_text(text, wda_url=self.wda_url, session_id=self.session_id)
|
|
168
|
+
time.sleep(0.5)
|
|
169
|
+
|
|
170
|
+
# Hide keyboard after typing
|
|
171
|
+
hide_keyboard(wda_url=self.wda_url, session_id=self.session_id)
|
|
172
|
+
time.sleep(0.5)
|
|
173
|
+
|
|
174
|
+
return ActionResult(True, False)
|
|
175
|
+
|
|
176
|
+
def _handle_swipe(self, action: dict, width: int, height: int) -> ActionResult:
|
|
177
|
+
"""Handle swipe action."""
|
|
178
|
+
start = action.get("start")
|
|
179
|
+
end = action.get("end")
|
|
180
|
+
|
|
181
|
+
if not start or not end:
|
|
182
|
+
return ActionResult(False, False, "Missing swipe coordinates")
|
|
183
|
+
|
|
184
|
+
start_x, start_y = self._convert_relative_to_absolute(start, width, height)
|
|
185
|
+
end_x, end_y = self._convert_relative_to_absolute(end, width, height)
|
|
186
|
+
|
|
187
|
+
print(f"Physically scroll from ({start_x}, {start_y}) to ({end_x}, {end_y})")
|
|
188
|
+
|
|
189
|
+
swipe(
|
|
190
|
+
start_x,
|
|
191
|
+
start_y,
|
|
192
|
+
end_x,
|
|
193
|
+
end_y,
|
|
194
|
+
wda_url=self.wda_url,
|
|
195
|
+
session_id=self.session_id,
|
|
196
|
+
)
|
|
197
|
+
return ActionResult(True, False)
|
|
198
|
+
|
|
199
|
+
def _handle_back(self, action: dict, width: int, height: int) -> ActionResult:
|
|
200
|
+
"""Handle back gesture (swipe from left edge)."""
|
|
201
|
+
back(wda_url=self.wda_url, session_id=self.session_id)
|
|
202
|
+
return ActionResult(True, False)
|
|
203
|
+
|
|
204
|
+
def _handle_home(self, action: dict, width: int, height: int) -> ActionResult:
|
|
205
|
+
"""Handle home button action."""
|
|
206
|
+
home(wda_url=self.wda_url, session_id=self.session_id)
|
|
207
|
+
return ActionResult(True, False)
|
|
208
|
+
|
|
209
|
+
def _handle_double_tap(self, action: dict, width: int, height: int) -> ActionResult:
|
|
210
|
+
"""Handle double tap action."""
|
|
211
|
+
element = action.get("element")
|
|
212
|
+
if not element:
|
|
213
|
+
return ActionResult(False, False, "No element coordinates")
|
|
214
|
+
|
|
215
|
+
x, y = self._convert_relative_to_absolute(element, width, height)
|
|
216
|
+
double_tap(x, y, wda_url=self.wda_url, session_id=self.session_id)
|
|
217
|
+
return ActionResult(True, False)
|
|
218
|
+
|
|
219
|
+
def _handle_long_press(self, action: dict, width: int, height: int) -> ActionResult:
|
|
220
|
+
"""Handle long press action."""
|
|
221
|
+
element = action.get("element")
|
|
222
|
+
if not element:
|
|
223
|
+
return ActionResult(False, False, "No element coordinates")
|
|
224
|
+
|
|
225
|
+
x, y = self._convert_relative_to_absolute(element, width, height)
|
|
226
|
+
long_press(
|
|
227
|
+
x,
|
|
228
|
+
y,
|
|
229
|
+
duration=3.0,
|
|
230
|
+
wda_url=self.wda_url,
|
|
231
|
+
session_id=self.session_id,
|
|
232
|
+
)
|
|
233
|
+
return ActionResult(True, False)
|
|
234
|
+
|
|
235
|
+
def _handle_wait(self, action: dict, width: int, height: int) -> ActionResult:
|
|
236
|
+
"""Handle wait action."""
|
|
237
|
+
duration_str = action.get("duration", "1 seconds")
|
|
238
|
+
try:
|
|
239
|
+
duration = float(duration_str.replace("seconds", "").strip())
|
|
240
|
+
except ValueError:
|
|
241
|
+
duration = 1.0
|
|
242
|
+
|
|
243
|
+
time.sleep(duration)
|
|
244
|
+
return ActionResult(True, False)
|
|
245
|
+
|
|
246
|
+
def _handle_takeover(self, action: dict, width: int, height: int) -> ActionResult:
|
|
247
|
+
"""Handle takeover request (login, captcha, etc.)."""
|
|
248
|
+
message = action.get("message", "User intervention required")
|
|
249
|
+
self.takeover_callback(message)
|
|
250
|
+
return ActionResult(True, False)
|
|
251
|
+
|
|
252
|
+
def _handle_note(self, action: dict, width: int, height: int) -> ActionResult:
|
|
253
|
+
"""Handle note action (placeholder for content recording)."""
|
|
254
|
+
# This action is typically used for recording page content
|
|
255
|
+
# Implementation depends on specific requirements
|
|
256
|
+
return ActionResult(True, False)
|
|
257
|
+
|
|
258
|
+
def _handle_call_api(self, action: dict, width: int, height: int) -> ActionResult:
|
|
259
|
+
"""Handle API call action (placeholder for summarization)."""
|
|
260
|
+
# This action is typically used for content summarization
|
|
261
|
+
# Implementation depends on specific requirements
|
|
262
|
+
return ActionResult(True, False)
|
|
263
|
+
|
|
264
|
+
def _handle_interact(self, action: dict, width: int, height: int) -> ActionResult:
|
|
265
|
+
"""Handle interaction request (user choice needed)."""
|
|
266
|
+
# This action signals that user input is needed
|
|
267
|
+
return ActionResult(True, False, message="User interaction required")
|
|
268
|
+
|
|
269
|
+
@staticmethod
|
|
270
|
+
def _default_confirmation(message: str) -> bool:
|
|
271
|
+
"""Default confirmation callback using console input."""
|
|
272
|
+
response = input(f"Sensitive operation: {message}\nConfirm? (Y/N): ")
|
|
273
|
+
return response.upper() == "Y"
|
|
274
|
+
|
|
275
|
+
@staticmethod
|
|
276
|
+
def _default_takeover(message: str) -> None:
|
|
277
|
+
"""Default takeover callback using console input."""
|
|
278
|
+
input(f"{message}\nPress Enter after completing manual operation...")
|
phone_agent/adb/connection.py
CHANGED
|
@@ -5,6 +5,8 @@ import time
|
|
|
5
5
|
from dataclasses import dataclass
|
|
6
6
|
from enum import Enum
|
|
7
7
|
|
|
8
|
+
from phone_agent.config.timing import TIMING_CONFIG
|
|
9
|
+
|
|
8
10
|
|
|
9
11
|
class ConnectionType(Enum):
|
|
10
12
|
"""Type of ADB connection."""
|
|
@@ -106,7 +108,9 @@ class ADBConnection:
|
|
|
106
108
|
if address:
|
|
107
109
|
cmd.append(address)
|
|
108
110
|
|
|
109
|
-
result = subprocess.run(
|
|
111
|
+
result = subprocess.run(
|
|
112
|
+
cmd, capture_output=True, text=True, encoding="utf-8", timeout=5
|
|
113
|
+
)
|
|
110
114
|
|
|
111
115
|
output = result.stdout + result.stderr
|
|
112
116
|
return True, output.strip() or "Disconnected"
|
|
@@ -238,12 +242,14 @@ class ADBConnection:
|
|
|
238
242
|
cmd.extend(["-s", device_id])
|
|
239
243
|
cmd.extend(["tcpip", str(port)])
|
|
240
244
|
|
|
241
|
-
result = subprocess.run(
|
|
245
|
+
result = subprocess.run(
|
|
246
|
+
cmd, capture_output=True, text=True, encoding="utf-8", timeout=10
|
|
247
|
+
)
|
|
242
248
|
|
|
243
249
|
output = result.stdout + result.stderr
|
|
244
250
|
|
|
245
251
|
if "restarting" in output.lower() or result.returncode == 0:
|
|
246
|
-
time.sleep(
|
|
252
|
+
time.sleep(TIMING_CONFIG.connection.adb_restart_delay)
|
|
247
253
|
return True, f"TCP/IP mode enabled on port {port}"
|
|
248
254
|
else:
|
|
249
255
|
return False, output.strip()
|
|
@@ -267,7 +273,9 @@ class ADBConnection:
|
|
|
267
273
|
cmd.extend(["-s", device_id])
|
|
268
274
|
cmd.extend(["shell", "ip", "route"])
|
|
269
275
|
|
|
270
|
-
result = subprocess.run(
|
|
276
|
+
result = subprocess.run(
|
|
277
|
+
cmd, capture_output=True, text=True, encoding="utf-8", timeout=5
|
|
278
|
+
)
|
|
271
279
|
|
|
272
280
|
# Parse IP from route output
|
|
273
281
|
for line in result.stdout.split("\n"):
|
|
@@ -283,6 +291,7 @@ class ADBConnection:
|
|
|
283
291
|
cmd[:-1] + ["shell", "ip", "addr", "show", "wlan0"],
|
|
284
292
|
capture_output=True,
|
|
285
293
|
text=True,
|
|
294
|
+
encoding="utf-8",
|
|
286
295
|
timeout=5,
|
|
287
296
|
)
|
|
288
297
|
|
|
@@ -311,7 +320,7 @@ class ADBConnection:
|
|
|
311
320
|
[self.adb_path, "kill-server"], capture_output=True, timeout=5
|
|
312
321
|
)
|
|
313
322
|
|
|
314
|
-
time.sleep(
|
|
323
|
+
time.sleep(TIMING_CONFIG.connection.server_restart_delay)
|
|
315
324
|
|
|
316
325
|
# Start server
|
|
317
326
|
subprocess.run(
|
phone_agent/adb/device.py
CHANGED
|
@@ -4,6 +4,7 @@ import subprocess
|
|
|
4
4
|
import time
|
|
5
5
|
|
|
6
6
|
from phone_agent.config.apps import APP_PACKAGES
|
|
7
|
+
from phone_agent.config.timing import TIMING_CONFIG
|
|
7
8
|
|
|
8
9
|
|
|
9
10
|
def get_current_app(device_id: str | None = None) -> str:
|
|
@@ -19,9 +20,14 @@ def get_current_app(device_id: str | None = None) -> str:
|
|
|
19
20
|
adb_prefix = _get_adb_prefix(device_id)
|
|
20
21
|
|
|
21
22
|
result = subprocess.run(
|
|
22
|
-
adb_prefix + ["shell", "dumpsys", "window"],
|
|
23
|
+
adb_prefix + ["shell", "dumpsys", "window"],
|
|
24
|
+
capture_output=True,
|
|
25
|
+
text=True,
|
|
26
|
+
encoding="utf-8",
|
|
23
27
|
)
|
|
24
28
|
output = result.stdout
|
|
29
|
+
if not output:
|
|
30
|
+
raise ValueError("No output from dumpsys window")
|
|
25
31
|
|
|
26
32
|
# Parse window focus info
|
|
27
33
|
for line in output.split("\n"):
|
|
@@ -33,7 +39,9 @@ def get_current_app(device_id: str | None = None) -> str:
|
|
|
33
39
|
return "System Home"
|
|
34
40
|
|
|
35
41
|
|
|
36
|
-
def tap(
|
|
42
|
+
def tap(
|
|
43
|
+
x: int, y: int, device_id: str | None = None, delay: float | None = None
|
|
44
|
+
) -> None:
|
|
37
45
|
"""
|
|
38
46
|
Tap at the specified coordinates.
|
|
39
47
|
|
|
@@ -41,8 +49,11 @@ def tap(x: int, y: int, device_id: str | None = None, delay: float = 1.0) -> Non
|
|
|
41
49
|
x: X coordinate.
|
|
42
50
|
y: Y coordinate.
|
|
43
51
|
device_id: Optional ADB device ID.
|
|
44
|
-
delay: Delay in seconds after tap.
|
|
52
|
+
delay: Delay in seconds after tap. If None, uses configured default.
|
|
45
53
|
"""
|
|
54
|
+
if delay is None:
|
|
55
|
+
delay = TIMING_CONFIG.device.default_tap_delay
|
|
56
|
+
|
|
46
57
|
adb_prefix = _get_adb_prefix(device_id)
|
|
47
58
|
|
|
48
59
|
subprocess.run(
|
|
@@ -52,7 +63,7 @@ def tap(x: int, y: int, device_id: str | None = None, delay: float = 1.0) -> Non
|
|
|
52
63
|
|
|
53
64
|
|
|
54
65
|
def double_tap(
|
|
55
|
-
x: int, y: int, device_id: str | None = None, delay: float =
|
|
66
|
+
x: int, y: int, device_id: str | None = None, delay: float | None = None
|
|
56
67
|
) -> None:
|
|
57
68
|
"""
|
|
58
69
|
Double tap at the specified coordinates.
|
|
@@ -61,14 +72,17 @@ def double_tap(
|
|
|
61
72
|
x: X coordinate.
|
|
62
73
|
y: Y coordinate.
|
|
63
74
|
device_id: Optional ADB device ID.
|
|
64
|
-
delay: Delay in seconds after double tap.
|
|
75
|
+
delay: Delay in seconds after double tap. If None, uses configured default.
|
|
65
76
|
"""
|
|
77
|
+
if delay is None:
|
|
78
|
+
delay = TIMING_CONFIG.device.default_double_tap_delay
|
|
79
|
+
|
|
66
80
|
adb_prefix = _get_adb_prefix(device_id)
|
|
67
81
|
|
|
68
82
|
subprocess.run(
|
|
69
83
|
adb_prefix + ["shell", "input", "tap", str(x), str(y)], capture_output=True
|
|
70
84
|
)
|
|
71
|
-
time.sleep(
|
|
85
|
+
time.sleep(TIMING_CONFIG.device.double_tap_interval)
|
|
72
86
|
subprocess.run(
|
|
73
87
|
adb_prefix + ["shell", "input", "tap", str(x), str(y)], capture_output=True
|
|
74
88
|
)
|
|
@@ -80,7 +94,7 @@ def long_press(
|
|
|
80
94
|
y: int,
|
|
81
95
|
duration_ms: int = 3000,
|
|
82
96
|
device_id: str | None = None,
|
|
83
|
-
delay: float =
|
|
97
|
+
delay: float | None = None,
|
|
84
98
|
) -> None:
|
|
85
99
|
"""
|
|
86
100
|
Long press at the specified coordinates.
|
|
@@ -90,8 +104,11 @@ def long_press(
|
|
|
90
104
|
y: Y coordinate.
|
|
91
105
|
duration_ms: Duration of press in milliseconds.
|
|
92
106
|
device_id: Optional ADB device ID.
|
|
93
|
-
delay: Delay in seconds after long press.
|
|
107
|
+
delay: Delay in seconds after long press. If None, uses configured default.
|
|
94
108
|
"""
|
|
109
|
+
if delay is None:
|
|
110
|
+
delay = TIMING_CONFIG.device.default_long_press_delay
|
|
111
|
+
|
|
95
112
|
adb_prefix = _get_adb_prefix(device_id)
|
|
96
113
|
|
|
97
114
|
subprocess.run(
|
|
@@ -109,7 +126,7 @@ def swipe(
|
|
|
109
126
|
end_y: int,
|
|
110
127
|
duration_ms: int | None = None,
|
|
111
128
|
device_id: str | None = None,
|
|
112
|
-
delay: float =
|
|
129
|
+
delay: float | None = None,
|
|
113
130
|
) -> None:
|
|
114
131
|
"""
|
|
115
132
|
Swipe from start to end coordinates.
|
|
@@ -121,8 +138,11 @@ def swipe(
|
|
|
121
138
|
end_y: Ending Y coordinate.
|
|
122
139
|
duration_ms: Duration of swipe in milliseconds (auto-calculated if None).
|
|
123
140
|
device_id: Optional ADB device ID.
|
|
124
|
-
delay: Delay in seconds after swipe.
|
|
141
|
+
delay: Delay in seconds after swipe. If None, uses configured default.
|
|
125
142
|
"""
|
|
143
|
+
if delay is None:
|
|
144
|
+
delay = TIMING_CONFIG.device.default_swipe_delay
|
|
145
|
+
|
|
126
146
|
adb_prefix = _get_adb_prefix(device_id)
|
|
127
147
|
|
|
128
148
|
if duration_ms is None:
|
|
@@ -148,14 +168,17 @@ def swipe(
|
|
|
148
168
|
time.sleep(delay)
|
|
149
169
|
|
|
150
170
|
|
|
151
|
-
def back(device_id: str | None = None, delay: float =
|
|
171
|
+
def back(device_id: str | None = None, delay: float | None = None) -> None:
|
|
152
172
|
"""
|
|
153
173
|
Press the back button.
|
|
154
174
|
|
|
155
175
|
Args:
|
|
156
176
|
device_id: Optional ADB device ID.
|
|
157
|
-
delay: Delay in seconds after pressing back.
|
|
177
|
+
delay: Delay in seconds after pressing back. If None, uses configured default.
|
|
158
178
|
"""
|
|
179
|
+
if delay is None:
|
|
180
|
+
delay = TIMING_CONFIG.device.default_back_delay
|
|
181
|
+
|
|
159
182
|
adb_prefix = _get_adb_prefix(device_id)
|
|
160
183
|
|
|
161
184
|
subprocess.run(
|
|
@@ -164,14 +187,17 @@ def back(device_id: str | None = None, delay: float = 1.0) -> None:
|
|
|
164
187
|
time.sleep(delay)
|
|
165
188
|
|
|
166
189
|
|
|
167
|
-
def home(device_id: str | None = None, delay: float =
|
|
190
|
+
def home(device_id: str | None = None, delay: float | None = None) -> None:
|
|
168
191
|
"""
|
|
169
192
|
Press the home button.
|
|
170
193
|
|
|
171
194
|
Args:
|
|
172
195
|
device_id: Optional ADB device ID.
|
|
173
|
-
delay: Delay in seconds after pressing home.
|
|
196
|
+
delay: Delay in seconds after pressing home. If None, uses configured default.
|
|
174
197
|
"""
|
|
198
|
+
if delay is None:
|
|
199
|
+
delay = TIMING_CONFIG.device.default_home_delay
|
|
200
|
+
|
|
175
201
|
adb_prefix = _get_adb_prefix(device_id)
|
|
176
202
|
|
|
177
203
|
subprocess.run(
|
|
@@ -180,18 +206,23 @@ def home(device_id: str | None = None, delay: float = 1.0) -> None:
|
|
|
180
206
|
time.sleep(delay)
|
|
181
207
|
|
|
182
208
|
|
|
183
|
-
def launch_app(
|
|
209
|
+
def launch_app(
|
|
210
|
+
app_name: str, device_id: str | None = None, delay: float | None = None
|
|
211
|
+
) -> bool:
|
|
184
212
|
"""
|
|
185
213
|
Launch an app by name.
|
|
186
214
|
|
|
187
215
|
Args:
|
|
188
216
|
app_name: The app name (must be in APP_PACKAGES).
|
|
189
217
|
device_id: Optional ADB device ID.
|
|
190
|
-
delay: Delay in seconds after launching.
|
|
218
|
+
delay: Delay in seconds after launching. If None, uses configured default.
|
|
191
219
|
|
|
192
220
|
Returns:
|
|
193
221
|
True if app was launched, False if app not found.
|
|
194
222
|
"""
|
|
223
|
+
if delay is None:
|
|
224
|
+
delay = TIMING_CONFIG.device.default_launch_delay
|
|
225
|
+
|
|
195
226
|
if app_name not in APP_PACKAGES:
|
|
196
227
|
return False
|
|
197
228
|
|
phone_agent/agent.py
CHANGED
|
@@ -7,8 +7,8 @@ from typing import Any, Callable
|
|
|
7
7
|
|
|
8
8
|
from phone_agent.actions import ActionHandler
|
|
9
9
|
from phone_agent.actions.handler import finish, parse_action
|
|
10
|
-
from phone_agent.adb import get_current_app, get_screenshot
|
|
11
10
|
from phone_agent.config import get_messages, get_system_prompt
|
|
11
|
+
from phone_agent.device_factory import get_device_factory
|
|
12
12
|
from phone_agent.model import ModelClient, ModelConfig
|
|
13
13
|
from phone_agent.model.client import MessageBuilder
|
|
14
14
|
|
|
@@ -140,8 +140,9 @@ class PhoneAgent:
|
|
|
140
140
|
self._step_count += 1
|
|
141
141
|
|
|
142
142
|
# Capture current screen state
|
|
143
|
-
|
|
144
|
-
|
|
143
|
+
device_factory = get_device_factory()
|
|
144
|
+
screenshot = device_factory.get_screenshot(self.agent_config.device_id)
|
|
145
|
+
current_app = device_factory.get_current_app(self.agent_config.device_id)
|
|
145
146
|
|
|
146
147
|
# Build messages
|
|
147
148
|
if is_first:
|
|
@@ -169,6 +170,10 @@ class PhoneAgent:
|
|
|
169
170
|
|
|
170
171
|
# Get model response
|
|
171
172
|
try:
|
|
173
|
+
msgs = get_messages(self.agent_config.lang)
|
|
174
|
+
print("\n" + "=" * 50)
|
|
175
|
+
print(f"💭 {msgs['thinking']}:")
|
|
176
|
+
print("-" * 50)
|
|
172
177
|
response = self.model_client.request(self._context)
|
|
173
178
|
except Exception as e:
|
|
174
179
|
if self.agent_config.verbose:
|
|
@@ -191,11 +196,6 @@ class PhoneAgent:
|
|
|
191
196
|
|
|
192
197
|
if self.agent_config.verbose:
|
|
193
198
|
# Print thinking process
|
|
194
|
-
msgs = get_messages(self.agent_config.lang)
|
|
195
|
-
print("\n" + "=" * 50)
|
|
196
|
-
print(f"💭 {msgs['thinking']}:")
|
|
197
|
-
print("-" * 50)
|
|
198
|
-
print(response.thinking)
|
|
199
199
|
print("-" * 50)
|
|
200
200
|
print(f"🎯 {msgs['action']}:")
|
|
201
201
|
print(json.dumps(action, ensure_ascii=False, indent=2))
|