minitap-mobile-use 2.5.3__py3-none-any.whl → 2.6.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of minitap-mobile-use might be problematic. Click here for more details.
- minitap/mobile_use/agents/contextor/contextor.py +0 -8
- minitap/mobile_use/agents/cortex/cortex.md +122 -36
- minitap/mobile_use/agents/cortex/cortex.py +32 -17
- minitap/mobile_use/agents/cortex/types.py +18 -4
- minitap/mobile_use/agents/executor/executor.md +3 -3
- minitap/mobile_use/agents/executor/executor.py +10 -3
- minitap/mobile_use/agents/hopper/hopper.md +30 -2
- minitap/mobile_use/agents/hopper/hopper.py +19 -15
- minitap/mobile_use/agents/orchestrator/orchestrator.py +14 -5
- minitap/mobile_use/agents/outputter/outputter.py +13 -3
- minitap/mobile_use/agents/planner/planner.md +20 -9
- minitap/mobile_use/agents/planner/planner.py +12 -5
- minitap/mobile_use/agents/screen_analyzer/human.md +16 -0
- minitap/mobile_use/agents/screen_analyzer/screen_analyzer.py +111 -0
- minitap/mobile_use/clients/ios_client.py +7 -3
- minitap/mobile_use/config.py +87 -24
- minitap/mobile_use/controllers/mobile_command_controller.py +354 -88
- minitap/mobile_use/controllers/platform_specific_commands_controller.py +41 -27
- minitap/mobile_use/controllers/types.py +95 -0
- minitap/mobile_use/graph/graph.py +55 -11
- minitap/mobile_use/graph/state.py +10 -3
- minitap/mobile_use/main.py +12 -4
- minitap/mobile_use/sdk/agent.py +109 -72
- minitap/mobile_use/sdk/examples/smart_notification_assistant.py +59 -10
- minitap/mobile_use/servers/device_hardware_bridge.py +13 -6
- minitap/mobile_use/services/llm.py +5 -2
- minitap/mobile_use/tools/index.py +7 -9
- minitap/mobile_use/tools/mobile/{clear_text.py → focus_and_clear_text.py} +7 -7
- minitap/mobile_use/tools/mobile/{input_text.py → focus_and_input_text.py} +8 -8
- minitap/mobile_use/tools/mobile/long_press_on.py +130 -15
- minitap/mobile_use/tools/mobile/swipe.py +3 -26
- minitap/mobile_use/tools/mobile/tap.py +41 -28
- minitap/mobile_use/tools/mobile/wait_for_delay.py +84 -0
- minitap/mobile_use/utils/cli_helpers.py +10 -6
- {minitap_mobile_use-2.5.3.dist-info → minitap_mobile_use-2.6.0.dist-info}/METADATA +1 -1
- {minitap_mobile_use-2.5.3.dist-info → minitap_mobile_use-2.6.0.dist-info}/RECORD +38 -36
- minitap/mobile_use/tools/mobile/glimpse_screen.py +0 -74
- minitap/mobile_use/tools/mobile/wait_for_animation_to_end.py +0 -64
- {minitap_mobile_use-2.5.3.dist-info → minitap_mobile_use-2.6.0.dist-info}/WHEEL +0 -0
- {minitap_mobile_use-2.5.3.dist-info → minitap_mobile_use-2.6.0.dist-info}/entry_points.txt +0 -0
|
@@ -1,22 +1,40 @@
|
|
|
1
|
+
import re
|
|
2
|
+
import time
|
|
1
3
|
import uuid
|
|
2
4
|
from enum import Enum
|
|
3
|
-
from typing import Annotated, Literal
|
|
4
5
|
|
|
5
6
|
import yaml
|
|
7
|
+
from adbutils import AdbClient
|
|
6
8
|
from langgraph.types import Command
|
|
7
|
-
from pydantic import BaseModel,
|
|
9
|
+
from pydantic import BaseModel, ConfigDict, Field
|
|
8
10
|
from requests import JSONDecodeError
|
|
9
11
|
|
|
10
12
|
from minitap.mobile_use.clients.device_hardware_client import DeviceHardwareClient
|
|
11
13
|
from minitap.mobile_use.clients.screen_api_client import ScreenApiClient
|
|
12
14
|
from minitap.mobile_use.config import initialize_llm_config
|
|
13
15
|
from minitap.mobile_use.context import DeviceContext, DevicePlatform, MobileUseContext
|
|
16
|
+
from minitap.mobile_use.controllers.types import (
|
|
17
|
+
Bounds,
|
|
18
|
+
CoordinatesSelectorRequest,
|
|
19
|
+
PercentagesSelectorRequest,
|
|
20
|
+
SwipeRequest,
|
|
21
|
+
SwipeStartEndCoordinatesRequest,
|
|
22
|
+
SwipeStartEndPercentagesRequest,
|
|
23
|
+
TapOutput,
|
|
24
|
+
)
|
|
14
25
|
from minitap.mobile_use.utils.errors import ControllerErrors
|
|
15
26
|
from minitap.mobile_use.utils.logger import get_logger
|
|
16
27
|
|
|
17
28
|
logger = get_logger(__name__)
|
|
18
29
|
|
|
19
30
|
|
|
31
|
+
def _get_adb_device(ctx: MobileUseContext):
    """Return the ADB device handle for the device described by ``ctx``.

    The device is looked up on ``ctx.adb_client`` using ``ctx.device.device_id``
    as the serial.

    Raises:
        ValueError: if ``ctx.adb_client`` is ``None``.
    """
    client = ctx.adb_client
    if client is None:
        raise ValueError("ADB client is not initialized")
    serial = ctx.device.device_id
    return client.device(serial=serial)
|
|
36
|
+
|
|
37
|
+
|
|
20
38
|
###### Screen elements retrieval ######
|
|
21
39
|
|
|
22
40
|
|
|
@@ -71,30 +89,6 @@ def run_flow(ctx: MobileUseContext, flow_steps: list, dry_run: bool = False) ->
|
|
|
71
89
|
return None
|
|
72
90
|
|
|
73
91
|
|
|
74
|
-
class CoordinatesSelectorRequest(BaseModel):
|
|
75
|
-
model_config = ConfigDict(extra="forbid")
|
|
76
|
-
x: int
|
|
77
|
-
y: int
|
|
78
|
-
|
|
79
|
-
def to_str(self):
|
|
80
|
-
return f"{self.x}, {self.y}"
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
class PercentagesSelectorRequest(BaseModel):
|
|
84
|
-
model_config = ConfigDict(extra="forbid")
|
|
85
|
-
"""
|
|
86
|
-
0%,0% # top-left corner
|
|
87
|
-
100%,100% # bottom-right corner
|
|
88
|
-
50%,50% # center
|
|
89
|
-
"""
|
|
90
|
-
|
|
91
|
-
x_percent: int
|
|
92
|
-
y_percent: int
|
|
93
|
-
|
|
94
|
-
def to_str(self):
|
|
95
|
-
return f"{self.x_percent}%, {self.y_percent}%"
|
|
96
|
-
|
|
97
|
-
|
|
98
92
|
class IdSelectorRequest(BaseModel):
|
|
99
93
|
model_config = ConfigDict(extra="forbid")
|
|
100
94
|
id: str
|
|
@@ -146,16 +140,210 @@ SelectorRequest = (
|
|
|
146
140
|
)
|
|
147
141
|
|
|
148
142
|
|
|
143
|
+
##### Tap helper functions #####
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
def get_bounds_for_element(element: dict) -> Bounds | None:
    """Extract the bounding box of a UI element.

    The element's ``"bounds"`` entry is expected to be a string of the form
    ``"[x1,y1][x2,y2]"``.

    Args:
        element: UI hierarchy node that may carry a ``"bounds"`` entry.

    Returns:
        A ``Bounds`` built from the four parsed integers, or ``None`` when the
        entry is missing, empty, not a string, does not match the expected
        format, or is rejected by ``Bounds``.
    """
    bounds_str = element.get("bounds")
    # A non-string value would make re.match raise TypeError; treat it as "no bounds".
    if not bounds_str or not isinstance(bounds_str, str):
        return None

    # Accept an optional minus sign on every coordinate.
    # NOTE(review): partially off-screen views presumably report negative
    # coordinates; confirm against real hierarchy dumps.
    match = re.match(r"\[(-?\d+),(-?\d+)\]\[(-?\d+),(-?\d+)\]", bounds_str)
    if match is None:
        return None

    x1, y1, x2, y2 = (int(group) for group in match.groups())
    try:
        return Bounds(x1=x1, y1=y1, x2=x2, y2=y2)
    except ValueError:
        # Keep the original contract: values Bounds refuses yield None.
        return None
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
def _extract_resource_id_and_text_from_selector(
    selector: SelectorRequest,
) -> tuple[str | None, str | None]:
    """Return the ``(resource_id, text)`` pair carried by ``selector``.

    A component is ``None`` when the selector type does not provide it.
    Selector types other than the three checked here yield ``(None, None)``.
    """
    if isinstance(selector, IdSelectorRequest):
        return selector.id, None
    if isinstance(selector, TextSelectorRequest):
        return None, selector.text
    if isinstance(selector, IdWithTextSelectorRequest):
        return selector.id, selector.text
    return None, None
|
|
181
|
+
|
|
182
|
+
|
|
183
|
+
def _get_ui_element(
|
|
184
|
+
ui_hierarchy: list[dict],
|
|
185
|
+
resource_id: str | None = None,
|
|
186
|
+
text: str | None = None,
|
|
187
|
+
index: int | None = None,
|
|
188
|
+
) -> tuple[dict | None, str | None]:
|
|
189
|
+
"""Find a UI element in the hierarchy by resource_id or text."""
|
|
190
|
+
if not resource_id and not text:
|
|
191
|
+
return None, "No resource_id or text provided"
|
|
192
|
+
|
|
193
|
+
matches = []
|
|
194
|
+
for element in ui_hierarchy:
|
|
195
|
+
if resource_id and element.get("resource-id") == resource_id:
|
|
196
|
+
matches.append(element)
|
|
197
|
+
elif text and (element.get("text") == text or element.get("accessibilityText") == text):
|
|
198
|
+
matches.append(element)
|
|
199
|
+
|
|
200
|
+
if not matches:
|
|
201
|
+
criteria = f"resource_id='{resource_id}'" if resource_id else f"text='{text}'"
|
|
202
|
+
return None, f"No element found with {criteria}"
|
|
203
|
+
|
|
204
|
+
target_index = index if index is not None else 0
|
|
205
|
+
if target_index >= len(matches):
|
|
206
|
+
criteria = f"resource_id='{resource_id}'" if resource_id else f"text='{text}'"
|
|
207
|
+
return (
|
|
208
|
+
None,
|
|
209
|
+
f"Index {target_index} out of range for {criteria} (found {len(matches)} matches)",
|
|
210
|
+
)
|
|
211
|
+
|
|
212
|
+
return matches[target_index], None
|
|
213
|
+
|
|
214
|
+
|
|
215
|
+
def _android_tap_by_coordinates(
    ctx: MobileUseContext,
    coords: CoordinatesSelectorRequest,
    long_press: bool = False,
    long_press_duration: int = 1000,
) -> TapOutput:
    """Tap, or long press, at absolute screen coordinates through ADB.

    Returns a ``TapOutput`` whose ``error`` is ``None`` on success and a
    message when the ADB client is missing or the shell command fails.
    """
    if ctx.adb_client is None:
        return TapOutput(error="ADB client is not initialized")

    x, y = coords.x, coords.y
    # A long press is issued as a swipe that starts and ends on the same
    # point, with long_press_duration as the swipe's duration argument.
    cmd = (
        f"input swipe {x} {y} {x} {y} {long_press_duration}"
        if long_press
        else f"input tap {x} {y}"
    )

    try:
        _get_adb_device(ctx).shell(cmd)
    except Exception as e:
        return TapOutput(error=f"ADB tap failed: {str(e)}")
    return TapOutput(error=None)
|
|
237
|
+
|
|
238
|
+
|
|
239
|
+
def _android_tap_by_resource_id_or_text(
    ctx: MobileUseContext,
    ui_hierarchy: list[dict],
    resource_id: str | None = None,
    text: str | None = None,
    index: int | None = None,
    long_press: bool = False,
    long_press_duration: int = 1000,
) -> TapOutput:
    """Locate an element in ``ui_hierarchy`` and tap the center of its bounds.

    Returns a ``TapOutput`` carrying an error message when the ADB client is
    missing, the element cannot be found, or its bounds cannot be extracted;
    otherwise the result of the coordinate tap.
    """
    if ctx.adb_client is None:
        return TapOutput(error="ADB client is not initialized")

    ui_element, error_msg = _get_ui_element(
        ui_hierarchy=ui_hierarchy, resource_id=resource_id, text=text, index=index
    )
    if not ui_element:
        return TapOutput(error=error_msg)

    bounds = get_bounds_for_element(ui_element)
    if bounds:
        return _android_tap_by_coordinates(
            ctx=ctx,
            coords=bounds.get_center(),
            long_press=long_press,
            long_press_duration=long_press_duration,
        )

    criteria = f"resource_id='{resource_id}'" if resource_id else f"text='{text}'"
    return TapOutput(error=f"Could not extract bounds for element with {criteria}")
|
|
271
|
+
|
|
272
|
+
|
|
273
|
+
def tap_android(
    ctx: MobileUseContext,
    selector: SelectorRequest,
    index: int | None = None,
    ui_hierarchy: list[dict] | None = None,
    long_press: bool = False,
    long_press_duration: int = 1000,
) -> TapOutput:
    """Execute a tap (or long press) through ADB for any supported selector.

    Coordinate and percentage selectors are tapped directly. Every other
    selector is resolved against the UI hierarchy, which is fetched from the
    screen API when the caller did not supply one.

    Raises:
        ValueError: if ``ctx.adb_client`` is not set.
    """
    if not ctx.adb_client:
        raise ValueError("ADB client is not initialized")

    def _tap_at(point) -> TapOutput:
        return _android_tap_by_coordinates(
            ctx=ctx,
            coords=point,
            long_press=long_press,
            long_press_duration=long_press_duration,
        )

    # Direct coordinate tap
    if isinstance(selector, SelectorRequestWithCoordinates):
        return _tap_at(selector.coordinates)

    # Percentages are converted to absolute coordinates using the device size
    if isinstance(selector, SelectorRequestWithPercentages):
        return _tap_at(
            selector.percentages.to_coords(
                width=ctx.device.device_width,
                height=ctx.device.device_height,
            )
        )

    # Remaining selectors are resolved through the UI hierarchy
    resource_id, text = _extract_resource_id_and_text_from_selector(selector)
    hierarchy = ui_hierarchy or get_screen_data(screen_api_client=ctx.screen_api_client).elements

    return _android_tap_by_resource_id_or_text(
        ctx=ctx,
        ui_hierarchy=hierarchy,
        resource_id=resource_id,
        text=text,
        index=index,
        long_press=long_press,
        long_press_duration=long_press_duration,
    )
|
|
322
|
+
|
|
323
|
+
|
|
149
324
|
def tap(
|
|
150
325
|
ctx: MobileUseContext,
|
|
151
326
|
selector_request: SelectorRequest,
|
|
152
327
|
dry_run: bool = False,
|
|
153
328
|
index: int | None = None,
|
|
329
|
+
ui_hierarchy: list[dict] | None = None,
|
|
154
330
|
):
|
|
155
331
|
"""
|
|
156
332
|
Tap on a selector.
|
|
157
333
|
Index is optional and is used when you have multiple views matching the same selector.
|
|
334
|
+
ui_hierarchy is optional and used for ADB taps to find elements.
|
|
158
335
|
"""
|
|
336
|
+
# Prioritize ADB
|
|
337
|
+
if ctx.adb_client:
|
|
338
|
+
output = tap_android(
|
|
339
|
+
ctx=ctx,
|
|
340
|
+
selector=selector_request,
|
|
341
|
+
index=index,
|
|
342
|
+
ui_hierarchy=ui_hierarchy,
|
|
343
|
+
)
|
|
344
|
+
return output.error if output.error else None
|
|
345
|
+
|
|
346
|
+
# Fallback to Maestro
|
|
159
347
|
tap_body = selector_request.to_dict()
|
|
160
348
|
if not tap_body:
|
|
161
349
|
error = "Invalid tap selector request, could not format yaml"
|
|
@@ -164,7 +352,7 @@ def tap(
|
|
|
164
352
|
if index:
|
|
165
353
|
tap_body["index"] = index
|
|
166
354
|
flow_input = [{"tapOn": tap_body}]
|
|
167
|
-
return
|
|
355
|
+
return run_flow(ctx, flow_input, dry_run=dry_run)
|
|
168
356
|
|
|
169
357
|
|
|
170
358
|
def long_press_on(
|
|
@@ -172,7 +360,27 @@ def long_press_on(
|
|
|
172
360
|
selector_request: SelectorRequest,
|
|
173
361
|
dry_run: bool = False,
|
|
174
362
|
index: int | None = None,
|
|
363
|
+
ui_hierarchy: list[dict] | None = None,
|
|
364
|
+
long_press_duration: int = 1000,
|
|
175
365
|
):
|
|
366
|
+
"""
|
|
367
|
+
Long press on a selector.
|
|
368
|
+
Index is optional and is used when you have multiple views matching the same selector.
|
|
369
|
+
ui_hierarchy is optional and used for ADB long press to find elements.
|
|
370
|
+
"""
|
|
371
|
+
# Prioritize ADB
|
|
372
|
+
if ctx.adb_client:
|
|
373
|
+
output = tap_android(
|
|
374
|
+
ctx=ctx,
|
|
375
|
+
selector=selector_request,
|
|
376
|
+
index=index,
|
|
377
|
+
ui_hierarchy=ui_hierarchy,
|
|
378
|
+
long_press=True,
|
|
379
|
+
long_press_duration=long_press_duration,
|
|
380
|
+
)
|
|
381
|
+
return output.error if output.error else None
|
|
382
|
+
|
|
383
|
+
# Fallback to Maestro
|
|
176
384
|
long_press_on_body = selector_request.to_dict()
|
|
177
385
|
if not long_press_on_body:
|
|
178
386
|
error = "Invalid longPressOn selector request, could not format yaml"
|
|
@@ -184,62 +392,73 @@ def long_press_on(
|
|
|
184
392
|
return run_flow_with_wait_for_animation_to_end(ctx, flow_input, dry_run=dry_run)
|
|
185
393
|
|
|
186
394
|
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
SwipeDirection = Annotated[
|
|
206
|
-
Literal["UP", "DOWN", "LEFT", "RIGHT"],
|
|
207
|
-
BeforeValidator(lambda v: v.upper() if isinstance(v, str) else v),
|
|
208
|
-
]
|
|
395
|
+
def swipe_android(
    ctx: MobileUseContext,
    request: SwipeRequest,
) -> str | None:
    """Perform a swipe through ADB.

    Args:
        ctx: Context providing the ADB client and the device dimensions.
        request: Swipe description; only start/end coordinates and start/end
            percentages are handled here.

    Returns:
        ``None`` on success, or an error message when the swipe mode is not
        supported or the ADB command fails.

    Raises:
        ValueError: if ``ctx.adb_client`` is not set.
    """
    if not ctx.adb_client:
        raise ValueError("ADB client is not initialized")

    mode = request.swipe_mode
    if isinstance(mode, SwipeStartEndCoordinatesRequest):
        swipe_coords = mode
    elif isinstance(mode, SwipeStartEndPercentagesRequest):
        swipe_coords = mode.to_coords(
            width=ctx.device.device_width,
            height=ctx.device.device_height,
        )
    else:
        return "Unsupported selector type"

    duration = request.duration or 400  # in ms

    cmd = (
        "input touchscreen swipe "
        f"{swipe_coords.start.x} {swipe_coords.start.y} "
        f"{swipe_coords.end.x} {swipe_coords.end.y} "
        f"{duration}"
    )
    # Report ADB failures as an error message, as the contract above states
    # and as _android_tap_by_coordinates does, instead of letting them escape.
    try:
        _get_adb_device(ctx).shell(cmd)
    except Exception as e:
        error = f"ADB swipe failed: {str(e)}"
        logger.error(error)
        return error
    return None
|
|
227
425
|
|
|
228
426
|
|
|
229
427
|
def swipe(ctx: MobileUseContext, swipe_request: SwipeRequest, dry_run: bool = False):
    """Swipe on the device, through ADB when available, otherwise through a flow.

    With an ADB client the result is ``{"error": message}`` on failure and
    ``None`` on success. Without one, the request is serialized and run as a
    ``swipe`` flow step.

    Raises:
        ControllerErrors: if the request cannot be serialized for the flow.
    """
    if ctx.adb_client:
        error_msg = swipe_android(ctx=ctx, request=swipe_request)
        if error_msg:
            return {"error": error_msg}
        return None

    swipe_body = swipe_request.to_dict()
    if swipe_body:
        return run_flow(ctx, [{"swipe": swipe_body}], dry_run=dry_run)

    error = "Invalid swipe selector request, could not format yaml"
    logger.error(error)
    raise ControllerErrors(error)
|
|
237
438
|
|
|
238
439
|
|
|
239
440
|
##### Text related commands #####
|
|
240
441
|
|
|
241
442
|
|
|
242
443
|
def input_text(ctx: MobileUseContext, text: str, dry_run: bool = False):
    """Type ``text`` on the device, through ADB when available.

    With an ADB client the text is sent with ``input text`` and ``None`` is
    returned. Otherwise the text is sent as an ``inputText`` flow step and the
    flow result is returned.
    """
    if ctx.adb_client:
        logger.info("Inputting text with adb")
        # Resolve the device once instead of on every chunk.
        device = _get_adb_device(ctx)

        # Split on "%s" so that "%" ends one chunk and "s" begins the next:
        # the two characters are never sent together in a single command.
        # NOTE(review): presumably because `input text` treats "%s" as a space
        # - confirm against the Android `input` command.
        parts = text.split("%s")
        last = len(parts) - 1
        for i, part in enumerate(parts):
            chunk = ("s" if i > 0 else "") + part + ("%" if i < last else "")
            if chunk:  # only empty when text itself is empty; nothing to type
                device.shell(["input", "text", chunk])

        return None

    # Fallback to Maestro
    return run_flow(ctx, [{"inputText": text}], dry_run=dry_run)
|
|
244
463
|
|
|
245
464
|
|
|
@@ -248,6 +467,16 @@ def erase_text(ctx: MobileUseContext, nb_chars: int | None = None, dry_run: bool
|
|
|
248
467
|
Removes characters from the currently selected textfield (if any)
|
|
249
468
|
Removes 50 characters if nb_chars is not specified.
|
|
250
469
|
"""
|
|
470
|
+
adb_client = ctx.adb_client
|
|
471
|
+
if adb_client:
|
|
472
|
+
logger.info("Erasing text with adb")
|
|
473
|
+
chars_to_delete = nb_chars if nb_chars is not None else 50
|
|
474
|
+
for _ in range(chars_to_delete):
|
|
475
|
+
device = _get_adb_device(ctx)
|
|
476
|
+
device.shell("input keyevent KEYCODE_DEL")
|
|
477
|
+
return None
|
|
478
|
+
|
|
479
|
+
# Fallback to Maestro
|
|
251
480
|
if nb_chars is None:
|
|
252
481
|
return run_flow(ctx, ["eraseText"], dry_run=dry_run)
|
|
253
482
|
return run_flow(ctx, [{"eraseText": nb_chars}], dry_run=dry_run)
|
|
@@ -257,8 +486,31 @@ def erase_text(ctx: MobileUseContext, nb_chars: int | None = None, dry_run: bool
|
|
|
257
486
|
|
|
258
487
|
|
|
259
488
|
def launch_app(ctx: MobileUseContext, package_name: str, dry_run: bool = False):
    """Launch the app identified by ``package_name``.

    With an ADB client, the package's activity is resolved and started with
    ``am start``; if that command fails, ``monkey`` is used instead. Without
    an ADB client, a ``launchApp`` flow step is run.

    Returns:
        ``{"error": output}`` when the ADB output contains "error" or
        "not found", ``None`` on ADB success, otherwise the flow result.
    """
    import shlex

    if ctx.adb_client:
        logger.info("Launching app with adb")
        # The package name is interpolated into a shell command line, so quote
        # it to stop shell metacharacters from being interpreted on the device.
        # Names made only of letters, digits, "." and "_" are left unchanged.
        safe_package = shlex.quote(package_name)

        # Use am start with the resolved activity - more reliable than monkey.
        # First try to resolve the main activity, fallback to monkey if that fails
        resolve_cmd = f"cmd package resolve-activity --brief {safe_package}"
        device = _get_adb_device(ctx)
        result = str(
            device.shell(
                f"am start -n $({resolve_cmd} | tail -n 1) 2>&1 "
                f"|| monkey -p {safe_package} -c android.intent.category.LAUNCHER 1"
            )
        )
        # Check if launch failed
        result_lower = result.lower()
        if "error" in result_lower or "not found" in result_lower:
            logger.error(f"Failed to launch {package_name}: {result}")
            return {"error": result}
        return None

    # Fallback to Maestro
    flow_input = [{"launchApp": package_name}]
    return run_flow_with_wait_for_animation_to_end(
        ctx, flow_input, dry_run=dry_run, wait_for_animation_to_end=True
    )
|
|
262
514
|
|
|
263
515
|
|
|
264
516
|
def stop_app(ctx: MobileUseContext, package_name: str | None = None, dry_run: bool = False):
|
|
@@ -270,6 +522,14 @@ def stop_app(ctx: MobileUseContext, package_name: str | None = None, dry_run: bo
|
|
|
270
522
|
|
|
271
523
|
|
|
272
524
|
def open_link(ctx: MobileUseContext, url: str, dry_run: bool = False):
    """Open ``url`` on the device.

    With an ADB client an ``android.intent.action.VIEW`` intent is started and
    ``None`` is returned. Otherwise an ``openLink`` flow step is run and its
    result returned.
    """
    if not ctx.adb_client:
        # Fallback to Maestro
        return run_flow_with_wait_for_animation_to_end(
            ctx, [{"openLink": url}], dry_run=dry_run
        )

    logger.info("Opening link with adb")
    view_intent = ["am", "start", "-a", "android.intent.action.VIEW", "-d", url]
    _get_adb_device(ctx).shell(view_intent)
    return None
|
|
275
535
|
|
|
@@ -278,6 +538,14 @@ def open_link(ctx: MobileUseContext, url: str, dry_run: bool = False):
|
|
|
278
538
|
|
|
279
539
|
|
|
280
540
|
def back(ctx: MobileUseContext, dry_run: bool = False):
    """Press the back key.

    With an ADB client a ``KEYCODE_BACK`` key event is sent and ``None`` is
    returned. Otherwise a ``back`` flow step is run and its result returned.
    """
    if not ctx.adb_client:
        # Fallback to Maestro
        return run_flow_with_wait_for_animation_to_end(ctx, ["back"], dry_run=dry_run)

    logger.info("Pressing back with adb")
    _get_adb_device(ctx).shell("input keyevent KEYCODE_BACK")
    return None
|
|
283
551
|
|
|
@@ -296,40 +564,38 @@ def press_key(ctx: MobileUseContext, key: Key, dry_run: bool = False):
|
|
|
296
564
|
#### Other commands ####
|
|
297
565
|
|
|
298
566
|
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
def wait_for_animation_to_end(
|
|
306
|
-
ctx: MobileUseContext, timeout: WaitTimeout | None = None, dry_run: bool = False
|
|
307
|
-
):
|
|
308
|
-
if timeout is None:
|
|
309
|
-
return run_flow(ctx, ["waitForAnimationToEnd"], dry_run=dry_run)
|
|
310
|
-
return run_flow(ctx, [{"waitForAnimationToEnd": {"timeout": timeout.value}}], dry_run=dry_run)
|
|
567
|
+
def wait_for_delay(time_in_ms: int):
    """Block the calling thread for ``time_in_ms`` milliseconds.

    Zero or negative values return immediately: ``time.sleep`` raises
    ``ValueError`` for a negative duration, so the delay is clamped at zero.

    Returns:
        Always ``None``.
    """
    time.sleep(max(0, time_in_ms) / 1000)
    return None
|
|
311
571
|
|
|
312
572
|
|
|
313
573
|
def run_flow_with_wait_for_animation_to_end(
    ctx: MobileUseContext,
    base_flow: list,
    dry_run: bool = False,
    wait_for_animation_to_end: bool = False,
):
    """Run ``base_flow``, optionally followed by a wait-for-animation step.

    Args:
        ctx: Context forwarded to ``run_flow``.
        base_flow: Flow steps to run; never modified by this function.
        dry_run: Forwarded to ``run_flow``.
        wait_for_animation_to_end: When true, a ``waitForAnimationToEnd`` step
            with a timeout of 500 is added after the steps of ``base_flow``.

    Returns:
        Whatever ``run_flow`` returns.
    """
    flow = base_flow
    if wait_for_animation_to_end:
        # Build a new list instead of appending to the caller's, so a list
        # reused across calls does not accumulate extra wait steps.
        flow = [*base_flow, {"waitForAnimationToEnd": {"timeout": 500}}]
    return run_flow(ctx, flow, dry_run=dry_run)
|
|
318
582
|
|
|
319
583
|
|
|
320
584
|
if __name__ == "__main__":
|
|
585
|
+
adb_client = AdbClient(host="192.168.43.107", port=5037)
|
|
321
586
|
ctx = MobileUseContext(
|
|
322
587
|
trace_id="trace_id",
|
|
323
588
|
llm_config=initialize_llm_config(),
|
|
324
589
|
device=DeviceContext(
|
|
325
590
|
host_platform="WINDOWS",
|
|
326
591
|
mobile_platform=DevicePlatform.ANDROID,
|
|
327
|
-
device_id="
|
|
592
|
+
device_id="986066a",
|
|
328
593
|
device_width=1080,
|
|
329
|
-
device_height=
|
|
594
|
+
device_height=2340,
|
|
330
595
|
),
|
|
331
596
|
hw_bridge_client=DeviceHardwareClient("http://localhost:9999"),
|
|
332
597
|
screen_api_client=ScreenApiClient("http://localhost:9998"),
|
|
598
|
+
adb_client=adb_client,
|
|
333
599
|
)
|
|
334
600
|
screen_data = get_screen_data(ctx.screen_api_client)
|
|
335
601
|
from minitap.mobile_use.graph.state import State
|
|
@@ -339,20 +605,20 @@ if __name__ == "__main__":
|
|
|
339
605
|
messages=[],
|
|
340
606
|
initial_goal="",
|
|
341
607
|
subgoal_plan=[],
|
|
342
|
-
latest_screenshot_base64=screen_data.base64,
|
|
343
608
|
focused_app_info=None,
|
|
344
609
|
device_date="",
|
|
345
610
|
structured_decisions=None,
|
|
346
611
|
complete_subgoals_by_ids=[],
|
|
612
|
+
screen_analysis_prompt=None,
|
|
347
613
|
executor_messages=[],
|
|
348
614
|
cortex_last_thought="",
|
|
349
615
|
agents_thoughts=[],
|
|
350
616
|
)
|
|
351
617
|
|
|
352
|
-
# from minitap.mobile_use.tools.mobile.
|
|
618
|
+
# from minitap.mobile_use.tools.mobile.focus_and_input_text import get_focus_and_input_text_tool
|
|
353
619
|
|
|
354
620
|
# input_resource_id = "com.google.android.apps.nexuslauncher:id/search_container_hotseat"
|
|
355
|
-
# command_output: Command =
|
|
621
|
+
# command_output: Command = get_focus_and_input_text_tool(ctx=ctx).invoke(
|
|
356
622
|
# {
|
|
357
623
|
# "tool_call_id": uuid.uuid4().hex,
|
|
358
624
|
# "agent_thought": "",
|
|
@@ -362,10 +628,10 @@ if __name__ == "__main__":
|
|
|
362
628
|
# "executor_metadata": None,
|
|
363
629
|
# }
|
|
364
630
|
# )
|
|
365
|
-
from minitap.mobile_use.tools.mobile.
|
|
631
|
+
from minitap.mobile_use.tools.mobile.focus_and_clear_text import get_focus_and_clear_text_tool
|
|
366
632
|
|
|
367
633
|
input_resource_id = "com.google.android.apps.nexuslauncher:id/input"
|
|
368
|
-
command_output: Command =
|
|
634
|
+
command_output: Command = get_focus_and_clear_text_tool(ctx=ctx).invoke(
|
|
369
635
|
{
|
|
370
636
|
"tool_call_id": uuid.uuid4().hex,
|
|
371
637
|
"agent_thought": "",
|
|
@@ -1,11 +1,12 @@
|
|
|
1
|
-
from datetime import date
|
|
2
1
|
import json
|
|
2
|
+
from datetime import date
|
|
3
|
+
from shutil import which
|
|
3
4
|
|
|
4
5
|
from adbutils import AdbDevice
|
|
6
|
+
|
|
7
|
+
from minitap.mobile_use.context import DevicePlatform, MobileUseContext
|
|
5
8
|
from minitap.mobile_use.utils.logger import MobileUseLogger
|
|
6
9
|
from minitap.mobile_use.utils.shell_utils import run_shell_command_on_host
|
|
7
|
-
from minitap.mobile_use.context import MobileUseContext
|
|
8
|
-
from minitap.mobile_use.context import DevicePlatform
|
|
9
10
|
|
|
10
11
|
|
|
11
12
|
def get_adb_device(ctx: MobileUseContext) -> AdbDevice:
|
|
@@ -22,29 +23,30 @@ def get_first_device(
|
|
|
22
23
|
logger: MobileUseLogger | None = None,
|
|
23
24
|
) -> tuple[str | None, DevicePlatform | None]:
|
|
24
25
|
"""Gets the first available device."""
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
logger
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
logger
|
|
26
|
+
if which("adb"):
|
|
27
|
+
try:
|
|
28
|
+
android_output = run_shell_command_on_host("adb devices")
|
|
29
|
+
lines = android_output.strip().split("\n")
|
|
30
|
+
for line in lines:
|
|
31
|
+
if "device" in line and not line.startswith("List of devices"):
|
|
32
|
+
return line.split()[0], DevicePlatform.ANDROID
|
|
33
|
+
except RuntimeError as e:
|
|
34
|
+
if logger:
|
|
35
|
+
logger.error(f"ADB command failed: {e}")
|
|
36
|
+
|
|
37
|
+
if which("xcrun"):
|
|
38
|
+
try:
|
|
39
|
+
ios_output = run_shell_command_on_host("xcrun simctl list devices booted -j")
|
|
40
|
+
data = json.loads(ios_output)
|
|
41
|
+
for runtime, devices in data.get("devices", {}).items():
|
|
42
|
+
if "iOS" not in runtime:
|
|
43
|
+
continue
|
|
44
|
+
for device in devices:
|
|
45
|
+
if device.get("state") == "Booted":
|
|
46
|
+
return device["udid"], DevicePlatform.IOS
|
|
47
|
+
except RuntimeError as e:
|
|
48
|
+
if logger:
|
|
49
|
+
logger.error(f"xcrun command failed: {e}")
|
|
48
50
|
|
|
49
51
|
return None, None
|
|
50
52
|
|
|
@@ -69,5 +71,17 @@ def list_packages(ctx: MobileUseContext) -> str:
|
|
|
69
71
|
return run_shell_command_on_host(" ".join(cmd))
|
|
70
72
|
else:
|
|
71
73
|
device = get_adb_device(ctx)
|
|
74
|
+
# Get full package list with paths
|
|
72
75
|
cmd = ["pm", "list", "packages", "-f"]
|
|
73
|
-
|
|
76
|
+
raw_output = str(device.shell(" ".join(cmd)))
|
|
77
|
+
|
|
78
|
+
# Extract only package names (remove paths and "package:" prefix)
|
|
79
|
+
# Format: "package:/path/to/app.apk=com.example.app" -> "com.example.app"
|
|
80
|
+
lines = raw_output.strip().split("\n")
|
|
81
|
+
packages = []
|
|
82
|
+
for line in lines:
|
|
83
|
+
if "=" in line:
|
|
84
|
+
package_name = line.split("=")[-1].strip()
|
|
85
|
+
packages.append(package_name)
|
|
86
|
+
|
|
87
|
+
return "\n".join(sorted(packages))
|