minitap-mobile-use 2.5.3__py3-none-any.whl → 2.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of minitap-mobile-use might be problematic. Click here for more details.

Files changed (40) hide show
  1. minitap/mobile_use/agents/contextor/contextor.py +0 -8
  2. minitap/mobile_use/agents/cortex/cortex.md +122 -36
  3. minitap/mobile_use/agents/cortex/cortex.py +32 -17
  4. minitap/mobile_use/agents/cortex/types.py +18 -4
  5. minitap/mobile_use/agents/executor/executor.md +3 -3
  6. minitap/mobile_use/agents/executor/executor.py +10 -3
  7. minitap/mobile_use/agents/hopper/hopper.md +30 -2
  8. minitap/mobile_use/agents/hopper/hopper.py +19 -15
  9. minitap/mobile_use/agents/orchestrator/orchestrator.py +14 -5
  10. minitap/mobile_use/agents/outputter/outputter.py +13 -3
  11. minitap/mobile_use/agents/planner/planner.md +20 -9
  12. minitap/mobile_use/agents/planner/planner.py +12 -5
  13. minitap/mobile_use/agents/screen_analyzer/human.md +16 -0
  14. minitap/mobile_use/agents/screen_analyzer/screen_analyzer.py +111 -0
  15. minitap/mobile_use/clients/ios_client.py +7 -3
  16. minitap/mobile_use/config.py +87 -24
  17. minitap/mobile_use/controllers/mobile_command_controller.py +354 -88
  18. minitap/mobile_use/controllers/platform_specific_commands_controller.py +41 -27
  19. minitap/mobile_use/controllers/types.py +95 -0
  20. minitap/mobile_use/graph/graph.py +55 -11
  21. minitap/mobile_use/graph/state.py +10 -3
  22. minitap/mobile_use/main.py +12 -4
  23. minitap/mobile_use/sdk/agent.py +109 -72
  24. minitap/mobile_use/sdk/examples/smart_notification_assistant.py +59 -10
  25. minitap/mobile_use/servers/device_hardware_bridge.py +13 -6
  26. minitap/mobile_use/services/llm.py +5 -2
  27. minitap/mobile_use/tools/index.py +7 -9
  28. minitap/mobile_use/tools/mobile/{clear_text.py → focus_and_clear_text.py} +7 -7
  29. minitap/mobile_use/tools/mobile/{input_text.py → focus_and_input_text.py} +8 -8
  30. minitap/mobile_use/tools/mobile/long_press_on.py +130 -15
  31. minitap/mobile_use/tools/mobile/swipe.py +3 -26
  32. minitap/mobile_use/tools/mobile/tap.py +41 -28
  33. minitap/mobile_use/tools/mobile/wait_for_delay.py +84 -0
  34. minitap/mobile_use/utils/cli_helpers.py +10 -6
  35. {minitap_mobile_use-2.5.3.dist-info → minitap_mobile_use-2.6.0.dist-info}/METADATA +1 -1
  36. {minitap_mobile_use-2.5.3.dist-info → minitap_mobile_use-2.6.0.dist-info}/RECORD +38 -36
  37. minitap/mobile_use/tools/mobile/glimpse_screen.py +0 -74
  38. minitap/mobile_use/tools/mobile/wait_for_animation_to_end.py +0 -64
  39. {minitap_mobile_use-2.5.3.dist-info → minitap_mobile_use-2.6.0.dist-info}/WHEEL +0 -0
  40. {minitap_mobile_use-2.5.3.dist-info → minitap_mobile_use-2.6.0.dist-info}/entry_points.txt +0 -0
@@ -1,22 +1,40 @@
1
+ import re
2
+ import time
1
3
  import uuid
2
4
  from enum import Enum
3
- from typing import Annotated, Literal
4
5
 
5
6
  import yaml
7
+ from adbutils import AdbClient
6
8
  from langgraph.types import Command
7
- from pydantic import BaseModel, BeforeValidator, ConfigDict, Field
9
+ from pydantic import BaseModel, ConfigDict, Field
8
10
  from requests import JSONDecodeError
9
11
 
10
12
  from minitap.mobile_use.clients.device_hardware_client import DeviceHardwareClient
11
13
  from minitap.mobile_use.clients.screen_api_client import ScreenApiClient
12
14
  from minitap.mobile_use.config import initialize_llm_config
13
15
  from minitap.mobile_use.context import DeviceContext, DevicePlatform, MobileUseContext
16
+ from minitap.mobile_use.controllers.types import (
17
+ Bounds,
18
+ CoordinatesSelectorRequest,
19
+ PercentagesSelectorRequest,
20
+ SwipeRequest,
21
+ SwipeStartEndCoordinatesRequest,
22
+ SwipeStartEndPercentagesRequest,
23
+ TapOutput,
24
+ )
14
25
  from minitap.mobile_use.utils.errors import ControllerErrors
15
26
  from minitap.mobile_use.utils.logger import get_logger
16
27
 
17
28
  logger = get_logger(__name__)
18
29
 
19
30
 
31
+ def _get_adb_device(ctx: MobileUseContext):
32
+ """Get ADB device object from the client."""
33
+ if ctx.adb_client is None:
34
+ raise ValueError("ADB client is not initialized")
35
+ return ctx.adb_client.device(serial=ctx.device.device_id)
36
+
37
+
20
38
  ###### Screen elements retrieval ######
21
39
 
22
40
 
@@ -71,30 +89,6 @@ def run_flow(ctx: MobileUseContext, flow_steps: list, dry_run: bool = False) ->
71
89
  return None
72
90
 
73
91
 
74
- class CoordinatesSelectorRequest(BaseModel):
75
- model_config = ConfigDict(extra="forbid")
76
- x: int
77
- y: int
78
-
79
- def to_str(self):
80
- return f"{self.x}, {self.y}"
81
-
82
-
83
- class PercentagesSelectorRequest(BaseModel):
84
- model_config = ConfigDict(extra="forbid")
85
- """
86
- 0%,0% # top-left corner
87
- 100%,100% # bottom-right corner
88
- 50%,50% # center
89
- """
90
-
91
- x_percent: int
92
- y_percent: int
93
-
94
- def to_str(self):
95
- return f"{self.x_percent}%, {self.y_percent}%"
96
-
97
-
98
92
  class IdSelectorRequest(BaseModel):
99
93
  model_config = ConfigDict(extra="forbid")
100
94
  id: str
@@ -146,16 +140,210 @@ SelectorRequest = (
146
140
  )
147
141
 
148
142
 
143
+ ##### Tap helper functions #####
144
+
145
+
146
+ def get_bounds_for_element(element: dict) -> Bounds | None:
147
+ """Extract bounds from a UI element."""
148
+ bounds_str = element.get("bounds")
149
+ if not bounds_str:
150
+ return None
151
+ try:
152
+ # Parse bounds string like "[x1,y1][x2,y2]" using regex
153
+ match = re.match(r"\[(\d+),(\d+)\]\[(\d+),(\d+)\]", bounds_str)
154
+ if match:
155
+ return Bounds(
156
+ x1=int(match.group(1)),
157
+ y1=int(match.group(2)),
158
+ x2=int(match.group(3)),
159
+ y2=int(match.group(4)),
160
+ )
161
+ except (ValueError, IndexError):
162
+ return None
163
+
164
+
165
+ def _extract_resource_id_and_text_from_selector(
166
+ selector: SelectorRequest,
167
+ ) -> tuple[str | None, str | None]:
168
+ """Extract resource_id and text from a selector."""
169
+ resource_id = None
170
+ text = None
171
+
172
+ if isinstance(selector, IdSelectorRequest):
173
+ resource_id = selector.id
174
+ elif isinstance(selector, TextSelectorRequest):
175
+ text = selector.text
176
+ elif isinstance(selector, IdWithTextSelectorRequest):
177
+ resource_id = selector.id
178
+ text = selector.text
179
+
180
+ return resource_id, text
181
+
182
+
183
+ def _get_ui_element(
184
+ ui_hierarchy: list[dict],
185
+ resource_id: str | None = None,
186
+ text: str | None = None,
187
+ index: int | None = None,
188
+ ) -> tuple[dict | None, str | None]:
189
+ """Find a UI element in the hierarchy by resource_id or text."""
190
+ if not resource_id and not text:
191
+ return None, "No resource_id or text provided"
192
+
193
+ matches = []
194
+ for element in ui_hierarchy:
195
+ if resource_id and element.get("resource-id") == resource_id:
196
+ matches.append(element)
197
+ elif text and (element.get("text") == text or element.get("accessibilityText") == text):
198
+ matches.append(element)
199
+
200
+ if not matches:
201
+ criteria = f"resource_id='{resource_id}'" if resource_id else f"text='{text}'"
202
+ return None, f"No element found with {criteria}"
203
+
204
+ target_index = index if index is not None else 0
205
+ if target_index >= len(matches):
206
+ criteria = f"resource_id='{resource_id}'" if resource_id else f"text='{text}'"
207
+ return (
208
+ None,
209
+ f"Index {target_index} out of range for {criteria} (found {len(matches)} matches)",
210
+ )
211
+
212
+ return matches[target_index], None
213
+
214
+
215
+ def _android_tap_by_coordinates(
216
+ ctx: MobileUseContext,
217
+ coords: CoordinatesSelectorRequest,
218
+ long_press: bool = False,
219
+ long_press_duration: int = 1000,
220
+ ) -> TapOutput:
221
+ """Tap at specific coordinates using ADB."""
222
+ if ctx.adb_client is None:
223
+ return TapOutput(error="ADB client is not initialized")
224
+
225
+ if long_press:
226
+ # Long press is simulated as a swipe at the same location
227
+ cmd = f"input swipe {coords.x} {coords.y} {coords.x} {coords.y} {long_press_duration}"
228
+ else:
229
+ cmd = f"input tap {coords.x} {coords.y}"
230
+
231
+ try:
232
+ device = _get_adb_device(ctx)
233
+ device.shell(cmd)
234
+ return TapOutput(error=None)
235
+ except Exception as e:
236
+ return TapOutput(error=f"ADB tap failed: {str(e)}")
237
+
238
+
239
+ def _android_tap_by_resource_id_or_text(
240
+ ctx: MobileUseContext,
241
+ ui_hierarchy: list[dict],
242
+ resource_id: str | None = None,
243
+ text: str | None = None,
244
+ index: int | None = None,
245
+ long_press: bool = False,
246
+ long_press_duration: int = 1000,
247
+ ) -> TapOutput:
248
+ """Tap on an element by finding it in the UI hierarchy."""
249
+ if ctx.adb_client is None:
250
+ return TapOutput(error="ADB client is not initialized")
251
+
252
+ ui_element, error_msg = _get_ui_element(
253
+ ui_hierarchy=ui_hierarchy,
254
+ resource_id=resource_id,
255
+ text=text,
256
+ index=index,
257
+ )
258
+
259
+ if not ui_element:
260
+ return TapOutput(error=error_msg)
261
+
262
+ bounds = get_bounds_for_element(ui_element)
263
+ if not bounds:
264
+ criteria = f"resource_id='{resource_id}'" if resource_id else f"text='{text}'"
265
+ return TapOutput(error=f"Could not extract bounds for element with {criteria}")
266
+
267
+ center = bounds.get_center()
268
+ return _android_tap_by_coordinates(
269
+ ctx=ctx, coords=center, long_press=long_press, long_press_duration=long_press_duration
270
+ )
271
+
272
+
273
+ def tap_android(
274
+ ctx: MobileUseContext,
275
+ selector: SelectorRequest,
276
+ index: int | None = None,
277
+ ui_hierarchy: list[dict] | None = None,
278
+ long_press: bool = False,
279
+ long_press_duration: int = 1000,
280
+ ) -> TapOutput:
281
+ """Execute tap using ADB with fallback strategies."""
282
+ if not ctx.adb_client:
283
+ raise ValueError("ADB client is not initialized")
284
+
285
+ # Direct coordinate tap
286
+ if isinstance(selector, SelectorRequestWithCoordinates):
287
+ return _android_tap_by_coordinates(
288
+ ctx=ctx,
289
+ coords=selector.coordinates,
290
+ long_press=long_press,
291
+ long_press_duration=long_press_duration,
292
+ )
293
+
294
+ # Convert percentage-based selectors to coordinates
295
+ if isinstance(selector, SelectorRequestWithPercentages):
296
+ coords = selector.percentages.to_coords(
297
+ width=ctx.device.device_width,
298
+ height=ctx.device.device_height,
299
+ )
300
+ return _android_tap_by_coordinates(
301
+ ctx=ctx,
302
+ coords=coords,
303
+ long_press=long_press,
304
+ long_press_duration=long_press_duration,
305
+ )
306
+
307
+ # For other selectors, we need the UI hierarchy
308
+ resource_id, text = _extract_resource_id_and_text_from_selector(selector)
309
+
310
+ if not ui_hierarchy:
311
+ ui_hierarchy = get_screen_data(screen_api_client=ctx.screen_api_client).elements
312
+
313
+ return _android_tap_by_resource_id_or_text(
314
+ ctx=ctx,
315
+ ui_hierarchy=ui_hierarchy,
316
+ resource_id=resource_id,
317
+ text=text,
318
+ index=index,
319
+ long_press=long_press,
320
+ long_press_duration=long_press_duration,
321
+ )
322
+
323
+
149
324
  def tap(
150
325
  ctx: MobileUseContext,
151
326
  selector_request: SelectorRequest,
152
327
  dry_run: bool = False,
153
328
  index: int | None = None,
329
+ ui_hierarchy: list[dict] | None = None,
154
330
  ):
155
331
  """
156
332
  Tap on a selector.
157
333
  Index is optional and is used when you have multiple views matching the same selector.
334
+ ui_hierarchy is optional and used for ADB taps to find elements.
158
335
  """
336
+ # Prioritize ADB
337
+ if ctx.adb_client:
338
+ output = tap_android(
339
+ ctx=ctx,
340
+ selector=selector_request,
341
+ index=index,
342
+ ui_hierarchy=ui_hierarchy,
343
+ )
344
+ return output.error if output.error else None
345
+
346
+ # Fallback to Maestro
159
347
  tap_body = selector_request.to_dict()
160
348
  if not tap_body:
161
349
  error = "Invalid tap selector request, could not format yaml"
@@ -164,7 +352,7 @@ def tap(
164
352
  if index:
165
353
  tap_body["index"] = index
166
354
  flow_input = [{"tapOn": tap_body}]
167
- return run_flow_with_wait_for_animation_to_end(ctx, flow_input, dry_run=dry_run)
355
+ return run_flow(ctx, flow_input, dry_run=dry_run)
168
356
 
169
357
 
170
358
  def long_press_on(
@@ -172,7 +360,27 @@ def long_press_on(
172
360
  selector_request: SelectorRequest,
173
361
  dry_run: bool = False,
174
362
  index: int | None = None,
363
+ ui_hierarchy: list[dict] | None = None,
364
+ long_press_duration: int = 1000,
175
365
  ):
366
+ """
367
+ Long press on a selector.
368
+ Index is optional and is used when you have multiple views matching the same selector.
369
+ ui_hierarchy is optional and used for ADB long press to find elements.
370
+ """
371
+ # Prioritize ADB
372
+ if ctx.adb_client:
373
+ output = tap_android(
374
+ ctx=ctx,
375
+ selector=selector_request,
376
+ index=index,
377
+ ui_hierarchy=ui_hierarchy,
378
+ long_press=True,
379
+ long_press_duration=long_press_duration,
380
+ )
381
+ return output.error if output.error else None
382
+
383
+ # Fallback to Maestro
176
384
  long_press_on_body = selector_request.to_dict()
177
385
  if not long_press_on_body:
178
386
  error = "Invalid longPressOn selector request, could not format yaml"
@@ -184,62 +392,73 @@ def long_press_on(
184
392
  return run_flow_with_wait_for_animation_to_end(ctx, flow_input, dry_run=dry_run)
185
393
 
186
394
 
187
- class SwipeStartEndCoordinatesRequest(BaseModel):
188
- model_config = ConfigDict(extra="forbid")
189
- start: CoordinatesSelectorRequest
190
- end: CoordinatesSelectorRequest
191
-
192
- def to_dict(self):
193
- return {"start": self.start.to_str(), "end": self.end.to_str()}
194
-
195
-
196
- class SwipeStartEndPercentagesRequest(BaseModel):
197
- model_config = ConfigDict(extra="forbid")
198
- start: PercentagesSelectorRequest
199
- end: PercentagesSelectorRequest
200
-
201
- def to_dict(self):
202
- return {"start": self.start.to_str(), "end": self.end.to_str()}
203
-
204
-
205
- SwipeDirection = Annotated[
206
- Literal["UP", "DOWN", "LEFT", "RIGHT"],
207
- BeforeValidator(lambda v: v.upper() if isinstance(v, str) else v),
208
- ]
395
+ def swipe_android(
396
+ ctx: MobileUseContext,
397
+ request: SwipeRequest,
398
+ ) -> str | None:
399
+ """Returns an error_message in case of failure."""
400
+ if not ctx.adb_client:
401
+ raise ValueError("ADB client is not initialized")
402
+
403
+ mode = request.swipe_mode
404
+ if isinstance(mode, SwipeStartEndCoordinatesRequest):
405
+ swipe_coords = mode
406
+ elif isinstance(mode, SwipeStartEndPercentagesRequest):
407
+ swipe_coords = mode.to_coords(
408
+ width=ctx.device.device_width,
409
+ height=ctx.device.device_height,
410
+ )
411
+ else:
412
+ return "Unsupported selector type"
209
413
 
414
+ duration = request.duration if request.duration else 400 # in ms
210
415
 
211
- class SwipeRequest(BaseModel):
212
- model_config = ConfigDict(extra="forbid")
213
- swipe_mode: SwipeStartEndCoordinatesRequest | SwipeStartEndPercentagesRequest | SwipeDirection
214
- duration: int | None = None # in ms, default is 400ms
215
-
216
- def to_dict(self):
217
- res = {}
218
- if isinstance(self.swipe_mode, SwipeStartEndCoordinatesRequest):
219
- res |= self.swipe_mode.to_dict()
220
- elif isinstance(self.swipe_mode, SwipeStartEndPercentagesRequest):
221
- res |= self.swipe_mode.to_dict()
222
- elif self.swipe_mode in ["UP", "DOWN", "LEFT", "RIGHT"]:
223
- res |= {"direction": self.swipe_mode}
224
- if self.duration:
225
- res |= {"duration": self.duration}
226
- return res
416
+ cmd = (
417
+ "input touchscreen swipe "
418
+ f"{swipe_coords.start.x} {swipe_coords.start.y} "
419
+ f"{swipe_coords.end.x} {swipe_coords.end.y} "
420
+ f"{duration}"
421
+ )
422
+ device = _get_adb_device(ctx)
423
+ device.shell(cmd)
424
+ return None
227
425
 
228
426
 
229
427
  def swipe(ctx: MobileUseContext, swipe_request: SwipeRequest, dry_run: bool = False):
428
+ if ctx.adb_client:
429
+ error_msg = swipe_android(ctx=ctx, request=swipe_request)
430
+ return {"error": error_msg} if error_msg else None
230
431
  swipe_body = swipe_request.to_dict()
231
432
  if not swipe_body:
232
433
  error = "Invalid swipe selector request, could not format yaml"
233
434
  logger.error(error)
234
435
  raise ControllerErrors(error)
235
436
  flow_input = [{"swipe": swipe_body}]
236
- return run_flow_with_wait_for_animation_to_end(ctx, flow_input, dry_run=dry_run)
437
+ return run_flow(ctx, flow_input, dry_run=dry_run)
237
438
 
238
439
 
239
440
  ##### Text related commands #####
240
441
 
241
442
 
242
443
  def input_text(ctx: MobileUseContext, text: str, dry_run: bool = False):
444
+ adb_client = ctx.adb_client
445
+ if adb_client:
446
+ logger.info("Inputting text with adb")
447
+ parts = text.split("%s")
448
+ for i, part in enumerate(parts):
449
+ to_write = ""
450
+ if i > 0:
451
+ to_write += "s"
452
+ to_write += part
453
+ if i < len(parts) - 1:
454
+ to_write += "%"
455
+
456
+ device = _get_adb_device(ctx)
457
+ device.shell(["input", "text", to_write])
458
+
459
+ return None
460
+
461
+ # Fallback to Maestro
243
462
  return run_flow(ctx, [{"inputText": text}], dry_run=dry_run)
244
463
 
245
464
 
@@ -248,6 +467,16 @@ def erase_text(ctx: MobileUseContext, nb_chars: int | None = None, dry_run: bool
248
467
  Removes characters from the currently selected textfield (if any)
249
468
  Removes 50 characters if nb_chars is not specified.
250
469
  """
470
+ adb_client = ctx.adb_client
471
+ if adb_client:
472
+ logger.info("Erasing text with adb")
473
+ chars_to_delete = nb_chars if nb_chars is not None else 50
474
+ for _ in range(chars_to_delete):
475
+ device = _get_adb_device(ctx)
476
+ device.shell("input keyevent KEYCODE_DEL")
477
+ return None
478
+
479
+ # Fallback to Maestro
251
480
  if nb_chars is None:
252
481
  return run_flow(ctx, ["eraseText"], dry_run=dry_run)
253
482
  return run_flow(ctx, [{"eraseText": nb_chars}], dry_run=dry_run)
@@ -257,8 +486,31 @@ def erase_text(ctx: MobileUseContext, nb_chars: int | None = None, dry_run: bool
257
486
 
258
487
 
259
488
  def launch_app(ctx: MobileUseContext, package_name: str, dry_run: bool = False):
489
+ adb_client = ctx.adb_client
490
+ if adb_client:
491
+ logger.info("Launching app with adb")
492
+ # Use am start with MAIN/LAUNCHER intent - more reliable than monkey
493
+ # First try to resolve the main activity, fallback to monkey if that fails
494
+ resolve_cmd = f"cmd package resolve-activity --brief {package_name}"
495
+ device = _get_adb_device(ctx)
496
+ result = str(
497
+ device.shell(
498
+ f"am start -n $({resolve_cmd} | tail -n 1) 2>&1 "
499
+ f"|| monkey -p {package_name} -c android.intent.category.LAUNCHER 1"
500
+ )
501
+ )
502
+ # Check if launch failed
503
+ result_lower = result.lower()
504
+ if "error" in result_lower or "not found" in result_lower:
505
+ logger.error(f"Failed to launch {package_name}: {result}")
506
+ return {"error": result}
507
+ return None
508
+
509
+ # Fallback to Maestro
260
510
  flow_input = [{"launchApp": package_name}]
261
- return run_flow_with_wait_for_animation_to_end(ctx, flow_input, dry_run=dry_run)
511
+ return run_flow_with_wait_for_animation_to_end(
512
+ ctx, flow_input, dry_run=dry_run, wait_for_animation_to_end=True
513
+ )
262
514
 
263
515
 
264
516
  def stop_app(ctx: MobileUseContext, package_name: str | None = None, dry_run: bool = False):
@@ -270,6 +522,14 @@ def stop_app(ctx: MobileUseContext, package_name: str | None = None, dry_run: bo
270
522
 
271
523
 
272
524
  def open_link(ctx: MobileUseContext, url: str, dry_run: bool = False):
525
+ adb_client = ctx.adb_client
526
+ if adb_client:
527
+ logger.info("Opening link with adb")
528
+ device = _get_adb_device(ctx)
529
+ device.shell(["am", "start", "-a", "android.intent.action.VIEW", "-d", url])
530
+ return None
531
+
532
+ # Fallback to Maestro
273
533
  flow_input = [{"openLink": url}]
274
534
  return run_flow_with_wait_for_animation_to_end(ctx, flow_input, dry_run=dry_run)
275
535
 
@@ -278,6 +538,14 @@ def open_link(ctx: MobileUseContext, url: str, dry_run: bool = False):
278
538
 
279
539
 
280
540
  def back(ctx: MobileUseContext, dry_run: bool = False):
541
+ adb_client = ctx.adb_client
542
+ if adb_client:
543
+ logger.info("Pressing back with adb")
544
+ device = _get_adb_device(ctx)
545
+ device.shell("input keyevent KEYCODE_BACK")
546
+ return None
547
+
548
+ # Fallback to Maestro
281
549
  flow_input = ["back"]
282
550
  return run_flow_with_wait_for_animation_to_end(ctx, flow_input, dry_run=dry_run)
283
551
 
@@ -296,40 +564,38 @@ def press_key(ctx: MobileUseContext, key: Key, dry_run: bool = False):
296
564
  #### Other commands ####
297
565
 
298
566
 
299
- class WaitTimeout(Enum):
300
- SHORT = "500"
301
- MEDIUM = "1000"
302
- LONG = "5000"
303
-
304
-
305
- def wait_for_animation_to_end(
306
- ctx: MobileUseContext, timeout: WaitTimeout | None = None, dry_run: bool = False
307
- ):
308
- if timeout is None:
309
- return run_flow(ctx, ["waitForAnimationToEnd"], dry_run=dry_run)
310
- return run_flow(ctx, [{"waitForAnimationToEnd": {"timeout": timeout.value}}], dry_run=dry_run)
567
+ def wait_for_delay(time_in_ms: int):
568
+ """Wait for a specified delay in milliseconds."""
569
+ time.sleep(time_in_ms / 1000)
570
+ return None
311
571
 
312
572
 
313
573
  def run_flow_with_wait_for_animation_to_end(
314
- ctx: MobileUseContext, base_flow: list, dry_run: bool = False
574
+ ctx: MobileUseContext,
575
+ base_flow: list,
576
+ dry_run: bool = False,
577
+ wait_for_animation_to_end: bool = False,
315
578
  ):
316
- base_flow.append({"waitForAnimationToEnd": {"timeout": int(WaitTimeout.MEDIUM.value)}})
579
+ if wait_for_animation_to_end:
580
+ base_flow.append({"waitForAnimationToEnd": {"timeout": 500}})
317
581
  return run_flow(ctx, base_flow, dry_run=dry_run)
318
582
 
319
583
 
320
584
  if __name__ == "__main__":
585
+ adb_client = AdbClient(host="192.168.43.107", port=5037)
321
586
  ctx = MobileUseContext(
322
587
  trace_id="trace_id",
323
588
  llm_config=initialize_llm_config(),
324
589
  device=DeviceContext(
325
590
  host_platform="WINDOWS",
326
591
  mobile_platform=DevicePlatform.ANDROID,
327
- device_id="emulator-5554",
592
+ device_id="986066a",
328
593
  device_width=1080,
329
- device_height=1920,
594
+ device_height=2340,
330
595
  ),
331
596
  hw_bridge_client=DeviceHardwareClient("http://localhost:9999"),
332
597
  screen_api_client=ScreenApiClient("http://localhost:9998"),
598
+ adb_client=adb_client,
333
599
  )
334
600
  screen_data = get_screen_data(ctx.screen_api_client)
335
601
  from minitap.mobile_use.graph.state import State
@@ -339,20 +605,20 @@ if __name__ == "__main__":
339
605
  messages=[],
340
606
  initial_goal="",
341
607
  subgoal_plan=[],
342
- latest_screenshot_base64=screen_data.base64,
343
608
  focused_app_info=None,
344
609
  device_date="",
345
610
  structured_decisions=None,
346
611
  complete_subgoals_by_ids=[],
612
+ screen_analysis_prompt=None,
347
613
  executor_messages=[],
348
614
  cortex_last_thought="",
349
615
  agents_thoughts=[],
350
616
  )
351
617
 
352
- # from minitap.mobile_use.tools.mobile.input_text import get_input_text_tool
618
+ # from minitap.mobile_use.tools.mobile.focus_and_input_text import get_focus_and_input_text_tool
353
619
 
354
620
  # input_resource_id = "com.google.android.apps.nexuslauncher:id/search_container_hotseat"
355
- # command_output: Command = get_input_text_tool(ctx=ctx).invoke(
621
+ # command_output: Command = get_focus_and_input_text_tool(ctx=ctx).invoke(
356
622
  # {
357
623
  # "tool_call_id": uuid.uuid4().hex,
358
624
  # "agent_thought": "",
@@ -362,10 +628,10 @@ if __name__ == "__main__":
362
628
  # "executor_metadata": None,
363
629
  # }
364
630
  # )
365
- from minitap.mobile_use.tools.mobile.clear_text import get_clear_text_tool
631
+ from minitap.mobile_use.tools.mobile.focus_and_clear_text import get_focus_and_clear_text_tool
366
632
 
367
633
  input_resource_id = "com.google.android.apps.nexuslauncher:id/input"
368
- command_output: Command = get_clear_text_tool(ctx=ctx).invoke(
634
+ command_output: Command = get_focus_and_clear_text_tool(ctx=ctx).invoke(
369
635
  {
370
636
  "tool_call_id": uuid.uuid4().hex,
371
637
  "agent_thought": "",
@@ -1,11 +1,12 @@
1
- from datetime import date
2
1
  import json
2
+ from datetime import date
3
+ from shutil import which
3
4
 
4
5
  from adbutils import AdbDevice
6
+
7
+ from minitap.mobile_use.context import DevicePlatform, MobileUseContext
5
8
  from minitap.mobile_use.utils.logger import MobileUseLogger
6
9
  from minitap.mobile_use.utils.shell_utils import run_shell_command_on_host
7
- from minitap.mobile_use.context import MobileUseContext
8
- from minitap.mobile_use.context import DevicePlatform
9
10
 
10
11
 
11
12
  def get_adb_device(ctx: MobileUseContext) -> AdbDevice:
@@ -22,29 +23,30 @@ def get_first_device(
22
23
  logger: MobileUseLogger | None = None,
23
24
  ) -> tuple[str | None, DevicePlatform | None]:
24
25
  """Gets the first available device."""
25
- try:
26
- android_output = run_shell_command_on_host("adb devices")
27
- lines = android_output.strip().split("\n")
28
- for line in lines:
29
- if "device" in line and not line.startswith("List of devices"):
30
- return line.split()[0], DevicePlatform.ANDROID
31
- except RuntimeError as e:
32
- if logger:
33
- logger.error(f"ADB command failed: {e}")
34
- return None, None
35
-
36
- try:
37
- ios_output = run_shell_command_on_host("xcrun simctl list devices booted -j")
38
- data = json.loads(ios_output)
39
- for runtime, devices in data.get("devices", {}).items():
40
- if "iOS" not in runtime:
41
- continue
42
- for device in devices:
43
- if device.get("state") == "Booted":
44
- return device["udid"], DevicePlatform.IOS
45
- except RuntimeError as e:
46
- if logger:
47
- logger.error(f"xcrun command failed: {e}")
26
+ if which("adb"):
27
+ try:
28
+ android_output = run_shell_command_on_host("adb devices")
29
+ lines = android_output.strip().split("\n")
30
+ for line in lines:
31
+ if "device" in line and not line.startswith("List of devices"):
32
+ return line.split()[0], DevicePlatform.ANDROID
33
+ except RuntimeError as e:
34
+ if logger:
35
+ logger.error(f"ADB command failed: {e}")
36
+
37
+ if which("xcrun"):
38
+ try:
39
+ ios_output = run_shell_command_on_host("xcrun simctl list devices booted -j")
40
+ data = json.loads(ios_output)
41
+ for runtime, devices in data.get("devices", {}).items():
42
+ if "iOS" not in runtime:
43
+ continue
44
+ for device in devices:
45
+ if device.get("state") == "Booted":
46
+ return device["udid"], DevicePlatform.IOS
47
+ except RuntimeError as e:
48
+ if logger:
49
+ logger.error(f"xcrun command failed: {e}")
48
50
 
49
51
  return None, None
50
52
 
@@ -69,5 +71,17 @@ def list_packages(ctx: MobileUseContext) -> str:
69
71
  return run_shell_command_on_host(" ".join(cmd))
70
72
  else:
71
73
  device = get_adb_device(ctx)
74
+ # Get full package list with paths
72
75
  cmd = ["pm", "list", "packages", "-f"]
73
- return str(device.shell(" ".join(cmd)))
76
+ raw_output = str(device.shell(" ".join(cmd)))
77
+
78
+ # Extract only package names (remove paths and "package:" prefix)
79
+ # Format: "package:/path/to/app.apk=com.example.app" -> "com.example.app"
80
+ lines = raw_output.strip().split("\n")
81
+ packages = []
82
+ for line in lines:
83
+ if "=" in line:
84
+ package_name = line.split("=")[-1].strip()
85
+ packages.append(package_name)
86
+
87
+ return "\n".join(sorted(packages))