minitap-mobile-use 3.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (115) hide show
  1. minitap/mobile_use/__init__.py +0 -0
  2. minitap/mobile_use/agents/contextor/contextor.md +55 -0
  3. minitap/mobile_use/agents/contextor/contextor.py +175 -0
  4. minitap/mobile_use/agents/contextor/types.py +36 -0
  5. minitap/mobile_use/agents/cortex/cortex.md +135 -0
  6. minitap/mobile_use/agents/cortex/cortex.py +152 -0
  7. minitap/mobile_use/agents/cortex/types.py +15 -0
  8. minitap/mobile_use/agents/executor/executor.md +42 -0
  9. minitap/mobile_use/agents/executor/executor.py +87 -0
  10. minitap/mobile_use/agents/executor/tool_node.py +152 -0
  11. minitap/mobile_use/agents/hopper/hopper.md +15 -0
  12. minitap/mobile_use/agents/hopper/hopper.py +44 -0
  13. minitap/mobile_use/agents/orchestrator/human.md +12 -0
  14. minitap/mobile_use/agents/orchestrator/orchestrator.md +21 -0
  15. minitap/mobile_use/agents/orchestrator/orchestrator.py +134 -0
  16. minitap/mobile_use/agents/orchestrator/types.py +11 -0
  17. minitap/mobile_use/agents/outputter/human.md +25 -0
  18. minitap/mobile_use/agents/outputter/outputter.py +85 -0
  19. minitap/mobile_use/agents/outputter/test_outputter.py +167 -0
  20. minitap/mobile_use/agents/planner/human.md +14 -0
  21. minitap/mobile_use/agents/planner/planner.md +126 -0
  22. minitap/mobile_use/agents/planner/planner.py +101 -0
  23. minitap/mobile_use/agents/planner/types.py +51 -0
  24. minitap/mobile_use/agents/planner/utils.py +70 -0
  25. minitap/mobile_use/agents/summarizer/summarizer.py +35 -0
  26. minitap/mobile_use/agents/video_analyzer/__init__.py +5 -0
  27. minitap/mobile_use/agents/video_analyzer/human.md +5 -0
  28. minitap/mobile_use/agents/video_analyzer/video_analyzer.md +37 -0
  29. minitap/mobile_use/agents/video_analyzer/video_analyzer.py +111 -0
  30. minitap/mobile_use/clients/browserstack_client.py +477 -0
  31. minitap/mobile_use/clients/idb_client.py +429 -0
  32. minitap/mobile_use/clients/ios_client.py +332 -0
  33. minitap/mobile_use/clients/ios_client_config.py +141 -0
  34. minitap/mobile_use/clients/ui_automator_client.py +330 -0
  35. minitap/mobile_use/clients/wda_client.py +526 -0
  36. minitap/mobile_use/clients/wda_lifecycle.py +367 -0
  37. minitap/mobile_use/config.py +413 -0
  38. minitap/mobile_use/constants.py +3 -0
  39. minitap/mobile_use/context.py +106 -0
  40. minitap/mobile_use/controllers/__init__.py +0 -0
  41. minitap/mobile_use/controllers/android_controller.py +524 -0
  42. minitap/mobile_use/controllers/controller_factory.py +46 -0
  43. minitap/mobile_use/controllers/device_controller.py +182 -0
  44. minitap/mobile_use/controllers/ios_controller.py +436 -0
  45. minitap/mobile_use/controllers/platform_specific_commands_controller.py +199 -0
  46. minitap/mobile_use/controllers/types.py +106 -0
  47. minitap/mobile_use/controllers/unified_controller.py +193 -0
  48. minitap/mobile_use/graph/graph.py +160 -0
  49. minitap/mobile_use/graph/state.py +115 -0
  50. minitap/mobile_use/main.py +309 -0
  51. minitap/mobile_use/sdk/__init__.py +12 -0
  52. minitap/mobile_use/sdk/agent.py +1294 -0
  53. minitap/mobile_use/sdk/builders/__init__.py +10 -0
  54. minitap/mobile_use/sdk/builders/agent_config_builder.py +307 -0
  55. minitap/mobile_use/sdk/builders/index.py +15 -0
  56. minitap/mobile_use/sdk/builders/task_request_builder.py +236 -0
  57. minitap/mobile_use/sdk/constants.py +1 -0
  58. minitap/mobile_use/sdk/examples/README.md +83 -0
  59. minitap/mobile_use/sdk/examples/__init__.py +1 -0
  60. minitap/mobile_use/sdk/examples/app_lock_messaging.py +54 -0
  61. minitap/mobile_use/sdk/examples/platform_manual_task_example.py +67 -0
  62. minitap/mobile_use/sdk/examples/platform_minimal_example.py +48 -0
  63. minitap/mobile_use/sdk/examples/simple_photo_organizer.py +76 -0
  64. minitap/mobile_use/sdk/examples/smart_notification_assistant.py +225 -0
  65. minitap/mobile_use/sdk/examples/video_transcription_example.py +117 -0
  66. minitap/mobile_use/sdk/services/cloud_mobile.py +656 -0
  67. minitap/mobile_use/sdk/services/platform.py +434 -0
  68. minitap/mobile_use/sdk/types/__init__.py +51 -0
  69. minitap/mobile_use/sdk/types/agent.py +84 -0
  70. minitap/mobile_use/sdk/types/exceptions.py +138 -0
  71. minitap/mobile_use/sdk/types/platform.py +183 -0
  72. minitap/mobile_use/sdk/types/task.py +269 -0
  73. minitap/mobile_use/sdk/utils.py +29 -0
  74. minitap/mobile_use/services/accessibility.py +100 -0
  75. minitap/mobile_use/services/llm.py +247 -0
  76. minitap/mobile_use/services/telemetry.py +421 -0
  77. minitap/mobile_use/tools/index.py +67 -0
  78. minitap/mobile_use/tools/mobile/back.py +52 -0
  79. minitap/mobile_use/tools/mobile/erase_one_char.py +56 -0
  80. minitap/mobile_use/tools/mobile/focus_and_clear_text.py +317 -0
  81. minitap/mobile_use/tools/mobile/focus_and_input_text.py +153 -0
  82. minitap/mobile_use/tools/mobile/launch_app.py +86 -0
  83. minitap/mobile_use/tools/mobile/long_press_on.py +169 -0
  84. minitap/mobile_use/tools/mobile/open_link.py +62 -0
  85. minitap/mobile_use/tools/mobile/press_key.py +83 -0
  86. minitap/mobile_use/tools/mobile/stop_app.py +62 -0
  87. minitap/mobile_use/tools/mobile/swipe.py +156 -0
  88. minitap/mobile_use/tools/mobile/tap.py +154 -0
  89. minitap/mobile_use/tools/mobile/video_recording.py +177 -0
  90. minitap/mobile_use/tools/mobile/wait_for_delay.py +81 -0
  91. minitap/mobile_use/tools/scratchpad.py +147 -0
  92. minitap/mobile_use/tools/test_utils.py +413 -0
  93. minitap/mobile_use/tools/tool_wrapper.py +16 -0
  94. minitap/mobile_use/tools/types.py +35 -0
  95. minitap/mobile_use/tools/utils.py +336 -0
  96. minitap/mobile_use/utils/app_launch_utils.py +173 -0
  97. minitap/mobile_use/utils/cli_helpers.py +37 -0
  98. minitap/mobile_use/utils/cli_selection.py +143 -0
  99. minitap/mobile_use/utils/conversations.py +31 -0
  100. minitap/mobile_use/utils/decorators.py +124 -0
  101. minitap/mobile_use/utils/errors.py +6 -0
  102. minitap/mobile_use/utils/file.py +13 -0
  103. minitap/mobile_use/utils/logger.py +183 -0
  104. minitap/mobile_use/utils/media.py +186 -0
  105. minitap/mobile_use/utils/recorder.py +52 -0
  106. minitap/mobile_use/utils/requests_utils.py +37 -0
  107. minitap/mobile_use/utils/shell_utils.py +20 -0
  108. minitap/mobile_use/utils/test_ui_hierarchy.py +178 -0
  109. minitap/mobile_use/utils/time.py +6 -0
  110. minitap/mobile_use/utils/ui_hierarchy.py +132 -0
  111. minitap/mobile_use/utils/video.py +281 -0
  112. minitap_mobile_use-3.3.0.dist-info/METADATA +329 -0
  113. minitap_mobile_use-3.3.0.dist-info/RECORD +115 -0
  114. minitap_mobile_use-3.3.0.dist-info/WHEEL +4 -0
  115. minitap_mobile_use-3.3.0.dist-info/entry_points.txt +3 -0
@@ -0,0 +1,199 @@
1
+ import asyncio
2
+ from concurrent.futures.thread import ThreadPoolExecutor
3
+ from datetime import date
4
+ from shutil import which
5
+
6
+ from adbutils import AdbDevice
7
+
8
+ from minitap.mobile_use.clients.ios_client import (
9
+ DeviceType,
10
+ get_all_ios_devices_detailed,
11
+ get_device_type,
12
+ )
13
+ from minitap.mobile_use.context import DevicePlatform, MobileUseContext
14
+ from minitap.mobile_use.utils.logger import MobileUseLogger, get_logger
15
+ from minitap.mobile_use.utils.shell_utils import run_shell_command_on_host
16
+
17
+ logger = get_logger(__name__)
18
+
19
+
20
def get_adb_device(ctx: MobileUseContext) -> AdbDevice:
    """Return the ADB device handle for the device described by ``ctx``.

    Args:
        ctx: Execution context; its ``device`` supplies the platform and serial.

    Returns:
        The object returned by the ADB client for ``ctx.device.device_id``.

    Raises:
        ValueError: If the context's device is not on the Android platform.
        ConnectionError: If the ADB client returns a falsy device.
    """
    target = ctx.device
    if target.mobile_platform != DevicePlatform.ANDROID:
        raise ValueError("Device is not an Android device")

    handle = ctx.get_adb_client().device(serial=target.device_id)
    if handle:
        return handle
    raise ConnectionError(f"Device {target.device_id} not found.")
28
+
29
+
30
def get_first_device(
    logger: MobileUseLogger | None = None,
    prefer_physical: bool = True,
) -> tuple[str | None, DevicePlatform | None, DeviceType | None]:
    """Gets the first available device.

    Android is probed first (through ``adb devices`` when ``adb`` is on PATH);
    iOS devices are only considered when no usable Android device is listed.

    Args:
        logger: Optional logger for error/info messages. Note that this
            parameter shadows the module-level logger, so nothing is logged
            when it is ``None``.
        prefer_physical: If True, prefer physical iOS devices over simulators

    Returns:
        Tuple of (device_id, platform, device_type) or (None, None, None) if no device found.
        device_type is only set for iOS devices (SIMULATOR or PHYSICAL).
    """
    # Check for Android devices first
    if which("adb"):
        try:
            android_output = run_shell_command_on_host("adb devices")
        except RuntimeError as e:
            if logger:
                logger.error(f"ADB command failed: {e}")
        else:
            for line in android_output.strip().splitlines():
                fields = line.split()
                # A device row is "<serial> <state>". Require the state column to be
                # exactly "device": a plain substring test also accepts rows such as
                # "???????? no permissions (...); see [.../tools/device.html]" or a
                # serial that merely contains the word "device".
                if len(fields) >= 2 and fields[1] == "device":
                    return fields[0], DevicePlatform.ANDROID, None

    # Check for iOS devices (both simulators and physical)
    ios_devices = get_all_ios_devices_detailed()
    if not ios_devices:
        return None, None, None

    if prefer_physical:
        # min() returns the first entry with the smallest key, i.e. the first
        # physical device if there is one, otherwise the first entry. Unlike an
        # in-place sort, this leaves the list returned by the helper untouched.
        device = min(ios_devices, key=lambda d: d["type"] != DeviceType.PHYSICAL)
    else:
        device = ios_devices[0]

    if logger:
        logger.info(
            f"Selected iOS device: {device['name']} ({device['type'].value}) - {device['udid']}"
        )
    return device["udid"], DevicePlatform.IOS, device["type"]
71
+
72
+
73
def get_device_date(ctx: MobileUseContext) -> str:
    """Return the current date and time as a human-readable string.

    For iOS the value is taken from the host clock (the device itself is not
    queried) and formatted like ``date(1)`` output, e.g.
    ``"Mon Jan 01 13:37:00 CET 2024"``. For Android the raw output of the
    ``date`` shell command on the device is returned.

    Args:
        ctx: Execution context identifying the target device.

    Returns:
        The formatted date string.
    """
    if ctx.device.mobile_platform == DevicePlatform.IOS:
        # A plain ``date`` has no time or tzinfo, so %H:%M:%S would always
        # render as 00:00:00 and %Z as an empty string. Use a timezone-aware
        # datetime so every field of the format is actually populated.
        from datetime import datetime

        return datetime.now().astimezone().strftime("%a %b %d %H:%M:%S %Z %Y")
    device = get_adb_device(ctx)
    return str(device.shell("date"))
78
+
79
+
80
def list_packages(ctx: MobileUseContext) -> str:
    """List the application identifiers installed on the context's device.

    * Android: runs ``pm list packages -f`` and returns the sorted package
      names, one per line.
    * iOS simulator: returns the output of ``xcrun simctl listapps`` filtered
      through ``grep CFBundleIdentifier`` as-is.
    * Physical iOS device: tries ``ios-deploy`` and then ``ideviceinstaller``
      (whichever is on PATH) and returns the sorted bundle ids; returns an empty
      string, after logging a warning, when neither tool produced a result.

    Args:
        ctx: Execution context identifying the target device.

    Returns:
        Newline-separated identifiers (or the raw simulator listing).
    """
    if ctx.device.mobile_platform != DevicePlatform.IOS:
        adb_device = get_adb_device(ctx)
        # Get full package list with paths
        listing = str(adb_device.shell(" ".join(["pm", "list", "packages", "-f"])))
        # "package:/path/to/app.apk=com.example.app" -> "com.example.app"
        names = [
            row.split("=")[-1].strip() for row in listing.strip().split("\n") if "=" in row
        ]
        return "\n".join(sorted(names))

    udid = ctx.device.device_id

    if get_device_type(udid) == DeviceType.SIMULATOR:
        simctl_cmd = ["xcrun", "simctl", "listapps", udid, "|", "grep", "CFBundleIdentifier"]
        return run_shell_command_on_host(" ".join(simctl_cmd))

    # Physical device: try ios-deploy first (common with React Native/Cordova)
    if which("ios-deploy"):
        deploy_cmd = ["ios-deploy", "--id", udid, "--list_bundle_id"]
        try:
            raw = run_shell_command_on_host(" ".join(deploy_cmd))
            trimmed = (row.strip() for row in raw.strip().split("\n"))
            return "\n".join(sorted(entry for entry in trimmed if entry))
        except Exception as e:
            logger.debug(f"ios-deploy failed: {e}")

    # Fallback: ideviceinstaller (libimobiledevice)
    if which("ideviceinstaller"):
        installer_cmd = ["ideviceinstaller", "-l", "-u", udid]
        try:
            raw = run_shell_command_on_host(" ".join(installer_cmd))
            # Rows look like "CFBundleIdentifier, CFBundleVersion, CFBundleDisplayName";
            # the header row itself starts with "CFBundle" and is skipped.
            bundle_ids = []
            for row in raw.strip().split("\n"):
                if ", " not in row:
                    continue
                candidate = row.split(", ")[0].strip()
                if candidate and not candidate.startswith("CFBundle"):
                    bundle_ids.append(candidate)
            return "\n".join(sorted(bundle_ids))
        except Exception as e:
            logger.debug(f"ideviceinstaller failed: {e}")

    logger.warning(
        "Cannot list apps on physical iOS device. Install ios-deploy "
        "(npm install -g ios-deploy) or ideviceinstaller (brew install ideviceinstaller)"
    )
    return ""
137
+
138
+
139
def get_current_foreground_package(ctx: MobileUseContext) -> str | None:
    """
    Return the identifier of the app currently in the foreground.

    Only the bare package/bundle name is returned (e.g., 'com.whatsapp'),
    stripped of window metadata. iOS is delegated to
    ``_get_ios_foreground_package``; Android parses the ``mCurrentFocus``
    line of ``dumpsys window``.

    Returns:
        The package/bundle name, or None if it cannot be determined
        (including when any exception is raised along the way).
    """
    focus_marker = "mCurrentFocus="
    try:
        if ctx.device.mobile_platform == DevicePlatform.IOS:
            return _get_ios_foreground_package(ctx)

        dump = str(get_adb_device(ctx).shell("dumpsys window | grep mCurrentFocus"))
        if focus_marker not in dump:
            return None

        # Text following the last occurrence of the marker.
        focus = dump.rpartition(focus_marker)[2]
        if "/" not in focus:
            return None

        for word in focus.split():
            # The component token is dotted and is not the "Window{..." prefix.
            if "." not in word or word.startswith("Window"):
                continue
            candidate = word.split("/")[0].rstrip("}")
            if candidate and "." in candidate:
                return candidate

        return None

    except Exception as e:
        logger.debug(f"Failed to get current foreground package: {e}")
        return None
175
+
176
+
177
def _get_ios_foreground_package(ctx: MobileUseContext) -> str | None:
    """Get foreground package for iOS devices (simulator or physical).

    Calls the async ``ios_client.app_current()`` from synchronous code. When an
    event loop is already running in this thread the coroutine is executed on a
    fresh loop in a worker thread (waiting at most 10 seconds); otherwise it is
    run directly with ``asyncio.run``.

    Args:
        ctx: Execution context; ``ctx.ios_client`` must be set for a result.

    Returns:
        The bundle id of the current app, or None when there is no client, no
        bundle id, or the lookup failed (failures are logged at debug level).
    """
    ios_client = ctx.ios_client

    if not ios_client:
        return None

    try:
        # Detect the running loop on its own so that a RuntimeError raised by the
        # client call is never mistaken for "no running loop" (which would then
        # trigger asyncio.run() inside a running loop).
        try:
            asyncio.get_running_loop()
        except RuntimeError:
            loop_running = False
        else:
            loop_running = True

        if loop_running:
            # Already in async context - run in separate thread.
            # NOTE: this still blocks the calling thread for up to the timeout.
            pool = ThreadPoolExecutor(max_workers=1)
            try:
                app_info = pool.submit(
                    lambda: asyncio.run(ios_client.app_current())
                ).result(timeout=10)
            finally:
                # Do not join the worker here: waiting for it (as the context
                # manager form does) would defeat the timeout above.
                pool.shutdown(wait=False, cancel_futures=True)
        else:
            # No running loop - use asyncio.run()
            app_info = asyncio.run(ios_client.app_current())

        if app_info and app_info.bundle_id:
            return app_info.bundle_id
    except Exception as e:
        logger.debug(f"Failed to get foreground app: {e}")
    return None
@@ -0,0 +1,106 @@
1
+ from pydantic import BaseModel, ConfigDict, Field
2
+
3
+
4
class TapOutput(BaseModel):
    """Output from tap operations."""

    # Stays None unless the tap failed, in which case it carries the message.
    error: str | None = Field(None, description="Error message if tap failed")
8
+
9
+
10
class Bounds(BaseModel):
    """Represents the bounds of a UI element."""

    # Rectangle given by the two points (x1, y1) and (x2, y2).
    x1: int
    y1: int
    x2: int
    y2: int

    def get_center(self) -> "CoordinatesSelectorRequest":
        """Return the midpoint of the bounds, floor-dividing each axis to an int."""
        mid_x = (self.x1 + self.x2) // 2
        mid_y = (self.y1 + self.y2) // 2
        return CoordinatesSelectorRequest(x=mid_x, y=mid_y)
24
+
25
+
26
class CoordinatesSelectorRequest(BaseModel):
    # Unknown fields are rejected at validation time (extra="forbid").
    model_config = ConfigDict(extra="forbid")
    x: int
    y: int

    def to_str(self):
        """Render the point as ``"<x>, <y>"``."""
        return ", ".join((str(self.x), str(self.y)))
33
+
34
+
35
class PercentagesSelectorRequest(BaseModel):
    """
    Screen position expressed as percentages of the width and height.

    0%,0%      # top-left corner
    100%,100%  # bottom-right corner
    50%,50%    # center
    """

    # The docstring must be the first statement of the class body: placed after
    # ``model_config`` it is only a discarded string expression, leaving
    # ``__doc__`` (and therefore the model's schema description) empty.
    model_config = ConfigDict(extra="forbid")

    x_percent: int = Field(ge=0, le=100, description="X percentage (0-100)")
    y_percent: int = Field(ge=0, le=100, description="Y percentage (0-100)")

    def to_str(self):
        """Render the position as ``"<x>%, <y>%"``."""
        return f"{self.x_percent}%, {self.y_percent}%"

    def to_coords(self, width: int, height: int) -> CoordinatesSelectorRequest:
        """Convert percentages to pixel coordinates.

        Args:
            width: Width of the target area in pixels.
            height: Height of the target area in pixels.

        Returns:
            The pixel position, clamped to ``[0, width - 1]`` x ``[0, height - 1]``
            so that 100% maps to the last pixel rather than one past it.
        """
        x = min(max(int(width * self.x_percent / 100), 0), max(0, width - 1))
        y = min(max(int(height * self.y_percent / 100), 0), max(0, height - 1))
        return CoordinatesSelectorRequest(x=x, y=y)
54
+
55
+
56
class SwipeStartEndCoordinatesRequest(BaseModel):
    # Unknown fields are rejected at validation time (extra="forbid").
    model_config = ConfigDict(extra="forbid")
    start: CoordinatesSelectorRequest
    end: CoordinatesSelectorRequest

    def to_dict(self):
        """Return both endpoints rendered with their ``to_str()`` form."""
        return dict(start=self.start.to_str(), end=self.end.to_str())
63
+
64
+
65
class SwipeStartEndPercentagesRequest(BaseModel):
    # Unknown fields are rejected at validation time (extra="forbid").
    model_config = ConfigDict(extra="forbid")
    start: PercentagesSelectorRequest
    end: PercentagesSelectorRequest

    def to_dict(self):
        """Return both endpoints rendered with their ``to_str()`` form."""
        return dict(start=self.start.to_str(), end=self.end.to_str())

    def to_coords(self, width: int, height: int) -> SwipeStartEndCoordinatesRequest:
        """Convert percentage-based swipe to coordinate-based swipe."""
        pixel_start = self.start.to_coords(width, height)
        pixel_end = self.end.to_coords(width, height)
        return SwipeStartEndCoordinatesRequest(start=pixel_start, end=pixel_end)
79
+
80
+
81
class SwipeRequest(BaseModel):
    """
    Swipe from start to end position using coordinates or percentages.
    """

    model_config = ConfigDict(extra="forbid")
    swipe_mode: SwipeStartEndCoordinatesRequest | SwipeStartEndPercentagesRequest = Field(
        description="Start and end positions. Use EITHER (x, y) OR (x_percent, y_percent)."
    )
    duration: int | None = Field(
        default=None,
        ge=1,
        le=10000,
        description="Swipe duration in ms. If not provided, tool functions default to 400ms.",
    )

    def to_dict(self):
        """Flatten the request into a plain dict.

        The result holds the ``start``/``end`` strings of the swipe mode, plus
        ``duration`` when one was provided.
        """
        payload = {}
        mode = self.swipe_mode
        if isinstance(mode, (SwipeStartEndCoordinatesRequest, SwipeStartEndPercentagesRequest)):
            payload.update(mode.to_dict())
        if self.duration:
            payload["duration"] = self.duration
        return payload
@@ -0,0 +1,193 @@
1
+ from minitap.mobile_use.context import MobileUseContext
2
+ from minitap.mobile_use.controllers.controller_factory import get_controller
3
+ from minitap.mobile_use.controllers.device_controller import MobileDeviceController
4
+ from minitap.mobile_use.controllers.types import (
5
+ CoordinatesSelectorRequest,
6
+ PercentagesSelectorRequest,
7
+ SwipeRequest,
8
+ SwipeStartEndCoordinatesRequest,
9
+ SwipeStartEndPercentagesRequest,
10
+ TapOutput,
11
+ )
12
+ from minitap.mobile_use.utils.logger import get_logger
13
+
14
+ logger = get_logger(__name__)
15
+
16
+
17
class UnifiedMobileController:
    """Convenience facade over the platform controller chosen by ``get_controller``.

    Every method forwards to the wrapped ``MobileDeviceController``; the
    percentage-based variants first convert to pixels using the device
    dimensions stored on the context.
    """

    def __init__(self, ctx: MobileUseContext):
        self.ctx = ctx
        self._controller: MobileDeviceController = get_controller(ctx)

    @property
    def controller(self) -> MobileDeviceController:
        """The underlying platform controller."""
        return self._controller

    def _percent_to_pixels(self, x_percent: int, y_percent: int) -> CoordinatesSelectorRequest:
        """Validate a percentage position and convert it to device pixels."""
        position = PercentagesSelectorRequest(x_percent=x_percent, y_percent=y_percent)
        return position.to_coords(
            width=self.ctx.device.device_width,
            height=self.ctx.device.device_height,
        )

    async def tap_at(
        self,
        x: int,
        y: int,
        long_press: bool = False,
        long_press_duration: int = 1000,
    ) -> TapOutput:
        """Tap (or long-press) at absolute pixel coordinates."""
        point = CoordinatesSelectorRequest(x=x, y=y)
        return await self._controller.tap(point, long_press, long_press_duration)

    async def tap_percentage(
        self,
        x_percent: int,
        y_percent: int,
        long_press: bool = False,
        long_press_duration: int = 1000,
    ) -> TapOutput:
        """Tap at percentage-based coordinates (0 to 100)."""
        point = self._percent_to_pixels(x_percent, y_percent)
        return await self._controller.tap(point, long_press, long_press_duration)

    async def tap_element(
        self,
        resource_id: str | None = None,
        text: str | None = None,
        index: int = 0,
        long_press: bool = False,
        long_press_duration: int = 1000,
    ) -> TapOutput:
        """
        Locate a UI element in the current hierarchy and tap its center.

        Args:
            resource_id: Android resource ID or iOS element type
            text: Element text/label/value to match
            index: Which match to tap if multiple elements match
            long_press: Whether to perform long press
            long_press_duration: Duration of long press in milliseconds

        Returns:
            TapOutput with error field set on failure
        """
        hierarchy = await self._controller.get_ui_hierarchy()
        _, bounds, error = self._controller.find_element(
            ui_hierarchy=hierarchy,
            resource_id=resource_id,
            text=text,
            index=index,
        )

        # Bail out when the lookup failed or produced no usable bounds.
        if error or not bounds:
            return TapOutput(error=error or "Could not extract bounds for element")

        return await self._controller.tap(bounds.get_center(), long_press, long_press_duration)

    async def swipe_coords(
        self,
        start_x: int,
        start_y: int,
        end_x: int,
        end_y: int,
        duration: int = 400,
    ) -> str | None:
        """Swipe between two coordinate points."""
        origin = CoordinatesSelectorRequest(x=start_x, y=start_y)
        destination = CoordinatesSelectorRequest(x=end_x, y=end_y)
        return await self._controller.swipe(origin, destination, duration)

    async def swipe_percentage(
        self,
        start_x_percent: int,
        start_y_percent: int,
        end_x_percent: int,
        end_y_percent: int,
        duration: int = 400,
    ) -> str | None:
        """Swipe using percentage-based coordinates (0 to 100)."""
        origin = self._percent_to_pixels(start_x_percent, start_y_percent)
        destination = self._percent_to_pixels(end_x_percent, end_y_percent)
        return await self._controller.swipe(origin, destination, duration)

    async def swipe_request(self, request: SwipeRequest) -> str | None:
        """Execute a ``SwipeRequest``, using 400 ms when no duration is given."""
        mode = request.swipe_mode

        if isinstance(mode, SwipeStartEndPercentagesRequest):
            resolved = mode.to_coords(
                width=self.ctx.device.device_width,
                height=self.ctx.device.device_height,
            )
        elif isinstance(mode, SwipeStartEndCoordinatesRequest):
            resolved = mode
        else:
            return "Unsupported swipe mode"

        return await self._controller.swipe(
            start=resolved.start,
            end=resolved.end,
            duration=request.duration or 400,
        )

    async def type_text(self, text: str) -> bool:
        """Forward to the controller's ``input_text``."""
        return await self._controller.input_text(text)

    async def take_screenshot(self) -> str:
        """Forward to the controller's ``screenshot``."""
        return await self._controller.screenshot()

    async def launch_app(self, package_or_bundle_id: str) -> bool:
        """Forward to the controller's ``launch_app``."""
        return await self._controller.launch_app(package_or_bundle_id)

    async def terminate_app(self, package_or_bundle_id: str | None) -> bool:
        """Forward to the controller's ``terminate_app``."""
        return await self._controller.terminate_app(package_or_bundle_id)

    async def open_url(self, url: str) -> bool:
        """Forward to the controller's ``open_url``."""
        return await self._controller.open_url(url)

    async def go_back(self) -> bool:
        """Forward to the controller's ``press_back``."""
        return await self._controller.press_back()

    async def go_home(self) -> bool:
        """Forward to the controller's ``press_home``."""
        return await self._controller.press_home()

    async def press_enter(self) -> bool:
        """Forward to the controller's ``press_enter``."""
        return await self._controller.press_enter()

    async def erase_text(self, nb_chars: int | None = None) -> bool:
        """Forward to the controller's ``erase_text``."""
        return await self._controller.erase_text(nb_chars)

    async def get_ui_elements(self) -> list[dict]:
        """Return the controller's current UI hierarchy."""
        return await self._controller.get_ui_hierarchy()

    async def find_element(
        self,
        resource_id: str | None = None,
        text: str | None = None,
        index: int = 0,
    ) -> tuple[dict | None, str | None]:
        """Look up an element in a fresh UI hierarchy.

        Returns:
            ``(element, error)`` as reported by the controller; the bounds it
            also computes are discarded.
        """
        hierarchy = await self._controller.get_ui_hierarchy()
        element, _, error = self._controller.find_element(
            ui_hierarchy=hierarchy,
            resource_id=resource_id,
            text=text,
            index=index,
        )
        return element, error

    async def cleanup(self) -> None:
        """Forward to the controller's ``cleanup``."""
        await self._controller.cleanup()
@@ -0,0 +1,160 @@
1
+ from collections.abc import Sequence
2
+ from typing import Literal
3
+
4
+ from langchain_core.messages import AIMessage
5
+ from langgraph.constants import END, START
6
+ from langgraph.graph import StateGraph
7
+ from langgraph.graph.state import CompiledStateGraph
8
+
9
+ from minitap.mobile_use.agents.contextor.contextor import ContextorNode
10
+ from minitap.mobile_use.agents.cortex.cortex import CortexNode
11
+ from minitap.mobile_use.agents.executor.executor import ExecutorNode
12
+ from minitap.mobile_use.agents.executor.tool_node import ExecutorToolNode
13
+ from minitap.mobile_use.agents.orchestrator.orchestrator import OrchestratorNode
14
+ from minitap.mobile_use.agents.planner.planner import PlannerNode
15
+ from minitap.mobile_use.agents.planner.utils import (
16
+ all_completed,
17
+ get_current_subgoal,
18
+ one_of_them_is_failure,
19
+ )
20
+ from minitap.mobile_use.agents.summarizer.summarizer import SummarizerNode
21
+ from minitap.mobile_use.constants import EXECUTOR_MESSAGES_KEY
22
+ from minitap.mobile_use.context import MobileUseContext
23
+ from minitap.mobile_use.graph.state import State
24
+ from minitap.mobile_use.tools.index import (
25
+ EXECUTOR_WRAPPERS_TOOLS,
26
+ VIDEO_RECORDING_WRAPPERS,
27
+ get_tools_from_wrappers,
28
+ )
29
+ from minitap.mobile_use.utils.logger import get_logger
30
+
31
+ logger = get_logger(__name__)
32
+
33
+
34
def convergence_node(state: State):
    """Join point for the parallel execution paths; contributes no state update."""
    no_update: dict = {}
    return no_update
37
+
38
+
39
def convergence_gate(
    state: State,
) -> Literal["continue", "replan", "end"]:
    """Pick the route to follow after the convergence node.

    Returns:
        ``"replan"`` if any subgoal failed, ``"end"`` if all subgoals are
        completed or none is current, otherwise ``"continue"``.
    """
    logger.info("Starting convergence_gate")
    plan = state.subgoal_plan

    if one_of_them_is_failure(plan):
        logger.info("One of the subgoals is in failure state, asking to replan")
        verdict = "replan"
    elif all_completed(plan):
        logger.info("All subgoals are completed, ending the goal")
        verdict = "end"
    elif not get_current_subgoal(plan):
        logger.info("No subgoal running, ending the goal")
        verdict = "end"
    else:
        verdict = "continue"

    return verdict
58
+
59
+
60
def post_cortex_gate(
    state: State,
) -> Sequence[str]:
    """Choose which branch(es) to run after the cortex node.

    Returns:
        A list containing ``"review_subgoals"`` and/or ``"execute_decisions"``,
        in that order.
    """
    logger.info("Starting post_cortex_gate")
    has_completions = len(state.complete_subgoals_by_ids) > 0
    has_decisions = bool(state.structured_decisions)

    if not has_decisions:
        # Cortex produced nothing to execute: always go through the review path
        # so the graph does not get stuck.
        return ["review_subgoals"]

    if has_completions:
        # Subgoals must be marked complete and there are decisions to execute.
        return ["review_subgoals", "execute_decisions"]

    return ["execute_decisions"]
75
+
76
+
77
def post_executor_gate(
    state: State,
) -> Literal["invoke_tools", "skip"]:
    """Decide whether the executor's last message requires running tools.

    Returns:
        ``"invoke_tools"`` when the last executor message is an ``AIMessage``
        carrying at least one tool call; ``"skip"`` in every other case (no
        messages, no tool calls, or a last message of another type).
    """
    logger.info("Starting post_executor_gate")
    messages = state.executor_messages
    if not messages:
        return "skip"
    last_message = messages[-1]

    if isinstance(last_message, AIMessage):
        tool_calls = getattr(last_message, "tool_calls", None)
        if tool_calls:
            logger.info("[executor] Executing " + str(len(tool_calls)) + " tool calls:")
            for tool_call in tool_calls:
                logger.info("-------------")
                logger.info("[executor] - " + str(tool_call) + "\n")
            logger.info("-------------")
            return "invoke_tools"
        logger.info("[executor] ❌ No tool calls found")
        return "skip"

    # Previously this path fell off the end and returned None, which is neither
    # of the declared routes. There is nothing to invoke, so skip.
    return "skip"
98
+
99
+
100
async def get_graph(ctx: MobileUseContext) -> CompiledStateGraph:
    """Assemble and compile the agent state graph for ``ctx``.

    Flow, as wired below: START -> planner -> orchestrator -> convergence. From
    convergence the gate either loops through contextor -> cortex, goes back to
    the planner, or ends. Cortex fans out to the orchestrator and/or the
    executor; the executor reaches the summarizer directly or via its tool
    node, and the summarizer feeds back into convergence.

    Args:
        ctx: Execution context handed to every node and to the tool factory.

    Returns:
        The compiled graph.
    """
    builder = StateGraph(State)

    ## Define nodes
    for node_name, node_factory in (
        ("planner", PlannerNode),
        ("orchestrator", OrchestratorNode),
        ("contextor", ContextorNode),
        ("cortex", CortexNode),
        ("executor", ExecutorNode),
    ):
        builder.add_node(node_name, node_factory(ctx))

    # Video-recording tools are only exposed when the context enables them.
    wrappers = list(EXECUTOR_WRAPPERS_TOOLS)
    if ctx.video_recording_enabled:
        wrappers.extend(VIDEO_RECORDING_WRAPPERS)

    builder.add_node(
        "executor_tools",
        ExecutorToolNode(
            tools=get_tools_from_wrappers(ctx=ctx, wrappers=wrappers),
            messages_key=EXECUTOR_MESSAGES_KEY,
            trace_id=ctx.trace_id,
        ),
    )
    builder.add_node("summarizer", SummarizerNode(ctx))
    builder.add_node(node="convergence", action=convergence_node, defer=True)

    ## Linking nodes
    for source, target in (
        (START, "planner"),
        ("planner", "orchestrator"),
        ("orchestrator", "convergence"),
        ("contextor", "cortex"),
    ):
        builder.add_edge(source, target)

    builder.add_conditional_edges(
        "cortex",
        post_cortex_gate,
        {
            "review_subgoals": "orchestrator",
            "execute_decisions": "executor",
        },
    )
    builder.add_conditional_edges(
        "executor",
        post_executor_gate,
        {"invoke_tools": "executor_tools", "skip": "summarizer"},
    )
    builder.add_edge("executor_tools", "summarizer")
    builder.add_edge("summarizer", "convergence")

    builder.add_conditional_edges(
        source="convergence",
        path=convergence_gate,
        path_map={
            "continue": "contextor",
            "replan": "planner",
            "end": END,
        },
    )

    return builder.compile()