minitap-mobile-use 0.0.1.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of minitap-mobile-use might be problematic. Click here for more details.

Files changed (95) hide show
  1. minitap/mobile_use/__init__.py +0 -0
  2. minitap/mobile_use/agents/contextor/contextor.py +42 -0
  3. minitap/mobile_use/agents/cortex/cortex.md +93 -0
  4. minitap/mobile_use/agents/cortex/cortex.py +107 -0
  5. minitap/mobile_use/agents/cortex/types.py +11 -0
  6. minitap/mobile_use/agents/executor/executor.md +73 -0
  7. minitap/mobile_use/agents/executor/executor.py +84 -0
  8. minitap/mobile_use/agents/executor/executor_context_cleaner.py +27 -0
  9. minitap/mobile_use/agents/executor/utils.py +11 -0
  10. minitap/mobile_use/agents/hopper/hopper.md +13 -0
  11. minitap/mobile_use/agents/hopper/hopper.py +45 -0
  12. minitap/mobile_use/agents/orchestrator/human.md +13 -0
  13. minitap/mobile_use/agents/orchestrator/orchestrator.md +18 -0
  14. minitap/mobile_use/agents/orchestrator/orchestrator.py +114 -0
  15. minitap/mobile_use/agents/orchestrator/types.py +14 -0
  16. minitap/mobile_use/agents/outputter/human.md +25 -0
  17. minitap/mobile_use/agents/outputter/outputter.py +75 -0
  18. minitap/mobile_use/agents/outputter/test_outputter.py +107 -0
  19. minitap/mobile_use/agents/planner/human.md +12 -0
  20. minitap/mobile_use/agents/planner/planner.md +64 -0
  21. minitap/mobile_use/agents/planner/planner.py +64 -0
  22. minitap/mobile_use/agents/planner/types.py +44 -0
  23. minitap/mobile_use/agents/planner/utils.py +45 -0
  24. minitap/mobile_use/agents/summarizer/summarizer.py +34 -0
  25. minitap/mobile_use/clients/device_hardware_client.py +23 -0
  26. minitap/mobile_use/clients/ios_client.py +44 -0
  27. minitap/mobile_use/clients/screen_api_client.py +53 -0
  28. minitap/mobile_use/config.py +285 -0
  29. minitap/mobile_use/constants.py +2 -0
  30. minitap/mobile_use/context.py +65 -0
  31. minitap/mobile_use/controllers/__init__.py +0 -0
  32. minitap/mobile_use/controllers/mobile_command_controller.py +379 -0
  33. minitap/mobile_use/controllers/platform_specific_commands_controller.py +74 -0
  34. minitap/mobile_use/graph/graph.py +149 -0
  35. minitap/mobile_use/graph/state.py +73 -0
  36. minitap/mobile_use/main.py +122 -0
  37. minitap/mobile_use/sdk/__init__.py +12 -0
  38. minitap/mobile_use/sdk/agent.py +524 -0
  39. minitap/mobile_use/sdk/builders/__init__.py +10 -0
  40. minitap/mobile_use/sdk/builders/agent_config_builder.py +213 -0
  41. minitap/mobile_use/sdk/builders/index.py +15 -0
  42. minitap/mobile_use/sdk/builders/task_request_builder.py +218 -0
  43. minitap/mobile_use/sdk/constants.py +14 -0
  44. minitap/mobile_use/sdk/examples/README.md +45 -0
  45. minitap/mobile_use/sdk/examples/__init__.py +1 -0
  46. minitap/mobile_use/sdk/examples/simple_photo_organizer.py +76 -0
  47. minitap/mobile_use/sdk/examples/smart_notification_assistant.py +177 -0
  48. minitap/mobile_use/sdk/types/__init__.py +49 -0
  49. minitap/mobile_use/sdk/types/agent.py +73 -0
  50. minitap/mobile_use/sdk/types/exceptions.py +74 -0
  51. minitap/mobile_use/sdk/types/task.py +191 -0
  52. minitap/mobile_use/sdk/utils.py +28 -0
  53. minitap/mobile_use/servers/config.py +19 -0
  54. minitap/mobile_use/servers/device_hardware_bridge.py +212 -0
  55. minitap/mobile_use/servers/device_screen_api.py +143 -0
  56. minitap/mobile_use/servers/start_servers.py +151 -0
  57. minitap/mobile_use/servers/stop_servers.py +215 -0
  58. minitap/mobile_use/servers/utils.py +11 -0
  59. minitap/mobile_use/services/accessibility.py +100 -0
  60. minitap/mobile_use/services/llm.py +143 -0
  61. minitap/mobile_use/tools/index.py +54 -0
  62. minitap/mobile_use/tools/mobile/back.py +52 -0
  63. minitap/mobile_use/tools/mobile/copy_text_from.py +77 -0
  64. minitap/mobile_use/tools/mobile/erase_text.py +124 -0
  65. minitap/mobile_use/tools/mobile/input_text.py +74 -0
  66. minitap/mobile_use/tools/mobile/launch_app.py +59 -0
  67. minitap/mobile_use/tools/mobile/list_packages.py +78 -0
  68. minitap/mobile_use/tools/mobile/long_press_on.py +62 -0
  69. minitap/mobile_use/tools/mobile/open_link.py +59 -0
  70. minitap/mobile_use/tools/mobile/paste_text.py +66 -0
  71. minitap/mobile_use/tools/mobile/press_key.py +58 -0
  72. minitap/mobile_use/tools/mobile/run_flow.py +57 -0
  73. minitap/mobile_use/tools/mobile/stop_app.py +58 -0
  74. minitap/mobile_use/tools/mobile/swipe.py +56 -0
  75. minitap/mobile_use/tools/mobile/take_screenshot.py +70 -0
  76. minitap/mobile_use/tools/mobile/tap.py +66 -0
  77. minitap/mobile_use/tools/mobile/wait_for_animation_to_end.py +68 -0
  78. minitap/mobile_use/tools/tool_wrapper.py +33 -0
  79. minitap/mobile_use/utils/cli_helpers.py +40 -0
  80. minitap/mobile_use/utils/cli_selection.py +144 -0
  81. minitap/mobile_use/utils/conversations.py +31 -0
  82. minitap/mobile_use/utils/decorators.py +123 -0
  83. minitap/mobile_use/utils/errors.py +6 -0
  84. minitap/mobile_use/utils/file.py +13 -0
  85. minitap/mobile_use/utils/logger.py +184 -0
  86. minitap/mobile_use/utils/media.py +73 -0
  87. minitap/mobile_use/utils/recorder.py +55 -0
  88. minitap/mobile_use/utils/requests_utils.py +37 -0
  89. minitap/mobile_use/utils/shell_utils.py +20 -0
  90. minitap/mobile_use/utils/time.py +6 -0
  91. minitap/mobile_use/utils/ui_hierarchy.py +30 -0
  92. minitap_mobile_use-0.0.1.dev0.dist-info/METADATA +274 -0
  93. minitap_mobile_use-0.0.1.dev0.dist-info/RECORD +95 -0
  94. minitap_mobile_use-0.0.1.dev0.dist-info/WHEEL +4 -0
  95. minitap_mobile_use-0.0.1.dev0.dist-info/entry_points.txt +3 -0
@@ -0,0 +1,379 @@
1
+ import uuid
2
+ from enum import Enum
3
+ from typing import Annotated, Literal, Optional, Union
4
+
5
+ import yaml
6
+ from langgraph.types import Command
7
+ from pydantic import BaseModel, BeforeValidator, ConfigDict, Field
8
+ from requests import JSONDecodeError
9
+
10
+ from minitap.mobile_use.clients.device_hardware_client import DeviceHardwareClient
11
+ from minitap.mobile_use.clients.screen_api_client import ScreenApiClient
12
+ from minitap.mobile_use.context import DeviceContext, DevicePlatform, MobileUseContext
13
+ from minitap.mobile_use.utils.errors import ControllerErrors
14
+ from minitap.mobile_use.utils.logger import get_logger
15
+
16
+ logger = get_logger(__name__)
17
+
18
+
19
+ ###### Screen elements retrieval ######
20
+
21
+
22
# Parsed body of the screen API "/screen-info" response (see get_screen_data).
class ScreenDataResponse(BaseModel):
    base64: str  # returned as-is by take_screenshot
    elements: list
    width: int
    height: int
    platform: str
27
+ platform: str
28
+
29
+
30
def get_screen_data(screen_api_client: ScreenApiClient):
    """Fetch "/screen-info" from the screen API and parse it into a ScreenDataResponse."""
    screen_info = screen_api_client.get_with_retry("/screen-info").json()
    return ScreenDataResponse(**screen_info)
33
+
34
+
35
def take_screenshot(ctx: MobileUseContext):
    """Return the ``base64`` field of the current screen data."""
    screen_data = get_screen_data(ctx.screen_api_client)
    return screen_data.base64
37
+
38
+
39
# Body posted by run_flow for a single step: the step dumped as YAML plus the
# dry-run flag, which is exposed under the "dryRun" alias.
class RunFlowRequest(BaseModel):
    model_config = ConfigDict(extra="forbid")

    yaml: str
    dry_run: bool = Field(default=False, alias="dryRun")
43
+
44
+
45
def run_flow(ctx: MobileUseContext, flow_steps: list, dry_run: bool = False) -> Optional[dict]:
    """
    Run a flow, i.e. a sequence of commands, one step at a time.

    Each step is dumped to YAML and posted to the hardware bridge "run-command"
    endpoint. The loop stops at the first step answered with a status code >= 300.

    Returns None on success, or a dict with the status code and the response
    body of the failed command.
    """
    logger.info(f"Running flow: {flow_steps}")

    for flow_step in flow_steps:
        request = RunFlowRequest(yaml=yaml.dump(flow_step), dryRun=dry_run)
        response = ctx.hw_bridge_client.post(
            "run-command",
            json=request.model_dump(by_alias=True),
        )

        # Prefer the JSON body; fall back to the raw text when it is not JSON.
        try:
            body = response.json()
        except JSONDecodeError:
            body = response.text

        if isinstance(body, dict):
            # Null entries are dropped from the reported body.
            body = {key: value for key, value in body.items() if value is not None}

        if response.status_code < 300:
            continue

        logger.error(f"Tool call failed with status code: {response.status_code}")
        return {"status_code": response.status_code, "body": body}

    logger.success("Tool call completed")
    return None
71
+
72
+
73
# Screen point given as integer x / y coordinates.
class CoordinatesSelectorRequest(BaseModel):
    model_config = ConfigDict(extra="forbid")

    x: int
    y: int

    def to_str(self):
        # Rendered as "x, y".
        return ", ".join((str(self.x), str(self.y)))
80
+
81
+
82
# Screen point given as percentages:
#   0%,0%      top-left corner
#   100%,100%  bottom-right corner
#   50%,50%    center
class PercentagesSelectorRequest(BaseModel):
    model_config = ConfigDict(extra="forbid")

    x_percent: int
    y_percent: int

    def to_str(self):
        # Rendered as "x%, y%".
        return "{}%, {}%".format(self.x_percent, self.y_percent)
95
+
96
+
97
# Selector made of a view id only.
class IdSelectorRequest(BaseModel):
    model_config = ConfigDict(extra="forbid")

    id: str

    def to_dict(self) -> dict[str, str | int]:
        # Mapping placed under the command key by tap / long_press_on / copy_text_from.
        return dict(id=self.id)
103
+
104
+
105
# Selector combining a view id with its text; useful to tap on an element when
# there are multiple views with the same id.
class IdWithTextSelectorRequest(BaseModel):
    model_config = ConfigDict(extra="forbid")

    id: str
    text: str

    def to_dict(self) -> dict[str, str | int]:
        return dict(id=self.id, text=self.text)
113
+
114
+
115
# Selector made of a text only.
class TextSelectorRequest(BaseModel):
    model_config = ConfigDict(extra="forbid")

    text: str

    def to_dict(self) -> dict[str, str | int]:
        return dict(text=self.text)
121
+
122
+
123
# Selector wrapping absolute coordinates; serialized as a "point" entry.
class SelectorRequestWithCoordinates(BaseModel):
    model_config = ConfigDict(extra="forbid")

    coordinates: CoordinatesSelectorRequest

    def to_dict(self) -> dict[str, str | int]:
        point = self.coordinates.to_str()
        return {"point": point}
129
+
130
+
131
# Selector wrapping percentage coordinates; serialized as a "point" entry.
class SelectorRequestWithPercentages(BaseModel):
    model_config = ConfigDict(extra="forbid")

    percentages: PercentagesSelectorRequest

    def to_dict(self) -> dict[str, str | int]:
        point = self.percentages.to_str()
        return {"point": point}
137
+
138
+
139
# Any selector accepted by tap, long_press_on and copy_text_from.
# The member order is kept unchanged on purpose.
SelectorRequest = Union[
    IdSelectorRequest,
    SelectorRequestWithCoordinates,
    SelectorRequestWithPercentages,
    TextSelectorRequest,
    IdWithTextSelectorRequest,
]
146
+
147
+
148
def tap(
    ctx: MobileUseContext,
    selector_request: SelectorRequest,
    dry_run: bool = False,
    index: Optional[int] = None,
):
    """
    Tap on a selector, then wait for animations to end.

    Index is optional and is used when you have multiple views matching the same selector.
    An explicit index of 0 is forwarded as well; only None means "no index".

    Raises ControllerErrors when the selector serializes to an empty mapping.
    Returns the result of the underlying flow run.
    """
    tap_body = selector_request.to_dict()
    if not tap_body:
        error = "Invalid tap selector request, could not format yaml"
        logger.error(error)
        raise ControllerErrors(error)
    # `is not None` rather than truthiness, so that index=0 is not silently dropped.
    if index is not None:
        tap_body["index"] = index
    flow_input = [{"tapOn": tap_body}]
    return run_flow_with_wait_for_animation_to_end(ctx, flow_input, dry_run=dry_run)
167
+
168
+
169
def long_press_on(
    ctx: MobileUseContext,
    selector_request: SelectorRequest,
    dry_run: bool = False,
    index: Optional[int] = None,
):
    """
    Long press on a selector, then wait for animations to end.

    Index is optional and is used when several views match the same selector.
    An explicit index of 0 is forwarded as well; only None means "no index".

    Raises ControllerErrors when the selector serializes to an empty mapping.
    Returns the result of the underlying flow run.
    """
    long_press_on_body = selector_request.to_dict()
    if not long_press_on_body:
        error = "Invalid longPressOn selector request, could not format yaml"
        logger.error(error)
        raise ControllerErrors(error)
    # `is not None` rather than truthiness, so that index=0 is not silently dropped.
    if index is not None:
        long_press_on_body["index"] = index
    flow_input = [{"longPressOn": long_press_on_body}]
    return run_flow_with_wait_for_animation_to_end(ctx, flow_input, dry_run=dry_run)
184
+
185
+
186
# Swipe described by absolute start and end coordinates.
class SwipeStartEndCoordinatesRequest(BaseModel):
    model_config = ConfigDict(extra="forbid")

    start: CoordinatesSelectorRequest
    end: CoordinatesSelectorRequest

    def to_dict(self):
        # Both points are rendered through their own to_str().
        return dict(start=self.start.to_str(), end=self.end.to_str())
193
+
194
+
195
# Swipe described by start and end points given as percentages.
class SwipeStartEndPercentagesRequest(BaseModel):
    model_config = ConfigDict(extra="forbid")

    start: PercentagesSelectorRequest
    end: PercentagesSelectorRequest

    def to_dict(self):
        # Both points are rendered through their own to_str().
        return dict(start=self.start.to_str(), end=self.end.to_str())
202
+
203
+
204
def _uppercase_if_str(value):
    # Upper-case string input before validation; any other value is passed through.
    if isinstance(value, str):
        return value.upper()
    return value


# One of the four swipe directions, accepted case-insensitively.
SwipeDirection = Annotated[
    Literal["UP", "DOWN", "LEFT", "RIGHT"],
    BeforeValidator(_uppercase_if_str),
]
208
+
209
+
210
# Swipe described either by explicit start/end points or by a direction,
# with an optional duration.
class SwipeRequest(BaseModel):
    model_config = ConfigDict(extra="forbid")

    swipe_mode: SwipeStartEndCoordinatesRequest | SwipeStartEndPercentagesRequest | SwipeDirection
    duration: Optional[int] = None  # in ms, default is 400ms

    def to_dict(self):
        mode = self.swipe_mode
        if isinstance(mode, (SwipeStartEndCoordinatesRequest, SwipeStartEndPercentagesRequest)):
            body = dict(mode.to_dict())
        elif mode in ("UP", "DOWN", "LEFT", "RIGHT"):
            body = {"direction": mode}
        else:
            body = {}
        # A missing or zero duration is left out of the body.
        if self.duration:
            body["duration"] = self.duration
        return body
226
+
227
+
228
def swipe(ctx: MobileUseContext, swipe_request: SwipeRequest, dry_run: bool = False):
    """
    Run a "swipe" command built from the request, then wait for animations to end.

    Raises ControllerErrors when the request serializes to an empty mapping.
    """
    body = swipe_request.to_dict()
    if body:
        return run_flow_with_wait_for_animation_to_end(ctx, [{"swipe": body}], dry_run=dry_run)
    error = "Invalid swipe selector request, could not format yaml"
    logger.error(error)
    raise ControllerErrors(error)
236
+
237
+
238
+ ##### Text related commands #####
239
+
240
+
241
def input_text(ctx: MobileUseContext, text: str, dry_run: bool = False):
    """Run a single "inputText" command with the given text."""
    flow_input = [{"inputText": text}]
    return run_flow(ctx, flow_input, dry_run=dry_run)
243
+
244
+
245
def copy_text_from(ctx: MobileUseContext, selector_request: SelectorRequest, dry_run: bool = False):
    """
    Run a "copyTextFrom" command targeting the given selector.

    Raises ControllerErrors when the selector serializes to an empty mapping.
    """
    body = selector_request.to_dict()
    if body:
        return run_flow(ctx, [{"copyTextFrom": body}], dry_run=dry_run)
    error = "Invalid copyTextFrom selector request, could not format yaml"
    logger.error(error)
    raise ControllerErrors(error)
253
+
254
+
255
def paste_text(ctx: MobileUseContext, dry_run: bool = False):
    """Run a single "pasteText" command."""
    flow_input = ["pasteText"]
    return run_flow(ctx, flow_input, dry_run=dry_run)
257
+
258
+
259
def erase_text(ctx: MobileUseContext, nb_chars: Optional[int] = None, dry_run: bool = False):
    """
    Removes characters from the currently selected textfield (if any).

    When nb_chars is None the bare "eraseText" command is sent, which removes
    50 characters; otherwise the count is passed along with the command.
    """
    step = "eraseText" if nb_chars is None else {"eraseText": nb_chars}
    return run_flow(ctx, [step], dry_run=dry_run)
267
+
268
+
269
+ ##### App related commands #####
270
+
271
+
272
def launch_app(ctx: MobileUseContext, package_name: str, dry_run: bool = False):
    """Run "launchApp" for the given package, then wait for animations to end."""
    return run_flow_with_wait_for_animation_to_end(
        ctx,
        [{"launchApp": package_name}],
        dry_run=dry_run,
    )
275
+
276
+
277
def stop_app(ctx: MobileUseContext, package_name: Optional[str] = None, dry_run: bool = False):
    """
    Run "stopApp", then wait for animations to end.

    The bare command is sent when package_name is None; otherwise the package
    name is passed along with it.
    """
    step = "stopApp" if package_name is None else {"stopApp": package_name}
    return run_flow_with_wait_for_animation_to_end(ctx, [step], dry_run=dry_run)
283
+
284
+
285
def open_link(ctx: MobileUseContext, url: str, dry_run: bool = False):
    """Run "openLink" for the given URL, then wait for animations to end."""
    return run_flow_with_wait_for_animation_to_end(
        ctx,
        [{"openLink": url}],
        dry_run=dry_run,
    )
288
+
289
+
290
+ ##### Key related commands #####
291
+
292
+
293
def back(ctx: MobileUseContext, dry_run: bool = False):
    """Run the "back" command, then wait for animations to end."""
    return run_flow_with_wait_for_animation_to_end(ctx, ["back"], dry_run=dry_run)
296
+
297
+
298
# Keys accepted by press_key; each member's value is what gets sent with "pressKey".
class Key(Enum):
    ENTER = "Enter"
    HOME = "Home"
    BACK = "Back"
302
+
303
+
304
def press_key(ctx: MobileUseContext, key: Key, dry_run: bool = False):
    """Run "pressKey" with the value of the given Key, then wait for animations to end."""
    key_name = key.value
    return run_flow_with_wait_for_animation_to_end(
        ctx,
        [{"pressKey": key_name}],
        dry_run=dry_run,
    )
307
+
308
+
309
+ #### Other commands ####
310
+
311
+
312
# Timeout presets sent with "waitForAnimationToEnd".
# NOTE(review): values are presumably milliseconds; confirm against the command runner.
class WaitTimeout(Enum):
    SHORT = 500
    MEDIUM = 1000
    LONG = 5000
316
+
317
+
318
def wait_for_animation_to_end(
    ctx: MobileUseContext, timeout: Optional[WaitTimeout] = None, dry_run: bool = False
):
    """
    Run "waitForAnimationToEnd".

    The bare command is sent when timeout is None; otherwise the preset's value
    is passed as the command's "timeout".
    """
    if timeout is None:
        step = "waitForAnimationToEnd"
    else:
        step = {"waitForAnimationToEnd": {"timeout": timeout.value}}
    return run_flow(ctx, [step], dry_run=dry_run)
324
+
325
+
326
def run_flow_with_wait_for_animation_to_end(
    ctx: MobileUseContext, base_flow: list, dry_run: bool = False
):
    """
    Run base_flow followed by a "waitForAnimationToEnd" step using the MEDIUM timeout.

    The caller's list is left untouched: the wait step is added to a copy, so
    reusing the same list across calls does not accumulate wait steps.

    Returns whatever run_flow returns for the extended flow.
    """
    wait_step = {"waitForAnimationToEnd": {"timeout": WaitTimeout.MEDIUM.value}}
    # Build a new list instead of appending in place to avoid mutating the argument.
    return run_flow(ctx, [*base_flow, wait_step], dry_run=dry_run)
331
+
332
+
333
if __name__ == "__main__":
    # Manual check against a local emulator: fetch the screen data, build a
    # throwaway State from it and invoke the erase_text tool once.
    # Other things to try by hand: long press, erase
    # input_text(text="test")
    # erase_text()
    device = DeviceContext(
        host_platform="LINUX",
        mobile_platform=DevicePlatform.ANDROID,
        device_id="emulator-5554",
        device_width=1080,
        device_height=1920,
    )
    ctx = MobileUseContext(
        device=device,
        hw_bridge_client=DeviceHardwareClient("http://localhost:9999"),
        screen_api_client=ScreenApiClient("http://localhost:9998"),
    )
    screen_data = get_screen_data(ctx.screen_api_client)

    # Imported here, inside the script section, exactly as the original did.
    from minitap.mobile_use.graph.state import State
    from minitap.mobile_use.tools.mobile.erase_text import get_erase_text_tool

    dummy_state = State(
        latest_ui_hierarchy=screen_data.elements,
        messages=[],
        initial_goal="",
        subgoal_plan=[],
        latest_screenshot_base64=screen_data.base64,
        focused_app_info=None,
        device_date="",
        structured_decisions=None,
        executor_retrigger=False,
        executor_failed=False,
        executor_messages=[],
        cortex_last_thought="",
        agents_thoughts=[],
    )

    # invoke erase_text tool
    input_resource_id = "com.google.android.settings.intelligence:id/open_search_view_edit_text"
    erase_text_tool = get_erase_text_tool(ctx=ctx)
    tool_input = {
        "tool_call_id": uuid.uuid4().hex,
        "agent_thought": "",
        "input_text_resource_id": input_resource_id,
        "state": dummy_state,
        "executor_metadata": None,
    }
    command_output: Command = erase_text_tool.invoke(tool_input)
    print(command_output)
@@ -0,0 +1,74 @@
1
+ from datetime import date
2
+ import json
3
+ from typing import Optional
4
+
5
+ from adbutils import AdbDevice
6
+ from minitap.mobile_use.utils.logger import MobileUseLogger
7
+ from minitap.mobile_use.utils.shell_utils import run_shell_command_on_host
8
+ from minitap.mobile_use.context import MobileUseContext
9
+ from minitap.mobile_use.context import DevicePlatform
10
+
11
+
12
def get_adb_device(ctx: MobileUseContext) -> AdbDevice:
    """
    Return the adb device whose serial is the context's device id.

    Raises ValueError when the context's device is not an Android device, and
    ConnectionError when the adb client yields no device for that serial.
    """
    if ctx.device.mobile_platform != DevicePlatform.ANDROID:
        raise ValueError("Device is not an Android device")
    adb_client = ctx.get_adb_client()
    adb_device = adb_client.device(serial=ctx.device.device_id)
    if adb_device:
        return adb_device
    raise ConnectionError(f"Device {ctx.device.device_id} not found.")
20
+
21
+
22
def get_first_device(
    logger: Optional[MobileUseLogger] = None,
) -> tuple[Optional[str], Optional[DevicePlatform]]:
    """
    Gets the first available device.

    Android is probed first through "adb devices"; the first entry whose state
    column is exactly "device" wins. If none is found, or if adb itself fails,
    booted iOS simulators are probed through "xcrun simctl".

    Failures of either command are reported to the optional logger and are
    never raised.

    Returns (device id, platform), or (None, None) when nothing is available.
    """
    try:
        android_output = run_shell_command_on_host("adb devices")
    except RuntimeError as e:
        # A missing or failing adb must not prevent detecting an iOS simulator,
        # so the error is only logged and the iOS probe below still runs.
        if logger:
            logger.error(f"ADB command failed: {e}")
    else:
        for line in android_output.strip().splitlines():
            fields = line.split()
            # A usable entry is "<serial> device". Comparing the state column
            # exactly skips the header and any other line that merely contains
            # the word "device".
            if len(fields) >= 2 and fields[1] == "device":
                return fields[0], DevicePlatform.ANDROID

    try:
        ios_output = run_shell_command_on_host("xcrun simctl list devices booted -j")
        data = json.loads(ios_output)
    except RuntimeError as e:
        if logger:
            logger.error(f"xcrun command failed: {e}")
        return None, None
    except json.JSONDecodeError as e:
        # Output that is not JSON is treated like a failed probe.
        if logger:
            logger.error(f"xcrun returned invalid JSON: {e}")
        return None, None

    for runtime, devices in data.get("devices", {}).items():
        if "iOS" not in runtime:
            continue
        for device in devices:
            if device.get("state") == "Booted":
                return device["udid"], DevicePlatform.IOS

    return None, None
51
+
52
+
53
def get_focused_app_info(ctx: MobileUseContext) -> Optional[str]:
    """
    Return the focus-related lines of "dumpsys window" for an Android device.

    Returns None for iOS devices.
    """
    if ctx.device.mobile_platform == DevicePlatform.IOS:
        return None
    focus_dump = get_adb_device(ctx).shell("dumpsys window | grep -E 'mCurrentFocus|mFocusedApp'")
    return str(focus_dump)
58
+
59
+
60
def get_device_date(ctx: MobileUseContext) -> str:
    """
    Return the current date and time as a string.

    For iOS the host's current local time is formatted as
    "%a %b %d %H:%M:%S %Z %Y". For Android the output of the device's "date"
    shell command is returned.
    """
    if ctx.device.mobile_platform == DevicePlatform.IOS:
        # A plain date carries no time or timezone, which rendered "00:00:00"
        # and an empty zone; use an aware datetime so every field is real.
        from datetime import datetime

        return datetime.now().astimezone().strftime("%a %b %d %H:%M:%S %Z %Y")
    device = get_adb_device(ctx)
    return str(device.shell("date"))
65
+
66
+
67
def list_packages(ctx: MobileUseContext) -> str:
    """
    Return the raw listing of installed apps.

    iOS: bundle identifier lines of "xcrun simctl listapps booted", run on the host.
    Android: output of "pm list packages -f", run on the device.
    """
    if ctx.device.mobile_platform == DevicePlatform.IOS:
        ios_command = "xcrun simctl listapps booted | grep CFBundleIdentifier"
        return run_shell_command_on_host(ios_command)
    android_command = "pm list packages -f"
    return str(get_adb_device(ctx).shell(android_command))
@@ -0,0 +1,149 @@
1
+ from typing import Literal
2
+
3
+ from langchain_core.messages import (
4
+ AIMessage,
5
+ )
6
+ from langgraph.constants import END, START
7
+ from langgraph.graph import StateGraph
8
+ from langgraph.graph.state import CompiledStateGraph
9
+ from langgraph.prebuilt import ToolNode
10
+ from minitap.mobile_use.agents.contextor.contextor import ContextorNode
11
+ from minitap.mobile_use.agents.cortex.cortex import CortexNode
12
+ from minitap.mobile_use.agents.executor.executor import ExecutorNode
13
+ from minitap.mobile_use.agents.executor.executor_context_cleaner import (
14
+ executor_context_cleaner_node,
15
+ )
16
+ from minitap.mobile_use.agents.orchestrator.orchestrator import OrchestratorNode
17
+ from minitap.mobile_use.agents.planner.planner import PlannerNode
18
+ from minitap.mobile_use.agents.planner.utils import (
19
+ all_completed,
20
+ get_current_subgoal,
21
+ one_of_them_is_failure,
22
+ )
23
+ from minitap.mobile_use.agents.summarizer.summarizer import SummarizerNode
24
+ from minitap.mobile_use.context import MobileUseContext
25
+ from minitap.mobile_use.graph.state import State
26
+ from minitap.mobile_use.tools.index import EXECUTOR_WRAPPERS_TOOLS, get_tools_from_wrappers
27
+ from minitap.mobile_use.utils.logger import get_logger
28
+
29
+ logger = get_logger(__name__)
30
+
31
+
32
def post_orchestrator_gate(
    state: State,
) -> Literal["continue", "replan", "end"]:
    """
    Route after the orchestrator, based on the subgoal plan.

    "replan" when a subgoal failed, "end" when all subgoals are completed or
    none is current, "continue" otherwise.
    """
    logger.info("Starting post_orchestrator_gate")
    plan = state.subgoal_plan

    if one_of_them_is_failure(plan):
        logger.info("One of the subgoals is in failure state, asking to replan")
        return "replan"

    if all_completed(plan):
        logger.info("All subgoals are completed, ending the goal")
        return "end"

    if get_current_subgoal(plan):
        logger.info("Goal is not achieved, continuing")
        return "continue"

    logger.info("No subgoal running, ending the goal")
    return "end"
50
+
51
+
52
def post_cortex_gate(
    state: State,
) -> Literal["continue", "end_subgoal"]:
    """Route after the cortex: "continue" when it produced decisions, else "end_subgoal"."""
    logger.info("Starting post_cortex_gate")
    return "continue" if state.structured_decisions else "end_subgoal"
59
+
60
+
61
def post_executor_gate(
    state: State,
) -> Literal["invoke_tools", "skip"]:
    """
    Route after the executor.

    "invoke_tools" when the last message is an AIMessage carrying tool calls,
    "skip" in every other case.
    """
    logger.info("Starting post_executor_gate")
    messages = state.messages
    if not messages:
        return "skip"

    last_message = messages[-1]
    if not isinstance(last_message, AIMessage):
        return "skip"

    tool_calls = getattr(last_message, "tool_calls", None)
    if tool_calls and len(tool_calls) > 0:
        logger.info("🔨👁️ Found tool calls: " + str(tool_calls))
        return "invoke_tools"

    logger.info("🔨❌ No tool calls found")
    return "skip"
78
+
79
+
80
def post_executor_tools_gate(
    state: State,
) -> Literal["continue", "failed", "done"]:
    """
    Route after the executor tools ran.

    A failure takes precedence over a retrigger: "failed", then "continue",
    then "done".
    """
    logger.info("Starting post_executor_tools_gate")
    if state.executor_failed:
        outcome = "failed"
    elif state.executor_retrigger:
        outcome = "continue"
    else:
        outcome = "done"
    return outcome
89
+
90
+
91
async def get_graph(ctx: MobileUseContext) -> CompiledStateGraph:
    """
    Build and compile the agent graph for the given context.

    Flow: START -> planner -> orchestrator, which either ends, replans or
    continues to contextor -> cortex. The cortex either hands back to the
    orchestrator or continues to the executor, whose tool calls (if any) run in
    executor_tools. Every executor round ends in executor_context_cleaner ->
    summarizer -> contextor.
    """
    builder = StateGraph(State)

    ## Define nodes
    builder.add_node("planner", PlannerNode(ctx))
    builder.add_node("orchestrator", OrchestratorNode(ctx))
    builder.add_node("contextor", ContextorNode(ctx))
    builder.add_node("cortex", CortexNode(ctx))
    builder.add_node("executor", ExecutorNode(ctx))

    executor_tools = get_tools_from_wrappers(ctx=ctx, wrappers=EXECUTOR_WRAPPERS_TOOLS)
    builder.add_node("executor_tools", ToolNode(executor_tools))

    builder.add_node("executor_context_cleaner", executor_context_cleaner_node)
    builder.add_node("summarizer", SummarizerNode(ctx))

    # Routing tables: gate return value -> next node
    orchestrator_routes = {
        "continue": "contextor",
        "replan": "planner",
        "end": END,
    }
    cortex_routes = {
        "continue": "executor",
        "end_subgoal": "orchestrator",
    }
    executor_routes = {
        "invoke_tools": "executor_tools",
        "skip": "executor_context_cleaner",
    }
    executor_tools_routes = {
        "continue": "executor",
        "done": "executor_context_cleaner",
        "failed": "executor_context_cleaner",
    }

    # Linking nodes
    builder.add_edge(START, "planner")
    builder.add_edge("planner", "orchestrator")
    builder.add_conditional_edges("orchestrator", post_orchestrator_gate, orchestrator_routes)
    builder.add_edge("contextor", "cortex")
    builder.add_conditional_edges("cortex", post_cortex_gate, cortex_routes)
    builder.add_conditional_edges("executor", post_executor_gate, executor_routes)
    builder.add_conditional_edges("executor_tools", post_executor_tools_gate, executor_tools_routes)
    builder.add_edge("executor_context_cleaner", "summarizer")
    builder.add_edge("summarizer", "contextor")

    return builder.compile()
@@ -0,0 +1,73 @@
1
+ from langchain_core.messages import AIMessage, AnyMessage
2
+ from langgraph.graph import add_messages
3
+ from langgraph.prebuilt.chat_agent_executor import AgentStatePydantic
4
+ from typing_extensions import Annotated, Optional
5
+
6
+ from minitap.mobile_use.agents.planner.types import Subgoal
7
+ from minitap.mobile_use.utils.logger import get_logger
8
+ from minitap.mobile_use.utils.recorder import record_interaction
9
+ from minitap.mobile_use.context import MobileUseContext
10
+
11
+ logger = get_logger(__name__)
12
+
13
+
14
def take_last(a, b):
    """Ignore the first value ``a`` and return the second value ``b``."""
    return b
16
+
17
+
18
# Graph state shared by the agents, on top of AgentStatePydantic.
class State(AgentStatePydantic):
    # planner related keys
    initial_goal: Annotated[str, "Initial goal given by the user"]

    # orchestrator related keys
    subgoal_plan: Annotated[list[Subgoal], "The current plan, made of subgoals"]

    # contextor related keys
    latest_screenshot_base64: Annotated[Optional[str], "Latest screenshot of the device", take_last]
    latest_ui_hierarchy: Annotated[
        Optional[list[dict]], "Latest UI hierarchy of the device", take_last
    ]
    focused_app_info: Annotated[Optional[str], "Focused app info", take_last]
    device_date: Annotated[Optional[str], "Date of the device", take_last]

    # cortex related keys
    structured_decisions: Annotated[
        Optional[str],
        "Structured decisions made by the cortex, for the executor to follow",
        take_last,
    ]

    # executor related keys
    executor_retrigger: Annotated[Optional[bool], "Whether the executor must be retriggered"]
    executor_failed: Annotated[bool, "Whether a tool call made by the executor failed"]
    executor_messages: Annotated[list[AnyMessage], "Sequential Executor messages", add_messages]
    cortex_last_thought: Annotated[Optional[str], "Last thought of the cortex for the executor"]

    # common keys
    agents_thoughts: Annotated[
        list[str],
        "All thoughts and reasons that led to actions (why a tool was called, expected outcomes..)",
    ]

    def sanitize_update(self, ctx: MobileUseContext, update: dict):
        """
        Normalize a state update in place and return it.

        Only "agents_thoughts" is touched: a single string is wrapped in a list,
        and the new thoughts are appended to the ones already in the state.
        Raises ValueError when the value is neither a str nor a list.
        """
        new_thoughts: Optional[str | list[str]] = update.get("agents_thoughts", None)
        if new_thoughts is None:
            return update

        if isinstance(new_thoughts, str):
            new_thoughts = [new_thoughts]
        elif not isinstance(new_thoughts, list):
            raise ValueError("agents_thoughts must be a str or list[str]")

        update["agents_thoughts"] = _add_agent_thoughts(
            ctx=ctx,
            old=self.agents_thoughts,
            new=new_thoughts,
        )
        return update
68
+
69
+
70
def _add_agent_thoughts(ctx: MobileUseContext, old: list[str], new: list[str]) -> list[str]:
    """
    Return the previous thoughts followed by the new ones, as a new list.

    When the context has an execution setup, the new thoughts are also recorded
    as an AIMessage through record_interaction.
    """
    if ctx.execution_setup:
        recorded = AIMessage(content=str(new))
        record_interaction(ctx, response=recorded)
    return [*old, *new]