minitap-mobile-use 0.0.1.dev0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of minitap-mobile-use might be problematic. Click here for more details.
- minitap/mobile_use/__init__.py +0 -0
- minitap/mobile_use/agents/contextor/contextor.py +42 -0
- minitap/mobile_use/agents/cortex/cortex.md +93 -0
- minitap/mobile_use/agents/cortex/cortex.py +107 -0
- minitap/mobile_use/agents/cortex/types.py +11 -0
- minitap/mobile_use/agents/executor/executor.md +73 -0
- minitap/mobile_use/agents/executor/executor.py +84 -0
- minitap/mobile_use/agents/executor/executor_context_cleaner.py +27 -0
- minitap/mobile_use/agents/executor/utils.py +11 -0
- minitap/mobile_use/agents/hopper/hopper.md +13 -0
- minitap/mobile_use/agents/hopper/hopper.py +45 -0
- minitap/mobile_use/agents/orchestrator/human.md +13 -0
- minitap/mobile_use/agents/orchestrator/orchestrator.md +18 -0
- minitap/mobile_use/agents/orchestrator/orchestrator.py +114 -0
- minitap/mobile_use/agents/orchestrator/types.py +14 -0
- minitap/mobile_use/agents/outputter/human.md +25 -0
- minitap/mobile_use/agents/outputter/outputter.py +75 -0
- minitap/mobile_use/agents/outputter/test_outputter.py +107 -0
- minitap/mobile_use/agents/planner/human.md +12 -0
- minitap/mobile_use/agents/planner/planner.md +64 -0
- minitap/mobile_use/agents/planner/planner.py +64 -0
- minitap/mobile_use/agents/planner/types.py +44 -0
- minitap/mobile_use/agents/planner/utils.py +45 -0
- minitap/mobile_use/agents/summarizer/summarizer.py +34 -0
- minitap/mobile_use/clients/device_hardware_client.py +23 -0
- minitap/mobile_use/clients/ios_client.py +44 -0
- minitap/mobile_use/clients/screen_api_client.py +53 -0
- minitap/mobile_use/config.py +285 -0
- minitap/mobile_use/constants.py +2 -0
- minitap/mobile_use/context.py +65 -0
- minitap/mobile_use/controllers/__init__.py +0 -0
- minitap/mobile_use/controllers/mobile_command_controller.py +379 -0
- minitap/mobile_use/controllers/platform_specific_commands_controller.py +74 -0
- minitap/mobile_use/graph/graph.py +149 -0
- minitap/mobile_use/graph/state.py +73 -0
- minitap/mobile_use/main.py +122 -0
- minitap/mobile_use/sdk/__init__.py +12 -0
- minitap/mobile_use/sdk/agent.py +524 -0
- minitap/mobile_use/sdk/builders/__init__.py +10 -0
- minitap/mobile_use/sdk/builders/agent_config_builder.py +213 -0
- minitap/mobile_use/sdk/builders/index.py +15 -0
- minitap/mobile_use/sdk/builders/task_request_builder.py +218 -0
- minitap/mobile_use/sdk/constants.py +14 -0
- minitap/mobile_use/sdk/examples/README.md +45 -0
- minitap/mobile_use/sdk/examples/__init__.py +1 -0
- minitap/mobile_use/sdk/examples/simple_photo_organizer.py +76 -0
- minitap/mobile_use/sdk/examples/smart_notification_assistant.py +177 -0
- minitap/mobile_use/sdk/types/__init__.py +49 -0
- minitap/mobile_use/sdk/types/agent.py +73 -0
- minitap/mobile_use/sdk/types/exceptions.py +74 -0
- minitap/mobile_use/sdk/types/task.py +191 -0
- minitap/mobile_use/sdk/utils.py +28 -0
- minitap/mobile_use/servers/config.py +19 -0
- minitap/mobile_use/servers/device_hardware_bridge.py +212 -0
- minitap/mobile_use/servers/device_screen_api.py +143 -0
- minitap/mobile_use/servers/start_servers.py +151 -0
- minitap/mobile_use/servers/stop_servers.py +215 -0
- minitap/mobile_use/servers/utils.py +11 -0
- minitap/mobile_use/services/accessibility.py +100 -0
- minitap/mobile_use/services/llm.py +143 -0
- minitap/mobile_use/tools/index.py +54 -0
- minitap/mobile_use/tools/mobile/back.py +52 -0
- minitap/mobile_use/tools/mobile/copy_text_from.py +77 -0
- minitap/mobile_use/tools/mobile/erase_text.py +124 -0
- minitap/mobile_use/tools/mobile/input_text.py +74 -0
- minitap/mobile_use/tools/mobile/launch_app.py +59 -0
- minitap/mobile_use/tools/mobile/list_packages.py +78 -0
- minitap/mobile_use/tools/mobile/long_press_on.py +62 -0
- minitap/mobile_use/tools/mobile/open_link.py +59 -0
- minitap/mobile_use/tools/mobile/paste_text.py +66 -0
- minitap/mobile_use/tools/mobile/press_key.py +58 -0
- minitap/mobile_use/tools/mobile/run_flow.py +57 -0
- minitap/mobile_use/tools/mobile/stop_app.py +58 -0
- minitap/mobile_use/tools/mobile/swipe.py +56 -0
- minitap/mobile_use/tools/mobile/take_screenshot.py +70 -0
- minitap/mobile_use/tools/mobile/tap.py +66 -0
- minitap/mobile_use/tools/mobile/wait_for_animation_to_end.py +68 -0
- minitap/mobile_use/tools/tool_wrapper.py +33 -0
- minitap/mobile_use/utils/cli_helpers.py +40 -0
- minitap/mobile_use/utils/cli_selection.py +144 -0
- minitap/mobile_use/utils/conversations.py +31 -0
- minitap/mobile_use/utils/decorators.py +123 -0
- minitap/mobile_use/utils/errors.py +6 -0
- minitap/mobile_use/utils/file.py +13 -0
- minitap/mobile_use/utils/logger.py +184 -0
- minitap/mobile_use/utils/media.py +73 -0
- minitap/mobile_use/utils/recorder.py +55 -0
- minitap/mobile_use/utils/requests_utils.py +37 -0
- minitap/mobile_use/utils/shell_utils.py +20 -0
- minitap/mobile_use/utils/time.py +6 -0
- minitap/mobile_use/utils/ui_hierarchy.py +30 -0
- minitap_mobile_use-0.0.1.dev0.dist-info/METADATA +274 -0
- minitap_mobile_use-0.0.1.dev0.dist-info/RECORD +95 -0
- minitap_mobile_use-0.0.1.dev0.dist-info/WHEEL +4 -0
- minitap_mobile_use-0.0.1.dev0.dist-info/entry_points.txt +3 -0
|
@@ -0,0 +1,379 @@
|
|
|
1
|
+
import uuid
|
|
2
|
+
from enum import Enum
|
|
3
|
+
from typing import Annotated, Literal, Optional, Union
|
|
4
|
+
|
|
5
|
+
import yaml
|
|
6
|
+
from langgraph.types import Command
|
|
7
|
+
from pydantic import BaseModel, BeforeValidator, ConfigDict, Field
|
|
8
|
+
from requests import JSONDecodeError
|
|
9
|
+
|
|
10
|
+
from minitap.mobile_use.clients.device_hardware_client import DeviceHardwareClient
|
|
11
|
+
from minitap.mobile_use.clients.screen_api_client import ScreenApiClient
|
|
12
|
+
from minitap.mobile_use.context import DeviceContext, DevicePlatform, MobileUseContext
|
|
13
|
+
from minitap.mobile_use.utils.errors import ControllerErrors
|
|
14
|
+
from minitap.mobile_use.utils.logger import get_logger
|
|
15
|
+
|
|
16
|
+
logger = get_logger(__name__)
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
###### Screen elements retrieval ######
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
# Parsed body of the screen API's "/screen-info" response (built by get_screen_data).
class ScreenDataResponse(BaseModel):
    base64: str  # returned as-is by take_screenshot(); presumably the encoded screenshot
    elements: list  # passed as `latest_ui_hierarchy` by this module's __main__ harness
    width: int
    height: int
    platform: str
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def get_screen_data(screen_api_client: ScreenApiClient):
    """Query "/screen-info" (with retry) and parse the JSON body into a ScreenDataResponse."""
    payload = screen_api_client.get_with_retry("/screen-info").json()
    return ScreenDataResponse(**payload)
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def take_screenshot(ctx: MobileUseContext):
    """Return the ``base64`` field of the screen data fetched through the context's client."""
    screen_data = get_screen_data(ctx.screen_api_client)
    return screen_data.base64
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
# Body posted to the hardware bridge "run-command" endpoint by run_flow().
class RunFlowRequest(BaseModel):
    model_config = ConfigDict(extra="forbid")  # reject unknown fields
    yaml: str  # a single flow step, dumped to YAML
    dry_run: bool = Field(default=False, alias="dryRun")  # dumped as "dryRun" when by_alias=True
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def run_flow(ctx: MobileUseContext, flow_steps: list, dry_run: bool = False) -> Optional[dict]:
    """
    Execute a flow, i.e. a sequence of commands, one step at a time.

    Each step is dumped to YAML and POSTed to the hardware bridge "run-command"
    endpoint. Execution stops at the first step answered with a status code >= 300.

    Returns None when every step succeeded, otherwise a dict holding the
    "status_code" and "body" of the failed step's response.
    """
    logger.info(f"Running flow: {flow_steps}")

    for flow_step in flow_steps:
        request = RunFlowRequest(yaml=yaml.dump(flow_step), dryRun=dry_run)
        response = ctx.hw_bridge_client.post(
            "run-command", json=request.model_dump(by_alias=True)
        )

        # Prefer the decoded JSON body; fall back to the raw text when it is not JSON.
        try:
            body = response.json()
        except JSONDecodeError:
            body = response.text

        if isinstance(body, dict):
            # Drop null entries so the reported body only carries populated keys.
            body = {key: value for key, value in body.items() if value is not None}

        status = response.status_code
        if status >= 300:
            logger.error(f"Tool call failed with status code: {status}")
            return {"status_code": status, "body": body}

    logger.success("Tool call completed")
    return None
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
class CoordinatesSelectorRequest(BaseModel):
    # Absolute screen point; unknown fields are rejected.
    model_config = ConfigDict(extra="forbid")
    x: int
    y: int

    def to_str(self):
        """Render the point as "<x>, <y>"."""
        return "{}, {}".format(self.x, self.y)
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
class PercentagesSelectorRequest(BaseModel):
    # Screen point expressed as percentages:
    #   0%,0%     -> top-left corner
    #   100%,100% -> bottom-right corner
    #   50%,50%   -> center
    model_config = ConfigDict(extra="forbid")

    x_percent: int
    y_percent: int

    def to_str(self):
        """Render the point as "<x>%, <y>%"."""
        return "{}%, {}%".format(self.x_percent, self.y_percent)
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
class IdSelectorRequest(BaseModel):
    # Selects a view by its id; unknown fields are rejected.
    model_config = ConfigDict(extra="forbid")
    id: str

    def to_dict(self) -> dict[str, str | int]:
        """Return the selector body: a dict with the single key "id"."""
        return dict(id=self.id)
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
# Useful to tap on an element when there are multiple views with the same id
|
|
106
|
+
class IdWithTextSelectorRequest(BaseModel):
    # Selects a view by id and text together; unknown fields are rejected.
    model_config = ConfigDict(extra="forbid")
    id: str
    text: str

    def to_dict(self) -> dict[str, str | int]:
        """Return the selector body with the keys "id" and "text", in that order."""
        return dict(id=self.id, text=self.text)
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
class TextSelectorRequest(BaseModel):
    # Selects a view by its text; unknown fields are rejected.
    model_config = ConfigDict(extra="forbid")
    text: str

    def to_dict(self) -> dict[str, str | int]:
        """Return the selector body: a dict with the single key "text"."""
        return dict(text=self.text)
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
class SelectorRequestWithCoordinates(BaseModel):
    # Selects a screen point given as absolute coordinates.
    model_config = ConfigDict(extra="forbid")
    coordinates: CoordinatesSelectorRequest

    def to_dict(self) -> dict[str, str | int]:
        """Return the selector body: "point" mapped to the "<x>, <y>" string."""
        point = self.coordinates.to_str()
        return dict(point=point)
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
class SelectorRequestWithPercentages(BaseModel):
    # Selects a screen point given as percentages.
    model_config = ConfigDict(extra="forbid")
    percentages: PercentagesSelectorRequest

    def to_dict(self) -> dict[str, str | int]:
        """Return the selector body: "point" mapped to the "<x>%, <y>%" string."""
        point = self.percentages.to_str()
        return dict(point=point)
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
# Any selector accepted by tap(), long_press_on() and copy_text_from();
# every member exposes a to_dict() method producing the command body.
SelectorRequest = Union[
    IdSelectorRequest,
    SelectorRequestWithCoordinates,
    SelectorRequestWithPercentages,
    TextSelectorRequest,
    IdWithTextSelectorRequest,
]
|
|
146
|
+
|
|
147
|
+
|
|
148
|
+
def tap(
    ctx: MobileUseContext,
    selector_request: SelectorRequest,
    dry_run: bool = False,
    index: Optional[int] = None,
):
    """
    Tap on a selector, then wait for animations to end.

    Args:
        ctx: Context whose hardware bridge client runs the flow.
        selector_request: Selector describing the element to tap.
        dry_run: Forwarded to the flow runner.
        index: Optional index, used when multiple views match the same selector.
            Any non-None value is forwarded, including 0.

    Returns:
        The flow result: None on success, or the status/body dict of the failed step.

    Raises:
        ControllerErrors: If the selector serializes to an empty body.
    """
    tap_body = selector_request.to_dict()
    if not tap_body:
        error = "Invalid tap selector request, could not format yaml"
        logger.error(error)
        raise ControllerErrors(error)
    # Compare against None: a truthiness check would silently drop index 0.
    if index is not None:
        tap_body["index"] = index
    flow_input = [{"tapOn": tap_body}]
    return run_flow_with_wait_for_animation_to_end(ctx, flow_input, dry_run=dry_run)
|
|
167
|
+
|
|
168
|
+
|
|
169
|
+
def long_press_on(
    ctx: MobileUseContext,
    selector_request: SelectorRequest,
    dry_run: bool = False,
    index: Optional[int] = None,
):
    """
    Long-press on a selector, then wait for animations to end.

    Args:
        ctx: Context whose hardware bridge client runs the flow.
        selector_request: Selector describing the element to press.
        dry_run: Forwarded to the flow runner.
        index: Optional index, used when multiple views match the same selector.
            Any non-None value is forwarded, including 0.

    Returns:
        The flow result: None on success, or the status/body dict of the failed step.

    Raises:
        ControllerErrors: If the selector serializes to an empty body.
    """
    long_press_on_body = selector_request.to_dict()
    if not long_press_on_body:
        error = "Invalid longPressOn selector request, could not format yaml"
        logger.error(error)
        raise ControllerErrors(error)
    # Compare against None: a truthiness check would silently drop index 0.
    if index is not None:
        long_press_on_body["index"] = index
    flow_input = [{"longPressOn": long_press_on_body}]
    return run_flow_with_wait_for_animation_to_end(ctx, flow_input, dry_run=dry_run)
|
|
184
|
+
|
|
185
|
+
|
|
186
|
+
class SwipeStartEndCoordinatesRequest(BaseModel):
    # Swipe described by absolute start and end points.
    model_config = ConfigDict(extra="forbid")
    start: CoordinatesSelectorRequest
    end: CoordinatesSelectorRequest

    def to_dict(self):
        """Return "start" and "end" as "<x>, <y>" strings."""
        start_point = self.start.to_str()
        end_point = self.end.to_str()
        return dict(start=start_point, end=end_point)
|
|
193
|
+
|
|
194
|
+
|
|
195
|
+
class SwipeStartEndPercentagesRequest(BaseModel):
    # Swipe described by start and end points given as percentages.
    model_config = ConfigDict(extra="forbid")
    start: PercentagesSelectorRequest
    end: PercentagesSelectorRequest

    def to_dict(self):
        """Return "start" and "end" as "<x>%, <y>%" strings."""
        start_point = self.start.to_str()
        end_point = self.end.to_str()
        return dict(start=start_point, end=end_point)
|
|
202
|
+
|
|
203
|
+
|
|
204
|
+
# Swipe direction literal; string inputs are upper-cased before validation,
# so "up" is accepted as "UP". Non-string inputs are passed through unchanged.
SwipeDirection = Annotated[
    Literal["UP", "DOWN", "LEFT", "RIGHT"],
    BeforeValidator(lambda v: v.upper() if isinstance(v, str) else v),
]
|
|
208
|
+
|
|
209
|
+
|
|
210
|
+
class SwipeRequest(BaseModel):
    # A swipe given either as start/end points (coordinates or percentages)
    # or as a direction, with an optional duration.
    model_config = ConfigDict(extra="forbid")
    swipe_mode: SwipeStartEndCoordinatesRequest | SwipeStartEndPercentagesRequest | SwipeDirection
    duration: Optional[int] = None  # in ms, default is 400ms

    def to_dict(self):
        """
        Build the swipe command body.

        Start/end modes contribute "start" and "end"; a direction contributes
        "direction". "duration" is appended only when it is truthy.
        """
        mode = self.swipe_mode
        point_modes = (SwipeStartEndCoordinatesRequest, SwipeStartEndPercentagesRequest)
        if isinstance(mode, point_modes):
            body = dict(mode.to_dict())
        elif mode in ["UP", "DOWN", "LEFT", "RIGHT"]:
            body = {"direction": mode}
        else:
            body = {}
        if self.duration:
            body["duration"] = self.duration
        return body
|
|
226
|
+
|
|
227
|
+
|
|
228
|
+
def swipe(ctx: MobileUseContext, swipe_request: SwipeRequest, dry_run: bool = False):
    """
    Run a "swipe" command, then wait for animations to end.

    Raises ControllerErrors when the request serializes to an empty body.
    """
    body = swipe_request.to_dict()
    if body:
        return run_flow_with_wait_for_animation_to_end(ctx, [{"swipe": body}], dry_run=dry_run)
    error = "Invalid swipe selector request, could not format yaml"
    logger.error(error)
    raise ControllerErrors(error)
|
|
236
|
+
|
|
237
|
+
|
|
238
|
+
##### Text related commands #####
|
|
239
|
+
|
|
240
|
+
|
|
241
|
+
def input_text(ctx: MobileUseContext, text: str, dry_run: bool = False):
    """Run a single "inputText" command carrying `text`."""
    flow_input = [{"inputText": text}]
    return run_flow(ctx, flow_input, dry_run=dry_run)
|
|
243
|
+
|
|
244
|
+
|
|
245
|
+
def copy_text_from(ctx: MobileUseContext, selector_request: SelectorRequest, dry_run: bool = False):
    """
    Run a "copyTextFrom" command on the given selector.

    Raises ControllerErrors when the selector serializes to an empty body.
    """
    body = selector_request.to_dict()
    if body:
        return run_flow(ctx, [{"copyTextFrom": body}], dry_run=dry_run)
    error = "Invalid copyTextFrom selector request, could not format yaml"
    logger.error(error)
    raise ControllerErrors(error)
|
|
253
|
+
|
|
254
|
+
|
|
255
|
+
def paste_text(ctx: MobileUseContext, dry_run: bool = False):
    """Run the bare "pasteText" command."""
    flow_input = ["pasteText"]
    return run_flow(ctx, flow_input, dry_run=dry_run)
|
|
257
|
+
|
|
258
|
+
|
|
259
|
+
def erase_text(ctx: MobileUseContext, nb_chars: Optional[int] = None, dry_run: bool = False):
    """
    Removes characters from the currently selected textfield (if any).

    With nb_chars set, "eraseText" is sent with that count; otherwise the bare
    "eraseText" command is sent (50 characters, per the original author's note).
    """
    erase_step = "eraseText" if nb_chars is None else {"eraseText": nb_chars}
    return run_flow(ctx, [erase_step], dry_run=dry_run)
|
|
267
|
+
|
|
268
|
+
|
|
269
|
+
##### App related commands #####
|
|
270
|
+
|
|
271
|
+
|
|
272
|
+
def launch_app(ctx: MobileUseContext, package_name: str, dry_run: bool = False):
    """Run "launchApp" for `package_name`, then wait for animations to end."""
    return run_flow_with_wait_for_animation_to_end(
        ctx, [{"launchApp": package_name}], dry_run=dry_run
    )
|
|
275
|
+
|
|
276
|
+
|
|
277
|
+
def stop_app(ctx: MobileUseContext, package_name: Optional[str] = None, dry_run: bool = False):
    """
    Run "stopApp", then wait for animations to end.

    The bare command is sent when `package_name` is None; otherwise the command
    carries the package name.
    """
    stop_step = "stopApp" if package_name is None else {"stopApp": package_name}
    return run_flow_with_wait_for_animation_to_end(ctx, [stop_step], dry_run=dry_run)
|
|
283
|
+
|
|
284
|
+
|
|
285
|
+
def open_link(ctx: MobileUseContext, url: str, dry_run: bool = False):
    """Run "openLink" for `url`, then wait for animations to end."""
    return run_flow_with_wait_for_animation_to_end(ctx, [{"openLink": url}], dry_run=dry_run)
|
|
288
|
+
|
|
289
|
+
|
|
290
|
+
##### Key related commands #####
|
|
291
|
+
|
|
292
|
+
|
|
293
|
+
def back(ctx: MobileUseContext, dry_run: bool = False):
    """Run the bare "back" command, then wait for animations to end."""
    return run_flow_with_wait_for_animation_to_end(ctx, ["back"], dry_run=dry_run)
|
|
296
|
+
|
|
297
|
+
|
|
298
|
+
# Keys accepted by press_key(); each value is the string sent with "pressKey".
class Key(Enum):
    ENTER = "Enter"
    HOME = "Home"
    BACK = "Back"
|
|
302
|
+
|
|
303
|
+
|
|
304
|
+
def press_key(ctx: MobileUseContext, key: Key, dry_run: bool = False):
    """Run "pressKey" with the enum member's value, then wait for animations to end."""
    key_name = key.value
    return run_flow_with_wait_for_animation_to_end(
        ctx, [{"pressKey": key_name}], dry_run=dry_run
    )
|
|
307
|
+
|
|
308
|
+
|
|
309
|
+
#### Other commands ####
|
|
310
|
+
|
|
311
|
+
|
|
312
|
+
# Timeout presets for "waitForAnimationToEnd"; values are presumably milliseconds — TODO confirm.
class WaitTimeout(Enum):
    SHORT = 500
    MEDIUM = 1000
    LONG = 5000
|
|
316
|
+
|
|
317
|
+
|
|
318
|
+
def wait_for_animation_to_end(
    ctx: MobileUseContext, timeout: Optional[WaitTimeout] = None, dry_run: bool = False
):
    """
    Run "waitForAnimationToEnd".

    The bare command is sent when `timeout` is None; otherwise the command
    carries the preset's value under "timeout".
    """
    if timeout is None:
        wait_step = "waitForAnimationToEnd"
    else:
        wait_step = {"waitForAnimationToEnd": {"timeout": timeout.value}}
    return run_flow(ctx, [wait_step], dry_run=dry_run)
|
|
324
|
+
|
|
325
|
+
|
|
326
|
+
def run_flow_with_wait_for_animation_to_end(
    ctx: MobileUseContext, base_flow: list, dry_run: bool = False
):
    """
    Run `base_flow` followed by a "waitForAnimationToEnd" step (MEDIUM timeout).

    The caller's list is left untouched: the wait step is added to a copy, so a
    flow list that is reused across calls does not accumulate wait steps.

    Returns the flow result: None on success, or the status/body dict of the failed step.
    """
    wait_step = {"waitForAnimationToEnd": {"timeout": WaitTimeout.MEDIUM.value}}
    return run_flow(ctx, [*base_flow, wait_step], dry_run=dry_run)
|
|
331
|
+
|
|
332
|
+
|
|
333
|
+
# Manual debugging harness: invokes the erase_text tool against a local device.
# The context below is hard-coded to an Android emulator and to bridge / screen API
# servers on localhost:9999 and localhost:9998.
if __name__ == "__main__":
    # long press, erase
    # input_text(text="test")
    # erase_text()
    ctx = MobileUseContext(
        device=DeviceContext(
            host_platform="LINUX",
            mobile_platform=DevicePlatform.ANDROID,
            device_id="emulator-5554",
            device_width=1080,
            device_height=1920,
        ),
        hw_bridge_client=DeviceHardwareClient("http://localhost:9999"),
        screen_api_client=ScreenApiClient("http://localhost:9998"),
    )
    screen_data = get_screen_data(ctx.screen_api_client)
    # Imported here rather than at module top; presumably to avoid an import cycle — TODO confirm
    from minitap.mobile_use.graph.state import State
    from minitap.mobile_use.tools.mobile.erase_text import get_erase_text_tool

    # Minimal state seeded with the live screen data; every other field is empty.
    dummy_state = State(
        latest_ui_hierarchy=screen_data.elements,
        messages=[],
        initial_goal="",
        subgoal_plan=[],
        latest_screenshot_base64=screen_data.base64,
        focused_app_info=None,
        device_date="",
        structured_decisions=None,
        executor_retrigger=False,
        executor_failed=False,
        executor_messages=[],
        cortex_last_thought="",
        agents_thoughts=[],
    )

    # invoke erase_text tool
    input_resource_id = "com.google.android.settings.intelligence:id/open_search_view_edit_text"
    command_output: Command = get_erase_text_tool(ctx=ctx).invoke(
        {
            "tool_call_id": uuid.uuid4().hex,
            "agent_thought": "",
            "input_text_resource_id": input_resource_id,
            "state": dummy_state,
            "executor_metadata": None,
        }
    )
    print(command_output)
|
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
from datetime import date
|
|
2
|
+
import json
|
|
3
|
+
from typing import Optional
|
|
4
|
+
|
|
5
|
+
from adbutils import AdbDevice
|
|
6
|
+
from minitap.mobile_use.utils.logger import MobileUseLogger
|
|
7
|
+
from minitap.mobile_use.utils.shell_utils import run_shell_command_on_host
|
|
8
|
+
from minitap.mobile_use.context import MobileUseContext
|
|
9
|
+
from minitap.mobile_use.context import DevicePlatform
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def get_adb_device(ctx: MobileUseContext) -> AdbDevice:
    """
    Return the ADB device whose serial is the context's device id.

    Raises:
        ValueError: If the context's mobile platform is not Android.
        ConnectionError: If the ADB client yields a falsy device.
    """
    if ctx.device.mobile_platform != DevicePlatform.ANDROID:
        raise ValueError("Device is not an Android device")
    adb_client = ctx.get_adb_client()
    adb_device = adb_client.device(serial=ctx.device.device_id)
    if adb_device:
        return adb_device
    raise ConnectionError(f"Device {ctx.device.device_id} not found.")
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def get_first_device(
    logger: Optional[MobileUseLogger] = None,
) -> tuple[Optional[str], Optional[DevicePlatform]]:
    """
    Return the identifier and platform of the first available device.

    Android devices listed by ``adb devices`` are checked first, then booted iOS
    simulators listed by ``xcrun simctl``. A failing command is logged (when a
    logger is given) and does not prevent the other platform from being checked.

    Returns:
        (device id, platform), or (None, None) when no device is found.
    """
    try:
        android_output = run_shell_command_on_host("adb devices")
    except RuntimeError as e:
        # Keep going: a host without a working adb may still have a booted simulator.
        if logger:
            logger.error(f"ADB command failed: {e}")
    else:
        for line in android_output.strip().splitlines():
            columns = line.split()
            # Rows look like "<serial> <state>". Require the exact state "device" so the
            # header line and rows that merely contain the word are not picked up.
            if len(columns) >= 2 and columns[1] == "device":
                return columns[0], DevicePlatform.ANDROID

    try:
        ios_output = run_shell_command_on_host("xcrun simctl list devices booted -j")
        data = json.loads(ios_output)
        for runtime, devices in data.get("devices", {}).items():
            if "iOS" not in runtime:
                continue
            for device in devices:
                if device.get("state") == "Booted":
                    return device["udid"], DevicePlatform.IOS
    except RuntimeError as e:
        if logger:
            logger.error(f"xcrun command failed: {e}")

    return None, None
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def get_focused_app_info(ctx: MobileUseContext) -> Optional[str]:
    """
    Return the `dumpsys window` lines mentioning mCurrentFocus or mFocusedApp.

    Returns None on iOS, where no lookup is performed.
    """
    if ctx.device.mobile_platform == DevicePlatform.IOS:
        return None
    focus_query = "dumpsys window | grep -E 'mCurrentFocus|mFocusedApp'"
    adb_device = get_adb_device(ctx)
    return str(adb_device.shell(focus_query))
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def get_device_date(ctx: MobileUseContext) -> str:
    """
    Return the current date and time of the device as a string.

    On Android this is the output of the device's ``date`` shell command. On iOS
    the host clock is used instead and formatted as "%a %b %d %H:%M:%S %Z %Y".
    """
    if ctx.device.mobile_platform == DevicePlatform.IOS:
        # Local import: only the `date` class is imported at module level.
        from datetime import datetime

        # A timezone-aware datetime is needed here: a plain `date` has no time or zone,
        # so %H:%M:%S would always read 00:00:00 and %Z would be empty.
        return datetime.now().astimezone().strftime("%a %b %d %H:%M:%S %Z %Y")
    device = get_adb_device(ctx)
    return str(device.shell("date"))
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def list_packages(ctx: MobileUseContext) -> str:
    """
    Return the raw listing of installed apps.

    iOS: host output of `xcrun simctl listapps booted` filtered on CFBundleIdentifier.
    Otherwise: output of `pm list packages -f` run on the ADB device.
    """
    if ctx.device.mobile_platform != DevicePlatform.IOS:
        adb_device = get_adb_device(ctx)
        return str(adb_device.shell("pm list packages -f"))
    # The pipe is part of the command string, exactly as the original joined it.
    return run_shell_command_on_host("xcrun simctl listapps booted | grep CFBundleIdentifier")
|
|
@@ -0,0 +1,149 @@
|
|
|
1
|
+
from typing import Literal
|
|
2
|
+
|
|
3
|
+
from langchain_core.messages import (
|
|
4
|
+
AIMessage,
|
|
5
|
+
)
|
|
6
|
+
from langgraph.constants import END, START
|
|
7
|
+
from langgraph.graph import StateGraph
|
|
8
|
+
from langgraph.graph.state import CompiledStateGraph
|
|
9
|
+
from langgraph.prebuilt import ToolNode
|
|
10
|
+
from minitap.mobile_use.agents.contextor.contextor import ContextorNode
|
|
11
|
+
from minitap.mobile_use.agents.cortex.cortex import CortexNode
|
|
12
|
+
from minitap.mobile_use.agents.executor.executor import ExecutorNode
|
|
13
|
+
from minitap.mobile_use.agents.executor.executor_context_cleaner import (
|
|
14
|
+
executor_context_cleaner_node,
|
|
15
|
+
)
|
|
16
|
+
from minitap.mobile_use.agents.orchestrator.orchestrator import OrchestratorNode
|
|
17
|
+
from minitap.mobile_use.agents.planner.planner import PlannerNode
|
|
18
|
+
from minitap.mobile_use.agents.planner.utils import (
|
|
19
|
+
all_completed,
|
|
20
|
+
get_current_subgoal,
|
|
21
|
+
one_of_them_is_failure,
|
|
22
|
+
)
|
|
23
|
+
from minitap.mobile_use.agents.summarizer.summarizer import SummarizerNode
|
|
24
|
+
from minitap.mobile_use.context import MobileUseContext
|
|
25
|
+
from minitap.mobile_use.graph.state import State
|
|
26
|
+
from minitap.mobile_use.tools.index import EXECUTOR_WRAPPERS_TOOLS, get_tools_from_wrappers
|
|
27
|
+
from minitap.mobile_use.utils.logger import get_logger
|
|
28
|
+
|
|
29
|
+
logger = get_logger(__name__)
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def post_orchestrator_gate(
    state: State,
) -> Literal["continue", "replan", "end"]:
    """
    Pick the route taken after the orchestrator node.

    "replan" when a subgoal has failed, "end" when all subgoals are completed or
    none is current, "continue" otherwise. Checks run in that order.
    """
    logger.info("Starting post_orchestrator_gate")
    plan = state.subgoal_plan

    if one_of_them_is_failure(plan):
        logger.info("One of the subgoals is in failure state, asking to replan")
        return "replan"
    elif all_completed(plan):
        logger.info("All subgoals are completed, ending the goal")
        return "end"
    elif not get_current_subgoal(plan):
        logger.info("No subgoal running, ending the goal")
        return "end"

    logger.info("Goal is not achieved, continuing")
    return "continue"
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def post_cortex_gate(
    state: State,
) -> Literal["continue", "end_subgoal"]:
    """Route to "continue" when structured decisions are present, else "end_subgoal"."""
    logger.info("Starting post_cortex_gate")
    return "continue" if state.structured_decisions else "end_subgoal"
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def post_executor_gate(
    state: State,
) -> Literal["invoke_tools", "skip"]:
    """
    Route to "invoke_tools" when the last message is an AIMessage carrying tool calls.

    Every other case (no messages, another message type, no tool calls) yields "skip".
    """
    logger.info("Starting post_executor_gate")
    history = state.messages
    if not history:
        return "skip"

    latest = history[-1]
    if isinstance(latest, AIMessage):
        pending_calls = getattr(latest, "tool_calls", None)
        if pending_calls:
            logger.info("🔨👁️ Found tool calls: " + str(pending_calls))
            return "invoke_tools"
        logger.info("🔨❌ No tool calls found")
    return "skip"
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def post_executor_tools_gate(
    state: State,
) -> Literal["continue", "failed", "done"]:
    """
    Pick the route taken after the executor tools node.

    A failed executor takes precedence over a retrigger request; "done" is the default.
    """
    logger.info("Starting post_executor_tools_gate")
    if state.executor_failed:
        outcome = "failed"
    elif state.executor_retrigger:
        outcome = "continue"
    else:
        outcome = "done"
    return outcome
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
async def get_graph(ctx: MobileUseContext) -> CompiledStateGraph:
    """
    Build and compile the agent state graph.

    Flow: START -> planner -> orchestrator, which routes to contextor, back to
    planner, or to END. contextor -> cortex, which routes to executor or back to
    orchestrator. The executor either invokes its tools or skips to the context
    cleaner; the cleaner feeds the summarizer, which loops back to contextor.
    """
    builder = StateGraph(State)

    # --- Nodes ---
    builder.add_node("planner", PlannerNode(ctx))
    builder.add_node("orchestrator", OrchestratorNode(ctx))
    builder.add_node("contextor", ContextorNode(ctx))
    builder.add_node("cortex", CortexNode(ctx))
    builder.add_node("executor", ExecutorNode(ctx))
    executor_tools = get_tools_from_wrappers(ctx=ctx, wrappers=EXECUTOR_WRAPPERS_TOOLS)
    builder.add_node("executor_tools", ToolNode(executor_tools))
    builder.add_node("executor_context_cleaner", executor_context_cleaner_node)
    builder.add_node("summarizer", SummarizerNode(ctx))

    # --- Edges ---
    builder.add_edge(START, "planner")
    builder.add_edge("planner", "orchestrator")

    orchestrator_routes = {"continue": "contextor", "replan": "planner", "end": END}
    builder.add_conditional_edges("orchestrator", post_orchestrator_gate, orchestrator_routes)

    builder.add_edge("contextor", "cortex")

    cortex_routes = {"continue": "executor", "end_subgoal": "orchestrator"}
    builder.add_conditional_edges("cortex", post_cortex_gate, cortex_routes)

    executor_routes = {"invoke_tools": "executor_tools", "skip": "executor_context_cleaner"}
    builder.add_conditional_edges("executor", post_executor_gate, executor_routes)

    # Both "done" and "failed" leave the tool loop through the context cleaner.
    tools_routes = {
        "continue": "executor",
        "done": "executor_context_cleaner",
        "failed": "executor_context_cleaner",
    }
    builder.add_conditional_edges("executor_tools", post_executor_tools_gate, tools_routes)

    builder.add_edge("executor_context_cleaner", "summarizer")
    builder.add_edge("summarizer", "contextor")

    return builder.compile()
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
from langchain_core.messages import AIMessage, AnyMessage
|
|
2
|
+
from langgraph.graph import add_messages
|
|
3
|
+
from langgraph.prebuilt.chat_agent_executor import AgentStatePydantic
|
|
4
|
+
from typing_extensions import Annotated, Optional
|
|
5
|
+
|
|
6
|
+
from minitap.mobile_use.agents.planner.types import Subgoal
|
|
7
|
+
from minitap.mobile_use.utils.logger import get_logger
|
|
8
|
+
from minitap.mobile_use.utils.recorder import record_interaction
|
|
9
|
+
from minitap.mobile_use.context import MobileUseContext
|
|
10
|
+
|
|
11
|
+
logger = get_logger(__name__)
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def take_last(a, b):
    """Reducer that ignores the previous value `a` and keeps the incoming value `b`."""
    _previous, latest = a, b
    return latest
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class State(AgentStatePydantic):
    # Graph state shared by the agents. Fields annotated with `take_last` keep only
    # the most recent value; `executor_messages` is reduced with `add_messages`.

    # planner related keys
    initial_goal: Annotated[str, "Initial goal given by the user"]

    # orchestrator related keys
    subgoal_plan: Annotated[list[Subgoal], "The current plan, made of subgoals"]

    # contextor related keys
    latest_screenshot_base64: Annotated[Optional[str], "Latest screenshot of the device", take_last]
    latest_ui_hierarchy: Annotated[
        Optional[list[dict]], "Latest UI hierarchy of the device", take_last
    ]
    focused_app_info: Annotated[Optional[str], "Focused app info", take_last]
    device_date: Annotated[Optional[str], "Date of the device", take_last]

    # cortex related keys
    structured_decisions: Annotated[
        Optional[str],
        "Structured decisions made by the cortex, for the executor to follow",
        take_last,
    ]

    # executor related keys
    executor_retrigger: Annotated[Optional[bool], "Whether the executor must be retriggered"]
    executor_failed: Annotated[bool, "Whether a tool call made by the executor failed"]
    executor_messages: Annotated[list[AnyMessage], "Sequential Executor messages", add_messages]
    cortex_last_thought: Annotated[Optional[str], "Last thought of the cortex for the executor"]

    # common keys
    agents_thoughts: Annotated[
        list[str],
        "All thoughts and reasons that led to actions (why a tool was called, expected outcomes..)",
    ]

    def sanitize_update(self, ctx: MobileUseContext, update: dict):
        """
        Validate a state update and apply its side effects.

        An "agents_thoughts" entry (a str or a list) is replaced, in `update`
        itself, by the current thoughts followed by the new ones. The same dict
        is returned. Raises ValueError for any other non-None value type.
        """
        incoming: Optional[str | list[str]] = update.get("agents_thoughts")
        if incoming is None:
            return update

        if isinstance(incoming, str):
            incoming = [incoming]
        elif not isinstance(incoming, list):
            raise ValueError("agents_thoughts must be a str or list[str]")

        update["agents_thoughts"] = _add_agent_thoughts(
            ctx=ctx,
            old=self.agents_thoughts,
            new=incoming,
        )
        return update
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def _add_agent_thoughts(ctx: MobileUseContext, old: list[str], new: list[str]) -> list[str]:
    """
    Return `old` followed by `new`.

    When the context has an execution setup, the new thoughts are first recorded
    as an AIMessage whose content is their string form.
    """
    recording_enabled = ctx.execution_setup
    if recording_enabled:
        thoughts_message = AIMessage(content=str(new))
        record_interaction(ctx, response=thoughts_message)
    combined = old + new
    return combined
|