minitap-mobile-use 3.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- minitap/mobile_use/__init__.py +0 -0
- minitap/mobile_use/agents/contextor/contextor.md +55 -0
- minitap/mobile_use/agents/contextor/contextor.py +175 -0
- minitap/mobile_use/agents/contextor/types.py +36 -0
- minitap/mobile_use/agents/cortex/cortex.md +135 -0
- minitap/mobile_use/agents/cortex/cortex.py +152 -0
- minitap/mobile_use/agents/cortex/types.py +15 -0
- minitap/mobile_use/agents/executor/executor.md +42 -0
- minitap/mobile_use/agents/executor/executor.py +87 -0
- minitap/mobile_use/agents/executor/tool_node.py +152 -0
- minitap/mobile_use/agents/hopper/hopper.md +15 -0
- minitap/mobile_use/agents/hopper/hopper.py +44 -0
- minitap/mobile_use/agents/orchestrator/human.md +12 -0
- minitap/mobile_use/agents/orchestrator/orchestrator.md +21 -0
- minitap/mobile_use/agents/orchestrator/orchestrator.py +134 -0
- minitap/mobile_use/agents/orchestrator/types.py +11 -0
- minitap/mobile_use/agents/outputter/human.md +25 -0
- minitap/mobile_use/agents/outputter/outputter.py +85 -0
- minitap/mobile_use/agents/outputter/test_outputter.py +167 -0
- minitap/mobile_use/agents/planner/human.md +14 -0
- minitap/mobile_use/agents/planner/planner.md +126 -0
- minitap/mobile_use/agents/planner/planner.py +101 -0
- minitap/mobile_use/agents/planner/types.py +51 -0
- minitap/mobile_use/agents/planner/utils.py +70 -0
- minitap/mobile_use/agents/summarizer/summarizer.py +35 -0
- minitap/mobile_use/agents/video_analyzer/__init__.py +5 -0
- minitap/mobile_use/agents/video_analyzer/human.md +5 -0
- minitap/mobile_use/agents/video_analyzer/video_analyzer.md +37 -0
- minitap/mobile_use/agents/video_analyzer/video_analyzer.py +111 -0
- minitap/mobile_use/clients/browserstack_client.py +477 -0
- minitap/mobile_use/clients/idb_client.py +429 -0
- minitap/mobile_use/clients/ios_client.py +332 -0
- minitap/mobile_use/clients/ios_client_config.py +141 -0
- minitap/mobile_use/clients/ui_automator_client.py +330 -0
- minitap/mobile_use/clients/wda_client.py +526 -0
- minitap/mobile_use/clients/wda_lifecycle.py +367 -0
- minitap/mobile_use/config.py +413 -0
- minitap/mobile_use/constants.py +3 -0
- minitap/mobile_use/context.py +106 -0
- minitap/mobile_use/controllers/__init__.py +0 -0
- minitap/mobile_use/controllers/android_controller.py +524 -0
- minitap/mobile_use/controllers/controller_factory.py +46 -0
- minitap/mobile_use/controllers/device_controller.py +182 -0
- minitap/mobile_use/controllers/ios_controller.py +436 -0
- minitap/mobile_use/controllers/platform_specific_commands_controller.py +199 -0
- minitap/mobile_use/controllers/types.py +106 -0
- minitap/mobile_use/controllers/unified_controller.py +193 -0
- minitap/mobile_use/graph/graph.py +160 -0
- minitap/mobile_use/graph/state.py +115 -0
- minitap/mobile_use/main.py +309 -0
- minitap/mobile_use/sdk/__init__.py +12 -0
- minitap/mobile_use/sdk/agent.py +1294 -0
- minitap/mobile_use/sdk/builders/__init__.py +10 -0
- minitap/mobile_use/sdk/builders/agent_config_builder.py +307 -0
- minitap/mobile_use/sdk/builders/index.py +15 -0
- minitap/mobile_use/sdk/builders/task_request_builder.py +236 -0
- minitap/mobile_use/sdk/constants.py +1 -0
- minitap/mobile_use/sdk/examples/README.md +83 -0
- minitap/mobile_use/sdk/examples/__init__.py +1 -0
- minitap/mobile_use/sdk/examples/app_lock_messaging.py +54 -0
- minitap/mobile_use/sdk/examples/platform_manual_task_example.py +67 -0
- minitap/mobile_use/sdk/examples/platform_minimal_example.py +48 -0
- minitap/mobile_use/sdk/examples/simple_photo_organizer.py +76 -0
- minitap/mobile_use/sdk/examples/smart_notification_assistant.py +225 -0
- minitap/mobile_use/sdk/examples/video_transcription_example.py +117 -0
- minitap/mobile_use/sdk/services/cloud_mobile.py +656 -0
- minitap/mobile_use/sdk/services/platform.py +434 -0
- minitap/mobile_use/sdk/types/__init__.py +51 -0
- minitap/mobile_use/sdk/types/agent.py +84 -0
- minitap/mobile_use/sdk/types/exceptions.py +138 -0
- minitap/mobile_use/sdk/types/platform.py +183 -0
- minitap/mobile_use/sdk/types/task.py +269 -0
- minitap/mobile_use/sdk/utils.py +29 -0
- minitap/mobile_use/services/accessibility.py +100 -0
- minitap/mobile_use/services/llm.py +247 -0
- minitap/mobile_use/services/telemetry.py +421 -0
- minitap/mobile_use/tools/index.py +67 -0
- minitap/mobile_use/tools/mobile/back.py +52 -0
- minitap/mobile_use/tools/mobile/erase_one_char.py +56 -0
- minitap/mobile_use/tools/mobile/focus_and_clear_text.py +317 -0
- minitap/mobile_use/tools/mobile/focus_and_input_text.py +153 -0
- minitap/mobile_use/tools/mobile/launch_app.py +86 -0
- minitap/mobile_use/tools/mobile/long_press_on.py +169 -0
- minitap/mobile_use/tools/mobile/open_link.py +62 -0
- minitap/mobile_use/tools/mobile/press_key.py +83 -0
- minitap/mobile_use/tools/mobile/stop_app.py +62 -0
- minitap/mobile_use/tools/mobile/swipe.py +156 -0
- minitap/mobile_use/tools/mobile/tap.py +154 -0
- minitap/mobile_use/tools/mobile/video_recording.py +177 -0
- minitap/mobile_use/tools/mobile/wait_for_delay.py +81 -0
- minitap/mobile_use/tools/scratchpad.py +147 -0
- minitap/mobile_use/tools/test_utils.py +413 -0
- minitap/mobile_use/tools/tool_wrapper.py +16 -0
- minitap/mobile_use/tools/types.py +35 -0
- minitap/mobile_use/tools/utils.py +336 -0
- minitap/mobile_use/utils/app_launch_utils.py +173 -0
- minitap/mobile_use/utils/cli_helpers.py +37 -0
- minitap/mobile_use/utils/cli_selection.py +143 -0
- minitap/mobile_use/utils/conversations.py +31 -0
- minitap/mobile_use/utils/decorators.py +124 -0
- minitap/mobile_use/utils/errors.py +6 -0
- minitap/mobile_use/utils/file.py +13 -0
- minitap/mobile_use/utils/logger.py +183 -0
- minitap/mobile_use/utils/media.py +186 -0
- minitap/mobile_use/utils/recorder.py +52 -0
- minitap/mobile_use/utils/requests_utils.py +37 -0
- minitap/mobile_use/utils/shell_utils.py +20 -0
- minitap/mobile_use/utils/test_ui_hierarchy.py +178 -0
- minitap/mobile_use/utils/time.py +6 -0
- minitap/mobile_use/utils/ui_hierarchy.py +132 -0
- minitap/mobile_use/utils/video.py +281 -0
- minitap_mobile_use-3.3.0.dist-info/METADATA +329 -0
- minitap_mobile_use-3.3.0.dist-info/RECORD +115 -0
- minitap_mobile_use-3.3.0.dist-info/WHEEL +4 -0
- minitap_mobile_use-3.3.0.dist-info/entry_points.txt +3 -0
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
"""Builder classes for configuring mobile-use components."""
|
|
2
|
+
|
|
3
|
+
from minitap.mobile_use.sdk.builders.agent_config_builder import AgentConfigBuilder
|
|
4
|
+
from minitap.mobile_use.sdk.builders.task_request_builder import (
|
|
5
|
+
TaskRequestCommonBuilder,
|
|
6
|
+
TaskRequestBuilder,
|
|
7
|
+
)
|
|
8
|
+
from minitap.mobile_use.sdk.builders.index import Builders
|
|
9
|
+
|
|
10
|
+
# Names re-exported as the public API of this builders package.
__all__ = ["AgentConfigBuilder", "TaskRequestCommonBuilder", "TaskRequestBuilder", "Builders"]
|
|
@@ -0,0 +1,307 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Builder for AgentConfig objects using a fluent interface.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
import copy
|
|
6
|
+
|
|
7
|
+
from langchain_core.callbacks.base import Callbacks
|
|
8
|
+
|
|
9
|
+
from minitap.mobile_use.clients.ios_client_config import BrowserStackClientConfig, IosClientConfig
|
|
10
|
+
from minitap.mobile_use.config import get_default_llm_config, get_default_minitap_llm_config
|
|
11
|
+
from minitap.mobile_use.context import DevicePlatform
|
|
12
|
+
from minitap.mobile_use.sdk.constants import DEFAULT_PROFILE_NAME
|
|
13
|
+
from minitap.mobile_use.sdk.types.agent import AgentConfig, AgentProfile, ServerConfig
|
|
14
|
+
from minitap.mobile_use.sdk.types.task import TaskRequestCommon
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class AgentConfigBuilder:
    """
    Builder class providing a fluent interface for creating AgentConfig objects.

    This builder allows for step-by-step construction of an AgentConfig with
    clear methods that make the configuration process intuitive and type-safe.

    The device targets for_device(), for_cloud_mobile() and for_browserstack()
    are mutually exclusive: configuring one after another raises ValueError.

    Examples:
        >>> builder = AgentConfigBuilder()
        >>> config = (builder
        ...     .add_profile(AgentProfile(name="HighReasoning", llm_config=LLMConfig(...)))
        ...     .add_profile(AgentProfile(name="LowReasoning", llm_config=LLMConfig(...)))
        ...     .for_device(DevicePlatform.ANDROID, "device123")
        ...     .with_default_task_config(TaskRequestCommon(max_steps=30))
        ...     .with_default_profile("HighReasoning")
        ...     .build()
        ... )
    """

    def __init__(self):
        """Initialize an empty AgentConfigBuilder."""
        self._agent_profiles: dict[str, AgentProfile] = {}
        self._task_request_defaults: TaskRequestCommon | None = None
        self._default_profile: str | AgentProfile | None = None
        self._device_id: str | None = None
        self._device_platform: DevicePlatform | None = None
        self._servers: ServerConfig = get_default_servers()
        self._graph_config_callbacks: Callbacks = None
        self._cloud_mobile_id_or_ref: str | None = None
        self._ios_client_config: IosClientConfig | None = None
        self._browserstack_config: BrowserStackClientConfig | None = None
        self._video_recording_enabled: bool = False

    def add_profile(self, profile: AgentProfile, validate: bool = True) -> "AgentConfigBuilder":
        """
        Add an agent profile to the mobile-use agent.

        Profiles are keyed by name, so adding a profile whose name is already
        registered replaces the previous one.

        Args:
            profile: The agent profile to add
            validate: Whether to call ``profile.llm_config.validate_providers()``
                before registering the profile

        Returns:
            Self for method chaining
        """
        # Validate first: if validation raises, the profile must not be left registered.
        if validate:
            profile.llm_config.validate_providers()
        self._agent_profiles[profile.name] = profile
        return self

    def add_profiles(
        self,
        profiles: list[AgentProfile],
        validate: bool = True,
    ) -> "AgentConfigBuilder":
        """
        Add multiple agent profiles to the mobile-use agent.

        Args:
            profiles: List of agent profiles to add
            validate: Forwarded to add_profile() for every profile

        Returns:
            Self for method chaining
        """
        for profile in profiles:
            self.add_profile(profile=profile, validate=validate)
        return self

    def with_default_profile(self, profile: str | AgentProfile) -> "AgentConfigBuilder":
        """
        Set the default agent profile used for tasks.

        The value is only stored here; it is resolved when build() is called.

        Args:
            profile: The name or instance of the default agent profile

        Returns:
            Self for method chaining
        """
        self._default_profile = profile
        return self

    def for_device(
        self,
        platform: DevicePlatform,
        device_id: str,
    ) -> "AgentConfigBuilder":
        """
        Configure the mobile-use agent for a specific device.

        Args:
            platform: The device platform (ANDROID or IOS)
            device_id: The unique identifier for the device

        Returns:
            Self for method chaining

        Raises:
            ValueError: If a cloud mobile or BrowserStack is already configured
        """
        if self._cloud_mobile_id_or_ref is not None:
            raise ValueError(
                "Device ID cannot be set when a cloud mobile is already configured.\n"
                "> for_device() and for_cloud_mobile() are mutually exclusive"
            )
        if self._browserstack_config is not None:
            raise ValueError(
                "Device ID cannot be set when BrowserStack is already configured.\n"
                "> for_device() and for_browserstack() are mutually exclusive"
            )
        self._device_id = device_id
        self._device_platform = platform
        return self

    def for_cloud_mobile(self, cloud_mobile_id_or_ref: str) -> "AgentConfigBuilder":
        """
        Configure the mobile-use agent to use a cloud mobile.

        When using a cloud mobile, tasks are executed remotely via the Platform API,
        and only PlatformTaskRequest can be used.

        Args:
            cloud_mobile_id_or_ref: The unique identifier or reference name for the cloud mobile.
                Can be either a UUID (e.g., '550e8400-e29b-41d4-a716-446655440000')
                or a reference name (e.g., 'my-test-device')

        Returns:
            Self for method chaining

        Raises:
            ValueError: If a device or BrowserStack is already configured
        """
        if self._device_id is not None:
            raise ValueError(
                "Cloud mobile device ID cannot be set when a device is already configured.\n"
                "> for_device() and for_cloud_mobile() are mutually exclusive"
            )
        if self._browserstack_config is not None:
            raise ValueError(
                "Cloud mobile cannot be set when BrowserStack is already configured.\n"
                "> for_cloud_mobile() and for_browserstack() are mutually exclusive"
            )
        self._cloud_mobile_id_or_ref = cloud_mobile_id_or_ref
        return self

    def for_browserstack(self, config: BrowserStackClientConfig) -> "AgentConfigBuilder":
        """
        Configure the mobile-use agent to use BrowserStack cloud devices.

        When using BrowserStack, the agent connects to BrowserStack's cloud infrastructure
        for iOS device automation. This is mutually exclusive with for_device() and
        for_cloud_mobile(). The device platform is set to DevicePlatform.IOS.

        Args:
            config: BrowserStack configuration with credentials and device settings

        Returns:
            Self for method chaining

        Raises:
            ValueError: If a device or a cloud mobile is already configured
        """
        if self._device_id is not None:
            raise ValueError(
                "BrowserStack cannot be set when a device is already configured.\n"
                "> for_device() and for_browserstack() are mutually exclusive"
            )
        if self._cloud_mobile_id_or_ref is not None:
            raise ValueError(
                "BrowserStack cannot be set when a cloud mobile is already configured.\n"
                "> for_cloud_mobile() and for_browserstack() are mutually exclusive"
            )
        self._browserstack_config = config
        self._device_platform = DevicePlatform.IOS
        return self

    def with_default_task_config(self, config: TaskRequestCommon) -> "AgentConfigBuilder":
        """
        Set the default task configuration.

        A deep copy is stored, so later changes to ``config`` do not affect the builder.

        Args:
            config: The task configuration to use as default

        Returns:
            Self for method chaining
        """
        self._task_request_defaults = copy.deepcopy(config)
        return self

    def with_adb_server(self, host: str, port: int | None = None) -> "AgentConfigBuilder":
        """
        Set the ADB server host and port.

        Args:
            host: The ADB server host
            port: The ADB server port; the current port is kept when None

        Returns:
            Self for method chaining
        """
        self._servers.adb_host = host
        if port is not None:
            self._servers.adb_port = port
        return self

    def with_servers(self, servers: ServerConfig) -> "AgentConfigBuilder":
        """
        Set the server settings.

        A deep copy is stored, so later changes to ``servers`` do not affect the builder.

        Args:
            servers: The server settings to use

        Returns:
            Self for method chaining
        """
        self._servers = copy.deepcopy(servers)
        return self

    def with_graph_config_callbacks(self, callbacks: Callbacks) -> "AgentConfigBuilder":
        """
        Set the graph config callbacks.

        Args:
            callbacks: The graph config callbacks to use

        Returns:
            Self for method chaining
        """
        self._graph_config_callbacks = callbacks
        return self

    def with_ios_client_config(self, config: IosClientConfig) -> "AgentConfigBuilder":
        """
        Set the iOS client configuration.

        A deep copy is stored, so later changes to ``config`` do not affect the builder.

        Args:
            config: The iOS client configuration to use

        Returns:
            Self for method chaining
        """
        self._ios_client_config = copy.deepcopy(config)
        return self

    def with_video_recording_tools(self) -> "AgentConfigBuilder":
        """
        Enable video recording tools (start_video_recording, stop_video_recording).

        When enabled, the agent will have access to tools for recording the device
        screen and analyzing the video content using Gemini models.

        IMPORTANT: This requires:
        1. ffmpeg to be installed on the system (for video compression)
        2. A video-capable model configured in utils.video_analyzer

        Supported models for video_analyzer:
        - gemini-3-flash-preview (recommended)
        - gemini-3-pro-preview
        - gemini-2.5-flash
        - gemini-2.5-pro
        - gemini-2.0-flash

        Returns:
            Self for method chaining

        Raises:
            FFmpegNotInstalledError: If ffmpeg is not installed
            ValueError: When the agent is initialized if any profile lacks video_analyzer config
        """
        # Imported lazily so the video utilities are only loaded when this option is used.
        from minitap.mobile_use.utils.video import check_ffmpeg_available

        # Checked before the flag is set, so a failure leaves recording disabled.
        check_ffmpeg_available()
        self._video_recording_enabled = True
        return self

    def build(self, validate_profiles: bool = True) -> AgentConfig:
        """
        Build the mobile-use AgentConfig object.

        The default profile is resolved in this order:

        1. A name given to with_default_profile() is looked up among the added profiles.
        2. An AgentProfile given to with_default_profile() is used as is, and is
           registered if no profile with that name was added.
        3. If no profile was added, a profile named DEFAULT_PROFILE_NAME is created
           from get_default_minitap_llm_config(), falling back to get_default_llm_config().
        4. If exactly one profile was added, that profile is used.
        5. Otherwise a ValueError is raised.

        Args:
            validate_profiles: Forwarded as ``validate`` to add_profile() for profiles that
                build() registers itself, and to get_default_minitap_llm_config()

        Returns:
            A configured AgentConfig object

        Raises:
            ValueError: If the default profile name is not found in configured profiles, or
                if several profiles are configured and no default profile was selected
        """
        selected = self._default_profile

        if isinstance(selected, str):
            default_profile = self._agent_profiles.get(selected)
            if default_profile is None:
                raise ValueError(f"Profile '{selected}' not found in configured agents")
        elif isinstance(selected, AgentProfile):
            default_profile = selected
            if default_profile.name not in self._agent_profiles:
                self.add_profile(default_profile, validate=validate_profiles)
        elif not self._agent_profiles:
            llm_config = (
                get_default_minitap_llm_config(validate=validate_profiles)
                or get_default_llm_config()
            )
            default_profile = AgentProfile(
                name=DEFAULT_PROFILE_NAME,
                llm_config=llm_config,
            )
            self.add_profile(default_profile, validate=validate_profiles)
        elif len(self._agent_profiles) == 1:
            # Select the only one available
            default_profile = next(iter(self._agent_profiles.values()))
        else:
            available_profiles = ", ".join(self._agent_profiles)
            raise ValueError(
                f"You must call with_default_profile() to select one among: {available_profiles}"
            )

        return AgentConfig(
            agent_profiles=self._agent_profiles,
            task_request_defaults=self._task_request_defaults or TaskRequestCommon(),
            default_profile=default_profile,
            device_id=self._device_id,
            device_platform=self._device_platform,
            # with_adb_server() mutates self._servers in place; hand out a copy so that
            # reusing the builder cannot alter a config that was already built.
            servers=copy.deepcopy(self._servers),
            graph_config_callbacks=self._graph_config_callbacks,
            cloud_mobile_id_or_ref=self._cloud_mobile_id_or_ref,
            ios_client_config=self._ios_client_config,
            browserstack_config=self._browserstack_config,
            video_recording_enabled=self._video_recording_enabled,
        )
|
|
297
|
+
|
|
298
|
+
|
|
299
|
+
def get_default_agent_config():
    """Return the AgentConfig built by an AgentConfigBuilder left at its defaults."""
    untouched_builder = AgentConfigBuilder()
    return untouched_builder.build()
|
|
301
|
+
|
|
302
|
+
|
|
303
|
+
def get_default_servers():
    """Return a new ServerConfig targeting the ADB server at localhost:5037."""
    adb_defaults = {"adb_host": "localhost", "adb_port": 5037}
    return ServerConfig(**adb_defaults)
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
from minitap.mobile_use.sdk.builders.agent_config_builder import AgentConfigBuilder
|
|
2
|
+
from minitap.mobile_use.sdk.builders.task_request_builder import TaskRequestCommonBuilder
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
class BuildersWrapper:
    """Namespace whose properties hand out a new builder on every access."""

    @property
    def AgentConfig(self) -> AgentConfigBuilder:
        """Create and return a new AgentConfigBuilder."""
        fresh_builder = AgentConfigBuilder()
        return fresh_builder

    @property
    def TaskDefaults(self) -> TaskRequestCommonBuilder:
        """Create and return a new TaskRequestCommonBuilder."""
        fresh_builder = TaskRequestCommonBuilder()
        return fresh_builder
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
# Module-level BuildersWrapper instance; `Builders.AgentConfig` and
# `Builders.TaskDefaults` each return a new builder on every access.
Builders = BuildersWrapper()
|
|
@@ -0,0 +1,236 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Builder for TaskRequest objects using a fluent interface.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from typing import Self, TypeVar, cast
|
|
7
|
+
|
|
8
|
+
from pydantic import BaseModel
|
|
9
|
+
|
|
10
|
+
from minitap.mobile_use.constants import RECURSION_LIMIT
|
|
11
|
+
from minitap.mobile_use.sdk.types.agent import AgentProfile
|
|
12
|
+
from minitap.mobile_use.sdk.types.task import TaskRequest, TaskRequestCommon
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
# Module-level type variable bounded to pydantic models or None.
# NOTE(review): `class TaskRequestBuilder[TIn]` declares its own PEP 695 type parameter
# with the same name, and that one is what `TIn` resolves to inside the class --
# confirm whether this module-level TypeVar (and its bound) is still needed.
TIn = TypeVar("TIn", bound=BaseModel | None)
|
|
16
|
+
# Type of the pydantic model class passed to TaskRequestBuilder.with_output_format().
TOut = TypeVar("TOut", bound=BaseModel)
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class TaskRequestCommonBuilder(BaseModel):
    """
    Fluent builder collecting the settings shared by all tasks.

    Every ``with_*`` method stores one setting and returns the builder itself;
    build() turns the collected settings into a TaskRequestCommon.
    """

    def __init__(self):
        """Start from the defaults: RECURSION_LIMIT steps, no tracing, no output files."""
        # NOTE(review): BaseModel.__init__ is not called here; the builder only keeps
        # plain underscore-prefixed attributes -- confirm this is intentional.
        self._max_steps = RECURSION_LIMIT
        self._record_trace = False
        self._trace_path = Path("mobile-use-traces")
        self._llm_output_path: Path | None = None
        self._thoughts_output_path: Path | None = None
        self._locked_app_package: str | None = None

    def with_max_steps(self, max_steps: int) -> Self:
        """
        Limit how many steps the task is allowed to take.

        Args:
            max_steps: Upper bound on the number of steps
        """
        self._max_steps = max_steps
        return self

    def with_trace_recording(self, enabled: bool = True, path: str | None = None) -> Self:
        """
        Turn trace recording on or off.

        Traces record screenshots and actions during execution.

        Args:
            enabled: Whether traces should be recorded
            path: Directory where traces are saved; only taken into account when
                ``enabled`` is truthy and ``path`` is non-empty
        """
        self._record_trace = enabled
        if not enabled or not path:
            # Keep whatever trace directory is currently configured.
            return self
        self._trace_path = Path(path)
        return self

    def with_llm_output_saving(self, path: str) -> Self:
        """
        Save the LLM output message to a file.

        Args:
            path: Destination of the LLM output message
        """
        destination = Path(path)
        self._llm_output_path = destination
        return self

    def with_thoughts_output_saving(self, path: str) -> Self:
        """
        Save the thoughts output message to a file.

        Args:
            path: Destination of the thoughts output message
        """
        destination = Path(path)
        self._thoughts_output_path = destination
        return self

    def with_locked_app_package(self, package_name: str) -> Self:
        """
        Lock execution to a single app.

        This ensures the specified app is launched and in the foreground before
        the agentic loop starts.

        Args:
            package_name: Package name (Android, e.g., 'com.whatsapp') or
                bundle ID (iOS, e.g., 'com.apple.mobilesafari')
        """
        self._locked_app_package = package_name
        return self

    def build(self) -> TaskRequestCommon:
        """
        Create the TaskRequestCommon holding the settings collected so far.

        Returns:
            A configured TaskRequestCommon object
        """
        collected = {
            "max_steps": self._max_steps,
            "record_trace": self._record_trace,
            "trace_path": self._trace_path,
            "llm_output_path": self._llm_output_path,
            "thoughts_output_path": self._thoughts_output_path,
            "locked_app_package": self._locked_app_package,
        }
        return TaskRequestCommon(**collected)
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
class TaskRequestBuilder[TIn](TaskRequestCommonBuilder):
|
|
112
|
+
"""
|
|
113
|
+
Builder class providing a fluent interface for creating TaskRequest objects.
|
|
114
|
+
|
|
115
|
+
This builder allows for step-by-step construction of a TaskRequest with
|
|
116
|
+
clear methods that make the configuration process intuitive and type-safe.
|
|
117
|
+
|
|
118
|
+
Examples:
|
|
119
|
+
>>> builder = TaskRequestBuilder[None](goal="Open Gmail and check unread emails")
|
|
120
|
+
>>> task_request = (
|
|
121
|
+
... builder
|
|
122
|
+
... .with_max_steps(30)
|
|
123
|
+
... .using_profile("LowReasoning")
|
|
124
|
+
... .with_output_description("A list of email subjects and senders")
|
|
125
|
+
... .build()
|
|
126
|
+
... )
|
|
127
|
+
"""
|
|
128
|
+
|
|
129
|
+
def __init__(self, goal: str):
|
|
130
|
+
"""Initialize an empty TaskRequestBuilder."""
|
|
131
|
+
super().__init__()
|
|
132
|
+
self._goal = goal
|
|
133
|
+
self._profile: str | AgentProfile | None = None
|
|
134
|
+
self._name: str | None = None
|
|
135
|
+
self._output_description = None
|
|
136
|
+
self._output_format: type[TIn] | None = None
|
|
137
|
+
|
|
138
|
+
@classmethod
|
|
139
|
+
def from_common(cls, goal: str, common: TaskRequestCommon):
|
|
140
|
+
res = cls(goal=goal)
|
|
141
|
+
res._max_steps = common.max_steps
|
|
142
|
+
res._record_trace = common.record_trace
|
|
143
|
+
res._trace_path = common.trace_path
|
|
144
|
+
res._llm_output_path = common.llm_output_path
|
|
145
|
+
res._thoughts_output_path = common.thoughts_output_path
|
|
146
|
+
res._locked_app_package = common.locked_app_package
|
|
147
|
+
return res
|
|
148
|
+
|
|
149
|
+
def using_profile(self, profile: str | AgentProfile) -> "TaskRequestBuilder[TIn]":
|
|
150
|
+
"""
|
|
151
|
+
Set the agent profile for executing the task.
|
|
152
|
+
|
|
153
|
+
Args:
|
|
154
|
+
profile: The agent profile to use
|
|
155
|
+
"""
|
|
156
|
+
self._profile = profile
|
|
157
|
+
return self
|
|
158
|
+
|
|
159
|
+
def with_name(self, name: str) -> "TaskRequestBuilder[TIn]":
|
|
160
|
+
"""
|
|
161
|
+
Set the name of the task - useful when recording traces.
|
|
162
|
+
Otherwise, a random name will be generated.
|
|
163
|
+
|
|
164
|
+
Args:
|
|
165
|
+
name: Name of the task
|
|
166
|
+
"""
|
|
167
|
+
self._name = name
|
|
168
|
+
return self
|
|
169
|
+
|
|
170
|
+
def without_llm_output_saving(self) -> Self:
|
|
171
|
+
"""
|
|
172
|
+
Disable LLM output saving for the task.
|
|
173
|
+
"""
|
|
174
|
+
self._llm_output_path = None
|
|
175
|
+
return self
|
|
176
|
+
|
|
177
|
+
def without_thoughts_output_saving(self):
|
|
178
|
+
"""
|
|
179
|
+
Disable thoughts output saving for the task.
|
|
180
|
+
"""
|
|
181
|
+
self._thoughts_output_path = None
|
|
182
|
+
return self
|
|
183
|
+
|
|
184
|
+
def with_output_description(self, description: str) -> "TaskRequestBuilder[TIn]":
|
|
185
|
+
"""
|
|
186
|
+
Set the description of the expected output format.
|
|
187
|
+
This is especially useful for data extraction tasks.
|
|
188
|
+
|
|
189
|
+
Args:
|
|
190
|
+
description: Description of the expected output format
|
|
191
|
+
"""
|
|
192
|
+
self._output_description = description
|
|
193
|
+
return self
|
|
194
|
+
|
|
195
|
+
def with_output_format(self, output_format: type[TOut]) -> "TaskRequestBuilder[TOut]":
|
|
196
|
+
"""
|
|
197
|
+
Set the pydantic model for the expected output format.
|
|
198
|
+
|
|
199
|
+
Args:
|
|
200
|
+
output_format: Pydantic model instance defining the output format
|
|
201
|
+
"""
|
|
202
|
+
self._output_format = output_format # type: ignore
|
|
203
|
+
return cast(TaskRequestBuilder[TOut], self)
|
|
204
|
+
|
|
205
|
+
def build(self) -> TaskRequest[TIn]:
|
|
206
|
+
"""
|
|
207
|
+
Build the TaskRequest object.
|
|
208
|
+
|
|
209
|
+
Returns:
|
|
210
|
+
A configured TaskRequest object
|
|
211
|
+
|
|
212
|
+
Raises:
|
|
213
|
+
ValueError: If required fields are missing
|
|
214
|
+
"""
|
|
215
|
+
if not self._goal:
|
|
216
|
+
raise ValueError("Task goal is required")
|
|
217
|
+
|
|
218
|
+
if self._output_format and self._output_description:
|
|
219
|
+
raise ValueError("Output format and description are mutually exclusive")
|
|
220
|
+
|
|
221
|
+
task_request = TaskRequest(
|
|
222
|
+
goal=self._goal,
|
|
223
|
+
profile=self._profile.name
|
|
224
|
+
if isinstance(self._profile, AgentProfile)
|
|
225
|
+
else self._profile,
|
|
226
|
+
task_name=self._name,
|
|
227
|
+
output_description=self._output_description,
|
|
228
|
+
output_format=self._output_format,
|
|
229
|
+
max_steps=self._max_steps,
|
|
230
|
+
record_trace=self._record_trace,
|
|
231
|
+
trace_path=self._trace_path,
|
|
232
|
+
llm_output_path=self._llm_output_path,
|
|
233
|
+
thoughts_output_path=self._thoughts_output_path,
|
|
234
|
+
locked_app_package=self._locked_app_package,
|
|
235
|
+
)
|
|
236
|
+
return task_request
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
# Name given to the profile that AgentConfigBuilder.build() creates on its own
# when no agent profile has been added.
DEFAULT_PROFILE_NAME = "default"
|
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
# mobile-use SDK Examples
|
|
2
|
+
|
|
3
|
+
Location: `src/mobile_use/sdk/examples/`
|
|
4
|
+
|
|
5
|
+
Run any example via:
|
|
6
|
+
|
|
7
|
+
- `python src/mobile_use/sdk/examples/<filename>.py`
|
|
8
|
+
|
|
9
|
+
## Practical Automation Examples
|
|
10
|
+
|
|
11
|
+
These examples demonstrate three different ways to use the SDK, each applying an appropriate level of complexity for the task at hand:
|
|
12
|
+
|
|
13
|
+
### platform_minimal_example.py - Painless integration with the Minitap platform
|
|
14
|
+
|
|
15
|
+
This script shows the simplest way to run Minitap:
|
|
16
|
+
|
|
17
|
+
- Visit https://platform.minitap.ai to create a task and get your API key.
|
|
18
|
+
- Initialize the agent with your API key: .init(api_key=...).
|
|
19
|
+
- Ask the agent to run one of the tasks you’ve set up in the Minitap platform
|
|
20
|
+
(e.g., "like-instagram-post").
|
|
21
|
+
- The task’s goal and settings live in the Minitap platform, so you don’t need
|
|
22
|
+
to hardcode them here.
|
|
23
|
+
- If you’ve created different profiles (LLM configurations) in the Minitap platform (like "fast-config"),
|
|
24
|
+
you can pick which one to use with the `profile` field.
|
|
25
|
+
|
|
26
|
+
### simple_photo_organizer.py - Straightforward Approach
|
|
27
|
+
|
|
28
|
+
Demonstrates the simplest way to use the SDK for quick automation tasks:
|
|
29
|
+
|
|
30
|
+
- **Direct API calls** without builders or complex configuration
|
|
31
|
+
- Creates a photo album and organizes photos from a specific date
|
|
32
|
+
- Uses structured Pydantic output to capture results
|
|
33
|
+
|
|
34
|
+
### smart_notification_assistant.py - Feature-Rich Approach
|
|
35
|
+
|
|
36
|
+
Showcases more advanced SDK features while remaining practical:
|
|
37
|
+
|
|
38
|
+
- Uses builder pattern for configuring the agent and overriding the default task configurations
|
|
39
|
+
- Implements **multiple specialized agent profiles** for different reasoning tasks:
|
|
40
|
+
- Analyzer profile for detailed inspection of notifications
|
|
41
|
+
- Note taker profile for writing a summary of the notifications
|
|
42
|
+
- Enables **tracing** for debugging and visualization
|
|
43
|
+
- Includes **structured Pydantic models** with enums and nested relationships
|
|
44
|
+
- Demonstrates proper **exception handling** for different error types
|
|
45
|
+
- Shows how to set up task defaults for consistent configuration
|
|
46
|
+
|
|
47
|
+
## Usage Notes
|
|
48
|
+
|
|
49
|
+
- **Choosing an Approach**:
|
|
50
|
+
|
|
51
|
+
- Use the direct approach (like `platform_minimal_example.py`) for painless setup using the Minitap platform. You can configure, save, run, and monitor any task with a few clicks.
|
|
52
|
+
- Use the simple approach (like `simple_photo_organizer.py`) for straightforward tasks: you configure the settings yourself, and every LLM call happens on your device.
|
|
53
|
+
- Use the builder approach (like `smart_notification_assistant.py`) when you need more customization.
|
|
54
|
+
|
|
55
|
+
- **Device Detection**: The agent detects the first available device unless you specify one with `AgentConfigBuilder.for_device(...)`.
|
|
56
|
+
|
|
57
|
+
- **Servers**: With default base URLs (`localhost:9998/9999`), the agent starts the servers automatically. When you override URLs, it assumes servers are already running.
|
|
58
|
+
|
|
59
|
+
- **LLM API Keys**: Provide necessary keys (e.g., `OPENAI_API_KEY`) in a `.env` file at repo root; see `mobile_use/config.py`.
|
|
60
|
+
|
|
61
|
+
- **Traces**: When enabled, traces are saved to a specified directory (defaulting to `./mobile-use-traces/`) and can be useful for debugging and visualization.
|
|
62
|
+
|
|
63
|
+
- **Structured Output**: Pydantic models enable type safety when processing task outputs, making it easier to handle and chain results between tasks.
|
|
64
|
+
|
|
65
|
+
## Locked App Execution
|
|
66
|
+
|
|
67
|
+
You can restrict task execution to a specific app using the `with_locked_app_package()` method. This ensures the agent stays within the target application throughout the task execution.
|
|
68
|
+
|
|
69
|
+
```python
|
|
70
|
+
# Lock execution to WhatsApp
|
|
71
|
+
result = await agent.run_task(
|
|
72
|
+
request=agent.new_task("Send message to Bob")
|
|
73
|
+
.with_locked_app_package("com.whatsapp")
|
|
74
|
+
.build()
|
|
75
|
+
)
|
|
76
|
+
```
|
|
77
|
+
|
|
78
|
+
**When locked to an app:**
|
|
79
|
+
|
|
80
|
+
- The system verifies the app is open before starting
|
|
81
|
+
- If the app is accidentally closed or navigated away from, the Contextor agent will attempt to relaunch it
|
|
82
|
+
- The Planner and Cortex agents will prioritize in-app actions
|
|
83
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Example scripts for the mobile-use SDK."""
|