minitap-mobile-use 3.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (115) hide show
  1. minitap/mobile_use/__init__.py +0 -0
  2. minitap/mobile_use/agents/contextor/contextor.md +55 -0
  3. minitap/mobile_use/agents/contextor/contextor.py +175 -0
  4. minitap/mobile_use/agents/contextor/types.py +36 -0
  5. minitap/mobile_use/agents/cortex/cortex.md +135 -0
  6. minitap/mobile_use/agents/cortex/cortex.py +152 -0
  7. minitap/mobile_use/agents/cortex/types.py +15 -0
  8. minitap/mobile_use/agents/executor/executor.md +42 -0
  9. minitap/mobile_use/agents/executor/executor.py +87 -0
  10. minitap/mobile_use/agents/executor/tool_node.py +152 -0
  11. minitap/mobile_use/agents/hopper/hopper.md +15 -0
  12. minitap/mobile_use/agents/hopper/hopper.py +44 -0
  13. minitap/mobile_use/agents/orchestrator/human.md +12 -0
  14. minitap/mobile_use/agents/orchestrator/orchestrator.md +21 -0
  15. minitap/mobile_use/agents/orchestrator/orchestrator.py +134 -0
  16. minitap/mobile_use/agents/orchestrator/types.py +11 -0
  17. minitap/mobile_use/agents/outputter/human.md +25 -0
  18. minitap/mobile_use/agents/outputter/outputter.py +85 -0
  19. minitap/mobile_use/agents/outputter/test_outputter.py +167 -0
  20. minitap/mobile_use/agents/planner/human.md +14 -0
  21. minitap/mobile_use/agents/planner/planner.md +126 -0
  22. minitap/mobile_use/agents/planner/planner.py +101 -0
  23. minitap/mobile_use/agents/planner/types.py +51 -0
  24. minitap/mobile_use/agents/planner/utils.py +70 -0
  25. minitap/mobile_use/agents/summarizer/summarizer.py +35 -0
  26. minitap/mobile_use/agents/video_analyzer/__init__.py +5 -0
  27. minitap/mobile_use/agents/video_analyzer/human.md +5 -0
  28. minitap/mobile_use/agents/video_analyzer/video_analyzer.md +37 -0
  29. minitap/mobile_use/agents/video_analyzer/video_analyzer.py +111 -0
  30. minitap/mobile_use/clients/browserstack_client.py +477 -0
  31. minitap/mobile_use/clients/idb_client.py +429 -0
  32. minitap/mobile_use/clients/ios_client.py +332 -0
  33. minitap/mobile_use/clients/ios_client_config.py +141 -0
  34. minitap/mobile_use/clients/ui_automator_client.py +330 -0
  35. minitap/mobile_use/clients/wda_client.py +526 -0
  36. minitap/mobile_use/clients/wda_lifecycle.py +367 -0
  37. minitap/mobile_use/config.py +413 -0
  38. minitap/mobile_use/constants.py +3 -0
  39. minitap/mobile_use/context.py +106 -0
  40. minitap/mobile_use/controllers/__init__.py +0 -0
  41. minitap/mobile_use/controllers/android_controller.py +524 -0
  42. minitap/mobile_use/controllers/controller_factory.py +46 -0
  43. minitap/mobile_use/controllers/device_controller.py +182 -0
  44. minitap/mobile_use/controllers/ios_controller.py +436 -0
  45. minitap/mobile_use/controllers/platform_specific_commands_controller.py +199 -0
  46. minitap/mobile_use/controllers/types.py +106 -0
  47. minitap/mobile_use/controllers/unified_controller.py +193 -0
  48. minitap/mobile_use/graph/graph.py +160 -0
  49. minitap/mobile_use/graph/state.py +115 -0
  50. minitap/mobile_use/main.py +309 -0
  51. minitap/mobile_use/sdk/__init__.py +12 -0
  52. minitap/mobile_use/sdk/agent.py +1294 -0
  53. minitap/mobile_use/sdk/builders/__init__.py +10 -0
  54. minitap/mobile_use/sdk/builders/agent_config_builder.py +307 -0
  55. minitap/mobile_use/sdk/builders/index.py +15 -0
  56. minitap/mobile_use/sdk/builders/task_request_builder.py +236 -0
  57. minitap/mobile_use/sdk/constants.py +1 -0
  58. minitap/mobile_use/sdk/examples/README.md +83 -0
  59. minitap/mobile_use/sdk/examples/__init__.py +1 -0
  60. minitap/mobile_use/sdk/examples/app_lock_messaging.py +54 -0
  61. minitap/mobile_use/sdk/examples/platform_manual_task_example.py +67 -0
  62. minitap/mobile_use/sdk/examples/platform_minimal_example.py +48 -0
  63. minitap/mobile_use/sdk/examples/simple_photo_organizer.py +76 -0
  64. minitap/mobile_use/sdk/examples/smart_notification_assistant.py +225 -0
  65. minitap/mobile_use/sdk/examples/video_transcription_example.py +117 -0
  66. minitap/mobile_use/sdk/services/cloud_mobile.py +656 -0
  67. minitap/mobile_use/sdk/services/platform.py +434 -0
  68. minitap/mobile_use/sdk/types/__init__.py +51 -0
  69. minitap/mobile_use/sdk/types/agent.py +84 -0
  70. minitap/mobile_use/sdk/types/exceptions.py +138 -0
  71. minitap/mobile_use/sdk/types/platform.py +183 -0
  72. minitap/mobile_use/sdk/types/task.py +269 -0
  73. minitap/mobile_use/sdk/utils.py +29 -0
  74. minitap/mobile_use/services/accessibility.py +100 -0
  75. minitap/mobile_use/services/llm.py +247 -0
  76. minitap/mobile_use/services/telemetry.py +421 -0
  77. minitap/mobile_use/tools/index.py +67 -0
  78. minitap/mobile_use/tools/mobile/back.py +52 -0
  79. minitap/mobile_use/tools/mobile/erase_one_char.py +56 -0
  80. minitap/mobile_use/tools/mobile/focus_and_clear_text.py +317 -0
  81. minitap/mobile_use/tools/mobile/focus_and_input_text.py +153 -0
  82. minitap/mobile_use/tools/mobile/launch_app.py +86 -0
  83. minitap/mobile_use/tools/mobile/long_press_on.py +169 -0
  84. minitap/mobile_use/tools/mobile/open_link.py +62 -0
  85. minitap/mobile_use/tools/mobile/press_key.py +83 -0
  86. minitap/mobile_use/tools/mobile/stop_app.py +62 -0
  87. minitap/mobile_use/tools/mobile/swipe.py +156 -0
  88. minitap/mobile_use/tools/mobile/tap.py +154 -0
  89. minitap/mobile_use/tools/mobile/video_recording.py +177 -0
  90. minitap/mobile_use/tools/mobile/wait_for_delay.py +81 -0
  91. minitap/mobile_use/tools/scratchpad.py +147 -0
  92. minitap/mobile_use/tools/test_utils.py +413 -0
  93. minitap/mobile_use/tools/tool_wrapper.py +16 -0
  94. minitap/mobile_use/tools/types.py +35 -0
  95. minitap/mobile_use/tools/utils.py +336 -0
  96. minitap/mobile_use/utils/app_launch_utils.py +173 -0
  97. minitap/mobile_use/utils/cli_helpers.py +37 -0
  98. minitap/mobile_use/utils/cli_selection.py +143 -0
  99. minitap/mobile_use/utils/conversations.py +31 -0
  100. minitap/mobile_use/utils/decorators.py +124 -0
  101. minitap/mobile_use/utils/errors.py +6 -0
  102. minitap/mobile_use/utils/file.py +13 -0
  103. minitap/mobile_use/utils/logger.py +183 -0
  104. minitap/mobile_use/utils/media.py +186 -0
  105. minitap/mobile_use/utils/recorder.py +52 -0
  106. minitap/mobile_use/utils/requests_utils.py +37 -0
  107. minitap/mobile_use/utils/shell_utils.py +20 -0
  108. minitap/mobile_use/utils/test_ui_hierarchy.py +178 -0
  109. minitap/mobile_use/utils/time.py +6 -0
  110. minitap/mobile_use/utils/ui_hierarchy.py +132 -0
  111. minitap/mobile_use/utils/video.py +281 -0
  112. minitap_mobile_use-3.3.0.dist-info/METADATA +329 -0
  113. minitap_mobile_use-3.3.0.dist-info/RECORD +115 -0
  114. minitap_mobile_use-3.3.0.dist-info/WHEEL +4 -0
  115. minitap_mobile_use-3.3.0.dist-info/entry_points.txt +3 -0
@@ -0,0 +1,1294 @@
1
+ import asyncio
2
+ import sys
3
+ import tempfile
4
+ import uuid
5
+ from collections.abc import Callable, Coroutine
6
+ from datetime import UTC, datetime
7
+ from io import BytesIO
8
+ from pathlib import Path
9
+ from shutil import which
10
+ from types import NoneType
11
+ from typing import Any, TypeVar, overload
12
+
13
+ from adbutils import AdbClient
14
+ from dotenv import load_dotenv
15
+ from langchain_core.messages import AIMessage
16
+ from PIL import Image
17
+ from pydantic import BaseModel
18
+
19
+ from minitap.mobile_use.agents.outputter.outputter import outputter
20
+ from minitap.mobile_use.agents.planner.types import Subgoal
21
+ from minitap.mobile_use.clients.browserstack_client import BrowserStackClientWrapper
22
+ from minitap.mobile_use.clients.idb_client import IdbClientWrapper
23
+ from minitap.mobile_use.clients.ios_client import DeviceType, IosClientWrapper, get_ios_client
24
+ from minitap.mobile_use.clients.ui_automator_client import UIAutomatorClient
25
+ from minitap.mobile_use.clients.wda_client import WdaClientWrapper
26
+ from minitap.mobile_use.config import AgentNode, OutputConfig, record_events, settings
27
+ from minitap.mobile_use.context import (
28
+ DeviceContext,
29
+ DevicePlatform,
30
+ ExecutionSetup,
31
+ IsReplan,
32
+ MobileUseContext,
33
+ )
34
+ from minitap.mobile_use.controllers.platform_specific_commands_controller import get_first_device
35
+ from minitap.mobile_use.graph.graph import get_graph
36
+ from minitap.mobile_use.graph.state import State
37
+ from minitap.mobile_use.sdk.builders.agent_config_builder import get_default_agent_config
38
+ from minitap.mobile_use.sdk.builders.task_request_builder import TaskRequestBuilder
39
+ from minitap.mobile_use.sdk.constants import DEFAULT_PROFILE_NAME
40
+ from minitap.mobile_use.sdk.services.cloud_mobile import CloudMobileService
41
+ from minitap.mobile_use.sdk.services.platform import PlatformService
42
+ from minitap.mobile_use.sdk.types.agent import AgentConfig
43
+ from minitap.mobile_use.sdk.types.exceptions import (
44
+ AgentError,
45
+ AgentNotInitializedError,
46
+ AgentProfileNotFoundError,
47
+ AgentTaskRequestError,
48
+ CloudMobileServiceUninitializedError,
49
+ DeviceNotFoundError,
50
+ ExecutableNotFoundError,
51
+ PlatformServiceUninitializedError,
52
+ ServerStartupError,
53
+ )
54
+ from minitap.mobile_use.sdk.types.platform import TaskRunPlanResponse, TaskRunStatus
55
+ from minitap.mobile_use.sdk.types.task import (
56
+ AgentProfile,
57
+ CloudDevicePlatformTaskRequest,
58
+ PlatformTaskInfo,
59
+ PlatformTaskRequest,
60
+ Task,
61
+ TaskRequest,
62
+ )
63
+ from minitap.mobile_use.services.telemetry import telemetry
64
+ from minitap.mobile_use.utils.app_launch_utils import _handle_initial_app_launch
65
+ from minitap.mobile_use.utils.logger import get_logger
66
+ from minitap.mobile_use.utils.media import (
67
+ create_gif_from_trace_folder,
68
+ create_steps_json_from_trace_folder,
69
+ remove_images_from_trace_folder,
70
+ remove_steps_json_from_trace_folder,
71
+ )
72
+ from minitap.mobile_use.utils.recorder import log_agent_thought
73
+
74
# Module-level logger, named after this module.
logger = get_logger(__name__)

# Type variable for a task's structured output type; bounded to a pydantic
# ``BaseModel`` subclass or ``None``.
TOutput = TypeVar("TOutput", bound=BaseModel | None)

# Runs at import time (python-dotenv's loader for ``.env`` files).
load_dotenv()
79
+
80
+
81
+ class Agent:
82
+ _config: AgentConfig
83
+ _tasks: list[Task] = []
84
+ _tmp_traces_dir: Path
85
+ _initialized: bool = False
86
+ _device_context: DeviceContext
87
+ _adb_client: AdbClient | None
88
+ _ui_adb_client: UIAutomatorClient | None
89
+ _ios_client: IosClientWrapper | None
90
+ _ios_device_type: DeviceType | None
91
+ _current_task: asyncio.Task | None = None
92
+ _task_lock: asyncio.Lock
93
+ _cloud_mobile_id: str | None = None
94
+
95
def __init__(self, *, config: AgentConfig | None = None):
    """Create an agent and set up its in-memory state.

    Args:
        config: Agent configuration. When omitted (or falsy), the value
            returned by ``get_default_agent_config()`` is used instead.
    """
    self._config = config if config else get_default_agent_config()
    self._tasks = []
    self._initialized = False
    self._task_lock = asyncio.Lock()

    traces_root = Path(tempfile.gettempdir())
    self._tmp_traces_dir = traces_root / "mobile-use-traces"

    # The platform services are only built here when an API key is already
    # present in the settings; otherwise they stay unset and can still be
    # created later by init() when it receives an explicit key.
    self._platform_service = None
    self._cloud_mobile_service = None
    if settings.MINITAP_API_KEY:
        self._platform_service = PlatformService()
        self._cloud_mobile_service = CloudMobileService()
110
+
111
async def init(
    self,
    api_key: str | None = None,
    server_restart_attempts: int = 3,
    retry_count: int = 5,
    retry_wait_seconds: int = 5,
):
    """Initialize the agent, reporting failures to telemetry.

    Thin wrapper around ``_init_internal``: all arguments are forwarded
    unchanged and its return value is returned as-is.

    Args:
        api_key: Optional API key forwarded to ``_init_internal``.
        server_restart_attempts: Forwarded to ``_init_internal``.
        retry_count: Forwarded to ``_init_internal``.
        retry_wait_seconds: Forwarded to ``_init_internal``.

    Raises:
        Exception: Whatever ``_init_internal`` raises is re-raised after the
            telemetry session has been closed as failed.
    """
    # A telemetry session is only opened here when none exists yet
    # (e.g. one may already have been started by the CLI).
    if not telemetry._session_id:
        telemetry.start_session({"source": "sdk"})

    forwarded = {
        "api_key": api_key,
        "server_restart_attempts": server_restart_attempts,
        "retry_count": retry_count,
        "retry_wait_seconds": retry_wait_seconds,
    }
    try:
        return await self._init_internal(**forwarded)
    except Exception as exc:
        # Read the session id before the session is ended so it can still
        # be shown to the user afterwards.
        support_session_id = telemetry._session_id
        telemetry.capture_exception(exc, {"phase": "agent_init"})
        telemetry.end_session(success=False, error=str(exc))
        if support_session_id:
            logger.info(
                f"If you need support, please include this session ID: {support_session_id}"
            )
        raise
136
+
137
async def _init_internal(
    self,
    api_key: str | None = None,
    server_restart_attempts: int = 3,
    retry_count: int = 5,
    retry_wait_seconds: int = 5,
):
    """Perform the actual agent initialization.

    Depending on the agent configuration this either resolves a cloud mobile,
    opens a BrowserStack session, or sets up clients and servers for a local
    device.

    Args:
        api_key: When given, the platform and cloud mobile services are
            (re)created with this key before anything else happens.
        server_restart_attempts: Upper bound on calls to ``_run_servers``
            for a local device.
        retry_count: Forwarded to ``_init_clients``.
        retry_wait_seconds: Forwarded to ``_init_clients``.

    Returns:
        ``True`` once the agent is initialized, including when it already was.

    Raises:
        CloudMobileServiceUninitializedError: A cloud mobile is configured but
            no cloud mobile service is available.
        ServerStartupError: The BrowserStack session, the IDB companion, the
            WDA connection, or the local servers could not be started.
        ExecutableNotFoundError: Neither ``adb`` nor ``xcrun`` is on PATH.
        DeviceNotFoundError: No device id / platform could be determined.
    """
    if api_key:
        self._platform_service = PlatformService(api_key=api_key)
        self._cloud_mobile_service = CloudMobileService(api_key=api_key)

    # Guard against re-initialization before any branch runs. Previously this
    # check sat after the cloud and BrowserStack branches, so calling init()
    # twice on a BrowserStack agent opened a second remote session and dropped
    # the reference to the first client.
    if self._initialized:
        logger.warning("Agent is already initialized. Skipping...")
        return True

    # Skip initialization for cloud devices - no local setup required
    if self._config.cloud_mobile_id_or_ref:
        if not self._cloud_mobile_service:
            raise CloudMobileServiceUninitializedError()
        self._cloud_mobile_id = await self._cloud_mobile_service.resolve_cloud_mobile_id(
            cloud_mobile_id_or_ref=self._config.cloud_mobile_id_or_ref,
        )
        logger.info("Cloud device configured - skipping local initialization")
        self._initialized = True
        return True

    # Handle BrowserStack initialization
    if self._config.browserstack_config:
        logger.info("Initializing BrowserStack session...")
        self._ios_client = BrowserStackClientWrapper(config=self._config.browserstack_config)
        session_started = await self._ios_client.init_client()
        if not session_started:
            raise ServerStartupError(
                message="Failed to create BrowserStack session. "
                "Please check your credentials and device configuration."
            )
        self._ios_device_type = DeviceType.BROWSERSTACK
        self._adb_client = None
        self._ui_adb_client = None
        logger.success("BrowserStack session created successfully")

        self._device_context = await self._get_device_context(
            device_id="browserstack", platform=DevicePlatform.IOS
        )
        logger.info(self._device_context.to_str())
        logger.info("✅ Mobile-use agent initialized with BrowserStack.")
        self._initialized = True
        telemetry.capture_agent_initialized(
            platform=DevicePlatform.IOS.value,
            device_id="browserstack",
        )
        return True

    # Local devices need at least one of the two command-line tools.
    if not which("adb") and not which("xcrun"):
        raise ExecutableNotFoundError("cli_tools")

    # Get first available device ID
    if not self._config.device_id or not self._config.device_platform:
        device_id, platform, ios_device_type = get_first_device(logger=logger)
    else:
        device_id, platform = self._config.device_id, self._config.device_platform
        ios_device_type = None  # Will be auto-detected in _init_clients

    if not device_id or not platform:
        error_msg = "No device found. Exiting."
        logger.error(error_msg)
        raise DeviceNotFoundError(error_msg)

    # Initialize clients
    self._init_clients(
        device_id=device_id,
        platform=platform,
        ios_device_type=ios_device_type,
        retry_count=retry_count,
        retry_wait_seconds=retry_wait_seconds,
    )

    # Initialize iOS client (IDB companion for simulators, WDA already running for physical)
    if self._ios_client:
        if isinstance(self._ios_client, IdbClientWrapper):
            logger.info("Starting IDB companion for iOS simulator...")
            companion_started = await self._ios_client.init_companion()
            if not companion_started:
                raise ServerStartupError(
                    message="Failed to start IDB companion for iOS simulator. "
                    "Please ensure fb-idb is installed: https://fbidb.io/docs/installation/"
                )
            logger.success("IDB companion started successfully")
        elif isinstance(self._ios_client, WdaClientWrapper):
            logger.info("Connecting to WebDriverAgent for physical iOS device...")
            wda_connected = await self._ios_client.init_client()
            if not wda_connected:
                raise ServerStartupError(
                    message="Failed to connect to WebDriverAgent. "
                    "Please ensure WDA is running on your device. "
                    "See the setup instructions above."
                )
            logger.success("WDA client connected for physical device")

    # Start necessary servers; stop at the first successful start and give up
    # once the attempt budget is exhausted.
    restart_attempt = 0
    while restart_attempt < server_restart_attempts:
        success = self._run_servers(
            device_id=device_id,
            platform=platform,
        )
        if success:
            break

        restart_attempt += 1
        if restart_attempt < server_restart_attempts:
            logger.warning(
                f"Server start failed, attempting restart "
                f"{restart_attempt}/{server_restart_attempts}"
            )
        else:
            error_msg = "Mobile-use servers failed to start after all restart attempts."
            logger.error(error_msg)
            raise ServerStartupError(message=error_msg)

    self._device_context = await self._get_device_context(
        device_id=device_id, platform=platform
    )
    logger.info(self._device_context.to_str())
    logger.info("✅ Mobile-use agent initialized.")
    self._initialized = True
    telemetry.capture_agent_initialized(
        platform=platform.value,
        device_id=device_id,
    )
    return True
268
+
269
+ async def install_apk(self, apk_path: str | Path) -> None:
270
+ """
271
+ Install an APK on the connected device.
272
+ For cloud mobiles, the APK must be x86_64 compatible.
273
+
274
+ Args:
275
+ apk_path: Path to the local APK file to install
276
+
277
+ Raises:
278
+ AgentNotInitializedError: If the agent is not initialized
279
+ AgentError: If attempting to install on non-Android device or ADB operations fail
280
+ FileNotFoundError: If the APK file doesn't exist
281
+ CloudMobileServiceUninitializedError: If cloud service is unavailable
282
+ """
283
+ try:
284
+ await self._install_apk_internal(apk_path)
285
+ except Exception as e:
286
+ telemetry.capture_exception(e, {"phase": "install_apk"})
287
+ raise
288
+
289
+ async def _install_apk_internal(self, apk_path: str | Path) -> None:
290
+ if isinstance(apk_path, str):
291
+ apk_path = Path(apk_path)
292
+
293
+ if not apk_path.exists():
294
+ raise FileNotFoundError(f"APK file not found: {apk_path}")
295
+
296
+ if self._config.cloud_mobile_id_or_ref:
297
+ await self._install_apk_on_cloud_mobile(apk_path)
298
+ else:
299
+ if not self._initialized:
300
+ raise AgentNotInitializedError()
301
+
302
+ if self._device_context.mobile_platform != DevicePlatform.ANDROID:
303
+ raise AgentError(
304
+ "APK can only be installed on Android devices but got "
305
+ f"'{self._device_context.mobile_platform.value}'"
306
+ )
307
+
308
+ device_id = self._device_context.device_id
309
+ logger.info(f"Installing APK on Android device '{device_id}'")
310
+ if not self._adb_client:
311
+ raise AgentError("ADB client not initialized")
312
+
313
+ device = self._adb_client.device(serial=device_id)
314
+ await asyncio.to_thread(device.install, apk_path)
315
+ logger.info(f"APK installed successfully on Android device '{device_id}'")
316
+
317
async def _install_apk_on_cloud_mobile(self, apk_path: Path) -> None:
    """
    Install an APK on the agent's cloud mobile.

    The cloud mobile's platform is checked first, then the device is started
    (and awaited until ready) and the APK is handed to the cloud mobile
    service for installation.

    Args:
        apk_path: Local APK file to install.

    Raises:
        AgentTaskRequestError: No cloud mobile ID has been resolved.
        CloudMobileServiceUninitializedError: No cloud mobile service is set.
        AgentError: The cloud mobile reports a platform other than "android".
    """
    cloud_id = self._cloud_mobile_id
    if not cloud_id:
        raise AgentTaskRequestError("Cloud mobile ID is not configured")

    service = self._cloud_mobile_service
    if not service:
        raise CloudMobileServiceUninitializedError()

    # Fail before booting anything when the device is known not to be Android.
    # A missing/empty platform value is let through.
    status = await service._get_virtual_mobile_status(cloud_id)
    reported_platform = status.platform
    if reported_platform and reported_platform != "android":
        raise AgentError(
            f"APK can only be installed on Android cloud mobiles but got '{reported_platform}'"
        )

    logger.info(f"Starting cloud mobile '{cloud_id}' for APK installation...")
    await service.start_and_wait_for_ready(cloud_mobile_id=cloud_id)

    apk_name = apk_path.name
    logger.info(f"Installing APK '{apk_name}' on cloud mobile '{cloud_id}'")
    await service.install_apk(cloud_mobile_id=cloud_id, apk_path=apk_path)
    logger.success(f"APK '{apk_name}' installed successfully")
349
+
350
def new_task(self, goal: str):
    """
    Start building a task request for the given goal.

    The builder is seeded with the agent config's ``task_request_defaults``.

    Args:
        goal: Natural language description of what to accomplish

    Returns:
        A TaskRequestBuilder. Further options can be chained on it, e.g.:
        - .with_output_format() for structured output
        - .with_output_description() for output description
        - .with_locked_app_package() to restrict execution to a specific app
        - .using_profile() to specify an LLM profile
        - .with_max_steps() to set maximum execution steps
        - .with_trace_recording() to enable trace recording
        - .with_name() to set a custom task name
    """
    defaults = self._config.task_request_defaults
    builder_type = TaskRequestBuilder[None]
    return builder_type.from_common(goal=goal, common=defaults)
371
+
372
@overload
async def run_task(
    self,
    *,
    goal: str,
    output: type[TOutput],
    profile: str | AgentProfile | None = None,
    name: str | None = None,
    locked_app_package: str | None = None,
) -> TOutput | None: ...

@overload
async def run_task(
    self,
    *,
    goal: str,
    output: str,
    profile: str | AgentProfile | None = None,
    name: str | None = None,
    locked_app_package: str | None = None,
) -> str | dict | None: ...

@overload
async def run_task(
    self,
    *,
    goal: str,
    output=None,
    profile: str | AgentProfile | None = None,
    name: str | None = None,
    locked_app_package: str | None = None,
) -> str | None: ...

@overload
async def run_task(
    self,
    *,
    request: TaskRequest[None],
    locked_app_package: str | None = None,
) -> str | dict | None: ...

@overload
async def run_task(
    self,
    *,
    request: TaskRequest[TOutput],
    locked_app_package: str | None = None,
) -> TOutput | None: ...

@overload
async def run_task(
    self,
    *,
    request: PlatformTaskRequest[None],
    locked_app_package: str | None = None,
) -> str | dict | None: ...

@overload
async def run_task(
    self,
    *,
    request: PlatformTaskRequest[TOutput],
    locked_app_package: str | None = None,
) -> TOutput | None: ...

async def run_task(
    self,
    *,
    goal: str | None = None,
    output: type[TOutput] | str | None = None,
    profile: str | AgentProfile | None = None,
    locked_app_package: str | None = None,
    name: str | None = None,
    request: TaskRequest[TOutput] | PlatformTaskRequest[TOutput] | None = None,
) -> str | dict | TOutput | None:
    """Run a task, either from a ready-made request or from a plain goal.

    Args:
        goal: Natural language goal; required when no ``request`` is given.
        output: A string is used as an output description; any other
            non-None value (except ``NoneType``) is used as output format.
        profile: Profile passed to the request builder (goal form only).
        locked_app_package: App package to lock the task to.
        name: Custom task name (goal form only).
        request: A pre-built ``TaskRequest`` or ``PlatformTaskRequest``. When
            given, ``goal``, ``output``, ``profile`` and ``name`` are not read.

    Returns:
        Whatever ``_run_cloud_mobile_task`` or ``_run_task`` returns.

    Raises:
        AgentTaskRequestError: A cloud mobile is configured but ``request`` is
            not a ``PlatformTaskRequest``, or neither ``request`` nor ``goal``
            was provided.
        PlatformServiceUninitializedError: A ``PlatformTaskRequest`` was given
            but no platform service is available.
    """
    # Cloud mobiles only accept platform task requests and never run the
    # local execution path.
    if self._config.cloud_mobile_id_or_ref:
        if request is None or not isinstance(request, PlatformTaskRequest):
            raise AgentTaskRequestError(
                "When using a cloud mobile, only PlatformTaskRequest is supported. "
                "Use AgentConfigBuilder.for_cloud_mobile() only with PlatformTaskRequest."
            )
        return await self._run_cloud_mobile_task(
            request=request, locked_app_package=locked_app_package
        )

    # Goal form: assemble a request with the builder, then run it locally.
    if request is None:
        if goal is None:
            raise AgentTaskRequestError("Goal is required")
        builder = self.new_task(goal=goal)
        if isinstance(output, str):
            builder.with_output_description(description=output)
        elif output is not None and output is not NoneType:
            builder.with_output_format(output_format=output)
        if profile is not None:
            builder.using_profile(profile=profile)
        if name is not None:
            builder.with_name(name=name)
        if locked_app_package is not None:
            builder.with_locked_app_package(package_name=locked_app_package)
        return await self._run_task(builder.build())

    # Request form, local execution.
    platform_task_info = None
    if isinstance(request, PlatformTaskRequest):
        if not self._platform_service:
            raise PlatformServiceUninitializedError()
        platform_task_info = await self._platform_service.create_task_run(
            request=request,
            locked_app_package=locked_app_package,
            enable_video_tools=self._config.video_recording_enabled,
        )
        if isinstance(request, CloudDevicePlatformTaskRequest):
            # Publish the run id on the request and signal its event.
            request.task_run_id = platform_task_info.task_run.id
            request.task_run_id_available_event.set()
        # Register the profile returned by the platform under its name, then
        # continue with the task request the platform produced.
        resolved_profile = platform_task_info.llm_profile
        self._config.agent_profiles[resolved_profile.name] = resolved_profile
        request = platform_task_info.task_request
    elif locked_app_package is not None:
        if request.locked_app_package:
            logger.warning(
                "Locked app package specified both in the request and as a parameter. "
                "Using the parameter value."
            )
        request.locked_app_package = locked_app_package

    return await self._run_task(
        request=request,
        task_info=platform_task_info,
        platform_service=self._platform_service,
    )
500
+
501
async def _run_cloud_mobile_task(
    self,
    request: PlatformTaskRequest[TOutput],
    locked_app_package: str | None = None,
) -> str | dict | TOutput | None:
    """
    Execute a task on a cloud mobile.

    This method triggers the task execution on the Platform and polls
    for completion without running any agentic logic locally.

    Args:
        request: Platform task request to run on the cloud mobile.
        locked_app_package: Optional app package forwarded to the cloud
            mobile service.

    Returns:
        The output reported by the cloud mobile service when the final
        status is "completed".

    Raises:
        AgentTaskRequestError: No cloud mobile ID is set; video recording is
            enabled but the profile has no video_analyzer; the task ended as
            "failed", "cancelled" or with an unknown status; or an unexpected
            exception occurred during execution (wrapped).
        CloudMobileServiceUninitializedError: No cloud mobile service is set.
        PlatformServiceUninitializedError: No platform service is set.
    """
    if not self._cloud_mobile_id:
        raise AgentTaskRequestError("Cloud mobile ID is not configured")

    if not self._cloud_mobile_service:
        raise CloudMobileServiceUninitializedError()

    if not self._platform_service:
        raise PlatformServiceUninitializedError()

    # With video recording enabled, the profile used by the request (or the
    # default profile name) must define a video_analyzer.
    if self._config.video_recording_enabled:
        profile_name = request.profile or DEFAULT_PROFILE_NAME
        _, profile = await self._platform_service.get_profile(profile_name)
        if not profile.llm_config.utils.video_analyzer:
            raise AgentTaskRequestError(
                f"video_recording_enabled: profile '{profile_name}' "
                "must have a video_analyzer agent configured"
            )

    # Start cloud mobile if not already started
    logger.info(f"Starting cloud mobile '{self._cloud_mobile_id}'...")
    await self._cloud_mobile_service.start_and_wait_for_ready(
        cloud_mobile_id=self._cloud_mobile_id,
    )
    logger.info(
        f"Starting cloud mobile task execution '{self._cloud_mobile_id}'",
    )

    def log_callback(message: str):
        """Callback for logging timeline updates."""
        logger.info(message)

    def status_callback(
        status: TaskRunStatus,
        status_message: str | None,
    ):
        """Callback for status updates."""
        logger.info(f"Task status update: [{status}] {status_message}")

    # Runs as its own asyncio task (created below); the service and id are
    # passed in explicitly rather than read from self.
    async def _execute_cloud(cloud_mobile_service: CloudMobileService, cloud_mobile_id: str):
        try:
            # Execute task on cloud mobile and wait for completion
            final_status, error, output = await cloud_mobile_service.run_task_on_cloud_mobile(
                cloud_mobile_id=cloud_mobile_id,
                request=request,
                on_status_update=status_callback,
                on_log=log_callback,
                locked_app_package=locked_app_package,
                enable_video_tools=self._config.video_recording_enabled,
            )
            # Only "completed" returns; every other final status raises.
            if final_status == "completed":
                logger.success("Cloud mobile task completed successfully")
                return output
            if final_status == "failed":
                logger.error(f"Cloud mobile task failed: {error}")
                raise AgentTaskRequestError(
                    f"Task execution failed on cloud mobile: {error}",
                )
            if final_status == "cancelled":
                logger.warning("Cloud mobile task was cancelled")
                raise AgentTaskRequestError("Task execution was cancelled")
            logger.error(f"Unknown cloud mobile task status: {final_status}")
            raise AgentTaskRequestError(f"Unknown task status: {final_status}")
        except asyncio.CancelledError:
            # Propagate cancellation to parent coroutine.
            logger.info("Task cancelled during execution, re-raising CancelledError")
            raise
        except AgentTaskRequestError as e:
            # Capture and re-raise known exceptions
            telemetry.capture_exception(e, {"phase": "cloud_mobile_task"})
            raise
        except Exception as e:
            # Anything else is wrapped so callers see one exception type.
            logger.error(f"Unexpected error during cloud mobile task execution: {e}")
            telemetry.capture_exception(e, {"phase": "cloud_mobile_task"})
            raise AgentTaskRequestError(f"Unexpected error: {e}") from e

    # NOTE(review): the lock is held until the awaited task below finishes and
    # _current_task is cleared in the finally before the lock is released, so
    # a concurrent call appears to wait on the lock rather than reach the
    # "cancel the running task" branch - confirm whether serializing or
    # cancelling is the intended behavior.
    async with self._task_lock:
        if self._current_task and not self._current_task.done():
            logger.warning(
                "Another cloud task is running; cancelling it before starting new one.",
            )
            self.stop_current_task()
            try:
                await self._current_task
            except asyncio.CancelledError:
                pass
        try:
            self._current_task = asyncio.create_task(
                _execute_cloud(
                    cloud_mobile_service=self._cloud_mobile_service,
                    cloud_mobile_id=self._cloud_mobile_id,
                ),
            )
            return await self._current_task
        finally:
            # Always drop the reference, whether the task returned, raised,
            # or was cancelled.
            self._current_task = None
607
+
608
+ async def _run_task(
609
+ self,
610
+ request: TaskRequest[TOutput],
611
+ task_info: PlatformTaskInfo | None = None,
612
+ platform_service: PlatformService | None = None,
613
+ ) -> str | dict | TOutput | None:
614
+ if not self._initialized:
615
+ raise AgentNotInitializedError()
616
+
617
+ if request.profile:
618
+ agent_profile = self._config.agent_profiles.get(request.profile)
619
+ if agent_profile is None:
620
+ raise AgentProfileNotFoundError(request.profile)
621
+ else:
622
+ agent_profile = self._config.default_profile
623
+
624
+ if (
625
+ self._config.video_recording_enabled
626
+ and agent_profile.llm_config.utils.video_analyzer is None
627
+ ):
628
+ raise ValueError(
629
+ f"with_video_recording_tools() requires 'video_analyzer' in utils for "
630
+ f"profile '{agent_profile.name}'. Add 'video_analyzer' with a "
631
+ f"video-capable model (e.g., gemini-3-flash-preview)."
632
+ )
633
+
634
+ logger.info(str(agent_profile))
635
+
636
+ on_status_changed = None
637
+ on_agent_thought = None
638
+ on_plan_changes = None
639
+ task_id = str(uuid.uuid4())
640
+ if task_info:
641
+ on_status_changed = self._get_task_status_change_callback(
642
+ task_info=task_info, platform_service=platform_service
643
+ )
644
+ on_agent_thought = self._get_new_agent_thought_callback(
645
+ task_info=task_info, platform_service=platform_service
646
+ )
647
+ on_plan_changes = self._get_plan_changes_callback(
648
+ task_info=task_info, platform_service=platform_service
649
+ )
650
+ task_id = task_info.task_run.id
651
+
652
+ task = Task(
653
+ id=task_id,
654
+ device=self._device_context,
655
+ status="pending",
656
+ request=request,
657
+ created_at=datetime.now(),
658
+ on_status_changed=on_status_changed,
659
+ )
660
+ self._tasks.append(task)
661
+ task_name = task.get_name()
662
+
663
+ # Extract API key from platform service if available
664
+ api_key = None
665
+ if platform_service:
666
+ api_key = platform_service._api_key
667
+
668
+ context = MobileUseContext(
669
+ trace_id=task.id,
670
+ device=self._device_context,
671
+ adb_client=self._adb_client,
672
+ ui_adb_client=self._ui_adb_client,
673
+ ios_client=self._ios_client,
674
+ llm_config=agent_profile.llm_config,
675
+ on_agent_thought=on_agent_thought,
676
+ on_plan_changes=on_plan_changes,
677
+ minitap_api_key=api_key,
678
+ video_recording_enabled=(
679
+ self._config.video_recording_enabled
680
+ and agent_profile.llm_config.utils.video_analyzer is not None
681
+ ),
682
+ )
683
+
684
+ self._prepare_tracing(task=task, context=context)
685
+ await self._prepare_app_lock(task=task, context=context)
686
+ self._prepare_output_files(task=task)
687
+
688
+ output_config = None
689
+ if request.output_description or request.output_format:
690
+ output_config = OutputConfig(
691
+ output_description=request.output_description,
692
+ structured_output=request.output_format, # type: ignore
693
+ )
694
+ logger.info(str(output_config))
695
+
696
+ logger.info(f"[{task_name}] Starting graph with goal: `{request.goal}`")
697
+ state = self._get_graph_state(task=task)
698
+ graph_input = state.model_dump()
699
+ task_start_time = datetime.now(UTC)
700
+
701
+ telemetry.capture_task_started(
702
+ task_id=task_id,
703
+ platform=self._device_context.mobile_platform.value,
704
+ has_locked_app=request.locked_app_package is not None,
705
+ )
706
+
707
+ async def _execute_task_logic():
708
+ last_state: State | None = None
709
+ last_state_snapshot: dict | None = None
710
+ output = None
711
+ try:
712
+ logger.info(f"[{task_name}] Invoking graph with input: {graph_input}")
713
+ await task.set_status(status="running", message="Invoking graph...")
714
+ async for chunk in (await get_graph(context)).astream(
715
+ input=graph_input,
716
+ config={
717
+ "recursion_limit": task.request.max_steps,
718
+ "callbacks": self._config.graph_config_callbacks,
719
+ },
720
+ stream_mode=["messages", "custom", "updates", "values"],
721
+ ):
722
+ stream_mode, payload = chunk
723
+ if stream_mode == "values":
724
+ last_state_snapshot = payload # type: ignore
725
+ last_state = State(**last_state_snapshot) # type: ignore
726
+ if task.request.thoughts_output_path:
727
+ record_events(
728
+ output_path=task.request.thoughts_output_path,
729
+ events=last_state.agents_thoughts,
730
+ )
731
+
732
+ if stream_mode == "updates":
733
+ for _, value in payload.items(): # type: ignore node name, node output
734
+ if value and "agents_thoughts" in value:
735
+ new_thoughts = value["agents_thoughts"]
736
+ last_item = new_thoughts[-1] if new_thoughts else None
737
+ if last_item:
738
+ log_agent_thought(
739
+ agent_thought=last_item,
740
+ )
741
+
742
+ if not last_state:
743
+ err = f"[{task_name}] No result received from graph"
744
+ logger.warning(err)
745
+ await task.finalize(content=output, state=last_state_snapshot, error=err)
746
+ return None
747
+
748
+ print_ai_response_to_stderr(graph_result=last_state)
749
+ output = await self._extract_output(
750
+ task_name=task_name,
751
+ ctx=context,
752
+ request=request,
753
+ output_config=output_config,
754
+ state=last_state,
755
+ )
756
+ logger.info(f"✅ Automation '{task_name}' is success ✅")
757
+ await task.finalize(content=output, state=last_state_snapshot)
758
+ duration = (datetime.now(UTC) - task_start_time).total_seconds()
759
+ steps_count = len(last_state.agents_thoughts) if last_state else 0
760
+ telemetry.capture_task_completed(
761
+ task_id=task_id,
762
+ success=True,
763
+ steps_count=steps_count,
764
+ duration_seconds=duration,
765
+ )
766
+ return output
767
+ except asyncio.CancelledError:
768
+ err = f"[{task_name}] Task cancelled"
769
+ logger.warning(err)
770
+ await task.finalize(
771
+ content=output,
772
+ state=last_state_snapshot,
773
+ error=err,
774
+ cancelled=True,
775
+ )
776
+ duration = (datetime.now(UTC) - task_start_time).total_seconds()
777
+ steps_count = len(last_state.agents_thoughts) if last_state else 0
778
+ telemetry.capture_task_completed(
779
+ task_id=task_id,
780
+ success=False,
781
+ steps_count=steps_count,
782
+ duration_seconds=duration,
783
+ cancelled=True,
784
+ )
785
+ raise
786
+ except Exception as e:
787
+ err = f"[{task_name}] Error running automation: {e}"
788
+ logger.error(err)
789
+ await task.finalize(
790
+ content=output,
791
+ state=last_state_snapshot,
792
+ error=err,
793
+ )
794
+ duration = (datetime.now(UTC) - task_start_time).total_seconds()
795
+ steps_count = len(last_state.agents_thoughts) if last_state else 0
796
+ telemetry.capture_task_completed(
797
+ task_id=task_id,
798
+ success=False,
799
+ steps_count=steps_count,
800
+ duration_seconds=duration,
801
+ )
802
+ telemetry.capture_exception(e, {"task_id": task_id})
803
+ if telemetry._session_id:
804
+ logger.info(
805
+ "If you need support, please include this session ID: "
806
+ f"{telemetry._session_id}"
807
+ )
808
+ raise
809
+ finally:
810
+ await self._finalize_tracing(task=task, context=context)
811
+
812
+ async with self._task_lock:
813
+ if self._current_task and not self._current_task.done():
814
+ logger.warning(
815
+ "Another automation task is already running. "
816
+ "Stopping it before starting the new one."
817
+ )
818
+ self.stop_current_task()
819
+ try:
820
+ await self._current_task
821
+ except asyncio.CancelledError:
822
+ pass
823
+
824
+ try:
825
+ self._current_task = asyncio.create_task(_execute_task_logic())
826
+ return await self._current_task
827
+ finally:
828
+ self._current_task = None
829
+
830
def stop_current_task(self):
    """Ask the in-flight automation task, if there is one, to cancel.

    This only issues the cancellation request through ``cancel()``; it does
    not wait for the task to actually finish.
    """
    running = self._current_task
    if not running or running.done():
        logger.info("No active automation task to stop.")
        return

    logger.info("Requesting to stop the current automation task...")
    if running.cancel():
        logger.success("Cancellation request for the current task was sent.")
        return

    # cancel() returned False: the request could not be registered.
    logger.warning(
        "Could not send cancellation request for the current task "
        "(it may already be completing)."
    )
844
+
845
async def get_screenshot(self) -> Image.Image:
    """
    Capture a screenshot from the mobile device.

    When a cloud mobile id is configured, the screenshot is requested from the
    cloud mobile service. Otherwise the local device is used: ADB for Android,
    ``xcrun simctl`` for iOS.

    Returns:
        Screenshot as PIL Image

    Raises:
        CloudMobileServiceUninitializedError: If a cloud mobile id is set but the
            cloud mobile service is not available
        AgentNotInitializedError: If a local device is used and the agent is not
            initialized
        Exception: If the ADB client is missing, the platform is unsupported, or
            the iOS screenshot command fails (chained to the underlying
            ``CalledProcessError``)
    """
    # Cloud mobiles take priority over any local device configuration.
    if self._cloud_mobile_id:
        if not self._cloud_mobile_service:
            raise CloudMobileServiceUninitializedError()
        screenshot = await self._cloud_mobile_service.get_screenshot(
            cloud_mobile_id=self._cloud_mobile_id,
        )
        return screenshot

    # Local device - use ADB or xcrun directly
    if not self._initialized:
        raise AgentNotInitializedError()

    if self._device_context.mobile_platform == DevicePlatform.ANDROID:
        logger.info("Capturing screenshot from local Android device")
        if not self._adb_client:
            raise Exception("ADB client not initialized")

        device = self._adb_client.device(serial=self._device_context.device_id)
        # The ADB call is blocking, so run it off the event loop.
        screenshot = await asyncio.to_thread(device.screenshot)
        logger.info("Screenshot captured from local Android device")
        return screenshot

    elif self._device_context.mobile_platform == DevicePlatform.IOS:
        import functools
        import subprocess
        from io import BytesIO

        logger.info("Capturing screenshot from local iOS device")
        try:
            # xcrun simctl io <device> screenshot --type=png -
            # ("-" writes the PNG to stdout so no temporary file is needed.)
            result = await asyncio.to_thread(
                functools.partial(
                    subprocess.run,
                    [
                        "xcrun",
                        "simctl",
                        "io",
                        self._device_context.device_id,
                        "screenshot",
                        "--type=png",
                        "-",
                    ],
                    capture_output=True,
                    check=True,
                )
            )
            # Convert bytes to PIL Image
            screenshot = Image.open(BytesIO(result.stdout))
            logger.info("Screenshot captured from local iOS device")
            return screenshot
        except subprocess.CalledProcessError as e:
            logger.error(f"Failed to capture screenshot: {e}")
            # Chain explicitly so the original command failure stays attached as the cause.
            raise Exception(f"Failed to capture screenshot from iOS device: {e}") from e

    else:
        raise Exception(f"Unsupported platform: {self._device_context.mobile_platform}")
919
+
920
async def clean(self, force: bool = False):
    """Stop the agent and end the SDK telemetry session if one is open.

    In cloud mode (a cloud mobile id is set) the agent is always marked as
    stopped and ``force`` is not consulted. In local mode nothing happens
    unless the agent is initialized or ``force`` is true; the iOS client, when
    present, is cleaned up and dropped before the agent is marked as stopped.
    """
    cloud_mode = bool(self._cloud_mobile_id)

    if not cloud_mode:
        if not (self._initialized or force):
            return
        if self._ios_client:
            await self._ios_client.cleanup()
            self._ios_client = None

    self._initialized = False
    if cloud_mode:
        logger.info("✅ Cloud-mode agent stopped.")
    else:
        logger.info("✅ Mobile-use agent stopped.")

    # End telemetry session if started by SDK (not CLI)
    if telemetry._session_id and telemetry._session_context.get("source") == "sdk":
        telemetry.end_session(success=True)
941
+
942
async def _prepare_app_lock(self, task: Task, context: MobileUseContext):
    """Launch the task's locked app, if any, and record the launch status on the context."""
    package = task.request.locked_app_package
    if not package:
        return

    task_name = task.get_name()
    logger.info(f"[{task_name}] Preparing app lock for: {package}")

    launch_status = await _handle_initial_app_launch(ctx=context, locked_app_package=package)

    # Reuse an execution setup that is already on the context; create one otherwise.
    setup = context.execution_setup
    if setup is None:
        context.execution_setup = ExecutionSetup(app_lock_status=launch_status)
    else:
        setup.app_lock_status = launch_status

    # Only an explicit False counts as a failed launch.
    if launch_status.locked_app_initial_launch_success is False:
        logger.warning(
            f"[{task_name}] Failed to launch locked app: "
            f"{launch_status.locked_app_initial_launch_error}"
        )
962
+
963
def _prepare_tracing(self, task: Task, context: MobileUseContext):
    """Create the trace directories and attach an ExecutionSetup when tracing is requested.

    Does nothing when the request has ``record_trace`` disabled.
    """
    request = task.request
    if not request.record_trace:
        return

    task_name = task.get_name()
    tmp_root = self._tmp_traces_dir
    staging_dir = Path(tmp_root / task_name).resolve()
    output_dir = Path(request.trace_path).resolve()

    logger.info(f"[{task_name}] 📂 Traces output path: {output_dir}")
    logger.info(f"[{task_name}] 📄📂 Traces temp path: {staging_dir}")

    for directory in (output_dir, staging_dir):
        directory.mkdir(parents=True, exist_ok=True)

    context.execution_setup = ExecutionSetup(
        traces_path=tmp_root,
        trace_name=task_name,
        enable_remote_tracing=request.enable_remote_tracing,
    )
981
+
982
async def _finalize_tracing(self, task: Task, context: MobileUseContext):
    """Compile the recorded trace and move it to the requested output folder.

    Does nothing when the context has no execution setup, or when that setup
    has no ``traces_path`` / ``trace_name``. Otherwise it builds the GIF and
    steps JSON from the temp trace folder, optionally uploads the GIF to the
    platform, removes the intermediate images and steps JSON, and moves the
    folder to ``<trace_path>/<trace_name>_{PASS|FAIL}_<timestamp>``.
    """
    import shutil

    exec_setup_ctx = context.execution_setup
    if not exec_setup_ctx:
        return

    if exec_setup_ctx.traces_path is None or exec_setup_ctx.trace_name is None:
        return

    task_name = task.get_name()
    status = "_PASS" if task.status == "completed" else "_FAIL"
    ts = task.created_at.strftime("%Y-%m-%dT%H-%M-%S")
    new_name = f"{exec_setup_ctx.trace_name}{status}_{ts}"

    temp_trace_path = (self._tmp_traces_dir / exec_setup_ctx.trace_name).resolve()
    traces_output_path = Path(task.request.trace_path).resolve()

    logger.info(f"[{task_name}] Compiling trace FROM FOLDER: " + str(temp_trace_path))
    create_gif_from_trace_folder(temp_trace_path)
    create_steps_json_from_trace_folder(temp_trace_path)

    if exec_setup_ctx.enable_remote_tracing:
        gif_path = temp_trace_path / "trace.gif"
        if gif_path.exists() and self._platform_service:
            # Upload is best-effort: a failure is logged and must not block local output.
            try:
                task_run_id = await self._platform_service.upload_trace_gif(
                    task_run_id=task.id, gif_path=gif_path
                )
                if task_run_id:
                    platform_url = f"{settings.MINITAP_BASE_URL}/task-runs/{task_run_id}"
                    logger.info(f"[{task_name}] 🌐 View on platform: {platform_url}")
            except Exception as e:
                logger.warning(f"[{task_name}] Failed to upload trace GIF: {e}")

    logger.info(f"[{task_name}] Video created, removing dust...")
    remove_images_from_trace_folder(temp_trace_path)
    remove_steps_json_from_trace_folder(temp_trace_path)
    logger.info(f"[{task_name}] 📽️ Trace compiled, moving to output path 📽️")

    # shutil.move falls back to copy-and-delete when the temp folder and the
    # output folder are on different filesystems, where a plain rename fails.
    destination = traces_output_path / new_name
    output_folder_path = Path(shutil.move(str(temp_trace_path), str(destination))).resolve()
    logger.info(f"[{task_name}] 📂✅ Traces located in: {output_folder_path}")
1022
+
1023
def _prepare_output_files(self, task: Task):
    """Validate and create the LLM-output and thoughts-output files that the request sets."""
    request = task.request
    for target in (request.llm_output_path, request.thoughts_output_path):
        if target:
            _validate_and_prepare_file(file_path=target)
1028
+
1029
async def _extract_output(
    self,
    task_name: str,
    ctx: MobileUseContext,
    request: TaskRequest[TOutput],
    output_config: OutputConfig | None,
    state: State,
) -> str | dict | TOutput | None:
    """Produce the task's final output from the last graph state.

    When the output config asks for a structured format, the outputter is run,
    its result is recorded, and the result is returned (validated through
    ``request.output_format`` when one is set); any failure there is logged
    and yields ``None``. Otherwise the last agent thought is recorded and
    returned, or ``None`` when there are no thoughts.
    """
    wants_structured = output_config and output_config.needs_structured_format()

    if not wants_structured:
        if not (state and state.agents_thoughts):
            return None
        final_thought = state.agents_thoughts[-1]
        logger.info(str(final_thought))
        record_events(output_path=request.llm_output_path, events=final_thought)
        return final_thought

    logger.info(f"[{task_name}] Generating structured output...")
    try:
        result = await outputter(
            ctx=ctx,
            output_config=output_config,
            graph_output=state,
        )
        logger.info(f"[{task_name}] Structured output: {result}")
        record_events(output_path=request.llm_output_path, events=result)

        schema = request.output_format
        if schema is None or schema is NoneType:
            return result
        return schema.model_validate(result)
    except Exception as e:
        logger.error(f"[{task_name}] Failed to generate structured output: {e}")
        return None
1059
+
1060
def _get_graph_state(self, task: Task):
    """Build the initial graph ``State`` for a task.

    Only the goal and the step budget come from the request; every other
    field starts empty or ``None``.
    """
    request = task.request
    initial_fields = dict(
        messages=[],
        initial_goal=request.goal,
        subgoal_plan=[],
        latest_ui_hierarchy=None,
        latest_screenshot=None,
        focused_app_info=None,
        device_date=None,
        structured_decisions=None,
        complete_subgoals_by_ids=[],
        agents_thoughts=[],
        remaining_steps=request.max_steps,
        executor_messages=[],
        cortex_last_thought=None,
        scratchpad={},
    )
    return State(**initial_fields)
1077
+
1078
def _init_clients(
    self,
    device_id: str,
    platform: DevicePlatform,
    ios_device_type: DeviceType | None,
    retry_count: int,
    retry_wait_seconds: int,
):
    """Create the device clients that match ``platform`` and clear the others.

    Android gets an ADB client and a UIAutomator client; iOS gets a client
    from ``get_ios_client``. ``retry_count`` and ``retry_wait_seconds`` are
    accepted but not used here.
    """
    if platform == DevicePlatform.ANDROID:
        servers = self._config.servers
        self._adb_client = AdbClient(host=servers.adb_host, port=servers.adb_port)
        self._ui_adb_client = UIAutomatorClient(device_id=device_id)
    else:
        self._adb_client = None
        self._ui_adb_client = None

    if not (platform == DevicePlatform.IOS):
        self._ios_client = None
        self._ios_device_type = None
        return

    # Initialize iOS client using factory (auto-detects device type if not provided)
    self._ios_client = get_ios_client(
        udid=device_id,
        config=self._config.ios_client_config,
    )
    if ios_device_type:
        self._ios_device_type = ios_device_type
    elif isinstance(self._ios_client, WdaClientWrapper):
        # A WDA-backed client is treated as a physical device.
        self._ios_device_type = DeviceType.PHYSICAL
    else:
        self._ios_device_type = DeviceType.SIMULATOR
1109
+
1110
def _run_servers(self, device_id: str, platform: DevicePlatform) -> bool:
    """Check that the client required for ``platform`` exists.

    Returns:
        Always ``True`` when no error is raised.

    Raises:
        ValueError: If the UIAutomator client (Android) or the iOS client (iOS)
            is missing.
    """
    if platform == DevicePlatform.ANDROID:
        if self._ui_adb_client:
            logger.info("✓ UIAutomator client available for Android")
            return True
        error_msg = (
            "UIAutomator client is required for Android but not available. "
            "Please ensure UIAutomator2 is properly installed and configured."
        )
        logger.error(error_msg)
        raise ValueError(error_msg)

    if platform == DevicePlatform.IOS:
        ios_client = self._ios_client
        if not ios_client:
            error_msg = (
                "iOS client is required but not available. "
                "Ensure idb (simulators) or WDA (physical) is available."
            )
            logger.error(error_msg)
            raise ValueError(error_msg)

        # Label the concrete client implementation for the log line.
        if isinstance(ios_client, WdaClientWrapper):
            client_type = "WDA"
        elif isinstance(ios_client, BrowserStackClientWrapper):
            client_type = "BrowserStack"
        else:
            client_type = "IDB"
        logger.info(f"✓ iOS client available ({client_type})")

    return True
1137
+
1138
async def _get_device_context(
    self,
    device_id: str,
    platform: DevicePlatform,
) -> DeviceContext:
    """Build the ``DeviceContext`` for a device, measuring its screen when possible.

    Android dimensions come from the UIAutomator client's screen data; any
    other platform is handled as iOS, where the dimensions are read from a
    screenshot taken by the iOS client. If the client is missing or the
    measurement fails, defaults are used (1080x2340 for Android, 375x812 for
    iOS). The host platform is reported as "WINDOWS" on Windows and "LINUX"
    otherwise.
    """
    from platform import system

    host_os = system()

    if platform == DevicePlatform.ANDROID:
        width, height = 1080, 2340
        if not self._ui_adb_client:
            logger.warning("UIAutomator client not available, using default dimensions")
        else:
            try:
                # Use UIAutomator to get actual screen dimensions
                screen = self._ui_adb_client.get_screen_data()
                measured_w = screen.width
                measured_h = screen.height
                logger.info(f"Retrieved Android screen dimensions: {measured_w}x{measured_h}")
                width, height = measured_w, measured_h
            except Exception as e:
                logger.warning(f"Failed to get Android screen dimensions: {e}, using defaults")
    else:  # iOS
        width, height = 375, 812
        if not self._ios_client:
            logger.warning("IDB client not available, using default dimensions")
        else:
            try:
                # Use iOS client to take a screenshot and get dimensions
                raw_png = await self._ios_client.screenshot()  # type: ignore[call-arg]
                if not raw_png:
                    logger.warning("IDB screenshot returned None, using default dimensions")
                else:
                    img = Image.open(BytesIO(raw_png))
                    measured_w = img.width
                    measured_h = img.height
                    logger.info(f"Retrieved iOS screen dimensions: {measured_w}x{measured_h}")
                    width, height = measured_w, measured_h
            except Exception as e:
                logger.warning(f"Failed to get iOS screen dimensions: {e}, using defaults")

    return DeviceContext(
        host_platform="WINDOWS" if host_os == "Windows" else "LINUX",
        mobile_platform=platform,
        device_id=device_id,
        device_width=width,
        device_height=height,
    )
1193
+
1194
def _get_task_status_change_callback(
    self,
    task_info: PlatformTaskInfo,
    platform_service: PlatformService | None = None,
) -> Callable[[TaskRunStatus, str | None, Any | None], Coroutine]:
    """Return an async callback that pushes task-run status changes to the platform.

    The explicit ``platform_service`` wins over the agent's own service. The
    callback raises ``PlatformServiceUninitializedError`` when neither is
    available; a failure of the update call itself is only logged.
    """
    target_service = platform_service or self._platform_service

    async def change_status(
        status: TaskRunStatus,
        message: str | None = None,
        output: Any | None = None,
    ):
        if not target_service:
            raise PlatformServiceUninitializedError()
        update = dict(
            task_run_id=task_info.task_run.id,
            status=status,
            message=message,
            output=output,
        )
        try:
            await target_service.update_task_run_status(**update)
        except Exception as e:
            logger.error(f"Failed to update task run status: {e}")

    return change_status
1219
+
1220
+ def _get_plan_changes_callback(
1221
+ self,
1222
+ task_info: PlatformTaskInfo,
1223
+ platform_service: PlatformService | None = None,
1224
+ ) -> Callable[[list[Subgoal], IsReplan], Coroutine]:
1225
+ service = platform_service or self._platform_service
1226
+ current_plan: TaskRunPlanResponse | None = None
1227
+
1228
+ async def update_plan(plan: list[Subgoal], is_replan: IsReplan):
1229
+ nonlocal current_plan
1230
+
1231
+ if not service:
1232
+ raise PlatformServiceUninitializedError()
1233
+ try:
1234
+ if is_replan and current_plan:
1235
+ # End previous plan
1236
+ await service.upsert_task_run_plan(
1237
+ task_run_id=task_info.task_run.id,
1238
+ started_at=current_plan.started_at,
1239
+ plan=plan,
1240
+ ended_at=datetime.now(UTC),
1241
+ plan_id=current_plan.id,
1242
+ )
1243
+ current_plan = None
1244
+
1245
+ current_plan = await service.upsert_task_run_plan(
1246
+ task_run_id=task_info.task_run.id,
1247
+ started_at=current_plan.started_at if current_plan else datetime.now(UTC),
1248
+ plan=plan,
1249
+ ended_at=current_plan.ended_at if current_plan else None,
1250
+ plan_id=current_plan.id if current_plan else None,
1251
+ )
1252
+ except Exception as e:
1253
+ logger.error(f"Failed to update plan: {e}")
1254
+
1255
+ return update_plan
1256
+
1257
def _get_new_agent_thought_callback(
    self,
    task_info: PlatformTaskInfo,
    platform_service: PlatformService | None = None,
) -> Callable[[AgentNode, str], Coroutine]:
    """Return an async callback that forwards agent thoughts to the platform.

    The explicit ``platform_service`` wins over the agent's own service. The
    callback raises ``PlatformServiceUninitializedError`` when neither is
    available; a failure of the service call itself is only logged.
    """
    target_service = platform_service or self._platform_service

    async def add_agent_thought(agent: AgentNode, thought: str):
        if not target_service:
            raise PlatformServiceUninitializedError()
        payload = dict(
            task_run_id=task_info.task_run.id,
            agent=agent,
            thought=thought,
        )
        try:
            await target_service.add_agent_thought(**payload)
        except Exception as e:
            logger.error(f"Failed to add agent thought: {e}")

    return add_agent_thought
1277
+
1278
+
1279
def _validate_and_prepare_file(file_path: Path):
    """Make sure ``file_path`` can be used as an output file.

    Creates missing parent directories and the file itself; an existing file
    is left untouched.

    Raises:
        AgentTaskRequestError: If the path is an existing directory, or if the
            parent directories or the file cannot be created (chained to the
            underlying ``OSError``).
    """
    path_obj = Path(file_path)
    # is_dir() is already False for a path that does not exist.
    if path_obj.is_dir():
        raise AgentTaskRequestError(f"Error: Path '{file_path}' is a directory, not a file.")
    try:
        path_obj.parent.mkdir(parents=True, exist_ok=True)
        path_obj.touch(exist_ok=True)
    except OSError as e:
        raise AgentTaskRequestError(f"Error creating file '{file_path}': {e}") from e
1288
+
1289
+
1290
def print_ai_response_to_stderr(graph_result: State):
    """Print the content of the most recent ``AIMessage`` in the state to stderr.

    Nothing is printed when the state holds no ``AIMessage``.
    """
    latest_ai = next(
        (m for m in reversed(graph_result.messages) if isinstance(m, AIMessage)),
        None,
    )
    if latest_ai is None:
        return
    print(latest_ai.content, file=sys.stderr)