minitap-mobile-use 3.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (115) hide show
  1. minitap/mobile_use/__init__.py +0 -0
  2. minitap/mobile_use/agents/contextor/contextor.md +55 -0
  3. minitap/mobile_use/agents/contextor/contextor.py +175 -0
  4. minitap/mobile_use/agents/contextor/types.py +36 -0
  5. minitap/mobile_use/agents/cortex/cortex.md +135 -0
  6. minitap/mobile_use/agents/cortex/cortex.py +152 -0
  7. minitap/mobile_use/agents/cortex/types.py +15 -0
  8. minitap/mobile_use/agents/executor/executor.md +42 -0
  9. minitap/mobile_use/agents/executor/executor.py +87 -0
  10. minitap/mobile_use/agents/executor/tool_node.py +152 -0
  11. minitap/mobile_use/agents/hopper/hopper.md +15 -0
  12. minitap/mobile_use/agents/hopper/hopper.py +44 -0
  13. minitap/mobile_use/agents/orchestrator/human.md +12 -0
  14. minitap/mobile_use/agents/orchestrator/orchestrator.md +21 -0
  15. minitap/mobile_use/agents/orchestrator/orchestrator.py +134 -0
  16. minitap/mobile_use/agents/orchestrator/types.py +11 -0
  17. minitap/mobile_use/agents/outputter/human.md +25 -0
  18. minitap/mobile_use/agents/outputter/outputter.py +85 -0
  19. minitap/mobile_use/agents/outputter/test_outputter.py +167 -0
  20. minitap/mobile_use/agents/planner/human.md +14 -0
  21. minitap/mobile_use/agents/planner/planner.md +126 -0
  22. minitap/mobile_use/agents/planner/planner.py +101 -0
  23. minitap/mobile_use/agents/planner/types.py +51 -0
  24. minitap/mobile_use/agents/planner/utils.py +70 -0
  25. minitap/mobile_use/agents/summarizer/summarizer.py +35 -0
  26. minitap/mobile_use/agents/video_analyzer/__init__.py +5 -0
  27. minitap/mobile_use/agents/video_analyzer/human.md +5 -0
  28. minitap/mobile_use/agents/video_analyzer/video_analyzer.md +37 -0
  29. minitap/mobile_use/agents/video_analyzer/video_analyzer.py +111 -0
  30. minitap/mobile_use/clients/browserstack_client.py +477 -0
  31. minitap/mobile_use/clients/idb_client.py +429 -0
  32. minitap/mobile_use/clients/ios_client.py +332 -0
  33. minitap/mobile_use/clients/ios_client_config.py +141 -0
  34. minitap/mobile_use/clients/ui_automator_client.py +330 -0
  35. minitap/mobile_use/clients/wda_client.py +526 -0
  36. minitap/mobile_use/clients/wda_lifecycle.py +367 -0
  37. minitap/mobile_use/config.py +413 -0
  38. minitap/mobile_use/constants.py +3 -0
  39. minitap/mobile_use/context.py +106 -0
  40. minitap/mobile_use/controllers/__init__.py +0 -0
  41. minitap/mobile_use/controllers/android_controller.py +524 -0
  42. minitap/mobile_use/controllers/controller_factory.py +46 -0
  43. minitap/mobile_use/controllers/device_controller.py +182 -0
  44. minitap/mobile_use/controllers/ios_controller.py +436 -0
  45. minitap/mobile_use/controllers/platform_specific_commands_controller.py +199 -0
  46. minitap/mobile_use/controllers/types.py +106 -0
  47. minitap/mobile_use/controllers/unified_controller.py +193 -0
  48. minitap/mobile_use/graph/graph.py +160 -0
  49. minitap/mobile_use/graph/state.py +115 -0
  50. minitap/mobile_use/main.py +309 -0
  51. minitap/mobile_use/sdk/__init__.py +12 -0
  52. minitap/mobile_use/sdk/agent.py +1294 -0
  53. minitap/mobile_use/sdk/builders/__init__.py +10 -0
  54. minitap/mobile_use/sdk/builders/agent_config_builder.py +307 -0
  55. minitap/mobile_use/sdk/builders/index.py +15 -0
  56. minitap/mobile_use/sdk/builders/task_request_builder.py +236 -0
  57. minitap/mobile_use/sdk/constants.py +1 -0
  58. minitap/mobile_use/sdk/examples/README.md +83 -0
  59. minitap/mobile_use/sdk/examples/__init__.py +1 -0
  60. minitap/mobile_use/sdk/examples/app_lock_messaging.py +54 -0
  61. minitap/mobile_use/sdk/examples/platform_manual_task_example.py +67 -0
  62. minitap/mobile_use/sdk/examples/platform_minimal_example.py +48 -0
  63. minitap/mobile_use/sdk/examples/simple_photo_organizer.py +76 -0
  64. minitap/mobile_use/sdk/examples/smart_notification_assistant.py +225 -0
  65. minitap/mobile_use/sdk/examples/video_transcription_example.py +117 -0
  66. minitap/mobile_use/sdk/services/cloud_mobile.py +656 -0
  67. minitap/mobile_use/sdk/services/platform.py +434 -0
  68. minitap/mobile_use/sdk/types/__init__.py +51 -0
  69. minitap/mobile_use/sdk/types/agent.py +84 -0
  70. minitap/mobile_use/sdk/types/exceptions.py +138 -0
  71. minitap/mobile_use/sdk/types/platform.py +183 -0
  72. minitap/mobile_use/sdk/types/task.py +269 -0
  73. minitap/mobile_use/sdk/utils.py +29 -0
  74. minitap/mobile_use/services/accessibility.py +100 -0
  75. minitap/mobile_use/services/llm.py +247 -0
  76. minitap/mobile_use/services/telemetry.py +421 -0
  77. minitap/mobile_use/tools/index.py +67 -0
  78. minitap/mobile_use/tools/mobile/back.py +52 -0
  79. minitap/mobile_use/tools/mobile/erase_one_char.py +56 -0
  80. minitap/mobile_use/tools/mobile/focus_and_clear_text.py +317 -0
  81. minitap/mobile_use/tools/mobile/focus_and_input_text.py +153 -0
  82. minitap/mobile_use/tools/mobile/launch_app.py +86 -0
  83. minitap/mobile_use/tools/mobile/long_press_on.py +169 -0
  84. minitap/mobile_use/tools/mobile/open_link.py +62 -0
  85. minitap/mobile_use/tools/mobile/press_key.py +83 -0
  86. minitap/mobile_use/tools/mobile/stop_app.py +62 -0
  87. minitap/mobile_use/tools/mobile/swipe.py +156 -0
  88. minitap/mobile_use/tools/mobile/tap.py +154 -0
  89. minitap/mobile_use/tools/mobile/video_recording.py +177 -0
  90. minitap/mobile_use/tools/mobile/wait_for_delay.py +81 -0
  91. minitap/mobile_use/tools/scratchpad.py +147 -0
  92. minitap/mobile_use/tools/test_utils.py +413 -0
  93. minitap/mobile_use/tools/tool_wrapper.py +16 -0
  94. minitap/mobile_use/tools/types.py +35 -0
  95. minitap/mobile_use/tools/utils.py +336 -0
  96. minitap/mobile_use/utils/app_launch_utils.py +173 -0
  97. minitap/mobile_use/utils/cli_helpers.py +37 -0
  98. minitap/mobile_use/utils/cli_selection.py +143 -0
  99. minitap/mobile_use/utils/conversations.py +31 -0
  100. minitap/mobile_use/utils/decorators.py +124 -0
  101. minitap/mobile_use/utils/errors.py +6 -0
  102. minitap/mobile_use/utils/file.py +13 -0
  103. minitap/mobile_use/utils/logger.py +183 -0
  104. minitap/mobile_use/utils/media.py +186 -0
  105. minitap/mobile_use/utils/recorder.py +52 -0
  106. minitap/mobile_use/utils/requests_utils.py +37 -0
  107. minitap/mobile_use/utils/shell_utils.py +20 -0
  108. minitap/mobile_use/utils/test_ui_hierarchy.py +178 -0
  109. minitap/mobile_use/utils/time.py +6 -0
  110. minitap/mobile_use/utils/ui_hierarchy.py +132 -0
  111. minitap/mobile_use/utils/video.py +281 -0
  112. minitap_mobile_use-3.3.0.dist-info/METADATA +329 -0
  113. minitap_mobile_use-3.3.0.dist-info/RECORD +115 -0
  114. minitap_mobile_use-3.3.0.dist-info/WHEEL +4 -0
  115. minitap_mobile_use-3.3.0.dist-info/entry_points.txt +3 -0
@@ -0,0 +1,111 @@
1
+ """
2
+ Video Analyzer utility for analyzing video content using Gemini models.
3
+
4
+ This utility sends video files to video-capable Gemini models for analysis
5
+ and returns text descriptions based on the provided prompt.
6
+ """
7
+
8
+ import base64
9
+ from pathlib import Path
10
+
11
+ from jinja2 import Template
12
+ from langchain_core.messages import HumanMessage, SystemMessage
13
+
14
+ from minitap.mobile_use.context import MobileUseContext
15
+ from minitap.mobile_use.services.llm import get_llm, invoke_llm_with_timeout_message, with_fallback
16
+ from minitap.mobile_use.utils.logger import get_logger
17
+ from minitap.mobile_use.utils.video import compress_video_for_api
18
+
19
+ logger = get_logger(__name__)
20
+
21
+
22
async def analyze_video(
    ctx: MobileUseContext,
    video_path: Path,
    prompt: str,
) -> str:
    """
    Analyze a video file using a video-capable Gemini model.

    The video is passed through ``compress_video_for_api``, base64-encoded and
    sent together with the rendered ``video_analyzer.md`` (system) and
    ``human.md`` (user) templates. The primary LLM is tried first and the
    fallback LLM is used through ``with_fallback``. Each call is wrapped in
    ``invoke_llm_with_timeout_message`` with ``timeout_seconds=120``.

    Args:
        ctx: The MobileUseContext containing LLM configuration
        video_path: Path to the video file (MP4)
        prompt: The analysis prompt/question about the video

    Returns:
        Text analysis result from the model

    Raises:
        FileNotFoundError: If ``video_path`` does not exist
        Exception: If video analysis fails
    """
    logger.info(f"Starting video analysis for {video_path}")

    if not video_path.exists():
        raise FileNotFoundError(f"Video file not found: {video_path}")

    # Compress video if needed to fit within API limits
    compressed_path = await compress_video_for_api(video_path)

    try:
        video_base64 = base64.b64encode(compressed_path.read_bytes()).decode("utf-8")

        suffix = compressed_path.suffix.lower()
        if not suffix or suffix in (".mp4", ".m4v"):
            # A path without an extension would otherwise yield the invalid
            # MIME type "video/"; treat it as MP4, the documented input format.
            mime_type = "video/mp4"
        else:
            mime_type = f"video/{suffix[1:]}"

        prompts_dir = Path(__file__).parent
        system_message_content = Template(
            prompts_dir.joinpath("video_analyzer.md").read_text(encoding="utf-8")
        ).render()
        human_message_content = Template(
            prompts_dir.joinpath("human.md").read_text(encoding="utf-8")
        ).render(prompt=prompt)

        messages = [
            SystemMessage(content=system_message_content),
            HumanMessage(
                content=[
                    {
                        "type": "text",
                        "text": human_message_content,
                    },
                    {
                        "type": "file",
                        "source_type": "base64",
                        "mime_type": mime_type,
                        "data": video_base64,
                    },
                ]
            ),
        ]

        llm = get_llm(ctx=ctx, name="video_analyzer", is_utils=True, temperature=0.2)
        llm_fallback = get_llm(
            ctx=ctx, name="video_analyzer", is_utils=True, use_fallback=True, temperature=0.2
        )

        logger.info("Sending video to LLM for analysis...")

        response = await with_fallback(
            main_call=lambda: invoke_llm_with_timeout_message(
                llm.ainvoke(messages), timeout_seconds=120
            ),
            fallback_call=lambda: invoke_llm_with_timeout_message(
                llm_fallback.ainvoke(messages), timeout_seconds=120
            ),
        )

        # The response content may be a non-string (e.g. a list of parts);
        # always hand a plain string back to the caller.
        content = response.content if hasattr(response, "content") else str(response)
        result = content if isinstance(content, str) else str(content)
        logger.info("Video analysis completed")

        return result
    finally:
        # Clean up compressed file if it differs from original. Cleanup stays
        # best-effort (it must not mask the analysis result or error), but a
        # failure is logged instead of being silently discarded.
        if compressed_path != video_path:
            try:
                compressed_path.unlink(missing_ok=True)
            except OSError as e:
                logger.debug(f"Failed to remove compressed video {compressed_path}: {e}")
@@ -0,0 +1,477 @@
1
+ import asyncio
2
+ from functools import wraps
3
+ from typing import Any
4
+
5
+ from appium.options.common.base import AppiumOptions
6
+ from appium.webdriver.webdriver import WebDriver
7
+ from selenium.webdriver.common.actions import interaction
8
+ from selenium.webdriver.common.actions.action_builder import ActionBuilder
9
+ from selenium.webdriver.common.actions.pointer_input import PointerInput
10
+
11
+ from minitap.mobile_use.clients.idb_client import IOSAppInfo
12
+ from minitap.mobile_use.clients.ios_client_config import BrowserStackClientConfig
13
+ from minitap.mobile_use.utils.logger import get_logger
14
+
15
+ logger = get_logger(__name__)
16
+
17
+ BROWSERSTACK_HUB_URL = "https://hub-cloud.browserstack.com/wd/hub"
18
+
19
+
20
def with_browserstack_client(func):
    """Decorator that turns exceptions from a BrowserStack method into a fallback value.

    The wrapped coroutine logs the start and the successful end of the call at
    debug level. Any ``Exception`` raised by the method is logged (message at
    error level, traceback at debug level) and replaced by a return value:
    ``False`` when the method's return annotation is exactly ``bool``,
    ``None`` for every other annotation.

    Note: Function must have None or bool in return type for error fallback.
    """

    @wraps(func)
    async def wrapper(self, *args, **kwargs):
        operation = func.__name__
        try:
            logger.debug(f"Executing BrowserStack operation: {operation}...")
            outcome = await func(self, *args, **kwargs)
            logger.debug(f"{operation} completed successfully")
            return outcome
        except Exception as exc:
            logger.error(f"Failed to {operation}: {exc}")
            # Imported lazily: only needed on the failure path.
            import traceback

            logger.debug(f"Traceback: {traceback.format_exc()}")

            # Pick the fallback from the declared return annotation.
            declared = func.__annotations__.get("return")
            return False if declared is bool else None

    return wrapper
46
+
47
+
48
class BrowserStackClientWrapper:
    """Wrapper around Appium WebDriver for BrowserStack iOS device automation.

    This wrapper provides an interface similar to IdbClientWrapper and WdaClientWrapper
    but uses BrowserStack's cloud infrastructure for physical iOS device automation.

    BrowserStack is used for:
    - Cloud-based physical iOS devices
    - CI/CD pipelines requiring real device testing
    - Cross-device testing without local hardware

    Prerequisites:
    1. BrowserStack account with App Automate access
    2. Valid username and access_key
    3. App uploaded to BrowserStack (app_url)

    Example:
        config = BrowserStackClientConfig(
            username="your_username",
            access_key="your_access_key",
            device_name="iPhone 14",
            platform_version="16",
            app_url="bs://your_app_hash",
        )
        wrapper = BrowserStackClientWrapper(config=config)
        await wrapper.init_client()
        await wrapper.tap(100, 200)
        await wrapper.cleanup()

        # Using context manager
        async with BrowserStackClientWrapper(config=config) as wrapper:
            await wrapper.tap(100, 200)
            screenshot = await wrapper.screenshot()
    """

    # HID key code handled by key(); every other code is currently a no-op.
    _HID_KEY_DELETE = 42

    # Lowercased button identifier -> name expected by "mobile: pressButton".
    _PRESS_BUTTON_NAMES = {
        "home": "home",
        "volume_up": "volumeUp",
        "volumeup": "volumeUp",
        "volume_down": "volumeDown",
        "volumedown": "volumeDown",
    }

    def __init__(self, config: BrowserStackClientConfig):
        """Initialize the BrowserStack client wrapper.

        No network call is made here; the session is created by init_client().

        Args:
            config: BrowserStack configuration with credentials and device settings
        """
        self.config = config
        self._driver: WebDriver | None = None

    async def init_client(self) -> bool:
        """Initialize the Appium WebDriver session on BrowserStack.

        The WebDriver constructor is run in a worker thread via
        ``asyncio.to_thread`` so the event loop is not blocked.

        Returns:
            True if session created successfully, False otherwise
        """
        try:
            logger.info(
                f"Creating BrowserStack session for {self.config.device_name} "
                f"(iOS {self.config.platform_version})"
            )

            options = AppiumOptions()

            options.set_capability("platformName", "iOS")
            options.set_capability("appium:deviceName", self.config.device_name)
            options.set_capability("appium:platformVersion", self.config.platform_version)
            options.set_capability("appium:automationName", "XCUITest")
            options.set_capability("appium:app", self.config.app_url)

            bstack_options: dict[str, Any] = {
                "userName": self.config.username,
                "accessKey": self.config.access_key.get_secret_value(),
                "buildName": self.config.build_name or "mobile-use-session",
                "sessionName": self.config.session_name or "BrowserStack Session",
                "debug": True,
            }

            if self.config.project_name:
                bstack_options["projectName"] = self.config.project_name

            options.set_capability("bstack:options", bstack_options)

            hub_url = self.config.hub_url or BROWSERSTACK_HUB_URL

            self._driver = await asyncio.to_thread(
                WebDriver,
                command_executor=hub_url,
                options=options,
            )

            if self._driver:
                session_id = self._driver.session_id
                logger.info(f"BrowserStack session created successfully. Session ID: {session_id}")
                logger.info(
                    f"View session: https://app-automate.browserstack.com/dashboard/v2/sessions/{session_id}"
                )

            return True

        except Exception as e:
            logger.error(f"Failed to create BrowserStack session: {e}")
            self._driver = None
            return False

    async def cleanup(self) -> None:
        """Clean up BrowserStack session and quit the driver.

        Errors raised while quitting are logged at debug level and otherwise
        ignored; the driver reference is always cleared.
        """
        if self._driver is not None:
            try:
                logger.info("Ending BrowserStack session")
                await asyncio.to_thread(self._driver.quit)
            except Exception as e:
                logger.debug(f"Error ending BrowserStack session: {e}")
            finally:
                self._driver = None

        logger.debug("BrowserStack client cleanup completed")

    async def __aenter__(self):
        """Async context manager entry.

        Raises:
            RuntimeError: If the BrowserStack session could not be created
        """
        if not await self.init_client():
            raise RuntimeError("Failed to create BrowserStack session")
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        """Async context manager exit.

        Always cleans up; returns False so exceptions from the body propagate.
        """
        await self.cleanup()
        return False

    def _ensure_driver(self) -> WebDriver:
        """Ensure a valid WebDriver session exists.

        Returns:
            The WebDriver instance

        Raises:
            RuntimeError: If no driver is available
        """
        if self._driver is None:
            raise RuntimeError(
                "BrowserStack session not initialized. "
                "Call init_client() first or use as context manager."
            )
        return self._driver

    @with_browserstack_client
    async def tap(self, x: int, y: int, duration: float | None = None) -> bool:
        """Tap at the specified coordinates.

        Args:
            x: X coordinate
            y: Y coordinate
            duration: Optional tap duration in seconds (for long press)

        Returns:
            True if tap succeeded, False otherwise
        """
        driver = self._ensure_driver()

        def perform_tap():
            finger = PointerInput(interaction.POINTER_TOUCH, "finger")
            actions = ActionBuilder(driver, mouse=finger)
            actions.pointer_action.move_to_location(x, y)
            actions.pointer_action.pointer_down()
            if duration:
                # Hold the finger down to turn the tap into a long press.
                actions.pointer_action.pause(duration)
            actions.pointer_action.pointer_up()
            actions.perform()

        await asyncio.to_thread(perform_tap)
        return True

    @with_browserstack_client
    async def swipe(
        self,
        x_start: int,
        y_start: int,
        x_end: int,
        y_end: int,
        duration: float | None = None,
    ) -> bool:
        """Swipe from start coordinates to end coordinates.

        Args:
            x_start: Starting X coordinate
            y_start: Starting Y coordinate
            x_end: Ending X coordinate
            y_end: Ending Y coordinate
            duration: Optional swipe duration in seconds (0.5 when omitted or 0)

        Returns:
            True if swipe succeeded, False otherwise
        """
        driver = self._ensure_driver()

        swipe_duration = duration or 0.5

        def perform_swipe():
            finger = PointerInput(interaction.POINTER_TOUCH, "finger")
            actions = ActionBuilder(driver, mouse=finger)
            actions.pointer_action.move_to_location(x_start, y_start)
            actions.pointer_action.pointer_down()
            actions.pointer_action.pause(swipe_duration)
            actions.pointer_action.move_to_location(x_end, y_end)
            actions.pointer_action.pointer_up()
            actions.perform()

        await asyncio.to_thread(perform_swipe)
        return True

    @with_browserstack_client
    async def screenshot(self, output_path: str | None = None) -> bytes | None:
        """Take a screenshot and return raw image data.

        Args:
            output_path: Optional path to save the screenshot

        Returns:
            Raw image data (PNG bytes) or None on failure
        """
        driver = self._ensure_driver()

        screenshot_base64 = await asyncio.to_thread(driver.get_screenshot_as_base64)

        import base64

        screenshot_data = base64.b64decode(screenshot_base64)

        if output_path:
            with open(output_path, "wb") as f:
                f.write(screenshot_data)

        return screenshot_data

    @with_browserstack_client
    async def launch(
        self,
        bundle_id: str,
        args: list[str] | None = None,
        env: dict[str, str] | None = None,
    ) -> bool:
        """Launch an application by bundle ID.

        Args:
            bundle_id: The bundle identifier of the app to launch
            args: Optional list of arguments to pass to the app (not supported on BrowserStack)
            env: Optional environment variables for the app (not supported on BrowserStack)

        Returns:
            True if launch succeeded, False otherwise
        """
        driver = self._ensure_driver()

        if args or env:
            logger.warning(
                "BrowserStack does not support app launch arguments or environment variables"
            )

        script = "mobile: launchApp"
        params = {"bundleId": bundle_id}

        await asyncio.to_thread(driver.execute_script, script, params)
        return True

    @with_browserstack_client
    async def terminate(self, bundle_id: str) -> bool:
        """Terminate an application by bundle ID.

        Args:
            bundle_id: The bundle identifier of the app to terminate

        Returns:
            True if termination succeeded, False otherwise
        """
        driver = self._ensure_driver()

        script = "mobile: terminateApp"
        params = {"bundleId": bundle_id}

        await asyncio.to_thread(driver.execute_script, script, params)
        return True

    @with_browserstack_client
    async def text(self, text: str) -> bool:
        """Type text using the keyboard.

        The text is sent to the currently active (focused) element.

        Args:
            text: The text to type

        Returns:
            True if text input succeeded, False otherwise
        """
        driver = self._ensure_driver()

        active_element = await asyncio.to_thread(lambda: driver.switch_to.active_element)
        await asyncio.to_thread(active_element.send_keys, text)
        return True

    @with_browserstack_client
    async def open_url(self, url: str) -> bool:
        """Open a URL on the device.

        Args:
            url: The URL to open

        Returns:
            True if URL opened successfully, False otherwise
        """
        driver = self._ensure_driver()

        await asyncio.to_thread(driver.get, url)
        return True

    @with_browserstack_client
    async def key(self, key_code: int) -> bool:
        """Send a key press.

        Note: Limited key support on BrowserStack/Appium.
        For delete (key_code=42), the active element is cleared and its text
        re-typed without the last character. Any other key code is ignored:
        a warning is logged and True is still returned, matching the previous
        behavior of this method.

        Args:
            key_code: HID key code (42 = delete/backspace)

        Returns:
            True if no error occurred (including ignored key codes), False otherwise
        """
        driver = self._ensure_driver()

        if key_code != self._HID_KEY_DELETE:
            logger.warning(f"Key code {key_code} is not supported on BrowserStack; ignoring")
            return True

        active_element = await asyncio.to_thread(lambda: driver.switch_to.active_element)
        current_text = await asyncio.to_thread(lambda: active_element.text)
        if current_text:
            await asyncio.to_thread(active_element.clear)
            await asyncio.to_thread(active_element.send_keys, current_text[:-1])
        return True

    @with_browserstack_client
    async def button(self, button_type: Any) -> bool:
        """Press a hardware button (compatible with IDB's HIDButtonType).

        Only home, volume up and volume down are mapped. Any other button is
        ignored: a warning is logged and True is still returned, matching the
        previous behavior of this method.

        Args:
            button_type: Button type (HIDButtonType.HOME, etc.)

        Returns:
            True if no error occurred (including ignored buttons), False otherwise
        """
        driver = self._ensure_driver()

        # Accept both enum-like objects (via .name) and plain strings.
        button_name = getattr(button_type, "name", str(button_type)).lower()

        appium_name = self._PRESS_BUTTON_NAMES.get(button_name)
        if appium_name is None:
            logger.warning(f"Button '{button_name}' is not supported on BrowserStack; ignoring")
            return True

        script = "mobile: pressButton"
        params = {"name": appium_name}
        await asyncio.to_thread(driver.execute_script, script, params)
        return True

    async def describe_all(self) -> list[dict[str, Any]] | None:
        """Get UI hierarchy as a flat list (compatible with IDB's describe_all).

        Returns:
            List of UI elements or None on error
        """
        try:
            driver = self._ensure_driver()
            page_source = await asyncio.to_thread(lambda: driver.page_source)
            if page_source is None:
                return None
            return self._parse_xml_to_elements(page_source)
        except Exception as e:
            logger.error(f"Failed to describe_all: {e}")
            return None

    def _parse_xml_to_elements(self, xml_source: str) -> list[dict[str, Any]]:
        """Parse Appium XML source into flat element list matching IDB format.

        Every node except the ``AppiumAUT`` wrapper becomes one dict with the
        keys ``type``, ``value``, ``label``, ``frame``, ``enabled`` and
        ``visible``. Missing or non-numeric frame attributes are read as 0.0
        so that one malformed node does not discard the whole hierarchy.

        Args:
            xml_source: XML page source as returned by the driver

        Returns:
            List of element dicts; empty if the XML cannot be parsed
        """
        import xml.etree.ElementTree as ET

        def to_float(raw: str | None) -> float:
            # Tolerate absent or malformed coordinates instead of raising.
            if raw is None:
                return 0.0
            try:
                return float(raw)
            except ValueError:
                return 0.0

        elements: list[dict[str, Any]] = []
        try:
            root = ET.fromstring(xml_source)
        except ET.ParseError as e:
            logger.error(f"Failed to parse XML: {e}")
            return elements

        for elem in root.iter():
            if elem.tag == "AppiumAUT":
                continue
            frame = {
                "x": to_float(elem.get("x")),
                "y": to_float(elem.get("y")),
                "width": to_float(elem.get("width")),
                "height": to_float(elem.get("height")),
            }
            elements.append(
                {
                    "type": elem.get("type", elem.tag),
                    "value": elem.get("value", ""),
                    "label": elem.get("label", elem.get("name", "")),
                    "frame": frame,
                    # Defaults differ on purpose: absent "enabled" reads as
                    # False, absent "visible" reads as True.
                    "enabled": elem.get("enabled", "false").lower() == "true",
                    "visible": elem.get("visible", "true").lower() == "true",
                }
            )
        return elements

    async def app_current(self) -> IOSAppInfo | None:
        """Get information about the currently active app.

        Note: BrowserStack doesn't support activeAppInfo script directly.
        Returns None as this feature is not available on BrowserStack.

        Returns:
            None (not supported on BrowserStack)
        """
        logger.debug("app_current is not supported on BrowserStack")
        return None

    async def install(self, app_path: str) -> list[Any]:
        """Install an app (not supported on BrowserStack - apps must be pre-uploaded).

        Args:
            app_path: Path to the app (ignored)

        Returns:
            Empty list with warning
        """
        logger.warning(
            "App installation not supported on BrowserStack. "
            "Please upload your app to BrowserStack first and use the app_url in config."
        )
        return []