minitap-mobile-use 2.2.0__py3-none-any.whl → 2.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of minitap-mobile-use might be problematic. Click here for more details.

Files changed (59) hide show
  1. minitap/mobile_use/agents/contextor/contextor.py +6 -4
  2. minitap/mobile_use/agents/cortex/cortex.md +114 -27
  3. minitap/mobile_use/agents/cortex/cortex.py +8 -5
  4. minitap/mobile_use/agents/executor/executor.md +15 -10
  5. minitap/mobile_use/agents/executor/executor.py +6 -5
  6. minitap/mobile_use/agents/executor/utils.py +2 -1
  7. minitap/mobile_use/agents/hopper/hopper.py +6 -3
  8. minitap/mobile_use/agents/orchestrator/orchestrator.py +26 -11
  9. minitap/mobile_use/agents/outputter/outputter.py +6 -3
  10. minitap/mobile_use/agents/outputter/test_outputter.py +104 -42
  11. minitap/mobile_use/agents/planner/planner.md +20 -22
  12. minitap/mobile_use/agents/planner/planner.py +10 -7
  13. minitap/mobile_use/agents/planner/types.py +4 -2
  14. minitap/mobile_use/agents/planner/utils.py +14 -0
  15. minitap/mobile_use/agents/summarizer/summarizer.py +2 -2
  16. minitap/mobile_use/config.py +6 -1
  17. minitap/mobile_use/context.py +13 -3
  18. minitap/mobile_use/controllers/mobile_command_controller.py +1 -14
  19. minitap/mobile_use/graph/state.py +7 -3
  20. minitap/mobile_use/sdk/agent.py +204 -29
  21. minitap/mobile_use/sdk/examples/README.md +19 -1
  22. minitap/mobile_use/sdk/examples/platform_minimal_example.py +46 -0
  23. minitap/mobile_use/sdk/services/platform.py +244 -0
  24. minitap/mobile_use/sdk/types/__init__.py +14 -14
  25. minitap/mobile_use/sdk/types/exceptions.py +57 -0
  26. minitap/mobile_use/sdk/types/platform.py +125 -0
  27. minitap/mobile_use/sdk/types/task.py +60 -17
  28. minitap/mobile_use/servers/device_hardware_bridge.py +3 -2
  29. minitap/mobile_use/servers/stop_servers.py +11 -12
  30. minitap/mobile_use/servers/utils.py +6 -9
  31. minitap/mobile_use/services/llm.py +89 -5
  32. minitap/mobile_use/tools/index.py +2 -8
  33. minitap/mobile_use/tools/mobile/back.py +3 -3
  34. minitap/mobile_use/tools/mobile/clear_text.py +67 -38
  35. minitap/mobile_use/tools/mobile/erase_one_char.py +5 -4
  36. minitap/mobile_use/tools/mobile/{take_screenshot.py → glimpse_screen.py} +23 -15
  37. minitap/mobile_use/tools/mobile/input_text.py +67 -16
  38. minitap/mobile_use/tools/mobile/launch_app.py +54 -22
  39. minitap/mobile_use/tools/mobile/long_press_on.py +15 -8
  40. minitap/mobile_use/tools/mobile/open_link.py +15 -8
  41. minitap/mobile_use/tools/mobile/press_key.py +15 -8
  42. minitap/mobile_use/tools/mobile/stop_app.py +14 -8
  43. minitap/mobile_use/tools/mobile/swipe.py +11 -5
  44. minitap/mobile_use/tools/mobile/tap.py +103 -21
  45. minitap/mobile_use/tools/mobile/wait_for_animation_to_end.py +3 -3
  46. minitap/mobile_use/tools/test_utils.py +377 -0
  47. minitap/mobile_use/tools/types.py +35 -0
  48. minitap/mobile_use/tools/utils.py +149 -39
  49. minitap/mobile_use/utils/recorder.py +1 -1
  50. minitap/mobile_use/utils/test_ui_hierarchy.py +178 -0
  51. minitap/mobile_use/utils/ui_hierarchy.py +11 -4
  52. {minitap_mobile_use-2.2.0.dist-info → minitap_mobile_use-2.4.0.dist-info}/METADATA +6 -4
  53. minitap_mobile_use-2.4.0.dist-info/RECORD +99 -0
  54. minitap/mobile_use/tools/mobile/copy_text_from.py +0 -73
  55. minitap/mobile_use/tools/mobile/find_packages.py +0 -69
  56. minitap/mobile_use/tools/mobile/paste_text.py +0 -62
  57. minitap_mobile_use-2.2.0.dist-info/RECORD +0 -96
  58. {minitap_mobile_use-2.2.0.dist-info → minitap_mobile_use-2.4.0.dist-info}/WHEEL +0 -0
  59. {minitap_mobile_use-2.2.0.dist-info → minitap_mobile_use-2.4.0.dist-info}/entry_points.txt +0 -0
@@ -1,45 +1,132 @@
1
+ from typing import Annotated
2
+
1
3
  from langchain_core.messages import ToolMessage
2
4
  from langchain_core.tools import tool
3
5
  from langchain_core.tools.base import InjectedToolCallId
4
6
  from langgraph.prebuilt import InjectedState
5
7
  from langgraph.types import Command
8
+
6
9
  from minitap.mobile_use.constants import EXECUTOR_MESSAGES_KEY
7
10
  from minitap.mobile_use.context import MobileUseContext
8
- from minitap.mobile_use.controllers.mobile_command_controller import SelectorRequest
11
+ from minitap.mobile_use.controllers.mobile_command_controller import (
12
+ CoordinatesSelectorRequest,
13
+ IdSelectorRequest,
14
+ SelectorRequestWithCoordinates,
15
+ TextSelectorRequest,
16
+ )
9
17
  from minitap.mobile_use.controllers.mobile_command_controller import tap as tap_controller
10
18
  from minitap.mobile_use.graph.state import State
11
19
  from minitap.mobile_use.tools.tool_wrapper import ToolWrapper
12
- from typing import Annotated
20
+ from minitap.mobile_use.tools.types import Target
21
+ from minitap.mobile_use.utils.logger import get_logger
22
+
23
+ logger = get_logger(__name__)
13
24
 
14
25
 
15
26
  def get_tap_tool(ctx: MobileUseContext):
16
27
  @tool
17
- def tap(
28
+ async def tap(
18
29
  tool_call_id: Annotated[str, InjectedToolCallId],
19
30
  state: Annotated[State, InjectedState],
20
31
  agent_thought: str,
21
- selector_request: SelectorRequest,
22
- index: int | None = None,
32
+ target: Target,
23
33
  ):
24
34
  """
25
- Taps on a selector.
26
- Index is optional and is used when you have multiple views matching the same selector.
35
+ Taps on a UI element identified by the 'target' object.
36
+
37
+ The 'target' object allows specifying an element by its resource_id
38
+ (with an optional index), its coordinates, or its text content (with an optional index).
39
+ The tool uses a fallback strategy, trying the locators in that order.
27
40
  """
28
- output = tap_controller(ctx=ctx, selector_request=selector_request, index=index)
41
+ output = {
42
+ "error": "No valid selector provided or all selectors failed."
43
+ } # Default to failure
44
+ final_selector_info = "N/A"
45
+
46
+ # 1. Try with resource_id
47
+ if target.resource_id:
48
+ try:
49
+ selector = IdSelectorRequest(id=target.resource_id)
50
+ logger.info(
51
+ f"Attempting to tap using resource_id: '{target.resource_id}' "
52
+ f"at index {target.resource_id_index}"
53
+ )
54
+ result = tap_controller(
55
+ ctx=ctx, selector_request=selector, index=target.resource_id_index
56
+ )
57
+ if result is None: # Success
58
+ output = None
59
+ final_selector_info = (
60
+ f"resource_id='{target.resource_id}' (index={target.resource_id_index})"
61
+ )
62
+ else:
63
+ logger.warning(
64
+ f"Tap with resource_id '{target.resource_id}' failed. Error: {result}"
65
+ )
66
+ output = result
67
+ except Exception as e:
68
+ logger.warning(f"Exception during tap with resource_id '{target.resource_id}': {e}")
69
+ output = {"error": str(e)}
70
+
71
+ # 2. If resource_id failed or wasn't provided, try with coordinates
72
+ if output is not None and target.coordinates:
73
+ try:
74
+ center_point = target.coordinates.get_center()
75
+ selector = SelectorRequestWithCoordinates(
76
+ coordinates=CoordinatesSelectorRequest(x=center_point.x, y=center_point.y)
77
+ )
78
+ logger.info(
79
+ f"Attempting to tap using coordinates: {center_point.x},{center_point.y}"
80
+ )
81
+ result = tap_controller(ctx=ctx, selector_request=selector)
82
+ if result is None: # Success
83
+ output = None
84
+ final_selector_info = f"coordinates='{target.coordinates}'"
85
+ else:
86
+ logger.warning(
87
+ f"Tap with coordinates '{target.coordinates}' failed. Error: {result}"
88
+ )
89
+ output = result
90
+ except Exception as e:
91
+ logger.warning(f"Exception during tap with coordinates '{target.coordinates}': {e}")
92
+ output = {"error": str(e)}
93
+
94
+ # 3. If coordinates failed or weren't provided, try with text
95
+ if output is not None and target.text:
96
+ try:
97
+ selector = TextSelectorRequest(text=target.text)
98
+ logger.info(
99
+ f"Attempting to tap using text: '{target.text}' at index {target.text_index}"
100
+ )
101
+ result = tap_controller(ctx=ctx, selector_request=selector, index=target.text_index)
102
+ if result is None: # Success
103
+ output = None
104
+ final_selector_info = f"text='{target.text}' (index={target.text_index})"
105
+ else:
106
+ logger.warning(f"Tap with text '{target.text}' failed. Error: {result}")
107
+ output = result
108
+ except Exception as e:
109
+ logger.warning(f"Exception during tap with text '{target.text}': {e}")
110
+ output = {"error": str(e)}
111
+
29
112
  has_failed = output is not None
113
+ agent_outcome = (
114
+ tap_wrapper.on_failure_fn(final_selector_info)
115
+ if has_failed
116
+ else tap_wrapper.on_success_fn(final_selector_info)
117
+ )
118
+
30
119
  tool_message = ToolMessage(
31
120
  tool_call_id=tool_call_id,
32
- content=tap_wrapper.on_failure_fn(selector_request, index)
33
- if has_failed
34
- else tap_wrapper.on_success_fn(selector_request, index),
121
+ content=agent_outcome,
35
122
  additional_kwargs={"error": output} if has_failed else {},
36
123
  status="error" if has_failed else "success",
37
124
  )
38
125
  return Command(
39
- update=state.sanitize_update(
126
+ update=await state.asanitize_update(
40
127
  ctx=ctx,
41
128
  update={
42
- "agents_thoughts": [agent_thought],
129
+ "agents_thoughts": [agent_thought, agent_outcome],
43
130
  EXECUTOR_MESSAGES_KEY: [tool_message],
44
131
  },
45
132
  agent="executor",
@@ -51,12 +138,7 @@ def get_tap_tool(ctx: MobileUseContext):
51
138
 
52
139
  tap_wrapper = ToolWrapper(
53
140
  tool_fn_getter=get_tap_tool,
54
- on_success_fn=(
55
- lambda selector_request,
56
- index: f"Tap on {selector_request} {'at index {index}' if index else ''} is successful."
57
- ),
58
- on_failure_fn=(
59
- lambda selector_request,
60
- index: f"Failed to tap on {selector_request} {'at index {index}' if index else ''}."
61
- ),
141
+ on_success_fn=lambda selector_info: f"Tap on element with {selector_info} was successful.",
142
+ on_failure_fn=lambda selector_info: "Failed to tap on element. "
143
+ + f"Last attempt was with {selector_info}.",
62
144
  )
@@ -16,12 +16,12 @@ from typing import Annotated
16
16
 
17
17
  def get_wait_for_animation_to_end_tool(ctx: MobileUseContext):
18
18
  @tool
19
- def wait_for_animation_to_end(
19
+ async def wait_for_animation_to_end(
20
20
  tool_call_id: Annotated[str, InjectedToolCallId],
21
21
  state: Annotated[State, InjectedState],
22
22
  agent_thought: str,
23
23
  timeout: WaitTimeout | None,
24
- ):
24
+ ) -> Command:
25
25
  """
26
26
  Waits for ongoing animations or videos to finish before continuing.
27
27
 
@@ -44,7 +44,7 @@ def get_wait_for_animation_to_end_tool(ctx: MobileUseContext):
44
44
  status="error" if has_failed else "success",
45
45
  )
46
46
  return Command(
47
- update=state.sanitize_update(
47
+ update=await state.asanitize_update(
48
48
  ctx=ctx,
49
49
  update={
50
50
  "agents_thoughts": [agent_thought],
@@ -0,0 +1,377 @@
1
+ import sys
2
+ from unittest.mock import Mock, patch
3
+
4
+ import pytest
5
+
6
+ # Mock the problematic langgraph import at module level
7
+ sys.modules["langgraph.prebuilt.chat_agent_executor"] = Mock()
8
+ sys.modules["minitap.mobile_use.graph.state"] = Mock()
9
+
10
+ from minitap.mobile_use.context import MobileUseContext # noqa: E402
11
+ from minitap.mobile_use.controllers.mobile_command_controller import ( # noqa: E402
12
+ IdSelectorRequest,
13
+ SelectorRequestWithCoordinates,
14
+ )
15
+ from minitap.mobile_use.tools.types import Target # noqa: E402
16
+ from minitap.mobile_use.tools.utils import ( # noqa: E402
17
+ focus_element_if_needed,
18
+ move_cursor_to_end_if_bounds,
19
+ )
20
+ from minitap.mobile_use.utils.ui_hierarchy import ElementBounds # noqa: E402
21
+
22
+
23
@pytest.fixture
def mock_context():
    """Provide a MobileUseContext-spec'd mock whose hardware bridge client is itself a mock."""
    # Keyword arguments to Mock are applied as attributes on the new mock.
    return Mock(spec=MobileUseContext, hw_bridge_client=Mock())
29
+
30
+
31
@pytest.fixture
def mock_state():
    """Provide a mock graph state that starts with an empty UI hierarchy."""
    return Mock(latest_ui_hierarchy=[])
37
+
38
+
39
@pytest.fixture
def sample_element():
    """Provide a flat UI element dict: a text input with bounds, not focused."""
    element_bounds = {"x": 100, "y": 200, "width": 300, "height": 50}
    return dict(
        resourceId="com.example:id/text_input",
        text="Sample text",
        bounds=element_bounds,
        focused="false",
    )
48
+
49
+
50
@pytest.fixture
def sample_rich_element():
    """Provide a rich-hierarchy node: an 'attributes' dict plus an empty 'children' list."""
    attributes = {
        "resource-id": "com.example:id/text_input",
        "focused": "false",
        "text": "Sample text",
        "bounds": {"x": 100, "y": 200, "width": 300, "height": 50},
    }
    return {"attributes": attributes, "children": []}
62
+
63
+
64
class TestMoveCursorToEndIfBounds:
    """Behavioural tests for move_cursor_to_end_if_bounds, one per locator scenario."""

    @staticmethod
    def _make_target(**overrides):
        """Build a Target with all five locator fields passed explicitly (None unless overridden)."""
        locator_fields = dict.fromkeys(
            ("resource_id", "resource_id_index", "text", "text_index", "coordinates")
        )
        locator_fields.update(overrides)
        return Target(**locator_fields)

    # NOTE: stacked @patch decorators inject mocks bottom-up, so the decorator
    # closest to the function supplies the first mock argument.

    @patch("minitap.mobile_use.tools.utils.tap")
    @patch("minitap.mobile_use.tools.utils.find_element_by_resource_id")
    def test_move_cursor_with_resource_id(
        self, find_by_id, tap_mock, mock_context, mock_state, sample_element
    ):
        """A resource_id locator resolves the element and taps near its bottom-right corner."""
        mock_state.latest_ui_hierarchy = [sample_element]
        find_by_id.return_value = sample_element

        outcome = move_cursor_to_end_if_bounds(
            ctx=mock_context,
            state=mock_state,
            target=self._make_target(resource_id="com.example:id/text_input"),
        )

        find_by_id.assert_called_once_with(
            ui_hierarchy=[sample_element],
            resource_id="com.example:id/text_input",
            index=0,
        )
        tap_mock.assert_called_once()
        tapped_selector = tap_mock.call_args.kwargs["selector_request"]
        assert isinstance(tapped_selector, SelectorRequestWithCoordinates)
        tapped_point = tapped_selector.coordinates
        assert tapped_point.x == 397  # 100 + 300 * 0.99
        assert tapped_point.y == 249  # 200 + 50 * 0.99
        assert outcome == sample_element

    @patch("minitap.mobile_use.tools.utils.tap")
    @patch("minitap.mobile_use.tools.utils.find_element_by_resource_id")
    def test_move_cursor_with_coordinates_only(
        self, find_by_id, tap_mock, mock_context, mock_state
    ):
        """With only bounds supplied, no lookup happens and the tap uses the given bounds."""
        element_bounds = ElementBounds(x=50, y=150, width=200, height=40)

        outcome = move_cursor_to_end_if_bounds(
            ctx=mock_context,
            state=mock_state,
            target=self._make_target(coordinates=element_bounds),
        )

        find_by_id.assert_not_called()
        tap_mock.assert_called_once()
        tapped_point = tap_mock.call_args.kwargs["selector_request"].coordinates
        assert tapped_point.x == 248  # 50 + 200 * 0.99
        assert tapped_point.y == 189  # 150 + 40 * 0.99
        # Using coordinates directly yields no element to return.
        assert outcome is None

    @patch("minitap.mobile_use.tools.utils.tap")
    @patch("minitap.mobile_use.tools.utils.find_element_by_text")
    def test_move_cursor_with_text_only_success(
        self, find_by_text, tap_mock, mock_context, mock_state, sample_element
    ):
        """A text locator that matches an element with bounds triggers exactly one tap."""
        mock_state.latest_ui_hierarchy = [sample_element]
        find_by_text.return_value = sample_element

        outcome = move_cursor_to_end_if_bounds(
            ctx=mock_context,
            state=mock_state,
            target=self._make_target(text="Sample text", text_index=0),
        )

        find_by_text.assert_called_once_with([sample_element], "Sample text", index=0)
        tap_mock.assert_called_once()
        assert outcome == sample_element

    @patch("minitap.mobile_use.tools.utils.tap")
    @patch("minitap.mobile_use.tools.utils.find_element_by_text")
    def test_move_cursor_with_text_only_element_not_found(
        self, find_by_text, tap_mock, mock_context, mock_state
    ):
        """A text locator with no match performs no tap and returns None."""
        mock_state.latest_ui_hierarchy = []
        find_by_text.return_value = None

        outcome = move_cursor_to_end_if_bounds(
            ctx=mock_context,
            state=mock_state,
            target=self._make_target(text="Nonexistent text"),
        )

        tap_mock.assert_not_called()
        assert outcome is None

    @patch("minitap.mobile_use.tools.utils.tap")
    @patch("minitap.mobile_use.tools.utils.find_element_by_text")
    def test_move_cursor_with_text_only_no_bounds(
        self, find_by_text, tap_mock, mock_context, mock_state
    ):
        """An element matched by text but lacking bounds leads to no tap and a None result."""
        boundless_element = {"text": "Text without bounds"}
        mock_state.latest_ui_hierarchy = [boundless_element]
        find_by_text.return_value = boundless_element

        outcome = move_cursor_to_end_if_bounds(
            ctx=mock_context,
            state=mock_state,
            target=self._make_target(text="Text without bounds"),
        )

        tap_mock.assert_not_called()
        # No action was taken, so nothing is returned.
        assert outcome is None

    @patch("minitap.mobile_use.tools.utils.find_element_by_resource_id")
    def test_move_cursor_element_not_found_by_id(self, find_by_id, mock_context, mock_state):
        """A resource_id locator with no match returns None."""
        find_by_id.return_value = None

        outcome = move_cursor_to_end_if_bounds(
            ctx=mock_context,
            state=mock_state,
            target=self._make_target(resource_id="com.example:id/nonexistent"),
        )

        assert outcome is None
205
+
206
+
207
class TestFocusElementIfNeeded:
    """Test cases for focus_element_if_needed function.

    Test data derived from the ``sample_rich_element`` fixture is built as new
    dicts (outer node and nested ``attributes``) rather than via a shallow
    ``.copy()`` followed by nested assignment, which would write through to
    the fixture's own ``attributes`` dict.
    """

    _RESOURCE_ID = "com.example:id/text_input"

    @staticmethod
    def _make_target(**overrides):
        """Build a Target with all five locator fields passed explicitly (None unless overridden)."""
        locator_fields = dict.fromkeys(
            ("resource_id", "resource_id_index", "text", "text_index", "coordinates")
        )
        locator_fields.update(overrides)
        return Target(**locator_fields)

    @staticmethod
    def _with_attributes(rich_element, **attribute_overrides):
        """Return a new rich element whose 'attributes' dict is a fresh, overridden copy."""
        return {
            **rich_element,
            "attributes": {**rich_element["attributes"], **attribute_overrides},
        }

    # NOTE: stacked @patch decorators inject mocks bottom-up, so the decorator
    # closest to the function supplies the first mock argument.

    @patch("minitap.mobile_use.tools.utils.tap")
    @patch("minitap.mobile_use.tools.utils.find_element_by_resource_id")
    def test_focus_element_already_focused(
        self, mock_find_element, mock_tap, mock_context, sample_rich_element
    ):
        """Test when element is already focused."""
        focused_element = self._with_attributes(sample_rich_element, focused="true")

        mock_context.hw_bridge_client.get_rich_hierarchy.return_value = [focused_element]
        mock_find_element.return_value = focused_element["attributes"]

        target = self._make_target(resource_id=self._RESOURCE_ID)
        result = focus_element_if_needed(ctx=mock_context, target=target)

        mock_tap.assert_not_called()
        assert result is True
        mock_context.hw_bridge_client.get_rich_hierarchy.assert_called_once()

    @patch("minitap.mobile_use.tools.utils.tap")
    @patch("minitap.mobile_use.tools.utils.find_element_by_resource_id")
    def test_focus_element_needs_focus_success(
        self, mock_find_element, mock_tap, mock_context, sample_rich_element
    ):
        """Test when element needs focus and focusing succeeds."""
        unfocused_element = sample_rich_element
        focused_element = {
            "attributes": {
                "resource-id": "com.example:id/text_input",
                "focused": "true",
            },
            "children": [],
        }

        # First hierarchy fetch sees the unfocused node, the second sees it focused.
        mock_context.hw_bridge_client.get_rich_hierarchy.side_effect = [
            [unfocused_element],
            [focused_element],
        ]
        mock_find_element.side_effect = [
            unfocused_element["attributes"],
            focused_element["attributes"],
        ]

        target = self._make_target(resource_id=self._RESOURCE_ID)
        result = focus_element_if_needed(ctx=mock_context, target=target)

        mock_tap.assert_called_once_with(
            ctx=mock_context,
            selector_request=IdSelectorRequest(id="com.example:id/text_input"),
            index=0,
        )
        assert mock_context.hw_bridge_client.get_rich_hierarchy.call_count == 2
        assert result is True

    @patch("minitap.mobile_use.tools.utils.tap")
    @patch("minitap.mobile_use.tools.utils.logger")
    @patch("minitap.mobile_use.tools.utils.find_element_by_resource_id")
    def test_focus_id_and_text_mismatch_fallback_to_text(
        self, mock_find_id, mock_logger, mock_tap, mock_context, sample_rich_element
    ):
        """Test fallback when resource_id and text point to different elements."""
        # The id lookup yields attributes whose text differs from the requested text.
        element_from_id = {**sample_rich_element["attributes"], "text": "Different text"}

        element_from_text = self._with_attributes(
            sample_rich_element,
            bounds={"x": 10, "y": 20, "width": 100, "height": 30},
        )

        mock_context.hw_bridge_client.get_rich_hierarchy.return_value = [element_from_text]
        mock_find_id.return_value = element_from_id

        with patch("minitap.mobile_use.tools.utils.find_element_by_text") as mock_find_text:
            mock_find_text.return_value = element_from_text["attributes"]

            target = self._make_target(resource_id=self._RESOURCE_ID, text="Sample text")
            result = focus_element_if_needed(ctx=mock_context, target=target)

        mock_logger.warning.assert_called_once()
        mock_tap.assert_called_once()
        assert result is True

    @patch("minitap.mobile_use.tools.utils.tap")
    @patch("minitap.mobile_use.tools.utils.find_element_by_text")
    def test_focus_fallback_to_text(
        self, mock_find_text, mock_tap, mock_context, sample_rich_element
    ):
        """Test fallback to focusing using text."""
        element_with_bounds = self._with_attributes(
            sample_rich_element,
            bounds={"x": 10, "y": 20, "width": 100, "height": 30},
        )

        mock_context.hw_bridge_client.get_rich_hierarchy.return_value = [element_with_bounds]
        mock_find_text.return_value = element_with_bounds["attributes"]

        target = self._make_target(text="Sample text")
        result = focus_element_if_needed(ctx=mock_context, target=target)

        mock_find_text.assert_called_once()
        mock_tap.assert_called_once()
        selector = mock_tap.call_args.kwargs["selector_request"]
        assert isinstance(selector, SelectorRequestWithCoordinates)
        assert selector.coordinates.x == 60  # 10 + 100/2
        assert selector.coordinates.y == 35  # 20 + 30/2
        assert result is True

    @patch("minitap.mobile_use.tools.utils.logger")
    def test_focus_all_locators_fail(self, mock_logger, mock_context):
        """Test failure when no locator can find an element."""
        mock_context.hw_bridge_client.get_rich_hierarchy.return_value = []

        with (
            patch("minitap.mobile_use.tools.utils.find_element_by_resource_id") as mock_find_id,
            patch("minitap.mobile_use.tools.utils.find_element_by_text") as mock_find_text,
        ):
            mock_find_id.return_value = None
            mock_find_text.return_value = None

            target = self._make_target(resource_id="nonexistent", text="nonexistent")
            result = focus_element_if_needed(ctx=mock_context, target=target)

        mock_logger.error.assert_called_once_with(
            "Failed to focus element."
            + " No valid locator (resource_id, coordinates, or text) succeeded."
        )
        assert result is False
374
+
375
+
376
+ if __name__ == "__main__":
377
+ pytest.main([__file__])
@@ -0,0 +1,35 @@
1
+ from pydantic import BaseModel, Field, model_validator
2
+
3
+ from minitap.mobile_use.utils.ui_hierarchy import ElementBounds
4
+
5
+
6
class Target(BaseModel):
    """
    A comprehensive locator for a UI element, supporting a fallback mechanism.
    """

    # Locator by resource-id, plus the position to pick among duplicates.
    resource_id: str | None = Field(default=None, description="The resource-id of the element.")
    resource_id_index: int | None = Field(
        default=None,
        description="The zero-based index if multiple elements share the same resource-id.",
    )

    # Locator by visible text, plus the position to pick among duplicates.
    text: str | None = Field(
        default=None,
        description="The text content of the element (e.g., a label or placeholder).",
    )
    text_index: int | None = Field(
        default=None,
        description="The zero-based index if multiple elements share the same text.",
    )

    # Locator by on-screen bounds.
    coordinates: ElementBounds | None = Field(
        default=None, description="The x, y, width, and height of the element."
    )

    @model_validator(mode="after")
    def _default_indices(self):
        """Set a missing index to 0 for each locator string that is present and non-empty."""
        # Truthiness is False for both None and "", so an empty string is
        # treated the same as a locator that was not provided.
        has_resource_id = bool(self.resource_id)
        has_text = bool(self.text)

        if has_resource_id and self.resource_id_index is None:
            self.resource_id_index = 0
        if has_text and self.text_index is None:
            self.text_index = 0
        return self