PyPI - minitap-mobile-use - Versions diffs - 2.3.0__py3-none-any.whl → 2.4.0__py3-none-any.whl - Mend

minitap-mobile-use 2.3.0__py3-none-any.whl → 2.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of minitap-mobile-use might be problematic. Click here for more details.

Files changed (54)

minitap/mobile_use/agents/contextor/contextor.py +2 -2
minitap/mobile_use/agents/cortex/cortex.md +49 -8
minitap/mobile_use/agents/cortex/cortex.py +8 -4
minitap/mobile_use/agents/executor/executor.md +14 -11
minitap/mobile_use/agents/executor/executor.py +6 -5
minitap/mobile_use/agents/hopper/hopper.py +6 -3
minitap/mobile_use/agents/orchestrator/orchestrator.py +26 -11
minitap/mobile_use/agents/outputter/outputter.py +6 -3
minitap/mobile_use/agents/planner/planner.md +20 -22
minitap/mobile_use/agents/planner/planner.py +10 -7
minitap/mobile_use/agents/planner/types.py +4 -2
minitap/mobile_use/agents/planner/utils.py +14 -0
minitap/mobile_use/agents/summarizer/summarizer.py +2 -2
minitap/mobile_use/config.py +6 -1
minitap/mobile_use/context.py +13 -3
minitap/mobile_use/controllers/mobile_command_controller.py +1 -14
minitap/mobile_use/graph/state.py +7 -3
minitap/mobile_use/sdk/agent.py +188 -23
minitap/mobile_use/sdk/examples/README.md +19 -1
minitap/mobile_use/sdk/examples/platform_minimal_example.py +46 -0
minitap/mobile_use/sdk/services/platform.py +244 -0
minitap/mobile_use/sdk/types/__init__.py +14 -14
minitap/mobile_use/sdk/types/exceptions.py +27 -0
minitap/mobile_use/sdk/types/platform.py +125 -0
minitap/mobile_use/sdk/types/task.py +60 -17
minitap/mobile_use/servers/device_hardware_bridge.py +1 -1
minitap/mobile_use/servers/stop_servers.py +11 -12
minitap/mobile_use/services/llm.py +89 -5
minitap/mobile_use/tools/index.py +0 -6
minitap/mobile_use/tools/mobile/back.py +3 -3
minitap/mobile_use/tools/mobile/clear_text.py +24 -43
minitap/mobile_use/tools/mobile/erase_one_char.py +5 -4
minitap/mobile_use/tools/mobile/glimpse_screen.py +11 -7
minitap/mobile_use/tools/mobile/input_text.py +21 -51
minitap/mobile_use/tools/mobile/launch_app.py +54 -22
minitap/mobile_use/tools/mobile/long_press_on.py +15 -8
minitap/mobile_use/tools/mobile/open_link.py +15 -8
minitap/mobile_use/tools/mobile/press_key.py +15 -8
minitap/mobile_use/tools/mobile/stop_app.py +14 -8
minitap/mobile_use/tools/mobile/swipe.py +11 -5
minitap/mobile_use/tools/mobile/tap.py +103 -21
minitap/mobile_use/tools/mobile/wait_for_animation_to_end.py +3 -3
minitap/mobile_use/tools/test_utils.py +104 -78
minitap/mobile_use/tools/types.py +35 -0
minitap/mobile_use/tools/utils.py +51 -48
minitap/mobile_use/utils/recorder.py +1 -1
minitap/mobile_use/utils/ui_hierarchy.py +9 -2
{minitap_mobile_use-2.3.0.dist-info → minitap_mobile_use-2.4.0.dist-info}/METADATA +3 -1
{minitap_mobile_use-2.3.0.dist-info → minitap_mobile_use-2.4.0.dist-info}/RECORD +51 -50
minitap/mobile_use/tools/mobile/copy_text_from.py +0 -75
minitap/mobile_use/tools/mobile/find_packages.py +0 -69
minitap/mobile_use/tools/mobile/paste_text.py +0 -88
{minitap_mobile_use-2.3.0.dist-info → minitap_mobile_use-2.4.0.dist-info}/WHEEL +0 -0
{minitap_mobile_use-2.3.0.dist-info → minitap_mobile_use-2.4.0.dist-info}/entry_points.txt +0 -0

minitap/mobile_use/tools/test_utils.py CHANGED Viewed

@@ -12,6 +12,7 @@ from minitap.mobile_use.controllers.mobile_command_controller import (  # noqa:
     IdSelectorRequest,
     SelectorRequestWithCoordinates,
 )
+from minitap.mobile_use.tools.types import Target  # noqa: E402
 from minitap.mobile_use.tools.utils import (  # noqa: E402
     focus_element_if_needed,
     move_cursor_to_end_if_bounds,
@@ -54,6 +55,7 @@ def sample_rich_element():
             "resource-id": "com.example:id/text_input",
             "focused": "false",
             "text": "Sample text",
+            "bounds": {"x": 100, "y": 200, "width": 300, "height": 50},
         },
         "children": [],
     }
@@ -71,16 +73,19 @@ class TestMoveCursorToEndIfBounds:
         mock_state.latest_ui_hierarchy = [sample_element]
         mock_find_element.return_value = sample_element
-        result = move_cursor_to_end_if_bounds(
-            ctx=mock_context,
-            state=mock_state,
-            text_input_resource_id="com.example:id/text_input",
-            text_input_coordinates=None,
-            text_input_text=None,
+        target = Target(
+            resource_id="com.example:id/text_input",
+            resource_id_index=None,
+            text=None,
+            text_index=None,
+            coordinates=None,
         )
+        result = move_cursor_to_end_if_bounds(ctx=mock_context, state=mock_state, target=target)
         mock_find_element.assert_called_once_with(
-            ui_hierarchy=[sample_element], resource_id="com.example:id/text_input"
+            ui_hierarchy=[sample_element],
+            resource_id="com.example:id/text_input",
+            index=0,
         )
         mock_tap.assert_called_once()
         call_args = mock_tap.call_args[1]
@@ -98,15 +103,16 @@ class TestMoveCursorToEndIfBounds:
     ):
         """Test moving cursor when only coordinates are provided."""
         bounds = ElementBounds(x=50, y=150, width=200, height=40)
-        result = move_cursor_to_end_if_bounds(
-            ctx=mock_context,
-            state=mock_state,
-            text_input_resource_id=None,
-            text_input_coordinates=bounds,
-            text_input_text=None,
+        target = Target(
+            resource_id=None,
+            resource_id_index=None,
+            text=None,
+            text_index=None,
+            coordinates=bounds,
         )
+        result = move_cursor_to_end_if_bounds(ctx=mock_context, state=mock_state, target=target)
         mock_find_element.assert_not_called()
         mock_tap.assert_called_once()
         call_args = mock_tap.call_args[1]
@@ -125,15 +131,16 @@ class TestMoveCursorToEndIfBounds:
         mock_state.latest_ui_hierarchy = [sample_element]
         mock_find_text.return_value = sample_element
-        result = move_cursor_to_end_if_bounds(
-            ctx=mock_context,
-            state=mock_state,
-            text_input_resource_id=None,
-            text_input_coordinates=None,
-            text_input_text="Sample text",
+        target = Target(
+            resource_id=None,
+            resource_id_index=None,
+            text="Sample text",
+            text_index=0,
+            coordinates=None,
         )
+        result = move_cursor_to_end_if_bounds(ctx=mock_context, state=mock_state, target=target)
-        mock_find_text.assert_called_once_with([sample_element], "Sample text")
+        mock_find_text.assert_called_once_with([sample_element], "Sample text", index=0)
         mock_tap.assert_called_once()
         assert result == sample_element
@@ -146,13 +153,14 @@ class TestMoveCursorToEndIfBounds:
         mock_state.latest_ui_hierarchy = []
         mock_find_text.return_value = None
-        result = move_cursor_to_end_if_bounds(
-            ctx=mock_context,
-            state=mock_state,
-            text_input_resource_id=None,
-            text_input_coordinates=None,
-            text_input_text="Nonexistent text",
+        target = Target(
+            resource_id=None,
+            resource_id_index=None,
+            text="Nonexistent text",
+            text_index=None,
+            coordinates=None,
         )
+        result = move_cursor_to_end_if_bounds(ctx=mock_context, state=mock_state, target=target)
         mock_tap.assert_not_called()
         assert result is None
@@ -167,13 +175,14 @@ class TestMoveCursorToEndIfBounds:
         mock_state.latest_ui_hierarchy = [element_no_bounds]
         mock_find_text.return_value = element_no_bounds
-        result = move_cursor_to_end_if_bounds(
-            ctx=mock_context,
-            state=mock_state,
-            text_input_resource_id=None,
-            text_input_coordinates=None,
-            text_input_text="Text without bounds",
+        target = Target(
+            resource_id=None,
+            resource_id_index=None,
+            text="Text without bounds",
+            text_index=None,
+            coordinates=None,
         )
+        result = move_cursor_to_end_if_bounds(ctx=mock_context, state=mock_state, target=target)
         mock_tap.assert_not_called()
         assert result is None  # Should return None as no action was taken
@@ -183,13 +192,14 @@ class TestMoveCursorToEndIfBounds:
         """Test when element is not found by resource_id."""
         mock_find_element.return_value = None
-        result = move_cursor_to_end_if_bounds(
-            ctx=mock_context,
-            state=mock_state,
-            text_input_resource_id="com.example:id/nonexistent",
-            text_input_coordinates=None,
-            text_input_text=None,
+        target = Target(
+            resource_id="com.example:id/nonexistent",
+            resource_id_index=None,
+            text=None,
+            text_index=None,
+            coordinates=None,
         )
+        result = move_cursor_to_end_if_bounds(ctx=mock_context, state=mock_state, target=target)
         assert result is None
@@ -209,12 +219,14 @@ class TestFocusElementIfNeeded:
         mock_context.hw_bridge_client.get_rich_hierarchy.return_value = [focused_element]
         mock_find_element.return_value = focused_element["attributes"]
-        result = focus_element_if_needed(
-            ctx=mock_context,
-            input_resource_id="com.example:id/text_input",
-            input_coordinates=None,
-            input_text=None,
+        target = Target(
+            resource_id="com.example:id/text_input",
+            resource_id_index=None,
+            text=None,
+            text_index=None,
+            coordinates=None,
         )
+        result = focus_element_if_needed(ctx=mock_context, target=target)
         mock_tap.assert_not_called()
         assert result is True
@@ -244,16 +256,19 @@ class TestFocusElementIfNeeded:
             focused_element["attributes"],
         ]
-        result = focus_element_if_needed(
-            ctx=mock_context,
-            input_resource_id="com.example:id/text_input",
-            input_coordinates=None,
-            input_text=None,
+        target = Target(
+            resource_id="com.example:id/text_input",
+            resource_id_index=None,
+            text=None,
+            text_index=None,
+            coordinates=None,
         )
+        result = focus_element_if_needed(ctx=mock_context, target=target)
         mock_tap.assert_called_once_with(
             ctx=mock_context,
             selector_request=IdSelectorRequest(id="com.example:id/text_input"),
+            index=0,
         )
         assert mock_context.hw_bridge_client.get_rich_hierarchy.call_count == 2
         assert result is True
@@ -268,25 +283,30 @@ class TestFocusElementIfNeeded:
         element_from_id = sample_rich_element["attributes"].copy()
         element_from_id["text"] = "Different text"
-        # L'élément qui sera trouvé par le texte doit avoir des "bounds"
         element_from_text = sample_rich_element.copy()
-        element_from_text["bounds"] = {"x": 10, "y": 20, "width": 100, "height": 30}
+        element_from_text["attributes"]["bounds"] = {
+            "x": 10,
+            "y": 20,
+            "width": 100,
+            "height": 30,
+        }
         mock_context.hw_bridge_client.get_rich_hierarchy.return_value = [element_from_text]
         mock_find_id.return_value = element_from_id
         with patch("minitap.mobile_use.tools.utils.find_element_by_text") as mock_find_text:
-            mock_find_text.return_value = element_from_text  # Trouvé par le texte
-            result = focus_element_if_needed(
-                ctx=mock_context,
-                input_resource_id="com.example:id/text_input",
-                input_coordinates=None,
-                input_text="Sample text",  # Le texte correct à rechercher
+            mock_find_text.return_value = element_from_text["attributes"]
+            target = Target(
+                resource_id="com.example:id/text_input",
+                resource_id_index=None,
+                text="Sample text",
+                text_index=None,
+                coordinates=None,
             )
+            result = focus_element_if_needed(ctx=mock_context, target=target)
             mock_logger.warning.assert_called_once()
-            # Maintenant, tap devrait être appelé car l'élément trouvé a des "bounds"
             mock_tap.assert_called_once()
             assert result is True
@@ -296,26 +316,31 @@ class TestFocusElementIfNeeded:
         self, mock_find_text, mock_tap, mock_context, sample_rich_element
     ):
         """Test fallback to focusing using text."""
-        # L'élément doit avoir des "bounds" au premier niveau pour
-        # que get_bounds_for_element fonctionne
         element_with_bounds = sample_rich_element.copy()
-        element_with_bounds["bounds"] = {"x": 10, "y": 20, "width": 100, "height": 30}
+        element_with_bounds["attributes"]["bounds"] = {
+            "x": 10,
+            "y": 20,
+            "width": 100,
+            "height": 30,
+        }
         mock_context.hw_bridge_client.get_rich_hierarchy.return_value = [element_with_bounds]
-        mock_find_text.return_value = element_with_bounds
-        result = focus_element_if_needed(
-            ctx=mock_context,
-            input_resource_id=None,
-            input_coordinates=None,
-            input_text="Sample text",
+        mock_find_text.return_value = element_with_bounds["attributes"]
+        target = Target(
+            resource_id=None,
+            resource_id_index=None,
+            text="Sample text",
+            text_index=None,
+            coordinates=None,
         )
+        result = focus_element_if_needed(ctx=mock_context, target=target)
         mock_find_text.assert_called_once()
         mock_tap.assert_called_once()
         call_args = mock_tap.call_args[1]
         selector = call_args["selector_request"]
-        # Vérifie que le tap se fait bien au centre des "bounds"
+        assert isinstance(selector, SelectorRequestWithCoordinates)
         assert selector.coordinates.x == 60  # 10 + 100/2
         assert selector.coordinates.y == 35  # 20 + 30/2
         assert result is True
@@ -325,7 +350,6 @@ class TestFocusElementIfNeeded:
         """Test failure when no locator can find an element."""
         mock_context.hw_bridge_client.get_rich_hierarchy.return_value = []
-        # Mock find_element functions to return None
         with (
             patch("minitap.mobile_use.tools.utils.find_element_by_resource_id") as mock_find_id,
             patch("minitap.mobile_use.tools.utils.find_element_by_text") as mock_find_text,
@@ -333,16 +357,18 @@ class TestFocusElementIfNeeded:
             mock_find_id.return_value = None
             mock_find_text.return_value = None
-            result = focus_element_if_needed(
-                ctx=mock_context,
-                input_resource_id="nonexistent",
-                input_coordinates=None,
-                input_text="nonexistent",
+            target = Target(
+                resource_id="nonexistent",
+                resource_id_index=None,
+                text="nonexistent",
+                text_index=None,
+                coordinates=None,
             )
+            result = focus_element_if_needed(ctx=mock_context, target=target)
         mock_logger.error.assert_called_once_with(
-            "Failed to focus element. No valid locator"
-            + "(resource_id, coordinates, or text) succeeded."
+            "Failed to focus element."
+            + " No valid locator (resource_id, coordinates, or text) succeeded."
         )
         assert result is False

minitap/mobile_use/tools/types.py ADDED Viewed

@@ -0,0 +1,35 @@
+from pydantic import BaseModel, Field, model_validator
+from minitap.mobile_use.utils.ui_hierarchy import ElementBounds
+class Target(BaseModel):
+    """
+    A comprehensive locator for a UI element, supporting a fallback mechanism.
+    """
+    resource_id: str | None = Field(None, description="The resource-id of the element.")
+    resource_id_index: int | None = Field(
+        None,
+        description="The zero-based index if multiple elements share the same resource-id.",
+    )
+    text: str | None = Field(
+        None, description="The text content of the element (e.g., a label or placeholder)."
+    )
+    text_index: int | None = Field(
+        None, description="The zero-based index if multiple elements share the same text."
+    )
+    coordinates: ElementBounds | None = Field(
+        None, description="The x, y, width, and height of the element."
+    )
+    @model_validator(mode="after")
+    def _default_indices(self):
+        # Treat empty strings like “not provided”
+        if (
+            self.resource_id is not None and self.resource_id != ""
+        ) and self.resource_id_index is None:
+            self.resource_id_index = 0
+        if (self.text is not None and self.text != "") and self.text_index is None:
+            self.text_index = 0
+        return self

minitap/mobile_use/tools/utils.py CHANGED Viewed

@@ -8,6 +8,7 @@ from minitap.mobile_use.controllers.mobile_command_controller import (
     tap,
 )
 from minitap.mobile_use.graph.state import State
+from minitap.mobile_use.tools.types import Target
 from minitap.mobile_use.utils.logger import get_logger
 from minitap.mobile_use.utils.ui_hierarchy import (
     ElementBounds,
@@ -21,7 +22,9 @@ from minitap.mobile_use.utils.ui_hierarchy import (
 logger = get_logger(__name__)
-def find_element_by_text(ui_hierarchy: list[dict], text: str) -> dict | None:
+def find_element_by_text(
+    ui_hierarchy: list[dict], text: str, index: int | None = None
+) -> dict | None:
     """
     Find a UI element by its text content (adapted to both flat and rich hierarchy)
@@ -40,7 +43,11 @@ def find_element_by_text(ui_hierarchy: list[dict], text: str) -> dict | None:
             if isinstance(element, dict):
                 src = element.get("attributes", element)
                 if text and text.lower() == src.get("text", "").lower():
-                    return element
+                    idx = index or 0
+                    if idx == 0:
+                        return element
+                    idx -= 1
+                    continue
                 if (children := element.get("children", [])) and (
                     found := search_recursive(children)
                 ):
@@ -66,23 +73,22 @@ def tap_bottom_right_of_element(bounds: ElementBounds, ctx: MobileUseContext):
 def move_cursor_to_end_if_bounds(
     ctx: MobileUseContext,
     state: State,
-    text_input_resource_id: str | None,
-    text_input_coordinates: ElementBounds | None,
-    text_input_text: str | None,
+    target: Target,
     elt: dict | None = None,
 ) -> dict | None:
     """
     Best-effort move of the text cursor near the end of the input by tapping the
     bottom-right area of the focused element (if bounds are available).
     """
-    if text_input_resource_id:
+    if target.resource_id:
         if not elt:
             elt = find_element_by_resource_id(
                 ui_hierarchy=state.latest_ui_hierarchy or [],
-                resource_id=text_input_resource_id,
+                resource_id=target.resource_id,
+                index=target.resource_id_index,
             )
         if not elt:
-            return
+            return None
         bounds = get_bounds_for_element(elt)
         if not bounds:
@@ -90,86 +96,85 @@ def move_cursor_to_end_if_bounds(
         logger.debug("Tapping near the end of the input to move the cursor")
         tap_bottom_right_of_element(bounds=bounds, ctx=ctx)
-        logger.debug(f"Tapped end of input {text_input_resource_id}")
+        logger.debug(f"Tapped end of input {target.resource_id}")
         return elt
-    if text_input_coordinates:
-        tap_bottom_right_of_element(text_input_coordinates, ctx=ctx)
+    if target.coordinates:
+        tap_bottom_right_of_element(target.coordinates, ctx=ctx)
         logger.debug("Tapped end of input by coordinates")
         return elt
-    if text_input_text:
-        text_elt = find_element_by_text(state.latest_ui_hierarchy or [], text_input_text)
+    if target.text:
+        text_elt = find_element_by_text(
+            state.latest_ui_hierarchy or [], target.text, index=target.text_index
+        )
         if text_elt:
             bounds = get_bounds_for_element(text_elt)
             if bounds:
                 tap_bottom_right_of_element(bounds=bounds, ctx=ctx)
-                logger.debug(f"Tapped end of input that had text'{text_input_text}'")
+                logger.debug(f"Tapped end of input that had text'{target.text}'")
                 return text_elt
         return None
     return None
-def focus_element_if_needed(
-    ctx: MobileUseContext,
-    input_resource_id: str | None,
-    input_coordinates: ElementBounds | None,
-    input_text: str | None,
-) -> bool:
+def focus_element_if_needed(ctx: MobileUseContext, target: Target) -> bool:
     """
     Ensures the element is focused, with a sanity check to prevent trusting misleading IDs.
     """
     rich_hierarchy = ctx.hw_bridge_client.get_rich_hierarchy()
     elt_from_id = None
-    if input_resource_id:
+    if target.resource_id:
         elt_from_id = find_element_by_resource_id(
-            ui_hierarchy=rich_hierarchy, resource_id=input_resource_id, is_rich_hierarchy=True
+            ui_hierarchy=rich_hierarchy,
+            resource_id=target.resource_id,
+            index=target.resource_id_index,
+            is_rich_hierarchy=True,
         )
-    if elt_from_id and input_text:
+    if elt_from_id and target.text:
         text_from_id_elt = get_element_text(elt_from_id)
-        if not text_from_id_elt or input_text.lower() != text_from_id_elt.lower():
+        if not text_from_id_elt or target.text.lower() != text_from_id_elt.lower():
             logger.warning(
-                f"ID '{input_resource_id}' and text '{input_text}'"
-                + "seem to be on different elements. "
-                "Ignoring the resource_id and falling back to other locators."
+                f"ID '{target.resource_id}' and text '{target.text}' seem to be on different "
+                "elements. Ignoring the resource_id and falling back to other locators."
             )
             elt_from_id = None
     if elt_from_id:
         if not is_element_focused(elt_from_id):
-            tap(ctx=ctx, selector_request=IdSelectorRequest(id=input_resource_id))  # type: ignore
-            logger.debug(f"Focused (tap) on resource_id={input_resource_id}")
+            tap(
+                ctx=ctx,
+                selector_request=IdSelectorRequest(id=target.resource_id),  # type: ignore
+                index=target.resource_id_index,
+            )
+            logger.debug(f"Focused (tap) on resource_id={target.resource_id}")
             rich_hierarchy = ctx.hw_bridge_client.get_rich_hierarchy()
             elt_from_id = find_element_by_resource_id(
                 ui_hierarchy=rich_hierarchy,
-                resource_id=input_resource_id,  # type: ignore
+                resource_id=target.resource_id,  # type: ignore
+                index=target.resource_id_index,
                 is_rich_hierarchy=True,
             )
         if elt_from_id and is_element_focused(elt_from_id):
-            logger.debug(f"Text input is focused: {input_resource_id}")
+            logger.debug(f"Text input is focused: {target.resource_id}")
             return True
+        logger.warning(f"Failed to focus using resource_id='{target.resource_id}'. Fallback...")
-        logger.warning(f"Failed to focus using resource_id='{input_resource_id}'. Fallback...")
-    if input_coordinates:
-        relative_point = input_coordinates.get_center()
+    if target.coordinates:
+        relative_point = target.coordinates.get_center()
         tap(
             ctx=ctx,
             selector_request=SelectorRequestWithCoordinates(
-                coordinates=CoordinatesSelectorRequest(
-                    x=relative_point.x,
-                    y=relative_point.y,
-                ),
+                coordinates=CoordinatesSelectorRequest(x=relative_point.x, y=relative_point.y)
             ),
         )
         logger.debug(f"Tapped on coordinates ({relative_point.x}, {relative_point.y}) to focus.")
         return True
-    if input_text:
-        text_elt = find_element_by_text(rich_hierarchy, input_text)
+    if target.text:
+        text_elt = find_element_by_text(rich_hierarchy, target.text, index=target.text_index)
         if text_elt:
             bounds = get_bounds_for_element(text_elt)
             if bounds:
@@ -178,16 +183,14 @@ def focus_element_if_needed(
                     ctx=ctx,
                     selector_request=SelectorRequestWithCoordinates(
                         coordinates=CoordinatesSelectorRequest(
-                            x=relative_point.x,
-                            y=relative_point.y,
-                        ),
+                            x=relative_point.x, y=relative_point.y
+                        )
                     ),
                 )
-                logger.debug(f"Tapped on text element '{input_text}' to focus.")
+                logger.debug(f"Tapped on text element '{target.text}' to focus.")
                 return True
     logger.error(
-        "Failed to focus element. No valid locator"
-        + "(resource_id, coordinates, or text) succeeded."
+        "Failed to focus element. No valid locator (resource_id, coordinates, or text) succeeded."
     )
     return False

minitap/mobile_use/utils/recorder.py CHANGED Viewed

@@ -25,7 +25,7 @@ def record_interaction(ctx: MobileUseContext, response: BaseMessage):
         logger.error(f"Error compressing screenshot: {e}")
         return "Could not record this interaction"
     timestamp = time.time()
-    folder = ctx.execution_setup.traces_path.joinpath(ctx.execution_setup.trace_id).resolve()
+    folder = ctx.execution_setup.traces_path.joinpath(ctx.execution_setup.trace_name).resolve()
     folder.mkdir(parents=True, exist_ok=True)
     try:
         with open(

minitap/mobile_use/utils/ui_hierarchy.py CHANGED Viewed

@@ -40,7 +40,10 @@ def text_input_is_empty(text: str | None, hint_text: str | None) -> bool:
 def find_element_by_resource_id(
-    ui_hierarchy: list[dict], resource_id: str, is_rich_hierarchy: bool = False
+    ui_hierarchy: list[dict],
+    resource_id: str,
+    index: int | None = None,
+    is_rich_hierarchy: bool = False,
 ) -> dict | None:
     """
     Find a UI element by its resource-id in the UI hierarchy.
@@ -60,7 +63,11 @@ def find_element_by_resource_id(
         for element in elements:
             if isinstance(element, dict):
                 if element.get("resourceId") == resource_id:
-                    return element
+                    idx = index or 0
+                    if idx == 0:
+                        return element
+                    idx -= 1
+                    continue
                 children = element.get("children", [])
                 if children:

{minitap_mobile_use-2.3.0.dist-info → minitap_mobile_use-2.4.0.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: minitap-mobile-use
-Version: 2.3.0
+Version: 2.4.0
 Summary: AI-powered multi-agent system that automates real Android and iOS devices through low-level control using LangGraph.
 Author: Pierre-Louis Favreau, Jean-Pierre Lo, Nicolas Dehandschoewercker
 License: MIT License
@@ -43,9 +43,11 @@ Requires-Dist: uvicorn[standard]==0.30.1
 Requires-Dist: colorama>=0.4.6
 Requires-Dist: psutil>=5.9.0
 Requires-Dist: langchain-google-vertexai>=2.0.28
+Requires-Dist: httpx>=0.28.1
 Requires-Dist: ruff==0.5.3 ; extra == 'dev'
 Requires-Dist: pytest==8.4.1 ; extra == 'dev'
 Requires-Dist: pytest-cov==5.0.0 ; extra == 'dev'
+Requires-Dist: pyright==1.1.405 ; extra == 'dev'
 Requires-Python: >=3.12
 Project-URL: Homepage, https://minitap.ai/
 Project-URL: Source, https://github.com/minitap-ai/mobile-use

minitap-mobile-use 2.3.0__py3-none-any.whl → 2.4.0__py3-none-any.whl

Potentially problematic release.

minitap-mobile-use 2.3.0__py3-none-any.whl → 2.4.0__py3-none-any.whl