minitap-mobile-use 2.0.1__py3-none-any.whl → 2.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of minitap-mobile-use might be problematic. Click here for more details.
- minitap/mobile_use/agents/cortex/cortex.md +7 -5
- minitap/mobile_use/agents/cortex/cortex.py +4 -1
- minitap/mobile_use/agents/cortex/types.py +1 -3
- minitap/mobile_use/agents/executor/executor.md +4 -5
- minitap/mobile_use/agents/executor/executor.py +3 -1
- minitap/mobile_use/agents/executor/tool_node.py +6 -6
- minitap/mobile_use/agents/outputter/outputter.py +1 -2
- minitap/mobile_use/agents/planner/planner.md +11 -2
- minitap/mobile_use/agents/planner/planner.py +7 -2
- minitap/mobile_use/agents/planner/types.py +3 -4
- minitap/mobile_use/agents/summarizer/summarizer.py +2 -1
- minitap/mobile_use/config.py +31 -16
- minitap/mobile_use/context.py +3 -4
- minitap/mobile_use/controllers/mobile_command_controller.py +36 -24
- minitap/mobile_use/controllers/platform_specific_commands_controller.py +3 -4
- minitap/mobile_use/graph/graph.py +1 -0
- minitap/mobile_use/graph/state.py +9 -9
- minitap/mobile_use/main.py +7 -8
- minitap/mobile_use/sdk/agent.py +25 -26
- minitap/mobile_use/sdk/builders/agent_config_builder.py +9 -10
- minitap/mobile_use/sdk/builders/task_request_builder.py +9 -9
- minitap/mobile_use/sdk/examples/smart_notification_assistant.py +1 -2
- minitap/mobile_use/sdk/types/agent.py +5 -5
- minitap/mobile_use/sdk/types/task.py +19 -18
- minitap/mobile_use/sdk/utils.py +4 -3
- minitap/mobile_use/servers/config.py +1 -2
- minitap/mobile_use/servers/device_hardware_bridge.py +3 -4
- minitap/mobile_use/servers/start_servers.py +4 -4
- minitap/mobile_use/servers/stop_servers.py +2 -3
- minitap/mobile_use/services/llm.py +24 -6
- minitap/mobile_use/tools/index.py +26 -14
- minitap/mobile_use/tools/mobile/back.py +1 -1
- minitap/mobile_use/tools/mobile/clear_text.py +277 -0
- minitap/mobile_use/tools/mobile/copy_text_from.py +1 -1
- minitap/mobile_use/tools/mobile/erase_one_char.py +56 -0
- minitap/mobile_use/tools/mobile/find_packages.py +1 -1
- minitap/mobile_use/tools/mobile/input_text.py +4 -80
- minitap/mobile_use/tools/mobile/launch_app.py +1 -1
- minitap/mobile_use/tools/mobile/long_press_on.py +2 -4
- minitap/mobile_use/tools/mobile/open_link.py +1 -1
- minitap/mobile_use/tools/mobile/paste_text.py +1 -1
- minitap/mobile_use/tools/mobile/press_key.py +1 -1
- minitap/mobile_use/tools/mobile/stop_app.py +2 -4
- minitap/mobile_use/tools/mobile/swipe.py +107 -9
- minitap/mobile_use/tools/mobile/take_screenshot.py +1 -1
- minitap/mobile_use/tools/mobile/tap.py +2 -4
- minitap/mobile_use/tools/mobile/wait_for_animation_to_end.py +2 -4
- minitap/mobile_use/tools/tool_wrapper.py +6 -1
- minitap/mobile_use/tools/utils.py +86 -0
- minitap/mobile_use/utils/cli_helpers.py +1 -2
- minitap/mobile_use/utils/cli_selection.py +5 -6
- minitap/mobile_use/utils/decorators.py +21 -20
- minitap/mobile_use/utils/logger.py +3 -4
- minitap/mobile_use/utils/media.py +1 -1
- minitap/mobile_use/utils/recorder.py +2 -9
- minitap/mobile_use/utils/ui_hierarchy.py +13 -5
- {minitap_mobile_use-2.0.1.dist-info → minitap_mobile_use-2.2.0.dist-info}/METADATA +35 -5
- minitap_mobile_use-2.2.0.dist-info/RECORD +96 -0
- minitap/mobile_use/tools/mobile/erase_text.py +0 -122
- minitap_mobile_use-2.0.1.dist-info/RECORD +0 -94
- {minitap_mobile_use-2.0.1.dist-info → minitap_mobile_use-2.2.0.dist-info}/WHEEL +0 -0
- {minitap_mobile_use-2.0.1.dist-info → minitap_mobile_use-2.2.0.dist-info}/entry_points.txt +0 -0
|
@@ -1,18 +1,28 @@
|
|
|
1
|
+
from typing import Annotated
|
|
2
|
+
|
|
1
3
|
from langchain_core.messages import ToolMessage
|
|
2
4
|
from langchain_core.tools import tool
|
|
3
|
-
from langchain_core.tools.base import InjectedToolCallId
|
|
5
|
+
from langchain_core.tools.base import BaseTool, InjectedToolCallId
|
|
4
6
|
from langgraph.prebuilt import InjectedState
|
|
5
7
|
from langgraph.types import Command
|
|
8
|
+
from pydantic import Field
|
|
9
|
+
|
|
6
10
|
from minitap.mobile_use.constants import EXECUTOR_MESSAGES_KEY
|
|
7
11
|
from minitap.mobile_use.context import MobileUseContext
|
|
8
|
-
from minitap.mobile_use.controllers.mobile_command_controller import
|
|
12
|
+
from minitap.mobile_use.controllers.mobile_command_controller import (
|
|
13
|
+
CoordinatesSelectorRequest,
|
|
14
|
+
PercentagesSelectorRequest,
|
|
15
|
+
SwipeDirection,
|
|
16
|
+
SwipeRequest,
|
|
17
|
+
SwipeStartEndCoordinatesRequest,
|
|
18
|
+
SwipeStartEndPercentagesRequest,
|
|
19
|
+
)
|
|
9
20
|
from minitap.mobile_use.controllers.mobile_command_controller import swipe as swipe_controller
|
|
10
21
|
from minitap.mobile_use.graph.state import State
|
|
11
|
-
from minitap.mobile_use.tools.tool_wrapper import
|
|
12
|
-
from typing_extensions import Annotated
|
|
22
|
+
from minitap.mobile_use.tools.tool_wrapper import CompositeToolWrapper
|
|
13
23
|
|
|
14
24
|
|
|
15
|
-
def get_swipe_tool(ctx: MobileUseContext):
|
|
25
|
+
def get_swipe_tool(ctx: MobileUseContext) -> BaseTool:
|
|
16
26
|
@tool
|
|
17
27
|
def swipe(
|
|
18
28
|
tool_call_id: Annotated[str, InjectedToolCallId],
|
|
@@ -20,9 +30,7 @@ def get_swipe_tool(ctx: MobileUseContext):
|
|
|
20
30
|
agent_thought: str,
|
|
21
31
|
swipe_request: SwipeRequest,
|
|
22
32
|
):
|
|
23
|
-
"""
|
|
24
|
-
Swipes on the screen.
|
|
25
|
-
"""
|
|
33
|
+
"""Swipes on the screen."""
|
|
26
34
|
output = swipe_controller(ctx=ctx, swipe_request=swipe_request)
|
|
27
35
|
has_failed = output is not None
|
|
28
36
|
tool_message = ToolMessage(
|
|
@@ -45,8 +53,98 @@ def get_swipe_tool(ctx: MobileUseContext):
|
|
|
45
53
|
return swipe
|
|
46
54
|
|
|
47
55
|
|
|
48
|
-
|
|
56
|
+
def get_composite_swipe_tools(ctx: MobileUseContext) -> list[BaseTool]:
    """
    Build the flattened swipe tools intended for Vertex AI LLMs.

    The regular swipe tool takes a single ``SwipeRequest`` whose ``swipe_mode``
    is built here from three different shapes (start/end coordinates, start/end
    percentages, or a direction). Each tool returned by this function exposes
    exactly one of those shapes through plain scalar arguments, which avoids
    complex Union type issues, then rebuilds the ``SwipeRequest`` and delegates
    to the tool produced by ``get_swipe_tool``.

    Args:
        ctx: Context forwarded to ``get_swipe_tool`` on every invocation.

    Returns:
        ``[swipe_coordinates, swipe_percentages, swipe_direction]``, in that order.
    """

    def _delegate_to_swipe_tool(
        tool_call_id: str,
        state: State,
        agent_thought: str,
        swipe_request: SwipeRequest,
    ):
        """Invoke the regular swipe tool with an already-built request."""
        # Shared hand-off used by the three tools below. The swipe tool is
        # obtained anew on each call, exactly as each tool did on its own before.
        return get_swipe_tool(ctx=ctx).invoke(
            input={
                "tool_call_id": tool_call_id,
                "state": state,
                "agent_thought": agent_thought,
                "swipe_request": swipe_request,
            }
        )

    # NOTE: the one-line docstrings of the decorated functions below are left
    # untouched on purpose; they belong to the tool definitions themselves.

    @tool
    def swipe_coordinates(
        agent_thought: str,
        tool_call_id: Annotated[str, InjectedToolCallId],
        state: Annotated[State, InjectedState],
        start_x: int = Field(description="Start X coordinate in pixels"),
        start_y: int = Field(description="Start Y coordinate in pixels"),
        end_x: int = Field(description="End X coordinate in pixels"),
        end_y: int = Field(description="End Y coordinate in pixels"),
        duration: int = Field(description="Duration in ms", ge=1, le=10000, default=400),
    ):
        """Swipe using pixel coordinates from start position to end position."""
        swipe_request = SwipeRequest(
            swipe_mode=SwipeStartEndCoordinatesRequest(
                start=CoordinatesSelectorRequest(x=start_x, y=start_y),
                end=CoordinatesSelectorRequest(x=end_x, y=end_y),
            ),
            duration=duration,
        )
        return _delegate_to_swipe_tool(tool_call_id, state, agent_thought, swipe_request)

    @tool
    def swipe_percentages(
        agent_thought: str,
        tool_call_id: Annotated[str, InjectedToolCallId],
        state: Annotated[State, InjectedState],
        start_x_percent: int = Field(description="Start X percent (0-100)", ge=0, le=100),
        start_y_percent: int = Field(description="Start Y percent (0-100)", ge=0, le=100),
        end_x_percent: int = Field(description="End X percent (0-100)", ge=0, le=100),
        end_y_percent: int = Field(description="End Y percent (0-100)", ge=0, le=100),
        duration: int = Field(description="Duration in ms", ge=1, le=10000, default=400),
    ):
        """Swipe using percentage coordinates from start position to end position."""
        swipe_request = SwipeRequest(
            swipe_mode=SwipeStartEndPercentagesRequest(
                start=PercentagesSelectorRequest(
                    x_percent=start_x_percent, y_percent=start_y_percent
                ),
                end=PercentagesSelectorRequest(x_percent=end_x_percent, y_percent=end_y_percent),
            ),
            duration=duration,
        )
        return _delegate_to_swipe_tool(tool_call_id, state, agent_thought, swipe_request)

    @tool
    def swipe_direction(
        agent_thought: str,
        tool_call_id: Annotated[str, InjectedToolCallId],
        state: Annotated[State, InjectedState],
        direction: SwipeDirection,
        duration: int = Field(description="Duration in ms", ge=1, le=10000, default=400),
    ):
        """Swipe in a specific direction across the screen."""
        swipe_request = SwipeRequest(
            swipe_mode=direction,
            duration=duration,
        )
        return _delegate_to_swipe_tool(tool_call_id, state, agent_thought, swipe_request)

    return [swipe_coordinates, swipe_percentages, swipe_direction]
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
# Wrapper for the swipe tool: it carries the getter for the regular tool, the
# getter for the three composite variants, and the messages for each outcome.
swipe_wrapper = CompositeToolWrapper(
    on_success_fn=lambda: "Swipe is successful.",
    on_failure_fn=lambda: "Failed to swipe.",
    tool_fn_getter=get_swipe_tool,
    composite_tools_fn_getter=get_composite_swipe_tools,
)
|
|
@@ -11,7 +11,7 @@ from minitap.mobile_use.controllers.mobile_command_controller import (
|
|
|
11
11
|
from minitap.mobile_use.graph.state import State
|
|
12
12
|
from minitap.mobile_use.tools.tool_wrapper import ToolWrapper
|
|
13
13
|
from minitap.mobile_use.utils.media import compress_base64_jpeg
|
|
14
|
-
from
|
|
14
|
+
from typing import Annotated
|
|
15
15
|
|
|
16
16
|
|
|
17
17
|
def get_take_screenshot_tool(ctx: MobileUseContext):
|
|
@@ -1,5 +1,3 @@
|
|
|
1
|
-
from typing import Optional
|
|
2
|
-
|
|
3
1
|
from langchain_core.messages import ToolMessage
|
|
4
2
|
from langchain_core.tools import tool
|
|
5
3
|
from langchain_core.tools.base import InjectedToolCallId
|
|
@@ -11,7 +9,7 @@ from minitap.mobile_use.controllers.mobile_command_controller import SelectorReq
|
|
|
11
9
|
from minitap.mobile_use.controllers.mobile_command_controller import tap as tap_controller
|
|
12
10
|
from minitap.mobile_use.graph.state import State
|
|
13
11
|
from minitap.mobile_use.tools.tool_wrapper import ToolWrapper
|
|
14
|
-
from
|
|
12
|
+
from typing import Annotated
|
|
15
13
|
|
|
16
14
|
|
|
17
15
|
def get_tap_tool(ctx: MobileUseContext):
|
|
@@ -21,7 +19,7 @@ def get_tap_tool(ctx: MobileUseContext):
|
|
|
21
19
|
state: Annotated[State, InjectedState],
|
|
22
20
|
agent_thought: str,
|
|
23
21
|
selector_request: SelectorRequest,
|
|
24
|
-
index:
|
|
22
|
+
index: int | None = None,
|
|
25
23
|
):
|
|
26
24
|
"""
|
|
27
25
|
Taps on a selector.
|
|
@@ -1,5 +1,3 @@
|
|
|
1
|
-
from typing import Optional
|
|
2
|
-
|
|
3
1
|
from langchain_core.messages import ToolMessage
|
|
4
2
|
from langchain_core.tools import tool
|
|
5
3
|
from langchain_core.tools.base import InjectedToolCallId
|
|
@@ -13,7 +11,7 @@ from minitap.mobile_use.controllers.mobile_command_controller import (
|
|
|
13
11
|
)
|
|
14
12
|
from minitap.mobile_use.graph.state import State
|
|
15
13
|
from minitap.mobile_use.tools.tool_wrapper import ToolWrapper
|
|
16
|
-
from
|
|
14
|
+
from typing import Annotated
|
|
17
15
|
|
|
18
16
|
|
|
19
17
|
def get_wait_for_animation_to_end_tool(ctx: MobileUseContext):
|
|
@@ -22,7 +20,7 @@ def get_wait_for_animation_to_end_tool(ctx: MobileUseContext):
|
|
|
22
20
|
tool_call_id: Annotated[str, InjectedToolCallId],
|
|
23
21
|
state: Annotated[State, InjectedState],
|
|
24
22
|
agent_thought: str,
|
|
25
|
-
timeout:
|
|
23
|
+
timeout: WaitTimeout | None,
|
|
26
24
|
):
|
|
27
25
|
"""
|
|
28
26
|
Waits for ongoing animations or videos to finish before continuing.
|
|
@@ -1,7 +1,8 @@
|
|
|
1
|
-
from
|
|
1
|
+
from collections.abc import Callable
|
|
2
2
|
|
|
3
3
|
from langchain_core.tools import BaseTool
|
|
4
4
|
from pydantic import BaseModel
|
|
5
|
+
|
|
5
6
|
from minitap.mobile_use.context import MobileUseContext
|
|
6
7
|
|
|
7
8
|
|
|
@@ -9,3 +10,7 @@ class ToolWrapper(BaseModel):
|
|
|
9
10
|
tool_fn_getter: Callable[[MobileUseContext], BaseTool]
|
|
10
11
|
on_success_fn: Callable[..., str]
|
|
11
12
|
on_failure_fn: Callable[..., str]
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
# A ToolWrapper that can additionally hand out a list of "composite" tools.
# Documented with comments rather than a class docstring so the pydantic
# model's generated schema description is left untouched.
class CompositeToolWrapper(ToolWrapper):
    # Given the context, returns the list of composite tools for this wrapper.
    composite_tools_fn_getter: Callable[[MobileUseContext], list[BaseTool]]
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from minitap.mobile_use.context import MobileUseContext
|
|
4
|
+
from minitap.mobile_use.controllers.mobile_command_controller import (
|
|
5
|
+
CoordinatesSelectorRequest,
|
|
6
|
+
IdSelectorRequest,
|
|
7
|
+
SelectorRequestWithCoordinates,
|
|
8
|
+
tap,
|
|
9
|
+
)
|
|
10
|
+
from minitap.mobile_use.graph.state import State
|
|
11
|
+
from minitap.mobile_use.utils.logger import get_logger
|
|
12
|
+
from minitap.mobile_use.utils.ui_hierarchy import (
|
|
13
|
+
Point,
|
|
14
|
+
find_element_by_resource_id,
|
|
15
|
+
get_bounds_for_element,
|
|
16
|
+
is_element_focused,
|
|
17
|
+
)
|
|
18
|
+
|
|
19
|
+
logger = get_logger(__name__)
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def move_cursor_to_end_if_bounds(
    ctx: MobileUseContext,
    state: State,
    resource_id: str,
    elt: dict | None = None,
) -> dict | None:
    """
    Best-effort attempt to move the text cursor near the end of an input.

    The element is `elt` when one is supplied; otherwise it is looked up by
    `resource_id` in `state.latest_ui_hierarchy`. When bounds can be obtained
    for it, a tap is sent at the relative point (0.99, 0.99) of those bounds,
    i.e. the bottom-right area of the element.

    Args:
        ctx: Context passed through to `tap`.
        state: Supplies `latest_ui_hierarchy` for the lookup (an empty list is
            used when it is unset).
        resource_id: Resource id used for the lookup and in the debug log.
        elt: Optional already-resolved element; skips the lookup when truthy.

    Returns:
        The element that was used (whether or not a tap was sent), or None
        when no element could be found.
    """
    target = elt or find_element_by_resource_id(
        ui_hierarchy=state.latest_ui_hierarchy or [],
        resource_id=resource_id,
    )
    if not target:
        return None

    area = get_bounds_for_element(target)
    if area:
        logger.debug("Tapping near the end of the input to move the cursor")
        corner: Point = area.get_relative_point(x_percent=0.99, y_percent=0.99)
        tap_point = CoordinatesSelectorRequest(x=corner.x, y=corner.y)
        # NOTE(review): whatever tap() returns is not inspected, so the debug
        # line below is emitted even if the tap did not succeed - confirm this
        # is the intended best-effort behaviour.
        tap(
            ctx=ctx,
            selector_request=SelectorRequestWithCoordinates(coordinates=tap_point),
        )
        logger.debug(f"Tapped end of input {resource_id} at ({corner.x}, {corner.y})")

    # Without bounds there is nothing to tap; the element is still handed back.
    return target
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def focus_element_if_needed(
    ctx: MobileUseContext,
    resource_id: str,
) -> bool:
    """
    Make sure the element identified by `resource_id` has focus.

    The rich hierarchy is fetched from the hardware bridge and searched for the
    element. If it is found but not focused, it is tapped by id and the
    hierarchy is fetched and searched once more.

    Returns:
        True when the element is found and reports focus at the end, False
        otherwise (including when the element is not found at all).
    """

    def _lookup() -> dict | None:
        # Always works on a freshly fetched rich hierarchy.
        return find_element_by_resource_id(
            ui_hierarchy=ctx.hw_bridge_client.get_rich_hierarchy(),
            resource_id=resource_id,
            is_rich_hierarchy=True,
        )

    target = _lookup()
    if target and not is_element_focused(target):
        tap(ctx=ctx, selector_request=IdSelectorRequest(id=resource_id))
        logger.debug(f"Focused (tap) on resource_id={resource_id}")
        # Re-read the hierarchy so the check below sees the state after the tap.
        target = _lookup()

    if not target or not is_element_focused(target):
        logger.warning(f"Failed to focus resource_id={resource_id}")
        return False

    logger.debug(f"Text input is focused: {resource_id}")
    return True
|
|
@@ -3,10 +3,9 @@ import sys
|
|
|
3
3
|
from minitap.mobile_use.clients.ios_client import get_ios_devices
|
|
4
4
|
from adbutils import AdbClient
|
|
5
5
|
from rich.console import Console
|
|
6
|
-
from typing import Optional
|
|
7
6
|
|
|
8
7
|
|
|
9
|
-
def display_device_status(console: Console, adb_client:
|
|
8
|
+
def display_device_status(console: Console, adb_client: AdbClient | None = None):
|
|
10
9
|
"""Checks for connected devices and displays the status."""
|
|
11
10
|
console.print("\n[bold]📱 Device Status[/bold]")
|
|
12
11
|
devices = None
|
|
@@ -1,5 +1,4 @@
|
|
|
1
1
|
import sys
|
|
2
|
-
from typing import List, Optional
|
|
3
2
|
|
|
4
3
|
import inquirer
|
|
5
4
|
from rich.console import Console
|
|
@@ -8,12 +7,12 @@ from rich.prompt import Prompt
|
|
|
8
7
|
|
|
9
8
|
def select_provider_and_model(
|
|
10
9
|
console: Console,
|
|
11
|
-
available_providers:
|
|
10
|
+
available_providers: list[str],
|
|
12
11
|
available_models: dict,
|
|
13
12
|
default_provider: str,
|
|
14
13
|
default_model: str,
|
|
15
|
-
provider:
|
|
16
|
-
model:
|
|
14
|
+
provider: str | None = None,
|
|
15
|
+
model: str | None = None,
|
|
17
16
|
) -> tuple[str, str]:
|
|
18
17
|
"""
|
|
19
18
|
Interactive selection of LLM provider and model with arrow-key dropdowns when available.
|
|
@@ -71,7 +70,7 @@ def select_provider_and_model(
|
|
|
71
70
|
def _select_from_list(
|
|
72
71
|
console: Console,
|
|
73
72
|
item_type: str,
|
|
74
|
-
choices:
|
|
73
|
+
choices: list[str],
|
|
75
74
|
default: str,
|
|
76
75
|
message: str,
|
|
77
76
|
) -> str:
|
|
@@ -108,7 +107,7 @@ def _select_from_list(
|
|
|
108
107
|
return _numbered_selection(console, item_type, choices, default)
|
|
109
108
|
|
|
110
109
|
|
|
111
|
-
def _numbered_selection(console: Console, item_type: str, choices:
|
|
110
|
+
def _numbered_selection(console: Console, item_type: str, choices: list[str], default: str) -> str:
|
|
112
111
|
"""Fallback numbered selection when arrow keys aren't available."""
|
|
113
112
|
choices_text = "\n".join([f" {i + 1}. {choice}" for i, choice in enumerate(choices)])
|
|
114
113
|
console.print(f"Available {item_type}s:\n{choices_text}")
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import asyncio
|
|
2
2
|
from functools import wraps
|
|
3
|
-
from typing import Any,
|
|
3
|
+
from typing import Any, TypeVar, cast, overload
|
|
4
|
+
from collections.abc import Awaitable, Callable
|
|
4
5
|
|
|
5
6
|
R = TypeVar("R")
|
|
6
7
|
|
|
@@ -8,9 +9,9 @@ R = TypeVar("R")
|
|
|
8
9
|
def wrap_with_callbacks_sync(
|
|
9
10
|
fn: Callable[..., R],
|
|
10
11
|
*,
|
|
11
|
-
before:
|
|
12
|
-
on_success:
|
|
13
|
-
on_failure:
|
|
12
|
+
before: Callable[..., None] | None = None,
|
|
13
|
+
on_success: Callable[[R], None] | None = None,
|
|
14
|
+
on_failure: Callable[[Exception], None] | None = None,
|
|
14
15
|
suppress_exceptions: bool = False,
|
|
15
16
|
) -> Callable[..., R]:
|
|
16
17
|
@wraps(fn)
|
|
@@ -35,9 +36,9 @@ def wrap_with_callbacks_sync(
|
|
|
35
36
|
def wrap_with_callbacks_async(
|
|
36
37
|
fn: Callable[..., Awaitable[R]],
|
|
37
38
|
*,
|
|
38
|
-
before:
|
|
39
|
-
on_success:
|
|
40
|
-
on_failure:
|
|
39
|
+
before: Callable[..., None] | None = None,
|
|
40
|
+
on_success: Callable[[R], None] | None = None,
|
|
41
|
+
on_failure: Callable[[Exception], None] | None = None,
|
|
41
42
|
suppress_exceptions: bool = False,
|
|
42
43
|
) -> Callable[..., Awaitable[R]]:
|
|
43
44
|
@wraps(fn)
|
|
@@ -63,9 +64,9 @@ def wrap_with_callbacks_async(
|
|
|
63
64
|
def wrap_with_callbacks(
|
|
64
65
|
fn: Callable[..., Awaitable[R]],
|
|
65
66
|
*,
|
|
66
|
-
before:
|
|
67
|
-
on_success:
|
|
68
|
-
on_failure:
|
|
67
|
+
before: Callable[[], None] | None = ...,
|
|
68
|
+
on_success: Callable[[R], None] | None = ...,
|
|
69
|
+
on_failure: Callable[[Exception], None] | None = ...,
|
|
69
70
|
suppress_exceptions: bool = ...,
|
|
70
71
|
) -> Callable[..., Awaitable[R]]: ...
|
|
71
72
|
|
|
@@ -73,9 +74,9 @@ def wrap_with_callbacks(
|
|
|
73
74
|
@overload
|
|
74
75
|
def wrap_with_callbacks(
|
|
75
76
|
*,
|
|
76
|
-
before:
|
|
77
|
-
on_success:
|
|
78
|
-
on_failure:
|
|
77
|
+
before: Callable[..., None] | None = ...,
|
|
78
|
+
on_success: Callable[[Any], None] | None = ...,
|
|
79
|
+
on_failure: Callable[[Exception], None] | None = ...,
|
|
79
80
|
suppress_exceptions: bool = ...,
|
|
80
81
|
) -> Callable[[Callable[..., R]], Callable[..., R]]: ...
|
|
81
82
|
|
|
@@ -84,19 +85,19 @@ def wrap_with_callbacks(
|
|
|
84
85
|
def wrap_with_callbacks(
|
|
85
86
|
fn: Callable[..., R],
|
|
86
87
|
*,
|
|
87
|
-
before:
|
|
88
|
-
on_success:
|
|
89
|
-
on_failure:
|
|
88
|
+
before: Callable[[], None] | None = ...,
|
|
89
|
+
on_success: Callable[[R], None] | None = ...,
|
|
90
|
+
on_failure: Callable[[Exception], None] | None = ...,
|
|
90
91
|
suppress_exceptions: bool = ...,
|
|
91
92
|
) -> Callable[..., R]: ...
|
|
92
93
|
|
|
93
94
|
|
|
94
95
|
def wrap_with_callbacks(
|
|
95
|
-
fn:
|
|
96
|
+
fn: Callable[..., Any] | None = None,
|
|
96
97
|
*,
|
|
97
|
-
before:
|
|
98
|
-
on_success:
|
|
99
|
-
on_failure:
|
|
98
|
+
before: Callable[[], None] | None = None,
|
|
99
|
+
on_success: Callable[[Any], None] | None = None,
|
|
100
|
+
on_failure: Callable[[Exception], None] | None = None,
|
|
100
101
|
suppress_exceptions: bool = False,
|
|
101
102
|
) -> Any:
|
|
102
103
|
def decorator(func: Callable[..., Any]) -> Any:
|
|
@@ -2,7 +2,6 @@ import logging
|
|
|
2
2
|
import sys
|
|
3
3
|
from enum import Enum
|
|
4
4
|
from pathlib import Path
|
|
5
|
-
from typing import Optional, Union
|
|
6
5
|
|
|
7
6
|
from colorama import Fore, Style, init
|
|
8
7
|
|
|
@@ -22,7 +21,7 @@ class MobileUseLogger:
|
|
|
22
21
|
def __init__(
|
|
23
22
|
self,
|
|
24
23
|
name: str,
|
|
25
|
-
log_file:
|
|
24
|
+
log_file: str | Path | None = None,
|
|
26
25
|
console_level: str = "INFO",
|
|
27
26
|
file_level: str = "DEBUG",
|
|
28
27
|
enable_file_logging: bool = True,
|
|
@@ -57,7 +56,7 @@ class MobileUseLogger:
|
|
|
57
56
|
|
|
58
57
|
self.logger.addHandler(console_handler)
|
|
59
58
|
|
|
60
|
-
def _setup_file_handler(self, log_file:
|
|
59
|
+
def _setup_file_handler(self, log_file: str | Path | None, level: str):
|
|
61
60
|
if log_file is None:
|
|
62
61
|
log_file = Path("logs") / f"{self.name.replace('.', '_')}.log"
|
|
63
62
|
|
|
@@ -118,7 +117,7 @@ _loggers = {}
|
|
|
118
117
|
|
|
119
118
|
def get_logger(
|
|
120
119
|
name: str,
|
|
121
|
-
log_file:
|
|
120
|
+
log_file: str | Path | None = None,
|
|
122
121
|
console_level: str = "INFO",
|
|
123
122
|
file_level: str = "DEBUG",
|
|
124
123
|
enable_file_logging: bool = False,
|
|
@@ -55,7 +55,7 @@ def create_steps_json_from_trace_folder(trace_folder_path: Path):
|
|
|
55
55
|
steps = []
|
|
56
56
|
for file in trace_folder_path.iterdir():
|
|
57
57
|
if file.suffix == ".json":
|
|
58
|
-
with open(file,
|
|
58
|
+
with open(file, encoding="utf-8", errors="ignore") as f:
|
|
59
59
|
json_content = f.read()
|
|
60
60
|
steps.append({"timestamp": int(file.stem), "data": json_content})
|
|
61
61
|
|
|
@@ -45,12 +45,5 @@ def record_interaction(ctx: MobileUseContext, response: BaseMessage):
|
|
|
45
45
|
return "Screenshot recorded successfully"
|
|
46
46
|
|
|
47
47
|
|
|
48
|
-
def log_agent_thought(
|
|
49
|
-
|
|
50
|
-
prefix = prefix[0].upper() + prefix[1:]
|
|
51
|
-
else:
|
|
52
|
-
prefix = "New agent thought"
|
|
53
|
-
logger.info(
|
|
54
|
-
f"💭 {Fore.LIGHTMAGENTA_EX + Style.BRIGHT}{prefix}{Style.RESET_ALL}: "
|
|
55
|
-
f"{Fore.LIGHTMAGENTA_EX}{agent_thought}{Style.RESET_ALL}"
|
|
56
|
-
)
|
|
48
|
+
def log_agent_thought(agent_thought: str):
    """Log `agent_thought` at INFO level, prefixed with an emoji and coloured light magenta."""
    coloured_thought = f"{Fore.LIGHTMAGENTA_EX}{agent_thought}{Style.RESET_ALL}"
    logger.info(f"💭 {coloured_thought}")
|
|
@@ -1,5 +1,3 @@
|
|
|
1
|
-
from typing import Optional
|
|
2
|
-
|
|
3
1
|
from pydantic import BaseModel
|
|
4
2
|
|
|
5
3
|
from minitap.mobile_use.utils.logger import get_logger
|
|
@@ -9,7 +7,7 @@ logger = get_logger(__name__)
|
|
|
9
7
|
|
|
10
8
|
def __find_element_by_ressource_id_in_rich_hierarchy(
|
|
11
9
|
hierarchy: list[dict], resource_id: str
|
|
12
|
-
) ->
|
|
10
|
+
) -> dict | None:
|
|
13
11
|
"""
|
|
14
12
|
Retrieves all the sibling elements for a given resource ID from a nested dictionary.
|
|
15
13
|
|
|
@@ -37,9 +35,13 @@ def __find_element_by_ressource_id_in_rich_hierarchy(
|
|
|
37
35
|
return None
|
|
38
36
|
|
|
39
37
|
|
|
38
|
+
def text_input_is_empty(text: str | None, hint_text: str | None) -> bool:
|
|
39
|
+
return not text or text == hint_text
|
|
40
|
+
|
|
41
|
+
|
|
40
42
|
def find_element_by_resource_id(
|
|
41
43
|
ui_hierarchy: list[dict], resource_id: str, is_rich_hierarchy: bool = False
|
|
42
|
-
) ->
|
|
44
|
+
) -> dict | None:
|
|
43
45
|
"""
|
|
44
46
|
Find a UI element by its resource-id in the UI hierarchy.
|
|
45
47
|
|
|
@@ -54,7 +56,7 @@ def find_element_by_resource_id(
|
|
|
54
56
|
if is_rich_hierarchy:
|
|
55
57
|
return __find_element_by_ressource_id_in_rich_hierarchy(ui_hierarchy, resource_id)
|
|
56
58
|
|
|
57
|
-
def search_recursive(elements: list[dict]) ->
|
|
59
|
+
def search_recursive(elements: list[dict]) -> dict | None:
|
|
58
60
|
for element in elements:
|
|
59
61
|
if isinstance(element, dict):
|
|
60
62
|
if element.get("resourceId") == resource_id:
|
|
@@ -74,6 +76,12 @@ def is_element_focused(element: dict) -> bool:
|
|
|
74
76
|
return element.get("focused", None) == "true"
|
|
75
77
|
|
|
76
78
|
|
|
79
|
+
def get_element_text(element: dict, hint_text: bool = False) -> str | None:
|
|
80
|
+
if hint_text:
|
|
81
|
+
return element.get("hintText", None)
|
|
82
|
+
return element.get("text", None)
|
|
83
|
+
|
|
84
|
+
|
|
77
85
|
class Point(BaseModel):
|
|
78
86
|
x: int
|
|
79
87
|
y: int
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: minitap-mobile-use
|
|
3
|
-
Version: 2.0
|
|
3
|
+
Version: 2.2.0
|
|
4
4
|
Summary: AI-powered multi-agent system that automates real Android and iOS devices through low-level control using LangGraph.
|
|
5
5
|
Author: Pierre-Louis Favreau, Jean-Pierre Lo, Nicolas Dehandschoewercker
|
|
6
6
|
License: MIT License
|
|
@@ -24,11 +24,11 @@ License: MIT License
|
|
|
24
24
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
25
25
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
26
26
|
SOFTWARE.
|
|
27
|
-
Requires-Dist: langgraph
|
|
27
|
+
Requires-Dist: langgraph>=0.6.6
|
|
28
28
|
Requires-Dist: adbutils==2.9.3
|
|
29
|
-
Requires-Dist: langchain-google-genai
|
|
30
|
-
Requires-Dist: langchain
|
|
31
|
-
Requires-Dist: langchain-core
|
|
29
|
+
Requires-Dist: langchain-google-genai>=2.1.10
|
|
30
|
+
Requires-Dist: langchain>=0.3.27
|
|
31
|
+
Requires-Dist: langchain-core>=0.3.75
|
|
32
32
|
Requires-Dist: jinja2==3.1.6
|
|
33
33
|
Requires-Dist: python-dotenv==1.1.1
|
|
34
34
|
Requires-Dist: pydantic-settings==2.10.1
|
|
@@ -42,6 +42,7 @@ Requires-Dist: fastapi==0.111.0
|
|
|
42
42
|
Requires-Dist: uvicorn[standard]==0.30.1
|
|
43
43
|
Requires-Dist: colorama>=0.4.6
|
|
44
44
|
Requires-Dist: psutil>=5.9.0
|
|
45
|
+
Requires-Dist: langchain-google-vertexai>=2.0.28
|
|
45
46
|
Requires-Dist: ruff==0.5.3 ; extra == 'dev'
|
|
46
47
|
Requires-Dist: pytest==8.4.1 ; extra == 'dev'
|
|
47
48
|
Requires-Dist: pytest-cov==5.0.0 ; extra == 'dev'
|
|
@@ -69,6 +70,10 @@ Description-Content-Type: text/markdown
|
|
|
69
70
|
<a href="https://x.com/minitap_ai?t=iRWtI497UhRGLeCKYQekig&s=09"><b>Twitter / X</b></a>
|
|
70
71
|
</p>
|
|
71
72
|
|
|
73
|
+
[PyPI package](https://pypi.org/project/minitap-mobile-use/)
|
|
74
|
+
[Python downloads](https://www.python.org/downloads/)
|
|
75
|
+
[License](https://github.com/minitap-ai/mobile-use/blob/main/LICENSE)
|
|
76
|
+
|
|
72
77
|
</div>
|
|
73
78
|
|
|
74
79
|
Mobile-use is a powerful, open-source AI agent that controls your Android or iOS device using natural language. It understands your commands and interacts with the UI to perform tasks, from sending messages to navigating complex apps.
|
|
@@ -107,11 +112,26 @@ Ready to automate your mobile experience? Follow these steps to get mobile-use u
|
|
|
107
112
|
|
|
108
113
|
2. **(Optional) Customize LLM Configuration:**
|
|
109
114
|
To use different models or providers, create your own LLM configuration file.
|
|
115
|
+
|
|
110
116
|
```bash
|
|
111
117
|
cp llm-config.override.template.jsonc llm-config.override.jsonc
|
|
112
118
|
```
|
|
119
|
+
|
|
113
120
|
Then, edit `llm-config.override.jsonc` to fit your needs.
|
|
114
121
|
|
|
122
|
+
You can also use local LLMs or any other OpenAI-API-compatible provider:
|
|
123
|
+
|
|
124
|
+
1. Set `OPENAI_BASE_URL` and `OPENAI_API_KEY` in your `.env`
|
|
125
|
+
2. In your `llm-config.override.jsonc`, set `openai` as the provider for the agent nodes you want, and choose a model supported by your provider.
|
|
126
|
+
|
|
127
|
+
> [!NOTE]
|
|
128
|
+
> If you want to use Google Vertex AI, you must either:
|
|
129
|
+
>
|
|
130
|
+
> - Have credentials configured for your environment (gcloud, workload identity, etc…)
|
|
131
|
+
> - Store the path to a service account JSON file as the GOOGLE_APPLICATION_CREDENTIALS environment variable
|
|
132
|
+
>
|
|
133
|
+
> More information: see [Credential types](https://cloud.google.com/docs/authentication/application-default-credentials#GAC) and the [google.auth API reference](https://googleapis.dev/python/google-auth/latest/reference/google.auth.html#module-google.auth).
|
|
134
|
+
|
|
115
135
|
### Quick Launch (Docker)
|
|
116
136
|
|
|
117
137
|
> [!NOTE]
|
|
@@ -257,6 +277,16 @@ python ./src/mobile_use/main.py \
|
|
|
257
277
|
> [!NOTE]
|
|
258
278
|
> If you haven't configured a specific model, mobile-use will prompt you to choose one from the available options.
|
|
259
279
|
|
|
280
|
+
## 🔎 Agentic System Overview
|
|
281
|
+
|
|
282
|
+
<div align="center">
|
|
283
|
+
|
|
284
|
+

|
|
285
|
+
|
|
286
|
+
_This diagram is automatically updated from the codebase. This is our current agentic system architecture._
|
|
287
|
+
|
|
288
|
+
</div>
|
|
289
|
+
|
|
260
290
|
## ❤️ Contributing
|
|
261
291
|
|
|
262
292
|
We love contributions! Whether you're fixing a bug, adding a feature, or improving documentation, your help is welcome. Please read our **[Contributing Guidelines](CONTRIBUTING.md)** to get started.
|