minitap-mobile-use 2.3.0__py3-none-any.whl → 2.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of minitap-mobile-use might be problematic. Click here for more details.
- minitap/mobile_use/agents/contextor/contextor.py +2 -2
- minitap/mobile_use/agents/cortex/cortex.md +49 -8
- minitap/mobile_use/agents/cortex/cortex.py +8 -4
- minitap/mobile_use/agents/executor/executor.md +14 -11
- minitap/mobile_use/agents/executor/executor.py +6 -5
- minitap/mobile_use/agents/hopper/hopper.py +6 -3
- minitap/mobile_use/agents/orchestrator/orchestrator.py +26 -11
- minitap/mobile_use/agents/outputter/outputter.py +6 -3
- minitap/mobile_use/agents/planner/planner.md +20 -22
- minitap/mobile_use/agents/planner/planner.py +10 -7
- minitap/mobile_use/agents/planner/types.py +4 -2
- minitap/mobile_use/agents/planner/utils.py +14 -0
- minitap/mobile_use/agents/summarizer/summarizer.py +2 -2
- minitap/mobile_use/config.py +6 -1
- minitap/mobile_use/context.py +13 -3
- minitap/mobile_use/controllers/mobile_command_controller.py +1 -14
- minitap/mobile_use/graph/state.py +7 -3
- minitap/mobile_use/sdk/agent.py +188 -23
- minitap/mobile_use/sdk/examples/README.md +19 -1
- minitap/mobile_use/sdk/examples/platform_manual_task_example.py +65 -0
- minitap/mobile_use/sdk/examples/platform_minimal_example.py +46 -0
- minitap/mobile_use/sdk/services/platform.py +307 -0
- minitap/mobile_use/sdk/types/__init__.py +16 -14
- minitap/mobile_use/sdk/types/exceptions.py +27 -0
- minitap/mobile_use/sdk/types/platform.py +127 -0
- minitap/mobile_use/sdk/types/task.py +78 -17
- minitap/mobile_use/servers/device_hardware_bridge.py +1 -1
- minitap/mobile_use/servers/stop_servers.py +11 -12
- minitap/mobile_use/services/llm.py +89 -5
- minitap/mobile_use/tools/index.py +0 -6
- minitap/mobile_use/tools/mobile/back.py +3 -3
- minitap/mobile_use/tools/mobile/clear_text.py +24 -43
- minitap/mobile_use/tools/mobile/erase_one_char.py +5 -4
- minitap/mobile_use/tools/mobile/glimpse_screen.py +11 -7
- minitap/mobile_use/tools/mobile/input_text.py +21 -51
- minitap/mobile_use/tools/mobile/launch_app.py +54 -22
- minitap/mobile_use/tools/mobile/long_press_on.py +15 -8
- minitap/mobile_use/tools/mobile/open_link.py +15 -8
- minitap/mobile_use/tools/mobile/press_key.py +15 -8
- minitap/mobile_use/tools/mobile/stop_app.py +14 -8
- minitap/mobile_use/tools/mobile/swipe.py +11 -5
- minitap/mobile_use/tools/mobile/tap.py +103 -21
- minitap/mobile_use/tools/mobile/wait_for_animation_to_end.py +3 -3
- minitap/mobile_use/tools/test_utils.py +104 -78
- minitap/mobile_use/tools/types.py +35 -0
- minitap/mobile_use/tools/utils.py +51 -48
- minitap/mobile_use/utils/recorder.py +1 -1
- minitap/mobile_use/utils/ui_hierarchy.py +9 -2
- {minitap_mobile_use-2.3.0.dist-info → minitap_mobile_use-2.5.0.dist-info}/METADATA +3 -1
- minitap_mobile_use-2.5.0.dist-info/RECORD +100 -0
- minitap/mobile_use/tools/mobile/copy_text_from.py +0 -75
- minitap/mobile_use/tools/mobile/find_packages.py +0 -69
- minitap/mobile_use/tools/mobile/paste_text.py +0 -88
- minitap_mobile_use-2.3.0.dist-info/RECORD +0 -98
- {minitap_mobile_use-2.3.0.dist-info → minitap_mobile_use-2.5.0.dist-info}/WHEEL +0 -0
- {minitap_mobile_use-2.3.0.dist-info → minitap_mobile_use-2.5.0.dist-info}/entry_points.txt +0 -0
|
@@ -1,44 +1,76 @@
|
|
|
1
|
+
from typing import Annotated
|
|
2
|
+
|
|
1
3
|
from langchain_core.messages import ToolMessage
|
|
2
4
|
from langchain_core.tools import tool
|
|
3
5
|
from langchain_core.tools.base import InjectedToolCallId
|
|
6
|
+
from langgraph.prebuilt import InjectedState
|
|
4
7
|
from langgraph.types import Command
|
|
8
|
+
|
|
9
|
+
from minitap.mobile_use.agents.hopper.hopper import HopperOutput, hopper
|
|
5
10
|
from minitap.mobile_use.constants import EXECUTOR_MESSAGES_KEY
|
|
11
|
+
from minitap.mobile_use.context import MobileUseContext
|
|
6
12
|
from minitap.mobile_use.controllers.mobile_command_controller import (
|
|
7
13
|
launch_app as launch_app_controller,
|
|
8
14
|
)
|
|
9
|
-
from minitap.mobile_use.
|
|
10
|
-
from typing import Annotated
|
|
11
|
-
from minitap.mobile_use.context import MobileUseContext
|
|
15
|
+
from minitap.mobile_use.controllers.platform_specific_commands_controller import list_packages
|
|
12
16
|
from minitap.mobile_use.graph.state import State
|
|
13
|
-
from
|
|
17
|
+
from minitap.mobile_use.tools.tool_wrapper import ToolWrapper
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
async def find_package(ctx: MobileUseContext, app_name: str) -> str | None:
|
|
21
|
+
"""
|
|
22
|
+
Finds the package name for a given application name.
|
|
23
|
+
"""
|
|
24
|
+
all_packages = list_packages(ctx=ctx)
|
|
25
|
+
try:
|
|
26
|
+
hopper_output: HopperOutput = await hopper(
|
|
27
|
+
ctx=ctx,
|
|
28
|
+
request=f"I'm looking for the package name of the following app: '{app_name}'",
|
|
29
|
+
data=all_packages,
|
|
30
|
+
)
|
|
31
|
+
# Assuming hopper_output.output directly contains the package name
|
|
32
|
+
return hopper_output.output
|
|
33
|
+
except Exception as e:
|
|
34
|
+
print(f"Failed to find package for '{app_name}': {e}")
|
|
35
|
+
return None
|
|
14
36
|
|
|
15
37
|
|
|
16
38
|
def get_launch_app_tool(ctx: MobileUseContext):
|
|
17
39
|
@tool
|
|
18
|
-
def launch_app(
|
|
40
|
+
async def launch_app(
|
|
19
41
|
tool_call_id: Annotated[str, InjectedToolCallId],
|
|
20
42
|
state: Annotated[State, InjectedState],
|
|
43
|
+
app_name: str,
|
|
21
44
|
agent_thought: str,
|
|
22
|
-
|
|
23
|
-
):
|
|
45
|
+
) -> Command:
|
|
24
46
|
"""
|
|
25
|
-
|
|
47
|
+
Finds and launches an application on the device using its natural language name.
|
|
26
48
|
"""
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
49
|
+
package_name = await find_package(ctx=ctx, app_name=app_name)
|
|
50
|
+
|
|
51
|
+
if not package_name:
|
|
52
|
+
tool_message = ToolMessage(
|
|
53
|
+
tool_call_id=tool_call_id,
|
|
54
|
+
content=launch_app_wrapper.on_failure_fn(app_name, "Package not found."),
|
|
55
|
+
status="error",
|
|
56
|
+
)
|
|
57
|
+
else:
|
|
58
|
+
output = launch_app_controller(ctx=ctx, package_name=package_name)
|
|
59
|
+
has_failed = output is not None
|
|
60
|
+
tool_message = ToolMessage(
|
|
61
|
+
tool_call_id=tool_call_id,
|
|
62
|
+
content=launch_app_wrapper.on_failure_fn(app_name, output)
|
|
63
|
+
if has_failed
|
|
64
|
+
else launch_app_wrapper.on_success_fn(app_name),
|
|
65
|
+
additional_kwargs={"error": output} if has_failed else {},
|
|
66
|
+
status="error" if has_failed else "success",
|
|
67
|
+
)
|
|
68
|
+
|
|
37
69
|
return Command(
|
|
38
|
-
update=state.
|
|
70
|
+
update=await state.asanitize_update(
|
|
39
71
|
ctx=ctx,
|
|
40
72
|
update={
|
|
41
|
-
"agents_thoughts": [agent_thought],
|
|
73
|
+
"agents_thoughts": [agent_thought, tool_message.content],
|
|
42
74
|
EXECUTOR_MESSAGES_KEY: [tool_message],
|
|
43
75
|
},
|
|
44
76
|
agent="executor",
|
|
@@ -50,6 +82,6 @@ def get_launch_app_tool(ctx: MobileUseContext):
|
|
|
50
82
|
|
|
51
83
|
launch_app_wrapper = ToolWrapper(
|
|
52
84
|
tool_fn_getter=get_launch_app_tool,
|
|
53
|
-
on_success_fn=lambda
|
|
54
|
-
on_failure_fn=lambda
|
|
85
|
+
on_success_fn=lambda app_name: f"App '{app_name}' launched successfully.",
|
|
86
|
+
on_failure_fn=lambda app_name, error: f"Failed to launch app '{app_name}': {error}",
|
|
55
87
|
)
|
|
@@ -1,8 +1,11 @@
|
|
|
1
|
+
from typing import Annotated
|
|
2
|
+
|
|
1
3
|
from langchain_core.messages import ToolMessage
|
|
2
4
|
from langchain_core.tools import tool
|
|
3
5
|
from langchain_core.tools.base import InjectedToolCallId
|
|
4
6
|
from langgraph.prebuilt import InjectedState
|
|
5
7
|
from langgraph.types import Command
|
|
8
|
+
|
|
6
9
|
from minitap.mobile_use.constants import EXECUTOR_MESSAGES_KEY
|
|
7
10
|
from minitap.mobile_use.context import MobileUseContext
|
|
8
11
|
from minitap.mobile_use.controllers.mobile_command_controller import SelectorRequest
|
|
@@ -11,37 +14,41 @@ from minitap.mobile_use.controllers.mobile_command_controller import (
|
|
|
11
14
|
)
|
|
12
15
|
from minitap.mobile_use.graph.state import State
|
|
13
16
|
from minitap.mobile_use.tools.tool_wrapper import ToolWrapper
|
|
14
|
-
from typing import Annotated
|
|
15
17
|
|
|
16
18
|
|
|
17
19
|
def get_long_press_on_tool(ctx: MobileUseContext):
|
|
18
20
|
@tool
|
|
19
|
-
def long_press_on(
|
|
21
|
+
async def long_press_on(
|
|
20
22
|
tool_call_id: Annotated[str, InjectedToolCallId],
|
|
21
23
|
state: Annotated[State, InjectedState],
|
|
22
24
|
agent_thought: str,
|
|
23
25
|
selector_request: SelectorRequest,
|
|
24
26
|
index: int | None = None,
|
|
25
|
-
):
|
|
27
|
+
) -> Command:
|
|
26
28
|
"""
|
|
27
29
|
Long press on a UI element identified by the given selector.
|
|
28
30
|
An index can be specified to select a specific element if multiple are found.
|
|
29
31
|
"""
|
|
30
32
|
output = long_press_on_controller(ctx=ctx, selector_request=selector_request, index=index)
|
|
31
33
|
has_failed = output is not None
|
|
34
|
+
|
|
35
|
+
agent_outcome = (
|
|
36
|
+
long_press_on_wrapper.on_failure_fn()
|
|
37
|
+
if has_failed
|
|
38
|
+
else long_press_on_wrapper.on_success_fn()
|
|
39
|
+
)
|
|
40
|
+
|
|
32
41
|
tool_message = ToolMessage(
|
|
33
42
|
tool_call_id=tool_call_id,
|
|
34
|
-
content=
|
|
35
|
-
if has_failed
|
|
36
|
-
else long_press_on_wrapper.on_success_fn(),
|
|
43
|
+
content=agent_outcome,
|
|
37
44
|
additional_kwargs={"error": output} if has_failed else {},
|
|
38
45
|
status="error" if has_failed else "success",
|
|
39
46
|
)
|
|
40
47
|
return Command(
|
|
41
|
-
update=state.
|
|
48
|
+
update=await state.asanitize_update(
|
|
42
49
|
ctx=ctx,
|
|
43
50
|
update={
|
|
44
|
-
"agents_thoughts": [agent_thought],
|
|
51
|
+
"agents_thoughts": [agent_thought, agent_outcome],
|
|
45
52
|
EXECUTOR_MESSAGES_KEY: [tool_message],
|
|
46
53
|
},
|
|
47
54
|
agent="executor",
|
|
@@ -1,8 +1,11 @@
|
|
|
1
|
+
from typing import Annotated
|
|
2
|
+
|
|
1
3
|
from langchain_core.messages import ToolMessage
|
|
2
4
|
from langchain_core.tools import tool
|
|
3
5
|
from langchain_core.tools.base import InjectedToolCallId
|
|
4
6
|
from langgraph.prebuilt import InjectedState
|
|
5
7
|
from langgraph.types import Command
|
|
8
|
+
|
|
6
9
|
from minitap.mobile_use.constants import EXECUTOR_MESSAGES_KEY
|
|
7
10
|
from minitap.mobile_use.context import MobileUseContext
|
|
8
11
|
from minitap.mobile_use.controllers.mobile_command_controller import (
|
|
@@ -10,35 +13,39 @@ from minitap.mobile_use.controllers.mobile_command_controller import (
|
|
|
10
13
|
)
|
|
11
14
|
from minitap.mobile_use.graph.state import State
|
|
12
15
|
from minitap.mobile_use.tools.tool_wrapper import ToolWrapper
|
|
13
|
-
from typing import Annotated
|
|
14
16
|
|
|
15
17
|
|
|
16
18
|
def get_open_link_tool(ctx: MobileUseContext):
|
|
17
19
|
@tool
|
|
18
|
-
def open_link(
|
|
20
|
+
async def open_link(
|
|
19
21
|
tool_call_id: Annotated[str, InjectedToolCallId],
|
|
20
22
|
state: Annotated[State, InjectedState],
|
|
21
23
|
agent_thought: str,
|
|
22
24
|
url: str,
|
|
23
|
-
):
|
|
25
|
+
) -> Command:
|
|
24
26
|
"""
|
|
25
27
|
Open a link on a device (i.e. a deep link).
|
|
26
28
|
"""
|
|
27
29
|
output = open_link_controller(ctx=ctx, url=url)
|
|
28
30
|
has_failed = output is not None
|
|
31
|
+
|
|
32
|
+
agent_outcome = (
|
|
33
|
+
open_link_wrapper.on_failure_fn()
|
|
34
|
+
if has_failed
|
|
35
|
+
else open_link_wrapper.on_success_fn(url)
|
|
36
|
+
)
|
|
37
|
+
|
|
29
38
|
tool_message = ToolMessage(
|
|
30
39
|
tool_call_id=tool_call_id,
|
|
31
|
-
content=
|
|
32
|
-
if has_failed
|
|
33
|
-
else open_link_wrapper.on_success_fn(url),
|
|
40
|
+
content=agent_outcome,
|
|
34
41
|
additional_kwargs={"error": output} if has_failed else {},
|
|
35
42
|
status="error" if has_failed else "success",
|
|
36
43
|
)
|
|
37
44
|
return Command(
|
|
38
|
-
update=state.
|
|
45
|
+
update=await state.asanitize_update(
|
|
39
46
|
ctx=ctx,
|
|
40
47
|
update={
|
|
41
|
-
"agents_thoughts": [agent_thought],
|
|
48
|
+
"agents_thoughts": [agent_thought, agent_outcome],
|
|
42
49
|
EXECUTOR_MESSAGES_KEY: [tool_message],
|
|
43
50
|
},
|
|
44
51
|
agent="executor",
|
|
@@ -1,8 +1,11 @@
|
|
|
1
|
+
from typing import Annotated
|
|
2
|
+
|
|
1
3
|
from langchain_core.messages import ToolMessage
|
|
2
4
|
from langchain_core.tools import tool
|
|
3
5
|
from langchain_core.tools.base import InjectedToolCallId
|
|
4
6
|
from langgraph.prebuilt import InjectedState
|
|
5
7
|
from langgraph.types import Command
|
|
8
|
+
|
|
6
9
|
from minitap.mobile_use.constants import EXECUTOR_MESSAGES_KEY
|
|
7
10
|
from minitap.mobile_use.context import MobileUseContext
|
|
8
11
|
from minitap.mobile_use.controllers.mobile_command_controller import Key
|
|
@@ -11,33 +14,37 @@ from minitap.mobile_use.controllers.mobile_command_controller import (
|
|
|
11
14
|
)
|
|
12
15
|
from minitap.mobile_use.graph.state import State
|
|
13
16
|
from minitap.mobile_use.tools.tool_wrapper import ToolWrapper
|
|
14
|
-
from typing import Annotated
|
|
15
17
|
|
|
16
18
|
|
|
17
19
|
def get_press_key_tool(ctx: MobileUseContext):
|
|
18
20
|
@tool
|
|
19
|
-
def press_key(
|
|
21
|
+
async def press_key(
|
|
20
22
|
tool_call_id: Annotated[str, InjectedToolCallId],
|
|
21
23
|
state: Annotated[State, InjectedState],
|
|
22
24
|
agent_thought: str,
|
|
23
25
|
key: Key,
|
|
24
|
-
):
|
|
26
|
+
) -> Command:
|
|
25
27
|
"""Press a key on the device."""
|
|
26
28
|
output = press_key_controller(ctx=ctx, key=key)
|
|
27
29
|
has_failed = output is not None
|
|
30
|
+
|
|
31
|
+
agent_outcome = (
|
|
32
|
+
press_key_wrapper.on_failure_fn(key)
|
|
33
|
+
if has_failed
|
|
34
|
+
else press_key_wrapper.on_success_fn(key)
|
|
35
|
+
)
|
|
36
|
+
|
|
28
37
|
tool_message = ToolMessage(
|
|
29
38
|
tool_call_id=tool_call_id,
|
|
30
|
-
content=
|
|
31
|
-
if has_failed
|
|
32
|
-
else press_key_wrapper.on_success_fn(key),
|
|
39
|
+
content=agent_outcome,
|
|
33
40
|
additional_kwargs={"error": output} if has_failed else {},
|
|
34
41
|
status="error" if has_failed else "success",
|
|
35
42
|
)
|
|
36
43
|
return Command(
|
|
37
|
-
update=state.
|
|
44
|
+
update=await state.asanitize_update(
|
|
38
45
|
ctx=ctx,
|
|
39
46
|
update={
|
|
40
|
-
"agents_thoughts": [agent_thought],
|
|
47
|
+
"agents_thoughts": [agent_thought, agent_outcome],
|
|
41
48
|
EXECUTOR_MESSAGES_KEY: [tool_message],
|
|
42
49
|
},
|
|
43
50
|
agent="executor",
|
|
@@ -1,43 +1,49 @@
|
|
|
1
|
+
from typing import Annotated
|
|
2
|
+
|
|
1
3
|
from langchain_core.messages import ToolMessage
|
|
2
4
|
from langchain_core.tools import tool
|
|
3
5
|
from langchain_core.tools.base import InjectedToolCallId
|
|
4
6
|
from langgraph.prebuilt import InjectedState
|
|
5
7
|
from langgraph.types import Command
|
|
8
|
+
|
|
6
9
|
from minitap.mobile_use.constants import EXECUTOR_MESSAGES_KEY
|
|
7
10
|
from minitap.mobile_use.context import MobileUseContext
|
|
8
11
|
from minitap.mobile_use.controllers.mobile_command_controller import stop_app as stop_app_controller
|
|
9
12
|
from minitap.mobile_use.graph.state import State
|
|
10
13
|
from minitap.mobile_use.tools.tool_wrapper import ToolWrapper
|
|
11
|
-
from typing import Annotated
|
|
12
14
|
|
|
13
15
|
|
|
14
16
|
def get_stop_app_tool(ctx: MobileUseContext):
|
|
15
17
|
@tool
|
|
16
|
-
def stop_app(
|
|
18
|
+
async def stop_app(
|
|
17
19
|
tool_call_id: Annotated[str, InjectedToolCallId],
|
|
18
20
|
state: Annotated[State, InjectedState],
|
|
19
21
|
agent_thought: str,
|
|
20
22
|
package_name: str | None = None,
|
|
21
|
-
):
|
|
23
|
+
) -> Command:
|
|
22
24
|
"""
|
|
23
25
|
Stops current application if it is running.
|
|
24
26
|
You can also specify the package name of the app to be stopped.
|
|
25
27
|
"""
|
|
26
28
|
output = stop_app_controller(ctx=ctx, package_name=package_name)
|
|
27
29
|
has_failed = output is not None
|
|
30
|
+
|
|
31
|
+
agent_outcome = (
|
|
32
|
+
stop_app_wrapper.on_failure_fn(package_name)
|
|
33
|
+
if has_failed
|
|
34
|
+
else stop_app_wrapper.on_success_fn(package_name)
|
|
35
|
+
)
|
|
28
36
|
tool_message = ToolMessage(
|
|
29
37
|
tool_call_id=tool_call_id,
|
|
30
|
-
content=
|
|
31
|
-
if has_failed
|
|
32
|
-
else stop_app_wrapper.on_success_fn(package_name),
|
|
38
|
+
content=agent_outcome,
|
|
33
39
|
additional_kwargs={"error": output} if has_failed else {},
|
|
34
40
|
status="error" if has_failed else "success",
|
|
35
41
|
)
|
|
36
42
|
return Command(
|
|
37
|
-
update=state.
|
|
43
|
+
update=await state.asanitize_update(
|
|
38
44
|
ctx=ctx,
|
|
39
45
|
update={
|
|
40
|
-
"agents_thoughts": [agent_thought],
|
|
46
|
+
"agents_thoughts": [agent_thought, agent_outcome],
|
|
41
47
|
EXECUTOR_MESSAGES_KEY: [tool_message],
|
|
42
48
|
},
|
|
43
49
|
agent="executor",
|
|
@@ -24,26 +24,32 @@ from minitap.mobile_use.tools.tool_wrapper import CompositeToolWrapper
|
|
|
24
24
|
|
|
25
25
|
def get_swipe_tool(ctx: MobileUseContext) -> BaseTool:
|
|
26
26
|
@tool
|
|
27
|
-
def swipe(
|
|
27
|
+
async def swipe(
|
|
28
28
|
tool_call_id: Annotated[str, InjectedToolCallId],
|
|
29
29
|
state: Annotated[State, InjectedState],
|
|
30
30
|
agent_thought: str,
|
|
31
31
|
swipe_request: SwipeRequest,
|
|
32
|
-
):
|
|
32
|
+
) -> Command:
|
|
33
33
|
"""Swipes on the screen."""
|
|
34
34
|
output = swipe_controller(ctx=ctx, swipe_request=swipe_request)
|
|
35
35
|
has_failed = output is not None
|
|
36
|
+
|
|
37
|
+
agent_outcome = (
|
|
38
|
+
swipe_wrapper.on_success_fn() if not has_failed else swipe_wrapper.on_failure_fn()
|
|
39
|
+
)
|
|
40
|
+
|
|
36
41
|
tool_message = ToolMessage(
|
|
37
42
|
tool_call_id=tool_call_id,
|
|
38
|
-
content=
|
|
43
|
+
content=agent_outcome,
|
|
39
44
|
additional_kwargs={"error": output} if has_failed else {},
|
|
40
45
|
status="error" if has_failed else "success",
|
|
41
46
|
)
|
|
47
|
+
|
|
42
48
|
return Command(
|
|
43
|
-
update=state.
|
|
49
|
+
update=await state.asanitize_update(
|
|
44
50
|
ctx=ctx,
|
|
45
51
|
update={
|
|
46
|
-
"agents_thoughts": [agent_thought],
|
|
52
|
+
"agents_thoughts": [agent_thought, agent_outcome],
|
|
47
53
|
EXECUTOR_MESSAGES_KEY: [tool_message],
|
|
48
54
|
},
|
|
49
55
|
agent="executor",
|
|
@@ -1,45 +1,132 @@
|
|
|
1
|
+
from typing import Annotated
|
|
2
|
+
|
|
1
3
|
from langchain_core.messages import ToolMessage
|
|
2
4
|
from langchain_core.tools import tool
|
|
3
5
|
from langchain_core.tools.base import InjectedToolCallId
|
|
4
6
|
from langgraph.prebuilt import InjectedState
|
|
5
7
|
from langgraph.types import Command
|
|
8
|
+
|
|
6
9
|
from minitap.mobile_use.constants import EXECUTOR_MESSAGES_KEY
|
|
7
10
|
from minitap.mobile_use.context import MobileUseContext
|
|
8
|
-
from minitap.mobile_use.controllers.mobile_command_controller import
|
|
11
|
+
from minitap.mobile_use.controllers.mobile_command_controller import (
|
|
12
|
+
CoordinatesSelectorRequest,
|
|
13
|
+
IdSelectorRequest,
|
|
14
|
+
SelectorRequestWithCoordinates,
|
|
15
|
+
TextSelectorRequest,
|
|
16
|
+
)
|
|
9
17
|
from minitap.mobile_use.controllers.mobile_command_controller import tap as tap_controller
|
|
10
18
|
from minitap.mobile_use.graph.state import State
|
|
11
19
|
from minitap.mobile_use.tools.tool_wrapper import ToolWrapper
|
|
12
|
-
from
|
|
20
|
+
from minitap.mobile_use.tools.types import Target
|
|
21
|
+
from minitap.mobile_use.utils.logger import get_logger
|
|
22
|
+
|
|
23
|
+
logger = get_logger(__name__)
|
|
13
24
|
|
|
14
25
|
|
|
15
26
|
def get_tap_tool(ctx: MobileUseContext):
|
|
16
27
|
@tool
|
|
17
|
-
def tap(
|
|
28
|
+
async def tap(
|
|
18
29
|
tool_call_id: Annotated[str, InjectedToolCallId],
|
|
19
30
|
state: Annotated[State, InjectedState],
|
|
20
31
|
agent_thought: str,
|
|
21
|
-
|
|
22
|
-
index: int | None = None,
|
|
32
|
+
target: Target,
|
|
23
33
|
):
|
|
24
34
|
"""
|
|
25
|
-
Taps on a
|
|
26
|
-
|
|
35
|
+
Taps on a UI element identified by the 'target' object.
|
|
36
|
+
|
|
37
|
+
The 'target' object allows specifying an element by its resource_id
|
|
38
|
+
(with an optional index), its coordinates, or its text content (with an optional index).
|
|
39
|
+
The tool uses a fallback strategy, trying the locators in that order.
|
|
27
40
|
"""
|
|
28
|
-
output =
|
|
41
|
+
output = {
|
|
42
|
+
"error": "No valid selector provided or all selectors failed."
|
|
43
|
+
} # Default to failure
|
|
44
|
+
final_selector_info = "N/A"
|
|
45
|
+
|
|
46
|
+
# 1. Try with resource_id
|
|
47
|
+
if target.resource_id:
|
|
48
|
+
try:
|
|
49
|
+
selector = IdSelectorRequest(id=target.resource_id)
|
|
50
|
+
logger.info(
|
|
51
|
+
f"Attempting to tap using resource_id: '{target.resource_id}' "
|
|
52
|
+
f"at index {target.resource_id_index}"
|
|
53
|
+
)
|
|
54
|
+
result = tap_controller(
|
|
55
|
+
ctx=ctx, selector_request=selector, index=target.resource_id_index
|
|
56
|
+
)
|
|
57
|
+
if result is None: # Success
|
|
58
|
+
output = None
|
|
59
|
+
final_selector_info = (
|
|
60
|
+
f"resource_id='{target.resource_id}' (index={target.resource_id_index})"
|
|
61
|
+
)
|
|
62
|
+
else:
|
|
63
|
+
logger.warning(
|
|
64
|
+
f"Tap with resource_id '{target.resource_id}' failed. Error: {result}"
|
|
65
|
+
)
|
|
66
|
+
output = result
|
|
67
|
+
except Exception as e:
|
|
68
|
+
logger.warning(f"Exception during tap with resource_id '{target.resource_id}': {e}")
|
|
69
|
+
output = {"error": str(e)}
|
|
70
|
+
|
|
71
|
+
# 2. If resource_id failed or wasn't provided, try with coordinates
|
|
72
|
+
if output is not None and target.coordinates:
|
|
73
|
+
try:
|
|
74
|
+
center_point = target.coordinates.get_center()
|
|
75
|
+
selector = SelectorRequestWithCoordinates(
|
|
76
|
+
coordinates=CoordinatesSelectorRequest(x=center_point.x, y=center_point.y)
|
|
77
|
+
)
|
|
78
|
+
logger.info(
|
|
79
|
+
f"Attempting to tap using coordinates: {center_point.x},{center_point.y}"
|
|
80
|
+
)
|
|
81
|
+
result = tap_controller(ctx=ctx, selector_request=selector)
|
|
82
|
+
if result is None: # Success
|
|
83
|
+
output = None
|
|
84
|
+
final_selector_info = f"coordinates='{target.coordinates}'"
|
|
85
|
+
else:
|
|
86
|
+
logger.warning(
|
|
87
|
+
f"Tap with coordinates '{target.coordinates}' failed. Error: {result}"
|
|
88
|
+
)
|
|
89
|
+
output = result
|
|
90
|
+
except Exception as e:
|
|
91
|
+
logger.warning(f"Exception during tap with coordinates '{target.coordinates}': {e}")
|
|
92
|
+
output = {"error": str(e)}
|
|
93
|
+
|
|
94
|
+
# 3. If coordinates failed or weren't provided, try with text
|
|
95
|
+
if output is not None and target.text:
|
|
96
|
+
try:
|
|
97
|
+
selector = TextSelectorRequest(text=target.text)
|
|
98
|
+
logger.info(
|
|
99
|
+
f"Attempting to tap using text: '{target.text}' at index {target.text_index}"
|
|
100
|
+
)
|
|
101
|
+
result = tap_controller(ctx=ctx, selector_request=selector, index=target.text_index)
|
|
102
|
+
if result is None: # Success
|
|
103
|
+
output = None
|
|
104
|
+
final_selector_info = f"text='{target.text}' (index={target.text_index})"
|
|
105
|
+
else:
|
|
106
|
+
logger.warning(f"Tap with text '{target.text}' failed. Error: {result}")
|
|
107
|
+
output = result
|
|
108
|
+
except Exception as e:
|
|
109
|
+
logger.warning(f"Exception during tap with text '{target.text}': {e}")
|
|
110
|
+
output = {"error": str(e)}
|
|
111
|
+
|
|
29
112
|
has_failed = output is not None
|
|
113
|
+
agent_outcome = (
|
|
114
|
+
tap_wrapper.on_failure_fn(final_selector_info)
|
|
115
|
+
if has_failed
|
|
116
|
+
else tap_wrapper.on_success_fn(final_selector_info)
|
|
117
|
+
)
|
|
118
|
+
|
|
30
119
|
tool_message = ToolMessage(
|
|
31
120
|
tool_call_id=tool_call_id,
|
|
32
|
-
content=
|
|
33
|
-
if has_failed
|
|
34
|
-
else tap_wrapper.on_success_fn(selector_request, index),
|
|
121
|
+
content=agent_outcome,
|
|
35
122
|
additional_kwargs={"error": output} if has_failed else {},
|
|
36
123
|
status="error" if has_failed else "success",
|
|
37
124
|
)
|
|
38
125
|
return Command(
|
|
39
|
-
update=state.
|
|
126
|
+
update=await state.asanitize_update(
|
|
40
127
|
ctx=ctx,
|
|
41
128
|
update={
|
|
42
|
-
"agents_thoughts": [agent_thought],
|
|
129
|
+
"agents_thoughts": [agent_thought, agent_outcome],
|
|
43
130
|
EXECUTOR_MESSAGES_KEY: [tool_message],
|
|
44
131
|
},
|
|
45
132
|
agent="executor",
|
|
@@ -51,12 +138,7 @@ def get_tap_tool(ctx: MobileUseContext):
|
|
|
51
138
|
|
|
52
139
|
tap_wrapper = ToolWrapper(
|
|
53
140
|
tool_fn_getter=get_tap_tool,
|
|
54
|
-
on_success_fn=
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
),
|
|
58
|
-
on_failure_fn=(
|
|
59
|
-
lambda selector_request,
|
|
60
|
-
index: f"Failed to tap on {selector_request} {'at index {index}' if index else ''}."
|
|
61
|
-
),
|
|
141
|
+
on_success_fn=lambda selector_info: f"Tap on element with {selector_info} was successful.",
|
|
142
|
+
on_failure_fn=lambda selector_info: "Failed to tap on element. "
|
|
143
|
+
+ f"Last attempt was with {selector_info}.",
|
|
62
144
|
)
|
|
@@ -16,12 +16,12 @@ from typing import Annotated
|
|
|
16
16
|
|
|
17
17
|
def get_wait_for_animation_to_end_tool(ctx: MobileUseContext):
|
|
18
18
|
@tool
|
|
19
|
-
def wait_for_animation_to_end(
|
|
19
|
+
async def wait_for_animation_to_end(
|
|
20
20
|
tool_call_id: Annotated[str, InjectedToolCallId],
|
|
21
21
|
state: Annotated[State, InjectedState],
|
|
22
22
|
agent_thought: str,
|
|
23
23
|
timeout: WaitTimeout | None,
|
|
24
|
-
):
|
|
24
|
+
) -> Command:
|
|
25
25
|
"""
|
|
26
26
|
Waits for ongoing animations or videos to finish before continuing.
|
|
27
27
|
|
|
@@ -44,7 +44,7 @@ def get_wait_for_animation_to_end_tool(ctx: MobileUseContext):
|
|
|
44
44
|
status="error" if has_failed else "success",
|
|
45
45
|
)
|
|
46
46
|
return Command(
|
|
47
|
-
update=state.
|
|
47
|
+
update=await state.asanitize_update(
|
|
48
48
|
ctx=ctx,
|
|
49
49
|
update={
|
|
50
50
|
"agents_thoughts": [agent_thought],
|