minitap-mobile-use 2.3.0__tar.gz → 2.4.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of minitap-mobile-use might be problematic. Click here for more details.
- {minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/PKG-INFO +3 -1
- {minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/minitap/mobile_use/agents/contextor/contextor.py +2 -2
- {minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/minitap/mobile_use/agents/cortex/cortex.md +49 -8
- {minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/minitap/mobile_use/agents/cortex/cortex.py +8 -4
- {minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/minitap/mobile_use/agents/executor/executor.md +14 -11
- {minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/minitap/mobile_use/agents/executor/executor.py +6 -5
- {minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/minitap/mobile_use/agents/hopper/hopper.py +6 -3
- {minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/minitap/mobile_use/agents/orchestrator/orchestrator.py +26 -11
- {minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/minitap/mobile_use/agents/outputter/outputter.py +6 -3
- {minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/minitap/mobile_use/agents/planner/planner.md +20 -22
- {minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/minitap/mobile_use/agents/planner/planner.py +10 -7
- {minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/minitap/mobile_use/agents/planner/types.py +4 -2
- {minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/minitap/mobile_use/agents/planner/utils.py +14 -0
- {minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/minitap/mobile_use/agents/summarizer/summarizer.py +2 -2
- {minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/minitap/mobile_use/config.py +6 -1
- {minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/minitap/mobile_use/context.py +13 -3
- {minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/minitap/mobile_use/controllers/mobile_command_controller.py +1 -14
- {minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/minitap/mobile_use/graph/state.py +7 -3
- {minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/minitap/mobile_use/sdk/agent.py +188 -23
- {minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/minitap/mobile_use/sdk/examples/README.md +19 -1
- minitap_mobile_use-2.4.0/minitap/mobile_use/sdk/examples/platform_minimal_example.py +46 -0
- minitap_mobile_use-2.4.0/minitap/mobile_use/sdk/services/platform.py +244 -0
- {minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/minitap/mobile_use/sdk/types/__init__.py +14 -14
- {minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/minitap/mobile_use/sdk/types/exceptions.py +27 -0
- minitap_mobile_use-2.4.0/minitap/mobile_use/sdk/types/platform.py +125 -0
- {minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/minitap/mobile_use/sdk/types/task.py +60 -17
- {minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/minitap/mobile_use/servers/device_hardware_bridge.py +1 -1
- {minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/minitap/mobile_use/servers/stop_servers.py +11 -12
- {minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/minitap/mobile_use/services/llm.py +89 -5
- {minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/minitap/mobile_use/tools/index.py +0 -6
- {minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/minitap/mobile_use/tools/mobile/back.py +3 -3
- {minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/minitap/mobile_use/tools/mobile/clear_text.py +24 -43
- {minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/minitap/mobile_use/tools/mobile/erase_one_char.py +5 -4
- {minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/minitap/mobile_use/tools/mobile/glimpse_screen.py +11 -7
- {minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/minitap/mobile_use/tools/mobile/input_text.py +21 -51
- minitap_mobile_use-2.4.0/minitap/mobile_use/tools/mobile/launch_app.py +87 -0
- {minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/minitap/mobile_use/tools/mobile/long_press_on.py +15 -8
- {minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/minitap/mobile_use/tools/mobile/open_link.py +15 -8
- {minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/minitap/mobile_use/tools/mobile/press_key.py +15 -8
- {minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/minitap/mobile_use/tools/mobile/stop_app.py +14 -8
- {minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/minitap/mobile_use/tools/mobile/swipe.py +11 -5
- minitap_mobile_use-2.4.0/minitap/mobile_use/tools/mobile/tap.py +144 -0
- {minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/minitap/mobile_use/tools/mobile/wait_for_animation_to_end.py +3 -3
- {minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/minitap/mobile_use/tools/test_utils.py +104 -78
- minitap_mobile_use-2.4.0/minitap/mobile_use/tools/types.py +35 -0
- {minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/minitap/mobile_use/tools/utils.py +51 -48
- {minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/minitap/mobile_use/utils/recorder.py +1 -1
- {minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/minitap/mobile_use/utils/ui_hierarchy.py +9 -2
- {minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/pyproject.toml +5 -2
- minitap_mobile_use-2.3.0/minitap/mobile_use/tools/mobile/copy_text_from.py +0 -75
- minitap_mobile_use-2.3.0/minitap/mobile_use/tools/mobile/find_packages.py +0 -69
- minitap_mobile_use-2.3.0/minitap/mobile_use/tools/mobile/launch_app.py +0 -55
- minitap_mobile_use-2.3.0/minitap/mobile_use/tools/mobile/paste_text.py +0 -88
- minitap_mobile_use-2.3.0/minitap/mobile_use/tools/mobile/tap.py +0 -62
- {minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/LICENSE +0 -0
- {minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/README.md +0 -0
- {minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/minitap/mobile_use/__init__.py +0 -0
- {minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/minitap/mobile_use/agents/cortex/types.py +0 -0
- {minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/minitap/mobile_use/agents/executor/tool_node.py +0 -0
- {minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/minitap/mobile_use/agents/executor/utils.py +0 -0
- {minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/minitap/mobile_use/agents/hopper/hopper.md +0 -0
- {minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/minitap/mobile_use/agents/orchestrator/human.md +0 -0
- {minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/minitap/mobile_use/agents/orchestrator/orchestrator.md +0 -0
- {minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/minitap/mobile_use/agents/orchestrator/types.py +0 -0
- {minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/minitap/mobile_use/agents/outputter/human.md +0 -0
- {minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/minitap/mobile_use/agents/outputter/test_outputter.py +0 -0
- {minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/minitap/mobile_use/agents/planner/human.md +0 -0
- {minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/minitap/mobile_use/clients/device_hardware_client.py +0 -0
- {minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/minitap/mobile_use/clients/ios_client.py +0 -0
- {minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/minitap/mobile_use/clients/screen_api_client.py +0 -0
- {minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/minitap/mobile_use/constants.py +0 -0
- {minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/minitap/mobile_use/controllers/__init__.py +0 -0
- {minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/minitap/mobile_use/controllers/platform_specific_commands_controller.py +0 -0
- {minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/minitap/mobile_use/graph/graph.py +0 -0
- {minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/minitap/mobile_use/main.py +0 -0
- {minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/minitap/mobile_use/sdk/__init__.py +0 -0
- {minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/minitap/mobile_use/sdk/builders/__init__.py +0 -0
- {minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/minitap/mobile_use/sdk/builders/agent_config_builder.py +0 -0
- {minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/minitap/mobile_use/sdk/builders/index.py +0 -0
- {minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/minitap/mobile_use/sdk/builders/task_request_builder.py +0 -0
- {minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/minitap/mobile_use/sdk/constants.py +0 -0
- {minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/minitap/mobile_use/sdk/examples/__init__.py +0 -0
- {minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/minitap/mobile_use/sdk/examples/simple_photo_organizer.py +0 -0
- {minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/minitap/mobile_use/sdk/examples/smart_notification_assistant.py +0 -0
- {minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/minitap/mobile_use/sdk/types/agent.py +0 -0
- {minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/minitap/mobile_use/sdk/utils.py +0 -0
- {minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/minitap/mobile_use/servers/config.py +0 -0
- {minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/minitap/mobile_use/servers/device_screen_api.py +0 -0
- {minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/minitap/mobile_use/servers/start_servers.py +0 -0
- {minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/minitap/mobile_use/servers/utils.py +0 -0
- {minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/minitap/mobile_use/services/accessibility.py +0 -0
- {minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/minitap/mobile_use/tools/tool_wrapper.py +0 -0
- {minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/minitap/mobile_use/utils/cli_helpers.py +0 -0
- {minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/minitap/mobile_use/utils/cli_selection.py +0 -0
- {minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/minitap/mobile_use/utils/conversations.py +0 -0
- {minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/minitap/mobile_use/utils/decorators.py +0 -0
- {minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/minitap/mobile_use/utils/errors.py +0 -0
- {minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/minitap/mobile_use/utils/file.py +0 -0
- {minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/minitap/mobile_use/utils/logger.py +0 -0
- {minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/minitap/mobile_use/utils/media.py +0 -0
- {minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/minitap/mobile_use/utils/requests_utils.py +0 -0
- {minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/minitap/mobile_use/utils/shell_utils.py +0 -0
- {minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/minitap/mobile_use/utils/test_ui_hierarchy.py +0 -0
- {minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/minitap/mobile_use/utils/time.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: minitap-mobile-use
|
|
3
|
-
Version: 2.3.0
|
|
3
|
+
Version: 2.4.0
|
|
4
4
|
Summary: AI-powered multi-agent system that automates real Android and iOS devices through low-level control using LangGraph.
|
|
5
5
|
Author: Pierre-Louis Favreau, Jean-Pierre Lo, Nicolas Dehandschoewercker
|
|
6
6
|
License: MIT License
|
|
@@ -43,9 +43,11 @@ Requires-Dist: uvicorn[standard]==0.30.1
|
|
|
43
43
|
Requires-Dist: colorama>=0.4.6
|
|
44
44
|
Requires-Dist: psutil>=5.9.0
|
|
45
45
|
Requires-Dist: langchain-google-vertexai>=2.0.28
|
|
46
|
+
Requires-Dist: httpx>=0.28.1
|
|
46
47
|
Requires-Dist: ruff==0.5.3 ; extra == 'dev'
|
|
47
48
|
Requires-Dist: pytest==8.4.1 ; extra == 'dev'
|
|
48
49
|
Requires-Dist: pytest-cov==5.0.0 ; extra == 'dev'
|
|
50
|
+
Requires-Dist: pyright==1.1.405 ; extra == 'dev'
|
|
49
51
|
Requires-Python: >=3.12
|
|
50
52
|
Project-URL: Homepage, https://minitap.ai/
|
|
51
53
|
Project-URL: Source, https://github.com/minitap-ai/mobile-use
|
|
@@ -21,7 +21,7 @@ class ContextorNode:
|
|
|
21
21
|
on_success=lambda _: logger.success("Contextor Agent"),
|
|
22
22
|
on_failure=lambda _: logger.error("Contextor Agent"),
|
|
23
23
|
)
|
|
24
|
-
def __call__(self, state: State):
|
|
24
|
+
async def __call__(self, state: State):
|
|
25
25
|
device_data = get_screen_data(self.ctx.screen_api_client)
|
|
26
26
|
focused_app_info = get_focused_app_info(self.ctx)
|
|
27
27
|
device_date = get_device_date(self.ctx)
|
|
@@ -30,7 +30,7 @@ class ContextorNode:
|
|
|
30
30
|
list(state.executor_messages)
|
|
31
31
|
)
|
|
32
32
|
|
|
33
|
-
return state.
|
|
33
|
+
return await state.asanitize_update(
|
|
34
34
|
ctx=self.ctx,
|
|
35
35
|
update={
|
|
36
36
|
"latest_screenshot_base64": device_data.base64
|
{minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/minitap/mobile_use/agents/cortex/cortex.md
RENAMED
|
@@ -31,6 +31,12 @@ To understand the device state, you have two senses, each with its purpose:
|
|
|
31
31
|
* **Golden Rule:** When the UI hierarchy is ambiguous, seems incomplete, or when you need to verify a visual detail before acting, **`glimpse_screen` is always the most effective and reliable action.** Never guess what the screen looks like; use your sight to be sure.
|
|
32
32
|
|
|
33
33
|
**CRITICAL NOTE ON SIGHT:** The visual information from `glimpse_screen` is **ephemeral**. It is available for **THIS decision turn ONLY**. You MUST extract all necessary information from it IMMEDIATELY, as it will be cleared before the next step.
|
|
34
|
+
|
|
35
|
+
### CRITICAL ACTION DIRECTIVES
|
|
36
|
+
|
|
37
|
+
- **To open an application, you MUST use the `launch_app` tool.** Provide the natural language name of the app (e.g., "Uber Eats"). Do NOT attempt to open apps manually by swiping to the app drawer and searching. The `launch_app` tool is the fastest and most reliable method.
|
|
38
|
+
- **To open URLs/links, you MUST use the `open_link` tool.** This handles all links, including deep links, correctly.
|
|
39
|
+
|
|
34
40
|
### Context You Receive:
|
|
35
41
|
|
|
36
42
|
- 📱 **Device state**:
|
|
@@ -75,13 +81,32 @@ Focus on the **current PENDING subgoal and the next subgoals not yet started**.
|
|
|
75
81
|
|
|
76
82
|
**You MUST follow it for every element interaction.**
|
|
77
83
|
|
|
78
|
-
When you target a UI element (for a `tap`, `input_text`, `clear_text`, etc.), you **MUST** provide a comprehensive target object containing every piece of information you can find about
|
|
84
|
+
When you target a UI element (for a `tap`, `input_text`, `clear_text`, etc.), you **MUST** provide a comprehensive `target` object containing every piece of information you can find about **that single element**.
|
|
79
85
|
|
|
80
86
|
* **1. `resource_id`**: Include this if it is present in the UI hierarchy.
|
|
81
|
-
* **2. `
|
|
82
|
-
* **3. `
|
|
87
|
+
* **2. `resource_id_index`**: If there are multiple elements with the same `resource_id`, provide the zero-based index of the specific one you are targeting.
|
|
88
|
+
* **3. `coordinates`**: Include the full bounds (`x`, `y`, `width`, `height`) if they are available.
|
|
89
|
+
* **4. `text`**: Include the *current text* content of the element (e.g., placeholder text for an input).
|
|
90
|
+
* **5. `text_index`**: If there are multiple elements with the same `text`, provide the zero-based index of the specific one you are targeting.
|
|
91
|
+
|
|
92
|
+
**CRITICAL: The index must correspond to its identifier.** `resource_id_index` is only used when targeting by `resource_id`. `text_index` is only used when targeting by `text`. This ensures the fallback logic targets the correct element.
|
|
93
|
+
|
|
94
|
+
**This is NOT optional.** Providing every locator you have is the foundation of the system's reliability. It allows the next steps to use a fallback mechanism: if the ID fails, it tries the coordinates, etc. Failing to provide this complete context will lead to action failures.
|
|
95
|
+
|
|
96
|
+
### The Rule of Unpredictable Actions
|
|
83
97
|
|
|
84
|
-
|
|
98
|
+
Certain actions have outcomes that can significantly and sometimes unpredictably change the UI. These include:
|
|
99
|
+
- `back`
|
|
100
|
+
- `launch_app`
|
|
101
|
+
- `stop_app`
|
|
102
|
+
- `open_link`
|
|
103
|
+
- `tap` on an element that is clearly for navigation (e.g., a "Back" button, a menu item, a link to another screen).
|
|
104
|
+
|
|
105
|
+
**CRITICAL RULE: If your decision includes one of these unpredictable actions, it MUST be the only action in your `Structured Decisions` for this turn. Else, use flows to group actions together.**
|
|
106
|
+
|
|
107
|
+
This is not optional. Failing to isolate these actions will cause the system to act on an outdated understanding of the screen, leading to catastrophic errors. For example, after a `back` command, you MUST wait to see the new screen before deciding what to tap next.
|
|
108
|
+
|
|
109
|
+
You may only group simple, predictable actions together, such as tapping a text field and then immediately typing into it (`tap` followed by `input_text`).
|
|
85
110
|
|
|
86
111
|
### Outputting Your Decisions
|
|
87
112
|
|
|
@@ -90,8 +115,8 @@ If you decide to act, output a **valid JSON stringified structured set of instru
|
|
|
90
115
|
- These must be **concrete low-level actions**.
|
|
91
116
|
- The executor has the following available tools: {{ executor_tools_list }}.
|
|
92
117
|
- Your goal is to achieve subgoals **fast** - so you must put as many actions as possible in your instructions to complete all achievable subgoals (based on your observations) in one go.
|
|
93
|
-
-
|
|
94
|
-
-
|
|
118
|
+
- If you refer to a UI element or coordinates, specify it clearly (e.g., `resource-id: com.whatsapp:id/search`, `resource-id-index: 0`, `text: "Alice"`, `text-index: 0`, `x: 100, y: 200, width: 100, height: 100`).
|
|
119
|
+
- **The structure is up to you**, but it must be valid **JSON stringified output**. You will accompany this output with a **natural-language summary** of your reasoning and approach in your agent thought.
|
|
95
120
|
- **Always use a single `input_text` action** to type in a field. This tool handles focusing the element and placing the cursor correctly. If the tool feedback indicates verification is needed or shows None/empty content, perform verification before proceeding.
|
|
96
121
|
- **Only reference UI element IDs or visible texts that are explicitly present in the provided UI hierarchy or screenshot. Do not invent, infer, or guess any IDs or texts that are not directly observed**.
|
|
97
122
|
- **For text clearing**: When you need to completely clear text from an input field, always call the `clear_text` tool with the correct resource_id. This tool automatically focuses the element, and ensures the field is emptied. If you notice this tool fails to clear the text, try to long press the input, select all, and call `erase_one_char`.
|
|
@@ -116,7 +141,23 @@ If you decide to act, output a **valid JSON stringified structured set of instru
|
|
|
116
141
|
|
|
117
142
|
---
|
|
118
143
|
|
|
119
|
-
### Example
|
|
144
|
+
### Example 1
|
|
145
|
+
|
|
146
|
+
#### Current Subgoal:
|
|
147
|
+
|
|
148
|
+
> "Open WhatsApp"
|
|
149
|
+
|
|
150
|
+
#### Structured Decisions:
|
|
151
|
+
|
|
152
|
+
```text
|
|
153
|
+
"{\"action\": \"launch_app\", \"app_name\": \"WhatsApp\"}"
|
|
154
|
+
```
|
|
155
|
+
|
|
156
|
+
#### Agent Thought:
|
|
157
|
+
|
|
158
|
+
> I need to launch the WhatsApp app. I will use the `launch_app` tool to open it.
|
|
159
|
+
|
|
160
|
+
### Example 2
|
|
120
161
|
|
|
121
162
|
#### Current Subgoal:
|
|
122
163
|
|
|
@@ -125,7 +166,7 @@ If you decide to act, output a **valid JSON stringified structured set of instru
|
|
|
125
166
|
#### Structured Decisions:
|
|
126
167
|
|
|
127
168
|
```text
|
|
128
|
-
"{\"action\": \"tap\", \"target\": {\"
|
|
169
|
+
"[{\"action\": \"tap\", \"target\": {\"resource_id\": \"com.whatsapp:id/menuitem_search\", \"resource_id_index\": 1, \"text\": \"Search\", \"text_index\": 0, \"coordinates\": {\"x\": 880, \"y\": 150, \"width\": 120, \"height\": 120}}}]"
|
|
129
170
|
```
|
|
130
171
|
|
|
131
172
|
#### Agent Thought:
|
{minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/minitap/mobile_use/agents/cortex/cortex.py
RENAMED
|
@@ -16,7 +16,7 @@ from minitap.mobile_use.agents.planner.utils import get_current_subgoal
|
|
|
16
16
|
from minitap.mobile_use.constants import EXECUTOR_MESSAGES_KEY
|
|
17
17
|
from minitap.mobile_use.context import MobileUseContext
|
|
18
18
|
from minitap.mobile_use.graph.state import State
|
|
19
|
-
from minitap.mobile_use.services.llm import get_llm, with_fallback
|
|
19
|
+
from minitap.mobile_use.services.llm import get_llm, invoke_llm_with_timeout_message, with_fallback
|
|
20
20
|
from minitap.mobile_use.tools.index import EXECUTOR_WRAPPERS_TOOLS, format_tools_list
|
|
21
21
|
from minitap.mobile_use.utils.conversations import get_screenshot_message_for_llm
|
|
22
22
|
from minitap.mobile_use.utils.decorators import wrap_with_callbacks
|
|
@@ -78,8 +78,12 @@ class CortexNode:
|
|
|
78
78
|
ctx=self.ctx, name="cortex", use_fallback=True, temperature=1
|
|
79
79
|
).with_structured_output(CortexOutput)
|
|
80
80
|
response: CortexOutput = await with_fallback(
|
|
81
|
-
main_call=lambda:
|
|
82
|
-
|
|
81
|
+
main_call=lambda: invoke_llm_with_timeout_message(
|
|
82
|
+
llm.ainvoke(messages), agent_name="Cortex"
|
|
83
|
+
),
|
|
84
|
+
fallback_call=lambda: invoke_llm_with_timeout_message(
|
|
85
|
+
llm_fallback.ainvoke(messages), agent_name="Cortex (Fallback)"
|
|
86
|
+
),
|
|
83
87
|
) # type: ignore
|
|
84
88
|
|
|
85
89
|
is_subgoal_completed = (
|
|
@@ -90,7 +94,7 @@ class CortexNode:
|
|
|
90
94
|
if not is_subgoal_completed:
|
|
91
95
|
response.complete_subgoals_by_ids = []
|
|
92
96
|
|
|
93
|
-
return state.
|
|
97
|
+
return await state.asanitize_update(
|
|
94
98
|
ctx=self.ctx,
|
|
95
99
|
update={
|
|
96
100
|
"agents_thoughts": [response.agent_thought],
|
{minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/minitap/mobile_use/agents/executor/executor.md
RENAMED
|
@@ -25,12 +25,7 @@ and your previous actions, you must:
|
|
|
25
25
|
"I'm tapping on the chat item labeled 'Alice' to open the conversation."
|
|
26
26
|
|
|
27
27
|
```json
|
|
28
|
-
{
|
|
29
|
-
"action": "tap",
|
|
30
|
-
"target": {
|
|
31
|
-
"resource_id": "com.whatsapp:id/conversation_item"
|
|
32
|
-
}
|
|
33
|
-
}
|
|
28
|
+
"[{\"tool_name\": \"tap\", \"arguments\": {\"target\": {\"resource_id\": \"com.whatsapp:id/conversation_item\", \"resource_id_index\": 0, \"text\": \"Alice\", \"text_index\": 0, \"coordinates\": {\"x\": 0, \"y\": 350, \"width\": 1080, \"height\": 80}}}}]"
|
|
34
29
|
```
|
|
35
30
|
|
|
36
31
|
**→ Executor Action**:
|
|
@@ -38,13 +33,17 @@ and your previous actions, you must:
|
|
|
38
33
|
Call the `tap_on_element` tool with:
|
|
39
34
|
|
|
40
35
|
- `resource_id = "com.whatsapp:id/conversation_item"`
|
|
36
|
+
- `resource_id_index = 0`
|
|
37
|
+
- `text = "Alice"`
|
|
38
|
+
- `text_index = 0`
|
|
39
|
+
- `coordinates = {"x": 0, "y": 350, "width": 1080, "height": 80}`
|
|
41
40
|
- `agent_thought = "I'm tapping on the chat item labeled 'Alice' to open the conversation."`
|
|
42
41
|
|
|
43
42
|
---
|
|
44
43
|
|
|
45
44
|
### ⚙️ Tools
|
|
46
45
|
|
|
47
|
-
- Tools may include actions like: `tap`, `swipe`, `
|
|
46
|
+
- Tools may include actions like: `tap`, `swipe`, `launch_app`, `stop_app`, etc.
|
|
48
47
|
- You **must not hardcode tool definitions** here.
|
|
49
48
|
- Just use the right tool based on what the `structured_decisions` requires.
|
|
50
49
|
- The tools are provided dynamically via LangGraph's tool binding mechanism.
|
|
@@ -53,10 +52,12 @@ Call the `tap_on_element` tool with:
|
|
|
53
52
|
|
|
54
53
|
When using the `input_text` tool:
|
|
55
54
|
|
|
56
|
-
- **Provide all available information**
|
|
57
|
-
- `
|
|
58
|
-
- `
|
|
59
|
-
- `
|
|
55
|
+
- **Provide all available information** in the target object to identify the text input element
|
|
56
|
+
- `resource_id`: The resource ID of the text input element (when available)
|
|
57
|
+
- `resource_id_index`: The zero-based index of the specific resource ID you are targeting (when available)
|
|
58
|
+
- `text`: The current text content of the text input element (when available)
|
|
59
|
+
- `text_index`: The zero-based index of the specific text you are targeting (when available)
|
|
60
|
+
- `coordinates`: The bounds (ElementBounds) of the text input element (when available)
|
|
60
61
|
|
|
61
62
|
- The tool will automatically:
|
|
62
63
|
|
|
@@ -64,6 +65,8 @@ When using the `input_text` tool:
|
|
|
64
65
|
2. **Move the cursor to the end** of the existing text
|
|
65
66
|
3. **Then type the new text**
|
|
66
67
|
|
|
68
|
+
- **Important**: Special characters and markdown-like escape sequences (e.g., \n, \t, *, _) are not interpreted. For example, typing \n will insert the literal characters \ and n, not a line break.
|
|
69
|
+
|
|
67
70
|
#### 🔄 Text Clearing Best Practice
|
|
68
71
|
|
|
69
72
|
When you need to completely clear text from an input field, always use the clear_text tool with the correct resource_id.
|
{minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/minitap/mobile_use/agents/executor/executor.py
RENAMED
|
@@ -8,7 +8,7 @@ from langchain_google_vertexai.chat_models import ChatVertexAI
|
|
|
8
8
|
from minitap.mobile_use.constants import EXECUTOR_MESSAGES_KEY
|
|
9
9
|
from minitap.mobile_use.context import MobileUseContext
|
|
10
10
|
from minitap.mobile_use.graph.state import State
|
|
11
|
-
from minitap.mobile_use.services.llm import get_llm
|
|
11
|
+
from minitap.mobile_use.services.llm import get_llm, invoke_llm_with_timeout_message
|
|
12
12
|
from minitap.mobile_use.tools.index import EXECUTOR_WRAPPERS_TOOLS, get_tools_from_wrappers
|
|
13
13
|
from minitap.mobile_use.utils.decorators import wrap_with_callbacks
|
|
14
14
|
from minitap.mobile_use.utils.logger import get_logger
|
|
@@ -29,7 +29,7 @@ class ExecutorNode:
|
|
|
29
29
|
structured_decisions = state.structured_decisions
|
|
30
30
|
if not structured_decisions:
|
|
31
31
|
logger.warning("No structured decisions found.")
|
|
32
|
-
return state.
|
|
32
|
+
return await state.asanitize_update(
|
|
33
33
|
ctx=self.ctx,
|
|
34
34
|
update={
|
|
35
35
|
"agents_thoughts": [
|
|
@@ -62,9 +62,10 @@ class ExecutorNode:
|
|
|
62
62
|
llm_bind_tools_kwargs["parallel_tool_calls"] = True
|
|
63
63
|
|
|
64
64
|
llm = llm.bind_tools(**llm_bind_tools_kwargs)
|
|
65
|
-
response = await
|
|
66
|
-
|
|
67
|
-
|
|
65
|
+
response = await invoke_llm_with_timeout_message(
|
|
66
|
+
llm.ainvoke(messages), agent_name="Executor"
|
|
67
|
+
)
|
|
68
|
+
return await state.asanitize_update(
|
|
68
69
|
ctx=self.ctx,
|
|
69
70
|
update={
|
|
70
71
|
"cortex_last_thought": cortex_last_thought,
|
{minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/minitap/mobile_use/agents/hopper/hopper.py
RENAMED
|
@@ -2,10 +2,11 @@ from pathlib import Path
|
|
|
2
2
|
|
|
3
3
|
from jinja2 import Template
|
|
4
4
|
from langchain_core.messages import HumanMessage, SystemMessage
|
|
5
|
-
from minitap.mobile_use.context import MobileUseContext
|
|
6
|
-
from minitap.mobile_use.services.llm import get_llm
|
|
7
5
|
from pydantic import BaseModel, Field
|
|
8
6
|
|
|
7
|
+
from minitap.mobile_use.context import MobileUseContext
|
|
8
|
+
from minitap.mobile_use.services.llm import get_llm, invoke_llm_with_timeout_message
|
|
9
|
+
|
|
9
10
|
|
|
10
11
|
class HopperOutput(BaseModel):
|
|
11
12
|
step: str = Field(
|
|
@@ -33,7 +34,9 @@ async def hopper(
|
|
|
33
34
|
|
|
34
35
|
llm = get_llm(ctx=ctx, name="hopper", is_utils=True, temperature=0)
|
|
35
36
|
structured_llm = llm.with_structured_output(HopperOutput)
|
|
36
|
-
response: HopperOutput = await
|
|
37
|
+
response: HopperOutput = await invoke_llm_with_timeout_message(
|
|
38
|
+
structured_llm.ainvoke(messages), agent_name="Hopper"
|
|
39
|
+
) # type: ignore
|
|
37
40
|
return HopperOutput(
|
|
38
41
|
step=response.step,
|
|
39
42
|
output=response.output,
|
|
@@ -15,7 +15,7 @@ from minitap.mobile_use.agents.planner.utils import (
|
|
|
15
15
|
)
|
|
16
16
|
from minitap.mobile_use.context import MobileUseContext
|
|
17
17
|
from minitap.mobile_use.graph.state import State
|
|
18
|
-
from minitap.mobile_use.services.llm import get_llm
|
|
18
|
+
from minitap.mobile_use.services.llm import get_llm, invoke_llm_with_timeout_message
|
|
19
19
|
from minitap.mobile_use.utils.decorators import wrap_with_callbacks
|
|
20
20
|
from minitap.mobile_use.utils.logger import get_logger
|
|
21
21
|
|
|
@@ -45,14 +45,18 @@ class OrchestratorNode:
|
|
|
45
45
|
else f"Starting the next subgoal: {new_subgoal}"
|
|
46
46
|
)
|
|
47
47
|
]
|
|
48
|
-
return _get_state_update(
|
|
48
|
+
return await _get_state_update(
|
|
49
|
+
ctx=self.ctx, state=state, thoughts=thoughts, update_plan=True
|
|
50
|
+
)
|
|
49
51
|
|
|
50
52
|
subgoals_to_examine = get_subgoals_by_ids(
|
|
51
53
|
subgoals=state.subgoal_plan,
|
|
52
54
|
ids=state.complete_subgoals_by_ids,
|
|
53
55
|
)
|
|
54
56
|
if len(subgoals_to_examine) <= 0:
|
|
55
|
-
return _get_state_update(
|
|
57
|
+
return await _get_state_update(
|
|
58
|
+
ctx=self.ctx, state=state, thoughts=["No subgoal to examine."]
|
|
59
|
+
)
|
|
56
60
|
|
|
57
61
|
system_message = Template(
|
|
58
62
|
Path(__file__).parent.joinpath("orchestrator.md").read_text(encoding="utf-8")
|
|
@@ -72,13 +76,16 @@ class OrchestratorNode:
|
|
|
72
76
|
|
|
73
77
|
llm = get_llm(ctx=self.ctx, name="orchestrator", temperature=1)
|
|
74
78
|
llm = llm.with_structured_output(OrchestratorOutput)
|
|
75
|
-
response: OrchestratorOutput = await
|
|
76
|
-
|
|
79
|
+
response: OrchestratorOutput = await invoke_llm_with_timeout_message(
|
|
80
|
+
llm.ainvoke(messages), agent_name="Orchestrator"
|
|
81
|
+
) # type: ignore
|
|
77
82
|
if response.needs_replaning:
|
|
78
83
|
thoughts = [response.reason]
|
|
79
84
|
state.subgoal_plan = fail_current_subgoal(state.subgoal_plan)
|
|
80
85
|
thoughts.append("==== END OF PLAN, REPLANNING ====")
|
|
81
|
-
return _get_state_update(
|
|
86
|
+
return await _get_state_update(
|
|
87
|
+
ctx=self.ctx, state=state, thoughts=thoughts, update_plan=True
|
|
88
|
+
)
|
|
82
89
|
|
|
83
90
|
state.subgoal_plan = complete_subgoals_by_ids(
|
|
84
91
|
subgoals=state.subgoal_plan,
|
|
@@ -87,19 +94,25 @@ class OrchestratorNode:
|
|
|
87
94
|
thoughts = [response.reason]
|
|
88
95
|
if all_completed(state.subgoal_plan):
|
|
89
96
|
logger.success("All the subgoals have been completed successfully.")
|
|
90
|
-
return _get_state_update(
|
|
97
|
+
return await _get_state_update(
|
|
98
|
+
ctx=self.ctx, state=state, thoughts=thoughts, update_plan=True
|
|
99
|
+
)
|
|
91
100
|
|
|
92
101
|
if current_subgoal.id not in response.completed_subgoal_ids:
|
|
93
102
|
# The current subgoal is not yet complete.
|
|
94
|
-
return _get_state_update(
|
|
103
|
+
return await _get_state_update(
|
|
104
|
+
ctx=self.ctx, state=state, thoughts=thoughts, update_plan=True
|
|
105
|
+
)
|
|
95
106
|
|
|
96
107
|
state.subgoal_plan = start_next_subgoal(state.subgoal_plan)
|
|
97
108
|
new_subgoal = get_current_subgoal(state.subgoal_plan)
|
|
98
109
|
thoughts.append(f"==== NEXT SUBGOAL: {new_subgoal} ====")
|
|
99
|
-
return _get_state_update(
|
|
110
|
+
return await _get_state_update(
|
|
111
|
+
ctx=self.ctx, state=state, thoughts=thoughts, update_plan=True
|
|
112
|
+
)
|
|
100
113
|
|
|
101
114
|
|
|
102
|
-
def _get_state_update(
|
|
115
|
+
async def _get_state_update(
|
|
103
116
|
ctx: MobileUseContext,
|
|
104
117
|
state: State,
|
|
105
118
|
thoughts: list[str],
|
|
@@ -111,4 +124,6 @@ def _get_state_update(
|
|
|
111
124
|
}
|
|
112
125
|
if update_plan:
|
|
113
126
|
update["subgoal_plan"] = state.subgoal_plan
|
|
114
|
-
|
|
127
|
+
if ctx.on_plan_changes:
|
|
128
|
+
await ctx.on_plan_changes(state.subgoal_plan, False)
|
|
129
|
+
return await state.asanitize_update(ctx=ctx, update=update, agent="orchestrator")
|
|
@@ -3,13 +3,14 @@ from pathlib import Path
|
|
|
3
3
|
|
|
4
4
|
from jinja2 import Template
|
|
5
5
|
from langchain_core.messages import BaseMessage, HumanMessage, SystemMessage
|
|
6
|
+
from pydantic import BaseModel
|
|
7
|
+
|
|
6
8
|
from minitap.mobile_use.config import OutputConfig
|
|
7
9
|
from minitap.mobile_use.context import MobileUseContext
|
|
8
10
|
from minitap.mobile_use.graph.state import State
|
|
9
|
-
from minitap.mobile_use.services.llm import get_llm
|
|
11
|
+
from minitap.mobile_use.services.llm import get_llm, invoke_llm_with_timeout_message
|
|
10
12
|
from minitap.mobile_use.utils.conversations import is_ai_message
|
|
11
13
|
from minitap.mobile_use.utils.logger import get_logger
|
|
12
|
-
from pydantic import BaseModel
|
|
13
14
|
|
|
14
15
|
logger = get_logger(__name__)
|
|
15
16
|
|
|
@@ -61,7 +62,9 @@ async def outputter(
|
|
|
61
62
|
if schema is not None:
|
|
62
63
|
structured_llm = llm.with_structured_output(schema)
|
|
63
64
|
|
|
64
|
-
response = await
|
|
65
|
+
response = await invoke_llm_with_timeout_message(
|
|
66
|
+
structured_llm.ainvoke(messages), agent_name="Outputter"
|
|
67
|
+
) # type: ignore
|
|
65
68
|
if isinstance(response, BaseModel):
|
|
66
69
|
if output_config.output_description and hasattr(response, "content"):
|
|
67
70
|
response = json.loads(response.content) # type: ignore
|
{minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/minitap/mobile_use/agents/planner/planner.md
RENAMED
|
@@ -9,12 +9,13 @@ You work like an agile tech lead: defining the key milestones without locking in
|
|
|
9
9
|
Given the **user's goal**:
|
|
10
10
|
|
|
11
11
|
- Create a **high-level sequence of subgoals** to complete that goal.
|
|
12
|
-
- Subgoals should reflect real interactions with mobile UIs (e.g
|
|
12
|
+
- Subgoals should reflect real interactions with mobile UIs and describe the intent of the action (e.g., "Open the app to find a contact," "View the image to extract information," "Send a message to Bob confirming the appointment").
|
|
13
|
+
- Focus on the goal of the interaction, not just the physical action. For example, instead of 'View the receipt,' a better subgoal is 'Open and analyze the receipt to identify transactions.'
|
|
13
14
|
- Don't assume the full UI is visible yet. Plan based on how most mobile apps work, and keep flexibility.
|
|
14
|
-
- The list of agent thoughts is empty, which is expected since this is the first plan.
|
|
15
|
-
- Avoid too granular UI actions based tasks (e.g. "tap", "swipe", "copy", "paste") unless explicitly required.
|
|
16
15
|
- The executor has the following available tools: {{ executor_tools_list }}.
|
|
17
16
|
When one of these tools offers a direct shortcut (e.g. `openLink` instead of manually launching a browser and typing a URL), prefer it over decomposed manual steps.
|
|
17
|
+
- Ensure that each subgoal prepares the ground for the next. If data needs to be gathered in one step to be used in another, the subgoal should reflect the intent to gather that data.
|
|
18
|
+
|
|
18
19
|
|
|
19
20
|
2. **Replanning**
|
|
20
21
|
If you're asked to **revise a previous plan**, you'll also receive:
|
|
@@ -27,38 +28,35 @@ You work like an agile tech lead: defining the key milestones without locking in
|
|
|
27
28
|
|
|
28
29
|
### Output
|
|
29
30
|
|
|
30
|
-
You must output a **list of subgoals (description
|
|
31
|
+
You must output a **list of subgoals (description)**, each representing a clear subgoal.
|
|
31
32
|
Each subgoal should be:
|
|
32
33
|
|
|
33
|
-
- Focused on **
|
|
34
|
+
- Focused on **purpose-driven mobile interactions** that clearly state the intent
|
|
34
35
|
- Neither too vague nor too granular
|
|
35
36
|
- Sequential (later steps may depend on earlier ones)
|
|
36
37
|
- Don't use loop-like formulation unless necessary (e.g. don't say "repeat this X times", instead reuse the same steps X times as subgoals)
|
|
37
38
|
|
|
38
|
-
If you're replaning and need to keep a previous subgoal, you **must keep the same subgoal ID**.
|
|
39
|
-
|
|
40
39
|
### Examples
|
|
41
40
|
|
|
42
|
-
#### **Initial Goal**: "
|
|
41
|
+
#### **Initial Goal**: "Go on https://tesla.com, and tell me what is the first car being displayed"
|
|
43
42
|
|
|
44
43
|
**Plan**:
|
|
45
44
|
|
|
46
|
-
- Open the
|
|
47
|
-
-
|
|
48
|
-
- Open the conversation with Alice (ID: None)
|
|
49
|
-
- Type the message "I’m running late" (ID: None)
|
|
50
|
-
- Send the message (ID: None)
|
|
45
|
+
- Open the link https://tesla.com to find information
|
|
46
|
+
- Analyze the home page to identify the first car displayed
|
|
51
47
|
|
|
52
|
-
#### **Initial Goal**: "
|
|
48
|
+
#### **Initial Goal**: "Open WhatsApp and send 'I’m running late' to Alice"
|
|
53
49
|
|
|
54
50
|
**Plan**:
|
|
55
51
|
|
|
56
|
-
- Open the
|
|
57
|
-
-
|
|
52
|
+
- Open the WhatsApp app to find the contact "Alice"
|
|
53
|
+
- Open the conversation with Alice to send a message
|
|
54
|
+
- Type the message "I’m running late" into the message field
|
|
55
|
+
- Send the message
|
|
58
56
|
|
|
59
57
|
#### **Replanning Example**
|
|
60
58
|
|
|
61
|
-
**Original Plan**: same as above
|
|
59
|
+
**Original Plan**: same as above
|
|
62
60
|
**Agent Thoughts**:
|
|
63
61
|
|
|
64
62
|
- Couldn't find Alice in recent chats
|
|
@@ -67,8 +65,8 @@ If you're replaning and need to keep a previous subgoal, you **must keep the sam
|
|
|
67
65
|
|
|
68
66
|
**New Plan**:
|
|
69
67
|
|
|
70
|
-
- Open WhatsApp
|
|
71
|
-
- Tap the search bar
|
|
72
|
-
- Search for "Alice"
|
|
73
|
-
- Select the correct chat
|
|
74
|
-
- Type and send "I’m running late"
|
|
68
|
+
- Open WhatsApp
|
|
69
|
+
- Tap the search bar to find a contact
|
|
70
|
+
- Search for "Alice" in the search field
|
|
71
|
+
- Select the correct chat to open the conversation
|
|
72
|
+
- Type and send "I’m running late"
|
{minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/minitap/mobile_use/agents/planner/planner.py
RENAMED
|
@@ -1,14 +1,13 @@
|
|
|
1
|
-
import uuid
|
|
2
1
|
from pathlib import Path
|
|
3
2
|
|
|
4
3
|
from jinja2 import Template
|
|
5
4
|
from langchain_core.messages import HumanMessage, SystemMessage
|
|
6
5
|
|
|
7
6
|
from minitap.mobile_use.agents.planner.types import PlannerOutput, Subgoal, SubgoalStatus
|
|
8
|
-
from minitap.mobile_use.agents.planner.utils import one_of_them_is_failure
|
|
7
|
+
from minitap.mobile_use.agents.planner.utils import generate_id, one_of_them_is_failure
|
|
9
8
|
from minitap.mobile_use.context import MobileUseContext
|
|
10
9
|
from minitap.mobile_use.graph.state import State
|
|
11
|
-
from minitap.mobile_use.services.llm import get_llm
|
|
10
|
+
from minitap.mobile_use.services.llm import get_llm, invoke_llm_with_timeout_message
|
|
12
11
|
from minitap.mobile_use.tools.index import EXECUTOR_WRAPPERS_TOOLS, format_tools_list
|
|
13
12
|
from minitap.mobile_use.utils.decorators import wrap_with_callbacks
|
|
14
13
|
from minitap.mobile_use.utils.logger import get_logger
|
|
@@ -49,11 +48,12 @@ class PlannerNode:
|
|
|
49
48
|
|
|
50
49
|
llm = get_llm(ctx=self.ctx, name="planner")
|
|
51
50
|
llm = llm.with_structured_output(PlannerOutput)
|
|
52
|
-
response: PlannerOutput = await
|
|
53
|
-
|
|
51
|
+
response: PlannerOutput = await invoke_llm_with_timeout_message(
|
|
52
|
+
llm.ainvoke(messages), agent_name="Planner"
|
|
53
|
+
) # type: ignore
|
|
54
54
|
subgoals_plan = [
|
|
55
55
|
Subgoal(
|
|
56
|
-
id=
|
|
56
|
+
id=generate_id(),
|
|
57
57
|
description=subgoal.description,
|
|
58
58
|
status=SubgoalStatus.NOT_STARTED,
|
|
59
59
|
completion_reason=None,
|
|
@@ -63,7 +63,10 @@ class PlannerNode:
|
|
|
63
63
|
logger.info("📜 Generated plan:")
|
|
64
64
|
logger.info("\n".join(str(s) for s in subgoals_plan))
|
|
65
65
|
|
|
66
|
-
|
|
66
|
+
if self.ctx.on_plan_changes:
|
|
67
|
+
await self.ctx.on_plan_changes(subgoals_plan, needs_replan)
|
|
68
|
+
|
|
69
|
+
return await state.asanitize_update(
|
|
67
70
|
ctx=self.ctx,
|
|
68
71
|
update={
|
|
69
72
|
"subgoal_plan": subgoals_plan,
|
{minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/minitap/mobile_use/agents/planner/types.py
RENAMED
|
@@ -1,11 +1,11 @@
|
|
|
1
|
+
from datetime import datetime
|
|
1
2
|
from enum import Enum
|
|
3
|
+
from typing import Annotated
|
|
2
4
|
|
|
3
5
|
from pydantic import BaseModel
|
|
4
|
-
from typing import Annotated
|
|
5
6
|
|
|
6
7
|
|
|
7
8
|
class PlannerSubgoalOutput(BaseModel):
|
|
8
|
-
id: Annotated[str | None, "If not provided, it will be generated"] = None
|
|
9
9
|
description: str
|
|
10
10
|
|
|
11
11
|
|
|
@@ -27,6 +27,8 @@ class Subgoal(BaseModel):
|
|
|
27
27
|
str | None, "Reason why the subgoal was completed (failure or success)"
|
|
28
28
|
] = None
|
|
29
29
|
status: SubgoalStatus
|
|
30
|
+
started_at: Annotated[datetime | None, "When the subgoal started"] = None
|
|
31
|
+
ended_at: Annotated[datetime | None, "When the subgoal ended"] = None
|
|
30
32
|
|
|
31
33
|
def __str__(self):
|
|
32
34
|
status_emoji = "❓"
|
{minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/minitap/mobile_use/agents/planner/utils.py
RENAMED
|
@@ -1,4 +1,8 @@
|
|
|
1
|
+
import random
|
|
2
|
+
import string
|
|
3
|
+
|
|
1
4
|
from minitap.mobile_use.agents.planner.types import Subgoal, SubgoalStatus
|
|
5
|
+
from datetime import datetime, UTC
|
|
2
6
|
|
|
3
7
|
|
|
4
8
|
def get_current_subgoal(subgoals: list[Subgoal]) -> Subgoal | None:
|
|
@@ -22,6 +26,7 @@ def complete_current_subgoal(subgoals: list[Subgoal]) -> list[Subgoal]:
|
|
|
22
26
|
if not current_subgoal:
|
|
23
27
|
return subgoals
|
|
24
28
|
current_subgoal.status = SubgoalStatus.SUCCESS
|
|
29
|
+
current_subgoal.ended_at = datetime.now(UTC)
|
|
25
30
|
return subgoals
|
|
26
31
|
|
|
27
32
|
|
|
@@ -29,6 +34,7 @@ def complete_subgoals_by_ids(subgoals: list[Subgoal], ids: list[str]) -> list[Su
|
|
|
29
34
|
for subgoal in subgoals:
|
|
30
35
|
if subgoal.id in ids:
|
|
31
36
|
subgoal.status = SubgoalStatus.SUCCESS
|
|
37
|
+
subgoal.ended_at = datetime.now(UTC)
|
|
32
38
|
return subgoals
|
|
33
39
|
|
|
34
40
|
|
|
@@ -37,6 +43,7 @@ def fail_current_subgoal(subgoals: list[Subgoal]) -> list[Subgoal]:
|
|
|
37
43
|
if not current_subgoal:
|
|
38
44
|
return subgoals
|
|
39
45
|
current_subgoal.status = SubgoalStatus.FAILURE
|
|
46
|
+
current_subgoal.ended_at = datetime.now(UTC)
|
|
40
47
|
return subgoals
|
|
41
48
|
|
|
42
49
|
|
|
@@ -53,4 +60,11 @@ def start_next_subgoal(subgoals: list[Subgoal]) -> list[Subgoal]:
|
|
|
53
60
|
if not next_subgoal:
|
|
54
61
|
return subgoals
|
|
55
62
|
next_subgoal.status = SubgoalStatus.PENDING
|
|
63
|
+
next_subgoal.started_at = datetime.now(UTC)
|
|
56
64
|
return subgoals
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def generate_id(length: int = 6) -> str:
|
|
68
|
+
"""Generates a small and distinct random string ID."""
|
|
69
|
+
chars = string.ascii_lowercase + string.digits
|
|
70
|
+
return "".join(random.choice(chars) for _ in range(length))
|
|
@@ -13,7 +13,7 @@ class SummarizerNode:
|
|
|
13
13
|
def __init__(self, ctx: MobileUseContext):
|
|
14
14
|
self.ctx = ctx
|
|
15
15
|
|
|
16
|
-
def __call__(self, state: State):
|
|
16
|
+
async def __call__(self, state: State):
|
|
17
17
|
if len(state.messages) <= MAX_MESSAGES_IN_HISTORY:
|
|
18
18
|
return {}
|
|
19
19
|
|
|
@@ -27,7 +27,7 @@ class SummarizerNode:
|
|
|
27
27
|
start_removal = True
|
|
28
28
|
if start_removal and msg.id:
|
|
29
29
|
remove_messages.append(RemoveMessage(id=msg.id))
|
|
30
|
-
return state.
|
|
30
|
+
return await state.asanitize_update(
|
|
31
31
|
ctx=self.ctx,
|
|
32
32
|
update={
|
|
33
33
|
"messages": remove_messages,
|
|
@@ -23,8 +23,10 @@ class Settings(BaseSettings):
|
|
|
23
23
|
GOOGLE_API_KEY: SecretStr | None = None
|
|
24
24
|
XAI_API_KEY: SecretStr | None = None
|
|
25
25
|
OPEN_ROUTER_API_KEY: SecretStr | None = None
|
|
26
|
+
MINITAP_API_KEY: SecretStr | None = None
|
|
26
27
|
|
|
27
28
|
OPENAI_BASE_URL: str | None = None
|
|
29
|
+
MINITAP_API_BASE_URL: str = "https://platform.minitap.ai"
|
|
28
30
|
|
|
29
31
|
DEVICE_SCREEN_API_BASE_URL: str | None = None
|
|
30
32
|
DEVICE_HARDWARE_BRIDGE_BASE_URL: str | None = None
|
|
@@ -90,7 +92,7 @@ def record_events(output_path: Path | None, events: list[str] | BaseModel | Any)
|
|
|
90
92
|
|
|
91
93
|
### LLM Configuration
|
|
92
94
|
|
|
93
|
-
LLMProvider = Literal["openai", "google", "openrouter", "xai", "vertexai"]
|
|
95
|
+
LLMProvider = Literal["openai", "google", "openrouter", "xai", "vertexai", "minitap"]
|
|
94
96
|
LLMUtilsNode = Literal["outputter", "hopper"]
|
|
95
97
|
AgentNode = Literal["planner", "orchestrator", "cortex", "executor"]
|
|
96
98
|
AgentNodeWithFallback = Literal["cortex"]
|
|
@@ -131,6 +133,9 @@ class LLM(BaseModel):
|
|
|
131
133
|
case "xai":
|
|
132
134
|
if not settings.XAI_API_KEY:
|
|
133
135
|
raise Exception(f"{name} requires XAI_API_KEY in .env")
|
|
136
|
+
case "minitap":
|
|
137
|
+
if not settings.MINITAP_API_KEY:
|
|
138
|
+
raise Exception(f"{name} requires MINITAP_API_KEY in .env")
|
|
134
139
|
|
|
135
140
|
def __str__(self):
|
|
136
141
|
return f"{self.provider}/{self.model}"
|