minitap-mobile-use 2.2.0__py3-none-any.whl → 2.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of minitap-mobile-use might be problematic. Click here for more details.
- minitap/mobile_use/agents/contextor/contextor.py +6 -4
- minitap/mobile_use/agents/cortex/cortex.md +114 -27
- minitap/mobile_use/agents/cortex/cortex.py +8 -5
- minitap/mobile_use/agents/executor/executor.md +15 -10
- minitap/mobile_use/agents/executor/executor.py +6 -5
- minitap/mobile_use/agents/executor/utils.py +2 -1
- minitap/mobile_use/agents/hopper/hopper.py +6 -3
- minitap/mobile_use/agents/orchestrator/orchestrator.py +26 -11
- minitap/mobile_use/agents/outputter/outputter.py +6 -3
- minitap/mobile_use/agents/outputter/test_outputter.py +104 -42
- minitap/mobile_use/agents/planner/planner.md +20 -22
- minitap/mobile_use/agents/planner/planner.py +10 -7
- minitap/mobile_use/agents/planner/types.py +4 -2
- minitap/mobile_use/agents/planner/utils.py +14 -0
- minitap/mobile_use/agents/summarizer/summarizer.py +2 -2
- minitap/mobile_use/config.py +6 -1
- minitap/mobile_use/context.py +13 -3
- minitap/mobile_use/controllers/mobile_command_controller.py +1 -14
- minitap/mobile_use/graph/state.py +7 -3
- minitap/mobile_use/sdk/agent.py +204 -29
- minitap/mobile_use/sdk/examples/README.md +19 -1
- minitap/mobile_use/sdk/examples/platform_minimal_example.py +46 -0
- minitap/mobile_use/sdk/services/platform.py +244 -0
- minitap/mobile_use/sdk/types/__init__.py +14 -14
- minitap/mobile_use/sdk/types/exceptions.py +57 -0
- minitap/mobile_use/sdk/types/platform.py +125 -0
- minitap/mobile_use/sdk/types/task.py +60 -17
- minitap/mobile_use/servers/device_hardware_bridge.py +3 -2
- minitap/mobile_use/servers/stop_servers.py +11 -12
- minitap/mobile_use/servers/utils.py +6 -9
- minitap/mobile_use/services/llm.py +89 -5
- minitap/mobile_use/tools/index.py +2 -8
- minitap/mobile_use/tools/mobile/back.py +3 -3
- minitap/mobile_use/tools/mobile/clear_text.py +67 -38
- minitap/mobile_use/tools/mobile/erase_one_char.py +5 -4
- minitap/mobile_use/tools/mobile/{take_screenshot.py → glimpse_screen.py} +23 -15
- minitap/mobile_use/tools/mobile/input_text.py +67 -16
- minitap/mobile_use/tools/mobile/launch_app.py +54 -22
- minitap/mobile_use/tools/mobile/long_press_on.py +15 -8
- minitap/mobile_use/tools/mobile/open_link.py +15 -8
- minitap/mobile_use/tools/mobile/press_key.py +15 -8
- minitap/mobile_use/tools/mobile/stop_app.py +14 -8
- minitap/mobile_use/tools/mobile/swipe.py +11 -5
- minitap/mobile_use/tools/mobile/tap.py +103 -21
- minitap/mobile_use/tools/mobile/wait_for_animation_to_end.py +3 -3
- minitap/mobile_use/tools/test_utils.py +377 -0
- minitap/mobile_use/tools/types.py +35 -0
- minitap/mobile_use/tools/utils.py +149 -39
- minitap/mobile_use/utils/recorder.py +1 -1
- minitap/mobile_use/utils/test_ui_hierarchy.py +178 -0
- minitap/mobile_use/utils/ui_hierarchy.py +11 -4
- {minitap_mobile_use-2.2.0.dist-info → minitap_mobile_use-2.4.0.dist-info}/METADATA +6 -4
- minitap_mobile_use-2.4.0.dist-info/RECORD +99 -0
- minitap/mobile_use/tools/mobile/copy_text_from.py +0 -73
- minitap/mobile_use/tools/mobile/find_packages.py +0 -69
- minitap/mobile_use/tools/mobile/paste_text.py +0 -62
- minitap_mobile_use-2.2.0.dist-info/RECORD +0 -96
- {minitap_mobile_use-2.2.0.dist-info → minitap_mobile_use-2.4.0.dist-info}/WHEEL +0 -0
- {minitap_mobile_use-2.2.0.dist-info → minitap_mobile_use-2.4.0.dist-info}/entry_points.txt +0 -0
|
@@ -1,9 +1,21 @@
|
|
|
1
|
-
|
|
2
|
-
from
|
|
3
|
-
|
|
4
|
-
|
|
1
|
+
import sys
|
|
2
|
+
from unittest.mock import AsyncMock, Mock, patch
|
|
3
|
+
|
|
4
|
+
import pytest
|
|
5
5
|
from pydantic import BaseModel
|
|
6
6
|
|
|
7
|
+
sys.modules["langgraph.prebuilt.chat_agent_executor"] = Mock()
|
|
8
|
+
sys.modules["minitap.mobile_use.graph.state"] = Mock()
|
|
9
|
+
sys.modules["langchain_google_vertexai"] = Mock()
|
|
10
|
+
sys.modules["langchain_google_genai"] = Mock()
|
|
11
|
+
sys.modules["langchain_openai"] = Mock()
|
|
12
|
+
sys.modules["langchain_cerebras"] = Mock()
|
|
13
|
+
|
|
14
|
+
from minitap.mobile_use.agents.outputter.outputter import outputter # noqa: E402
|
|
15
|
+
from minitap.mobile_use.config import LLM, OutputConfig # noqa: E402
|
|
16
|
+
from minitap.mobile_use.context import MobileUseContext # noqa: E402
|
|
17
|
+
from minitap.mobile_use.utils.logger import get_logger # noqa: E402
|
|
18
|
+
|
|
7
19
|
logger = get_logger(__name__)
|
|
8
20
|
|
|
9
21
|
|
|
@@ -40,68 +52,118 @@ mocked_state = DummyState(
|
|
|
40
52
|
],
|
|
41
53
|
)
|
|
42
54
|
|
|
43
|
-
|
|
44
|
-
|
|
55
|
+
|
|
56
|
+
@pytest.fixture
|
|
57
|
+
def mock_context():
|
|
58
|
+
"""Create a properly mocked context with all required fields."""
|
|
59
|
+
ctx = Mock(spec=MobileUseContext)
|
|
60
|
+
ctx.llm_config = {
|
|
45
61
|
"executor": LLM(provider="openai", model="gpt-5-nano"),
|
|
46
62
|
"cortex": LLM(provider="openai", model="gpt-5-nano"),
|
|
47
63
|
"planner": LLM(provider="openai", model="gpt-5-nano"),
|
|
48
64
|
"orchestrator": LLM(provider="openai", model="gpt-5-nano"),
|
|
49
|
-
}
|
|
50
|
-
|
|
65
|
+
}
|
|
66
|
+
ctx.device = Mock()
|
|
67
|
+
ctx.hw_bridge_client = Mock()
|
|
68
|
+
ctx.screen_api_client = Mock()
|
|
69
|
+
return ctx
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
@pytest.fixture
|
|
73
|
+
def mock_state():
|
|
74
|
+
"""Create a mock state with test data."""
|
|
75
|
+
return DummyState(
|
|
76
|
+
messages=[],
|
|
77
|
+
initial_goal="Find a green product on my website",
|
|
78
|
+
agents_thoughts=[
|
|
79
|
+
"Going on http://superwebsite.fr",
|
|
80
|
+
"Searching for products",
|
|
81
|
+
"Filtering by color",
|
|
82
|
+
"Color 'green' found for a 20 dollars product",
|
|
83
|
+
],
|
|
84
|
+
)
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
@patch("minitap.mobile_use.agents.outputter.outputter.get_llm")
|
|
88
|
+
@pytest.mark.asyncio
|
|
89
|
+
async def test_outputter_with_pydantic_model(mock_get_llm, mock_context, mock_state):
|
|
90
|
+
"""Test outputter with Pydantic model output."""
|
|
91
|
+
# Mock the structured LLM response
|
|
92
|
+
mock_structured_llm = AsyncMock()
|
|
93
|
+
mock_structured_llm.ainvoke.return_value = MockPydanticSchema(
|
|
94
|
+
color="green", price=20, currency_symbol="$", website_url="http://superwebsite.fr"
|
|
95
|
+
)
|
|
51
96
|
|
|
97
|
+
# Mock the base LLM
|
|
98
|
+
mock_llm = Mock()
|
|
99
|
+
mock_llm.with_structured_output.return_value = mock_structured_llm
|
|
100
|
+
mock_get_llm.return_value = mock_llm
|
|
52
101
|
|
|
53
|
-
async def test_outputter_with_pydantic_model():
|
|
54
|
-
logger.info("Starting test_outputter_with_pydantic_model")
|
|
55
102
|
config = OutputConfig(
|
|
56
103
|
structured_output=MockPydanticSchema,
|
|
57
104
|
output_description=None,
|
|
58
105
|
)
|
|
59
106
|
|
|
60
|
-
result = await outputter(ctx=
|
|
61
|
-
|
|
62
|
-
assert isinstance(result, MockPydanticSchema)
|
|
63
|
-
assert result.color.lower() == "green"
|
|
64
|
-
logger.success(str(result))
|
|
107
|
+
result = await outputter(ctx=mock_context, output_config=config, graph_output=mock_state)
|
|
65
108
|
|
|
109
|
+
assert isinstance(result, dict)
|
|
110
|
+
assert result.get("color") == "green"
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
@patch("minitap.mobile_use.agents.outputter.outputter.get_llm")
|
|
114
|
+
@pytest.mark.asyncio
|
|
115
|
+
async def test_outputter_with_dict(mock_get_llm, mock_context, mock_state):
|
|
116
|
+
"""Test outputter with dictionary output."""
|
|
117
|
+
# Mock the structured LLM response for dict
|
|
118
|
+
mock_structured_llm = AsyncMock()
|
|
119
|
+
expected_dict = {
|
|
120
|
+
"color": "green",
|
|
121
|
+
"price": 20,
|
|
122
|
+
"currency_symbol": "$",
|
|
123
|
+
"website_url": "http://superwebsite.fr",
|
|
124
|
+
}
|
|
125
|
+
mock_structured_llm.ainvoke.return_value = expected_dict
|
|
126
|
+
|
|
127
|
+
# Mock the base LLM
|
|
128
|
+
mock_llm = Mock()
|
|
129
|
+
mock_llm.with_structured_output.return_value = mock_structured_llm
|
|
130
|
+
mock_get_llm.return_value = mock_llm
|
|
66
131
|
|
|
67
|
-
async def test_outputter_with_dict():
|
|
68
|
-
logger.info("Starting test_outputter_with_dict")
|
|
69
132
|
config = OutputConfig(
|
|
70
133
|
structured_output=mock_dict,
|
|
71
134
|
output_description=None,
|
|
72
135
|
)
|
|
73
136
|
|
|
74
|
-
result = await outputter(ctx=
|
|
137
|
+
result = await outputter(ctx=mock_context, output_config=config, graph_output=mock_state)
|
|
75
138
|
|
|
76
139
|
assert isinstance(result, dict)
|
|
77
|
-
assert result.get("color"
|
|
78
|
-
assert result.get("price"
|
|
79
|
-
assert result.get("currency_symbol"
|
|
80
|
-
assert result.get("website_url"
|
|
81
|
-
|
|
82
|
-
|
|
140
|
+
assert result.get("color") == "green"
|
|
141
|
+
assert result.get("price") == 20
|
|
142
|
+
assert result.get("currency_symbol") == "$"
|
|
143
|
+
assert result.get("website_url") == "http://superwebsite.fr"
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
@patch("minitap.mobile_use.agents.outputter.outputter.get_llm")
|
|
147
|
+
@pytest.mark.asyncio
|
|
148
|
+
async def test_outputter_with_natural_language_output(mock_get_llm, mock_context, mock_state):
|
|
149
|
+
"""Test outputter with natural language description output."""
|
|
150
|
+
# Mock the LLM response for natural language output (no structured output)
|
|
151
|
+
mock_llm = AsyncMock()
|
|
152
|
+
expected_json = '{"color": "green", "price": 20, "currency_symbol": "$", "website_url": "http://superwebsite.fr"}'
|
|
153
|
+
mock_llm.ainvoke.return_value = Mock(content=expected_json)
|
|
154
|
+
mock_get_llm.return_value = mock_llm
|
|
83
155
|
|
|
84
|
-
async def test_outputter_with_natural_language_output():
|
|
85
|
-
logger.info("Starting test_outputter_with_natural_language_output")
|
|
86
156
|
config = OutputConfig(
|
|
87
157
|
structured_output=None,
|
|
88
|
-
output_description=
|
|
89
|
-
|
|
158
|
+
output_description=(
|
|
159
|
+
"A JSON object with a color, a price, a currency_symbol and a website_url key"
|
|
160
|
+
),
|
|
90
161
|
)
|
|
91
162
|
|
|
92
|
-
result = await outputter(ctx=
|
|
93
|
-
logger.info(str(result))
|
|
163
|
+
result = await outputter(ctx=mock_context, output_config=config, graph_output=mock_state)
|
|
94
164
|
|
|
95
165
|
assert isinstance(result, dict)
|
|
96
|
-
assert result.get("color"
|
|
97
|
-
assert result.get("price"
|
|
98
|
-
assert result.get("currency_symbol"
|
|
99
|
-
assert result.get("website_url"
|
|
100
|
-
logger.success(str(result))
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
if __name__ == "__main__":
|
|
104
|
-
import asyncio
|
|
105
|
-
|
|
106
|
-
asyncio.run(test_outputter_with_pydantic_model())
|
|
107
|
-
asyncio.run(test_outputter_with_natural_language_output())
|
|
166
|
+
assert result.get("color") == "green"
|
|
167
|
+
assert result.get("price") == 20
|
|
168
|
+
assert result.get("currency_symbol") == "$"
|
|
169
|
+
assert result.get("website_url") == "http://superwebsite.fr"
|
|
@@ -9,12 +9,13 @@ You work like an agile tech lead: defining the key milestones without locking in
|
|
|
9
9
|
Given the **user's goal**:
|
|
10
10
|
|
|
11
11
|
- Create a **high-level sequence of subgoals** to complete that goal.
|
|
12
|
-
- Subgoals should reflect real interactions with mobile UIs (e.g
|
|
12
|
+
- Subgoals should reflect real interactions with mobile UIs and describe the intent of the action (e.g., "Open the app to find a contact," "View the image to extract information," "Send a message to Bob confirming the appointment").
|
|
13
|
+
- Focus on the goal of the interaction, not just the physical action. For example, instead of 'View the receipt,' a better subgoal is 'Open and analyze the receipt to identify transactions.'
|
|
13
14
|
- Don't assume the full UI is visible yet. Plan based on how most mobile apps work, and keep flexibility.
|
|
14
|
-
- List of agents thoughts is empty which is expected, since it is the first plan.
|
|
15
|
-
- Avoid too granular UI actions based tasks (e.g. "tap", "swipe", "copy", "paste") unless explicitly required.
|
|
16
15
|
- The executor has the following available tools: {{ executor_tools_list }}.
|
|
17
16
|
When one of these tools offers a direct shortcut (e.g. `openLink` instead of manually launching a browser and typing a URL), prefer it over decomposed manual steps.
|
|
17
|
+
- Ensure that each subgoal prepares the ground for the next. If data needs to be gathered in one step to be used in another, the subgoal should reflect the intent to gather that data.
|
|
18
|
+
|
|
18
19
|
|
|
19
20
|
2. **Replanning**
|
|
20
21
|
If you're asked to **revise a previous plan**, you'll also receive:
|
|
@@ -27,38 +28,35 @@ You work like an agile tech lead: defining the key milestones without locking in
|
|
|
27
28
|
|
|
28
29
|
### Output
|
|
29
30
|
|
|
30
|
-
You must output a **list of subgoals (description
|
|
31
|
+
You must output a **list of subgoals (description)**, each representing a clear subgoal.
|
|
31
32
|
Each subgoal should be:
|
|
32
33
|
|
|
33
|
-
- Focused on **
|
|
34
|
+
- Focused on **purpose-driven mobile interactions** that clearly state the intent
|
|
34
35
|
- Neither too vague nor too granular
|
|
35
36
|
- Sequential (later steps may depend on earlier ones)
|
|
36
37
|
- Don't use loop-like formulation unless necessary (e.g. don't say "repeat this X times", instead reuse the same steps X times as subgoals)
|
|
37
38
|
|
|
38
|
-
If you're replaning and need to keep a previous subgoal, you **must keep the same subgoal ID**.
|
|
39
|
-
|
|
40
39
|
### Examples
|
|
41
40
|
|
|
42
|
-
#### **Initial Goal**: "
|
|
41
|
+
#### **Initial Goal**: "Go on https://tesla.com, and tell me what is the first car being displayed"
|
|
43
42
|
|
|
44
43
|
**Plan**:
|
|
45
44
|
|
|
46
|
-
- Open the
|
|
47
|
-
-
|
|
48
|
-
- Open the conversation with Alice (ID: None)
|
|
49
|
-
- Type the message "I’m running late" (ID: None)
|
|
50
|
-
- Send the message (ID: None)
|
|
45
|
+
- Open the link https://tesla.com to find information
|
|
46
|
+
- Analyze the home page to identify the first car displayed
|
|
51
47
|
|
|
52
|
-
#### **Initial Goal**: "
|
|
48
|
+
#### **Initial Goal**: "Open WhatsApp and send 'I’m running late' to Alice"
|
|
53
49
|
|
|
54
50
|
**Plan**:
|
|
55
51
|
|
|
56
|
-
- Open the
|
|
57
|
-
-
|
|
52
|
+
- Open the WhatsApp app to find the contact "Alice"
|
|
53
|
+
- Open the conversation with Alice to send a message
|
|
54
|
+
- Type the message "I’m running late" into the message field
|
|
55
|
+
- Send the message
|
|
58
56
|
|
|
59
57
|
#### **Replanning Example**
|
|
60
58
|
|
|
61
|
-
**Original Plan**: same as above
|
|
59
|
+
**Original Plan**: same as above
|
|
62
60
|
**Agent Thoughts**:
|
|
63
61
|
|
|
64
62
|
- Couldn't find Alice in recent chats
|
|
@@ -67,8 +65,8 @@ If you're replaning and need to keep a previous subgoal, you **must keep the sam
|
|
|
67
65
|
|
|
68
66
|
**New Plan**:
|
|
69
67
|
|
|
70
|
-
- Open WhatsApp
|
|
71
|
-
- Tap the search bar
|
|
72
|
-
- Search for "Alice"
|
|
73
|
-
- Select the correct chat
|
|
74
|
-
- Type and send "I’m running late"
|
|
68
|
+
- Open WhatsApp
|
|
69
|
+
- Tap the search bar to find a contact
|
|
70
|
+
- Search for "Alice" in the search field
|
|
71
|
+
- Select the correct chat to open the conversation
|
|
72
|
+
- Type and send "I’m running late"
|
|
@@ -1,14 +1,13 @@
|
|
|
1
|
-
import uuid
|
|
2
1
|
from pathlib import Path
|
|
3
2
|
|
|
4
3
|
from jinja2 import Template
|
|
5
4
|
from langchain_core.messages import HumanMessage, SystemMessage
|
|
6
5
|
|
|
7
6
|
from minitap.mobile_use.agents.planner.types import PlannerOutput, Subgoal, SubgoalStatus
|
|
8
|
-
from minitap.mobile_use.agents.planner.utils import one_of_them_is_failure
|
|
7
|
+
from minitap.mobile_use.agents.planner.utils import generate_id, one_of_them_is_failure
|
|
9
8
|
from minitap.mobile_use.context import MobileUseContext
|
|
10
9
|
from minitap.mobile_use.graph.state import State
|
|
11
|
-
from minitap.mobile_use.services.llm import get_llm
|
|
10
|
+
from minitap.mobile_use.services.llm import get_llm, invoke_llm_with_timeout_message
|
|
12
11
|
from minitap.mobile_use.tools.index import EXECUTOR_WRAPPERS_TOOLS, format_tools_list
|
|
13
12
|
from minitap.mobile_use.utils.decorators import wrap_with_callbacks
|
|
14
13
|
from minitap.mobile_use.utils.logger import get_logger
|
|
@@ -49,11 +48,12 @@ class PlannerNode:
|
|
|
49
48
|
|
|
50
49
|
llm = get_llm(ctx=self.ctx, name="planner")
|
|
51
50
|
llm = llm.with_structured_output(PlannerOutput)
|
|
52
|
-
response: PlannerOutput = await
|
|
53
|
-
|
|
51
|
+
response: PlannerOutput = await invoke_llm_with_timeout_message(
|
|
52
|
+
llm.ainvoke(messages), agent_name="Planner"
|
|
53
|
+
) # type: ignore
|
|
54
54
|
subgoals_plan = [
|
|
55
55
|
Subgoal(
|
|
56
|
-
id=
|
|
56
|
+
id=generate_id(),
|
|
57
57
|
description=subgoal.description,
|
|
58
58
|
status=SubgoalStatus.NOT_STARTED,
|
|
59
59
|
completion_reason=None,
|
|
@@ -63,7 +63,10 @@ class PlannerNode:
|
|
|
63
63
|
logger.info("📜 Generated plan:")
|
|
64
64
|
logger.info("\n".join(str(s) for s in subgoals_plan))
|
|
65
65
|
|
|
66
|
-
|
|
66
|
+
if self.ctx.on_plan_changes:
|
|
67
|
+
await self.ctx.on_plan_changes(subgoals_plan, needs_replan)
|
|
68
|
+
|
|
69
|
+
return await state.asanitize_update(
|
|
67
70
|
ctx=self.ctx,
|
|
68
71
|
update={
|
|
69
72
|
"subgoal_plan": subgoals_plan,
|
|
@@ -1,11 +1,11 @@
|
|
|
1
|
+
from datetime import datetime
|
|
1
2
|
from enum import Enum
|
|
3
|
+
from typing import Annotated
|
|
2
4
|
|
|
3
5
|
from pydantic import BaseModel
|
|
4
|
-
from typing import Annotated
|
|
5
6
|
|
|
6
7
|
|
|
7
8
|
class PlannerSubgoalOutput(BaseModel):
|
|
8
|
-
id: Annotated[str | None, "If not provided, it will be generated"] = None
|
|
9
9
|
description: str
|
|
10
10
|
|
|
11
11
|
|
|
@@ -27,6 +27,8 @@ class Subgoal(BaseModel):
|
|
|
27
27
|
str | None, "Reason why the subgoal was completed (failure or success)"
|
|
28
28
|
] = None
|
|
29
29
|
status: SubgoalStatus
|
|
30
|
+
started_at: Annotated[datetime | None, "When the subgoal started"] = None
|
|
31
|
+
ended_at: Annotated[datetime | None, "When the subgoal ended"] = None
|
|
30
32
|
|
|
31
33
|
def __str__(self):
|
|
32
34
|
status_emoji = "❓"
|
|
@@ -1,4 +1,8 @@
|
|
|
1
|
+
import random
|
|
2
|
+
import string
|
|
3
|
+
|
|
1
4
|
from minitap.mobile_use.agents.planner.types import Subgoal, SubgoalStatus
|
|
5
|
+
from datetime import datetime, UTC
|
|
2
6
|
|
|
3
7
|
|
|
4
8
|
def get_current_subgoal(subgoals: list[Subgoal]) -> Subgoal | None:
|
|
@@ -22,6 +26,7 @@ def complete_current_subgoal(subgoals: list[Subgoal]) -> list[Subgoal]:
|
|
|
22
26
|
if not current_subgoal:
|
|
23
27
|
return subgoals
|
|
24
28
|
current_subgoal.status = SubgoalStatus.SUCCESS
|
|
29
|
+
current_subgoal.ended_at = datetime.now(UTC)
|
|
25
30
|
return subgoals
|
|
26
31
|
|
|
27
32
|
|
|
@@ -29,6 +34,7 @@ def complete_subgoals_by_ids(subgoals: list[Subgoal], ids: list[str]) -> list[Su
|
|
|
29
34
|
for subgoal in subgoals:
|
|
30
35
|
if subgoal.id in ids:
|
|
31
36
|
subgoal.status = SubgoalStatus.SUCCESS
|
|
37
|
+
subgoal.ended_at = datetime.now(UTC)
|
|
32
38
|
return subgoals
|
|
33
39
|
|
|
34
40
|
|
|
@@ -37,6 +43,7 @@ def fail_current_subgoal(subgoals: list[Subgoal]) -> list[Subgoal]:
|
|
|
37
43
|
if not current_subgoal:
|
|
38
44
|
return subgoals
|
|
39
45
|
current_subgoal.status = SubgoalStatus.FAILURE
|
|
46
|
+
current_subgoal.ended_at = datetime.now(UTC)
|
|
40
47
|
return subgoals
|
|
41
48
|
|
|
42
49
|
|
|
@@ -53,4 +60,11 @@ def start_next_subgoal(subgoals: list[Subgoal]) -> list[Subgoal]:
|
|
|
53
60
|
if not next_subgoal:
|
|
54
61
|
return subgoals
|
|
55
62
|
next_subgoal.status = SubgoalStatus.PENDING
|
|
63
|
+
next_subgoal.started_at = datetime.now(UTC)
|
|
56
64
|
return subgoals
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def generate_id(length: int = 6) -> str:
|
|
68
|
+
"""Generates a small and distinct random string ID."""
|
|
69
|
+
chars = string.ascii_lowercase + string.digits
|
|
70
|
+
return "".join(random.choice(chars) for _ in range(length))
|
|
@@ -13,7 +13,7 @@ class SummarizerNode:
|
|
|
13
13
|
def __init__(self, ctx: MobileUseContext):
|
|
14
14
|
self.ctx = ctx
|
|
15
15
|
|
|
16
|
-
def __call__(self, state: State):
|
|
16
|
+
async def __call__(self, state: State):
|
|
17
17
|
if len(state.messages) <= MAX_MESSAGES_IN_HISTORY:
|
|
18
18
|
return {}
|
|
19
19
|
|
|
@@ -27,7 +27,7 @@ class SummarizerNode:
|
|
|
27
27
|
start_removal = True
|
|
28
28
|
if start_removal and msg.id:
|
|
29
29
|
remove_messages.append(RemoveMessage(id=msg.id))
|
|
30
|
-
return state.
|
|
30
|
+
return await state.asanitize_update(
|
|
31
31
|
ctx=self.ctx,
|
|
32
32
|
update={
|
|
33
33
|
"messages": remove_messages,
|
minitap/mobile_use/config.py
CHANGED
|
@@ -23,8 +23,10 @@ class Settings(BaseSettings):
|
|
|
23
23
|
GOOGLE_API_KEY: SecretStr | None = None
|
|
24
24
|
XAI_API_KEY: SecretStr | None = None
|
|
25
25
|
OPEN_ROUTER_API_KEY: SecretStr | None = None
|
|
26
|
+
MINITAP_API_KEY: SecretStr | None = None
|
|
26
27
|
|
|
27
28
|
OPENAI_BASE_URL: str | None = None
|
|
29
|
+
MINITAP_API_BASE_URL: str = "https://platform.minitap.ai"
|
|
28
30
|
|
|
29
31
|
DEVICE_SCREEN_API_BASE_URL: str | None = None
|
|
30
32
|
DEVICE_HARDWARE_BRIDGE_BASE_URL: str | None = None
|
|
@@ -90,7 +92,7 @@ def record_events(output_path: Path | None, events: list[str] | BaseModel | Any)
|
|
|
90
92
|
|
|
91
93
|
### LLM Configuration
|
|
92
94
|
|
|
93
|
-
LLMProvider = Literal["openai", "google", "openrouter", "xai", "vertexai"]
|
|
95
|
+
LLMProvider = Literal["openai", "google", "openrouter", "xai", "vertexai", "minitap"]
|
|
94
96
|
LLMUtilsNode = Literal["outputter", "hopper"]
|
|
95
97
|
AgentNode = Literal["planner", "orchestrator", "cortex", "executor"]
|
|
96
98
|
AgentNodeWithFallback = Literal["cortex"]
|
|
@@ -131,6 +133,9 @@ class LLM(BaseModel):
|
|
|
131
133
|
case "xai":
|
|
132
134
|
if not settings.XAI_API_KEY:
|
|
133
135
|
raise Exception(f"{name} requires XAI_API_KEY in .env")
|
|
136
|
+
case "minitap":
|
|
137
|
+
if not settings.MINITAP_API_KEY:
|
|
138
|
+
raise Exception(f"{name} requires MINITAP_API_KEY in .env")
|
|
134
139
|
|
|
135
140
|
def __str__(self):
|
|
136
141
|
return f"{self.provider}/{self.model}"
|
minitap/mobile_use/context.py
CHANGED
|
@@ -4,17 +4,19 @@ Context variables for global state management.
|
|
|
4
4
|
Uses ContextVar to avoid prop drilling and maintain clean function signatures.
|
|
5
5
|
"""
|
|
6
6
|
|
|
7
|
+
from collections.abc import Callable, Coroutine
|
|
7
8
|
from enum import Enum
|
|
8
9
|
from pathlib import Path
|
|
10
|
+
from typing import Literal
|
|
9
11
|
|
|
10
12
|
from adbutils import AdbClient
|
|
11
13
|
from openai import BaseModel
|
|
12
14
|
from pydantic import ConfigDict
|
|
13
|
-
from typing import Literal
|
|
14
15
|
|
|
16
|
+
from minitap.mobile_use.agents.planner.types import Subgoal
|
|
15
17
|
from minitap.mobile_use.clients.device_hardware_client import DeviceHardwareClient
|
|
16
18
|
from minitap.mobile_use.clients.screen_api_client import ScreenApiClient
|
|
17
|
-
from minitap.mobile_use.config import LLMConfig
|
|
19
|
+
from minitap.mobile_use.config import AgentNode, LLMConfig
|
|
18
20
|
|
|
19
21
|
|
|
20
22
|
class DevicePlatform(str, Enum):
|
|
@@ -45,18 +47,26 @@ class ExecutionSetup(BaseModel):
|
|
|
45
47
|
"""Execution setup for a task."""
|
|
46
48
|
|
|
47
49
|
traces_path: Path
|
|
48
|
-
|
|
50
|
+
trace_name: str
|
|
51
|
+
enable_remote_tracing: bool
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
IsReplan = bool
|
|
49
55
|
|
|
50
56
|
|
|
51
57
|
class MobileUseContext(BaseModel):
|
|
52
58
|
model_config = ConfigDict(arbitrary_types_allowed=True)
|
|
53
59
|
|
|
60
|
+
trace_id: str
|
|
54
61
|
device: DeviceContext
|
|
55
62
|
hw_bridge_client: DeviceHardwareClient
|
|
56
63
|
screen_api_client: ScreenApiClient
|
|
57
64
|
llm_config: LLMConfig
|
|
58
65
|
adb_client: AdbClient | None = None
|
|
59
66
|
execution_setup: ExecutionSetup | None = None
|
|
67
|
+
on_agent_thought: Callable[[AgentNode, str], Coroutine] | None = None
|
|
68
|
+
on_plan_changes: Callable[[list[Subgoal], IsReplan], Coroutine] | None = None
|
|
69
|
+
minitap_api_key: str | None = None
|
|
60
70
|
|
|
61
71
|
def get_adb_client(self) -> AdbClient:
|
|
62
72
|
if self.adb_client is None:
|
|
@@ -243,20 +243,6 @@ def input_text(ctx: MobileUseContext, text: str, dry_run: bool = False):
|
|
|
243
243
|
return run_flow(ctx, [{"inputText": text}], dry_run=dry_run)
|
|
244
244
|
|
|
245
245
|
|
|
246
|
-
def copy_text_from(ctx: MobileUseContext, selector_request: SelectorRequest, dry_run: bool = False):
|
|
247
|
-
copy_text_from_body = selector_request.to_dict()
|
|
248
|
-
if not copy_text_from_body:
|
|
249
|
-
error = "Invalid copyTextFrom selector request, could not format yaml"
|
|
250
|
-
logger.error(error)
|
|
251
|
-
raise ControllerErrors(error)
|
|
252
|
-
flow_input = [{"copyTextFrom": copy_text_from_body}]
|
|
253
|
-
return run_flow(ctx, flow_input, dry_run=dry_run)
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
def paste_text(ctx: MobileUseContext, dry_run: bool = False):
|
|
257
|
-
return run_flow(ctx, ["pasteText"], dry_run=dry_run)
|
|
258
|
-
|
|
259
|
-
|
|
260
246
|
def erase_text(ctx: MobileUseContext, nb_chars: int | None = None, dry_run: bool = False):
|
|
261
247
|
"""
|
|
262
248
|
Removes characters from the currently selected textfield (if any)
|
|
@@ -333,6 +319,7 @@ def run_flow_with_wait_for_animation_to_end(
|
|
|
333
319
|
|
|
334
320
|
if __name__ == "__main__":
|
|
335
321
|
ctx = MobileUseContext(
|
|
322
|
+
trace_id="trace_id",
|
|
336
323
|
llm_config=initialize_llm_config(),
|
|
337
324
|
device=DeviceContext(
|
|
338
325
|
host_platform="WINDOWS",
|
|
@@ -54,7 +54,7 @@ class State(AgentStatePydantic):
|
|
|
54
54
|
take_last,
|
|
55
55
|
]
|
|
56
56
|
|
|
57
|
-
def
|
|
57
|
+
async def asanitize_update(
|
|
58
58
|
self,
|
|
59
59
|
ctx: MobileUseContext,
|
|
60
60
|
update: dict,
|
|
@@ -72,7 +72,7 @@ class State(AgentStatePydantic):
|
|
|
72
72
|
raise ValueError("agents_thoughts must be a str or list[str]")
|
|
73
73
|
if agent is None:
|
|
74
74
|
raise ValueError("Agent is required when updating the 'agents_thoughts' key")
|
|
75
|
-
update["agents_thoughts"] = _add_agent_thoughts(
|
|
75
|
+
update["agents_thoughts"] = await _add_agent_thoughts(
|
|
76
76
|
ctx=ctx,
|
|
77
77
|
old=self.agents_thoughts,
|
|
78
78
|
new=updated_agents_thoughts,
|
|
@@ -81,12 +81,16 @@ class State(AgentStatePydantic):
|
|
|
81
81
|
return update
|
|
82
82
|
|
|
83
83
|
|
|
84
|
-
def _add_agent_thoughts(
|
|
84
|
+
async def _add_agent_thoughts(
|
|
85
85
|
ctx: MobileUseContext,
|
|
86
86
|
old: list[str],
|
|
87
87
|
new: list[str],
|
|
88
88
|
agent: AgentNode,
|
|
89
89
|
) -> list[str]:
|
|
90
|
+
if ctx.on_agent_thought:
|
|
91
|
+
for thought in new:
|
|
92
|
+
await ctx.on_agent_thought(agent, thought)
|
|
93
|
+
|
|
90
94
|
named_thoughts = [f"[{agent}] {thought}" for thought in new]
|
|
91
95
|
if ctx.execution_setup:
|
|
92
96
|
record_interaction(ctx, response=AIMessage(content=str(named_thoughts)))
|