minitap-mobile-use 2.5.3__py3-none-any.whl → 2.7.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of minitap-mobile-use might be problematic. Click here for more details.
- minitap/mobile_use/agents/contextor/contextor.py +0 -8
- minitap/mobile_use/agents/cortex/cortex.md +122 -36
- minitap/mobile_use/agents/cortex/cortex.py +32 -17
- minitap/mobile_use/agents/cortex/types.py +18 -4
- minitap/mobile_use/agents/executor/executor.md +3 -3
- minitap/mobile_use/agents/executor/executor.py +10 -3
- minitap/mobile_use/agents/hopper/hopper.md +30 -2
- minitap/mobile_use/agents/hopper/hopper.py +19 -15
- minitap/mobile_use/agents/orchestrator/orchestrator.py +14 -5
- minitap/mobile_use/agents/outputter/outputter.py +13 -3
- minitap/mobile_use/agents/planner/planner.md +20 -9
- minitap/mobile_use/agents/planner/planner.py +12 -5
- minitap/mobile_use/agents/screen_analyzer/human.md +16 -0
- minitap/mobile_use/agents/screen_analyzer/screen_analyzer.py +111 -0
- minitap/mobile_use/clients/ios_client.py +7 -3
- minitap/mobile_use/config.py +87 -24
- minitap/mobile_use/controllers/mobile_command_controller.py +354 -88
- minitap/mobile_use/controllers/platform_specific_commands_controller.py +41 -27
- minitap/mobile_use/controllers/types.py +95 -0
- minitap/mobile_use/graph/graph.py +55 -11
- minitap/mobile_use/graph/state.py +10 -3
- minitap/mobile_use/main.py +12 -4
- minitap/mobile_use/sdk/agent.py +113 -72
- minitap/mobile_use/sdk/examples/smart_notification_assistant.py +59 -10
- minitap/mobile_use/sdk/services/platform.py +15 -1
- minitap/mobile_use/sdk/types/platform.py +1 -0
- minitap/mobile_use/sdk/types/task.py +10 -1
- minitap/mobile_use/servers/device_hardware_bridge.py +13 -6
- minitap/mobile_use/services/llm.py +5 -2
- minitap/mobile_use/tools/index.py +7 -9
- minitap/mobile_use/tools/mobile/{clear_text.py → focus_and_clear_text.py} +7 -7
- minitap/mobile_use/tools/mobile/{input_text.py → focus_and_input_text.py} +8 -8
- minitap/mobile_use/tools/mobile/long_press_on.py +130 -15
- minitap/mobile_use/tools/mobile/swipe.py +3 -26
- minitap/mobile_use/tools/mobile/tap.py +41 -28
- minitap/mobile_use/tools/mobile/wait_for_delay.py +84 -0
- minitap/mobile_use/utils/cli_helpers.py +10 -6
- {minitap_mobile_use-2.5.3.dist-info → minitap_mobile_use-2.7.0.dist-info}/METADATA +1 -1
- {minitap_mobile_use-2.5.3.dist-info → minitap_mobile_use-2.7.0.dist-info}/RECORD +41 -39
- minitap/mobile_use/tools/mobile/glimpse_screen.py +0 -74
- minitap/mobile_use/tools/mobile/wait_for_animation_to_end.py +0 -64
- {minitap_mobile_use-2.5.3.dist-info → minitap_mobile_use-2.7.0.dist-info}/WHEEL +0 -0
- {minitap_mobile_use-2.5.3.dist-info → minitap_mobile_use-2.7.0.dist-info}/entry_points.txt +0 -0
|
minitap/mobile_use/controllers/types.py
ADDED
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
from pydantic import BaseModel, ConfigDict, Field
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
class TapOutput(BaseModel):
    """Output from tap operations."""

    # Stays None unless a failure message was recorded for the tap.
    error: str | None = Field(
        default=None,
        description="Error message if tap failed",
    )
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class Bounds(BaseModel):
    """Represents the bounds of a UI element."""

    # Two corner points of the element: (x1, y1) and (x2, y2).
    x1: int
    y1: int
    x2: int
    y2: int

    def get_center(self) -> "CoordinatesSelectorRequest":
        """Return the midpoint of the two corners.

        Each axis is averaged with floor division, so the result is always
        an integer coordinate.
        """
        mid_x = (self.x1 + self.x2) // 2
        mid_y = (self.y1 + self.y2) // 2
        return CoordinatesSelectorRequest(x=mid_x, y=mid_y)
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class CoordinatesSelectorRequest(BaseModel):
    # A point expressed as integer x/y coordinates.
    # extra="forbid" makes validation reject any field other than x and y.
    model_config = ConfigDict(extra="forbid")

    x: int
    y: int

    def to_str(self) -> str:
        """Render the point as the string ``"<x>, <y>"``."""
        return "{}, {}".format(self.x, self.y)
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
class PercentagesSelectorRequest(BaseModel):
    """A point expressed as percentages of the screen width and height.

    0%,0%      # top-left corner
    100%,100%  # bottom-right corner
    50%,50%    # center
    """

    # The examples above used to sit *after* model_config, where Python treats
    # them as a discarded expression statement rather than a docstring. Placing
    # them first makes them the real class docstring.
    model_config = ConfigDict(extra="forbid")

    x_percent: int = Field(ge=0, le=100, description="X percentage (0-100)")
    y_percent: int = Field(ge=0, le=100, description="Y percentage (0-100)")

    def to_str(self) -> str:
        """Render the point as the string ``"<x>%, <y>%"``."""
        return f"{self.x_percent}%, {self.y_percent}%"

    def to_coords(self, width: int, height: int) -> CoordinatesSelectorRequest:
        """Convert percentages to pixel coordinates.

        Args:
            width: Size of the target area along x.
            height: Size of the target area along y.

        Returns:
            The corresponding point, with each axis clamped to
            ``[0, max(0, size - 1)]`` so that 100% maps to the last valid
            index instead of one past it.
        """

        def scale(percent: int, size: int) -> int:
            # int() truncates the scaled value; the min/max pair then clamps it.
            raw = int(size * percent / 100)
            return min(max(raw, 0), max(0, size - 1))

        return CoordinatesSelectorRequest(
            x=scale(self.x_percent, width),
            y=scale(self.y_percent, height),
        )
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
class SwipeStartEndCoordinatesRequest(BaseModel):
    # Swipe described by two coordinate points.
    # extra="forbid" makes validation reject any field other than start/end.
    model_config = ConfigDict(extra="forbid")

    start: CoordinatesSelectorRequest
    end: CoordinatesSelectorRequest

    def to_dict(self) -> dict:
        """Return ``{"start": ..., "end": ...}`` with both points as strings."""
        origin = self.start.to_str()
        target = self.end.to_str()
        return {"start": origin, "end": target}
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
class SwipeStartEndPercentagesRequest(BaseModel):
    # Swipe described by two percentage-based points.
    # extra="forbid" makes validation reject any field other than start/end.
    model_config = ConfigDict(extra="forbid")

    start: PercentagesSelectorRequest
    end: PercentagesSelectorRequest

    def to_dict(self) -> dict:
        """Return ``{"start": ..., "end": ...}`` with both points as strings."""
        origin = self.start.to_str()
        target = self.end.to_str()
        return {"start": origin, "end": target}

    def to_coords(self, width: int, height: int) -> SwipeStartEndCoordinatesRequest:
        """Convert percentage-based swipe to coordinate-based swipe.

        Both endpoints are converted with the same ``width`` and ``height``.
        """
        origin = self.start.to_coords(width, height)
        target = self.end.to_coords(width, height)
        return SwipeStartEndCoordinatesRequest(start=origin, end=target)
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
class SwipeRequest(BaseModel):
    # A swipe in either coordinate or percentage form, plus an optional duration.
    # extra="forbid" makes validation reject any undeclared field.
    model_config = ConfigDict(extra="forbid")

    swipe_mode: SwipeStartEndCoordinatesRequest | SwipeStartEndPercentagesRequest
    duration: int | None = None  # in ms, default is 400ms

    def to_dict(self) -> dict:
        """Flatten the request into a plain dict.

        The swipe mode contributes its ``start``/``end`` entries. ``duration``
        is added only when it is truthy, so both ``None`` and ``0`` are left
        out of the result.
        """
        payload = {}
        known_modes = (SwipeStartEndCoordinatesRequest, SwipeStartEndPercentagesRequest)
        if isinstance(self.swipe_mode, known_modes):
            payload.update(self.swipe_mode.to_dict())
        if self.duration:
            payload["duration"] = self.duration
        return payload
|
|
minitap/mobile_use/graph/graph.py
CHANGED
|
@@ -1,8 +1,7 @@
|
|
|
1
|
+
from collections.abc import Sequence
|
|
1
2
|
from typing import Literal
|
|
2
3
|
|
|
3
|
-
from langchain_core.messages import
|
|
4
|
-
AIMessage,
|
|
5
|
-
)
|
|
4
|
+
from langchain_core.messages import AIMessage
|
|
6
5
|
from langgraph.constants import END, START
|
|
7
6
|
from langgraph.graph import StateGraph
|
|
8
7
|
from langgraph.graph.state import CompiledStateGraph
|
|
@@ -18,6 +17,7 @@ from minitap.mobile_use.agents.planner.utils import (
|
|
|
18
17
|
get_current_subgoal,
|
|
19
18
|
one_of_them_is_failure,
|
|
20
19
|
)
|
|
20
|
+
from minitap.mobile_use.agents.screen_analyzer.screen_analyzer import ScreenAnalyzerNode
|
|
21
21
|
from minitap.mobile_use.agents.summarizer.summarizer import SummarizerNode
|
|
22
22
|
from minitap.mobile_use.constants import EXECUTOR_MESSAGES_KEY
|
|
23
23
|
from minitap.mobile_use.context import MobileUseContext
|
|
@@ -28,6 +28,22 @@ from minitap.mobile_use.utils.logger import get_logger
|
|
|
28
28
|
logger = get_logger(__name__)
|
|
29
29
|
|
|
30
30
|
|
|
31
|
+
def convergence_node(state: State):
    """Join point where the parallel execution paths meet again.

    The node does no work of its own: it returns an empty update and leaves
    the state untouched.
    """
    no_update = {}
    return no_update
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def convergence_gate(state: State) -> Literal["continue", "end"]:
    """Decide where to go after the convergence point.

    Returns:
        ``"end"`` when every subgoal in ``state.subgoal_plan`` is completed,
        otherwise ``"continue"``.
    """
    logger.info("Starting convergence_gate")
    if not all_completed(state.subgoal_plan):
        return "continue"

    logger.info("All subgoals are completed, ending the goal")
    return "end"
|
|
45
|
+
|
|
46
|
+
|
|
31
47
|
def post_orchestrator_gate(
|
|
32
48
|
state: State,
|
|
33
49
|
) -> Literal["continue", "replan", "end"]:
|
|
@@ -50,11 +66,22 @@ def post_orchestrator_gate(
|
|
|
50
66
|
|
|
51
67
|
def post_cortex_gate(state: State) -> Sequence[str]:
    """Pick every route that should run after the Cortex step.

    Returns:
        Route keys in a fixed order: ``"review_subgoals"``,
        ``"execute_decisions"``, ``"analyze_screen"``. Each is included only
        when its condition holds, and more than one may be returned.
    """
    logger.info("Starting post_cortex_gate")

    has_completions = len(state.complete_subgoals_by_ids) > 0
    has_decisions = bool(state.structured_decisions)
    wants_analysis = bool(state.screen_analysis_prompt)

    candidates = (
        # Subgoals must be marked complete, or Cortex produced no decisions;
        # the second condition keeps the graph from getting stuck.
        ("review_subgoals", has_completions or not has_decisions),
        ("execute_decisions", has_decisions),
        ("analyze_screen", wants_analysis),
    )
    return [route for route, selected in candidates if selected]
|
|
58
85
|
|
|
59
86
|
|
|
60
87
|
def post_executor_gate(
|
|
@@ -96,7 +123,11 @@ async def get_graph(ctx: MobileUseContext) -> CompiledStateGraph:
|
|
|
96
123
|
|
|
97
124
|
graph_builder.add_node("summarizer", SummarizerNode(ctx))
|
|
98
125
|
|
|
99
|
-
|
|
126
|
+
graph_builder.add_node("screen_analyzer", ScreenAnalyzerNode(ctx))
|
|
127
|
+
|
|
128
|
+
graph_builder.add_node(node="convergence", action=convergence_node, defer=True)
|
|
129
|
+
|
|
130
|
+
## Linking nodes
|
|
100
131
|
graph_builder.add_edge(START, "planner")
|
|
101
132
|
graph_builder.add_edge("planner", "orchestrator")
|
|
102
133
|
graph_builder.add_conditional_edges(
|
|
@@ -113,8 +144,9 @@ async def get_graph(ctx: MobileUseContext) -> CompiledStateGraph:
|
|
|
113
144
|
"cortex",
|
|
114
145
|
post_cortex_gate,
|
|
115
146
|
{
|
|
116
|
-
"
|
|
117
|
-
"
|
|
147
|
+
"review_subgoals": "orchestrator",
|
|
148
|
+
"analyze_screen": "screen_analyzer",
|
|
149
|
+
"execute_decisions": "executor",
|
|
118
150
|
},
|
|
119
151
|
)
|
|
120
152
|
graph_builder.add_conditional_edges(
|
|
@@ -123,6 +155,18 @@ async def get_graph(ctx: MobileUseContext) -> CompiledStateGraph:
|
|
|
123
155
|
{"invoke_tools": "executor_tools", "skip": "summarizer"},
|
|
124
156
|
)
|
|
125
157
|
graph_builder.add_edge("executor_tools", "summarizer")
|
|
126
|
-
|
|
158
|
+
|
|
159
|
+
graph_builder.add_edge("orchestrator", "convergence")
|
|
160
|
+
graph_builder.add_edge("screen_analyzer", "convergence")
|
|
161
|
+
graph_builder.add_edge("summarizer", "convergence")
|
|
162
|
+
|
|
163
|
+
graph_builder.add_conditional_edges(
|
|
164
|
+
source="convergence",
|
|
165
|
+
path=convergence_gate,
|
|
166
|
+
path_map={
|
|
167
|
+
"continue": "contextor",
|
|
168
|
+
"end": END,
|
|
169
|
+
},
|
|
170
|
+
)
|
|
127
171
|
|
|
128
172
|
return graph_builder.compile()
|
|
minitap/mobile_use/graph/state.py
CHANGED
|
@@ -1,13 +1,14 @@
|
|
|
1
|
+
from typing import Annotated
|
|
2
|
+
|
|
1
3
|
from langchain_core.messages import AIMessage, AnyMessage
|
|
2
4
|
from langgraph.graph import add_messages
|
|
3
5
|
from langgraph.prebuilt.chat_agent_executor import AgentStatePydantic
|
|
4
|
-
from typing import Annotated
|
|
5
6
|
|
|
6
7
|
from minitap.mobile_use.agents.planner.types import Subgoal
|
|
7
8
|
from minitap.mobile_use.config import AgentNode
|
|
9
|
+
from minitap.mobile_use.context import MobileUseContext
|
|
8
10
|
from minitap.mobile_use.utils.logger import get_logger
|
|
9
11
|
from minitap.mobile_use.utils.recorder import record_interaction
|
|
10
|
-
from minitap.mobile_use.context import MobileUseContext
|
|
11
12
|
|
|
12
13
|
logger = get_logger(__name__)
|
|
13
14
|
|
|
@@ -24,7 +25,6 @@ class State(AgentStatePydantic):
|
|
|
24
25
|
subgoal_plan: Annotated[list[Subgoal], "The current plan, made of subgoals"]
|
|
25
26
|
|
|
26
27
|
# contextor related keys
|
|
27
|
-
latest_screenshot_base64: Annotated[str | None, "Latest screenshot of the device", take_last]
|
|
28
28
|
latest_ui_hierarchy: Annotated[
|
|
29
29
|
list[dict] | None, "Latest UI hierarchy of the device", take_last
|
|
30
30
|
]
|
|
@@ -43,6 +43,13 @@ class State(AgentStatePydantic):
|
|
|
43
43
|
take_last,
|
|
44
44
|
]
|
|
45
45
|
|
|
46
|
+
# screen_analyzer related keys
|
|
47
|
+
screen_analysis_prompt: Annotated[
|
|
48
|
+
str | None,
|
|
49
|
+
"Prompt for the screen_analyzer agent to analyze the screen",
|
|
50
|
+
take_last,
|
|
51
|
+
]
|
|
52
|
+
|
|
46
53
|
# executor related keys
|
|
47
54
|
executor_messages: Annotated[list[AnyMessage], "Sequential Executor messages", add_messages]
|
|
48
55
|
cortex_last_thought: Annotated[str | None, "Last thought of the cortex for the executor"]
|
minitap/mobile_use/main.py
CHANGED
|
@@ -6,6 +6,7 @@ from adbutils import AdbClient
|
|
|
6
6
|
from langchain.callbacks.base import Callbacks
|
|
7
7
|
from rich.console import Console
|
|
8
8
|
from typing import Annotated
|
|
9
|
+
from shutil import which
|
|
9
10
|
|
|
10
11
|
from minitap.mobile_use.config import (
|
|
11
12
|
initialize_llm_config,
|
|
@@ -102,10 +103,17 @@ def main(
|
|
|
102
103
|
Run the Mobile-use agent to automate tasks on a mobile device.
|
|
103
104
|
"""
|
|
104
105
|
console = Console()
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
106
|
+
|
|
107
|
+
adb_client = None
|
|
108
|
+
try:
|
|
109
|
+
if which("adb"):
|
|
110
|
+
adb_client = AdbClient(
|
|
111
|
+
host=settings.ADB_HOST or "localhost",
|
|
112
|
+
port=settings.ADB_PORT or 5037,
|
|
113
|
+
)
|
|
114
|
+
except Exception:
|
|
115
|
+
pass # ADB not available, will only support iOS devices
|
|
116
|
+
|
|
109
117
|
display_device_status(console, adb_client=adb_client)
|
|
110
118
|
asyncio.run(
|
|
111
119
|
run_automation(
|
minitap/mobile_use/sdk/agent.py
CHANGED
|
@@ -53,6 +53,7 @@ from minitap.mobile_use.sdk.types.task import (
|
|
|
53
53
|
AgentProfile,
|
|
54
54
|
PlatformTaskInfo,
|
|
55
55
|
PlatformTaskRequest,
|
|
56
|
+
CloudDevicePlatformTaskRequest,
|
|
56
57
|
Task,
|
|
57
58
|
TaskRequest,
|
|
58
59
|
)
|
|
@@ -89,6 +90,8 @@ class Agent:
|
|
|
89
90
|
_screen_api_client: ScreenApiClient
|
|
90
91
|
_hw_bridge_client: DeviceHardwareClient
|
|
91
92
|
_adb_client: AdbClient | None
|
|
93
|
+
_current_task: asyncio.Task | None = None
|
|
94
|
+
_task_lock: asyncio.Lock
|
|
92
95
|
|
|
93
96
|
def __init__(self, *, config: AgentConfig | None = None):
|
|
94
97
|
self._config = config or get_default_agent_config()
|
|
@@ -101,6 +104,7 @@ class Agent:
|
|
|
101
104
|
self._is_default_screen_api = (
|
|
102
105
|
self._config.servers.screen_api_base_url == DEFAULT_SCREEN_API_BASE_URL
|
|
103
106
|
)
|
|
107
|
+
self._task_lock = asyncio.Lock()
|
|
104
108
|
# Initialize platform service if API key is available in environment
|
|
105
109
|
# Note: Can also be initialized later with API key from request
|
|
106
110
|
if settings.MINITAP_API_KEY:
|
|
@@ -125,7 +129,7 @@ class Agent:
|
|
|
125
129
|
|
|
126
130
|
# Get first available device ID
|
|
127
131
|
if not self._config.device_id or not self._config.device_platform:
|
|
128
|
-
device_id, platform = get_first_device()
|
|
132
|
+
device_id, platform = get_first_device(logger=logger)
|
|
129
133
|
else:
|
|
130
134
|
device_id, platform = self._config.device_id, self._config.device_platform
|
|
131
135
|
|
|
@@ -241,6 +245,9 @@ class Agent:
|
|
|
241
245
|
else:
|
|
242
246
|
raise PlatformServiceUninitializedError()
|
|
243
247
|
task_info = await platform_service.create_task_run(request=request)
|
|
248
|
+
if isinstance(request, CloudDevicePlatformTaskRequest):
|
|
249
|
+
request.task_run_id = task_info.task_run.id
|
|
250
|
+
request.task_run_id_available_event.set()
|
|
244
251
|
self._config.agent_profiles[task_info.llm_profile.name] = task_info.llm_profile
|
|
245
252
|
request = task_info.task_request
|
|
246
253
|
return await self._run_task(
|
|
@@ -336,78 +343,112 @@ class Agent:
|
|
|
336
343
|
state = self._get_graph_state(task=task)
|
|
337
344
|
graph_input = state.model_dump()
|
|
338
345
|
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
|
|
346
|
+
async def _execute_task_logic():
|
|
347
|
+
last_state: State | None = None
|
|
348
|
+
last_state_snapshot: dict | None = None
|
|
349
|
+
output = None
|
|
350
|
+
try:
|
|
351
|
+
logger.info(f"[{task_name}] Invoking graph with input: {graph_input}")
|
|
352
|
+
await task.set_status(status="running", message="Invoking graph...")
|
|
353
|
+
async for chunk in (await get_graph(context)).astream(
|
|
354
|
+
input=graph_input,
|
|
355
|
+
config={
|
|
356
|
+
"recursion_limit": task.request.max_steps,
|
|
357
|
+
"callbacks": self._config.graph_config_callbacks,
|
|
358
|
+
},
|
|
359
|
+
stream_mode=["messages", "custom", "updates", "values"],
|
|
360
|
+
):
|
|
361
|
+
stream_mode, payload = chunk
|
|
362
|
+
if stream_mode == "values":
|
|
363
|
+
last_state_snapshot = payload # type: ignore
|
|
364
|
+
last_state = State(**last_state_snapshot) # type: ignore
|
|
365
|
+
if task.request.thoughts_output_path:
|
|
366
|
+
record_events(
|
|
367
|
+
output_path=task.request.thoughts_output_path,
|
|
368
|
+
events=last_state.agents_thoughts,
|
|
369
|
+
)
|
|
370
|
+
|
|
371
|
+
if stream_mode == "updates":
|
|
372
|
+
for _, value in payload.items(): # type: ignore node name, node output
|
|
373
|
+
if value and "agents_thoughts" in value:
|
|
374
|
+
new_thoughts = value["agents_thoughts"]
|
|
375
|
+
last_item = new_thoughts[-1] if new_thoughts else None
|
|
376
|
+
if last_item:
|
|
377
|
+
log_agent_thought(
|
|
378
|
+
agent_thought=last_item,
|
|
379
|
+
)
|
|
380
|
+
|
|
381
|
+
if not last_state:
|
|
382
|
+
err = f"[{task_name}] No result received from graph"
|
|
383
|
+
logger.warning(err)
|
|
384
|
+
await task.finalize(content=output, state=last_state_snapshot, error=err)
|
|
385
|
+
return None
|
|
386
|
+
|
|
387
|
+
print_ai_response_to_stderr(graph_result=last_state)
|
|
388
|
+
output = await self._extract_output(
|
|
389
|
+
task_name=task_name,
|
|
390
|
+
ctx=context,
|
|
391
|
+
request=request,
|
|
392
|
+
output_config=output_config,
|
|
393
|
+
state=last_state,
|
|
394
|
+
)
|
|
395
|
+
logger.info(f"✅ Automation '{task_name}' is success ✅")
|
|
396
|
+
await task.finalize(content=output, state=last_state_snapshot)
|
|
397
|
+
return output
|
|
398
|
+
except asyncio.CancelledError:
|
|
399
|
+
err = f"[{task_name}] Task cancelled"
|
|
375
400
|
logger.warning(err)
|
|
376
|
-
await task.finalize(
|
|
377
|
-
|
|
401
|
+
await task.finalize(
|
|
402
|
+
content=output,
|
|
403
|
+
state=last_state_snapshot,
|
|
404
|
+
error=err,
|
|
405
|
+
cancelled=True,
|
|
406
|
+
)
|
|
407
|
+
raise
|
|
408
|
+
except Exception as e:
|
|
409
|
+
err = f"[{task_name}] Error running automation: {e}"
|
|
410
|
+
logger.error(err)
|
|
411
|
+
await task.finalize(
|
|
412
|
+
content=output,
|
|
413
|
+
state=last_state_snapshot,
|
|
414
|
+
error=err,
|
|
415
|
+
)
|
|
416
|
+
raise
|
|
417
|
+
finally:
|
|
418
|
+
self._finalize_tracing(task=task, context=context)
|
|
378
419
|
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
|
|
408
|
-
|
|
409
|
-
|
|
410
|
-
|
|
420
|
+
async with self._task_lock:
|
|
421
|
+
if self._current_task and not self._current_task.done():
|
|
422
|
+
logger.warning(
|
|
423
|
+
"Another automation task is already running. "
|
|
424
|
+
"Stopping it before starting the new one."
|
|
425
|
+
)
|
|
426
|
+
self.stop_current_task()
|
|
427
|
+
try:
|
|
428
|
+
await self._current_task
|
|
429
|
+
except asyncio.CancelledError:
|
|
430
|
+
pass
|
|
431
|
+
|
|
432
|
+
try:
|
|
433
|
+
self._current_task = asyncio.create_task(_execute_task_logic())
|
|
434
|
+
return await self._current_task
|
|
435
|
+
finally:
|
|
436
|
+
self._current_task = None
|
|
437
|
+
|
|
438
|
+
def stop_current_task(self):
    """Ask the currently running automation task, if any, to cancel.

    Only a cancellation request is issued; this method does not wait for
    the task to finish. Each outcome is logged.
    """
    running = self._current_task
    if not running or running.done():
        logger.info("No active automation task to stop.")
        return

    logger.info("Requesting to stop the current automation task...")
    if running.cancel():
        logger.success("Cancellation request for the current task was sent.")
        return

    # cancel() returned False: no cancellation request could be made.
    logger.warning(
        "Could not send cancellation request for the current task "
        "(it may already be completing)."
    )
|
|
411
452
|
|
|
412
453
|
def is_healthy(self):
|
|
413
454
|
"""
|
|
@@ -522,11 +563,11 @@ class Agent:
|
|
|
522
563
|
initial_goal=task.request.goal,
|
|
523
564
|
subgoal_plan=[],
|
|
524
565
|
latest_ui_hierarchy=None,
|
|
525
|
-
latest_screenshot_base64=None,
|
|
526
566
|
focused_app_info=None,
|
|
527
567
|
device_date=None,
|
|
528
568
|
structured_decisions=None,
|
|
529
569
|
complete_subgoals_by_ids=[],
|
|
570
|
+
screen_analysis_prompt=None,
|
|
530
571
|
agents_thoughts=[],
|
|
531
572
|
remaining_steps=task.request.max_steps,
|
|
532
573
|
executor_messages=[],
|
|
minitap/mobile_use/sdk/examples/smart_notification_assistant.py
CHANGED
|
@@ -22,6 +22,7 @@ from datetime import datetime
|
|
|
22
22
|
from enum import Enum
|
|
23
23
|
|
|
24
24
|
from pydantic import BaseModel, Field
|
|
25
|
+
|
|
25
26
|
from minitap.mobile_use.config import LLM, LLMConfig, LLMConfigUtils, LLMWithFallback
|
|
26
27
|
from minitap.mobile_use.sdk import Agent
|
|
27
28
|
from minitap.mobile_use.sdk.builders import Builders
|
|
@@ -62,17 +63,42 @@ def get_agent() -> Agent:
|
|
|
62
63
|
analyzer_profile = AgentProfile(
|
|
63
64
|
name="analyzer",
|
|
64
65
|
llm_config=LLMConfig(
|
|
65
|
-
planner=
|
|
66
|
-
|
|
66
|
+
planner=LLMWithFallback(
|
|
67
|
+
provider="openrouter",
|
|
68
|
+
model="meta-llama/llama-4-scout",
|
|
69
|
+
fallback=LLM(provider="openrouter", model="meta-llama/llama-4-maverick"),
|
|
70
|
+
),
|
|
71
|
+
orchestrator=LLMWithFallback(
|
|
72
|
+
provider="openrouter",
|
|
73
|
+
model="meta-llama/llama-4-scout",
|
|
74
|
+
fallback=LLM(provider="openrouter", model="meta-llama/llama-4-maverick"),
|
|
75
|
+
),
|
|
67
76
|
cortex=LLMWithFallback(
|
|
68
77
|
provider="openai",
|
|
69
78
|
model="o4-mini",
|
|
70
79
|
fallback=LLM(provider="openai", model="gpt-5"),
|
|
71
80
|
),
|
|
72
|
-
|
|
81
|
+
screen_analyzer=LLMWithFallback(
|
|
82
|
+
provider="openai",
|
|
83
|
+
model="gpt-4o",
|
|
84
|
+
fallback=LLM(provider="openai", model="gpt-5-nano"),
|
|
85
|
+
),
|
|
86
|
+
executor=LLMWithFallback(
|
|
87
|
+
provider="openai",
|
|
88
|
+
model="gpt-5-nano",
|
|
89
|
+
fallback=LLM(provider="openai", model="gpt-5-mini"),
|
|
90
|
+
),
|
|
73
91
|
utils=LLMConfigUtils(
|
|
74
|
-
outputter=
|
|
75
|
-
|
|
92
|
+
outputter=LLMWithFallback(
|
|
93
|
+
provider="openai",
|
|
94
|
+
model="gpt-5-nano",
|
|
95
|
+
fallback=LLM(provider="openai", model="gpt-5-mini"),
|
|
96
|
+
),
|
|
97
|
+
hopper=LLMWithFallback(
|
|
98
|
+
provider="openai",
|
|
99
|
+
model="gpt-5-nano",
|
|
100
|
+
fallback=LLM(provider="openai", model="gpt-5-mini"),
|
|
101
|
+
),
|
|
76
102
|
),
|
|
77
103
|
),
|
|
78
104
|
# from_file="/tmp/analyzer.jsonc" # can be loaded from file
|
|
@@ -82,17 +108,40 @@ def get_agent() -> Agent:
|
|
|
82
108
|
action_profile = AgentProfile(
|
|
83
109
|
name="note_taker",
|
|
84
110
|
llm_config=LLMConfig(
|
|
85
|
-
planner=
|
|
86
|
-
|
|
111
|
+
planner=LLMWithFallback(
|
|
112
|
+
provider="openai", model="o3", fallback=LLM(provider="openai", model="gpt-5")
|
|
113
|
+
),
|
|
114
|
+
orchestrator=LLMWithFallback(
|
|
115
|
+
provider="google",
|
|
116
|
+
model="gemini-2.5-flash",
|
|
117
|
+
fallback=LLM(provider="openai", model="gpt-5"),
|
|
118
|
+
),
|
|
87
119
|
cortex=LLMWithFallback(
|
|
88
120
|
provider="openai",
|
|
89
121
|
model="o4-mini",
|
|
90
122
|
fallback=LLM(provider="openai", model="gpt-5"),
|
|
91
123
|
),
|
|
92
|
-
|
|
124
|
+
screen_analyzer=LLMWithFallback(
|
|
125
|
+
provider="openai",
|
|
126
|
+
model="gpt-4o",
|
|
127
|
+
fallback=LLM(provider="openai", model="gpt-5-nano"),
|
|
128
|
+
),
|
|
129
|
+
executor=LLMWithFallback(
|
|
130
|
+
provider="openai",
|
|
131
|
+
model="gpt-4o-mini",
|
|
132
|
+
fallback=LLM(provider="openai", model="gpt-5-nano"),
|
|
133
|
+
),
|
|
93
134
|
utils=LLMConfigUtils(
|
|
94
|
-
outputter=
|
|
95
|
-
|
|
135
|
+
outputter=LLMWithFallback(
|
|
136
|
+
provider="openai",
|
|
137
|
+
model="gpt-5-nano",
|
|
138
|
+
fallback=LLM(provider="openai", model="gpt-5-mini"),
|
|
139
|
+
),
|
|
140
|
+
hopper=LLMWithFallback(
|
|
141
|
+
provider="openai",
|
|
142
|
+
model="gpt-5-nano",
|
|
143
|
+
fallback=LLM(provider="openai", model="gpt-5-mini"),
|
|
144
|
+
),
|
|
96
145
|
),
|
|
97
146
|
),
|
|
98
147
|
)
|