minitap-mobile-use 2.7.1__py3-none-any.whl → 2.8.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of minitap-mobile-use might be problematic. Click here for more details.
- minitap/mobile_use/agents/cortex/cortex.py +2 -6
- minitap/mobile_use/agents/executor/executor.py +2 -6
- minitap/mobile_use/agents/executor/tool_node.py +31 -6
- minitap/mobile_use/agents/hopper/hopper.py +2 -6
- minitap/mobile_use/agents/orchestrator/orchestrator.py +2 -6
- minitap/mobile_use/agents/outputter/outputter.py +2 -4
- minitap/mobile_use/agents/planner/planner.py +2 -2
- minitap/mobile_use/agents/screen_analyzer/screen_analyzer.py +2 -6
- minitap/mobile_use/config.py +1 -1
- minitap/mobile_use/graph/graph.py +6 -2
- minitap/mobile_use/main.py +2 -2
- minitap/mobile_use/sdk/agent.py +212 -12
- minitap/mobile_use/sdk/builders/agent_config_builder.py +43 -9
- minitap/mobile_use/sdk/examples/README.md +1 -1
- minitap/mobile_use/sdk/examples/platform_manual_task_example.py +2 -2
- minitap/mobile_use/sdk/examples/platform_minimal_example.py +2 -3
- minitap/mobile_use/sdk/examples/simple_photo_organizer.py +2 -2
- minitap/mobile_use/sdk/examples/smart_notification_assistant.py +2 -2
- minitap/mobile_use/sdk/services/cloud_mobile.py +582 -0
- minitap/mobile_use/sdk/types/agent.py +3 -0
- minitap/mobile_use/sdk/types/exceptions.py +7 -0
- minitap/mobile_use/sdk/types/task.py +0 -3
- minitap/mobile_use/services/llm.py +0 -2
- {minitap_mobile_use-2.7.1.dist-info → minitap_mobile_use-2.8.0.dist-info}/METADATA +1 -1
- {minitap_mobile_use-2.7.1.dist-info → minitap_mobile_use-2.8.0.dist-info}/RECORD +27 -26
- {minitap_mobile_use-2.7.1.dist-info → minitap_mobile_use-2.8.0.dist-info}/WHEEL +0 -0
- {minitap_mobile_use-2.7.1.dist-info → minitap_mobile_use-2.8.0.dist-info}/entry_points.txt +0 -0
|
@@ -73,12 +73,8 @@ class CortexNode:
|
|
|
73
73
|
ctx=self.ctx, name="cortex", use_fallback=True, temperature=1
|
|
74
74
|
).with_structured_output(CortexOutput)
|
|
75
75
|
response: CortexOutput = await with_fallback(
|
|
76
|
-
main_call=lambda: invoke_llm_with_timeout_message(
|
|
77
|
-
|
|
78
|
-
),
|
|
79
|
-
fallback_call=lambda: invoke_llm_with_timeout_message(
|
|
80
|
-
llm_fallback.ainvoke(messages), agent_name="Cortex (Fallback)"
|
|
81
|
-
),
|
|
76
|
+
main_call=lambda: invoke_llm_with_timeout_message(llm.ainvoke(messages)),
|
|
77
|
+
fallback_call=lambda: invoke_llm_with_timeout_message(llm_fallback.ainvoke(messages)),
|
|
82
78
|
) # type: ignore
|
|
83
79
|
|
|
84
80
|
EMPTY_STRING_TOKENS = ["{}", "[]", "null", "", "None"]
|
|
@@ -65,12 +65,8 @@ class ExecutorNode:
|
|
|
65
65
|
llm = llm.bind_tools(**llm_bind_tools_kwargs)
|
|
66
66
|
llm_fallback = llm_fallback.bind_tools(**llm_bind_tools_kwargs)
|
|
67
67
|
response = await with_fallback(
|
|
68
|
-
main_call=lambda: invoke_llm_with_timeout_message(
|
|
69
|
-
|
|
70
|
-
),
|
|
71
|
-
fallback_call=lambda: invoke_llm_with_timeout_message(
|
|
72
|
-
llm_fallback.ainvoke(messages), agent_name="Executor (Fallback)"
|
|
73
|
-
),
|
|
68
|
+
main_call=lambda: invoke_llm_with_timeout_message(llm.ainvoke(messages)),
|
|
69
|
+
fallback_call=lambda: invoke_llm_with_timeout_message(llm_fallback.ainvoke(messages)),
|
|
74
70
|
)
|
|
75
71
|
return await state.asanitize_update(
|
|
76
72
|
ctx=self.ctx,
|
|
@@ -1,12 +1,17 @@
|
|
|
1
1
|
import asyncio
|
|
2
|
-
|
|
3
|
-
from
|
|
4
|
-
|
|
5
|
-
from typing import override
|
|
6
|
-
from langchain_core.runnables import RunnableConfig
|
|
7
|
-
from langgraph.store.base import BaseStore
|
|
2
|
+
import copy
|
|
3
|
+
from typing import Any, override
|
|
4
|
+
|
|
8
5
|
from langchain_core.messages import AnyMessage, ToolCall, ToolMessage
|
|
6
|
+
from langchain_core.runnables import RunnableConfig
|
|
9
7
|
from langgraph.prebuilt import ToolNode
|
|
8
|
+
from langgraph.store.base import BaseStore
|
|
9
|
+
from langgraph.types import Command
|
|
10
|
+
from pydantic import BaseModel
|
|
11
|
+
|
|
12
|
+
from minitap.mobile_use.utils.logger import get_logger
|
|
13
|
+
|
|
14
|
+
logger = get_logger(__name__)
|
|
10
15
|
|
|
11
16
|
|
|
12
17
|
class ExecutorToolNode(ToolNode):
|
|
@@ -67,6 +72,26 @@ class ExecutorToolNode(ToolNode):
|
|
|
67
72
|
message=f"Unexpected tool output type: {type(output)}",
|
|
68
73
|
)
|
|
69
74
|
failed = True
|
|
75
|
+
|
|
76
|
+
call_without_state = copy.deepcopy(call)
|
|
77
|
+
if "args" in call_without_state and "state" in call_without_state["args"]:
|
|
78
|
+
del call_without_state["args"]["state"]
|
|
79
|
+
if failed:
|
|
80
|
+
error_msg = ""
|
|
81
|
+
try:
|
|
82
|
+
if isinstance(output, ToolMessage):
|
|
83
|
+
error_msg = output.content
|
|
84
|
+
elif isinstance(output, Command):
|
|
85
|
+
tool_msg = self._get_tool_message(output)
|
|
86
|
+
error_msg = tool_msg.content
|
|
87
|
+
except Exception:
|
|
88
|
+
error_msg = "Could not extract error details"
|
|
89
|
+
|
|
90
|
+
logger.info(f"❌ Tool call failed: {call_without_state}")
|
|
91
|
+
logger.info(f" Error: {error_msg}")
|
|
92
|
+
else:
|
|
93
|
+
logger.info("✅ Tool call succeeded: " + str(call_without_state))
|
|
94
|
+
|
|
70
95
|
outputs.append(output)
|
|
71
96
|
return self._combine_tool_outputs(outputs, input_type) # type: ignore
|
|
72
97
|
|
|
@@ -37,11 +37,7 @@ async def hopper(
|
|
|
37
37
|
ctx=ctx, name="hopper", is_utils=True, use_fallback=True, temperature=0
|
|
38
38
|
).with_structured_output(HopperOutput)
|
|
39
39
|
response: HopperOutput = await with_fallback(
|
|
40
|
-
main_call=lambda: invoke_llm_with_timeout_message(
|
|
41
|
-
|
|
42
|
-
),
|
|
43
|
-
fallback_call=lambda: invoke_llm_with_timeout_message(
|
|
44
|
-
llm_fallback.ainvoke(messages), agent_name="Hopper (Fallback)"
|
|
45
|
-
),
|
|
40
|
+
main_call=lambda: invoke_llm_with_timeout_message(llm.ainvoke(messages)),
|
|
41
|
+
fallback_call=lambda: invoke_llm_with_timeout_message(llm_fallback.ainvoke(messages)),
|
|
46
42
|
) # type: ignore
|
|
47
43
|
return response
|
|
@@ -81,12 +81,8 @@ class OrchestratorNode:
|
|
|
81
81
|
ctx=self.ctx, name="orchestrator", use_fallback=True, temperature=1
|
|
82
82
|
).with_structured_output(OrchestratorOutput)
|
|
83
83
|
response: OrchestratorOutput = await with_fallback(
|
|
84
|
-
main_call=lambda: invoke_llm_with_timeout_message(
|
|
85
|
-
|
|
86
|
-
),
|
|
87
|
-
fallback_call=lambda: invoke_llm_with_timeout_message(
|
|
88
|
-
llm_fallback.ainvoke(messages), agent_name="Orchestrator (Fallback)"
|
|
89
|
-
),
|
|
84
|
+
main_call=lambda: invoke_llm_with_timeout_message(llm.ainvoke(messages)),
|
|
85
|
+
fallback_call=lambda: invoke_llm_with_timeout_message(llm_fallback.ainvoke(messages)),
|
|
90
86
|
) # type: ignore
|
|
91
87
|
if response.needs_replaning:
|
|
92
88
|
thoughts = [response.reason]
|
|
@@ -68,11 +68,9 @@ async def outputter(
|
|
|
68
68
|
structured_llm_fallback = llm_fallback.with_structured_output(schema)
|
|
69
69
|
|
|
70
70
|
response = await with_fallback(
|
|
71
|
-
main_call=lambda: invoke_llm_with_timeout_message(
|
|
72
|
-
structured_llm.ainvoke(messages), agent_name="Outputter"
|
|
73
|
-
),
|
|
71
|
+
main_call=lambda: invoke_llm_with_timeout_message(structured_llm.ainvoke(messages)),
|
|
74
72
|
fallback_call=lambda: invoke_llm_with_timeout_message(
|
|
75
|
-
structured_llm_fallback.ainvoke(messages)
|
|
73
|
+
structured_llm_fallback.ainvoke(messages)
|
|
76
74
|
),
|
|
77
75
|
) # type: ignore
|
|
78
76
|
if isinstance(response, BaseModel):
|
|
@@ -52,10 +52,10 @@ class PlannerNode:
|
|
|
52
52
|
).with_structured_output(PlannerOutput)
|
|
53
53
|
response: PlannerOutput = await with_fallback(
|
|
54
54
|
main_call=lambda: invoke_llm_with_timeout_message(
|
|
55
|
-
llm.ainvoke(messages),
|
|
55
|
+
llm.ainvoke(messages),
|
|
56
56
|
),
|
|
57
57
|
fallback_call=lambda: invoke_llm_with_timeout_message(
|
|
58
|
-
llm_fallback.ainvoke(messages),
|
|
58
|
+
llm_fallback.ainvoke(messages),
|
|
59
59
|
),
|
|
60
60
|
) # type: ignore
|
|
61
61
|
subgoals_plan = [
|
|
@@ -101,11 +101,7 @@ async def screen_analyzer(ctx: MobileUseContext, screenshot_base64: str, prompt:
|
|
|
101
101
|
llm_fallback = get_llm(ctx=ctx, name="screen_analyzer", use_fallback=True, temperature=0)
|
|
102
102
|
|
|
103
103
|
response = await with_fallback(
|
|
104
|
-
main_call=lambda: invoke_llm_with_timeout_message(
|
|
105
|
-
|
|
106
|
-
),
|
|
107
|
-
fallback_call=lambda: invoke_llm_with_timeout_message(
|
|
108
|
-
llm_fallback.ainvoke(messages), agent_name="ScreenAnalyzer (Fallback)"
|
|
109
|
-
),
|
|
104
|
+
main_call=lambda: invoke_llm_with_timeout_message(llm.ainvoke(messages)),
|
|
105
|
+
fallback_call=lambda: invoke_llm_with_timeout_message(llm_fallback.ainvoke(messages)),
|
|
110
106
|
)
|
|
111
107
|
return response.content # type: ignore
|
minitap/mobile_use/config.py
CHANGED
|
@@ -86,7 +86,7 @@ def record_events(output_path: Path | None, events: list[str] | BaseModel | Any)
|
|
|
86
86
|
else:
|
|
87
87
|
events_content = json.dumps(events, indent=2)
|
|
88
88
|
|
|
89
|
-
with open(output_path, "w") as f:
|
|
89
|
+
with open(output_path, "w", encoding="utf-8") as f:
|
|
90
90
|
f.write(events_content)
|
|
91
91
|
|
|
92
92
|
|
|
@@ -96,10 +96,14 @@ def post_executor_gate(
|
|
|
96
96
|
if isinstance(last_message, AIMessage):
|
|
97
97
|
tool_calls = getattr(last_message, "tool_calls", None)
|
|
98
98
|
if tool_calls and len(tool_calls) > 0:
|
|
99
|
-
logger.info("
|
|
99
|
+
logger.info("[executor] Executing " + str(len(tool_calls)) + " tool calls:")
|
|
100
|
+
for tool_call in tool_calls:
|
|
101
|
+
logger.info("-------------")
|
|
102
|
+
logger.info("[executor] - " + str(tool_call) + "\n")
|
|
103
|
+
logger.info("-------------")
|
|
100
104
|
return "invoke_tools"
|
|
101
105
|
else:
|
|
102
|
-
logger.info("
|
|
106
|
+
logger.info("[executor] ❌ No tool calls found")
|
|
103
107
|
return "skip"
|
|
104
108
|
|
|
105
109
|
|
minitap/mobile_use/main.py
CHANGED
|
@@ -43,7 +43,7 @@ async def run_automation(
|
|
|
43
43
|
config.with_graph_config_callbacks(graph_config_callbacks)
|
|
44
44
|
|
|
45
45
|
agent = Agent(config=config.build())
|
|
46
|
-
agent.init(
|
|
46
|
+
await agent.init(
|
|
47
47
|
retry_count=int(os.getenv("MOBILE_USE_HEALTH_RETRIES", 5)),
|
|
48
48
|
retry_wait_seconds=int(os.getenv("MOBILE_USE_HEALTH_DELAY", 2)),
|
|
49
49
|
)
|
|
@@ -63,7 +63,7 @@ async def run_automation(
|
|
|
63
63
|
|
|
64
64
|
await agent.run_task(request=task.build())
|
|
65
65
|
|
|
66
|
-
agent.clean()
|
|
66
|
+
await agent.clean()
|
|
67
67
|
|
|
68
68
|
|
|
69
69
|
@app.command()
|
minitap/mobile_use/sdk/agent.py
CHANGED
|
@@ -13,6 +13,7 @@ from typing import Any, TypeVar, overload
|
|
|
13
13
|
from adbutils import AdbClient
|
|
14
14
|
from dotenv import load_dotenv
|
|
15
15
|
from langchain_core.messages import AIMessage
|
|
16
|
+
from PIL import Image
|
|
16
17
|
from pydantic import BaseModel
|
|
17
18
|
|
|
18
19
|
from minitap.mobile_use.agents.outputter.outputter import outputter
|
|
@@ -37,12 +38,14 @@ from minitap.mobile_use.graph.state import State
|
|
|
37
38
|
from minitap.mobile_use.sdk.builders.agent_config_builder import get_default_agent_config
|
|
38
39
|
from minitap.mobile_use.sdk.builders.task_request_builder import TaskRequestBuilder
|
|
39
40
|
from minitap.mobile_use.sdk.constants import DEFAULT_HW_BRIDGE_BASE_URL, DEFAULT_SCREEN_API_BASE_URL
|
|
41
|
+
from minitap.mobile_use.sdk.services.cloud_mobile import CloudMobileService
|
|
40
42
|
from minitap.mobile_use.sdk.services.platform import PlatformService
|
|
41
43
|
from minitap.mobile_use.sdk.types.agent import AgentConfig
|
|
42
44
|
from minitap.mobile_use.sdk.types.exceptions import (
|
|
43
45
|
AgentNotInitializedError,
|
|
44
46
|
AgentProfileNotFoundError,
|
|
45
47
|
AgentTaskRequestError,
|
|
48
|
+
CloudMobileServiceUninitializedError,
|
|
46
49
|
DeviceNotFoundError,
|
|
47
50
|
ExecutableNotFoundError,
|
|
48
51
|
PlatformServiceUninitializedError,
|
|
@@ -92,6 +95,7 @@ class Agent:
|
|
|
92
95
|
_adb_client: AdbClient | None
|
|
93
96
|
_current_task: asyncio.Task | None = None
|
|
94
97
|
_task_lock: asyncio.Lock
|
|
98
|
+
_cloud_mobile_id: str | None = None
|
|
95
99
|
|
|
96
100
|
def __init__(self, *, config: AgentConfig | None = None):
|
|
97
101
|
self._config = config or get_default_agent_config()
|
|
@@ -105,19 +109,38 @@ class Agent:
|
|
|
105
109
|
self._config.servers.screen_api_base_url == DEFAULT_SCREEN_API_BASE_URL
|
|
106
110
|
)
|
|
107
111
|
self._task_lock = asyncio.Lock()
|
|
112
|
+
|
|
108
113
|
# Initialize platform service if API key is available in environment
|
|
109
|
-
# Note: Can also be initialized later with API key
|
|
114
|
+
# Note: Can also be initialized later with API key at agent .init()
|
|
110
115
|
if settings.MINITAP_API_KEY:
|
|
111
116
|
self._platform_service = PlatformService()
|
|
117
|
+
self._cloud_mobile_service = CloudMobileService()
|
|
112
118
|
else:
|
|
113
119
|
self._platform_service = None
|
|
120
|
+
self._cloud_mobile_service = None
|
|
114
121
|
|
|
115
|
-
def init(
|
|
122
|
+
async def init(
|
|
116
123
|
self,
|
|
124
|
+
api_key: str | None = None,
|
|
117
125
|
server_restart_attempts: int = 3,
|
|
118
126
|
retry_count: int = 5,
|
|
119
127
|
retry_wait_seconds: int = 5,
|
|
120
128
|
):
|
|
129
|
+
if api_key:
|
|
130
|
+
self._platform_service = PlatformService(api_key=api_key)
|
|
131
|
+
self._cloud_mobile_service = CloudMobileService(api_key=api_key)
|
|
132
|
+
|
|
133
|
+
# Skip initialization for cloud devices - no local setup required
|
|
134
|
+
if self._config.cloud_mobile_id_or_ref:
|
|
135
|
+
if not self._cloud_mobile_service:
|
|
136
|
+
raise CloudMobileServiceUninitializedError()
|
|
137
|
+
self._cloud_mobile_id = await self._cloud_mobile_service.resolve_cloud_mobile_id(
|
|
138
|
+
cloud_mobile_id_or_ref=self._config.cloud_mobile_id_or_ref,
|
|
139
|
+
)
|
|
140
|
+
logger.info("Cloud device configured - skipping local initialization")
|
|
141
|
+
self._initialized = True
|
|
142
|
+
return True
|
|
143
|
+
|
|
121
144
|
if not which("adb") and not which("xcrun"):
|
|
122
145
|
raise ExecutableNotFoundError("cli_tools")
|
|
123
146
|
if self._is_default_hw_bridge and not which("maestro"):
|
|
@@ -233,25 +256,30 @@ class Agent:
|
|
|
233
256
|
name: str | None = None,
|
|
234
257
|
request: TaskRequest[TOutput] | PlatformTaskRequest[TOutput] | None = None,
|
|
235
258
|
) -> str | dict | TOutput | None:
|
|
259
|
+
# Check if cloud mobile is configured
|
|
260
|
+
if self._config.cloud_mobile_id_or_ref:
|
|
261
|
+
if request is None or not isinstance(request, PlatformTaskRequest):
|
|
262
|
+
raise AgentTaskRequestError(
|
|
263
|
+
"When using a cloud mobile, only PlatformTaskRequest is supported. "
|
|
264
|
+
"Use AgentConfigBuilder.for_cloud_mobile() only with PlatformTaskRequest."
|
|
265
|
+
)
|
|
266
|
+
# Use cloud mobile execution path
|
|
267
|
+
return await self._run_cloud_mobile_task(request=request)
|
|
268
|
+
|
|
269
|
+
# Normal local execution path
|
|
236
270
|
if request is not None:
|
|
237
271
|
task_info = None
|
|
238
|
-
platform_service = None
|
|
239
272
|
if isinstance(request, PlatformTaskRequest):
|
|
240
|
-
|
|
241
|
-
if request.api_key:
|
|
242
|
-
platform_service = PlatformService(api_key=request.api_key)
|
|
243
|
-
elif self._platform_service:
|
|
244
|
-
platform_service = self._platform_service
|
|
245
|
-
else:
|
|
273
|
+
if not self._platform_service:
|
|
246
274
|
raise PlatformServiceUninitializedError()
|
|
247
|
-
task_info = await
|
|
275
|
+
task_info = await self._platform_service.create_task_run(request=request)
|
|
248
276
|
if isinstance(request, CloudDevicePlatformTaskRequest):
|
|
249
277
|
request.task_run_id = task_info.task_run.id
|
|
250
278
|
request.task_run_id_available_event.set()
|
|
251
279
|
self._config.agent_profiles[task_info.llm_profile.name] = task_info.llm_profile
|
|
252
280
|
request = task_info.task_request
|
|
253
281
|
return await self._run_task(
|
|
254
|
-
request=request, task_info=task_info, platform_service=
|
|
282
|
+
request=request, task_info=task_info, platform_service=self._platform_service
|
|
255
283
|
)
|
|
256
284
|
if goal is None:
|
|
257
285
|
raise AgentTaskRequestError("Goal is required")
|
|
@@ -267,6 +295,96 @@ class Agent:
|
|
|
267
295
|
task_request.with_name(name=name)
|
|
268
296
|
return await self._run_task(task_request.build())
|
|
269
297
|
|
|
298
|
+
async def _run_cloud_mobile_task(
|
|
299
|
+
self,
|
|
300
|
+
request: PlatformTaskRequest[TOutput],
|
|
301
|
+
) -> str | dict | TOutput | None:
|
|
302
|
+
"""
|
|
303
|
+
Execute a task on a cloud mobile.
|
|
304
|
+
|
|
305
|
+
This method triggers the task execution on the Platform and polls
|
|
306
|
+
for completion without running any agentic logic locally.
|
|
307
|
+
"""
|
|
308
|
+
if not self._cloud_mobile_id:
|
|
309
|
+
raise AgentTaskRequestError("Cloud mobile ID is not configured")
|
|
310
|
+
|
|
311
|
+
if not self._cloud_mobile_service:
|
|
312
|
+
raise CloudMobileServiceUninitializedError()
|
|
313
|
+
|
|
314
|
+
# Start cloud mobile if not already started
|
|
315
|
+
logger.info(f"Starting cloud mobile '{self._cloud_mobile_id}'...")
|
|
316
|
+
await self._cloud_mobile_service.start_and_wait_for_ready(
|
|
317
|
+
cloud_mobile_id=self._cloud_mobile_id,
|
|
318
|
+
)
|
|
319
|
+
logger.info(
|
|
320
|
+
f"Starting cloud mobile task execution '{self._cloud_mobile_id}'",
|
|
321
|
+
)
|
|
322
|
+
|
|
323
|
+
def log_callback(message: str):
|
|
324
|
+
"""Callback for logging timeline updates."""
|
|
325
|
+
logger.info(message)
|
|
326
|
+
|
|
327
|
+
def status_callback(
|
|
328
|
+
status: TaskRunStatus,
|
|
329
|
+
status_message: str | None,
|
|
330
|
+
):
|
|
331
|
+
"""Callback for status updates."""
|
|
332
|
+
logger.info(f"Task status update: [{status}] {status_message}")
|
|
333
|
+
|
|
334
|
+
async def _execute_cloud(cloud_mobile_service: CloudMobileService, cloud_mobile_id: str):
|
|
335
|
+
try:
|
|
336
|
+
# Execute task on cloud mobile and wait for completion
|
|
337
|
+
final_status, error, output = await cloud_mobile_service.run_task_on_cloud_mobile(
|
|
338
|
+
cloud_mobile_id=cloud_mobile_id,
|
|
339
|
+
request=request,
|
|
340
|
+
on_status_update=status_callback,
|
|
341
|
+
on_log=log_callback,
|
|
342
|
+
)
|
|
343
|
+
if final_status == "completed":
|
|
344
|
+
logger.success("Cloud mobile task completed successfully")
|
|
345
|
+
return output
|
|
346
|
+
if final_status == "failed":
|
|
347
|
+
logger.error(f"Cloud mobile task failed: {error}")
|
|
348
|
+
raise AgentTaskRequestError(
|
|
349
|
+
f"Task execution failed on cloud mobile: {error}",
|
|
350
|
+
)
|
|
351
|
+
if final_status == "cancelled":
|
|
352
|
+
logger.warning("Cloud mobile task was cancelled")
|
|
353
|
+
raise AgentTaskRequestError("Task execution was cancelled")
|
|
354
|
+
logger.error(f"Unknown cloud mobile task status: {final_status}")
|
|
355
|
+
raise AgentTaskRequestError(f"Unknown task status: {final_status}")
|
|
356
|
+
except asyncio.CancelledError:
|
|
357
|
+
# Propagate cancellation to parent coroutine.
|
|
358
|
+
logger.info("Task cancelled during execution, re-raising CancelledError")
|
|
359
|
+
raise
|
|
360
|
+
except AgentTaskRequestError:
|
|
361
|
+
# Re-raise known exceptions
|
|
362
|
+
raise
|
|
363
|
+
except Exception as e:
|
|
364
|
+
logger.error(f"Unexpected error during cloud mobile task execution: {e}")
|
|
365
|
+
raise AgentTaskRequestError(f"Unexpected error: {e}") from e
|
|
366
|
+
|
|
367
|
+
async with self._task_lock:
|
|
368
|
+
if self._current_task and not self._current_task.done():
|
|
369
|
+
logger.warning(
|
|
370
|
+
"Another cloud task is running; cancelling it before starting new one.",
|
|
371
|
+
)
|
|
372
|
+
self.stop_current_task()
|
|
373
|
+
try:
|
|
374
|
+
await self._current_task
|
|
375
|
+
except asyncio.CancelledError:
|
|
376
|
+
pass
|
|
377
|
+
try:
|
|
378
|
+
self._current_task = asyncio.create_task(
|
|
379
|
+
_execute_cloud(
|
|
380
|
+
cloud_mobile_service=self._cloud_mobile_service,
|
|
381
|
+
cloud_mobile_id=self._cloud_mobile_id,
|
|
382
|
+
),
|
|
383
|
+
)
|
|
384
|
+
return await self._current_task
|
|
385
|
+
finally:
|
|
386
|
+
self._current_task = None
|
|
387
|
+
|
|
270
388
|
async def _run_task(
|
|
271
389
|
self,
|
|
272
390
|
request: TaskRequest[TOutput],
|
|
@@ -456,6 +574,9 @@ class Agent:
|
|
|
456
574
|
Uses the configured Screen API base URL instead of hardcoding localhost.
|
|
457
575
|
"""
|
|
458
576
|
try:
|
|
577
|
+
# In cloud mode, local streaming health is irrelevant.
|
|
578
|
+
if self._config.cloud_mobile_id_or_ref:
|
|
579
|
+
return True
|
|
459
580
|
response = self._screen_api_client.get_with_retry("/streaming-status", timeout=2)
|
|
460
581
|
if response.status_code == 200:
|
|
461
582
|
data = response.json()
|
|
@@ -465,7 +586,86 @@ class Agent:
|
|
|
465
586
|
except Exception:
|
|
466
587
|
return False
|
|
467
588
|
|
|
468
|
-
def
|
|
589
|
+
async def get_screenshot(self) -> Image.Image:
|
|
590
|
+
"""
|
|
591
|
+
Capture a screenshot from the mobile device.
|
|
592
|
+
|
|
593
|
+
For cloud mobiles, this method calls the mobile-manager endpoint.
|
|
594
|
+
For local mobiles, it uses ADB (Android) or xcrun (iOS) directly.
|
|
595
|
+
|
|
596
|
+
Returns:
|
|
597
|
+
Screenshot as PIL Image
|
|
598
|
+
|
|
599
|
+
Raises:
|
|
600
|
+
AgentNotInitializedError: If the agent is not initialized
|
|
601
|
+
PlatformServiceUninitializedError: If cloud mobile service is not available
|
|
602
|
+
Exception: If screenshot capture fails
|
|
603
|
+
"""
|
|
604
|
+
# Check if cloud mobile is configured
|
|
605
|
+
if self._cloud_mobile_id:
|
|
606
|
+
if not self._cloud_mobile_service:
|
|
607
|
+
raise CloudMobileServiceUninitializedError()
|
|
608
|
+
screenshot = await self._cloud_mobile_service.get_screenshot(
|
|
609
|
+
cloud_mobile_id=self._cloud_mobile_id,
|
|
610
|
+
)
|
|
611
|
+
return screenshot
|
|
612
|
+
|
|
613
|
+
# Local device - use ADB or xcrun directly
|
|
614
|
+
if not self._initialized:
|
|
615
|
+
raise AgentNotInitializedError()
|
|
616
|
+
|
|
617
|
+
if self._device_context.mobile_platform == DevicePlatform.ANDROID:
|
|
618
|
+
# Use ADB to capture screenshot
|
|
619
|
+
logger.info("Capturing screenshot from local Android device")
|
|
620
|
+
if not self._adb_client:
|
|
621
|
+
raise Exception("ADB client not initialized")
|
|
622
|
+
|
|
623
|
+
device = self._adb_client.device(serial=self._device_context.device_id)
|
|
624
|
+
screenshot = await asyncio.to_thread(device.screenshot)
|
|
625
|
+
logger.info("Screenshot captured from local Android device")
|
|
626
|
+
return screenshot
|
|
627
|
+
|
|
628
|
+
elif self._device_context.mobile_platform == DevicePlatform.IOS:
|
|
629
|
+
# Use xcrun to capture screenshot
|
|
630
|
+
import functools
|
|
631
|
+
import subprocess
|
|
632
|
+
from io import BytesIO
|
|
633
|
+
|
|
634
|
+
logger.info("Capturing screenshot from local iOS device")
|
|
635
|
+
try:
|
|
636
|
+
# xcrun simctl io <device> screenshot --type=png -
|
|
637
|
+
result = await asyncio.to_thread(
|
|
638
|
+
functools.partial(
|
|
639
|
+
subprocess.run,
|
|
640
|
+
[
|
|
641
|
+
"xcrun",
|
|
642
|
+
"simctl",
|
|
643
|
+
"io",
|
|
644
|
+
self._device_context.device_id,
|
|
645
|
+
"screenshot",
|
|
646
|
+
"--type=png",
|
|
647
|
+
"-",
|
|
648
|
+
],
|
|
649
|
+
capture_output=True,
|
|
650
|
+
check=True,
|
|
651
|
+
)
|
|
652
|
+
)
|
|
653
|
+
# Convert bytes to PIL Image
|
|
654
|
+
screenshot = Image.open(BytesIO(result.stdout))
|
|
655
|
+
logger.info("Screenshot captured from local iOS device")
|
|
656
|
+
return screenshot
|
|
657
|
+
except subprocess.CalledProcessError as e:
|
|
658
|
+
logger.error(f"Failed to capture screenshot: {e}")
|
|
659
|
+
raise Exception(f"Failed to capture screenshot from iOS device: {e}")
|
|
660
|
+
|
|
661
|
+
else:
|
|
662
|
+
raise Exception(f"Unsupported platform: {self._device_context.mobile_platform}")
|
|
663
|
+
|
|
664
|
+
async def clean(self, force: bool = False):
|
|
665
|
+
if self._cloud_mobile_id:
|
|
666
|
+
self._initialized = False
|
|
667
|
+
logger.info("✅ Cloud-mode agent stopped.")
|
|
668
|
+
return
|
|
469
669
|
if not self._initialized and not force:
|
|
470
670
|
return
|
|
471
671
|
screen_api_ok, hw_bridge_ok = stop_servers(
|
|
@@ -45,8 +45,9 @@ class AgentConfigBuilder:
|
|
|
45
45
|
self._device_platform: DevicePlatform | None = None
|
|
46
46
|
self._servers: ServerConfig = get_default_servers()
|
|
47
47
|
self._graph_config_callbacks: Callbacks = None
|
|
48
|
+
self._cloud_mobile_id_or_ref: str | None = None
|
|
48
49
|
|
|
49
|
-
def add_profile(self, profile: AgentProfile) -> "AgentConfigBuilder":
|
|
50
|
+
def add_profile(self, profile: AgentProfile, validate: bool = True) -> "AgentConfigBuilder":
|
|
50
51
|
"""
|
|
51
52
|
Add an agent profile to the mobile-use agent.
|
|
52
53
|
|
|
@@ -54,10 +55,15 @@ class AgentConfigBuilder:
|
|
|
54
55
|
profile: The agent profile to add
|
|
55
56
|
"""
|
|
56
57
|
self._agent_profiles[profile.name] = profile
|
|
57
|
-
|
|
58
|
+
if validate:
|
|
59
|
+
profile.llm_config.validate_providers()
|
|
58
60
|
return self
|
|
59
61
|
|
|
60
|
-
def add_profiles(
|
|
62
|
+
def add_profiles(
|
|
63
|
+
self,
|
|
64
|
+
profiles: list[AgentProfile],
|
|
65
|
+
validate: bool = True,
|
|
66
|
+
) -> "AgentConfigBuilder":
|
|
61
67
|
"""
|
|
62
68
|
Add multiple agent profiles to the mobile-use agent.
|
|
63
69
|
|
|
@@ -65,8 +71,7 @@ class AgentConfigBuilder:
|
|
|
65
71
|
profiles: List of agent profiles to add
|
|
66
72
|
"""
|
|
67
73
|
for profile in profiles:
|
|
68
|
-
self.add_profile(profile=profile)
|
|
69
|
-
profile.llm_config.validate_providers()
|
|
74
|
+
self.add_profile(profile=profile, validate=validate)
|
|
70
75
|
return self
|
|
71
76
|
|
|
72
77
|
def with_default_profile(self, profile: str | AgentProfile) -> "AgentConfigBuilder":
|
|
@@ -91,10 +96,35 @@ class AgentConfigBuilder:
|
|
|
91
96
|
platform: The device platform (ANDROID or IOS)
|
|
92
97
|
device_id: The unique identifier for the device
|
|
93
98
|
"""
|
|
99
|
+
if self._cloud_mobile_id_or_ref is not None:
|
|
100
|
+
raise ValueError(
|
|
101
|
+
"Device ID cannot be set when a cloud mobile is already configured.\n"
|
|
102
|
+
"> for_device() and for_cloud_mobile() are mutually exclusive"
|
|
103
|
+
)
|
|
94
104
|
self._device_id = device_id
|
|
95
105
|
self._device_platform = platform
|
|
96
106
|
return self
|
|
97
107
|
|
|
108
|
+
def for_cloud_mobile(self, cloud_mobile_id_or_ref: str) -> "AgentConfigBuilder":
|
|
109
|
+
"""
|
|
110
|
+
Configure the mobile-use agent to use a cloud mobile.
|
|
111
|
+
|
|
112
|
+
When using a cloud mobile, tasks are executed remotely via the Platform API,
|
|
113
|
+
and only PlatformTaskRequest can be used.
|
|
114
|
+
|
|
115
|
+
Args:
|
|
116
|
+
cloud_mobile_id_or_ref: The unique identifier or reference name for the cloud mobile.
|
|
117
|
+
Can be either a UUID (e.g., '550e8400-e29b-41d4-a716-446655440000')
|
|
118
|
+
or a reference name (e.g., 'my-test-device')
|
|
119
|
+
"""
|
|
120
|
+
if self._device_id is not None:
|
|
121
|
+
raise ValueError(
|
|
122
|
+
"Cloud mobile device ID cannot be set when a device is already configured.\n"
|
|
123
|
+
"> for_device() and for_cloud_mobile() are mutually exclusive"
|
|
124
|
+
)
|
|
125
|
+
self._cloud_mobile_id_or_ref = cloud_mobile_id_or_ref
|
|
126
|
+
return self
|
|
127
|
+
|
|
98
128
|
def with_default_task_config(self, config: TaskRequestCommon) -> "AgentConfigBuilder":
|
|
99
129
|
"""
|
|
100
130
|
Set the default task configuration.
|
|
@@ -162,7 +192,7 @@ class AgentConfigBuilder:
|
|
|
162
192
|
self._graph_config_callbacks = callbacks
|
|
163
193
|
return self
|
|
164
194
|
|
|
165
|
-
def build(self) -> AgentConfig:
|
|
195
|
+
def build(self, validate_profiles: bool = True) -> AgentConfig:
|
|
166
196
|
"""
|
|
167
197
|
Build the mobile-use AgentConfig object.
|
|
168
198
|
|
|
@@ -185,14 +215,17 @@ class AgentConfigBuilder:
|
|
|
185
215
|
elif isinstance(self._default_profile, AgentProfile):
|
|
186
216
|
default_profile = self._default_profile
|
|
187
217
|
if default_profile.name not in self._agent_profiles:
|
|
188
|
-
self.add_profile(default_profile)
|
|
218
|
+
self.add_profile(default_profile, validate=validate_profiles)
|
|
189
219
|
elif nb_profiles <= 0:
|
|
190
|
-
llm_config =
|
|
220
|
+
llm_config = (
|
|
221
|
+
get_default_minitap_llm_config(validate=validate_profiles)
|
|
222
|
+
or get_default_llm_config()
|
|
223
|
+
)
|
|
191
224
|
default_profile = AgentProfile(
|
|
192
225
|
name=DEFAULT_PROFILE_NAME,
|
|
193
226
|
llm_config=llm_config,
|
|
194
227
|
)
|
|
195
|
-
self.add_profile(default_profile)
|
|
228
|
+
self.add_profile(default_profile, validate=validate_profiles)
|
|
196
229
|
elif nb_profiles == 1:
|
|
197
230
|
# Select the only one available
|
|
198
231
|
default_profile = next(iter(self._agent_profiles.values()))
|
|
@@ -210,6 +243,7 @@ class AgentConfigBuilder:
|
|
|
210
243
|
device_platform=self._device_platform,
|
|
211
244
|
servers=self._servers,
|
|
212
245
|
graph_config_callbacks=self._graph_config_callbacks,
|
|
246
|
+
cloud_mobile_id_or_ref=self._cloud_mobile_id_or_ref,
|
|
213
247
|
)
|
|
214
248
|
|
|
215
249
|
|
|
@@ -15,7 +15,7 @@ These examples demonstrate two different ways to use the SDK, each applying an a
|
|
|
15
15
|
This script shows the simplest way to run minitap :
|
|
16
16
|
|
|
17
17
|
- Visit https://platform.minitap.ai to create a task and get your API key.
|
|
18
|
-
- Initialize the agent with your API key:
|
|
18
|
+
- Initialize the agent with your API key: .init(api_key=...).
|
|
19
19
|
- Ask the agent to run one of the tasks you’ve set up in the Minitap platform
|
|
20
20
|
(e.g., "like-instagram-post").
|
|
21
21
|
- The task’s goal and settings live in the Minitap platform, you don’t need
|
|
@@ -34,7 +34,7 @@ async def main() -> None:
|
|
|
34
34
|
Set MINITAP_API_KEY and MINITAP_BASE_URL environment variables.
|
|
35
35
|
"""
|
|
36
36
|
agent = Agent()
|
|
37
|
-
agent.init()
|
|
37
|
+
await agent.init()
|
|
38
38
|
|
|
39
39
|
# Example 1: Simple manual task
|
|
40
40
|
result = await agent.run_task(
|
|
@@ -58,7 +58,7 @@ async def main() -> None:
|
|
|
58
58
|
)
|
|
59
59
|
print("Result 2:", result)
|
|
60
60
|
|
|
61
|
-
agent.clean()
|
|
61
|
+
await agent.clean()
|
|
62
62
|
|
|
63
63
|
|
|
64
64
|
if __name__ == "__main__":
|
|
@@ -31,16 +31,15 @@ async def main() -> None:
|
|
|
31
31
|
Set MINITAP_API_KEY and MINITAP_BASE_URL environment variables.
|
|
32
32
|
"""
|
|
33
33
|
agent = Agent()
|
|
34
|
-
agent.init()
|
|
34
|
+
await agent.init(api_key="<api-key>") # or set MINITAP_API_KEY env variable
|
|
35
35
|
result = await agent.run_task(
|
|
36
36
|
request=PlatformTaskRequest(
|
|
37
37
|
task="your-task-name",
|
|
38
38
|
profile="your-profile-name",
|
|
39
|
-
api_key="<api-key>", # or set MINITAP_API_KEY env variable
|
|
40
39
|
)
|
|
41
40
|
)
|
|
42
41
|
print(result)
|
|
43
|
-
agent.clean()
|
|
42
|
+
await agent.clean()
|
|
44
43
|
|
|
45
44
|
|
|
46
45
|
if __name__ == "__main__":
|