minitap-mobile-use 0.0.1.dev0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of minitap-mobile-use might be problematic. Click here for more details.
- minitap/mobile_use/__init__.py +0 -0
- minitap/mobile_use/agents/contextor/contextor.py +42 -0
- minitap/mobile_use/agents/cortex/cortex.md +93 -0
- minitap/mobile_use/agents/cortex/cortex.py +107 -0
- minitap/mobile_use/agents/cortex/types.py +11 -0
- minitap/mobile_use/agents/executor/executor.md +73 -0
- minitap/mobile_use/agents/executor/executor.py +84 -0
- minitap/mobile_use/agents/executor/executor_context_cleaner.py +27 -0
- minitap/mobile_use/agents/executor/utils.py +11 -0
- minitap/mobile_use/agents/hopper/hopper.md +13 -0
- minitap/mobile_use/agents/hopper/hopper.py +45 -0
- minitap/mobile_use/agents/orchestrator/human.md +13 -0
- minitap/mobile_use/agents/orchestrator/orchestrator.md +18 -0
- minitap/mobile_use/agents/orchestrator/orchestrator.py +114 -0
- minitap/mobile_use/agents/orchestrator/types.py +14 -0
- minitap/mobile_use/agents/outputter/human.md +25 -0
- minitap/mobile_use/agents/outputter/outputter.py +75 -0
- minitap/mobile_use/agents/outputter/test_outputter.py +107 -0
- minitap/mobile_use/agents/planner/human.md +12 -0
- minitap/mobile_use/agents/planner/planner.md +64 -0
- minitap/mobile_use/agents/planner/planner.py +64 -0
- minitap/mobile_use/agents/planner/types.py +44 -0
- minitap/mobile_use/agents/planner/utils.py +45 -0
- minitap/mobile_use/agents/summarizer/summarizer.py +34 -0
- minitap/mobile_use/clients/device_hardware_client.py +23 -0
- minitap/mobile_use/clients/ios_client.py +44 -0
- minitap/mobile_use/clients/screen_api_client.py +53 -0
- minitap/mobile_use/config.py +285 -0
- minitap/mobile_use/constants.py +2 -0
- minitap/mobile_use/context.py +65 -0
- minitap/mobile_use/controllers/__init__.py +0 -0
- minitap/mobile_use/controllers/mobile_command_controller.py +379 -0
- minitap/mobile_use/controllers/platform_specific_commands_controller.py +74 -0
- minitap/mobile_use/graph/graph.py +149 -0
- minitap/mobile_use/graph/state.py +73 -0
- minitap/mobile_use/main.py +122 -0
- minitap/mobile_use/sdk/__init__.py +12 -0
- minitap/mobile_use/sdk/agent.py +524 -0
- minitap/mobile_use/sdk/builders/__init__.py +10 -0
- minitap/mobile_use/sdk/builders/agent_config_builder.py +213 -0
- minitap/mobile_use/sdk/builders/index.py +15 -0
- minitap/mobile_use/sdk/builders/task_request_builder.py +218 -0
- minitap/mobile_use/sdk/constants.py +14 -0
- minitap/mobile_use/sdk/examples/README.md +45 -0
- minitap/mobile_use/sdk/examples/__init__.py +1 -0
- minitap/mobile_use/sdk/examples/simple_photo_organizer.py +76 -0
- minitap/mobile_use/sdk/examples/smart_notification_assistant.py +177 -0
- minitap/mobile_use/sdk/types/__init__.py +49 -0
- minitap/mobile_use/sdk/types/agent.py +73 -0
- minitap/mobile_use/sdk/types/exceptions.py +74 -0
- minitap/mobile_use/sdk/types/task.py +191 -0
- minitap/mobile_use/sdk/utils.py +28 -0
- minitap/mobile_use/servers/config.py +19 -0
- minitap/mobile_use/servers/device_hardware_bridge.py +212 -0
- minitap/mobile_use/servers/device_screen_api.py +143 -0
- minitap/mobile_use/servers/start_servers.py +151 -0
- minitap/mobile_use/servers/stop_servers.py +215 -0
- minitap/mobile_use/servers/utils.py +11 -0
- minitap/mobile_use/services/accessibility.py +100 -0
- minitap/mobile_use/services/llm.py +143 -0
- minitap/mobile_use/tools/index.py +54 -0
- minitap/mobile_use/tools/mobile/back.py +52 -0
- minitap/mobile_use/tools/mobile/copy_text_from.py +77 -0
- minitap/mobile_use/tools/mobile/erase_text.py +124 -0
- minitap/mobile_use/tools/mobile/input_text.py +74 -0
- minitap/mobile_use/tools/mobile/launch_app.py +59 -0
- minitap/mobile_use/tools/mobile/list_packages.py +78 -0
- minitap/mobile_use/tools/mobile/long_press_on.py +62 -0
- minitap/mobile_use/tools/mobile/open_link.py +59 -0
- minitap/mobile_use/tools/mobile/paste_text.py +66 -0
- minitap/mobile_use/tools/mobile/press_key.py +58 -0
- minitap/mobile_use/tools/mobile/run_flow.py +57 -0
- minitap/mobile_use/tools/mobile/stop_app.py +58 -0
- minitap/mobile_use/tools/mobile/swipe.py +56 -0
- minitap/mobile_use/tools/mobile/take_screenshot.py +70 -0
- minitap/mobile_use/tools/mobile/tap.py +66 -0
- minitap/mobile_use/tools/mobile/wait_for_animation_to_end.py +68 -0
- minitap/mobile_use/tools/tool_wrapper.py +33 -0
- minitap/mobile_use/utils/cli_helpers.py +40 -0
- minitap/mobile_use/utils/cli_selection.py +144 -0
- minitap/mobile_use/utils/conversations.py +31 -0
- minitap/mobile_use/utils/decorators.py +123 -0
- minitap/mobile_use/utils/errors.py +6 -0
- minitap/mobile_use/utils/file.py +13 -0
- minitap/mobile_use/utils/logger.py +184 -0
- minitap/mobile_use/utils/media.py +73 -0
- minitap/mobile_use/utils/recorder.py +55 -0
- minitap/mobile_use/utils/requests_utils.py +37 -0
- minitap/mobile_use/utils/shell_utils.py +20 -0
- minitap/mobile_use/utils/time.py +6 -0
- minitap/mobile_use/utils/ui_hierarchy.py +30 -0
- minitap_mobile_use-0.0.1.dev0.dist-info/METADATA +274 -0
- minitap_mobile_use-0.0.1.dev0.dist-info/RECORD +95 -0
- minitap_mobile_use-0.0.1.dev0.dist-info/WHEEL +4 -0
- minitap_mobile_use-0.0.1.dev0.dist-info/entry_points.txt +3 -0
|
@@ -0,0 +1,122 @@
|
|
|
1
|
+
import asyncio
|
|
2
|
+
import os
|
|
3
|
+
from adbutils import AdbClient
|
|
4
|
+
from typing import Optional
|
|
5
|
+
|
|
6
|
+
import typer
|
|
7
|
+
from rich.console import Console
|
|
8
|
+
from typing_extensions import Annotated
|
|
9
|
+
|
|
10
|
+
from minitap.mobile_use.config import (
|
|
11
|
+
initialize_llm_config,
|
|
12
|
+
settings,
|
|
13
|
+
)
|
|
14
|
+
from minitap.mobile_use.sdk import Agent
|
|
15
|
+
from minitap.mobile_use.sdk.builders import Builders
|
|
16
|
+
from minitap.mobile_use.sdk.types.task import AgentProfile
|
|
17
|
+
from minitap.mobile_use.utils.cli_helpers import display_device_status
|
|
18
|
+
from minitap.mobile_use.utils.logger import get_logger
|
|
19
|
+
|
|
20
|
+
app = typer.Typer(add_completion=False, pretty_exceptions_enable=False)
|
|
21
|
+
logger = get_logger(__name__)
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
async def run_automation(
    goal: str,
    test_name: Optional[str] = None,
    traces_output_path_str: str = "traces",
    output_description: Optional[str] = None,
):
    """Build an agent from settings, run a single task, and always clean up.

    Args:
        goal: Goal handed to ``agent.new_task``.
        test_name: When set, the task is named after it and trace recording is
            enabled under ``traces_output_path_str``.
        traces_output_path_str: Directory passed to ``with_trace_recording``;
            only used when ``test_name`` is set.
        output_description: When set, forwarded to ``with_output_description``.

    Environment variables read here:
        MOBILE_USE_HEALTH_RETRIES / MOBILE_USE_HEALTH_DELAY: retry count and
            wait (seconds) forwarded to ``agent.init``; both default to 5.
        EVENTS_OUTPUT_PATH: if set, agent thoughts are saved to this path.
        RESULTS_OUTPUT_PATH: if set, the LLM output is saved to this path.

    Any exception raised while building or running the task propagates to the
    caller after ``agent.clean()`` has run.
    """
    llm_config = initialize_llm_config()
    agent_profile = AgentProfile(name="default", llm_config=llm_config)
    config = Builders.AgentConfig.with_default_profile(profile=agent_profile)

    # Only override the builder defaults for values explicitly configured.
    if settings.ADB_HOST:
        config.with_adb_server(host=settings.ADB_HOST, port=settings.ADB_PORT)
    if settings.DEVICE_HARDWARE_BRIDGE_BASE_URL:
        config.with_hw_bridge_base_url(url=settings.DEVICE_HARDWARE_BRIDGE_BASE_URL)
    if settings.DEVICE_SCREEN_API_BASE_URL:
        config.with_screen_api_base_url(url=settings.DEVICE_SCREEN_API_BASE_URL)

    agent = Agent(config=config.build())
    agent.init(
        # String defaults keep os.getenv's return type uniformly ``str``.
        retry_count=int(os.getenv("MOBILE_USE_HEALTH_RETRIES", "5")),
        retry_wait_seconds=int(os.getenv("MOBILE_USE_HEALTH_DELAY", "5")),
    )

    # From here on the agent is initialized, so cleanup must run even when the
    # task fails; otherwise a failed run would skip agent.clean() entirely.
    try:
        task = agent.new_task(goal)
        if test_name:
            task.with_name(test_name).with_trace_recording(path=traces_output_path_str)
        if output_description:
            task.with_output_description(output_description)

        agent_thoughts_path = os.getenv("EVENTS_OUTPUT_PATH", None)
        llm_result_path = os.getenv("RESULTS_OUTPUT_PATH", None)
        if agent_thoughts_path:
            task.with_thoughts_output_saving(path=agent_thoughts_path)
        if llm_result_path:
            task.with_llm_output_saving(path=llm_result_path)

        await agent.run_task(request=task.build())
    finally:
        agent.clean()
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
@app.command()
def main(
    goal: Annotated[str, typer.Argument(help="The main goal for the agent to achieve.")],
    test_name: Annotated[
        Optional[str],
        typer.Option(
            "--test-name",
            "-n",
            help="A name for the test recording. If provided, a trace will be saved.",
        ),
    ] = None,
    traces_path: Annotated[
        str,
        typer.Option(
            "--traces-path",
            "-p",
            help="The path to save the traces.",
        ),
    ] = "traces",
    output_description: Annotated[
        Optional[str],
        typer.Option(
            "--output-description",
            "-o",
            help=(
                """
                A dict output description for the agent.
                Ex: a JSON schema with 2 keys: type, price
                """
            ),
        ),
    ] = None,
):
    """
    Run the Mobile-use agent to automate tasks on a mobile device.
    """
    # The docstring above is kept verbatim: it is attached to the typer command.
    rich_console = Console()

    # Fall back to the local ADB server when no host/port is configured.
    adb_host = settings.ADB_HOST or "localhost"
    adb_port = settings.ADB_PORT or 5037
    adb_client = AdbClient(host=adb_host, port=adb_port)
    display_device_status(rich_console, adb_client=adb_client)

    # Drive the async automation to completion from this synchronous command.
    automation = run_automation(
        goal=goal,
        test_name=test_name,
        traces_output_path_str=traces_path,
        output_description=output_description,
    )
    asyncio.run(automation)
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
def cli():
    """Invoke the typer application; also called by the ``__main__`` guard below."""
    app()


# Allow running this module directly as a script.
if __name__ == "__main__":
    cli()
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Mobile-use SDK for running mobile automation tasks.
|
|
3
|
+
|
|
4
|
+
This package provides APIs for interacting with mobile devices and executing tasks.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from minitap.mobile_use.sdk import types, builders
|
|
8
|
+
from minitap.mobile_use.sdk.agent import Agent
|
|
9
|
+
|
|
10
|
+
# Public API: the Agent class plus everything re-exported by the
# ``types`` and ``builders`` subpackages.
__all__ = [
    "Agent",
    *types.__all__,
    *builders.__all__,
]
|
|
@@ -0,0 +1,524 @@
|
|
|
1
|
+
import asyncio
|
|
2
|
+
from datetime import datetime
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
import sys
|
|
5
|
+
import tempfile
|
|
6
|
+
import time
|
|
7
|
+
from types import NoneType
|
|
8
|
+
from typing import Optional, TypeVar, overload
|
|
9
|
+
import uuid
|
|
10
|
+
from adbutils import AdbClient
|
|
11
|
+
from langchain_core.messages import AIMessage
|
|
12
|
+
from pydantic import BaseModel
|
|
13
|
+
from minitap.mobile_use.agents.outputter.outputter import outputter
|
|
14
|
+
|
|
15
|
+
from minitap.mobile_use.config import OutputConfig, record_events
|
|
16
|
+
from minitap.mobile_use.graph.graph import get_graph
|
|
17
|
+
from minitap.mobile_use.graph.state import State
|
|
18
|
+
from minitap.mobile_use.sdk.builders.agent_config_builder import get_default_agent_config
|
|
19
|
+
from minitap.mobile_use.sdk.builders.task_request_builder import TaskRequestBuilder
|
|
20
|
+
from minitap.mobile_use.sdk.constants import (
|
|
21
|
+
DEFAULT_HW_BRIDGE_BASE_URL,
|
|
22
|
+
DEFAULT_SCREEN_API_BASE_URL,
|
|
23
|
+
)
|
|
24
|
+
from minitap.mobile_use.sdk.types.agent import AgentConfig
|
|
25
|
+
from minitap.mobile_use.context import (
|
|
26
|
+
DeviceContext,
|
|
27
|
+
DevicePlatform,
|
|
28
|
+
ExecutionSetup,
|
|
29
|
+
MobileUseContext,
|
|
30
|
+
)
|
|
31
|
+
from minitap.mobile_use.clients.device_hardware_client import DeviceHardwareClient
|
|
32
|
+
from minitap.mobile_use.clients.screen_api_client import ScreenApiClient
|
|
33
|
+
from minitap.mobile_use.controllers.mobile_command_controller import (
|
|
34
|
+
ScreenDataResponse,
|
|
35
|
+
get_screen_data,
|
|
36
|
+
)
|
|
37
|
+
from minitap.mobile_use.controllers.platform_specific_commands_controller import get_first_device
|
|
38
|
+
|
|
39
|
+
from minitap.mobile_use.servers.stop_servers import stop_servers
|
|
40
|
+
from minitap.mobile_use.servers.device_hardware_bridge import BridgeStatus
|
|
41
|
+
from minitap.mobile_use.servers.start_servers import (
|
|
42
|
+
start_device_hardware_bridge,
|
|
43
|
+
start_device_screen_api,
|
|
44
|
+
)
|
|
45
|
+
from minitap.mobile_use.utils.logger import get_logger
|
|
46
|
+
from minitap.mobile_use.sdk.types.exceptions import (
|
|
47
|
+
AgentProfileNotFoundError,
|
|
48
|
+
AgentTaskRequestError,
|
|
49
|
+
DeviceNotFoundError,
|
|
50
|
+
ServerStartupError,
|
|
51
|
+
AgentNotInitializedError,
|
|
52
|
+
)
|
|
53
|
+
from minitap.mobile_use.sdk.types.task import AgentProfile, Task, TaskRequest, TaskStatus
|
|
54
|
+
from minitap.mobile_use.utils.media import (
|
|
55
|
+
create_gif_from_trace_folder,
|
|
56
|
+
create_steps_json_from_trace_folder,
|
|
57
|
+
remove_images_from_trace_folder,
|
|
58
|
+
remove_steps_json_from_trace_folder,
|
|
59
|
+
)
|
|
60
|
+
from minitap.mobile_use.utils.recorder import log_agent_thoughts
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
logger = get_logger(__name__)
|
|
64
|
+
|
|
65
|
+
TOutput = TypeVar("TOutput", bound=Optional[BaseModel])
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
class Agent:
    """Runs mobile automation tasks against one device.

    Lifecycle, as implemented below:

    1. ``init()`` resolves the device, creates the clients, starts/checks the
       servers and builds the device context.
    2. ``run_task()`` executes a goal through the graph and returns its output.
    3. ``clean()`` calls ``stop_servers`` and marks the agent uninitialized.
    """

    _config: AgentConfig
    # Assigned per instance in __init__; no class-level list so that instances
    # can never share one mutable default.
    _tasks: list[Task]
    _tmp_traces_dir: Path
    _initialized: bool = False
    _is_default_screen_api: bool
    _is_default_hw_bridge: bool
    _device_context: DeviceContext
    _screen_api_client: ScreenApiClient
    _hw_bridge_client: DeviceHardwareClient
    _adb_client: Optional[AdbClient]

    def __init__(self, config: Optional[AgentConfig] = None):
        """Store the configuration (or the default one) without touching any device."""
        self._config = config or get_default_agent_config()
        self._tasks = []
        # Traces are first written under the system temp dir, then moved to the
        # task's trace path by _finalize_tracing.
        self._tmp_traces_dir = Path(tempfile.gettempdir()) / "mobile-use-traces"
        self._initialized = False

    def init(
        self,
        server_restart_attempts: int = 3,
        retry_count: int = 5,
        retry_wait_seconds: int = 5,
    ):
        """Resolve the device, create clients and bring the servers up.

        Args:
            server_restart_attempts: Maximum number of ``_run_servers`` attempts.
            retry_count: Forwarded to the screen API client.
            retry_wait_seconds: Forwarded to the screen API client.

        Returns:
            True once initialized (also when already initialized).

        Raises:
            DeviceNotFoundError: No device id/platform could be resolved.
            ServerStartupError: Servers did not start within the allowed attempts.
        """
        if self._initialized:
            logger.warning("Agent is already initialized. Skipping...")
            return True

        # Get first available device ID
        if not self._config.device_id or not self._config.device_platform:
            device_id, platform = get_first_device()
        else:
            device_id, platform = self._config.device_id, self._config.device_platform

        if not device_id or not platform:
            error_msg = "No device found. Exiting."
            logger.error(error_msg)
            raise DeviceNotFoundError(error_msg)

        # Initialize clients
        self._init_clients(
            platform=platform,
            retry_count=retry_count,
            retry_wait_seconds=retry_wait_seconds,
        )

        # Start necessary servers
        restart_attempt = 0
        while restart_attempt < server_restart_attempts:
            success = self._run_servers(
                device_id=device_id,
                platform=platform,
            )
            if success:
                break

            restart_attempt += 1
            if restart_attempt < server_restart_attempts:
                logger.warning(
                    f"Server start failed, attempting restart "
                    f"{restart_attempt}/{server_restart_attempts}"
                )
                # Blocking pause before the next attempt.
                time.sleep(3)
            else:
                error_msg = "Mobile-use servers failed to start after all restart attempts."
                logger.error(error_msg)
                raise ServerStartupError(message=error_msg)

        self._device_context = self._get_device_context(device_id=device_id, platform=platform)
        logger.info(self._device_context.to_str())
        logger.info("✅ Mobile-use agent initialized.")
        self._initialized = True
        return True

    def new_task(self, goal: str):
        """Return a task request builder for ``goal`` seeded with the config's task defaults."""
        return TaskRequestBuilder[None].from_common(
            goal=goal,
            common=self._config.task_request_defaults,
        )

    @overload
    async def run_task(
        self,
        *,
        goal: str,
        output: type[TOutput],
        profile: Optional[str | AgentProfile] = None,
        name: Optional[str] = None,
    ) -> Optional[TOutput]: ...

    @overload
    async def run_task(
        self,
        *,
        goal: str,
        output: str,
        profile: Optional[str | AgentProfile] = None,
        name: Optional[str] = None,
    ) -> Optional[str | dict]: ...

    @overload
    async def run_task(
        self,
        *,
        goal: str,
        output=None,
        profile: Optional[str | AgentProfile] = None,
        name: Optional[str] = None,
    ) -> Optional[str]: ...

    @overload
    async def run_task(self, *, request: TaskRequest[None]) -> Optional[str | dict]: ...

    @overload
    async def run_task(self, *, request: TaskRequest[TOutput]) -> Optional[TOutput]: ...

    async def run_task(
        self,
        *,
        goal: Optional[str] = None,
        output: Optional[type[TOutput] | str] = None,
        profile: Optional[str | AgentProfile] = None,
        name: Optional[str] = None,
        request: Optional[TaskRequest[TOutput]] = None,
    ) -> Optional[str | dict | TOutput]:
        """Run a task given either a prebuilt ``request`` or a ``goal`` plus options.

        When ``request`` is provided it is run as-is and the other arguments
        are ignored. Otherwise a request is built from ``goal``:

        * ``output`` as ``str`` becomes the output description;
        * ``output`` as a type (other than ``NoneType``) becomes the output format;
        * ``profile`` and ``name`` are applied when not None.

        Raises:
            AgentTaskRequestError: Neither ``request`` nor ``goal`` was given.
        """
        if request is not None:
            return await self._run_task(request)
        if goal is None:
            raise AgentTaskRequestError("Goal is required")
        task_request = self.new_task(goal=goal)
        if output is not None:
            if isinstance(output, str):
                task_request.with_output_description(description=output)
            elif output is not NoneType:
                task_request.with_output_format(output_format=output)
        if profile is not None:
            task_request.using_profile(profile=profile)
        if name is not None:
            task_request.with_name(name=name)
        return await self._run_task(task_request.build())

    async def _run_task(self, request: TaskRequest[TOutput]) -> Optional[str | dict | TOutput]:
        """Execute ``request`` through the graph and return the extracted output.

        Returns None when the graph produced no "values" chunk.

        Raises:
            AgentNotInitializedError: ``init()`` has not completed.
            AgentProfileNotFoundError: ``request.profile`` is not configured.
            asyncio.CancelledError / Exception: Re-raised after the task has
                been finalized with the error.
        """
        if not self._initialized:
            raise AgentNotInitializedError()

        if request.profile:
            agent_profile = self._config.agent_profiles.get(request.profile)
            if agent_profile is None:
                raise AgentProfileNotFoundError(request.profile)
        else:
            agent_profile = self._config.default_profile
        logger.info(str(agent_profile))

        task = Task(
            id=str(uuid.uuid4()),
            device=self._device_context,
            status=TaskStatus.PENDING,
            request=request,
            created_at=datetime.now(),
        )
        self._tasks.append(task)
        task_name = task.get_name()

        context = MobileUseContext(
            device=self._device_context,
            hw_bridge_client=self._hw_bridge_client,
            screen_api_client=self._screen_api_client,
            adb_client=self._adb_client,
            llm_config=agent_profile.llm_config,
        )

        self._prepare_tracing(task=task, context=context)
        self._prepare_output_files(task=task)

        output_config = None
        if request.output_description or request.output_format:
            output_config = OutputConfig(
                output_description=request.output_description,
                structured_output=request.output_format,  # type: ignore
            )
            logger.info(str(output_config))

        logger.info(f"[{task_name}] Starting graph with goal: `{request.goal}`")
        state = self._get_graph_state(task=task)
        graph_input = state.model_dump()

        last_state: State | None = None
        last_state_snapshot: dict | None = None
        output = None
        try:
            logger.info(f"[{task_name}] Invoking graph with input: {graph_input}")
            task.status = TaskStatus.RUNNING
            async for chunk in (await get_graph(context)).astream(
                input=graph_input,
                config={
                    "recursion_limit": task.request.max_steps,
                },
                stream_mode=["messages", "custom", "values"],
            ):
                stream_mode, content = chunk
                # Only "values" chunks are consumed; each one replaces the
                # last known state.
                if stream_mode == "values":
                    last_state_snapshot = content  # type: ignore
                    last_state = State(**last_state_snapshot)  # type: ignore
                    log_agent_thoughts(
                        agents_thoughts=last_state.agents_thoughts,
                        output_path=task.request.thoughts_output_path,
                    )
            if not last_state:
                err = f"[{task_name}] No result received from graph"
                logger.warning(err)
                task.finalize(content=output, state=last_state_snapshot, error=err)
                return None

            print_ai_response_to_stderr(graph_result=last_state)
            output = await self._extract_output(
                task_name=task_name,
                ctx=context,
                request=request,
                output_config=output_config,
                state=last_state,
            )
            logger.info(f"✅ Automation '{task_name}' is success ✅")
            task.finalize(content=output, state=last_state_snapshot)
        except asyncio.CancelledError:
            err = f"[{task_name}] Task cancelled"
            logger.warning(err)
            task.finalize(content=output, state=last_state_snapshot, error=err, cancelled=True)
            raise
        except Exception as e:
            err = f"[{task_name}] Error running automation: {e}"
            logger.error(err)
            task.finalize(content=output, state=last_state_snapshot, error=err)
            raise
        finally:
            # Runs on success, failure and cancellation alike.
            self._finalize_tracing(task=task, context=context)
        return output

    def clean(self):
        """Call ``stop_servers`` and mark the agent uninitialized; no-op before ``init()``."""
        if not self._initialized:
            return
        # NOTE(review): the flags passed are the negation of ``_is_default_*``,
        # while ``_run_servers`` starts a server when ``_is_default_*`` is True.
        # Confirm against the parameter semantics of ``stop_servers``.
        screen_api_ok, hw_bridge_ok = stop_servers(
            device_screen_api=not self._is_default_screen_api,
            device_hardware_bridge=not self._is_default_hw_bridge,
        )
        if not screen_api_ok:
            logger.warning("Failed to stop Device Screen API.")
        if not hw_bridge_ok:
            logger.warning("Failed to stop Device Hardware Bridge.")
        self._initialized = False
        logger.info("✅ Mobile-use agent stopped.")

    def _prepare_tracing(self, task: Task, context: MobileUseContext):
        """Create the trace folders and attach an ``ExecutionSetup`` to ``context``.

        Does nothing when the request does not ask for trace recording.
        """
        if not task.request.record_trace:
            return
        task_name = task.get_name()
        temp_trace_path = Path(self._tmp_traces_dir / task_name).resolve()
        traces_output_path = Path(task.request.trace_path).resolve()
        logger.info(f"[{task_name}] 📂 Traces output path: {traces_output_path}")
        logger.info(f"[{task_name}] 📄📂 Traces temp path: {temp_trace_path}")
        traces_output_path.mkdir(parents=True, exist_ok=True)
        temp_trace_path.mkdir(parents=True, exist_ok=True)
        context.execution_setup = ExecutionSetup(
            traces_path=self._tmp_traces_dir, trace_id=task_name
        )

    def _finalize_tracing(self, task: Task, context: MobileUseContext):
        """Compile the recorded trace and move it to the task's trace path.

        Does nothing when ``context.execution_setup`` is unset. The final
        folder name is ``<trace_id>_PASS|_FAIL_<task creation timestamp>``.
        """
        # Function-scope import: only this method needs shutil.
        import shutil

        exec_setup_ctx = context.execution_setup
        if not exec_setup_ctx:
            return

        task_name = task.get_name()
        status = "_PASS" if task.status == TaskStatus.COMPLETED else "_FAIL"
        ts = task.created_at.strftime("%Y-%m-%dT%H-%M-%S")
        new_name = f"{exec_setup_ctx.trace_id}{status}_{ts}"

        temp_trace_path = (self._tmp_traces_dir / exec_setup_ctx.trace_id).resolve()
        traces_output_path = Path(task.request.trace_path).resolve()

        logger.info(f"[{task_name}] Compiling trace FROM FOLDER: " + str(temp_trace_path))
        create_gif_from_trace_folder(temp_trace_path)
        create_steps_json_from_trace_folder(temp_trace_path)

        logger.info(f"[{task_name}] Video created, removing dust...")
        remove_images_from_trace_folder(temp_trace_path)
        remove_steps_json_from_trace_folder(temp_trace_path)
        logger.info(f"[{task_name}] 📽️ Trace compiled, moving to output path 📽️")

        # The source lives under the system temp dir and the destination is
        # user-chosen, so they may sit on different filesystems where a plain
        # rename raises OSError. shutil.move falls back to copy + delete.
        moved_to = shutil.move(str(temp_trace_path), str(traces_output_path / new_name))
        output_folder_path = Path(moved_to).resolve()
        logger.info(f"[{task_name}] 📂✅ Traces located in: {output_folder_path}")

    def _prepare_output_files(self, task: Task):
        """Validate/create the LLM-output and thoughts files requested by the task."""
        if task.request.llm_output_path:
            _validate_and_prepare_file(file_path=task.request.llm_output_path)
        if task.request.thoughts_output_path:
            _validate_and_prepare_file(file_path=task.request.thoughts_output_path)

    async def _extract_output(
        self,
        task_name: str,
        ctx: MobileUseContext,
        request: TaskRequest[TOutput],
        output_config: Optional[OutputConfig],
        state: State,
    ) -> Optional[str | dict | TOutput]:
        """Derive the task's result from the final graph state.

        With a structured output config, the outputter result is recorded and
        returned (validated into ``request.output_format`` when one is set);
        any failure there is logged and yields None. Otherwise the last agent
        thought is recorded and returned, or None when there are no thoughts.
        """
        if output_config and output_config.needs_structured_format():
            logger.info(f"[{task_name}] Generating structured output...")
            try:
                structured_output = await outputter(
                    ctx=ctx,
                    output_config=output_config,
                    graph_output=state,
                )
                logger.info(f"[{task_name}] Structured output: {structured_output}")
                record_events(output_path=request.llm_output_path, events=structured_output)
                if request.output_format is not None and request.output_format is not NoneType:
                    return request.output_format.model_validate(structured_output)
                return structured_output
            except Exception as e:
                # Deliberate best-effort: a failed extraction yields None
                # instead of failing the whole task.
                logger.error(f"[{task_name}] Failed to generate structured output: {e}")
                return None
        if state and state.agents_thoughts:
            last_msg = state.agents_thoughts[-1]
            logger.info(str(last_msg))
            record_events(output_path=request.llm_output_path, events=last_msg)
            return last_msg
        return None

    def _get_graph_state(self, task: Task):
        """Build the initial graph ``State`` for ``task`` (goal and step budget set, rest empty)."""
        return State(
            messages=[],
            initial_goal=task.request.goal,
            subgoal_plan=[],
            latest_ui_hierarchy=None,
            latest_screenshot_base64=None,
            focused_app_info=None,
            device_date=None,
            structured_decisions=None,
            agents_thoughts=[],
            remaining_steps=task.request.max_steps,
            executor_retrigger=False,
            executor_failed=False,
            executor_messages=[],
            cortex_last_thought=None,
        )

    def _init_clients(self, platform: DevicePlatform, retry_count: int, retry_wait_seconds: int):
        """Create the ADB, hardware-bridge and screen-API clients from the config.

        Also records whether each server URL equals its SDK default.
        """
        # An ADB client is only created for Android devices.
        self._adb_client = (
            AdbClient(host=self._config.servers.adb_host, port=self._config.servers.adb_port)
            if platform == DevicePlatform.ANDROID
            else None
        )
        self._hw_bridge_client = DeviceHardwareClient(
            base_url=self._config.servers.hw_bridge_base_url.to_url(),
        )
        self._is_default_hw_bridge = (
            self._config.servers.hw_bridge_base_url == DEFAULT_HW_BRIDGE_BASE_URL
        )
        self._screen_api_client = ScreenApiClient(
            base_url=self._config.servers.screen_api_base_url.to_url(),
            retry_count=retry_count,
            retry_wait_seconds=retry_wait_seconds,
        )
        self._is_default_screen_api = (
            self._config.servers.screen_api_base_url == DEFAULT_SCREEN_API_BASE_URL
        )

    def _run_servers(self, device_id: str, platform: DevicePlatform) -> bool:
        """Start the servers that use default URLs and health-check the screen API.

        Returns:
            True when everything is up, False on any startup or health failure.
        """
        if self._is_default_hw_bridge:
            bridge_instance = start_device_hardware_bridge(device_id=device_id, platform=platform)
            if not bridge_instance:
                logger.warning("Failed to start Device Hardware Bridge.")
                return False

            logger.info("Waiting for Device Hardware Bridge to connect to a device...")
            # Statuses that end the wait with a failure; built once, outside the loop.
            failed_statuses = [
                BridgeStatus.NO_DEVICE.value,
                BridgeStatus.FAILED.value,
                BridgeStatus.PORT_IN_USE.value,
                BridgeStatus.STOPPED.value,
            ]
            # Polls once per second until the bridge reports RUNNING or one of
            # the failure statuses; there is no overall timeout.
            while True:
                status_info = bridge_instance.get_status()
                status = status_info.get("status")
                output = status_info.get("output")

                if status == BridgeStatus.RUNNING.value:
                    logger.success(
                        "Device Hardware Bridge is running. "
                        + f"Connected to device: {device_id} [{platform.value}]"
                    )
                    break

                if status in failed_statuses:
                    logger.error(
                        f"Device Hardware Bridge failed to connect. "
                        f"Status: {status} - Output: {output}"
                    )
                    return False

                time.sleep(1)

        # Start Device Screen API if not already running
        if self._is_default_screen_api:
            api_process = start_device_screen_api(use_process=True)
            if not api_process:
                logger.error("Failed to start Device Screen API. Exiting.")
                return False

        # Check API health
        if not self._check_device_screen_api_health():
            logger.error("Device Screen API health check failed. Stopping...")
            return False

        return True

    def _check_device_screen_api_health(self) -> bool:
        """Return True when GET ``/health`` on the screen API succeeds, False on any error."""
        try:
            self._screen_api_client.get_with_retry("/health", timeout=5)
            return True
        except Exception as e:
            logger.error(f"Device Screen API health check failed: {e}")
            return False

    def _get_device_context(
        self,
        device_id: str,
        platform: DevicePlatform,
    ) -> DeviceContext:
        """Build the ``DeviceContext`` from the host OS and the device's screen size."""
        from platform import system

        host_platform = system()
        screen_data: ScreenDataResponse = get_screen_data(self._screen_api_client)
        return DeviceContext(
            # NOTE(review): every non-Windows host (macOS reports "Darwin") is
            # labelled "LINUX" here; confirm this is intended.
            host_platform="WINDOWS" if host_platform == "Windows" else "LINUX",
            mobile_platform=platform,
            device_id=device_id,
            device_width=screen_data.width,
            device_height=screen_data.height,
        )
|
|
507
|
+
|
|
508
|
+
|
|
509
|
+
def _validate_and_prepare_file(file_path: Path):
    """Ensure ``file_path`` can be used as an output file.

    Creates missing parent directories and the file itself when absent.

    Raises:
        AgentTaskRequestError: The path is an existing directory, or the
            parent directories / file could not be created.
    """
    target = Path(file_path)

    # Reject a directory up front.
    if target.exists() and target.is_dir():
        raise AgentTaskRequestError(f"Error: Path '{file_path}' is a directory, not a file.")

    try:
        target.parent.mkdir(parents=True, exist_ok=True)
        target.touch(exist_ok=True)
    except OSError as e:
        raise AgentTaskRequestError(f"Error creating file '{file_path}': {e}")
|
|
518
|
+
|
|
519
|
+
|
|
520
|
+
def print_ai_response_to_stderr(graph_result: State):
    """Print the content of the most recent ``AIMessage`` in the state to stderr.

    Prints nothing when the message list contains no ``AIMessage``.
    """
    # Walk the messages newest-first and stop at the first AI message.
    latest_ai_msg = next(
        (m for m in reversed(graph_result.messages) if isinstance(m, AIMessage)),
        None,
    )
    if latest_ai_msg is None:
        return
    print(latest_ai_msg.content, file=sys.stderr)
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
"""Builder classes for configuring mobile-use components."""
|
|
2
|
+
|
|
3
|
+
from minitap.mobile_use.sdk.builders.agent_config_builder import AgentConfigBuilder
|
|
4
|
+
from minitap.mobile_use.sdk.builders.task_request_builder import (
|
|
5
|
+
TaskRequestCommonBuilder,
|
|
6
|
+
TaskRequestBuilder,
|
|
7
|
+
)
|
|
8
|
+
from minitap.mobile_use.sdk.builders.index import Builders
|
|
9
|
+
|
|
10
|
+
__all__ = ["AgentConfigBuilder", "TaskRequestCommonBuilder", "TaskRequestBuilder", "Builders"]
|