minitap-mobile-use 0.0.1.dev0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of minitap-mobile-use might be problematic. Click here for more details.
- minitap/mobile_use/__init__.py +0 -0
- minitap/mobile_use/agents/contextor/contextor.py +42 -0
- minitap/mobile_use/agents/cortex/cortex.md +93 -0
- minitap/mobile_use/agents/cortex/cortex.py +107 -0
- minitap/mobile_use/agents/cortex/types.py +11 -0
- minitap/mobile_use/agents/executor/executor.md +73 -0
- minitap/mobile_use/agents/executor/executor.py +84 -0
- minitap/mobile_use/agents/executor/executor_context_cleaner.py +27 -0
- minitap/mobile_use/agents/executor/utils.py +11 -0
- minitap/mobile_use/agents/hopper/hopper.md +13 -0
- minitap/mobile_use/agents/hopper/hopper.py +45 -0
- minitap/mobile_use/agents/orchestrator/human.md +13 -0
- minitap/mobile_use/agents/orchestrator/orchestrator.md +18 -0
- minitap/mobile_use/agents/orchestrator/orchestrator.py +114 -0
- minitap/mobile_use/agents/orchestrator/types.py +14 -0
- minitap/mobile_use/agents/outputter/human.md +25 -0
- minitap/mobile_use/agents/outputter/outputter.py +75 -0
- minitap/mobile_use/agents/outputter/test_outputter.py +107 -0
- minitap/mobile_use/agents/planner/human.md +12 -0
- minitap/mobile_use/agents/planner/planner.md +64 -0
- minitap/mobile_use/agents/planner/planner.py +64 -0
- minitap/mobile_use/agents/planner/types.py +44 -0
- minitap/mobile_use/agents/planner/utils.py +45 -0
- minitap/mobile_use/agents/summarizer/summarizer.py +34 -0
- minitap/mobile_use/clients/device_hardware_client.py +23 -0
- minitap/mobile_use/clients/ios_client.py +44 -0
- minitap/mobile_use/clients/screen_api_client.py +53 -0
- minitap/mobile_use/config.py +285 -0
- minitap/mobile_use/constants.py +2 -0
- minitap/mobile_use/context.py +65 -0
- minitap/mobile_use/controllers/__init__.py +0 -0
- minitap/mobile_use/controllers/mobile_command_controller.py +379 -0
- minitap/mobile_use/controllers/platform_specific_commands_controller.py +74 -0
- minitap/mobile_use/graph/graph.py +149 -0
- minitap/mobile_use/graph/state.py +73 -0
- minitap/mobile_use/main.py +122 -0
- minitap/mobile_use/sdk/__init__.py +12 -0
- minitap/mobile_use/sdk/agent.py +524 -0
- minitap/mobile_use/sdk/builders/__init__.py +10 -0
- minitap/mobile_use/sdk/builders/agent_config_builder.py +213 -0
- minitap/mobile_use/sdk/builders/index.py +15 -0
- minitap/mobile_use/sdk/builders/task_request_builder.py +218 -0
- minitap/mobile_use/sdk/constants.py +14 -0
- minitap/mobile_use/sdk/examples/README.md +45 -0
- minitap/mobile_use/sdk/examples/__init__.py +1 -0
- minitap/mobile_use/sdk/examples/simple_photo_organizer.py +76 -0
- minitap/mobile_use/sdk/examples/smart_notification_assistant.py +177 -0
- minitap/mobile_use/sdk/types/__init__.py +49 -0
- minitap/mobile_use/sdk/types/agent.py +73 -0
- minitap/mobile_use/sdk/types/exceptions.py +74 -0
- minitap/mobile_use/sdk/types/task.py +191 -0
- minitap/mobile_use/sdk/utils.py +28 -0
- minitap/mobile_use/servers/config.py +19 -0
- minitap/mobile_use/servers/device_hardware_bridge.py +212 -0
- minitap/mobile_use/servers/device_screen_api.py +143 -0
- minitap/mobile_use/servers/start_servers.py +151 -0
- minitap/mobile_use/servers/stop_servers.py +215 -0
- minitap/mobile_use/servers/utils.py +11 -0
- minitap/mobile_use/services/accessibility.py +100 -0
- minitap/mobile_use/services/llm.py +143 -0
- minitap/mobile_use/tools/index.py +54 -0
- minitap/mobile_use/tools/mobile/back.py +52 -0
- minitap/mobile_use/tools/mobile/copy_text_from.py +77 -0
- minitap/mobile_use/tools/mobile/erase_text.py +124 -0
- minitap/mobile_use/tools/mobile/input_text.py +74 -0
- minitap/mobile_use/tools/mobile/launch_app.py +59 -0
- minitap/mobile_use/tools/mobile/list_packages.py +78 -0
- minitap/mobile_use/tools/mobile/long_press_on.py +62 -0
- minitap/mobile_use/tools/mobile/open_link.py +59 -0
- minitap/mobile_use/tools/mobile/paste_text.py +66 -0
- minitap/mobile_use/tools/mobile/press_key.py +58 -0
- minitap/mobile_use/tools/mobile/run_flow.py +57 -0
- minitap/mobile_use/tools/mobile/stop_app.py +58 -0
- minitap/mobile_use/tools/mobile/swipe.py +56 -0
- minitap/mobile_use/tools/mobile/take_screenshot.py +70 -0
- minitap/mobile_use/tools/mobile/tap.py +66 -0
- minitap/mobile_use/tools/mobile/wait_for_animation_to_end.py +68 -0
- minitap/mobile_use/tools/tool_wrapper.py +33 -0
- minitap/mobile_use/utils/cli_helpers.py +40 -0
- minitap/mobile_use/utils/cli_selection.py +144 -0
- minitap/mobile_use/utils/conversations.py +31 -0
- minitap/mobile_use/utils/decorators.py +123 -0
- minitap/mobile_use/utils/errors.py +6 -0
- minitap/mobile_use/utils/file.py +13 -0
- minitap/mobile_use/utils/logger.py +184 -0
- minitap/mobile_use/utils/media.py +73 -0
- minitap/mobile_use/utils/recorder.py +55 -0
- minitap/mobile_use/utils/requests_utils.py +37 -0
- minitap/mobile_use/utils/shell_utils.py +20 -0
- minitap/mobile_use/utils/time.py +6 -0
- minitap/mobile_use/utils/ui_hierarchy.py +30 -0
- minitap_mobile_use-0.0.1.dev0.dist-info/METADATA +274 -0
- minitap_mobile_use-0.0.1.dev0.dist-info/RECORD +95 -0
- minitap_mobile_use-0.0.1.dev0.dist-info/WHEEL +4 -0
- minitap_mobile_use-0.0.1.dev0.dist-info/entry_points.txt +3 -0
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
You are a helpful assistant tasked with generating the final structured output of a multi-agent reasoning process.
|
|
2
|
+
|
|
3
|
+
## The original goal was:
|
|
4
|
+
{{ initial_goal }}
|
|
5
|
+
|
|
6
|
+
{% if agents_thoughts %}
|
|
7
|
+
Throughout the reasoning process, the following agent thoughts were collected:
|
|
8
|
+
{% for thought in agents_thoughts %}
|
|
9
|
+
- {{ thought }}
|
|
10
|
+
{% endfor %}
|
|
11
|
+
{% endif %}
|
|
12
|
+
|
|
13
|
+
{% if last_ai_message %}
|
|
14
|
+
The last message generated by the graph execution was:
|
|
15
|
+
"{{ last_ai_message }}"
|
|
16
|
+
{% endif %}
|
|
17
|
+
|
|
18
|
+
{% if not structured_output %}
|
|
19
|
+
Please generate a well-structured JSON object based on the following instructions:
|
|
20
|
+
|
|
21
|
+
> "{{ output_description }}"
|
|
22
|
+
|
|
23
|
+
Only return the JSON object, with fields matching the description as closely as possible.
|
|
24
|
+
Do not include explanations or markdown, just the raw JSON.
|
|
25
|
+
{% endif %}
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
import json
|
|
2
|
+
from pathlib import Path
|
|
3
|
+
from typing import Dict, Type, Union
|
|
4
|
+
|
|
5
|
+
from jinja2 import Template
|
|
6
|
+
from langchain_core.messages import BaseMessage, HumanMessage, SystemMessage
|
|
7
|
+
from minitap.mobile_use.config import OutputConfig
|
|
8
|
+
from minitap.mobile_use.context import MobileUseContext
|
|
9
|
+
from minitap.mobile_use.graph.state import State
|
|
10
|
+
from minitap.mobile_use.services.llm import get_llm
|
|
11
|
+
from minitap.mobile_use.utils.conversations import is_ai_message
|
|
12
|
+
from minitap.mobile_use.utils.logger import get_logger
|
|
13
|
+
from pydantic import BaseModel
|
|
14
|
+
|
|
15
|
+
logger = get_logger(__name__)
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def _resolve_output_schema(structured_output) -> Union[Dict, Type[BaseModel], None]:
    """Map the configured structured output onto something bindable to the LLM.

    A dict is used as-is, a Pydantic instance is replaced by its class, and a
    Pydantic class is used as-is. Anything else yields ``None``.
    """
    if isinstance(structured_output, dict):
        return structured_output
    if isinstance(structured_output, BaseModel):
        return type(structured_output)
    if isinstance(structured_output, type) and issubclass(structured_output, BaseModel):
        return structured_output
    return None


async def outputter(
    ctx: MobileUseContext, output_config: OutputConfig, graph_output: State
) -> dict:
    """Ask the "outputter" LLM to turn a finished graph run into its final output.

    The ``human.md`` template next to this module is rendered with the initial
    goal, the collected agent thoughts, the output configuration and the content
    of the last message when that message is an AI message. When an output
    description is configured it is also sent as a separate human message.

    Args:
        ctx: Context forwarded to ``get_llm``.
        output_config: Provides ``structured_output`` and ``output_description``.
        graph_output: Final graph state (messages, initial goal, agent thoughts).

    Returns:
        The JSON-decoded ``content`` of the response, the ``model_dump()`` of a
        Pydantic response, or the raw response when it is neither.
    """
    logger.info("Starting Outputter Agent")

    final_message = graph_output.messages[-1] if graph_output.messages else None
    last_ai_content = None
    if final_message and is_ai_message(message=final_message):
        last_ai_content = final_message.content

    system_prompt = (
        "You are a helpful assistant tasked with generating "
        "the final structured output of a multi-agent reasoning process."
    )
    template_path = Path(__file__).parent.joinpath("human.md")
    human_prompt = Template(template_path.read_text(encoding="utf-8")).render(
        initial_goal=graph_output.initial_goal,
        agents_thoughts=graph_output.agents_thoughts,
        structured_output=output_config.structured_output,
        output_description=output_config.output_description,
        last_ai_message=last_ai_content,
    )

    messages: list[BaseMessage] = [
        SystemMessage(content=system_prompt),
        HumanMessage(content=human_prompt),
    ]
    if output_config.output_description:
        messages.append(HumanMessage(content=output_config.output_description))

    llm = get_llm(ctx=ctx, name="outputter", is_utils=True, temperature=1)

    # Fall back to the plain LLM unless a usable schema was configured.
    runnable = llm
    if output_config.structured_output:
        schema = _resolve_output_schema(output_config.structured_output)
        if schema is not None:
            runnable = llm.with_structured_output(schema)

    response = await runnable.ainvoke(messages)  # type: ignore

    if isinstance(response, BaseModel):
        # A Pydantic response exposing ``content`` is decoded as JSON only when
        # an output description was configured; otherwise it is dumped as-is.
        if output_config.output_description and hasattr(response, "content"):
            return json.loads(response.content)  # type: ignore
        return response.model_dump()
    if hasattr(response, "content"):
        return json.loads(response.content)  # type: ignore

    logger.info("Found unknown response type: " + str(type(response)))
    return response
|
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
from minitap.mobile_use.agents.outputter.outputter import outputter
|
|
2
|
+
from minitap.mobile_use.config import LLM, OutputConfig
|
|
3
|
+
from minitap.mobile_use.context import MobileUseContext
|
|
4
|
+
from minitap.mobile_use.utils.logger import get_logger
|
|
5
|
+
from pydantic import BaseModel
|
|
6
|
+
|
|
7
|
+
logger = get_logger(__name__)
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
# Pydantic schema handed to the outputter as ``structured_output`` in the tests below.
# Documented with comments rather than a docstring so the generated schema is unchanged.
class MockPydanticSchema(BaseModel):
    color: str
    price: float
    currency_symbol: str
    website_url: str
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
mock_dict = {
|
|
18
|
+
"color": "green",
|
|
19
|
+
"price": 20,
|
|
20
|
+
"currency_symbol": "$",
|
|
21
|
+
"website_url": "http://superwebsite.fr",
|
|
22
|
+
}
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class DummyState:
    """Minimal stand-in exposing the three attributes the outputter reads from a state."""

    def __init__(self, messages, initial_goal, agents_thoughts):
        self.messages, self.initial_goal, self.agents_thoughts = (
            messages,
            initial_goal,
            agents_thoughts,
        )
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
mocked_state = DummyState(
|
|
33
|
+
messages=[],
|
|
34
|
+
initial_goal="Find a green product on my website",
|
|
35
|
+
agents_thoughts=[
|
|
36
|
+
"Going on http://superwebsite.fr",
|
|
37
|
+
"Searching for products",
|
|
38
|
+
"Filtering by color",
|
|
39
|
+
"Color 'green' found for a 20 dollars product",
|
|
40
|
+
],
|
|
41
|
+
)
|
|
42
|
+
|
|
43
|
+
mocked_ctx = MobileUseContext(
|
|
44
|
+
llm_config={
|
|
45
|
+
"executor": LLM(provider="openai", model="gpt-5-nano"),
|
|
46
|
+
"cortex": LLM(provider="openai", model="gpt-5-nano"),
|
|
47
|
+
"planner": LLM(provider="openai", model="gpt-5-nano"),
|
|
48
|
+
"orchestrator": LLM(provider="openai", model="gpt-5-nano"),
|
|
49
|
+
},
|
|
50
|
+
) # type: ignore
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
async def test_outputter_with_pydantic_model():
    """Check the outputter result when the schema is a Pydantic model class.

    ``outputter`` returns ``response.model_dump()`` for Pydantic responses, so
    the result is a plain dict, not a ``MockPydanticSchema`` instance.
    """
    logger.info("Starting test_outputter_with_pydantic_model")
    config = OutputConfig(
        structured_output=MockPydanticSchema,
        output_description=None,
    )

    result = await outputter(ctx=mocked_ctx, output_config=config, graph_output=mocked_state)  # type: ignore

    assert isinstance(result, dict)
    # Re-validate through the schema so missing or mistyped fields fail loudly.
    parsed = MockPydanticSchema(**result)
    assert parsed.color.lower() == "green"
    logger.success(str(result))
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
async def test_outputter_with_dict():
    """Check the outputter result when the schema is given as a plain dict."""
    logger.info("Starting test_outputter_with_dict")
    config = OutputConfig(structured_output=mock_dict, output_description=None)

    result = await outputter(ctx=mocked_ctx, output_config=config, graph_output=mocked_state)  # type: ignore

    assert isinstance(result, dict)
    expected_pairs = (
        ("color", "green"),
        ("price", 20),
        ("currency_symbol", "$"),
        ("website_url", "http://superwebsite.fr"),
    )
    for key, expected in expected_pairs:
        assert result.get(key, None) == expected
    logger.success(str(result))
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
async def test_outputter_with_natural_language_output():
    """Check the outputter result when only a natural-language description is given."""
    logger.info("Starting test_outputter_with_natural_language_output")
    config = OutputConfig(
        structured_output=None,
        # Implicit literal concatenation: a backslash continuation inside the
        # string would embed the next line's indentation in the prompt.
        output_description=(
            "A JSON object with a color, "
            "a price, a currency_symbol and a website_url key"
        ),
    )

    result = await outputter(ctx=mocked_ctx, output_config=config, graph_output=mocked_state)  # type: ignore
    logger.info(str(result))

    assert isinstance(result, dict)
    assert result.get("color", None) == "green"
    assert result.get("price", None) == 20
    assert result.get("currency_symbol", None) == "$"
    assert result.get("website_url", None) == "http://superwebsite.fr"
    logger.success(str(result))
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
if __name__ == "__main__":
|
|
104
|
+
import asyncio
|
|
105
|
+
|
|
106
|
+
asyncio.run(test_outputter_with_pydantic_model())
|
|
107
|
+
asyncio.run(test_outputter_with_natural_language_output())
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
You are the **Planner**.
|
|
2
|
+
Your role is to **break down a user’s goal into a realistic series of subgoals** that can be executed step-by-step on an {{ platform }} **mobile device**.
|
|
3
|
+
|
|
4
|
+
You work like an agile tech lead: defining the key milestones without locking in details too early. Other agents will handle the specifics later.
|
|
5
|
+
|
|
6
|
+
### Core Responsibilities
|
|
7
|
+
|
|
8
|
+
1. **Initial Planning**
|
|
9
|
+
Given the **user's goal**:
|
|
10
|
+
|
|
11
|
+
- Create a **high-level sequence of subgoals** to complete that goal.
|
|
12
|
+
- Subgoals should reflect real interactions with mobile UIs (e.g. "Open app", "Tap search bar", "Scroll to item", "Send message to Bob", etc).
|
|
13
|
+
- Don't assume the full UI is visible yet. Plan based on how most mobile apps work, and keep flexibility.
|
|
14
|
+
- The list of agent thoughts is empty at this stage, which is expected since this is the first plan.
|
|
15
|
+
- Don't use precise UI actions when formulating subgoals like "copy", "paste", "tap", "swipe", ... unless explicitly asked in the initial goal.
|
|
16
|
+
|
|
17
|
+
2. **Replanning**
|
|
18
|
+
If you're asked to **revise a previous plan**, you'll also receive:
|
|
19
|
+
|
|
20
|
+
- The **original plan** (with notes about which subgoals succeeded or failed)
|
|
21
|
+
- A list of **agent thoughts**, including observations from the device, challenges encountered, and reasoning about what happened
|
|
22
|
+
- Take the agent thoughts and the previous plan into account when updating the plan: some steps may no longer be required because they were already completed successfully.
|
|
23
|
+
|
|
24
|
+
Use these inputs to update the plan: removing dead ends, adapting to what we learned, and suggesting new directions.
|
|
25
|
+
|
|
26
|
+
### Output
|
|
27
|
+
|
|
28
|
+
You must output a **list of strings**, each representing a clear subgoal.
|
|
29
|
+
Each subgoal should be:
|
|
30
|
+
|
|
31
|
+
- Focused on **realistic mobile interactions**
|
|
32
|
+
- Neither too vague nor too granular
|
|
33
|
+
- Sequential (later steps may depend on earlier ones)
|
|
34
|
+
- Free of loop-like formulations unless necessary (e.g. don't say "repeat this X times"; instead, list the same steps X times as separate subgoals)
|
|
35
|
+
|
|
36
|
+
### Examples
|
|
37
|
+
|
|
38
|
+
#### **Initial Goal**: "Open WhatsApp and send 'I’m running late' to Alice"
|
|
39
|
+
|
|
40
|
+
**Plan**:
|
|
41
|
+
|
|
42
|
+
- Open the WhatsApp app
|
|
43
|
+
- Locate or search for Alice
|
|
44
|
+
- Open the conversation with Alice
|
|
45
|
+
- Type the message "I’m running late"
|
|
46
|
+
- Send the message
|
|
47
|
+
|
|
48
|
+
#### **Replanning Example**
|
|
49
|
+
|
|
50
|
+
**Original Plan**: same as above
|
|
51
|
+
**Agent Thoughts**:
|
|
52
|
+
|
|
53
|
+
- Couldn’t find Alice in recent chats
|
|
54
|
+
- Search bar was present on top of the chat screen
|
|
55
|
+
- Keyboard appeared after tapping search
|
|
56
|
+
|
|
57
|
+
**New Plan**:
|
|
58
|
+
|
|
59
|
+
- Unlock the phone if needed
|
|
60
|
+
- Open WhatsApp
|
|
61
|
+
- Tap the search bar
|
|
62
|
+
- Search for "Alice"
|
|
63
|
+
- Select the correct chat
|
|
64
|
+
- Type and send "I’m running late"
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
from pathlib import Path
|
|
2
|
+
|
|
3
|
+
from jinja2 import Template
|
|
4
|
+
from langchain_core.messages import HumanMessage, SystemMessage
|
|
5
|
+
from minitap.mobile_use.agents.planner.types import PlannerOutput, Subgoal, SubgoalStatus
|
|
6
|
+
from minitap.mobile_use.agents.planner.utils import one_of_them_is_failure
|
|
7
|
+
from minitap.mobile_use.context import MobileUseContext
|
|
8
|
+
from minitap.mobile_use.graph.state import State
|
|
9
|
+
from minitap.mobile_use.services.llm import get_llm
|
|
10
|
+
from minitap.mobile_use.utils.decorators import wrap_with_callbacks
|
|
11
|
+
from minitap.mobile_use.utils.logger import get_logger
|
|
12
|
+
|
|
13
|
+
logger = get_logger(__name__)
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class PlannerNode:
    """Graph node that asks the "planner" LLM for a fresh list of subgoals."""

    def __init__(self, ctx: MobileUseContext):
        self.ctx = ctx

    @wrap_with_callbacks(
        before=lambda: logger.info("Starting Planner Agent..."),
        on_success=lambda _: logger.success("Planner Agent"),
        on_failure=lambda _: logger.error("Planner Agent"),
    )
    async def __call__(self, state: State):
        """Build a plan, or a replan when any existing subgoal has failed.

        The system prompt comes from ``planner.md`` and the human prompt from
        ``human.md``, both located next to this module. Every subgoal returned
        by the LLM is stored with status ``NOT_STARTED`` and no completion
        reason, and the resulting list is passed to ``state.sanitize_update``
        under the ``"subgoal_plan"`` key.
        """
        prompts_dir = Path(__file__).parent
        action = "replan" if one_of_them_is_failure(state.subgoal_plan) else "plan"

        system_template = Template(prompts_dir.joinpath("planner.md").read_text(encoding="utf-8"))
        system_prompt = system_template.render(platform=self.ctx.device.mobile_platform.value)

        human_template = Template(prompts_dir.joinpath("human.md").read_text(encoding="utf-8"))
        human_prompt = human_template.render(
            action=action,
            initial_goal=state.initial_goal,
            previous_plan="\n".join(map(str, state.subgoal_plan)),
            agent_thoughts="\n".join(state.agents_thoughts),
        )

        conversation = [
            SystemMessage(content=system_prompt),
            HumanMessage(content=human_prompt),
        ]

        planner_llm = get_llm(ctx=self.ctx, name="planner").with_structured_output(PlannerOutput)
        response: PlannerOutput = await planner_llm.ainvoke(conversation)  # type: ignore

        subgoals_plan = []
        for text in response.subgoals:
            subgoals_plan.append(
                Subgoal(
                    description=text,
                    status=SubgoalStatus.NOT_STARTED,
                    completion_reason=None,
                )
            )

        logger.info("📜 Generated plan:")
        logger.info("\n".join(map(str, subgoals_plan)))

        return state.sanitize_update(ctx=self.ctx, update={"subgoal_plan": subgoals_plan})
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
from enum import Enum
|
|
2
|
+
from typing import Optional
|
|
3
|
+
|
|
4
|
+
from pydantic import BaseModel
|
|
5
|
+
from typing_extensions import Annotated
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
# Structured-output schema for the planner LLM: an ordered list of subgoal descriptions.
# Documented with comments rather than a docstring so the generated schema is unchanged.
class PlannerOutput(BaseModel):
    subgoals: list[str]
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class SubgoalStatus(Enum):
    """Status values a subgoal can carry; each value equals its member name."""

    NOT_STARTED = "NOT_STARTED"
    PENDING = "PENDING"
    SUCCESS = "SUCCESS"
    FAILURE = "FAILURE"
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
# One plan step: its description, its status and an optional completion reason.
class Subgoal(BaseModel):
    description: Annotated[str, "Description of the subgoal"]
    completion_reason: Annotated[
        Optional[str], "Reason why the subgoal was completed (failure or success)"
    ] = None
    status: SubgoalStatus

    def __str__(self):
        # Render as "- <description> : <marker>." plus the completion reason if set.
        markers = {
            SubgoalStatus.SUCCESS: "✅",
            SubgoalStatus.FAILURE: "❌",
            SubgoalStatus.PENDING: "⏳",
            SubgoalStatus.NOT_STARTED: "(not started yet)",
        }
        # "❓" is the fallback for a status missing from the table.
        marker = markers.get(self.status, "❓")

        pieces = [f"- {self.description} : {marker}."]
        if self.completion_reason:
            pieces.append(f" Completion reason: {self.completion_reason}")
        return "".join(pieces)

    def __repr__(self):
        # Same text as str() so plans read identically in logs and debuggers.
        return self.__str__()
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
from minitap.mobile_use.agents.planner.types import Subgoal, SubgoalStatus
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
def get_current_subgoal(subgoals: list[Subgoal]) -> Subgoal | None:
    """Return the first subgoal whose status is PENDING, or None when there is none."""
    for candidate in subgoals:
        if candidate.status == SubgoalStatus.PENDING:
            return candidate
    return None
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def get_next_subgoal(subgoals: list[Subgoal]) -> Subgoal | None:
    """Return the first subgoal whose status is NOT_STARTED, or None when there is none."""
    for candidate in subgoals:
        if candidate.status == SubgoalStatus.NOT_STARTED:
            return candidate
    return None
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def nothing_started(subgoals: list[Subgoal]) -> bool:
    """Return True when every subgoal is NOT_STARTED (also True for an empty list)."""
    for item in subgoals:
        if not item.status == SubgoalStatus.NOT_STARTED:
            return False
    return True
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def complete_current_subgoal(subgoals: list[Subgoal]) -> list[Subgoal]:
    """Mark the current (PENDING) subgoal as SUCCESS in place and return the same list.

    The list is returned untouched when no subgoal is pending.
    """
    active = get_current_subgoal(subgoals)
    if active:
        active.status = SubgoalStatus.SUCCESS
    return subgoals
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def fail_current_subgoal(subgoals: list[Subgoal]) -> list[Subgoal]:
    """Mark the current (PENDING) subgoal as FAILURE in place and return the same list.

    The list is returned untouched when no subgoal is pending.
    """
    active = get_current_subgoal(subgoals)
    if active:
        active.status = SubgoalStatus.FAILURE
    return subgoals
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def all_completed(subgoals: list[Subgoal]) -> bool:
    """Return True when every subgoal is SUCCESS (also True for an empty list)."""
    for item in subgoals:
        if not item.status == SubgoalStatus.SUCCESS:
            return False
    return True
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def one_of_them_is_failure(subgoals: list[Subgoal]) -> bool:
    """Return True as soon as one subgoal has status FAILURE, otherwise False."""
    for item in subgoals:
        if item.status == SubgoalStatus.FAILURE:
            return True
    return False
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def start_next_subgoal(subgoals: list[Subgoal]) -> list[Subgoal]:
    """Mark the first NOT_STARTED subgoal as PENDING in place and return the same list.

    The list is returned untouched when no subgoal is left to start.
    """
    upcoming = get_next_subgoal(subgoals)
    if upcoming:
        upcoming.status = SubgoalStatus.PENDING
    return subgoals
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
from langchain_core.messages import (
|
|
2
|
+
HumanMessage,
|
|
3
|
+
RemoveMessage,
|
|
4
|
+
ToolMessage,
|
|
5
|
+
)
|
|
6
|
+
from minitap.mobile_use.constants import MAX_MESSAGES_IN_HISTORY
|
|
7
|
+
from minitap.mobile_use.context import MobileUseContext
|
|
8
|
+
from minitap.mobile_use.graph.state import State
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class SummarizerNode:
    """Graph node that schedules old messages for removal once the history is too long."""

    def __init__(self, ctx: MobileUseContext):
        self.ctx = ctx

    def __call__(self, state: State):
        """Return an update holding ``RemoveMessage`` entries for the oldest messages.

        Nothing is changed (an empty dict is returned) while the history holds at
        most ``MAX_MESSAGES_IN_HISTORY`` messages. Otherwise only the messages in
        excess of that limit are candidates. Removal starts at the most recent
        candidate that is a ``ToolMessage`` or ``HumanMessage`` and covers every
        older candidate; messages without an ``id`` are skipped.
        """
        overflow = len(state.messages) - MAX_MESSAGES_IN_HISTORY
        if overflow <= 0:
            return {}

        candidates = state.messages[:overflow]

        # Walk backwards to find the newest candidate that is a tool/human message.
        # NOTE(review): presumably this avoids cutting the history in the middle of
        # an AI/tool exchange — confirm.
        boundary = None
        for position in range(len(candidates) - 1, -1, -1):
            if isinstance(candidates[position], (ToolMessage, HumanMessage)):
                boundary = position
                break

        removals = []
        if boundary is not None:
            removals = [
                RemoveMessage(id=old.id)
                for old in reversed(candidates[: boundary + 1])
                if old.id
            ]

        return state.sanitize_update(ctx=self.ctx, update={"messages": removals})
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
from urllib.parse import urljoin
|
|
2
|
+
|
|
3
|
+
from minitap.mobile_use.utils.requests_utils import get_session_with_curl_logging
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class DeviceHardwareClient:
    """Thin HTTP client that sends requests to ``<base_url>/api/<path>``."""

    def __init__(self, base_url: str):
        self.base_url = base_url
        self.session = get_session_with_curl_logging()

    def _api_url(self, path: str) -> str:
        """Build the absolute URL for ``path`` under the ``/api/`` prefix.

        Leading slashes on ``path`` are dropped first. Because the joined part
        starts with ``/``, it replaces any path already present on ``base_url``.
        """
        trimmed = path.lstrip("/")
        return urljoin(self.base_url, f"/api/{trimmed}")

    def get(self, path: str, **kwargs):
        """Send a GET request; extra keyword arguments go to ``session.get``."""
        return self.session.get(self._api_url(path), **kwargs)

    def post(self, path: str, **kwargs):
        """Send a POST request; extra keyword arguments go to ``session.post``."""
        return self.session.post(self._api_url(path), **kwargs)
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def get_client(base_url: str | None = None):
    """Create a ``DeviceHardwareClient``, using ``http://localhost:9999`` when no URL is given."""
    return DeviceHardwareClient(base_url or "http://localhost:9999")
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import platform
|
|
3
|
+
|
|
4
|
+
from minitap.mobile_use.utils.shell_utils import run_shell_command_on_host
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def get_ios_devices() -> tuple[bool, list[str], str]:
    """
    Get UDIDs of iOS simulator devices only.

    The list comes from ``xcrun simctl list devices --json``; only runtimes whose
    name contains "ios" (case-insensitive) are kept.

    Returns:
        A tuple containing:
            - bool: True if xcrun is available, False otherwise.
            - list[str]: A list of iOS device UDIDs.
            - str: An error message if any.
    """
    # platform.system() reports "Darwin" on macOS, the only platform where xcrun exists.
    if platform.system() != "Darwin":
        return False, [], "xcrun is only available on macOS."

    try:
        cmd = ["xcrun", "simctl", "list", "devices", "--json"]
        output = run_shell_command_on_host(" ".join(cmd))
        data = json.loads(output)

        serials = [
            dev["udid"]
            for runtime, devices in data.get("devices", {}).items()
            if "ios" in runtime.lower()  # e.g. "com.apple.CoreSimulator.SimRuntime.iOS-17-0"
            for dev in devices
            if "udid" in dev
        ]

        return True, serials, ""

    except FileNotFoundError:
        error_message = (
            "'xcrun' command not found. Please ensure Xcode Command Line Tools are installed."
        )
        return False, [], error_message
    except json.JSONDecodeError as e:
        return True, [], f"Failed to parse xcrun output as JSON: {e}"
    except Exception as e:
        # Deliberately broad: failures are reported through the returned tuple
        # instead of being raised.
        return True, [], f"Failed to get iOS devices: {e}"
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import time
|
|
3
|
+
from urllib.parse import urljoin
|
|
4
|
+
|
|
5
|
+
import requests
|
|
6
|
+
from minitap.mobile_use.utils.logger import get_logger
|
|
7
|
+
from minitap.mobile_use.utils.requests_utils import get_session_with_curl_logging
|
|
8
|
+
|
|
9
|
+
logger = get_logger(__name__)
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class ScreenApiClient:
    """HTTP client for the Screen API with a retrying GET helper."""

    def __init__(self, base_url: str, retry_count: int = 5, retry_wait_seconds: int = 1):
        self.base_url = base_url
        self.session = get_session_with_curl_logging()
        self.retry_count = retry_count
        self.retry_wait_seconds = retry_wait_seconds

    def get_with_retry(self, path: str, **kwargs):
        """
        Make a GET request to the Screen API with retry logic based on the client configuration.

        Up to ``retry_count`` attempts are made, waiting ``retry_wait_seconds``
        between attempts. No wait happens after the final attempt.

        Returns:
            The first response whose status code is in the 2xx range.

        Raises:
            requests.exceptions.RequestException: the error raised by the last
                attempt, or a generic one when every attempt returned a
                non-2xx status.
        """
        url = urljoin(self.base_url, path)

        for attempt in range(self.retry_count):
            is_last_attempt = attempt == self.retry_count - 1
            try:
                response = self.session.get(url, **kwargs)
            except requests.exceptions.RequestException:
                if is_last_attempt:
                    raise
                time.sleep(self.retry_wait_seconds)
                continue

            if 200 <= response.status_code < 300:
                return response

            if is_last_attempt:
                # Nothing left to retry: fail right away rather than announcing
                # a retry and sleeping first.
                logger.warning(
                    f"Received {response.status_code}, attempt {attempt + 1} of {self.retry_count}."
                    " No retries left."
                )
                break

            logger.warning(
                f"Received {response.status_code}, attempt {attempt + 1} of {self.retry_count}."
                f" Retrying in {self.retry_wait_seconds} seconds..."
            )
            time.sleep(self.retry_wait_seconds)

        raise requests.exceptions.RequestException(
            f"Failed to get a valid response after {self.retry_count} attempts."
        )

    def post(self, path: str, **kwargs):
        """Send a single POST request (no retries); keyword arguments go to ``session.post``."""
        return self.session.post(urljoin(self.base_url, path), **kwargs)
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def get_client(base_url: str | None = None):
    """Create a ``ScreenApiClient`` configured from the environment.

    ``http://localhost:9998`` is used when no URL is given. The retry count and
    the wait between retries are read from ``MOBILE_USE_HEALTH_RETRIES`` and
    ``MOBILE_USE_HEALTH_DELAY`` (defaults 5 and 1) and converted with ``int``.
    """
    target_url = base_url or "http://localhost:9998"
    retries = int(os.getenv("MOBILE_USE_HEALTH_RETRIES", 5))
    delay_seconds = int(os.getenv("MOBILE_USE_HEALTH_DELAY", 1))
    return ScreenApiClient(target_url, retries, delay_seconds)
|