minitap-mobile-use 0.0.1.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of minitap-mobile-use might be problematic. Click here for more details.

Files changed (95) hide show
  1. minitap/mobile_use/__init__.py +0 -0
  2. minitap/mobile_use/agents/contextor/contextor.py +42 -0
  3. minitap/mobile_use/agents/cortex/cortex.md +93 -0
  4. minitap/mobile_use/agents/cortex/cortex.py +107 -0
  5. minitap/mobile_use/agents/cortex/types.py +11 -0
  6. minitap/mobile_use/agents/executor/executor.md +73 -0
  7. minitap/mobile_use/agents/executor/executor.py +84 -0
  8. minitap/mobile_use/agents/executor/executor_context_cleaner.py +27 -0
  9. minitap/mobile_use/agents/executor/utils.py +11 -0
  10. minitap/mobile_use/agents/hopper/hopper.md +13 -0
  11. minitap/mobile_use/agents/hopper/hopper.py +45 -0
  12. minitap/mobile_use/agents/orchestrator/human.md +13 -0
  13. minitap/mobile_use/agents/orchestrator/orchestrator.md +18 -0
  14. minitap/mobile_use/agents/orchestrator/orchestrator.py +114 -0
  15. minitap/mobile_use/agents/orchestrator/types.py +14 -0
  16. minitap/mobile_use/agents/outputter/human.md +25 -0
  17. minitap/mobile_use/agents/outputter/outputter.py +75 -0
  18. minitap/mobile_use/agents/outputter/test_outputter.py +107 -0
  19. minitap/mobile_use/agents/planner/human.md +12 -0
  20. minitap/mobile_use/agents/planner/planner.md +64 -0
  21. minitap/mobile_use/agents/planner/planner.py +64 -0
  22. minitap/mobile_use/agents/planner/types.py +44 -0
  23. minitap/mobile_use/agents/planner/utils.py +45 -0
  24. minitap/mobile_use/agents/summarizer/summarizer.py +34 -0
  25. minitap/mobile_use/clients/device_hardware_client.py +23 -0
  26. minitap/mobile_use/clients/ios_client.py +44 -0
  27. minitap/mobile_use/clients/screen_api_client.py +53 -0
  28. minitap/mobile_use/config.py +285 -0
  29. minitap/mobile_use/constants.py +2 -0
  30. minitap/mobile_use/context.py +65 -0
  31. minitap/mobile_use/controllers/__init__.py +0 -0
  32. minitap/mobile_use/controllers/mobile_command_controller.py +379 -0
  33. minitap/mobile_use/controllers/platform_specific_commands_controller.py +74 -0
  34. minitap/mobile_use/graph/graph.py +149 -0
  35. minitap/mobile_use/graph/state.py +73 -0
  36. minitap/mobile_use/main.py +122 -0
  37. minitap/mobile_use/sdk/__init__.py +12 -0
  38. minitap/mobile_use/sdk/agent.py +524 -0
  39. minitap/mobile_use/sdk/builders/__init__.py +10 -0
  40. minitap/mobile_use/sdk/builders/agent_config_builder.py +213 -0
  41. minitap/mobile_use/sdk/builders/index.py +15 -0
  42. minitap/mobile_use/sdk/builders/task_request_builder.py +218 -0
  43. minitap/mobile_use/sdk/constants.py +14 -0
  44. minitap/mobile_use/sdk/examples/README.md +45 -0
  45. minitap/mobile_use/sdk/examples/__init__.py +1 -0
  46. minitap/mobile_use/sdk/examples/simple_photo_organizer.py +76 -0
  47. minitap/mobile_use/sdk/examples/smart_notification_assistant.py +177 -0
  48. minitap/mobile_use/sdk/types/__init__.py +49 -0
  49. minitap/mobile_use/sdk/types/agent.py +73 -0
  50. minitap/mobile_use/sdk/types/exceptions.py +74 -0
  51. minitap/mobile_use/sdk/types/task.py +191 -0
  52. minitap/mobile_use/sdk/utils.py +28 -0
  53. minitap/mobile_use/servers/config.py +19 -0
  54. minitap/mobile_use/servers/device_hardware_bridge.py +212 -0
  55. minitap/mobile_use/servers/device_screen_api.py +143 -0
  56. minitap/mobile_use/servers/start_servers.py +151 -0
  57. minitap/mobile_use/servers/stop_servers.py +215 -0
  58. minitap/mobile_use/servers/utils.py +11 -0
  59. minitap/mobile_use/services/accessibility.py +100 -0
  60. minitap/mobile_use/services/llm.py +143 -0
  61. minitap/mobile_use/tools/index.py +54 -0
  62. minitap/mobile_use/tools/mobile/back.py +52 -0
  63. minitap/mobile_use/tools/mobile/copy_text_from.py +77 -0
  64. minitap/mobile_use/tools/mobile/erase_text.py +124 -0
  65. minitap/mobile_use/tools/mobile/input_text.py +74 -0
  66. minitap/mobile_use/tools/mobile/launch_app.py +59 -0
  67. minitap/mobile_use/tools/mobile/list_packages.py +78 -0
  68. minitap/mobile_use/tools/mobile/long_press_on.py +62 -0
  69. minitap/mobile_use/tools/mobile/open_link.py +59 -0
  70. minitap/mobile_use/tools/mobile/paste_text.py +66 -0
  71. minitap/mobile_use/tools/mobile/press_key.py +58 -0
  72. minitap/mobile_use/tools/mobile/run_flow.py +57 -0
  73. minitap/mobile_use/tools/mobile/stop_app.py +58 -0
  74. minitap/mobile_use/tools/mobile/swipe.py +56 -0
  75. minitap/mobile_use/tools/mobile/take_screenshot.py +70 -0
  76. minitap/mobile_use/tools/mobile/tap.py +66 -0
  77. minitap/mobile_use/tools/mobile/wait_for_animation_to_end.py +68 -0
  78. minitap/mobile_use/tools/tool_wrapper.py +33 -0
  79. minitap/mobile_use/utils/cli_helpers.py +40 -0
  80. minitap/mobile_use/utils/cli_selection.py +144 -0
  81. minitap/mobile_use/utils/conversations.py +31 -0
  82. minitap/mobile_use/utils/decorators.py +123 -0
  83. minitap/mobile_use/utils/errors.py +6 -0
  84. minitap/mobile_use/utils/file.py +13 -0
  85. minitap/mobile_use/utils/logger.py +184 -0
  86. minitap/mobile_use/utils/media.py +73 -0
  87. minitap/mobile_use/utils/recorder.py +55 -0
  88. minitap/mobile_use/utils/requests_utils.py +37 -0
  89. minitap/mobile_use/utils/shell_utils.py +20 -0
  90. minitap/mobile_use/utils/time.py +6 -0
  91. minitap/mobile_use/utils/ui_hierarchy.py +30 -0
  92. minitap_mobile_use-0.0.1.dev0.dist-info/METADATA +274 -0
  93. minitap_mobile_use-0.0.1.dev0.dist-info/RECORD +95 -0
  94. minitap_mobile_use-0.0.1.dev0.dist-info/WHEEL +4 -0
  95. minitap_mobile_use-0.0.1.dev0.dist-info/entry_points.txt +3 -0
@@ -0,0 +1,14 @@
1
+ from enum import Enum
2
+
3
+ from pydantic import BaseModel
4
+
5
+
6
class OrchestratorStatus(Enum):
    """Closed set of status values used for ``OrchestratorOutput.status``."""

    # NOTE(review): the exact meaning of each member is only suggested by its
    # name here; confirm against the orchestrator agent before relying on it.
    CONTINUE = "continue"
    RESUME = "resume"
    REPLAN = "replan"
10
+
11
+
12
class OrchestratorOutput(BaseModel):
    """Pydantic model pairing an orchestrator status with a textual reason."""

    # One of the ``OrchestratorStatus`` members.
    status: OrchestratorStatus
    # Free-text explanation accompanying the status.
    reason: str
@@ -0,0 +1,25 @@
1
+ You are a helpful assistant tasked with generating the final structured output of a multi-agent reasoning process.
2
+
3
+ ## The original goal was:
4
+ {{ initial_goal }}
5
+
6
+ {% if agents_thoughts %}
7
+ Throughout the reasoning process, the following agent thoughts were collected:
8
+ {% for thought in agents_thoughts %}
9
+ - {{ thought }}
10
+ {% endfor %}
11
+ {% endif %}
12
+
13
+ {% if last_ai_message %}
14
+ The last message generated by the graph execution was:
15
+ "{{ last_ai_message }}"
16
+ {% endif %}
17
+
18
+ {% if not structured_output %}
19
+ Please generate a well-structured JSON object based on the following instructions:
20
+
21
+ > "{{ output_description }}"
22
+
23
+ Only return the JSON object, with fields matching the description as closely as possible.
24
+ Do not include explanations or markdown, just the raw JSON.
25
+ {% endif %}
@@ -0,0 +1,75 @@
1
+ import json
2
+ from pathlib import Path
3
+ from typing import Dict, Type, Union
4
+
5
+ from jinja2 import Template
6
+ from langchain_core.messages import BaseMessage, HumanMessage, SystemMessage
7
+ from minitap.mobile_use.config import OutputConfig
8
+ from minitap.mobile_use.context import MobileUseContext
9
+ from minitap.mobile_use.graph.state import State
10
+ from minitap.mobile_use.services.llm import get_llm
11
+ from minitap.mobile_use.utils.conversations import is_ai_message
12
+ from minitap.mobile_use.utils.logger import get_logger
13
+ from pydantic import BaseModel
14
+
15
+ logger = get_logger(__name__)
16
+
17
+
18
async def outputter(
    ctx: MobileUseContext, output_config: OutputConfig, graph_output: State
) -> dict:
    """Ask the "outputter" LLM to produce the final output of a graph run.

    The human prompt is rendered from ``human.md`` (next to this module) with
    the initial goal, the collected agent thoughts, the output configuration
    and, when the last graph message is an AI message, that message's content.
    When ``output_config.output_description`` is set it is also appended as a
    separate human message.

    Args:
        ctx: Context forwarded to ``get_llm``.
        output_config: Carries ``structured_output`` (a dict, a Pydantic model
            instance or a Pydantic model class) and ``output_description``.
        graph_output: Final graph state; ``messages``, ``initial_goal`` and
            ``agents_thoughts`` are read from it.

    Returns:
        ``model_dump()`` of a Pydantic response, the JSON-decoded ``content``
        of a message-like response, or the raw response when its type is not
        recognised.
    """
    logger.info("Starting Outputter Agent")
    final_message = graph_output.messages[-1] if graph_output.messages else None

    system_prompt = (
        "You are a helpful assistant tasked with generating "
        "the final structured output of a multi-agent reasoning process."
    )

    template = Template(Path(__file__).parent.joinpath("human.md").read_text(encoding="utf-8"))
    # Only an AI message is surfaced to the prompt as "last message".
    if final_message and is_ai_message(message=final_message):
        last_ai_content = final_message.content
    else:
        last_ai_content = None
    human_prompt = template.render(
        initial_goal=graph_output.initial_goal,
        agents_thoughts=graph_output.agents_thoughts,
        structured_output=output_config.structured_output,
        output_description=output_config.output_description,
        last_ai_message=last_ai_content,
    )

    messages: list[BaseMessage] = [
        SystemMessage(content=system_prompt),
        HumanMessage(content=human_prompt),
    ]
    if output_config.output_description:
        messages.append(HumanMessage(content=output_config.output_description))

    llm = get_llm(ctx=ctx, name="outputter", is_utils=True, temperature=1)
    runnable = llm

    requested = output_config.structured_output
    if requested:
        schema: Union[Dict, Type[BaseModel], None] = None
        is_model_class = isinstance(requested, type) and issubclass(requested, BaseModel)
        if isinstance(requested, dict) or is_model_class:
            # Dicts and model classes are usable as a schema as-is.
            schema = requested
        elif isinstance(requested, BaseModel):
            # A model instance only contributes its class.
            schema = type(requested)

        if schema is not None:
            runnable = llm.with_structured_output(schema)

    response = await runnable.ainvoke(messages)  # type: ignore

    if isinstance(response, BaseModel):
        # With a natural-language description, a response exposing ``content``
        # is expected to hold raw JSON text.
        if output_config.output_description and hasattr(response, "content"):
            return json.loads(response.content)  # type: ignore
        return response.model_dump()

    if hasattr(response, "content"):
        return json.loads(response.content)  # type: ignore

    logger.info("Found unknown response type: " + str(type(response)))
    return response
@@ -0,0 +1,107 @@
1
+ from minitap.mobile_use.agents.outputter.outputter import outputter
2
+ from minitap.mobile_use.config import LLM, OutputConfig
3
+ from minitap.mobile_use.context import MobileUseContext
4
+ from minitap.mobile_use.utils.logger import get_logger
5
+ from pydantic import BaseModel
6
+
7
+ logger = get_logger(__name__)
8
+
9
+
10
class MockPydanticSchema(BaseModel):
    """Pydantic schema used as ``structured_output`` in the outputter tests."""

    color: str
    price: float
    currency_symbol: str
    website_url: str
15
+
16
+
17
# Dict form of the expected output; passed as ``structured_output`` by
# ``test_outputter_with_dict``. The keys mirror ``MockPydanticSchema``'s fields.
mock_dict = {
    "color": "green",
    "price": 20,
    "currency_symbol": "$",
    "website_url": "http://superwebsite.fr",
}
23
+
24
+
25
class DummyState:
    """Bare stand-in for the graph state exposing the three attributes it is given."""

    def __init__(self, messages, initial_goal, agents_thoughts):
        """Store ``messages``, ``initial_goal`` and ``agents_thoughts`` unchanged."""
        self.messages, self.initial_goal, self.agents_thoughts = (
            messages,
            initial_goal,
            agents_thoughts,
        )
30
+
31
+
32
# Shared fake graph output: no messages, one goal and four agent thoughts that
# contain the values the tests below assert on.
mocked_state = DummyState(
    messages=[],
    initial_goal="Find a green product on my website",
    agents_thoughts=[
        "Going on http://superwebsite.fr",
        "Searching for products",
        "Filtering by color",
        "Color 'green' found for a 20 dollars product",
    ],
)

# Shared context with one LLM entry per listed agent.
# NOTE(review): there is no explicit "outputter" entry even though the
# outputter asks get_llm for name="outputter" with is_utils=True; presumably
# that is resolved elsewhere, confirm against get_llm.
mocked_ctx = MobileUseContext(
    llm_config={
        "executor": LLM(provider="openai", model="gpt-5-nano"),
        "cortex": LLM(provider="openai", model="gpt-5-nano"),
        "planner": LLM(provider="openai", model="gpt-5-nano"),
        "orchestrator": LLM(provider="openai", model="gpt-5-nano"),
    },
)  # type: ignore
51
+
52
+
53
async def test_outputter_with_pydantic_model():
    """Check the outputter result when the schema is a Pydantic model class.

    ``outputter`` returns ``response.model_dump()`` for Pydantic responses, so
    the result is a plain ``dict`` rather than a ``MockPydanticSchema``
    instance. The dict is validated back into the schema to confirm that it
    carries every expected field with a compatible type.
    """
    logger.info("Starting test_outputter_with_pydantic_model")
    config = OutputConfig(
        structured_output=MockPydanticSchema,
        output_description=None,
    )

    result = await outputter(ctx=mocked_ctx, output_config=config, graph_output=mocked_state)  # type: ignore

    assert isinstance(result, dict)
    # Raises a validation error if the dict does not match the schema.
    parsed = MockPydanticSchema.model_validate(result)
    assert parsed.color.lower() == "green"
    logger.success(str(result))
65
+
66
+
67
async def test_outputter_with_dict():
    """Check that a dict ``structured_output`` yields a dict with the expected values."""
    logger.info("Starting test_outputter_with_dict")
    config = OutputConfig(structured_output=mock_dict, output_description=None)

    result = await outputter(ctx=mocked_ctx, output_config=config, graph_output=mocked_state)  # type: ignore

    assert isinstance(result, dict)
    expected = {
        "color": "green",
        "price": 20,
        "currency_symbol": "$",
        "website_url": "http://superwebsite.fr",
    }
    # Checked key by key, in the same order as before.
    for key, value in expected.items():
        assert result.get(key, None) == value
    logger.success(str(result))
82
+
83
+
84
async def test_outputter_with_natural_language_output():
    """Check that a natural-language description alone yields the expected dict."""
    logger.info("Starting test_outputter_with_natural_language_output")
    # NOTE(review): written as adjacent literals instead of a backslash
    # continuation inside the string; the resulting text matches the literal as
    # shown in the reviewed source (continuation-line indentation not visible).
    description = (
        "A JSON object with a color, "
        "a price, a currency_symbol and a website_url key"
    )
    config = OutputConfig(structured_output=None, output_description=description)

    result = await outputter(ctx=mocked_ctx, output_config=config, graph_output=mocked_state)  # type: ignore
    logger.info(str(result))

    assert isinstance(result, dict)
    expected = {
        "color": "green",
        "price": 20,
        "currency_symbol": "$",
        "website_url": "http://superwebsite.fr",
    }
    for key, value in expected.items():
        assert result.get(key, None) == value
    logger.success(str(result))
101
+
102
+
103
if __name__ == "__main__":
    import asyncio

    # Each test gets its own event loop, in this order.
    # NOTE(review): test_outputter_with_dict is not part of this manual run;
    # confirm whether that is intentional.
    for test_case in (
        test_outputter_with_pydantic_model,
        test_outputter_with_natural_language_output,
    ):
        asyncio.run(test_case())
@@ -0,0 +1,12 @@
1
+ Here is your input.
2
+
3
+ ---
4
+
5
+ **Action (plan or replan)**: {{ action }}
6
+
7
+ **Initial Goal**: {{ initial_goal }}
8
+
9
+ Relevant only if action is replan:
10
+
11
+ **Previous Plan**: {{ previous_plan }}
12
+ **Agent Thoughts**: {{ agent_thoughts }}
@@ -0,0 +1,64 @@
1
+ You are the **Planner**.
2
+ Your role is to **break down a user’s goal into a realistic series of subgoals** that can be executed step-by-step on an {{ platform }} **mobile device**.
3
+
4
+ You work like an agile tech lead: defining the key milestones without locking in details too early. Other agents will handle the specifics later.
5
+
6
+ ### Core Responsibilities
7
+
8
+ 1. **Initial Planning**
9
+ Given the **user's goal**:
10
+
11
+ - Create a **high-level sequence of subgoals** to complete that goal.
12
+ - Subgoals should reflect real interactions with mobile UIs (e.g. "Open app", "Tap search bar", "Scroll to item", "Send message to Bob", etc).
13
+ - Don't assume the full UI is visible yet. Plan based on how most mobile apps work, and keep flexibility.
14
+ - The list of agent thoughts is empty at this stage, which is expected since this is the first plan.
15
+ - Don't use precise UI actions when formulating subgoals like "copy", "paste", "tap", "swipe", ... unless explicitly asked in the initial goal.
16
+
17
+ 2. **Replanning**
18
+ If you're asked to **revise a previous plan**, you'll also receive:
19
+
20
+ - The **original plan** (with notes about which subgoals succeeded or failed)
21
+ - A list of **agent thoughts**, including observations from the device, challenges encountered, and reasoning about what happened
22
+ - Take the agent thoughts and the previous plan into account when updating the plan: some steps may no longer be required because they were already completed successfully.
23
+
24
+ Use these inputs to update the plan: removing dead ends, adapting to what we learned, and suggesting new directions.
25
+
26
+ ### Output
27
+
28
+ You must output a **list of strings**, each representing a clear subgoal.
29
+ Each subgoal should be:
30
+
31
+ - Focused on **realistic mobile interactions**
32
+ - Neither too vague nor too granular
33
+ - Sequential (later steps may depend on earlier ones)
34
+ - Don't use loop-like formulation unless necessary (e.g. don't say "repeat this X times", instead reuse the same steps X times as subgoals)
35
+
36
+ ### Examples
37
+
38
+ #### **Initial Goal**: "Open WhatsApp and send 'I’m running late' to Alice"
39
+
40
+ **Plan**:
41
+
42
+ - Open the WhatsApp app
43
+ - Locate or search for Alice
44
+ - Open the conversation with Alice
45
+ - Type the message "I’m running late"
46
+ - Send the message
47
+
48
+ #### **Replanning Example**
49
+
50
+ **Original Plan**: same as above
51
+ **Agent Thoughts**:
52
+
53
+ - Couldn’t find Alice in recent chats
54
+ - Search bar was present on top of the chat screen
55
+ - Keyboard appeared after tapping search
56
+
57
+ **New Plan**:
58
+
59
+ - Unlock the phone if needed
60
+ - Open WhatsApp
61
+ - Tap the search bar
62
+ - Search for "Alice"
63
+ - Select the correct chat
64
+ - Type and send "I’m running late"
@@ -0,0 +1,64 @@
1
+ from pathlib import Path
2
+
3
+ from jinja2 import Template
4
+ from langchain_core.messages import HumanMessage, SystemMessage
5
+ from minitap.mobile_use.agents.planner.types import PlannerOutput, Subgoal, SubgoalStatus
6
+ from minitap.mobile_use.agents.planner.utils import one_of_them_is_failure
7
+ from minitap.mobile_use.context import MobileUseContext
8
+ from minitap.mobile_use.graph.state import State
9
+ from minitap.mobile_use.services.llm import get_llm
10
+ from minitap.mobile_use.utils.decorators import wrap_with_callbacks
11
+ from minitap.mobile_use.utils.logger import get_logger
12
+
13
+ logger = get_logger(__name__)
14
+
15
+
16
class PlannerNode:
    """Graph node that asks the "planner" LLM for a fresh list of subgoals."""

    def __init__(self, ctx: MobileUseContext):
        self.ctx = ctx

    @wrap_with_callbacks(
        before=lambda: logger.info("Starting Planner Agent..."),
        on_success=lambda _: logger.success("Planner Agent"),
        on_failure=lambda _: logger.error("Planner Agent"),
    )
    async def __call__(self, state: State):
        """Build the planner prompts, query the LLM and return the state update.

        The action sent to the LLM is "replan" when any subgoal of the current
        plan has failed, and "plan" otherwise. Every subgoal returned by the
        LLM is wrapped in a ``Subgoal`` with status ``NOT_STARTED``.

        Returns:
            The result of ``state.sanitize_update`` for an update replacing
            ``subgoal_plan`` with the newly generated subgoals.
        """
        action = "replan" if one_of_them_is_failure(state.subgoal_plan) else "plan"

        prompts_dir = Path(__file__).parent
        system_prompt = Template(
            prompts_dir.joinpath("planner.md").read_text(encoding="utf-8")
        ).render(platform=self.ctx.device.mobile_platform.value)
        human_prompt = Template(
            prompts_dir.joinpath("human.md").read_text(encoding="utf-8")
        ).render(
            action=action,
            initial_goal=state.initial_goal,
            previous_plan="\n".join(map(str, state.subgoal_plan)),
            agent_thoughts="\n".join(state.agents_thoughts),
        )
        messages = [
            SystemMessage(content=system_prompt),
            HumanMessage(content=human_prompt),
        ]

        planner_llm = get_llm(ctx=self.ctx, name="planner").with_structured_output(PlannerOutput)
        response: PlannerOutput = await planner_llm.ainvoke(messages)  # type: ignore

        subgoals_plan = []
        for description in response.subgoals:
            subgoals_plan.append(
                Subgoal(
                    description=description,
                    status=SubgoalStatus.NOT_STARTED,
                    completion_reason=None,
                )
            )

        logger.info("📜 Generated plan:")
        logger.info("\n".join(map(str, subgoals_plan)))

        return state.sanitize_update(ctx=self.ctx, update={"subgoal_plan": subgoals_plan})
@@ -0,0 +1,44 @@
1
+ from enum import Enum
2
+ from typing import Optional
3
+
4
+ from pydantic import BaseModel
5
+ from typing_extensions import Annotated
6
+
7
+
8
class PlannerOutput(BaseModel):
    """Pydantic model holding the subgoal descriptions as plain strings."""

    subgoals: list[str]
10
+
11
+
12
class SubgoalStatus(Enum):
    """Closed set of states used for ``Subgoal.status``."""

    NOT_STARTED = "NOT_STARTED"
    PENDING = "PENDING"
    SUCCESS = "SUCCESS"
    FAILURE = "FAILURE"
17
+
18
+
19
class Subgoal(BaseModel):
    """One step of a plan: a description, a status and an optional reason."""

    description: Annotated[str, "Description of the subgoal"]
    completion_reason: Annotated[
        Optional[str], "Reason why the subgoal was completed (failure or success)"
    ] = None
    status: SubgoalStatus

    def __str__(self):
        """Render as ``- <description> : <marker>.`` followed by the reason, if any."""
        # Built locally: a bare class-level attribute would be picked up by pydantic.
        markers = {
            SubgoalStatus.SUCCESS: "✅",
            SubgoalStatus.FAILURE: "❌",
            SubgoalStatus.PENDING: "⏳",
            SubgoalStatus.NOT_STARTED: "(not started yet)",
        }
        # Any other status falls back to the question mark.
        marker = markers.get(self.status, "❓")

        pieces = [f"- {self.description} : {marker}."]
        if self.completion_reason:
            pieces.append(f" Completion reason: {self.completion_reason}")
        return "".join(pieces)

    def __repr__(self):
        """Use the same rendering as ``__str__``."""
        return self.__str__()
@@ -0,0 +1,45 @@
1
+ from minitap.mobile_use.agents.planner.types import Subgoal, SubgoalStatus
2
+
3
+
4
def get_current_subgoal(subgoals: list[Subgoal]) -> Subgoal | None:
    """Return the first subgoal whose status is ``PENDING``, or ``None``."""
    for candidate in subgoals:
        if candidate.status == SubgoalStatus.PENDING:
            return candidate
    return None
6
+
7
+
8
def get_next_subgoal(subgoals: list[Subgoal]) -> Subgoal | None:
    """Return the first subgoal whose status is ``NOT_STARTED``, or ``None``."""
    for candidate in subgoals:
        if candidate.status == SubgoalStatus.NOT_STARTED:
            return candidate
    return None
10
+
11
+
12
def nothing_started(subgoals: list[Subgoal]) -> bool:
    """Return ``True`` when every subgoal is ``NOT_STARTED`` (also for an empty list)."""
    for subgoal in subgoals:
        if not (subgoal.status == SubgoalStatus.NOT_STARTED):
            return False
    return True
14
+
15
+
16
def complete_current_subgoal(subgoals: list[Subgoal]) -> list[Subgoal]:
    """Mark the current (pending) subgoal as ``SUCCESS`` in place.

    The same list object is returned; it is left untouched when no subgoal is
    pending.
    """
    pending = get_current_subgoal(subgoals)
    if pending:
        pending.status = SubgoalStatus.SUCCESS
    return subgoals
22
+
23
+
24
def fail_current_subgoal(subgoals: list[Subgoal]) -> list[Subgoal]:
    """Mark the current (pending) subgoal as ``FAILURE`` in place.

    The same list object is returned; it is left untouched when no subgoal is
    pending.
    """
    pending = get_current_subgoal(subgoals)
    if pending:
        pending.status = SubgoalStatus.FAILURE
    return subgoals
30
+
31
+
32
def all_completed(subgoals: list[Subgoal]) -> bool:
    """Return ``True`` when every subgoal is ``SUCCESS`` (also for an empty list)."""
    for subgoal in subgoals:
        if not (subgoal.status == SubgoalStatus.SUCCESS):
            return False
    return True
34
+
35
+
36
def one_of_them_is_failure(subgoals: list[Subgoal]) -> bool:
    """Return ``True`` as soon as one subgoal has status ``FAILURE``."""
    for subgoal in subgoals:
        if subgoal.status == SubgoalStatus.FAILURE:
            return True
    return False
38
+
39
+
40
def start_next_subgoal(subgoals: list[Subgoal]) -> list[Subgoal]:
    """Switch the first ``NOT_STARTED`` subgoal to ``PENDING`` in place.

    The same list object is returned; it is left untouched when there is no
    subgoal left to start.
    """
    upcoming = get_next_subgoal(subgoals)
    if upcoming:
        upcoming.status = SubgoalStatus.PENDING
    return subgoals
@@ -0,0 +1,34 @@
1
+ from langchain_core.messages import (
2
+ HumanMessage,
3
+ RemoveMessage,
4
+ ToolMessage,
5
+ )
6
+ from minitap.mobile_use.constants import MAX_MESSAGES_IN_HISTORY
7
+ from minitap.mobile_use.context import MobileUseContext
8
+ from minitap.mobile_use.graph.state import State
9
+
10
+
11
class SummarizerNode:
    """Graph node that emits removals for old messages once the history is too long."""

    def __init__(self, ctx: MobileUseContext):
        self.ctx = ctx

    def __call__(self, state: State):
        """Return a state update removing the oldest messages, or ``{}`` if none.

        Only the messages exceeding ``MAX_MESSAGES_IN_HISTORY`` (the oldest
        ones) are candidates. Among them, removal covers everything up to and
        including the most recent ``ToolMessage``/``HumanMessage``; candidates
        newer than that boundary are kept. Messages without an id are skipped.
        """
        overflow = len(state.messages) - MAX_MESSAGES_IN_HISTORY
        if overflow <= 0:
            return {}

        candidates = state.messages[:overflow]

        # Index of the newest tool/human message among the candidates (-1: none).
        boundary = -1
        for index in range(len(candidates) - 1, -1, -1):
            if isinstance(candidates[index], (ToolMessage, HumanMessage)):
                boundary = index
                break

        # Newest-first, matching the order in which candidates are scanned.
        removals = [
            RemoveMessage(id=message.id)
            for message in reversed(candidates[: boundary + 1])
            if message.id
        ]
        return state.sanitize_update(ctx=self.ctx, update={"messages": removals})
@@ -0,0 +1,23 @@
1
+ from urllib.parse import urljoin
2
+
3
+ from minitap.mobile_use.utils.requests_utils import get_session_with_curl_logging
4
+
5
+
6
class DeviceHardwareClient:
    """HTTP client that sends every request under the ``/api/`` path of a base URL."""

    def __init__(self, base_url: str):
        self.base_url = base_url
        self.session = get_session_with_curl_logging()

    def _api_url(self, path: str) -> str:
        """Build the absolute URL for ``path``; leading slashes in ``path`` are ignored."""
        endpoint = "/api/" + path.lstrip("/")
        return urljoin(self.base_url, endpoint)

    def get(self, path: str, **kwargs):
        """Send a GET to the API path, forwarding ``kwargs`` to the session."""
        return self.session.get(self._api_url(path), **kwargs)

    def post(self, path: str, **kwargs):
        """Send a POST to the API path, forwarding ``kwargs`` to the session."""
        return self.session.post(self._api_url(path), **kwargs)
18
+
19
+
20
def get_client(base_url: str | None = None):
    """Create a ``DeviceHardwareClient``; a missing or empty URL means ``http://localhost:9999``."""
    return DeviceHardwareClient(base_url or "http://localhost:9999")
@@ -0,0 +1,44 @@
1
+ import json
2
+ import platform
3
+
4
+ from minitap.mobile_use.utils.shell_utils import run_shell_command_on_host
5
+
6
+
7
def get_ios_devices() -> tuple[bool, list[str], str]:
    """
    Get UDIDs of iOS simulator devices only.

    The lookup runs ``xcrun simctl list devices --json`` on the host and keeps
    the devices listed under runtimes whose name contains "ios".

    Returns:
        A tuple containing:
            - bool: True if xcrun is available, False otherwise.
            - list[str]: A list of iOS device UDIDs.
            - str: An error message if any.
    """
    # platform.system() reports "Darwin" on macOS, the only OS shipping xcrun.
    if platform.system() != "Darwin":
        return False, [], "xcrun is only available on macOS."

    try:
        output = run_shell_command_on_host("xcrun simctl list devices --json")
        data = json.loads(output)

        # Runtime keys look like "com.apple.CoreSimulator.SimRuntime.iOS-17-0".
        serials = [
            dev["udid"]
            for runtime, devices in data.get("devices", {}).items()
            if "ios" in runtime.lower()
            for dev in devices
            if "udid" in dev
        ]

        return True, serials, ""

    except FileNotFoundError:
        error_message = (
            "'xcrun' command not found. Please ensure Xcode Command Line Tools are installed."
        )
        return False, [], error_message
    except json.JSONDecodeError as e:
        return True, [], f"Failed to parse xcrun output as JSON: {e}"
    except Exception as e:
        # Best-effort lookup: any other failure is reported, not raised.
        return True, [], f"Failed to get iOS devices: {e}"
@@ -0,0 +1,53 @@
1
+ import os
2
+ import time
3
+ from urllib.parse import urljoin
4
+
5
+ import requests
6
+ from minitap.mobile_use.utils.logger import get_logger
7
+ from minitap.mobile_use.utils.requests_utils import get_session_with_curl_logging
8
+
9
+ logger = get_logger(__name__)
10
+
11
+
12
class ScreenApiClient:
    """HTTP client for the Screen API with a retrying GET helper."""

    def __init__(self, base_url: str, retry_count: int = 5, retry_wait_seconds: int = 1):
        self.base_url = base_url
        self.session = get_session_with_curl_logging()
        self.retry_count = retry_count
        self.retry_wait_seconds = retry_wait_seconds

    def get_with_retry(self, path: str, **kwargs):
        """
        Make a GET request to the Screen API with retry logic based on the client configuration.

        Up to ``retry_count`` attempts are made, waiting ``retry_wait_seconds``
        between two attempts. No wait happens after the final attempt.

        Args:
            path: Path joined onto the client's base URL.
            **kwargs: Forwarded to the session's ``get``.

        Returns:
            The first response whose status code is in the 2xx range.

        Raises:
            requests.exceptions.RequestException: The error of the final
                attempt when it failed at request level, or a generic one when
                no attempt produced a 2xx response.
        """
        url = urljoin(self.base_url, path)

        for attempt in range(1, self.retry_count + 1):
            is_last_attempt = attempt == self.retry_count
            try:
                response = self.session.get(url, **kwargs)
            except requests.exceptions.RequestException:
                if is_last_attempt:
                    raise
            else:
                if 200 <= response.status_code < 300:
                    return response

                # Only announce a retry when one will actually happen.
                next_step = (
                    "Giving up."
                    if is_last_attempt
                    else f"Retrying in {self.retry_wait_seconds} seconds..."
                )
                logger.warning(
                    f"Received {response.status_code}, attempt {attempt} of {self.retry_count}."
                    f" {next_step}"
                )

            if not is_last_attempt:
                time.sleep(self.retry_wait_seconds)

        raise requests.exceptions.RequestException(
            f"Failed to get a valid response after {self.retry_count} attempts."
        )

    def post(self, path: str, **kwargs):
        """Send a single POST (no retry) to ``path`` joined onto the base URL."""
        return self.session.post(urljoin(self.base_url, path), **kwargs)
46
+
47
+
48
def get_client(base_url: str | None = None):
    """Create a ``ScreenApiClient`` configured from the environment.

    A missing or empty ``base_url`` means ``http://localhost:9998``. The retry
    count and the wait between retries are read from
    ``MOBILE_USE_HEALTH_RETRIES`` (default 5) and ``MOBILE_USE_HEALTH_DELAY``
    (default 1), both converted with ``int``.
    """
    target_url = base_url or "http://localhost:9998"
    attempts = int(os.environ.get("MOBILE_USE_HEALTH_RETRIES", 5))
    delay_seconds = int(os.environ.get("MOBILE_USE_HEALTH_DELAY", 1))
    return ScreenApiClient(target_url, attempts, delay_seconds)
+ return ScreenApiClient(base_url, retry_count, retry_wait_seconds)