minitap-mobile-use 3.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (115) hide show
  1. minitap/mobile_use/__init__.py +0 -0
  2. minitap/mobile_use/agents/contextor/contextor.md +55 -0
  3. minitap/mobile_use/agents/contextor/contextor.py +175 -0
  4. minitap/mobile_use/agents/contextor/types.py +36 -0
  5. minitap/mobile_use/agents/cortex/cortex.md +135 -0
  6. minitap/mobile_use/agents/cortex/cortex.py +152 -0
  7. minitap/mobile_use/agents/cortex/types.py +15 -0
  8. minitap/mobile_use/agents/executor/executor.md +42 -0
  9. minitap/mobile_use/agents/executor/executor.py +87 -0
  10. minitap/mobile_use/agents/executor/tool_node.py +152 -0
  11. minitap/mobile_use/agents/hopper/hopper.md +15 -0
  12. minitap/mobile_use/agents/hopper/hopper.py +44 -0
  13. minitap/mobile_use/agents/orchestrator/human.md +12 -0
  14. minitap/mobile_use/agents/orchestrator/orchestrator.md +21 -0
  15. minitap/mobile_use/agents/orchestrator/orchestrator.py +134 -0
  16. minitap/mobile_use/agents/orchestrator/types.py +11 -0
  17. minitap/mobile_use/agents/outputter/human.md +25 -0
  18. minitap/mobile_use/agents/outputter/outputter.py +85 -0
  19. minitap/mobile_use/agents/outputter/test_outputter.py +167 -0
  20. minitap/mobile_use/agents/planner/human.md +14 -0
  21. minitap/mobile_use/agents/planner/planner.md +126 -0
  22. minitap/mobile_use/agents/planner/planner.py +101 -0
  23. minitap/mobile_use/agents/planner/types.py +51 -0
  24. minitap/mobile_use/agents/planner/utils.py +70 -0
  25. minitap/mobile_use/agents/summarizer/summarizer.py +35 -0
  26. minitap/mobile_use/agents/video_analyzer/__init__.py +5 -0
  27. minitap/mobile_use/agents/video_analyzer/human.md +5 -0
  28. minitap/mobile_use/agents/video_analyzer/video_analyzer.md +37 -0
  29. minitap/mobile_use/agents/video_analyzer/video_analyzer.py +111 -0
  30. minitap/mobile_use/clients/browserstack_client.py +477 -0
  31. minitap/mobile_use/clients/idb_client.py +429 -0
  32. minitap/mobile_use/clients/ios_client.py +332 -0
  33. minitap/mobile_use/clients/ios_client_config.py +141 -0
  34. minitap/mobile_use/clients/ui_automator_client.py +330 -0
  35. minitap/mobile_use/clients/wda_client.py +526 -0
  36. minitap/mobile_use/clients/wda_lifecycle.py +367 -0
  37. minitap/mobile_use/config.py +413 -0
  38. minitap/mobile_use/constants.py +3 -0
  39. minitap/mobile_use/context.py +106 -0
  40. minitap/mobile_use/controllers/__init__.py +0 -0
  41. minitap/mobile_use/controllers/android_controller.py +524 -0
  42. minitap/mobile_use/controllers/controller_factory.py +46 -0
  43. minitap/mobile_use/controllers/device_controller.py +182 -0
  44. minitap/mobile_use/controllers/ios_controller.py +436 -0
  45. minitap/mobile_use/controllers/platform_specific_commands_controller.py +199 -0
  46. minitap/mobile_use/controllers/types.py +106 -0
  47. minitap/mobile_use/controllers/unified_controller.py +193 -0
  48. minitap/mobile_use/graph/graph.py +160 -0
  49. minitap/mobile_use/graph/state.py +115 -0
  50. minitap/mobile_use/main.py +309 -0
  51. minitap/mobile_use/sdk/__init__.py +12 -0
  52. minitap/mobile_use/sdk/agent.py +1294 -0
  53. minitap/mobile_use/sdk/builders/__init__.py +10 -0
  54. minitap/mobile_use/sdk/builders/agent_config_builder.py +307 -0
  55. minitap/mobile_use/sdk/builders/index.py +15 -0
  56. minitap/mobile_use/sdk/builders/task_request_builder.py +236 -0
  57. minitap/mobile_use/sdk/constants.py +1 -0
  58. minitap/mobile_use/sdk/examples/README.md +83 -0
  59. minitap/mobile_use/sdk/examples/__init__.py +1 -0
  60. minitap/mobile_use/sdk/examples/app_lock_messaging.py +54 -0
  61. minitap/mobile_use/sdk/examples/platform_manual_task_example.py +67 -0
  62. minitap/mobile_use/sdk/examples/platform_minimal_example.py +48 -0
  63. minitap/mobile_use/sdk/examples/simple_photo_organizer.py +76 -0
  64. minitap/mobile_use/sdk/examples/smart_notification_assistant.py +225 -0
  65. minitap/mobile_use/sdk/examples/video_transcription_example.py +117 -0
  66. minitap/mobile_use/sdk/services/cloud_mobile.py +656 -0
  67. minitap/mobile_use/sdk/services/platform.py +434 -0
  68. minitap/mobile_use/sdk/types/__init__.py +51 -0
  69. minitap/mobile_use/sdk/types/agent.py +84 -0
  70. minitap/mobile_use/sdk/types/exceptions.py +138 -0
  71. minitap/mobile_use/sdk/types/platform.py +183 -0
  72. minitap/mobile_use/sdk/types/task.py +269 -0
  73. minitap/mobile_use/sdk/utils.py +29 -0
  74. minitap/mobile_use/services/accessibility.py +100 -0
  75. minitap/mobile_use/services/llm.py +247 -0
  76. minitap/mobile_use/services/telemetry.py +421 -0
  77. minitap/mobile_use/tools/index.py +67 -0
  78. minitap/mobile_use/tools/mobile/back.py +52 -0
  79. minitap/mobile_use/tools/mobile/erase_one_char.py +56 -0
  80. minitap/mobile_use/tools/mobile/focus_and_clear_text.py +317 -0
  81. minitap/mobile_use/tools/mobile/focus_and_input_text.py +153 -0
  82. minitap/mobile_use/tools/mobile/launch_app.py +86 -0
  83. minitap/mobile_use/tools/mobile/long_press_on.py +169 -0
  84. minitap/mobile_use/tools/mobile/open_link.py +62 -0
  85. minitap/mobile_use/tools/mobile/press_key.py +83 -0
  86. minitap/mobile_use/tools/mobile/stop_app.py +62 -0
  87. minitap/mobile_use/tools/mobile/swipe.py +156 -0
  88. minitap/mobile_use/tools/mobile/tap.py +154 -0
  89. minitap/mobile_use/tools/mobile/video_recording.py +177 -0
  90. minitap/mobile_use/tools/mobile/wait_for_delay.py +81 -0
  91. minitap/mobile_use/tools/scratchpad.py +147 -0
  92. minitap/mobile_use/tools/test_utils.py +413 -0
  93. minitap/mobile_use/tools/tool_wrapper.py +16 -0
  94. minitap/mobile_use/tools/types.py +35 -0
  95. minitap/mobile_use/tools/utils.py +336 -0
  96. minitap/mobile_use/utils/app_launch_utils.py +173 -0
  97. minitap/mobile_use/utils/cli_helpers.py +37 -0
  98. minitap/mobile_use/utils/cli_selection.py +143 -0
  99. minitap/mobile_use/utils/conversations.py +31 -0
  100. minitap/mobile_use/utils/decorators.py +124 -0
  101. minitap/mobile_use/utils/errors.py +6 -0
  102. minitap/mobile_use/utils/file.py +13 -0
  103. minitap/mobile_use/utils/logger.py +183 -0
  104. minitap/mobile_use/utils/media.py +186 -0
  105. minitap/mobile_use/utils/recorder.py +52 -0
  106. minitap/mobile_use/utils/requests_utils.py +37 -0
  107. minitap/mobile_use/utils/shell_utils.py +20 -0
  108. minitap/mobile_use/utils/test_ui_hierarchy.py +178 -0
  109. minitap/mobile_use/utils/time.py +6 -0
  110. minitap/mobile_use/utils/ui_hierarchy.py +132 -0
  111. minitap/mobile_use/utils/video.py +281 -0
  112. minitap_mobile_use-3.3.0.dist-info/METADATA +329 -0
  113. minitap_mobile_use-3.3.0.dist-info/RECORD +115 -0
  114. minitap_mobile_use-3.3.0.dist-info/WHEEL +4 -0
  115. minitap_mobile_use-3.3.0.dist-info/entry_points.txt +3 -0
@@ -0,0 +1,87 @@
1
+ from pathlib import Path
2
+
3
+ from jinja2 import Template
4
+ from langchain_core.messages import HumanMessage, SystemMessage
5
+ from langchain_google_genai import ChatGoogleGenerativeAI
6
+ from langchain_google_vertexai.chat_models import ChatVertexAI
7
+
8
+ from minitap.mobile_use.constants import EXECUTOR_MESSAGES_KEY
9
+ from minitap.mobile_use.context import MobileUseContext
10
+ from minitap.mobile_use.graph.state import State
11
+ from minitap.mobile_use.services.llm import get_llm, invoke_llm_with_timeout_message, with_fallback
12
+ from minitap.mobile_use.tools.index import (
13
+ EXECUTOR_WRAPPERS_TOOLS,
14
+ VIDEO_RECORDING_WRAPPERS,
15
+ get_tools_from_wrappers,
16
+ )
17
+ from minitap.mobile_use.utils.decorators import wrap_with_callbacks
18
+ from minitap.mobile_use.utils.logger import get_logger
19
+
20
+ logger = get_logger(__name__)
21
+
22
+
23
class ExecutorNode:
    """Turns the Cortex's structured decisions into tool-calling LLM turns.

    Renders the executor prompt from `executor.md`, binds the mobile tools to
    the configured LLM (plus a fallback model), and appends the model response
    to the executor message history.
    """

    def __init__(self, ctx: MobileUseContext):
        self.ctx = ctx

    @wrap_with_callbacks(
        before=lambda: logger.info("Starting Executor Agent..."),
        on_success=lambda _: logger.success("Executor Agent"),
        on_failure=lambda _: logger.error("Executor Agent"),
    )
    async def __call__(self, state: State):
        decisions = state.structured_decisions
        if not decisions:
            # Nothing to execute: record the problem as an agent thought and bail out.
            logger.warning("No structured decisions found.")
            return await state.asanitize_update(
                ctx=self.ctx,
                update={
                    "agents_thoughts": [
                        "No structured decisions found, I cannot execute anything."
                    ],
                },
                agent="executor",
            )

        prompt_text = Path(__file__).parent.joinpath("executor.md").read_text(encoding="utf-8")
        system_prompt = Template(prompt_text).render(
            platform=self.ctx.device.mobile_platform.value
        )
        # Fall back to the latest recorded thought when the Cortex left none.
        cortex_last_thought = state.cortex_last_thought or state.agents_thoughts[-1]
        messages = [
            SystemMessage(content=system_prompt),
            HumanMessage(content=cortex_last_thought),
            HumanMessage(content=decisions),
            *state.executor_messages,
        ]

        # Tool set: base executor tools, extended with video tools when recording is on.
        wrappers = list(EXECUTOR_WRAPPERS_TOOLS)
        if self.ctx.video_recording_enabled:
            wrappers.extend(VIDEO_RECORDING_WRAPPERS)

        bind_kwargs: dict = {
            "tools": get_tools_from_wrappers(self.ctx, wrappers),
        }

        primary = get_llm(ctx=self.ctx, name="executor")
        secondary = get_llm(ctx=self.ctx, name="executor", use_fallback=True)

        # ChatGoogleGenerativeAI does not support the "parallel_tool_calls" keyword
        if not isinstance(primary, ChatGoogleGenerativeAI | ChatVertexAI):
            bind_kwargs["parallel_tool_calls"] = True

        primary = primary.bind_tools(**bind_kwargs)
        secondary = secondary.bind_tools(**bind_kwargs)
        response = await with_fallback(
            main_call=lambda: invoke_llm_with_timeout_message(primary.ainvoke(messages)),
            fallback_call=lambda: invoke_llm_with_timeout_message(secondary.ainvoke(messages)),
        )
        return await state.asanitize_update(
            ctx=self.ctx,
            update={
                "cortex_last_thought": cortex_last_thought,
                EXECUTOR_MESSAGES_KEY: [response],
            },
            agent="executor",
        )
@@ -0,0 +1,152 @@
1
+ import asyncio
2
+ import copy
3
+ from typing import Any, override
4
+
5
+ from langchain_core.messages import AnyMessage, ToolCall, ToolMessage
6
+ from langchain_core.runnables import RunnableConfig
7
+ from langgraph.prebuilt import ToolNode
8
+ from langgraph.store.base import BaseStore
9
+ from langgraph.types import Command
10
+ from pydantic import BaseModel
11
+
12
+ from minitap.mobile_use.services.telemetry import telemetry
13
+ from minitap.mobile_use.utils.logger import get_logger
14
+
15
+ logger = get_logger(__name__)
16
+
17
+
18
class ExecutorToolNode(ToolNode):
    """
    ToolNode that runs tool calls one after the other - not simultaneously.
    If one error occurs, the remaining tool calls are aborted!
    """

    def __init__(self, tools, messages_key: str, trace_id: str | None = None):
        # trace_id: optional task identifier used to tag telemetry events.
        super().__init__(tools=tools, messages_key=messages_key)
        self._trace_id = trace_id

    @override
    async def _afunc(
        self,
        input: list[AnyMessage] | dict[str, Any] | BaseModel,
        config: RunnableConfig,
        *,
        store: BaseStore | None,
    ):
        # Async entry point invoked by the graph; delegates to the shared runner.
        return await self.__func(is_async=True, input=input, config=config, store=store)

    @override
    def _func(
        self,
        input: list[AnyMessage] | dict[str, Any] | BaseModel,
        config: RunnableConfig,
        *,
        store: BaseStore | None,
    ) -> Any:
        # Sync entry point: drive the shared async runner to completion.
        # NOTE(review): asyncio.get_event_loop() is deprecated outside a running
        # loop (3.10+), and run_until_complete raises if this thread's loop is
        # already running — confirm this path is only reached from synchronous,
        # loop-owning callers.
        loop = asyncio.get_event_loop()
        return loop.run_until_complete(
            self.__func(is_async=False, input=input, config=config, store=store)
        )

    async def __func(
        self,
        is_async: bool,
        input: list[AnyMessage] | dict[str, Any] | BaseModel,
        config: RunnableConfig,
        *,
        store: BaseStore | None,
    ) -> Any:
        """Run the parsed tool calls strictly in order.

        After the first failure, every remaining call is replaced with an
        "Aborted" error result instead of being executed. Each call's outcome
        is logged, and reported to telemetry when a trace id is configured.
        """
        tool_calls, input_type = self._parse_input(input, store)
        outputs: list[Command | ToolMessage] = []
        failed = False
        for call in tool_calls:
            if failed:
                # A previous call failed: short-circuit this one with an error output.
                output = self._get_erroneous_command(
                    call=call,
                    message="Aborted: a previous tool call failed!",
                )
            else:
                if is_async:
                    output = await self._arun_one(call, input_type, config)
                else:
                    output = self._run_one(call, input_type, config)
                failed = self._has_tool_call_failed(call, output)
                # None means the tool returned an unrecognized type; treat as failure.
                if failed is None:
                    output = self._get_erroneous_command(
                        call=call,
                        message=f"Unexpected tool output type: {type(output)}",
                    )
                    failed = True

            # Strip the injected graph state from the payload before logging it.
            call_without_state = copy.deepcopy(call)
            if "args" in call_without_state and "state" in call_without_state["args"]:
                del call_without_state["args"]["state"]
            if failed:
                # Best-effort extraction of the error text for logs/telemetry.
                error_msg = ""
                try:
                    if isinstance(output, ToolMessage):
                        error_msg = output.content
                    elif isinstance(output, Command):
                        tool_msg = self._get_tool_message(output)
                        error_msg = tool_msg.content
                except Exception:
                    error_msg = "Could not extract error details"

                logger.info(f"❌ Tool call failed: {call_without_state}")
                logger.info(f" Error: {error_msg}")

                # Capture executor action telemetry
                if self._trace_id:
                    telemetry.capture_executor_action(
                        task_id=self._trace_id,
                        tool_name=call["name"],
                        success=False,
                        error=str(error_msg)[:500] if error_msg else None,
                    )
            else:
                logger.info("✅ Tool call succeeded: " + str(call_without_state))

                # Capture executor action telemetry
                if self._trace_id:
                    telemetry.capture_executor_action(
                        task_id=self._trace_id,
                        tool_name=call["name"],
                        success=True,
                    )

            outputs.append(output)
        return self._combine_tool_outputs(outputs, input_type)  # type: ignore

    def _has_tool_call_failed(
        self,
        call: ToolCall,
        output: ToolMessage | Command,
    ) -> bool | None:
        """Return True/False for a recognized output type, None when unknown."""
        if isinstance(output, ToolMessage):
            return output.status == "error"
        if isinstance(output, Command):
            output_msg = self._get_tool_message(output)
            return output_msg.status == "error"
        return None

    def _get_erroneous_command(self, call: ToolCall, message: str) -> Command:
        """Build a Command carrying an error-status ToolMessage for `call`."""
        tool_message = ToolMessage(
            name=call["name"], tool_call_id=call["id"], content=message, status="error"
        )
        return Command(update={self.messages_key: [tool_message]})

    def _get_tool_message(self, cmd: Command) -> ToolMessage:
        """Extract the ToolMessage from a Command update, validating its shape.

        Raises ValueError when the update is not a dict, the messages key is
        missing, or its value is neither a ToolMessage nor a list ending in one.
        """
        if isinstance(cmd.update, dict):
            msg = cmd.update.get(self.messages_key)
            if isinstance(msg, list):
                if len(msg) == 0:
                    raise ValueError("No messages found in command update")
                if not isinstance(msg[-1], ToolMessage):
                    raise ValueError("Last message in command update is not a tool message")
                return msg[-1]
            elif isinstance(msg, ToolMessage):
                return msg
            elif msg is None:
                raise ValueError(f"Missing '{self.messages_key}' in command update")
            raise ValueError(f"Unexpected message type in command update: {type(msg)}")
        raise ValueError("Command update is not a dict")
@@ -0,0 +1,15 @@
1
+ ## Hopper
2
+
3
+ Extract relevant information from batch data. **Keep extracted data exactly as-is** - no reformatting.
4
+
5
+ ## Output
6
+ - **found**: `true` if data was found, `false` otherwise
7
+ - **output**: Extracted information if found, `null` otherwise
8
+ - **reason**: Brief explanation of search logic
9
+
10
+ ## Rules
11
+ 1. **Search entire input** - may contain hundreds of entries
12
+ 2. **For app package lookup**: Match app name (or variations) in package identifier
13
+ - Common patterns: lowercase app name, company+app, brand name, codenames
14
+ 3. **Prefer direct matches** over partial matches
15
+ 4. **Return `null`** if not found or if multiple ambiguous matches exist - don't guess
@@ -0,0 +1,44 @@
1
+ from pathlib import Path
2
+
3
+ from jinja2 import Template
4
+ from langchain_core.messages import HumanMessage, SystemMessage
5
+ from pydantic import BaseModel, Field
6
+
7
+ from minitap.mobile_use.context import MobileUseContext
8
+ from minitap.mobile_use.services.llm import get_llm, invoke_llm_with_timeout_message, with_fallback
9
+
10
+
11
class HopperOutput(BaseModel):
    # Structured result of a Hopper extraction run.
    found: bool = Field(description="True if the requested data was found, False otherwise.")
    output: str | None = Field(description="The extracted data if found, null otherwise.")
    reason: str = Field(
        description=(
            "A short explanation of what you looked for"
            " and how you decided what to extract."
        )
    )
18
+
19
+
20
async def hopper(
    ctx: MobileUseContext,
    request: str,
    data: str,
) -> HopperOutput:
    """Ask the Hopper LLM to dig the `request` out of the raw `data` blob.

    Returns the structured extraction result (found flag, extracted value,
    and the model's reasoning). Falls back to a secondary model on failure.
    """
    print("Starting Hopper Agent", flush=True)
    prompt_text = Path(__file__).parent.joinpath("hopper.md").read_text(encoding="utf-8")
    messages = [
        SystemMessage(content=Template(prompt_text).render()),
        HumanMessage(content=f"{request}\nHere is the data you must dig:\n{data}"),
    ]

    primary = get_llm(ctx=ctx, name="hopper", is_utils=True, temperature=0).with_structured_output(
        HopperOutput
    )
    secondary = get_llm(
        ctx=ctx, name="hopper", is_utils=True, use_fallback=True, temperature=0
    ).with_structured_output(HopperOutput)
    result: HopperOutput = await with_fallback(
        main_call=lambda: invoke_llm_with_timeout_message(primary.ainvoke(messages)),
        fallback_call=lambda: invoke_llm_with_timeout_message(secondary.ainvoke(messages)),
    )  # type: ignore
    return result
@@ -0,0 +1,12 @@
1
+ Here is the input for your analysis:
2
+
3
+ **Initial goal**: {{ initial_goal }}
4
+
5
+ **Subgoal plan**
6
+ {{ subgoal_plan }}
7
+
8
+ **Subgoals to examine (provided by the Cortex)**
9
+ {{ subgoals_to_examine }}
10
+
11
+ **Agent thoughts**
12
+ {{ agent_thoughts }}
@@ -0,0 +1,21 @@
1
+ ## You are the **Orchestrator**
2
+
3
+ Decide what to do next based on {{ platform }} mobile device execution state.
4
+
5
+ ## Input
6
+ - Current **subgoal plan** with statuses
7
+ - **Subgoals to examine** (PENDING/NOT_STARTED)
8
+ - **Agent thoughts** from execution
9
+ - **Initial goal**
10
+
11
+ ## Your Decisions
12
+
13
+ 1. **Mark subgoals complete**: Add finished subgoal IDs to `completed_subgoal_ids`
14
+ 2. **Set `needs_replaning = TRUE`** if repeated failures make current plan unworkable
15
+ 3. **Fill `reason`**: Final answer if goal complete, or explanation of decisions
16
+
17
+ ## Agent Roles (for context)
18
+ - **Planner**: Creates/updates subgoal plan
19
+ - **Cortex**: Analyzes screen, decides actions (may complete multiple subgoals at once)
20
+ - **Executor**: Executes actions on device
21
+ - **You (Orchestrator)**: Coordinate, track completion, trigger replanning
@@ -0,0 +1,134 @@
1
+ from pathlib import Path
2
+
3
+ from jinja2 import Template
4
+ from langchain_core.messages import HumanMessage, SystemMessage
5
+
6
+ from minitap.mobile_use.agents.orchestrator.types import OrchestratorOutput
7
+ from minitap.mobile_use.agents.planner.utils import (
8
+ all_completed,
9
+ complete_subgoals_by_ids,
10
+ fail_current_subgoal,
11
+ get_current_subgoal,
12
+ get_subgoals_by_ids,
13
+ nothing_started,
14
+ start_next_subgoal,
15
+ )
16
+ from minitap.mobile_use.context import MobileUseContext
17
+ from minitap.mobile_use.graph.state import State
18
+ from minitap.mobile_use.services.llm import get_llm, invoke_llm_with_timeout_message, with_fallback
19
+ from minitap.mobile_use.utils.decorators import wrap_with_callbacks
20
+ from minitap.mobile_use.utils.logger import get_logger
21
+
22
+ logger = get_logger(__name__)
23
+
24
+
25
class OrchestratorNode:
    """Tracks subgoal progress and decides whether to advance or replan.

    Asks the orchestrator LLM to review the subgoals the Cortex flagged as
    complete, then updates the plan accordingly: marking subgoals done,
    starting the next one, or failing the current one to trigger replanning.
    """

    def __init__(self, ctx: MobileUseContext):
        self.ctx = ctx

    @wrap_with_callbacks(
        before=lambda: logger.info("Starting Orchestrator Agent..."),
        on_success=lambda _: logger.success("Orchestrator Agent"),
        on_failure=lambda _: logger.error("Orchestrator Agent"),
    )
    async def __call__(self, state: State):
        plan_untouched = nothing_started(state.subgoal_plan)
        current_subgoal = get_current_subgoal(state.subgoal_plan)

        # No subgoal is in progress: simply start the next one and return.
        if plan_untouched or not current_subgoal:
            state.subgoal_plan = start_next_subgoal(state.subgoal_plan)
            new_subgoal = get_current_subgoal(state.subgoal_plan)
            prefix = (
                "Starting the first subgoal" if plan_untouched else "Starting the next subgoal"
            )
            return await _get_state_update(
                ctx=self.ctx,
                state=state,
                thoughts=[f"{prefix}: {new_subgoal}"],
                update_plan=True,
            )

        subgoals_to_examine = get_subgoals_by_ids(
            subgoals=state.subgoal_plan,
            ids=state.complete_subgoals_by_ids,
        )
        if not subgoals_to_examine:
            return await _get_state_update(
                ctx=self.ctx, state=state, thoughts=["No subgoal to examine."]
            )

        prompt_dir = Path(__file__).parent
        system_message = Template(
            prompt_dir.joinpath("orchestrator.md").read_text(encoding="utf-8")
        ).render(platform=self.ctx.device.mobile_platform.value)
        human_message = Template(
            prompt_dir.joinpath("human.md").read_text(encoding="utf-8")
        ).render(
            initial_goal=state.initial_goal,
            subgoal_plan="\n".join(str(s) for s in state.subgoal_plan),
            subgoals_to_examine="\n".join(str(s) for s in subgoals_to_examine),
            agent_thoughts="\n".join(state.agents_thoughts),
        )
        messages = [
            SystemMessage(content=system_message),
            HumanMessage(content=human_message),
        ]

        primary = get_llm(
            ctx=self.ctx, name="orchestrator", temperature=1
        ).with_structured_output(OrchestratorOutput)
        secondary = get_llm(
            ctx=self.ctx, name="orchestrator", use_fallback=True, temperature=1
        ).with_structured_output(OrchestratorOutput)
        response: OrchestratorOutput = await with_fallback(
            main_call=lambda: invoke_llm_with_timeout_message(primary.ainvoke(messages)),
            fallback_call=lambda: invoke_llm_with_timeout_message(secondary.ainvoke(messages)),
        )  # type: ignore

        thoughts = [response.reason]

        if response.needs_replaning:
            # Abandon the current subgoal so the Planner can produce a new plan.
            state.subgoal_plan = fail_current_subgoal(state.subgoal_plan)
            thoughts.append("==== END OF PLAN, REPLANNING ====")
            return await _get_state_update(
                ctx=self.ctx, state=state, thoughts=thoughts, update_plan=True
            )

        state.subgoal_plan = complete_subgoals_by_ids(
            subgoals=state.subgoal_plan,
            ids=response.completed_subgoal_ids,
        )
        if all_completed(state.subgoal_plan):
            logger.success("All the subgoals have been completed successfully.")
            return await _get_state_update(
                ctx=self.ctx, state=state, thoughts=thoughts, update_plan=True
            )

        if current_subgoal.id in response.completed_subgoal_ids:
            # The current subgoal is done: move on to the next one.
            state.subgoal_plan = start_next_subgoal(state.subgoal_plan)
            new_subgoal = get_current_subgoal(state.subgoal_plan)
            thoughts.append(f"==== NEXT SUBGOAL: {new_subgoal} ====")
        # Otherwise the current subgoal is not yet complete: keep working on it.

        return await _get_state_update(
            ctx=self.ctx, state=state, thoughts=thoughts, update_plan=True
        )
118
+
119
+
120
async def _get_state_update(
    ctx: MobileUseContext,
    state: State,
    thoughts: list[str],
    update_plan: bool = False,
):
    """Build the sanitized state update shared by every orchestrator exit path.

    Always records the new thoughts and resets the list of subgoal ids the
    Cortex asked to examine; when `update_plan` is set, also propagates the
    (possibly mutated) subgoal plan and notifies the plan-change callback.
    """
    extra: dict = {}
    if update_plan:
        extra["subgoal_plan"] = state.subgoal_plan
        if ctx.on_plan_changes:
            await ctx.on_plan_changes(state.subgoal_plan, False)
    update = {
        "agents_thoughts": thoughts,
        "complete_subgoals_by_ids": [],
        **extra,
    }
    return await state.asanitize_update(ctx=ctx, update=update, agent="orchestrator")
@@ -0,0 +1,11 @@
1
+ from typing import Annotated
2
+
3
+ from pydantic import BaseModel
4
+
5
+
6
class OrchestratorOutput(BaseModel):
    # Structured output the orchestrator LLM must produce.
    # Subgoals the orchestrator judges finished; cleared to [] when none.
    completed_subgoal_ids: Annotated[
        list[str], "IDs of subgoals that can now be marked as complete"
    ] = []
    # NOTE(review): "needs_replaning" (sic) — the misspelling is load-bearing:
    # it is the schema field name the LLM fills and it is read as
    # `response.needs_replaning` by the orchestrator node; renaming it must be
    # coordinated across both places.
    needs_replaning: Annotated[bool, "Whether the orchestrator needs to replan the subgoal plan"]
    # Final answer if the goal is complete, otherwise an explanation of the decisions.
    reason: str
@@ -0,0 +1,25 @@
1
+ You are a helpful assistant tasked with generating the final structured output of a multi-agent reasoning process.
2
+
3
+ ## The original goal was:
4
+ {{ initial_goal }}
5
+
6
+ {% if agents_thoughts %}
7
+ Throughout the reasoning process, the following agent thoughts were collected:
8
+ {% for thought in agents_thoughts %}
9
+ - {{ thought }}
10
+ {% endfor %}
11
+ {% endif %}
12
+
13
+ {% if last_ai_message %}
14
+ The last message generated by the graph execution was:
15
+ "{{ last_ai_message }}"
16
+ {% endif %}
17
+
18
+ {% if not structured_output %}
19
+ Please generate a well-structured JSON object based on the following instructions:
20
+
21
+ > "{{ output_description }}"
22
+
23
+ Only return the JSON object, with fields matching the description as closely as possible.
24
+ Do not include explanations or markdown, just the raw JSON.
25
+ {% endif %}
@@ -0,0 +1,85 @@
1
+ import json
2
+ from pathlib import Path
3
+
4
+ from jinja2 import Template
5
+ from langchain_core.messages import BaseMessage, HumanMessage, SystemMessage
6
+ from pydantic import BaseModel
7
+
8
+ from minitap.mobile_use.config import OutputConfig
9
+ from minitap.mobile_use.context import MobileUseContext
10
+ from minitap.mobile_use.graph.state import State
11
+ from minitap.mobile_use.services.llm import get_llm, invoke_llm_with_timeout_message, with_fallback
12
+ from minitap.mobile_use.utils.conversations import is_ai_message
13
+ from minitap.mobile_use.utils.logger import get_logger
14
+
15
+ logger = get_logger(__name__)
16
+
17
+
18
+ async def outputter(
19
+ ctx: MobileUseContext, output_config: OutputConfig, graph_output: State
20
+ ) -> dict:
21
+ logger.info("Starting Outputter Agent")
22
+ last_message = graph_output.messages[-1] if graph_output.messages else None
23
+
24
+ system_message = (
25
+ "You are a helpful assistant tasked with generating "
26
+ + "the final structured output of a multi-agent reasoning process."
27
+ )
28
+ human_message = Template(
29
+ Path(__file__).parent.joinpath("human.md").read_text(encoding="utf-8")
30
+ ).render(
31
+ initial_goal=graph_output.initial_goal,
32
+ agents_thoughts=graph_output.agents_thoughts,
33
+ structured_output=output_config.structured_output,
34
+ output_description=output_config.output_description,
35
+ last_ai_message=last_message.content
36
+ if last_message and is_ai_message(message=last_message)
37
+ else None,
38
+ )
39
+
40
+ messages: list[BaseMessage] = [
41
+ SystemMessage(content=system_message),
42
+ HumanMessage(content=human_message),
43
+ ]
44
+
45
+ if output_config.output_description:
46
+ messages.append(HumanMessage(content=output_config.output_description))
47
+
48
+ llm = get_llm(ctx=ctx, name="outputter", is_utils=True, temperature=1)
49
+ llm_fallback = get_llm(
50
+ ctx=ctx, name="outputter", is_utils=True, use_fallback=True, temperature=1
51
+ )
52
+ structured_llm = llm
53
+ structured_llm_fallback = llm_fallback
54
+
55
+ if output_config.structured_output:
56
+ schema: dict | type[BaseModel] | None = None
57
+ so = output_config.structured_output
58
+
59
+ if isinstance(so, dict):
60
+ schema = so
61
+ elif isinstance(so, BaseModel):
62
+ schema = type(so)
63
+ elif isinstance(so, type) and issubclass(so, BaseModel):
64
+ schema = so
65
+
66
+ if schema is not None:
67
+ structured_llm = llm.with_structured_output(schema)
68
+ structured_llm_fallback = llm_fallback.with_structured_output(schema)
69
+
70
+ response = await with_fallback(
71
+ main_call=lambda: invoke_llm_with_timeout_message(structured_llm.ainvoke(messages)),
72
+ fallback_call=lambda: invoke_llm_with_timeout_message(
73
+ structured_llm_fallback.ainvoke(messages)
74
+ ),
75
+ ) # type: ignore
76
+ if isinstance(response, BaseModel):
77
+ if output_config.output_description and hasattr(response, "content"):
78
+ response = json.loads(response.content) # type: ignore
79
+ return response
80
+ return response.model_dump()
81
+ elif hasattr(response, "content"):
82
+ return json.loads(response.content) # type: ignore
83
+ else:
84
+ logger.info("Found unknown response type: " + str(type(response)))
85
+ return response