minitap-mobile-use 2.5.3__py3-none-any.whl → 2.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of minitap-mobile-use might be problematic. Click here for more details.

Files changed (43) hide show
  1. minitap/mobile_use/agents/contextor/contextor.py +0 -8
  2. minitap/mobile_use/agents/cortex/cortex.md +122 -36
  3. minitap/mobile_use/agents/cortex/cortex.py +32 -17
  4. minitap/mobile_use/agents/cortex/types.py +18 -4
  5. minitap/mobile_use/agents/executor/executor.md +3 -3
  6. minitap/mobile_use/agents/executor/executor.py +10 -3
  7. minitap/mobile_use/agents/hopper/hopper.md +30 -2
  8. minitap/mobile_use/agents/hopper/hopper.py +19 -15
  9. minitap/mobile_use/agents/orchestrator/orchestrator.py +14 -5
  10. minitap/mobile_use/agents/outputter/outputter.py +13 -3
  11. minitap/mobile_use/agents/planner/planner.md +20 -9
  12. minitap/mobile_use/agents/planner/planner.py +12 -5
  13. minitap/mobile_use/agents/screen_analyzer/human.md +16 -0
  14. minitap/mobile_use/agents/screen_analyzer/screen_analyzer.py +111 -0
  15. minitap/mobile_use/clients/ios_client.py +7 -3
  16. minitap/mobile_use/config.py +87 -24
  17. minitap/mobile_use/controllers/mobile_command_controller.py +354 -88
  18. minitap/mobile_use/controllers/platform_specific_commands_controller.py +41 -27
  19. minitap/mobile_use/controllers/types.py +95 -0
  20. minitap/mobile_use/graph/graph.py +55 -11
  21. minitap/mobile_use/graph/state.py +10 -3
  22. minitap/mobile_use/main.py +12 -4
  23. minitap/mobile_use/sdk/agent.py +113 -72
  24. minitap/mobile_use/sdk/examples/smart_notification_assistant.py +59 -10
  25. minitap/mobile_use/sdk/services/platform.py +15 -1
  26. minitap/mobile_use/sdk/types/platform.py +1 -0
  27. minitap/mobile_use/sdk/types/task.py +10 -1
  28. minitap/mobile_use/servers/device_hardware_bridge.py +13 -6
  29. minitap/mobile_use/services/llm.py +5 -2
  30. minitap/mobile_use/tools/index.py +7 -9
  31. minitap/mobile_use/tools/mobile/{clear_text.py → focus_and_clear_text.py} +7 -7
  32. minitap/mobile_use/tools/mobile/{input_text.py → focus_and_input_text.py} +8 -8
  33. minitap/mobile_use/tools/mobile/long_press_on.py +130 -15
  34. minitap/mobile_use/tools/mobile/swipe.py +3 -26
  35. minitap/mobile_use/tools/mobile/tap.py +41 -28
  36. minitap/mobile_use/tools/mobile/wait_for_delay.py +84 -0
  37. minitap/mobile_use/utils/cli_helpers.py +10 -6
  38. {minitap_mobile_use-2.5.3.dist-info → minitap_mobile_use-2.7.0.dist-info}/METADATA +1 -1
  39. {minitap_mobile_use-2.5.3.dist-info → minitap_mobile_use-2.7.0.dist-info}/RECORD +41 -39
  40. minitap/mobile_use/tools/mobile/glimpse_screen.py +0 -74
  41. minitap/mobile_use/tools/mobile/wait_for_animation_to_end.py +0 -64
  42. {minitap_mobile_use-2.5.3.dist-info → minitap_mobile_use-2.7.0.dist-info}/WHEEL +0 -0
  43. {minitap_mobile_use-2.5.3.dist-info → minitap_mobile_use-2.7.0.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,95 @@
1
+ from pydantic import BaseModel, ConfigDict, Field
2
+
3
+
4
class TapOutput(BaseModel):
    """Result of a tap operation on the device screen."""

    # None means the tap succeeded; otherwise a human-readable failure reason.
    error: str | None = Field(description="Error message if tap failed", default=None)
8
+
9
+
10
class Bounds(BaseModel):
    """Axis-aligned bounding box of a UI element, in screen pixels."""

    x1: int
    y1: int
    x2: int
    y2: int

    def get_center(self) -> "CoordinatesSelectorRequest":
        """Return the midpoint of the box as a coordinate selector."""
        center_x = (self.x1 + self.x2) // 2
        center_y = (self.y1 + self.y2) // 2
        return CoordinatesSelectorRequest(x=center_x, y=center_y)
24
+
25
+
26
class CoordinatesSelectorRequest(BaseModel):
    """Absolute pixel coordinates on the device screen."""

    model_config = ConfigDict(extra="forbid")

    x: int
    y: int

    def to_str(self):
        """Render as the 'x, y' string form used by the command controller."""
        return ", ".join((str(self.x), str(self.y)))
33
+
34
+
35
class PercentagesSelectorRequest(BaseModel):
    """Screen position expressed as percentages of screen width/height.

    Examples:
        0%,0%     -> top-left corner
        100%,100% -> bottom-right corner
        50%,50%   -> center
    """

    # NOTE: in the original, the string above sat AFTER model_config, making it
    # a no-op expression statement instead of the class docstring. Moved to the
    # proper docstring position so it reaches __doc__ (and pydantic's schema).
    model_config = ConfigDict(extra="forbid")

    x_percent: int = Field(ge=0, le=100, description="X percentage (0-100)")
    y_percent: int = Field(ge=0, le=100, description="Y percentage (0-100)")

    def to_str(self):
        """Render as the 'x%, y%' string form."""
        return f"{self.x_percent}%, {self.y_percent}%"

    def to_coords(self, width: int, height: int) -> CoordinatesSelectorRequest:
        """Convert percentages to pixel coordinates.

        The result is clamped to [0, dim - 1] so that 100% maps to the last
        addressable pixel rather than one past the screen edge.
        """
        x = min(max(int(width * self.x_percent / 100), 0), max(0, width - 1))
        y = min(max(int(height * self.y_percent / 100), 0), max(0, height - 1))
        return CoordinatesSelectorRequest(x=x, y=y)
54
+
55
+
56
class SwipeStartEndCoordinatesRequest(BaseModel):
    """Swipe gesture described by absolute start/end pixel coordinates."""

    model_config = ConfigDict(extra="forbid")

    start: CoordinatesSelectorRequest
    end: CoordinatesSelectorRequest

    def to_dict(self):
        """Serialize both endpoints to their 'x, y' string forms."""
        start_str = self.start.to_str()
        end_str = self.end.to_str()
        return {"start": start_str, "end": end_str}
63
+
64
+
65
class SwipeStartEndPercentagesRequest(BaseModel):
    """Swipe gesture described by percentage-based start/end positions."""

    model_config = ConfigDict(extra="forbid")

    start: PercentagesSelectorRequest
    end: PercentagesSelectorRequest

    def to_dict(self):
        """Serialize both endpoints to their 'x%, y%' string forms."""
        endpoints = (("start", self.start), ("end", self.end))
        return {name: point.to_str() for name, point in endpoints}

    def to_coords(self, width: int, height: int) -> SwipeStartEndCoordinatesRequest:
        """Map this percentage-based swipe onto a width x height screen."""
        pixel_start = self.start.to_coords(width, height)
        pixel_end = self.end.to_coords(width, height)
        return SwipeStartEndCoordinatesRequest(start=pixel_start, end=pixel_end)
79
+
80
+
81
class SwipeRequest(BaseModel):
    """A swipe gesture plus an optional duration override."""

    model_config = ConfigDict(extra="forbid")

    swipe_mode: SwipeStartEndCoordinatesRequest | SwipeStartEndPercentagesRequest
    duration: int | None = None  # in ms, default is 400ms

    def to_dict(self):
        """Serialize the swipe for the command controller.

        Returns the endpoint strings from the swipe mode, plus the duration
        (in ms) when one was explicitly set.
        """
        # Both union members implement to_dict(), so no isinstance dispatch
        # is needed (the original check was a tautology over the full union).
        res: dict = {}
        res |= self.swipe_mode.to_dict()
        # `is not None` rather than truthiness, so an explicit duration of 0
        # is not silently dropped.
        if self.duration is not None:
            res |= {"duration": self.duration}
        return res
@@ -1,8 +1,7 @@
1
+ from collections.abc import Sequence
1
2
  from typing import Literal
2
3
 
3
- from langchain_core.messages import (
4
- AIMessage,
5
- )
4
+ from langchain_core.messages import AIMessage
6
5
  from langgraph.constants import END, START
7
6
  from langgraph.graph import StateGraph
8
7
  from langgraph.graph.state import CompiledStateGraph
@@ -18,6 +17,7 @@ from minitap.mobile_use.agents.planner.utils import (
18
17
  get_current_subgoal,
19
18
  one_of_them_is_failure,
20
19
  )
20
+ from minitap.mobile_use.agents.screen_analyzer.screen_analyzer import ScreenAnalyzerNode
21
21
  from minitap.mobile_use.agents.summarizer.summarizer import SummarizerNode
22
22
  from minitap.mobile_use.constants import EXECUTOR_MESSAGES_KEY
23
23
  from minitap.mobile_use.context import MobileUseContext
@@ -28,6 +28,22 @@ from minitap.mobile_use.utils.logger import get_logger
28
28
  logger = get_logger(__name__)
29
29
 
30
30
 
31
def convergence_node(state: State):
    """No-op join node where parallel execution paths rejoin.

    Returns an empty update so the graph state is left untouched.
    """
    return {}
34
+
35
+
36
def convergence_gate(
    state: State,
) -> Literal["continue", "end"]:
    """Decide whether the run is finished once parallel paths have rejoined."""
    logger.info("Starting convergence_gate")
    if not all_completed(state.subgoal_plan):
        return "continue"
    logger.info("All subgoals are completed, ending the goal")
    return "end"
45
+
46
+
31
47
  def post_orchestrator_gate(
32
48
  state: State,
33
49
  ) -> Literal["continue", "replan", "end"]:
@@ -50,11 +66,22 @@ def post_orchestrator_gate(
50
66
 
51
67
def post_cortex_gate(
    state: State,
) -> Sequence[str]:
    """Fan out from the cortex to every downstream node with pending work."""
    logger.info("Starting post_cortex_gate")
    next_nodes: list[str] = []

    # Route to the orchestrator when subgoals must be marked complete, or when
    # the cortex produced no decisions at all — the latter avoids getting
    # stuck with nowhere to go.
    if state.complete_subgoals_by_ids or not state.structured_decisions:
        next_nodes.append("review_subgoals")

    if state.structured_decisions:
        next_nodes.append("execute_decisions")

    if state.screen_analysis_prompt:
        next_nodes.append("analyze_screen")

    return next_nodes
58
85
 
59
86
 
60
87
  def post_executor_gate(
@@ -96,7 +123,11 @@ async def get_graph(ctx: MobileUseContext) -> CompiledStateGraph:
96
123
 
97
124
  graph_builder.add_node("summarizer", SummarizerNode(ctx))
98
125
 
99
- # Linking nodes
126
+ graph_builder.add_node("screen_analyzer", ScreenAnalyzerNode(ctx))
127
+
128
+ graph_builder.add_node(node="convergence", action=convergence_node, defer=True)
129
+
130
+ ## Linking nodes
100
131
  graph_builder.add_edge(START, "planner")
101
132
  graph_builder.add_edge("planner", "orchestrator")
102
133
  graph_builder.add_conditional_edges(
@@ -113,8 +144,9 @@ async def get_graph(ctx: MobileUseContext) -> CompiledStateGraph:
113
144
  "cortex",
114
145
  post_cortex_gate,
115
146
  {
116
- "continue": "executor",
117
- "end_subgoal": "orchestrator",
147
+ "review_subgoals": "orchestrator",
148
+ "analyze_screen": "screen_analyzer",
149
+ "execute_decisions": "executor",
118
150
  },
119
151
  )
120
152
  graph_builder.add_conditional_edges(
@@ -123,6 +155,18 @@ async def get_graph(ctx: MobileUseContext) -> CompiledStateGraph:
123
155
  {"invoke_tools": "executor_tools", "skip": "summarizer"},
124
156
  )
125
157
  graph_builder.add_edge("executor_tools", "summarizer")
126
- graph_builder.add_edge("summarizer", "contextor")
158
+
159
+ graph_builder.add_edge("orchestrator", "convergence")
160
+ graph_builder.add_edge("screen_analyzer", "convergence")
161
+ graph_builder.add_edge("summarizer", "convergence")
162
+
163
+ graph_builder.add_conditional_edges(
164
+ source="convergence",
165
+ path=convergence_gate,
166
+ path_map={
167
+ "continue": "contextor",
168
+ "end": END,
169
+ },
170
+ )
127
171
 
128
172
  return graph_builder.compile()
@@ -1,13 +1,14 @@
1
+ from typing import Annotated
2
+
1
3
  from langchain_core.messages import AIMessage, AnyMessage
2
4
  from langgraph.graph import add_messages
3
5
  from langgraph.prebuilt.chat_agent_executor import AgentStatePydantic
4
- from typing import Annotated
5
6
 
6
7
  from minitap.mobile_use.agents.planner.types import Subgoal
7
8
  from minitap.mobile_use.config import AgentNode
9
+ from minitap.mobile_use.context import MobileUseContext
8
10
  from minitap.mobile_use.utils.logger import get_logger
9
11
  from minitap.mobile_use.utils.recorder import record_interaction
10
- from minitap.mobile_use.context import MobileUseContext
11
12
 
12
13
  logger = get_logger(__name__)
13
14
 
@@ -24,7 +25,6 @@ class State(AgentStatePydantic):
24
25
  subgoal_plan: Annotated[list[Subgoal], "The current plan, made of subgoals"]
25
26
 
26
27
  # contextor related keys
27
- latest_screenshot_base64: Annotated[str | None, "Latest screenshot of the device", take_last]
28
28
  latest_ui_hierarchy: Annotated[
29
29
  list[dict] | None, "Latest UI hierarchy of the device", take_last
30
30
  ]
@@ -43,6 +43,13 @@ class State(AgentStatePydantic):
43
43
  take_last,
44
44
  ]
45
45
 
46
+ # screen_analyzer related keys
47
+ screen_analysis_prompt: Annotated[
48
+ str | None,
49
+ "Prompt for the screen_analyzer agent to analyze the screen",
50
+ take_last,
51
+ ]
52
+
46
53
  # executor related keys
47
54
  executor_messages: Annotated[list[AnyMessage], "Sequential Executor messages", add_messages]
48
55
  cortex_last_thought: Annotated[str | None, "Last thought of the cortex for the executor"]
@@ -6,6 +6,7 @@ from adbutils import AdbClient
6
6
  from langchain.callbacks.base import Callbacks
7
7
  from rich.console import Console
8
8
  from typing import Annotated
9
+ from shutil import which
9
10
 
10
11
  from minitap.mobile_use.config import (
11
12
  initialize_llm_config,
@@ -102,10 +103,17 @@ def main(
102
103
  Run the Mobile-use agent to automate tasks on a mobile device.
103
104
  """
104
105
  console = Console()
105
- adb_client = AdbClient(
106
- host=settings.ADB_HOST or "localhost",
107
- port=settings.ADB_PORT or 5037,
108
- )
106
+
107
+ adb_client = None
108
+ try:
109
+ if which("adb"):
110
+ adb_client = AdbClient(
111
+ host=settings.ADB_HOST or "localhost",
112
+ port=settings.ADB_PORT or 5037,
113
+ )
114
+ except Exception:
115
+ pass # ADB not available, will only support iOS devices
116
+
109
117
  display_device_status(console, adb_client=adb_client)
110
118
  asyncio.run(
111
119
  run_automation(
@@ -53,6 +53,7 @@ from minitap.mobile_use.sdk.types.task import (
53
53
  AgentProfile,
54
54
  PlatformTaskInfo,
55
55
  PlatformTaskRequest,
56
+ CloudDevicePlatformTaskRequest,
56
57
  Task,
57
58
  TaskRequest,
58
59
  )
@@ -89,6 +90,8 @@ class Agent:
89
90
  _screen_api_client: ScreenApiClient
90
91
  _hw_bridge_client: DeviceHardwareClient
91
92
  _adb_client: AdbClient | None
93
+ _current_task: asyncio.Task | None = None
94
+ _task_lock: asyncio.Lock
92
95
 
93
96
  def __init__(self, *, config: AgentConfig | None = None):
94
97
  self._config = config or get_default_agent_config()
@@ -101,6 +104,7 @@ class Agent:
101
104
  self._is_default_screen_api = (
102
105
  self._config.servers.screen_api_base_url == DEFAULT_SCREEN_API_BASE_URL
103
106
  )
107
+ self._task_lock = asyncio.Lock()
104
108
  # Initialize platform service if API key is available in environment
105
109
  # Note: Can also be initialized later with API key from request
106
110
  if settings.MINITAP_API_KEY:
@@ -125,7 +129,7 @@ class Agent:
125
129
 
126
130
  # Get first available device ID
127
131
  if not self._config.device_id or not self._config.device_platform:
128
- device_id, platform = get_first_device()
132
+ device_id, platform = get_first_device(logger=logger)
129
133
  else:
130
134
  device_id, platform = self._config.device_id, self._config.device_platform
131
135
 
@@ -241,6 +245,9 @@ class Agent:
241
245
  else:
242
246
  raise PlatformServiceUninitializedError()
243
247
  task_info = await platform_service.create_task_run(request=request)
248
+ if isinstance(request, CloudDevicePlatformTaskRequest):
249
+ request.task_run_id = task_info.task_run.id
250
+ request.task_run_id_available_event.set()
244
251
  self._config.agent_profiles[task_info.llm_profile.name] = task_info.llm_profile
245
252
  request = task_info.task_request
246
253
  return await self._run_task(
@@ -336,78 +343,112 @@ class Agent:
336
343
  state = self._get_graph_state(task=task)
337
344
  graph_input = state.model_dump()
338
345
 
339
- last_state: State | None = None
340
- last_state_snapshot: dict | None = None
341
- output = None
342
- try:
343
- logger.info(f"[{task_name}] Invoking graph with input: {graph_input}")
344
- await task.set_status(status="running", message="Invoking graph...")
345
- async for chunk in (await get_graph(context)).astream(
346
- input=graph_input,
347
- config={
348
- "recursion_limit": task.request.max_steps,
349
- "callbacks": self._config.graph_config_callbacks,
350
- },
351
- stream_mode=["messages", "custom", "updates", "values"],
352
- ):
353
- stream_mode, payload = chunk
354
- if stream_mode == "values":
355
- last_state_snapshot = payload # type: ignore
356
- last_state = State(**last_state_snapshot) # type: ignore
357
- if task.request.thoughts_output_path:
358
- record_events(
359
- output_path=task.request.thoughts_output_path,
360
- events=last_state.agents_thoughts,
361
- )
362
-
363
- if stream_mode == "updates":
364
- for _, value in payload.items(): # type: ignore node name, node output
365
- if value and "agents_thoughts" in value:
366
- new_thoughts = value["agents_thoughts"]
367
- last_item = new_thoughts[-1] if new_thoughts else None
368
- if last_item:
369
- log_agent_thought(
370
- agent_thought=last_item,
371
- )
372
-
373
- if not last_state:
374
- err = f"[{task_name}] No result received from graph"
346
+ async def _execute_task_logic():
347
+ last_state: State | None = None
348
+ last_state_snapshot: dict | None = None
349
+ output = None
350
+ try:
351
+ logger.info(f"[{task_name}] Invoking graph with input: {graph_input}")
352
+ await task.set_status(status="running", message="Invoking graph...")
353
+ async for chunk in (await get_graph(context)).astream(
354
+ input=graph_input,
355
+ config={
356
+ "recursion_limit": task.request.max_steps,
357
+ "callbacks": self._config.graph_config_callbacks,
358
+ },
359
+ stream_mode=["messages", "custom", "updates", "values"],
360
+ ):
361
+ stream_mode, payload = chunk
362
+ if stream_mode == "values":
363
+ last_state_snapshot = payload # type: ignore
364
+ last_state = State(**last_state_snapshot) # type: ignore
365
+ if task.request.thoughts_output_path:
366
+ record_events(
367
+ output_path=task.request.thoughts_output_path,
368
+ events=last_state.agents_thoughts,
369
+ )
370
+
371
+ if stream_mode == "updates":
372
+ for _, value in payload.items(): # type: ignore node name, node output
373
+ if value and "agents_thoughts" in value:
374
+ new_thoughts = value["agents_thoughts"]
375
+ last_item = new_thoughts[-1] if new_thoughts else None
376
+ if last_item:
377
+ log_agent_thought(
378
+ agent_thought=last_item,
379
+ )
380
+
381
+ if not last_state:
382
+ err = f"[{task_name}] No result received from graph"
383
+ logger.warning(err)
384
+ await task.finalize(content=output, state=last_state_snapshot, error=err)
385
+ return None
386
+
387
+ print_ai_response_to_stderr(graph_result=last_state)
388
+ output = await self._extract_output(
389
+ task_name=task_name,
390
+ ctx=context,
391
+ request=request,
392
+ output_config=output_config,
393
+ state=last_state,
394
+ )
395
+ logger.info(f"✅ Automation '{task_name}' is success ✅")
396
+ await task.finalize(content=output, state=last_state_snapshot)
397
+ return output
398
+ except asyncio.CancelledError:
399
+ err = f"[{task_name}] Task cancelled"
375
400
  logger.warning(err)
376
- await task.finalize(content=output, state=last_state_snapshot, error=err)
377
- return None
401
+ await task.finalize(
402
+ content=output,
403
+ state=last_state_snapshot,
404
+ error=err,
405
+ cancelled=True,
406
+ )
407
+ raise
408
+ except Exception as e:
409
+ err = f"[{task_name}] Error running automation: {e}"
410
+ logger.error(err)
411
+ await task.finalize(
412
+ content=output,
413
+ state=last_state_snapshot,
414
+ error=err,
415
+ )
416
+ raise
417
+ finally:
418
+ self._finalize_tracing(task=task, context=context)
378
419
 
379
- print_ai_response_to_stderr(graph_result=last_state)
380
- output = await self._extract_output(
381
- task_name=task_name,
382
- ctx=context,
383
- request=request,
384
- output_config=output_config,
385
- state=last_state,
386
- )
387
- logger.info(f"✅ Automation '{task_name}' is success ✅")
388
- await task.finalize(content=output, state=last_state_snapshot)
389
- except asyncio.CancelledError:
390
- err = f"[{task_name}] Task cancelled"
391
- logger.warning(err)
392
- await task.finalize(
393
- content=output,
394
- state=last_state_snapshot,
395
- error=err,
396
- cancelled=True,
397
- )
398
- raise
399
- except Exception as e:
400
- err = f"[{task_name}] Error running automation: {e}"
401
- logger.error(err)
402
- await task.finalize(
403
- content=output,
404
- state=last_state_snapshot,
405
- error=err,
406
- )
407
- raise
408
- finally:
409
- self._finalize_tracing(task=task, context=context)
410
- return output
420
+ async with self._task_lock:
421
+ if self._current_task and not self._current_task.done():
422
+ logger.warning(
423
+ "Another automation task is already running. "
424
+ "Stopping it before starting the new one."
425
+ )
426
+ self.stop_current_task()
427
+ try:
428
+ await self._current_task
429
+ except asyncio.CancelledError:
430
+ pass
431
+
432
+ try:
433
+ self._current_task = asyncio.create_task(_execute_task_logic())
434
+ return await self._current_task
435
+ finally:
436
+ self._current_task = None
437
+
438
+ def stop_current_task(self):
439
+ """Requests cancellation of the currently running automation task."""
440
+ if self._current_task and not self._current_task.done():
441
+ logger.info("Requesting to stop the current automation task...")
442
+ was_cancelled = self._current_task.cancel()
443
+ if was_cancelled:
444
+ logger.success("Cancellation request for the current task was sent.")
445
+ else:
446
+ logger.warning(
447
+ "Could not send cancellation request for the current task "
448
+ "(it may already be completing)."
449
+ )
450
+ else:
451
+ logger.info("No active automation task to stop.")
411
452
 
412
453
  def is_healthy(self):
413
454
  """
@@ -522,11 +563,11 @@ class Agent:
522
563
  initial_goal=task.request.goal,
523
564
  subgoal_plan=[],
524
565
  latest_ui_hierarchy=None,
525
- latest_screenshot_base64=None,
526
566
  focused_app_info=None,
527
567
  device_date=None,
528
568
  structured_decisions=None,
529
569
  complete_subgoals_by_ids=[],
570
+ screen_analysis_prompt=None,
530
571
  agents_thoughts=[],
531
572
  remaining_steps=task.request.max_steps,
532
573
  executor_messages=[],
@@ -22,6 +22,7 @@ from datetime import datetime
22
22
  from enum import Enum
23
23
 
24
24
  from pydantic import BaseModel, Field
25
+
25
26
  from minitap.mobile_use.config import LLM, LLMConfig, LLMConfigUtils, LLMWithFallback
26
27
  from minitap.mobile_use.sdk import Agent
27
28
  from minitap.mobile_use.sdk.builders import Builders
@@ -62,17 +63,42 @@ def get_agent() -> Agent:
62
63
  analyzer_profile = AgentProfile(
63
64
  name="analyzer",
64
65
  llm_config=LLMConfig(
65
- planner=LLM(provider="openrouter", model="meta-llama/llama-4-scout"),
66
- orchestrator=LLM(provider="openrouter", model="meta-llama/llama-4-scout"),
66
+ planner=LLMWithFallback(
67
+ provider="openrouter",
68
+ model="meta-llama/llama-4-scout",
69
+ fallback=LLM(provider="openrouter", model="meta-llama/llama-4-maverick"),
70
+ ),
71
+ orchestrator=LLMWithFallback(
72
+ provider="openrouter",
73
+ model="meta-llama/llama-4-scout",
74
+ fallback=LLM(provider="openrouter", model="meta-llama/llama-4-maverick"),
75
+ ),
67
76
  cortex=LLMWithFallback(
68
77
  provider="openai",
69
78
  model="o4-mini",
70
79
  fallback=LLM(provider="openai", model="gpt-5"),
71
80
  ),
72
- executor=LLM(provider="openai", model="gpt-5-nano"),
81
+ screen_analyzer=LLMWithFallback(
82
+ provider="openai",
83
+ model="gpt-4o",
84
+ fallback=LLM(provider="openai", model="gpt-5-nano"),
85
+ ),
86
+ executor=LLMWithFallback(
87
+ provider="openai",
88
+ model="gpt-5-nano",
89
+ fallback=LLM(provider="openai", model="gpt-5-mini"),
90
+ ),
73
91
  utils=LLMConfigUtils(
74
- outputter=LLM(provider="openai", model="gpt-5-nano"),
75
- hopper=LLM(provider="openai", model="gpt-4.1"),
92
+ outputter=LLMWithFallback(
93
+ provider="openai",
94
+ model="gpt-5-nano",
95
+ fallback=LLM(provider="openai", model="gpt-5-mini"),
96
+ ),
97
+ hopper=LLMWithFallback(
98
+ provider="openai",
99
+ model="gpt-5-nano",
100
+ fallback=LLM(provider="openai", model="gpt-5-mini"),
101
+ ),
76
102
  ),
77
103
  ),
78
104
  # from_file="/tmp/analyzer.jsonc" # can be loaded from file
@@ -82,17 +108,40 @@ def get_agent() -> Agent:
82
108
  action_profile = AgentProfile(
83
109
  name="note_taker",
84
110
  llm_config=LLMConfig(
85
- planner=LLM(provider="openai", model="o3"),
86
- orchestrator=LLM(provider="google", model="gemini-2.5-flash"),
111
+ planner=LLMWithFallback(
112
+ provider="openai", model="o3", fallback=LLM(provider="openai", model="gpt-5")
113
+ ),
114
+ orchestrator=LLMWithFallback(
115
+ provider="google",
116
+ model="gemini-2.5-flash",
117
+ fallback=LLM(provider="openai", model="gpt-5"),
118
+ ),
87
119
  cortex=LLMWithFallback(
88
120
  provider="openai",
89
121
  model="o4-mini",
90
122
  fallback=LLM(provider="openai", model="gpt-5"),
91
123
  ),
92
- executor=LLM(provider="openai", model="gpt-4o-mini"),
124
+ screen_analyzer=LLMWithFallback(
125
+ provider="openai",
126
+ model="gpt-4o",
127
+ fallback=LLM(provider="openai", model="gpt-5-nano"),
128
+ ),
129
+ executor=LLMWithFallback(
130
+ provider="openai",
131
+ model="gpt-4o-mini",
132
+ fallback=LLM(provider="openai", model="gpt-5-nano"),
133
+ ),
93
134
  utils=LLMConfigUtils(
94
- outputter=LLM(provider="openai", model="gpt-5-nano"),
95
- hopper=LLM(provider="openai", model="gpt-4.1"),
135
+ outputter=LLMWithFallback(
136
+ provider="openai",
137
+ model="gpt-5-nano",
138
+ fallback=LLM(provider="openai", model="gpt-5-mini"),
139
+ ),
140
+ hopper=LLMWithFallback(
141
+ provider="openai",
142
+ model="gpt-5-nano",
143
+ fallback=LLM(provider="openai", model="gpt-5-mini"),
144
+ ),
96
145
  ),
97
146
  ),
98
147
  )