minitap-mobile-use 2.3.0__tar.gz → 2.4.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of minitap-mobile-use might be problematic. Click here for more details.

Files changed (104) hide show
  1. {minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/PKG-INFO +3 -1
  2. {minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/minitap/mobile_use/agents/contextor/contextor.py +2 -2
  3. {minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/minitap/mobile_use/agents/cortex/cortex.md +49 -8
  4. {minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/minitap/mobile_use/agents/cortex/cortex.py +8 -4
  5. {minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/minitap/mobile_use/agents/executor/executor.md +14 -11
  6. {minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/minitap/mobile_use/agents/executor/executor.py +6 -5
  7. {minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/minitap/mobile_use/agents/hopper/hopper.py +6 -3
  8. {minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/minitap/mobile_use/agents/orchestrator/orchestrator.py +26 -11
  9. {minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/minitap/mobile_use/agents/outputter/outputter.py +6 -3
  10. {minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/minitap/mobile_use/agents/planner/planner.md +20 -22
  11. {minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/minitap/mobile_use/agents/planner/planner.py +10 -7
  12. {minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/minitap/mobile_use/agents/planner/types.py +4 -2
  13. {minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/minitap/mobile_use/agents/planner/utils.py +14 -0
  14. {minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/minitap/mobile_use/agents/summarizer/summarizer.py +2 -2
  15. {minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/minitap/mobile_use/config.py +6 -1
  16. {minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/minitap/mobile_use/context.py +13 -3
  17. {minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/minitap/mobile_use/controllers/mobile_command_controller.py +1 -14
  18. {minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/minitap/mobile_use/graph/state.py +7 -3
  19. {minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/minitap/mobile_use/sdk/agent.py +188 -23
  20. {minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/minitap/mobile_use/sdk/examples/README.md +19 -1
  21. minitap_mobile_use-2.4.0/minitap/mobile_use/sdk/examples/platform_minimal_example.py +46 -0
  22. minitap_mobile_use-2.4.0/minitap/mobile_use/sdk/services/platform.py +244 -0
  23. {minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/minitap/mobile_use/sdk/types/__init__.py +14 -14
  24. {minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/minitap/mobile_use/sdk/types/exceptions.py +27 -0
  25. minitap_mobile_use-2.4.0/minitap/mobile_use/sdk/types/platform.py +125 -0
  26. {minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/minitap/mobile_use/sdk/types/task.py +60 -17
  27. {minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/minitap/mobile_use/servers/device_hardware_bridge.py +1 -1
  28. {minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/minitap/mobile_use/servers/stop_servers.py +11 -12
  29. {minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/minitap/mobile_use/services/llm.py +89 -5
  30. {minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/minitap/mobile_use/tools/index.py +0 -6
  31. {minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/minitap/mobile_use/tools/mobile/back.py +3 -3
  32. {minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/minitap/mobile_use/tools/mobile/clear_text.py +24 -43
  33. {minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/minitap/mobile_use/tools/mobile/erase_one_char.py +5 -4
  34. {minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/minitap/mobile_use/tools/mobile/glimpse_screen.py +11 -7
  35. {minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/minitap/mobile_use/tools/mobile/input_text.py +21 -51
  36. minitap_mobile_use-2.4.0/minitap/mobile_use/tools/mobile/launch_app.py +87 -0
  37. {minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/minitap/mobile_use/tools/mobile/long_press_on.py +15 -8
  38. {minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/minitap/mobile_use/tools/mobile/open_link.py +15 -8
  39. {minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/minitap/mobile_use/tools/mobile/press_key.py +15 -8
  40. {minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/minitap/mobile_use/tools/mobile/stop_app.py +14 -8
  41. {minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/minitap/mobile_use/tools/mobile/swipe.py +11 -5
  42. minitap_mobile_use-2.4.0/minitap/mobile_use/tools/mobile/tap.py +144 -0
  43. {minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/minitap/mobile_use/tools/mobile/wait_for_animation_to_end.py +3 -3
  44. {minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/minitap/mobile_use/tools/test_utils.py +104 -78
  45. minitap_mobile_use-2.4.0/minitap/mobile_use/tools/types.py +35 -0
  46. {minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/minitap/mobile_use/tools/utils.py +51 -48
  47. {minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/minitap/mobile_use/utils/recorder.py +1 -1
  48. {minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/minitap/mobile_use/utils/ui_hierarchy.py +9 -2
  49. {minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/pyproject.toml +5 -2
  50. minitap_mobile_use-2.3.0/minitap/mobile_use/tools/mobile/copy_text_from.py +0 -75
  51. minitap_mobile_use-2.3.0/minitap/mobile_use/tools/mobile/find_packages.py +0 -69
  52. minitap_mobile_use-2.3.0/minitap/mobile_use/tools/mobile/launch_app.py +0 -55
  53. minitap_mobile_use-2.3.0/minitap/mobile_use/tools/mobile/paste_text.py +0 -88
  54. minitap_mobile_use-2.3.0/minitap/mobile_use/tools/mobile/tap.py +0 -62
  55. {minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/LICENSE +0 -0
  56. {minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/README.md +0 -0
  57. {minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/minitap/mobile_use/__init__.py +0 -0
  58. {minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/minitap/mobile_use/agents/cortex/types.py +0 -0
  59. {minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/minitap/mobile_use/agents/executor/tool_node.py +0 -0
  60. {minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/minitap/mobile_use/agents/executor/utils.py +0 -0
  61. {minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/minitap/mobile_use/agents/hopper/hopper.md +0 -0
  62. {minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/minitap/mobile_use/agents/orchestrator/human.md +0 -0
  63. {minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/minitap/mobile_use/agents/orchestrator/orchestrator.md +0 -0
  64. {minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/minitap/mobile_use/agents/orchestrator/types.py +0 -0
  65. {minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/minitap/mobile_use/agents/outputter/human.md +0 -0
  66. {minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/minitap/mobile_use/agents/outputter/test_outputter.py +0 -0
  67. {minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/minitap/mobile_use/agents/planner/human.md +0 -0
  68. {minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/minitap/mobile_use/clients/device_hardware_client.py +0 -0
  69. {minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/minitap/mobile_use/clients/ios_client.py +0 -0
  70. {minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/minitap/mobile_use/clients/screen_api_client.py +0 -0
  71. {minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/minitap/mobile_use/constants.py +0 -0
  72. {minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/minitap/mobile_use/controllers/__init__.py +0 -0
  73. {minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/minitap/mobile_use/controllers/platform_specific_commands_controller.py +0 -0
  74. {minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/minitap/mobile_use/graph/graph.py +0 -0
  75. {minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/minitap/mobile_use/main.py +0 -0
  76. {minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/minitap/mobile_use/sdk/__init__.py +0 -0
  77. {minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/minitap/mobile_use/sdk/builders/__init__.py +0 -0
  78. {minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/minitap/mobile_use/sdk/builders/agent_config_builder.py +0 -0
  79. {minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/minitap/mobile_use/sdk/builders/index.py +0 -0
  80. {minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/minitap/mobile_use/sdk/builders/task_request_builder.py +0 -0
  81. {minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/minitap/mobile_use/sdk/constants.py +0 -0
  82. {minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/minitap/mobile_use/sdk/examples/__init__.py +0 -0
  83. {minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/minitap/mobile_use/sdk/examples/simple_photo_organizer.py +0 -0
  84. {minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/minitap/mobile_use/sdk/examples/smart_notification_assistant.py +0 -0
  85. {minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/minitap/mobile_use/sdk/types/agent.py +0 -0
  86. {minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/minitap/mobile_use/sdk/utils.py +0 -0
  87. {minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/minitap/mobile_use/servers/config.py +0 -0
  88. {minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/minitap/mobile_use/servers/device_screen_api.py +0 -0
  89. {minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/minitap/mobile_use/servers/start_servers.py +0 -0
  90. {minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/minitap/mobile_use/servers/utils.py +0 -0
  91. {minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/minitap/mobile_use/services/accessibility.py +0 -0
  92. {minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/minitap/mobile_use/tools/tool_wrapper.py +0 -0
  93. {minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/minitap/mobile_use/utils/cli_helpers.py +0 -0
  94. {minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/minitap/mobile_use/utils/cli_selection.py +0 -0
  95. {minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/minitap/mobile_use/utils/conversations.py +0 -0
  96. {minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/minitap/mobile_use/utils/decorators.py +0 -0
  97. {minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/minitap/mobile_use/utils/errors.py +0 -0
  98. {minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/minitap/mobile_use/utils/file.py +0 -0
  99. {minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/minitap/mobile_use/utils/logger.py +0 -0
  100. {minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/minitap/mobile_use/utils/media.py +0 -0
  101. {minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/minitap/mobile_use/utils/requests_utils.py +0 -0
  102. {minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/minitap/mobile_use/utils/shell_utils.py +0 -0
  103. {minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/minitap/mobile_use/utils/test_ui_hierarchy.py +0 -0
  104. {minitap_mobile_use-2.3.0 → minitap_mobile_use-2.4.0}/minitap/mobile_use/utils/time.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: minitap-mobile-use
3
- Version: 2.3.0
3
+ Version: 2.4.0
4
4
  Summary: AI-powered multi-agent system that automates real Android and iOS devices through low-level control using LangGraph.
5
5
  Author: Pierre-Louis Favreau, Jean-Pierre Lo, Nicolas Dehandschoewercker
6
6
  License: MIT License
@@ -43,9 +43,11 @@ Requires-Dist: uvicorn[standard]==0.30.1
43
43
  Requires-Dist: colorama>=0.4.6
44
44
  Requires-Dist: psutil>=5.9.0
45
45
  Requires-Dist: langchain-google-vertexai>=2.0.28
46
+ Requires-Dist: httpx>=0.28.1
46
47
  Requires-Dist: ruff==0.5.3 ; extra == 'dev'
47
48
  Requires-Dist: pytest==8.4.1 ; extra == 'dev'
48
49
  Requires-Dist: pytest-cov==5.0.0 ; extra == 'dev'
50
+ Requires-Dist: pyright==1.1.405 ; extra == 'dev'
49
51
  Requires-Python: >=3.12
50
52
  Project-URL: Homepage, https://minitap.ai/
51
53
  Project-URL: Source, https://github.com/minitap-ai/mobile-use
@@ -21,7 +21,7 @@ class ContextorNode:
21
21
  on_success=lambda _: logger.success("Contextor Agent"),
22
22
  on_failure=lambda _: logger.error("Contextor Agent"),
23
23
  )
24
- def __call__(self, state: State):
24
+ async def __call__(self, state: State):
25
25
  device_data = get_screen_data(self.ctx.screen_api_client)
26
26
  focused_app_info = get_focused_app_info(self.ctx)
27
27
  device_date = get_device_date(self.ctx)
@@ -30,7 +30,7 @@ class ContextorNode:
30
30
  list(state.executor_messages)
31
31
  )
32
32
 
33
- return state.sanitize_update(
33
+ return await state.asanitize_update(
34
34
  ctx=self.ctx,
35
35
  update={
36
36
  "latest_screenshot_base64": device_data.base64
@@ -31,6 +31,12 @@ To understand the device state, you have two senses, each with its purpose:
31
31
  * **Golden Rule:** When the UI hierarchy is ambiguous, seems incomplete, or when you need to verify a visual detail before acting, **`glimpse_screen` is always the most effective and reliable action.** Never guess what the screen looks like; use your sight to be sure.
32
32
 
33
33
  **CRITICAL NOTE ON SIGHT:** The visual information from `glimpse_screen` is **ephemeral**. It is available for **THIS decision turn ONLY**. You MUST extract all necessary information from it IMMEDIATELY, as it will be cleared before the next step.
34
+
35
+ ### CRITICAL ACTION DIRECTIVES
36
+
37
+ - **To open an application, you MUST use the `launch_app` tool.** Provide the natural language name of the app (e.g., "Uber Eats"). Do NOT attempt to open apps manually by swiping to the app drawer and searching. The `launch_app` tool is the fastest and most reliable method.
38
+ - **To open URLs/links, you MUST use the `open_link` tool.** This handles all links, including deep links, correctly.
39
+
34
40
  ### Context You Receive:
35
41
 
36
42
  - 📱 **Device state**:
@@ -75,13 +81,32 @@ Focus on the **current PENDING subgoal and the next subgoals not yet started**.
75
81
 
76
82
  **You MUST follow it for every element interaction.**
77
83
 
78
- When you target a UI element (for a `tap`, `input_text`, `clear_text`, etc.), you **MUST** provide a comprehensive target object containing every piece of information you can find about it.
84
+ When you target a UI element (for a `tap`, `input_text`, `clear_text`, etc.), you **MUST** provide a comprehensive `target` object containing every piece of information you can find about **that single element**.
79
85
 
80
86
  * **1. `resource_id`**: Include this if it is present in the UI hierarchy.
81
- * **2. `coordinates`**: Include the full bounds (`x`, `y`, `width`, `height`) if they are available.
82
- * **3. `text`**: Include the *current text* content of the element (e.g., "Sign In", "Search...", "First Name").
87
+ * **2. `resource_id_index`**: If there are multiple elements with the same `resource_id`, provide the zero-based index of the specific one you are targeting.
88
+ * **3. `coordinates`**: Include the full bounds (`x`, `y`, `width`, `height`) if they are available.
89
+ * **4. `text`**: Include the *current text* content of the element (e.g., placeholder text for an input).
90
+ * **5. `text_index`**: If there are multiple elements with the same `text`, provide the zero-based index of the specific one you are targeting.
91
+
92
+ **CRITICAL: The index must correspond to its identifier.** `resource_id_index` is only used when targeting by `resource_id`. `text_index` is only used when targeting by `text`. This ensures the fallback logic targets the correct element.
93
+
94
+ **This is NOT optional.** Providing every locator that is available is the foundation of the system's reliability. It allows next steps to use a fallback mechanism: if the ID fails, it tries the coordinates, etc. Failing to provide this complete context will lead to action failures.
95
+
96
+ ### The Rule of Unpredictable Actions
83
97
 
84
- **This is NOT optional.** Providing all three locators if we have, it is the foundation of the system's reliability. It allows next steps to use a fallback mechanism: if the ID fails, it tries the coordinates, etc. Failing to provide this complete context will lead to action failures.
98
+ Certain actions have outcomes that can significantly and sometimes unpredictably change the UI. These include:
99
+ - `back`
100
+ - `launch_app`
101
+ - `stop_app`
102
+ - `open_link`
103
+ - `tap` on an element that is clearly for navigation (e.g., a "Back" button, a menu item, a link to another screen).
104
+
105
+ **CRITICAL RULE: If your decision includes one of these unpredictable actions, it MUST be the only action in your `Structured Decisions` for this turn. Else, use flows to group actions together.**
106
+
107
+ This is not optional. Failing to isolate these actions will cause the system to act on an outdated understanding of the screen, leading to catastrophic errors. For example, after a `back` command, you MUST wait to see the new screen before deciding what to tap next.
108
+
109
+ You may only group simple, predictable actions together, such as tapping a text field and then immediately typing into it (`tap` followed by `input_text`).
85
110
 
86
111
  ### Outputting Your Decisions
87
112
 
@@ -90,8 +115,8 @@ If you decide to act, output a **valid JSON stringified structured set of instru
90
115
  - These must be **concrete low-level actions**.
91
116
  - The executor has the following available tools: {{ executor_tools_list }}.
92
117
  - Your goal is to achieve subgoals **fast** - so you must put as many actions as possible in your instructions to complete all achievable subgoals (based on your observations) in one go.
93
- - To open URLs/links directly, use the `open_link` tool - it will automatically handle opening in the appropriate browser. It also handles deep links.
94
- - When you need to open an app, use the `find_packages` low-level action to try and get its name. Then, simply use the `launch_app` low-level action to launch it.
118
+ - If you refer to a UI element or coordinates, specify it clearly (e.g., `resource-id: com.whatsapp:id/search`, `resource-id-index: 0`, `text: "Alice"`, `text-index: 0`, `x: 100, y: 200, width: 100, height: 100`).
119
+ - **The structure is up to you**, but it must be valid **JSON stringified output**. You will accompany this output with a **natural-language summary** of your reasoning and approach in your agent thought.
95
120
  - **Always use a single `input_text` action** to type in a field. This tool handles focusing the element and placing the cursor correctly. If the tool feedback indicates verification is needed or shows None/empty content, perform verification before proceeding.
96
121
  - **Only reference UI element IDs or visible texts that are explicitly present in the provided UI hierarchy or screenshot. Do not invent, infer, or guess any IDs or texts that are not directly observed**.
97
122
  - **For text clearing**: When you need to completely clear text from an input field, always call the `clear_text` tool with the correct resource_id. This tool automatically focuses the element, and ensures the field is emptied. If you notice this tool fails to clear the text, try to long press the input, select all, and call `erase_one_char`.
@@ -116,7 +141,23 @@ If you decide to act, output a **valid JSON stringified structured set of instru
116
141
 
117
142
  ---
118
143
 
119
- ### Example
144
+ ### Example 1
145
+
146
+ #### Current Subgoal:
147
+
148
+ > "Open WhatsApp"
149
+
150
+ #### Structured Decisions:
151
+
152
+ ```text
153
+ "{\"action\": \"launch_app\", \"app_name\": \"WhatsApp\"}"
154
+ ```
155
+
156
+ #### Agent Thought:
157
+
158
+ > I need to launch the WhatsApp app. I will use the `launch_app` tool to open it.
159
+
160
+ ### Example 2
120
161
 
121
162
  #### Current Subgoal:
122
163
 
@@ -125,7 +166,7 @@ If you decide to act, output a **valid JSON stringified structured set of instru
125
166
  #### Structured Decisions:
126
167
 
127
168
  ```text
128
- "{\"action\": \"tap\", \"target\": {\"text_input_resource_id\": \"com.whatsapp:id/menuitem_search\", \"text_input_coordinates\": {\"x\": 880, \"y\": 150, \"width\": 120, \"height\": 120}, \"text_input_text\": \"Search\"}}"
169
+ "[{\"action\": \"tap\", \"target\": {\"resource_id\": \"com.whatsapp:id/menuitem_search\", \"resource_id_index\": 1, \"text\": \"Search\", \"text_index\": 0, \"coordinates\": {\"x\": 880, \"y\": 150, \"width\": 120, \"height\": 120}}}]"
129
170
  ```
130
171
 
131
172
  #### Agent Thought:
@@ -16,7 +16,7 @@ from minitap.mobile_use.agents.planner.utils import get_current_subgoal
16
16
  from minitap.mobile_use.constants import EXECUTOR_MESSAGES_KEY
17
17
  from minitap.mobile_use.context import MobileUseContext
18
18
  from minitap.mobile_use.graph.state import State
19
- from minitap.mobile_use.services.llm import get_llm, with_fallback
19
+ from minitap.mobile_use.services.llm import get_llm, invoke_llm_with_timeout_message, with_fallback
20
20
  from minitap.mobile_use.tools.index import EXECUTOR_WRAPPERS_TOOLS, format_tools_list
21
21
  from minitap.mobile_use.utils.conversations import get_screenshot_message_for_llm
22
22
  from minitap.mobile_use.utils.decorators import wrap_with_callbacks
@@ -78,8 +78,12 @@ class CortexNode:
78
78
  ctx=self.ctx, name="cortex", use_fallback=True, temperature=1
79
79
  ).with_structured_output(CortexOutput)
80
80
  response: CortexOutput = await with_fallback(
81
- main_call=lambda: llm.ainvoke(messages),
82
- fallback_call=lambda: llm_fallback.ainvoke(messages),
81
+ main_call=lambda: invoke_llm_with_timeout_message(
82
+ llm.ainvoke(messages), agent_name="Cortex"
83
+ ),
84
+ fallback_call=lambda: invoke_llm_with_timeout_message(
85
+ llm_fallback.ainvoke(messages), agent_name="Cortex (Fallback)"
86
+ ),
83
87
  ) # type: ignore
84
88
 
85
89
  is_subgoal_completed = (
@@ -90,7 +94,7 @@ class CortexNode:
90
94
  if not is_subgoal_completed:
91
95
  response.complete_subgoals_by_ids = []
92
96
 
93
- return state.sanitize_update(
97
+ return await state.asanitize_update(
94
98
  ctx=self.ctx,
95
99
  update={
96
100
  "agents_thoughts": [response.agent_thought],
@@ -25,12 +25,7 @@ and your previous actions, you must:
25
25
  "I'm tapping on the chat item labeled 'Alice' to open the conversation."
26
26
 
27
27
  ```json
28
- {
29
- "action": "tap",
30
- "target": {
31
- "resource_id": "com.whatsapp:id/conversation_item"
32
- }
33
- }
28
+ "[{\"tool_name\": \"tap\", \"arguments\": {\"target\": {\"resource_id\": \"com.whatsapp:id/conversation_item\", \"resource_id_index\": 0, \"text\": \"Alice\", \"text_index\": 0, \"coordinates\": {\"x\": 0, \"y\": 350, \"width\": 1080, \"height\": 80}}}}]"
34
29
  ```
35
30
 
36
31
  **→ Executor Action**:
@@ -38,13 +33,17 @@ and your previous actions, you must:
38
33
  Call the `tap_on_element` tool with:
39
34
 
40
35
  - `resource_id = "com.whatsapp:id/conversation_item"`
36
+ - `resource_id_index = 0`
37
+ - `text = "Alice"`
38
+ - `text_index = 0`
39
+ - `coordinates = {"x": 0, "y": 350, "width": 1080, "height": 80}`
41
40
  - `agent_thought = "I'm tapping on the chat item labeled 'Alice' to open the conversation."`
42
41
 
43
42
  ---
44
43
 
45
44
  ### ⚙️ Tools
46
45
 
47
- - Tools may include actions like: `tap`, `swipe`, `start_app`, `stop_app`, `find_packages`, `get_current_focus`, etc.
46
+ - Tools may include actions like: `tap`, `swipe`, `launch_app`, `stop_app`, etc.
48
47
  - You **must not hardcode tool definitions** here.
49
48
  - Just use the right tool based on what the `structured_decisions` requires.
50
49
  - The tools are provided dynamically via LangGraph's tool binding mechanism.
@@ -53,10 +52,12 @@ Call the `tap_on_element` tool with:
53
52
 
54
53
  When using the `input_text` tool:
55
54
 
56
- - **Provide all available information** from the following optional parameters to identify the text input element:
57
- - `text_input_resource_id`: The resource ID of the text input element (when available)
58
- - `text_input_coordinates`: The bounds (ElementBounds) of the text input element (when available)
59
- - `text_input_text`: The current text content of the text input element (when available)
55
+ - **Provide all available information** in the target object to identify the text input element:
56
+ - `resource_id`: The resource ID of the text input element (when available)
57
+ - `resource_id_index`: The zero-based index of the specific resource ID you are targeting (when available)
58
+ - `text`: The current text content of the text input element (when available)
59
+ - `text_index`: The zero-based index of the specific text you are targeting (when available)
60
+ - `coordinates`: The bounds (ElementBounds) of the text input element (when available)
60
61
 
61
62
  - The tool will automatically:
62
63
 
@@ -64,6 +65,8 @@ When using the `input_text` tool:
64
65
  2. **Move the cursor to the end** of the existing text
65
66
  3. **Then type the new text**
66
67
 
68
+ - **Important**: Special characters and markdown-like escape sequences (e.g., \n, \t, *, _) are not interpreted. For example, typing \n will insert the literal characters \ and n, not a line break.
69
+
67
70
  #### 🔄 Text Clearing Best Practice
68
71
 
69
72
  When you need to completely clear text from an input field, always use the clear_text tool with the correct resource_id.
@@ -8,7 +8,7 @@ from langchain_google_vertexai.chat_models import ChatVertexAI
8
8
  from minitap.mobile_use.constants import EXECUTOR_MESSAGES_KEY
9
9
  from minitap.mobile_use.context import MobileUseContext
10
10
  from minitap.mobile_use.graph.state import State
11
- from minitap.mobile_use.services.llm import get_llm
11
+ from minitap.mobile_use.services.llm import get_llm, invoke_llm_with_timeout_message
12
12
  from minitap.mobile_use.tools.index import EXECUTOR_WRAPPERS_TOOLS, get_tools_from_wrappers
13
13
  from minitap.mobile_use.utils.decorators import wrap_with_callbacks
14
14
  from minitap.mobile_use.utils.logger import get_logger
@@ -29,7 +29,7 @@ class ExecutorNode:
29
29
  structured_decisions = state.structured_decisions
30
30
  if not structured_decisions:
31
31
  logger.warning("No structured decisions found.")
32
- return state.sanitize_update(
32
+ return await state.asanitize_update(
33
33
  ctx=self.ctx,
34
34
  update={
35
35
  "agents_thoughts": [
@@ -62,9 +62,10 @@ class ExecutorNode:
62
62
  llm_bind_tools_kwargs["parallel_tool_calls"] = True
63
63
 
64
64
  llm = llm.bind_tools(**llm_bind_tools_kwargs)
65
- response = await llm.ainvoke(messages)
66
-
67
- return state.sanitize_update(
65
+ response = await invoke_llm_with_timeout_message(
66
+ llm.ainvoke(messages), agent_name="Executor"
67
+ )
68
+ return await state.asanitize_update(
68
69
  ctx=self.ctx,
69
70
  update={
70
71
  "cortex_last_thought": cortex_last_thought,
@@ -2,10 +2,11 @@ from pathlib import Path
2
2
 
3
3
  from jinja2 import Template
4
4
  from langchain_core.messages import HumanMessage, SystemMessage
5
- from minitap.mobile_use.context import MobileUseContext
6
- from minitap.mobile_use.services.llm import get_llm
7
5
  from pydantic import BaseModel, Field
8
6
 
7
+ from minitap.mobile_use.context import MobileUseContext
8
+ from minitap.mobile_use.services.llm import get_llm, invoke_llm_with_timeout_message
9
+
9
10
 
10
11
  class HopperOutput(BaseModel):
11
12
  step: str = Field(
@@ -33,7 +34,9 @@ async def hopper(
33
34
 
34
35
  llm = get_llm(ctx=ctx, name="hopper", is_utils=True, temperature=0)
35
36
  structured_llm = llm.with_structured_output(HopperOutput)
36
- response: HopperOutput = await structured_llm.ainvoke(messages) # type: ignore
37
+ response: HopperOutput = await invoke_llm_with_timeout_message(
38
+ structured_llm.ainvoke(messages), agent_name="Hopper"
39
+ ) # type: ignore
37
40
  return HopperOutput(
38
41
  step=response.step,
39
42
  output=response.output,
@@ -15,7 +15,7 @@ from minitap.mobile_use.agents.planner.utils import (
15
15
  )
16
16
  from minitap.mobile_use.context import MobileUseContext
17
17
  from minitap.mobile_use.graph.state import State
18
- from minitap.mobile_use.services.llm import get_llm
18
+ from minitap.mobile_use.services.llm import get_llm, invoke_llm_with_timeout_message
19
19
  from minitap.mobile_use.utils.decorators import wrap_with_callbacks
20
20
  from minitap.mobile_use.utils.logger import get_logger
21
21
 
@@ -45,14 +45,18 @@ class OrchestratorNode:
45
45
  else f"Starting the next subgoal: {new_subgoal}"
46
46
  )
47
47
  ]
48
- return _get_state_update(ctx=self.ctx, state=state, thoughts=thoughts, update_plan=True)
48
+ return await _get_state_update(
49
+ ctx=self.ctx, state=state, thoughts=thoughts, update_plan=True
50
+ )
49
51
 
50
52
  subgoals_to_examine = get_subgoals_by_ids(
51
53
  subgoals=state.subgoal_plan,
52
54
  ids=state.complete_subgoals_by_ids,
53
55
  )
54
56
  if len(subgoals_to_examine) <= 0:
55
- return _get_state_update(ctx=self.ctx, state=state, thoughts=["No subgoal to examine."])
57
+ return await _get_state_update(
58
+ ctx=self.ctx, state=state, thoughts=["No subgoal to examine."]
59
+ )
56
60
 
57
61
  system_message = Template(
58
62
  Path(__file__).parent.joinpath("orchestrator.md").read_text(encoding="utf-8")
@@ -72,13 +76,16 @@ class OrchestratorNode:
72
76
 
73
77
  llm = get_llm(ctx=self.ctx, name="orchestrator", temperature=1)
74
78
  llm = llm.with_structured_output(OrchestratorOutput)
75
- response: OrchestratorOutput = await llm.ainvoke(messages) # type: ignore
76
-
79
+ response: OrchestratorOutput = await invoke_llm_with_timeout_message(
80
+ llm.ainvoke(messages), agent_name="Orchestrator"
81
+ ) # type: ignore
77
82
  if response.needs_replaning:
78
83
  thoughts = [response.reason]
79
84
  state.subgoal_plan = fail_current_subgoal(state.subgoal_plan)
80
85
  thoughts.append("==== END OF PLAN, REPLANNING ====")
81
- return _get_state_update(ctx=self.ctx, state=state, thoughts=thoughts, update_plan=True)
86
+ return await _get_state_update(
87
+ ctx=self.ctx, state=state, thoughts=thoughts, update_plan=True
88
+ )
82
89
 
83
90
  state.subgoal_plan = complete_subgoals_by_ids(
84
91
  subgoals=state.subgoal_plan,
@@ -87,19 +94,25 @@ class OrchestratorNode:
87
94
  thoughts = [response.reason]
88
95
  if all_completed(state.subgoal_plan):
89
96
  logger.success("All the subgoals have been completed successfully.")
90
- return _get_state_update(ctx=self.ctx, state=state, thoughts=thoughts, update_plan=True)
97
+ return await _get_state_update(
98
+ ctx=self.ctx, state=state, thoughts=thoughts, update_plan=True
99
+ )
91
100
 
92
101
  if current_subgoal.id not in response.completed_subgoal_ids:
93
102
  # The current subgoal is not yet complete.
94
- return _get_state_update(ctx=self.ctx, state=state, thoughts=thoughts, update_plan=True)
103
+ return await _get_state_update(
104
+ ctx=self.ctx, state=state, thoughts=thoughts, update_plan=True
105
+ )
95
106
 
96
107
  state.subgoal_plan = start_next_subgoal(state.subgoal_plan)
97
108
  new_subgoal = get_current_subgoal(state.subgoal_plan)
98
109
  thoughts.append(f"==== NEXT SUBGOAL: {new_subgoal} ====")
99
- return _get_state_update(ctx=self.ctx, state=state, thoughts=thoughts, update_plan=True)
110
+ return await _get_state_update(
111
+ ctx=self.ctx, state=state, thoughts=thoughts, update_plan=True
112
+ )
100
113
 
101
114
 
102
- def _get_state_update(
115
+ async def _get_state_update(
103
116
  ctx: MobileUseContext,
104
117
  state: State,
105
118
  thoughts: list[str],
@@ -111,4 +124,6 @@ def _get_state_update(
111
124
  }
112
125
  if update_plan:
113
126
  update["subgoal_plan"] = state.subgoal_plan
114
- return state.sanitize_update(ctx=ctx, update=update, agent="orchestrator")
127
+ if ctx.on_plan_changes:
128
+ await ctx.on_plan_changes(state.subgoal_plan, False)
129
+ return await state.asanitize_update(ctx=ctx, update=update, agent="orchestrator")
@@ -3,13 +3,14 @@ from pathlib import Path
3
3
 
4
4
  from jinja2 import Template
5
5
  from langchain_core.messages import BaseMessage, HumanMessage, SystemMessage
6
+ from pydantic import BaseModel
7
+
6
8
  from minitap.mobile_use.config import OutputConfig
7
9
  from minitap.mobile_use.context import MobileUseContext
8
10
  from minitap.mobile_use.graph.state import State
9
- from minitap.mobile_use.services.llm import get_llm
11
+ from minitap.mobile_use.services.llm import get_llm, invoke_llm_with_timeout_message
10
12
  from minitap.mobile_use.utils.conversations import is_ai_message
11
13
  from minitap.mobile_use.utils.logger import get_logger
12
- from pydantic import BaseModel
13
14
 
14
15
  logger = get_logger(__name__)
15
16
 
@@ -61,7 +62,9 @@ async def outputter(
61
62
  if schema is not None:
62
63
  structured_llm = llm.with_structured_output(schema)
63
64
 
64
- response = await structured_llm.ainvoke(messages) # type: ignore
65
+ response = await invoke_llm_with_timeout_message(
66
+ structured_llm.ainvoke(messages), agent_name="Outputter"
67
+ ) # type: ignore
65
68
  if isinstance(response, BaseModel):
66
69
  if output_config.output_description and hasattr(response, "content"):
67
70
  response = json.loads(response.content) # type: ignore
@@ -9,12 +9,13 @@ You work like an agile tech lead: defining the key milestones without locking in
9
9
  Given the **user's goal**:
10
10
 
11
11
  - Create a **high-level sequence of subgoals** to complete that goal.
12
- - Subgoals should reflect real interactions with mobile UIs (e.g. "Open app", "Tap search bar", "Scroll to item", "Send message to Bob", etc).
12
+ - Subgoals should reflect real interactions with mobile UIs and describe the intent of the action (e.g., "Open the app to find a contact," "View the image to extract information," "Send a message to Bob confirming the appointment").
13
+ - Focus on the goal of the interaction, not just the physical action. For example, instead of 'View the receipt,' a better subgoal is 'Open and analyze the receipt to identify transactions.'
13
14
  - Don't assume the full UI is visible yet. Plan based on how most mobile apps work, and keep flexibility.
14
- - List of agents thoughts is empty which is expected, since it is the first plan.
15
- - Avoid too granular UI actions based tasks (e.g. "tap", "swipe", "copy", "paste") unless explicitly required.
16
15
  - The executor has the following available tools: {{ executor_tools_list }}.
17
16
  When one of these tools offers a direct shortcut (e.g. `openLink` instead of manually launching a browser and typing a URL), prefer it over decomposed manual steps.
17
+ - Ensure that each subgoal prepares the ground for the next. If data needs to be gathered in one step to be used in another, the subgoal should reflect the intent to gather that data.
18
+
18
19
 
19
20
  2. **Replanning**
20
21
  If you're asked to **revise a previous plan**, you'll also receive:
@@ -27,38 +28,35 @@ You work like an agile tech lead: defining the key milestones without locking in
27
28
 
28
29
  ### Output
29
30
 
30
- You must output a **list of subgoals (description + optional subgoal ID)**, each representing a clear subgoal.
31
+ You must output a **list of subgoals (description)**, each representing a clear subgoal.
31
32
  Each subgoal should be:
32
33
 
33
- - Focused on **realistic mobile interactions**
34
+ - Focused on **purpose-driven mobile interactions** that clearly state the intent
34
35
  - Neither too vague nor too granular
35
36
  - Sequential (later steps may depend on earlier ones)
36
37
  - Don't use loop-like formulations unless necessary (e.g. don't say "repeat this X times"; instead, reuse the same steps X times as subgoals)
37
38
 
38
- If you're replaning and need to keep a previous subgoal, you **must keep the same subgoal ID**.
39
-
40
39
  ### Examples
41
40
 
42
- #### **Initial Goal**: "Open WhatsApp and send 'I’m running late' to Alice"
41
+ #### **Initial Goal**: "Go on https://tesla.com, and tell me what is the first car being displayed"
43
42
 
44
43
  **Plan**:
45
44
 
46
- - Open the WhatsApp app (ID: None -> will be generated as a UUID like bc3c362d-f498-4f1a-991e-4a2d1f8c1226)
47
- - Locate or search for Alice (ID: None)
48
- - Open the conversation with Alice (ID: None)
49
- - Type the message "I’m running late" (ID: None)
50
- - Send the message (ID: None)
45
+ - Open the link https://tesla.com to find information
46
+ - Analyze the home page to identify the first car displayed
51
47
 
52
- #### **Initial Goal**: "Go on https://tesla.com, and tell me what is the first car being displayed"
48
+ #### **Initial Goal**: "Open WhatsApp and send 'I’m running late' to Alice"
53
49
 
54
50
  **Plan**:
55
51
 
56
- - Open the link https://tesla.com (ID: None)
57
- - Find the first car displayed on the home page (ID: None)
52
+ - Open the WhatsApp app to find the contact "Alice"
53
+ - Open the conversation with Alice to send a message
54
+ - Type the message "I’m running late" into the message field
55
+ - Send the message
58
56
 
59
57
  #### **Replanning Example**
60
58
 
61
- **Original Plan**: same as above with IDs set
59
+ **Original Plan**: same as above
62
60
  **Agent Thoughts**:
63
61
 
64
62
  - Couldn't find Alice in recent chats
@@ -67,8 +65,8 @@ If you're replaning and need to keep a previous subgoal, you **must keep the sam
67
65
 
68
66
  **New Plan**:
69
67
 
70
- - Open WhatsApp (ID: bc3c362d-f498-4f1a-991e-4a2d1f8c1226)
71
- - Tap the search bar (ID: None)
72
- - Search for "Alice" (ID: None)
73
- - Select the correct chat (ID: None)
74
- - Type and send "I’m running late" (ID: None)
68
+ - Open WhatsApp
69
+ - Tap the search bar to find a contact
70
+ - Search for "Alice" in the search field
71
+ - Select the correct chat to open the conversation
72
+ - Type and send "I’m running late"
@@ -1,14 +1,13 @@
1
- import uuid
2
1
  from pathlib import Path
3
2
 
4
3
  from jinja2 import Template
5
4
  from langchain_core.messages import HumanMessage, SystemMessage
6
5
 
7
6
  from minitap.mobile_use.agents.planner.types import PlannerOutput, Subgoal, SubgoalStatus
8
- from minitap.mobile_use.agents.planner.utils import one_of_them_is_failure
7
+ from minitap.mobile_use.agents.planner.utils import generate_id, one_of_them_is_failure
9
8
  from minitap.mobile_use.context import MobileUseContext
10
9
  from minitap.mobile_use.graph.state import State
11
- from minitap.mobile_use.services.llm import get_llm
10
+ from minitap.mobile_use.services.llm import get_llm, invoke_llm_with_timeout_message
12
11
  from minitap.mobile_use.tools.index import EXECUTOR_WRAPPERS_TOOLS, format_tools_list
13
12
  from minitap.mobile_use.utils.decorators import wrap_with_callbacks
14
13
  from minitap.mobile_use.utils.logger import get_logger
@@ -49,11 +48,12 @@ class PlannerNode:
49
48
 
50
49
  llm = get_llm(ctx=self.ctx, name="planner")
51
50
  llm = llm.with_structured_output(PlannerOutput)
52
- response: PlannerOutput = await llm.ainvoke(messages) # type: ignore
53
-
51
+ response: PlannerOutput = await invoke_llm_with_timeout_message(
52
+ llm.ainvoke(messages), agent_name="Planner"
53
+ ) # type: ignore
54
54
  subgoals_plan = [
55
55
  Subgoal(
56
- id=subgoal.id or str(uuid.uuid4()),
56
+ id=generate_id(),
57
57
  description=subgoal.description,
58
58
  status=SubgoalStatus.NOT_STARTED,
59
59
  completion_reason=None,
@@ -63,7 +63,10 @@ class PlannerNode:
63
63
  logger.info("📜 Generated plan:")
64
64
  logger.info("\n".join(str(s) for s in subgoals_plan))
65
65
 
66
- return state.sanitize_update(
66
+ if self.ctx.on_plan_changes:
67
+ await self.ctx.on_plan_changes(subgoals_plan, needs_replan)
68
+
69
+ return await state.asanitize_update(
67
70
  ctx=self.ctx,
68
71
  update={
69
72
  "subgoal_plan": subgoals_plan,
@@ -1,11 +1,11 @@
1
+ from datetime import datetime
1
2
  from enum import Enum
3
+ from typing import Annotated
2
4
 
3
5
  from pydantic import BaseModel
4
- from typing import Annotated
5
6
 
6
7
 
7
8
  class PlannerSubgoalOutput(BaseModel):
8
- id: Annotated[str | None, "If not provided, it will be generated"] = None
9
9
  description: str
10
10
 
11
11
 
@@ -27,6 +27,8 @@ class Subgoal(BaseModel):
27
27
  str | None, "Reason why the subgoal was completed (failure or success)"
28
28
  ] = None
29
29
  status: SubgoalStatus
30
+ started_at: Annotated[datetime | None, "When the subgoal started"] = None
31
+ ended_at: Annotated[datetime | None, "When the subgoal ended"] = None
30
32
 
31
33
  def __str__(self):
32
34
  status_emoji = "❓"
@@ -1,4 +1,8 @@
1
+ import random
2
+ import string
3
+
1
4
  from minitap.mobile_use.agents.planner.types import Subgoal, SubgoalStatus
5
+ from datetime import datetime, UTC
2
6
 
3
7
 
4
8
  def get_current_subgoal(subgoals: list[Subgoal]) -> Subgoal | None:
@@ -22,6 +26,7 @@ def complete_current_subgoal(subgoals: list[Subgoal]) -> list[Subgoal]:
22
26
  if not current_subgoal:
23
27
  return subgoals
24
28
  current_subgoal.status = SubgoalStatus.SUCCESS
29
+ current_subgoal.ended_at = datetime.now(UTC)
25
30
  return subgoals
26
31
 
27
32
 
@@ -29,6 +34,7 @@ def complete_subgoals_by_ids(subgoals: list[Subgoal], ids: list[str]) -> list[Su
29
34
  for subgoal in subgoals:
30
35
  if subgoal.id in ids:
31
36
  subgoal.status = SubgoalStatus.SUCCESS
37
+ subgoal.ended_at = datetime.now(UTC)
32
38
  return subgoals
33
39
 
34
40
 
@@ -37,6 +43,7 @@ def fail_current_subgoal(subgoals: list[Subgoal]) -> list[Subgoal]:
37
43
  if not current_subgoal:
38
44
  return subgoals
39
45
  current_subgoal.status = SubgoalStatus.FAILURE
46
+ current_subgoal.ended_at = datetime.now(UTC)
40
47
  return subgoals
41
48
 
42
49
 
@@ -53,4 +60,11 @@ def start_next_subgoal(subgoals: list[Subgoal]) -> list[Subgoal]:
53
60
  if not next_subgoal:
54
61
  return subgoals
55
62
  next_subgoal.status = SubgoalStatus.PENDING
63
+ next_subgoal.started_at = datetime.now(UTC)
56
64
  return subgoals
65
+
66
+
67
+ def generate_id(length: int = 6) -> str:
68
+ """Generates a small and distinct random string ID."""
69
+ chars = string.ascii_lowercase + string.digits
70
+ return "".join(random.choice(chars) for _ in range(length))
@@ -13,7 +13,7 @@ class SummarizerNode:
13
13
  def __init__(self, ctx: MobileUseContext):
14
14
  self.ctx = ctx
15
15
 
16
- def __call__(self, state: State):
16
+ async def __call__(self, state: State):
17
17
  if len(state.messages) <= MAX_MESSAGES_IN_HISTORY:
18
18
  return {}
19
19
 
@@ -27,7 +27,7 @@ class SummarizerNode:
27
27
  start_removal = True
28
28
  if start_removal and msg.id:
29
29
  remove_messages.append(RemoveMessage(id=msg.id))
30
- return state.sanitize_update(
30
+ return await state.asanitize_update(
31
31
  ctx=self.ctx,
32
32
  update={
33
33
  "messages": remove_messages,
@@ -23,8 +23,10 @@ class Settings(BaseSettings):
23
23
  GOOGLE_API_KEY: SecretStr | None = None
24
24
  XAI_API_KEY: SecretStr | None = None
25
25
  OPEN_ROUTER_API_KEY: SecretStr | None = None
26
+ MINITAP_API_KEY: SecretStr | None = None
26
27
 
27
28
  OPENAI_BASE_URL: str | None = None
29
+ MINITAP_API_BASE_URL: str = "https://platform.minitap.ai"
28
30
 
29
31
  DEVICE_SCREEN_API_BASE_URL: str | None = None
30
32
  DEVICE_HARDWARE_BRIDGE_BASE_URL: str | None = None
@@ -90,7 +92,7 @@ def record_events(output_path: Path | None, events: list[str] | BaseModel | Any)
90
92
 
91
93
  ### LLM Configuration
92
94
 
93
- LLMProvider = Literal["openai", "google", "openrouter", "xai", "vertexai"]
95
+ LLMProvider = Literal["openai", "google", "openrouter", "xai", "vertexai", "minitap"]
94
96
  LLMUtilsNode = Literal["outputter", "hopper"]
95
97
  AgentNode = Literal["planner", "orchestrator", "cortex", "executor"]
96
98
  AgentNodeWithFallback = Literal["cortex"]
@@ -131,6 +133,9 @@ class LLM(BaseModel):
131
133
  case "xai":
132
134
  if not settings.XAI_API_KEY:
133
135
  raise Exception(f"{name} requires XAI_API_KEY in .env")
136
+ case "minitap":
137
+ if not settings.MINITAP_API_KEY:
138
+ raise Exception(f"{name} requires MINITAP_API_KEY in .env")
134
139
 
135
140
  def __str__(self):
136
141
  return f"{self.provider}/{self.model}"