minitap-mobile-use 2.2.0__py3-none-any.whl → 2.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of minitap-mobile-use might be problematic. Click here for more details.

Files changed (59) hide show
  1. minitap/mobile_use/agents/contextor/contextor.py +6 -4
  2. minitap/mobile_use/agents/cortex/cortex.md +114 -27
  3. minitap/mobile_use/agents/cortex/cortex.py +8 -5
  4. minitap/mobile_use/agents/executor/executor.md +15 -10
  5. minitap/mobile_use/agents/executor/executor.py +6 -5
  6. minitap/mobile_use/agents/executor/utils.py +2 -1
  7. minitap/mobile_use/agents/hopper/hopper.py +6 -3
  8. minitap/mobile_use/agents/orchestrator/orchestrator.py +26 -11
  9. minitap/mobile_use/agents/outputter/outputter.py +6 -3
  10. minitap/mobile_use/agents/outputter/test_outputter.py +104 -42
  11. minitap/mobile_use/agents/planner/planner.md +20 -22
  12. minitap/mobile_use/agents/planner/planner.py +10 -7
  13. minitap/mobile_use/agents/planner/types.py +4 -2
  14. minitap/mobile_use/agents/planner/utils.py +14 -0
  15. minitap/mobile_use/agents/summarizer/summarizer.py +2 -2
  16. minitap/mobile_use/config.py +6 -1
  17. minitap/mobile_use/context.py +13 -3
  18. minitap/mobile_use/controllers/mobile_command_controller.py +1 -14
  19. minitap/mobile_use/graph/state.py +7 -3
  20. minitap/mobile_use/sdk/agent.py +204 -29
  21. minitap/mobile_use/sdk/examples/README.md +19 -1
  22. minitap/mobile_use/sdk/examples/platform_minimal_example.py +46 -0
  23. minitap/mobile_use/sdk/services/platform.py +244 -0
  24. minitap/mobile_use/sdk/types/__init__.py +14 -14
  25. minitap/mobile_use/sdk/types/exceptions.py +57 -0
  26. minitap/mobile_use/sdk/types/platform.py +125 -0
  27. minitap/mobile_use/sdk/types/task.py +60 -17
  28. minitap/mobile_use/servers/device_hardware_bridge.py +3 -2
  29. minitap/mobile_use/servers/stop_servers.py +11 -12
  30. minitap/mobile_use/servers/utils.py +6 -9
  31. minitap/mobile_use/services/llm.py +89 -5
  32. minitap/mobile_use/tools/index.py +2 -8
  33. minitap/mobile_use/tools/mobile/back.py +3 -3
  34. minitap/mobile_use/tools/mobile/clear_text.py +67 -38
  35. minitap/mobile_use/tools/mobile/erase_one_char.py +5 -4
  36. minitap/mobile_use/tools/mobile/{take_screenshot.py → glimpse_screen.py} +23 -15
  37. minitap/mobile_use/tools/mobile/input_text.py +67 -16
  38. minitap/mobile_use/tools/mobile/launch_app.py +54 -22
  39. minitap/mobile_use/tools/mobile/long_press_on.py +15 -8
  40. minitap/mobile_use/tools/mobile/open_link.py +15 -8
  41. minitap/mobile_use/tools/mobile/press_key.py +15 -8
  42. minitap/mobile_use/tools/mobile/stop_app.py +14 -8
  43. minitap/mobile_use/tools/mobile/swipe.py +11 -5
  44. minitap/mobile_use/tools/mobile/tap.py +103 -21
  45. minitap/mobile_use/tools/mobile/wait_for_animation_to_end.py +3 -3
  46. minitap/mobile_use/tools/test_utils.py +377 -0
  47. minitap/mobile_use/tools/types.py +35 -0
  48. minitap/mobile_use/tools/utils.py +149 -39
  49. minitap/mobile_use/utils/recorder.py +1 -1
  50. minitap/mobile_use/utils/test_ui_hierarchy.py +178 -0
  51. minitap/mobile_use/utils/ui_hierarchy.py +11 -4
  52. {minitap_mobile_use-2.2.0.dist-info → minitap_mobile_use-2.4.0.dist-info}/METADATA +6 -4
  53. minitap_mobile_use-2.4.0.dist-info/RECORD +99 -0
  54. minitap/mobile_use/tools/mobile/copy_text_from.py +0 -73
  55. minitap/mobile_use/tools/mobile/find_packages.py +0 -69
  56. minitap/mobile_use/tools/mobile/paste_text.py +0 -62
  57. minitap_mobile_use-2.2.0.dist-info/RECORD +0 -96
  58. {minitap_mobile_use-2.2.0.dist-info → minitap_mobile_use-2.4.0.dist-info}/WHEEL +0 -0
  59. {minitap_mobile_use-2.2.0.dist-info → minitap_mobile_use-2.4.0.dist-info}/entry_points.txt +0 -0
@@ -1,9 +1,21 @@
1
- from minitap.mobile_use.agents.outputter.outputter import outputter
2
- from minitap.mobile_use.config import LLM, OutputConfig
3
- from minitap.mobile_use.context import MobileUseContext
4
- from minitap.mobile_use.utils.logger import get_logger
1
+ import sys
2
+ from unittest.mock import AsyncMock, Mock, patch
3
+
4
+ import pytest
5
5
  from pydantic import BaseModel
6
6
 
7
+ sys.modules["langgraph.prebuilt.chat_agent_executor"] = Mock()
8
+ sys.modules["minitap.mobile_use.graph.state"] = Mock()
9
+ sys.modules["langchain_google_vertexai"] = Mock()
10
+ sys.modules["langchain_google_genai"] = Mock()
11
+ sys.modules["langchain_openai"] = Mock()
12
+ sys.modules["langchain_cerebras"] = Mock()
13
+
14
+ from minitap.mobile_use.agents.outputter.outputter import outputter # noqa: E402
15
+ from minitap.mobile_use.config import LLM, OutputConfig # noqa: E402
16
+ from minitap.mobile_use.context import MobileUseContext # noqa: E402
17
+ from minitap.mobile_use.utils.logger import get_logger # noqa: E402
18
+
7
19
  logger = get_logger(__name__)
8
20
 
9
21
 
@@ -40,68 +52,118 @@ mocked_state = DummyState(
40
52
  ],
41
53
  )
42
54
 
43
- mocked_ctx = MobileUseContext(
44
- llm_config={
55
+
56
+ @pytest.fixture
57
+ def mock_context():
58
+ """Create a properly mocked context with all required fields."""
59
+ ctx = Mock(spec=MobileUseContext)
60
+ ctx.llm_config = {
45
61
  "executor": LLM(provider="openai", model="gpt-5-nano"),
46
62
  "cortex": LLM(provider="openai", model="gpt-5-nano"),
47
63
  "planner": LLM(provider="openai", model="gpt-5-nano"),
48
64
  "orchestrator": LLM(provider="openai", model="gpt-5-nano"),
49
- },
50
- ) # type: ignore
65
+ }
66
+ ctx.device = Mock()
67
+ ctx.hw_bridge_client = Mock()
68
+ ctx.screen_api_client = Mock()
69
+ return ctx
70
+
71
+
72
+ @pytest.fixture
73
+ def mock_state():
74
+ """Create a mock state with test data."""
75
+ return DummyState(
76
+ messages=[],
77
+ initial_goal="Find a green product on my website",
78
+ agents_thoughts=[
79
+ "Going on http://superwebsite.fr",
80
+ "Searching for products",
81
+ "Filtering by color",
82
+ "Color 'green' found for a 20 dollars product",
83
+ ],
84
+ )
85
+
86
+
87
+ @patch("minitap.mobile_use.agents.outputter.outputter.get_llm")
88
+ @pytest.mark.asyncio
89
+ async def test_outputter_with_pydantic_model(mock_get_llm, mock_context, mock_state):
90
+ """Test outputter with Pydantic model output."""
91
+ # Mock the structured LLM response
92
+ mock_structured_llm = AsyncMock()
93
+ mock_structured_llm.ainvoke.return_value = MockPydanticSchema(
94
+ color="green", price=20, currency_symbol="$", website_url="http://superwebsite.fr"
95
+ )
51
96
 
97
+ # Mock the base LLM
98
+ mock_llm = Mock()
99
+ mock_llm.with_structured_output.return_value = mock_structured_llm
100
+ mock_get_llm.return_value = mock_llm
52
101
 
53
- async def test_outputter_with_pydantic_model():
54
- logger.info("Starting test_outputter_with_pydantic_model")
55
102
  config = OutputConfig(
56
103
  structured_output=MockPydanticSchema,
57
104
  output_description=None,
58
105
  )
59
106
 
60
- result = await outputter(ctx=mocked_ctx, output_config=config, graph_output=mocked_state) # type: ignore
61
-
62
- assert isinstance(result, MockPydanticSchema)
63
- assert result.color.lower() == "green"
64
- logger.success(str(result))
107
+ result = await outputter(ctx=mock_context, output_config=config, graph_output=mock_state)
65
108
 
109
+ assert isinstance(result, dict)
110
+ assert result.get("color") == "green"
111
+
112
+
113
+ @patch("minitap.mobile_use.agents.outputter.outputter.get_llm")
114
+ @pytest.mark.asyncio
115
+ async def test_outputter_with_dict(mock_get_llm, mock_context, mock_state):
116
+ """Test outputter with dictionary output."""
117
+ # Mock the structured LLM response for dict
118
+ mock_structured_llm = AsyncMock()
119
+ expected_dict = {
120
+ "color": "green",
121
+ "price": 20,
122
+ "currency_symbol": "$",
123
+ "website_url": "http://superwebsite.fr",
124
+ }
125
+ mock_structured_llm.ainvoke.return_value = expected_dict
126
+
127
+ # Mock the base LLM
128
+ mock_llm = Mock()
129
+ mock_llm.with_structured_output.return_value = mock_structured_llm
130
+ mock_get_llm.return_value = mock_llm
66
131
 
67
- async def test_outputter_with_dict():
68
- logger.info("Starting test_outputter_with_dict")
69
132
  config = OutputConfig(
70
133
  structured_output=mock_dict,
71
134
  output_description=None,
72
135
  )
73
136
 
74
- result = await outputter(ctx=mocked_ctx, output_config=config, graph_output=mocked_state) # type: ignore
137
+ result = await outputter(ctx=mock_context, output_config=config, graph_output=mock_state)
75
138
 
76
139
  assert isinstance(result, dict)
77
- assert result.get("color", None) == "green"
78
- assert result.get("price", None) == 20
79
- assert result.get("currency_symbol", None) == "$"
80
- assert result.get("website_url", None) == "http://superwebsite.fr"
81
- logger.success(str(result))
82
-
140
+ assert result.get("color") == "green"
141
+ assert result.get("price") == 20
142
+ assert result.get("currency_symbol") == "$"
143
+ assert result.get("website_url") == "http://superwebsite.fr"
144
+
145
+
146
+ @patch("minitap.mobile_use.agents.outputter.outputter.get_llm")
147
+ @pytest.mark.asyncio
148
+ async def test_outputter_with_natural_language_output(mock_get_llm, mock_context, mock_state):
149
+ """Test outputter with natural language description output."""
150
+ # Mock the LLM response for natural language output (no structured output)
151
+ mock_llm = AsyncMock()
152
+ expected_json = '{"color": "green", "price": 20, "currency_symbol": "$", "website_url": "http://superwebsite.fr"}'
153
+ mock_llm.ainvoke.return_value = Mock(content=expected_json)
154
+ mock_get_llm.return_value = mock_llm
83
155
 
84
- async def test_outputter_with_natural_language_output():
85
- logger.info("Starting test_outputter_with_natural_language_output")
86
156
  config = OutputConfig(
87
157
  structured_output=None,
88
- output_description="A JSON object with a color, \
89
- a price, a currency_symbol and a website_url key",
158
+ output_description=(
159
+ "A JSON object with a color, a price, a currency_symbol and a website_url key"
160
+ ),
90
161
  )
91
162
 
92
- result = await outputter(ctx=mocked_ctx, output_config=config, graph_output=mocked_state) # type: ignore
93
- logger.info(str(result))
163
+ result = await outputter(ctx=mock_context, output_config=config, graph_output=mock_state)
94
164
 
95
165
  assert isinstance(result, dict)
96
- assert result.get("color", None) == "green"
97
- assert result.get("price", None) == 20
98
- assert result.get("currency_symbol", None) == "$"
99
- assert result.get("website_url", None) == "http://superwebsite.fr"
100
- logger.success(str(result))
101
-
102
-
103
- if __name__ == "__main__":
104
- import asyncio
105
-
106
- asyncio.run(test_outputter_with_pydantic_model())
107
- asyncio.run(test_outputter_with_natural_language_output())
166
+ assert result.get("color") == "green"
167
+ assert result.get("price") == 20
168
+ assert result.get("currency_symbol") == "$"
169
+ assert result.get("website_url") == "http://superwebsite.fr"
@@ -9,12 +9,13 @@ You work like an agile tech lead: defining the key milestones without locking in
9
9
  Given the **user's goal**:
10
10
 
11
11
  - Create a **high-level sequence of subgoals** to complete that goal.
12
- - Subgoals should reflect real interactions with mobile UIs (e.g. "Open app", "Tap search bar", "Scroll to item", "Send message to Bob", etc).
12
+ - Subgoals should reflect real interactions with mobile UIs and describe the intent of the action (e.g., "Open the app to find a contact," "View the image to extract information," "Send a message to Bob confirming the appointment").
13
+ - Focus on the goal of the interaction, not just the physical action. For example, instead of 'View the receipt,' a better subgoal is 'Open and analyze the receipt to identify transactions.'
13
14
  - Don't assume the full UI is visible yet. Plan based on how most mobile apps work, and keep flexibility.
14
- - List of agents thoughts is empty which is expected, since it is the first plan.
15
- - Avoid too granular UI actions based tasks (e.g. "tap", "swipe", "copy", "paste") unless explicitly required.
16
15
  - The executor has the following available tools: {{ executor_tools_list }}.
17
16
  When one of these tools offers a direct shortcut (e.g. `openLink` instead of manually launching a browser and typing a URL), prefer it over decomposed manual steps.
17
+ - Ensure that each subgoal prepares the ground for the next. If data needs to be gathered in one step to be used in another, the subgoal should reflect the intent to gather that data.
18
+
18
19
 
19
20
  2. **Replanning**
20
21
  If you're asked to **revise a previous plan**, you'll also receive:
@@ -27,38 +28,35 @@ You work like an agile tech lead: defining the key milestones without locking in
27
28
 
28
29
  ### Output
29
30
 
30
- You must output a **list of subgoals (description + optional subgoal ID)**, each representing a clear subgoal.
31
+ You must output a **list of subgoals (description)**, each representing a clear subgoal.
31
32
  Each subgoal should be:
32
33
 
33
- - Focused on **realistic mobile interactions**
34
+ - Focused on **purpose-driven mobile interactions** that clearly state the intent
34
35
  - Neither too vague nor too granular
35
36
  - Sequential (later steps may depend on earlier ones)
36
37
  - Don't use loop-like formulation unless necessary (e.g. don't say "repeat this X times", instead reuse the same steps X times as subgoals)
37
38
 
38
- If you're replanning and need to keep a previous subgoal, you **must keep the same subgoal ID**.
39
-
40
39
  ### Examples
41
40
 
42
- #### **Initial Goal**: "Open WhatsApp and send 'I’m running late' to Alice"
41
+ #### **Initial Goal**: "Go on https://tesla.com, and tell me what is the first car being displayed"
43
42
 
44
43
  **Plan**:
45
44
 
46
- - Open the WhatsApp app (ID: None -> will be generated as a UUID like bc3c362d-f498-4f1a-991e-4a2d1f8c1226)
47
- - Locate or search for Alice (ID: None)
48
- - Open the conversation with Alice (ID: None)
49
- - Type the message "I’m running late" (ID: None)
50
- - Send the message (ID: None)
45
+ - Open the link https://tesla.com to find information
46
+ - Analyze the home page to identify the first car displayed
51
47
 
52
- #### **Initial Goal**: "Go on https://tesla.com, and tell me what is the first car being displayed"
48
+ #### **Initial Goal**: "Open WhatsApp and send 'I’m running late' to Alice"
53
49
 
54
50
  **Plan**:
55
51
 
56
- - Open the link https://tesla.com (ID: None)
57
- - Find the first car displayed on the home page (ID: None)
52
+ - Open the WhatsApp app to find the contact "Alice"
53
+ - Open the conversation with Alice to send a message
54
+ - Type the message "I’m running late" into the message field
55
+ - Send the message
58
56
 
59
57
  #### **Replanning Example**
60
58
 
61
- **Original Plan**: same as above with IDs set
59
+ **Original Plan**: same as above
62
60
  **Agent Thoughts**:
63
61
 
64
62
  - Couldn't find Alice in recent chats
@@ -67,8 +65,8 @@ If you're replaning and need to keep a previous subgoal, you **must keep the sam
67
65
 
68
66
  **New Plan**:
69
67
 
70
- - Open WhatsApp (ID: bc3c362d-f498-4f1a-991e-4a2d1f8c1226)
71
- - Tap the search bar (ID: None)
72
- - Search for "Alice" (ID: None)
73
- - Select the correct chat (ID: None)
74
- - Type and send "I’m running late" (ID: None)
68
+ - Open WhatsApp
69
+ - Tap the search bar to find a contact
70
+ - Search for "Alice" in the search field
71
+ - Select the correct chat to open the conversation
72
+ - Type and send "I’m running late"
@@ -1,14 +1,13 @@
1
- import uuid
2
1
  from pathlib import Path
3
2
 
4
3
  from jinja2 import Template
5
4
  from langchain_core.messages import HumanMessage, SystemMessage
6
5
 
7
6
  from minitap.mobile_use.agents.planner.types import PlannerOutput, Subgoal, SubgoalStatus
8
- from minitap.mobile_use.agents.planner.utils import one_of_them_is_failure
7
+ from minitap.mobile_use.agents.planner.utils import generate_id, one_of_them_is_failure
9
8
  from minitap.mobile_use.context import MobileUseContext
10
9
  from minitap.mobile_use.graph.state import State
11
- from minitap.mobile_use.services.llm import get_llm
10
+ from minitap.mobile_use.services.llm import get_llm, invoke_llm_with_timeout_message
12
11
  from minitap.mobile_use.tools.index import EXECUTOR_WRAPPERS_TOOLS, format_tools_list
13
12
  from minitap.mobile_use.utils.decorators import wrap_with_callbacks
14
13
  from minitap.mobile_use.utils.logger import get_logger
@@ -49,11 +48,12 @@ class PlannerNode:
49
48
 
50
49
  llm = get_llm(ctx=self.ctx, name="planner")
51
50
  llm = llm.with_structured_output(PlannerOutput)
52
- response: PlannerOutput = await llm.ainvoke(messages) # type: ignore
53
-
51
+ response: PlannerOutput = await invoke_llm_with_timeout_message(
52
+ llm.ainvoke(messages), agent_name="Planner"
53
+ ) # type: ignore
54
54
  subgoals_plan = [
55
55
  Subgoal(
56
- id=subgoal.id or str(uuid.uuid4()),
56
+ id=generate_id(),
57
57
  description=subgoal.description,
58
58
  status=SubgoalStatus.NOT_STARTED,
59
59
  completion_reason=None,
@@ -63,7 +63,10 @@ class PlannerNode:
63
63
  logger.info("📜 Generated plan:")
64
64
  logger.info("\n".join(str(s) for s in subgoals_plan))
65
65
 
66
- return state.sanitize_update(
66
+ if self.ctx.on_plan_changes:
67
+ await self.ctx.on_plan_changes(subgoals_plan, needs_replan)
68
+
69
+ return await state.asanitize_update(
67
70
  ctx=self.ctx,
68
71
  update={
69
72
  "subgoal_plan": subgoals_plan,
@@ -1,11 +1,11 @@
1
+ from datetime import datetime
1
2
  from enum import Enum
3
+ from typing import Annotated
2
4
 
3
5
  from pydantic import BaseModel
4
- from typing import Annotated
5
6
 
6
7
 
7
8
  class PlannerSubgoalOutput(BaseModel):
8
- id: Annotated[str | None, "If not provided, it will be generated"] = None
9
9
  description: str
10
10
 
11
11
 
@@ -27,6 +27,8 @@ class Subgoal(BaseModel):
27
27
  str | None, "Reason why the subgoal was completed (failure or success)"
28
28
  ] = None
29
29
  status: SubgoalStatus
30
+ started_at: Annotated[datetime | None, "When the subgoal started"] = None
31
+ ended_at: Annotated[datetime | None, "When the subgoal ended"] = None
30
32
 
31
33
  def __str__(self):
32
34
  status_emoji = "❓"
@@ -1,4 +1,8 @@
1
+ import random
2
+ import string
3
+
1
4
  from minitap.mobile_use.agents.planner.types import Subgoal, SubgoalStatus
5
+ from datetime import datetime, UTC
2
6
 
3
7
 
4
8
  def get_current_subgoal(subgoals: list[Subgoal]) -> Subgoal | None:
@@ -22,6 +26,7 @@ def complete_current_subgoal(subgoals: list[Subgoal]) -> list[Subgoal]:
22
26
  if not current_subgoal:
23
27
  return subgoals
24
28
  current_subgoal.status = SubgoalStatus.SUCCESS
29
+ current_subgoal.ended_at = datetime.now(UTC)
25
30
  return subgoals
26
31
 
27
32
 
@@ -29,6 +34,7 @@ def complete_subgoals_by_ids(subgoals: list[Subgoal], ids: list[str]) -> list[Su
29
34
  for subgoal in subgoals:
30
35
  if subgoal.id in ids:
31
36
  subgoal.status = SubgoalStatus.SUCCESS
37
+ subgoal.ended_at = datetime.now(UTC)
32
38
  return subgoals
33
39
 
34
40
 
@@ -37,6 +43,7 @@ def fail_current_subgoal(subgoals: list[Subgoal]) -> list[Subgoal]:
37
43
  if not current_subgoal:
38
44
  return subgoals
39
45
  current_subgoal.status = SubgoalStatus.FAILURE
46
+ current_subgoal.ended_at = datetime.now(UTC)
40
47
  return subgoals
41
48
 
42
49
 
@@ -53,4 +60,11 @@ def start_next_subgoal(subgoals: list[Subgoal]) -> list[Subgoal]:
53
60
  if not next_subgoal:
54
61
  return subgoals
55
62
  next_subgoal.status = SubgoalStatus.PENDING
63
+ next_subgoal.started_at = datetime.now(UTC)
56
64
  return subgoals
65
+
66
+
67
+ def generate_id(length: int = 6) -> str:
68
+ """Generates a small and distinct random string ID."""
69
+ chars = string.ascii_lowercase + string.digits
70
+ return "".join(random.choice(chars) for _ in range(length))
@@ -13,7 +13,7 @@ class SummarizerNode:
13
13
  def __init__(self, ctx: MobileUseContext):
14
14
  self.ctx = ctx
15
15
 
16
- def __call__(self, state: State):
16
+ async def __call__(self, state: State):
17
17
  if len(state.messages) <= MAX_MESSAGES_IN_HISTORY:
18
18
  return {}
19
19
 
@@ -27,7 +27,7 @@ class SummarizerNode:
27
27
  start_removal = True
28
28
  if start_removal and msg.id:
29
29
  remove_messages.append(RemoveMessage(id=msg.id))
30
- return state.sanitize_update(
30
+ return await state.asanitize_update(
31
31
  ctx=self.ctx,
32
32
  update={
33
33
  "messages": remove_messages,
@@ -23,8 +23,10 @@ class Settings(BaseSettings):
23
23
  GOOGLE_API_KEY: SecretStr | None = None
24
24
  XAI_API_KEY: SecretStr | None = None
25
25
  OPEN_ROUTER_API_KEY: SecretStr | None = None
26
+ MINITAP_API_KEY: SecretStr | None = None
26
27
 
27
28
  OPENAI_BASE_URL: str | None = None
29
+ MINITAP_API_BASE_URL: str = "https://platform.minitap.ai"
28
30
 
29
31
  DEVICE_SCREEN_API_BASE_URL: str | None = None
30
32
  DEVICE_HARDWARE_BRIDGE_BASE_URL: str | None = None
@@ -90,7 +92,7 @@ def record_events(output_path: Path | None, events: list[str] | BaseModel | Any)
90
92
 
91
93
  ### LLM Configuration
92
94
 
93
- LLMProvider = Literal["openai", "google", "openrouter", "xai", "vertexai"]
95
+ LLMProvider = Literal["openai", "google", "openrouter", "xai", "vertexai", "minitap"]
94
96
  LLMUtilsNode = Literal["outputter", "hopper"]
95
97
  AgentNode = Literal["planner", "orchestrator", "cortex", "executor"]
96
98
  AgentNodeWithFallback = Literal["cortex"]
@@ -131,6 +133,9 @@ class LLM(BaseModel):
131
133
  case "xai":
132
134
  if not settings.XAI_API_KEY:
133
135
  raise Exception(f"{name} requires XAI_API_KEY in .env")
136
+ case "minitap":
137
+ if not settings.MINITAP_API_KEY:
138
+ raise Exception(f"{name} requires MINITAP_API_KEY in .env")
134
139
 
135
140
  def __str__(self):
136
141
  return f"{self.provider}/{self.model}"
@@ -4,17 +4,19 @@ Context variables for global state management.
4
4
  Uses ContextVar to avoid prop drilling and maintain clean function signatures.
5
5
  """
6
6
 
7
+ from collections.abc import Callable, Coroutine
7
8
  from enum import Enum
8
9
  from pathlib import Path
10
+ from typing import Literal
9
11
 
10
12
  from adbutils import AdbClient
11
13
  from openai import BaseModel
12
14
  from pydantic import ConfigDict
13
- from typing import Literal
14
15
 
16
+ from minitap.mobile_use.agents.planner.types import Subgoal
15
17
  from minitap.mobile_use.clients.device_hardware_client import DeviceHardwareClient
16
18
  from minitap.mobile_use.clients.screen_api_client import ScreenApiClient
17
- from minitap.mobile_use.config import LLMConfig
19
+ from minitap.mobile_use.config import AgentNode, LLMConfig
18
20
 
19
21
 
20
22
  class DevicePlatform(str, Enum):
@@ -45,18 +47,26 @@ class ExecutionSetup(BaseModel):
45
47
  """Execution setup for a task."""
46
48
 
47
49
  traces_path: Path
48
- trace_id: str
50
+ trace_name: str
51
+ enable_remote_tracing: bool
52
+
53
+
54
+ IsReplan = bool
49
55
 
50
56
 
51
57
  class MobileUseContext(BaseModel):
52
58
  model_config = ConfigDict(arbitrary_types_allowed=True)
53
59
 
60
+ trace_id: str
54
61
  device: DeviceContext
55
62
  hw_bridge_client: DeviceHardwareClient
56
63
  screen_api_client: ScreenApiClient
57
64
  llm_config: LLMConfig
58
65
  adb_client: AdbClient | None = None
59
66
  execution_setup: ExecutionSetup | None = None
67
+ on_agent_thought: Callable[[AgentNode, str], Coroutine] | None = None
68
+ on_plan_changes: Callable[[list[Subgoal], IsReplan], Coroutine] | None = None
69
+ minitap_api_key: str | None = None
60
70
 
61
71
  def get_adb_client(self) -> AdbClient:
62
72
  if self.adb_client is None:
@@ -243,20 +243,6 @@ def input_text(ctx: MobileUseContext, text: str, dry_run: bool = False):
243
243
  return run_flow(ctx, [{"inputText": text}], dry_run=dry_run)
244
244
 
245
245
 
246
- def copy_text_from(ctx: MobileUseContext, selector_request: SelectorRequest, dry_run: bool = False):
247
- copy_text_from_body = selector_request.to_dict()
248
- if not copy_text_from_body:
249
- error = "Invalid copyTextFrom selector request, could not format yaml"
250
- logger.error(error)
251
- raise ControllerErrors(error)
252
- flow_input = [{"copyTextFrom": copy_text_from_body}]
253
- return run_flow(ctx, flow_input, dry_run=dry_run)
254
-
255
-
256
- def paste_text(ctx: MobileUseContext, dry_run: bool = False):
257
- return run_flow(ctx, ["pasteText"], dry_run=dry_run)
258
-
259
-
260
246
  def erase_text(ctx: MobileUseContext, nb_chars: int | None = None, dry_run: bool = False):
261
247
  """
262
248
  Removes characters from the currently selected textfield (if any)
@@ -333,6 +319,7 @@ def run_flow_with_wait_for_animation_to_end(
333
319
 
334
320
  if __name__ == "__main__":
335
321
  ctx = MobileUseContext(
322
+ trace_id="trace_id",
336
323
  llm_config=initialize_llm_config(),
337
324
  device=DeviceContext(
338
325
  host_platform="WINDOWS",
@@ -54,7 +54,7 @@ class State(AgentStatePydantic):
54
54
  take_last,
55
55
  ]
56
56
 
57
- def sanitize_update(
57
+ async def asanitize_update(
58
58
  self,
59
59
  ctx: MobileUseContext,
60
60
  update: dict,
@@ -72,7 +72,7 @@ class State(AgentStatePydantic):
72
72
  raise ValueError("agents_thoughts must be a str or list[str]")
73
73
  if agent is None:
74
74
  raise ValueError("Agent is required when updating the 'agents_thoughts' key")
75
- update["agents_thoughts"] = _add_agent_thoughts(
75
+ update["agents_thoughts"] = await _add_agent_thoughts(
76
76
  ctx=ctx,
77
77
  old=self.agents_thoughts,
78
78
  new=updated_agents_thoughts,
@@ -81,12 +81,16 @@ class State(AgentStatePydantic):
81
81
  return update
82
82
 
83
83
 
84
- def _add_agent_thoughts(
84
+ async def _add_agent_thoughts(
85
85
  ctx: MobileUseContext,
86
86
  old: list[str],
87
87
  new: list[str],
88
88
  agent: AgentNode,
89
89
  ) -> list[str]:
90
+ if ctx.on_agent_thought:
91
+ for thought in new:
92
+ await ctx.on_agent_thought(agent, thought)
93
+
90
94
  named_thoughts = [f"[{agent}] {thought}" for thought in new]
91
95
  if ctx.execution_setup:
92
96
  record_interaction(ctx, response=AIMessage(content=str(named_thoughts)))