minitap-mobile-use 2.1.0__py3-none-any.whl → 2.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of minitap-mobile-use might be problematic. Click here for more details.

@@ -38,7 +38,7 @@ Focus on the **current PENDING subgoal and the next subgoals not yet started**.
38
38
  2.2. Otherwise, output a **stringified structured set of instructions** that an **Executor agent** can perform on a real mobile device:
39
39
 
40
40
  - These must be **concrete low-level actions**.
41
- - The executor has the following available tools: **{{ executor_tools_list }}**.
41
+ - The executor has the following available tools: {{ executor_tools_list }}.
42
42
  - Your goal is to achieve subgoals **fast** - so you must put as many actions as possible in your instructions to complete all achievable subgoals (based on your observations) in one go.
43
43
  - To open URLs/links directly, use the `open_link` tool - it will automatically handle opening in the appropriate browser. It also handles deep links.
44
44
  - When you need to open an app, use the `find_packages` low-level action to try and get its name. Then, simply use the `launch_app` low-level action to launch it.
@@ -46,7 +46,7 @@ class CortexNode:
46
46
  current_subgoal=get_current_subgoal(state.subgoal_plan),
47
47
  agents_thoughts=state.agents_thoughts,
48
48
  executor_feedback=executor_feedback,
49
- executor_tools_list=format_tools_list(self.ctx, EXECUTOR_WRAPPERS_TOOLS),
49
+ executor_tools_list=format_tools_list(ctx=self.ctx, wrappers=EXECUTOR_WRAPPERS_TOOLS),
50
50
  )
51
51
  messages = [
52
52
  SystemMessage(content=system_message),
@@ -3,6 +3,8 @@ from pathlib import Path
3
3
  from jinja2 import Template
4
4
  from langchain_core.messages import HumanMessage, SystemMessage
5
5
  from langchain_google_genai import ChatGoogleGenerativeAI
6
+ from langchain_google_vertexai.chat_models import ChatVertexAI
7
+
6
8
  from minitap.mobile_use.constants import EXECUTOR_MESSAGES_KEY
7
9
  from minitap.mobile_use.context import MobileUseContext
8
10
  from minitap.mobile_use.graph.state import State
@@ -56,7 +58,7 @@ class ExecutorNode:
56
58
  }
57
59
 
58
60
  # ChatGoogleGenerativeAI does not support the "parallel_tool_calls" keyword
59
- if not isinstance(llm, ChatGoogleGenerativeAI):
61
+ if not isinstance(llm, ChatGoogleGenerativeAI | ChatVertexAI):
60
62
  llm_bind_tools_kwargs["parallel_tool_calls"] = True
61
63
 
62
64
  llm = llm.bind_tools(**llm_bind_tools_kwargs)
@@ -13,7 +13,7 @@ You work like an agile tech lead: defining the key milestones without locking in
13
13
  - Don't assume the full UI is visible yet. Plan based on how most mobile apps work, and keep flexibility.
14
14
  - The list of agents' thoughts is empty, which is expected since this is the first plan.
15
15
  - Avoid overly granular tasks based on UI actions (e.g. "tap", "swipe", "copy", "paste") unless explicitly required.
16
- - The executor has the following available tools: **{{ executor_tools_list }}**.
16
+ - The executor has the following available tools: {{ executor_tools_list }}.
17
17
  When one of these tools offers a direct shortcut (e.g. `open_link` instead of manually launching a browser and typing a URL), prefer it over decomposed manual steps.
18
18
 
19
19
  2. **Replanning**
@@ -30,7 +30,10 @@ class PlannerNode:
30
30
 
31
31
  system_message = Template(
32
32
  Path(__file__).parent.joinpath("planner.md").read_text(encoding="utf-8")
33
- ).render(platform=self.ctx.device.mobile_platform.value)
33
+ ).render(
34
+ platform=self.ctx.device.mobile_platform.value,
35
+ executor_tools_list=format_tools_list(ctx=self.ctx, wrappers=EXECUTOR_WRAPPERS_TOOLS),
36
+ )
34
37
  human_message = Template(
35
38
  Path(__file__).parent.joinpath("human.md").read_text(encoding="utf-8")
36
39
  ).render(
@@ -38,7 +41,6 @@ class PlannerNode:
38
41
  initial_goal=state.initial_goal,
39
42
  previous_plan="\n".join(str(s) for s in state.subgoal_plan),
40
43
  agent_thoughts="\n".join(state.agents_thoughts),
41
- executor_tools_list=format_tools_list(self.ctx, EXECUTOR_WRAPPERS_TOOLS),
42
44
  )
43
45
  messages = [
44
46
  SystemMessage(content=system_message),
@@ -3,7 +3,9 @@ import os
3
3
  from pathlib import Path
4
4
  from typing import Annotated, Any, Literal
5
5
 
6
+ import google.auth
6
7
  from dotenv import load_dotenv
8
+ from google.auth.exceptions import DefaultCredentialsError
7
9
  from pydantic import BaseModel, Field, SecretStr, ValidationError, model_validator
8
10
  from pydantic_settings import BaseSettings
9
11
 
@@ -88,7 +90,7 @@ def record_events(output_path: Path | None, events: list[str] | BaseModel | Any)
88
90
 
89
91
  ### LLM Configuration
90
92
 
91
- LLMProvider = Literal["openai", "google", "openrouter", "xai"]
93
+ LLMProvider = Literal["openai", "google", "openrouter", "xai", "vertexai"]
92
94
  LLMUtilsNode = Literal["outputter", "hopper"]
93
95
  AgentNode = Literal["planner", "orchestrator", "cortex", "executor"]
94
96
  AgentNodeWithFallback = Literal["cortex"]
@@ -98,6 +100,17 @@ DEFAULT_LLM_CONFIG_FILENAME = "llm-config.defaults.jsonc"
98
100
  OVERRIDE_LLM_CONFIG_FILENAME = "llm-config.override.jsonc"
99
101
 
100
102
 
103
+ def validate_vertex_ai_credentials():
104
+ try:
105
+ _, project = google.auth.default()
106
+ if not project:
107
+ raise Exception("VertexAI requires a Google Cloud project to be set.")
108
+ except DefaultCredentialsError as e:
109
+ raise Exception(
110
+ f"VertexAI requires valid Google Application Default Credentials (ADC): {e}"
111
+ )
112
+
113
+
101
114
  class LLM(BaseModel):
102
115
  provider: LLMProvider
103
116
  model: str
@@ -110,6 +123,8 @@ class LLM(BaseModel):
110
123
  case "google":
111
124
  if not settings.GOOGLE_API_KEY:
112
125
  raise Exception(f"{name} requires GOOGLE_API_KEY in .env")
126
+ case "vertexai":
127
+ validate_vertex_ai_credentials()
113
128
  case "openrouter":
114
129
  if not settings.OPEN_ROUTER_API_KEY:
115
130
  raise Exception(f"{name} requires OPEN_ROUTER_API_KEY in .env")
@@ -311,9 +311,9 @@ def press_key(ctx: MobileUseContext, key: Key, dry_run: bool = False):
311
311
 
312
312
 
313
313
  class WaitTimeout(Enum):
314
- SHORT = 500
315
- MEDIUM = 1000
316
- LONG = 5000
314
+ SHORT = "500"
315
+ MEDIUM = "1000"
316
+ LONG = "5000"
317
317
 
318
318
 
319
319
  def wait_for_animation_to_end(
@@ -327,7 +327,7 @@ def wait_for_animation_to_end(
327
327
  def run_flow_with_wait_for_animation_to_end(
328
328
  ctx: MobileUseContext, base_flow: list, dry_run: bool = False
329
329
  ):
330
- base_flow.append({"waitForAnimationToEnd": {"timeout": WaitTimeout.MEDIUM.value}})
330
+ base_flow.append({"waitForAnimationToEnd": {"timeout": int(WaitTimeout.MEDIUM.value)}})
331
331
  return run_flow(ctx, base_flow, dry_run=dry_run)
332
332
 
333
333
 
@@ -35,9 +35,9 @@ async def run_automation(
35
35
  if settings.ADB_HOST:
36
36
  config.with_adb_server(host=settings.ADB_HOST, port=settings.ADB_PORT)
37
37
  if settings.DEVICE_HARDWARE_BRIDGE_BASE_URL:
38
- config.with_hw_bridge_base_url(url=settings.DEVICE_HARDWARE_BRIDGE_BASE_URL)
38
+ config.with_hw_bridge(url=settings.DEVICE_HARDWARE_BRIDGE_BASE_URL)
39
39
  if settings.DEVICE_SCREEN_API_BASE_URL:
40
- config.with_screen_api_base_url(url=settings.DEVICE_SCREEN_API_BASE_URL)
40
+ config.with_screen_api(url=settings.DEVICE_SCREEN_API_BASE_URL)
41
41
  if graph_config_callbacks:
42
42
  config.with_graph_config_callbacks(graph_config_callbacks)
43
43
 
@@ -278,13 +278,12 @@ class Agent:
278
278
  )
279
279
 
280
280
  if stream_mode == "updates":
281
- for key, value in payload.items(): # type: ignore
281
+ for _, value in payload.items(): # type: ignore node name, node output
282
282
  if value and "agents_thoughts" in value:
283
283
  new_thoughts = value["agents_thoughts"]
284
284
  last_item = new_thoughts[-1] if new_thoughts else None
285
285
  if last_item:
286
286
  log_agent_thought(
287
- prefix=key,
288
287
  agent_thought=last_item,
289
288
  )
290
289
 
@@ -105,7 +105,7 @@ class AgentConfigBuilder:
105
105
  self._task_request_defaults = copy.deepcopy(config)
106
106
  return self
107
107
 
108
- def with_hw_bridge_base_url(self, url: str | ApiBaseUrl) -> "AgentConfigBuilder":
108
+ def with_hw_bridge(self, url: str | ApiBaseUrl) -> "AgentConfigBuilder":
109
109
  """
110
110
  Set the base URL for the device HW bridge API.
111
111
 
@@ -117,7 +117,7 @@ class AgentConfigBuilder:
117
117
  self._servers.hw_bridge_base_url = url
118
118
  return self
119
119
 
120
- def with_screen_api_base_url(self, url: str | ApiBaseUrl) -> "AgentConfigBuilder":
120
+ def with_screen_api(self, url: str | ApiBaseUrl) -> "AgentConfigBuilder":
121
121
  """
122
122
  Set the base URL for the device screen API.
123
123
 
@@ -2,11 +2,11 @@ import os
2
2
  from pathlib import Path
3
3
 
4
4
  from pydantic import ValidationError
5
+
5
6
  from minitap.mobile_use.config import LLMConfig, deep_merge_llm_config, get_default_llm_config
6
7
  from minitap.mobile_use.utils.file import load_jsonc
7
8
  from minitap.mobile_use.utils.logger import get_logger
8
9
 
9
-
10
10
  logger = get_logger(__name__)
11
11
 
12
12
 
@@ -24,5 +24,6 @@ def load_llm_config_override(path: Path) -> LLMConfig:
24
24
  try:
25
25
  return deep_merge_llm_config(default_config, override_config_dict)
26
26
  except ValidationError as e:
27
- logger.error(f"Invalid LLM config: {e}. Falling back to default config")
27
+ logger.error(f"Invalid LLM config: {e}")
28
+ logger.info("Falling back to default config")
28
29
  return default_config
@@ -1,10 +1,12 @@
1
1
  import logging
2
- from typing import Literal, TypeVar
3
2
  from collections.abc import Awaitable, Callable
4
- from typing import overload
3
+ from typing import Literal, TypeVar, overload
5
4
 
5
+ from langchain_core.language_models.chat_models import BaseChatModel
6
6
  from langchain_google_genai import ChatGoogleGenerativeAI
7
+ from langchain_google_vertexai import ChatVertexAI
7
8
  from langchain_openai import ChatOpenAI
9
+
8
10
  from minitap.mobile_use.config import (
9
11
  AgentNode,
10
12
  AgentNodeWithFallback,
@@ -32,6 +34,19 @@ def get_google_llm(
32
34
  return client
33
35
 
34
36
 
37
+ def get_vertex_llm(
38
+ model_name: str = "gemini-2.5-pro",
39
+ temperature: float = 0.7,
40
+ ) -> ChatVertexAI:
41
+ client = ChatVertexAI(
42
+ model_name=model_name,
43
+ max_tokens=None,
44
+ temperature=temperature,
45
+ max_retries=2,
46
+ )
47
+ return client
48
+
49
+
35
50
  def get_openai_llm(
36
51
  model_name: str = "o3",
37
52
  temperature: float = 1,
@@ -75,7 +90,7 @@ def get_llm(
75
90
  *,
76
91
  use_fallback: bool = False,
77
92
  temperature: float = 1,
78
- ): ...
93
+ ) -> BaseChatModel: ...
79
94
 
80
95
 
81
96
  @overload
@@ -84,7 +99,7 @@ def get_llm(
84
99
  name: AgentNode,
85
100
  *,
86
101
  temperature: float = 1,
87
- ): ...
102
+ ) -> BaseChatModel: ...
88
103
 
89
104
 
90
105
  @overload
@@ -94,7 +109,7 @@ def get_llm(
94
109
  *,
95
110
  is_utils: Literal[True],
96
111
  temperature: float = 1,
97
- ): ...
112
+ ) -> BaseChatModel: ...
98
113
 
99
114
 
100
115
  def get_llm(
@@ -103,7 +118,7 @@ def get_llm(
103
118
  is_utils: bool = False,
104
119
  use_fallback: bool = False,
105
120
  temperature: float = 1,
106
- ):
121
+ ) -> BaseChatModel:
107
122
  llm = (
108
123
  ctx.llm_config.get_utils(name) # type: ignore
109
124
  if is_utils
@@ -118,6 +133,8 @@ def get_llm(
118
133
  return get_openai_llm(llm.model, temperature)
119
134
  elif llm.provider == "google":
120
135
  return get_google_llm(llm.model, temperature)
136
+ elif llm.provider == "vertexai":
137
+ return get_vertex_llm(llm.model, temperature)
121
138
  elif llm.provider == "openrouter":
122
139
  return get_openrouter_llm(llm.model, temperature)
123
140
  elif llm.provider == "xai":
@@ -19,7 +19,7 @@ from minitap.mobile_use.tools.mobile.tap import tap_wrapper
19
19
  from minitap.mobile_use.tools.mobile.wait_for_animation_to_end import (
20
20
  wait_for_animation_to_end_wrapper,
21
21
  )
22
- from minitap.mobile_use.tools.tool_wrapper import ToolWrapper
22
+ from minitap.mobile_use.tools.tool_wrapper import CompositeToolWrapper, ToolWrapper
23
23
 
24
24
  EXECUTOR_WRAPPERS_TOOLS = [
25
25
  back_wrapper,
@@ -41,18 +41,24 @@ EXECUTOR_WRAPPERS_TOOLS = [
41
41
  ]
42
42
 
43
43
 
44
- def get_tools_from_wrappers(ctx: MobileUseContext, wrappers: list[ToolWrapper]) -> list[BaseTool]:
45
- """Get the tools from the wrappers."""
46
- return [wrapper.tool_fn_getter(ctx) for wrapper in wrappers]
44
+ def get_tools_from_wrappers(
45
+ ctx: "MobileUseContext",
46
+ wrappers: list[ToolWrapper],
47
+ ) -> list[BaseTool]:
48
+ tools: list[BaseTool] = []
49
+ for wrapper in wrappers:
50
+ if ctx.llm_config.get_agent("executor").provider == "vertexai":
51
+ # The main swipe tool argument structure is not supported by vertexai, we need to split
52
+ # this tool into multiple tools
53
+ if wrapper.tool_fn_getter == swipe_wrapper.tool_fn_getter and isinstance(
54
+ wrapper, CompositeToolWrapper
55
+ ):
56
+ tools.extend(wrapper.composite_tools_fn_getter(ctx))
57
+ continue
47
58
 
48
-
49
- def format_tools_list(ctx: MobileUseContext, wrappers: list[ToolWrapper]) -> str:
50
- return "\n".join([tool.name for tool in get_tools_from_wrappers(ctx, wrappers)])
59
+ tools.append(wrapper.tool_fn_getter(ctx))
60
+ return tools
51
61
 
52
62
 
53
- def get_tool_wrapper_from_name(name: str) -> ToolWrapper | None:
54
- """Get the tool wrapper from the name."""
55
- for wrapper in EXECUTOR_WRAPPERS_TOOLS:
56
- if wrapper.tool_fn_getter.__name__ == f"get_{name}_tool":
57
- return wrapper
58
- return None
63
+ def format_tools_list(ctx: MobileUseContext, wrappers: list[ToolWrapper]) -> str:
64
+ return ", ".join([tool.name for tool in get_tools_from_wrappers(ctx, wrappers)])
@@ -1,18 +1,28 @@
1
+ from typing import Annotated
2
+
1
3
  from langchain_core.messages import ToolMessage
2
4
  from langchain_core.tools import tool
3
- from langchain_core.tools.base import InjectedToolCallId
5
+ from langchain_core.tools.base import BaseTool, InjectedToolCallId
4
6
  from langgraph.prebuilt import InjectedState
5
7
  from langgraph.types import Command
8
+ from pydantic import Field
9
+
6
10
  from minitap.mobile_use.constants import EXECUTOR_MESSAGES_KEY
7
11
  from minitap.mobile_use.context import MobileUseContext
8
- from minitap.mobile_use.controllers.mobile_command_controller import SwipeRequest
12
+ from minitap.mobile_use.controllers.mobile_command_controller import (
13
+ CoordinatesSelectorRequest,
14
+ PercentagesSelectorRequest,
15
+ SwipeDirection,
16
+ SwipeRequest,
17
+ SwipeStartEndCoordinatesRequest,
18
+ SwipeStartEndPercentagesRequest,
19
+ )
9
20
  from minitap.mobile_use.controllers.mobile_command_controller import swipe as swipe_controller
10
21
  from minitap.mobile_use.graph.state import State
11
- from minitap.mobile_use.tools.tool_wrapper import ToolWrapper
12
- from typing import Annotated
22
+ from minitap.mobile_use.tools.tool_wrapper import CompositeToolWrapper
13
23
 
14
24
 
15
- def get_swipe_tool(ctx: MobileUseContext):
25
+ def get_swipe_tool(ctx: MobileUseContext) -> BaseTool:
16
26
  @tool
17
27
  def swipe(
18
28
  tool_call_id: Annotated[str, InjectedToolCallId],
@@ -20,9 +30,7 @@ def get_swipe_tool(ctx: MobileUseContext):
20
30
  agent_thought: str,
21
31
  swipe_request: SwipeRequest,
22
32
  ):
23
- """
24
- Swipes on the screen.
25
- """
33
+ """Swipes on the screen."""
26
34
  output = swipe_controller(ctx=ctx, swipe_request=swipe_request)
27
35
  has_failed = output is not None
28
36
  tool_message = ToolMessage(
@@ -45,8 +53,98 @@ def get_swipe_tool(ctx: MobileUseContext):
45
53
  return swipe
46
54
 
47
55
 
48
- swipe_wrapper = ToolWrapper(
56
+ def get_composite_swipe_tools(ctx: MobileUseContext) -> list[BaseTool]:
57
+ """
58
+ Returns composite swipe tools for use with Vertex AI LLMs.
59
+ Each tool handles a specific swipe mode to avoid complex Union type issues.
60
+ """
61
+
62
+ @tool
63
+ def swipe_coordinates(
64
+ agent_thought: str,
65
+ tool_call_id: Annotated[str, InjectedToolCallId],
66
+ state: Annotated[State, InjectedState],
67
+ start_x: int = Field(description="Start X coordinate in pixels"),
68
+ start_y: int = Field(description="Start Y coordinate in pixels"),
69
+ end_x: int = Field(description="End X coordinate in pixels"),
70
+ end_y: int = Field(description="End Y coordinate in pixels"),
71
+ duration: int = Field(description="Duration in ms", ge=1, le=10000, default=400),
72
+ ):
73
+ """Swipe using pixel coordinates from start position to end position."""
74
+ swipe_request = SwipeRequest(
75
+ swipe_mode=SwipeStartEndCoordinatesRequest(
76
+ start=CoordinatesSelectorRequest(x=start_x, y=start_y),
77
+ end=CoordinatesSelectorRequest(x=end_x, y=end_y),
78
+ ),
79
+ duration=duration,
80
+ )
81
+ return get_swipe_tool(ctx=ctx).invoke(
82
+ input={
83
+ "tool_call_id": tool_call_id,
84
+ "state": state,
85
+ "agent_thought": agent_thought,
86
+ "swipe_request": swipe_request,
87
+ }
88
+ )
89
+
90
+ @tool
91
+ def swipe_percentages(
92
+ agent_thought: str,
93
+ tool_call_id: Annotated[str, InjectedToolCallId],
94
+ state: Annotated[State, InjectedState],
95
+ start_x_percent: int = Field(description="Start X percent (0-100)", ge=0, le=100),
96
+ start_y_percent: int = Field(description="Start Y percent (0-100)", ge=0, le=100),
97
+ end_x_percent: int = Field(description="End X percent (0-100)", ge=0, le=100),
98
+ end_y_percent: int = Field(description="End Y percent (0-100)", ge=0, le=100),
99
+ duration: int = Field(description="Duration in ms", ge=1, le=10000, default=400),
100
+ ):
101
+ """Swipe using percentage coordinates from start position to end position."""
102
+ swipe_request = SwipeRequest(
103
+ swipe_mode=SwipeStartEndPercentagesRequest(
104
+ start=PercentagesSelectorRequest(
105
+ x_percent=start_x_percent, y_percent=start_y_percent
106
+ ),
107
+ end=PercentagesSelectorRequest(x_percent=end_x_percent, y_percent=end_y_percent),
108
+ ),
109
+ duration=duration,
110
+ )
111
+ return get_swipe_tool(ctx=ctx).invoke(
112
+ input={
113
+ "tool_call_id": tool_call_id,
114
+ "state": state,
115
+ "agent_thought": agent_thought,
116
+ "swipe_request": swipe_request,
117
+ }
118
+ )
119
+
120
+ @tool
121
+ def swipe_direction(
122
+ agent_thought: str,
123
+ tool_call_id: Annotated[str, InjectedToolCallId],
124
+ state: Annotated[State, InjectedState],
125
+ direction: SwipeDirection,
126
+ duration: int = Field(description="Duration in ms", ge=1, le=10000, default=400),
127
+ ):
128
+ """Swipe in a specific direction across the screen."""
129
+ swipe_request = SwipeRequest(
130
+ swipe_mode=direction,
131
+ duration=duration,
132
+ )
133
+ return get_swipe_tool(ctx=ctx).invoke(
134
+ input={
135
+ "tool_call_id": tool_call_id,
136
+ "state": state,
137
+ "agent_thought": agent_thought,
138
+ "swipe_request": swipe_request,
139
+ }
140
+ )
141
+
142
+ return [swipe_coordinates, swipe_percentages, swipe_direction]
143
+
144
+
145
+ swipe_wrapper = CompositeToolWrapper(
49
146
  tool_fn_getter=get_swipe_tool,
147
+ composite_tools_fn_getter=get_composite_swipe_tools,
50
148
  on_success_fn=lambda: "Swipe is successful.",
51
149
  on_failure_fn=lambda: "Failed to swipe.",
52
150
  )
@@ -2,6 +2,7 @@ from collections.abc import Callable
2
2
 
3
3
  from langchain_core.tools import BaseTool
4
4
  from pydantic import BaseModel
5
+
5
6
  from minitap.mobile_use.context import MobileUseContext
6
7
 
7
8
 
@@ -9,3 +10,7 @@ class ToolWrapper(BaseModel):
9
10
  tool_fn_getter: Callable[[MobileUseContext], BaseTool]
10
11
  on_success_fn: Callable[..., str]
11
12
  on_failure_fn: Callable[..., str]
13
+
14
+
15
+ class CompositeToolWrapper(ToolWrapper):
16
+ composite_tools_fn_getter: Callable[[MobileUseContext], list[BaseTool]]
@@ -45,12 +45,5 @@ def record_interaction(ctx: MobileUseContext, response: BaseMessage):
45
45
  return "Screenshot recorded successfully"
46
46
 
47
47
 
48
- def log_agent_thought(prefix: str, agent_thought: str):
49
- if prefix:
50
- prefix = prefix[0].upper() + prefix[1:]
51
- else:
52
- prefix = "New agent thought"
53
- logger.info(
54
- f"💭 {Fore.LIGHTMAGENTA_EX + Style.BRIGHT}{prefix}{Style.RESET_ALL}: "
55
- f"{Fore.LIGHTMAGENTA_EX}{agent_thought}{Style.RESET_ALL}"
56
- )
48
+ def log_agent_thought(agent_thought: str):
49
+ logger.info(f"💭 {Fore.LIGHTMAGENTA_EX}{agent_thought}{Style.RESET_ALL}")
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: minitap-mobile-use
3
- Version: 2.1.0
3
+ Version: 2.2.0
4
4
  Summary: AI-powered multi-agent system that automates real Android and iOS devices through low-level control using LangGraph.
5
5
  Author: Pierre-Louis Favreau, Jean-Pierre Lo, Nicolas Dehandschoewercker
6
6
  License: MIT License
@@ -24,11 +24,11 @@ License: MIT License
24
24
  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25
25
  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
26
26
  SOFTWARE.
27
- Requires-Dist: langgraph==0.5.0
27
+ Requires-Dist: langgraph>=0.6.6
28
28
  Requires-Dist: adbutils==2.9.3
29
- Requires-Dist: langchain-google-genai==2.1.5
30
- Requires-Dist: langchain==0.3.26
31
- Requires-Dist: langchain-core==0.3.66
29
+ Requires-Dist: langchain-google-genai>=2.1.10
30
+ Requires-Dist: langchain>=0.3.27
31
+ Requires-Dist: langchain-core>=0.3.75
32
32
  Requires-Dist: jinja2==3.1.6
33
33
  Requires-Dist: python-dotenv==1.1.1
34
34
  Requires-Dist: pydantic-settings==2.10.1
@@ -42,6 +42,7 @@ Requires-Dist: fastapi==0.111.0
42
42
  Requires-Dist: uvicorn[standard]==0.30.1
43
43
  Requires-Dist: colorama>=0.4.6
44
44
  Requires-Dist: psutil>=5.9.0
45
+ Requires-Dist: langchain-google-vertexai>=2.0.28
45
46
  Requires-Dist: ruff==0.5.3 ; extra == 'dev'
46
47
  Requires-Dist: pytest==8.4.1 ; extra == 'dev'
47
48
  Requires-Dist: pytest-cov==5.0.0 ; extra == 'dev'
@@ -69,6 +70,10 @@ Description-Content-Type: text/markdown
69
70
  <a href="https://x.com/minitap_ai?t=iRWtI497UhRGLeCKYQekig&s=09"><b>Twitter / X</b></a>
70
71
  </p>
71
72
 
73
+ [![PyPI version](https://img.shields.io/pypi/v/minitap-mobile-use.svg?color=blue)](https://pypi.org/project/minitap-mobile-use/)
74
+ [![Python Version](https://img.shields.io/badge/python-3.12%2B-blue)](https://www.python.org/downloads/)
75
+ [![License](https://img.shields.io/badge/license-MIT-blue)](https://github.com/minitap-ai/mobile-use/blob/main/LICENSE)
76
+
72
77
  </div>
73
78
 
74
79
  Mobile-use is a powerful, open-source AI agent that controls your Android or iOS device using natural language. It understands your commands and interacts with the UI to perform tasks, from sending messages to navigating complex apps.
@@ -107,11 +112,26 @@ Ready to automate your mobile experience? Follow these steps to get mobile-use u
107
112
 
108
113
  2. **(Optional) Customize LLM Configuration:**
109
114
  To use different models or providers, create your own LLM configuration file.
115
+
110
116
  ```bash
111
117
  cp llm-config.override.template.jsonc llm-config.override.jsonc
112
118
  ```
119
+
113
120
  Then, edit `llm-config.override.jsonc` to fit your needs.
114
121
 
122
+ You can also use local LLMs or any other OpenAI-API-compatible provider:
123
+
124
+ 1. Set `OPENAI_BASE_URL` and `OPENAI_API_KEY` in your `.env`
125
+ 2. In your `llm-config.override.jsonc`, set `openai` as the provider for the agent nodes you want, and choose a model supported by your provider.
126
+
127
+ > [!NOTE]
128
+ > If you want to use Google Vertex AI, you must either:
129
+ >
130
+ > - Have credentials configured for your environment (gcloud, workload identity, etc…)
131
+ > - Store the path to a service account JSON file in the `GOOGLE_APPLICATION_CREDENTIALS` environment variable
132
+ >
133
+ > More information: see [Credential types](https://cloud.google.com/docs/authentication/application-default-credentials#GAC) and the [google.auth API reference](https://googleapis.dev/python/google-auth/latest/reference/google.auth.html#module-google.auth).
134
+
115
135
  ### Quick Launch (Docker)
116
136
 
117
137
  > [!NOTE]
@@ -1,10 +1,10 @@
1
1
  minitap/mobile_use/__init__.py,sha256=e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855,0
2
2
  minitap/mobile_use/agents/contextor/contextor.py,sha256=d7c98771a9173281c1660a2ee965149d4f70ecf7005cf75fa89eb1e9a9f2b660,1673
3
- minitap/mobile_use/agents/cortex/cortex.md,sha256=69358852474f65144486fc60196b17bdc5f99d2891fbb4cae26fd2331260f6ba,5598
4
- minitap/mobile_use/agents/cortex/cortex.py,sha256=232dfe2e2dbbfbc34873875ee32cf5f9d4156f6b9afc9d9d9146addd102be40c,4896
3
+ minitap/mobile_use/agents/cortex/cortex.md,sha256=dcf42ec3081d21d1652b601b00276f6f876806855f4337e0c23d99f20a32b88f,5594
4
+ minitap/mobile_use/agents/cortex/cortex.py,sha256=91d672cb6c6b3eb95ab335ec6031d2479446a8d2af087c65f0bd0300799547bb,4909
5
5
  minitap/mobile_use/agents/cortex/types.py,sha256=c33f2277752644d2185d84add03a493adaa530096d046d73366ab9121d99b946,361
6
6
  minitap/mobile_use/agents/executor/executor.md,sha256=800003af904d346ab0a6b8a205c0332930161a8c997ede584097a40e8f1ac478,2806
7
- minitap/mobile_use/agents/executor/executor.py,sha256=be92742d91296650288c12e83f8065589d8eab7aeaa977a47c34ed25879a9bb6,2771
7
+ minitap/mobile_use/agents/executor/executor.py,sha256=e740b76c9d2dfa832afbca60ac533f691963ae068c64dd97a96b2c58c80c925d,2850
8
8
  minitap/mobile_use/agents/executor/tool_node.py,sha256=2ad729ede393882460ae3d180ac1c0e1ab1688f40b2017220aad1b059f6485c5,3900
9
9
  minitap/mobile_use/agents/executor/utils.py,sha256=1a387d30047d3be05b5e550433866abe4388222b3f95d1360847870155ef8f12,368
10
10
  minitap/mobile_use/agents/hopper/hopper.md,sha256=2e9333ece8f6b76401ac2cce98ca06a025faa5dba6bacbbc344793ddf42292d0,362
@@ -17,27 +17,27 @@ minitap/mobile_use/agents/outputter/human.md,sha256=6b9b45c640b163554524b1aec4cd
17
17
  minitap/mobile_use/agents/outputter/outputter.py,sha256=0539cd1bfa307c6e24136488a0481128da17c37f20128e63388b5c4aea5aae50,2750
18
18
  minitap/mobile_use/agents/outputter/test_outputter.py,sha256=4c52988f8f29159657707a0e11500610f3987cac6390b17edec23f09ddfcc0ff,3334
19
19
  minitap/mobile_use/agents/planner/human.md,sha256=cb37be2af568918e60238eaa785837178a3ba8f8112de86850d9a62914c18314,222
20
- minitap/mobile_use/agents/planner/planner.md,sha256=41c973c8d34a6c24c81b275f725fc447d58805dcea1900f4285c5980502fc756,3337
21
- minitap/mobile_use/agents/planner/planner.py,sha256=0b8c92857847d390d98efb8d1130762883e46ea4dfc354ea3352e45450913771,2686
20
+ minitap/mobile_use/agents/planner/planner.md,sha256=dc13218df28b38fd2139803c9d013f0ba50899b2ae58545a3b8f3d2fd2aad734,3333
21
+ minitap/mobile_use/agents/planner/planner.py,sha256=cb7fa02cf5ae9e6f95646afc1c41642e5f9e58113dc17d51839f5236a33e67b7,2722
22
22
  minitap/mobile_use/agents/planner/types.py,sha256=da551df70ae4ae574670bd8d79c4da1a82be88c24216034d6841a83d85c0c43c,1392
23
23
  minitap/mobile_use/agents/planner/utils.py,sha256=11731d0382bf88ef77b38aaf75873486f293343cdd048372c57e626c652b4a22,1839
24
24
  minitap/mobile_use/agents/summarizer/summarizer.py,sha256=3e1e92f9259c040487992636786f972dffe0a38d309a8c3758c9ff1aeff2f62c,1070
25
25
  minitap/mobile_use/clients/device_hardware_client.py,sha256=9593380a7a3df32f02aa22717678c25e91367df26b1743abde9e57aec5dc2474,857
26
26
  minitap/mobile_use/clients/ios_client.py,sha256=332bf47ac01bbd5bf6178a59eea7c7a30fed944f30907caea5388178f312d36b,1452
27
27
  minitap/mobile_use/clients/screen_api_client.py,sha256=3615dc65d25c38b4d8dc5512f9adb3bcf69dca7a0298a472a6812f604a275c47,2019
28
- minitap/mobile_use/config.py,sha256=d5bd2bf71f229d57757bf8375b5caf858b4964178507ecb855942bb197646fdd,9399
28
+ minitap/mobile_use/config.py,sha256=9a2435a69c4453275439555d6af721a0c38bf7ca7ef1916598011c54c198c812,9931
29
29
  minitap/mobile_use/constants.py,sha256=3acd9d6ade5bc772e902b3473f3ba12ddd04e7306963ca2bae49d1132d89ba46,95
30
30
  minitap/mobile_use/context.py,sha256=fa4e43fb580db86c2ed707b1464d8363ead57332b40ea5688e0731ad57a40558,1747
31
31
  minitap/mobile_use/controllers/__init__.py,sha256=e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855,0
32
- minitap/mobile_use/controllers/mobile_command_controller.py,sha256=1ea22f569affae3d82ea74afd37629898cf3d10b3bdf6cb1a7671d1a09f2c000,12206
32
+ minitap/mobile_use/controllers/mobile_command_controller.py,sha256=eb5f31cc6f2a711cbe3908f2561e841abe2214805475f77eeed3bc45731baddc,12217
33
33
  minitap/mobile_use/controllers/platform_specific_commands_controller.py,sha256=8b4fc30108c242da41fd998751dbfd5e6a69e2957a2dbbe5d6fc43d6b55f727e,2705
34
34
  minitap/mobile_use/graph/graph.py,sha256=c7b412e725b096eca8f212d704c3faf91d77eea4131f1fae7af1ee39bc57cdae,4269
35
35
  minitap/mobile_use/graph/state.py,sha256=cfe67d15833efa18b28a86293bc5c713ff8c777778b0552bb6e25fa7070534aa,3371
36
- minitap/mobile_use/main.py,sha256=51a826974071997d7ca1d6913e199c7844633e483db0471f3c294688be028834,3743
36
+ minitap/mobile_use/main.py,sha256=7ac4dc592e3ce72bff602d67ba2f25b9b5e45e07a316e548d7c8e73735abf43d,3725
37
37
  minitap/mobile_use/sdk/__init__.py,sha256=4e5555c0597242b9523827194a2500b9c6d7e5c04b1ccd2056c9b1f4d42a31cd,318
38
- minitap/mobile_use/sdk/agent.py,sha256=43b198cfabdffa28add7f1b77046704ee28da74da92b710dc8bb265c8df16472,20898
38
+ minitap/mobile_use/sdk/agent.py,sha256=0d598bc0fab7e4429654e38379d730a021243a0953ad8efcca721b382938be7a,20871
39
39
  minitap/mobile_use/sdk/builders/__init__.py,sha256=d6c96d39b80900a114698ef205ab5061a541f33bfa99c456d9345e5adb8ff6ff,424
40
- minitap/mobile_use/sdk/builders/agent_config_builder.py,sha256=d963ab932fa78117eef3ea4a2503c34b872f7b865c1a2e25b20e7fa5072e466b,7600
40
+ minitap/mobile_use/sdk/builders/agent_config_builder.py,sha256=c63f452350822daab86d29c44a333909b623fc7ff7bcbf74e5f9104b24630bf5,7582
41
41
  minitap/mobile_use/sdk/builders/index.py,sha256=64336ac3b3dea4673a48e95b8c5ac4196ecd5d2196380377d102593d0a1dc138,442
42
42
  minitap/mobile_use/sdk/builders/task_request_builder.py,sha256=9e6cf7afb68af986d6a81487179bb79d28f63047a068725d92996dbcbe753376,6857
43
43
  minitap/mobile_use/sdk/constants.py,sha256=436ba0700c6cf37ac0c9e3995a5f5a0d54ca87af72686eb9667a2c6a96e30f68,292
@@ -49,7 +49,7 @@ minitap/mobile_use/sdk/types/__init__.py,sha256=5dd148d83bf6261ac8ac60c994e2496b
49
49
  minitap/mobile_use/sdk/types/agent.py,sha256=390d5c642b3480f4a2203ddd28ec115c785f2576bec81e82e4db3c129399c020,2260
50
50
  minitap/mobile_use/sdk/types/exceptions.py,sha256=56ac3f749730740951448b1b0f200be21331dc0800916a87587b21e7850120a5,2288
51
51
  minitap/mobile_use/sdk/types/task.py,sha256=74743a398b63af62383528d5906824ae8aaba1e1885c75414b347623d7931f12,5837
52
- minitap/mobile_use/sdk/utils.py,sha256=493c77e43fcb58535eef43416716a3283488577c127060de5c0317d0b737b01f,945
52
+ minitap/mobile_use/sdk/utils.py,sha256=647f1f4a463c3029c3b0eb3c33f7dd778d5f5fd9d293224f5474595a60e1de6f,967
53
53
  minitap/mobile_use/servers/config.py,sha256=8a4a6bce23e2093d047a91e135e2f88627f76ac12177d071f25a3ca739b3afeb,575
54
54
  minitap/mobile_use/servers/device_hardware_bridge.py,sha256=80b93fe1bd8ea9100ac198a83f0aea2c40565a11e810acff9785bbd3f3b31f37,7174
55
55
  minitap/mobile_use/servers/device_screen_api.py,sha256=63bf866f17cde4ab97631b710080866b8427225d3857b2351ab83db38a9c5107,5064
@@ -57,8 +57,8 @@ minitap/mobile_use/servers/start_servers.py,sha256=1e86dc0fcbdf6e6570ae68c709714
57
57
  minitap/mobile_use/servers/stop_servers.py,sha256=9a3dc2eafb3c13e420248b1844694c80112be32f0d336f54ecc1015cb6f27be9,7127
58
58
  minitap/mobile_use/servers/utils.py,sha256=db5d26153a169ab141556337db3693adc1bf8522943316656bdeb05dbf95465b,394
59
59
  minitap/mobile_use/services/accessibility.py,sha256=42bcbe81b427ee6f6e82bcfe420fc40630db950bda354e3e433c2dda2e159628,3404
60
- minitap/mobile_use/services/llm.py,sha256=c3e99290431bcdcbf81cbffd08bf3abbd4a3760c20cb0873278c418df0d28b0f,3724
61
- minitap/mobile_use/tools/index.py,sha256=db61053f24beb88952cc34be37336defd8ebf5e1fec0ecf81e2439a76b29bde1,2449
60
+ minitap/mobile_use/services/llm.py,sha256=3706edcb2132709c5cb931fac86ea31908a209347b341ec7b0f1cfbb27959c66,4260
61
+ minitap/mobile_use/tools/index.py,sha256=b380fa7b756108ca3dd1f81fbe397233f742ddcecaaefa2844c2e67ced7093be,2695
62
62
  minitap/mobile_use/tools/mobile/back.py,sha256=cf1053b22c4fbeb1c219578563d6d857425dcdff08af690149c6e52a0e29c195,1792
63
63
  minitap/mobile_use/tools/mobile/clear_text.py,sha256=28e9af14cbb62be3a8221e7b1e7645e9bc22f40202d84c6047874809137cd927,9316
64
64
  minitap/mobile_use/tools/mobile/copy_text_from.py,sha256=723d7563e19d05060da0071aedfb620ce36bfb90931fcb815ab8d01be7ed9dd5,2716
@@ -71,11 +71,11 @@ minitap/mobile_use/tools/mobile/open_link.py,sha256=ad363f83b254a8e3e5969b4d8b90
71
71
  minitap/mobile_use/tools/mobile/paste_text.py,sha256=dc126297f174cdcdfc6f6d148fdb6a96ecb7813926e441ce0631e963b196b448,2101
72
72
  minitap/mobile_use/tools/mobile/press_key.py,sha256=a22d416279c33f2e843aaf28e4466a7eeae59aa690312c8866b62a3f84b57d5b,1939
73
73
  minitap/mobile_use/tools/mobile/stop_app.py,sha256=8fc1cf5682232d670270a7e909f365a795150643eac5310f042d792b79e7c0c0,2062
74
- minitap/mobile_use/tools/mobile/swipe.py,sha256=4d0da8c0db0995598a9a74ebfc5e7fdc176abc8e5277fd8d5f588de864947dda,1878
74
+ minitap/mobile_use/tools/mobile/swipe.py,sha256=23c31022bef827afbfe90c7f6a7d195e5bef57eb3972cd32b0d2aa054b4c6d26,5754
75
75
  minitap/mobile_use/tools/mobile/take_screenshot.py,sha256=8762be82e0fb55549f1271a8e4c7b25040f906d21ed19f52121a616e70eb9bb0,2271
76
76
  minitap/mobile_use/tools/mobile/tap.py,sha256=d7a3de2ddb78b051b22d7886553d75bab13562abfc8562957c15fb16dd484a0a,2297
77
77
  minitap/mobile_use/tools/mobile/wait_for_animation_to_end.py,sha256=967d0df11dfb073dde6308761e9795e02c93ee2b0cbe17caf6edbe4a8beea28a,2493
78
- minitap/mobile_use/tools/tool_wrapper.py,sha256=166d9c4b7950e66407eb397a81d88354779ee3056a8dc7ad1bf160a3aa54580c,334
78
+ minitap/mobile_use/tools/tool_wrapper.py,sha256=f0f27beaae25a1bcfd9b72bf994de84b2e5fba9e242d8ad18a8d1a97cd7619e4,454
79
79
  minitap/mobile_use/tools/utils.py,sha256=bf6924e3d9e5338c2e04d88fd0ad9ed03a0c02058b62373025fa4b3a805712ae,2694
80
80
  minitap/mobile_use/utils/cli_helpers.py,sha256=1c53b6ea6cd2ba861302b182944c6a3a31dac27e316bca2c65cd6a3ca3256e81,1720
81
81
  minitap/mobile_use/utils/cli_selection.py,sha256=62e949bf075e984b5d23b4a9880ff2bccf8f9e0f7ccb48120030a6a82075352b,4788
@@ -85,12 +85,12 @@ minitap/mobile_use/utils/errors.py,sha256=6c5566484cff48ce1eb168c3cbe93d6e536545
85
85
  minitap/mobile_use/utils/file.py,sha256=1ca968613452a273b23e4f58460ab39f87255b02cdb6fb8ca04f4e628b346070,315
86
86
  minitap/mobile_use/utils/logger.py,sha256=011fe08c39b111997ec685b9f0b378761607e35ac049234b5e86c2b58f29bbe3,5633
87
87
  minitap/mobile_use/utils/media.py,sha256=29fa7f009a0f2bd60de2a9eba4a90e6eecc91cc67c2736b753506d48ab2bf5eb,2228
88
- minitap/mobile_use/utils/recorder.py,sha256=83dcb953b9caf00bf3e4853c3eee447166e1279e7ceefbbeb25d93e932ec698d,1937
88
+ minitap/mobile_use/utils/recorder.py,sha256=f90c511a182553684fc4c51195dec6859c7867b83b66bd56dd13f0cdcab2b3ae,1724
89
89
  minitap/mobile_use/utils/requests_utils.py,sha256=5c3a2e2aff7c521cd6a43b74c084e2244e1ff55065a5b722e6e251c6419861fd,1168
90
90
  minitap/mobile_use/utils/shell_utils.py,sha256=b35ae7f863379adb86c9ba0f9b3b9d4954118d12aef1ffed0bc260b32d73d857,650
91
91
  minitap/mobile_use/utils/time.py,sha256=41bfaabb3751de11443ccb4a3f1f53d5ebacc7744c72e32695fdcc3d23f17d49,160
92
92
  minitap/mobile_use/utils/ui_hierarchy.py,sha256=b52acd081ac18169dab94b7187c6ffed4db72d4e4b7f56aeca9ef5b81166c4e0,3630
93
- minitap_mobile_use-2.1.0.dist-info/WHEEL,sha256=ab6157bc637547491fb4567cd7ddf26b04d63382916ca16c29a5c8e94c9c9ef7,79
94
- minitap_mobile_use-2.1.0.dist-info/entry_points.txt,sha256=663a29cfd551a4eaa0f27335f0bd7e4a732a4e39c76b68ef5c8dc444d4a285fa,60
95
- minitap_mobile_use-2.1.0.dist-info/METADATA,sha256=95abc6db29ea323284374508c8faba3fd6cf83db998a2a45b82585af354b8ab2,10574
96
- minitap_mobile_use-2.1.0.dist-info/RECORD,,
93
+ minitap_mobile_use-2.2.0.dist-info/WHEEL,sha256=ab6157bc637547491fb4567cd7ddf26b04d63382916ca16c29a5c8e94c9c9ef7,79
94
+ minitap_mobile_use-2.2.0.dist-info/entry_points.txt,sha256=663a29cfd551a4eaa0f27335f0bd7e4a732a4e39c76b68ef5c8dc444d4a285fa,60
95
+ minitap_mobile_use-2.2.0.dist-info/METADATA,sha256=b0076a10119155863d990ff779bb901857df8b74c1321d6fef45ac6a16029db8,11825
96
+ minitap_mobile_use-2.2.0.dist-info/RECORD,,