minitap-mobile-use 2.0.0__py3-none-any.whl → 2.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of minitap-mobile-use might be problematic. Click here for more details.

Files changed (74) hide show
  1. minitap/mobile_use/agents/cortex/cortex.md +19 -10
  2. minitap/mobile_use/agents/cortex/cortex.py +15 -2
  3. minitap/mobile_use/agents/cortex/types.py +2 -4
  4. minitap/mobile_use/agents/executor/executor.md +20 -15
  5. minitap/mobile_use/agents/executor/executor.py +6 -18
  6. minitap/mobile_use/agents/executor/tool_node.py +105 -0
  7. minitap/mobile_use/agents/hopper/hopper.md +2 -10
  8. minitap/mobile_use/agents/hopper/hopper.py +4 -9
  9. minitap/mobile_use/agents/orchestrator/human.md +3 -4
  10. minitap/mobile_use/agents/orchestrator/orchestrator.md +25 -7
  11. minitap/mobile_use/agents/orchestrator/orchestrator.py +56 -56
  12. minitap/mobile_use/agents/orchestrator/types.py +5 -8
  13. minitap/mobile_use/agents/outputter/outputter.py +1 -2
  14. minitap/mobile_use/agents/planner/planner.md +25 -15
  15. minitap/mobile_use/agents/planner/planner.py +7 -1
  16. minitap/mobile_use/agents/planner/types.py +10 -5
  17. minitap/mobile_use/agents/planner/utils.py +11 -0
  18. minitap/mobile_use/agents/summarizer/summarizer.py +2 -1
  19. minitap/mobile_use/clients/device_hardware_client.py +3 -0
  20. minitap/mobile_use/config.py +16 -14
  21. minitap/mobile_use/constants.py +1 -0
  22. minitap/mobile_use/context.py +3 -4
  23. minitap/mobile_use/controllers/mobile_command_controller.py +37 -26
  24. minitap/mobile_use/controllers/platform_specific_commands_controller.py +3 -4
  25. minitap/mobile_use/graph/graph.py +10 -31
  26. minitap/mobile_use/graph/state.py +34 -14
  27. minitap/mobile_use/main.py +11 -8
  28. minitap/mobile_use/sdk/agent.py +78 -63
  29. minitap/mobile_use/sdk/builders/agent_config_builder.py +23 -11
  30. minitap/mobile_use/sdk/builders/task_request_builder.py +9 -9
  31. minitap/mobile_use/sdk/examples/smart_notification_assistant.py +1 -2
  32. minitap/mobile_use/sdk/types/agent.py +10 -5
  33. minitap/mobile_use/sdk/types/task.py +19 -18
  34. minitap/mobile_use/sdk/utils.py +1 -1
  35. minitap/mobile_use/servers/config.py +1 -2
  36. minitap/mobile_use/servers/device_hardware_bridge.py +3 -4
  37. minitap/mobile_use/servers/start_servers.py +4 -4
  38. minitap/mobile_use/servers/stop_servers.py +12 -18
  39. minitap/mobile_use/services/llm.py +4 -2
  40. minitap/mobile_use/tools/index.py +11 -7
  41. minitap/mobile_use/tools/mobile/back.py +8 -12
  42. minitap/mobile_use/tools/mobile/clear_text.py +277 -0
  43. minitap/mobile_use/tools/mobile/copy_text_from.py +8 -12
  44. minitap/mobile_use/tools/mobile/erase_one_char.py +56 -0
  45. minitap/mobile_use/tools/mobile/find_packages.py +69 -0
  46. minitap/mobile_use/tools/mobile/input_text.py +55 -32
  47. minitap/mobile_use/tools/mobile/launch_app.py +8 -12
  48. minitap/mobile_use/tools/mobile/long_press_on.py +9 -13
  49. minitap/mobile_use/tools/mobile/open_link.py +8 -12
  50. minitap/mobile_use/tools/mobile/paste_text.py +8 -12
  51. minitap/mobile_use/tools/mobile/press_key.py +8 -12
  52. minitap/mobile_use/tools/mobile/stop_app.py +9 -13
  53. minitap/mobile_use/tools/mobile/swipe.py +8 -12
  54. minitap/mobile_use/tools/mobile/take_screenshot.py +8 -12
  55. minitap/mobile_use/tools/mobile/tap.py +9 -13
  56. minitap/mobile_use/tools/mobile/wait_for_animation_to_end.py +9 -13
  57. minitap/mobile_use/tools/tool_wrapper.py +1 -23
  58. minitap/mobile_use/tools/utils.py +86 -0
  59. minitap/mobile_use/utils/cli_helpers.py +1 -2
  60. minitap/mobile_use/utils/cli_selection.py +5 -6
  61. minitap/mobile_use/utils/decorators.py +21 -20
  62. minitap/mobile_use/utils/logger.py +3 -4
  63. minitap/mobile_use/utils/media.py +1 -1
  64. minitap/mobile_use/utils/recorder.py +11 -10
  65. minitap/mobile_use/utils/ui_hierarchy.py +98 -3
  66. {minitap_mobile_use-2.0.0.dist-info → minitap_mobile_use-2.1.0.dist-info}/METADATA +12 -2
  67. minitap_mobile_use-2.1.0.dist-info/RECORD +96 -0
  68. minitap/mobile_use/agents/executor/executor_context_cleaner.py +0 -27
  69. minitap/mobile_use/tools/mobile/erase_text.py +0 -124
  70. minitap/mobile_use/tools/mobile/list_packages.py +0 -78
  71. minitap/mobile_use/tools/mobile/run_flow.py +0 -57
  72. minitap_mobile_use-2.0.0.dist-info/RECORD +0 -95
  73. {minitap_mobile_use-2.0.0.dist-info → minitap_mobile_use-2.1.0.dist-info}/WHEEL +0 -0
  74. {minitap_mobile_use-2.0.0.dist-info → minitap_mobile_use-2.1.0.dist-info}/entry_points.txt +0 -0
@@ -1,18 +1,17 @@
1
- from typing import Optional
2
-
3
1
  from langchain_core.messages import ToolMessage
4
2
  from langchain_core.tools import tool
5
3
  from langchain_core.tools.base import InjectedToolCallId
6
4
  from langgraph.prebuilt import InjectedState
7
5
  from langgraph.types import Command
6
+ from minitap.mobile_use.constants import EXECUTOR_MESSAGES_KEY
8
7
  from minitap.mobile_use.context import MobileUseContext
9
8
  from minitap.mobile_use.controllers.mobile_command_controller import (
10
9
  take_screenshot as take_screenshot_controller,
11
10
  )
12
11
  from minitap.mobile_use.graph.state import State
13
- from minitap.mobile_use.tools.tool_wrapper import ExecutorMetadata, ToolWrapper
12
+ from minitap.mobile_use.tools.tool_wrapper import ToolWrapper
14
13
  from minitap.mobile_use.utils.media import compress_base64_jpeg
15
- from typing_extensions import Annotated
14
+ from typing import Annotated
16
15
 
17
16
 
18
17
  def get_take_screenshot_tool(ctx: MobileUseContext):
@@ -21,7 +20,6 @@ def get_take_screenshot_tool(ctx: MobileUseContext):
21
20
  tool_call_id: Annotated[str, InjectedToolCallId],
22
21
  state: Annotated[State, InjectedState],
23
22
  agent_thought: str,
24
- executor_metadata: Optional[ExecutorMetadata],
25
23
  ):
26
24
  """
27
25
  Take a screenshot of the device.
@@ -42,21 +40,19 @@ def get_take_screenshot_tool(ctx: MobileUseContext):
42
40
  if has_failed
43
41
  else take_screenshot_wrapper.on_success_fn(),
44
42
  additional_kwargs={"error": output} if has_failed else {},
43
+ status="error" if has_failed else "success",
45
44
  )
46
45
  updates = {
47
46
  "agents_thoughts": [agent_thought],
48
- "messages": [tool_message],
47
+ EXECUTOR_MESSAGES_KEY: [tool_message],
49
48
  }
50
49
  if compressed_image_base64:
51
50
  updates["latest_screenshot_base64"] = compressed_image_base64
52
51
  return Command(
53
- update=take_screenshot_wrapper.handle_executor_state_fields(
52
+ update=state.sanitize_update(
54
53
  ctx=ctx,
55
- state=state,
56
- executor_metadata=executor_metadata,
57
- tool_message=tool_message,
58
- is_failure=has_failed,
59
- updates=updates,
54
+ update=updates,
55
+ agent="executor",
60
56
  ),
61
57
  )
62
58
 
@@ -1,16 +1,15 @@
1
- from typing import Optional
2
-
3
1
  from langchain_core.messages import ToolMessage
4
2
  from langchain_core.tools import tool
5
3
  from langchain_core.tools.base import InjectedToolCallId
6
4
  from langgraph.prebuilt import InjectedState
7
5
  from langgraph.types import Command
6
+ from minitap.mobile_use.constants import EXECUTOR_MESSAGES_KEY
8
7
  from minitap.mobile_use.context import MobileUseContext
9
8
  from minitap.mobile_use.controllers.mobile_command_controller import SelectorRequest
10
9
  from minitap.mobile_use.controllers.mobile_command_controller import tap as tap_controller
11
10
  from minitap.mobile_use.graph.state import State
12
- from minitap.mobile_use.tools.tool_wrapper import ExecutorMetadata, ToolWrapper
13
- from typing_extensions import Annotated
11
+ from minitap.mobile_use.tools.tool_wrapper import ToolWrapper
12
+ from typing import Annotated
14
13
 
15
14
 
16
15
  def get_tap_tool(ctx: MobileUseContext):
@@ -19,9 +18,8 @@ def get_tap_tool(ctx: MobileUseContext):
19
18
  tool_call_id: Annotated[str, InjectedToolCallId],
20
19
  state: Annotated[State, InjectedState],
21
20
  agent_thought: str,
22
- executor_metadata: Optional[ExecutorMetadata],
23
21
  selector_request: SelectorRequest,
24
- index: Optional[int] = None,
22
+ index: int | None = None,
25
23
  ):
26
24
  """
27
25
  Taps on a selector.
@@ -35,18 +33,16 @@ def get_tap_tool(ctx: MobileUseContext):
35
33
  if has_failed
36
34
  else tap_wrapper.on_success_fn(selector_request, index),
37
35
  additional_kwargs={"error": output} if has_failed else {},
36
+ status="error" if has_failed else "success",
38
37
  )
39
38
  return Command(
40
- update=tap_wrapper.handle_executor_state_fields(
39
+ update=state.sanitize_update(
41
40
  ctx=ctx,
42
- state=state,
43
- executor_metadata=executor_metadata,
44
- tool_message=tool_message,
45
- is_failure=has_failed,
46
- updates={
41
+ update={
47
42
  "agents_thoughts": [agent_thought],
48
- "messages": [tool_message],
43
+ EXECUTOR_MESSAGES_KEY: [tool_message],
49
44
  },
45
+ agent="executor",
50
46
  ),
51
47
  )
52
48
 
@@ -1,18 +1,17 @@
1
- from typing import Optional
2
-
3
1
  from langchain_core.messages import ToolMessage
4
2
  from langchain_core.tools import tool
5
3
  from langchain_core.tools.base import InjectedToolCallId
6
4
  from langgraph.prebuilt import InjectedState
7
5
  from langgraph.types import Command
6
+ from minitap.mobile_use.constants import EXECUTOR_MESSAGES_KEY
8
7
  from minitap.mobile_use.context import MobileUseContext
9
8
  from minitap.mobile_use.controllers.mobile_command_controller import WaitTimeout
10
9
  from minitap.mobile_use.controllers.mobile_command_controller import (
11
10
  wait_for_animation_to_end as wait_for_animation_to_end_controller,
12
11
  )
13
12
  from minitap.mobile_use.graph.state import State
14
- from minitap.mobile_use.tools.tool_wrapper import ExecutorMetadata, ToolWrapper
15
- from typing_extensions import Annotated
13
+ from minitap.mobile_use.tools.tool_wrapper import ToolWrapper
14
+ from typing import Annotated
16
15
 
17
16
 
18
17
  def get_wait_for_animation_to_end_tool(ctx: MobileUseContext):
@@ -21,8 +20,7 @@ def get_wait_for_animation_to_end_tool(ctx: MobileUseContext):
21
20
  tool_call_id: Annotated[str, InjectedToolCallId],
22
21
  state: Annotated[State, InjectedState],
23
22
  agent_thought: str,
24
- executor_metadata: Optional[ExecutorMetadata],
25
- timeout: Optional[WaitTimeout],
23
+ timeout: WaitTimeout | None,
26
24
  ):
27
25
  """
28
26
  Waits for ongoing animations or videos to finish before continuing.
@@ -43,18 +41,16 @@ def get_wait_for_animation_to_end_tool(ctx: MobileUseContext):
43
41
  if has_failed
44
42
  else wait_for_animation_to_end_wrapper.on_success_fn(timeout),
45
43
  additional_kwargs={"error": output} if has_failed else {},
44
+ status="error" if has_failed else "success",
46
45
  )
47
46
  return Command(
48
- update=wait_for_animation_to_end_wrapper.handle_executor_state_fields(
47
+ update=state.sanitize_update(
49
48
  ctx=ctx,
50
- state=state,
51
- executor_metadata=executor_metadata,
52
- tool_message=tool_message,
53
- is_failure=has_failed,
54
- updates={
49
+ update={
55
50
  "agents_thoughts": [agent_thought],
56
- "messages": [tool_message],
51
+ EXECUTOR_MESSAGES_KEY: [tool_message],
57
52
  },
53
+ agent="executor",
58
54
  ),
59
55
  )
60
56
 
@@ -1,33 +1,11 @@
1
- from typing import Callable, Optional
1
+ from collections.abc import Callable
2
2
 
3
- from langchain_core.messages import ToolMessage
4
3
  from langchain_core.tools import BaseTool
5
4
  from pydantic import BaseModel
6
5
  from minitap.mobile_use.context import MobileUseContext
7
- from minitap.mobile_use.graph.state import State
8
-
9
-
10
- class ExecutorMetadata(BaseModel):
11
- retrigger: bool
12
6
 
13
7
 
14
8
  class ToolWrapper(BaseModel):
15
9
  tool_fn_getter: Callable[[MobileUseContext], BaseTool]
16
10
  on_success_fn: Callable[..., str]
17
11
  on_failure_fn: Callable[..., str]
18
-
19
- def handle_executor_state_fields(
20
- self,
21
- ctx: MobileUseContext,
22
- state: State,
23
- executor_metadata: Optional[ExecutorMetadata],
24
- is_failure: bool,
25
- tool_message: ToolMessage,
26
- updates: dict,
27
- ):
28
- if executor_metadata is None:
29
- return state.sanitize_update(ctx=ctx, update=updates)
30
- updates["executor_retrigger"] = executor_metadata.retrigger
31
- updates["executor_messages"] = [tool_message]
32
- updates["executor_failed"] = is_failure
33
- return state.sanitize_update(ctx=ctx, update=updates)
@@ -0,0 +1,86 @@
1
+ from __future__ import annotations
2
+
3
+ from minitap.mobile_use.context import MobileUseContext
4
+ from minitap.mobile_use.controllers.mobile_command_controller import (
5
+ CoordinatesSelectorRequest,
6
+ IdSelectorRequest,
7
+ SelectorRequestWithCoordinates,
8
+ tap,
9
+ )
10
+ from minitap.mobile_use.graph.state import State
11
+ from minitap.mobile_use.utils.logger import get_logger
12
+ from minitap.mobile_use.utils.ui_hierarchy import (
13
+ Point,
14
+ find_element_by_resource_id,
15
+ get_bounds_for_element,
16
+ is_element_focused,
17
+ )
18
+
19
+ logger = get_logger(__name__)
20
+
21
+
22
+ def move_cursor_to_end_if_bounds(
23
+ ctx: MobileUseContext,
24
+ state: State,
25
+ resource_id: str,
26
+ elt: dict | None = None,
27
+ ) -> dict | None:
28
+ """
29
+ Best-effort move of the text cursor near the end of the input by tapping the
30
+ bottom-right area of the focused element (if bounds are available).
31
+ """
32
+ if not elt:
33
+ elt = find_element_by_resource_id(
34
+ ui_hierarchy=state.latest_ui_hierarchy or [],
35
+ resource_id=resource_id,
36
+ )
37
+ if not elt:
38
+ return
39
+
40
+ bounds = get_bounds_for_element(elt)
41
+ if not bounds:
42
+ return elt
43
+
44
+ logger.debug("Tapping near the end of the input to move the cursor")
45
+ bottom_right: Point = bounds.get_relative_point(x_percent=0.99, y_percent=0.99)
46
+ tap(
47
+ ctx=ctx,
48
+ selector_request=SelectorRequestWithCoordinates(
49
+ coordinates=CoordinatesSelectorRequest(
50
+ x=bottom_right.x,
51
+ y=bottom_right.y,
52
+ ),
53
+ ),
54
+ )
55
+ logger.debug(f"Tapped end of input {resource_id} at ({bottom_right.x}, {bottom_right.y})")
56
+ return elt
57
+
58
+
59
+ def focus_element_if_needed(
60
+ ctx: MobileUseContext,
61
+ resource_id: str,
62
+ ) -> bool:
63
+ """
64
+ Ensures the element identified by `resource_id` is focused.
65
+ """
66
+ rich_hierarchy: list[dict] = ctx.hw_bridge_client.get_rich_hierarchy()
67
+ rich_elt = find_element_by_resource_id(
68
+ ui_hierarchy=rich_hierarchy,
69
+ resource_id=resource_id,
70
+ is_rich_hierarchy=True,
71
+ )
72
+ if rich_elt and not is_element_focused(rich_elt):
73
+ tap(ctx=ctx, selector_request=IdSelectorRequest(id=resource_id))
74
+ logger.debug(f"Focused (tap) on resource_id={resource_id}")
75
+ rich_hierarchy = ctx.hw_bridge_client.get_rich_hierarchy()
76
+ rich_elt = find_element_by_resource_id(
77
+ ui_hierarchy=rich_hierarchy,
78
+ resource_id=resource_id,
79
+ is_rich_hierarchy=True,
80
+ )
81
+ if rich_elt and is_element_focused(rich_elt):
82
+ logger.debug(f"Text input is focused: {resource_id}")
83
+ return True
84
+
85
+ logger.warning(f"Failed to focus resource_id={resource_id}")
86
+ return False
@@ -3,10 +3,9 @@ import sys
3
3
  from minitap.mobile_use.clients.ios_client import get_ios_devices
4
4
  from adbutils import AdbClient
5
5
  from rich.console import Console
6
- from typing import Optional
7
6
 
8
7
 
9
- def display_device_status(console: Console, adb_client: Optional[AdbClient] = None):
8
+ def display_device_status(console: Console, adb_client: AdbClient | None = None):
10
9
  """Checks for connected devices and displays the status."""
11
10
  console.print("\n[bold]📱 Device Status[/bold]")
12
11
  devices = None
@@ -1,5 +1,4 @@
1
1
  import sys
2
- from typing import List, Optional
3
2
 
4
3
  import inquirer
5
4
  from rich.console import Console
@@ -8,12 +7,12 @@ from rich.prompt import Prompt
8
7
 
9
8
  def select_provider_and_model(
10
9
  console: Console,
11
- available_providers: List[str],
10
+ available_providers: list[str],
12
11
  available_models: dict,
13
12
  default_provider: str,
14
13
  default_model: str,
15
- provider: Optional[str] = None,
16
- model: Optional[str] = None,
14
+ provider: str | None = None,
15
+ model: str | None = None,
17
16
  ) -> tuple[str, str]:
18
17
  """
19
18
  Interactive selection of LLM provider and model with arrow-key dropdowns when available.
@@ -71,7 +70,7 @@ def select_provider_and_model(
71
70
  def _select_from_list(
72
71
  console: Console,
73
72
  item_type: str,
74
- choices: List[str],
73
+ choices: list[str],
75
74
  default: str,
76
75
  message: str,
77
76
  ) -> str:
@@ -108,7 +107,7 @@ def _select_from_list(
108
107
  return _numbered_selection(console, item_type, choices, default)
109
108
 
110
109
 
111
- def _numbered_selection(console: Console, item_type: str, choices: List[str], default: str) -> str:
110
+ def _numbered_selection(console: Console, item_type: str, choices: list[str], default: str) -> str:
112
111
  """Fallback numbered selection when arrow keys aren't available."""
113
112
  choices_text = "\n".join([f" {i + 1}. {choice}" for i, choice in enumerate(choices)])
114
113
  console.print(f"Available {item_type}s:\n{choices_text}")
@@ -1,6 +1,7 @@
1
1
  import asyncio
2
2
  from functools import wraps
3
- from typing import Any, Awaitable, Callable, Optional, TypeVar, cast, overload
3
+ from typing import Any, TypeVar, cast, overload
4
+ from collections.abc import Awaitable, Callable
4
5
 
5
6
  R = TypeVar("R")
6
7
 
@@ -8,9 +9,9 @@ R = TypeVar("R")
8
9
  def wrap_with_callbacks_sync(
9
10
  fn: Callable[..., R],
10
11
  *,
11
- before: Optional[Callable[..., None]] = None,
12
- on_success: Optional[Callable[[R], None]] = None,
13
- on_failure: Optional[Callable[[Exception], None]] = None,
12
+ before: Callable[..., None] | None = None,
13
+ on_success: Callable[[R], None] | None = None,
14
+ on_failure: Callable[[Exception], None] | None = None,
14
15
  suppress_exceptions: bool = False,
15
16
  ) -> Callable[..., R]:
16
17
  @wraps(fn)
@@ -35,9 +36,9 @@ def wrap_with_callbacks_sync(
35
36
  def wrap_with_callbacks_async(
36
37
  fn: Callable[..., Awaitable[R]],
37
38
  *,
38
- before: Optional[Callable[..., None]] = None,
39
- on_success: Optional[Callable[[R], None]] = None,
40
- on_failure: Optional[Callable[[Exception], None]] = None,
39
+ before: Callable[..., None] | None = None,
40
+ on_success: Callable[[R], None] | None = None,
41
+ on_failure: Callable[[Exception], None] | None = None,
41
42
  suppress_exceptions: bool = False,
42
43
  ) -> Callable[..., Awaitable[R]]:
43
44
  @wraps(fn)
@@ -63,9 +64,9 @@ def wrap_with_callbacks_async(
63
64
  def wrap_with_callbacks(
64
65
  fn: Callable[..., Awaitable[R]],
65
66
  *,
66
- before: Optional[Callable[[], None]] = ...,
67
- on_success: Optional[Callable[[R], None]] = ...,
68
- on_failure: Optional[Callable[[Exception], None]] = ...,
67
+ before: Callable[[], None] | None = ...,
68
+ on_success: Callable[[R], None] | None = ...,
69
+ on_failure: Callable[[Exception], None] | None = ...,
69
70
  suppress_exceptions: bool = ...,
70
71
  ) -> Callable[..., Awaitable[R]]: ...
71
72
 
@@ -73,9 +74,9 @@ def wrap_with_callbacks(
73
74
  @overload
74
75
  def wrap_with_callbacks(
75
76
  *,
76
- before: Optional[Callable[..., None]] = ...,
77
- on_success: Optional[Callable[[Any], None]] = ...,
78
- on_failure: Optional[Callable[[Exception], None]] = ...,
77
+ before: Callable[..., None] | None = ...,
78
+ on_success: Callable[[Any], None] | None = ...,
79
+ on_failure: Callable[[Exception], None] | None = ...,
79
80
  suppress_exceptions: bool = ...,
80
81
  ) -> Callable[[Callable[..., R]], Callable[..., R]]: ...
81
82
 
@@ -84,19 +85,19 @@ def wrap_with_callbacks(
84
85
  def wrap_with_callbacks(
85
86
  fn: Callable[..., R],
86
87
  *,
87
- before: Optional[Callable[[], None]] = ...,
88
- on_success: Optional[Callable[[R], None]] = ...,
89
- on_failure: Optional[Callable[[Exception], None]] = ...,
88
+ before: Callable[[], None] | None = ...,
89
+ on_success: Callable[[R], None] | None = ...,
90
+ on_failure: Callable[[Exception], None] | None = ...,
90
91
  suppress_exceptions: bool = ...,
91
92
  ) -> Callable[..., R]: ...
92
93
 
93
94
 
94
95
  def wrap_with_callbacks(
95
- fn: Optional[Callable[..., Any]] = None,
96
+ fn: Callable[..., Any] | None = None,
96
97
  *,
97
- before: Optional[Callable[[], None]] = None,
98
- on_success: Optional[Callable[[Any], None]] = None,
99
- on_failure: Optional[Callable[[Exception], None]] = None,
98
+ before: Callable[[], None] | None = None,
99
+ on_success: Callable[[Any], None] | None = None,
100
+ on_failure: Callable[[Exception], None] | None = None,
100
101
  suppress_exceptions: bool = False,
101
102
  ) -> Any:
102
103
  def decorator(func: Callable[..., Any]) -> Any:
@@ -2,7 +2,6 @@ import logging
2
2
  import sys
3
3
  from enum import Enum
4
4
  from pathlib import Path
5
- from typing import Optional, Union
6
5
 
7
6
  from colorama import Fore, Style, init
8
7
 
@@ -22,7 +21,7 @@ class MobileUseLogger:
22
21
  def __init__(
23
22
  self,
24
23
  name: str,
25
- log_file: Optional[Union[str, Path]] = None,
24
+ log_file: str | Path | None = None,
26
25
  console_level: str = "INFO",
27
26
  file_level: str = "DEBUG",
28
27
  enable_file_logging: bool = True,
@@ -57,7 +56,7 @@ class MobileUseLogger:
57
56
 
58
57
  self.logger.addHandler(console_handler)
59
58
 
60
- def _setup_file_handler(self, log_file: Optional[Union[str, Path]], level: str):
59
+ def _setup_file_handler(self, log_file: str | Path | None, level: str):
61
60
  if log_file is None:
62
61
  log_file = Path("logs") / f"{self.name.replace('.', '_')}.log"
63
62
 
@@ -118,7 +117,7 @@ _loggers = {}
118
117
 
119
118
  def get_logger(
120
119
  name: str,
121
- log_file: Optional[Union[str, Path]] = None,
120
+ log_file: str | Path | None = None,
122
121
  console_level: str = "INFO",
123
122
  file_level: str = "DEBUG",
124
123
  enable_file_logging: bool = False,
@@ -55,7 +55,7 @@ def create_steps_json_from_trace_folder(trace_folder_path: Path):
55
55
  steps = []
56
56
  for file in trace_folder_path.iterdir():
57
57
  if file.suffix == ".json":
58
- with open(file, "r", encoding="utf-8", errors="ignore") as f:
58
+ with open(file, encoding="utf-8", errors="ignore") as f:
59
59
  json_content = f.read()
60
60
  steps.append({"timestamp": int(file.stem), "data": json_content})
61
61
 
@@ -1,9 +1,9 @@
1
1
  import base64
2
2
  import time
3
- from pathlib import Path
4
3
 
4
+ from colorama import Fore, Style
5
5
  from langchain_core.messages import BaseMessage
6
- from minitap.mobile_use.config import record_events
6
+
7
7
  from minitap.mobile_use.context import MobileUseContext
8
8
  from minitap.mobile_use.controllers.mobile_command_controller import take_screenshot
9
9
  from minitap.mobile_use.utils.logger import get_logger
@@ -45,11 +45,12 @@ def record_interaction(ctx: MobileUseContext, response: BaseMessage):
45
45
  return "Screenshot recorded successfully"
46
46
 
47
47
 
48
- def log_agent_thoughts(agents_thoughts: list[str], output_path: Path | None):
49
- if len(agents_thoughts) > 0:
50
- last_agents_thoughts = agents_thoughts[-1]
51
- previous_last_agents_thoughts = agents_thoughts[-2] if len(agents_thoughts) > 1 else None
52
- if previous_last_agents_thoughts != last_agents_thoughts:
53
- logger.info(f"💭 {last_agents_thoughts}")
54
- if output_path:
55
- record_events(output_path=output_path, events=agents_thoughts)
48
+ def log_agent_thought(prefix: str, agent_thought: str):
49
+ if prefix:
50
+ prefix = prefix[0].upper() + prefix[1:]
51
+ else:
52
+ prefix = "New agent thought"
53
+ logger.info(
54
+ f"💭 {Fore.LIGHTMAGENTA_EX + Style.BRIGHT}{prefix}{Style.RESET_ALL}: "
55
+ f"{Fore.LIGHTMAGENTA_EX}{agent_thought}{Style.RESET_ALL}"
56
+ )
@@ -1,7 +1,47 @@
1
- from typing import Optional
1
+ from pydantic import BaseModel
2
2
 
3
+ from minitap.mobile_use.utils.logger import get_logger
3
4
 
4
- def find_element_by_resource_id(ui_hierarchy: list[dict], resource_id: str) -> Optional[dict]:
5
+ logger = get_logger(__name__)
6
+
7
+
8
+ def __find_element_by_ressource_id_in_rich_hierarchy(
9
+ hierarchy: list[dict], resource_id: str
10
+ ) -> dict | None:
11
+ """
12
+ Retrieves all the sibling elements for a given resource ID from a nested dictionary.
13
+
14
+ Args:
15
+ hierarchy (dict): The nested dictionary representing the UI hierarchy.
16
+ resource_id (str): The resource-id to find.
17
+
18
+ Returns:
19
+ list: A list of the sibling elements, or None if the resource_id is not found.
20
+ """
21
+ if not hierarchy:
22
+ return None
23
+
24
+ for child in hierarchy:
25
+ if child.get("attributes", {}).get("resource-id") == resource_id:
26
+ return child.get("attributes", {})
27
+
28
+ for child in hierarchy:
29
+ result = __find_element_by_ressource_id_in_rich_hierarchy(
30
+ child.get("children", []), resource_id
31
+ )
32
+ if result is not None:
33
+ return result
34
+
35
+ return None
36
+
37
+
38
+ def text_input_is_empty(text: str | None, hint_text: str | None) -> bool:
39
+ return not text or text == hint_text
40
+
41
+
42
+ def find_element_by_resource_id(
43
+ ui_hierarchy: list[dict], resource_id: str, is_rich_hierarchy: bool = False
44
+ ) -> dict | None:
5
45
  """
6
46
  Find a UI element by its resource-id in the UI hierarchy.
7
47
 
@@ -13,8 +53,10 @@ def find_element_by_resource_id(ui_hierarchy: list[dict], resource_id: str) -> O
13
53
  Returns:
14
54
  The complete UI element dictionary if found, None otherwise
15
55
  """
56
+ if is_rich_hierarchy:
57
+ return __find_element_by_ressource_id_in_rich_hierarchy(ui_hierarchy, resource_id)
16
58
 
17
- def search_recursive(elements: list[dict]) -> Optional[dict]:
59
+ def search_recursive(elements: list[dict]) -> dict | None:
18
60
  for element in elements:
19
61
  if isinstance(element, dict):
20
62
  if element.get("resourceId") == resource_id:
@@ -28,3 +70,56 @@ def find_element_by_resource_id(ui_hierarchy: list[dict], resource_id: str) -> O
28
70
  return None
29
71
 
30
72
  return search_recursive(ui_hierarchy)
73
+
74
+
75
+ def is_element_focused(element: dict) -> bool:
76
+ return element.get("focused", None) == "true"
77
+
78
+
79
+ def get_element_text(element: dict, hint_text: bool = False) -> str | None:
80
+ if hint_text:
81
+ return element.get("hintText", None)
82
+ return element.get("text", None)
83
+
84
+
85
+ class Point(BaseModel):
86
+ x: int
87
+ y: int
88
+
89
+
90
+ class ElementBounds(BaseModel):
91
+ x: int
92
+ y: int
93
+ width: int
94
+ height: int
95
+
96
+ def get_center(self) -> Point:
97
+ return Point(x=self.x + self.width // 2, y=self.y + self.height // 2)
98
+
99
+ def get_relative_point(self, x_percent: float, y_percent: float) -> Point:
100
+ """
101
+ Returns the coordinates of the point at x_percent of the width and y_percent
102
+ of the height of the element.
103
+
104
+ Ex if x_percent = 0.95 and y_percent = 0.95,
105
+ the point is at the bottom right of the element:
106
+ <------>
107
+ | |
108
+ | x|
109
+ <------>
110
+ """
111
+ return Point(
112
+ x=int((self.x + self.width) * x_percent),
113
+ y=int((self.y + self.height) * y_percent),
114
+ )
115
+
116
+
117
+ def get_bounds_for_element(element: dict) -> ElementBounds | None:
118
+ bounds = element.get("bounds", None)
119
+ if bounds:
120
+ try:
121
+ return ElementBounds(**bounds)
122
+ except Exception as e:
123
+ logger.error(f"Failed to validate bounds: {e}")
124
+ return None
125
+ return None
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: minitap-mobile-use
3
- Version: 2.0.0
3
+ Version: 2.1.0
4
4
  Summary: AI-powered multi-agent system that automates real Android and iOS devices through low-level control using LangGraph.
5
5
  Author: Pierre-Louis Favreau, Jean-Pierre Lo, Nicolas Dehandschoewercker
6
6
  License: MIT License
@@ -45,7 +45,7 @@ Requires-Dist: psutil>=5.9.0
45
45
  Requires-Dist: ruff==0.5.3 ; extra == 'dev'
46
46
  Requires-Dist: pytest==8.4.1 ; extra == 'dev'
47
47
  Requires-Dist: pytest-cov==5.0.0 ; extra == 'dev'
48
- Requires-Python: >=3.10
48
+ Requires-Python: >=3.12
49
49
  Project-URL: Homepage, https://minitap.ai/
50
50
  Project-URL: Source, https://github.com/minitap-ai/mobile-use
51
51
  Provides-Extra: dev
@@ -257,6 +257,16 @@ python ./src/mobile_use/main.py \
257
257
  > [!NOTE]
258
258
  > If you haven't configured a specific model, mobile-use will prompt you to choose one from the available options.
259
259
 
260
+ ## 🔎 Agentic System Overview
261
+
262
+ <div align="center">
263
+
264
+ ![Graph Visualization](doc/graph.png)
265
+
266
+ _This diagram is automatically updated from the codebase. This is our current agentic system architecture._
267
+
268
+ </div>
269
+
260
270
  ## ❤️ Contributing
261
271
 
262
272
  We love contributions! Whether you're fixing a bug, adding a feature, or improving documentation, your help is welcome. Please read our **[Contributing Guidelines](CONTRIBUTING.md)** to get started.