minitap-mobile-use 3.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (115) hide show
  1. minitap/mobile_use/__init__.py +0 -0
  2. minitap/mobile_use/agents/contextor/contextor.md +55 -0
  3. minitap/mobile_use/agents/contextor/contextor.py +175 -0
  4. minitap/mobile_use/agents/contextor/types.py +36 -0
  5. minitap/mobile_use/agents/cortex/cortex.md +135 -0
  6. minitap/mobile_use/agents/cortex/cortex.py +152 -0
  7. minitap/mobile_use/agents/cortex/types.py +15 -0
  8. minitap/mobile_use/agents/executor/executor.md +42 -0
  9. minitap/mobile_use/agents/executor/executor.py +87 -0
  10. minitap/mobile_use/agents/executor/tool_node.py +152 -0
  11. minitap/mobile_use/agents/hopper/hopper.md +15 -0
  12. minitap/mobile_use/agents/hopper/hopper.py +44 -0
  13. minitap/mobile_use/agents/orchestrator/human.md +12 -0
  14. minitap/mobile_use/agents/orchestrator/orchestrator.md +21 -0
  15. minitap/mobile_use/agents/orchestrator/orchestrator.py +134 -0
  16. minitap/mobile_use/agents/orchestrator/types.py +11 -0
  17. minitap/mobile_use/agents/outputter/human.md +25 -0
  18. minitap/mobile_use/agents/outputter/outputter.py +85 -0
  19. minitap/mobile_use/agents/outputter/test_outputter.py +167 -0
  20. minitap/mobile_use/agents/planner/human.md +14 -0
  21. minitap/mobile_use/agents/planner/planner.md +126 -0
  22. minitap/mobile_use/agents/planner/planner.py +101 -0
  23. minitap/mobile_use/agents/planner/types.py +51 -0
  24. minitap/mobile_use/agents/planner/utils.py +70 -0
  25. minitap/mobile_use/agents/summarizer/summarizer.py +35 -0
  26. minitap/mobile_use/agents/video_analyzer/__init__.py +5 -0
  27. minitap/mobile_use/agents/video_analyzer/human.md +5 -0
  28. minitap/mobile_use/agents/video_analyzer/video_analyzer.md +37 -0
  29. minitap/mobile_use/agents/video_analyzer/video_analyzer.py +111 -0
  30. minitap/mobile_use/clients/browserstack_client.py +477 -0
  31. minitap/mobile_use/clients/idb_client.py +429 -0
  32. minitap/mobile_use/clients/ios_client.py +332 -0
  33. minitap/mobile_use/clients/ios_client_config.py +141 -0
  34. minitap/mobile_use/clients/ui_automator_client.py +330 -0
  35. minitap/mobile_use/clients/wda_client.py +526 -0
  36. minitap/mobile_use/clients/wda_lifecycle.py +367 -0
  37. minitap/mobile_use/config.py +413 -0
  38. minitap/mobile_use/constants.py +3 -0
  39. minitap/mobile_use/context.py +106 -0
  40. minitap/mobile_use/controllers/__init__.py +0 -0
  41. minitap/mobile_use/controllers/android_controller.py +524 -0
  42. minitap/mobile_use/controllers/controller_factory.py +46 -0
  43. minitap/mobile_use/controllers/device_controller.py +182 -0
  44. minitap/mobile_use/controllers/ios_controller.py +436 -0
  45. minitap/mobile_use/controllers/platform_specific_commands_controller.py +199 -0
  46. minitap/mobile_use/controllers/types.py +106 -0
  47. minitap/mobile_use/controllers/unified_controller.py +193 -0
  48. minitap/mobile_use/graph/graph.py +160 -0
  49. minitap/mobile_use/graph/state.py +115 -0
  50. minitap/mobile_use/main.py +309 -0
  51. minitap/mobile_use/sdk/__init__.py +12 -0
  52. minitap/mobile_use/sdk/agent.py +1294 -0
  53. minitap/mobile_use/sdk/builders/__init__.py +10 -0
  54. minitap/mobile_use/sdk/builders/agent_config_builder.py +307 -0
  55. minitap/mobile_use/sdk/builders/index.py +15 -0
  56. minitap/mobile_use/sdk/builders/task_request_builder.py +236 -0
  57. minitap/mobile_use/sdk/constants.py +1 -0
  58. minitap/mobile_use/sdk/examples/README.md +83 -0
  59. minitap/mobile_use/sdk/examples/__init__.py +1 -0
  60. minitap/mobile_use/sdk/examples/app_lock_messaging.py +54 -0
  61. minitap/mobile_use/sdk/examples/platform_manual_task_example.py +67 -0
  62. minitap/mobile_use/sdk/examples/platform_minimal_example.py +48 -0
  63. minitap/mobile_use/sdk/examples/simple_photo_organizer.py +76 -0
  64. minitap/mobile_use/sdk/examples/smart_notification_assistant.py +225 -0
  65. minitap/mobile_use/sdk/examples/video_transcription_example.py +117 -0
  66. minitap/mobile_use/sdk/services/cloud_mobile.py +656 -0
  67. minitap/mobile_use/sdk/services/platform.py +434 -0
  68. minitap/mobile_use/sdk/types/__init__.py +51 -0
  69. minitap/mobile_use/sdk/types/agent.py +84 -0
  70. minitap/mobile_use/sdk/types/exceptions.py +138 -0
  71. minitap/mobile_use/sdk/types/platform.py +183 -0
  72. minitap/mobile_use/sdk/types/task.py +269 -0
  73. minitap/mobile_use/sdk/utils.py +29 -0
  74. minitap/mobile_use/services/accessibility.py +100 -0
  75. minitap/mobile_use/services/llm.py +247 -0
  76. minitap/mobile_use/services/telemetry.py +421 -0
  77. minitap/mobile_use/tools/index.py +67 -0
  78. minitap/mobile_use/tools/mobile/back.py +52 -0
  79. minitap/mobile_use/tools/mobile/erase_one_char.py +56 -0
  80. minitap/mobile_use/tools/mobile/focus_and_clear_text.py +317 -0
  81. minitap/mobile_use/tools/mobile/focus_and_input_text.py +153 -0
  82. minitap/mobile_use/tools/mobile/launch_app.py +86 -0
  83. minitap/mobile_use/tools/mobile/long_press_on.py +169 -0
  84. minitap/mobile_use/tools/mobile/open_link.py +62 -0
  85. minitap/mobile_use/tools/mobile/press_key.py +83 -0
  86. minitap/mobile_use/tools/mobile/stop_app.py +62 -0
  87. minitap/mobile_use/tools/mobile/swipe.py +156 -0
  88. minitap/mobile_use/tools/mobile/tap.py +154 -0
  89. minitap/mobile_use/tools/mobile/video_recording.py +177 -0
  90. minitap/mobile_use/tools/mobile/wait_for_delay.py +81 -0
  91. minitap/mobile_use/tools/scratchpad.py +147 -0
  92. minitap/mobile_use/tools/test_utils.py +413 -0
  93. minitap/mobile_use/tools/tool_wrapper.py +16 -0
  94. minitap/mobile_use/tools/types.py +35 -0
  95. minitap/mobile_use/tools/utils.py +336 -0
  96. minitap/mobile_use/utils/app_launch_utils.py +173 -0
  97. minitap/mobile_use/utils/cli_helpers.py +37 -0
  98. minitap/mobile_use/utils/cli_selection.py +143 -0
  99. minitap/mobile_use/utils/conversations.py +31 -0
  100. minitap/mobile_use/utils/decorators.py +124 -0
  101. minitap/mobile_use/utils/errors.py +6 -0
  102. minitap/mobile_use/utils/file.py +13 -0
  103. minitap/mobile_use/utils/logger.py +183 -0
  104. minitap/mobile_use/utils/media.py +186 -0
  105. minitap/mobile_use/utils/recorder.py +52 -0
  106. minitap/mobile_use/utils/requests_utils.py +37 -0
  107. minitap/mobile_use/utils/shell_utils.py +20 -0
  108. minitap/mobile_use/utils/test_ui_hierarchy.py +178 -0
  109. minitap/mobile_use/utils/time.py +6 -0
  110. minitap/mobile_use/utils/ui_hierarchy.py +132 -0
  111. minitap/mobile_use/utils/video.py +281 -0
  112. minitap_mobile_use-3.3.0.dist-info/METADATA +329 -0
  113. minitap_mobile_use-3.3.0.dist-info/RECORD +115 -0
  114. minitap_mobile_use-3.3.0.dist-info/WHEEL +4 -0
  115. minitap_mobile_use-3.3.0.dist-info/entry_points.txt +3 -0
@@ -0,0 +1,54 @@
1
+ import asyncio
2
+
3
+ from pydantic import BaseModel, Field
4
+
5
+ from minitap.mobile_use.sdk import Agent
6
+
7
+
8
class MessageResult(BaseModel):
    """Typed payload the agent is asked to fill in when the messaging task ends."""

    # Required: count of messages that went out.
    messages_sent: int = Field(
        default=...,
        description="Number of messages successfully sent",
    )
    # Required: names of the contacts that were messaged.
    contacts: list[str] = Field(
        default=...,
        description="List of contacts messaged",
    )
    # Required: overall outcome flag.
    success: bool = Field(
        default=...,
        description="Whether all messages were sent successfully",
    )
14
+
15
+
16
async def main() -> None:
    """Send a greeting to three WhatsApp contacts with the agent locked to the WhatsApp package.

    The agent is always cleaned up, whether the task succeeds or raises.
    """
    # Agent built with its default configuration.
    agent = Agent()

    try:
        await agent.init()

        # The app lock keeps execution inside WhatsApp: the agent stays in the
        # app and relaunches it if needed.
        builder = agent.new_task(
            "Send 'Happy New Year!' message to Alice, Bob, and Charlie on WhatsApp"
        )
        builder = builder.with_name("send_new_year_messages")
        builder = builder.with_locked_app_package("com.whatsapp")  # Lock to WhatsApp
        builder = builder.with_output_format(MessageResult)
        builder = builder.with_max_steps(600)  # Messaging tasks may need more steps
        task = builder.build()

        print("Sending messages with app lock enabled...")
        print("The agent will stay in WhatsApp and relaunch if needed.\n")

        result = await agent.run_task(request=task)

        # Handle the failure case first; a falsy result means nothing usable came back.
        if not result:
            print("Failed to send messages")
        else:
            print("\n=== Messaging Complete ===")
            print(f"Messages sent: {result.messages_sent}")
            print(f"Contacts: {', '.join(result.contacts)}")
            print(f"Success: {result.success}")

    except Exception as e:
        # Top-level boundary of an example script: report and fall through to cleanup.
        print(f"Error: {e}")
    finally:
        await agent.clean()


if __name__ == "__main__":
    asyncio.run(main())
@@ -0,0 +1,67 @@
1
+ """
2
+ Platform Usage - Manual Task Creation Example
3
+
4
+ This example demonstrates how to use the mobile-use SDK with manual task creation:
5
+ - Agent with minitap_api_key
6
+ - PlatformTaskRequest with ManualTaskConfig instead of task_id
7
+ - Task configuration provided directly in code (goal, output_description)
8
+ - No need to pre-create task in platform UI
9
+
10
+ Platform Model:
11
+ - API key provides authentication and agent configuration
12
+ - ManualTaskConfig creates task on-the-fly with:
13
+ - max_steps: 400 (fixed)
14
+ - enable_remote_tracing: True (fixed)
15
+ - profile: "default" (fixed)
16
+ - goal: provided by you
17
+ - output_description: provided by you (optional)
18
+
19
+ Run:
20
+ - python src/mobile_use/sdk/examples/platform_manual_task_example.py
21
+ """
22
+
23
+ import asyncio
24
+
25
+ from minitap.mobile_use.sdk import Agent
26
+ from minitap.mobile_use.sdk.types import ManualTaskConfig, PlatformTaskRequest
27
+
28
+
29
async def main() -> None:
    """
    Main execution function demonstrating manual task creation pattern.

    Runs two tasks described directly in code via ManualTaskConfig and prints
    each result. The agent is cleaned up even if initialization or a task
    raises, so device/server resources are not left behind.

    Visit https://platform.minitap.ai to get your API key.
    Set MINITAP_API_KEY and MINITAP_BASE_URL environment variables.
    """
    agent = Agent()

    try:
        # init() is inside the try so that a failed start-up is still followed
        # by clean(), as in the other SDK examples.
        await agent.init()

        # Example 1: Simple manual task
        result = await agent.run_task(
            request=PlatformTaskRequest(
                task=ManualTaskConfig(
                    goal="Open the settings app and tell me the battery level",
                ),
                profile="default",  # Optional, defaults to "default"
            )
        )
        print("Result 1:", result)

        # Example 2: Manual task with output description
        result = await agent.run_task(
            request=PlatformTaskRequest(
                task=ManualTaskConfig(
                    goal="Find the first 3 unread emails in Gmail",
                    output_description="A JSON array with sender and subject for each email",
                ),
            ),
            # Lock gmail to ensure it is automatically started and locked during task execution
            locked_app_package="com.google.android.gm",
        )
        print("Result 2:", result)
    finally:
        # Always release the agent; exceptions still propagate to the caller.
        await agent.clean()


if __name__ == "__main__":
    asyncio.run(main())
@@ -0,0 +1,48 @@
1
+ """
2
+ Platform Usage - Minitap SDK with API Key Example
3
+
4
+ This example demonstrates how to use the mobile-use SDK via the Minitap platform:
5
+ - Agent with minitap_api_key
6
+ - PlatformTaskRequest with platform-provided task_id
7
+ - All task configuration (goal, output format, etc.) managed by platform UI
8
+
9
+ Platform Model:
10
+ - API key provides authentication and agent configuration
11
+ - task_id references pre-configured task from platform UI
12
+ - No goal, output_format, profile selection needed in code
13
+ - Everything bound to task_id + api_key combination
14
+
15
+ Run:
16
+ - python src/mobile_use/sdk/examples/platform_minimal_example.py
17
+ """
18
+
19
+ import asyncio
20
+
21
+ from minitap.mobile_use.sdk import Agent
22
+ from minitap.mobile_use.sdk.types import PlatformTaskRequest
23
+
24
+
25
async def main() -> None:
    """
    Main execution function demonstrating minitap platform usage pattern.

    Runs one platform-defined task and prints its result. The agent is cleaned
    up even if initialization or the task raises.

    Visit https://platform.minitap.ai to create a task, customize your profiles,
    and get your API key.
    Pass the api_key parameter to the agent.init() method
    ...or set MINITAP_API_KEY environment variable.
    """
    agent = Agent()

    try:
        # init() is inside the try so that a failed start-up is still followed
        # by clean(), as in the other SDK examples.
        await agent.init(api_key="<your-minitap-api-key>")  # or set MINITAP_API_KEY env variable
        result = await agent.run_task(
            request=PlatformTaskRequest(
                task="<your-task-name>",
                profile="<your-profile-name>",
            ),
            locked_app_package="<locked-app-package>",  # optional
        )
        print(result)
    finally:
        # Always release the agent; exceptions still propagate to the caller.
        await agent.clean()


if __name__ == "__main__":
    asyncio.run(main())
@@ -0,0 +1,76 @@
1
+ """
2
+ Simple Photo Organizer - Basic SDK Usage Example
3
+
4
+ This example demonstrates a straightforward way to use the mobile-use SDK
5
+ without builders or advanced configuration. It performs a real-world automation task:
6
+ 1. Opens the photo gallery
7
+ 2. Finds photos from a specific date
8
+ 3. Creates an album and moves those photos into it
9
+
10
+ Run:
11
+ - python src/mobile_use/sdk/examples/simple_photo_organizer.py
12
+ """
13
+
14
+ import asyncio
15
+ from datetime import date, timedelta
16
+ from pydantic import BaseModel, Field
17
+ from minitap.mobile_use.sdk import Agent
18
+
19
+
20
class PhotosResult(BaseModel):
    """Typed payload the agent is asked to fill in for the photo-organizing task."""

    # Required fields.
    found_photos: int = Field(
        default=...,
        description="Number of photos found",
    )
    date_range: str = Field(
        default=...,
        description="Date range of photos found",
    )
    album_created: bool = Field(
        default=...,
        description="Whether an album was created",
    )
    album_name: str = Field(
        default=...,
        description="Name of the created album",
    )
    # Optional: falls back to 0 when the agent does not report it.
    photos_moved: int = Field(
        default=0,
        description="Number of photos moved to the album",
    )
28
+
29
+
30
async def main() -> None:
    """Ask the agent to gather yesterday's photos into a dated album and print the typed result.

    The agent is always cleaned up, whether the task succeeds or raises.
    """
    # Simple agent with default configuration.
    agent = Agent()

    try:
        # Initialize agent (finds a device, starts required servers)
        await agent.init()

        # Yesterday's date, rendered like "August 22", drives both the search and the album name.
        formatted_date = (date.today() - timedelta(days=1)).strftime("%B %d")
        print(f"Looking for photos from {formatted_date}...")

        # Single task: search, create the album, move the photos; output is typed.
        goal = (
            f"Open the Photos/Gallery app. Find photos taken on {formatted_date}. "
            f"Create a new album named '{formatted_date} Memories' and "
            f"move those photos into it. Count how many photos were moved."
        )
        result = await agent.run_task(goal=goal, output=PhotosResult, name="organize_photos")

        # Failure case first, then the detailed report.
        if not result:
            print("Failed to organize photos")
        else:
            print("\n=== Photo Organization Complete ===")
            print(f"Found: {result.found_photos} photos from {result.date_range}")

            if not result.album_created:
                print("No album was created")
            else:
                print(f"Created album: '{result.album_name}'")
                print(f"Moved {result.photos_moved} photos to the album")

    except Exception as e:
        # Top-level boundary of an example script: report and fall through to cleanup.
        print(f"Error: {e}")
    finally:
        # Always clean up resources
        await agent.clean()


if __name__ == "__main__":
    asyncio.run(main())
@@ -0,0 +1,225 @@
1
+ """
2
+ Smart Notification Assistant - Intermediate SDK Usage Example
3
+
4
+ This example demonstrates more advanced SDK features including:
5
+ - TaskRequestBuilder pattern
6
+ - Multiple agent profiles for different reasoning tasks
7
+ - Tracing for debugging/visualization
8
+ - Structured output with Pydantic
9
+ - Exception handling
10
+
11
+ It performs a practical automation task:
12
+ 1. Checks notification panel for unread notifications
13
+ 2. Categorizes them by priority/app
14
+ 3. Performs actions based on notification content
15
+
16
+ Run:
17
+ - python src/mobile_use/sdk/examples/smart_notification_assistant.py
18
+ """
19
+
20
+ import asyncio
21
+ from datetime import datetime
22
+ from enum import Enum
23
+
24
+ from pydantic import BaseModel, Field
25
+
26
+ from minitap.mobile_use.config import LLM, LLMConfig, LLMConfigUtils, LLMWithFallback
27
+ from minitap.mobile_use.sdk import Agent
28
+ from minitap.mobile_use.sdk.builders import Builders
29
+ from minitap.mobile_use.sdk.types import AgentProfile
30
+ from minitap.mobile_use.sdk.types.exceptions import AgentError
31
+
32
+
33
class NotificationPriority(str, Enum):
    """Priority bucket for a notification; members are also plain strings."""

    # Declaration order (high -> medium -> low) is the iteration order.
    HIGH = "high"
    MEDIUM = "medium"
    LOW = "low"
37
+
38
+
39
class Notification(BaseModel):
    """One notification as reported by the agent."""

    # Required fields.
    app_name: str = Field(
        default=...,
        description="Name of the app that sent the notification",
    )
    title: str = Field(
        default=...,
        description="Title/header of the notification",
    )
    message: str = Field(
        default=...,
        description="Message content of the notification",
    )
    # Optional: MEDIUM when the agent does not assign a priority.
    priority: NotificationPriority = Field(
        description="Priority level of notification",
        default=NotificationPriority.MEDIUM,
    )
48
+
49
+
50
class NotificationSummary(BaseModel):
    """Aggregate view over every notification the agent reported."""

    # Required: overall count.
    total_count: int = Field(
        default=...,
        description="Total number of notifications found",
    )
    # Optional: 0 when not reported.
    high_priority_count: int = Field(
        default=0,
        description="Count of high priority notifications",
    )
    # Optional: each instance gets its own fresh empty list.
    notifications: list[Notification] = Field(
        description="List of individual notifications",
        default_factory=list,
    )
58
+
59
+
60
def get_agent() -> Agent:
    """Build an Agent with two profiles: "analyzer" and "note_taker" (the default profile).

    Default task settings cap every task at 200 steps unless a task overrides it.
    """

    def llm(provider, model, fallback_provider, fallback_model):
        # A primary model paired with its fallback; a new object on every call.
        return LLMWithFallback(
            provider=provider,
            model=model,
            fallback=LLM(provider=fallback_provider, model=fallback_model),
        )

    def llama_scout():
        return llm(
            "openrouter",
            "meta-llama/llama-4-scout",
            "openrouter",
            "meta-llama/llama-4-maverick",
        )

    def o4_mini():
        return llm("openai", "o4-mini", "openai", "gpt-5")

    def gpt5_nano():
        return llm("openai", "gpt-5-nano", "openai", "gpt-5-mini")

    # 1. Analyzer profile for detailed inspection tasks.
    analyzer_profile = AgentProfile(
        name="analyzer",
        llm_config=LLMConfig(
            planner=llama_scout(),
            orchestrator=llama_scout(),
            contextor=llama_scout(),
            cortex=o4_mini(),
            executor=gpt5_nano(),
            utils=LLMConfigUtils(
                outputter=gpt5_nano(),
                hopper=gpt5_nano(),
            ),
        ),
        # from_file="/tmp/analyzer.jsonc"  # can be loaded from file
    )

    # 2. Action profile for easy & fast actions based on notifications.
    action_profile = AgentProfile(
        name="note_taker",
        llm_config=LLMConfig(
            planner=llm("openai", "o3", "openai", "gpt-5"),
            orchestrator=llm("google", "gemini-2.5-flash", "openai", "gpt-5"),
            contextor=gpt5_nano(),
            cortex=o4_mini(),
            executor=llm("openai", "gpt-4o-mini", "openai", "gpt-5-nano"),
            utils=LLMConfigUtils(
                outputter=gpt5_nano(),
                hopper=gpt5_nano(),
            ),
        ),
    )

    # Default task settings: only the step cap is set here.
    task_defaults = Builders.TaskDefaults.with_max_steps(200).build()

    # Assemble the agent configuration step by step.
    config_builder = Builders.AgentConfig.add_profiles(
        profiles=[analyzer_profile, action_profile]
    )
    config_builder = config_builder.with_default_profile(profile=action_profile)
    config_builder = config_builder.with_default_task_config(config=task_defaults)
    config = config_builder.build()

    return Agent(config=config)
160
+
161
+
162
async def main():
    """Scan notifications with the "analyzer" profile, then write a note with "note_taker".

    AgentError from the tasks is reported and swallowed; any other exception is
    reported and re-raised. The agent is cleaned up in every case.
    """
    # Traces directory named after the current time (minute resolution).
    timestamp = datetime.now().strftime("%Y%m%d_%H%M")
    traces_dir = f"/tmp/notification_traces/{timestamp}"
    agent = get_agent()

    try:
        # Initialize agent (finds a device, starts required servers)
        await agent.init()

        print("Checking for notifications...")

        # Task 1: read and classify notifications, returning a NotificationSummary.
        scan_goal = (
            "Open the notification panel (swipe down from top). "
            "Scroll through the first 3 unread notifications. "
            "For each notification, identify the app name, title, and content. "
            "Tag messages from messaging apps or email as high priority."
        )
        builder = agent.new_task(goal=scan_goal)
        builder = builder.with_output_format(NotificationSummary)
        builder = builder.using_profile("analyzer")
        builder = builder.with_name("notification_scan")
        builder = builder.with_max_steps(400)
        builder = builder.with_trace_recording(enabled=True, path=traces_dir)
        notification_task = builder.build()

        try:
            notifications = await agent.run_task(request=notification_task)

            if not notifications:
                print("Failed to retrieve notifications")
            else:
                # Display the structured results
                print("\n=== Notification Summary ===")
                print(f"Total notifications: {notifications.total_count}")
                print(f"High priority: {notifications.high_priority_count}")

                # Task 2: Create a note to store the notification summary
                response = await agent.run_task(
                    goal="Open my Notes app and create a new note summarizing the following "
                    f"information:\n{notifications}",
                    name="email_action",
                    profile="note_taker",
                )
                print(f"Action result: {response}")

        except AgentError as e:
            # SDK-level failure: report it and continue to cleanup.
            print(f"Agent error occurred: {e}")
        except Exception as e:
            # Anything else is unexpected: report it, then let it propagate.
            print(f"Unexpected error: {type(e).__name__}: {e}")
            raise

    finally:
        # Clean up
        await agent.clean()
        print(f"\nTraces saved to: {traces_dir}")


if __name__ == "__main__":
    asyncio.run(main())
@@ -0,0 +1,117 @@
1
+ """
2
+ Video Transcription Example
3
+
4
+ This example demonstrates how to use the video recording tools to capture
5
+ and analyze video content from a mobile device screen.
6
+
7
+ The agent can:
8
+ 1. Start a screen recording
9
+ 2. Perform actions while recording
10
+ 3. Stop the recording and analyze its content using Gemini models
11
+
12
+ Use case: Recording a video playing on the screen and transcribing its content.
13
+ """
14
+
15
+ import asyncio
16
+
17
+ from minitap.mobile_use.config import LLM, LLMConfig, LLMConfigUtils, LLMWithFallback
18
+ from minitap.mobile_use.sdk.agent import Agent
19
+ from minitap.mobile_use.sdk.builders.agent_config_builder import AgentConfigBuilder
20
+ from minitap.mobile_use.sdk.types.agent import AgentConfig
21
+ from minitap.mobile_use.sdk.types.task import AgentProfile, TaskRequest
22
+
23
+
24
def get_video_capable_llm_config() -> LLMConfig:
    """
    Build an LLMConfig whose utils section includes a video_analyzer.

    The video_analyzer must use a video-capable Gemini model:
    - gemini-3-flash-preview (recommended - fast and capable)
    - gemini-3-pro-preview
    - gemini-2.5-flash
    - gemini-2.5-pro
    - gemini-2.0-flash
    """

    def gpt5_nano():
        # Shared pairing used by most roles; a new object on every call.
        return LLMWithFallback(
            provider="openai",
            model="gpt-5-nano",
            fallback=LLM(provider="openai", model="gpt-5-mini"),
        )

    return LLMConfig(
        planner=gpt5_nano(),
        orchestrator=gpt5_nano(),
        contextor=gpt5_nano(),
        # The cortex is the only core role given a larger primary model.
        cortex=LLMWithFallback(
            provider="openai",
            model="gpt-5",
            fallback=LLM(provider="openai", model="o4-mini"),
        ),
        executor=gpt5_nano(),
        utils=LLMConfigUtils(
            outputter=gpt5_nano(),
            hopper=gpt5_nano(),
            # Gemini model for analyzing recorded video.
            video_analyzer=LLMWithFallback(
                provider="google",
                model="gemini-3-flash-preview",
                fallback=LLM(provider="google", model="gemini-2.5-flash"),
            ),
        ),
    )
79
+
80
+
81
async def main():
    """Record a YouTube video on the device and ask the agent what was said in it.

    Builds a config with the "VideoCapable" profile and the video recording
    tools enabled, runs one task, and always cleans the agent up afterwards.
    """
    builder = AgentConfigBuilder()
    builder = builder.add_profile(
        AgentProfile(
            name="VideoCapable",
            llm_config=get_video_capable_llm_config(),
        )
    )
    builder = builder.with_video_recording_tools()
    config: AgentConfig = builder.build()

    agent = Agent(config=config)
    try:
        await agent.init()

        # NOTE(review): whitespace inside this literal is reproduced as visible in the
        # reviewed source; confirm against the packaged file before re-indenting.
        request = TaskRequest(
            goal="""
1. Open YouTube app
2. Search for "Python tutorial"
3. Start recording the screen
4. Play the first video
5. Wait for the first 30 seconds of the video to play
6. Stop recording and tell me what was said in the video
""",
            profile="VideoCapable",
        )
        result = await agent.run_task(request=request)
        print(f"Task result: {result}")
    finally:
        await agent.clean()


if __name__ == "__main__":
    asyncio.run(main())