minitap-mobile-use 3.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- minitap/mobile_use/__init__.py +0 -0
- minitap/mobile_use/agents/contextor/contextor.md +55 -0
- minitap/mobile_use/agents/contextor/contextor.py +175 -0
- minitap/mobile_use/agents/contextor/types.py +36 -0
- minitap/mobile_use/agents/cortex/cortex.md +135 -0
- minitap/mobile_use/agents/cortex/cortex.py +152 -0
- minitap/mobile_use/agents/cortex/types.py +15 -0
- minitap/mobile_use/agents/executor/executor.md +42 -0
- minitap/mobile_use/agents/executor/executor.py +87 -0
- minitap/mobile_use/agents/executor/tool_node.py +152 -0
- minitap/mobile_use/agents/hopper/hopper.md +15 -0
- minitap/mobile_use/agents/hopper/hopper.py +44 -0
- minitap/mobile_use/agents/orchestrator/human.md +12 -0
- minitap/mobile_use/agents/orchestrator/orchestrator.md +21 -0
- minitap/mobile_use/agents/orchestrator/orchestrator.py +134 -0
- minitap/mobile_use/agents/orchestrator/types.py +11 -0
- minitap/mobile_use/agents/outputter/human.md +25 -0
- minitap/mobile_use/agents/outputter/outputter.py +85 -0
- minitap/mobile_use/agents/outputter/test_outputter.py +167 -0
- minitap/mobile_use/agents/planner/human.md +14 -0
- minitap/mobile_use/agents/planner/planner.md +126 -0
- minitap/mobile_use/agents/planner/planner.py +101 -0
- minitap/mobile_use/agents/planner/types.py +51 -0
- minitap/mobile_use/agents/planner/utils.py +70 -0
- minitap/mobile_use/agents/summarizer/summarizer.py +35 -0
- minitap/mobile_use/agents/video_analyzer/__init__.py +5 -0
- minitap/mobile_use/agents/video_analyzer/human.md +5 -0
- minitap/mobile_use/agents/video_analyzer/video_analyzer.md +37 -0
- minitap/mobile_use/agents/video_analyzer/video_analyzer.py +111 -0
- minitap/mobile_use/clients/browserstack_client.py +477 -0
- minitap/mobile_use/clients/idb_client.py +429 -0
- minitap/mobile_use/clients/ios_client.py +332 -0
- minitap/mobile_use/clients/ios_client_config.py +141 -0
- minitap/mobile_use/clients/ui_automator_client.py +330 -0
- minitap/mobile_use/clients/wda_client.py +526 -0
- minitap/mobile_use/clients/wda_lifecycle.py +367 -0
- minitap/mobile_use/config.py +413 -0
- minitap/mobile_use/constants.py +3 -0
- minitap/mobile_use/context.py +106 -0
- minitap/mobile_use/controllers/__init__.py +0 -0
- minitap/mobile_use/controllers/android_controller.py +524 -0
- minitap/mobile_use/controllers/controller_factory.py +46 -0
- minitap/mobile_use/controllers/device_controller.py +182 -0
- minitap/mobile_use/controllers/ios_controller.py +436 -0
- minitap/mobile_use/controllers/platform_specific_commands_controller.py +199 -0
- minitap/mobile_use/controllers/types.py +106 -0
- minitap/mobile_use/controllers/unified_controller.py +193 -0
- minitap/mobile_use/graph/graph.py +160 -0
- minitap/mobile_use/graph/state.py +115 -0
- minitap/mobile_use/main.py +309 -0
- minitap/mobile_use/sdk/__init__.py +12 -0
- minitap/mobile_use/sdk/agent.py +1294 -0
- minitap/mobile_use/sdk/builders/__init__.py +10 -0
- minitap/mobile_use/sdk/builders/agent_config_builder.py +307 -0
- minitap/mobile_use/sdk/builders/index.py +15 -0
- minitap/mobile_use/sdk/builders/task_request_builder.py +236 -0
- minitap/mobile_use/sdk/constants.py +1 -0
- minitap/mobile_use/sdk/examples/README.md +83 -0
- minitap/mobile_use/sdk/examples/__init__.py +1 -0
- minitap/mobile_use/sdk/examples/app_lock_messaging.py +54 -0
- minitap/mobile_use/sdk/examples/platform_manual_task_example.py +67 -0
- minitap/mobile_use/sdk/examples/platform_minimal_example.py +48 -0
- minitap/mobile_use/sdk/examples/simple_photo_organizer.py +76 -0
- minitap/mobile_use/sdk/examples/smart_notification_assistant.py +225 -0
- minitap/mobile_use/sdk/examples/video_transcription_example.py +117 -0
- minitap/mobile_use/sdk/services/cloud_mobile.py +656 -0
- minitap/mobile_use/sdk/services/platform.py +434 -0
- minitap/mobile_use/sdk/types/__init__.py +51 -0
- minitap/mobile_use/sdk/types/agent.py +84 -0
- minitap/mobile_use/sdk/types/exceptions.py +138 -0
- minitap/mobile_use/sdk/types/platform.py +183 -0
- minitap/mobile_use/sdk/types/task.py +269 -0
- minitap/mobile_use/sdk/utils.py +29 -0
- minitap/mobile_use/services/accessibility.py +100 -0
- minitap/mobile_use/services/llm.py +247 -0
- minitap/mobile_use/services/telemetry.py +421 -0
- minitap/mobile_use/tools/index.py +67 -0
- minitap/mobile_use/tools/mobile/back.py +52 -0
- minitap/mobile_use/tools/mobile/erase_one_char.py +56 -0
- minitap/mobile_use/tools/mobile/focus_and_clear_text.py +317 -0
- minitap/mobile_use/tools/mobile/focus_and_input_text.py +153 -0
- minitap/mobile_use/tools/mobile/launch_app.py +86 -0
- minitap/mobile_use/tools/mobile/long_press_on.py +169 -0
- minitap/mobile_use/tools/mobile/open_link.py +62 -0
- minitap/mobile_use/tools/mobile/press_key.py +83 -0
- minitap/mobile_use/tools/mobile/stop_app.py +62 -0
- minitap/mobile_use/tools/mobile/swipe.py +156 -0
- minitap/mobile_use/tools/mobile/tap.py +154 -0
- minitap/mobile_use/tools/mobile/video_recording.py +177 -0
- minitap/mobile_use/tools/mobile/wait_for_delay.py +81 -0
- minitap/mobile_use/tools/scratchpad.py +147 -0
- minitap/mobile_use/tools/test_utils.py +413 -0
- minitap/mobile_use/tools/tool_wrapper.py +16 -0
- minitap/mobile_use/tools/types.py +35 -0
- minitap/mobile_use/tools/utils.py +336 -0
- minitap/mobile_use/utils/app_launch_utils.py +173 -0
- minitap/mobile_use/utils/cli_helpers.py +37 -0
- minitap/mobile_use/utils/cli_selection.py +143 -0
- minitap/mobile_use/utils/conversations.py +31 -0
- minitap/mobile_use/utils/decorators.py +124 -0
- minitap/mobile_use/utils/errors.py +6 -0
- minitap/mobile_use/utils/file.py +13 -0
- minitap/mobile_use/utils/logger.py +183 -0
- minitap/mobile_use/utils/media.py +186 -0
- minitap/mobile_use/utils/recorder.py +52 -0
- minitap/mobile_use/utils/requests_utils.py +37 -0
- minitap/mobile_use/utils/shell_utils.py +20 -0
- minitap/mobile_use/utils/test_ui_hierarchy.py +178 -0
- minitap/mobile_use/utils/time.py +6 -0
- minitap/mobile_use/utils/ui_hierarchy.py +132 -0
- minitap/mobile_use/utils/video.py +281 -0
- minitap_mobile_use-3.3.0.dist-info/METADATA +329 -0
- minitap_mobile_use-3.3.0.dist-info/RECORD +115 -0
- minitap_mobile_use-3.3.0.dist-info/WHEEL +4 -0
- minitap_mobile_use-3.3.0.dist-info/entry_points.txt +3 -0
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
import asyncio
|
|
2
|
+
|
|
3
|
+
from pydantic import BaseModel, Field
|
|
4
|
+
|
|
5
|
+
from minitap.mobile_use.sdk import Agent
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class MessageResult(BaseModel):
    """Structured result from messaging task."""

    # No default is given to any field, so all three are required.
    messages_sent: int = Field(description="Number of messages successfully sent")
    contacts: list[str] = Field(description="List of contacts messaged")
    success: bool = Field(description="Whether all messages were sent successfully")
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
async def main() -> None:
    """Run one messaging task with the agent locked to WhatsApp and print its result."""
    # Default configuration is enough for this example
    agent = Agent()

    try:
        await agent.init()

        # The app lock keeps execution in WhatsApp:
        # the agent stays in the app and relaunches it if needed
        builder = agent.new_task(
            "Send 'Happy New Year!' message to Alice, Bob, and Charlie on WhatsApp"
        )
        builder = builder.with_name("send_new_year_messages")
        builder = builder.with_locked_app_package("com.whatsapp")  # Lock to WhatsApp
        builder = builder.with_output_format(MessageResult)
        builder = builder.with_max_steps(600)  # Messaging tasks may need more steps
        task = builder.build()

        print("Sending messages with app lock enabled...")
        print("The agent will stay in WhatsApp and relaunch if needed.\n")

        result = await agent.run_task(request=task)

        # Guard clause: a falsy result is reported as a failure
        if not result:
            print("Failed to send messages")
            return

        print("\n=== Messaging Complete ===")
        print(f"Messages sent: {result.messages_sent}")
        print(f"Contacts: {', '.join(result.contacts)}")
        print(f"Success: {result.success}")

    except Exception as e:
        print(f"Error: {e}")
    finally:
        # Runs on every path, including the early return above
        await agent.clean()


if __name__ == "__main__":
    asyncio.run(main())
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Platform Usage - Manual Task Creation Example
|
|
3
|
+
|
|
4
|
+
This example demonstrates how to use the mobile-use SDK with manual task creation:
|
|
5
|
+
- Agent with minitap_api_key
|
|
6
|
+
- PlatformTaskRequest with ManualTaskConfig instead of task_id
|
|
7
|
+
- Task configuration provided directly in code (goal, output_description)
|
|
8
|
+
- No need to pre-create task in platform UI
|
|
9
|
+
|
|
10
|
+
Platform Model:
|
|
11
|
+
- API key provides authentication and agent configuration
|
|
12
|
+
- ManualTaskConfig creates task on-the-fly with:
|
|
13
|
+
- max_steps: 400 (fixed)
|
|
14
|
+
- enable_remote_tracing: True (fixed)
|
|
15
|
+
- profile: "default" (fixed)
|
|
16
|
+
- goal: provided by you
|
|
17
|
+
- output_description: provided by you (optional)
|
|
18
|
+
|
|
19
|
+
Run:
|
|
20
|
+
- python src/mobile_use/sdk/examples/platform_manual_task_example.py
|
|
21
|
+
"""
|
|
22
|
+
|
|
23
|
+
import asyncio
|
|
24
|
+
|
|
25
|
+
from minitap.mobile_use.sdk import Agent
|
|
26
|
+
from minitap.mobile_use.sdk.types import ManualTaskConfig, PlatformTaskRequest
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
async def main() -> None:
    """
    Main execution function demonstrating manual task creation pattern.

    Visit https://platform.minitap.ai to get your API key.
    Set MINITAP_API_KEY and MINITAP_BASE_URL environment variables.

    The agent is cleaned up in a ``finally`` clause, so ``agent.clean()``
    still runs when initialization or one of the tasks raises.
    """
    agent = Agent()

    try:
        await agent.init()

        # Example 1: Simple manual task
        result = await agent.run_task(
            request=PlatformTaskRequest(
                task=ManualTaskConfig(
                    goal="Open the settings app and tell me the battery level",
                ),
                profile="default",  # Optional, defaults to "default"
            )
        )
        print("Result 1:", result)

        # Example 2: Manual task with output description
        result = await agent.run_task(
            request=PlatformTaskRequest(
                task=ManualTaskConfig(
                    goal="Find the first 3 unread emails in Gmail",
                    output_description="A JSON array with sender and subject for each email",
                ),
            ),
            # Lock gmail to ensure it is automatically started and locked during task execution
            locked_app_package="com.google.android.gm",
        )
        print("Result 2:", result)
    finally:
        # Always release the agent, even if init() or a task failed
        await agent.clean()


if __name__ == "__main__":
    asyncio.run(main())
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Platform Usage - Minitap SDK with API Key Example
|
|
3
|
+
|
|
4
|
+
This example demonstrates how to use the mobile-use SDK via the Minitap platform:
|
|
5
|
+
- Agent with minitap_api_key
|
|
6
|
+
- PlatformTaskRequest referencing a task pre-configured on the platform (passed by name via `task=`)
|
|
7
|
+
- All task configuration (goal, output format, etc.) managed by platform UI
|
|
8
|
+
|
|
9
|
+
Platform Model:
|
|
10
|
+
- API key provides authentication and agent configuration
|
|
11
|
+
- The task name passed as `task=` references a pre-configured task from the platform UI
|
|
12
|
+
- No goal, output_format, profile selection needed in code
|
|
13
|
+
- Everything bound to task_id + api_key combination
|
|
14
|
+
|
|
15
|
+
Run:
|
|
16
|
+
- python src/mobile_use/sdk/examples/platform_minimal_example.py
|
|
17
|
+
"""
|
|
18
|
+
|
|
19
|
+
import asyncio
|
|
20
|
+
|
|
21
|
+
from minitap.mobile_use.sdk import Agent
|
|
22
|
+
from minitap.mobile_use.sdk.types import PlatformTaskRequest
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
async def main() -> None:
    """
    Main execution function demonstrating minitap platform usage pattern.

    Visit https://platform.minitap.ai to create a task, customize your profiles,
    and get your API key.
    Pass the api_key parameter to the agent.init() method,
    or set the MINITAP_API_KEY environment variable.

    The agent is cleaned up in a ``finally`` clause, so ``agent.clean()``
    still runs when initialization or the task raises.
    """
    agent = Agent()

    try:
        await agent.init(api_key="<your-minitap-api-key>")  # or set MINITAP_API_KEY env variable
        result = await agent.run_task(
            request=PlatformTaskRequest(
                task="<your-task-name>",
                profile="<your-profile-name>",
            ),
            locked_app_package="<locked-app-package>",  # optional
        )
        print(result)
    finally:
        # Always release the agent, even if init() or the task failed
        await agent.clean()


if __name__ == "__main__":
    asyncio.run(main())
|
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Simple Photo Organizer - Basic SDK Usage Example
|
|
3
|
+
|
|
4
|
+
This example demonstrates a straightforward way to use the mobile-use SDK
|
|
5
|
+
without builders or advanced configuration. It performs a real-world automation task:
|
|
6
|
+
1. Opens the photo gallery
|
|
7
|
+
2. Finds photos from a specific date
|
|
8
|
+
3. Creates an album and moves those photos into it
|
|
9
|
+
|
|
10
|
+
Run:
|
|
11
|
+
- python src/mobile_use/sdk/examples/simple_photo_organizer.py
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
import asyncio
|
|
15
|
+
from datetime import date, timedelta
|
|
16
|
+
from pydantic import BaseModel, Field
|
|
17
|
+
from minitap.mobile_use.sdk import Agent
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class PhotosResult(BaseModel):
    """Structured result from photo search."""

    # Required fields (no default supplied)
    found_photos: int = Field(description="Number of photos found")
    date_range: str = Field(description="Date range of photos found")
    album_created: bool = Field(description="Whether an album was created")
    album_name: str = Field(description="Name of the created album")
    # Optional field: defaults to 0
    photos_moved: int = Field(default=0, description="Number of photos moved to the album")
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
async def main() -> None:
    """Run one task that groups yesterday's photos into a new album and print the outcome."""
    # Plain agent with default configuration (no builders)
    agent = Agent()

    try:
        # Initialize agent (finds a device, starts required servers)
        await agent.init()

        # The example targets photos taken yesterday
        target_day = date.today() - timedelta(days=1)
        day_label = target_day.strftime("%B %d")  # e.g. "August 22"

        print(f"Looking for photos from {day_label}...")

        # Single task: search for photos and organize them, with typed output
        goal = (
            f"Open the Photos/Gallery app. Find photos taken on {day_label}. "
            f"Create a new album named '{day_label} Memories' and "
            "move those photos into it. Count how many photos were moved."
        )
        result = await agent.run_task(
            goal=goal,
            output=PhotosResult,
            name="organize_photos",
        )

        # Guard clause: a falsy result is reported as a failure
        if not result:
            print("Failed to organize photos")
            return

        print("\n=== Photo Organization Complete ===")
        print(f"Found: {result.found_photos} photos from {result.date_range}")

        if not result.album_created:
            print("No album was created")
            return

        print(f"Created album: '{result.album_name}'")
        print(f"Moved {result.photos_moved} photos to the album")

    except Exception as e:
        print(f"Error: {e}")
    finally:
        # Always clean up resources (also runs after the early returns above)
        await agent.clean()


if __name__ == "__main__":
    asyncio.run(main())
|
|
@@ -0,0 +1,225 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Smart Notification Assistant - Intermediate SDK Usage Example
|
|
3
|
+
|
|
4
|
+
This example demonstrates more advanced SDK features including:
|
|
5
|
+
- TaskRequestBuilder pattern
|
|
6
|
+
- Multiple agent profiles for different reasoning tasks
|
|
7
|
+
- Tracing for debugging/visualization
|
|
8
|
+
- Structured output with Pydantic
|
|
9
|
+
- Exception handling
|
|
10
|
+
|
|
11
|
+
It performs a practical automation task:
|
|
12
|
+
1. Checks notification panel for unread notifications
|
|
13
|
+
2. Categorizes them by priority/app
|
|
14
|
+
3. Performs actions based on notification content
|
|
15
|
+
|
|
16
|
+
Run:
|
|
17
|
+
- python src/mobile_use/sdk/examples/smart_notification_assistant.py
|
|
18
|
+
"""
|
|
19
|
+
|
|
20
|
+
import asyncio
|
|
21
|
+
from datetime import datetime
|
|
22
|
+
from enum import Enum
|
|
23
|
+
|
|
24
|
+
from pydantic import BaseModel, Field
|
|
25
|
+
|
|
26
|
+
from minitap.mobile_use.config import LLM, LLMConfig, LLMConfigUtils, LLMWithFallback
|
|
27
|
+
from minitap.mobile_use.sdk import Agent
|
|
28
|
+
from minitap.mobile_use.sdk.builders import Builders
|
|
29
|
+
from minitap.mobile_use.sdk.types import AgentProfile
|
|
30
|
+
from minitap.mobile_use.sdk.types.exceptions import AgentError
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
class NotificationPriority(str, Enum):
    # Priority levels assigned to a notification.
    # The str mix-in makes each member compare equal to its plain string value.
    HIGH = "high"
    MEDIUM = "medium"
    LOW = "low"
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
class Notification(BaseModel):
    """Individual notification details."""

    # Required fields (no default supplied)
    app_name: str = Field(description="Name of the app that sent the notification")
    title: str = Field(description="Title/header of the notification")
    message: str = Field(description="Message content of the notification")
    # Optional field: falls back to MEDIUM when not provided
    priority: NotificationPriority = Field(
        NotificationPriority.MEDIUM,
        description="Priority level of notification",
    )
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
class NotificationSummary(BaseModel):
    """Summary of all notifications."""

    # Required: overall number of notifications
    total_count: int = Field(description="Total number of notifications found")
    # Optional: defaults to 0
    high_priority_count: int = Field(default=0, description="Count of high priority notifications")
    # Optional: each instance gets its own fresh empty list
    notifications: list[Notification] = Field(
        description="List of individual notifications",
        default_factory=list,
    )
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def get_agent() -> Agent:
    """Build an Agent carrying two profiles, "analyzer" and "note_taker".

    "note_taker" is registered as the default profile, and the default task
    settings are built with a maximum of 200 steps.
    """

    def _llm(provider, model, fallback_provider, fallback_model) -> LLMWithFallback:
        # One primary model together with the model to fall back to
        return LLMWithFallback(
            provider=provider,
            model=model,
            fallback=LLM(provider=fallback_provider, model=fallback_model),
        )

    def _llama_scout() -> LLMWithFallback:
        return _llm(
            "openrouter",
            "meta-llama/llama-4-scout",
            "openrouter",
            "meta-llama/llama-4-maverick",
        )

    def _o4_mini() -> LLMWithFallback:
        return _llm("openai", "o4-mini", "openai", "gpt-5")

    def _gpt5_nano() -> LLMWithFallback:
        return _llm("openai", "gpt-5-nano", "openai", "gpt-5-mini")

    # Two specialized profiles are created.
    # 1. An analyzer profile for detailed inspection tasks
    analyzer_llms = LLMConfig(
        planner=_llama_scout(),
        orchestrator=_llama_scout(),
        contextor=_llama_scout(),
        cortex=_o4_mini(),
        executor=_gpt5_nano(),
        utils=LLMConfigUtils(
            outputter=_gpt5_nano(),
            hopper=_gpt5_nano(),
        ),
    )
    analyzer_profile = AgentProfile(
        name="analyzer",
        llm_config=analyzer_llms,
        # from_file="/tmp/analyzer.jsonc"  # can be loaded from file
    )

    # 2. An action profile for handling easy & fast actions based on notifications
    action_llms = LLMConfig(
        planner=_llm("openai", "o3", "openai", "gpt-5"),
        orchestrator=_llm("google", "gemini-2.5-flash", "openai", "gpt-5"),
        contextor=_gpt5_nano(),
        cortex=_o4_mini(),
        executor=_llm("openai", "gpt-4o-mini", "openai", "gpt-5-nano"),
        utils=LLMConfigUtils(
            outputter=_gpt5_nano(),
            hopper=_gpt5_nano(),
        ),
    )
    action_profile = AgentProfile(name="note_taker", llm_config=action_llms)

    # Default task settings: only the step limit is set here
    task_defaults = Builders.TaskDefaults.with_max_steps(200).build()

    # Assemble the agent configuration step by step
    config_builder = Builders.AgentConfig.add_profiles(
        profiles=[analyzer_profile, action_profile]
    )
    config_builder = config_builder.with_default_profile(profile=action_profile)
    config_builder = config_builder.with_default_task_config(config=task_defaults)
    return Agent(config=config_builder.build())
|
|
160
|
+
|
|
161
|
+
|
|
162
|
+
async def main() -> None:
    """Scan notifications with the "analyzer" profile, then save a summary note.

    The first task is built with trace recording enabled and returns a
    ``NotificationSummary``. When that result is truthy, a second task using
    the "note_taker" profile writes it into the Notes app.

    ``AgentError`` raised by either task is reported and swallowed; any other
    exception is reported and re-raised. The agent is always cleaned up.
    """
    # Traces directory named after the start time. Seconds are included so
    # two runs started within the same minute do not share a directory.
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    traces_dir = f"/tmp/notification_traces/{timestamp}"
    agent = get_agent()
    # Set once the trace-recorded task is launched, so the closing message
    # is not printed when nothing could have been recorded.
    scan_started = False

    try:
        # Initialize agent (finds a device, starts required servers)
        await agent.init()

        print("Checking for notifications...")

        # Task 1: Get and analyze notifications with analyzer profile
        notification_task = (
            agent.new_task(
                goal="Open the notification panel (swipe down from top). "
                "Scroll through the first 3 unread notifications. "
                "For each notification, identify the app name, title, and content. "
                "Tag messages from messaging apps or email as high priority."
            )
            .with_output_format(NotificationSummary)
            .using_profile("analyzer")
            .with_name("notification_scan")
            .with_max_steps(400)
            .with_trace_recording(enabled=True, path=traces_dir)
            .build()
        )

        # Execute the task with proper exception handling
        try:
            scan_started = True
            notifications = await agent.run_task(request=notification_task)

            # Display the structured results
            if notifications:
                print("\n=== Notification Summary ===")
                print(f"Total notifications: {notifications.total_count}")
                print(f"High priority: {notifications.high_priority_count}")

                # Task 2: Create a note to store the notification summary
                response = await agent.run_task(
                    goal="Open my Notes app and create a new note summarizing the following "
                    f"information:\n{notifications}",
                    # Named after what the task does (it writes a note, not an email)
                    name="notification_note",
                    profile="note_taker",
                )
                print(f"Action result: {response}")

            else:
                print("Failed to retrieve notifications")

        except AgentError as e:
            print(f"Agent error occurred: {e}")
        except Exception as e:
            print(f"Unexpected error: {type(e).__name__}: {e}")
            raise

    finally:
        # Clean up
        await agent.clean()
        if scan_started:
            print(f"\nTraces saved to: {traces_dir}")


if __name__ == "__main__":
    asyncio.run(main())
|
|
@@ -0,0 +1,117 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Video Transcription Example
|
|
3
|
+
|
|
4
|
+
This example demonstrates how to use the video recording tools to capture
|
|
5
|
+
and analyze video content from a mobile device screen.
|
|
6
|
+
|
|
7
|
+
The agent can:
|
|
8
|
+
1. Start a screen recording
|
|
9
|
+
2. Perform actions while recording
|
|
10
|
+
3. Stop the recording and analyze its content using Gemini models
|
|
11
|
+
|
|
12
|
+
Use case: Recording a video playing on the screen and transcribing its content.
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
import asyncio
|
|
16
|
+
|
|
17
|
+
from minitap.mobile_use.config import LLM, LLMConfig, LLMConfigUtils, LLMWithFallback
|
|
18
|
+
from minitap.mobile_use.sdk.agent import Agent
|
|
19
|
+
from minitap.mobile_use.sdk.builders.agent_config_builder import AgentConfigBuilder
|
|
20
|
+
from minitap.mobile_use.sdk.types.agent import AgentConfig
|
|
21
|
+
from minitap.mobile_use.sdk.types.task import AgentProfile, TaskRequest
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def get_video_capable_llm_config() -> LLMConfig:
    """
    Build an LLM config whose utils include a video_analyzer.

    The video_analyzer must use a video-capable Gemini model:
    - gemini-3-flash-preview (recommended - fast and capable)
    - gemini-3-pro-preview
    - gemini-2.5-flash
    - gemini-2.5-pro
    - gemini-2.0-flash
    """

    def _openai(model, fallback_model) -> LLMWithFallback:
        # OpenAI primary model with an OpenAI fallback
        return LLMWithFallback(
            provider="openai",
            model=model,
            fallback=LLM(provider="openai", model=fallback_model),
        )

    def _nano() -> LLMWithFallback:
        return _openai("gpt-5-nano", "gpt-5-mini")

    return LLMConfig(
        planner=_nano(),
        orchestrator=_nano(),
        contextor=_nano(),
        cortex=_openai("gpt-5", "o4-mini"),
        executor=_nano(),
        utils=LLMConfigUtils(
            outputter=_nano(),
            hopper=_nano(),
            # The only Google-backed entry: primary and fallback are both Gemini models
            video_analyzer=LLMWithFallback(
                provider="google",
                model="gemini-3-flash-preview",
                fallback=LLM(provider="google", model="gemini-2.5-flash"),
            ),
        ),
    )
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
async def main():
    """Run one task on the "VideoCapable" profile with the video recording tools enabled."""
    # Assemble the agent configuration step by step
    builder = AgentConfigBuilder()
    video_profile = AgentProfile(
        name="VideoCapable",
        llm_config=get_video_capable_llm_config(),
    )
    builder = builder.add_profile(video_profile)
    builder = builder.with_video_recording_tools()
    config: AgentConfig = builder.build()

    agent = Agent(config=config)
    try:
        await agent.init()

        # The literal is kept flush-left so the goal text matches the source exactly
        recording_goal = """
1. Open YouTube app
2. Search for "Python tutorial"
3. Start recording the screen
4. Play the first video
5. Wait for the first 30 seconds of the video to play
6. Stop recording and tell me what was said in the video
"""
        request = TaskRequest(
            goal=recording_goal,
            profile="VideoCapable",
        )
        result = await agent.run_task(request=request)
        print(f"Task result: {result}")
    finally:
        await agent.clean()


if __name__ == "__main__":
    asyncio.run(main())
|