minitap-mobile-use 3.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- minitap/mobile_use/__init__.py +0 -0
- minitap/mobile_use/agents/contextor/contextor.md +55 -0
- minitap/mobile_use/agents/contextor/contextor.py +175 -0
- minitap/mobile_use/agents/contextor/types.py +36 -0
- minitap/mobile_use/agents/cortex/cortex.md +135 -0
- minitap/mobile_use/agents/cortex/cortex.py +152 -0
- minitap/mobile_use/agents/cortex/types.py +15 -0
- minitap/mobile_use/agents/executor/executor.md +42 -0
- minitap/mobile_use/agents/executor/executor.py +87 -0
- minitap/mobile_use/agents/executor/tool_node.py +152 -0
- minitap/mobile_use/agents/hopper/hopper.md +15 -0
- minitap/mobile_use/agents/hopper/hopper.py +44 -0
- minitap/mobile_use/agents/orchestrator/human.md +12 -0
- minitap/mobile_use/agents/orchestrator/orchestrator.md +21 -0
- minitap/mobile_use/agents/orchestrator/orchestrator.py +134 -0
- minitap/mobile_use/agents/orchestrator/types.py +11 -0
- minitap/mobile_use/agents/outputter/human.md +25 -0
- minitap/mobile_use/agents/outputter/outputter.py +85 -0
- minitap/mobile_use/agents/outputter/test_outputter.py +167 -0
- minitap/mobile_use/agents/planner/human.md +14 -0
- minitap/mobile_use/agents/planner/planner.md +126 -0
- minitap/mobile_use/agents/planner/planner.py +101 -0
- minitap/mobile_use/agents/planner/types.py +51 -0
- minitap/mobile_use/agents/planner/utils.py +70 -0
- minitap/mobile_use/agents/summarizer/summarizer.py +35 -0
- minitap/mobile_use/agents/video_analyzer/__init__.py +5 -0
- minitap/mobile_use/agents/video_analyzer/human.md +5 -0
- minitap/mobile_use/agents/video_analyzer/video_analyzer.md +37 -0
- minitap/mobile_use/agents/video_analyzer/video_analyzer.py +111 -0
- minitap/mobile_use/clients/browserstack_client.py +477 -0
- minitap/mobile_use/clients/idb_client.py +429 -0
- minitap/mobile_use/clients/ios_client.py +332 -0
- minitap/mobile_use/clients/ios_client_config.py +141 -0
- minitap/mobile_use/clients/ui_automator_client.py +330 -0
- minitap/mobile_use/clients/wda_client.py +526 -0
- minitap/mobile_use/clients/wda_lifecycle.py +367 -0
- minitap/mobile_use/config.py +413 -0
- minitap/mobile_use/constants.py +3 -0
- minitap/mobile_use/context.py +106 -0
- minitap/mobile_use/controllers/__init__.py +0 -0
- minitap/mobile_use/controllers/android_controller.py +524 -0
- minitap/mobile_use/controllers/controller_factory.py +46 -0
- minitap/mobile_use/controllers/device_controller.py +182 -0
- minitap/mobile_use/controllers/ios_controller.py +436 -0
- minitap/mobile_use/controllers/platform_specific_commands_controller.py +199 -0
- minitap/mobile_use/controllers/types.py +106 -0
- minitap/mobile_use/controllers/unified_controller.py +193 -0
- minitap/mobile_use/graph/graph.py +160 -0
- minitap/mobile_use/graph/state.py +115 -0
- minitap/mobile_use/main.py +309 -0
- minitap/mobile_use/sdk/__init__.py +12 -0
- minitap/mobile_use/sdk/agent.py +1294 -0
- minitap/mobile_use/sdk/builders/__init__.py +10 -0
- minitap/mobile_use/sdk/builders/agent_config_builder.py +307 -0
- minitap/mobile_use/sdk/builders/index.py +15 -0
- minitap/mobile_use/sdk/builders/task_request_builder.py +236 -0
- minitap/mobile_use/sdk/constants.py +1 -0
- minitap/mobile_use/sdk/examples/README.md +83 -0
- minitap/mobile_use/sdk/examples/__init__.py +1 -0
- minitap/mobile_use/sdk/examples/app_lock_messaging.py +54 -0
- minitap/mobile_use/sdk/examples/platform_manual_task_example.py +67 -0
- minitap/mobile_use/sdk/examples/platform_minimal_example.py +48 -0
- minitap/mobile_use/sdk/examples/simple_photo_organizer.py +76 -0
- minitap/mobile_use/sdk/examples/smart_notification_assistant.py +225 -0
- minitap/mobile_use/sdk/examples/video_transcription_example.py +117 -0
- minitap/mobile_use/sdk/services/cloud_mobile.py +656 -0
- minitap/mobile_use/sdk/services/platform.py +434 -0
- minitap/mobile_use/sdk/types/__init__.py +51 -0
- minitap/mobile_use/sdk/types/agent.py +84 -0
- minitap/mobile_use/sdk/types/exceptions.py +138 -0
- minitap/mobile_use/sdk/types/platform.py +183 -0
- minitap/mobile_use/sdk/types/task.py +269 -0
- minitap/mobile_use/sdk/utils.py +29 -0
- minitap/mobile_use/services/accessibility.py +100 -0
- minitap/mobile_use/services/llm.py +247 -0
- minitap/mobile_use/services/telemetry.py +421 -0
- minitap/mobile_use/tools/index.py +67 -0
- minitap/mobile_use/tools/mobile/back.py +52 -0
- minitap/mobile_use/tools/mobile/erase_one_char.py +56 -0
- minitap/mobile_use/tools/mobile/focus_and_clear_text.py +317 -0
- minitap/mobile_use/tools/mobile/focus_and_input_text.py +153 -0
- minitap/mobile_use/tools/mobile/launch_app.py +86 -0
- minitap/mobile_use/tools/mobile/long_press_on.py +169 -0
- minitap/mobile_use/tools/mobile/open_link.py +62 -0
- minitap/mobile_use/tools/mobile/press_key.py +83 -0
- minitap/mobile_use/tools/mobile/stop_app.py +62 -0
- minitap/mobile_use/tools/mobile/swipe.py +156 -0
- minitap/mobile_use/tools/mobile/tap.py +154 -0
- minitap/mobile_use/tools/mobile/video_recording.py +177 -0
- minitap/mobile_use/tools/mobile/wait_for_delay.py +81 -0
- minitap/mobile_use/tools/scratchpad.py +147 -0
- minitap/mobile_use/tools/test_utils.py +413 -0
- minitap/mobile_use/tools/tool_wrapper.py +16 -0
- minitap/mobile_use/tools/types.py +35 -0
- minitap/mobile_use/tools/utils.py +336 -0
- minitap/mobile_use/utils/app_launch_utils.py +173 -0
- minitap/mobile_use/utils/cli_helpers.py +37 -0
- minitap/mobile_use/utils/cli_selection.py +143 -0
- minitap/mobile_use/utils/conversations.py +31 -0
- minitap/mobile_use/utils/decorators.py +124 -0
- minitap/mobile_use/utils/errors.py +6 -0
- minitap/mobile_use/utils/file.py +13 -0
- minitap/mobile_use/utils/logger.py +183 -0
- minitap/mobile_use/utils/media.py +186 -0
- minitap/mobile_use/utils/recorder.py +52 -0
- minitap/mobile_use/utils/requests_utils.py +37 -0
- minitap/mobile_use/utils/shell_utils.py +20 -0
- minitap/mobile_use/utils/test_ui_hierarchy.py +178 -0
- minitap/mobile_use/utils/time.py +6 -0
- minitap/mobile_use/utils/ui_hierarchy.py +132 -0
- minitap/mobile_use/utils/video.py +281 -0
- minitap_mobile_use-3.3.0.dist-info/METADATA +329 -0
- minitap_mobile_use-3.3.0.dist-info/RECORD +115 -0
- minitap_mobile_use-3.3.0.dist-info/WHEEL +4 -0
- minitap_mobile_use-3.3.0.dist-info/entry_points.txt +3 -0
|
@@ -0,0 +1,178 @@
|
|
|
1
|
+
from unittest.mock import patch
|
|
2
|
+
|
|
3
|
+
from minitap.mobile_use.utils.ui_hierarchy import (
|
|
4
|
+
ElementBounds,
|
|
5
|
+
Point,
|
|
6
|
+
find_element_by_resource_id,
|
|
7
|
+
get_bounds_for_element,
|
|
8
|
+
get_element_text,
|
|
9
|
+
is_element_focused,
|
|
10
|
+
text_input_is_empty,
|
|
11
|
+
)
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def test_text_input_is_empty():
    """An input counts as empty when it has no text or its text equals the hint."""
    empty_cases = [
        (None, None),
        ("", None),
        ("", ""),
        ("text", "text"),  # text identical to the hint text
    ]
    for text, hint_text in empty_cases:
        assert text_input_is_empty(text=text, hint_text=hint_text)

    filled_cases = [("text", None), ("text", "")]
    for text, hint_text in filled_cases:
        assert not text_input_is_empty(text=text, hint_text=hint_text)
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def test_find_element_by_resource_id():
    """Look up top-level, nested and missing resource ids in a plain hierarchy."""
    nested_button = {
        "resourceId": "com.example:id/nested_button",
        "text": "Nested Button",
        "children": [],
    }
    ui_hierarchy = [
        {"resourceId": "com.example:id/button1", "text": "Button 1", "children": []},
        {"resourceId": "com.example:id/container", "children": [nested_button]},
    ]

    expected_text_by_id = {
        "com.example:id/button1": "Button 1",
        "com.example:id/nested_button": "Nested Button",
    }
    for resource_id, expected_text in expected_text_by_id.items():
        found = find_element_by_resource_id(ui_hierarchy, resource_id)
        assert found is not None
        assert found["resourceId"] == resource_id
        assert found["text"] == expected_text

    # Unknown id and empty hierarchy both yield None.
    assert find_element_by_resource_id(ui_hierarchy, "com.example:id/nonexistent") is None
    assert find_element_by_resource_id([], "com.example:id/button1") is None
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def test_find_element_by_resource_id_rich_hierarchy():
    """Rich-hierarchy lookups expose the match through its "resource-id" key."""

    def node(resource_id, children=()):
        # Rich nodes keep their properties under "attributes".
        return {"attributes": {"resource-id": resource_id}, "children": list(children)}

    rich_hierarchy = [
        node("com.example:id/button1"),
        node("com.example:id/container", [node("com.example:id/nested_button")]),
    ]

    for resource_id in ("com.example:id/button1", "com.example:id/nested_button"):
        found = find_element_by_resource_id(rich_hierarchy, resource_id, is_rich_hierarchy=True)
        assert found is not None
        assert found["resource-id"] == resource_id

    missing = find_element_by_resource_id(
        rich_hierarchy, "com.example:id/nonexistent", is_rich_hierarchy=True
    )
    assert missing is None
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
def test_is_element_focused():
    """Only the string "true" under "focused" marks an element as focused."""
    assert is_element_focused({"focused": "true"})

    unfocused_elements = [
        {"focused": "false"},
        {"text": "some text"},  # "focused" key absent
        {"focused": None},
    ]
    for element in unfocused_elements:
        assert not is_element_focused(element)
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
def test_get_element_text():
    """get_element_text reads "text" by default and "hintText" when hint_text=True."""
    both = {"text": "Button Text", "hintText": "Hint Text"}
    hint_only = {"hintText": "Hint Text"}
    text_only = {"text": "Button Text"}
    neither = {}

    assert get_element_text(both) == "Button Text"
    assert get_element_text(both, hint_text=False) == "Button Text"
    assert get_element_text(both, hint_text=True) == "Hint Text"

    assert get_element_text(hint_only) is None
    assert get_element_text(hint_only, hint_text=True) == "Hint Text"

    assert get_element_text(text_only) == "Button Text"
    assert get_element_text(text_only, hint_text=True) is None

    assert get_element_text(neither) is None
    assert get_element_text(neither, hint_text=True) is None
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
def test_get_bounds_for_element():
    """Valid bounds are parsed; missing or invalid bounds yield None."""
    raw_bounds = {"x": 10, "y": 20, "width": 100, "height": 50}
    parsed = get_bounds_for_element({"bounds": raw_bounds})
    assert parsed is not None
    assert isinstance(parsed, ElementBounds)
    assert (parsed.x, parsed.y, parsed.width, parsed.height) == (10, 20, 100, 50)

    assert get_bounds_for_element({"text": "Button"}) is None

    # "x" should be an int; the error logger is patched to keep the test output quiet.
    invalid_bounds = {"x": "invalid", "y": 20, "width": 100, "height": 50}
    with patch("minitap.mobile_use.utils.ui_hierarchy.logger.error"):
        assert get_bounds_for_element({"bounds": invalid_bounds}) is None
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
def test_element_bounds():
    """Check the centre and several relative points of a 100x50 box at (10, 20)."""
    bounds = ElementBounds(x=10, y=20, width=100, height=50)

    center = bounds.get_center()
    assert isinstance(center, Point)
    assert (center.x, center.y) == (60, 45)

    center_point = bounds.get_relative_point(0.5, 0.5)
    assert isinstance(center_point, Point)
    assert (center_point.x, center_point.y) == (60, 45)

    expected_by_fraction = {
        (0.0, 0.0): (10, 20),  # top-left corner
        (1.0, 1.0): (110, 70),  # bottom-right corner
        (0.95, 0.95): (105, 67),  # 20 + 47.5 is expected to come back as 67
    }
    for (x_percent, y_percent), expected in expected_by_fraction.items():
        point = bounds.get_relative_point(x_percent, y_percent)
        assert (point.x, point.y) == expected
|
|
168
|
+
|
|
169
|
+
|
|
170
|
+
if __name__ == "__main__":
    # Allows running this module directly, without a test runner.
    for run_test in (
        test_text_input_is_empty,
        test_find_element_by_resource_id,
        test_find_element_by_resource_id_rich_hierarchy,
        test_is_element_focused,
        test_get_element_text,
        test_get_bounds_for_element,
        test_element_bounds,
    ):
        run_test()
    print("All tests passed")
|
|
@@ -0,0 +1,132 @@
|
|
|
1
|
+
from pydantic import BaseModel, Field
|
|
2
|
+
|
|
3
|
+
from minitap.mobile_use.utils.logger import get_logger
|
|
4
|
+
|
|
5
|
+
logger = get_logger(__name__)
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def __find_element_by_ressource_id_in_rich_hierarchy(
|
|
9
|
+
hierarchy: list[dict], resource_id: str
|
|
10
|
+
) -> dict | None:
|
|
11
|
+
"""
|
|
12
|
+
Retrieves all the sibling elements for a given resource ID from a nested dictionary.
|
|
13
|
+
|
|
14
|
+
Args:
|
|
15
|
+
hierarchy (dict): The nested dictionary representing the UI hierarchy.
|
|
16
|
+
resource_id (str): The resource-id to find.
|
|
17
|
+
|
|
18
|
+
Returns:
|
|
19
|
+
list: A list of the sibling elements, or None if the resource_id is not found.
|
|
20
|
+
"""
|
|
21
|
+
if not hierarchy:
|
|
22
|
+
return None
|
|
23
|
+
|
|
24
|
+
for child in hierarchy:
|
|
25
|
+
if child.get("attributes", {}).get("resource-id") == resource_id:
|
|
26
|
+
return child.get("attributes", {})
|
|
27
|
+
|
|
28
|
+
for child in hierarchy:
|
|
29
|
+
result = __find_element_by_ressource_id_in_rich_hierarchy(
|
|
30
|
+
child.get("children", []), resource_id
|
|
31
|
+
)
|
|
32
|
+
if result is not None:
|
|
33
|
+
return result
|
|
34
|
+
|
|
35
|
+
return None
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def text_input_is_empty(text: str | None, hint_text: str | None) -> bool:
|
|
39
|
+
return not text or text == hint_text
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def find_element_by_resource_id(
|
|
43
|
+
ui_hierarchy: list[dict],
|
|
44
|
+
resource_id: str,
|
|
45
|
+
index: int | None = None,
|
|
46
|
+
is_rich_hierarchy: bool = False,
|
|
47
|
+
) -> dict | None:
|
|
48
|
+
"""
|
|
49
|
+
Find a UI element by its resource-id in the UI hierarchy.
|
|
50
|
+
|
|
51
|
+
Args:
|
|
52
|
+
ui_hierarchy: List of UI element dictionaries
|
|
53
|
+
resource_id: The resource-id to search for
|
|
54
|
+
(e.g., "com.google.android.settings.intelligence:id/open_search_view_edit_text")
|
|
55
|
+
|
|
56
|
+
Returns:
|
|
57
|
+
The complete UI element dictionary if found, None otherwise
|
|
58
|
+
"""
|
|
59
|
+
if is_rich_hierarchy:
|
|
60
|
+
return __find_element_by_ressource_id_in_rich_hierarchy(ui_hierarchy, resource_id)
|
|
61
|
+
|
|
62
|
+
def search_recursive(elements: list[dict]) -> dict | None:
|
|
63
|
+
for element in elements:
|
|
64
|
+
if isinstance(element, dict):
|
|
65
|
+
if element.get("resourceId") == resource_id:
|
|
66
|
+
idx = index or 0
|
|
67
|
+
if idx == 0:
|
|
68
|
+
return element
|
|
69
|
+
idx -= 1
|
|
70
|
+
continue
|
|
71
|
+
|
|
72
|
+
children = element.get("children", [])
|
|
73
|
+
if children:
|
|
74
|
+
result = search_recursive(children)
|
|
75
|
+
if result:
|
|
76
|
+
return result
|
|
77
|
+
return None
|
|
78
|
+
|
|
79
|
+
return search_recursive(ui_hierarchy)
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
def is_element_focused(element: dict) -> bool:
    """Return True only when the element's "focused" value is the string "true"."""
    focused_flag = element.get("focused")
    return focused_flag == "true"
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
def get_element_text(element: dict, hint_text: bool = False) -> str | None:
|
|
87
|
+
if hint_text:
|
|
88
|
+
return element.get("hintText", None)
|
|
89
|
+
return element.get("text", None)
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
# A pair of integer coordinates; ElementBounds builds these in get_center() and
# get_relative_point(). Documented with comments rather than a class docstring
# so the pydantic model itself is left untouched.
class Point(BaseModel):
    x: int
    y: int
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
# Rectangle described by its top-left corner (x, y) and its size (width, height).
class ElementBounds(BaseModel):
    x: int = Field(description="The x coordinate of the top-left corner of the element.")
    y: int = Field(description="The y coordinate of the top-left corner of the element.")
    width: int = Field(description="The width of the element.")
    height: int = Field(description="The height of the element.")

    def get_center(self) -> Point:
        """Return the centre of the rectangle; the half sizes use integer division."""
        half_width = self.width // 2
        half_height = self.height // 2
        return Point(x=self.x + half_width, y=self.y + half_height)

    def get_relative_point(self, x_percent: float, y_percent: float) -> Point:
        """
        Return the point located at a fraction of the element's width and height.

        ``x_percent`` and ``y_percent`` are fractions, not percentages:
        (0.0, 0.0) is the top-left corner, (0.5, 0.5) the middle, and
        (0.95, 0.95) a point close to the bottom-right corner. The resulting
        coordinates are converted with ``int()``.
        """
        relative_x = self.x + self.width * x_percent
        relative_y = self.y + self.height * y_percent
        return Point(x=int(relative_x), y=int(relative_y))
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
def get_bounds_for_element(element: dict) -> ElementBounds | None:
    """
    Build an ElementBounds from the element's "bounds" entry.

    Returns None when the entry is missing or falsy, or when constructing
    ElementBounds raises; in the latter case the failure is logged as an error.
    """
    raw_bounds = element.get("bounds")
    if not raw_bounds:
        return None

    try:
        parsed = ElementBounds(**raw_bounds)
    except Exception as e:
        logger.error(f"Failed to validate bounds: {e}")
        return None
    return parsed
|
|
@@ -0,0 +1,281 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Video recording utilities for mobile devices.
|
|
3
|
+
|
|
4
|
+
Provides shared types and utilities for video recording across platforms.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import asyncio
|
|
8
|
+
import platform
|
|
9
|
+
import shutil
|
|
10
|
+
from pathlib import Path
|
|
11
|
+
|
|
12
|
+
from pydantic import BaseModel, ConfigDict
|
|
13
|
+
|
|
14
|
+
from minitap.mobile_use.utils.logger import get_logger
|
|
15
|
+
|
|
16
|
+
logger = get_logger(__name__)
|
|
17
|
+
|
|
18
|
+
# NOTE(review): presumably the default cap on a recording's length — confirm with callers.
DEFAULT_MAX_DURATION_SECONDS = 900  # 15 minutes
# NOTE(review): presumably a pause before a finished recording file is used — confirm with callers.
VIDEO_READY_DELAY_SECONDS = 1
# Default value of RecordingSession.android_device_path.
ANDROID_DEVICE_VIDEO_PATH = "/sdcard/screen_recording.mp4"
ANDROID_MAX_RECORDING_DURATION_SECONDS = 180  # Android screenrecord limit

# Gemini API limits: 20MB for inline requests, but base64 adds ~33% overhead
# So we target ~14MB to be safe after base64 encoding
MAX_VIDEO_SIZE_MB = 14
# Same limit in bytes; default target size of compress_video_for_api().
MAX_VIDEO_SIZE_BYTES = MAX_VIDEO_SIZE_MB * 1024 * 1024
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class RecordingSession(BaseModel):
    """Tracks an active video recording session."""

    # asyncio's Process and Task are not pydantic types, hence arbitrary types are allowed.
    model_config = ConfigDict(arbitrary_types_allowed=True)

    # Device the session belongs to; sessions are stored per device_id.
    device_id: str
    # NOTE(review): presumably a time.time()-style timestamp — confirm with callers.
    start_time: float
    # NOTE(review): presumably the running recorder subprocess, if any — confirm.
    process: asyncio.subprocess.Process | None = None
    local_video_path: Path | None = None
    android_device_path: str = ANDROID_DEVICE_VIDEO_PATH
    # NOTE(review): the [] defaults rely on pydantic copying default values per
    # instance — confirm for the pinned pydantic version.
    android_video_segments: list[Path] = []
    android_segment_index: int = 0
    android_restart_task: asyncio.Task | None = None
    errors: list[str] = []
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
class VideoRecordingResult(BaseModel):
    """Result of a video recording operation."""

    # Whether the operation succeeded, with a human-readable message.
    success: bool
    message: str
    # NOTE(review): presumably the produced video file when there is one — confirm with callers.
    video_path: Path | None = None

    model_config = ConfigDict(arbitrary_types_allowed=True)
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
# Global session storage - keyed by device_id
# Module-level mutable state, accessed through get_active_session(),
# set_active_session(), remove_active_session() and has_active_session().
_active_recordings: dict[str, RecordingSession] = {}
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def get_active_session(device_id: str) -> RecordingSession | None:
    """Look up the recording session stored for ``device_id``; None when there is none."""
    session = _active_recordings.get(device_id)
    return session
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def set_active_session(device_id: str, session: RecordingSession) -> None:
    """Store ``session`` for ``device_id``, replacing any session already stored."""
    _active_recordings.update({device_id: session})
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def remove_active_session(device_id: str) -> RecordingSession | None:
    """Drop and return the session stored for ``device_id``; None when nothing was stored."""
    removed = _active_recordings.pop(device_id, None)
    return removed
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def has_active_session(device_id: str) -> bool:
    """Tell whether a recording session is stored for ``device_id``."""
    is_registered = device_id in _active_recordings
    return is_registered
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
def is_ffmpeg_installed() -> bool:
    """Report whether an ``ffmpeg`` executable can be found on the system PATH."""
    ffmpeg_location = shutil.which("ffmpeg")
    return ffmpeg_location is not None
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
class FFmpegNotInstalledError(Exception):
    """Raised when ffmpeg is required but not installed."""

    def __init__(self):
        # Hints for the platforms handled explicitly; anything else
        # (Linux and others) gets the generic package-manager hint.
        known_hints = {
            "darwin": "brew install ffmpeg",  # macOS
            "windows": "Download from https://www.ffmpeg.org/download.html",
        }
        fallback_hint = (
            "Install via your package manager (e.g., apt install ffmpeg, "
            "dnf install ffmpeg) or download from https://www.ffmpeg.org/download.html"
        )
        install_instructions = known_hints.get(platform.system().lower(), fallback_hint)

        message_parts = [
            "\n\n❌ ffmpeg is required for video recording but is not installed.\n\n",
            "Please install ffmpeg first:\n",
            f" → {install_instructions}\n\n",
            "After installation, restart mobile-use.\n",
        ]
        super().__init__("".join(message_parts))
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
def check_ffmpeg_available() -> None:
    """
    Make sure ffmpeg can be used, raising otherwise.

    Raises:
        FFmpegNotInstalledError: If ffmpeg is not found in PATH.
    """
    if is_ffmpeg_installed():
        return
    raise FFmpegNotInstalledError()
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
async def concatenate_videos(segments: list[Path], output_path: Path) -> bool:
    """
    Concatenate multiple video segments into ``output_path`` using ffmpeg.

    A single segment is simply moved to ``output_path``. Several segments are
    joined with ffmpeg's concat demuxer in stream-copy mode (``-c copy``).

    Args:
        segments: Segment files to join, in the order they should appear.
        output_path: Destination file; ffmpeg is run with ``-y`` so an existing
            file is overwritten.

    Returns:
        True when the output file was produced. False when ``segments`` is
        empty, when ffmpeg could not be started, or when it exited with a
        non-zero status.
    """
    if not segments:
        return False

    if len(segments) == 1:
        shutil.move(segments[0], output_path)
        return True

    list_file = output_path.parent / "segments.txt"
    with open(list_file, "w", encoding="utf-8") as f:
        for segment in segments:
            # The concat demuxer resolves relative entries against the list
            # file's directory, so absolute paths are written. A single quote
            # inside a quoted entry has to be spelled '\''.
            entry = str(Path(segment).resolve()).replace("'", "'\\''")
            f.write(f"file '{entry}'\n")

    try:
        process = await asyncio.create_subprocess_exec(
            "ffmpeg",
            "-y",
            "-f",
            "concat",
            "-safe",
            "0",
            "-i",
            str(list_file),
            "-c",
            "copy",
            str(output_path),
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE,
        )
        # communicate() drains both pipes; wait() alone can deadlock once the
        # child has filled the OS pipe buffer with its output.
        _, stderr = await process.communicate()
        if process.returncode != 0:
            # A failed run may leave a partial file behind, so the mere
            # existence of the output is not proof of success.
            logger.error(f"ffmpeg concatenation failed: {stderr.decode(errors='replace')}")
            return False
        return output_path.exists()
    except Exception as e:
        logger.error(f"Failed to concatenate videos: {e}")
        return False
    finally:
        if list_file.exists():
            list_file.unlink()
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
def cleanup_video_segments(segments: list[Path], keep_path: Path | None = None) -> None:
|
|
160
|
+
"""Clean up temporary video segments, optionally keeping one path."""
|
|
161
|
+
for segment in segments:
|
|
162
|
+
try:
|
|
163
|
+
if segment.exists() and segment != keep_path:
|
|
164
|
+
segment.unlink()
|
|
165
|
+
if segment.parent.exists() and not any(segment.parent.iterdir()):
|
|
166
|
+
segment.parent.rmdir()
|
|
167
|
+
except Exception:
|
|
168
|
+
pass
|
|
169
|
+
|
|
170
|
+
|
|
171
|
+
async def compress_video_for_api(
    input_path: Path,
    target_size_bytes: int = MAX_VIDEO_SIZE_BYTES,
) -> Path:
    """
    Compress a video to fit within API size limits using ffmpeg.

    Works in two steps (this is not ffmpeg's two-pass encoding):
    1. If the video is already small enough, return it untouched.
    2. Otherwise re-encode it with libx264, at most 720 pixels wide, at a
       bitrate derived from the target size and the duration probed with ffprobe.

    Args:
        input_path: Path to the input video file
        target_size_bytes: Target maximum file size in bytes

    Returns:
        Path to the compressed video, written next to the input as
        ``compressed_<name>``. The input path is returned when no compression
        is needed or when compression fails.

    Raises:
        FileNotFoundError: If ``input_path`` does not exist.
    """
    if not input_path.exists():
        raise FileNotFoundError(f"Video file not found: {input_path}")

    current_size = input_path.stat().st_size
    logger.info(f"Video size: {current_size / 1024 / 1024:.2f} MB")

    if current_size <= target_size_bytes:
        logger.info("Video already within size limit, no compression needed")
        return input_path

    logger.info(f"Compressing video to fit within {target_size_bytes / 1024 / 1024:.1f} MB")

    output_path = input_path.parent / f"compressed_{input_path.name}"

    def _remove_partial_output() -> None:
        # A failed ffmpeg run can leave a truncated file next to the input.
        try:
            output_path.unlink(missing_ok=True)
        except OSError as e:
            logger.error(f"Could not remove partial output {output_path}: {e}")

    # Get video duration using ffprobe
    duration_cmd = [
        "ffprobe",
        "-v",
        "error",
        "-show_entries",
        "format=duration",
        "-of",
        "default=noprint_wrappers=1:nokey=1",
        str(input_path),
    ]

    try:
        proc = await asyncio.create_subprocess_exec(
            *duration_cmd,
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE,
        )
        stdout, _ = await proc.communicate()
        duration = float(stdout.decode().strip())
        # Zero, negative, NaN or infinite durations would break the bitrate
        # computation below, so they are treated like a failed probe.
        if not 0 < duration < float("inf"):
            raise ValueError(f"Unusable duration reported by ffprobe: {duration}")
    except Exception:
        duration = 120.0  # Default estimate if probe fails

    # Calculate target bitrate (bits per second)
    # Leave some margin for container overhead
    target_bitrate = int((target_size_bytes * 8 * 0.9) / duration)
    # Ensure minimum quality
    target_bitrate = max(target_bitrate, 100_000)  # At least 100kbps

    logger.info(f"Target bitrate: {target_bitrate / 1000:.0f} kbps for {duration:.1f}s video")

    # Compress with ffmpeg: limit the width to 720 pixels, use target bitrate
    compress_cmd = [
        "ffmpeg",
        "-y",
        "-i",
        str(input_path),
        "-vf",
        "scale='min(720,iw)':'-2'",  # Width capped at 720, height follows the aspect ratio
        "-c:v",
        "libx264",
        "-preset",
        "fast",
        "-b:v",
        str(target_bitrate),
        "-maxrate",
        str(int(target_bitrate * 1.5)),
        "-bufsize",
        str(int(target_bitrate * 2)),
        "-c:a",
        "aac",
        "-b:a",
        "64k",
        str(output_path),
    ]

    try:
        proc = await asyncio.create_subprocess_exec(
            *compress_cmd,
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE,
        )
        _, stderr = await proc.communicate()

        if proc.returncode != 0:
            # errors="replace": ffmpeg's stderr is not guaranteed to be valid UTF-8.
            logger.error(f"ffmpeg compression failed: {stderr.decode(errors='replace')}")
            _remove_partial_output()
            return input_path  # Return original if compression fails

        new_size = output_path.stat().st_size
        logger.info(
            f"Compressed: {current_size / 1024 / 1024:.2f} MB -> "
            f"{new_size / 1024 / 1024:.2f} MB"
        )

        return output_path

    except Exception as e:
        logger.error(f"Video compression failed: {e}")
        _remove_partial_output()
        return input_path  # Return original if compression fails
|