mobai-mcp 1.4.1 → 1.5.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -174,7 +174,7 @@ const TOOLS = [
174
174
  },
175
175
  {
176
176
  name: "get_screenshot",
177
- description: "Capture a screenshot from the device. Returns the file path to the saved PNG.",
177
+ description: "Capture a screenshot from the device. By default saves to /tmp/mobai/screenshots/ and returns the file path. Use path/name to save to a custom location on the host computer.",
178
178
  inputSchema: {
179
179
  type: "object",
180
180
  properties: {
@@ -182,6 +182,14 @@ const TOOLS = [
182
182
  type: "string",
183
183
  description: "Device ID",
184
184
  },
185
+ path: {
186
+ type: "string",
187
+ description: "Custom directory to save the screenshot (supports ~/). Example: ~/Downloads",
188
+ },
189
+ name: {
190
+ type: "string",
191
+ description: "Custom filename without .png extension. Defaults to timestamp-based name.",
192
+ },
185
193
  },
186
194
  required: ["device_id"],
187
195
  },
@@ -806,9 +814,16 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
806
814
  case "stop_bridge":
807
815
  result = await makeRequest("POST", `/devices/${args?.device_id}/bridge/stop`);
808
816
  break;
809
- case "get_screenshot":
810
- result = await makeRequest("GET", `/devices/${args?.device_id}/screenshot`);
817
+ case "get_screenshot": {
818
+ const screenshotParams = new URLSearchParams();
819
+ if (args?.path)
820
+ screenshotParams.set("path", args.path);
821
+ if (args?.name)
822
+ screenshotParams.set("name", args.name);
823
+ const screenshotQuery = screenshotParams.toString();
824
+ result = await makeRequest("GET", `/devices/${args?.device_id}/screenshot${screenshotQuery ? "?" + screenshotQuery : ""}`);
811
825
  break;
826
+ }
812
827
  case "get_ui_tree": {
813
828
  const params = new URLSearchParams();
814
829
  if (args?.verbose)
@@ -1224,21 +1239,29 @@ The DSL (Domain Specific Language) enables batch execution of multiple automatio
1224
1239
  |--------|-------------|------------|
1225
1240
  | observe | Get UI tree/screenshot/OCR | context, include (ui_tree, screenshot, installed_apps, ocr), filter ({text_regex, bounds}) |
1226
1241
  | tap | Tap element | predicate or coords |
1227
- | type | Type text | text, predicate (if keyboard not open), dismiss_keyboard (default: false) |
1242
+ | double_tap | Double-tap element | predicate or coords |
1243
+ | long_press | Long-press element | predicate or coords, duration_ms (default: 1000) |
1244
+ | two_finger_tap | Two-finger tap element | predicate or coords |
1245
+ | type | Type text | text, predicate (if keyboard not open), clear_first, dismiss_keyboard (default: false) |
1228
1246
  | press_key | Press keyboard key | key (return, tab, delete, etc.), context (optional: "web") |
1229
1247
  | toggle | Set switch state | predicate, state ("on"/"off") |
1230
- | swipe | Swipe gesture | direction, distance, duration_ms |
1231
- | scroll | Scroll in container | direction, predicate (container), to_element |
1248
+ | swipe | Swipe gesture | direction, distance, duration_ms, or from_coords/to_coords |
1249
+ | scroll | Scroll in container | direction, predicate (container), to_element, max_scrolls |
1250
+ | drag | Drag element to target | from (predicate), to_element (predicate), or from_coords/to_coords, duration_ms, press_duration_ms |
1232
1251
  | open_app | Launch app | bundle_id |
1252
+ | kill_app | Force-kill running app | bundle_id |
1233
1253
  | navigate | Go home/back | target ("home", "back") |
1234
1254
  | wait_for | Wait for element or UI stability | predicate, timeout_ms, poll_interval_ms, stable (wait for UI to stop changing) |
1255
+ | delay | Wait fixed time | duration_ms |
1235
1256
  | screenshot | Save screenshot to file | file_path (directory), name (optional filename) |
1236
1257
  | assert_exists | Verify element exists | predicate, timeout_ms |
1237
1258
  | assert_not_exists | Verify element gone | predicate |
1238
- | delay | Wait fixed time | duration_ms |
1259
+ | assert_count | Verify element count | predicate, count |
1260
+ | assert_screen_changed | Verify screen changed | (compared to last observe) |
1261
+ | checkpoint | Mark a test checkpoint | name |
1239
1262
  | if_exists | Conditional | predicate, then, else |
1240
1263
  | select_web_context | Select browser/WebView | url_contains, title_contains (optional filters) |
1241
- | kill_app | Force-kill running app | bundle_id |
1264
+ | execute_js | Run JavaScript in web context | script |
1242
1265
  | set_location | Simulate GPS location (Android 12+ for real devices) | lat, lon |
1243
1266
  | reset_location | Reset to real GPS (Android 12+ for real devices) | (no fields) |
1244
1267
  | metrics_start | Start performance monitoring | types, bundle_id, label, thresholds, capture_logs |
@@ -1251,11 +1274,14 @@ Match elements by:
1251
1274
  - \`text_contains\`: Contains substring (case-insensitive)
1252
1275
  - \`text_starts_with\`: Starts with prefix
1253
1276
  - \`text_regex\`: Regex pattern
1254
- - \`type\`: Element type (button, input, switch, etc.)
1255
- - \`label\`: Accessibility label
1256
- - \`bounds_hint\`: Screen region (top_half, bottom_half, center, etc.)
1257
- - \`near\`: Near another element
1258
- - \`index\`: Select Nth match
1277
+ - \`type\`: Element type (button, input, switch, text, image, cell, scrollview)
1278
+ - \`label\`: Accessibility label (exact)
1279
+ - \`label_contains\`: Accessibility label (partial)
1280
+ - \`bounds_hint\`: Screen region (top_half, bottom_half, left_half, right_half, center)
1281
+ - \`near\`: Near another element: {"text": "Label"} or {"text": "Label", "direction": "below"}
1282
+ - \`index\`: Select Nth match (0-based)
1283
+ - \`parent_of\`: Find parent containing child: {"parent_of": {"text": "child"}}
1284
+ - \`enabled\`/\`visible\`/\`selected\`: Boolean state filters
1259
1285
 
1260
1286
  ## Examples
1261
1287
 
@@ -1271,11 +1297,32 @@ Match elements by:
1271
1297
 
1272
1298
  Note: \`predicate\` is required if keyboard is not already open. Use \`dismiss_keyboard: true\` to close keyboard after typing.
1273
1299
 
1300
+ ### Double Tap
1301
+ \`\`\`json
1302
+ {"action": "double_tap", "predicate": {"text": "Image"}}
1303
+ \`\`\`
1304
+
1305
+ ### Long Press
1306
+ \`\`\`json
1307
+ {"action": "long_press", "predicate": {"text": "Message"}, "duration_ms": 1500}
1308
+ \`\`\`
1309
+
1274
1310
  ### Toggle Switch
1275
1311
  \`\`\`json
1276
1312
  {"action": "toggle", "predicate": {"type": "switch", "text_contains": "WiFi"}, "state": "on"}
1277
1313
  \`\`\`
1278
1314
 
1315
+ ### Drag Element
1316
+ \`\`\`json
1317
+ {"action": "drag", "from": {"predicate": {"text": "Item"}}, "to_element": {"predicate": {"text": "Trash"}}}
1318
+ {"action": "drag", "from_coords": {"x": 100, "y": 200}, "to_coords": {"x": 300, "y": 400}, "duration_ms": 500}
1319
+ \`\`\`
1320
+
1321
+ ### Assert Count
1322
+ \`\`\`json
1323
+ {"action": "assert_count", "predicate": {"type": "cell"}, "count": 5}
1324
+ \`\`\`
1325
+
1279
1326
  ### Scroll Until Found
1280
1327
  \`\`\`json
1281
1328
  {"action": "scroll", "direction": "down", "to_element": {"predicate": {"text": "Privacy"}}, "max_scrolls": 10}
@@ -1519,10 +1566,14 @@ The \`type\` action requires either:
1519
1566
  | Action | Description | Key Fields |
1520
1567
  |--------|-------------|------------|
1521
1568
  | tap | Tap element | predicate or coords |
1522
- | type | Type text | text, predicate (if keyboard not open), dismiss_keyboard (default: false) |
1569
+ | double_tap | Double-tap element | predicate or coords |
1570
+ | long_press | Long-press element | predicate or coords, duration_ms |
1571
+ | type | Type text | text, predicate (if keyboard not open), clear_first, dismiss_keyboard |
1523
1572
  | press_key | Press keyboard key | key (return, tab, delete, etc.) |
1524
- | swipe | Swipe gesture | direction, distance |
1525
- | scroll | Scroll container | direction, to_element |
1573
+ | toggle | Set switch state | predicate, state ("on"/"off") |
1574
+ | swipe | Swipe gesture | direction, distance, duration_ms, or from_coords/to_coords |
1575
+ | scroll | Scroll container | direction, to_element, max_scrolls |
1576
+ | drag | Drag element | from/to_element (predicates), or from_coords/to_coords |
1526
1577
 
1527
1578
  ## Tips
1528
1579
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "mobai-mcp",
3
- "version": "1.4.1",
3
+ "version": "1.5.0",
4
4
  "mcpName": "io.github.MobAI-App/mobai-mcp",
5
5
  "description": "MCP server for MobAI - AI-powered mobile device automation",
6
6
  "type": "module",
package/server.json CHANGED
@@ -6,12 +6,12 @@
6
6
  "url": "https://github.com/MobAI-App/mobai-mcp",
7
7
  "source": "github"
8
8
  },
9
- "version": "1.4.1",
9
+ "version": "1.5.0",
10
10
  "packages": [
11
11
  {
12
12
  "registryType": "npm",
13
13
  "identifier": "mobai-mcp",
14
- "version": "1.4.1",
14
+ "version": "1.5.0",
15
15
  "transport": {
16
16
  "type": "stdio"
17
17
  }