mobai-mcp 1.4.0 → 1.5.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
package/README.md CHANGED
@@ -88,7 +88,7 @@ Configure according to your client's MCP server setup. The server uses stdio tra
88
88
 
89
89
  ### UI Automation
90
90
  - `get_screenshot` - Capture device screenshot
91
- - `get_ui_tree` - Get accessibility tree
91
+ - `get_ui_tree` - Get accessibility tree (supports text_regex and bounds filtering)
92
92
  - `tap` - Tap element by index or coordinates
93
93
  - `type_text` - Type text
94
94
  - `swipe` - Perform swipe gesture
package/dist/index.js CHANGED
@@ -174,7 +174,7 @@ const TOOLS = [
174
174
  },
175
175
  {
176
176
  name: "get_screenshot",
177
- description: "Capture a screenshot from the device. Returns the file path to the saved PNG.",
177
+ description: "Capture a screenshot from the device. By default saves to /tmp/mobai/screenshots/ and returns the file path. Use path/name to save to a custom location on the host computer.",
178
178
  inputSchema: {
179
179
  type: "object",
180
180
  properties: {
@@ -182,6 +182,14 @@ const TOOLS = [
182
182
  type: "string",
183
183
  description: "Device ID",
184
184
  },
185
+ path: {
186
+ type: "string",
187
+ description: "Custom directory to save the screenshot (supports ~/). Example: ~/Downloads",
188
+ },
189
+ name: {
190
+ type: "string",
191
+ description: "Custom filename without .png extension. Defaults to timestamp-based name.",
192
+ },
185
193
  },
186
194
  required: ["device_id"],
187
195
  },
@@ -208,6 +216,21 @@ const TOOLS = [
208
216
  type: "boolean",
209
217
  description: "Include keyboard elements in the tree (default: false). Useful for interacting with on-screen keyboards.",
210
218
  },
219
+ text_regex: {
220
+ type: "string",
221
+ description: "Regex to filter elements by text/value/contentDesc. Only matching elements are returned.",
222
+ },
223
+ bounds: {
224
+ type: "object",
225
+ description: "Filter to elements within a bounding rectangle",
226
+ properties: {
227
+ x: { type: "number", description: "Left X coordinate" },
228
+ y: { type: "number", description: "Top Y coordinate" },
229
+ w: { type: "number", description: "Width" },
230
+ h: { type: "number", description: "Height" },
231
+ },
232
+ required: ["x", "y", "w", "h"],
233
+ },
211
234
  },
212
235
  required: ["device_id"],
213
236
  },
@@ -791,9 +814,16 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
791
814
  case "stop_bridge":
792
815
  result = await makeRequest("POST", `/devices/${args?.device_id}/bridge/stop`);
793
816
  break;
794
- case "get_screenshot":
795
- result = await makeRequest("GET", `/devices/${args?.device_id}/screenshot`);
817
+ case "get_screenshot": {
818
+ const screenshotParams = new URLSearchParams();
819
+ if (args?.path)
820
+ screenshotParams.set("path", args.path);
821
+ if (args?.name)
822
+ screenshotParams.set("name", args.name);
823
+ const screenshotQuery = screenshotParams.toString();
824
+ result = await makeRequest("GET", `/devices/${args?.device_id}/screenshot${screenshotQuery ? "?" + screenshotQuery : ""}`);
796
825
  break;
826
+ }
797
827
  case "get_ui_tree": {
798
828
  const params = new URLSearchParams();
799
829
  if (args?.verbose)
@@ -802,6 +832,15 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
802
832
  params.set("onlyVisible", "false");
803
833
  if (args?.include_keyboard)
804
834
  params.set("includeKeyboard", "true");
835
+ if (args?.text_regex)
836
+ params.set("textRegex", args.text_regex);
837
+ if (args?.bounds) {
838
+ const b = args.bounds;
839
+ params.set("boundsX", String(b.x));
840
+ params.set("boundsY", String(b.y));
841
+ params.set("boundsW", String(b.w));
842
+ params.set("boundsH", String(b.h));
843
+ }
805
844
  const queryString = params.toString();
806
845
  const endpoint = `/devices/${args?.device_id}/ui-tree${queryString ? `?${queryString}` : ""}`;
807
846
  result = await makeRequest("GET", endpoint);
@@ -1198,23 +1237,31 @@ The DSL (Domain Specific Language) enables batch execution of multiple automatio
1198
1237
 
1199
1238
  | Action | Description | Key Fields |
1200
1239
  |--------|-------------|------------|
1201
- | observe | Get UI tree/screenshot/OCR | context, include (ui_tree, screenshot, installed_apps, ocr) |
1240
+ | observe | Get UI tree/screenshot/OCR | context, include (ui_tree, screenshot, installed_apps, ocr), filter ({text_regex, bounds}) |
1202
1241
  | tap | Tap element | predicate or coords |
1203
- | type | Type text | text, predicate (if keyboard not open), dismiss_keyboard (default: false) |
1242
+ | double_tap | Double-tap element | predicate or coords |
1243
+ | long_press | Long-press element | predicate or coords, duration_ms (default: 1000) |
1244
+ | two_finger_tap | Two-finger tap element | predicate or coords |
1245
+ | type | Type text | text, predicate (if keyboard not open), clear_first, dismiss_keyboard (default: false) |
1204
1246
  | press_key | Press keyboard key | key (return, tab, delete, etc.), context (optional: "web") |
1205
1247
  | toggle | Set switch state | predicate, state ("on"/"off") |
1206
- | swipe | Swipe gesture | direction, distance, duration_ms |
1207
- | scroll | Scroll in container | direction, predicate (container), to_element |
1248
+ | swipe | Swipe gesture | direction, distance, duration_ms, or from_coords/to_coords |
1249
+ | scroll | Scroll in container | direction, predicate (container), to_element, max_scrolls |
1250
+ | drag | Drag element to target | from (predicate), to_element (predicate), or from_coords/to_coords, duration_ms, press_duration_ms |
1208
1251
  | open_app | Launch app | bundle_id |
1252
+ | kill_app | Force-kill running app | bundle_id |
1209
1253
  | navigate | Go home/back | target ("home", "back") |
1210
1254
  | wait_for | Wait for element or UI stability | predicate, timeout_ms, poll_interval_ms, stable (wait for UI to stop changing) |
1255
+ | delay | Wait fixed time | duration_ms |
1211
1256
  | screenshot | Save screenshot to file | file_path (directory), name (optional filename) |
1212
1257
  | assert_exists | Verify element exists | predicate, timeout_ms |
1213
1258
  | assert_not_exists | Verify element gone | predicate |
1214
- | delay | Wait fixed time | duration_ms |
1259
+ | assert_count | Verify element count | predicate, count |
1260
+ | assert_screen_changed | Verify screen changed | (compared to last observe) |
1261
+ | checkpoint | Mark a test checkpoint | name |
1215
1262
  | if_exists | Conditional | predicate, then, else |
1216
1263
  | select_web_context | Select browser/WebView | url_contains, title_contains (optional filters) |
1217
- | kill_app | Force-kill running app | bundle_id |
1264
+ | execute_js | Run JavaScript in web context | script |
1218
1265
  | set_location | Simulate GPS location (Android 12+ for real devices) | lat, lon |
1219
1266
  | reset_location | Reset to real GPS (Android 12+ for real devices) | (no fields) |
1220
1267
  | metrics_start | Start performance monitoring | types, bundle_id, label, thresholds, capture_logs |
@@ -1227,11 +1274,14 @@ Match elements by:
1227
1274
  - \`text_contains\`: Contains substring (case-insensitive)
1228
1275
  - \`text_starts_with\`: Starts with prefix
1229
1276
  - \`text_regex\`: Regex pattern
1230
- - \`type\`: Element type (button, input, switch, etc.)
1231
- - \`label\`: Accessibility label
1232
- - \`bounds_hint\`: Screen region (top_half, bottom_half, center, etc.)
1233
- - \`near\`: Near another element
1234
- - \`index\`: Select Nth match
1277
+ - \`type\`: Element type (button, input, switch, text, image, cell, scrollview)
1278
+ - \`label\`: Accessibility label (exact)
1279
+ - \`label_contains\`: Accessibility label (partial)
1280
+ - \`bounds_hint\`: Screen region (top_half, bottom_half, left_half, right_half, center)
1281
+ - \`near\`: Near another element: {"text": "Label"} or {"text": "Label", "direction": "below"}
1282
+ - \`index\`: Select Nth match (0-based)
1283
+ - \`parent_of\`: Find parent containing child: {"parent_of": {"text": "child"}}
1284
+ - \`enabled\`/\`visible\`/\`selected\`: Boolean state filters
1235
1285
 
1236
1286
  ## Examples
1237
1287
 
@@ -1247,11 +1297,32 @@ Match elements by:
1247
1297
 
1248
1298
  Note: \`predicate\` is required if keyboard is not already open. Use \`dismiss_keyboard: true\` to close keyboard after typing.
1249
1299
 
1300
+ ### Double Tap
1301
+ \`\`\`json
1302
+ {"action": "double_tap", "predicate": {"text": "Image"}}
1303
+ \`\`\`
1304
+
1305
+ ### Long Press
1306
+ \`\`\`json
1307
+ {"action": "long_press", "predicate": {"text": "Message"}, "duration_ms": 1500}
1308
+ \`\`\`
1309
+
1250
1310
  ### Toggle Switch
1251
1311
  \`\`\`json
1252
1312
  {"action": "toggle", "predicate": {"type": "switch", "text_contains": "WiFi"}, "state": "on"}
1253
1313
  \`\`\`
1254
1314
 
1315
+ ### Drag Element
1316
+ \`\`\`json
1317
+ {"action": "drag", "from": {"predicate": {"text": "Item"}}, "to_element": {"predicate": {"text": "Trash"}}}
1318
+ {"action": "drag", "from_coords": {"x": 100, "y": 200}, "to_coords": {"x": 300, "y": 400}, "duration_ms": 500}
1319
+ \`\`\`
1320
+
1321
+ ### Assert Count
1322
+ \`\`\`json
1323
+ {"action": "assert_count", "predicate": {"type": "cell"}, "count": 5}
1324
+ \`\`\`
1325
+
1255
1326
  ### Scroll Until Found
1256
1327
  \`\`\`json
1257
1328
  {"action": "scroll", "direction": "down", "to_element": {"predicate": {"text": "Privacy"}}, "max_scrolls": 10}
@@ -1495,10 +1566,14 @@ The \`type\` action requires either:
1495
1566
  | Action | Description | Key Fields |
1496
1567
  |--------|-------------|------------|
1497
1568
  | tap | Tap element | predicate or coords |
1498
- | type | Type text | text, predicate (if keyboard not open), dismiss_keyboard (default: false) |
1569
+ | double_tap | Double-tap element | predicate or coords |
1570
+ | long_press | Long-press element | predicate or coords, duration_ms |
1571
+ | type | Type text | text, predicate (if keyboard not open), clear_first, dismiss_keyboard |
1499
1572
  | press_key | Press keyboard key | key (return, tab, delete, etc.) |
1500
- | swipe | Swipe gesture | direction, distance |
1501
- | scroll | Scroll container | direction, to_element |
1573
+ | toggle | Set switch state | predicate, state ("on"/"off") |
1574
+ | swipe | Swipe gesture | direction, distance, duration_ms, or from_coords/to_coords |
1575
+ | scroll | Scroll container | direction, to_element, max_scrolls |
1576
+ | drag | Drag element | from/to_element (predicates), or from_coords/to_coords |
1502
1577
 
1503
1578
  ## Tips
1504
1579
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "mobai-mcp",
3
- "version": "1.4.0",
3
+ "version": "1.5.0",
4
4
  "mcpName": "io.github.MobAI-App/mobai-mcp",
5
5
  "description": "MCP server for MobAI - AI-powered mobile device automation",
6
6
  "type": "module",
package/server.json CHANGED
@@ -6,12 +6,12 @@
6
6
  "url": "https://github.com/MobAI-App/mobai-mcp",
7
7
  "source": "github"
8
8
  },
9
- "version": "1.2.1",
9
+ "version": "1.5.0",
10
10
  "packages": [
11
11
  {
12
12
  "registryType": "npm",
13
13
  "identifier": "mobai-mcp",
14
- "version": "1.2.1",
14
+ "version": "1.5.0",
15
15
  "transport": {
16
16
  "type": "stdio"
17
17
  }