mobai-mcp 1.3.0 → 1.4.1

This diff shows the changes between publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between those package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -88,7 +88,7 @@ Configure according to your client's MCP server setup. The server uses stdio tra
88
88
 
89
89
  ### UI Automation
90
90
  - `get_screenshot` - Capture device screenshot
91
- - `get_ui_tree` - Get accessibility tree
91
+ - `get_ui_tree` - Get accessibility tree (supports text_regex and bounds filtering)
92
92
  - `tap` - Tap element by index or coordinates
93
93
  - `type_text` - Type text
94
94
  - `swipe` - Perform swipe gesture
package/dist/index.js CHANGED
@@ -208,6 +208,21 @@ const TOOLS = [
208
208
  type: "boolean",
209
209
  description: "Include keyboard elements in the tree (default: false). Useful for interacting with on-screen keyboards.",
210
210
  },
211
+ text_regex: {
212
+ type: "string",
213
+ description: "Regex to filter elements by text/value/contentDesc. Only matching elements are returned.",
214
+ },
215
+ bounds: {
216
+ type: "object",
217
+ description: "Filter to elements within a bounding rectangle",
218
+ properties: {
219
+ x: { type: "number", description: "Left X coordinate" },
220
+ y: { type: "number", description: "Top Y coordinate" },
221
+ w: { type: "number", description: "Width" },
222
+ h: { type: "number", description: "Height" },
223
+ },
224
+ required: ["x", "y", "w", "h"],
225
+ },
211
226
  },
212
227
  required: ["device_id"],
213
228
  },
@@ -238,6 +253,136 @@ const TOOLS = [
238
253
  required: ["device_id"],
239
254
  },
240
255
  },
256
+ {
257
+ name: "double_tap",
258
+ description: "Double tap an element by index (from UI tree) or coordinates",
259
+ inputSchema: {
260
+ type: "object",
261
+ properties: {
262
+ device_id: {
263
+ type: "string",
264
+ description: "Device ID",
265
+ },
266
+ index: {
267
+ type: "number",
268
+ description: "Element index from UI tree (preferred)",
269
+ },
270
+ x: {
271
+ type: "number",
272
+ description: "X coordinate (use with y instead of index)",
273
+ },
274
+ y: {
275
+ type: "number",
276
+ description: "Y coordinate (use with x instead of index)",
277
+ },
278
+ },
279
+ required: ["device_id"],
280
+ },
281
+ },
282
+ {
283
+ name: "long_press",
284
+ description: "Long press an element by index (from UI tree) or coordinates. Uses a fixed 0.5s hold duration.",
285
+ inputSchema: {
286
+ type: "object",
287
+ properties: {
288
+ device_id: {
289
+ type: "string",
290
+ description: "Device ID",
291
+ },
292
+ index: {
293
+ type: "number",
294
+ description: "Element index from UI tree (preferred)",
295
+ },
296
+ x: {
297
+ type: "number",
298
+ description: "X coordinate (use with y instead of index)",
299
+ },
300
+ y: {
301
+ type: "number",
302
+ description: "Y coordinate (use with x instead of index)",
303
+ },
304
+ },
305
+ required: ["device_id"],
306
+ },
307
+ },
308
+ {
309
+ name: "two_finger_tap",
310
+ description: "Perform a two-finger tap at coordinates (iOS only)",
311
+ inputSchema: {
312
+ type: "object",
313
+ properties: {
314
+ device_id: {
315
+ type: "string",
316
+ description: "Device ID",
317
+ },
318
+ index: {
319
+ type: "number",
320
+ description: "Element index from UI tree (preferred)",
321
+ },
322
+ x: {
323
+ type: "number",
324
+ description: "X coordinate (use with y instead of index)",
325
+ },
326
+ y: {
327
+ type: "number",
328
+ description: "Y coordinate (use with x instead of index)",
329
+ },
330
+ },
331
+ required: ["device_id"],
332
+ },
333
+ },
334
+ {
335
+ name: "drag",
336
+ description: "Drag from one point to another (press, hold, move, release)",
337
+ inputSchema: {
338
+ type: "object",
339
+ properties: {
340
+ device_id: {
341
+ type: "string",
342
+ description: "Device ID",
343
+ },
344
+ from_x: {
345
+ type: "number",
346
+ description: "Starting X coordinate",
347
+ },
348
+ from_y: {
349
+ type: "number",
350
+ description: "Starting Y coordinate",
351
+ },
352
+ to_x: {
353
+ type: "number",
354
+ description: "Ending X coordinate",
355
+ },
356
+ to_y: {
357
+ type: "number",
358
+ description: "Ending Y coordinate",
359
+ },
360
+ duration_ms: {
361
+ type: "number",
362
+ description: "Drag duration in milliseconds (default: 500)",
363
+ },
364
+ press_duration_ms: {
365
+ type: "number",
366
+ description: "Hold duration before dragging in milliseconds (0 = no hold). Use for press-and-drag gestures like moving app icons.",
367
+ },
368
+ },
369
+ required: ["device_id", "from_x", "from_y", "to_x", "to_y"],
370
+ },
371
+ },
372
+ {
373
+ name: "dismiss_keyboard",
374
+ description: "Dismiss the on-screen keyboard if visible",
375
+ inputSchema: {
376
+ type: "object",
377
+ properties: {
378
+ device_id: {
379
+ type: "string",
380
+ description: "Device ID",
381
+ },
382
+ },
383
+ required: ["device_id"],
384
+ },
385
+ },
241
386
  {
242
387
  name: "type_text",
243
388
  description: "Type text on the device (tap input field first to focus)",
@@ -355,7 +500,7 @@ const TOOLS = [
355
500
  description: `Execute a batch of automation steps using the DSL (Domain Specific Language).
356
501
  This is the PREFERRED method for complex automation as it's more reliable than sequential API calls.
357
502
 
358
- DSL supports: observe, tap, type, toggle, swipe, scroll, open_app, navigate, wait_for, assert_*, if_exists, delay, execute_js (web)
503
+ DSL supports: observe, tap, type, toggle, swipe, scroll, open_app, kill_app, navigate, wait_for, screenshot, set_location, reset_location, assert_*, if_exists, delay, execute_js (web)
359
504
 
360
505
  Example DSL script:
361
506
  {
@@ -530,6 +675,78 @@ Example DSL script:
530
675
  required: ["device_id", "script"],
531
676
  },
532
677
  },
678
+ {
679
+ name: "uninstall_app",
680
+ description: "Uninstall an application from the device by bundle ID / package name.",
681
+ inputSchema: {
682
+ type: "object",
683
+ properties: {
684
+ device_id: {
685
+ type: "string",
686
+ description: "Device ID",
687
+ },
688
+ bundle_id: {
689
+ type: "string",
690
+ description: "App bundle ID (iOS) or package name (Android) to uninstall",
691
+ },
692
+ },
693
+ required: ["device_id", "bundle_id"],
694
+ },
695
+ },
696
+ {
697
+ name: "kill_app",
698
+ description: "Force-kill a running application. On iOS (17+), uses CoreDevice appservice SIGKILL. On Android, uses 'am force-stop'.",
699
+ inputSchema: {
700
+ type: "object",
701
+ properties: {
702
+ device_id: {
703
+ type: "string",
704
+ description: "Device ID",
705
+ },
706
+ bundle_id: {
707
+ type: "string",
708
+ description: "Bundle ID / package name of the app to kill",
709
+ },
710
+ },
711
+ required: ["device_id", "bundle_id"],
712
+ },
713
+ },
714
+ {
715
+ name: "set_location",
716
+ description: "Set a simulated GPS location on the device. Supports: iOS (all versions), Android emulators (all versions), Android real devices (12+ only).",
717
+ inputSchema: {
718
+ type: "object",
719
+ properties: {
720
+ device_id: {
721
+ type: "string",
722
+ description: "Device ID",
723
+ },
724
+ lat: {
725
+ type: "number",
726
+ description: "Latitude (-90 to 90)",
727
+ },
728
+ lon: {
729
+ type: "number",
730
+ description: "Longitude (-180 to 180)",
731
+ },
732
+ },
733
+ required: ["device_id", "lat", "lon"],
734
+ },
735
+ },
736
+ {
737
+ name: "reset_location",
738
+ description: "Reset the device location to its real GPS position, removing any simulated location. Supports: iOS (all versions), Android emulators (all versions), Android real devices (12+ only).",
739
+ inputSchema: {
740
+ type: "object",
741
+ properties: {
742
+ device_id: {
743
+ type: "string",
744
+ description: "Device ID",
745
+ },
746
+ },
747
+ required: ["device_id"],
748
+ },
749
+ },
533
750
  {
534
751
  name: "http_request",
535
752
  description: `Make a raw HTTP request to the MobAI API. Use this for advanced operations not covered by other tools.
@@ -600,6 +817,15 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
600
817
  params.set("onlyVisible", "false");
601
818
  if (args?.include_keyboard)
602
819
  params.set("includeKeyboard", "true");
820
+ if (args?.text_regex)
821
+ params.set("textRegex", args.text_regex);
822
+ if (args?.bounds) {
823
+ const b = args.bounds;
824
+ params.set("boundsX", String(b.x));
825
+ params.set("boundsY", String(b.y));
826
+ params.set("boundsW", String(b.w));
827
+ params.set("boundsH", String(b.h));
828
+ }
603
829
  const queryString = params.toString();
604
830
  const endpoint = `/devices/${args?.device_id}/ui-tree${queryString ? `?${queryString}` : ""}`;
605
831
  result = await makeRequest("GET", endpoint);
@@ -616,6 +842,56 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
616
842
  result = await makeRequest("POST", `/devices/${args?.device_id}/tap`, body);
617
843
  break;
618
844
  }
845
+ case "double_tap": {
846
+ const body = {};
847
+ if (args?.index !== undefined)
848
+ body.index = args.index;
849
+ if (args?.x !== undefined && args?.y !== undefined) {
850
+ body.x = args.x;
851
+ body.y = args.y;
852
+ }
853
+ result = await makeRequest("POST", `/devices/${args?.device_id}/double-tap`, body);
854
+ break;
855
+ }
856
+ case "long_press": {
857
+ const body = {};
858
+ if (args?.index !== undefined)
859
+ body.index = args.index;
860
+ if (args?.x !== undefined && args?.y !== undefined) {
861
+ body.x = args.x;
862
+ body.y = args.y;
863
+ }
864
+ result = await makeRequest("POST", `/devices/${args?.device_id}/long-press`, body);
865
+ break;
866
+ }
867
+ case "two_finger_tap": {
868
+ const body = {};
869
+ if (args?.index !== undefined)
870
+ body.index = args.index;
871
+ if (args?.x !== undefined && args?.y !== undefined) {
872
+ body.x = args.x;
873
+ body.y = args.y;
874
+ }
875
+ result = await makeRequest("POST", `/devices/${args?.device_id}/two-finger-tap`, body);
876
+ break;
877
+ }
878
+ case "drag": {
879
+ const dragBody = {
880
+ fromX: args?.from_x,
881
+ fromY: args?.from_y,
882
+ toX: args?.to_x,
883
+ toY: args?.to_y,
884
+ duration: args?.duration_ms ?? 500,
885
+ };
886
+ if (args?.press_duration_ms) {
887
+ dragBody.pressDuration = args.press_duration_ms;
888
+ }
889
+ result = await makeRequest("POST", `/devices/${args?.device_id}/drag`, dragBody);
890
+ break;
891
+ }
892
+ case "dismiss_keyboard":
893
+ result = await makeRequest("POST", `/devices/${args?.device_id}/dismiss-keyboard`);
894
+ break;
619
895
  case "type_text":
620
896
  result = await makeRequest("POST", `/devices/${args?.device_id}/type`, { text: args?.text });
621
897
  break;
@@ -642,6 +918,23 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
642
918
  case "get_ocr":
643
919
  result = await makeRequest("GET", `/devices/${args?.device_id}/ocr`);
644
920
  break;
921
+ case "uninstall_app":
922
+ result = await makeRequest("DELETE", `/devices/${args?.device_id}/apps/${encodeURIComponent(args?.bundle_id)}`);
923
+ break;
924
+ case "kill_app":
925
+ result = await makeRequest("POST", `/devices/${args?.device_id}/kill-app`, {
926
+ bundleId: args?.bundle_id,
927
+ });
928
+ break;
929
+ case "set_location":
930
+ result = await makeRequest("POST", `/devices/${args?.device_id}/location`, {
931
+ lat: args?.lat,
932
+ lon: args?.lon,
933
+ });
934
+ break;
935
+ case "reset_location":
936
+ result = await makeRequest("DELETE", `/devices/${args?.device_id}/location`);
937
+ break;
645
938
  case "execute_dsl":
646
939
  result = await makeRequest("POST", `/devices/${args?.device_id}/dsl/execute`, args?.script, 300000 // 5 minutes
647
940
  );
@@ -822,10 +1115,17 @@ const API_REFERENCE = `# MobAI API Reference
822
1115
  | Endpoint | Method | Description |
823
1116
  |----------|--------|-------------|
824
1117
  | /devices/{id}/tap | POST | Tap element: {"index": N} or {"x": X, "y": Y} |
1118
+ | /devices/{id}/double-tap | POST | Double tap: {"index": N} or {"x": X, "y": Y} |
1119
+ | /devices/{id}/long-press | POST | Long press (0.5s): {"index": N} or {"x": X, "y": Y} |
1120
+ | /devices/{id}/two-finger-tap | POST | Two-finger tap (iOS): {"index": N} or {"x": X, "y": Y} |
825
1121
  | /devices/{id}/swipe | POST | Swipe: {"fromX", "fromY", "toX", "toY", "duration"} |
1122
+ | /devices/{id}/drag | POST | Drag: {"fromX", "fromY", "toX", "toY", "duration", "pressDuration"} |
826
1123
  | /devices/{id}/type | POST | Type text: {"text": "..."} |
1124
+ | /devices/{id}/dismiss-keyboard | POST | Dismiss on-screen keyboard |
827
1125
  | /devices/{id}/go-home | POST | Go to home screen |
828
1126
  | /devices/{id}/launch-app | POST | Launch app: {"bundleId": "..."} |
1127
+ | /devices/{id}/apps/{bundleId} | DELETE | Uninstall app by bundle ID |
1128
+ | /devices/{id}/kill-app | POST | Kill app: {"bundleId": "..."} |
829
1129
 
830
1130
  ## DSL Execution
831
1131
 
@@ -922,7 +1222,7 @@ The DSL (Domain Specific Language) enables batch execution of multiple automatio
922
1222
 
923
1223
  | Action | Description | Key Fields |
924
1224
  |--------|-------------|------------|
925
- | observe | Get UI tree/screenshot/OCR | context, include (ui_tree, screenshot, installed_apps, ocr) |
1225
+ | observe | Get UI tree/screenshot/OCR | context, include (ui_tree, screenshot, installed_apps, ocr), filter ({text_regex, bounds}) |
926
1226
  | tap | Tap element | predicate or coords |
927
1227
  | type | Type text | text, predicate (if keyboard not open), dismiss_keyboard (default: false) |
928
1228
  | press_key | Press keyboard key | key (return, tab, delete, etc.), context (optional: "web") |
@@ -931,12 +1231,16 @@ The DSL (Domain Specific Language) enables batch execution of multiple automatio
931
1231
  | scroll | Scroll in container | direction, predicate (container), to_element |
932
1232
  | open_app | Launch app | bundle_id |
933
1233
  | navigate | Go home/back | target ("home", "back") |
934
- | wait_for | Wait for element | predicate, timeout_ms |
1234
+ | wait_for | Wait for element or UI stability | predicate, timeout_ms, poll_interval_ms, stable (wait for UI to stop changing) |
1235
+ | screenshot | Save screenshot to file | file_path (directory), name (optional filename) |
935
1236
  | assert_exists | Verify element exists | predicate, timeout_ms |
936
1237
  | assert_not_exists | Verify element gone | predicate |
937
1238
  | delay | Wait fixed time | duration_ms |
938
1239
  | if_exists | Conditional | predicate, then, else |
939
1240
  | select_web_context | Select browser/WebView | url_contains, title_contains (optional filters) |
1241
+ | kill_app | Force-kill running app | bundle_id |
1242
+ | set_location | Simulate GPS location (Android 12+ for real devices) | lat, lon |
1243
+ | reset_location | Reset to real GPS (Android 12+ for real devices) | (no fields) |
940
1244
  | metrics_start | Start performance monitoring | types, bundle_id, label, thresholds, capture_logs |
941
1245
  | metrics_stop | Stop monitoring, get summary | format ("summary" or "detailed") |
942
1246
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "mobai-mcp",
3
- "version": "1.3.0",
3
+ "version": "1.4.1",
4
4
  "mcpName": "io.github.MobAI-App/mobai-mcp",
5
5
  "description": "MCP server for MobAI - AI-powered mobile device automation",
6
6
  "type": "module",
package/server.json CHANGED
@@ -6,12 +6,12 @@
6
6
  "url": "https://github.com/MobAI-App/mobai-mcp",
7
7
  "source": "github"
8
8
  },
9
- "version": "1.2.1",
9
+ "version": "1.4.1",
10
10
  "packages": [
11
11
  {
12
12
  "registryType": "npm",
13
13
  "identifier": "mobai-mcp",
14
- "version": "1.2.1",
14
+ "version": "1.4.1",
15
15
  "transport": {
16
16
  "type": "stdio"
17
17
  }