mobai-mcp 1.0.3 → 1.2.0

This diff shows the changes between publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between those package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -198,11 +198,15 @@ const TOOLS = [
198
198
  },
199
199
  verbose: {
200
200
  type: "boolean",
201
- description: "Include full element coordinates (default: false)",
201
+ description: "Include detailed elements array with bounds (default: false)",
202
202
  },
203
203
  only_visible: {
204
204
  type: "boolean",
205
- description: "Filter to visible elements only (default: true)",
205
+ description: "Filter to only visible elements (default: true)",
206
+ },
207
+ include_keyboard: {
208
+ type: "boolean",
209
+ description: "Include keyboard elements in the tree (default: false). Useful for interacting with on-screen keyboards.",
206
210
  },
207
211
  },
208
212
  required: ["device_id"],
@@ -332,6 +336,20 @@ const TOOLS = [
332
336
  required: ["device_id"],
333
337
  },
334
338
  },
339
+ {
340
+ name: "get_ocr",
341
+ description: "Perform OCR text recognition on the current screen (iOS only). Returns detected text with screen coordinates for tapping (already adjusted for tapping).",
342
+ inputSchema: {
343
+ type: "object",
344
+ properties: {
345
+ device_id: {
346
+ type: "string",
347
+ description: "Device ID",
348
+ },
349
+ },
350
+ required: ["device_id"],
351
+ },
352
+ },
335
353
  {
336
354
  name: "execute_dsl",
337
355
  description: `Execute a batch of automation steps using the DSL (Domain Specific Language).
@@ -580,6 +598,8 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
580
598
  params.set("verbose", "true");
581
599
  if (args?.only_visible === false)
582
600
  params.set("onlyVisible", "false");
601
+ if (args?.include_keyboard)
602
+ params.set("includeKeyboard", "true");
583
603
  const queryString = params.toString();
584
604
  const endpoint = `/devices/${args?.device_id}/ui-tree${queryString ? `?${queryString}` : ""}`;
585
605
  result = await makeRequest("GET", endpoint);
@@ -619,6 +639,9 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
619
639
  case "list_apps":
620
640
  result = await makeRequest("GET", `/devices/${args?.device_id}/apps`);
621
641
  break;
642
+ case "get_ocr":
643
+ result = await makeRequest("GET", `/devices/${args?.device_id}/ocr`);
644
+ break;
622
645
  case "execute_dsl":
623
646
  result = await makeRequest("POST", `/devices/${args?.device_id}/dsl/execute`, args?.script, 300000 // 5 minutes
624
647
  );
@@ -785,6 +808,7 @@ const API_REFERENCE = `# MobAI API Reference
785
808
  | /devices/{id}/screenshot | GET | Capture screenshot (saved to /tmp/mobai/screenshots/) |
786
809
  | /devices/{id}/ui-tree | GET | Get UI accessibility tree |
787
810
  | /devices/{id}/apps | GET | List installed apps |
811
+ | /devices/{id}/ocr | GET | OCR text recognition (iOS only) |
788
812
 
789
813
  ## Bridge Control
790
814
 
@@ -889,7 +913,7 @@ The DSL (Domain Specific Language) enables batch execution of multiple automatio
889
913
 
890
914
  | Action | Description | Key Fields |
891
915
  |--------|-------------|------------|
892
- | observe | Get UI tree/screenshot | context, include (ui_tree, screenshot, installed_apps) |
916
+ | observe | Get UI tree/screenshot/OCR | context, include (ui_tree, screenshot, installed_apps, ocr) |
893
917
  | tap | Tap element | predicate or coords |
894
918
  | type | Type text | text, predicate (if keyboard not open), dismiss_keyboard (default: false) |
895
919
  | press_key | Press keyboard key | key (return, tab, delete, etc.), context (optional: "web") |
@@ -956,6 +980,16 @@ Note: \`predicate\` is required if keyboard is not already open. Use \`dismiss_k
956
980
  - \`abort\`: Stop on failure (default)
957
981
  - \`skip\`: Skip failed step, continue
958
982
  - \`retry\`: Retry with delay
983
+
984
+ ## OCR (iOS only)
985
+
986
+ Use \`include: ["ocr"]\` in observe to get text recognition when UI tree is empty:
987
+
988
+ \`\`\`json
989
+ {"action": "observe", "context": "native", "include": ["ocr"]}
990
+ \`\`\`
991
+
992
+ Returns text with coordinates for tapping (already adjusted for tapping).
959
993
  `;
960
994
  const NATIVE_RUNNER_GUIDE = `# Native App Automation Guide
961
995
 
@@ -1026,6 +1060,11 @@ The \`type\` action requires either:
1026
1060
  {"action": "type", "text": "username", "predicate": {"type": "input", "label": "Username"}}
1027
1061
  \`\`\`
1028
1062
 
1063
+ ### Dismissing Keyboard
1064
+ - Use \`press_key: return\` to submit and close the keyboard
1065
+ - If submit is not desired, look for a "Close", "Cancel", "Done" or "Back" button in the UI tree and tap it
1066
+ - On Android, \`press_key: back\` also dismisses the keyboard
1067
+
1029
1068
  ## Common Patterns
1030
1069
 
1031
1070
  ### Open App and Navigate
@@ -1097,6 +1136,7 @@ The \`type\` action requires either:
1097
1136
  - **Add delays after navigation** - Apps need time to render
1098
1137
  - **Use retry strategy** - Transient failures are common
1099
1138
  - **Use press_key for form navigation** - Tab between fields, Return to submit
1139
+ - **Use OCR for system dialogs (iOS)** - When UI tree is empty, use \`include: ["ocr"]\`
1100
1140
  `;
1101
1141
  const WEB_RUNNER_GUIDE = `# Web Automation Guide
1102
1142
 
package/package.json CHANGED
@@ -1,6 +1,7 @@
1
1
  {
2
2
  "name": "mobai-mcp",
3
- "version": "1.0.3",
3
+ "version": "1.2.0",
4
+ "mcpName": "io.github.mobai-app/mobai-mcp",
4
5
  "description": "MCP server for MobAI - AI-powered mobile device automation",
5
6
  "type": "module",
6
7
  "main": "dist/index.js",
@@ -9,6 +10,7 @@
9
10
  },
10
11
  "files": [
11
12
  "dist",
13
+ "server.json",
12
14
  "README.md",
13
15
  "LICENSE"
14
16
  ],
@@ -42,7 +44,7 @@
42
44
  "url": "https://github.com/MobAI-App/mobai-mcp/issues"
43
45
  },
44
46
  "dependencies": {
45
- "@modelcontextprotocol/sdk": "^1.0.0"
47
+ "@modelcontextprotocol/sdk": "^1.25.3"
46
48
  },
47
49
  "devDependencies": {
48
50
  "@types/node": "^20.0.0",
package/server.json ADDED
@@ -0,0 +1,20 @@
1
+ {
2
+ "$schema": "https://static.modelcontextprotocol.io/schemas/2025-12-11/server.schema.json",
3
+ "name": "io.github.mobai-app/mobai-mcp",
4
+ "description": "AI-powered mobile device automation for Android and iOS devices, emulators, and simulators",
5
+ "repository": {
6
+ "url": "https://github.com/MobAI-App/mobai-mcp",
7
+ "source": "github"
8
+ },
9
+ "version": "1.2.0",
10
+ "packages": [
11
+ {
12
+ "registryType": "npm",
13
+ "identifier": "mobai-mcp",
14
+ "version": "1.2.0",
15
+ "transport": {
16
+ "type": "stdio"
17
+ }
18
+ }
19
+ ]
20
+ }