mobai-mcp 1.0.3 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +43 -3
- package/package.json +4 -2
- package/server.json +20 -0
package/dist/index.js
CHANGED
|
@@ -198,11 +198,15 @@ const TOOLS = [
|
|
|
198
198
|
},
|
|
199
199
|
verbose: {
|
|
200
200
|
type: "boolean",
|
|
201
|
-
description: "Include
|
|
201
|
+
description: "Include detailed elements array with bounds (default: false)",
|
|
202
202
|
},
|
|
203
203
|
only_visible: {
|
|
204
204
|
type: "boolean",
|
|
205
|
-
description: "Filter to visible elements
|
|
205
|
+
description: "Filter to only visible elements (default: true)",
|
|
206
|
+
},
|
|
207
|
+
include_keyboard: {
|
|
208
|
+
type: "boolean",
|
|
209
|
+
description: "Include keyboard elements in the tree (default: false). Useful for interacting with on-screen keyboards.",
|
|
206
210
|
},
|
|
207
211
|
},
|
|
208
212
|
required: ["device_id"],
|
|
@@ -332,6 +336,20 @@ const TOOLS = [
|
|
|
332
336
|
required: ["device_id"],
|
|
333
337
|
},
|
|
334
338
|
},
|
|
339
|
+
{
|
|
340
|
+
name: "get_ocr",
|
|
341
|
+
description: "Perform OCR text recognition on the current screen (iOS only). Returns detected text with screen coordinates for tapping (already adjusted for tapping).",
|
|
342
|
+
inputSchema: {
|
|
343
|
+
type: "object",
|
|
344
|
+
properties: {
|
|
345
|
+
device_id: {
|
|
346
|
+
type: "string",
|
|
347
|
+
description: "Device ID",
|
|
348
|
+
},
|
|
349
|
+
},
|
|
350
|
+
required: ["device_id"],
|
|
351
|
+
},
|
|
352
|
+
},
|
|
335
353
|
{
|
|
336
354
|
name: "execute_dsl",
|
|
337
355
|
description: `Execute a batch of automation steps using the DSL (Domain Specific Language).
|
|
@@ -580,6 +598,8 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
|
|
|
580
598
|
params.set("verbose", "true");
|
|
581
599
|
if (args?.only_visible === false)
|
|
582
600
|
params.set("onlyVisible", "false");
|
|
601
|
+
if (args?.include_keyboard)
|
|
602
|
+
params.set("includeKeyboard", "true");
|
|
583
603
|
const queryString = params.toString();
|
|
584
604
|
const endpoint = `/devices/${args?.device_id}/ui-tree${queryString ? `?${queryString}` : ""}`;
|
|
585
605
|
result = await makeRequest("GET", endpoint);
|
|
@@ -619,6 +639,9 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
|
|
|
619
639
|
case "list_apps":
|
|
620
640
|
result = await makeRequest("GET", `/devices/${args?.device_id}/apps`);
|
|
621
641
|
break;
|
|
642
|
+
case "get_ocr":
|
|
643
|
+
result = await makeRequest("GET", `/devices/${args?.device_id}/ocr`);
|
|
644
|
+
break;
|
|
622
645
|
case "execute_dsl":
|
|
623
646
|
result = await makeRequest("POST", `/devices/${args?.device_id}/dsl/execute`, args?.script, 300000 // 5 minutes
|
|
624
647
|
);
|
|
@@ -785,6 +808,7 @@ const API_REFERENCE = `# MobAI API Reference
|
|
|
785
808
|
| /devices/{id}/screenshot | GET | Capture screenshot (saved to /tmp/mobai/screenshots/) |
|
|
786
809
|
| /devices/{id}/ui-tree | GET | Get UI accessibility tree |
|
|
787
810
|
| /devices/{id}/apps | GET | List installed apps |
|
|
811
|
+
| /devices/{id}/ocr | GET | OCR text recognition (iOS only) |
|
|
788
812
|
|
|
789
813
|
## Bridge Control
|
|
790
814
|
|
|
@@ -889,7 +913,7 @@ The DSL (Domain Specific Language) enables batch execution of multiple automatio
|
|
|
889
913
|
|
|
890
914
|
| Action | Description | Key Fields |
|
|
891
915
|
|--------|-------------|------------|
|
|
892
|
-
| observe | Get UI tree/screenshot | context, include (ui_tree, screenshot, installed_apps) |
|
|
916
|
+
| observe | Get UI tree/screenshot/OCR | context, include (ui_tree, screenshot, installed_apps, ocr) |
|
|
893
917
|
| tap | Tap element | predicate or coords |
|
|
894
918
|
| type | Type text | text, predicate (if keyboard not open), dismiss_keyboard (default: false) |
|
|
895
919
|
| press_key | Press keyboard key | key (return, tab, delete, etc.), context (optional: "web") |
|
|
@@ -956,6 +980,16 @@ Note: \`predicate\` is required if keyboard is not already open. Use \`dismiss_k
|
|
|
956
980
|
- \`abort\`: Stop on failure (default)
|
|
957
981
|
- \`skip\`: Skip failed step, continue
|
|
958
982
|
- \`retry\`: Retry with delay
|
|
983
|
+
|
|
984
|
+
## OCR (iOS only)
|
|
985
|
+
|
|
986
|
+
Use \`include: ["ocr"]\` in observe to get text recognition when UI tree is empty:
|
|
987
|
+
|
|
988
|
+
\`\`\`json
|
|
989
|
+
{"action": "observe", "context": "native", "include": ["ocr"]}
|
|
990
|
+
\`\`\`
|
|
991
|
+
|
|
992
|
+
Returns text with coordinates for tapping (already adjusted for tapping).
|
|
959
993
|
`;
|
|
960
994
|
const NATIVE_RUNNER_GUIDE = `# Native App Automation Guide
|
|
961
995
|
|
|
@@ -1026,6 +1060,11 @@ The \`type\` action requires either:
|
|
|
1026
1060
|
{"action": "type", "text": "username", "predicate": {"type": "input", "label": "Username"}}
|
|
1027
1061
|
\`\`\`
|
|
1028
1062
|
|
|
1063
|
+
### Dismissing Keyboard
|
|
1064
|
+
- Use \`press_key: return\` to submit and close the keyboard
|
|
1065
|
+
- If submit is not desired, look for a "Close", "Cancel", "Done" or "Back" button in the UI tree and tap it
|
|
1066
|
+
- On Android, \`press_key: back\` also dismisses the keyboard
|
|
1067
|
+
|
|
1029
1068
|
## Common Patterns
|
|
1030
1069
|
|
|
1031
1070
|
### Open App and Navigate
|
|
@@ -1097,6 +1136,7 @@ The \`type\` action requires either:
|
|
|
1097
1136
|
- **Add delays after navigation** - Apps need time to render
|
|
1098
1137
|
- **Use retry strategy** - Transient failures are common
|
|
1099
1138
|
- **Use press_key for form navigation** - Tab between fields, Return to submit
|
|
1139
|
+
- **Use OCR for system dialogs (iOS)** - When UI tree is empty, use \`include: ["ocr"]\`
|
|
1100
1140
|
`;
|
|
1101
1141
|
const WEB_RUNNER_GUIDE = `# Web Automation Guide
|
|
1102
1142
|
|
package/package.json
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "mobai-mcp",
|
|
3
|
-
"version": "1.0.3",
|
|
3
|
+
"version": "1.2.0",
|
|
4
|
+
"mcpName": "io.github.mobai-app/mobai-mcp",
|
|
4
5
|
"description": "MCP server for MobAI - AI-powered mobile device automation",
|
|
5
6
|
"type": "module",
|
|
6
7
|
"main": "dist/index.js",
|
|
@@ -9,6 +10,7 @@
|
|
|
9
10
|
},
|
|
10
11
|
"files": [
|
|
11
12
|
"dist",
|
|
13
|
+
"server.json",
|
|
12
14
|
"README.md",
|
|
13
15
|
"LICENSE"
|
|
14
16
|
],
|
|
@@ -42,7 +44,7 @@
|
|
|
42
44
|
"url": "https://github.com/MobAI-App/mobai-mcp/issues"
|
|
43
45
|
},
|
|
44
46
|
"dependencies": {
|
|
45
|
-
"@modelcontextprotocol/sdk": "^1.
|
|
47
|
+
"@modelcontextprotocol/sdk": "^1.25.3"
|
|
46
48
|
},
|
|
47
49
|
"devDependencies": {
|
|
48
50
|
"@types/node": "^20.0.0",
|
package/server.json
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$schema": "https://static.modelcontextprotocol.io/schemas/2025-12-11/server.schema.json",
|
|
3
|
+
"name": "io.github.mobai-app/mobai-mcp",
|
|
4
|
+
"description": "AI-powered mobile device automation for Android and iOS devices, emulators, and simulators",
|
|
5
|
+
"repository": {
|
|
6
|
+
"url": "https://github.com/MobAI-App/mobai-mcp",
|
|
7
|
+
"source": "github"
|
|
8
|
+
},
|
|
9
|
+
"version": "1.2.0",
|
|
10
|
+
"packages": [
|
|
11
|
+
{
|
|
12
|
+
"registryType": "npm",
|
|
13
|
+
"identifier": "mobai-mcp",
|
|
14
|
+
"version": "1.2.0",
|
|
15
|
+
"transport": {
|
|
16
|
+
"type": "stdio"
|
|
17
|
+
}
|
|
18
|
+
}
|
|
19
|
+
]
|
|
20
|
+
}
|