@mobilenext/mobile-mcp 0.0.11 → 0.0.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -79,6 +79,8 @@ How we help to scale mobile automation:
79
79
  claude mcp add mobile -- npx -y @mobilenext/mobile-mcp@latest
80
80
  ```
81
81
 
82
+ [Read more in our wiki](https://github.com/mobile-next/mobile-mcp/wiki)! 🚀
83
+
82
84
  ## Prerequisites
83
85
 
84
86
  What you will need to connect MCP with your agent and mobile devices:
@@ -109,16 +111,10 @@ On iOS, you'll need Xcode and to run the Simulator before using Mobile MCP with
109
111
  - `xcrun simctl list`
110
112
  - `xcrun simctl boot "iPhone 16"`
111
113
 
112
-
113
114
  # Mobile Commands and interaction tools
114
115
 
115
116
  The commands and tools support both accessibility-based locators (preferred) and coordinate-based inputs, giving you flexibility when accessibility/automation IDs are missing for reliable and seamless automation.
116
117
 
117
- ## mobile_install_app
118
- - **Description:** Installs an app onto the device/emulator
119
- - **Parameters:**
120
- - `appPath` (string): Path or URL to the app file (e.g., .apk for Android, .ipa/.app for iOS)
121
-
122
118
  ## mobile_list_apps
123
119
  - **Description:** List all the installed apps on the device
124
120
  - **Parameters:**
@@ -172,8 +168,6 @@ The commands and tools support both accessibility-based locators (preferred) and
172
168
  ## mobile_type_text
173
169
  - **Description:** Types text into a focused UI element (e.g., TextField, SearchField)
174
170
  - **Parameters:**
175
- - `element` (string): Human-readable element description
176
- - `ref` (string): Accessibility/automation ID of the element
177
171
  - `text` (string): Text to type
178
172
  - `submit` (boolean): Whether to press Enter/Return after typing
179
173
 
@@ -200,22 +194,12 @@ The commands and tools support both accessibility-based locators (preferred) and
200
194
 
201
195
  ## mobile_take_screenshot
202
196
  - **Description:** Captures a screenshot of the current device screen
203
- - **Parameters:**
204
- - `raw` (boolean): Return a lossless image if true; otherwise, compressed by default
197
+ - **Parameters:** None
205
198
 
206
199
  ## mobile_get_source
207
200
  - **Description:** Fetches the current device UI structure (accessibility snapshot) (xml format)
208
201
  - **Parameters:** None
209
202
 
210
- ## mobile_wait
211
- - **Description:** Waits for a specified time
212
- - **Parameters:**
213
- - `time` (number): Time to wait in seconds (capped at 10 seconds)
214
-
215
- ## mobile_close_session
216
- - **Description:** Closes the current device session
217
- - **Parameters:** None
218
-
219
203
 
220
204
  # Thanks to all contributors ❤️
221
205
 
package/lib/android.js CHANGED
@@ -37,44 +37,61 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
37
37
  };
38
38
  Object.defineProperty(exports, "__esModule", { value: true });
39
39
  exports.getConnectedDevices = exports.AndroidRobot = void 0;
40
+ const path_1 = __importDefault(require("path"));
40
41
  const child_process_1 = require("child_process");
41
42
  const xml = __importStar(require("fast-xml-parser"));
42
- const path_1 = __importDefault(require("path"));
43
+ const robot_1 = require("./robot");
44
+ const getAdbPath = () => {
45
+ let executable = "adb";
46
+ if (process.env.ANDROID_HOME) {
47
+ executable = path_1.default.join(process.env.ANDROID_HOME, "platform-tools", "adb");
48
+ }
49
+ return executable;
50
+ };
51
+ const BUTTON_MAP = {
52
+ "BACK": "KEYCODE_BACK",
53
+ "HOME": "KEYCODE_HOME",
54
+ "VOLUME_UP": "KEYCODE_VOLUME_UP",
55
+ "VOLUME_DOWN": "KEYCODE_VOLUME_DOWN",
56
+ "ENTER": "KEYCODE_ENTER",
57
+ };
58
+ const TIMEOUT = 30000;
59
+ const MAX_BUFFER_SIZE = 1024 * 1024 * 4;
43
60
  class AndroidRobot {
44
61
  deviceId;
45
62
  constructor(deviceId) {
46
63
  this.deviceId = deviceId;
47
64
  }
65
+ adb(...args) {
66
+ return (0, child_process_1.execFileSync)(getAdbPath(), ["-s", this.deviceId, ...args], {
67
+ maxBuffer: MAX_BUFFER_SIZE,
68
+ timeout: TIMEOUT,
69
+ });
70
+ }
48
71
  async getScreenSize() {
49
- const screenSize = (0, child_process_1.execSync)(`adb -s ${this.deviceId} shell wm size`)
72
+ const screenSize = this.adb("shell", "wm", "size")
50
73
  .toString()
51
74
  .split(" ")
52
75
  .pop();
53
76
  if (!screenSize) {
54
77
  throw new Error("Failed to get screen size");
55
78
  }
79
+ const scale = 1;
56
80
  const [width, height] = screenSize.split("x").map(Number);
57
- return { width, height };
58
- }
59
- adb(...args) {
60
- let executable = "adb";
61
- if (process.env.ANDROID_HOME) {
62
- executable = path_1.default.join(process.env.ANDROID_HOME, "platform-tools", "adb");
63
- }
64
- return (0, child_process_1.execFileSync)(executable, ["-s", this.deviceId, ...args], {
65
- maxBuffer: 1024 * 1024 * 4,
66
- timeout: 30000,
67
- });
81
+ return { width, height, scale };
68
82
  }
69
83
  async listApps() {
70
- const result = this.adb("shell", "cmd", "package", "query-activities", "-a", "android.intent.action.MAIN", "-c", "android.intent.category.LAUNCHER")
84
+ return this.adb("shell", "cmd", "package", "query-activities", "-a", "android.intent.action.MAIN", "-c", "android.intent.category.LAUNCHER")
71
85
  .toString()
72
86
  .split("\n")
73
87
  .map(line => line.trim())
74
88
  .filter(line => line.startsWith("packageName="))
75
89
  .map(line => line.substring("packageName=".length))
76
- .filter((value, index, self) => self.indexOf(value) === index);
77
- return result;
90
+ .filter((value, index, self) => self.indexOf(value) === index)
91
+ .map(packageName => ({
92
+ packageName,
93
+ appName: packageName,
94
+ }));
78
95
  }
79
96
  async launchApp(packageName) {
80
97
  this.adb("shell", "monkey", "-p", packageName, "-c", "android.intent.category.LAUNCHER", "1");
@@ -96,53 +113,45 @@ class AndroidRobot {
96
113
  y1 = Math.floor(screenSize.height * 0.80);
97
114
  break;
98
115
  default:
99
- throw new Error(`Swipe direction "${direction}" is not supported`);
116
+ throw new robot_1.ActionableError(`Swipe direction "${direction}" is not supported`);
100
117
  }
101
118
  this.adb("shell", "input", "swipe", `${x0}`, `${y0}`, `${x1}`, `${y1}`, "1000");
102
119
  }
103
120
  async getScreenshot() {
104
121
  return this.adb("shell", "screencap", "-p");
105
122
  }
106
- collectElements(node, screenSize) {
123
+ collectElements(node) {
107
124
  const elements = [];
108
- const getCoordinates = (element) => {
125
+ const getScreenElementRect = (element) => {
109
126
  const bounds = String(element.bounds);
110
127
  const [, left, top, right, bottom] = bounds.match(/^\[(\d+),(\d+)\]\[(\d+),(\d+)\]$/)?.map(Number) || [];
111
- return { left, top, right, bottom };
112
- };
113
- const getCenter = (coordinates) => {
114
128
  return {
115
- x: Math.floor((coordinates.left + coordinates.right) / 2),
116
- y: Math.floor((coordinates.top + coordinates.bottom) / 2),
117
- };
118
- };
119
- const normalizeCoordinates = (coordinates, screenSize) => {
120
- return {
121
- x: Number((coordinates.x / screenSize.width).toFixed(3)),
122
- y: Number((coordinates.y / screenSize.height).toFixed(3)),
129
+ x: left,
130
+ y: top,
131
+ width: right - left,
132
+ height: bottom - top,
123
133
  };
124
134
  };
125
135
  if (node.node) {
126
136
  if (Array.isArray(node.node)) {
127
137
  for (const childNode of node.node) {
128
- elements.push(...this.collectElements(childNode, screenSize));
138
+ elements.push(...this.collectElements(childNode));
129
139
  }
130
140
  }
131
141
  else {
132
- elements.push(...this.collectElements(node.node, screenSize));
142
+ elements.push(...this.collectElements(node.node));
133
143
  }
134
144
  }
135
- if (node.text) {
136
- elements.push({
137
- "text": node.text,
138
- "coordinates": normalizeCoordinates(getCenter(getCoordinates(node)), screenSize),
139
- });
140
- }
141
- if (node["content-desc"]) {
142
- elements.push({
143
- "text": node["content-desc"],
144
- "coordinates": normalizeCoordinates(getCenter(getCoordinates(node)), screenSize),
145
- });
145
+ if (node.text || node["content-desc"] || node.hint) {
146
+ const element = {
147
+ type: node.class || "text",
148
+ name: node.text,
149
+ label: node["content-desc"] || node.hint || "",
150
+ rect: getScreenElementRect(node),
151
+ };
152
+ if (element.rect.width > 0 && element.rect.height > 0) {
153
+ elements.push(element);
154
+ }
146
155
  }
147
156
  return elements;
148
157
  }
@@ -154,8 +163,7 @@ class AndroidRobot {
154
163
  });
155
164
  const parsedXml = parser.parse(dump);
156
165
  const hierarchy = parsedXml.hierarchy;
157
- const screenSize = await this.getScreenSize();
158
- const elements = this.collectElements(hierarchy.node, screenSize);
166
+ const elements = this.collectElements(hierarchy.node);
159
167
  return elements;
160
168
  }
161
169
  async terminateApp(packageName) {
@@ -170,29 +178,42 @@ class AndroidRobot {
170
178
  this.adb("shell", "input", "text", _text);
171
179
  }
172
180
  async pressButton(button) {
173
- const _map = {
174
- "BACK": "KEYCODE_BACK",
175
- "HOME": "KEYCODE_HOME",
176
- "VOLUME_UP": "KEYCODE_VOLUME_UP",
177
- "VOLUME_DOWN": "KEYCODE_VOLUME_DOWN",
178
- "ENTER": "KEYCODE_ENTER",
179
- };
180
- if (!_map[button]) {
181
- throw new Error(`Button "${button}" is not supported`);
181
+ if (!BUTTON_MAP[button]) {
182
+ throw new robot_1.ActionableError(`Button "${button}" is not supported`);
182
183
  }
183
- this.adb("shell", "input", "keyevent", _map[button]);
184
+ this.adb("shell", "input", "keyevent", BUTTON_MAP[button]);
184
185
  }
185
186
  async tap(x, y) {
186
187
  this.adb("shell", "input", "tap", `${x}`, `${y}`);
187
188
  }
189
+ async setOrientation(orientation) {
190
+ // Android uses numbers for orientation:
191
+ // 0 - Portrait
192
+ // 1 - Landscape
193
+ const orientationValue = orientation === "portrait" ? 0 : 1;
194
+ // Set orientation using content provider
195
+ this.adb("shell", "content", "insert", "--uri", "content://settings/system", "--bind", "name:s:user_rotation", "--bind", `value:i:${orientationValue}`);
196
+ // Force the orientation change
197
+ this.adb("shell", "settings", "put", "system", "accelerometer_rotation", "0");
198
+ }
199
+ async getOrientation() {
200
+ const rotation = this.adb("shell", "settings", "get", "system", "user_rotation").toString().trim();
201
+ return rotation === "0" ? "portrait" : "landscape";
202
+ }
188
203
  }
189
204
  exports.AndroidRobot = AndroidRobot;
190
205
  const getConnectedDevices = () => {
191
- return (0, child_process_1.execSync)(`adb devices`)
192
- .toString()
193
- .split("\n")
194
- .filter(line => !line.startsWith("List of devices attached"))
195
- .filter(line => line.trim() !== "")
196
- .map(line => line.split("\t")[0]);
206
+ try {
207
+ return (0, child_process_1.execFileSync)(getAdbPath(), ["devices"])
208
+ .toString()
209
+ .split("\n")
210
+ .filter(line => !line.startsWith("List of devices attached"))
211
+ .filter(line => line.trim() !== "")
212
+ .map(line => line.split("\t")[0]);
213
+ }
214
+ catch (error) {
215
+ console.error("Could not execute adb command, maybe ANDROID_HOME is not set?");
216
+ return [];
217
+ }
197
218
  };
198
219
  exports.getConnectedDevices = getConnectedDevices;
package/lib/ios.js CHANGED
@@ -1,79 +1,166 @@
1
1
  "use strict";
2
+ var __importDefault = (this && this.__importDefault) || function (mod) {
3
+ return (mod && mod.__esModule) ? mod : { "default": mod };
4
+ };
2
5
  Object.defineProperty(exports, "__esModule", { value: true });
3
6
  exports.IosManager = exports.IosRobot = void 0;
7
+ const path_1 = __importDefault(require("path"));
8
+ const os_1 = require("os");
9
+ const crypto_1 = require("crypto");
4
10
  const fs_1 = require("fs");
5
11
  const child_process_1 = require("child_process");
12
+ const net_1 = require("net");
6
13
  const webdriver_agent_1 = require("./webdriver-agent");
14
+ const robot_1 = require("./robot");
15
+ const WDA_PORT = 8100;
16
+ const IOS_TUNNEL_PORT = 60105;
17
+ const getGoIosPath = () => {
18
+ if (process.env.GO_IOS_PATH) {
19
+ return process.env.GO_IOS_PATH;
20
+ }
21
+ // fallback to go-ios in PATH via `npm install -g go-ios`
22
+ return "ios";
23
+ };
7
24
  class IosRobot {
8
25
  deviceId;
9
- wda;
10
26
  constructor(deviceId) {
11
27
  this.deviceId = deviceId;
12
- this.wda = new webdriver_agent_1.WebDriverAgent("localhost", 8100);
28
+ }
29
+ isListeningOnPort(port) {
30
+ return new Promise((resolve, reject) => {
31
+ const client = new net_1.Socket();
32
+ client.connect(port, "localhost", () => {
33
+ client.destroy();
34
+ resolve(true);
35
+ });
36
+ client.on("error", (err) => {
37
+ resolve(false);
38
+ });
39
+ });
40
+ }
41
+ async isTunnelRunning() {
42
+ return await this.isListeningOnPort(IOS_TUNNEL_PORT);
43
+ }
44
+ async isWdaForwardRunning() {
45
+ return await this.isListeningOnPort(WDA_PORT);
46
+ }
47
+ async assertTunnelRunning() {
48
+ if (await this.isTunnelRequired()) {
49
+ if (!(await this.isTunnelRunning())) {
50
+ throw new robot_1.ActionableError("iOS tunnel is not running, please see https://github.com/mobile-next/mobile-mcp/wiki/");
51
+ }
52
+ }
53
+ }
54
+ async wda() {
55
+ await this.assertTunnelRunning();
56
+ if (!(await this.isWdaForwardRunning())) {
57
+ throw new robot_1.ActionableError("Port forwarding to WebDriverAgent is not running (tunnel okay), please see https://github.com/mobile-next/mobile-mcp/wiki/");
58
+ }
59
+ const wda = new webdriver_agent_1.WebDriverAgent("localhost", WDA_PORT);
60
+ if (!(await wda.isRunning())) {
61
+ throw new robot_1.ActionableError("WebDriverAgent is not running on device (tunnel okay, port forwarding okay), please see https://github.com/mobile-next/mobile-mcp/wiki/");
62
+ }
63
+ return wda;
13
64
  }
14
65
  async ios(...args) {
15
- return (0, child_process_1.execFileSync)("ios", ["--udid", this.deviceId, ...args], {}).toString();
66
+ return (0, child_process_1.execFileSync)(getGoIosPath(), ["--udid", this.deviceId, ...args], {}).toString();
67
+ }
68
+ async getIosVersion() {
69
+ const output = await this.ios("info");
70
+ const json = JSON.parse(output);
71
+ return json.ProductVersion;
72
+ }
73
+ async isTunnelRequired() {
74
+ const version = await this.getIosVersion();
75
+ const args = version.split(".");
76
+ return parseInt(args[0], 10) >= 17;
16
77
  }
17
78
  async getScreenSize() {
18
- return await this.wda.getScreenSize();
79
+ const wda = await this.wda();
80
+ return await wda.getScreenSize();
19
81
  }
20
- swipe(direction) {
21
- return Promise.resolve();
82
+ async swipe(direction) {
83
+ const wda = await this.wda();
84
+ await wda.swipe(direction);
22
85
  }
23
86
  async listApps() {
87
+ await this.assertTunnelRunning();
24
88
  const output = await this.ios("apps", "--all", "--list");
25
89
  return output
26
90
  .split("\n")
27
- .map(line => line.split(" ")[0]);
91
+ .map(line => {
92
+ const [packageName, appName] = line.split(" ");
93
+ return {
94
+ packageName,
95
+ appName,
96
+ };
97
+ });
28
98
  }
29
99
  async launchApp(packageName) {
100
+ await this.assertTunnelRunning();
30
101
  await this.ios("launch", packageName);
31
102
  }
32
103
  async terminateApp(packageName) {
104
+ await this.assertTunnelRunning();
33
105
  await this.ios("kill", packageName);
34
106
  }
35
107
  async openUrl(url) {
36
- await this.wda.withinSession(async (sessionUrl) => {
37
- await fetch(`${sessionUrl}/url`, {
38
- method: "POST",
39
- body: JSON.stringify({ url }),
40
- });
41
- });
108
+ const wda = await this.wda();
109
+ await wda.openUrl(url);
42
110
  }
43
111
  async sendKeys(text) {
44
- await this.wda.sendKeys(text);
112
+ const wda = await this.wda();
113
+ await wda.sendKeys(text);
45
114
  }
46
115
  async pressButton(button) {
47
- await this.wda.pressButton(button);
116
+ const wda = await this.wda();
117
+ await wda.pressButton(button);
48
118
  }
49
119
  async tap(x, y) {
50
- await this.wda.tap(x, y);
120
+ const wda = await this.wda();
121
+ await wda.tap(x, y);
51
122
  }
52
123
  async getElementsOnScreen() {
53
- return await this.wda.getElementsOnScreen();
124
+ const wda = await this.wda();
125
+ return await wda.getElementsOnScreen();
54
126
  }
55
127
  async getScreenshot() {
56
- await this.ios("screenshot", "--output", "screenshot.png");
57
- const buffer = (0, fs_1.readFileSync)("screenshot.png");
58
- (0, fs_1.unlinkSync)("screenshot.png");
128
+ await this.assertTunnelRunning();
129
+ const tmpFilename = path_1.default.join((0, os_1.tmpdir)(), `screenshot-${(0, crypto_1.randomBytes)(8).toString("hex")}.png`);
130
+ await this.ios("screenshot", "--output", tmpFilename);
131
+ const buffer = (0, fs_1.readFileSync)(tmpFilename);
132
+ (0, fs_1.unlinkSync)(tmpFilename);
59
133
  return buffer;
60
134
  }
135
+ async setOrientation(orientation) {
136
+ const wda = await this.wda();
137
+ await wda.setOrientation(orientation);
138
+ }
139
+ async getOrientation() {
140
+ const wda = await this.wda();
141
+ return await wda.getOrientation();
142
+ }
61
143
  }
62
144
  exports.IosRobot = IosRobot;
63
145
  class IosManager {
146
+ async isGoIosInstalled() {
147
+ try {
148
+ const output = (0, child_process_1.execFileSync)(getGoIosPath(), ["version"], { stdio: ["pipe", "pipe", "ignore"] }).toString();
149
+ const json = JSON.parse(output);
150
+ return json.version !== undefined && (json.version.startsWith("v") || json.version === "local-build");
151
+ }
152
+ catch (error) {
153
+ return false;
154
+ }
155
+ }
64
156
  async listDevices() {
65
- const output = (0, child_process_1.execSync)("ios list").toString();
157
+ if (!(await this.isGoIosInstalled())) {
158
+ console.error("go-ios is not installed, no physical iOS devices can be detected");
159
+ return [];
160
+ }
161
+ const output = (0, child_process_1.execFileSync)(getGoIosPath(), ["list"]).toString();
66
162
  const json = JSON.parse(output);
67
163
  return json.deviceList;
68
164
  }
69
165
  }
70
166
  exports.IosManager = IosManager;
71
- async function main() {
72
- const ios = new IosRobot("4C07ED7E-AE81-412E-8AA9-1061EED59DFA");
73
- const before = +new Date();
74
- console.dir(await ios.getElementsOnScreen(), { depth: null });
75
- const after = +new Date();
76
- console.log(`Time taken: ${after - before}ms`);
77
- // await ios.pressButton("VOLUME_UP");
78
- }
79
- main().then();
@@ -3,21 +3,35 @@ Object.defineProperty(exports, "__esModule", { value: true });
3
3
  exports.SimctlManager = exports.Simctl = void 0;
4
4
  const child_process_1 = require("child_process");
5
5
  const webdriver_agent_1 = require("./webdriver-agent");
6
+ const robot_1 = require("./robot");
7
+ const TIMEOUT = 30000;
8
+ const WDA_PORT = 8100;
9
+ const MAX_BUFFER_SIZE = 1024 * 1024 * 4;
6
10
  class Simctl {
7
11
  simulatorUuid;
8
- webDriverAgent;
9
12
  constructor(simulatorUuid) {
10
13
  this.simulatorUuid = simulatorUuid;
11
- this.webDriverAgent = new webdriver_agent_1.WebDriverAgent("localhost", 8100);
14
+ }
15
+ async wda() {
16
+ const wda = new webdriver_agent_1.WebDriverAgent("localhost", WDA_PORT);
17
+ if (!(await wda.isRunning())) {
18
+ throw new robot_1.ActionableError("WebDriverAgent is not running on device (tunnel okay, port forwarding okay), please see https://github.com/mobile-next/mobile-mcp/wiki/");
19
+ }
20
+ return wda;
12
21
  }
13
22
  simctl(...args) {
14
- return (0, child_process_1.execFileSync)("xcrun", ["simctl", ...args], { maxBuffer: 1024 * 1024 * 4 });
23
+ return (0, child_process_1.execFileSync)("xcrun", ["simctl", ...args], {
24
+ timeout: TIMEOUT,
25
+ maxBuffer: MAX_BUFFER_SIZE,
26
+ });
15
27
  }
16
28
  async getScreenshot() {
17
29
  return this.simctl("io", this.simulatorUuid, "screenshot", "-");
18
30
  }
19
31
  async openUrl(url) {
20
- this.simctl("openurl", this.simulatorUuid, url);
32
+ const wda = await this.wda();
33
+ await wda.openUrl(url);
34
+ // alternative: this.simctl("openurl", this.simulatorUuid, url);
21
35
  }
22
36
  async launchApp(packageName) {
23
37
  this.simctl("launch", this.simulatorUuid, packageName);
@@ -35,10 +49,7 @@ class Simctl {
35
49
  while ((appMatch = appRegex.exec(cleanText)) !== null) {
36
50
  // const bundleId = appMatch[1];
37
51
  const appContent = appMatch[2];
38
- const appInfo = {
39
- GroupContainers: {},
40
- SBAppTags: []
41
- };
52
+ const appInfo = {};
42
53
  // parse simple key-value pairs
43
54
  const keyValueRegex = /\s+(\w+)\s+=\s+([^;]+);/g;
44
55
  let keyValueMatch;
@@ -53,30 +64,6 @@ class Simctl {
53
64
  appInfo[key] = value;
54
65
  }
55
66
  }
56
- // parse GroupContainers
57
- const groupContainersMatch = appContent.match(/GroupContainers\s+=\s+\{([^}]+)\};/);
58
- if (groupContainersMatch) {
59
- const groupContainersContent = groupContainersMatch[1];
60
- const groupRegex = /"([^"]+)"\s+=\s+"([^"]+)"/g;
61
- let groupMatch;
62
- while ((groupMatch = groupRegex.exec(groupContainersContent)) !== null) {
63
- const groupId = groupMatch[1];
64
- const groupPath = groupMatch[2];
65
- appInfo.GroupContainers[groupId] = groupPath;
66
- }
67
- }
68
- // parse SBAppTags
69
- const sbAppTagsMatch = appContent.match(/SBAppTags\s+=\s+\(\s*(.*?)\s*\);/);
70
- if (sbAppTagsMatch) {
71
- const tagsContent = sbAppTagsMatch[1].trim();
72
- if (tagsContent) {
73
- const tagRegex = /"([^"]+)"/g;
74
- let tagMatch;
75
- while ((tagMatch = tagRegex.exec(tagsContent)) !== null) {
76
- appInfo.SBAppTags.push(tagMatch[1]);
77
- }
78
- }
79
- }
80
67
  result.push(appInfo);
81
68
  }
82
69
  return result;
@@ -84,83 +71,58 @@ class Simctl {
84
71
  async listApps() {
85
72
  const text = this.simctl("listapps", this.simulatorUuid).toString();
86
73
  const apps = this.parseIOSAppData(text);
87
- return apps.map(app => app.CFBundleIdentifier);
74
+ return apps.map(app => ({
75
+ packageName: app.CFBundleIdentifier,
76
+ appName: app.CFBundleDisplayName,
77
+ }));
88
78
  }
89
79
  async getScreenSize() {
90
- return this.webDriverAgent.getScreenSize();
80
+ const wda = await this.wda();
81
+ return wda.getScreenSize();
91
82
  }
92
83
  async sendKeys(keys) {
93
- return this.webDriverAgent.sendKeys(keys);
84
+ const wda = await this.wda();
85
+ return wda.sendKeys(keys);
94
86
  }
95
87
  async swipe(direction) {
96
- await this.webDriverAgent.withinSession(async (sessionUrl) => {
97
- const x0 = 200;
98
- let y0 = 600;
99
- const x1 = 200;
100
- let y1 = 200;
101
- if (direction === "up") {
102
- const tmp = y0;
103
- y0 = y1;
104
- y1 = tmp;
105
- }
106
- const url = `${sessionUrl}/actions`;
107
- await fetch(url, {
108
- method: "POST",
109
- headers: {
110
- "Content-Type": "application/json",
111
- },
112
- body: JSON.stringify({
113
- actions: [
114
- {
115
- type: "pointer",
116
- id: "finger1",
117
- parameters: { pointerType: "touch" },
118
- actions: [
119
- { type: "pointerMove", duration: 0, x: x0, y: y0 },
120
- { type: "pointerDown", button: 0 },
121
- { type: "pointerMove", duration: 0, x: x1, y: y1 },
122
- { type: "pause", duration: 1000 },
123
- { type: "pointerUp", button: 0 }
124
- ]
125
- }
126
- ]
127
- }),
128
- });
129
- });
88
+ const wda = await this.wda();
89
+ return wda.swipe(direction);
130
90
  }
131
91
  async tap(x, y) {
132
- await this.webDriverAgent.tap(x, y);
92
+ const wda = await this.wda();
93
+ return wda.tap(x, y);
133
94
  }
134
95
  async pressButton(button) {
135
- await this.webDriverAgent.pressButton(button);
96
+ const wda = await this.wda();
97
+ return wda.pressButton(button);
136
98
  }
137
99
  async getElementsOnScreen() {
138
- return await this.webDriverAgent.getElementsOnScreen();
100
+ const wda = await this.wda();
101
+ return wda.getElementsOnScreen();
102
+ }
103
+ async setOrientation(orientation) {
104
+ const wda = await this.wda();
105
+ return wda.setOrientation(orientation);
106
+ }
107
+ async getOrientation() {
108
+ const wda = await this.wda();
109
+ return wda.getOrientation();
139
110
  }
140
111
  }
141
112
  exports.Simctl = Simctl;
142
113
  class SimctlManager {
143
- parseSimulator(line) {
144
- // extract device name and UUID from the line
145
- const match = line.match(/(.*?)\s+\(([\w-]+)\)\s+\((\w+)\)/);
146
- if (!match) {
147
- return null;
148
- }
149
- const deviceName = match[1].trim();
150
- const deviceUuid = match[2];
151
- const deviceState = match[3];
152
- return {
153
- name: deviceName,
154
- uuid: deviceUuid,
155
- state: deviceState,
156
- };
157
- }
158
114
  listSimulators() {
159
- return (0, child_process_1.execSync)(`xcrun simctl list devices`)
160
- .toString()
161
- .split("\n")
162
- .map(line => this.parseSimulator(line))
163
- .filter(simulator => simulator !== null);
115
+ const text = (0, child_process_1.execFileSync)("xcrun", ["simctl", "list", "devices", "-j"]).toString();
116
+ const json = JSON.parse(text);
117
+ return Object.values(json.devices).flatMap(device => {
118
+ return device.map(d => {
119
+ return {
120
+ name: d.name,
121
+ uuid: d.udid,
122
+ state: d.state,
123
+ };
124
+ });
125
+ });
164
126
  }
165
127
  listBootedSimulators() {
166
128
  return this.listSimulators()
package/lib/robot.js CHANGED
@@ -1,2 +1,9 @@
1
1
  "use strict";
2
2
  Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.ActionableError = void 0;
4
+ class ActionableError extends Error {
5
+ constructor(message) {
6
+ super(message);
7
+ }
8
+ }
9
+ exports.ActionableError = ActionableError;
package/lib/server.js CHANGED
@@ -9,6 +9,7 @@ const zod_1 = require("zod");
9
9
  const sharp_1 = __importDefault(require("sharp"));
10
10
  const logger_1 = require("./logger");
11
11
  const android_1 = require("./android");
12
+ const robot_1 = require("./robot");
12
13
  const iphone_simulator_1 = require("./iphone-simulator");
13
14
  const ios_1 = require("./ios");
14
15
  const getAgentVersion = () => {
@@ -35,18 +36,31 @@ const createMcpServer = () => {
35
36
  };
36
37
  }
37
38
  catch (error) {
38
- (0, logger_1.trace)(`Tool '${description}' failed: ${error.message} stack: ${error.stack}`);
39
- return {
40
- content: [{ type: "text", text: `Error: ${error.message}` }],
41
- isError: true,
42
- };
39
+ if (error instanceof robot_1.ActionableError) {
40
+ return {
41
+ content: [{ type: "text", text: `${error.message}. Please fix the issue and try again.` }],
42
+ };
43
+ }
44
+ else {
45
+ // a real exception
46
+ (0, logger_1.trace)(`Tool '${description}' failed: ${error.message} stack: ${error.stack}`);
47
+ return {
48
+ content: [{ type: "text", text: `Error: ${error.message}` }],
49
+ isError: true,
50
+ };
51
+ }
43
52
  }
44
53
  };
45
54
  server.tool(name, description, paramsSchema, args => wrappedCb(args));
46
55
  };
47
56
  let robot;
48
57
  const simulatorManager = new iphone_simulator_1.SimctlManager();
49
- tool("list_available_devices", "List all available devices. This includes both physical devices and simulators. If there is more than one device returned, you need to let the user select one of them.", {}, async ({}) => {
58
+ const requireRobot = () => {
59
+ if (!robot) {
60
+ throw new robot_1.ActionableError("No device selected. Use the mobile_use_device tool to select a device.");
61
+ }
62
+ };
63
+ tool("mobile_list_available_devices", "List all available devices. This includes both physical devices and simulators. If there is more than one device returned, you need to let the user select one of them.", {}, async ({}) => {
50
64
  const iosManager = new ios_1.IosManager();
51
65
  const devices = await simulatorManager.listBootedSimulators();
52
66
  const simulatorNames = devices.map(d => d.name);
@@ -54,11 +68,10 @@ const createMcpServer = () => {
54
68
  const iosDevices = await iosManager.listDevices();
55
69
  return `Found these iOS simulators: [${simulatorNames.join(".")}], iOS devices: [${iosDevices.join(",")}] and Android devices: [${androidDevices.join(",")}]`;
56
70
  });
57
- tool("use_device", "Select a device to use. This can be a simulator or an Android device. Use the list_available_devices tool to get a list of available devices.", {
71
+ tool("mobile_use_device", "Select a device to use. This can be a simulator or an Android device. Use the list_available_devices tool to get a list of available devices.", {
58
72
  device: zod_1.z.string().describe("The name of the device to select"),
59
73
  deviceType: zod_1.z.enum(["simulator", "ios", "android"]).describe("The type of device to select"),
60
74
  }, async ({ device, deviceType }) => {
61
- console.log(device, deviceType);
62
75
  switch (deviceType) {
63
76
  case "simulator":
64
77
  robot = simulatorManager.getSimulator(device);
@@ -73,112 +86,84 @@ const createMcpServer = () => {
73
86
  return `Selected device: ${device} (${deviceType})`;
74
87
  });
75
88
  tool("mobile_list_apps", "List all the installed apps on the device", {}, async ({}) => {
76
- if (!robot) {
77
- throw new Error("No device selected");
78
- }
89
+ requireRobot();
79
90
  const result = await robot.listApps();
80
- return `Found these packages on device: ${result.join(",")}`;
91
+ return `Found these apps on device: ${result.map(app => `${app.appName} (${app.packageName})`).join(", ")}`;
81
92
  });
82
93
  tool("mobile_launch_app", "Launch an app on mobile device. Use this to open a specific app. You can find the package name of the app by calling list_apps_on_device.", {
83
94
  packageName: zod_1.z.string().describe("The package name of the app to launch"),
84
95
  }, async ({ packageName }) => {
85
- if (!robot) {
86
- throw new Error("No device selected");
87
- }
96
+ requireRobot();
88
97
  await robot.launchApp(packageName);
89
98
  return `Launched app ${packageName}`;
90
99
  });
91
100
  tool("mobile_terminate_app", "Stop and terminate an app on mobile device", {
92
101
  packageName: zod_1.z.string().describe("The package name of the app to terminate"),
93
102
  }, async ({ packageName }) => {
94
- if (!robot) {
95
- throw new Error("No device selected");
96
- }
103
+ requireRobot();
97
104
  await robot.terminateApp(packageName);
98
105
  return `Terminated app ${packageName}`;
99
106
  });
100
107
  tool("mobile_get_screen_size", "Get the screen size of the mobile device in pixels", {}, async ({}) => {
101
- if (!robot) {
102
- throw new Error("No device selected");
103
- }
108
+ requireRobot();
104
109
  const screenSize = await robot.getScreenSize();
105
110
  return `Screen size is ${screenSize.width}x${screenSize.height} pixels`;
106
111
  });
107
112
  tool("mobile_click_on_screen_at_coordinates", "Click on the screen at given x,y coordinates", {
108
- x: zod_1.z.number().describe("The x coordinate to click between 0 and 1"),
109
- y: zod_1.z.number().describe("The y coordinate to click between 0 and 1"),
113
+ x: zod_1.z.number().describe("The x coordinate to click on the screen, in pixels"),
114
+ y: zod_1.z.number().describe("The y coordinate to click on the screen, in pixels"),
110
115
  }, async ({ x, y }) => {
111
- if (!robot) {
112
- throw new Error("No device selected");
113
- }
114
- const screenSize = await robot.getScreenSize();
115
- const x0 = Math.floor(screenSize.width * x);
116
- const y0 = Math.floor(screenSize.height * y);
117
- await robot.tap(x0, y0);
116
+ requireRobot();
117
+ await robot.tap(x, y);
118
118
  return `Clicked on screen at coordinates: ${x}, ${y}`;
119
119
  });
120
120
  tool("mobile_list_elements_on_screen", "List elements on screen and their coordinates, with display text or accessibility label. Do not cache this result.", {}, async ({}) => {
121
- if (!robot) {
122
- throw new Error("No device selected");
123
- }
124
- const screenSize = await robot.getScreenSize();
121
+ requireRobot();
125
122
  const elements = await robot.getElementsOnScreen();
126
- const result = [];
127
- for (let i = 0; i < elements.length; i++) {
128
- elements[i].rect.x0 = elements[i].rect.x0 / screenSize.width;
129
- elements[i].rect.y0 = elements[i].rect.y0 / screenSize.height;
130
- elements[i].rect.x1 = elements[i].rect.x1 / screenSize.width;
131
- elements[i].rect.y1 = elements[i].rect.y1 / screenSize.height;
132
- result.push({
133
- text: elements[i].label,
134
- coordinates: {
135
- x: (elements[i].rect.x0 + elements[i].rect.x1) / 2,
136
- y: (elements[i].rect.y0 + elements[i].rect.y1) / 2,
137
- }
138
- });
139
- }
123
+ const result = elements.map(element => {
124
+ const x = Number((element.rect.x + element.rect.width / 2)).toFixed(3);
125
+ const y = Number((element.rect.y + element.rect.height / 2)).toFixed(3);
126
+ return {
127
+ text: element.label || element.name,
128
+ coordinates: { x, y }
129
+ };
130
+ });
140
131
  return `Found these elements on screen: ${JSON.stringify(result)}`;
141
132
  });
142
133
  tool("mobile_press_button", "Press a button on device", {
143
- button: zod_1.z.string().describe("The button to press. Supported buttons: BACK, HOME, VOLUME_UP, VOLUME_DOWN, ENTER"),
134
+ button: zod_1.z.string().describe("The button to press. Supported buttons: BACK (android only), HOME, VOLUME_UP, VOLUME_DOWN, ENTER"),
144
135
  }, async ({ button }) => {
145
- if (!robot) {
146
- throw new Error("No device selected");
147
- }
148
- robot.pressButton(button);
136
+ requireRobot();
137
+ await robot.pressButton(button);
149
138
  return `Pressed the button: ${button}`;
150
139
  });
151
140
  tool("mobile_open_url", "Open a URL in browser on device", {
152
141
  url: zod_1.z.string().describe("The URL to open"),
153
142
  }, async ({ url }) => {
154
- if (!robot) {
155
- throw new Error("No device selected");
156
- }
157
- robot.openUrl(url);
143
+ requireRobot();
144
+ await robot.openUrl(url);
158
145
  return `Opened URL: ${url}`;
159
146
  });
160
147
  tool("swipe_on_screen", "Swipe on the screen", {
161
148
  direction: zod_1.z.enum(["up", "down"]).describe("The direction to swipe"),
162
149
  }, async ({ direction }) => {
163
- if (!robot) {
164
- throw new Error("No device selected");
165
- }
166
- robot.swipe(direction);
150
+ requireRobot();
151
+ await robot.swipe(direction);
167
152
  return `Swiped ${direction} on screen`;
168
153
  });
169
154
  tool("mobile_type_keys", "Type text into the focused element", {
170
155
  text: zod_1.z.string().describe("The text to type"),
171
- }, async ({ text }) => {
172
- if (!robot) {
173
- throw new Error("No device selected");
156
+ submit: zod_1.z.boolean().describe("Whether to submit the text. If true, the text will be submitted as if the user pressed the enter key."),
157
+ }, async ({ text, submit }) => {
158
+ requireRobot();
159
+ await robot.sendKeys(text);
160
+ if (submit) {
161
+ await robot.pressButton("ENTER");
174
162
  }
175
- robot.sendKeys(text);
176
163
  return `Typed text: ${text}`;
177
164
  });
178
165
  server.tool("mobile_take_screenshot", "Take a screenshot of the mobile device. Use this to understand what's on screen, if you need to press an element that is available through view hierarchy then you must list elements on screen instead. Do not cache this result.", {}, async ({}) => {
179
- if (!robot) {
180
- throw new Error("No device selected");
181
- }
166
+ requireRobot();
182
167
  try {
183
168
  const screenshot = await robot.getScreenshot();
184
169
  // Scale down the screenshot by 50%
@@ -208,6 +193,18 @@ const createMcpServer = () => {
208
193
  };
209
194
  }
210
195
  });
196
+ tool("mobile_set_orientation", "Change the screen orientation of the device", {
197
+ orientation: zod_1.z.enum(["portrait", "landscape"]).describe("The desired orientation"),
198
+ }, async ({ orientation }) => {
199
+ requireRobot();
200
+ await robot.setOrientation(orientation);
201
+ return `Changed device orientation to ${orientation}`;
202
+ });
203
+ tool("mobile_get_orientation", "Get the current screen orientation of the device", {}, async () => {
204
+ requireRobot();
205
+ const orientation = await robot.getOrientation();
206
+ return `Current device orientation is ${orientation}`;
207
+ });
211
208
  return server;
212
209
  };
213
210
  exports.createMcpServer = createMcpServer;
@@ -1,6 +1,7 @@
1
1
  "use strict";
2
2
  Object.defineProperty(exports, "__esModule", { value: true });
3
3
  exports.WebDriverAgent = void 0;
4
+ const robot_1 = require("./robot");
4
5
  class WebDriverAgent {
5
6
  host;
6
7
  port;
@@ -8,6 +9,17 @@ class WebDriverAgent {
8
9
  this.host = host;
9
10
  this.port = port;
10
11
  }
12
+ async isRunning() {
13
+ const url = `http://${this.host}:${this.port}/status`;
14
+ try {
15
+ const response = await fetch(url);
16
+ return response.status === 200;
17
+ }
18
+ catch (error) {
19
+ console.error(`Failed to connect to WebDriverAgent: ${error}`);
20
+ return false;
21
+ }
22
+ }
11
23
  async createSession() {
12
24
  const url = `http://${this.host}:${this.port}/session`;
13
25
  const response = await fetch(url, {
@@ -38,8 +50,9 @@ class WebDriverAgent {
38
50
  const response = await fetch(url);
39
51
  const json = await response.json();
40
52
  return {
41
- width: json.value.screenSize.width * json.value.scale,
42
- height: json.value.screenSize.height * json.value.scale,
53
+ width: json.value.screenSize.width,
54
+ height: json.value.screenSize.height,
55
+ scale: json.value.scale || 1,
43
56
  };
44
57
  });
45
58
  }
@@ -67,7 +80,7 @@ class WebDriverAgent {
67
80
  }
68
81
  // Type assertion to check if button is a key of _map
69
82
  if (!(button in _map)) {
70
- throw new Error(`Button "${button}" is not supported`);
83
+ throw new robot_1.ActionableError(`Button "${button}" is not supported`);
71
84
  }
72
85
  await this.withinSession(async (sessionUrl) => {
73
86
  const url = `${sessionUrl}/wda/pressButton`;
@@ -109,20 +122,29 @@ class WebDriverAgent {
109
122
  });
110
123
  });
111
124
  }
125
+ isVisible(rect) {
126
+ return rect.x >= 0 && rect.y >= 0;
127
+ }
112
128
  filterSourceElements(source) {
113
129
  const output = [];
114
- if (["TextField", "Button", "Switch"].includes(source.type)) {
115
- output.push({
116
- type: source.type,
117
- label: source.label,
118
- name: source.name,
119
- rect: {
120
- x0: source.rect.x,
121
- y0: source.rect.y,
122
- x1: source.rect.x + source.rect.width,
123
- y1: source.rect.y + source.rect.height,
124
- },
125
- });
130
+ const acceptedTypes = ["TextField", "Button", "Switch", "Icon", "SearchField"];
131
+ if (acceptedTypes.includes(source.type)) {
132
+ if (source.isVisible === "1" && this.isVisible(source.rect)) {
133
+ if (source.label !== null || source.name !== null) {
134
+ output.push({
135
+ type: source.type,
136
+ label: source.label,
137
+ name: source.name,
138
+ value: source.value,
139
+ rect: {
140
+ x: source.rect.x,
141
+ y: source.rect.y,
142
+ width: source.rect.width,
143
+ height: source.rect.height,
144
+ },
145
+ });
146
+ }
147
+ }
126
148
  }
127
149
  if (source.children) {
128
150
  for (const child of source.children) {
@@ -141,5 +163,69 @@ class WebDriverAgent {
141
163
  const source = await this.getPageSource();
142
164
  return this.filterSourceElements(source.value);
143
165
  }
166
+ async openUrl(url) {
167
+ await this.withinSession(async (sessionUrl) => {
168
+ await fetch(`${sessionUrl}/url`, {
169
+ method: "POST",
170
+ body: JSON.stringify({ url }),
171
+ });
172
+ });
173
+ }
174
+ async swipe(direction) {
175
+ await this.withinSession(async (sessionUrl) => {
176
+ const x0 = 200;
177
+ let y0 = 600;
178
+ const x1 = 200;
179
+ let y1 = 200;
180
+ if (direction === "up") {
181
+ const tmp = y0;
182
+ y0 = y1;
183
+ y1 = tmp;
184
+ }
185
+ const url = `${sessionUrl}/actions`;
186
+ await fetch(url, {
187
+ method: "POST",
188
+ headers: {
189
+ "Content-Type": "application/json",
190
+ },
191
+ body: JSON.stringify({
192
+ actions: [
193
+ {
194
+ type: "pointer",
195
+ id: "finger1",
196
+ parameters: { pointerType: "touch" },
197
+ actions: [
198
+ { type: "pointerMove", duration: 0, x: x0, y: y0 },
199
+ { type: "pointerDown", button: 0 },
200
+ { type: "pointerMove", duration: 0, x: x1, y: y1 },
201
+ { type: "pause", duration: 1000 },
202
+ { type: "pointerUp", button: 0 }
203
+ ]
204
+ }
205
+ ]
206
+ }),
207
+ });
208
+ });
209
+ }
210
+ async setOrientation(orientation) {
211
+ await this.withinSession(async (sessionUrl) => {
212
+ const url = `${sessionUrl}/orientation`;
213
+ await fetch(url, {
214
+ method: "POST",
215
+ headers: { "Content-Type": "application/json" },
216
+ body: JSON.stringify({
217
+ orientation: orientation.toUpperCase()
218
+ })
219
+ });
220
+ });
221
+ }
222
+ async getOrientation() {
223
+ return this.withinSession(async (sessionUrl) => {
224
+ const url = `${sessionUrl}/orientation`;
225
+ const response = await fetch(url);
226
+ const json = await response.json();
227
+ return json.value.toLowerCase();
228
+ });
229
+ }
144
230
  }
145
231
  exports.WebDriverAgent = WebDriverAgent;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@mobilenext/mobile-mcp",
3
- "version": "0.0.11",
3
+ "version": "0.0.13",
4
4
  "description": "Mobile MCP",
5
5
  "repository": {
6
6
  "type": "git",
@@ -13,7 +13,7 @@
13
13
  "scripts": {
14
14
  "build": "tsc && chmod +x lib/index.js",
15
15
  "lint": "eslint .",
16
- "test": "mocha --require ts-node/register test/*.ts",
16
+ "test": "nyc mocha --require ts-node/register test/*.ts",
17
17
  "watch": "tsc --watch",
18
18
  "clean": "rm -rf lib",
19
19
  "prepare": "husky"
@@ -24,6 +24,7 @@
24
24
  "dependencies": {
25
25
  "@modelcontextprotocol/sdk": "^1.6.1",
26
26
  "fast-xml-parser": "^5.0.9",
27
+ "nyc": "^17.1.0",
27
28
  "sharp": "^0.33.5",
28
29
  "zod-to-json-schema": "^3.24.4"
29
30
  },