@blitzdev/iphone-mcp 0.1.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53):
  1. package/LICENSE +21 -0
  2. package/README.md +219 -0
  3. package/dist/child-env.d.ts +1 -0
  4. package/dist/child-env.js +61 -0
  5. package/dist/child-env.js.map +1 -0
  6. package/dist/cli.d.ts +2 -0
  7. package/dist/cli.js +318 -0
  8. package/dist/cli.js.map +1 -0
  9. package/dist/device-client.d.ts +2 -0
  10. package/dist/device-client.js +15 -0
  11. package/dist/device-client.js.map +1 -0
  12. package/dist/execution-context.d.ts +33 -0
  13. package/dist/execution-context.js +66 -0
  14. package/dist/execution-context.js.map +1 -0
  15. package/dist/idb/ax-scan-client.d.ts +27 -0
  16. package/dist/idb/ax-scan-client.js +244 -0
  17. package/dist/idb/ax-scan-client.js.map +1 -0
  18. package/dist/idb/idb-client.d.ts +34 -0
  19. package/dist/idb/idb-client.js +288 -0
  20. package/dist/idb/idb-client.js.map +1 -0
  21. package/dist/index.d.ts +1 -0
  22. package/dist/index.js +15 -0
  23. package/dist/index.js.map +1 -0
  24. package/dist/logger.d.ts +3 -0
  25. package/dist/logger.js +6 -0
  26. package/dist/logger.js.map +1 -0
  27. package/dist/mcp-server.d.ts +2 -0
  28. package/dist/mcp-server.js +649 -0
  29. package/dist/mcp-server.js.map +1 -0
  30. package/dist/types.d.ts +75 -0
  31. package/dist/types.js +7 -0
  32. package/dist/types.js.map +1 -0
  33. package/dist/ui-filters.d.ts +11 -0
  34. package/dist/ui-filters.js +100 -0
  35. package/dist/ui-filters.js.map +1 -0
  36. package/dist/viewer/server.d.ts +5 -0
  37. package/dist/viewer/server.js +233 -0
  38. package/dist/viewer/server.js.map +1 -0
  39. package/dist/wda/device-discovery.d.ts +12 -0
  40. package/dist/wda/device-discovery.js +91 -0
  41. package/dist/wda/device-discovery.js.map +1 -0
  42. package/dist/wda/wda-client.d.ts +38 -0
  43. package/dist/wda/wda-client.js +314 -0
  44. package/dist/wda/wda-client.js.map +1 -0
  45. package/dist/wda/wda-manager.d.ts +29 -0
  46. package/dist/wda/wda-manager.js +263 -0
  47. package/dist/wda/wda-manager.js.map +1 -0
  48. package/dist/wda/wda-scan.d.ts +3 -0
  49. package/dist/wda/wda-scan.js +41 -0
  50. package/dist/wda/wda-scan.js.map +1 -0
  51. package/package.json +40 -0
  52. package/src/idb/ax-scan/Makefile +30 -0
  53. package/src/idb/ax-scan/ax-scan.m +168 -0
@@ -0,0 +1,649 @@
1
+ import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
2
+ import { z } from 'zod';
3
+ import { promises as fs } from 'node:fs';
4
+ import path from 'node:path';
5
+ import os from 'node:os';
6
+ import { execFile } from 'node:child_process';
7
+ import { resolveBootedUdid } from './idb/idb-client.js';
8
+ import { AXScanClient } from './idb/ax-scan-client.js';
9
+ import { getDeviceClient } from './device-client.js';
10
+ import { isPhysicalDeviceUdid } from './types.js';
11
+ import { wdaScanGrid } from './wda/wda-scan.js';
12
+ import { listPhysicalDevices } from './wda/device-discovery.js';
13
+ import { applyScanUiFilters, applyDescribeScreenFilters } from './ui-filters.js';
14
+ import { detectExecutionContext } from './execution-context.js';
15
+ import { wdaManager } from './wda/wda-manager.js';
16
+ import { childEnv } from './child-env.js';
17
+ import { log } from './logger.js';
// ---------------------------------------------------------------------------
// Parameter schemas for the device actions accepted by the `device_action`
// and `device_actions` tools. Each action's `params` object is validated
// against the matching schema right before the action is executed.
// ---------------------------------------------------------------------------

/** Required numeric field carrying the given description. */
const requiredNumber = (label) => z.number().describe(label);

/** Optional numeric field carrying the given description. */
const optionalNumber = (label) => z.number().optional().describe(label);

/** A key is given either as a number or as a string. */
const keyValue = () => z.union([z.number(), z.string()]);

/** Names of every action the action tools accept. */
const ACTION_NAMES = ['tap', 'swipe', 'button', 'input-text', 'key', 'key-sequence'];

/** Names of the buttons the `button` action accepts. */
const BUTTON_NAMES = ['HOME', 'LOCK', 'SIDE_BUTTON', 'APPLE_PAY', 'SIRI'];

// params for action "tap"
const tapParamsSchema = z.object({
    x: requiredNumber('X coordinate to tap'),
    y: requiredNumber('Y coordinate to tap'),
    duration: optionalNumber('Tap duration in seconds'),
});

// params for action "swipe"
const swipeParamsSchema = z.object({
    fromX: requiredNumber('Starting X coordinate'),
    fromY: requiredNumber('Starting Y coordinate'),
    toX: requiredNumber('Ending X coordinate'),
    toY: requiredNumber('Ending Y coordinate'),
    duration: optionalNumber('Swipe duration in seconds'),
    delta: optionalNumber('Pixels between touch points'),
});

// params for action "button"
const buttonParamsSchema = z.object({
    button: z.enum(BUTTON_NAMES).describe('Button to press'),
    duration: optionalNumber('Press duration in seconds'),
});

// params for action "input-text"
const inputTextParamsSchema = z.object({
    text: z.string().describe('Text to type'),
});

// params for action "key"
const keyParamsSchema = z.object({
    key: keyValue().describe('HID keycode (number) or character (string)'),
    duration: optionalNumber('Key press duration in seconds'),
});

// params for action "key-sequence"
const keySequenceParamsSchema = z.object({
    keySequence: z.array(keyValue()).describe('Sequence of HID keycodes or characters'),
});

// Optional follow-up request: what to describe once the action(s) have run.
const describeAfterSchema = z
    .object({
        point: z
            .object({ x: z.number(), y: z.number() })
            .optional()
            .describe('Describe element at this point after action'),
        all: z.boolean().optional().describe('Describe all elements on screen after action'),
        delay: optionalNumber('Delay in ms before capturing screen state (default: 500)'),
    })
    .optional();

// One entry of the `actions` array taken by `device_actions`. `params` is
// accepted loosely here and validated per action when it is executed.
const singleActionSchema = z.object({
    action: z.enum(ACTION_NAMES).describe('Type of action to perform'),
    params: z.record(z.string(), z.unknown()).describe('Action-specific parameters'),
});
/** Fallback screen dimensions used when a device's size query fails. */
const DEFAULT_SCREEN_SIZE = { width: 393, height: 852 };

/** Extracts a printable message from any thrown value. */
function describeError(error) {
    return error instanceof Error ? error.message : String(error);
}

/** Wraps plain text in a successful MCP tool result. */
function textResult(text) {
    return { content: [{ type: 'text', text }] };
}

/** Wraps a value, pretty-printed as JSON, in a successful MCP tool result. */
function jsonResult(value) {
    return textResult(JSON.stringify(value, null, 2));
}

/** Builds an MCP tool error result of the form "<prefix>: <error message>". */
function errorResult(prefix, error) {
    return {
        content: [{ type: 'text', text: `${prefix}: ${describeError(error)}` }],
        isError: true,
    };
}

/** Promise adapter around `execFile`; resolves with the command's stdout. */
function runFile(command, args, options) {
    return new Promise((resolve, reject) => {
        execFile(command, args, options, (error, stdout) => {
            if (error) {
                reject(error);
            }
            else {
                resolve(stdout);
            }
        });
    });
}

/**
 * Runs `readSize` and returns its `{ width, height }`; any failure (including
 * a synchronous throw from `readSize`) yields the default dimensions instead.
 */
async function readScreenSize(readSize) {
    try {
        const { width, height } = await readSize();
        return { width, height };
    }
    catch {
        // Best effort: filtering still works against the default dimensions.
        return { ...DEFAULT_SCREEN_SIZE };
    }
}

/**
 * Validates `params` for `action`, performs it on `client`, and returns a
 * one-line summary. Returns `undefined` for an action name with no handler.
 * Throws if validation or the underlying client call fails.
 */
async function performAction(client, action, params) {
    switch (action) {
        case 'tap': {
            const p = tapParamsSchema.parse(params);
            await client.tap(p.x, p.y, p.duration);
            return `Tapped at (${p.x}, ${p.y})`;
        }
        case 'swipe': {
            const p = swipeParamsSchema.parse(params);
            await client.swipe(p.fromX, p.fromY, p.toX, p.toY, p.duration, p.delta);
            return `Swiped from (${p.fromX}, ${p.fromY}) to (${p.toX}, ${p.toY})`;
        }
        case 'button': {
            const p = buttonParamsSchema.parse(params);
            await client.pressButton(p.button, p.duration);
            return `Pressed ${p.button} button`;
        }
        case 'input-text': {
            const p = inputTextParamsSchema.parse(params);
            await client.inputText(p.text);
            return `Typed text: "${p.text}"`;
        }
        case 'key': {
            const p = keyParamsSchema.parse(params);
            await client.pressKey(p.key, p.duration);
            return `Pressed key: ${p.key}`;
        }
        case 'key-sequence': {
            const p = keySequenceParamsSchema.parse(params);
            await client.pressKeySequence(p.keySequence);
            return `Pressed key sequence: ${p.keySequence.join(', ')}`;
        }
        default:
            return undefined;
    }
}

/**
 * Implements the optional `describe_after` request: waits `delay` ms
 * (default 500), then describes the whole screen (`all`) or a single point
 * (`point`). Returns `null` when nothing was requested.
 */
async function describeAfterAction(client, describeAfter) {
    if (!describeAfter) {
        return null;
    }
    await new Promise((resolve) => setTimeout(resolve, describeAfter.delay ?? 500));
    if (describeAfter.all) {
        return await client.describeAll(false);
    }
    if (describeAfter.point) {
        return await client.describePoint(describeAfter.point.x, describeAfter.point.y, false);
    }
    return null;
}

/**
 * Creates the MCP server and registers every iPhone/simulator tool on it.
 *
 * Each tool handler catches its own failures and reports them as an
 * `isError` tool result instead of throwing.
 *
 * @param viewerPort Port used to build the `viewer_url` returned by
 *   `setup_device`; also passed to `detectExecutionContext`.
 * @returns The configured `McpServer` instance (not yet connected to a transport).
 */
export function createMcpServer(viewerPort) {
    const server = new McpServer({
        name: '@blitzdev/iphone-mcp',
        // NOTE(review): the package header lists 0.1.7 while this reports 0.1.0 —
        // confirm whether the server version is meant to track the package version.
        version: '0.1.0',
    });

    server.registerTool('describe_screen', {
        description: `Get the full UI element hierarchy of the current screen. Returns ALL element types (buttons, text, images, containers, etc.) that are currently visible on screen.

Filters applied automatically:
- Off-screen elements are excluded
- Generic unlabeled container nodes are excluded

For finding tappable elements specifically, prefer scan_ui instead.`,
        inputSchema: {
            udid: z.string().optional().describe('Device identifier (default: "booted" for current simulator)'),
            nested: z.boolean().optional().describe('Include nested element hierarchy'),
        },
    }, async ({ udid = 'booted', nested = false }) => {
        log('MCP', 'log', `describe_screen udid=${udid} nested=${nested}`);
        try {
            const resolvedUdid = udid === 'booted' ? await resolveBootedUdid() : udid;
            const client = await getDeviceClient(resolvedUdid);
            const raw = await client.describeAll(nested);
            // Physical devices report their size through the device client,
            // simulators through the AX scan client.
            const screen = await readScreenSize(() => (isPhysicalDeviceUdid(resolvedUdid)
                ? client.getWindowSize()
                : AXScanClient.getInstance(resolvedUdid).getScreenSize()));
            const rawArray = Array.isArray(raw) ? raw : [raw];
            return jsonResult(applyDescribeScreenFilters(rawArray, screen.width, screen.height));
        }
        catch (error) {
            return errorResult('Error describing screen', error);
        }
    });

    server.registerTool('device_action', {
        description: `Execute a single device action on the iPhone.

Actions available:
- tap: Tap at coordinates { x, y, duration? }
- swipe: Swipe gesture { fromX, fromY, toX, toY, duration?, delta? }
- button: Press button { button: 'HOME'|'LOCK'|'SIDE_BUTTON'|'APPLE_PAY'|'SIRI', duration? }
- input-text: Type text { text }
- key: Press key { key: number (HID keycode) | string (character), duration? }
- key-sequence: Press key sequence { keySequence: (number|string)[] }

Use describe_after to see the screen state after the action.`,
        inputSchema: {
            action: z.enum(['tap', 'swipe', 'button', 'input-text', 'key', 'key-sequence']).describe('Type of action'),
            params: z.record(z.string(), z.unknown()).describe('Action parameters (depends on action type)'),
            udid: z.string().optional().describe('Device identifier (default: "booted")'),
            describe_after: describeAfterSchema.describe('Optional: describe screen after action'),
        },
    }, async ({ action, params, udid = 'booted', describe_after }) => {
        log('MCP', 'log', `device_action action=${action} udid=${udid}`);
        try {
            const client = await getDeviceClient(udid);
            const summary = await performAction(client, action, params);
            const result = { action_result: summary ?? 'Action completed successfully' };
            const description = await describeAfterAction(client, describe_after);
            if (description) {
                result.screen_description = description;
            }
            return jsonResult(result);
        }
        catch (error) {
            return errorResult(`Error executing ${action}`, error);
        }
    });

    server.registerTool('device_actions', {
        description: `Execute multiple device actions in sequence on the iPhone.

Each action in the array should have:
- action: 'tap' | 'swipe' | 'button' | 'input-text' | 'key' | 'key-sequence'
- params: Action-specific parameters

Use describe_after to see the screen state after all actions complete.`,
        inputSchema: {
            actions: z.array(singleActionSchema).describe('Array of actions to execute in sequence'),
            udid: z.string().optional().describe('Device identifier (default: "booted")'),
            describe_after: describeAfterSchema.describe('Optional: describe screen after all actions'),
        },
    }, async ({ actions, udid = 'booted', describe_after }) => {
        log('MCP', 'log', `device_actions count=${actions.length} udid=${udid}`);
        try {
            const client = await getDeviceClient(udid);
            const results = [];
            for (const [index, { action, params }] of actions.entries()) {
                try {
                    const summary = await performAction(client, action, params);
                    if (summary !== undefined) {
                        results.push(summary);
                    }
                }
                catch (error) {
                    // Earlier actions have already changed device state, so report
                    // which step failed and what ran before it.
                    const completed = results.length > 0
                        ? `; completed before failure: ${results.join('; ')}`
                        : '';
                    throw new Error(`action ${index + 1} of ${actions.length} (${action}) failed: ${describeError(error)}${completed}`, { cause: error });
                }
            }
            const result = { action_results: results };
            const description = await describeAfterAction(client, describe_after);
            if (description) {
                result.screen_description = description;
            }
            return jsonResult(result);
        }
        catch (error) {
            return errorResult('Error executing actions', error);
        }
    });

    server.registerTool('get_screenshot', {
        description: 'Capture a screenshot of the current iPhone screen. Returns the file path to a PNG image.',
        inputSchema: {
            udid: z.string().optional().describe('Device identifier (default: "booted")'),
        },
    }, async ({ udid = 'booted' }) => {
        log('MCP', 'log', `get_screenshot udid=${udid}`);
        let rawFile = null;
        try {
            const timestamp = Date.now();
            rawFile = path.join(os.tmpdir(), `blitz-screenshot-${timestamp}.png`);
            const resizedFile = path.join(os.tmpdir(), `blitz-screenshot-${timestamp}-sm.png`);
            if (isPhysicalDeviceUdid(udid)) {
                const client = await getDeviceClient(udid);
                const pngBuffer = await client.screenshot();
                await fs.writeFile(rawFile, pngBuffer);
            }
            else {
                await runFile('xcrun', ['simctl', 'io', udid, 'screenshot', '--type=png', rawFile], { env: childEnv(), timeout: 10000 });
            }
            const sizeOutput = await runFile('sips', ['-g', 'pixelWidth', '-g', 'pixelHeight', rawFile], { timeout: 5000 });
            const widthMatch = sizeOutput.match(/pixelWidth:\s*(\d+)/);
            const heightMatch = sizeOutput.match(/pixelHeight:\s*(\d+)/);
            if (!widthMatch || !heightMatch) {
                throw new Error(`Could not read image dimensions from sips output: ${sizeOutput.trim()}`);
            }
            // The image is reduced to one third of its pixel size in each dimension.
            // NOTE(review): presumably this maps pixels to points on 3x displays —
            // confirm the factor for devices with a different scale.
            const targetWidth = Math.round(Number(widthMatch[1]) / 3);
            const targetHeight = Math.round(Number(heightMatch[1]) / 3);
            await runFile('sips', ['--resampleWidth', String(targetWidth), '--resampleHeight', String(targetHeight), rawFile, '--out', resizedFile], { timeout: 5000 });
            return textResult(resizedFile);
        }
        catch (error) {
            return errorResult('Error capturing screenshot', error);
        }
        finally {
            // Only the resized file is handed back to the caller; remove the
            // full-size intermediate so temp files do not pile up.
            if (rawFile) {
                await fs.unlink(rawFile).catch(() => { });
            }
        }
    });

    server.registerTool('scan_ui', {
        description: `Find interactive UI elements (buttons, links, text fields, switches, icons, etc.) on the current screen. Returns only tappable/interactive elements with their coordinates.

Use the "query" parameter to search for a specific element by label (e.g. "Add to Cart", "Settings"). When a query is provided:
- First searches visible interactive elements matching the query
- If not found on-screen, searches off-screen elements and warns you to scroll
- If no interactive match, falls back to all visible interactive elements

Without a query, returns all visible interactive elements on screen.

Region options optimize scan time:
- "top-left" / "top-right" / "bottom-left" / "bottom-right": ~250ms
- "top-half" / "bottom-half": ~500ms
- "full": ~1s (entire screen)

For the complete element tree (all types), use describe_screen instead.`,
        inputSchema: {
            region: z.enum(['full', 'top-half', 'bottom-half', 'top-left', 'top-right', 'bottom-left', 'bottom-right'])
                .describe('Screen region to scan'),
            query: z.string().optional().describe('Search for elements matching this text (case-insensitive)'),
            udid: z.string().optional().describe('Device identifier (default: "booted")'),
        },
    }, async ({ region, query, udid = 'booted' }) => {
        log('MCP', 'log', `scan_ui region=${region} query=${query ?? '(none)'} udid=${udid}`);
        try {
            const resolvedUdid = udid === 'booted' ? await resolveBootedUdid() : udid;
            let rawElements;
            let screen;
            if (isPhysicalDeviceUdid(resolvedUdid)) {
                const client = await getDeviceClient(resolvedUdid);
                rawElements = await wdaScanGrid(client, region);
                screen = await readScreenSize(() => client.getWindowSize());
            }
            else {
                const client = AXScanClient.getInstance(resolvedUdid);
                rawElements = await client.scan(region);
                screen = await readScreenSize(() => client.getScreenSize());
            }
            const { elements, warning } = applyScanUiFilters(rawElements, screen.width, screen.height, query);
            const content = [];
            if (warning) {
                content.push({ type: 'text', text: `Warning: ${warning}` });
            }
            content.push({ type: 'text', text: JSON.stringify(elements, null, 2) });
            return { content };
        }
        catch (error) {
            return errorResult('Error scanning UI', error);
        }
    });

    server.registerTool('list_devices', {
        description: 'List all available iPhones and simulators.',
        inputSchema: {},
    }, async () => {
        log('MCP', 'log', 'list_devices');
        try {
            const simulators = [];
            try {
                const stdout = await runFile('xcrun', ['simctl', 'list', 'devices', 'booted', '-j'], { timeout: 10000 });
                const data = JSON.parse(stdout);
                for (const runtime of Object.values(data.devices)) {
                    for (const device of runtime) {
                        if (device.state === 'Booted') {
                            simulators.push({ udid: device.udid, name: device.name, state: device.state });
                        }
                    }
                }
            }
            catch {
                // simctl not available — physical devices are still reported below.
            }
            const physicalDevices = await listPhysicalDevices();
            return jsonResult({ simulators, physicalDevices });
        }
        catch (error) {
            return errorResult('Error listing devices', error);
        }
    });

    server.registerTool('get_execution_context', {
        description: `Get the current execution context — which iPhone(s) or simulators are available.

Call this first to discover available devices. Returns:
- target: 'simulator' — one simulator booted, use the returned udid
- target: 'device' — one physical device connected, use the returned udid. Inform user about viewer_url for screen viewing.
- target: 'ambiguous' — multiple devices found. Ask the user which one to use.
- target: 'none' — no devices. Tell user to boot a simulator or connect an iPhone.

Pass the returned udid to all subsequent tool calls.`,
        inputSchema: {},
    }, async () => {
        log('MCP', 'log', 'get_execution_context');
        try {
            const ctx = await detectExecutionContext(viewerPort);
            if (ctx.target === 'simulator') {
                let screenSize = null;
                try {
                    screenSize = await AXScanClient.getInstance(ctx.udid).getScreenSize();
                }
                catch { /* unavailable — reported as null */ }
                return jsonResult({
                    target: 'simulator',
                    udid: ctx.udid,
                    name: ctx.name,
                    screen_size: screenSize,
                });
            }
            if (ctx.target === 'device') {
                let screenSize = null;
                try {
                    const client = await getDeviceClient(ctx.udid);
                    screenSize = await client.getWindowSize();
                }
                catch { /* unavailable — reported as null */ }
                return jsonResult({
                    target: 'device',
                    udid: ctx.udid,
                    device_name: ctx.name,
                    model: ctx.model,
                    connection_type: ctx.connectionType,
                    viewer_url: ctx.viewerUrl,
                    screen_size: screenSize,
                });
            }
            if (ctx.target === 'ambiguous') {
                return jsonResult({
                    target: 'ambiguous',
                    message: 'Multiple devices found. Ask the user which device to target.',
                    simulators: ctx.simulators,
                    physical_devices: ctx.physicalDevices,
                });
            }
            return jsonResult({ target: 'none', message: ctx.message });
        }
        catch (error) {
            return errorResult('Error getting execution context', error);
        }
    });

    server.registerTool('setup_device', {
        description: `Build, install, and launch WebDriverAgent on a physical iPhone. This is required before any other tool can interact with a physical device.

Call this when get_execution_context shows a physical device with wdaRunning: false. The process takes 1-3 minutes (building WDA, installing on device, establishing connection).

Prerequisites:
- iPhone connected via USB and trusted
- Developer Mode enabled on iPhone (Settings > Privacy & Security > Developer Mode)
- Apple ID signed into Xcode (Xcode > Settings > Accounts)

After setup completes, use the returned udid for all subsequent tool calls. Also inform the user about the viewer_url where they can see the device screen.`,
        inputSchema: {
            udid: z.string().describe('Physical device UDID from list_devices or get_execution_context'),
        },
    }, async ({ udid }, extra) => {
        log('MCP', 'log', `setup_device udid=${udid}`);
        try {
            const progressToken = extra._meta?.progressToken;
            // A progress token may be a number, so 0 is valid: test for presence,
            // not truthiness.
            const wantsProgress = progressToken !== undefined && progressToken !== null;
            const steps = {
                connecting: 1,
                building_wda: 2,
                installing_wda: 3,
                establishing_connection: 4,
                ready: 5,
            };
            const totalSteps = 5;
            const progressMessages = [];
            const client = await wdaManager.setupDevice(udid, (progress) => {
                progressMessages.push(`[${progress.step}] ${progress.message}`);
                if (wantsProgress) {
                    // Progress reporting is best effort; a failed notification must
                    // not abort the setup itself.
                    extra.sendNotification({
                        method: 'notifications/progress',
                        params: {
                            progressToken,
                            progress: steps[progress.step] ?? 0,
                            total: totalSteps,
                            message: progress.message,
                        },
                    }).catch(() => { });
                }
            });
            let screenSize = null;
            try {
                screenSize = await client.getWindowSize();
            }
            catch { /* unavailable — reported as null */ }
            const viewerUrl = `http://localhost:${viewerPort}?udid=${encodeURIComponent(udid)}`;
            const summary = JSON.stringify({
                status: 'connected',
                udid,
                viewer_url: viewerUrl,
                screen_size: screenSize,
                setup_log: progressMessages,
            }, null, 2);
            return textResult(`${summary}\n\nIMPORTANT: Tell the user to open this URL to see the device screen: ${viewerUrl}`);
        }
        catch (error) {
            return errorResult('Error setting up device', error);
        }
    });

    server.registerTool('launch_app', {
        description: 'Launch an app on the iPhone by bundle ID.',
        inputSchema: {
            bundleId: z.string().describe('The bundle identifier of the app to launch (e.g. "com.apple.mobilesafari")'),
            udid: z.string().optional().describe('Device identifier (default: "booted")'),
        },
    }, async ({ bundleId, udid = 'booted' }) => {
        log('MCP', 'log', `launch_app bundleId=${bundleId} udid=${udid}`);
        try {
            if (isPhysicalDeviceUdid(udid)) {
                const client = await getDeviceClient(udid);
                await client.activateApp(bundleId);
            }
            else {
                const { getIDBClient } = await import('./idb/idb-client.js');
                await getIDBClient(udid).launch(bundleId);
            }
            return textResult(`Launched ${bundleId}`);
        }
        catch (error) {
            return errorResult('Error launching app', error);
        }
    });

    server.registerTool('list_apps', {
        description: 'List installed apps on the iPhone.',
        inputSchema: {
            udid: z.string().optional().describe('Device identifier (default: "booted")'),
        },
    }, async ({ udid = 'booted' }) => {
        log('MCP', 'log', `list_apps udid=${udid}`);
        try {
            if (isPhysicalDeviceUdid(udid)) {
                return textResult('list_apps is not yet supported for physical devices via WDA.');
            }
            const { getIDBClient } = await import('./idb/idb-client.js');
            const apps = await getIDBClient(udid).listApps();
            const userApps = apps.filter((app) => app.type === 'User');
            const systemApps = apps.filter((app) => app.type === 'System');
            // One heading line per group followed by one line per app.
            const formatGroup = (title, group) => `${title} (${group.length}):\n`
                + group.map((app) => ` ${app.name} — ${app.bundleId}\n`).join('');
            const text = `${formatGroup('User apps', userApps)}\n${formatGroup('System apps', systemApps)}`;
            return textResult(text.trim());
        }
        catch (error) {
            return errorResult('Error listing apps', error);
        }
    });

    return server;
}
649
+ //# sourceMappingURL=mcp-server.js.map