@sensaiorg/adapter-ios 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1055 @@
1
+ /**
2
+ * iOS MCP Tools — registered when an iOS Simulator is connected.
3
+ *
4
+ * Uses xcrun simctl for all operations.
5
+ */
6
+
7
+ import type { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
8
+ import { z } from "zod";
9
+ import type { SimctlClient } from "../transport/simctl-client.js";
10
+
11
/**
 * Pause an async flow for a fixed amount of time.
 *
 * @param ms - Delay in milliseconds before the returned promise settles.
 * @returns A promise that resolves (with no value) once the timer fires.
 */
function sleep(ms: number): Promise<void> {
  return new Promise<void>((wake) => {
    setTimeout(wake, ms);
  });
}
15
+
16
/** Parsed accessibility element from idb describe-all output */
interface IdbAccessibilityElement {
  /** Element type/class name, or null when none could be extracted. */
  type: string | null;
  /**
   * Label (or title/text) of the element, or null when none could be extracted.
   * For fallback entries this holds the trimmed raw line instead.
   */
  label: string | null;
  /** Value of the element, or null when none could be extracted. */
  value: string | null;
  /** Origin (x, y) and size (w, h); null when no frame could be parsed. */
  frame: { x: number; y: number; w: number; h: number } | null;
  /** Defaults to true; false only when the output explicitly says so. */
  enabled: boolean;
  /** Present only on fallback entries: the trimmed line that could not be parsed. */
  rawLine?: string;
}

// Patterns are hoisted so they are compiled once rather than once per line.
const IDB_TYPE_RE = /(?:Type|AXType|class)\s*[:=]\s*["']?([^"',}\n]+)/i;
// Label/value: a fully quoted string wins (so commas and the other quote
// character survive inside it); otherwise fall back to a bare token that ends
// at a comma, closing brace or end of line.
const IDB_LABEL_RE =
  /(?:Label|AXLabel|title|text)\s*[:=]\s*(?:"([^"\n]+)"|'([^'\n]+)'|["']?([^"'\n}]+?)["']?\s*(?:,|$|\}))/i;
const IDB_VALUE_RE =
  /(?:Value|AXValue)\s*[:=]\s*(?:"([^"\n]+)"|'([^'\n]+)'|["']?([^"'\n}]+?)["']?\s*(?:,|$|\}))/i;
// `{{x, y}, {w, h}}` — a leading minus sign is accepted on every number.
const IDB_FRAME_BODY_RE =
  /\{\{?\s*(-?[\d.]+)\s*,\s*(-?[\d.]+)\s*\}\s*,\s*\{?\s*(-?[\d.]+)\s*,\s*(-?[\d.]+)/;
const IDB_FRAME_RE =
  /(?:Frame|AXFrame)\s*[:=]\s*\{\{?\s*(-?[\d.]+)\s*,\s*(-?[\d.]+)\s*\}\s*,\s*\{?\s*(-?[\d.]+)\s*,\s*(-?[\d.]+)/i;
const IDB_ENABLED_RE = /(?:Enabled|AXEnabled)\s*[:=]\s*(true|false|1|0|yes|no)/i;

/** Trim a captured string; empty or missing captures become null. */
function idbNonEmpty(text: string | undefined): string | null {
  const trimmed = text?.trim() ?? "";
  return trimmed.length > 0 ? trimmed : null;
}

/** Build a frame from four regex captures; null if any of them is not a finite number. */
function idbFrameFromMatch(m: RegExpExecArray): IdbAccessibilityElement["frame"] {
  const [x, y, w, h] = [m[1], m[2], m[3], m[4]].map((n) => parseFloat(n ?? ""));
  if (x === undefined || y === undefined || w === undefined || h === undefined) return null;
  return [x, y, w, h].every(Number.isFinite) ? { x, y, w, h } : null;
}

/** Coerce a JSON number (or numeric string) to a finite number, defaulting to 0. */
function idbNumber(v: unknown): number {
  const n = typeof v === "number" ? v : typeof v === "string" ? parseFloat(v) : NaN;
  return Number.isFinite(n) ? n : 0;
}

/** First non-empty string (or finite number, stringified) found under the given keys. */
function idbFirstText(obj: Record<string, unknown>, keys: readonly string[]): string | null {
  for (const key of keys) {
    const v = obj[key];
    if (typeof v === "string" && v.length > 0) return v;
    // Stringify numbers so callers can safely use string methods on label/value.
    if (typeof v === "number" && Number.isFinite(v)) return String(v);
  }
  return null;
}

/** Read a frame from a JSON element: a `frame` object, or a `{{x, y}, {w, h}}` string. */
function idbFrameFromJson(obj: Record<string, unknown>): IdbAccessibilityElement["frame"] {
  const rect = obj["frame"];
  if (typeof rect === "object" && rect !== null) {
    const r = rect as Record<string, unknown>;
    return {
      x: idbNumber(r["x"]),
      y: idbNumber(r["y"]),
      w: idbNumber(r["width"] ?? r["w"]),
      h: idbNumber(r["height"] ?? r["h"]),
    };
  }
  for (const candidate of [rect, obj["AXFrame"]]) {
    if (typeof candidate !== "string") continue;
    const m = IDB_FRAME_BODY_RE.exec(candidate);
    const parsed = m ? idbFrameFromMatch(m) : null;
    if (parsed) return parsed;
  }
  return null;
}

/** Convert one parsed JSON value into an element; null unless it is a plain object. */
function idbElementFromJson(node: unknown): IdbAccessibilityElement | null {
  if (typeof node !== "object" || node === null || Array.isArray(node)) return null;
  const obj = node as Record<string, unknown>;

  // Enabled unless explicitly switched off (boolean, 0, or "false"/"0"/"no").
  const flag = obj["enabled"] ?? obj["AXEnabled"];
  const disabled =
    flag === false ||
    flag === 0 ||
    (typeof flag === "string" && ["false", "0", "no"].includes(flag.trim().toLowerCase()));

  return {
    type: idbFirstText(obj, ["type", "AXType", "class"]),
    label: idbFirstText(obj, ["label", "AXLabel", "title", "text"]),
    value: idbFirstText(obj, ["value", "AXValue"]),
    frame: idbFrameFromJson(obj),
    enabled: !disabled,
  };
}

/**
 * Try to read `text` as a JSON object or JSON array of objects.
 * Returns null when the text is not JSON (or holds no objects), so the caller
 * can fall back to the key/value text parser.
 */
function idbElementsFromJsonText(text: string): IdbAccessibilityElement[] | null {
  const trimmed = text.trim();
  if (!trimmed.startsWith("{") && !trimmed.startsWith("[")) return null;

  let parsed: unknown;
  try {
    parsed = JSON.parse(trimmed);
  } catch {
    return null; // not JSON — e.g. `{Type: Button, ...}` text that merely starts with a brace
  }

  if (Array.isArray(parsed)) {
    const found = parsed
      .map(idbElementFromJson)
      .filter((e): e is IdbAccessibilityElement => e !== null);
    // A non-empty array with no objects in it is not an element list.
    return found.length > 0 || parsed.length === 0 ? found : null;
  }
  const single = idbElementFromJson(parsed);
  return single ? [single] : null;
}

/** Parse one `Key: value, Key: value` style line; null when no known field is present. */
function idbElementFromLine(line: string): IdbAccessibilityElement | null {
  const typeMatch = IDB_TYPE_RE.exec(line);
  const labelMatch = IDB_LABEL_RE.exec(line);
  const valueMatch = IDB_VALUE_RE.exec(line);
  const frameMatch = IDB_FRAME_RE.exec(line);
  // `Enabled` alone is not considered useful data.
  if (!typeMatch && !labelMatch && !valueMatch && !frameMatch) return null;

  const enabledMatch = IDB_ENABLED_RE.exec(line);
  return {
    type: idbNonEmpty(typeMatch?.[1]),
    label: idbNonEmpty(labelMatch?.[1] ?? labelMatch?.[2] ?? labelMatch?.[3]),
    value: idbNonEmpty(valueMatch?.[1] ?? valueMatch?.[2] ?? valueMatch?.[3]),
    frame: frameMatch ? idbFrameFromMatch(frameMatch) : null,
    enabled: enabledMatch
      ? ["true", "1", "yes"].includes((enabledMatch[1] ?? "").toLowerCase())
      : true,
  };
}

/**
 * Parse the idb `ui describe-all` output into structured elements.
 *
 * Accepted input shapes, tried in this order:
 *  1. The whole output is one JSON document (an object or an array of objects,
 *     possibly pretty-printed across several lines).
 *  2. Line by line: a line that is itself JSON (object or array) is decoded as JSON.
 *  3. Line by line: key/value text such as
 *       Type: Button, Label: "Save", Frame: {{10, 20}, {100, 44}}, Enabled: true
 *  4. Anything else longer than two characters is kept as a fallback element
 *     whose `label` and `rawLine` are the trimmed line.
 *
 * JSON is tried before the text patterns so that text inside a JSON string
 * (e.g. a label reading "Title: Home") is not mistaken for a key/value pair.
 *
 * @param raw - Raw command output.
 * @returns Elements in the order they appear in the output; empty for blank input.
 */
function parseIdbAccessibilityOutput(raw: string): IdbAccessibilityElement[] {
  const wholeDocument = idbElementsFromJsonText(raw);
  if (wholeDocument !== null) return wholeDocument;

  const elements: IdbAccessibilityElement[] = [];
  for (const rawLine of raw.split(/\r?\n/)) {
    const line = rawLine.trim();
    if (line.length === 0) continue;

    const fromJson = idbElementsFromJsonText(line);
    if (fromJson !== null) {
      elements.push(...fromJson);
      continue;
    }

    const fromText = idbElementFromLine(line);
    if (fromText !== null) {
      elements.push(fromText);
      continue;
    }

    // Keep unrecognised content visible to callers instead of dropping it.
    if (line.length > 2) {
      elements.push({ type: null, label: line, value: null, frame: null, enabled: true, rawLine: line });
    }
  }
  return elements;
}
106
+
107
+ export function registerIosTools(
108
+ server: McpServer,
109
+ simctl: SimctlClient,
110
+ bundleId: string,
111
+ prefix: string,
112
+ ): void {
113
+
114
+ // ── diagnose_screen ──────────────────────────────────────────────
115
+ server.tool(
116
+ `${prefix}diagnose_screen`,
117
+ "Get a comprehensive diagnosis of the current iOS screen: screenshot (base64), recent logs, and device info. START HERE for any iOS debugging session.",
118
+ {},
119
+ async () => {
120
+ try {
121
+ const [deviceInfo, screenshotBuf, logs] = await Promise.allSettled([
122
+ simctl.getDeviceInfo(),
123
+ simctl.screenshot(),
124
+ simctl.getLogs(`processImagePath contains "${bundleId}"`, "30s")
125
+ .catch(() => "(log retrieval failed)"),
126
+ ]);
127
+
128
+ const device = deviceInfo.status === "fulfilled" ? deviceInfo.value : null;
129
+ const screenshot = screenshotBuf.status === "fulfilled"
130
+ ? screenshotBuf.value.toString("base64")
131
+ : null;
132
+ const recentLogs = logs.status === "fulfilled"
133
+ ? (logs.value as string).split("\n").slice(-50).join("\n")
134
+ : "(unavailable)";
135
+
136
+ const result = {
137
+ platform: "ios",
138
+ device: device ? { name: device.name, runtime: device.runtime, udid: device.udid } : "unknown",
139
+ bundleId,
140
+ hasScreenshot: !!screenshot,
141
+ recentLogs,
142
+ };
143
+
144
+ const content: Array<{ type: "text"; text: string } | { type: "image"; data: string; mimeType: string }> = [
145
+ { type: "text" as const, text: JSON.stringify(result) },
146
+ ];
147
+ if (screenshot) {
148
+ content.push({ type: "image" as const, data: screenshot, mimeType: "image/png" });
149
+ }
150
+
151
+ return { content };
152
+ } catch (err) {
153
+ return {
154
+ content: [{ type: "text" as const, text: `Error diagnosing iOS screen: ${err instanceof Error ? err.message : String(err)}` }],
155
+ isError: true,
156
+ };
157
+ }
158
+ },
159
+ );
160
+
161
+ // ── take_screenshot ──────────────────────────────────────────────
162
+ server.tool(
163
+ `${prefix}take_screenshot`,
164
+ "Capture the current iOS Simulator screen as a PNG image.",
165
+ {},
166
+ async () => {
167
+ try {
168
+ const buf = await simctl.screenshot();
169
+ return {
170
+ content: [{
171
+ type: "image" as const,
172
+ data: buf.toString("base64"),
173
+ mimeType: "image/png",
174
+ }],
175
+ };
176
+ } catch (err) {
177
+ return {
178
+ content: [{ type: "text" as const, text: `Screenshot failed: ${err instanceof Error ? err.message : String(err)}` }],
179
+ isError: true,
180
+ };
181
+ }
182
+ },
183
+ );
184
+
185
+ // ── tap ──────────────────────────────────────────────────────────
186
+ server.tool(
187
+ `${prefix}tap`,
188
+ "Tap at specific coordinates on the iOS Simulator screen.",
189
+ {
190
+ x: z.number().describe("X coordinate"),
191
+ y: z.number().describe("Y coordinate"),
192
+ },
193
+ async ({ x, y }) => {
194
+ try {
195
+ await simctl.tap(x, y);
196
+ return {
197
+ content: [{ type: "text" as const, text: JSON.stringify({ ok: true }) }],
198
+ };
199
+ } catch (err) {
200
+ return {
201
+ content: [{ type: "text" as const, text: `Tap failed: ${err instanceof Error ? err.message : String(err)}` }],
202
+ isError: true,
203
+ };
204
+ }
205
+ },
206
+ );
207
+
208
+ // ── type_text ────────────────────────────────────────────────────
209
+ server.tool(
210
+ `${prefix}type_text`,
211
+ "Type text into the currently focused field on the iOS Simulator.",
212
+ {
213
+ text: z.string().describe("Text to type"),
214
+ },
215
+ async ({ text }) => {
216
+ try {
217
+ await simctl.typeText(text);
218
+ return {
219
+ content: [{ type: "text" as const, text: JSON.stringify({ ok: true }) }],
220
+ };
221
+ } catch (err) {
222
+ return {
223
+ content: [{ type: "text" as const, text: `Type failed: ${err instanceof Error ? err.message : String(err)}` }],
224
+ isError: true,
225
+ };
226
+ }
227
+ },
228
+ );
229
+
230
+ // ── get_logs ─────────────────────────────────────────────────────
231
+ server.tool(
232
+ `${prefix}get_logs`,
233
+ "Get recent iOS Simulator logs filtered by predicate. Defaults to app logs.",
234
+ {
235
+ predicate: z.string().optional().describe("Log predicate filter (default: app logs)"),
236
+ duration: z.string().optional().describe("Time window, e.g. '30s', '5m' (default: '30s')"),
237
+ grep: z.string().optional().describe("Additional grep filter on output"),
238
+ maxLines: z.number().optional().describe("Maximum lines to return (default: 100)"),
239
+ },
240
+ async ({ predicate, duration, grep, maxLines }) => {
241
+ try {
242
+ const pred = predicate ?? `processImagePath contains "${bundleId}"`;
243
+ const dur = duration ?? "30s";
244
+ let output = await simctl.getLogs(pred, dur);
245
+
246
+ if (grep) {
247
+ const regex = new RegExp(grep, "i");
248
+ output = output.split("\n").filter(line => regex.test(line)).join("\n");
249
+ }
250
+
251
+ const lines = output.split("\n");
252
+ const limit = maxLines ?? 100;
253
+ const trimmed = lines.slice(-limit).join("\n");
254
+
255
+ return {
256
+ content: [{
257
+ type: "text" as const,
258
+ text: JSON.stringify({
259
+ totalLines: lines.length,
260
+ returnedLines: Math.min(lines.length, limit),
261
+ predicate: pred,
262
+ duration: dur,
263
+ logs: trimmed,
264
+ }),
265
+ }],
266
+ };
267
+ } catch (err) {
268
+ return {
269
+ content: [{ type: "text" as const, text: `Log retrieval failed: ${err instanceof Error ? err.message : String(err)}` }],
270
+ isError: true,
271
+ };
272
+ }
273
+ },
274
+ );
275
+
276
+ // ── launch_app ───────────────────────────────────────────────────
277
+ server.tool(
278
+ `${prefix}launch_app`,
279
+ "Launch an app on the iOS Simulator by bundle ID.",
280
+ {
281
+ bundle: z.string().optional().describe(`Bundle ID (default: ${bundleId})`),
282
+ },
283
+ async ({ bundle }) => {
284
+ try {
285
+ const bid = bundle ?? bundleId;
286
+ await simctl.launchApp(bid);
287
+ return {
288
+ content: [{ type: "text" as const, text: JSON.stringify({ launched: bid, success: true }) }],
289
+ };
290
+ } catch (err) {
291
+ return {
292
+ content: [{ type: "text" as const, text: `Launch failed: ${err instanceof Error ? err.message : String(err)}` }],
293
+ isError: true,
294
+ };
295
+ }
296
+ },
297
+ );
298
+
299
+ // ── terminate_app ────────────────────────────────────────────────
300
+ server.tool(
301
+ `${prefix}terminate_app`,
302
+ "Terminate a running app on the iOS Simulator.",
303
+ {
304
+ bundle: z.string().optional().describe(`Bundle ID (default: ${bundleId})`),
305
+ },
306
+ async ({ bundle }) => {
307
+ try {
308
+ const bid = bundle ?? bundleId;
309
+ await simctl.terminateApp(bid);
310
+ return {
311
+ content: [{ type: "text" as const, text: JSON.stringify({ terminated: bid, success: true }) }],
312
+ };
313
+ } catch (err) {
314
+ return {
315
+ content: [{ type: "text" as const, text: `Terminate failed: ${err instanceof Error ? err.message : String(err)}` }],
316
+ isError: true,
317
+ };
318
+ }
319
+ },
320
+ );
321
+
322
+ // ── swipe ───────────────────────────────────────────────────────
323
+ server.tool(
324
+ `${prefix}swipe`,
325
+ "Swipe from one point to another on the iOS Simulator screen.",
326
+ {
327
+ startX: z.number().describe("Start X coordinate"),
328
+ startY: z.number().describe("Start Y coordinate"),
329
+ endX: z.number().describe("End X coordinate"),
330
+ endY: z.number().describe("End Y coordinate"),
331
+ duration: z.number().optional().describe("Swipe duration in seconds (default: 0.3)"),
332
+ },
333
+ async ({ startX, startY, endX, endY, duration }) => {
334
+ try {
335
+ await simctl.swipe(startX, startY, endX, endY, duration ?? 0.3);
336
+ return {
337
+ content: [{ type: "text" as const, text: JSON.stringify({ ok: true }) }],
338
+ };
339
+ } catch (err) {
340
+ return {
341
+ content: [{ type: "text" as const, text: `Swipe failed: ${err instanceof Error ? err.message : String(err)}` }],
342
+ isError: true,
343
+ };
344
+ }
345
+ },
346
+ );
347
+
348
+ // ── describe_all ───────────────────────────────────────────────
349
+ server.tool(
350
+ `${prefix}describe_all`,
351
+ "Get the full accessibility tree of the current iOS screen. Returns structured element info including labels, roles, frames, and states.",
352
+ {},
353
+ async () => {
354
+ try {
355
+ const output = await simctl.describeAll();
356
+ return {
357
+ content: [{ type: "text" as const, text: output }],
358
+ };
359
+ } catch (err) {
360
+ return {
361
+ content: [{ type: "text" as const, text: `Describe all failed: ${err instanceof Error ? err.message : String(err)}` }],
362
+ isError: true,
363
+ };
364
+ }
365
+ },
366
+ );
367
+
368
+ // ── describe_point ─────────────────────────────────────────────
369
+ server.tool(
370
+ `${prefix}describe_point`,
371
+ "Get accessibility info for the element at specific coordinates on the iOS screen.",
372
+ {
373
+ x: z.number().describe("X coordinate"),
374
+ y: z.number().describe("Y coordinate"),
375
+ },
376
+ async ({ x, y }) => {
377
+ try {
378
+ const output = await simctl.describePoint(x, y);
379
+ return {
380
+ content: [{ type: "text" as const, text: output }],
381
+ };
382
+ } catch (err) {
383
+ return {
384
+ content: [{ type: "text" as const, text: `Describe point failed: ${err instanceof Error ? err.message : String(err)}` }],
385
+ isError: true,
386
+ };
387
+ }
388
+ },
389
+ );
390
+
391
+ // ── open_deep_link ──────────────────────────────────────────────
392
+ server.tool(
393
+ `${prefix}open_deep_link`,
394
+ "Open the app via a deep link URL scheme on iOS Simulator. Navigates directly to a specific screen without tapping through the UI.",
395
+ {
396
+ url: z.string().describe("Deep link URL (e.g., myapp://screen/detail?id=123 or https://example.com/path)"),
397
+ },
398
+ async ({ url }) => {
399
+ try {
400
+ await simctl.exec("openurl", [url]);
401
+ return {
402
+ content: [{ type: "text" as const, text: JSON.stringify({ success: true, url }) }],
403
+ };
404
+ } catch (err) {
405
+ return {
406
+ content: [{ type: "text" as const, text: `open_deep_link failed: ${err instanceof Error ? err.message : String(err)}` }],
407
+ isError: true,
408
+ };
409
+ }
410
+ },
411
+ );
412
+
413
+ // ── get_device_info ──────────────────────────────────────────────
414
+ server.tool(
415
+ `${prefix}get_device_info`,
416
+ "Get detailed info about the connected iOS Simulator (name, runtime, UDID, state).",
417
+ {},
418
+ async () => {
419
+ try {
420
+ const info = await simctl.getDeviceInfo();
421
+ return {
422
+ content: [{ type: "text" as const, text: JSON.stringify(info) }],
423
+ };
424
+ } catch (err) {
425
+ return {
426
+ content: [{ type: "text" as const, text: `Device info failed: ${err instanceof Error ? err.message : String(err)}` }],
427
+ isError: true,
428
+ };
429
+ }
430
+ },
431
+ );
432
+
433
+ // ── get_ui_tree ────────────────────────────────────────────────
434
+ server.tool(
435
+ `${prefix}get_ui_tree`,
436
+ "Parse the iOS accessibility tree into structured JSON. Returns elements with type, label, value, frame (x,y,w,h), and enabled state.",
437
+ {},
438
+ async () => {
439
+ try {
440
+ const raw = await simctl.describeAll();
441
+ const elements = parseIdbAccessibilityOutput(raw);
442
+ return {
443
+ content: [{
444
+ type: "text" as const,
445
+ text: JSON.stringify({
446
+ totalElements: elements.length,
447
+ clickableElements: elements.filter(e => e.type === "Button" || e.type === "Link").length,
448
+ textElements: elements.filter(e => e.label || e.value).length,
449
+ elements,
450
+ }),
451
+ }],
452
+ };
453
+ } catch (err) {
454
+ return {
455
+ content: [{ type: "text" as const, text: `get_ui_tree failed: ${err instanceof Error ? err.message : String(err)}` }],
456
+ isError: true,
457
+ };
458
+ }
459
+ },
460
+ );
461
+
462
+ // ── get_screen_text ────────────────────────────────────────────
463
+ server.tool(
464
+ `${prefix}get_screen_text`,
465
+ "Extract all visible text from the iOS screen's accessibility tree. Returns a flat array of text strings in reading order.",
466
+ {},
467
+ async () => {
468
+ try {
469
+ const raw = await simctl.describeAll();
470
+ const elements = parseIdbAccessibilityOutput(raw);
471
+ const texts = elements
472
+ .map(e => e.label || e.value || "")
473
+ .filter(t => t.length > 0);
474
+ return {
475
+ content: [{ type: "text" as const, text: JSON.stringify({ screenText: texts }) }],
476
+ };
477
+ } catch (err) {
478
+ return {
479
+ content: [{ type: "text" as const, text: `get_screen_text failed: ${err instanceof Error ? err.message : String(err)}` }],
480
+ isError: true,
481
+ };
482
+ }
483
+ },
484
+ );
485
+
486
+ // ── get_element_details ────────────────────────────────────────
487
+ server.tool(
488
+ `${prefix}get_element_details`,
489
+ "Find and inspect iOS UI elements matching a text query or at specific coordinates. Returns full details including frame, type, and state.",
490
+ {
491
+ query: z.string().optional().describe("Text to search for (case-insensitive substring match)"),
492
+ x: z.number().optional().describe("X coordinate to find element at"),
493
+ y: z.number().optional().describe("Y coordinate to find element at"),
494
+ },
495
+ async ({ query, x, y }) => {
496
+ try {
497
+ // If coordinates given, use describe_point for precise result
498
+ if (x !== undefined && y !== undefined) {
499
+ const pointInfo = await simctl.describePoint(x, y);
500
+ return {
501
+ content: [{ type: "text" as const, text: JSON.stringify({ found: 1, raw: pointInfo }) }],
502
+ };
503
+ }
504
+
505
+ if (!query) {
506
+ return {
507
+ content: [{ type: "text" as const, text: JSON.stringify({ error: "Provide either 'query' or x/y coordinates" }) }],
508
+ isError: true,
509
+ };
510
+ }
511
+
512
+ const raw = await simctl.describeAll();
513
+ const elements = parseIdbAccessibilityOutput(raw);
514
+ const lowerQuery = query.toLowerCase();
515
+ const matches = elements.filter(e =>
516
+ (e.label && e.label.toLowerCase().includes(lowerQuery)) ||
517
+ (e.value && e.value.toLowerCase().includes(lowerQuery)) ||
518
+ (e.type && e.type.toLowerCase().includes(lowerQuery))
519
+ );
520
+
521
+ if (matches.length === 0) {
522
+ return {
523
+ content: [{
524
+ type: "text" as const,
525
+ text: JSON.stringify({ found: 0, message: `No elements found matching "${query}"`, hint: "Try ios_get_screen_text to see all visible text." }),
526
+ }],
527
+ };
528
+ }
529
+
530
+ const details = matches.map(e => ({
531
+ ...e,
532
+ centerX: e.frame ? Math.round(e.frame.x + e.frame.w / 2) : null,
533
+ centerY: e.frame ? Math.round(e.frame.y + e.frame.h / 2) : null,
534
+ }));
535
+
536
+ return {
537
+ content: [{ type: "text" as const, text: JSON.stringify({ found: matches.length, elements: details }) }],
538
+ };
539
+ } catch (err) {
540
+ return {
541
+ content: [{ type: "text" as const, text: `get_element_details failed: ${err instanceof Error ? err.message : String(err)}` }],
542
+ isError: true,
543
+ };
544
+ }
545
+ },
546
+ );
547
+
548
+ // ── wait_for_text ──────────────────────────────────────────────
549
+ server.tool(
550
+ `${prefix}wait_for_text`,
551
+ "Poll the iOS screen until specific text appears. Essential after navigation, screen transitions, or async operations.",
552
+ {
553
+ text: z.string().describe("Text to wait for (case-insensitive substring match)"),
554
+ timeoutMs: z.number().optional().describe("Maximum wait time in ms (default: 10000)"),
555
+ pollIntervalMs: z.number().optional().describe("Polling interval in ms (default: 1000)"),
556
+ },
557
+ async ({ text, timeoutMs, pollIntervalMs }) => {
558
+ const timeout = timeoutMs ?? 10_000;
559
+ const interval = pollIntervalMs ?? 1_000;
560
+ const startTime = Date.now();
561
+ const searchLower = text.toLowerCase();
562
+ let attempts = 0;
563
+
564
+ while (Date.now() - startTime < timeout) {
565
+ attempts++;
566
+ try {
567
+ const raw = await simctl.describeAll();
568
+ const elements = parseIdbAccessibilityOutput(raw);
569
+ const allText = elements
570
+ .map(e => `${e.label ?? ""} ${e.value ?? ""}`.toLowerCase())
571
+ .join(" ");
572
+ if (allText.includes(searchLower)) {
573
+ return {
574
+ content: [{ type: "text" as const, text: JSON.stringify({ success: true, found: true, text, elapsedMs: Date.now() - startTime, attempts }) }],
575
+ };
576
+ }
577
+ } catch {
578
+ // retry
579
+ }
580
+ if (Date.now() - startTime + interval < timeout) {
581
+ await sleep(interval);
582
+ }
583
+ }
584
+
585
+ return {
586
+ content: [{
587
+ type: "text" as const,
588
+ text: JSON.stringify({ success: false, found: false, text, elapsedMs: Date.now() - startTime, attempts, hint: "Text did not appear within timeout." }),
589
+ }],
590
+ };
591
+ },
592
+ );
593
+
594
+ // ── scroll_to_text ─────────────────────────────────────────────
595
+ server.tool(
596
+ `${prefix}scroll_to_text`,
597
+ "Swipe/scroll repeatedly until specific text appears on the iOS screen. Useful for finding elements in long scrollable lists.",
598
+ {
599
+ text: z.string().describe("Text to scroll to (case-insensitive substring match)"),
600
+ direction: z.enum(["up", "down", "left", "right"]).optional().describe("Scroll direction (default: down)"),
601
+ maxScrolls: z.number().optional().describe("Maximum scroll attempts (default: 5)"),
602
+ },
603
+ async ({ text, direction, maxScrolls }) => {
604
+ const dir = direction ?? "down";
605
+ const max = maxScrolls ?? 5;
606
+ const searchLower = text.toLowerCase();
607
+
608
+ // Default screen dimensions for iPhone (logical points)
609
+ const screenW = 390;
610
+ const screenH = 844;
611
+ const centerX = Math.round(screenW / 2);
612
+
613
+ for (let i = 0; i < max; i++) {
614
+ try {
615
+ const raw = await simctl.describeAll();
616
+ const elements = parseIdbAccessibilityOutput(raw);
617
+ const match = elements.find(e =>
618
+ (e.label && e.label.toLowerCase().includes(searchLower)) ||
619
+ (e.value && e.value.toLowerCase().includes(searchLower))
620
+ );
621
+
622
+ if (match) {
623
+ const center = match.frame
624
+ ? { x: Math.round(match.frame.x + match.frame.w / 2), y: Math.round(match.frame.y + match.frame.h / 2) }
625
+ : null;
626
+ return {
627
+ content: [{
628
+ type: "text" as const,
629
+ text: JSON.stringify({ success: true, found: true, text: match.label || match.value, center, scrollsNeeded: i }),
630
+ }],
631
+ };
632
+ }
633
+ } catch {
634
+ // continue scrolling
635
+ }
636
+
637
+ // Perform swipe
638
+ let startX = centerX, startY = Math.round(screenH * 0.7);
639
+ let endX = centerX, endY = Math.round(screenH * 0.3);
640
+ if (dir === "up") { startY = Math.round(screenH * 0.3); endY = Math.round(screenH * 0.7); }
641
+ if (dir === "left") { startX = Math.round(screenW * 0.7); endX = Math.round(screenW * 0.3); startY = Math.round(screenH / 2); endY = startY; }
642
+ if (dir === "right") { startX = Math.round(screenW * 0.3); endX = Math.round(screenW * 0.7); startY = Math.round(screenH / 2); endY = startY; }
643
+
644
+ await simctl.swipe(startX, startY, endX, endY, 0.3);
645
+ await sleep(500);
646
+ }
647
+
648
+ return {
649
+ content: [{
650
+ type: "text" as const,
651
+ text: JSON.stringify({ success: false, found: false, text, scrollAttempts: max, hint: `Text "${text}" not found after ${max} scroll attempts.` }),
652
+ }],
653
+ };
654
+ },
655
+ );
656
+
657
+ // ── fill_form ──────────────────────────────────────────────────
658
+ server.tool(
659
+ `${prefix}fill_form`,
660
+ "Batch fill form fields on iOS. For each field: finds element by label in the accessibility tree, taps its coordinates, clears existing text, types new value.",
661
+ {
662
+ fields: z.array(z.object({
663
+ label: z.string().describe("Label/text to find the field by (case-insensitive)"),
664
+ value: z.string().describe("Text to enter in the field"),
665
+ clearFirst: z.boolean().optional().describe("Clear existing text before typing (default: true)"),
666
+ })).describe("Array of field labels and values to fill"),
667
+ },
668
+ async ({ fields }) => {
669
+ const results: Array<{ field: string; success: boolean; error?: string }> = [];
670
+
671
+ for (const field of fields) {
672
+ const clearFirst = field.clearFirst ?? true;
673
+ try {
674
+ const raw = await simctl.describeAll();
675
+ const elements = parseIdbAccessibilityOutput(raw);
676
+ const lowerLabel = field.label.toLowerCase();
677
+
678
+ // Find the element by label
679
+ const match = elements.find(e =>
680
+ (e.label && e.label.toLowerCase().includes(lowerLabel)) ||
681
+ (e.value && e.value.toLowerCase().includes(lowerLabel))
682
+ );
683
+
684
+ if (!match || !match.frame) {
685
+ results.push({ field: field.label, success: false, error: "Element not found or no frame" });
686
+ continue;
687
+ }
688
+
689
+ const tapX = Math.round(match.frame.x + match.frame.w / 2);
690
+ const tapY = Math.round(match.frame.y + match.frame.h / 2);
691
+
692
+ // Tap the field
693
+ await simctl.tap(tapX, tapY);
694
+ await sleep(300);
695
+
696
+ // Clear existing text if needed (select all + delete)
697
+ if (clearFirst) {
698
+ // Triple-tap to select all, then delete
699
+ await simctl.tap(tapX, tapY);
700
+ await sleep(50);
701
+ await simctl.tap(tapX, tapY);
702
+ await sleep(50);
703
+ await simctl.tap(tapX, tapY);
704
+ await sleep(200);
705
+ await simctl.typeText(""); // This may not clear; use idb key sequence instead
706
+ try {
707
+ await simctl.keyPress(42); // DELETE key via idb
708
+ } catch {
709
+ // If keyPress fails, try typing empty — field may already be selected
710
+ }
711
+ }
712
+
713
+ // Type the value
714
+ await simctl.typeText(field.value);
715
+ await sleep(200);
716
+
717
+ results.push({ field: field.label, success: true });
718
+ } catch (err) {
719
+ results.push({ field: field.label, success: false, error: err instanceof Error ? err.message : String(err) });
720
+ }
721
+ }
722
+
723
+ const allSuccess = results.every(r => r.success);
724
+ return {
725
+ content: [{
726
+ type: "text" as const,
727
+ text: JSON.stringify({ success: allSuccess, filledCount: results.filter(r => r.success).length, totalFields: fields.length, results }),
728
+ }],
729
+ };
730
+ },
731
+ );
732
+
733
+ // ── tap_and_wait ───────────────────────────────────────────────
734
+ server.tool(
735
+ `${prefix}tap_and_wait`,
736
+ "Tap an element (by label or coordinates) on iOS, then poll until expected text appears. Combines tap + wait_for_text in one call.",
737
+ {
738
+ label: z.string().optional().describe("Label/text of element to tap (case-insensitive)"),
739
+ x: z.number().optional().describe("X coordinate to tap (used if label not found or not provided)"),
740
+ y: z.number().optional().describe("Y coordinate to tap (used if label not found or not provided)"),
741
+ waitForText: z.string().describe("Text to wait for after tap"),
742
+ timeoutMs: z.number().optional().describe("Max wait time in ms (default: 10000)"),
743
+ },
744
+ async ({ label, x, y, waitForText, timeoutMs }) => {
745
+ try {
746
+ let tapX = x;
747
+ let tapY = y;
748
+ let matchedBy = tapX !== undefined ? `coordinates (${tapX}, ${tapY})` : "";
749
+
750
+ // Find element by label if provided
751
+ if (label) {
752
+ const raw = await simctl.describeAll();
753
+ const elements = parseIdbAccessibilityOutput(raw);
754
+ const lowerLabel = label.toLowerCase();
755
+ const match = elements.find(e =>
756
+ (e.label && e.label.toLowerCase().includes(lowerLabel)) ||
757
+ (e.value && e.value.toLowerCase().includes(lowerLabel))
758
+ );
759
+ if (match?.frame) {
760
+ tapX = Math.round(match.frame.x + match.frame.w / 2);
761
+ tapY = Math.round(match.frame.y + match.frame.h / 2);
762
+ matchedBy = `label="${match.label || match.value}"`;
763
+ }
764
+ }
765
+
766
+ if (tapX === undefined || tapY === undefined) {
767
+ return {
768
+ content: [{ type: "text" as const, text: JSON.stringify({ success: false, error: "Element not found and no coordinates provided" }) }],
769
+ };
770
+ }
771
+
772
+ // Tap
773
+ await simctl.tap(tapX, tapY);
774
+
775
+ // Wait for text
776
+ const timeout = timeoutMs ?? 10_000;
777
+ const interval = 500;
778
+ const startTime = Date.now();
779
+ const searchLower = waitForText.toLowerCase();
780
+
781
+ while (Date.now() - startTime < timeout) {
782
+ await sleep(interval);
783
+ try {
784
+ const raw = await simctl.describeAll();
785
+ const elements = parseIdbAccessibilityOutput(raw);
786
+ const allText = elements.map(e => `${e.label ?? ""} ${e.value ?? ""}`.toLowerCase()).join(" ");
787
+ if (allText.includes(searchLower)) {
788
+ return {
789
+ content: [{
790
+ type: "text" as const,
791
+ text: JSON.stringify({ success: true, tappedBy: matchedBy, tapped: { x: tapX, y: tapY }, waitedFor: waitForText, elapsedMs: Date.now() - startTime }),
792
+ }],
793
+ };
794
+ }
795
+ } catch {
796
+ // retry
797
+ }
798
+ }
799
+
800
+ return {
801
+ content: [{
802
+ type: "text" as const,
803
+ text: JSON.stringify({ success: false, tappedBy: matchedBy, tapped: { x: tapX, y: tapY }, waitedFor: waitForText, timedOut: true, elapsedMs: Date.now() - startTime }),
804
+ }],
805
+ };
806
+ } catch (err) {
807
+ return {
808
+ content: [{ type: "text" as const, text: `tap_and_wait failed: ${err instanceof Error ? err.message : String(err)}` }],
809
+ isError: true,
810
+ };
811
+ }
812
+ },
813
+ );
814
+
815
+ // ── assert_screen ──────────────────────────────────────────────
816
+ server.tool(
817
+ `${prefix}assert_screen`,
818
+ "Quickly verify the iOS screen contains (or doesn't contain) expected text. Returns pass/fail without the full UI tree, saving tokens.",
819
+ {
820
+ contains: z.array(z.string()).optional().describe("Text strings that MUST be on screen"),
821
+ notContains: z.array(z.string()).optional().describe("Text strings that must NOT be on screen"),
822
+ },
823
+ async ({ contains, notContains }) => {
824
+ try {
825
+ const raw = await simctl.describeAll();
826
+ const elements = parseIdbAccessibilityOutput(raw);
827
+ const allText = elements
828
+ .map(e => `${e.label ?? ""} ${e.value ?? ""}`.toLowerCase());
829
+
830
+ const missing: string[] = [];
831
+ const unexpected: string[] = [];
832
+
833
+ for (const expected of contains ?? []) {
834
+ if (!allText.some(t => t.includes(expected.toLowerCase()))) {
835
+ missing.push(expected);
836
+ }
837
+ }
838
+ for (const banned of notContains ?? []) {
839
+ if (allText.some(t => t.includes(banned.toLowerCase()))) {
840
+ unexpected.push(banned);
841
+ }
842
+ }
843
+
844
+ const pass = missing.length === 0 && unexpected.length === 0;
845
+ return {
846
+ content: [{
847
+ type: "text" as const,
848
+ text: JSON.stringify({
849
+ pass,
850
+ ...(missing.length > 0 ? { missing } : {}),
851
+ ...(unexpected.length > 0 ? { unexpected } : {}),
852
+ }),
853
+ }],
854
+ };
855
+ } catch (err) {
856
+ return {
857
+ content: [{ type: "text" as const, text: `assert_screen failed: ${err instanceof Error ? err.message : String(err)}` }],
858
+ isError: true,
859
+ };
860
+ }
861
+ },
862
+ );
863
+
864
+ // ── get_crash_info ─────────────────────────────────────────────
865
/*
 * Registers the `get_crash_info` tool.
 *
 * The handler asks the simctl client for crash output covering the last
 * `minutes` minutes (default 5), drops blank lines, and returns the newest
 * `maxLines` lines (default 50) together with the total and returned counts.
 * Any thrown error is reported as an MCP error result instead of propagating.
 */
server.tool(
  `${prefix}get_crash_info`,
  "Get recent crash and exception logs from the iOS Simulator. Searches the last N minutes for crash/exception/fatal/SIGABRT messages.",
  {
    minutes: z.number().optional().describe("How many minutes back to search (default: 5)"),
    maxLines: z.number().optional().describe("Maximum lines to return (default: 50)"),
  },
  async ({ minutes, maxLines }) => {
    try {
      const output = await simctl.getCrashLogs(minutes ?? 5);
      const lines = output.split("\n").filter(l => l.trim().length > 0);

      // Normalise the limit to a non-negative integer. Without this,
      // `slice(-0)` is `slice(0)` and returns EVERY line for maxLines = 0,
      // and a negative maxLines would drop lines from the front instead.
      const requested = maxLines ?? 50;
      const limit = Number.isNaN(requested) ? 50 : Math.max(0, Math.floor(requested));
      const trimmed = limit === 0 ? [] : lines.slice(-limit);

      return {
        content: [{
          type: "text" as const,
          text: JSON.stringify({ totalLines: lines.length, returnedLines: trimmed.length, logs: trimmed.join("\n") }),
        }],
      };
    } catch (err) {
      return {
        content: [{ type: "text" as const, text: `get_crash_info failed: ${err instanceof Error ? err.message : String(err)}` }],
        isError: true,
      };
    }
  },
);
893
+
894
+ // ── hot_reload ─────────────────────────────────────────────────
895
/*
 * Registers the `hot_reload` tool.
 *
 * Sends an HTTP request to the Metro bundler on localhost:8081:
 *   - "full": GET  /reload
 *   - "hot" (default): POST /message with the JSON body {"method":"reload"}
 *
 * The result is a JSON payload with `success`, and on failure an `error`
 * message plus troubleshooting `hints`.
 */
server.tool(
  `${prefix}hot_reload`,
  "Trigger a React Native reload via the Metro bundler on iOS. 'hot' performs a fast refresh, 'full' performs a complete bundle reload.",
  {
    type: z.enum(["hot", "full"]).optional().describe("Reload type: 'hot' for fast refresh (default), 'full' for complete reload"),
  },
  async ({ type }) => {
    const reloadType = type ?? "hot";
    try {
      const endpoint = reloadType === "full"
        ? "http://localhost:8081/reload"
        : "http://localhost:8081/message";

      const response = reloadType === "full"
        ? await fetch(endpoint)
        : await fetch(endpoint, {
            method: "POST",
            headers: { "Content-Type": "application/json" },
            body: JSON.stringify({ method: "reload" }),
          });

      // fetch() only rejects on network failures; an HTTP 4xx/5xx answer still
      // resolves. Treat a non-2xx status as a failed reload so the caller is
      // not told "success" when the bundler rejected the request.
      if (!response.ok) {
        throw new Error(`Metro responded with HTTP ${response.status} ${response.statusText}`.trim());
      }

      return {
        content: [{
          type: "text" as const,
          text: JSON.stringify({ success: true, type: reloadType, message: `${reloadType === "full" ? "Full" : "Hot"} reload triggered.` }),
        }],
      };
    } catch (err) {
      return {
        content: [{
          type: "text" as const,
          text: JSON.stringify({
            success: false,
            error: err instanceof Error ? err.message : String(err),
            hints: ["Ensure Metro bundler is running (npx react-native start)", "Ensure the app is in dev mode"],
          }),
        }],
        isError: true,
      };
    }
  },
);
939
+
940
+ // ── start_recording ───────────────────────────────────────────
941
// Shared recording state for the recording tools.
// `iosRecordingProcess` is the running `xcrun simctl io ... recordVideo` child
// (null when nothing is recording); `iosRecordingPath` is the MP4 it writes to.
// eslint-disable-next-line @typescript-eslint/consistent-type-imports
let iosRecordingProcess: ReturnType<typeof import("node:child_process").spawn> | null = null;
let iosRecordingPath: string | null = null;

/*
 * Registers the `start_recording` tool.
 *
 * Spawns `xcrun simctl io booted recordVideo --force <path>` in the background
 * and schedules a SIGINT after `maxDurationSec` seconds (default 60) so the
 * recording cannot run forever. Returns `{ ok, maxDurationSec, path }` once
 * the child process has actually been spawned, or an MCP error result if a
 * recording is already running or the process could not be started.
 */
server.tool(
  `${prefix}start_recording`,
  "Start recording the iOS Simulator screen. Recording runs in the background. Use stop_recording to finish and retrieve the video.",
  {
    maxDurationSec: z.number().optional().describe("Maximum recording duration in seconds (default: 60)"),
  },
  async ({ maxDurationSec }) => {
    if (iosRecordingProcess) {
      return {
        // Use the registered prefix so the hint names the real tool.
        content: [{ type: "text" as const, text: JSON.stringify({ error: `Recording already in progress. Call ${prefix}stop_recording first.` }) }],
        isError: true,
      };
    }

    try {
      const { spawn } = await import("node:child_process");

      // setTimeout treats delays below 1 ms or above 2^31-1 ms as 1 ms, which
      // would stop the recording immediately. Fall back to the default for
      // non-positive/non-finite values and cap at the largest safe delay.
      const requested = maxDurationSec ?? 60;
      const duration = Number.isFinite(requested) && requested > 0
        ? Math.min(requested, 2147483)
        : 60;
      const recordPath = `/tmp/sensai-ios-recording-${Date.now()}.mp4`;

      // simctl recordVideo runs until SIGINT
      const proc = spawn("xcrun", [
        "simctl", "io", "booted", "recordVideo",
        "--force", recordPath,
      ], { stdio: "ignore" });

      iosRecordingProcess = proc;
      iosRecordingPath = recordPath;

      // Settles as soon as we know whether the child could be started.
      const started = new Promise<void>((resolve, reject) => {
        proc.once("spawn", () => resolve());
        proc.once("error", reject);
      });

      // Auto-stop after max duration
      const timer = setTimeout(() => {
        if (iosRecordingProcess === proc) {
          proc.kill("SIGINT");
          iosRecordingProcess = null;
        }
      }, duration * 1000);

      // Drops the pending auto-stop timer and forgets the process handle.
      const release = (): void => {
        clearTimeout(timer);
        if (iosRecordingProcess === proc) {
          iosRecordingProcess = null;
        }
      };

      proc.on("exit", release);

      // A ChildProcess without an 'error' listener turns a failed spawn
      // (e.g. xcrun not installed) into an uncaught exception. 'error' can
      // also fire for a failed kill; only a process that never got a pid
      // means no recording exists, so only then is the state discarded.
      proc.on("error", () => {
        if (proc.pid === undefined) {
          release();
          if (iosRecordingPath === recordPath) {
            iosRecordingPath = null;
          }
        }
      });

      // Surface spawn failures to the caller instead of reporting ok: true.
      await started;

      return {
        content: [{
          type: "text" as const,
          text: JSON.stringify({ ok: true, maxDurationSec: duration, path: recordPath }),
        }],
      };
    } catch (err) {
      // State is only assigned after a successful spawn() call and is cleared
      // by the 'error' listener above, so nothing needs resetting here.
      return {
        content: [{ type: "text" as const, text: `start_recording failed: ${err instanceof Error ? err.message : String(err)}` }],
        isError: true,
      };
    }
  },
);
1001
+
1002
+ // ── stop_recording ────────────────────────────────────────────
1003
/*
 * Registers the `stop_recording` tool.
 *
 * Sends SIGINT to the recorder process (if it is still running), waits for it
 * to exit, then either copies the MP4 to `savePath` or returns it inline as
 * base64. The temporary file is deleted after a successful retrieval.
 *
 * A recording whose process has already gone away (its handle was cleared
 * while `iosRecordingPath` is still set) can still be retrieved. If retrieval
 * fails, the path is kept so the call can be retried, e.g. with a different
 * `savePath`.
 */
server.tool(
  `${prefix}stop_recording`,
  "Stop the active iOS screen recording and retrieve the MP4 video file.",
  {
    savePath: z.string().optional().describe("Local path to save the MP4 (optional). If not provided, returns base64."),
  },
  async ({ savePath }) => {
    const proc = iosRecordingProcess;
    const recordPath = iosRecordingPath;

    // Nothing running and nothing left to collect.
    if (!proc && !recordPath) {
      return {
        // Use the registered prefix so the hint names the real tool.
        content: [{ type: "text" as const, text: JSON.stringify({ error: `No active recording. Call ${prefix}start_recording first.` }) }],
        isError: true,
      };
    }

    try {
      iosRecordingProcess = null;

      if (proc && proc.exitCode === null && proc.signalCode === null) {
        // Send SIGINT to finalize the recording, then wait for the recorder
        // to exit rather than guessing a fixed delay. The sleep is only an
        // upper bound in case the process never reports an exit.
        const exited = new Promise<void>((resolve) => {
          proc.once("exit", () => resolve());
        });
        if (proc.kill("SIGINT")) {
          await Promise.race([exited, sleep(10000)]);
        }
      } else if (!proc) {
        // The process handle is already gone, so its exit cannot be observed;
        // allow a short grace period in case it was signalled moments ago.
        await sleep(2000);
      }

      if (!recordPath) {
        throw new Error("Recording path is unknown.");
      }

      const { readFile, unlink, copyFile } = await import("node:fs/promises");

      // Forget the path only if no newer recording replaced it meanwhile.
      const forgetPath = (): void => {
        if (iosRecordingPath === recordPath) {
          iosRecordingPath = null;
        }
      };

      if (savePath) {
        await copyFile(recordPath, savePath);
        // Best-effort cleanup: the video is already safely copied.
        await unlink(recordPath).catch(() => {});
        forgetPath();
        return {
          content: [{ type: "text" as const, text: JSON.stringify({ ok: true, savedTo: savePath }) }],
        };
      }

      const data = await readFile(recordPath);
      // Best-effort cleanup: the video is already in memory.
      await unlink(recordPath).catch(() => {});
      forgetPath();

      return {
        content: [{
          type: "text" as const,
          text: JSON.stringify({ ok: true, format: "mp4", sizeBytes: data.length, base64: data.toString("base64") }),
        }],
      };
    } catch (err) {
      // iosRecordingPath is intentionally left untouched so a later call can
      // retry retrieving the same file.
      return {
        content: [{ type: "text" as const, text: `stop_recording failed: ${err instanceof Error ? err.message : String(err)}` }],
        isError: true,
      };
    }
  },
);
1055
+ }