mobai-mcp 1.4.1 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -1,61 +1,66 @@
1
1
  #!/usr/bin/env node
2
2
  /**
3
- * MobAI MCP Server
3
+ * MobAI MCP Server (stdio)
4
4
  *
5
- * Provides tools for AI-powered mobile device automation through the MobAI HTTP API.
6
- * Works with both Android and iOS devices, emulators, and simulators.
5
+ * Mirrors the Go HTTP-based MCP server as a stdio transport.
6
+ * Proxies tool calls to the MobAI HTTP API at 127.0.0.1:8686.
7
7
  */
8
8
  import { Server } from "@modelcontextprotocol/sdk/server/index.js";
9
9
  import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
10
- import { CallToolRequestSchema, ListToolsRequestSchema, ListResourcesRequestSchema, ReadResourceRequestSchema, } from "@modelcontextprotocol/sdk/types.js";
10
+ import { CallToolRequestSchema, ListToolsRequestSchema, } from "@modelcontextprotocol/sdk/types.js";
11
11
  import * as fs from "fs";
12
+ import * as os from "os";
12
13
  import * as path from "path";
13
14
  const API_BASE_URL = "http://127.0.0.1:8686/api/v1";
14
- const DEFAULT_TIMEOUT_MS = 600000; // 10 minutes
15
- const SCREENSHOT_DIR = "/tmp/mobai/screenshots";
16
- // Ensure screenshot directory exists
15
+ const DEFAULT_TIMEOUT_MS = 300000; // 5 minutes (matches Go httpClient timeout)
16
+ const SCREENSHOT_DIR = path.join(os.tmpdir(), "mobai", "screenshots");
17
+ // ---------------------------------------------------------------------------
18
+ // Screenshot helpers
19
+ // ---------------------------------------------------------------------------
17
20
  function ensureScreenshotDir() {
18
21
  if (!fs.existsSync(SCREENSHOT_DIR)) {
19
22
  fs.mkdirSync(SCREENSHOT_DIR, { recursive: true });
20
23
  }
21
24
  }
22
- // Save base64 screenshot to file, return path
23
- function saveBase64Screenshot(base64Data, prefix = "mcp") {
24
- if (!base64Data || base64Data.length <= 200 || base64Data.startsWith("/")) {
25
+ function saveBase64ToTemp(base64Data, prefix) {
26
+ if (!base64Data || base64Data.length <= 200)
25
27
  return null;
26
- }
27
28
  ensureScreenshotDir();
28
- const filename = `${prefix}-${Date.now()}-${Math.random().toString(36).slice(2, 8)}.png`;
29
+ const filename = `${prefix}_${Date.now()}.png`;
29
30
  const filePath = path.join(SCREENSHOT_DIR, filename);
30
31
  fs.writeFileSync(filePath, Buffer.from(base64Data, "base64"));
31
32
  return filePath;
32
33
  }
33
- // Process screenshot response: save base64 to file, return path
34
- function processScreenshotResponse(body) {
35
- if (body?.data && body?.format === "png" && !body?.path) {
34
+ function screenshotToFile(body) {
35
+ if (body?.path) {
36
+ return `Screenshot saved to ${body.path}`;
37
+ }
38
+ // Fallback: base64 mode
39
+ if (body?.data) {
40
+ const imgData = Buffer.from(body.data, "base64");
36
41
  ensureScreenshotDir();
37
- const filename = `screenshot-${Date.now()}.png`;
42
+ const ext = body.format || "png";
43
+ const filename = `screenshot_${Date.now()}.${ext}`;
38
44
  const filePath = path.join(SCREENSHOT_DIR, filename);
39
- fs.writeFileSync(filePath, Buffer.from(body.data, "base64"));
40
- return { path: filePath, format: "png", screenshot_saved: true };
45
+ fs.writeFileSync(filePath, imgData);
46
+ return `Screenshot saved to ${filePath}`;
41
47
  }
42
- return body;
48
+ return JSON.stringify(body, null, 2);
43
49
  }
44
- // Process DSL response: find and save embedded screenshots
45
- function processDslResponse(body) {
50
+ function extractDSLScreenshots(body) {
46
51
  if (!body?.step_results)
47
52
  return body;
48
53
  for (const step of body.step_results) {
49
54
  const native = step.result?.observations?.native;
50
- if (native?.screenshot && !native.screenshot_saved) {
51
- const filePath = saveBase64Screenshot(native.screenshot, "observe");
55
+ if (native?.screenshot && typeof native.screenshot === "string" && native.screenshot.length > 200) {
56
+ const filePath = saveBase64ToTemp(native.screenshot, "observe");
52
57
  if (filePath) {
53
58
  native.screenshot = filePath;
54
59
  native.screenshot_saved = true;
55
60
  }
56
61
  }
57
- if (step.debug?.screenshot && !step.debug.screenshot_saved) {
58
- const filePath = saveBase64Screenshot(step.debug.screenshot, "debug");
62
+ if (step.debug?.screenshot && typeof step.debug.screenshot === "string" && step.debug.screenshot.length > 200) {
63
+ const filePath = saveBase64ToTemp(step.debug.screenshot, "debug");
59
64
  if (filePath) {
60
65
  step.debug.screenshot = filePath;
61
66
  step.debug.screenshot_saved = true;
@@ -64,1582 +69,469 @@ function processDslResponse(body) {
64
69
  }
65
70
  return body;
66
71
  }
67
- // Process response body
68
- function processResponseBody(body, url) {
69
- if (url.includes("/screenshot")) {
70
- return processScreenshotResponse(body);
71
- }
72
- if (url.includes("/dsl/execute")) {
73
- return processDslResponse(body);
74
- }
75
- return body;
76
- }
77
- // Make HTTP request to MobAI API
78
- async function makeRequest(method, endpoint, body, timeoutMs = DEFAULT_TIMEOUT_MS) {
79
- const url = endpoint.startsWith("http") ? endpoint : `${API_BASE_URL}${endpoint}`;
72
+ // ---------------------------------------------------------------------------
73
+ // HTTP helpers
74
+ // ---------------------------------------------------------------------------
75
+ async function doRequest(method, urlPath, payload, timeoutMs = DEFAULT_TIMEOUT_MS) {
76
+ const url = urlPath.startsWith("http") ? urlPath : `${API_BASE_URL}${urlPath}`;
80
77
  const controller = new AbortController();
81
78
  const timeoutId = setTimeout(() => controller.abort(), timeoutMs);
82
79
  try {
83
- const fetchOptions = {
80
+ const opts = {
84
81
  method,
85
82
  headers: { "Content-Type": "application/json" },
86
83
  signal: controller.signal,
87
84
  };
88
- if (body && ["POST", "PUT", "PATCH"].includes(method)) {
89
- fetchOptions.body = typeof body === "string" ? body : JSON.stringify(body);
85
+ if (payload !== undefined && ["POST", "PUT", "PATCH"].includes(method)) {
86
+ opts.body = typeof payload === "string" ? payload : JSON.stringify(payload);
90
87
  }
91
- const response = await fetch(url, fetchOptions);
88
+ const response = await fetch(url, opts);
92
89
  clearTimeout(timeoutId);
93
- const responseText = await response.text();
94
- let responseBody;
90
+ const text = await response.text();
91
+ let body;
95
92
  try {
96
- responseBody = JSON.parse(responseText);
97
- responseBody = processResponseBody(responseBody, url);
93
+ body = JSON.parse(text);
98
94
  }
99
95
  catch {
100
- responseBody = responseText;
96
+ body = text;
101
97
  }
102
- return {
103
- status: response.status,
104
- statusText: response.statusText,
105
- body: responseBody,
106
- };
98
+ if (response.status >= 400) {
99
+ throw new Error(`HTTP ${response.status}: ${typeof body === "string" ? body : JSON.stringify(body)}`);
100
+ }
101
+ return body;
107
102
  }
108
103
  finally {
109
104
  clearTimeout(timeoutId);
110
105
  }
111
106
  }
112
- // Create the MCP server
113
- const server = new Server({
114
- name: "mobai",
115
- version: "1.0.0",
116
- }, {
117
- capabilities: {
118
- tools: {},
119
- resources: {},
120
- },
107
+ const doGet = (p) => doRequest("GET", p);
108
+ const doPost = (p, body) => doRequest("POST", p, body);
109
+ const doDelete = (p) => doRequest("DELETE", p);
110
+ const doPut = (p, body) => doRequest("PUT", p, body);
111
+ const doPatch = (p, body) => doRequest("PATCH", p, body);
112
+ function textResult(data) {
113
+ return {
114
+ content: [{ type: "text", text: typeof data === "string" ? data : JSON.stringify(data, null, 2) }],
115
+ };
116
+ }
117
+ function errResult(err) {
118
+ const message = err instanceof Error ? err.message : String(err);
119
+ return {
120
+ content: [{ type: "text", text: message }],
121
+ isError: true,
122
+ };
123
+ }
124
+ // ---------------------------------------------------------------------------
125
+ // Server
126
+ // ---------------------------------------------------------------------------
127
+ const server = new Server({ name: "mobai", version: "1.0.0" }, {
128
+ capabilities: { tools: {}, resources: {} },
129
+ instructions: `MobAI controls Android and iOS devices. Before starting any device task, read the relevant MCP resources:
130
+ - mobai://reference/device-automation — how to control devices
131
+ - mobai://reference/testing — testing workflow, rules, and .mob script syntax
132
+ Check available skills in current work directory and load any relevant to the user's request.`,
121
133
  });
122
- // Tool definitions
134
+ // ---------------------------------------------------------------------------
135
+ // Tool definitions — exactly matches Go registerTools()
136
+ // ---------------------------------------------------------------------------
123
137
  const TOOLS = [
138
+ // Device management
124
139
  {
125
140
  name: "list_devices",
126
- description: "List all connected Android and iOS devices, emulators, and simulators",
127
- inputSchema: {
128
- type: "object",
129
- properties: {},
130
- required: [],
131
- },
141
+ description: "List all connected Android and iOS devices",
142
+ inputSchema: { type: "object", properties: {}, required: [] },
132
143
  },
133
144
  {
134
145
  name: "get_device",
135
- description: "Get information about a specific device",
146
+ description: "Get details about a specific device",
136
147
  inputSchema: {
137
148
  type: "object",
138
- properties: {
139
- device_id: {
140
- type: "string",
141
- description: "Device ID (serial for Android, UDID for iOS)",
142
- },
143
- },
149
+ properties: { device_id: { type: "string", description: "Device ID" } },
144
150
  required: ["device_id"],
145
151
  },
146
152
  },
147
153
  {
148
154
  name: "start_bridge",
149
- description: "Start the on-device bridge (accessibility service on Android, WebDriverAgent on iOS). Required before automation.",
155
+ description: "Start the automation bridge on a device. Required before interacting with the device.",
150
156
  inputSchema: {
151
157
  type: "object",
152
- properties: {
153
- device_id: {
154
- type: "string",
155
- description: "Device ID",
156
- },
157
- },
158
+ properties: { device_id: { type: "string", description: "Device ID" } },
158
159
  required: ["device_id"],
159
160
  },
160
161
  },
161
162
  {
162
163
  name: "stop_bridge",
163
- description: "Stop the on-device bridge",
164
+ description: "Stop the automation bridge on a device",
164
165
  inputSchema: {
165
166
  type: "object",
166
- properties: {
167
- device_id: {
168
- type: "string",
169
- description: "Device ID",
170
- },
171
- },
167
+ properties: { device_id: { type: "string", description: "Device ID" } },
172
168
  required: ["device_id"],
173
169
  },
174
170
  },
171
+ // Screenshot
175
172
  {
176
173
  name: "get_screenshot",
177
- description: "Capture a screenshot from the device. Returns the file path to the saved PNG.",
174
+ description: "Capture a fast, low-quality screenshot for LLM visual analysis. Returns the file path to the saved image. Use this for AI/LLM processing only — for full-quality screenshots use save_screenshot instead.",
178
175
  inputSchema: {
179
176
  type: "object",
180
- properties: {
181
- device_id: {
182
- type: "string",
183
- description: "Device ID",
184
- },
185
- },
177
+ properties: { device_id: { type: "string", description: "Device ID" } },
186
178
  required: ["device_id"],
187
179
  },
188
180
  },
189
181
  {
190
- name: "get_ui_tree",
191
- description: "Get the UI accessibility tree showing all visible elements with indices for tapping",
182
+ name: "save_screenshot",
183
+ description: "Save a full-quality PNG screenshot to disk. Use this when you need a high-quality image for reporting, debugging, or sharing — not for LLM processing (use get_screenshot instead).",
192
184
  inputSchema: {
193
185
  type: "object",
194
186
  properties: {
195
- device_id: {
196
- type: "string",
197
- description: "Device ID",
198
- },
199
- verbose: {
200
- type: "boolean",
201
- description: "Include detailed elements array with bounds (default: false)",
202
- },
203
- only_visible: {
204
- type: "boolean",
205
- description: "Filter to only visible elements (default: true)",
206
- },
207
- include_keyboard: {
208
- type: "boolean",
209
- description: "Include keyboard elements in the tree (default: false). Useful for interacting with on-screen keyboards.",
210
- },
211
- text_regex: {
212
- type: "string",
213
- description: "Regex to filter elements by text/value/contentDesc. Only matching elements are returned.",
214
- },
215
- bounds: {
216
- type: "object",
217
- description: "Filter to elements within a bounding rectangle",
218
- properties: {
219
- x: { type: "number", description: "Left X coordinate" },
220
- y: { type: "number", description: "Top Y coordinate" },
221
- w: { type: "number", description: "Width" },
222
- h: { type: "number", description: "Height" },
223
- },
224
- required: ["x", "y", "w", "h"],
225
- },
187
+ device_id: { type: "string", description: "Device ID" },
188
+ path: { type: "string", description: "Directory to save screenshot to (supports ~/). Defaults to OS temp directory." },
189
+ name: { type: "string", description: "Optional filename (without .png extension)" },
226
190
  },
227
191
  required: ["device_id"],
228
192
  },
229
193
  },
194
+ // App management
230
195
  {
231
- name: "tap",
232
- description: "Tap an element by index (from UI tree) or coordinates",
233
- inputSchema: {
234
- type: "object",
235
- properties: {
236
- device_id: {
237
- type: "string",
238
- description: "Device ID",
239
- },
240
- index: {
241
- type: "number",
242
- description: "Element index from UI tree (preferred)",
243
- },
244
- x: {
245
- type: "number",
246
- description: "X coordinate (use with y instead of index)",
247
- },
248
- y: {
249
- type: "number",
250
- description: "Y coordinate (use with x instead of index)",
251
- },
252
- },
253
- required: ["device_id"],
254
- },
255
- },
256
- {
257
- name: "double_tap",
258
- description: "Double tap an element by index (from UI tree) or coordinates",
259
- inputSchema: {
260
- type: "object",
261
- properties: {
262
- device_id: {
263
- type: "string",
264
- description: "Device ID",
265
- },
266
- index: {
267
- type: "number",
268
- description: "Element index from UI tree (preferred)",
269
- },
270
- x: {
271
- type: "number",
272
- description: "X coordinate (use with y instead of index)",
273
- },
274
- y: {
275
- type: "number",
276
- description: "Y coordinate (use with x instead of index)",
277
- },
278
- },
279
- required: ["device_id"],
280
- },
281
- },
282
- {
283
- name: "long_press",
284
- description: "Long press an element by index (from UI tree) or coordinates. Uses a fixed 0.5s hold duration.",
285
- inputSchema: {
286
- type: "object",
287
- properties: {
288
- device_id: {
289
- type: "string",
290
- description: "Device ID",
291
- },
292
- index: {
293
- type: "number",
294
- description: "Element index from UI tree (preferred)",
295
- },
296
- x: {
297
- type: "number",
298
- description: "X coordinate (use with y instead of index)",
299
- },
300
- y: {
301
- type: "number",
302
- description: "Y coordinate (use with x instead of index)",
303
- },
304
- },
305
- required: ["device_id"],
306
- },
307
- },
308
- {
309
- name: "two_finger_tap",
310
- description: "Perform a two-finger tap at coordinates (iOS only)",
196
+ name: "list_apps",
197
+ description: "List installed apps on the device",
311
198
  inputSchema: {
312
199
  type: "object",
313
- properties: {
314
- device_id: {
315
- type: "string",
316
- description: "Device ID",
317
- },
318
- index: {
319
- type: "number",
320
- description: "Element index from UI tree (preferred)",
321
- },
322
- x: {
323
- type: "number",
324
- description: "X coordinate (use with y instead of index)",
325
- },
326
- y: {
327
- type: "number",
328
- description: "Y coordinate (use with x instead of index)",
329
- },
330
- },
200
+ properties: { device_id: { type: "string", description: "Device ID" } },
331
201
  required: ["device_id"],
332
202
  },
333
203
  },
334
204
  {
335
- name: "drag",
336
- description: "Drag from one point to another (press, hold, move, release)",
205
+ name: "install_app",
206
+ description: "Install an app on the device from a local file path (.apk for Android, .ipa for iOS)",
337
207
  inputSchema: {
338
208
  type: "object",
339
209
  properties: {
340
- device_id: {
341
- type: "string",
342
- description: "Device ID",
343
- },
344
- from_x: {
345
- type: "number",
346
- description: "Starting X coordinate",
347
- },
348
- from_y: {
349
- type: "number",
350
- description: "Starting Y coordinate",
351
- },
352
- to_x: {
353
- type: "number",
354
- description: "Ending X coordinate",
355
- },
356
- to_y: {
357
- type: "number",
358
- description: "Ending Y coordinate",
359
- },
360
- duration_ms: {
361
- type: "number",
362
- description: "Drag duration in milliseconds (default: 500)",
363
- },
364
- press_duration_ms: {
365
- type: "number",
366
- description: "Hold duration before dragging in milliseconds (0 = no hold). Use for press-and-drag gestures like moving app icons.",
367
- },
210
+ device_id: { type: "string", description: "Device ID" },
211
+ path: { type: "string", description: "Local file path to the app (.apk or .ipa)" },
368
212
  },
369
- required: ["device_id", "from_x", "from_y", "to_x", "to_y"],
213
+ required: ["device_id", "path"],
370
214
  },
371
215
  },
372
216
  {
373
- name: "dismiss_keyboard",
374
- description: "Dismiss the on-screen keyboard if visible",
375
- inputSchema: {
376
- type: "object",
377
- properties: {
378
- device_id: {
379
- type: "string",
380
- description: "Device ID",
381
- },
382
- },
383
- required: ["device_id"],
384
- },
385
- },
386
- {
387
- name: "type_text",
388
- description: "Type text on the device (tap input field first to focus)",
389
- inputSchema: {
390
- type: "object",
391
- properties: {
392
- device_id: {
393
- type: "string",
394
- description: "Device ID",
395
- },
396
- text: {
397
- type: "string",
398
- description: "Text to type",
399
- },
400
- },
401
- required: ["device_id", "text"],
402
- },
403
- },
404
- {
405
- name: "swipe",
406
- description: "Perform a swipe gesture",
407
- inputSchema: {
408
- type: "object",
409
- properties: {
410
- device_id: {
411
- type: "string",
412
- description: "Device ID",
413
- },
414
- from_x: {
415
- type: "number",
416
- description: "Starting X coordinate",
417
- },
418
- from_y: {
419
- type: "number",
420
- description: "Starting Y coordinate",
421
- },
422
- to_x: {
423
- type: "number",
424
- description: "Ending X coordinate",
425
- },
426
- to_y: {
427
- type: "number",
428
- description: "Ending Y coordinate",
429
- },
430
- duration_ms: {
431
- type: "number",
432
- description: "Duration in milliseconds (default: 300)",
433
- },
434
- },
435
- required: ["device_id", "from_x", "from_y", "to_x", "to_y"],
436
- },
437
- },
438
- {
439
- name: "go_home",
440
- description: "Navigate to device home screen",
441
- inputSchema: {
442
- type: "object",
443
- properties: {
444
- device_id: {
445
- type: "string",
446
- description: "Device ID",
447
- },
448
- },
449
- required: ["device_id"],
450
- },
451
- },
452
- {
453
- name: "launch_app",
454
- description: "Launch an application by bundle ID",
217
+ name: "uninstall_app",
218
+ description: "Uninstall an app from the device",
455
219
  inputSchema: {
456
220
  type: "object",
457
221
  properties: {
458
- device_id: {
459
- type: "string",
460
- description: "Device ID",
461
- },
462
- bundle_id: {
463
- type: "string",
464
- description: "App bundle ID (e.g., com.apple.Preferences, com.android.settings)",
465
- },
222
+ device_id: { type: "string", description: "Device ID" },
223
+ bundle_id: { type: "string", description: "App bundle ID (iOS) or package name (Android)" },
466
224
  },
467
225
  required: ["device_id", "bundle_id"],
468
226
  },
469
227
  },
470
- {
471
- name: "list_apps",
472
- description: "List installed applications on the device",
473
- inputSchema: {
474
- type: "object",
475
- properties: {
476
- device_id: {
477
- type: "string",
478
- description: "Device ID",
479
- },
480
- },
481
- required: ["device_id"],
482
- },
483
- },
484
- {
485
- name: "get_ocr",
486
- description: "Perform OCR text recognition on the current screen (iOS only). Returns detected text with screen coordinates for tapping (already adjusted for tapping).",
487
- inputSchema: {
488
- type: "object",
489
- properties: {
490
- device_id: {
491
- type: "string",
492
- description: "Device ID",
493
- },
494
- },
495
- required: ["device_id"],
496
- },
497
- },
228
+ // DSL execution
498
229
  {
499
230
  name: "execute_dsl",
500
- description: `Execute a batch of automation steps using the DSL (Domain Specific Language).
501
- This is the PREFERRED method for complex automation as it's more reliable than sequential API calls.
231
+ description: `Execute a batch of DSL commands on a device. This is the primary tool for all device interaction — tap, type, swipe, observe, launch apps, assertions, web automation, and more.
502
232
 
503
- DSL supports: observe, tap, type, toggle, swipe, scroll, open_app, kill_app, navigate, wait_for, screenshot, set_location, reset_location, assert_*, if_exists, delay, execute_js (web)
233
+ Read the MCP resource mobai://reference/device-automation to learn how to control devices before using this tool.
504
234
 
505
- Example DSL script:
506
- {
507
- "version": "0.2",
508
- "steps": [
509
- {"action": "observe", "context": "native", "include": ["ui_tree"]},
510
- {"action": "tap", "predicate": {"text_contains": "Settings"}},
511
- {"action": "delay", "duration_ms": 500},
512
- {"action": "observe", "context": "native", "include": ["ui_tree"]}
513
- ],
514
- "on_fail": {"strategy": "retry", "max_retries": 2}
515
- }`,
235
+ Input: JSON string with "version": "0.2" and "steps" array. Example:
236
+ {"version":"0.2","steps":[
237
+ {"action":"open_app","bundle_id":"com.apple.Preferences"},
238
+ {"action":"tap","predicate":{"text_contains":"Wi-Fi"}},
239
+ {"action":"wait_for","predicate":{"type":"switch"},"timeout_ms":3000}
240
+ ]}`,
516
241
  inputSchema: {
517
242
  type: "object",
518
243
  properties: {
519
- device_id: {
520
- type: "string",
521
- description: "Device ID",
522
- },
523
- script: {
524
- type: "object",
525
- description: "DSL script object with version, steps, and optional on_fail",
526
- properties: {
527
- version: {
528
- type: "string",
529
- description: "DSL version (use '0.2')",
530
- },
531
- steps: {
532
- type: "array",
533
- description: "Array of action steps",
534
- items: { type: "object" },
535
- },
536
- on_fail: {
537
- type: "object",
538
- description: "Failure handling strategy",
539
- },
540
- },
541
- required: ["version", "steps"],
542
- },
244
+ device_id: { type: "string", description: "Device ID" },
245
+ commands: { type: "string", description: "DSL script as JSON string with version and steps" },
543
246
  },
544
- required: ["device_id", "script"],
247
+ required: ["device_id", "commands"],
545
248
  },
546
249
  },
250
+ // Test management
547
251
  {
548
- name: "run_agent",
549
- description: "Run an AI agent to perform a task autonomously. The agent will observe the screen, make decisions, and execute actions to complete the task.",
550
- inputSchema: {
551
- type: "object",
552
- properties: {
553
- device_id: {
554
- type: "string",
555
- description: "Device ID",
556
- },
557
- task: {
558
- type: "string",
559
- description: "Task description (e.g., 'Open Settings and enable WiFi')",
560
- },
561
- agent_type: {
562
- type: "string",
563
- enum: ["toolagent", "hierarchical", "classic"],
564
- description: "Agent type (default: toolagent)",
565
- },
566
- use_vision: {
567
- type: "boolean",
568
- description: "Enable vision/screenshots (default: from app settings)",
569
- },
570
- },
571
- required: ["device_id", "task"],
572
- },
252
+ name: "test_get_active",
253
+ description: "Get the currently active test project and its cases. Use this to discover which test cases are available.",
254
+ inputSchema: { type: "object", properties: {}, required: [] },
573
255
  },
574
256
  {
575
- name: "web_list_pages",
576
- description: "List available web pages (browser tabs and WebViews) for web automation",
257
+ name: "test_list_projects",
258
+ description: "List all test projects with their test cases included inline",
259
+ inputSchema: { type: "object", properties: {}, required: [] },
260
+ },
261
+ {
262
+ name: "test_create_project",
263
+ description: "Create a new test project",
577
264
  inputSchema: {
578
265
  type: "object",
579
- properties: {
580
- device_id: {
581
- type: "string",
582
- description: "Device ID",
583
- },
584
- },
585
- required: ["device_id"],
266
+ properties: { name: { type: "string", description: "Project name" } },
267
+ required: ["name"],
586
268
  },
587
269
  },
588
270
  {
589
- name: "web_navigate",
590
- description: "Navigate to a URL in the browser",
271
+ name: "test_rename_project",
272
+ description: "Rename an existing test project",
591
273
  inputSchema: {
592
274
  type: "object",
593
275
  properties: {
594
- device_id: {
595
- type: "string",
596
- description: "Device ID",
597
- },
598
- url: {
599
- type: "string",
600
- description: "URL to navigate to",
601
- },
276
+ project_id: { type: "string", description: "Project ID" },
277
+ name: { type: "string", description: "New project name" },
602
278
  },
603
- required: ["device_id", "url"],
279
+ required: ["project_id", "name"],
604
280
  },
605
281
  },
606
282
  {
607
- name: "web_get_dom",
608
- description: "Get the DOM tree of the current web page",
283
+ name: "test_create_case",
284
+ description: "Create a new test case in a project",
609
285
  inputSchema: {
610
286
  type: "object",
611
287
  properties: {
612
- device_id: {
613
- type: "string",
614
- description: "Device ID",
615
- },
288
+ project_id: { type: "string", description: "Project ID" },
289
+ name: { type: "string", description: "Test case name" },
290
+ folder: { type: "string", description: "Optional folder path within the project" },
616
291
  },
617
- required: ["device_id"],
292
+ required: ["project_id", "name"],
618
293
  },
619
294
  },
620
295
  {
621
- name: "web_click",
622
- description: "Click an element in the web page using CSS selector",
296
+ name: "test_rename_case",
297
+ description: "Rename an existing test case",
623
298
  inputSchema: {
624
299
  type: "object",
625
300
  properties: {
626
- device_id: {
627
- type: "string",
628
- description: "Device ID",
629
- },
630
- selector: {
631
- type: "string",
632
- description: "CSS selector (e.g., 'button.submit', '#login-btn')",
633
- },
301
+ project_id: { type: "string", description: "Project ID" },
302
+ case_id: { type: "string", description: "Test case ID" },
303
+ name: { type: "string", description: "New test case name" },
634
304
  },
635
- required: ["device_id", "selector"],
305
+ required: ["project_id", "case_id", "name"],
636
306
  },
637
307
  },
638
308
  {
639
- name: "web_type",
640
- description: "Type text into a web element using CSS selector",
309
+ name: "test_delete_case",
310
+ description: "Delete a test case from a project",
641
311
  inputSchema: {
642
312
  type: "object",
643
313
  properties: {
644
- device_id: {
645
- type: "string",
646
- description: "Device ID",
647
- },
648
- selector: {
649
- type: "string",
650
- description: "CSS selector for the input element",
651
- },
652
- text: {
653
- type: "string",
654
- description: "Text to type",
655
- },
314
+ project_id: { type: "string", description: "Project ID" },
315
+ case_id: { type: "string", description: "Test case ID" },
656
316
  },
657
- required: ["device_id", "selector", "text"],
317
+ required: ["project_id", "case_id"],
658
318
  },
659
319
  },
660
320
  {
661
- name: "web_execute_js",
662
- description: "Execute JavaScript in the web page context",
321
+ name: "test_get_script",
322
+ description: "Get the .mob script content for a test case (with 1-based line numbers)",
663
323
  inputSchema: {
664
324
  type: "object",
665
325
  properties: {
666
- device_id: {
667
- type: "string",
668
- description: "Device ID",
669
- },
670
- script: {
671
- type: "string",
672
- description: "JavaScript code to execute (use 'return' for results)",
673
- },
326
+ project_id: { type: "string", description: "Project ID" },
327
+ case_id: { type: "string", description: "Test case ID" },
674
328
  },
675
- required: ["device_id", "script"],
329
+ required: ["project_id", "case_id"],
676
330
  },
677
331
  },
678
332
  {
679
- name: "uninstall_app",
680
- description: "Uninstall an application from the device by bundle ID / package name.",
333
+ name: "test_replace_script",
334
+ description: "Replace the entire .mob script for a test case",
681
335
  inputSchema: {
682
336
  type: "object",
683
337
  properties: {
684
- device_id: {
685
- type: "string",
686
- description: "Device ID",
687
- },
688
- bundle_id: {
689
- type: "string",
690
- description: "App bundle ID (iOS) or package name (Android) to uninstall",
691
- },
338
+ project_id: { type: "string", description: "Project ID" },
339
+ case_id: { type: "string", description: "Test case ID" },
340
+ script: { type: "string", description: "New script content (without line numbers)" },
692
341
  },
693
- required: ["device_id", "bundle_id"],
342
+ required: ["project_id", "case_id", "script"],
694
343
  },
695
344
  },
696
345
  {
697
- name: "kill_app",
698
- description: "Force-kill a running application. On iOS (17+), uses CoreDevice appservice SIGKILL. On Android, uses 'am force-stop'.",
346
+ name: "test_update_line",
347
+ description: "Update a single line in the .mob script",
699
348
  inputSchema: {
700
349
  type: "object",
701
350
  properties: {
702
- device_id: {
703
- type: "string",
704
- description: "Device ID",
705
- },
706
- bundle_id: {
707
- type: "string",
708
- description: "Bundle ID / package name of the app to kill",
709
- },
351
+ project_id: { type: "string", description: "Project ID" },
352
+ case_id: { type: "string", description: "Test case ID" },
353
+ line_number: { type: "number", description: "1-based line number to update" },
354
+ content: { type: "string", description: "New line content" },
710
355
  },
711
- required: ["device_id", "bundle_id"],
356
+ required: ["project_id", "case_id", "line_number", "content"],
712
357
  },
713
358
  },
714
359
  {
715
- name: "set_location",
716
- description: "Set a simulated GPS location on the device. Supports: iOS (all versions), Android emulators (all versions), Android real devices (12+ only).",
360
+ name: "test_insert_after",
361
+ description: "Insert a new line after the specified line number in the .mob script",
717
362
  inputSchema: {
718
363
  type: "object",
719
364
  properties: {
720
- device_id: {
721
- type: "string",
722
- description: "Device ID",
723
- },
724
- lat: {
725
- type: "number",
726
- description: "Latitude (-90 to 90)",
727
- },
728
- lon: {
729
- type: "number",
730
- description: "Longitude (-180 to 180)",
731
- },
365
+ project_id: { type: "string", description: "Project ID" },
366
+ case_id: { type: "string", description: "Test case ID" },
367
+ line_number: { type: "number", description: "1-based line number to insert after (0 = insert at beginning)" },
368
+ content: { type: "string", description: "Line content to insert" },
732
369
  },
733
- required: ["device_id", "lat", "lon"],
370
+ required: ["project_id", "case_id", "line_number", "content"],
734
371
  },
735
372
  },
736
373
  {
737
- name: "reset_location",
738
- description: "Reset the device location to its real GPS position, removing any simulated location. Supports: iOS (all versions), Android emulators (all versions), Android real devices (12+ only).",
374
+ name: "test_delete_line",
375
+ description: "Delete a line from the .mob script",
739
376
  inputSchema: {
740
377
  type: "object",
741
378
  properties: {
742
- device_id: {
743
- type: "string",
744
- description: "Device ID",
745
- },
379
+ project_id: { type: "string", description: "Project ID" },
380
+ case_id: { type: "string", description: "Test case ID" },
381
+ line_number: { type: "number", description: "1-based line number to delete" },
746
382
  },
747
- required: ["device_id"],
383
+ required: ["project_id", "case_id", "line_number"],
748
384
  },
749
385
  },
750
386
  {
751
- name: "http_request",
752
- description: `Make a raw HTTP request to the MobAI API. Use this for advanced operations not covered by other tools.
753
-
754
- Base URL: http://127.0.0.1:8686/api/v1
755
-
756
- Common endpoints:
757
- - GET /devices - List devices
758
- - GET /devices/{id}/screenshot - Take screenshot
759
- - GET /devices/{id}/ui-tree - Get UI tree
760
- - POST /devices/{id}/dsl/execute - Execute DSL script
761
- - POST /devices/{id}/agent/run - Run AI agent`,
387
+ name: "test_run",
388
+ description: "Run a test case on a device",
762
389
  inputSchema: {
763
390
  type: "object",
764
391
  properties: {
765
- method: {
766
- type: "string",
767
- enum: ["GET", "POST", "PUT", "PATCH", "DELETE"],
768
- description: "HTTP method",
769
- },
770
- url: {
771
- type: "string",
772
- description: "Full URL or endpoint path (e.g., /devices)",
773
- },
774
- body: {
775
- type: "string",
776
- description: "Request body as JSON string",
777
- },
778
- timeout_ms: {
779
- type: "number",
780
- description: "Timeout in milliseconds (default: 600000)",
781
- },
392
+ project_id: { type: "string", description: "Project ID" },
393
+ case_id: { type: "string", description: "Test case ID" },
394
+ device_id: { type: "string", description: "Device ID to run the test on" },
782
395
  },
783
- required: ["method", "url"],
396
+ required: ["project_id", "case_id", "device_id"],
784
397
  },
785
398
  },
786
399
  ];
787
- // Handle list tools request
400
+ // ---------------------------------------------------------------------------
401
+ // List tools
402
+ // ---------------------------------------------------------------------------
788
403
  server.setRequestHandler(ListToolsRequestSchema, async () => {
789
404
  return { tools: TOOLS };
790
405
  });
791
- // Handle tool calls
406
+ // ---------------------------------------------------------------------------
407
+ // Tool call handler
408
+ // ---------------------------------------------------------------------------
409
/**
 * Build the API path that addresses a single test case.
 *
 * Both identifiers are percent-encoded before interpolation so that an ID
 * containing "/", "?", "#" or whitespace cannot change which endpoint the
 * request reaches (the same treatment uninstall_app gives bundle_id).
 *
 * @param {{ project_id?: string, case_id?: string }} [args] - Tool-call arguments.
 * @returns {string} Path of the form `/tests/projects/<project_id>/cases/<case_id>`.
 * @throws {Error} If project_id or case_id is missing or empty.
 */
function testCasePath(args) {
    const projectId = args?.project_id;
    const caseId = args?.case_id;
    if (!projectId || !caseId) {
        throw new Error("project_id and case_id are required");
    }
    return `/tests/projects/${encodeURIComponent(projectId)}/cases/${encodeURIComponent(caseId)}`;
}
792
416
  server.setRequestHandler(CallToolRequestSchema, async (request) => {
793
417
  const { name, arguments: args } = request.params;
794
418
  try {
795
- let result;
796
419
  switch (name) {
420
+ // Device management
797
421
  case "list_devices":
798
- result = await makeRequest("GET", "/devices");
799
- break;
422
+ return textResult(await doGet("/devices"));
800
423
  case "get_device":
801
- result = await makeRequest("GET", `/devices/${args?.device_id}`);
802
- break;
424
+ return textResult(await doGet(`/devices/${args?.device_id}`));
803
425
  case "start_bridge":
804
- result = await makeRequest("POST", `/devices/${args?.device_id}/bridge/start`, null, 60000);
805
- break;
426
+ return textResult(await doPost(`/devices/${args?.device_id}/bridge/start`));
806
427
  case "stop_bridge":
807
- result = await makeRequest("POST", `/devices/${args?.device_id}/bridge/stop`);
808
- break;
809
- case "get_screenshot":
810
- result = await makeRequest("GET", `/devices/${args?.device_id}/screenshot`);
811
- break;
812
- case "get_ui_tree": {
428
+ return textResult(await doPost(`/devices/${args?.device_id}/bridge/stop`));
429
+ // Screenshots
430
+ case "get_screenshot": {
431
+ const body = await doGet(`/devices/${args?.device_id}/screenshot?low_quality=true`);
432
+ return textResult(screenshotToFile(body));
433
+ }
434
+ case "save_screenshot": {
813
435
  const params = new URLSearchParams();
814
- if (args?.verbose)
815
- params.set("verbose", "true");
816
- if (args?.only_visible === false)
817
- params.set("onlyVisible", "false");
818
- if (args?.include_keyboard)
819
- params.set("includeKeyboard", "true");
820
- if (args?.text_regex)
821
- params.set("textRegex", args.text_regex);
822
- if (args?.bounds) {
823
- const b = args.bounds;
824
- params.set("boundsX", String(b.x));
825
- params.set("boundsY", String(b.y));
826
- params.set("boundsW", String(b.w));
827
- params.set("boundsH", String(b.h));
828
- }
829
- const queryString = params.toString();
830
- const endpoint = `/devices/${args?.device_id}/ui-tree${queryString ? `?${queryString}` : ""}`;
831
- result = await makeRequest("GET", endpoint);
832
- break;
436
+ if (args?.path)
437
+ params.set("path", args.path);
438
+ if (args?.name)
439
+ params.set("name", args.name);
440
+ const query = params.toString();
441
+ const body = await doGet(`/devices/${args?.device_id}/screenshot${query ? "?" + query : ""}`);
442
+ return textResult(screenshotToFile(body));
833
443
  }
834
- case "tap": {
835
- const body = {};
836
- if (args?.index !== undefined)
837
- body.index = args.index;
838
- if (args?.x !== undefined && args?.y !== undefined) {
839
- body.x = args.x;
840
- body.y = args.y;
444
+ // App management
445
+ case "list_apps":
446
+ return textResult(await doGet(`/devices/${args?.device_id}/apps`));
447
+ case "install_app":
448
+ return textResult(await doPost(`/devices/${args?.device_id}/install-app`, { path: args?.path }));
449
+ case "uninstall_app":
450
+ return textResult(await doDelete(`/devices/${args?.device_id}/apps/${encodeURIComponent(args?.bundle_id)}`));
451
+ // DSL execution
452
+ case "execute_dsl": {
453
+ const commandsStr = args?.commands;
454
+ if (!commandsStr)
455
+ throw new Error("commands is required");
456
+ let script;
457
+ try {
458
+ script = JSON.parse(commandsStr);
841
459
  }
842
- result = await makeRequest("POST", `/devices/${args?.device_id}/tap`, body);
843
- break;
844
- }
845
- case "double_tap": {
846
- const body = {};
847
- if (args?.index !== undefined)
848
- body.index = args.index;
849
- if (args?.x !== undefined && args?.y !== undefined) {
850
- body.x = args.x;
851
- body.y = args.y;
460
+ catch {
461
+ throw new Error("invalid DSL JSON: " + commandsStr);
852
462
  }
853
- result = await makeRequest("POST", `/devices/${args?.device_id}/double-tap`, body);
854
- break;
463
+ const body = await doPost(`/devices/${args?.device_id}/dsl/execute`, script);
464
+ return textResult(extractDSLScreenshots(body));
855
465
  }
856
- case "long_press": {
857
- const body = {};
858
- if (args?.index !== undefined)
859
- body.index = args.index;
860
- if (args?.x !== undefined && args?.y !== undefined) {
861
- body.x = args.x;
862
- body.y = args.y;
863
- }
864
- result = await makeRequest("POST", `/devices/${args?.device_id}/long-press`, body);
865
- break;
466
+ // Test management
467
+ case "test_get_active":
468
+ return textResult(await doGet("/tests/active"));
469
+ case "test_list_projects":
470
+ return textResult(await doGet("/tests/projects"));
471
+ case "test_create_project":
472
+ return textResult(await doPost("/tests/projects", { name: args?.name }));
473
+ case "test_rename_project":
474
+ return textResult(await doPatch(`/tests/projects/${args?.project_id}`, { name: args?.name }));
475
+ case "test_create_case": {
476
+ const body = { name: args?.name };
477
+ if (args?.folder)
478
+ body.folder = args.folder;
479
+ return textResult(await doPost(`/tests/projects/${args?.project_id}/cases`, body));
866
480
  }
867
- case "two_finger_tap": {
868
- const body = {};
869
- if (args?.index !== undefined)
870
- body.index = args.index;
871
- if (args?.x !== undefined && args?.y !== undefined) {
872
- body.x = args.x;
873
- body.y = args.y;
874
- }
875
- result = await makeRequest("POST", `/devices/${args?.device_id}/two-finger-tap`, body);
876
- break;
481
+ case "test_rename_case": {
482
+ const p = testCasePath(args);
483
+ return textResult(await doPatch(p, { name: args?.name }));
877
484
  }
878
- case "drag": {
879
- const dragBody = {
880
- fromX: args?.from_x,
881
- fromY: args?.from_y,
882
- toX: args?.to_x,
883
- toY: args?.to_y,
884
- duration: args?.duration_ms ?? 500,
885
- };
886
- if (args?.press_duration_ms) {
887
- dragBody.pressDuration = args.press_duration_ms;
888
- }
889
- result = await makeRequest("POST", `/devices/${args?.device_id}/drag`, dragBody);
890
- break;
485
+ case "test_delete_case": {
486
+ const p = testCasePath(args);
487
+ return textResult(await doDelete(p));
891
488
  }
892
- case "dismiss_keyboard":
893
- result = await makeRequest("POST", `/devices/${args?.device_id}/dismiss-keyboard`);
894
- break;
895
- case "type_text":
896
- result = await makeRequest("POST", `/devices/${args?.device_id}/type`, { text: args?.text });
897
- break;
898
- case "swipe":
899
- result = await makeRequest("POST", `/devices/${args?.device_id}/swipe`, {
900
- fromX: args?.from_x,
901
- fromY: args?.from_y,
902
- toX: args?.to_x,
903
- toY: args?.to_y,
904
- duration: args?.duration_ms ?? 300,
905
- });
906
- break;
907
- case "go_home":
908
- result = await makeRequest("POST", `/devices/${args?.device_id}/go-home`);
909
- break;
910
- case "launch_app":
911
- result = await makeRequest("POST", `/devices/${args?.device_id}/launch-app`, {
912
- bundleId: args?.bundle_id,
913
- });
914
- break;
915
- case "list_apps":
916
- result = await makeRequest("GET", `/devices/${args?.device_id}/apps`);
917
- break;
918
- case "get_ocr":
919
- result = await makeRequest("GET", `/devices/${args?.device_id}/ocr`);
920
- break;
921
- case "uninstall_app":
922
- result = await makeRequest("DELETE", `/devices/${args?.device_id}/apps/${encodeURIComponent(args?.bundle_id)}`);
923
- break;
924
- case "kill_app":
925
- result = await makeRequest("POST", `/devices/${args?.device_id}/kill-app`, {
926
- bundleId: args?.bundle_id,
927
- });
928
- break;
929
- case "set_location":
930
- result = await makeRequest("POST", `/devices/${args?.device_id}/location`, {
931
- lat: args?.lat,
932
- lon: args?.lon,
933
- });
934
- break;
935
- case "reset_location":
936
- result = await makeRequest("DELETE", `/devices/${args?.device_id}/location`);
937
- break;
938
- case "execute_dsl":
939
- result = await makeRequest("POST", `/devices/${args?.device_id}/dsl/execute`, args?.script, 300000 // 5 minutes
940
- );
941
- break;
942
- case "run_agent": {
943
- const agentBody = { task: args?.task };
944
- if (args?.agent_type)
945
- agentBody.agentType = args.agent_type;
946
- if (args?.use_vision !== undefined)
947
- agentBody.useVision = args.use_vision;
948
- result = await makeRequest("POST", `/devices/${args?.device_id}/agent/run`, agentBody, 600000 // 10 minutes
949
- );
950
- break;
489
+ case "test_get_script": {
490
+ const p = testCasePath(args);
491
+ return textResult(await doGet(`${p}/script`));
951
492
  }
952
- case "web_list_pages":
953
- result = await makeRequest("GET", `/devices/${args?.device_id}/web/pages`);
954
- break;
955
- case "web_navigate":
956
- result = await makeRequest("POST", `/devices/${args?.device_id}/web/navigate`, {
957
- url: args?.url,
958
- });
959
- break;
960
- case "web_get_dom":
961
- result = await makeRequest("GET", `/devices/${args?.device_id}/web/dom`);
962
- break;
963
- case "web_click":
964
- result = await makeRequest("POST", `/devices/${args?.device_id}/web/click`, {
965
- selector: args?.selector,
966
- });
967
- break;
968
- case "web_type":
969
- result = await makeRequest("POST", `/devices/${args?.device_id}/web/type`, {
970
- selector: args?.selector,
971
- text: args?.text,
972
- });
973
- break;
974
- case "web_execute_js":
975
- result = await makeRequest("POST", `/devices/${args?.device_id}/web/execute`, {
976
- script: args?.script,
977
- });
978
- break;
979
- case "http_request": {
980
- const url = (args?.url).startsWith("http")
981
- ? args?.url
982
- : `${API_BASE_URL}${args?.url}`;
983
- let body = undefined;
984
- if (args?.body) {
985
- try {
986
- body = JSON.parse(args.body);
987
- }
988
- catch {
989
- body = args.body;
990
- }
991
- }
992
- result = await makeRequest(args?.method, url, body, args?.timeout_ms ?? DEFAULT_TIMEOUT_MS);
993
- break;
493
+ case "test_replace_script": {
494
+ const p = testCasePath(args);
495
+ return textResult(await doPut(`${p}/script`, { script: args?.script }));
496
+ }
497
+ case "test_update_line": {
498
+ const p = testCasePath(args);
499
+ return textResult(await doPost(`${p}/script/update-line`, {
500
+ line_number: args?.line_number,
501
+ content: args?.content,
502
+ }));
503
+ }
504
+ case "test_insert_after": {
505
+ const p = testCasePath(args);
506
+ return textResult(await doPost(`${p}/script/insert-after`, {
507
+ line_number: args?.line_number,
508
+ content: args?.content,
509
+ }));
510
+ }
511
+ case "test_delete_line": {
512
+ const p = testCasePath(args);
513
+ return textResult(await doPost(`${p}/script/delete-line`, {
514
+ line_number: args?.line_number,
515
+ }));
516
+ }
517
+ case "test_run": {
518
+ const p = testCasePath(args);
519
+ return textResult(await doPost(`${p}/run`, { device_id: args?.device_id }));
994
520
  }
995
521
  default:
996
- return {
997
- content: [{ type: "text", text: `Unknown tool: ${name}` }],
998
- isError: true,
999
- };
522
+ return { content: [{ type: "text", text: `Unknown tool: ${name}` }], isError: true };
1000
523
  }
1001
- const formattedBody = typeof result.body === "string"
1002
- ? result.body
1003
- : JSON.stringify(result.body, null, 2);
1004
- return {
1005
- content: [
1006
- {
1007
- type: "text",
1008
- text: `Status: ${result.status} ${result.statusText}\n\n${formattedBody}`,
1009
- },
1010
- ],
1011
- isError: result.status >= 400,
1012
- };
1013
524
  }
1014
525
  catch (error) {
1015
526
  if (error instanceof Error && error.name === "AbortError") {
1016
- return {
1017
- content: [{ type: "text", text: "Request timed out" }],
1018
- isError: true,
1019
- };
527
+ return errResult("Request timed out");
1020
528
  }
1021
- const message = error instanceof Error ? error.message : String(error);
1022
- return {
1023
- content: [{ type: "text", text: `Error: ${message}` }],
1024
- isError: true,
1025
- };
529
+ return errResult(error);
1026
530
  }
1027
531
  });
1028
- // Resources
1029
- const RESOURCES = [
1030
- {
1031
- uri: "mobai://api-reference",
1032
- name: "MobAI API Reference",
1033
- description: "Complete API documentation for MobAI HTTP API",
1034
- mimeType: "text/markdown",
1035
- },
1036
- {
1037
- uri: "mobai://dsl-guide",
1038
- name: "DSL Automation Guide",
1039
- description: "Guide for using the DSL batch execution system",
1040
- mimeType: "text/markdown",
1041
- },
1042
- {
1043
- uri: "mobai://native-runner",
1044
- name: "Native App Automation",
1045
- description: "Guide for automating native mobile apps",
1046
- mimeType: "text/markdown",
1047
- },
1048
- {
1049
- uri: "mobai://web-runner",
1050
- name: "Web Automation",
1051
- description: "Guide for automating browsers and WebViews",
1052
- mimeType: "text/markdown",
1053
- },
1054
- ];
1055
- // Handle list resources request
1056
- server.setRequestHandler(ListResourcesRequestSchema, async () => {
1057
- return { resources: RESOURCES };
1058
- });
1059
- // Handle read resource request
1060
- server.setRequestHandler(ReadResourceRequestSchema, async (request) => {
1061
- const { uri } = request.params;
1062
- const content = getResourceContent(uri);
1063
- if (!content) {
1064
- throw new Error(`Resource not found: ${uri}`);
1065
- }
1066
- return {
1067
- contents: [
1068
- {
1069
- uri,
1070
- mimeType: "text/markdown",
1071
- text: content,
1072
- },
1073
- ],
1074
- };
1075
- });
1076
- function getResourceContent(uri) {
1077
- switch (uri) {
1078
- case "mobai://api-reference":
1079
- return API_REFERENCE;
1080
- case "mobai://dsl-guide":
1081
- return DSL_GUIDE;
1082
- case "mobai://native-runner":
1083
- return NATIVE_RUNNER_GUIDE;
1084
- case "mobai://web-runner":
1085
- return WEB_RUNNER_GUIDE;
1086
- default:
1087
- return null;
1088
- }
1089
- }
1090
- // Resource content
1091
- const API_REFERENCE = `# MobAI API Reference
1092
-
1093
- **Base URL:** \`http://127.0.0.1:8686/api/v1\`
1094
-
1095
- ## Device Management
1096
-
1097
- | Endpoint | Method | Description |
1098
- |----------|--------|-------------|
1099
- | /devices | GET | List all connected devices |
1100
- | /devices/{id} | GET | Get device info |
1101
- | /devices/{id}/screenshot | GET | Capture screenshot (saved to /tmp/mobai/screenshots/) |
1102
- | /devices/{id}/ui-tree | GET | Get UI accessibility tree |
1103
- | /devices/{id}/apps | GET | List installed apps |
1104
- | /devices/{id}/ocr | GET | OCR text recognition (iOS only) |
1105
-
1106
- ## Bridge Control
1107
-
1108
- | Endpoint | Method | Description |
1109
- |----------|--------|-------------|
1110
- | /devices/{id}/bridge/start | POST | Start on-device bridge (required for automation) |
1111
- | /devices/{id}/bridge/stop | POST | Stop bridge |
1112
-
1113
- ## UI Operations
1114
-
1115
- | Endpoint | Method | Description |
1116
- |----------|--------|-------------|
1117
- | /devices/{id}/tap | POST | Tap element: {"index": N} or {"x": X, "y": Y} |
1118
- | /devices/{id}/double-tap | POST | Double tap: {"index": N} or {"x": X, "y": Y} |
1119
- | /devices/{id}/long-press | POST | Long press (0.5s): {"index": N} or {"x": X, "y": Y} |
1120
- | /devices/{id}/two-finger-tap | POST | Two-finger tap (iOS): {"index": N} or {"x": X, "y": Y} |
1121
- | /devices/{id}/swipe | POST | Swipe: {"fromX", "fromY", "toX", "toY", "duration"} |
1122
- | /devices/{id}/drag | POST | Drag: {"fromX", "fromY", "toX", "toY", "duration", "pressDuration"} |
1123
- | /devices/{id}/type | POST | Type text: {"text": "..."} |
1124
- | /devices/{id}/dismiss-keyboard | POST | Dismiss on-screen keyboard |
1125
- | /devices/{id}/go-home | POST | Go to home screen |
1126
- | /devices/{id}/launch-app | POST | Launch app: {"bundleId": "..."} |
1127
- | /devices/{id}/apps/{bundleId} | DELETE | Uninstall app by bundle ID |
1128
- | /devices/{id}/kill-app | POST | Kill app: {"bundleId": "..."} |
1129
-
1130
- ## DSL Execution
1131
-
1132
- | Endpoint | Method | Description |
1133
- |----------|--------|-------------|
1134
- | /devices/{id}/dsl/execute | POST | Execute DSL batch script |
1135
-
1136
- ## AI Agent
1137
-
1138
- | Endpoint | Method | Description |
1139
- |----------|--------|-------------|
1140
- | /devices/{id}/agent/run | POST | Run AI agent: {"task": "..."} |
1141
-
1142
- ## Performance Metrics
1143
-
1144
- | Endpoint | Method | Description |
1145
- |----------|--------|-------------|
1146
- | /devices/{id}/metrics/start | POST | Start metrics collection |
1147
- | /devices/{id}/metrics/stop | POST | Stop collection, return summary |
1148
- | /devices/{id}/metrics | GET | Get raw metrics buffer |
1149
- | /devices/{id}/metrics/summary | GET | Get current summary without stopping |
1150
-
1151
- ## Web Automation
1152
-
1153
- | Endpoint | Method | Description |
1154
- |----------|--------|-------------|
1155
- | /devices/{id}/web/pages | GET | List browser tabs/WebViews |
1156
- | /devices/{id}/web/navigate | POST | Navigate to URL: {"url": "..."} |
1157
- | /devices/{id}/web/dom | GET | Get DOM tree |
1158
- | /devices/{id}/web/click | POST | Click element: {"selector": "..."} |
1159
- | /devices/{id}/web/type | POST | Type text: {"selector": "...", "text": "..."} |
1160
- | /devices/{id}/web/execute | POST | Execute JS: {"script": "..."} |
1161
-
1162
- ## Response Format
1163
-
1164
- **Success:**
1165
- \`\`\`json
1166
- {"success": true, "data": {...}}
1167
- \`\`\`
1168
-
1169
- **Error:**
1170
- \`\`\`json
1171
- {"error": "message", "code": "ERROR_CODE"}
1172
- \`\`\`
1173
-
1174
- ## DSL Action Reference
1175
-
1176
- ### type Action
1177
- - **predicate**: Required if keyboard not already open (auto-taps the element first)
1178
- - **dismiss_keyboard**: Default \`false\` (keyboard stays open after typing)
1179
- - **clear_first**: Optional, clears field before typing
1180
-
1181
- \`\`\`json
1182
- {"action": "type", "text": "hello", "predicate": {"type": "input"}}
1183
- \`\`\`
1184
-
1185
- ### press_key Action
1186
- - **key**: Keyboard key to press (return, tab, delete, escape, etc.)
1187
- - **context**: Optional, "web" for web context (supports enter, tab, delete, escape)
1188
-
1189
- \`\`\`json
1190
- {"action": "press_key", "key": "return"}
1191
- {"action": "press_key", "key": "tab", "context": "web"}
1192
- \`\`\`
1193
-
1194
- ### select_web_context Action
1195
- - **url_contains**: Filter by URL substring
1196
- - **title_contains**: Filter by page title substring
1197
-
1198
- \`\`\`json
1199
- {"action": "select_web_context"}
1200
- {"action": "select_web_context", "url_contains": "example.com"}
1201
- {"action": "select_web_context", "title_contains": "Login"}
1202
- \`\`\`
1203
- `;
1204
- const DSL_GUIDE = `# MobAI DSL Guide
1205
-
1206
- The DSL (Domain Specific Language) enables batch execution of multiple automation steps in a single request.
1207
-
1208
- ## Basic Structure
1209
-
1210
- \`\`\`json
1211
- {
1212
- "version": "0.2",
1213
- "steps": [
1214
- {"action": "observe", "context": "native", "include": ["ui_tree"]},
1215
- {"action": "tap", "predicate": {"text_contains": "Settings"}}
1216
- ],
1217
- "on_fail": {"strategy": "retry", "max_retries": 2}
1218
- }
1219
- \`\`\`
1220
-
1221
- ## Available Actions
1222
-
1223
- | Action | Description | Key Fields |
1224
- |--------|-------------|------------|
1225
- | observe | Get UI tree/screenshot/OCR | context, include (ui_tree, screenshot, installed_apps, ocr), filter ({text_regex, bounds}) |
1226
- | tap | Tap element | predicate or coords |
1227
- | type | Type text | text, predicate (if keyboard not open), dismiss_keyboard (default: false) |
1228
- | press_key | Press keyboard key | key (return, tab, delete, etc.), context (optional: "web") |
1229
- | toggle | Set switch state | predicate, state ("on"/"off") |
1230
- | swipe | Swipe gesture | direction, distance, duration_ms |
1231
- | scroll | Scroll in container | direction, predicate (container), to_element |
1232
- | open_app | Launch app | bundle_id |
1233
- | navigate | Go home/back | target ("home", "back") |
1234
- | wait_for | Wait for element or UI stability | predicate, timeout_ms, poll_interval_ms, stable (wait for UI to stop changing) |
1235
- | screenshot | Save screenshot to file | file_path (directory), name (optional filename) |
1236
- | assert_exists | Verify element exists | predicate, timeout_ms |
1237
- | assert_not_exists | Verify element gone | predicate |
1238
- | delay | Wait fixed time | duration_ms |
1239
- | if_exists | Conditional | predicate, then, else |
1240
- | select_web_context | Select browser/WebView | url_contains, title_contains (optional filters) |
1241
- | kill_app | Force-kill running app | bundle_id |
1242
- | set_location | Simulate GPS location (Android 12+ for real devices) | lat, lon |
1243
- | reset_location | Reset to real GPS (Android 12+ for real devices) | (no fields) |
1244
- | metrics_start | Start performance monitoring | types, bundle_id, label, thresholds, capture_logs |
1245
- | metrics_stop | Stop monitoring, get summary | format ("summary" or "detailed") |
1246
-
1247
- ## Predicates
1248
-
1249
- Match elements by:
1250
- - \`text\`: Exact text match
1251
- - \`text_contains\`: Contains substring (case-insensitive)
1252
- - \`text_starts_with\`: Starts with prefix
1253
- - \`text_regex\`: Regex pattern
1254
- - \`type\`: Element type (button, input, switch, etc.)
1255
- - \`label\`: Accessibility label
1256
- - \`bounds_hint\`: Screen region (top_half, bottom_half, center, etc.)
1257
- - \`near\`: Near another element
1258
- - \`index\`: Select Nth match
1259
-
1260
- ## Examples
1261
-
1262
- ### Tap Element
1263
- \`\`\`json
1264
- {"action": "tap", "predicate": {"text_contains": "Settings"}}
1265
- \`\`\`
1266
-
1267
- ### Type Text
1268
- \`\`\`json
1269
- {"action": "type", "text": "Hello", "predicate": {"type": "input"}}
1270
- \`\`\`
1271
-
1272
- Note: \`predicate\` is required if keyboard is not already open. Use \`dismiss_keyboard: true\` to close keyboard after typing.
1273
-
1274
- ### Toggle Switch
1275
- \`\`\`json
1276
- {"action": "toggle", "predicate": {"type": "switch", "text_contains": "WiFi"}, "state": "on"}
1277
- \`\`\`
1278
-
1279
- ### Scroll Until Found
1280
- \`\`\`json
1281
- {"action": "scroll", "direction": "down", "to_element": {"predicate": {"text": "Privacy"}}, "max_scrolls": 10}
1282
- \`\`\`
1283
-
1284
- ### Conditional (Dismiss Popup)
1285
- \`\`\`json
1286
- {
1287
- "action": "if_exists",
1288
- "predicate": {"text_contains": "Allow"},
1289
- "then": [{"action": "tap", "predicate": {"text": "Allow"}}]
1290
- }
1291
- \`\`\`
1292
-
1293
- ## Failure Strategies
1294
-
1295
- - \`abort\`: Stop on failure (default)
1296
- - \`skip\`: Skip failed step, continue
1297
- - \`retry\`: Retry with delay
1298
-
1299
- ## OCR (iOS only)
1300
-
1301
- Use \`include: ["ocr"]\` in observe to get text recognition when UI tree is empty:
1302
-
1303
- \`\`\`json
1304
- {"action": "observe", "context": "native", "include": ["ocr"]}
1305
- \`\`\`
1306
-
1307
- Returns text with coordinates for tapping (already adjusted for tapping).
1308
-
1309
- ## Performance Metrics
1310
-
1311
- Collect CPU, memory, FPS, network, and battery metrics during test flows with optional logging capture.
1312
-
1313
- ### Start Metrics Collection
1314
- \`\`\`json
1315
- {
1316
- "action": "metrics_start",
1317
- "types": ["system_cpu", "system_memory", "fps"],
1318
- "bundle_id": "com.example.app",
1319
- "label": "login_flow",
1320
- "capture_logs": true,
1321
- "thresholds": {
1322
- "cpu_high": 80,
1323
- "fps_low": 45,
1324
- "memory_growth_mb_min": 50
1325
- }
1326
- }
1327
- \`\`\`
1328
-
1329
- **Fields:**
1330
- - \`types\`: Metrics to collect - system_cpu, system_memory, fps, network, battery, process
1331
- - \`bundle_id\`: Filter to specific app (optional)
1332
- - \`label\`: Human-readable session label (optional)
1333
- - \`thresholds\`: Custom thresholds for anomaly detection (optional)
1334
- - \`capture_logs\`: Capture device logs during session (default: false)
1335
-
1336
- ### Stop and Get Summary
1337
- \`\`\`json
1338
- {"action": "metrics_stop", "format": "summary"}
1339
- \`\`\`
1340
-
1341
- **Response:**
1342
- \`\`\`json
1343
- {
1344
- "metrics_summary": {
1345
- "session": {
1346
- "label": "login_flow",
1347
- "duration_seconds": 45.2,
1348
- "sample_count": 45,
1349
- "session_id": "abc123",
1350
- "data_file": "/tmp/mobai/metrics/abc123.jsonl",
1351
- "logs_file": "/tmp/mobai/logs/abc123.jsonl",
1352
- "logs_available": true
1353
- },
1354
- "overall_health": "warning",
1355
- "health_score": 72,
1356
- "system_cpu": {"avg": 34.5, "max": 89.2, "p95": 78.1, "status": "ok"},
1357
- "system_memory": {"avg_percent": 45.2, "growth_mb": 28.5, "trend": "increasing", "status": "warning"},
1358
- "fps": {"avg": 58.2, "min": 24.0, "jank_percent": 8.5, "status": "warning"},
1359
- "anomalies": {
1360
- "cpu_spikes": [
1361
- {"at_s": 0.5, "peak": 288, "duration_ms": 18147, "source": "system"}
1362
- ],
1363
- "fps_drops": [
1364
- {"start_s": 1.2, "end_s": 16.8, "min_fps": 39.5, "avg_fps": 42.3, "samples": 1}
1365
- ],
1366
- },
1367
- "recommendations": [
1368
- "FPS dropped to 24 at +15s - investigate screen transition"
1369
- ]
1370
- }
1371
- }
1372
- \`\`\`
1373
-
1374
- ### Example: Performance Test Flow
1375
- \`\`\`json
1376
- {
1377
- "version": "0.2",
1378
- "steps": [
1379
- {"action": "metrics_start", "types": ["system_cpu", "system_memory", "fps"], "label": "app_launch"},
1380
- {"action": "open_app", "bundle_id": "com.example.app"},
1381
- {"action": "wait_for", "predicate": {"text": "Welcome"}, "timeout_ms": 10000},
1382
- {"action": "tap", "predicate": {"text": "Login"}},
1383
- {"action": "delay", "duration_ms": 5000},
1384
- {"action": "metrics_stop", "format": "summary"}
1385
- ]
1386
- }
1387
- \`\`\`
1388
- `;
1389
- const NATIVE_RUNNER_GUIDE = `# Native App Automation Guide
1390
-
1391
- Use this for automating native mobile apps (Settings, Mail, Instagram, etc.).
1392
-
1393
- ## Script Writing Guidelines
1394
-
1395
- The DSL's purpose is to **minimize LLM calls** by encoding assumptions into comprehensive scripts. Write scripts that handle common scenarios without needing to re-observe.
1396
-
1397
- ### Example: Handle Cookie Banner
1398
- \`\`\`json
1399
- {
1400
- "action": "if_exists",
1401
- "predicate": {"text_contains": "Accept Cookies"},
1402
- "then": [{"action": "tap", "predicate": {"text_contains": "Accept"}}]
1403
- }
1404
- \`\`\`
1405
-
1406
- ### Common Knowledge (use without observing)
1407
- - Safari has an address bar at the top
1408
- - Settings app has Wi-Fi, Bluetooth, General sections
1409
- - Alert dialogs have "OK", "Cancel", "Allow", "Don't Allow" buttons
1410
- - iOS keyboard has "Done", "Return", "Search" keys
1411
-
1412
- ### Script Writing Rules
1413
- - **Use open_app** - Always start scripts with open_app to ensure correct app
1414
- - **UI tree provided upfront** - You receive the initial UI tree, use it to plan the script
1415
- - **Use if_exists for popups** - Handle cookie banners, permission dialogs, notifications
1416
- - **observe only for assert_screen_changed** - Use observe to establish baseline, then assert_screen_changed to verify navigation
1417
-
1418
- ## IMPORTANT: Browser Native UI
1419
-
1420
- When automating browsers (Safari, Chrome), use **Native Runner** for the browser's own UI:
1421
- - Address bar / URL bar
1422
- - Tab bar and tab management
1423
- - Navigation buttons (back, forward, refresh)
1424
- - Bookmarks bar
1425
- - Browser menus and settings
1426
-
1427
- These are native OS elements, NOT web content. Only use Web Runner for the actual webpage content inside the browser.
1428
-
1429
- ## Workflow
1430
-
1431
- 1. **Observe UI** - Get the accessibility tree
1432
- 2. **Match Elements** - Use predicates to find elements
1433
- 3. **Execute Actions** - Tap, type, swipe, press_key, etc.
1434
- 4. **Verify Results** - Check UI state changed
1435
-
1436
- ## Type Action
1437
-
1438
- The \`type\` action requires either:
1439
- 1. Keyboard already open (from previous tap on input), OR
1440
- 2. A predicate to identify and tap the input field
1441
-
1442
- **dismiss_keyboard** default is \`false\` (keyboard stays open after typing).
1443
-
1444
- ### Pattern 1: Tap then Type
1445
- \`\`\`json
1446
- [
1447
- {"action": "tap", "predicate": {"type": "input"}},
1448
- {"action": "type", "text": "username"},
1449
- {"action": "press_key", "key": "tab"}
1450
- ]
1451
- \`\`\`
1452
-
1453
- ### Pattern 2: Type with Predicate
1454
- \`\`\`json
1455
- {"action": "type", "text": "username", "predicate": {"type": "input", "label": "Username"}}
1456
- \`\`\`
1457
-
1458
- ### Dismissing Keyboard
1459
- - Use \`press_key: return\` to submit and close the keyboard
1460
- - If submit is not desired, look for a "Close", "Cancel", "Done" or "Back" button in the UI tree and tap it
1461
- - On Android, \`press_key: back\` also dismisses the keyboard
1462
-
1463
- ## Common Patterns
1464
-
1465
- ### Open App and Navigate
1466
- \`\`\`json
1467
- {
1468
- "version": "0.2",
1469
- "steps": [
1470
- {"action": "open_app", "bundle_id": "com.apple.Preferences"},
1471
- {"action": "delay", "duration_ms": 1000},
1472
- {"action": "observe", "context": "native", "include": ["ui_tree"]},
1473
- {"action": "tap", "predicate": {"text_contains": "General"}}
1474
- ]
1475
- }
1476
- \`\`\`
1477
-
1478
- ### Fill Form
1479
- \`\`\`json
1480
- {
1481
- "version": "0.2",
1482
- "steps": [
1483
- {"action": "tap", "predicate": {"type": "input"}},
1484
- {"action": "type", "text": "username"},
1485
- {"action": "press_key", "key": "tab"},
1486
- {"action": "type", "text": "password"},
1487
- {"action": "press_key", "key": "return"}
1488
- ]
1489
- }
1490
- \`\`\`
1491
-
1492
- ### Scroll to Find Element
1493
- \`\`\`json
1494
- {
1495
- "version": "0.2",
1496
- "steps": [
1497
- {"action": "scroll", "direction": "down", "to_element": {"predicate": {"text": "Privacy"}}, "max_scrolls": 10},
1498
- {"action": "tap", "predicate": {"text": "Privacy"}}
1499
- ]
1500
- }
1501
- \`\`\`
1502
-
1503
- ### Handle Dialogs
1504
- \`\`\`json
1505
- {
1506
- "version": "0.2",
1507
- "steps": [
1508
- {
1509
- "action": "if_exists",
1510
- "predicate": {"text_contains": "Allow"},
1511
- "then": [{"action": "tap", "predicate": {"text": "Allow"}}]
1512
- }
1513
- ]
1514
- }
1515
- \`\`\`
1516
-
1517
- ## Quick Reference
1518
-
1519
- | Action | Description | Key Fields |
1520
- |--------|-------------|------------|
1521
- | tap | Tap element | predicate or coords |
1522
- | type | Type text | text, predicate (if keyboard not open), dismiss_keyboard (default: false) |
1523
- | press_key | Press keyboard key | key (return, tab, delete, etc.) |
1524
- | swipe | Swipe gesture | direction, distance |
1525
- | scroll | Scroll container | direction, to_element |
1526
-
1527
- ## Tips
1528
-
1529
- - **Always observe first** - Get UI tree before interacting
1530
- - **Use predicates** - More robust than hardcoded indices
1531
- - **Add delays after navigation** - Apps need time to render
1532
- - **Use retry strategy** - Transient failures are common
1533
- - **Use press_key for form navigation** - Tab between fields, Return to submit
1534
- - **Use OCR for system dialogs (iOS)** - When UI tree is empty, use \`include: ["ocr"]\`
1535
- `;
1536
- const WEB_RUNNER_GUIDE = `# Web Automation Guide
1537
-
1538
- **Try native-runner first for simple taps/types.** Only use Web Runner when you need DOM manipulation, CSS selectors, or JavaScript execution.
1539
-
1540
- ## iOS Simulator Limitation
1541
-
1542
- **IMPORTANT: Web context is NOT supported on iOS simulators.** Web automation features (select_web_context, web DOM access, CSS selectors, JavaScript execution) only work on:
1543
- - **Physical iOS devices** (iPhone, iPad)
1544
- - **Android emulators and physical devices**
1545
-
1546
- ## When to Use Web Runner
1547
-
1548
- **USE Web Runner for:**
1549
- - Native runner returns NO_MATCH for web elements
1550
- - CSS selector-based element targeting
1551
- - JavaScript execution in page context
1552
- - DOM manipulation and inspection
1553
- - Complex form interactions requiring DOM access
1554
-
1555
- **DO NOT use Web Runner for:**
1556
- - Browser address bar / URL bar → use Native Runner
1557
- - Browser tab bar → use Native Runner
1558
- - Browser navigation buttons (back, forward, refresh) → use Native Runner
1559
- - Browser menus and settings → use Native Runner
1560
- - Any UI outside the webpage or webview content area → use Native Runner
1561
-
1562
- The browser's own UI elements (address bar, tabs, navigation) are **native OS elements**, not web content.
1563
-
1564
- ## Platform Support
1565
-
1566
- | Platform | Browser | Protocol |
1567
- |----------|---------|----------|
1568
- | iOS | Safari, WebViews | WebInspector |
1569
- | Android | Chrome, WebViews | Chrome DevTools Protocol |
1570
-
1571
- ## Workflow
1572
-
1573
- 1. **Select web context** - Connect to browser
1574
- 2. **Navigate** - Go to URL
1575
- 3. **Get DOM** - Inspect page structure
1576
- 4. **Interact** - Click, type, press_key using CSS selectors
1577
-
1578
- ## select_web_context Options
1579
-
1580
- \`\`\`json
1581
- {"action": "select_web_context"}
1582
- {"action": "select_web_context", "url_contains": "example.com"}
1583
- {"action": "select_web_context", "title_contains": "Login"}
1584
- \`\`\`
1585
-
1586
- Use \`url_contains\` or \`title_contains\` to select a specific tab/WebView when multiple are available.
1587
-
1588
- ## press_key (Web Context)
1589
-
1590
- Press keyboard keys in web context. Supported keys: \`enter\`, \`tab\`, \`delete\`, \`escape\`
1591
-
1592
- \`\`\`json
1593
- {"action": "press_key", "context": "web", "key": "enter"}
1594
- {"action": "press_key", "context": "web", "key": "tab"}
1595
- \`\`\`
1596
-
1597
- ## Common Patterns
1598
-
1599
- ### Navigate and Fill Form
1600
- \`\`\`json
1601
- {
1602
- "version": "0.2",
1603
- "steps": [
1604
- {"action": "select_web_context"},
1605
- {"action": "navigate", "url": "https://example.com/login"},
1606
- {"action": "wait_for", "context": "web", "predicate": {"css_selector": "form"}, "timeout_ms": 5000},
1607
- {"action": "type", "context": "web", "predicate": {"css_selector": "input[name='email']"}, "text": "user@example.com"},
1608
- {"action": "type", "context": "web", "predicate": {"css_selector": "input[type='password']"}, "text": "password"},
1609
- {"action": "tap", "context": "web", "predicate": {"css_selector": "button[type='submit']"}}
1610
- ]
1611
- }
1612
- \`\`\`
1613
-
1614
- ### Click Element
1615
- \`\`\`json
1616
- {"action": "tap", "context": "web", "predicate": {"css_selector": "button.submit"}}
1617
- \`\`\`
1618
-
1619
- ### Execute JavaScript
1620
- \`\`\`json
1621
- {"action": "execute_js", "script": "return document.querySelector('h1').textContent"}
1622
- \`\`\`
1623
-
1624
- ## CSS Selectors
1625
-
1626
- | Selector | Description |
1627
- |----------|-------------|
1628
- | #id | Element by ID |
1629
- | .class | Elements by class |
1630
- | button.submit | Button with class |
1631
- | input[type='email'] | Input by attribute |
1632
- | input[name='username'] | Input by name |
1633
- | a[href*='login'] | Link whose href contains 'login' |
1634
-
1635
- ## Tips
1636
-
1637
- - **Select context first** - Use select_web_context before web operations
1638
- - **Use specific selectors** - Prefer id > name > class
1639
- - **Re-fetch DOM after navigation** - Page content changes
1640
- - **Use JavaScript for complex logic** - When CSS selectors aren't enough
1641
- `;
1642
- // Start the server
532
+ // ---------------------------------------------------------------------------
533
+ // Start
534
+ // ---------------------------------------------------------------------------
1643
535
  async function main() {
1644
536
  const transport = new StdioServerTransport();
1645
537
  await server.connect(transport);