pagebolt-mcp 1.8.1 → 1.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3)
  1. package/package.json +1 -1
  2. package/server.json +3 -3
  3. package/src/index.mjs +516 -187
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "pagebolt-mcp",
3
- "version": "1.8.1",
3
+ "version": "1.9.0",
4
4
  "description": "MCP server for PageBolt — take screenshots, generate PDFs, create OG images, inspect pages, record demo videos with Audio Guide narration, from AI coding assistants like Claude, Cursor, and Windsurf.",
5
5
  "main": "src/index.mjs",
6
6
  "module": "src/index.mjs",
package/server.json CHANGED
@@ -1,17 +1,17 @@
1
1
  {
2
2
  "$schema": "https://static.modelcontextprotocol.io/schemas/2025-12-11/server.schema.json",
3
3
  "name": "io.github.Custodia-Admin/pagebolt",
4
- "description": "Take screenshots, generate PDFs, and create OG images from your AI assistant.",
4
+ "description": "Screenshots, PDFs, OG images, page inspection, and narrated video recording for Claude and Cursor.",
5
5
  "repository": {
6
6
  "url": "https://github.com/Custodia-Admin/pagebolt-mcp",
7
7
  "source": "github"
8
8
  },
9
- "version": "1.0.2",
9
+ "version": "1.9.0",
10
10
  "packages": [
11
11
  {
12
12
  "registryType": "npm",
13
13
  "identifier": "pagebolt-mcp",
14
- "version": "1.0.2",
14
+ "version": "1.9.0",
15
15
  "transport": {
16
16
  "type": "stdio"
17
17
  },
package/src/index.mjs CHANGED
@@ -50,35 +50,62 @@ function requireApiKey() {
50
50
  }
51
51
  }
52
52
 
53
- // ─── HTTP helper ─────────────────────────────────────────────────
53
+ // ─── HTTP helper (with timeout + retry) ─────────────────────────
54
+ const RETRYABLE_STATUSES = new Set([429, 502, 503, 504]);
55
+ const MAX_RETRIES = 1;
56
+ const REQUEST_TIMEOUT_MS = 120_000;
57
+
54
58
  async function callApi(endpoint, options = {}) {
55
59
  requireApiKey();
56
60
  const url = `${BASE_URL}${endpoint}`;
57
61
  const method = options.method || 'GET';
58
62
  const headers = {
59
63
  'x-api-key': API_KEY,
60
- 'user-agent': 'pagebolt-mcp/1.7.0',
64
+ 'user-agent': 'pagebolt-mcp/1.9.0',
61
65
  ...(options.body ? { 'Content-Type': 'application/json' } : {}),
62
66
  };
67
+ const body = options.body ? JSON.stringify(options.body) : undefined;
63
68
 
64
- const res = await fetch(url, {
65
- method,
66
- headers,
67
- body: options.body ? JSON.stringify(options.body) : undefined,
68
- });
69
+ let lastError;
70
+ for (let attempt = 0; attempt <= MAX_RETRIES; attempt++) {
71
+ const controller = new AbortController();
72
+ const timer = setTimeout(() => controller.abort(), REQUEST_TIMEOUT_MS);
69
73
 
70
- if (!res.ok) {
71
- let errorMsg;
72
74
  try {
73
- const errJson = await res.json();
74
- errorMsg = errJson.error || JSON.stringify(errJson);
75
- } catch {
76
- errorMsg = `HTTP ${res.status} ${res.statusText}`;
75
+ const res = await fetch(url, { method, headers, body, signal: controller.signal });
76
+ clearTimeout(timer);
77
+
78
+ if (res.ok) return res;
79
+
80
+ if (RETRYABLE_STATUSES.has(res.status) && attempt < MAX_RETRIES) {
81
+ const retryAfter = parseInt(res.headers.get('retry-after'), 10);
82
+ const delayMs = retryAfter > 0 ? retryAfter * 1000 : 1000 * (attempt + 1);
83
+ await new Promise(r => setTimeout(r, Math.min(delayMs, 10_000)));
84
+ continue;
85
+ }
86
+
87
+ let errorMsg;
88
+ try {
89
+ const errJson = await res.json();
90
+ errorMsg = errJson.error || JSON.stringify(errJson);
91
+ } catch {
92
+ errorMsg = `HTTP ${res.status} ${res.statusText}`;
93
+ }
94
+ throw new Error(`PageBolt API error: ${errorMsg}`);
95
+ } catch (err) {
96
+ clearTimeout(timer);
97
+ if (err.name === 'AbortError') {
98
+ throw new Error(`PageBolt API error: request timed out after ${REQUEST_TIMEOUT_MS / 1000}s`);
99
+ }
100
+ lastError = err;
101
+ if (attempt < MAX_RETRIES && !err.message.startsWith('PageBolt API error:')) {
102
+ await new Promise(r => setTimeout(r, 1000 * (attempt + 1)));
103
+ continue;
104
+ }
105
+ throw err;
77
106
  }
78
- throw new Error(`PageBolt API error: ${errorMsg}`);
79
107
  }
80
-
81
- return res;
108
+ throw lastError;
82
109
  }
83
110
 
84
111
  // ─── MIME type helper ────────────────────────────────────────────
@@ -132,7 +159,7 @@ const styleSchema = z.object({
132
159
 
133
160
  // ─── Server Instructions ────────────────────────────────────────
134
161
  const SERVER_INSTRUCTIONS = `
135
- PageBolt gives you 8 tools for web capture and browser automation. All tools use your API key automatically.
162
+ PageBolt gives you tools for web capture and browser automation. All tools use your API key automatically.
136
163
 
137
164
  ## Tools Overview
138
165
 
@@ -141,7 +168,9 @@ PageBolt gives you 8 tools for web capture and browser automation. All tools use
141
168
  | take_screenshot | Capture a URL, HTML, or Markdown as PNG/JPEG/WebP | 1 request |
142
169
  | generate_pdf | Convert a URL or HTML to PDF, saves to disk | 1 request |
143
170
  | create_og_image | Generate social card images from templates or custom HTML | 1 request |
144
- | run_sequence | Multi-step browser automation with multiple screenshot/PDF outputs | 1 request per output |
171
+ | observe_page | Agent-optimized page observation: id-indexed elements, page-type classification, suggested actions (+ optional content/ARIA/screenshot) | 1 request |
172
+ | visual_diff | Pixel-level visual comparison of two pages | 1 request |
173
+ | run_sequence | Multi-step browser automation with screenshot/PDF/diff outputs | 1 request per output |
145
174
  | record_video | Record browser automation as MP4/WebM/GIF with cursor effects | 3 requests |
146
175
  | inspect_page | Get structured map of page elements with CSS selectors | 1 request |
147
176
  | list_devices | List 25+ device presets (iPhone, iPad, MacBook, etc.) | 0 (free) |
@@ -149,6 +178,10 @@ PageBolt gives you 8 tools for web capture and browser automation. All tools use
149
178
  | create_session | Create a persistent browser session (Starter+ only) | 0 (free to create) |
150
179
  | destroy_session | Destroy a persistent browser session | 0 (free) |
151
180
 
181
+ ## Agent Perception: observe_page vs inspect_page
182
+
183
+ For AI agents that need to understand and act on an arbitrary page, prefer **observe_page** — it returns a compact, token-budgeted observation (id-indexed elements + page-type + grouped suggested actions) in one call, and can optionally bundle readable content, the ARIA tree, and a screenshot. Use **inspect_page** when you specifically want the full raw element/heading/link/image inventory. Both return reliable CSS selectors you can pass to run_sequence.
184
+
152
185
  ## Key Workflow: Inspect Before You Interact
153
186
 
154
187
  When building sequences or videos, ALWAYS use inspect_page first to discover reliable CSS selectors:
@@ -158,6 +191,14 @@ When building sequences or videos, ALWAYS use inspect_page first to discover rel
158
191
 
159
192
  This avoids guessing selectors like "#submit" when the actual element is "#submitBtn".
160
193
 
194
+ ## Visual Diff
195
+
196
+ Use visual_diff to compare two pages pixel-by-pixel. Returns a diff image with changed pixels highlighted in red.
197
+ - Supports fullPage: true to diff entire scrollable pages (not just the viewport)
198
+ - Supports all screenshot options: device emulation, dark mode, selectors, blocking, etc.
199
+ - Use in run_sequence as a "diff" step to automate browser interactions before comparing — navigate, click, fill forms, then diff against another URL.
200
+ - threshold: 0.1 (default) — lower values catch more subtle differences
201
+
161
202
  ## Styling Screenshots
162
203
 
163
204
  Use the "style" parameter on take_screenshot for beautiful styled captures:
@@ -199,8 +240,9 @@ Use blockBanners on almost every request to get clean captures. Combine blockAds
199
240
  - response_type: "json" returns base64 data instead of binary (useful for programmatic use)
200
241
  - record_video pace presets: "fast" (0.5x), "normal" (1x), "slow" (2x), "dramatic" (3x), "cinematic" (4.5x)
201
242
  - record_video cursor styles: "highlight", "circle", "spotlight", "dot", "classic"
202
- - run_sequence requires at least 1 screenshot or pdf step as output
203
- - record_video does NOT allow screenshot/pdf steps the whole sequence IS the video
243
+ - run_sequence requires at least 1 output step (screenshot, pdf, or diff)
244
+ - run_sequence supports "diff" steps: automate interactions, then diff current page against another URL/HTML
245
+ - record_video does NOT allow screenshot/pdf/diff steps — the whole sequence IS the video
204
246
  - Max 2 evaluate (JavaScript) steps per sequence/video
205
247
  - fullPage: true on screenshots captures the entire scrollable page
206
248
  - fullPageScroll: true triggers lazy-loaded images before capture
@@ -209,8 +251,8 @@ Use blockBanners on almost every request to get clean captures. Combine blockAds
209
251
 
210
252
  | Action | Cost |
211
253
  |--------|------|
212
- | Screenshot, PDF, OG image, Inspect | 1 request each |
213
- | Sequence | 1 request per output (screenshot/pdf) |
254
+ | Screenshot, PDF, OG image, Inspect, Visual Diff | 1 request each |
255
+ | Sequence | 1 request per output (screenshot/pdf/diff) |
214
256
  | Video recording | 3 requests flat |
215
257
  | list_devices, check_usage | Free |
216
258
  `.trim();
@@ -219,7 +261,7 @@ Use blockBanners on almost every request to get clean captures. Combine blockAds
219
261
  function createConfiguredServer() {
220
262
  const srv = new McpServer({
221
263
  name: 'pagebolt',
222
- version: '1.7.0',
264
+ version: '1.9.0',
223
265
  }, {
224
266
  instructions: SERVER_INSTRUCTIONS,
225
267
  });
@@ -314,35 +356,38 @@ server.tool(
314
356
  return { content: [{ type: 'text', text: 'Error: One of "url", "html", or "markdown" is required.' }], isError: true };
315
357
  }
316
358
 
317
- const res = await callApi('/api/v1/screenshot', {
318
- method: 'POST',
319
- body: { ...params, response_type: 'json' },
320
- });
321
-
322
- const data = await res.json();
323
- const format = params.format || 'png';
324
-
325
- const content = [
326
- {
327
- type: 'image',
328
- data: data.data,
329
- mimeType: imageMimeType(format),
330
- },
331
- {
332
- type: 'text',
333
- text: `Screenshot captured successfully. Format: ${format}, Size: ${data.size_bytes} bytes, Duration: ${data.duration_ms}ms`,
334
- },
335
- ];
336
-
337
- // Include metadata if extracted
338
- if (data.metadata) {
339
- content.push({
340
- type: 'text',
341
- text: `Metadata:\n${JSON.stringify(data.metadata, null, 2)}`,
359
+ try {
360
+ const res = await callApi('/api/v1/screenshot', {
361
+ method: 'POST',
362
+ body: { ...params, response_type: 'json' },
342
363
  });
343
- }
344
364
 
345
- return { content };
365
+ const data = await res.json();
366
+ const format = params.format || 'png';
367
+
368
+ const content = [
369
+ {
370
+ type: 'image',
371
+ data: data.data,
372
+ mimeType: imageMimeType(format),
373
+ },
374
+ {
375
+ type: 'text',
376
+ text: `Screenshot captured successfully. Format: ${format}, Size: ${data.size_bytes} bytes, Duration: ${data.duration_ms}ms`,
377
+ },
378
+ ];
379
+
380
+ if (data.metadata) {
381
+ content.push({
382
+ type: 'text',
383
+ text: `Metadata:\n${JSON.stringify(data.metadata, null, 2)}`,
384
+ });
385
+ }
386
+
387
+ return { content };
388
+ } catch (err) {
389
+ return { content: [{ type: 'text', text: `Screenshot error: ${err.message}` }], isError: true };
390
+ }
346
391
  }
347
392
  );
348
393
 
@@ -381,49 +426,51 @@ server.tool(
381
426
  return { content: [{ type: 'text', text: 'Error: Either "url" or "html" is required.' }], isError: true };
382
427
  }
383
428
 
384
- const { saveTo, ...apiParams } = params;
385
- const res = await callApi('/api/v1/pdf', {
386
- method: 'POST',
387
- body: { ...apiParams, response_type: 'json' },
388
- });
429
+ try {
430
+ const { saveTo, ...apiParams } = params;
431
+ const res = await callApi('/api/v1/pdf', {
432
+ method: 'POST',
433
+ body: { ...apiParams, response_type: 'json' },
434
+ });
389
435
 
390
- const data = await res.json();
436
+ const data = await res.json();
391
437
 
392
- // Best-effort save to disk (may fail in hosted/sandboxed environments)
393
- let savedPath = null;
394
- try {
395
- const outputPath = safePath(saveTo, './output.pdf');
396
- const buffer = Buffer.from(data.data, 'base64');
397
- writeFileSync(outputPath, buffer);
398
- savedPath = outputPath;
399
- } catch (_diskErr) {
400
- // Disk write failed (e.g. hosted environment, read-only FS) — data is
401
- // still returned as an embedded resource below, so the client gets it.
402
- }
438
+ let savedPath = null;
439
+ try {
440
+ const outputPath = safePath(saveTo, './output.pdf');
441
+ const buffer = Buffer.from(data.data, 'base64');
442
+ writeFileSync(outputPath, buffer);
443
+ savedPath = outputPath;
444
+ } catch (_diskErr) {
445
+ // Disk write failed — data still returned as embedded resource
446
+ }
403
447
 
404
- const fileNote = savedPath
405
- ? ` File: ${savedPath}`
406
- : ` File: (not saved to disk — use the embedded resource data below)`;
448
+ const fileNote = savedPath
449
+ ? ` File: ${savedPath}`
450
+ : ` File: (not saved to disk — use the embedded resource data below)`;
407
451
 
408
- return {
409
- content: [
410
- {
411
- type: 'resource',
412
- resource: {
413
- uri: 'pagebolt://pdf/output.pdf',
414
- mimeType: 'application/pdf',
415
- blob: data.data, // base64-encoded PDF — always delivered to client
452
+ return {
453
+ content: [
454
+ {
455
+ type: 'resource',
456
+ resource: {
457
+ uri: 'pagebolt://pdf/output.pdf',
458
+ mimeType: 'application/pdf',
459
+ blob: data.data,
460
+ },
416
461
  },
417
- },
418
- {
419
- type: 'text',
420
- text: `PDF generated successfully.\n` +
421
- `${fileNote}\n` +
422
- ` Size: ${data.size_bytes} bytes\n` +
423
- ` Duration: ${data.duration_ms}ms`,
424
- },
425
- ],
426
- };
462
+ {
463
+ type: 'text',
464
+ text: `PDF generated successfully.\n` +
465
+ `${fileNote}\n` +
466
+ ` Size: ${data.size_bytes} bytes\n` +
467
+ ` Duration: ${data.duration_ms}ms`,
468
+ },
469
+ ],
470
+ };
471
+ } catch (err) {
472
+ return { content: [{ type: 'text', text: `PDF error: ${err.message}` }], isError: true };
473
+ }
427
474
  }
428
475
  );
429
476
 
@@ -448,27 +495,31 @@ server.tool(
448
495
  format: z.enum(['png', 'jpeg', 'webp']).optional().describe('Image format (default: png)'),
449
496
  },
450
497
  async (params) => {
451
- const res = await callApi('/api/v1/og-image', {
452
- method: 'POST',
453
- body: { ...params, response_type: 'json' },
454
- });
498
+ try {
499
+ const res = await callApi('/api/v1/og-image', {
500
+ method: 'POST',
501
+ body: { ...params, response_type: 'json' },
502
+ });
455
503
 
456
- const data = await res.json();
457
- const format = params.format || 'png';
504
+ const data = await res.json();
505
+ const format = params.format || 'png';
458
506
 
459
- return {
460
- content: [
461
- {
462
- type: 'image',
463
- data: data.data,
464
- mimeType: imageMimeType(format),
465
- },
466
- {
467
- type: 'text',
468
- text: `OG image created successfully. Format: ${format}, Size: ${data.size_bytes} bytes, Duration: ${data.duration_ms}ms`,
469
- },
507
+ return {
508
+ content: [
509
+ {
510
+ type: 'image',
511
+ data: data.data,
512
+ mimeType: imageMimeType(format),
513
+ },
514
+ {
515
+ type: 'text',
516
+ text: `OG image created successfully. Format: ${format}, Size: ${data.size_bytes} bytes, Duration: ${data.duration_ms}ms`,
517
+ },
470
518
  ],
471
519
  };
520
+ } catch (err) {
521
+ return { content: [{ type: 'text', text: `OG image error: ${err.message}` }], isError: true };
522
+ }
472
523
  }
473
524
  );
474
525
 
@@ -477,14 +528,14 @@ server.tool(
477
528
  // ═══════════════════════════════════════════════════════════════════
478
529
  server.tool(
479
530
  'run_sequence',
480
- 'Execute a multi-step browser automation sequence. Navigate pages, interact with elements (click, fill, select), and capture multiple screenshots/PDFs in a single browser session. Each output counts as 1 API request.',
531
+ 'Execute a multi-step browser automation sequence. Navigate pages, interact with elements (click, fill, select), and capture multiple screenshots/PDFs/diffs in a single browser session. Use the "diff" step to compare the current page state against another URL after automation. Each output counts as 1 API request.',
481
532
  {
482
533
  steps: z.array(
483
534
  z.object({
484
535
  action: z.enum([
485
536
  'navigate', 'click', 'dblclick', 'fill', 'select', 'hover',
486
537
  'scroll', 'wait', 'wait_for', 'evaluate',
487
- 'screenshot', 'pdf',
538
+ 'screenshot', 'pdf', 'diff',
488
539
  ]).describe('The action to perform'),
489
540
  url: z.string().url().optional().describe('URL to navigate to (for navigate action)'),
490
541
  selector: z.string().optional().describe('CSS selector for the target element (also used for element screenshots)'),
@@ -494,20 +545,25 @@ server.tool(
494
545
  x: z.number().optional().describe('Horizontal scroll position in pixels (scroll action). Use when scrolling horizontally without a selector.'),
495
546
  y: z.number().optional().describe('Vertical scroll position in pixels (scroll action). REQUIRED when no selector is provided — e.g. {"action":"scroll","y":800} scrolls 800px down.'),
496
547
  script: z.string().max(5000).optional().describe('JavaScript to execute in page context (for evaluate action)'),
497
- name: z.string().optional().describe('Name for the output (for screenshot/pdf actions)'),
548
+ name: z.string().optional().describe('Name for the output (for screenshot/pdf/diff actions)'),
498
549
  format: z.string().optional().describe('Image format: png, jpeg, webp (screenshot) or A4, Letter (pdf)'),
499
- fullPage: z.boolean().optional().describe('Capture full scrollable page (for screenshot action)'),
500
- fullPageScroll: z.boolean().optional().describe('Auto-scroll for lazy images (for screenshot action)'),
550
+ fullPage: z.boolean().optional().describe('Capture full scrollable page (for screenshot/diff actions)'),
551
+ fullPageScroll: z.boolean().optional().describe('Auto-scroll for lazy images (for screenshot/diff actions)'),
501
552
  quality: z.number().int().min(1).max(100).optional().describe('JPEG/WebP quality (for screenshot action)'),
502
553
  omitBackground: z.boolean().optional().describe('Transparent background (for screenshot action)'),
503
- delay: z.number().int().min(0).max(10000).optional().describe('Pre-capture delay in ms (for screenshot action)'),
554
+ delay: z.number().int().min(0).max(10000).optional().describe('Pre-capture delay in ms (for screenshot/diff actions)'),
504
555
  landscape: z.boolean().optional().describe('Landscape orientation (for pdf action)'),
505
556
  printBackground: z.boolean().optional().describe('Include CSS backgrounds (for pdf action)'),
506
557
  margin: z.string().optional().describe('CSS margin for all sides (for pdf action)'),
507
558
  scale: z.number().min(0.1).max(2).optional().describe('Rendering scale (for pdf action)'),
508
559
  style: styleSchema,
560
+ // ── Diff-specific step properties ──
561
+ url_b: z.string().url().optional().describe('URL of the comparison page (for diff action). The current page state is "A"; this URL is rendered as "B".'),
562
+ html_b: z.string().optional().describe('HTML of the comparison page (for diff action). The current page state is "A"; this HTML is rendered as "B".'),
563
+ selector_a: z.string().optional().describe('CSS selector to capture on the current page as side "A" (for diff action). If omitted, captures the full viewport/page.'),
564
+ threshold: z.number().min(0).max(1).optional().describe('Pixelmatch sensitivity 0–1 (for diff action, default: 0.1). Lower = more sensitive.'),
509
565
  })
510
- ).min(1).max(20).describe('Array of steps to execute in order. Must include at least one screenshot or pdf step. Max 20 steps, max 5 outputs.'),
566
+ ).min(1).max(20).describe('Array of steps to execute in order. Must include at least one output step (screenshot, pdf, or diff). Max 20 steps, max 5 outputs.'),
511
567
  viewport: z.object({
512
568
  width: z.number().int().min(320).max(3840).optional().describe('Viewport width (default: 1280)'),
513
569
  height: z.number().int().min(200).max(2160).optional().describe('Viewport height (default: 720)'),
@@ -546,9 +602,33 @@ server.tool(
546
602
  text: `[${output.name}] Screenshot — ${output.format}, ${output.size_bytes} bytes, step ${output.step_index}`,
547
603
  });
548
604
  } else if (output.type === 'pdf') {
605
+ if (output.data) {
606
+ content.push({
607
+ type: 'resource',
608
+ resource: {
609
+ uri: `pagebolt://sequence-pdf/${output.name || `step-${output.step_index}`}`,
610
+ mimeType: 'application/pdf',
611
+ blob: output.data,
612
+ },
613
+ });
614
+ }
615
+ content.push({
616
+ type: 'text',
617
+ text: `[${output.name}] PDF generated — ${output.size_bytes} bytes, step ${output.step_index}`,
618
+ });
619
+ } else if (output.type === 'diff') {
620
+ content.push({
621
+ type: 'image',
622
+ data: output.data,
623
+ mimeType: 'image/png',
624
+ });
549
625
  content.push({
550
626
  type: 'text',
551
- text: `[${output.name}] PDF generated — ${output.format}, ${output.size_bytes} bytes, step ${output.step_index} (base64 data available in raw response)`,
627
+ text: `[${output.name}] Diff — ${output.changed_pct}% changed (${output.changed_pixels?.toLocaleString()} of ${output.total_pixels?.toLocaleString()} pixels), step ${output.step_index}` +
628
+ (output.changed_pct === 0 ? ' — Pages are visually identical.' :
629
+ output.changed_pct < 1 ? ' — Minor differences.' :
630
+ output.changed_pct < 10 ? ' — Moderate differences.' :
631
+ ' — Significant differences.'),
552
632
  });
553
633
  }
554
634
  }
@@ -882,6 +962,235 @@ server.tool(
882
962
  }
883
963
  );
884
964
 
965
+ // ═══════════════════════════════════════════════════════════════════
966
+ // Tool: observe_page — agent-optimized page observation (perception layer)
967
+ // ═══════════════════════════════════════════════════════════════════
968
+ server.tool(
969
+ 'observe_page',
970
+ 'Get a compact, token-budgeted "observation" of any web page, purpose-built for AI agents. In ONE request it returns: id-indexed interactive elements (role, name, CSS selector, state), a heuristic page-type classification (login, signup, search, article, form, generic), and grouped "suggested actions" (login flow, search, primary buttons, navigation). Optionally include readable content (Markdown), the ARIA tree, and a screenshot. This is the fastest way for an agent to understand and act on an un-instrumented page — far more token-efficient than a raw screenshot or full DOM. Use the returned selectors with run_sequence to act. Costs 1 API request.',
971
+ {
972
+ // ── Source ──
973
+ url: z.string().url().optional().describe('URL to observe (required if no html)'),
974
+ html: z.string().optional().describe('Raw HTML to observe (required if no url)'),
975
+ // ── Observation shape ──
976
+ maxElements: z.number().int().min(1).max(150).optional().describe('Cap on interactive elements returned (default 40, max 150). Lower = fewer tokens.'),
977
+ includeRects: z.boolean().optional().describe('Include bounding boxes {x,y,w,h} per element (default false — omit to save tokens)'),
978
+ includeContent: z.boolean().optional().describe('Also extract the main readable content as Markdown (default false)'),
979
+ includeAriaTree: z.boolean().optional().describe('Also include the interesting-only ARIA accessibility tree (default false)'),
980
+ includeScreenshot: z.boolean().optional().describe('Also capture a screenshot in the same page load (default false)'),
981
+ screenshotFormat: z.enum(['jpeg', 'png', 'webp']).optional().describe('Screenshot format when includeScreenshot is true (default jpeg)'),
982
+ screenshotFullPage: z.boolean().optional().describe('Capture the full scrollable page for the screenshot (default false)'),
983
+ // ── Viewport ──
984
+ width: z.number().int().min(1).max(3840).optional().describe('Viewport width in pixels (default: 1280)'),
985
+ height: z.number().int().min(1).max(2160).optional().describe('Viewport height in pixels (default: 720)'),
986
+ viewportDevice: z.string().optional().describe('Device preset for viewport emulation (e.g. "iphone_14_pro"). Use list_devices to see all presets.'),
987
+ deviceScaleFactor: z.number().min(1).max(3).optional().describe('Device pixel ratio (default: 1)'),
988
+ // ── Timing ──
989
+ waitUntil: z.enum(['load', 'domcontentloaded', 'networkidle0', 'networkidle2']).optional().describe('When to consider navigation finished (default: networkidle2)'),
990
+ waitForSelector: z.string().optional().describe('Wait for this CSS selector to appear before observing'),
991
+ navigationTimeout: z.number().int().min(0).max(30000).optional().describe('Navigation timeout in ms (default: 25000)'),
992
+ // ── Emulation ──
993
+ darkMode: z.boolean().optional().describe('Emulate dark color scheme (default: false)'),
994
+ timeZone: z.string().optional().describe('Override browser timezone'),
995
+ userAgent: z.string().optional().describe('Override the browser User-Agent string'),
996
+ // ── Auth & headers ──
997
+ cookies: z.array(cookieSchema).optional().describe('Cookies to set — array of "name=value" strings or { name, value, domain? } objects'),
998
+ headers: z.record(z.string(), z.string()).optional().describe('Extra HTTP headers to send with the request'),
999
+ authorization: z.string().optional().describe('Authorization header value (e.g. "Bearer <token>")'),
1000
+ bypassCSP: z.boolean().optional().describe('Bypass Content-Security-Policy on the page'),
1001
+ // ── Blocking ──
1002
+ blockBanners: z.boolean().optional().describe('Hide cookie consent banners (default: false)'),
1003
+ blockAds: z.boolean().optional().describe('Block advertisements on the page'),
1004
+ blockChats: z.boolean().optional().describe('Block live chat widgets'),
1005
+ blockTrackers: z.boolean().optional().describe('Block tracking scripts'),
1006
+ },
1007
+ async (params) => {
1008
+ if (!params.url && !params.html) {
1009
+ return { content: [{ type: 'text', text: 'Error: Either "url" or "html" is required.' }], isError: true };
1010
+ }
1011
+
1012
+ try {
1013
+ const res = await callApi('/api/v1/observe', { method: 'POST', body: params });
1014
+ const data = await res.json();
1015
+
1016
+ const lines = [];
1017
+ lines.push(`Page: ${data.title || '(untitled)'} (${data.url})`);
1018
+ lines.push(`Type: ${data.pageType}`);
1019
+ if (data.metadata && data.metadata.httpStatusCode) lines.push(`HTTP Status: ${data.metadata.httpStatusCode}`);
1020
+ lines.push('');
1021
+
1022
+ if (data.actions && data.actions.length > 0) {
1023
+ lines.push('Suggested actions:');
1024
+ for (const a of data.actions) {
1025
+ lines.push(` ${a.intent}: ${a.elementIds.join(', ')}`);
1026
+ }
1027
+ lines.push('');
1028
+ }
1029
+
1030
+ if (data.elements && data.elements.length > 0) {
1031
+ lines.push(`Interactive elements (${data.elements.length}):`);
1032
+ for (const el of data.elements) {
1033
+ let line = ` ${el.id} [${el.role}${el.type ? ` ${el.type}` : ''}]`;
1034
+ if (el.name) line += ` "${el.name}"`;
1035
+ if (el.state && el.state.length) line += ` {${el.state.join(',')}}`;
1036
+ line += ` — selector: ${el.selector}`;
1037
+ if (el.href) line += ` → ${el.href}`;
1038
+ lines.push(line);
1039
+ }
1040
+ lines.push('');
1041
+ }
1042
+
1043
+ if (data.forms && data.forms.length > 0) {
1044
+ lines.push(`Forms (${data.forms.length}):`);
1045
+ for (const f of data.forms) {
1046
+ lines.push(` ${f.selector} (${f.method} ${f.action || '(none)'}): fields ${f.fieldIds.join(', ')}`);
1047
+ }
1048
+ lines.push('');
1049
+ }
1050
+
1051
+ if (data.headings && data.headings.length > 0) {
1052
+ lines.push('Outline:');
1053
+ for (const h of data.headings) lines.push(` ${' '.repeat(h.level - 1)}H${h.level}: ${h.text}`);
1054
+ lines.push('');
1055
+ }
1056
+
1057
+ if (data.content && data.content.markdown) {
1058
+ lines.push(`Readable content (${data.content.wordCount} words):`);
1059
+ lines.push(data.content.markdown.slice(0, 4000) + (data.content.markdown.length > 4000 ? '\n…(truncated)' : ''));
1060
+ lines.push('');
1061
+ }
1062
+
1063
+ if (data.ariaTree) {
1064
+ lines.push('ARIA tree:');
1065
+ lines.push(JSON.stringify(data.ariaTree, null, 2));
1066
+ lines.push('');
1067
+ }
1068
+
1069
+ lines.push(`Stats: ${data.stats.elementCount} elements, ~${data.stats.estimatedTokens} tokens. Duration: ${data.duration_ms}ms`);
1070
+
1071
+ const content = [{ type: 'text', text: lines.join('\n') }];
1072
+ if (data.screenshot && data.screenshot.base64) {
1073
+ content.unshift({ type: 'image', data: data.screenshot.base64, mimeType: imageMimeType(data.screenshot.format) });
1074
+ }
1075
+ return { content };
1076
+ } catch (err) {
1077
+ return { content: [{ type: 'text', text: `Observe error: ${err.message}` }], isError: true };
1078
+ }
1079
+ }
1080
+ );
1081
+
1082
+ // ═══════════════════════════════════════════════════════════════════
1083
+ // Tool: visual_diff — pixel-level visual comparison
1084
+ // ═══════════════════════════════════════════════════════════════════
1085
+ server.tool(
1086
+ 'visual_diff',
1087
+ 'Compare two web pages (or HTML strings) pixel-by-pixel and return a diff image highlighting all visual differences. Supports full-page capture, device emulation, element selectors, and all screenshot-like options. Returns the diff image, changed pixel count, and percentage changed. Costs 1 API request.',
1088
+ {
1089
+ // ── Sources ──
1090
+ url_a: z.string().url().optional().describe('URL of the first page (required if no html_a)'),
1091
+ url_b: z.string().url().optional().describe('URL of the second page (required if no html_b)'),
1092
+ html_a: z.string().optional().describe('Raw HTML for the first page (required if no url_a)'),
1093
+ html_b: z.string().optional().describe('Raw HTML for the second page (required if no url_b)'),
1094
+ // ── Diff sensitivity ──
1095
+ threshold: z.number().min(0).max(1).optional().describe('Pixelmatch sensitivity 0–1 (default: 0.1). Lower = more sensitive to subtle differences.'),
1096
+ // ── Viewport ──
1097
+ width: z.number().int().min(1).max(3840).optional().describe('Viewport width in pixels (default: 1280)'),
1098
+ height: z.number().int().min(1).max(2160).optional().describe('Viewport height in pixels (default: 720)'),
1099
+ viewportDevice: z.string().optional().describe('Device preset for viewport emulation (e.g. "iphone_14_pro"). Use list_devices to see all presets.'),
1100
+ viewportMobile: z.boolean().optional().describe('Enable mobile meta viewport emulation'),
1101
+ viewportHasTouch: z.boolean().optional().describe('Enable touch event emulation'),
1102
+ viewportLandscape: z.boolean().optional().describe('Landscape orientation'),
1103
+ deviceScaleFactor: z.number().min(1).max(3).optional().describe('Device pixel ratio (default: 1)'),
1104
+ // ── Capture region ──
1105
+ fullPage: z.boolean().optional().describe('Capture the full scrollable page for both sides (default: false)'),
1106
+ fullPageScroll: z.boolean().optional().describe('Auto-scroll pages before capture to trigger lazy-loaded images'),
1107
+ fullPageScrollDelay: z.number().int().min(0).max(2000).optional().describe('Delay between scroll steps in ms (default: 400)'),
1108
+ fullPageScrollBy: z.number().int().optional().describe('Pixels to scroll per step (default: viewport height)'),
1109
+ fullPageMaxHeight: z.number().int().optional().describe('Maximum pixel height cap for full-page captures'),
1110
+ selector: z.string().optional().describe('CSS selector — capture only this element on both pages'),
1111
+ clip: z.object({
1112
+ x: z.number(),
1113
+ y: z.number(),
1114
+ width: z.number(),
1115
+ height: z.number(),
1116
+ }).optional().describe('Crop region { x, y, width, height } in pixels'),
1117
+ // ── Timing ──
1118
+ delay: z.number().int().min(0).max(30000).optional().describe('Milliseconds to wait before capture on both pages (default: 0)'),
1119
+ click: z.string().optional().describe('CSS selector to click before capturing on both pages'),
1120
+ waitUntil: z.enum(['load', 'domcontentloaded', 'networkidle0', 'networkidle2']).optional().describe('When to consider navigation finished (default: networkidle2)'),
1121
+ waitForSelector: z.string().optional().describe('Wait for this CSS selector to appear before capturing'),
1122
+ navigationTimeout: z.number().int().min(0).max(30000).optional().describe('Navigation timeout in ms (default: 25000)'),
1123
+ // ── Emulation ──
1124
+ darkMode: z.boolean().optional().describe('Emulate dark color scheme (default: false)'),
1125
+ reducedMotion: z.boolean().optional().describe('Emulate prefers-reduced-motion to disable animations'),
1126
+ mediaType: z.enum(['screen', 'print']).optional().describe('Emulate CSS media type'),
1127
+ timeZone: z.string().optional().describe('Override browser timezone (e.g. "America/New_York")'),
1128
+ geolocation: z.object({
1129
+ latitude: z.number(),
1130
+ longitude: z.number(),
1131
+ accuracy: z.number().optional(),
1132
+ }).optional().describe('Emulate geolocation { latitude, longitude, accuracy? }'),
1133
+ userAgent: z.string().optional().describe('Override the browser User-Agent string'),
1134
+ // ── Auth & headers ──
1135
+ cookies: z.array(cookieSchema).optional().describe('Cookies to set — array of "name=value" strings or { name, value, domain? } objects'),
1136
+ headers: z.record(z.string(), z.string()).optional().describe('Extra HTTP headers to send with the request'),
1137
+ authorization: z.string().optional().describe('Authorization header value (e.g. "Bearer <token>")'),
1138
+ bypassCSP: z.boolean().optional().describe('Bypass Content-Security-Policy on the page'),
1139
+ // ── Content manipulation ──
1140
+ hideSelectors: z.array(z.string()).optional().describe('Array of CSS selectors to hide before capture'),
1141
+ injectCss: z.string().optional().describe('Custom CSS to inject before capturing (max 50KB)'),
1142
+ injectJs: z.string().optional().describe('Custom JavaScript to execute before capturing (max 50KB)'),
1143
+ // ── Blocking ──
1144
+ blockBanners: z.boolean().optional().describe('Hide cookie consent banners (default: false)'),
1145
+ blockAds: z.boolean().optional().describe('Block advertisements on the page'),
1146
+ blockChats: z.boolean().optional().describe('Block live chat widgets on the page'),
1147
+ blockTrackers: z.boolean().optional().describe('Block tracking scripts on the page'),
1148
+ blockRequests: z.array(z.string()).optional().describe('URL patterns to block (array of strings)'),
1149
+ blockResources: z.array(z.string()).optional().describe('Resource types to block (e.g. ["image", "font"])'),
1150
+ },
1151
+ async (params) => {
1152
+ if (!params.url_a && !params.html_a) {
1153
+ return { content: [{ type: 'text', text: 'Error: One of "url_a" or "html_a" is required.' }], isError: true };
1154
+ }
1155
+ if (!params.url_b && !params.html_b) {
1156
+ return { content: [{ type: 'text', text: 'Error: One of "url_b" or "html_b" is required.' }], isError: true };
1157
+ }
1158
+
1159
+ try {
1160
+ const res = await callApi('/api/v1/diff', {
1161
+ method: 'POST',
1162
+ body: params,
1163
+ });
1164
+
1165
+ const data = await res.json();
1166
+
1167
+ const content = [
1168
+ {
1169
+ type: 'image',
1170
+ data: data.diff_image.replace(/^data:image\/png;base64,/, ''),
1171
+ mimeType: 'image/png',
1172
+ },
1173
+ {
1174
+ type: 'text',
1175
+ text: `Visual diff complete.\n` +
1176
+ ` Changed: ${data.changed_pct}% (${data.changed_pixels.toLocaleString()} of ${data.total_pixels.toLocaleString()} pixels)\n` +
1177
+ ` URL A: ${data.url_a || '(html)'}\n` +
1178
+ ` URL B: ${data.url_b || '(html)'}\n` +
1179
+ ` Duration: ${data.duration_ms}ms\n` +
1180
+ (data.changed_pct === 0 ? ' Result: Pages are visually identical.' :
1181
+ data.changed_pct < 1 ? ' Result: Minor visual differences detected.' :
1182
+ data.changed_pct < 10 ? ' Result: Moderate visual differences detected.' :
1183
+ ' Result: Significant visual differences detected.'),
1184
+ },
1185
+ ];
1186
+
1187
+ return { content };
1188
+ } catch (err) {
1189
+ return { content: [{ type: 'text', text: `Visual diff error: ${err.message}` }], isError: true };
1190
+ }
1191
+ }
1192
+ );
1193
+
885
1194
  // ═══════════════════════════════════════════════════════════════════
886
1195
  // Tool: list_devices
887
1196
  // ═══════════════════════════════════════════════════════════════════
@@ -890,26 +1199,29 @@ server.tool(
890
1199
  'List all available device presets for viewport emulation (e.g. iphone_14_pro, macbook_pro_14). Use the returned device names with the viewportDevice parameter in take_screenshot.',
891
1200
  {},
892
1201
  async () => {
893
- const res = await callApi('/api/v1/devices');
894
- const data = await res.json();
1202
+ try {
1203
+ const res = await callApi('/api/v1/devices');
1204
+ const data = await res.json();
895
1205
 
896
- const lines = data.devices.map((d) => {
897
- const touch = d.hasTouch ? ', touch' : '';
898
- const mobile = d.isMobile ? ', mobile' : '';
899
- return ` ${d.name} — ${d.viewport.width}x${d.viewport.height} @${d.viewport.deviceScaleFactor}x${mobile}${touch}`;
900
- });
1206
+ const lines = data.devices.map((d) => {
1207
+ const mobile = d.mobile ? ', mobile' : '';
1208
+ return ` ${d.id} ${d.name} ${d.width}x${d.height} @${d.deviceScaleFactor}x${mobile}`;
1209
+ });
901
1210
 
902
- return {
903
- content: [
904
- {
905
- type: 'text',
906
- text:
907
- `Available device presets (${data.devices.length}):\n` +
908
- lines.join('\n') +
909
- `\n\nUse the device name as the "viewportDevice" parameter in take_screenshot.`,
910
- },
911
- ],
912
- };
1211
+ return {
1212
+ content: [
1213
+ {
1214
+ type: 'text',
1215
+ text:
1216
+ `Available device presets (${data.devices.length}):\n` +
1217
+ lines.join('\n') +
1218
+ `\n\nUse the device name as the "viewportDevice" parameter in take_screenshot.`,
1219
+ },
1220
+ ],
1221
+ };
1222
+ } catch (err) {
1223
+ return { content: [{ type: 'text', text: `List devices error: ${err.message}` }], isError: true };
1224
+ }
913
1225
  }
914
1226
  );
915
1227
 
@@ -921,25 +1233,29 @@ server.tool(
921
1233
  'Check your current PageBolt API usage and plan limits.',
922
1234
  {},
923
1235
  async () => {
924
- const res = await callApi('/api/v1/usage');
925
- const data = await res.json();
1236
+ try {
1237
+ const res = await callApi('/api/v1/usage');
1238
+ const data = await res.json();
926
1239
 
927
- const { plan, usage } = data;
928
- const pct = usage.limit > 0 ? Math.round((usage.current / usage.limit) * 100) : 0;
1240
+ const { plan, usage } = data;
1241
+ const pct = usage.limit > 0 ? Math.round((usage.current / usage.limit) * 100) : 0;
929
1242
 
930
- return {
931
- content: [
932
- {
933
- type: 'text',
934
- text:
935
- `PageBolt Usage\n` +
936
- ` Plan: ${plan}\n` +
937
- ` Used: ${usage.current.toLocaleString()} / ${usage.limit.toLocaleString()} requests\n` +
938
- ` Remaining: ${usage.remaining.toLocaleString()}\n` +
939
- ` Usage: ${pct}%`,
940
- },
941
- ],
942
- };
1243
+ return {
1244
+ content: [
1245
+ {
1246
+ type: 'text',
1247
+ text:
1248
+ `PageBolt Usage\n` +
1249
+ ` Plan: ${plan}\n` +
1250
+ ` Used: ${usage.current.toLocaleString()} / ${usage.limit.toLocaleString()} requests\n` +
1251
+ ` Remaining: ${usage.remaining.toLocaleString()}\n` +
1252
+ ` Usage: ${pct}%`,
1253
+ },
1254
+ ],
1255
+ };
1256
+ } catch (err) {
1257
+ return { content: [{ type: 'text', text: `Usage check error: ${err.message}` }], isError: true };
1258
+ }
943
1259
  }
944
1260
  );
945
1261
 
@@ -958,24 +1274,28 @@ server.tool(
958
1274
  stealth: z.boolean().optional().describe('Launch this session with stealth mode (bypasses bot detection). Note: stealth sessions use a dedicated browser and consume more memory.'),
959
1275
  },
960
1276
  async (params) => {
961
- const res = await callApi('/api/v1/sessions', {
962
- method: 'POST',
963
- body: params,
964
- });
965
- const data = await res.json();
966
- return {
967
- content: [
968
- {
969
- type: 'text',
970
- text:
971
- `Session created.\n` +
972
- ` session_id: ${data.session_id}\n` +
973
- ` expires_at: ${data.expires_at}\n\n` +
974
- `Pass session_id to take_screenshot or run_sequence to reuse this browser page.\n` +
975
- `Note: ${data.note || 'Sessions do not persist across server restarts.'}`,
976
- },
977
- ],
978
- };
1277
+ try {
1278
+ const res = await callApi('/api/v1/sessions', {
1279
+ method: 'POST',
1280
+ body: params,
1281
+ });
1282
+ const data = await res.json();
1283
+ return {
1284
+ content: [
1285
+ {
1286
+ type: 'text',
1287
+ text:
1288
+ `Session created.\n` +
1289
+ ` session_id: ${data.session_id}\n` +
1290
+ ` expires_at: ${data.expires_at}\n\n` +
1291
+ `Pass session_id to take_screenshot or run_sequence to reuse this browser page.\n` +
1292
+ `Note: ${data.note || 'Sessions do not persist across server restarts.'}`,
1293
+ },
1294
+ ],
1295
+ };
1296
+ } catch (err) {
1297
+ return { content: [{ type: 'text', text: `Create session error: ${err.message}` }], isError: true };
1298
+ }
979
1299
  }
980
1300
  );
981
1301
 
@@ -987,17 +1307,22 @@ server.tool(
987
1307
  'List all active persistent browser sessions for your API key. Returns session IDs, creation times, and expiry times. Useful for checking which sessions are still alive before reusing them.',
988
1308
  {},
989
1309
  async () => {
990
- const data = await callApi('/api/v1/sessions', { method: 'GET' });
991
- const sessions = data.sessions || [];
992
- if (sessions.length === 0) {
993
- return { content: [{ type: 'text', text: 'No active sessions.' }] };
1310
+ try {
1311
+ const res = await callApi('/api/v1/sessions', { method: 'GET' });
1312
+ const data = await res.json();
1313
+ const sessions = data.sessions || [];
1314
+ if (sessions.length === 0) {
1315
+ return { content: [{ type: 'text', text: 'No active sessions.' }] };
1316
+ }
1317
+ const lines = sessions.map(s =>
1318
+ `• ${s.session_id} expires: ${s.expires_at} created: ${s.created_at}`
1319
+ );
1320
+ return {
1321
+ content: [{ type: 'text', text: `Active sessions (${sessions.length}):\n${lines.join('\n')}` }],
1322
+ };
1323
+ } catch (err) {
1324
+ return { content: [{ type: 'text', text: `List sessions error: ${err.message}` }], isError: true };
994
1325
  }
995
- const lines = sessions.map(s =>
996
- `• ${s.session_id} expires: ${s.expires_at} created: ${s.created_at}`
997
- );
998
- return {
999
- content: [{ type: 'text', text: `Active sessions (${sessions.length}):\n${lines.join('\n')}` }],
1000
- };
1001
1326
  }
1002
1327
  );
1003
1328
 
@@ -1010,17 +1335,21 @@ server.tool(
1010
1335
  session_id: z.string().describe('The session ID to destroy (returned by create_session)'),
1011
1336
  },
1012
1337
  async (params) => {
1013
- await callApi(`/api/v1/sessions/${encodeURIComponent(params.session_id)}`, {
1014
- method: 'DELETE',
1015
- });
1016
- return {
1017
- content: [
1018
- {
1019
- type: 'text',
1020
- text: `Session ${params.session_id} destroyed successfully.`,
1021
- },
1022
- ],
1023
- };
1338
+ try {
1339
+ await callApi(`/api/v1/sessions/${encodeURIComponent(params.session_id)}`, {
1340
+ method: 'DELETE',
1341
+ });
1342
+ return {
1343
+ content: [
1344
+ {
1345
+ type: 'text',
1346
+ text: `Session ${params.session_id} destroyed successfully.`,
1347
+ },
1348
+ ],
1349
+ };
1350
+ } catch (err) {
1351
+ return { content: [{ type: 'text', text: `Destroy session error: ${err.message}` }], isError: true };
1352
+ }
1024
1353
  }
1025
1354
  );
1026
1355