pagebolt-mcp 1.8.2 → 1.9.0

This diff compares the contents of two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "pagebolt-mcp",
3
- "version": "1.8.2",
3
+ "version": "1.9.0",
4
4
  "description": "MCP server for PageBolt — take screenshots, generate PDFs, create OG images, inspect pages, record demo videos with Audio Guide narration, from AI coding assistants like Claude, Cursor, and Windsurf.",
5
5
  "main": "src/index.mjs",
6
6
  "module": "src/index.mjs",
package/server.json CHANGED
@@ -6,12 +6,12 @@
6
6
  "url": "https://github.com/Custodia-Admin/pagebolt-mcp",
7
7
  "source": "github"
8
8
  },
9
- "version": "1.8.2",
9
+ "version": "1.9.0",
10
10
  "packages": [
11
11
  {
12
12
  "registryType": "npm",
13
13
  "identifier": "pagebolt-mcp",
14
- "version": "1.8.2",
14
+ "version": "1.9.0",
15
15
  "transport": {
16
16
  "type": "stdio"
17
17
  },
package/src/index.mjs CHANGED
@@ -61,7 +61,7 @@ async function callApi(endpoint, options = {}) {
61
61
  const method = options.method || 'GET';
62
62
  const headers = {
63
63
  'x-api-key': API_KEY,
64
- 'user-agent': 'pagebolt-mcp/1.8.2',
64
+ 'user-agent': 'pagebolt-mcp/1.9.0',
65
65
  ...(options.body ? { 'Content-Type': 'application/json' } : {}),
66
66
  };
67
67
  const body = options.body ? JSON.stringify(options.body) : undefined;
@@ -159,7 +159,7 @@ const styleSchema = z.object({
159
159
 
160
160
  // ─── Server Instructions ────────────────────────────────────────
161
161
  const SERVER_INSTRUCTIONS = `
162
- PageBolt gives you 8 tools for web capture and browser automation. All tools use your API key automatically.
162
+ PageBolt gives you tools for web capture and browser automation. All tools use your API key automatically.
163
163
 
164
164
  ## Tools Overview
165
165
 
@@ -168,7 +168,9 @@ PageBolt gives you 8 tools for web capture and browser automation. All tools use
168
168
  | take_screenshot | Capture a URL, HTML, or Markdown as PNG/JPEG/WebP | 1 request |
169
169
  | generate_pdf | Convert a URL or HTML to PDF, saves to disk | 1 request |
170
170
  | create_og_image | Generate social card images from templates or custom HTML | 1 request |
171
- | run_sequence | Multi-step browser automation with multiple screenshot/PDF outputs | 1 request per output |
171
+ | observe_page | Agent-optimized page observation: id-indexed elements, page-type classification, suggested actions (+ optional content/ARIA/screenshot) | 1 request |
172
+ | visual_diff | Pixel-level visual comparison of two pages | 1 request |
173
+ | run_sequence | Multi-step browser automation with screenshot/PDF/diff outputs | 1 request per output |
172
174
  | record_video | Record browser automation as MP4/WebM/GIF with cursor effects | 3 requests |
173
175
  | inspect_page | Get structured map of page elements with CSS selectors | 1 request |
174
176
  | list_devices | List 25+ device presets (iPhone, iPad, MacBook, etc.) | 0 (free) |
@@ -176,6 +178,10 @@ PageBolt gives you 8 tools for web capture and browser automation. All tools use
176
178
  | create_session | Create a persistent browser session (Starter+ only) | 0 (free to create) |
177
179
  | destroy_session | Destroy a persistent browser session | 0 (free) |
178
180
 
181
+ ## Agent Perception: observe_page vs inspect_page
182
+
183
+ For AI agents that need to understand and act on an arbitrary page, prefer **observe_page** — it returns a compact, token-budgeted observation (id-indexed elements + page-type + grouped suggested actions) in one call, and can optionally bundle readable content, the ARIA tree, and a screenshot. Use **inspect_page** when you specifically want the full raw element/heading/link/image inventory. Both return reliable CSS selectors you can pass to run_sequence.
184
+
179
185
  ## Key Workflow: Inspect Before You Interact
180
186
 
181
187
  When building sequences or videos, ALWAYS use inspect_page first to discover reliable CSS selectors:
@@ -185,6 +191,14 @@ When building sequences or videos, ALWAYS use inspect_page first to discover rel
185
191
 
186
192
  This avoids guessing selectors like "#submit" when the actual element is "#submitBtn".
187
193
 
194
+ ## Visual Diff
195
+
196
+ Use visual_diff to compare two pages pixel-by-pixel. Returns a diff image with changed pixels highlighted in red.
197
+ - Supports fullPage: true to diff entire scrollable pages (not just the viewport)
198
+ - Supports all screenshot options: device emulation, dark mode, selectors, blocking, etc.
199
+ - Use in run_sequence as a "diff" step to automate browser interactions before comparing — navigate, click, fill forms, then diff against another URL.
200
+ - threshold: 0.1 (default) — lower values catch more subtle differences
201
+
188
202
  ## Styling Screenshots
189
203
 
190
204
  Use the "style" parameter on take_screenshot for beautiful styled captures:
@@ -226,8 +240,9 @@ Use blockBanners on almost every request to get clean captures. Combine blockAds
226
240
  - response_type: "json" returns base64 data instead of binary (useful for programmatic use)
227
241
  - record_video pace presets: "fast" (0.5x), "normal" (1x), "slow" (2x), "dramatic" (3x), "cinematic" (4.5x)
228
242
  - record_video cursor styles: "highlight", "circle", "spotlight", "dot", "classic"
229
- - run_sequence requires at least 1 screenshot or pdf step as output
230
- - record_video does NOT allow screenshot/pdf steps the whole sequence IS the video
243
+ - run_sequence requires at least 1 output step (screenshot, pdf, or diff)
244
+ - run_sequence supports "diff" steps: automate interactions, then diff current page against another URL/HTML
245
+ - record_video does NOT allow screenshot/pdf/diff steps — the whole sequence IS the video
231
246
  - Max 2 evaluate (JavaScript) steps per sequence/video
232
247
  - fullPage: true on screenshots captures the entire scrollable page
233
248
  - fullPageScroll: true triggers lazy-loaded images before capture
@@ -236,8 +251,8 @@ Use blockBanners on almost every request to get clean captures. Combine blockAds
236
251
 
237
252
  | Action | Cost |
238
253
  |--------|------|
239
- | Screenshot, PDF, OG image, Inspect | 1 request each |
240
- | Sequence | 1 request per output (screenshot/pdf) |
254
+ | Screenshot, PDF, OG image, Inspect, Visual Diff | 1 request each |
255
+ | Sequence | 1 request per output (screenshot/pdf/diff) |
241
256
  | Video recording | 3 requests flat |
242
257
  | list_devices, check_usage | Free |
243
258
  `.trim();
@@ -246,7 +261,7 @@ Use blockBanners on almost every request to get clean captures. Combine blockAds
246
261
  function createConfiguredServer() {
247
262
  const srv = new McpServer({
248
263
  name: 'pagebolt',
249
- version: '1.8.2',
264
+ version: '1.9.0',
250
265
  }, {
251
266
  instructions: SERVER_INSTRUCTIONS,
252
267
  });
@@ -513,14 +528,14 @@ server.tool(
513
528
  // ═══════════════════════════════════════════════════════════════════
514
529
  server.tool(
515
530
  'run_sequence',
516
- 'Execute a multi-step browser automation sequence. Navigate pages, interact with elements (click, fill, select), and capture multiple screenshots/PDFs in a single browser session. Each output counts as 1 API request.',
531
+ 'Execute a multi-step browser automation sequence. Navigate pages, interact with elements (click, fill, select), and capture multiple screenshots/PDFs/diffs in a single browser session. Use the "diff" step to compare the current page state against another URL after automation. Each output counts as 1 API request.',
517
532
  {
518
533
  steps: z.array(
519
534
  z.object({
520
535
  action: z.enum([
521
536
  'navigate', 'click', 'dblclick', 'fill', 'select', 'hover',
522
537
  'scroll', 'wait', 'wait_for', 'evaluate',
523
- 'screenshot', 'pdf',
538
+ 'screenshot', 'pdf', 'diff',
524
539
  ]).describe('The action to perform'),
525
540
  url: z.string().url().optional().describe('URL to navigate to (for navigate action)'),
526
541
  selector: z.string().optional().describe('CSS selector for the target element (also used for element screenshots)'),
@@ -530,20 +545,25 @@ server.tool(
530
545
  x: z.number().optional().describe('Horizontal scroll position in pixels (scroll action). Use when scrolling horizontally without a selector.'),
531
546
  y: z.number().optional().describe('Vertical scroll position in pixels (scroll action). REQUIRED when no selector is provided — e.g. {"action":"scroll","y":800} scrolls 800px down.'),
532
547
  script: z.string().max(5000).optional().describe('JavaScript to execute in page context (for evaluate action)'),
533
- name: z.string().optional().describe('Name for the output (for screenshot/pdf actions)'),
548
+ name: z.string().optional().describe('Name for the output (for screenshot/pdf/diff actions)'),
534
549
  format: z.string().optional().describe('Image format: png, jpeg, webp (screenshot) or A4, Letter (pdf)'),
535
- fullPage: z.boolean().optional().describe('Capture full scrollable page (for screenshot action)'),
536
- fullPageScroll: z.boolean().optional().describe('Auto-scroll for lazy images (for screenshot action)'),
550
+ fullPage: z.boolean().optional().describe('Capture full scrollable page (for screenshot/diff actions)'),
551
+ fullPageScroll: z.boolean().optional().describe('Auto-scroll for lazy images (for screenshot/diff actions)'),
537
552
  quality: z.number().int().min(1).max(100).optional().describe('JPEG/WebP quality (for screenshot action)'),
538
553
  omitBackground: z.boolean().optional().describe('Transparent background (for screenshot action)'),
539
- delay: z.number().int().min(0).max(10000).optional().describe('Pre-capture delay in ms (for screenshot action)'),
554
+ delay: z.number().int().min(0).max(10000).optional().describe('Pre-capture delay in ms (for screenshot/diff actions)'),
540
555
  landscape: z.boolean().optional().describe('Landscape orientation (for pdf action)'),
541
556
  printBackground: z.boolean().optional().describe('Include CSS backgrounds (for pdf action)'),
542
557
  margin: z.string().optional().describe('CSS margin for all sides (for pdf action)'),
543
558
  scale: z.number().min(0.1).max(2).optional().describe('Rendering scale (for pdf action)'),
544
559
  style: styleSchema,
560
+ // ── Diff-specific step properties ──
561
+ url_b: z.string().url().optional().describe('URL of the comparison page (for diff action). The current page state is "A"; this URL is rendered as "B".'),
562
+ html_b: z.string().optional().describe('HTML of the comparison page (for diff action). The current page state is "A"; this HTML is rendered as "B".'),
563
+ selector_a: z.string().optional().describe('CSS selector to capture on the current page as side "A" (for diff action). If omitted, captures the full viewport/page.'),
564
+ threshold: z.number().min(0).max(1).optional().describe('Pixelmatch sensitivity 0–1 (for diff action, default: 0.1). Lower = more sensitive.'),
545
565
  })
546
- ).min(1).max(20).describe('Array of steps to execute in order. Must include at least one screenshot or pdf step. Max 20 steps, max 5 outputs.'),
566
+ ).min(1).max(20).describe('Array of steps to execute in order. Must include at least one output step (screenshot, pdf, or diff). Max 20 steps, max 5 outputs.'),
547
567
  viewport: z.object({
548
568
  width: z.number().int().min(320).max(3840).optional().describe('Viewport width (default: 1280)'),
549
569
  height: z.number().int().min(200).max(2160).optional().describe('Viewport height (default: 720)'),
@@ -596,6 +616,20 @@ server.tool(
596
616
  type: 'text',
597
617
  text: `[${output.name}] PDF generated — ${output.size_bytes} bytes, step ${output.step_index}`,
598
618
  });
619
+ } else if (output.type === 'diff') {
620
+ content.push({
621
+ type: 'image',
622
+ data: output.data,
623
+ mimeType: 'image/png',
624
+ });
625
+ content.push({
626
+ type: 'text',
627
+ text: `[${output.name}] Diff — ${output.changed_pct}% changed (${output.changed_pixels?.toLocaleString()} of ${output.total_pixels?.toLocaleString()} pixels), step ${output.step_index}` +
628
+ (output.changed_pct === 0 ? ' — Pages are visually identical.' :
629
+ output.changed_pct < 1 ? ' — Minor differences.' :
630
+ output.changed_pct < 10 ? ' — Moderate differences.' :
631
+ ' — Significant differences.'),
632
+ });
599
633
  }
600
634
  }
601
635
 
@@ -928,6 +962,235 @@ server.tool(
928
962
  }
929
963
  );
930
964
 
965
+ // ═══════════════════════════════════════════════════════════════════
966
+ // Tool: observe_page — agent-optimized page observation (perception layer)
967
+ // ═══════════════════════════════════════════════════════════════════
968
+ server.tool(
969
+ 'observe_page',
970
+ 'Get a compact, token-budgeted "observation" of any web page, purpose-built for AI agents. In ONE request it returns: id-indexed interactive elements (role, name, CSS selector, state), a heuristic page-type classification (login, signup, search, article, form, generic), and grouped "suggested actions" (login flow, search, primary buttons, navigation). Optionally include readable content (Markdown), the ARIA tree, and a screenshot. This is the fastest way for an agent to understand and act on an un-instrumented page — far more token-efficient than a raw screenshot or full DOM. Use the returned selectors with run_sequence to act. Costs 1 API request.',
971
+ {
972
+ // ── Source ──
973
+ url: z.string().url().optional().describe('URL to observe (required if no html)'),
974
+ html: z.string().optional().describe('Raw HTML to observe (required if no url)'),
975
+ // ── Observation shape ──
976
+ maxElements: z.number().int().min(1).max(150).optional().describe('Cap on interactive elements returned (default 40, max 150). Lower = fewer tokens.'),
977
+ includeRects: z.boolean().optional().describe('Include bounding boxes {x,y,w,h} per element (default false — omit to save tokens)'),
978
+ includeContent: z.boolean().optional().describe('Also extract the main readable content as Markdown (default false)'),
979
+ includeAriaTree: z.boolean().optional().describe('Also include the interesting-only ARIA accessibility tree (default false)'),
980
+ includeScreenshot: z.boolean().optional().describe('Also capture a screenshot in the same page load (default false)'),
981
+ screenshotFormat: z.enum(['jpeg', 'png', 'webp']).optional().describe('Screenshot format when includeScreenshot is true (default jpeg)'),
982
+ screenshotFullPage: z.boolean().optional().describe('Capture the full scrollable page for the screenshot (default false)'),
983
+ // ── Viewport ──
984
+ width: z.number().int().min(1).max(3840).optional().describe('Viewport width in pixels (default: 1280)'),
985
+ height: z.number().int().min(1).max(2160).optional().describe('Viewport height in pixels (default: 720)'),
986
+ viewportDevice: z.string().optional().describe('Device preset for viewport emulation (e.g. "iphone_14_pro"). Use list_devices to see all presets.'),
987
+ deviceScaleFactor: z.number().min(1).max(3).optional().describe('Device pixel ratio (default: 1)'),
988
+ // ── Timing ──
989
+ waitUntil: z.enum(['load', 'domcontentloaded', 'networkidle0', 'networkidle2']).optional().describe('When to consider navigation finished (default: networkidle2)'),
990
+ waitForSelector: z.string().optional().describe('Wait for this CSS selector to appear before observing'),
991
+ navigationTimeout: z.number().int().min(0).max(30000).optional().describe('Navigation timeout in ms (default: 25000)'),
992
+ // ── Emulation ──
993
+ darkMode: z.boolean().optional().describe('Emulate dark color scheme (default: false)'),
994
+ timeZone: z.string().optional().describe('Override browser timezone'),
995
+ userAgent: z.string().optional().describe('Override the browser User-Agent string'),
996
+ // ── Auth & headers ──
997
+ cookies: z.array(cookieSchema).optional().describe('Cookies to set — array of "name=value" strings or { name, value, domain? } objects'),
998
+ headers: z.record(z.string(), z.string()).optional().describe('Extra HTTP headers to send with the request'),
999
+ authorization: z.string().optional().describe('Authorization header value (e.g. "Bearer <token>")'),
1000
+ bypassCSP: z.boolean().optional().describe('Bypass Content-Security-Policy on the page'),
1001
+ // ── Blocking ──
1002
+ blockBanners: z.boolean().optional().describe('Hide cookie consent banners (default: false)'),
1003
+ blockAds: z.boolean().optional().describe('Block advertisements on the page'),
1004
+ blockChats: z.boolean().optional().describe('Block live chat widgets'),
1005
+ blockTrackers: z.boolean().optional().describe('Block tracking scripts'),
1006
+ },
1007
+ async (params) => {
1008
+ if (!params.url && !params.html) {
1009
+ return { content: [{ type: 'text', text: 'Error: Either "url" or "html" is required.' }], isError: true };
1010
+ }
1011
+
1012
+ try {
1013
+ const res = await callApi('/api/v1/observe', { method: 'POST', body: params });
1014
+ const data = await res.json();
1015
+
1016
+ const lines = [];
1017
+ lines.push(`Page: ${data.title || '(untitled)'} (${data.url})`);
1018
+ lines.push(`Type: ${data.pageType}`);
1019
+ if (data.metadata && data.metadata.httpStatusCode) lines.push(`HTTP Status: ${data.metadata.httpStatusCode}`);
1020
+ lines.push('');
1021
+
1022
+ if (data.actions && data.actions.length > 0) {
1023
+ lines.push('Suggested actions:');
1024
+ for (const a of data.actions) {
1025
+ lines.push(` ${a.intent}: ${a.elementIds.join(', ')}`);
1026
+ }
1027
+ lines.push('');
1028
+ }
1029
+
1030
+ if (data.elements && data.elements.length > 0) {
1031
+ lines.push(`Interactive elements (${data.elements.length}):`);
1032
+ for (const el of data.elements) {
1033
+ let line = ` ${el.id} [${el.role}${el.type ? ` ${el.type}` : ''}]`;
1034
+ if (el.name) line += ` "${el.name}"`;
1035
+ if (el.state && el.state.length) line += ` {${el.state.join(',')}}`;
1036
+ line += ` — selector: ${el.selector}`;
1037
+ if (el.href) line += ` → ${el.href}`;
1038
+ lines.push(line);
1039
+ }
1040
+ lines.push('');
1041
+ }
1042
+
1043
+ if (data.forms && data.forms.length > 0) {
1044
+ lines.push(`Forms (${data.forms.length}):`);
1045
+ for (const f of data.forms) {
1046
+ lines.push(` ${f.selector} (${f.method} ${f.action || '(none)'}): fields ${f.fieldIds.join(', ')}`);
1047
+ }
1048
+ lines.push('');
1049
+ }
1050
+
1051
+ if (data.headings && data.headings.length > 0) {
1052
+ lines.push('Outline:');
1053
+ for (const h of data.headings) lines.push(` ${' '.repeat(h.level - 1)}H${h.level}: ${h.text}`);
1054
+ lines.push('');
1055
+ }
1056
+
1057
+ if (data.content && data.content.markdown) {
1058
+ lines.push(`Readable content (${data.content.wordCount} words):`);
1059
+ lines.push(data.content.markdown.slice(0, 4000) + (data.content.markdown.length > 4000 ? '\n…(truncated)' : ''));
1060
+ lines.push('');
1061
+ }
1062
+
1063
+ if (data.ariaTree) {
1064
+ lines.push('ARIA tree:');
1065
+ lines.push(JSON.stringify(data.ariaTree, null, 2));
1066
+ lines.push('');
1067
+ }
1068
+
1069
+ lines.push(`Stats: ${data.stats.elementCount} elements, ~${data.stats.estimatedTokens} tokens. Duration: ${data.duration_ms}ms`);
1070
+
1071
+ const content = [{ type: 'text', text: lines.join('\n') }];
1072
+ if (data.screenshot && data.screenshot.base64) {
1073
+ content.unshift({ type: 'image', data: data.screenshot.base64, mimeType: imageMimeType(data.screenshot.format) });
1074
+ }
1075
+ return { content };
1076
+ } catch (err) {
1077
+ return { content: [{ type: 'text', text: `Observe error: ${err.message}` }], isError: true };
1078
+ }
1079
+ }
1080
+ );
1081
+
1082
+ // ═══════════════════════════════════════════════════════════════════
1083
+ // Tool: visual_diff — pixel-level visual comparison
1084
+ // ═══════════════════════════════════════════════════════════════════
1085
+ server.tool(
1086
+ 'visual_diff',
1087
+ 'Compare two web pages (or HTML strings) pixel-by-pixel and return a diff image highlighting all visual differences. Supports full-page capture, device emulation, element selectors, and all screenshot-like options. Returns the diff image, changed pixel count, and percentage changed. Costs 1 API request.',
1088
+ {
1089
+ // ── Sources ──
1090
+ url_a: z.string().url().optional().describe('URL of the first page (required if no html_a)'),
1091
+ url_b: z.string().url().optional().describe('URL of the second page (required if no html_b)'),
1092
+ html_a: z.string().optional().describe('Raw HTML for the first page (required if no url_a)'),
1093
+ html_b: z.string().optional().describe('Raw HTML for the second page (required if no url_b)'),
1094
+ // ── Diff sensitivity ──
1095
+ threshold: z.number().min(0).max(1).optional().describe('Pixelmatch sensitivity 0–1 (default: 0.1). Lower = more sensitive to subtle differences.'),
1096
+ // ── Viewport ──
1097
+ width: z.number().int().min(1).max(3840).optional().describe('Viewport width in pixels (default: 1280)'),
1098
+ height: z.number().int().min(1).max(2160).optional().describe('Viewport height in pixels (default: 720)'),
1099
+ viewportDevice: z.string().optional().describe('Device preset for viewport emulation (e.g. "iphone_14_pro"). Use list_devices to see all presets.'),
1100
+ viewportMobile: z.boolean().optional().describe('Enable mobile meta viewport emulation'),
1101
+ viewportHasTouch: z.boolean().optional().describe('Enable touch event emulation'),
1102
+ viewportLandscape: z.boolean().optional().describe('Landscape orientation'),
1103
+ deviceScaleFactor: z.number().min(1).max(3).optional().describe('Device pixel ratio (default: 1)'),
1104
+ // ── Capture region ──
1105
+ fullPage: z.boolean().optional().describe('Capture the full scrollable page for both sides (default: false)'),
1106
+ fullPageScroll: z.boolean().optional().describe('Auto-scroll pages before capture to trigger lazy-loaded images'),
1107
+ fullPageScrollDelay: z.number().int().min(0).max(2000).optional().describe('Delay between scroll steps in ms (default: 400)'),
1108
+ fullPageScrollBy: z.number().int().optional().describe('Pixels to scroll per step (default: viewport height)'),
1109
+ fullPageMaxHeight: z.number().int().optional().describe('Maximum pixel height cap for full-page captures'),
1110
+ selector: z.string().optional().describe('CSS selector — capture only this element on both pages'),
1111
+ clip: z.object({
1112
+ x: z.number(),
1113
+ y: z.number(),
1114
+ width: z.number(),
1115
+ height: z.number(),
1116
+ }).optional().describe('Crop region { x, y, width, height } in pixels'),
1117
+ // ── Timing ──
1118
+ delay: z.number().int().min(0).max(30000).optional().describe('Milliseconds to wait before capture on both pages (default: 0)'),
1119
+ click: z.string().optional().describe('CSS selector to click before capturing on both pages'),
1120
+ waitUntil: z.enum(['load', 'domcontentloaded', 'networkidle0', 'networkidle2']).optional().describe('When to consider navigation finished (default: networkidle2)'),
1121
+ waitForSelector: z.string().optional().describe('Wait for this CSS selector to appear before capturing'),
1122
+ navigationTimeout: z.number().int().min(0).max(30000).optional().describe('Navigation timeout in ms (default: 25000)'),
1123
+ // ── Emulation ──
1124
+ darkMode: z.boolean().optional().describe('Emulate dark color scheme (default: false)'),
1125
+ reducedMotion: z.boolean().optional().describe('Emulate prefers-reduced-motion to disable animations'),
1126
+ mediaType: z.enum(['screen', 'print']).optional().describe('Emulate CSS media type'),
1127
+ timeZone: z.string().optional().describe('Override browser timezone (e.g. "America/New_York")'),
1128
+ geolocation: z.object({
1129
+ latitude: z.number(),
1130
+ longitude: z.number(),
1131
+ accuracy: z.number().optional(),
1132
+ }).optional().describe('Emulate geolocation { latitude, longitude, accuracy? }'),
1133
+ userAgent: z.string().optional().describe('Override the browser User-Agent string'),
1134
+ // ── Auth & headers ──
1135
+ cookies: z.array(cookieSchema).optional().describe('Cookies to set — array of "name=value" strings or { name, value, domain? } objects'),
1136
+ headers: z.record(z.string(), z.string()).optional().describe('Extra HTTP headers to send with the request'),
1137
+ authorization: z.string().optional().describe('Authorization header value (e.g. "Bearer <token>")'),
1138
+ bypassCSP: z.boolean().optional().describe('Bypass Content-Security-Policy on the page'),
1139
+ // ── Content manipulation ──
1140
+ hideSelectors: z.array(z.string()).optional().describe('Array of CSS selectors to hide before capture'),
1141
+ injectCss: z.string().optional().describe('Custom CSS to inject before capturing (max 50KB)'),
1142
+ injectJs: z.string().optional().describe('Custom JavaScript to execute before capturing (max 50KB)'),
1143
+ // ── Blocking ──
1144
+ blockBanners: z.boolean().optional().describe('Hide cookie consent banners (default: false)'),
1145
+ blockAds: z.boolean().optional().describe('Block advertisements on the page'),
1146
+ blockChats: z.boolean().optional().describe('Block live chat widgets on the page'),
1147
+ blockTrackers: z.boolean().optional().describe('Block tracking scripts on the page'),
1148
+ blockRequests: z.array(z.string()).optional().describe('URL patterns to block (array of strings)'),
1149
+ blockResources: z.array(z.string()).optional().describe('Resource types to block (e.g. ["image", "font"])'),
1150
+ },
1151
+ async (params) => {
1152
+ if (!params.url_a && !params.html_a) {
1153
+ return { content: [{ type: 'text', text: 'Error: One of "url_a" or "html_a" is required.' }], isError: true };
1154
+ }
1155
+ if (!params.url_b && !params.html_b) {
1156
+ return { content: [{ type: 'text', text: 'Error: One of "url_b" or "html_b" is required.' }], isError: true };
1157
+ }
1158
+
1159
+ try {
1160
+ const res = await callApi('/api/v1/diff', {
1161
+ method: 'POST',
1162
+ body: params,
1163
+ });
1164
+
1165
+ const data = await res.json();
1166
+
1167
+ const content = [
1168
+ {
1169
+ type: 'image',
1170
+ data: data.diff_image.replace(/^data:image\/png;base64,/, ''),
1171
+ mimeType: 'image/png',
1172
+ },
1173
+ {
1174
+ type: 'text',
1175
+ text: `Visual diff complete.\n` +
1176
+ ` Changed: ${data.changed_pct}% (${data.changed_pixels.toLocaleString()} of ${data.total_pixels.toLocaleString()} pixels)\n` +
1177
+ ` URL A: ${data.url_a || '(html)'}\n` +
1178
+ ` URL B: ${data.url_b || '(html)'}\n` +
1179
+ ` Duration: ${data.duration_ms}ms\n` +
1180
+ (data.changed_pct === 0 ? ' Result: Pages are visually identical.' :
1181
+ data.changed_pct < 1 ? ' Result: Minor visual differences detected.' :
1182
+ data.changed_pct < 10 ? ' Result: Moderate visual differences detected.' :
1183
+ ' Result: Significant visual differences detected.'),
1184
+ },
1185
+ ];
1186
+
1187
+ return { content };
1188
+ } catch (err) {
1189
+ return { content: [{ type: 'text', text: `Visual diff error: ${err.message}` }], isError: true };
1190
+ }
1191
+ }
1192
+ );
1193
+
931
1194
  // ═══════════════════════════════════════════════════════════════════
932
1195
  // Tool: list_devices
933
1196
  // ═══════════════════════════════════════════════════════════════════