pagebolt-mcp 1.10.1 → 1.12.0

This diff shows the changes between publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "pagebolt-mcp",
3
- "version": "1.10.1",
3
+ "version": "1.12.0",
4
4
  "description": "MCP server for PageBolt — take screenshots, generate PDFs, create OG images, inspect pages, record demo videos with Audio Guide narration, from AI coding assistants like Claude, Cursor, and Windsurf.",
5
5
  "main": "src/index.mjs",
6
6
  "module": "src/index.mjs",
package/server.json CHANGED
@@ -6,12 +6,12 @@
6
6
  "url": "https://github.com/Custodia-Admin/pagebolt-mcp",
7
7
  "source": "github"
8
8
  },
9
- "version": "1.10.1",
9
+ "version": "1.12.0",
10
10
  "packages": [
11
11
  {
12
12
  "registryType": "npm",
13
13
  "identifier": "pagebolt-mcp",
14
- "version": "1.10.1",
14
+ "version": "1.12.0",
15
15
  "transport": {
16
16
  "type": "stdio"
17
17
  },
package/src/index.mjs CHANGED
@@ -61,7 +61,7 @@ async function callApi(endpoint, options = {}) {
61
61
  const method = options.method || 'GET';
62
62
  const headers = {
63
63
  'x-api-key': API_KEY,
64
- 'user-agent': 'pagebolt-mcp/1.10.1',
64
+ 'user-agent': 'pagebolt-mcp/1.11.0',
65
65
  ...(options.body ? { 'Content-Type': 'application/json' } : {}),
66
66
  };
67
67
  const body = options.body ? JSON.stringify(options.body) : undefined;
@@ -215,9 +215,9 @@ Clicking menus, avatars, profile icons, "⋯" buttons, hamburger toggles, or any
215
215
 
216
216
  Rules:
217
217
  1. **Don't open menus you don't need.** For a high-level tour, navigate directly to the destination URL (from inspect_page / observe_page) instead of clicking through a dropdown.
218
- 2. **If you open an overlay, the very next step must commit to it** — either interact with an element INSIDE the overlay, or explicitly close it before continuing. There is no "press_key" action, so close an overlay with an evaluate step (note: max 2 evaluate steps per sequence):
219
- { "action": "evaluate", "script": "document.activeElement&&document.activeElement.blur&&document.activeElement.blur();document.dispatchEvent(new KeyboardEvent('keydown',{key:'Escape',bubbles:true}));" }
220
- (Clicking a blank area can also work, but may hit the overlay backdrop and navigate — prefer the evaluate approach or click a known-safe element.)
218
+ 2. **If you open an overlay, the very next step must commit to it** — either interact with an element INSIDE the overlay, or explicitly close it before continuing. The cleanest way to dismiss a dropdown/popover/modal is a press_key step:
219
+ { "action": "press_key", "key": "Escape" }
220
+ (Clicking a blank area can also work, but may hit the overlay backdrop and navigate — prefer press_key Escape, or click a known-safe element.)
221
221
  3. **Never chain clicks across a state change you haven't re-perceived.** Selectors gathered before a menu opened or a route changed may now point at the wrong (or covered) element.
222
222
 
223
223
  ## Re-perceive Between Actions (avoid getting lost)
@@ -300,7 +300,7 @@ Use blockBanners on almost every request to get clean captures. Combine blockAds
300
300
  function createConfiguredServer() {
301
301
  const srv = new McpServer({
302
302
  name: 'pagebolt',
303
- version: '1.10.1',
303
+ version: '1.11.0',
304
304
  }, {
305
305
  instructions: SERVER_INSTRUCTIONS,
306
306
  });
@@ -573,12 +573,13 @@ server.tool(
573
573
  z.object({
574
574
  action: z.enum([
575
575
  'navigate', 'click', 'dblclick', 'fill', 'select', 'hover',
576
- 'scroll', 'wait', 'wait_for', 'evaluate',
576
+ 'scroll', 'wait', 'wait_for', 'evaluate', 'press_key',
577
577
  'screenshot', 'pdf', 'diff',
578
578
  ]).describe('The action to perform'),
579
579
  url: z.string().url().optional().describe('URL to navigate to (for navigate action)'),
580
- selector: z.string().optional().describe('CSS selector for the target element (also used for element screenshots)'),
580
+ selector: z.string().optional().describe('CSS selector for the target element (also used for element screenshots; optional for press_key to focus a field first)'),
581
581
  value: z.string().optional().describe('Value to type or select'),
582
+ key: z.enum(['Escape', 'Enter', 'Tab', 'Backspace', 'Delete', 'Space', 'ArrowUp', 'ArrowDown', 'ArrowLeft', 'ArrowRight', 'Home', 'End', 'PageUp', 'PageDown']).optional().describe('Key to press (for press_key action). Use Escape to dismiss a dropdown/popover/modal, Enter to submit, Tab to move focus.'),
582
583
  ms: z.number().int().min(0).max(10000).optional().describe('Milliseconds to wait (for wait action)'),
583
584
  timeout: z.number().int().min(0).max(15000).optional().describe('Timeout in ms for wait_for (default: 10000)'),
584
585
  x: z.number().optional().describe('Horizontal scroll position in pixels (scroll action). Use when scrolling horizontally without a selector.'),
@@ -614,6 +615,7 @@ server.tool(
614
615
  blockTrackers: z.boolean().optional().describe('Block tracking scripts'),
615
616
  deviceScaleFactor: z.number().min(1).max(3).optional().describe('Device pixel ratio (default: 1)'),
616
617
  session_id: z.string().optional().describe('Persistent session ID (Starter+ only). Reuse a live browser page created with create_session — browser state (cookies, localStorage, auth) carries over from previous requests in this session.'),
618
+ observeAfterEachStep: z.boolean().optional().describe('FREE (no extra request charged). After every step, attach a compact, token-budgeted state snapshot — page type + the top interactive elements (id/role/name/selector) + suggested actions, NO screenshot. Use this when a step might open a dropdown/popover/modal or navigate: read the trace to confirm what is now on screen and pick the right selector for the NEXT call, instead of blind-batching. Hidden/off-screen elements are filtered out.'),
617
619
  },
618
620
  async (params) => {
619
621
  if (!params.steps || params.steps.length === 0) {
@@ -679,6 +681,22 @@ server.tool(
679
681
  }
680
682
  summary += `\nUsage: ${data.usage.outputs_charged} request(s) charged, ${data.usage.remaining} remaining.`;
681
683
 
684
+ // Phase 3: render the compact per-step state trace (free) so the agent can
685
+ // course-correct on its NEXT call — e.g. notice a popover opened.
686
+ const traced = (data.step_results || []).filter(s => s && s.state);
687
+ if (traced.length > 0) {
688
+ const lines = traced.map(s => {
689
+ const st = s.state;
690
+ if (st.error) return ` • step ${s.step_index} (${s.action}): [state unavailable]`;
691
+ const els = (st.elements || []).slice(0, 6)
692
+ .map(e => `${e.id}:${e.role}${e.name ? ` "${e.name}"` : ''}`).join(', ');
693
+ const acts = (st.actions || []).map(a => a.intent).join(', ');
694
+ return ` • step ${s.step_index} (${s.action}) → ${st.pageType} @ ${st.url}\n` +
695
+ ` elements: ${els || '(none)'}` + (acts ? `\n actions: ${acts}` : '');
696
+ });
697
+ summary += `\n\nState trace (observeAfterEachStep — free):\n${lines.join('\n')}`;
698
+ }
699
+
682
700
  content.push({ type: 'text', text: summary });
683
701
  return { content };
684
702
  } catch (err) {
@@ -698,11 +716,12 @@ server.tool(
698
716
  z.object({
699
717
  action: z.enum([
700
718
  'navigate', 'click', 'dblclick', 'fill', 'select', 'hover',
701
- 'scroll', 'wait', 'wait_for', 'evaluate',
719
+ 'scroll', 'wait', 'wait_for', 'evaluate', 'press_key',
702
720
  ]).describe('The action to perform (no screenshot/pdf — the whole sequence is recorded as video)'),
703
721
  url: z.string().url().optional().describe('URL to navigate to (for navigate action)'),
704
- selector: z.string().optional().describe('CSS selector for the target element'),
722
+ selector: z.string().optional().describe('CSS selector for the target element (optional for press_key to focus a field first)'),
705
723
  value: z.string().optional().describe('Value to type or select'),
724
+ key: z.enum(['Escape', 'Enter', 'Tab', 'Backspace', 'Delete', 'Space', 'ArrowUp', 'ArrowDown', 'ArrowLeft', 'ArrowRight', 'Home', 'End', 'PageUp', 'PageDown']).optional().describe('Key to press (for press_key action). Use Escape to dismiss a dropdown/popover/modal that a previous step opened — the cleanest way to avoid a stuck-open overlay obscuring later steps.'),
706
725
  ms: z.number().int().min(0).max(10000).optional().describe('Milliseconds to wait (for wait action). Only use wait steps when the page needs loading time or to hold for narration — the pace parameter handles inter-step timing automatically.'),
707
726
  timeout: z.number().int().min(0).max(15000).optional().describe('Timeout in ms for wait_for (default: 10000)'),
708
727
  x: z.number().optional().describe('Horizontal scroll position in pixels (scroll action). Use when scrolling horizontally without a selector.'),
@@ -1498,8 +1517,8 @@ Based on the inspection and the description, plan 5–12 action steps. Rules:
1498
1517
  - Do NOT pad with wait steps between steps that don't need load time — pace handles inter-step timing automatically.
1499
1518
  - Do NOT use zoom unless the user explicitly asked for it.
1500
1519
  - **Avoid opening dropdowns/menus/popovers** unless the demo is specifically about their contents — they stay open and obscure or misdirect later steps. Prefer navigating directly to the target URL (from the inspection) over clicking through a menu. The recording cannot re-check the page between steps, so a stuck-open overlay will break everything after it.
1501
- - If a step DOES open an overlay, the next step must either act on an element inside it or close it. There is no key-press action; close with an evaluate step (max 2 per video):
1502
- { "action": "evaluate", "script": "document.activeElement&&document.activeElement.blur&&document.activeElement.blur();document.dispatchEvent(new KeyboardEvent('keydown',{key:'Escape',bubbles:true}));" }
1520
+ - If a step DOES open an overlay, the next step must either act on an element inside it or close it. The cleanest way is a press_key step:
1521
+ { "action": "press_key", "key": "Escape" }
1503
1522
 
1504
1523
  **Step 3 — Write the narration script**
1505
1524
  Write an audioGuide.script that matches the step count. Format: