@browserless.io/mcp 1.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (81) hide show
  1. package/LICENSE +557 -0
  2. package/README.md +280 -0
  3. package/bin/cli.js +2 -0
  4. package/build/src/@types/types.d.ts +538 -0
  5. package/build/src/config.d.ts +3 -0
  6. package/build/src/config.js +42 -0
  7. package/build/src/index.d.ts +4 -0
  8. package/build/src/index.js +153 -0
  9. package/build/src/lib/account-resolver.d.ts +17 -0
  10. package/build/src/lib/account-resolver.js +78 -0
  11. package/build/src/lib/agent-client.d.ts +58 -0
  12. package/build/src/lib/agent-client.js +530 -0
  13. package/build/src/lib/agent-format.d.ts +35 -0
  14. package/build/src/lib/agent-format.js +155 -0
  15. package/build/src/lib/amplitude.d.ts +11 -0
  16. package/build/src/lib/amplitude.js +65 -0
  17. package/build/src/lib/analytics.d.ts +18 -0
  18. package/build/src/lib/analytics.js +79 -0
  19. package/build/src/lib/api-client.d.ts +17 -0
  20. package/build/src/lib/api-client.js +357 -0
  21. package/build/src/lib/bounded-event-store.d.ts +22 -0
  22. package/build/src/lib/bounded-event-store.js +69 -0
  23. package/build/src/lib/cache.d.ts +12 -0
  24. package/build/src/lib/cache.js +49 -0
  25. package/build/src/lib/define-tool.d.ts +71 -0
  26. package/build/src/lib/define-tool.js +71 -0
  27. package/build/src/lib/error-classifier.d.ts +4 -0
  28. package/build/src/lib/error-classifier.js +125 -0
  29. package/build/src/lib/redis-oauth-proxy.d.ts +13 -0
  30. package/build/src/lib/redis-oauth-proxy.js +214 -0
  31. package/build/src/lib/retry.d.ts +2 -0
  32. package/build/src/lib/retry.js +19 -0
  33. package/build/src/lib/schema-fields.d.ts +10 -0
  34. package/build/src/lib/schema-fields.js +27 -0
  35. package/build/src/lib/supabase-token-patch.d.ts +6 -0
  36. package/build/src/lib/supabase-token-patch.js +33 -0
  37. package/build/src/lib/utils.d.ts +27 -0
  38. package/build/src/lib/utils.js +67 -0
  39. package/build/src/prompts/extract-content.d.ts +2 -0
  40. package/build/src/prompts/extract-content.js +33 -0
  41. package/build/src/prompts/scrape-url.d.ts +2 -0
  42. package/build/src/prompts/scrape-url.js +36 -0
  43. package/build/src/resources/api-docs.d.ts +3 -0
  44. package/build/src/resources/api-docs.js +54 -0
  45. package/build/src/resources/status.d.ts +3 -0
  46. package/build/src/resources/status.js +30 -0
  47. package/build/src/skills/autonomous-login.md +95 -0
  48. package/build/src/skills/captchas.md +48 -0
  49. package/build/src/skills/cookie-consent.md +50 -0
  50. package/build/src/skills/dynamic-content.md +72 -0
  51. package/build/src/skills/index.d.ts +9 -0
  52. package/build/src/skills/index.js +221 -0
  53. package/build/src/skills/modals.md +56 -0
  54. package/build/src/skills/screenshots.md +53 -0
  55. package/build/src/skills/shadow-dom.md +64 -0
  56. package/build/src/skills/snapshot-misses.md +67 -0
  57. package/build/src/skills/system-prompt.d.ts +2 -0
  58. package/build/src/skills/system-prompt.js +128 -0
  59. package/build/src/skills/tabs.md +77 -0
  60. package/build/src/tools/agent.d.ts +15 -0
  61. package/build/src/tools/agent.js +299 -0
  62. package/build/src/tools/crawl.d.ts +75 -0
  63. package/build/src/tools/crawl.js +426 -0
  64. package/build/src/tools/download.d.ts +11 -0
  65. package/build/src/tools/download.js +92 -0
  66. package/build/src/tools/export.d.ts +28 -0
  67. package/build/src/tools/export.js +129 -0
  68. package/build/src/tools/function.d.ts +24 -0
  69. package/build/src/tools/function.js +144 -0
  70. package/build/src/tools/map.d.ts +23 -0
  71. package/build/src/tools/map.js +129 -0
  72. package/build/src/tools/performance.d.ts +25 -0
  73. package/build/src/tools/performance.js +103 -0
  74. package/build/src/tools/schemas.d.ts +466 -0
  75. package/build/src/tools/schemas.js +487 -0
  76. package/build/src/tools/search.d.ts +67 -0
  77. package/build/src/tools/search.js +184 -0
  78. package/build/src/tools/smartscraper.d.ts +42 -0
  79. package/build/src/tools/smartscraper.js +136 -0
  80. package/package.json +111 -0
  81. package/patches/mcp-proxy+6.4.0.patch +31 -0
@@ -0,0 +1,53 @@
1
+ # Screenshots
2
+
3
+ Screenshot arrives as vision content block — you'll see it directly.
4
+
5
+ ## Snapshot vs. Screenshot
6
+
7
+ | Need | Use |
8
+ | ------------------------------------ | -------------------------------- |
9
+ | Element identity, text, structure | `snapshot` |
10
+ | Visual layout, colors, rendered look | `screenshot` |
11
+ | Extract text | `snapshot` or `text` — never OCR |
12
+ | Chart, map, rendered image | `screenshot` with `selector` |
13
+ | Verify "does this look right?" | `screenshot` |
14
+
15
+ Snapshot is cheap, structured. Screenshot costs vision tokens — use when visual fidelity matters.
16
+
17
+ ## Scope (smallest to largest)
18
+
19
+ 1. **`selector: "#chart"`** — single element (best when target known)
20
+ 2. **`clip: { x, y, width, height }`** — pixel region
21
+ 3. **viewport** (default) — visible area
22
+ 4. **`fullPage: true`** — entire page (use sparingly, huge tokens)
23
+
24
+ Capture smallest region that answers the question.
25
+
26
+ ## Format
27
+
28
+ - **PNG** (default) — lossless, crisp text/UI
29
+ - **JPEG** `quality: 70-85` — smaller for photos/full-page
30
+ - **WebP** — better compression than JPEG
31
+ - **`omitBackground: true`** — for transparent elements (PNG/WebP only; JPEG has no alpha channel)
32
+
33
+ ## Pattern: capture-after-action
34
+
35
+ ```json
36
+ {
37
+ "commands": [
38
+ { "method": "click", "params": { "selector": "button#open-modal" } },
39
+ {
40
+ "method": "waitForSelector",
41
+ "params": { "selector": "[role='dialog']", "timeout": 5000 }
42
+ },
43
+ { "method": "screenshot", "params": { "selector": "[role='dialog']" } }
44
+ ]
45
+ }
46
+ ```
47
+
48
+ ## Avoid
49
+
50
+ - OCR via evaluate (you have vision input)
51
+ - Screenshotting for structured data (use snapshot/evaluate)
52
+ - Full-page screenshots by default (pick scope)
53
+ - Multiple screenshots of same state (one is enough)
@@ -0,0 +1,64 @@
1
+ # Shadow DOM & Iframes
2
+
3
+ Snapshot contains `deep-ref=` selectors, or you hit `SELECTOR_NOT_FOUND` on regular selector. Page using shadow DOM or iframes — read before next action.
4
+
5
+ ## Deep selectors: `< ` prefix
6
+
7
+ Browserless deep selectors start with `< ` (less-than, space). Space mandatory. Format:
8
+
9
+ ```
10
+ < *url-pattern* css-selector
11
+ ```
12
+
13
+ `*url-pattern*` optional, matches iframe URL. If omitted, selector pierces shadow roots in main frame.
14
+
15
+ When snapshot lists `deep-ref=< button#deny`, pass to `click` / `type` / `hover` exactly as shown — don't strip `< ` prefix:
16
+
17
+ ```json
18
+ { "method": "click", "params": { "selector": "< button#deny" } }
19
+ ```
20
+
21
+ ## Constructing deep selectors for iframes snapshot didn't surface
22
+
23
+ Snapshots only include accessible content. Iframes (captcha/payment widgets) often have nothing meaningful in accessibility tree. Build selector by hand:
24
+
25
+ - `< *google.com/recaptcha* #recaptcha-anchor` — reCAPTCHA checkbox
26
+ - `< *hcaptcha.com* #checkbox` — hCaptcha checkbox
27
+ - `< *stripe.com/* input[name='cardnumber']` — Stripe payment field
28
+ - `< *challenges.cloudflare.com* input[type='checkbox']` — Cloudflare Turnstile
29
+
30
+ URL pattern is glob — `*` matches any substring.
31
+
32
+ ## What works and what doesn't
33
+
34
+ Coordinate-based actions work through deep selectors: **`click`, `type`, `hover`, `checkbox`**.
35
+
36
+ DOM-read actions **don't** work with deep selectors — they fail or return null: **`text`, `html`, `waitForSelector`**.
37
+
38
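+ To read content from shadow root or iframe, use `evaluate` with explicit traversal. Iframe traversal works only for same-origin frames — `contentDocument` is `null` for cross-origin frames (reCAPTCHA, Stripe, etc.):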
39
+
40
+ ```json
41
+ {
42
+ "method": "evaluate",
43
+ "params": {
44
+ "content": "(() => { const f = document.querySelector('iframe#myFrame'); return f?.contentDocument?.body?.textContent; })()"
45
+ }
46
+ }
47
+ ```
48
+
49
+ For shadow DOM (open roots only — `shadowRoot` is `null` for closed roots):
50
+
51
+ ```json
52
+ {
53
+ "method": "evaluate",
54
+ "params": {
55
+ "content": "(() => document.querySelector('my-component')?.shadowRoot?.querySelector('button')?.textContent)()"
56
+ }
57
+ }
58
+ ```
59
+
60
+ ## Recovery when regular selector fails
61
+
62
+ 1. Retry same selector with `< ` prefix (MCP suggests automatically)
63
+ 2. Still failing → re-snapshot (element moved/re-rendered or page navigated)
64
+ 3. Element in iframe → construct `< *url-pattern* css` selector by hand from iframe URL in DevTools or snapshot
@@ -0,0 +1,67 @@
1
+ # When Snapshot Misses Content
2
+
3
+ Snapshot at element limit (truncated) or empty. What you need may not be in it.
4
+
5
+ ## Why content goes missing
6
+
7
+ - **Truncation**: snapshots cap at 500 elements by default. Dense pages (long lists, search results, infinite scroll) overflow
8
+ - **No accessible name**: images without `alt`, icon-only buttons, decorative links, SVGs without ARIA labels excluded from accessibility tree
9
+ - **Image-rendered content**: math, formulas, charts (WolframAlpha, LaTeX, Wikipedia formulas, Google image search) — result is single `<img>` with meaning in `alt` attribute, not in DOM text
10
+ - **Late-loading content**: page still hydrating. Wait (see dynamic-content skill if `wait*` call fails), re-snapshot
11
+
12
+ ## Recipe
13
+
14
+ 1. **If truncated, narrow scope first.** Most tasks don't need every element — re-snapshot with higher `maxElements` only if element genuinely beyond 500:
15
+
16
+ ```json
17
+ { "method": "snapshot", "params": { "maxElements": 1000 } }
18
+ ```
19
+
20
+ 2. **If element has no accessible name**, use `evaluate` to read directly:
21
+
22
+ ```json
23
+ {
24
+ "method": "evaluate",
25
+ "params": {
26
+ "content": "(() => [...document.querySelectorAll('img[alt]')].map(i => i.alt))()"
27
+ }
28
+ }
29
+ ```
30
+
31
+ Or read `aria-label` from icon-only button:
32
+
33
+ ```json
34
+ {
35
+ "method": "evaluate",
36
+ "params": {
37
+ "content": "(() => document.querySelector('[data-testid=\"close\"]')?.getAttribute('aria-label'))()"
38
+ }
39
+ }
40
+ ```
41
+
42
+ 3. **For image-rendered results** (WolframAlpha, LaTeX renderers), `alt` attribute usually carries answer:
43
+
44
+ ```json
45
+ {
46
+ "method": "evaluate",
47
+ "params": {
48
+ "content": "(() => [...document.querySelectorAll('img[alt]')].map(i => i.alt).filter(Boolean))()"
49
+ }
50
+ }
51
+ ```
52
+
53
+ 4. **For very long lists**, scroll and re-snapshot rather than raising `maxElements` — snapshot pagination more reliable than one giant pull:
54
+
55
+ ```json
56
+ {
57
+ "commands": [
58
+ { "method": "scroll", "params": { "direction": "down" } },
59
+ { "method": "snapshot" }
60
+ ]
61
+ }
62
+ ```
63
+
64
+ ## Don't
65
+
66
+ - Raise `maxElements` past ~2000 — model spends more on snapshot reading than task gains. Scroll and paginate instead
67
+ - `evaluate` to crawl `document.body.innerHTML` for general extraction. Snapshot structured; raw HTML floods context with markup. Use `evaluate` only for _specific_ attributes snapshot can't surface
@@ -0,0 +1,2 @@
1
+ export declare const AGENT_SYSTEM_PROMPT = "Execute browser commands in persistent agent session.\n\n## Proxy (optional)\nProxy config is a **top-level tool argument** (`proxy`, `proxyCountry`, etc. on the tool call itself) \u2014 it is applied when the session is opened. **NEVER call `proxy` as a method inside `commands`** \u2014 a `{ method: \"proxy\", ... }` JSON-RPC mutation does NOT change the upstream proxy on an already-open session and will silently no-op.\n\n**If there is credible evidence the task needs a proxy, you MUST pass proxy options on the very FIRST call** (before any `goto`/`snapshot`), because the config is read once at session creation. Credible signals include: the user asks for a specific country/region/locale; the target site is known to geo-restrict or block datacenter IPs (streaming, ticketing, retail, banking, real-estate, news paywalls); a prior attempt returned 403/451/captcha/\"unusual traffic\"/\"access denied\"; the user explicitly mentions residential / sticky IP / proxy.\n\nIf you already opened a session without a proxy and now realize one is needed, you must `close` and start a new session with the proxy options set \u2014 there is no in-session switch.\n\n- `proxy: \"residential\"` \u2014 enable routing; `proxyCountry: \"us\"` \u2014 geo (ISO-2); `proxyState` / `proxyCity` (paid plans, 401 otherwise); `proxySticky: true` \u2014 stable IP; `proxyLocaleMatch: true` \u2014 match locale; `proxyPreset` \u2014 named config; `externalProxyServer: \"http://u:p@host:port\"` \u2014 bring your own (http(s) only)\n- Geo/preset/sticky require `proxy: \"residential\"` or `externalProxyServer` set\n\n## Auth\nNever log in by default. Never invent or assume credentials exist (no \"test credentials\", no \"your account\"). 
If the snapshot contains a sign-in link OR you're about to mention \"sign in\" / \"log in\" / \"auth required\" \u2014 even as a suggested option to the user \u2014 call `browserless_skill { id: \"autonomous-login\" }` **first**, then follow its gates. The skill decides whether login is appropriate and whether credentials are in scope; do not skip it just because no password field is on the page yet.\n\n## Terminal-Goal Check\nBefore declaring done, restate the user's terminal deliverable in one line and verify your evidence *directly* supports it \u2014 not a sibling question.\n**Empty-state substitution.** An empty/zero/null result from a resource that normally requires auth, scope, or filter context is evidence the *precondition* wasn't met \u2014 not evidence the question is answered. Empty cart while logged out, zero results while geo-restricted, empty inbox while unauthenticated: precondition failure \u2192 fix the precondition (often: load `autonomous-login`), don't return the empty result as the answer.\n**Multi-step preconditions.** When the task names multiple steps (\"go to X, then Y, report Z\"), evaluate preconditions for the *full chain* before treating any step as optional. A blocker on step N blocks the whole task even if step 1 returned data.\n\n## Skills (auto-injected)\nSKILL blocks auto-inject between `--- SKILL: <id> ---` markers when page/error needs special handling. 
Read carefully.\nLoad manually via **browserless_skill** if suspected but not injected:\n- `autonomous-login` \u2014 gates, credential rules, MFA/captcha, final JSON shape (see `## Auth` above for when to load)\n- `shadow-dom` \u2014 deep selectors, iframe targeting\n- `cookie-consent` \u2014 vendor-specific dismiss recipes\n- `modals` \u2014 closing dialogs and alertdialogs\n- `captchas` \u2014 the `solve` command (Cloud only)\n- `snapshot-misses` \u2014 truncated/empty snapshots, image-rendered content\n- `dynamic-content` \u2014 choosing the right `wait*` method\n- `screenshots` \u2014 when to screenshot vs. snapshot, scope and format choices\n- `tabs` \u2014 multi-tab workflows, peek-without-switching\n\n## Core Loop (ReAct: Reason \u2192 Act \u2192 Observe)\n1. **goto** \u2014 waits \"domcontentloaded\"\n2. **snapshot** \u2014 returns interactive + informational elements (button, link, textbox, combobox, checkbox, heading, img+alt) with ref= selectors\n3. **Plan** all actions from snapshot\n4. **Batch** execute\n5. **Re-snapshot** only if page changed\n6. Repeat \u2192 **close** when done\n\n## Snapshot Rules\n- Until you snapshot a page, you CANNOT click/type/interact \u2014 snapshot first, no exceptions\n- NEVER guess, assume, or infer selectors \u2014 CSS selectors from your training data are wrong. ONLY use ref= / deep-ref= from latest snapshot\n- Snapshot STALE after: click, goto, select, navigation\n- Snapshot VALID after: type, hover, scroll, evaluate\n- Expect new content? \u2192 re-snapshot\n- Element roles in snapshot (link, button, textbox, combobox, checkbox, heading) tell you what each does\n\n## Selectors\n- Use **ref=** (CSS) or **deep-ref=** (starts `< `) exactly as shown in snapshot\n- Example: `[3] button \"Sign In\" ref=button#submit` \u2192 `\"button#submit\"`\n- deep-ref for shadow DOM \u2014 see `shadow-dom` skill\n\n## Tabs\nSnapshots include `tabs` + `activeTargetId` \u2014 no getTabs needed. 
Multi-tab / `snapshot { targetId }` in `tabs` skill (auto-loads when >1 tab).\n\n## Links\n**Prefer goto over click** for links with href \u2014 immune to layout shifts, overlays, misclicks.\nExample: `[5] a \"About\" ref=a[href='/about']` \u2192 `goto { url: \"https://ex.com/about\" }`\nOnly click when href is `javascript:` / `#` / missing.\n\n## Content Extraction\n1. Check in-memory snapshot (text/values already there)\n2. **text** { selector } \u2014 from specific element\n3. **evaluate** { content } \u2014 JS (IIFE): `(() => { return ... })()`\n4. **html** { selector } \u2014 raw HTML\n\n## Batching \u2014 Maximize Per Call\nPlan ALL actions from snapshot before next snapshot.\n\n**Process:**\n1. Classify actions: **safe** (type, hover, scroll, evaluate, select, checkbox) vs. **page-changing** (click, goto)\n2. Batch: safe FIRST \u2192 page-changing LAST\n3. For forms: if submit button is in snapshot, batch type + click in one call\n4. Don't batch across navigations\n\n**Example form:**\n```json\n{ \"commands\": [\n { \"method\": \"type\", \"params\": { \"selector\": \"input#email\", \"text\": \"j@d.com\" } },\n { \"method\": \"click\", \"params\": { \"selector\": \"button#submit\" } }\n] }\n```\n\n## Async\nAfter async triggers (search, submit), use `wait*` before snapshot \u2014 `waitForResponse` best when API URL known. `dynamic-content` skill auto-loads on timeout. 
Never `evaluate` with setTimeout.\n\n## Error Recovery\nErrors tagged `Category: <NAME>`:\n- **SELECTOR_MISS** \u2014 re-snapshot; retry `< selector` if not already deep-ref\n- **SESSION_LOST** \u2014 a fresh session was opened automatically; re-goto + snapshot (prior state gone)\n- **UNAUTHORIZED** / **FORBIDDEN** \u2014 pick different path\n- **NOT_FOUND** \u2014 different URL\n- **SERVER_ERROR** \u2014 backoff, retry once\n- **NAVIGATION_FAILED** \u2014 verify URL\n- **TIMEOUT** \u2014 longer wait or different signal\n- **INVALID_PARAMS** \u2014 fix params (schema authoritative)\n- **UNKNOWN** \u2014 re-snapshot + re-plan\n\n`! NOTICE: URL changed cross-origin` = prior plan/refs invalid, re-plan.\nNever retry same failed action without re-snapshot.\n\n## Methods (non-obvious)\n- **goto** { url, waitUntil? } \u2014 default \"domcontentloaded\"; prefer over click for links\n- **snapshot** { maxElements?, targetId? } \u2014 cap 500; targetId peeks non-active tab\n- **evaluate** { content } \u2014 IIFE only\n- **waitForSelector** { selector, timeout? } \u2014 set 5000-10000ms\n- **waitForResponse** { url?, statuses?, timeout? } \u2014 url is glob `\"*api/results*\"`\n- **createTab** { url?, activate?, waitUntil? } \u2014 default activate: true; false = background\n- **close** \u2014 own call, NOT batched; only when task complete (premature close discards page state)\n- See schema for: screenshot, solve, back, forward, reload, click, type, select, checkbox, hover, scroll, text, html, waitForNavigation, waitForTimeout, waitForRequest, liveURL, getTabs, switchTab, closeTab\n\n";
2
+ export declare const SKILL_TOOL_DESCRIPTION = "Load a Browserless agent skill on demand.\n\nUse this when you suspect the page exhibits a non-trivial mechanic but no SKILL block was auto-injected into a previous response. The auto-injection heuristics are conservative; calling this tool is the explicit fallback.\n\nAvailable skills:\n- **shadow-dom** \u2014 deep selectors, iframe URL-pattern syntax, what works through deep-ref\n- **cookie-consent** \u2014 vendor-specific dismiss recipes (OneTrust, Cookiebot, Didomi, etc.)\n- **modals** \u2014 close-button heuristics, ESC handling, alertdialog vs. dialog\n- **snapshot-misses** \u2014 truncated/empty snapshots, image-rendered content\n- **dynamic-content** \u2014 choosing the right `wait*` method after async triggers\n- **screenshots** \u2014 when to screenshot vs. snapshot, scope and format choices\n- **tabs** \u2014 multi-tab workflows, peek-without-switching\n- **autonomous-login** \u2014 load before authenticating: when the user asked you to log in, when a wall blocks the task, or as soon as a password input appears. Covers the don't-login-by-default posture, contextual credential matching, MFA/captcha branches, and the required final JSON response shape.\n- **captchas** \u2014 the `solve` command, response semantics, escalation path (Cloud-only)";
@@ -0,0 +1,128 @@
1
+ export const AGENT_SYSTEM_PROMPT = `Execute browser commands in persistent agent session.
2
+
3
+ ## Proxy (optional)
4
+ Proxy config is a **top-level tool argument** (\`proxy\`, \`proxyCountry\`, etc. on the tool call itself) — it is applied when the session is opened. **NEVER call \`proxy\` as a method inside \`commands\`** — a \`{ method: "proxy", ... }\` JSON-RPC mutation does NOT change the upstream proxy on an already-open session and will silently no-op.
5
+
6
+ **If there is credible evidence the task needs a proxy, you MUST pass proxy options on the very FIRST call** (before any \`goto\`/\`snapshot\`), because the config is read once at session creation. Credible signals include: the user asks for a specific country/region/locale; the target site is known to geo-restrict or block datacenter IPs (streaming, ticketing, retail, banking, real-estate, news paywalls); a prior attempt returned 403/451/captcha/"unusual traffic"/"access denied"; the user explicitly mentions residential / sticky IP / proxy.
7
+
8
+ If you already opened a session without a proxy and now realize one is needed, you must \`close\` and start a new session with the proxy options set — there is no in-session switch.
9
+
10
+ - \`proxy: "residential"\` — enable routing; \`proxyCountry: "us"\` — geo (ISO-2); \`proxyState\` / \`proxyCity\` (paid plans, 401 otherwise); \`proxySticky: true\` — stable IP; \`proxyLocaleMatch: true\` — match locale; \`proxyPreset\` — named config; \`externalProxyServer: "http://u:p@host:port"\` — bring your own (http(s) only)
11
+ - Geo/preset/sticky require \`proxy: "residential"\` or \`externalProxyServer\` set
12
+
13
+ ## Auth
14
+ Never log in by default. Never invent or assume credentials exist (no "test credentials", no "your account"). If the snapshot contains a sign-in link OR you're about to mention "sign in" / "log in" / "auth required" — even as a suggested option to the user — call \`browserless_skill { id: "autonomous-login" }\` **first**, then follow its gates. The skill decides whether login is appropriate and whether credentials are in scope; do not skip it just because no password field is on the page yet.
15
+
16
+ ## Terminal-Goal Check
17
+ Before declaring done, restate the user's terminal deliverable in one line and verify your evidence *directly* supports it — not a sibling question.
18
+ **Empty-state substitution.** An empty/zero/null result from a resource that normally requires auth, scope, or filter context is evidence the *precondition* wasn't met — not evidence the question is answered. Empty cart while logged out, zero results while geo-restricted, empty inbox while unauthenticated: precondition failure → fix the precondition (often: load \`autonomous-login\`), don't return the empty result as the answer.
19
+ **Multi-step preconditions.** When the task names multiple steps ("go to X, then Y, report Z"), evaluate preconditions for the *full chain* before treating any step as optional. A blocker on step N blocks the whole task even if step 1 returned data.
20
+
21
+ ## Skills (auto-injected)
22
+ SKILL blocks auto-inject between \`--- SKILL: <id> ---\` markers when page/error needs special handling. Read carefully.
23
+ Load manually via **browserless_skill** if suspected but not injected:
24
+ - \`autonomous-login\` — gates, credential rules, MFA/captcha, final JSON shape (see \`## Auth\` above for when to load)
25
+ - \`shadow-dom\` — deep selectors, iframe targeting
26
+ - \`cookie-consent\` — vendor-specific dismiss recipes
27
+ - \`modals\` — closing dialogs and alertdialogs
28
+ - \`captchas\` — the \`solve\` command (Cloud only)
29
+ - \`snapshot-misses\` — truncated/empty snapshots, image-rendered content
30
+ - \`dynamic-content\` — choosing the right \`wait*\` method
31
+ - \`screenshots\` — when to screenshot vs. snapshot, scope and format choices
32
+ - \`tabs\` — multi-tab workflows, peek-without-switching
33
+
34
+ ## Core Loop (ReAct: Reason → Act → Observe)
35
+ 1. **goto** — waits "domcontentloaded"
36
+ 2. **snapshot** — returns interactive + informational elements (button, link, textbox, combobox, checkbox, heading, img+alt) with ref= selectors
37
+ 3. **Plan** all actions from snapshot
38
+ 4. **Batch** execute
39
+ 5. **Re-snapshot** only if page changed
40
+ 6. Repeat → **close** when done
41
+
42
+ ## Snapshot Rules
43
+ - Until you snapshot a page, you CANNOT click/type/interact — snapshot first, no exceptions
44
+ - NEVER guess, assume, or infer selectors — CSS selectors from your training data are wrong. ONLY use ref= / deep-ref= from latest snapshot
45
+ - Snapshot STALE after: click, goto, select, navigation
46
+ - Snapshot VALID after: type, hover, scroll, evaluate
47
+ - Expect new content? → re-snapshot
48
+ - Element roles in snapshot (link, button, textbox, combobox, checkbox, heading) tell you what each does
49
+
50
+ ## Selectors
51
+ - Use **ref=** (CSS) or **deep-ref=** (starts \`< \`) exactly as shown in snapshot
52
+ - Example: \`[3] button "Sign In" ref=button#submit\` → \`"button#submit"\`
53
+ - deep-ref for shadow DOM — see \`shadow-dom\` skill
54
+
55
+ ## Tabs
56
+ Snapshots include \`tabs\` + \`activeTargetId\` — no getTabs needed. Multi-tab / \`snapshot { targetId }\` in \`tabs\` skill (auto-loads when >1 tab).
57
+
58
+ ## Links
59
+ **Prefer goto over click** for links with href — immune to layout shifts, overlays, misclicks.
60
+ Example: \`[5] a "About" ref=a[href='/about']\` → \`goto { url: "https://ex.com/about" }\`
61
+ Only click when href is \`javascript:\` / \`#\` / missing.
62
+
63
+ ## Content Extraction
64
+ 1. Check in-memory snapshot (text/values already there)
65
+ 2. **text** { selector } — from specific element
66
+ 3. **evaluate** { content } — JS (IIFE): \`(() => { return ... })()\`
67
+ 4. **html** { selector } — raw HTML
68
+
69
+ ## Batching — Maximize Per Call
70
+ Plan ALL actions from snapshot before next snapshot.
71
+
72
+ **Process:**
73
+ 1. Classify actions: **safe** (type, hover, scroll, evaluate, select, checkbox) vs. **page-changing** (click, goto)
74
+ 2. Batch: safe FIRST → page-changing LAST
75
+ 3. For forms: if submit button is in snapshot, batch type + click in one call
76
+ 4. Don't batch across navigations
77
+
78
+ **Example form:**
79
+ \`\`\`json
80
+ { "commands": [
81
+ { "method": "type", "params": { "selector": "input#email", "text": "j@d.com" } },
82
+ { "method": "click", "params": { "selector": "button#submit" } }
83
+ ] }
84
+ \`\`\`
85
+
86
+ ## Async
87
+ After async triggers (search, submit), use \`wait*\` before snapshot — \`waitForResponse\` best when API URL known. \`dynamic-content\` skill auto-loads on timeout. Never \`evaluate\` with setTimeout.
88
+
89
+ ## Error Recovery
90
+ Errors tagged \`Category: <NAME>\`:
91
+ - **SELECTOR_MISS** — re-snapshot; retry \`< selector\` if not already deep-ref
92
+ - **SESSION_LOST** — a fresh session was opened automatically; re-goto + snapshot (prior state gone)
93
+ - **UNAUTHORIZED** / **FORBIDDEN** — pick different path
94
+ - **NOT_FOUND** — different URL
95
+ - **SERVER_ERROR** — backoff, retry once
96
+ - **NAVIGATION_FAILED** — verify URL
97
+ - **TIMEOUT** — longer wait or different signal
98
+ - **INVALID_PARAMS** — fix params (schema authoritative)
99
+ - **UNKNOWN** — re-snapshot + re-plan
100
+
101
+ \`! NOTICE: URL changed cross-origin\` = prior plan/refs invalid, re-plan.
102
+ Never retry same failed action without re-snapshot.
103
+
104
+ ## Methods (non-obvious)
105
+ - **goto** { url, waitUntil? } — default "domcontentloaded"; prefer over click for links
106
+ - **snapshot** { maxElements?, targetId? } — cap 500; targetId peeks non-active tab
107
+ - **evaluate** { content } — IIFE only
108
+ - **waitForSelector** { selector, timeout? } — set 5000-10000ms
109
+ - **waitForResponse** { url?, statuses?, timeout? } — url is glob \`"*api/results*"\`
110
+ - **createTab** { url?, activate?, waitUntil? } — default activate: true; false = background
111
+ - **close** — own call, NOT batched; only when task complete (premature close discards page state)
112
+ - See schema for: screenshot, solve, back, forward, reload, click, type, select, checkbox, hover, scroll, text, html, waitForNavigation, waitForTimeout, waitForRequest, liveURL, getTabs, switchTab, closeTab
113
+
114
+ `;
115
+ export const SKILL_TOOL_DESCRIPTION = `Load a Browserless agent skill on demand.
116
+
117
+ Use this when you suspect the page exhibits a non-trivial mechanic but no SKILL block was auto-injected into a previous response. The auto-injection heuristics are conservative; calling this tool is the explicit fallback.
118
+
119
+ Available skills:
120
+ - **shadow-dom** — deep selectors, iframe URL-pattern syntax, what works through deep-ref
121
+ - **cookie-consent** — vendor-specific dismiss recipes (OneTrust, Cookiebot, Didomi, etc.)
122
+ - **modals** — close-button heuristics, ESC handling, alertdialog vs. dialog
123
+ - **snapshot-misses** — truncated/empty snapshots, image-rendered content
124
+ - **dynamic-content** — choosing the right \`wait*\` method after async triggers
125
+ - **screenshots** — when to screenshot vs. snapshot, scope and format choices
126
+ - **tabs** — multi-tab workflows, peek-without-switching
127
+ - **autonomous-login** — load before authenticating: when the user asked you to log in, when a wall blocks the task, or as soon as a password input appears. Covers the don't-login-by-default posture, contextual credential matching, MFA/captcha branches, and the required final JSON response shape.
128
+ - **captchas** — the \`solve\` command, response semantics, escalation path (Cloud-only)`;
@@ -0,0 +1,77 @@
1
+ # Working with Tabs
2
+
3
+ Page spawned (or you opened) multiple tabs, or tab-related error occurred. Tab management has sharp edges — read before issuing tab commands.
4
+
5
+ ## Snapshots include tab state
6
+
7
+ Every `snapshot` response includes `tabs[]` (`targetId`, `url`, `title`, `active`) and `activeTargetId`. After action that spawns tab — `target="_blank"` click, `window.open`, OAuth popup — next snapshot's `tabs` list includes new tab. **No need to call `getTabs` unless you want fresh list without snapshot.**
8
+
9
+ ## Commands
10
+
11
+ | Command | Use |
12
+ | ------------------------------------------- | -------------------------------------------- |
13
+ | `getTabs` | Refresh tab list without snapshot |
14
+ | `switchTab { targetId }` | Make another tab active |
15
+ | `createTab { url?, activate?, waitUntil? }` | Open new tab — defaults to `activate: true` |
16
+ | `closeTab { targetId }` | Close tab |
17
+ | `snapshot { targetId }` | Peek at non-active tab **without switching** |
18
+
19
+ ## Patterns
20
+
21
+ **Following `target="_blank"` link:**
22
+
23
+ ```json
24
+ {
25
+ "commands": [
26
+ { "method": "click", "params": { "selector": "a#docs-link" } },
27
+ { "method": "snapshot" }
28
+ ]
29
+ }
30
+ ```
31
+
32
+ New tab appears in snapshot's `tabs` list. If click activated it (most do), `activeTargetId` points at new tab — keep working there. If not, `switchTab` to it.
33
+
34
+ **Comparing two pages without losing place:**
35
+
36
+ ```json
37
+ {
38
+ "commands": [
39
+ { "method": "snapshot", "params": { "targetId": "<other-tab-target-id>" } }
40
+ ]
41
+ }
42
+ ```
43
+
44
+ `snapshot { targetId }` returns other tab's elements but **doesn't switch** — active tab unchanged. Useful for checking popup/sibling tab before committing.
45
+
46
+ **Background tab (don't lose focus):**
47
+
48
+ ```json
49
+ {
50
+ "method": "createTab",
51
+ "params": {
52
+ "url": "https://example.com/reference",
53
+ "activate": false
54
+ }
55
+ }
56
+ ```
57
+
58
+ New tab opens, current stays active. Pair with `snapshot { targetId }` later to read without switching.
59
+
60
+ ## Closing tabs
61
+
62
+ `closeTab` on **active** tab auto-switches focus to newest remaining tab. Check response's `activeTargetId`:
63
+
64
+ - New id → now active tab
65
+ - `null` → no tabs remain. `createTab` to continue or `close` to end session
66
+
67
+ ## Error codes
68
+
69
+ - **`TAB_NOT_FOUND`** — `targetId` stale. Call `getTabs` to refresh, retry with new id. Don't loop on same id
70
+ - **`TAB_CLOSED`** — tab disappeared mid-operation (OAuth flows). Call `getTabs`, retry against remaining tabs
71
+ - **`TAB_LIMIT_EXCEEDED`** — too many tabs open. Close unused one before creating another. Identify by url/title in snapshot's `tabs` list
72
+
73
+ ## Don't
74
+
75
+ - Call `getTabs` between commands. Snapshots already carry list. Reserve `getTabs` for cases where you need tab list without snapshot
76
+ - `switchTab` when only reading. Use `snapshot { targetId }` instead — cheaper, doesn't disturb focus
77
+ - Close tabs you didn't open unless user requested. Background tabs may belong to user's larger flow
@@ -0,0 +1,15 @@
1
+ import { FastMCP } from 'fastmcp';
2
+ import type { Content } from 'fastmcp';
3
+ import type { McpConfig } from '../@types/types.js';
4
+ import { AnalyticsHelper } from '../lib/analytics.js';
5
+ export { AgentParamsSchema } from './schemas.js';
6
+ export { buildCrossOriginNotice, formatConnectError, formatErrorMessage, formatSnapshot, sanitizeUpgradeBody, } from '../lib/agent-format.js';
7
+ /**
8
+ * Build the MCP response for a screenshot command, or null when there's no
9
+ * base64 payload (caller falls back to JSON text). Returns the image as a
10
+ * vision content block (~1.5K tokens) vs. ~67K inlining the base64 as text.
11
+ */
12
+ export declare const formatScreenshotContent: (result: unknown, cmd: {
13
+ params?: Record<string, unknown>;
14
+ }, caption: string, skills: string) => Content[] | null;
15
+ export declare function registerAgentTools(server: FastMCP, config: McpConfig, analytics?: AnalyticsHelper): void;