@browserless.io/mcp 1.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +557 -0
- package/README.md +280 -0
- package/bin/cli.js +2 -0
- package/build/src/@types/types.d.ts +538 -0
- package/build/src/config.d.ts +3 -0
- package/build/src/config.js +42 -0
- package/build/src/index.d.ts +4 -0
- package/build/src/index.js +153 -0
- package/build/src/lib/account-resolver.d.ts +17 -0
- package/build/src/lib/account-resolver.js +78 -0
- package/build/src/lib/agent-client.d.ts +58 -0
- package/build/src/lib/agent-client.js +530 -0
- package/build/src/lib/agent-format.d.ts +35 -0
- package/build/src/lib/agent-format.js +155 -0
- package/build/src/lib/amplitude.d.ts +11 -0
- package/build/src/lib/amplitude.js +65 -0
- package/build/src/lib/analytics.d.ts +18 -0
- package/build/src/lib/analytics.js +79 -0
- package/build/src/lib/api-client.d.ts +17 -0
- package/build/src/lib/api-client.js +357 -0
- package/build/src/lib/bounded-event-store.d.ts +22 -0
- package/build/src/lib/bounded-event-store.js +69 -0
- package/build/src/lib/cache.d.ts +12 -0
- package/build/src/lib/cache.js +49 -0
- package/build/src/lib/define-tool.d.ts +71 -0
- package/build/src/lib/define-tool.js +71 -0
- package/build/src/lib/error-classifier.d.ts +4 -0
- package/build/src/lib/error-classifier.js +125 -0
- package/build/src/lib/redis-oauth-proxy.d.ts +13 -0
- package/build/src/lib/redis-oauth-proxy.js +214 -0
- package/build/src/lib/retry.d.ts +2 -0
- package/build/src/lib/retry.js +19 -0
- package/build/src/lib/schema-fields.d.ts +10 -0
- package/build/src/lib/schema-fields.js +27 -0
- package/build/src/lib/supabase-token-patch.d.ts +6 -0
- package/build/src/lib/supabase-token-patch.js +33 -0
- package/build/src/lib/utils.d.ts +27 -0
- package/build/src/lib/utils.js +67 -0
- package/build/src/prompts/extract-content.d.ts +2 -0
- package/build/src/prompts/extract-content.js +33 -0
- package/build/src/prompts/scrape-url.d.ts +2 -0
- package/build/src/prompts/scrape-url.js +36 -0
- package/build/src/resources/api-docs.d.ts +3 -0
- package/build/src/resources/api-docs.js +54 -0
- package/build/src/resources/status.d.ts +3 -0
- package/build/src/resources/status.js +30 -0
- package/build/src/skills/autonomous-login.md +95 -0
- package/build/src/skills/captchas.md +48 -0
- package/build/src/skills/cookie-consent.md +50 -0
- package/build/src/skills/dynamic-content.md +72 -0
- package/build/src/skills/index.d.ts +9 -0
- package/build/src/skills/index.js +221 -0
- package/build/src/skills/modals.md +56 -0
- package/build/src/skills/screenshots.md +53 -0
- package/build/src/skills/shadow-dom.md +64 -0
- package/build/src/skills/snapshot-misses.md +67 -0
- package/build/src/skills/system-prompt.d.ts +2 -0
- package/build/src/skills/system-prompt.js +128 -0
- package/build/src/skills/tabs.md +77 -0
- package/build/src/tools/agent.d.ts +15 -0
- package/build/src/tools/agent.js +299 -0
- package/build/src/tools/crawl.d.ts +75 -0
- package/build/src/tools/crawl.js +426 -0
- package/build/src/tools/download.d.ts +11 -0
- package/build/src/tools/download.js +92 -0
- package/build/src/tools/export.d.ts +28 -0
- package/build/src/tools/export.js +129 -0
- package/build/src/tools/function.d.ts +24 -0
- package/build/src/tools/function.js +144 -0
- package/build/src/tools/map.d.ts +23 -0
- package/build/src/tools/map.js +129 -0
- package/build/src/tools/performance.d.ts +25 -0
- package/build/src/tools/performance.js +103 -0
- package/build/src/tools/schemas.d.ts +466 -0
- package/build/src/tools/schemas.js +487 -0
- package/build/src/tools/search.d.ts +67 -0
- package/build/src/tools/search.js +184 -0
- package/build/src/tools/smartscraper.d.ts +42 -0
- package/build/src/tools/smartscraper.js +136 -0
- package/package.json +111 -0
- package/patches/mcp-proxy+6.4.0.patch +31 -0
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
# Screenshots
|
|
2
|
+
|
|
3
|
+
Screenshot arrives as vision content block — you'll see it directly.
|
|
4
|
+
|
|
5
|
+
## Snapshot vs. Screenshot
|
|
6
|
+
|
|
7
|
+
| Need | Use |
|
|
8
|
+
| ------------------------------------ | -------------------------------- |
|
|
9
|
+
| Element identity, text, structure | `snapshot` |
|
|
10
|
+
| Visual layout, colors, rendered look | `screenshot` |
|
|
11
|
+
| Extract text | `snapshot` or `text` — never OCR |
|
|
12
|
+
| Chart, map, rendered image | `screenshot` with `selector` |
|
|
13
|
+
| Verify "does this look right?" | `screenshot` |
|
|
14
|
+
|
|
15
|
+
Snapshot is cheap, structured. Screenshot costs vision tokens — use when visual fidelity matters.
|
|
16
|
+
|
|
17
|
+
## Scope (smallest to largest)
|
|
18
|
+
|
|
19
|
+
1. **`selector: "#chart"`** — single element (best when target known)
|
|
20
|
+
2. **`clip: { x, y, width, height }`** — pixel region
|
|
21
|
+
3. **viewport** (default) — visible area
|
|
22
|
+
4. **`fullPage: true`** — entire page (use sparingly, huge tokens)
|
|
23
|
+
|
|
24
|
+
Capture smallest region that answers the question.
|
|
25
|
+
|
|
26
|
+
## Format
|
|
27
|
+
|
|
28
|
+
- **PNG** (default) — lossless, crisp text/UI
|
|
29
|
+
- **JPEG** `quality: 70-85` — smaller for photos/full-page
|
|
30
|
+
- **WebP** — better compression than JPEG
|
|
31
|
+
- **`omitBackground: true`** — transparent background for transparent elements (PNG/WebP only — JPEG has no alpha channel)
|
|
32
|
+
|
|
33
|
+
## Pattern: capture-after-action
|
|
34
|
+
|
|
35
|
+
```json
|
|
36
|
+
{
|
|
37
|
+
"commands": [
|
|
38
|
+
{ "method": "click", "params": { "selector": "button#open-modal" } },
|
|
39
|
+
{
|
|
40
|
+
"method": "waitForSelector",
|
|
41
|
+
"params": { "selector": "[role='dialog']", "timeout": 5000 }
|
|
42
|
+
},
|
|
43
|
+
{ "method": "screenshot", "params": { "selector": "[role='dialog']" } }
|
|
44
|
+
]
|
|
45
|
+
}
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
## Avoid
|
|
49
|
+
|
|
50
|
+
- OCR via evaluate (you have vision input)
|
|
51
|
+
- Screenshotting for structured data (use snapshot/evaluate)
|
|
52
|
+
- Full-page screenshots by default (pick scope)
|
|
53
|
+
- Multiple screenshots of same state (one is enough)
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
# Shadow DOM & Iframes
|
|
2
|
+
|
|
3
|
+
Snapshot contains `deep-ref=` selectors, or you hit `SELECTOR_NOT_FOUND` on regular selector. Page using shadow DOM or iframes — read before next action.
|
|
4
|
+
|
|
5
|
+
## Deep selectors: `< ` prefix
|
|
6
|
+
|
|
7
|
+
Browserless deep selectors start with `< ` (less-than, space). Space mandatory. Format:
|
|
8
|
+
|
|
9
|
+
```
|
|
10
|
+
< *url-pattern* css-selector
|
|
11
|
+
```
|
|
12
|
+
|
|
13
|
+
`*url-pattern*` optional, matches iframe URL. If omitted, selector pierces shadow roots in main frame.
|
|
14
|
+
|
|
15
|
+
When snapshot lists `deep-ref=< button#deny`, pass to `click` / `type` / `hover` exactly as shown — don't strip `< ` prefix:
|
|
16
|
+
|
|
17
|
+
```json
|
|
18
|
+
{ "method": "click", "params": { "selector": "< button#deny" } }
|
|
19
|
+
```
|
|
20
|
+
|
|
21
|
+
## Constructing deep selectors for iframes snapshot didn't surface
|
|
22
|
+
|
|
23
|
+
Snapshots only include accessible content. Iframes (captcha/payment widgets) often have nothing meaningful in accessibility tree. Build selector by hand:
|
|
24
|
+
|
|
25
|
+
- `< *google.com/recaptcha* #recaptcha-anchor` — reCAPTCHA checkbox
|
|
26
|
+
- `< *hcaptcha.com* #checkbox` — hCaptcha checkbox
|
|
27
|
+
- `< *stripe.com/* input[name='cardnumber']` — Stripe payment field
|
|
28
|
+
- `< *challenges.cloudflare.com* input[type='checkbox']` — Cloudflare Turnstile
|
|
29
|
+
|
|
30
|
+
URL pattern is glob — `*` matches any substring.
|
|
31
|
+
|
|
32
|
+
## What works and what doesn't
|
|
33
|
+
|
|
34
|
+
Coordinate-based actions work through deep selectors: **`click`, `type`, `hover`, `checkbox`**.
|
|
35
|
+
|
|
36
|
+
DOM-read actions **don't** work with deep selectors — they fail or return null: **`text`, `html`, `waitForSelector`**.
|
|
37
|
+
|
|
38
|
+
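To read content from shadow root or iframe, use `evaluate` with explicit traversal. Same-origin iframes only — `contentDocument` is `null` for cross-origin frames (most captcha/payment widgets):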
|
|
39
|
+
|
|
40
|
+
```json
|
|
41
|
+
{
|
|
42
|
+
"method": "evaluate",
|
|
43
|
+
"params": {
|
|
44
|
+
"content": "(() => { const f = document.querySelector('iframe#myFrame'); return f?.contentDocument?.body?.textContent; })()"
|
|
45
|
+
}
|
|
46
|
+
}
|
|
47
|
+
```
|
|
48
|
+
|
|
49
|
+
For shadow DOM (open shadow roots only — `shadowRoot` is `null` on closed roots):
|
|
50
|
+
|
|
51
|
+
```json
|
|
52
|
+
{
|
|
53
|
+
"method": "evaluate",
|
|
54
|
+
"params": {
|
|
55
|
+
"content": "(() => document.querySelector('my-component')?.shadowRoot?.querySelector('button')?.textContent)()"
|
|
56
|
+
}
|
|
57
|
+
}
|
|
58
|
+
```
|
|
59
|
+
|
|
60
|
+
## Recovery when regular selector fails
|
|
61
|
+
|
|
62
|
+
1. Retry same selector with `< ` prefix (MCP suggests automatically)
|
|
63
|
+
2. Still failing → re-snapshot (element moved/re-rendered or page navigated)
|
|
64
|
+
3. Element in iframe → construct `< *url-pattern* css` selector by hand from iframe URL in DevTools or snapshot
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
# When Snapshot Misses Content
|
|
2
|
+
|
|
3
|
+
Snapshot at element limit (truncated) or empty. What you need may not be in it.
|
|
4
|
+
|
|
5
|
+
## Why content goes missing
|
|
6
|
+
|
|
7
|
+
- **Truncation**: snapshots cap at 500 elements by default. Dense pages (long lists, search results, infinite scroll) overflow
|
|
8
|
+
- **No accessible name**: images without `alt`, icon-only buttons, decorative links, SVGs without ARIA labels excluded from accessibility tree
|
|
9
|
+
- **Image-rendered content**: math, formulas, charts (WolframAlpha, LaTeX, Wikipedia formulas, Google image search) — result is single `<img>` with meaning in `alt` text, not DOM
|
|
10
|
+
- **Late-loading content**: page still hydrating. Wait (see dynamic-content skill if `wait*` call fails), re-snapshot
|
|
11
|
+
|
|
12
|
+
## Recipe
|
|
13
|
+
|
|
14
|
+
1. **If truncated, narrow scope first.** Most tasks don't need every element — re-snapshot with higher `maxElements` only if element genuinely beyond 500:
|
|
15
|
+
|
|
16
|
+
```json
|
|
17
|
+
{ "method": "snapshot", "params": { "maxElements": 1000 } }
|
|
18
|
+
```
|
|
19
|
+
|
|
20
|
+
2. **If element has no accessible name**, use `evaluate` to read directly:
|
|
21
|
+
|
|
22
|
+
```json
|
|
23
|
+
{
|
|
24
|
+
"method": "evaluate",
|
|
25
|
+
"params": {
|
|
26
|
+
"content": "(() => [...document.querySelectorAll('img[alt]')].map(i => i.alt))()"
|
|
27
|
+
}
|
|
28
|
+
}
|
|
29
|
+
```
|
|
30
|
+
|
|
31
|
+
Or read the `aria-label` of an icon-only button (it has no visible text):
|
|
32
|
+
|
|
33
|
+
```json
|
|
34
|
+
{
|
|
35
|
+
"method": "evaluate",
|
|
36
|
+
"params": {
|
|
37
|
+
"content": "(() => document.querySelector('[data-testid=\"close\"]')?.getAttribute('aria-label'))()"
|
|
38
|
+
}
|
|
39
|
+
}
|
|
40
|
+
```
|
|
41
|
+
|
|
42
|
+
3. **For image-rendered results** (WolframAlpha, LaTeX renderers), `alt` attribute usually carries answer:
|
|
43
|
+
|
|
44
|
+
```json
|
|
45
|
+
{
|
|
46
|
+
"method": "evaluate",
|
|
47
|
+
"params": {
|
|
48
|
+
"content": "(() => [...document.querySelectorAll('img[alt]')].map(i => i.alt).filter(Boolean))()"
|
|
49
|
+
}
|
|
50
|
+
}
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
4. **For very long lists**, scroll and re-snapshot rather than raising `maxElements` — snapshot pagination more reliable than one giant pull:
|
|
54
|
+
|
|
55
|
+
```json
|
|
56
|
+
{
|
|
57
|
+
"commands": [
|
|
58
|
+
{ "method": "scroll", "params": { "direction": "down" } },
|
|
59
|
+
{ "method": "snapshot" }
|
|
60
|
+
]
|
|
61
|
+
}
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
## Don't
|
|
65
|
+
|
|
66
|
+
- Raise `maxElements` past ~2000 — model spends more on snapshot reading than task gains. Scroll and paginate instead
|
|
67
|
+
- `evaluate` to crawl `document.body.innerHTML` for general extraction. Snapshot structured; raw HTML floods context with markup. Use `evaluate` only for _specific_ attributes snapshot can't surface
|
|
@@ -0,0 +1,2 @@
|
|
|
1
|
+
export declare const AGENT_SYSTEM_PROMPT = "Execute browser commands in persistent agent session.\n\n## Proxy (optional)\nProxy config is a **top-level tool argument** (`proxy`, `proxyCountry`, etc. on the tool call itself) \u2014 it is applied when the session is opened. **NEVER call `proxy` as a method inside `commands`** \u2014 a `{ method: \"proxy\", ... }` JSON-RPC mutation does NOT change the upstream proxy on an already-open session and will silently no-op.\n\n**If there is credible evidence the task needs a proxy, you MUST pass proxy options on the very FIRST call** (before any `goto`/`snapshot`), because the config is read once at session creation. Credible signals include: the user asks for a specific country/region/locale; the target site is known to geo-restrict or block datacenter IPs (streaming, ticketing, retail, banking, real-estate, news paywalls); a prior attempt returned 403/451/captcha/\"unusual traffic\"/\"access denied\"; the user explicitly mentions residential / sticky IP / proxy.\n\nIf you already opened a session without a proxy and now realize one is needed, you must `close` and start a new session with the proxy options set \u2014 there is no in-session switch.\n\n- `proxy: \"residential\"` \u2014 enable routing; `proxyCountry: \"us\"` \u2014 geo (ISO-2); `proxyState` / `proxyCity` (paid plans, 401 otherwise); `proxySticky: true` \u2014 stable IP; `proxyLocaleMatch: true` \u2014 match locale; `proxyPreset` \u2014 named config; `externalProxyServer: \"http://u:p@host:port\"` \u2014 bring your own (http(s) only)\n- Geo/preset/sticky require `proxy: \"residential\"` or `externalProxyServer` set\n\n## Auth\nNever log in by default. Never invent or assume credentials exist (no \"test credentials\", no \"your account\"). 
If the snapshot contains a sign-in link OR you're about to mention \"sign in\" / \"log in\" / \"auth required\" \u2014 even as a suggested option to the user \u2014 call `browserless_skill { id: \"autonomous-login\" }` **first**, then follow its gates. The skill decides whether login is appropriate and whether credentials are in scope; do not skip it just because no password field is on the page yet.\n\n## Terminal-Goal Check\nBefore declaring done, restate the user's terminal deliverable in one line and verify your evidence *directly* supports it \u2014 not a sibling question.\n**Empty-state substitution.** An empty/zero/null result from a resource that normally requires auth, scope, or filter context is evidence the *precondition* wasn't met \u2014 not evidence the question is answered. Empty cart while logged out, zero results while geo-restricted, empty inbox while unauthenticated: precondition failure \u2192 fix the precondition (often: load `autonomous-login`), don't return the empty result as the answer.\n**Multi-step preconditions.** When the task names multiple steps (\"go to X, then Y, report Z\"), evaluate preconditions for the *full chain* before treating any step as optional. A blocker on step N blocks the whole task even if step 1 returned data.\n\n## Skills (auto-injected)\nSKILL blocks auto-inject between `--- SKILL: <id> ---` markers when page/error needs special handling. 
Read carefully.\nLoad manually via **browserless_skill** if suspected but not injected:\n- `autonomous-login` \u2014 gates, credential rules, MFA/captcha, final JSON shape (see `## Auth` above for when to load)\n- `shadow-dom` \u2014 deep selectors, iframe targeting\n- `cookie-consent` \u2014 vendor-specific dismiss recipes\n- `modals` \u2014 closing dialogs and alertdialogs\n- `captchas` \u2014 the `solve` command (Cloud only)\n- `snapshot-misses` \u2014 truncated/empty snapshots, image-rendered content\n- `dynamic-content` \u2014 choosing the right `wait*` method\n- `screenshots` \u2014 when to screenshot vs. snapshot, scope and format choices\n- `tabs` \u2014 multi-tab workflows, peek-without-switching\n\n## Core Loop (ReAct: Reason \u2192 Act \u2192 Observe)\n1. **goto** \u2014 waits \"domcontentloaded\"\n2. **snapshot** \u2014 returns interactive + informational elements (button, link, textbox, combobox, checkbox, heading, img+alt) with ref= selectors\n3. **Plan** all actions from snapshot\n4. **Batch** execute\n5. **Re-snapshot** only if page changed\n6. Repeat \u2192 **close** when done\n\n## Snapshot Rules\n- Until you snapshot a page, you CANNOT click/type/interact \u2014 snapshot first, no exceptions\n- NEVER guess, assume, or infer selectors \u2014 CSS selectors from your training data are wrong. ONLY use ref= / deep-ref= from latest snapshot\n- Snapshot STALE after: click, goto, select, navigation\n- Snapshot VALID after: type, hover, scroll, evaluate\n- Expect new content? \u2192 re-snapshot\n- Element roles in snapshot (link, button, textbox, combobox, checkbox, heading) tell you what each does\n\n## Selectors\n- Use **ref=** (CSS) or **deep-ref=** (starts `< `) exactly as shown in snapshot\n- Example: `[3] button \"Sign In\" ref=button#submit` \u2192 `\"button#submit\"`\n- deep-ref for shadow DOM \u2014 see `shadow-dom` skill\n\n## Tabs\nSnapshots include `tabs` + `activeTargetId` \u2014 no getTabs needed. 
Multi-tab / `snapshot { targetId }` in `tabs` skill (auto-loads when >1 tab).\n\n## Links\n**Prefer goto over click** for links with href \u2014 immune to layout shifts, overlays, misclicks.\nExample: `[5] a \"About\" ref=a[href='/about']` \u2192 `goto { url: \"https://ex.com/about\" }`\nOnly click when href is `javascript:` / `#` / missing.\n\n## Content Extraction\n1. Check in-memory snapshot (text/values already there)\n2. **text** { selector } \u2014 from specific element\n3. **evaluate** { content } \u2014 JS (IIFE): `(() => { return ... })()`\n4. **html** { selector } \u2014 raw HTML\n\n## Batching \u2014 Maximize Per Call\nPlan ALL actions from snapshot before next snapshot.\n\n**Process:**\n1. Classify actions: **safe** (type, hover, scroll, evaluate, select, checkbox) vs. **page-changing** (click, goto)\n2. Batch: safe FIRST \u2192 page-changing LAST\n3. For forms: if submit button is in snapshot, batch type + click in one call\n4. Don't batch across navigations\n\n**Example form:**\n```json\n{ \"commands\": [\n { \"method\": \"type\", \"params\": { \"selector\": \"input#email\", \"text\": \"j@d.com\" } },\n { \"method\": \"click\", \"params\": { \"selector\": \"button#submit\" } }\n] }\n```\n\n## Async\nAfter async triggers (search, submit), use `wait*` before snapshot \u2014 `waitForResponse` best when API URL known. `dynamic-content` skill auto-loads on timeout. 
Never `evaluate` with setTimeout.\n\n## Error Recovery\nErrors tagged `Category: <NAME>`:\n- **SELECTOR_MISS** \u2014 re-snapshot; retry `< selector` if not already deep-ref\n- **SESSION_LOST** \u2014 a fresh session was opened automatically; re-goto + snapshot (prior state gone)\n- **UNAUTHORIZED** / **FORBIDDEN** \u2014 pick different path\n- **NOT_FOUND** \u2014 different URL\n- **SERVER_ERROR** \u2014 backoff, retry once\n- **NAVIGATION_FAILED** \u2014 verify URL\n- **TIMEOUT** \u2014 longer wait or different signal\n- **INVALID_PARAMS** \u2014 fix params (schema authoritative)\n- **UNKNOWN** \u2014 re-snapshot + re-plan\n\n`! NOTICE: URL changed cross-origin` = prior plan/refs invalid, re-plan.\nNever retry same failed action without re-snapshot.\n\n## Methods (non-obvious)\n- **goto** { url, waitUntil? } \u2014 default \"domcontentloaded\"; prefer over click for links\n- **snapshot** { maxElements?, targetId? } \u2014 cap 500; targetId peeks non-active tab\n- **evaluate** { content } \u2014 IIFE only\n- **waitForSelector** { selector, timeout? } \u2014 set 5000-10000ms\n- **waitForResponse** { url?, statuses?, timeout? } \u2014 url is glob `\"*api/results*\"`\n- **createTab** { url?, activate?, waitUntil? } \u2014 default activate: true; false = background\n- **close** \u2014 own call, NOT batched; only when task complete (premature close discards page state)\n- See schema for: screenshot, solve, back, forward, reload, click, type, select, checkbox, hover, scroll, text, html, waitForNavigation, waitForTimeout, waitForRequest, liveURL, getTabs, switchTab, closeTab\n\n";
|
|
2
|
+
export declare const SKILL_TOOL_DESCRIPTION = "Load a Browserless agent skill on demand.\n\nUse this when you suspect the page exhibits a non-trivial mechanic but no SKILL block was auto-injected into a previous response. The auto-injection heuristics are conservative; calling this tool is the explicit fallback.\n\nAvailable skills:\n- **shadow-dom** \u2014 deep selectors, iframe URL-pattern syntax, what works through deep-ref\n- **cookie-consent** \u2014 vendor-specific dismiss recipes (OneTrust, Cookiebot, Didomi, etc.)\n- **modals** \u2014 close-button heuristics, ESC handling, alertdialog vs. dialog\n- **snapshot-misses** \u2014 truncated/empty snapshots, image-rendered content\n- **dynamic-content** \u2014 choosing the right `wait*` method after async triggers\n- **screenshots** \u2014 when to screenshot vs. snapshot, scope and format choices\n- **tabs** \u2014 multi-tab workflows, peek-without-switching\n- **autonomous-login** \u2014 load before authenticating: when the user asked you to log in, when a wall blocks the task, or as soon as a password input appears. Covers the don't-login-by-default posture, contextual credential matching, MFA/captcha branches, and the required final JSON response shape.\n- **captchas** \u2014 the `solve` command, response semantics, escalation path (Cloud-only)";
|
|
@@ -0,0 +1,128 @@
|
|
|
1
|
+
export const AGENT_SYSTEM_PROMPT = `Execute browser commands in persistent agent session.
|
|
2
|
+
|
|
3
|
+
## Proxy (optional)
|
|
4
|
+
Proxy config is a **top-level tool argument** (\`proxy\`, \`proxyCountry\`, etc. on the tool call itself) — it is applied when the session is opened. **NEVER call \`proxy\` as a method inside \`commands\`** — a \`{ method: "proxy", ... }\` JSON-RPC mutation does NOT change the upstream proxy on an already-open session and will silently no-op.
|
|
5
|
+
|
|
6
|
+
**If there is credible evidence the task needs a proxy, you MUST pass proxy options on the very FIRST call** (before any \`goto\`/\`snapshot\`), because the config is read once at session creation. Credible signals include: the user asks for a specific country/region/locale; the target site is known to geo-restrict or block datacenter IPs (streaming, ticketing, retail, banking, real-estate, news paywalls); a prior attempt returned 403/451/captcha/"unusual traffic"/"access denied"; the user explicitly mentions residential / sticky IP / proxy.
|
|
7
|
+
|
|
8
|
+
If you already opened a session without a proxy and now realize one is needed, you must \`close\` and start a new session with the proxy options set — there is no in-session switch.
|
|
9
|
+
|
|
10
|
+
- \`proxy: "residential"\` — enable routing; \`proxyCountry: "us"\` — geo (ISO-2); \`proxyState\` / \`proxyCity\` (paid plans, 401 otherwise); \`proxySticky: true\` — stable IP; \`proxyLocaleMatch: true\` — match locale; \`proxyPreset\` — named config; \`externalProxyServer: "http://u:p@host:port"\` — bring your own (http(s) only)
|
|
11
|
+
- Geo/preset/sticky require \`proxy: "residential"\` or \`externalProxyServer\` set
|
|
12
|
+
|
|
13
|
+
## Auth
|
|
14
|
+
Never log in by default. Never invent or assume credentials exist (no "test credentials", no "your account"). If the snapshot contains a sign-in link OR you're about to mention "sign in" / "log in" / "auth required" — even as a suggested option to the user — call \`browserless_skill { id: "autonomous-login" }\` **first**, then follow its gates. The skill decides whether login is appropriate and whether credentials are in scope; do not skip it just because no password field is on the page yet.
|
|
15
|
+
|
|
16
|
+
## Terminal-Goal Check
|
|
17
|
+
Before declaring done, restate the user's terminal deliverable in one line and verify your evidence *directly* supports it — not a sibling question.
|
|
18
|
+
**Empty-state substitution.** An empty/zero/null result from a resource that normally requires auth, scope, or filter context is evidence the *precondition* wasn't met — not evidence the question is answered. Empty cart while logged out, zero results while geo-restricted, empty inbox while unauthenticated: precondition failure → fix the precondition (often: load \`autonomous-login\`), don't return the empty result as the answer.
|
|
19
|
+
**Multi-step preconditions.** When the task names multiple steps ("go to X, then Y, report Z"), evaluate preconditions for the *full chain* before treating any step as optional. A blocker on step N blocks the whole task even if step 1 returned data.
|
|
20
|
+
|
|
21
|
+
## Skills (auto-injected)
|
|
22
|
+
SKILL blocks auto-inject between \`--- SKILL: <id> ---\` markers when page/error needs special handling. Read carefully.
|
|
23
|
+
Load manually via **browserless_skill** if suspected but not injected:
|
|
24
|
+
- \`autonomous-login\` — gates, credential rules, MFA/captcha, final JSON shape (see \`## Auth\` above for when to load)
|
|
25
|
+
- \`shadow-dom\` — deep selectors, iframe targeting
|
|
26
|
+
- \`cookie-consent\` — vendor-specific dismiss recipes
|
|
27
|
+
- \`modals\` — closing dialogs and alertdialogs
|
|
28
|
+
- \`captchas\` — the \`solve\` command (Cloud only)
|
|
29
|
+
- \`snapshot-misses\` — truncated/empty snapshots, image-rendered content
|
|
30
|
+
- \`dynamic-content\` — choosing the right \`wait*\` method
|
|
31
|
+
- \`screenshots\` — when to screenshot vs. snapshot, scope and format choices
|
|
32
|
+
- \`tabs\` — multi-tab workflows, peek-without-switching
|
|
33
|
+
|
|
34
|
+
## Core Loop (ReAct: Reason → Act → Observe)
|
|
35
|
+
1. **goto** — waits "domcontentloaded"
|
|
36
|
+
2. **snapshot** — returns interactive + informational elements (button, link, textbox, combobox, checkbox, heading, img+alt) with ref= selectors
|
|
37
|
+
3. **Plan** all actions from snapshot
|
|
38
|
+
4. **Batch** execute
|
|
39
|
+
5. **Re-snapshot** only if page changed
|
|
40
|
+
6. Repeat → **close** when done
|
|
41
|
+
|
|
42
|
+
## Snapshot Rules
|
|
43
|
+
- Until you snapshot a page, you CANNOT click/type/interact — snapshot first, no exceptions
|
|
44
|
+
- NEVER guess, assume, or infer selectors — CSS selectors from your training data are wrong. ONLY use ref= / deep-ref= from latest snapshot
|
|
45
|
+
- Snapshot STALE after: click, goto, select, navigation
|
|
46
|
+
- Snapshot VALID after: type, hover, scroll, evaluate
|
|
47
|
+
- Expect new content? → re-snapshot
|
|
48
|
+
- Element roles in snapshot (link, button, textbox, combobox, checkbox, heading) tell you what each does
|
|
49
|
+
|
|
50
|
+
## Selectors
|
|
51
|
+
- Use **ref=** (CSS) or **deep-ref=** (starts \`< \`) exactly as shown in snapshot
|
|
52
|
+
- Example: \`[3] button "Sign In" ref=button#submit\` → \`"button#submit"\`
|
|
53
|
+
- deep-ref for shadow DOM — see \`shadow-dom\` skill
|
|
54
|
+
|
|
55
|
+
## Tabs
|
|
56
|
+
Snapshots include \`tabs\` + \`activeTargetId\` — no getTabs needed. Multi-tab / \`snapshot { targetId }\` in \`tabs\` skill (auto-loads when >1 tab).
|
|
57
|
+
|
|
58
|
+
## Links
|
|
59
|
+
**Prefer goto over click** for links with href — immune to layout shifts, overlays, misclicks.
|
|
60
|
+
Example: \`[5] a "About" ref=a[href='/about']\` → \`goto { url: "https://ex.com/about" }\`
|
|
61
|
+
Only click when href is \`javascript:\` / \`#\` / missing.
|
|
62
|
+
|
|
63
|
+
## Content Extraction
|
|
64
|
+
1. Check in-memory snapshot (text/values already there)
|
|
65
|
+
2. **text** { selector } — from specific element
|
|
66
|
+
3. **evaluate** { content } — JS (IIFE): \`(() => { return ... })()\`
|
|
67
|
+
4. **html** { selector } — raw HTML
|
|
68
|
+
|
|
69
|
+
## Batching — Maximize Per Call
|
|
70
|
+
Plan ALL actions from snapshot before next snapshot.
|
|
71
|
+
|
|
72
|
+
**Process:**
|
|
73
|
+
1. Classify actions: **safe** (type, hover, scroll, evaluate, select, checkbox) vs. **page-changing** (click, goto)
|
|
74
|
+
2. Batch: safe FIRST → page-changing LAST
|
|
75
|
+
3. For forms: if submit button is in snapshot, batch type + click in one call
|
|
76
|
+
4. Don't batch across navigations
|
|
77
|
+
|
|
78
|
+
**Example form:**
|
|
79
|
+
\`\`\`json
|
|
80
|
+
{ "commands": [
|
|
81
|
+
{ "method": "type", "params": { "selector": "input#email", "text": "j@d.com" } },
|
|
82
|
+
{ "method": "click", "params": { "selector": "button#submit" } }
|
|
83
|
+
] }
|
|
84
|
+
\`\`\`
|
|
85
|
+
|
|
86
|
+
## Async
|
|
87
|
+
After async triggers (search, submit), use \`wait*\` before snapshot — \`waitForResponse\` best when API URL known. \`dynamic-content\` skill auto-loads on timeout. Never \`evaluate\` with setTimeout.
|
|
88
|
+
|
|
89
|
+
## Error Recovery
|
|
90
|
+
Errors tagged \`Category: <NAME>\`:
|
|
91
|
+
- **SELECTOR_MISS** — re-snapshot; retry \`< selector\` if not already deep-ref
|
|
92
|
+
- **SESSION_LOST** — a fresh session was opened automatically; re-goto + snapshot (prior state gone)
|
|
93
|
+
- **UNAUTHORIZED** / **FORBIDDEN** — pick different path
|
|
94
|
+
- **NOT_FOUND** — different URL
|
|
95
|
+
- **SERVER_ERROR** — backoff, retry once
|
|
96
|
+
- **NAVIGATION_FAILED** — verify URL
|
|
97
|
+
- **TIMEOUT** — longer wait or different signal
|
|
98
|
+
- **INVALID_PARAMS** — fix params (schema authoritative)
|
|
99
|
+
- **UNKNOWN** — re-snapshot + re-plan
|
|
100
|
+
|
|
101
|
+
\`! NOTICE: URL changed cross-origin\` = prior plan/refs invalid, re-plan.
|
|
102
|
+
Never retry same failed action without re-snapshot.
|
|
103
|
+
|
|
104
|
+
## Methods (non-obvious)
|
|
105
|
+
- **goto** { url, waitUntil? } — default "domcontentloaded"; prefer over click for links
|
|
106
|
+
- **snapshot** { maxElements?, targetId? } — cap 500; targetId peeks non-active tab
|
|
107
|
+
- **evaluate** { content } — IIFE only
|
|
108
|
+
- **waitForSelector** { selector, timeout? } — set 5000-10000ms
|
|
109
|
+
- **waitForResponse** { url?, statuses?, timeout? } — url is glob \`"*api/results*"\`
|
|
110
|
+
- **createTab** { url?, activate?, waitUntil? } — default activate: true; false = background
|
|
111
|
+
- **close** — own call, NOT batched; only when task complete (premature close discards page state)
|
|
112
|
+
- See schema for: screenshot, solve, back, forward, reload, click, type, select, checkbox, hover, scroll, text, html, waitForNavigation, waitForTimeout, waitForRequest, liveURL, getTabs, switchTab, closeTab
|
|
113
|
+
|
|
114
|
+
`;
|
|
115
|
+
export const SKILL_TOOL_DESCRIPTION = `Load a Browserless agent skill on demand.
|
|
116
|
+
|
|
117
|
+
Use this when you suspect the page exhibits a non-trivial mechanic but no SKILL block was auto-injected into a previous response. The auto-injection heuristics are conservative; calling this tool is the explicit fallback.
|
|
118
|
+
|
|
119
|
+
Available skills:
|
|
120
|
+
- **shadow-dom** — deep selectors, iframe URL-pattern syntax, what works through deep-ref
|
|
121
|
+
- **cookie-consent** — vendor-specific dismiss recipes (OneTrust, Cookiebot, Didomi, etc.)
|
|
122
|
+
- **modals** — close-button heuristics, ESC handling, alertdialog vs. dialog
|
|
123
|
+
- **snapshot-misses** — truncated/empty snapshots, image-rendered content
|
|
124
|
+
- **dynamic-content** — choosing the right \`wait*\` method after async triggers
|
|
125
|
+
- **screenshots** — when to screenshot vs. snapshot, scope and format choices
|
|
126
|
+
- **tabs** — multi-tab workflows, peek-without-switching
|
|
127
|
+
- **autonomous-login** — load before authenticating: when the user asked you to log in, when a wall blocks the task, or as soon as a password input appears. Covers the don't-login-by-default posture, contextual credential matching, MFA/captcha branches, and the required final JSON response shape.
|
|
128
|
+
- **captchas** — the \`solve\` command, response semantics, escalation path (Cloud-only)`;
|
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
# Working with Tabs
|
|
2
|
+
|
|
3
|
+
Applies when the page spawned (or you opened) multiple tabs, or when a tab-related error occurred. Tab management has sharp edges — read this before issuing tab commands.
|
|
4
|
+
|
|
5
|
+
## Snapshots include tab state
|
|
6
|
+
|
|
7
|
+
Every `snapshot` response includes `tabs[]` (`targetId`, `url`, `title`, `active`) and `activeTargetId`. After an action that spawns a tab — a `target="_blank"` click, `window.open`, or an OAuth popup — the next snapshot's `tabs` list includes the new tab. **No need to call `getTabs` unless you want a fresh list without taking a snapshot.**
|
|
8
|
+
|
|
9
|
+
## Commands
|
|
10
|
+
|
|
11
|
+
| Command | Use |
|
|
12
|
+
| ------------------------------------------- | -------------------------------------------- |
|
|
13
|
+
| `getTabs` | Refresh tab list without snapshot |
|
|
14
|
+
| `switchTab { targetId }` | Make another tab active |
|
|
15
|
+
| `createTab { url?, activate?, waitUntil? }` | Open new tab — defaults to `activate: true` |
|
|
16
|
+
| `closeTab { targetId }` | Close tab |
|
|
17
|
+
| `snapshot { targetId }` | Peek at non-active tab **without switching** |
|
|
18
|
+
|
|
19
|
+
## Patterns
|
|
20
|
+
|
|
21
|
+
**Following `target="_blank"` link:**
|
|
22
|
+
|
|
23
|
+
```json
|
|
24
|
+
{
|
|
25
|
+
"commands": [
|
|
26
|
+
{ "method": "click", "params": { "selector": "a#docs-link" } },
|
|
27
|
+
{ "method": "snapshot" }
|
|
28
|
+
]
|
|
29
|
+
}
|
|
30
|
+
```
|
|
31
|
+
|
|
32
|
+
The new tab appears in the snapshot's `tabs` list. If the click activated it (most such clicks do), `activeTargetId` points at the new tab — keep working there. If not, `switchTab` to it.
|
|
33
|
+
|
|
34
|
+
**Comparing two pages without losing place:**
|
|
35
|
+
|
|
36
|
+
```json
|
|
37
|
+
{
|
|
38
|
+
"commands": [
|
|
39
|
+
{ "method": "snapshot", "params": { "targetId": "<other-tab-target-id>" } }
|
|
40
|
+
]
|
|
41
|
+
}
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
`snapshot { targetId }` returns other tab's elements but **doesn't switch** — active tab unchanged. Useful for checking popup/sibling tab before committing.
|
|
45
|
+
|
|
46
|
+
**Background tab (don't lose focus):**
|
|
47
|
+
|
|
48
|
+
```json
|
|
49
|
+
{
|
|
50
|
+
"method": "createTab",
|
|
51
|
+
"params": {
|
|
52
|
+
"url": "https://example.com/reference",
|
|
53
|
+
"activate": false
|
|
54
|
+
}
|
|
55
|
+
}
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
New tab opens, current stays active. Pair with `snapshot { targetId }` later to read without switching.
|
|
59
|
+
|
|
60
|
+
## Closing tabs
|
|
61
|
+
|
|
62
|
+
`closeTab` on **active** tab auto-switches focus to newest remaining tab. Check response's `activeTargetId`:
|
|
63
|
+
|
|
64
|
+
- New id → that tab is now the active tab
|
|
65
|
+
- `null` → no tabs remain. `createTab` to continue or `close` to end session
|
|
66
|
+
|
|
67
|
+
## Error codes
|
|
68
|
+
|
|
69
|
+
- **`TAB_NOT_FOUND`** — `targetId` stale. Call `getTabs` to refresh, retry with new id. Don't loop on same id
|
|
70
|
+
- **`TAB_CLOSED`** — tab disappeared mid-operation (OAuth flows). Call `getTabs`, retry against remaining tabs
|
|
71
|
+
- **`TAB_LIMIT_EXCEEDED`** — too many tabs open. Close unused one before creating another. Identify by url/title in snapshot's `tabs` list
|
|
72
|
+
|
|
73
|
+
## Don't
|
|
74
|
+
|
|
75
|
+
- Call `getTabs` between commands. Snapshots already carry the tab list; reserve `getTabs` for cases where you have no snapshot
|
|
76
|
+
- `switchTab` when only reading. Use `snapshot { targetId }` instead — cheaper, doesn't disturb focus
|
|
77
|
+
- Close tabs you didn't open unless user requested. Background tabs may belong to user's larger flow
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
import { FastMCP } from 'fastmcp';
|
|
2
|
+
import type { Content } from 'fastmcp';
|
|
3
|
+
import type { McpConfig } from '../@types/types.js';
|
|
4
|
+
import { AnalyticsHelper } from '../lib/analytics.js';
|
|
5
|
+
export { AgentParamsSchema } from './schemas.js';
|
|
6
|
+
export { buildCrossOriginNotice, formatConnectError, formatErrorMessage, formatSnapshot, sanitizeUpgradeBody, } from '../lib/agent-format.js';
|
|
7
|
+
/**
|
|
8
|
+
* Build the MCP response for a screenshot command, or null when there's no
|
|
9
|
+
* base64 payload (caller falls back to JSON text). Returns the image as a
|
|
10
|
+
* vision content block (~1.5K tokens) vs. ~67K tokens when inlining the base64 as text.
|
|
11
|
+
*/
|
|
12
|
+
// Signature notes: `result` is typed `unknown`, so it has to be narrowed before
// use; `cmd.params` is optional; the return is a list of MCP content blocks or null.
// NOTE(review): `caption` and `skills` look like text that accompanies the image
// block — confirm against the implementation.
export declare const formatScreenshotContent: (result: unknown, cmd: {
|
|
13
|
+
params?: Record<string, unknown>;
|
|
14
|
+
}, caption: string, skills: string) => Content[] | null;
|
|
15
|
+
/**
 * Registers the agent tools on a FastMCP server. NOTE(review): this is inferred
 * from the name and signature; the implementation is not part of this
 * declaration file, so confirm which tools are registered there.
 *
 * @param server - FastMCP server instance to operate on.
 * @param config - MCP configuration object.
 * @param analytics - Optional analytics helper; may be omitted.
 */
export declare function registerAgentTools(server: FastMCP, config: McpConfig, analytics?: AnalyticsHelper): void;
|