@browserless.io/mcp 1.6.2 → 1.7.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33) hide show
  1. package/README.md +13 -12
  2. package/build/src/@types/types.d.ts +27 -3
  3. package/build/src/index.js +21 -29
  4. package/build/src/lib/agent-client.d.ts +5 -4
  5. package/build/src/lib/agent-client.js +87 -16
  6. package/build/src/lib/agent-format.d.ts +1 -1
  7. package/build/src/lib/agent-format.js +22 -4
  8. package/build/src/lib/define-tool.d.ts +5 -0
  9. package/build/src/lib/define-tool.js +1 -0
  10. package/build/src/lib/download-store.d.ts +17 -0
  11. package/build/src/lib/download-store.js +84 -0
  12. package/build/src/lib/http-auth.d.ts +22 -0
  13. package/build/src/lib/http-auth.js +33 -0
  14. package/build/src/resources/download-route.d.ts +16 -0
  15. package/build/src/resources/download-route.js +53 -0
  16. package/build/src/resources/upload-route.d.ts +3 -0
  17. package/build/src/resources/upload-route.js +53 -0
  18. package/build/src/skills/auth-profile.md +66 -0
  19. package/build/src/skills/autonomous-login.md +44 -43
  20. package/build/src/skills/file-transfers.md +88 -0
  21. package/build/src/skills/index.js +19 -0
  22. package/build/src/skills/shadow-dom.md +10 -1
  23. package/build/src/skills/system-prompt.d.ts +3 -2
  24. package/build/src/skills/system-prompt.js +32 -2
  25. package/build/src/tools/agent.d.ts +23 -0
  26. package/build/src/tools/agent.js +212 -30
  27. package/build/src/tools/map.js +1 -1
  28. package/build/src/tools/schemas.d.ts +79 -0
  29. package/build/src/tools/schemas.js +126 -3
  30. package/build/src/tools/smartscraper.js +4 -3
  31. package/package.json +5 -3
  32. package/build/src/tools/download.d.ts +0 -11
  33. package/build/src/tools/download.js +0 -92
@@ -0,0 +1,22 @@
1
+ import type { McpConfig } from '../@types/types.js';
2
+ export interface ResolvedBrowserlessAuth {
3
+ token: string;
4
+ apiUrl: string;
5
+ attachSessionId?: string;
6
+ }
7
+ export interface AuthInput {
8
+ authHeader?: string;
9
+ tokenQuery?: string;
10
+ apiUrlHeader?: string;
11
+ browserlessUrlQuery?: string;
12
+ sessionIdHeader?: string;
13
+ sessionIdQuery?: string;
14
+ }
15
+ /**
16
+ * Resolve a Browserless API token from an inbound HTTP request, in order:
17
+ * (1) Authorization header with a plain API key, (2) `?token=` query param,
18
+ * (3) Authorization header with a Supabase JWT → resolved via PostgREST.
19
+ * Throws when none is present/valid. Shared by the FastMCP `authenticate`
20
+ * callback and the custom `/upload` route so both gate on the same rules.
21
+ */
22
+ export declare const resolveBrowserlessAuth: (input: AuthInput, config: Pick<McpConfig, "browserlessApiUrl" | "supabaseUrl" | "supabaseServiceRoleKey">) => Promise<ResolvedBrowserlessAuth>;
@@ -0,0 +1,33 @@
1
+ import { resolveApiKey } from './account-resolver.js';
2
+ /**
3
+ * Resolve a Browserless API token from an inbound HTTP request, in order:
4
+ * (1) Authorization header with a plain API key, (2) `?token=` query param,
5
+ * (3) Authorization header with a Supabase JWT → resolved via PostgREST.
6
+ * Throws when none is present/valid. Shared by the FastMCP `authenticate`
7
+ * callback and the custom `/upload` route so both gate on the same rules.
8
+ */
9
+ export const resolveBrowserlessAuth = async (input, config) => {
10
+ const headerToken = input.authHeader?.startsWith('Bearer ')
11
+ ? input.authHeader.slice(7)
12
+ : input.authHeader;
13
+ const apiUrl = input.apiUrlHeader ?? input.browserlessUrlQuery ?? config.browserlessApiUrl;
14
+ // A pre-created session id to attach to, threaded by the autologin runner.
15
+ // The agent tool opens /chromium/agent?sessionId=<this> instead of doing its
16
+ // own POST /profile.
17
+ const attachSessionId = input.sessionIdHeader ?? input.sessionIdQuery ?? undefined;
18
+ // JWTs have 3 dot-separated base64url segments; plain API keys do not.
19
+ const isJwt = headerToken ? headerToken.split('.').length === 3 : false;
20
+ if (headerToken && !isJwt) {
21
+ return { token: headerToken, apiUrl, attachSessionId };
22
+ }
23
+ if (input.tokenQuery) {
24
+ return { token: input.tokenQuery, apiUrl, attachSessionId };
25
+ }
26
+ if (isJwt && headerToken) {
27
+ const { apiKey } = await resolveApiKey(config.supabaseUrl, config.supabaseServiceRoleKey, headerToken);
28
+ return { token: apiKey, apiUrl, attachSessionId };
29
+ }
30
+ throw new Error('No Browserless API token provided. ' +
31
+ 'Pass it as Authorization: Bearer <token> header, ' +
32
+ '?token= query parameter, or authenticate via OAuth.');
33
+ };
@@ -0,0 +1,16 @@
1
+ import type { FastMCP } from 'fastmcp';
2
+ import type { McpConfig } from '../@types/types.js';
3
+ /**
4
+ * Registers `GET /download/:id` on the HTTP-stream server. getDownloads surfaces
5
+ * a download as a notification (metadata only) plus this URL; the client fetches
6
+ * the bytes out-of-band when it decides to save them — over plain HTTP, NOT
7
+ * through the conversation:
8
+ *
9
+ * curl -s "<mcpBaseUrl>/download/<id>?token=<token>" -o ./file
10
+ *
11
+ * Single use: the file is removed from the store and disk once served (or after
12
+ * the 15-min TTL / session end, whichever comes first). Same token rules as the
13
+ * MCP surface. Only meaningful for httpStream; in stdio the file is already on
14
+ * the local disk at the path getDownloads reported.
15
+ */
16
+ export declare function registerDownloadRoute(server: FastMCP, config: McpConfig): void;
@@ -0,0 +1,53 @@
1
+ import { readFile, rm } from 'node:fs/promises';
2
+ import { consumeDownload } from '../lib/download-store.js';
3
+ import { resolveBrowserlessAuth } from '../lib/http-auth.js';
4
+ /**
5
+ * Registers `GET /download/:id` on the HTTP-stream server. getDownloads surfaces
6
+ * a download as a notification (metadata only) plus this URL; the client fetches
7
+ * the bytes out-of-band when it decides to save them — over plain HTTP, NOT
8
+ * through the conversation:
9
+ *
10
+ * curl -s "<mcpBaseUrl>/download/<id>?token=<token>" -o ./file
11
+ *
12
+ * Single use: the file is removed from the store and disk once served (or after
13
+ * the 15-min TTL / session end, whichever comes first). Same token rules as the
14
+ * MCP surface. Only meaningful for httpStream; in stdio the file is already on
15
+ * the local disk at the path getDownloads reported.
16
+ */
17
+ export function registerDownloadRoute(server, config) {
18
+ const app = server.getApp();
19
+ app.get('/download/:id', async (c) => {
20
+ try {
21
+ await resolveBrowserlessAuth({
22
+ authHeader: c.req.header('authorization'),
23
+ tokenQuery: c.req.query('token'),
24
+ apiUrlHeader: c.req.header('x-browserless-api-url'),
25
+ browserlessUrlQuery: c.req.query('browserlessUrl'),
26
+ }, config);
27
+ }
28
+ catch {
29
+ return c.json({ ok: false, error: 'Unauthorized' }, 401);
30
+ }
31
+ // Single-use: consume removes it from the registry so a second GET 404s.
32
+ const record = consumeDownload(c.req.param('id'));
33
+ if (!record) {
34
+ return c.json({
35
+ ok: false,
36
+ error: 'Not found (already fetched, expired, or unknown)',
37
+ }, 404);
38
+ }
39
+ try {
40
+ const data = await readFile(record.path);
41
+ c.header('Content-Type', record.mimeType);
42
+ c.header('Content-Disposition', `attachment; filename="${record.filename.replace(/"/g, '')}"`);
43
+ return c.body(data);
44
+ }
45
+ catch {
46
+ return c.json({ ok: false, error: 'File no longer available' }, 410);
47
+ }
48
+ finally {
49
+ // Drop the bytes once served (or on read failure) — single use.
50
+ void rm(record.path, { force: true }).catch(() => { });
51
+ }
52
+ });
53
+ }
@@ -0,0 +1,3 @@
1
+ import type { FastMCP } from 'fastmcp';
2
+ import type { McpConfig } from '../@types/types.js';
3
+ export declare function registerUploadRoute(server: FastMCP, config: McpConfig): void;
@@ -0,0 +1,53 @@
1
+ import { downloadUri, storeDownload, FILE_TRANSFER_MAX_BYTES, } from '../lib/download-store.js';
2
+ import { resolveBrowserlessAuth } from '../lib/http-auth.js';
3
+ // Registers `POST /upload` (httpStream only): clients push a file's bytes over
4
+ // plain HTTP and get back a handle to pass to the agent's `uploadFile`.
5
+ // curl -s -F file=@/path/to/file "<mcpBaseUrl>/upload?token=<token>"
6
+ // Same token as the MCP surface; the base64 never enters the model's context.
7
+ export function registerUploadRoute(server, config) {
8
+ const app = server.getApp();
9
+ app.post('/upload', async (c) => {
10
+ // Raw Hono routes bypass FastMCP's authenticate, so gate the route on the
11
+ // same Browserless token rules as the MCP surface — no anonymous drops.
12
+ try {
13
+ await resolveBrowserlessAuth({
14
+ authHeader: c.req.header('authorization'),
15
+ tokenQuery: c.req.query('token'),
16
+ apiUrlHeader: c.req.header('x-browserless-api-url'),
17
+ browserlessUrlQuery: c.req.query('browserlessUrl'),
18
+ }, config);
19
+ }
20
+ catch {
21
+ return c.json({ ok: false, error: 'Unauthorized' }, 401);
22
+ }
23
+ let file;
24
+ try {
25
+ const body = await c.req.parseBody();
26
+ file = body.file;
27
+ }
28
+ catch {
29
+ return c.json({
30
+ ok: false,
31
+ error: 'Expected multipart/form-data with a "file" field',
32
+ }, 400);
33
+ }
34
+ if (!(file instanceof File)) {
35
+ return c.json({
36
+ ok: false,
37
+ error: 'Missing multipart "file" field (use -F file=@path)',
38
+ }, 400);
39
+ }
40
+ const buf = Buffer.from(await file.arrayBuffer());
41
+ if (buf.byteLength > FILE_TRANSFER_MAX_BYTES) {
42
+ return c.json({ ok: false, error: 'FileTooLarge', maxBytes: FILE_TRANSFER_MAX_BYTES }, 413);
43
+ }
44
+ const record = await storeDownload(file.name || 'upload', file.type || 'application/octet-stream', buf);
45
+ return c.json({
46
+ ok: true,
47
+ handle: downloadUri(record.id),
48
+ filename: record.filename,
49
+ mimeType: record.mimeType,
50
+ size: record.size,
51
+ });
52
+ });
53
+ }
@@ -0,0 +1,66 @@
1
+ # Authenticated Profiles
2
+
3
+ A **profile** is a server-side bundle of cookies, localStorage, and IndexedDB
4
+ captured from a live agent session and replayed on future sessions that connect
5
+ with `profile=<name>`. Use it whenever a task needs the browser to start
6
+ already signed in.
7
+
8
+ ## Recipe — creating a profile
9
+
10
+ 1. **Open a creation session.** Call `browserless_agent` with a top-level
11
+ `createProfile` object — do NOT pass `profile` (the two are mutually
12
+ exclusive). The MCP tool calls `POST /profile` for you, attaches the WS
13
+ to the creation session, and gives you a non-headless browser with a
14
+ 10-minute keepalive:
15
+ ```json
16
+ {
17
+ "createProfile": { "name": "github" },
18
+ "commands": [
19
+ { "method": "goto", "params": { "url": "https://github.com/login" } }
20
+ ]
21
+ }
22
+ ```
23
+ 2. **Drive the auth flow like a normal task.** Type credentials (use values
24
+ the user supplied — never invent them), submit, and handle any
25
+ MFA/CAPTCHA step. If a CAPTCHA appears, load the `captchas` skill and
26
+ run `solve`.
27
+ 3. **Verify you are actually signed in before saving.** Re-snapshot and
28
+ confirm at least one of:
29
+ - an authenticated-only element (account menu, "Sign out" link, avatar)
30
+ - the URL is the post-login destination (not `/login`, `/signin`, or an
31
+ error path)
32
+ - a known auth cookie name appears in `document.cookie`
33
+ If none of these hold, do NOT save — a logged-out profile is worse than
34
+ no profile.
35
+ 4. **Call `saveProfile`** as the next command (JSON-RPC, no `Browserless.`
36
+ prefix):
37
+ ```json
38
+ { "method": "saveProfile", "params": { "name": "github" } }
39
+ ```
40
+ Pass the same `name` you opened the session with. If a profile with that
41
+ name already exists for this token, the call returns `ok: false` with an
42
+ `error` saying the profile already exists. Don't retry `saveProfile` with the
43
+ same name — choose a different name, or tell the user a profile by that name
44
+ already exists.
45
+ 5. **Inspect the result.** A successful save returns:
46
+
47
+ ```json
48
+ {
49
+ "ok": true,
50
+ "profileId": "...",
51
+ "name": "github",
52
+ "cookieCount": 12,
53
+ "originCount": 3,
54
+ "skippedOriginsCount": 0,
55
+ "skippedIdbDatabasesCount": 0,
56
+ "skippedIdbStoresCount": 0
57
+ }
58
+ ```
59
+
60
+ - `cookieCount === 0` is a red flag — the site likely uses session-only
61
+ cookies or storage you can't capture. Tell the user.
62
+ - Any non-zero `skipped*` count means partial capture — surface it.
63
+
64
+ 6. **Close** the session. Tell the user the profile name and how to use it
65
+ ("future calls can pass `profile: \"github\"`"). Do not echo cookie
66
+ values or any captured state.
@@ -1,95 +1,96 @@
1
1
  # Autonomous Login
2
2
 
3
- Page wants auth. **Default: don't.** Logins are intrusive and can damage account state. Proceed only when both gates pass.
3
+ Page wants auth. **Default: don't.** Logins are intrusive and can damage account state. Proceed only when the gates below pass.
4
4
 
5
- ## Gate 1 — Login required for continuing _this_ task?
5
+ ## Gate 0 — Did you drop your session binding?
6
6
 
7
- If the user's task is literally "log in / post / DM", or needs login to continue, gate passed. For extract/read/observe tasks, check whether the wall actually blocks the goal:
7
+ `profile` (and `proxy`) bind **each** call to its hydrated session. If an earlier call this flow was logged in but this one looks logged out, the cause is almost certainly a missing `profile`/`proxy` param on **this** call — not stale cookies. Re-issue the call **with** the binding before treating the wall as real. Never re-authenticate to repair a parameter you forgot to pass.
8
8
 
9
- - Target content already in DOM beneath the wall? Read it directly.
10
- - Dismiss available (`Maybe later`, `Skip`, modal `×`)? Click it.
11
- - Alternative path — public mirror, archive.org, RSS, JSON endpoint, deep link?
9
+ ## Gate 1 — Login required to continue _this_ task?
12
10
 
13
- If the rest of the task completes without auth → `LOGIN_NOT_NEEDED`. Wikipedia, public docs/news, public read-only profiles.
11
+ Task is literally "log in / post / DM" or needs login to proceed → pass. For read/extract tasks, check the wall actually blocks the goal:
14
12
 
15
- ## Gate 2 — Credentials unambiguously for _this_ site?
13
+ - Content already in DOM beneath the wall → read it.
14
+ - Dismiss available (`Maybe later`, `Skip`, `×`) → click it.
15
+ - Alt path (public mirror, archive.org, RSS, JSON endpoint, deep link) → use it.
16
+
17
+ Task completes without auth → `LOGIN_NOT_NEEDED` (Wikipedia, public docs/news, public read-only profiles).
16
18
 
17
- **Password is not required to pass Gate 2.** Many sites use magic-link / email-only / passkey auth — an email alone (or any contextually-matched identifier) can be sufficient. Don't preemptively fail Gate 2 because no password is in context; let the form tell you at runtime. Only fail Gate 2 if the form actually demands a credential type you don't have.
19
+ ## Gate 2 — Credentials unambiguously for _this_ site?
18
20
 
19
- Identified **contextually** by name-to-domain correspondence — fixed names not required. Bar is **extraordinary evidence**, not plausibility.
21
+ **Password not required** — magic-link / email-only / passkey sites accept an email (or any contextually-matched identifier) alone. Don't fail early for a missing password; let the form demand it at runtime. Fail only if the form requires a credential type you lack.
20
22
 
21
- - ✅ `instagram.com` + `instagramHandle` / `instagramPassword`
22
- - ✅ `LOGIN_USERNAME` / `LOGIN_PASSWORD` paired with `LOGIN_TARGET_URL` whose host matches
23
- - ❌ `wikipedia.org` + `instagramHandle` (names belong to a different service)
24
- - ❌ Bare `username` / `password` with no domain qualifier (ambiguous)
23
+ Match **contextually** by name-to-domain correspondence (fixed names not required). Bar is **extraordinary evidence**, not plausibility.
25
24
 
26
- Absent / ambiguous / multiple plausible pairs → `MISSING_CONTEXT`. TOTP follows the same rule.
25
+ - ✅ `instagram.com` + `instagramHandle`/`instagramPassword`
26
+ - ✅ `LOGIN_USERNAME`/`LOGIN_PASSWORD` + `LOGIN_TARGET_URL` host matches
27
+ - ❌ `wikipedia.org` + `instagramHandle` (different service)
28
+ - ❌ bare `username`/`password`, no domain qualifier (ambiguous)
27
29
 
28
- ---
30
+ Absent / ambiguous / multiple plausible pairs → `MISSING_CONTEXT`. TOTP same rule.
29
31
 
30
- If either gate fails, stop and emit the matching `reason_code`. Rest runs only when both pass.
32
+ Gate 1 or 2 fails → stop, emit the matching `reason_code`. (Gate 0 isn't a stop — it means fix the call and retry.) Continue only when both pass.
31
33
 
32
34
  ## Reach the form
33
35
 
34
36
  - Password input in snapshot → continue.
35
- - Sign-in link/button visible → click, wait, re-snapshot.
36
- - Email-first (username only): type username, click `Continue` / `Next`, `waitForSelector` on `input[type="password"]` (10000ms), re-snapshot.
37
- - After two transitions with no password input → `FORM_NOT_FOUND`.
37
+ - Sign-in link/button → click, wait, re-snapshot.
38
+ - Email-first → type username, click `Continue`/`Next`, `waitForSelector` on `input[type="password"]` (10000ms), re-snapshot.
39
+ - Two transitions, still no password → `FORM_NOT_FOUND`.
38
40
 
39
41
  ## Sanity check
40
42
 
41
- Confirm login (not signup/reset): submit name is `Sign in` / `Log in` / `Continue` (not `Sign up` / `Register` / `Reset`), and exactly **one** password field present. Else `FORM_NOT_FOUND`.
43
+ Login, not signup/reset: submit reads `Sign in`/`Log in`/`Continue` (not `Sign up`/`Register`/`Reset`) and exactly **one** password field. Else `FORM_NOT_FOUND`.
42
44
 
43
45
  ## Field selection (anchor off password)
44
46
 
45
- - **Password**: `input[type="password"]`. With multiples: matches `/password/i` and **not** `confirm|new password`.
46
- - **Username** (first match): same-form `input[type="email"]` → input matching `/email|username|user|login|account/i` → visible text/email/tel input immediately preceding the password in `ref` order.
47
- - **Submit** (first match): same-form button matching `/^(sign in|log in|login|continue|submit)$/i` → `button[type="submit"]` in form → the only non-SSO visible button (skip `Continue with Google` etc. unless context names that provider).
47
+ - **Password**: `input[type="password"]`; with multiples, matches `/password/i` and not `confirm|new password`.
48
+ - **Username** (first hit): same-form `input[type="email"]` → `/email|username|user|login|account/i` → visible text/email/tel input immediately preceding the password in `ref` order.
49
+ - **Submit** (first hit): same-form button `/^(sign in|log in|login|continue|submit)$/i` → `button[type="submit"]` in form → the only non-SSO visible button (skip `Continue with Google` etc. unless context names that provider).
48
50
 
49
- Any missing → `FORM_NOT_FOUND` with what's missing.
51
+ Anything missing → `FORM_NOT_FOUND` (say what's missing).
50
52
 
51
53
  ## Submit
52
54
 
53
- Single batched call (type username, type password, click submit) with Gate-2 values. Then `waitForNavigation` (10000ms) or `waitForResponse` on `*`. If both time out, verify anyway — page may have updated in place. Re-snapshot.
55
+ One batched call (type username, type password, click submit) with Gate-2 values → `waitForNavigation` (10000ms) or `waitForResponse` on `*`. Both time out → verify anyway (page may update in place). Re-snapshot. **Never retype the same credentials to retry** — caller's call.
54
56
 
55
57
  ## Verify success (any one, priority order)
56
58
 
57
59
  1. URL no longer matches `/login|signin|sign-in|log-in|auth|sso|account\/sign/i`.
58
60
  2. Password input absent from new snapshot.
59
- 3. Authed-state element matching `/log out|sign out|my account|profile|dashboard|avatar/i`.
61
+ 3. Authed element matching `/log out|sign out|my account|profile|dashboard|avatar/i`.
60
62
 
61
- If none holds:
63
+ The visible account/display name will usually NOT equal the email or username you typed (it's the profile's display name, often a real name) — that's expected, NOT a mismatch. Never mark a login failed because the shown identity differs from the credential; judge only by the three signals above.
62
64
 
63
- - Form error matching `/invalid|incorrect|wrong|doesn'?t match|not recognized|please try again/i` → `INVALID_CREDENTIALS`.
64
- - Captcha indicator → invoke `captchas` skill, re-verify. Unsolvable → `CAPTCHA_BLOCKED`.
65
+ None holds:
66
+
67
+ - Error matching `/invalid|incorrect|wrong|doesn'?t match|not recognized|please try again/i` → `INVALID_CREDENTIALS`.
68
+ - Captcha → invoke `captchas` skill, re-verify; unsolvable → `CAPTCHA_BLOCKED`.
65
69
  - MFA prompt → MFA branch.
66
70
  - No change, no error → `SUBMIT_NO_FEEDBACK`.
67
71
 
68
- **Never retype the same credentials to retry.** Caller's call.
69
-
70
72
  ## MFA branch
71
73
 
72
- Required when snapshot has `autocomplete="one-time-code"`, numeric input with `maxlength` ∈ {4, 6, 8}, or label/`name`/`placeholder` matching `/code|verification|otp|2fa|two[- ]?factor|authenticator/i`.
74
+ Triggered by `autocomplete="one-time-code"`, numeric input with `maxlength` ∈ {4,6,8}, or label/`name`/`placeholder` matching `/code|verification|otp|2fa|two[- ]?factor|authenticator/i`.
73
75
 
74
- - Contextually-matched TOTP available (same Gate-2 rule) → type, click submit, re-verify.
75
- - **No matching TOTP in context → ask the user for the code in plain text and STOP this turn. Do not call `close`. Do not emit the final JSON block. Leave the agent session open so the next turn can resume — the OTP input is still on the page and the cookies/state are intact.** When the user replies with a code, treat it as the TOTP value, type + click submit + re-verify. If the user declines or says they don't have one → `MFA_INPUT_MISSING`. Never attempt SMS/email/WebAuthn flows.
76
- - TOTP rejected (`/invalid|expired|incorrect/i`) → ask user for a fresh code (same don't-close rule); after one fresh-code rejection → `MFA_FAILED`.
77
- - Second MFA prompt after first cleared → `UNEXPECTED_STATE`.
76
+ - Contextually-matched TOTP (Gate-2 rule) → type, submit, re-verify.
77
+ - **No matching TOTP → ask the user for the code in plain text and STOP this turn. Do NOT `close`, do NOT emit the final JSON. Leave the session open so the OTP input and cookies/state survive to next turn.** User replies → treat as the TOTP, type + submit + re-verify. User declines / has none → `MFA_INPUT_MISSING`. Never attempt SMS/email/WebAuthn.
78
+ - TOTP rejected (`/invalid|expired|incorrect/i`) → ask for a fresh code (same don't-close rule); one fresh-code rejection → `MFA_FAILED`.
79
+ - Second MFA prompt after the first cleared → `UNEXPECTED_STATE`.
78
80
 
79
81
  ## Final response
80
82
 
81
- Call `close`, then emit **exactly one** fenced JSON block — nothing before or after, no prose. Fields: `success`, `reason_code`, `final_url`, `evidence`, `steps_taken` (JSON-RPC call count; batched call = 1). On failure, `success: false` and `final_url` = current URL.
83
+ `close`, then emit **exactly one** fenced JSON block — nothing before or after, no prose. Fields: `success`, `reason_code`, `final_url`, `evidence`, `steps_taken` (JSON-RPC call count; batched call = 1). On failure: `success: false`, `final_url` = current URL.
82
84
 
83
85
  `reason_code` ∈ `SUCCESS` | `LOGIN_NOT_NEEDED` | `MISSING_CONTEXT` | `INVALID_CREDENTIALS` | `MFA_INPUT_MISSING` | `MFA_FAILED` | `CAPTCHA_BLOCKED` | `FORM_NOT_FOUND` | `SUBMIT_NO_FEEDBACK` | `FIELD_TYPE_MISMATCH` | `UNEXPECTED_STATE`.
84
86
 
85
87
  ## Don't
86
88
 
87
89
  - Log in just because a form is visible — gates first.
88
- - Use credentials whose names don't unambiguously belong to this site.
89
- - Guess among multiple plausible pairs → `MISSING_CONTEXT`.
90
- - Retry with the same credentials after failure.
90
+ - Re-authenticate to fix an apparent logout before confirming you passed `profile`/`proxy` (Gate 0).
91
+ - Use credentials whose names don't unambiguously belong to this site; guess among plausible pairs (→ `MISSING_CONTEXT`); or retry the same credentials after failure.
91
92
  - Try SSO buttons unless the task names that provider.
92
93
  - `evaluate` to set input `value` — use `type` so real keystrokes fire.
93
94
  - Leak credentials into narration, errors, or non-`type.params.text` fields.
94
- - Emit anything other than the final JSON block in your last _terminal_ message (ask-the-user turns are not terminal — emit plain prose and stop without `close`).
95
- - Close the session while waiting for a user-supplied OTP — leave it open so cookies, page state, and the OTP input survive the round-trip.
95
+ - Emit anything but the final JSON in your last _terminal_ message (ask-the-user turns aren't terminal — plain prose, stop, no `close`).
96
+ - `close` while awaiting a user-supplied OTP — leave the session open so cookies, page state, and the OTP input survive the round-trip.
@@ -0,0 +1,88 @@
1
+ # File Uploads & Downloads
2
+
3
+ Transferring files to/from the browser. Two methods: `uploadFile` (attach files to an `<input type="file">`) and `getDownloads` (retrieve files Chrome downloaded).
4
+
5
+ **Do not `curl`/`wget`/`fetch` a file yourself to download it.** That only works for a public, static, directly-addressable URL — the easy case. The general case (files behind login/cookies, generated server-side on demand, or served by a click via `Content-Disposition` headers) has **no URL you can fetch**, and a direct fetch silently returns the wrong bytes, an HTML page, or a 403. **Drive the browser** (click/goto), and the file is captured for you. A direct fetch is only correct when this flow _hands you_ a URL (the single-use `/download/<id>` URL, or an over-cap `sourceUrl`).
6
+
7
+ **Key idea — never move bytes through this conversation.** Large files as base64 blow up the context. So downloads come back as a _handle_ (a path or a `browserless-download://` URI), and uploads take that handle (or a local path) instead of base64. The MCP server reads/writes the actual bytes on disk; you only pass small references. Only fall back to base64 `content` when you genuinely have raw bytes and no handle.
8
+
9
+ ## Downloading
10
+
11
+ Just trigger the download in the agent — navigate to the file URL, or click a download link/button:
12
+
13
+ ```json
14
+ {
15
+ "commands": [
16
+ { "method": "goto", "params": { "url": "https://example.com/report.csv" } }
17
+ ]
18
+ }
19
+ ```
20
+
21
+ - Captured downloads **auto-surface**: every agent response carries the current download ledger — **never the bytes**. You don't need to call anything to see it.
22
+ - A short, size-scaled grace wait lets quick downloads land on the **same** call. A slower one shows up as **in-progress with a byte count** ("downloading 2.0MB / 10MB") — just keep using the browser; it'll appear completed on a later response. As long as you keep touching the browser, the download state stays fresh.
23
+ - Files **larger than the cap** aren't transferred: you get a `FileTooLarge` note with the **source URL** — fetch it directly (e.g. `curl`) if you have network access.
24
+ - You decide whether to save each file. (`getDownloads` still exists for an explicit poll, but it's rarely needed.)
25
+
26
+ **Local (stdio) mode:** the file is already on the local disk (`BROWSERLESS_DOWNLOAD_DIR`, default a temp dir). The response lists the saved **path** — use/move it, or hand it straight back to `uploadFile { path }`. Nothing more to fetch.
27
+
28
+ **Remote (HTTP) mode:** the server can't write to your disk, so each file comes with a **single-use** GET URL. Fetch it with `curl` to save locally — works **once**:
29
+
30
+ ```bash
31
+ curl -s "<MCP_BASE_URL>/download/<id>?token=<YOUR_BROWSERLESS_TOKEN>" -o "report.csv"
32
+ ```
33
+
34
+ The exact command (with id + token + URL) is in the `getDownloads` response. Alternatively, reuse the handle as `uploadFile { files: [{ handle: "browserless-download://<id>" }] }` to re-upload it elsewhere without ever fetching the bytes. A file is dropped after one GET, after 15 minutes, or when the session ends — whichever comes first.
35
+
36
+ ## Uploading
37
+
38
+ ```json
39
+ {
40
+ "method": "uploadFile",
41
+ "params": {
42
+ "selector": "input[type=file]",
43
+ "files": [
44
+ { "handle": "browserless-download://abc-1", "name": "report.pdf" }
45
+ ]
46
+ }
47
+ }
48
+ ```
49
+
50
+ Each file is resolved in this order — pick the first you have:
51
+
52
+ - **`handle`** — a handle from a previous `getDownloads`, or from staging a local file (below). The server reads the stored file. Works in **both** transports. This is how you re-upload a file you just downloaded — zero bytes through the conversation.
53
+ - **`path`** — a local filesystem path. **stdio only** (HTTP can't read your filesystem). The server reads and encodes it.
54
+ - **`content`** — base64 bytes. Last resort; avoid for large files.
55
+
56
+ ### Uploading a NEW local file in HTTP mode
57
+
58
+ The server can't read your filesystem, so stage the file once over HTTP (bytes go via `curl`, never through the conversation), then use the returned handle:
59
+
60
+ ```bash
61
+ curl -s -F file=@"/path/to/file.png" "<MCP_BASE_URL>/upload?token=<YOUR_BROWSERLESS_TOKEN>"
62
+ # → { "ok": true, "handle": "browserless-download://abc-1", "filename": "file.png", ... }
63
+ ```
64
+
65
+ The `/upload` route requires your Browserless token (`?token=` or `Authorization: Bearer`). The `uploadFile` path-rejection error gives you the exact command with the token filled in.
66
+
67
+ ```json
68
+ {
69
+ "method": "uploadFile",
70
+ "params": {
71
+ "selector": "input[type=file]",
72
+ "files": [{ "handle": "browserless-download://abc-1" }]
73
+ }
74
+ }
75
+ ```
76
+
77
+ Staged files share the download store (15-minute TTL). **Never** base64 a file into `content` by hand — that's what staging avoids.
78
+
79
+ Other params:
80
+
81
+ - `selector` — the file input. If hidden behind a styled button, the input still exists in the DOM; target it directly (use a deep selector — prefix `<` followed by a space — for shadow DOM).
82
+ - `name` / `mimeType` — optional; default from the handle/path, mimeType inferred from the extension.
83
+ - Triggers native `input`/`change` events, so frameworks (React, etc.) see the file.
84
+ - Returns `{ "ok": true }`, or `{ "ok": false, "error": "SelectorNotFound" | "InvalidTarget" | "FileTooLarge" }`.
85
+
86
+ ## Size limits
87
+
88
+ Uploads and downloads are capped (server default 10MB, hard max 50MB). Oversized downloads report `error: "FileTooLarge"` (metadata, no data); oversized uploads return `ok: false, error: "FileTooLarge"`.
@@ -11,6 +11,7 @@ const LOGIN_URL_RE = /\/(login|signin|sign-?in|log-?in|auth|sso|oauth)\b|\/accou
11
11
  const LOGIN_NUDGE_RE = /sign in to (view|see|continue|access|read|comment|post|reply|save|order|buy|checkout|your account)|please sign in|signed out\b.*sign in|create (an )?account to/i;
12
12
  const TAB_ERROR_CODES = ['TAB_NOT_FOUND', 'TAB_CLOSED', 'TAB_LIMIT_EXCEEDED'];
13
13
  const TAB_COMMAND_METHODS = ['getTabs', 'switchTab', 'createTab', 'closeTab'];
14
+ const FILE_TRANSFER_METHODS = ['uploadFile', 'getDownloads'];
14
15
  const evalPredicate = (p, ctx) => {
15
16
  switch (p.kind) {
16
17
  case 'snapshot.has-element': {
@@ -145,6 +146,24 @@ const SKILL_SPECS = [
145
146
  ],
146
147
  ],
147
148
  },
149
+ {
150
+ // No auto-fire triggers: there is no snapshot/error/command signal for
151
+ // "about to create a profile". The model loads it by id via
152
+ // browserless_skill, prompted by the createProfile field description.
153
+ id: 'auth-profile',
154
+ path: 'src/skills/auth-profile.md',
155
+ triggers: [],
156
+ },
157
+ {
158
+ id: 'file-transfers',
159
+ path: 'src/skills/file-transfers.md',
160
+ triggers: [
161
+ // A file input on the page — uploads are likely next.
162
+ [{ kind: 'snapshot.has-input-type', type: 'file' }],
163
+ // The model issued an upload/download command.
164
+ [{ kind: 'command.method', methods: FILE_TRANSFER_METHODS }],
165
+ ],
166
+ },
148
167
  {
149
168
  id: 'captchas',
150
169
  path: 'src/skills/captchas.md',
@@ -2,6 +2,15 @@
2
2
 
3
3
  Snapshot contains `deep-ref=` selectors, or you hit `SELECTOR_NOT_FOUND` on regular selector. Page using shadow DOM or iframes — read before next action.
4
4
 
5
+ ## Iframes in the snapshot
6
+
7
+ Iframes (same-origin and cross-origin) are now snapshotted too. When present:
8
+
9
+ - Snapshot shows a `Frames (N iframes):` block listing each frame's label, URL, and origin.
10
+ - Elements inside a frame are tagged `[frame#N]` and carry a ready `deep-ref=` selector — cross-origin uses `< *url* css`, same-origin uses `< css`. Pass it as-is to `click`/`type`/`hover`/`checkbox` — no frame switching, no hand-construction.
11
+
12
+ Only build a deep selector by hand (below) when a frame element wasn't surfaced (a11y-empty widget, capped snapshot).
13
+
5
14
  ## Deep selectors: `< ` prefix
6
15
 
7
16
  Browserless deep selectors start with `< ` (less-than, space). Space mandatory. Format:
@@ -20,7 +29,7 @@ When snapshot lists `deep-ref=< button#deny`, pass to `click` / `type` / `hover`
20
29
 
21
30
  ## Constructing deep selectors for iframes snapshot didn't surface
22
31
 
23
- Snapshots only include accessible content. Iframes (captcha/payment widgets) often have nothing meaningful in accessibility tree. Build selector by hand:
32
+ Fallback only — most cross-origin iframes are now in the snapshot (see above). Some widgets still have nothing meaningful in the accessibility tree. Build selector by hand:
24
33
 
25
34
  - `< *google.com/recaptcha* #recaptcha-anchor` — reCAPTCHA checkbox
26
35
  - `< *hcaptcha.com* #checkbox` — hCaptcha checkbox
@@ -1,2 +1,3 @@
1
- export declare const AGENT_SYSTEM_PROMPT = "Execute browser commands in persistent agent session.\n\n## Proxy (optional)\nProxy config is a **top-level tool argument** (`proxy`, `proxyCountry`, etc. on the tool call itself) \u2014 it is applied when the session is opened. **NEVER call `proxy` as a method inside `commands`** \u2014 a `{ method: \"proxy\", ... }` JSON-RPC mutation does NOT change the upstream proxy on an already-open session and will silently no-op.\n\n**If there is credible evidence the task needs a proxy, you MUST pass proxy options on the very FIRST call** (before any `goto`/`snapshot`), because the config is read once at session creation. Credible signals include: the user asks for a specific country/region/locale; the target site is known to geo-restrict or block datacenter IPs (streaming, ticketing, retail, banking, real-estate, news paywalls); a prior attempt returned 403/451/captcha/\"unusual traffic\"/\"access denied\"; the user explicitly mentions residential / sticky IP / proxy.\n\nIf you already opened a session without a proxy and now realize one is needed, you must `close` and start a new session with the proxy options set \u2014 there is no in-session switch.\n\n- `proxy: \"residential\"` \u2014 enable routing; `proxyCountry: \"us\"` \u2014 geo (ISO-2); `proxyState` / `proxyCity` (paid plans, 401 otherwise); `proxySticky: true` \u2014 stable IP; `proxyLocaleMatch: true` \u2014 match locale; `proxyPreset` \u2014 named config; `externalProxyServer: \"http://u:p@host:port\"` \u2014 bring your own (http(s) only)\n- Geo/preset/sticky require `proxy: \"residential\"` or `externalProxyServer` set\n\n## Auth\nNever log in by default. Never invent or assume credentials exist (no \"test credentials\", no \"your account\"). 
If the snapshot contains a sign-in link OR you're about to mention \"sign in\" / \"log in\" / \"auth required\" \u2014 even as a suggested option to the user \u2014 call `browserless_skill { id: \"autonomous-login\" }` **first**, then follow its gates. The skill decides whether login is appropriate and whether credentials are in scope; do not skip it just because no password field is on the page yet.\n\n## Terminal-Goal Check\nBefore declaring done, restate the user's terminal deliverable in one line and verify your evidence *directly* supports it \u2014 not a sibling question.\n**Empty-state substitution.** An empty/zero/null result from a resource that normally requires auth, scope, or filter context is evidence the *precondition* wasn't met \u2014 not evidence the question is answered. Empty cart while logged out, zero results while geo-restricted, empty inbox while unauthenticated: precondition failure \u2192 fix the precondition (often: load `autonomous-login`), don't return the empty result as the answer.\n**Multi-step preconditions.** When the task names multiple steps (\"go to X, then Y, report Z\"), evaluate preconditions for the *full chain* before treating any step as optional. A blocker on step N blocks the whole task even if step 1 returned data.\n\n## Skills (auto-injected)\nSKILL blocks auto-inject between `--- SKILL: <id> ---` markers when page/error needs special handling. 
Read carefully.\nLoad manually via **browserless_skill** if suspected but not injected:\n- `autonomous-login` \u2014 gates, credential rules, MFA/captcha, final JSON shape (see `## Auth` above for when to load)\n- `shadow-dom` \u2014 deep selectors, iframe targeting\n- `cookie-consent` \u2014 vendor-specific dismiss recipes\n- `modals` \u2014 closing dialogs and alertdialogs\n- `captchas` \u2014 the `solve` command (Cloud only)\n- `snapshot-misses` \u2014 truncated/empty snapshots, image-rendered content\n- `dynamic-content` \u2014 choosing the right `wait*` method\n- `screenshots` \u2014 when to screenshot vs. snapshot, scope and format choices\n- `tabs` \u2014 multi-tab workflows, peek-without-switching\n\n## Core Loop (ReAct: Reason \u2192 Act \u2192 Observe)\n1. **goto** \u2014 waits \"domcontentloaded\"\n2. **snapshot** \u2014 returns interactive + informational elements (button, link, textbox, combobox, checkbox, heading, img+alt) with ref= selectors\n3. **Plan** all actions from snapshot\n4. **Batch** execute\n5. **Re-snapshot** only if page changed\n6. Repeat \u2192 **close** when done\n\n## Snapshot Rules\n- Until you snapshot a page, you CANNOT click/type/interact \u2014 snapshot first, no exceptions\n- NEVER guess, assume, or infer selectors \u2014 CSS selectors from your training data are wrong. ONLY use ref= / deep-ref= from latest snapshot\n- Snapshot STALE after: click, goto, select, navigation\n- Snapshot VALID after: type, hover, scroll, evaluate\n- Expect new content? \u2192 re-snapshot\n- Element roles in snapshot (link, button, textbox, combobox, checkbox, heading) tell you what each does\n\n## Selectors\n- Use **ref=** (CSS) or **deep-ref=** (starts `< `) exactly as shown in snapshot\n- Example: `[3] button \"Sign In\" ref=button#submit` \u2192 `\"button#submit\"`\n- deep-ref for shadow DOM \u2014 see `shadow-dom` skill\n\n## Tabs\nSnapshots include `tabs` + `activeTargetId` \u2014 no getTabs needed. 
Multi-tab / `snapshot { targetId }` in `tabs` skill (auto-loads when >1 tab).\n\n## Links\n**Prefer goto over click** for links with href \u2014 immune to layout shifts, overlays, misclicks.\nExample: `[5] a \"About\" ref=a[href='/about']` \u2192 `goto { url: \"https://ex.com/about\" }`\nOnly click when href is `javascript:` / `#` / missing.\n\n## Content Extraction\n1. Check in-memory snapshot (text/values already there)\n2. **text** { selector } \u2014 from specific element\n3. **evaluate** { content } \u2014 JS (IIFE): `(() => { return ... })()`\n4. **html** { selector } \u2014 raw HTML\n\n## Batching \u2014 Maximize Per Call\nPlan ALL actions from snapshot before next snapshot.\n\n**Process:**\n1. Classify actions: **safe** (type, hover, scroll, evaluate, select, checkbox) vs. **page-changing** (click, goto)\n2. Batch: safe FIRST \u2192 page-changing LAST\n3. For forms: if submit button is in snapshot, batch type + click in one call\n4. Don't batch across navigations\n\n**Example form:**\n```json\n{ \"commands\": [\n { \"method\": \"type\", \"params\": { \"selector\": \"input#email\", \"text\": \"j@d.com\" } },\n { \"method\": \"click\", \"params\": { \"selector\": \"button#submit\" } }\n] }\n```\n\n## Async\nAfter async triggers (search, submit), use `wait*` before snapshot \u2014 `waitForResponse` best when API URL known. `dynamic-content` skill auto-loads on timeout. 
Never `evaluate` with setTimeout.\n\n## Error Recovery\nErrors tagged `Category: <NAME>`:\n- **SELECTOR_MISS** \u2014 re-snapshot; retry `< selector` if not already deep-ref\n- **SESSION_LOST** \u2014 a fresh session was opened automatically; re-goto + snapshot (prior state gone)\n- **UNAUTHORIZED** / **FORBIDDEN** \u2014 pick different path\n- **NOT_FOUND** \u2014 different URL\n- **SERVER_ERROR** \u2014 backoff, retry once\n- **NAVIGATION_FAILED** \u2014 verify URL\n- **TIMEOUT** \u2014 longer wait or different signal\n- **INVALID_PARAMS** \u2014 fix params (schema authoritative)\n- **UNKNOWN** \u2014 re-snapshot + re-plan\n\n`! NOTICE: URL changed cross-origin` = prior plan/refs invalid, re-plan.\nNever retry same failed action without re-snapshot.\n\n## Methods (non-obvious)\n- **goto** { url, waitUntil? } \u2014 default \"domcontentloaded\"; prefer over click for links\n- **snapshot** { maxElements?, targetId? } \u2014 cap 500; targetId peeks non-active tab\n- **evaluate** { content } \u2014 IIFE only\n- **waitForSelector** { selector, timeout? } \u2014 set 5000-10000ms\n- **waitForResponse** { url?, statuses?, timeout? } \u2014 url is glob `\"*api/results*\"`\n- **createTab** { url?, activate?, waitUntil? } \u2014 default activate: true; false = background\n- **close** \u2014 own call, NOT batched; only when task complete (premature close discards page state)\n- See schema for: screenshot, solve, back, forward, reload, click, type, select, checkbox, hover, scroll, text, html, waitForNavigation, waitForTimeout, waitForRequest, liveURL, getTabs, switchTab, closeTab\n\n";
2
- export declare const SKILL_TOOL_DESCRIPTION = "Load a Browserless agent skill on demand.\n\nUse this when you suspect the page exhibits a non-trivial mechanic but no SKILL block was auto-injected into a previous response. The auto-injection heuristics are conservative; calling this tool is the explicit fallback.\n\nAvailable skills:\n- **shadow-dom** \u2014 deep selectors, iframe URL-pattern syntax, what works through deep-ref\n- **cookie-consent** \u2014 vendor-specific dismiss recipes (OneTrust, Cookiebot, Didomi, etc.)\n- **modals** \u2014 close-button heuristics, ESC handling, alertdialog vs. dialog\n- **snapshot-misses** \u2014 truncated/empty snapshots, image-rendered content\n- **dynamic-content** \u2014 choosing the right `wait*` method after async triggers\n- **screenshots** \u2014 when to screenshot vs. snapshot, scope and format choices\n- **tabs** \u2014 multi-tab workflows, peek-without-switching\n- **autonomous-login** \u2014 load before authenticating: when the user asked you to log in, when a wall blocks the task, or as soon as a password input appears. Covers the don't-login-by-default posture, contextual credential matching, MFA/captcha branches, and the required final JSON response shape.\n- **captchas** \u2014 the `solve` command, response semantics, escalation path (Cloud-only)";
1
+ export declare const AGENT_SYSTEM_PROMPT = "Execute browser commands in persistent agent session.\n\n## Proxy (optional)\nProxy config is a **top-level tool argument** (`proxy`, `proxyCountry`, etc. on the tool call itself) \u2014 it is applied when the session is opened. **NEVER call `proxy` as a method inside `commands`** \u2014 a `{ method: \"proxy\", ... }` JSON-RPC mutation does NOT change the upstream proxy on an already-open session and will silently no-op.\n\n**If there is credible evidence the task needs a proxy, you MUST pass proxy options on the very FIRST call** (before any `goto`/`snapshot`), because the config is read once at session creation. Credible signals include: the user asks for a specific country/region/locale; the target site is known to geo-restrict or block datacenter IPs (streaming, ticketing, retail, banking, real-estate, news paywalls); a prior attempt returned 403/451/captcha/\"unusual traffic\"/\"access denied\"; the user explicitly mentions residential / sticky IP / proxy.\n\nIf you already opened a session without a proxy and now realize one is needed, you must `close` and start a new session with the proxy options set \u2014 there is no in-session switch.\n\n- `proxy: \"residential\"` \u2014 enable routing; `proxyCountry: \"us\"` \u2014 geo (ISO-2); `proxyState` / `proxyCity` (paid plans, 401 otherwise); `proxySticky: true` \u2014 stable IP; `proxyLocaleMatch: true` \u2014 match locale; `proxyPreset` \u2014 named config; `externalProxyServer: \"http://u:p@host:port\"` \u2014 bring your own (http(s) only)\n- Geo/preset/sticky require `proxy: \"residential\"` or `externalProxyServer` set\n\n## Auth\nNever log in by default. Never invent or assume credentials exist (no \"test credentials\", no \"your account\"). 
If the snapshot contains a sign-in link OR you're about to mention \"sign in\" / \"log in\" / \"auth required\" \u2014 even as a suggested option to the user \u2014 call `browserless_skill { id: \"autonomous-login\" }` **first**, then follow its gates. The skill decides whether login is appropriate and whether credentials are in scope; do not skip it just because no password field is on the page yet.\n\n## Terminal-Goal Check\nBefore declaring done, restate the user's terminal deliverable in one line and verify your evidence *directly* supports it \u2014 not a sibling question.\n**Empty-state substitution.** An empty/zero/null result from a resource that normally requires auth, scope, or filter context is evidence the *precondition* wasn't met \u2014 not evidence the question is answered. Empty cart while logged out, zero results while geo-restricted, empty inbox while unauthenticated: precondition failure \u2192 fix the precondition (often: load `autonomous-login`), don't return the empty result as the answer.\n**Multi-step preconditions.** When the task names multiple steps (\"go to X, then Y, report Z\"), evaluate preconditions for the *full chain* before treating any step as optional. A blocker on step N blocks the whole task even if step 1 returned data.\n\n## Skills (auto-injected)\nSKILL blocks auto-inject between `--- SKILL: <id> ---` markers when page/error needs special handling. 
Read carefully.\nLoad manually via **browserless_skill** if suspected but not injected:\n- `autonomous-login` \u2014 gates, credential rules, MFA/captcha, final JSON shape (see `## Auth` above for when to load)\n- `shadow-dom` \u2014 deep selectors, iframe targeting\n- `cookie-consent` \u2014 vendor-specific dismiss recipes\n- `modals` \u2014 closing dialogs and alertdialogs\n- `captchas` \u2014 the `solve` command (Cloud only)\n- `snapshot-misses` \u2014 truncated/empty snapshots, image-rendered content\n- `dynamic-content` \u2014 choosing the right `wait*` method\n- `screenshots` \u2014 when to screenshot vs. snapshot, scope and format choices\n- `tabs` \u2014 multi-tab workflows, peek-without-switching\n\n## Core Loop (ReAct: Reason \u2192 Act \u2192 Observe)\n1. **goto** \u2014 waits \"domcontentloaded\"\n2. **snapshot** \u2014 returns interactive + informational elements (button, link, textbox, combobox, checkbox, heading, img+alt) with ref= selectors\n3. **Plan** all actions from snapshot\n4. **Batch** execute\n5. **Re-snapshot** only if page changed\n6. Repeat \u2192 **close** when done\n\n## Snapshot Rules\n- Until you snapshot a page, you CANNOT click/type/interact \u2014 snapshot first, no exceptions\n- NEVER guess, assume, or infer selectors \u2014 CSS selectors from your training data are wrong. ONLY use ref= / deep-ref= from latest snapshot\n- Snapshot STALE after: click, goto, select, navigation\n- Snapshot VALID after: type, hover, scroll, evaluate\n- Expect new content? \u2192 re-snapshot\n- Element roles in snapshot (link, button, textbox, combobox, checkbox, heading) tell you what each does\n\n## Selectors\n- Use **ref=** (CSS) or **deep-ref=** (starts `< `) exactly as shown in snapshot\n- Example: `[3] button \"Sign In\" ref=button#submit` \u2192 `\"button#submit\"`\n- deep-ref for shadow DOM / iframes \u2014 see `shadow-dom` skill\n\n## Iframes\nSnapshots include a `Frames` list (cross-origin iframes) when present. 
Elements inside a frame are tagged `[frame#N]` and carry a `deep-ref=< *url* css` selector that already pierces the frame \u2014 pass it as-is to `click`/`type`/`hover`/`checkbox`. No frame switching needed. captcha/payment widgets (reCAPTCHA, hCaptcha, Stripe, Turnstile) show up here. `shadow-dom` skill auto-loads when frames present.\n\n## Tabs\nSnapshots include `tabs` + `activeTargetId` \u2014 no getTabs needed. Multi-tab / `snapshot { targetId }` in `tabs` skill (auto-loads when >1 tab).\n\n## Links\n**Prefer goto over click** for links with href \u2014 immune to layout shifts, overlays, misclicks.\nExample: `[5] a \"About\" ref=a[href='/about']` \u2192 `goto { url: \"https://ex.com/about\" }`\nOnly click when href is `javascript:` / `#` / missing.\n\n## Content Extraction\n1. Check in-memory snapshot (text/values already there)\n2. **text** { selector } \u2014 from specific element\n3. **evaluate** { content } \u2014 JS (IIFE): `(() => { return ... })()`\n4. **html** { selector } \u2014 raw HTML\n\n## Files (upload / download)\n**To download a file, DRIVE THE BROWSER \u2014 do not `curl`/`wget`/`fetch` the file yourself as a first move.** Many real downloads (login/cookie-gated, generated server-side on demand, or triggered by a click whose response headers force the download) have NO fetchable URL \u2014 a direct fetch silently gets the wrong bytes, an HTML error page, or 403. Click/goto in the agent and collect from the auto-surfaced ledger. The ONLY time a direct fetch is correct: the ledger hands you a URL to use \u2014 the single-use `/download/<id>` URL, or an over-cap `sourceUrl`. 
Reaching for `curl` first is a bug, not a shortcut.\n**NEVER read a file's bytes or base64 into this conversation, and NEVER split/reassemble/inline base64 by hand.** That is the wrong tool and will stall.\n- **Upload a local file (stdio)**: `uploadFile { selector, files: [{ path }] }` \u2014 the server reads + encodes it.\n- **Upload a local file (HTTP)**: the server can't read your disk. Stage it once over HTTP, then use the handle:\n `curl -s -F file=@\"/path/to/file\" \"<MCP_BASE_URL>/upload?token=<TOKEN>\"` \u2192 returns `{ \"handle\": \"browserless-download://\u2026\" }` \u2192 `uploadFile { files: [{ handle }] }`. (The path-rejection error gives you the exact command with your token + URL filled in.)\n- **Re-upload something from `getDownloads`**: pass its `handle` (works in both modes).\n- **Download**: just trigger it in the agent (click a download link, or goto the file URL). The captured file **auto-surfaces** as a notification on the agent response (filename/size/handle), never the bytes \u2014 the server waits for it to finish (bounded by size), so it usually lands on that same call. stdio: file already saved, you get its path. HTTP: a **single-use** `curl \u2026 /download/<id>?token=` URL \u2014 fetch only if you need it. Files over the cap aren't transferred \u2014 you get the source URL to fetch directly. Path/handle reuses in `uploadFile`. (No separate download tool \u2014 use the agent.)\n- base64 `content` is a LAST RESORT \u2014 tiny inline data only.\n- Full recipe: `file-transfers` skill.\n\n## Batching \u2014 Maximize Per Call\nPlan ALL actions from snapshot before next snapshot.\n\n**Process:**\n1. Classify actions: **safe** (type, hover, scroll, evaluate, select, checkbox) vs. **page-changing** (click, goto)\n2. Batch: safe FIRST \u2192 page-changing LAST\n3. For forms: if submit button is in snapshot, batch type + click in one call\n4. 
Don't batch across navigations\n\n**Example form:**\n```json\n{ \"commands\": [\n { \"method\": \"type\", \"params\": { \"selector\": \"input#email\", \"text\": \"j@d.com\" } },\n { \"method\": \"click\", \"params\": { \"selector\": \"button#submit\" } }\n] }\n```\n\n## Async\nAfter async triggers (search, submit), use `wait*` before snapshot \u2014 `waitForResponse` best when API URL known. `dynamic-content` skill auto-loads on timeout. Never `evaluate` with setTimeout.\n\n## Error Recovery\nErrors tagged `Category: <NAME>`:\n- **SELECTOR_MISS** \u2014 re-snapshot; retry `< selector` if not already deep-ref\n- **SESSION_LOST** \u2014 a fresh session was opened automatically; re-goto + snapshot (prior state gone)\n- **UNAUTHORIZED** / **FORBIDDEN** \u2014 pick different path\n- **NOT_FOUND** \u2014 different URL\n- **SERVER_ERROR** \u2014 backoff, retry once\n- **NAVIGATION_FAILED** \u2014 verify URL\n- **TIMEOUT** \u2014 longer wait or different signal\n- **INVALID_PARAMS** \u2014 fix params (schema authoritative)\n- **UNKNOWN** \u2014 re-snapshot + re-plan\n\n`! NOTICE: URL changed cross-origin` = prior plan/refs invalid, re-plan.\nNever retry same failed action without re-snapshot.\n\n## Methods (non-obvious)\n- **goto** { url, waitUntil? } \u2014 default \"domcontentloaded\"; prefer over click for links\n- **snapshot** { maxElements?, targetId? } \u2014 cap 500; targetId peeks non-active tab\n- **evaluate** { content } \u2014 IIFE only\n- **waitForSelector** { selector, timeout? } \u2014 set 5000-10000ms\n- **waitForResponse** { url?, statuses?, timeout? } \u2014 url is glob `\"*api/results*\"`\n- **createTab** { url?, activate?, waitUntil? 
} \u2014 default activate: true; false = background\n- **close** \u2014 own call, NOT batched; only when task complete (premature close discards page state)\n- See schema for: screenshot, solve, back, forward, reload, click, type, select, checkbox, hover, scroll, text, html, waitForNavigation, waitForTimeout, waitForRequest, liveURL, getTabs, switchTab, closeTab\n\n";
2
+ export declare const fileTransferModeNote: (transport: "stdio" | "httpStream", mcpBaseUrl: string) => string;
3
+ export declare const SKILL_TOOL_DESCRIPTION = "Load a Browserless agent skill on demand.\n\nUse this when you suspect the page exhibits a non-trivial mechanic but no SKILL block was auto-injected into a previous response. The auto-injection heuristics are conservative; calling this tool is the explicit fallback.\n\nAvailable skills:\n- **shadow-dom** \u2014 deep selectors, iframe URL-pattern syntax, what works through deep-ref\n- **cookie-consent** \u2014 vendor-specific dismiss recipes (OneTrust, Cookiebot, Didomi, etc.)\n- **modals** \u2014 close-button heuristics, ESC handling, alertdialog vs. dialog\n- **snapshot-misses** \u2014 truncated/empty snapshots, image-rendered content\n- **dynamic-content** \u2014 choosing the right `wait*` method after async triggers\n- **screenshots** \u2014 when to screenshot vs. snapshot, scope and format choices\n- **tabs** \u2014 multi-tab workflows, peek-without-switching\n- **autonomous-login** \u2014 load before authenticating: when the user asked you to log in, when a wall blocks the task, or as soon as a password input appears. Covers the don't-login-by-default posture, contextual credential matching, MFA/captcha branches, and the required final JSON response shape.\n- **captchas** \u2014 the `solve` command, response semantics, escalation path (Cloud-only)\n- **file-transfers** \u2014 `uploadFile` / `getDownloads`, stdio-path vs. base64 content, size caps";