barebrowse 0.10.1 → 0.12.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +100 -0
- package/README.md +13 -0
- package/barebrowse.context.md +14 -2
- package/cli.js +8 -0
- package/package.json +38 -6
- package/src/auth.js +35 -10
- package/src/bareagent.js +16 -5
- package/src/cdp.js +4 -2
- package/src/chromium.js +19 -6
- package/src/daemon.js +54 -12
- package/src/index.js +50 -2
- package/src/network-idle.js +4 -1
- package/src/prune.js +1 -1
- package/src/session-client.js +6 -2
- package/src/url-guard.js +138 -0
- package/src/wearehere.d.ts +6 -0
- package/types/aria.d.ts +17 -0
- package/types/auth.d.ts +35 -0
- package/types/bareagent.d.ts +25 -0
- package/types/blocklist.d.ts +21 -0
- package/types/cdp.d.ts +16 -0
- package/types/chromium.d.ts +58 -0
- package/types/consent.d.ts +9 -0
- package/types/daemon.d.ts +10 -0
- package/types/index.d.ts +138 -0
- package/types/interact.d.ts +79 -0
- package/types/network-idle.d.ts +19 -0
- package/types/prune.d.ts +13 -0
- package/types/session-client.d.ts +19 -0
- package/types/stealth.d.ts +14 -0
- package/types/url-guard.d.ts +26 -0
- package/commands/barebrowse/SKILL.md +0 -133
- package/commands/barebrowse.md +0 -132
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Try to dismiss a cookie consent dialog on the current page.
|
|
3
|
+
* Inspects the ARIA tree for dialog elements with consent-related content,
|
|
4
|
+
* then clicks the "accept" button.
|
|
5
|
+
*
|
|
6
|
+
* @param {object} session - Session-scoped CDP handle
|
|
7
|
+
* @returns {Promise<boolean>} true if a consent dialog was dismissed
|
|
8
|
+
*/
|
|
9
|
+
export function dismissConsent(session: object): Promise<boolean>;
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Spawn a detached child process that runs the daemon.
|
|
3
|
+
* Parent polls for session.json, then exits.
|
|
4
|
+
*/
|
|
5
|
+
export function startDaemon(opts: any, outputDir: any, initialUrl: any): Promise<any>;
|
|
6
|
+
/**
|
|
7
|
+
* Run the daemon HTTP server. Called by cli.js --daemon-internal.
|
|
8
|
+
* Holds a connect() session and serves commands over HTTP.
|
|
9
|
+
*/
|
|
10
|
+
export function runDaemon(opts: any, outputDir: any, initialUrl: any): Promise<void>;
|
package/types/index.d.ts
ADDED
|
@@ -0,0 +1,138 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Browse a URL and return an ARIA snapshot.
|
|
3
|
+
* This is the primary API — URL in, agent-ready snapshot out.
|
|
4
|
+
*
|
|
5
|
+
* @param {string} url - The URL to browse
|
|
6
|
+
* @param {object} [opts]
|
|
7
|
+
* @param {'headless'|'headed'|'hybrid'} [opts.mode='headless'] - Browser mode
|
|
8
|
+
* @param {boolean} [opts.cookies=true] - Inject user's cookies (Phase 2)
|
|
9
|
+
* @param {boolean} [opts.prune=true] - Apply ARIA pruning (Phase 2)
|
|
10
|
+
* @param {number} [opts.timeout=30000] - Navigation timeout in ms
|
|
11
|
+
* @param {boolean} [opts.blockAds=true] - Block ~120 common ad/tracker
|
|
12
|
+
* URL patterns via CDP. Shrinks ARIA snapshots and speeds page loads.
|
|
13
|
+
* See src/blocklist.js for the default set. Set false to disable.
|
|
14
|
+
* @param {string[]} [opts.blockUrls] - Extra URL glob patterns to block,
|
|
15
|
+
* merged with the default unless blockAds:false.
|
|
16
|
+
* @param {boolean} [opts.allowLocalUrls=false] - Permit navigation to local-
|
|
17
|
+
* resource schemes (file:, view-source:, chrome:, …). Blocked by default.
|
|
18
|
+
* @param {boolean} [opts.blockPrivateNetwork=false] - Reject navigation to
|
|
19
|
+
* loopback / RFC-1918 / link-local / cloud-metadata hosts (SSRF guard).
|
|
20
|
+
* @param {string} [opts.proxy] - Proxy server (e.g. 'http://host:port').
|
|
21
|
+
* @param {string} [opts.binary] - Path to browser binary (auto-detected if omitted).
|
|
22
|
+
* @param {string} [opts.userDataDir] - Browser profile directory.
|
|
23
|
+
* @param {{width: number, height: number}} [opts.viewport] - Viewport dimensions.
|
|
24
|
+
* @param {string} [opts.browser] - Source browser for cookie extraction.
|
|
25
|
+
* @param {boolean} [opts.consent=true] - Auto-dismiss cookie consent dialogs.
|
|
26
|
+
* @param {'act'|'browse'|'navigate'|'full'|'read'} [opts.pruneMode='act'] - Pruning mode.
|
|
27
|
+
* @returns {Promise<string>} ARIA snapshot text
|
|
28
|
+
*/
|
|
29
|
+
export function browse(url: string, opts?: {
|
|
30
|
+
mode?: "headless" | "headed" | "hybrid" | undefined;
|
|
31
|
+
cookies?: boolean | undefined;
|
|
32
|
+
prune?: boolean | undefined;
|
|
33
|
+
timeout?: number | undefined;
|
|
34
|
+
blockAds?: boolean | undefined;
|
|
35
|
+
blockUrls?: string[] | undefined;
|
|
36
|
+
allowLocalUrls?: boolean | undefined;
|
|
37
|
+
blockPrivateNetwork?: boolean | undefined;
|
|
38
|
+
proxy?: string | undefined;
|
|
39
|
+
binary?: string | undefined;
|
|
40
|
+
userDataDir?: string | undefined;
|
|
41
|
+
viewport?: {
|
|
42
|
+
width: number;
|
|
43
|
+
height: number;
|
|
44
|
+
} | undefined;
|
|
45
|
+
browser?: string | undefined;
|
|
46
|
+
consent?: boolean | undefined;
|
|
47
|
+
pruneMode?: "act" | "browse" | "navigate" | "full" | "read" | undefined;
|
|
48
|
+
}): Promise<string>;
|
|
49
|
+
/**
|
|
50
|
+
* Connect to a browser for a long-lived interactive session.
|
|
51
|
+
*
|
|
52
|
+
* @param {object} [opts]
|
|
53
|
+
* @param {'headless'|'headed'|'hybrid'} [opts.mode='headless'] - Browser mode
|
|
54
|
+
* @param {number} [opts.port] - Attach to an already-running Chromium at this
|
|
55
|
+
* CDP port instead of launching a new one. The browser keeps running on
|
|
56
|
+
* close(); only the tab we created is torn down. Use this to drive a
|
|
57
|
+
* user's logged-in session (start Chromium with --remote-debugging-port=N).
|
|
58
|
+
* @param {string} [opts.downloadPath] - Directory to save downloaded files.
|
|
59
|
+
* Default: a per-session subdirectory under the OS temp dir. Downloads
|
|
60
|
+
* land here as <guid>; check `page.downloads` for { url, suggestedFilename,
|
|
61
|
+
* savedPath, state, totalBytes, receivedBytes } per file.
|
|
62
|
+
* @param {boolean} [opts.blockAds] - Block ~120 common ad/tracker URL
|
|
63
|
+
* patterns via CDP. Defaults to true for launched browsers, false in
|
|
64
|
+
* attach mode (would affect any tab attached to the user's running
|
|
65
|
+
* session). Setting blockAds:true explicitly in attach mode honors the
|
|
66
|
+
* request — blocking applies to whichever tab the session is currently
|
|
67
|
+
* attached to and follows the session across switchTab() until close.
|
|
68
|
+
* @param {string[]} [opts.blockUrls] - Extra URL glob patterns to block,
|
|
69
|
+
* merged with the default unless blockAds is false.
|
|
70
|
+
* @param {boolean} [opts.allowLocalUrls=false] - Permit navigation to local-
|
|
71
|
+
* resource schemes (file:, view-source:, chrome:, …). Blocked by default
|
|
72
|
+
* because a prompt-injected agent could use them to read local files.
|
|
73
|
+
* @param {boolean} [opts.blockPrivateNetwork=false] - Reject navigation to
|
|
74
|
+
* loopback / RFC-1918 / link-local / cloud-metadata hosts (SSRF guard).
|
|
75
|
+
* Off by default so localhost dev-server browsing keeps working.
|
|
76
|
+
* @param {string} [opts.uploadDir] - When set, upload() rejects any file that
|
|
77
|
+
* does not resolve (symlinks included) inside this directory. Sandboxes the
|
|
78
|
+
* agent's file-upload capability. Default: no restriction.
|
|
79
|
+
* @param {string} [opts.proxy] - Proxy server (e.g. 'http://host:port').
|
|
80
|
+
* @param {string} [opts.binary] - Path to browser binary (auto-detected if omitted).
|
|
81
|
+
* @param {string} [opts.userDataDir] - Browser profile directory.
|
|
82
|
+
* @param {{width: number, height: number}} [opts.viewport] - Viewport dimensions.
|
|
83
|
+
* @param {boolean} [opts.consent=true] - Auto-dismiss cookie consent dialogs.
|
|
84
|
+
* @param {string} [opts.storageState] - Path to a storage-state JSON file
|
|
85
|
+
* (cookies + localStorage) to load before navigation.
|
|
86
|
+
* @param {'act'|'browse'|'navigate'|'full'|'read'} [opts.pruneMode='act'] - Pruning mode.
|
|
87
|
+
* @returns {Promise<object>} Page handle with goto, snapshot, close
|
|
88
|
+
*/
|
|
89
|
+
export function connect(opts?: {
|
|
90
|
+
mode?: "headless" | "headed" | "hybrid" | undefined;
|
|
91
|
+
port?: number | undefined;
|
|
92
|
+
downloadPath?: string | undefined;
|
|
93
|
+
blockAds?: boolean | undefined;
|
|
94
|
+
blockUrls?: string[] | undefined;
|
|
95
|
+
allowLocalUrls?: boolean | undefined;
|
|
96
|
+
blockPrivateNetwork?: boolean | undefined;
|
|
97
|
+
uploadDir?: string | undefined;
|
|
98
|
+
proxy?: string | undefined;
|
|
99
|
+
binary?: string | undefined;
|
|
100
|
+
userDataDir?: string | undefined;
|
|
101
|
+
viewport?: {
|
|
102
|
+
width: number;
|
|
103
|
+
height: number;
|
|
104
|
+
} | undefined;
|
|
105
|
+
consent?: boolean | undefined;
|
|
106
|
+
storageState?: string | undefined;
|
|
107
|
+
pruneMode?: "act" | "browse" | "navigate" | "full" | "read" | undefined;
|
|
108
|
+
}): Promise<object>;
|
|
109
|
+
/**
|
|
110
|
+
* Apply Network.setBlockedURLs for ad/tracker blocking on a session.
|
|
111
|
+
* Default list is on; pass blockAds:false to skip it, or blockUrls:[...] to add extra patterns.
|
|
112
|
+
* On failure (legacy Chrome lacking the method) warns once and continues —
|
|
113
|
+
* blocking is an enhancement, not a hard requirement.
|
|
114
|
+
*
|
|
115
|
+
* Exported for unit testing of the warn-once behavior; not part of the public
|
|
116
|
+
* API surface.
|
|
117
|
+
*/
|
|
118
|
+
export function applyBlocklist(session: any, pageOpts: any): Promise<void>;
|
|
119
|
+
/** Test-only: reset the warn-once flag. Not part of the public API. */
|
|
120
|
+
export function _resetBlocklistWarning(): void;
|
|
121
|
+
/**
|
|
122
|
+
* Detect if a page is a bot-challenge page (Cloudflare, hCaptcha, etc.).
|
|
123
|
+
*
|
|
124
|
+
* Pre-H9 this was over-aggressive: `nodeCount < 50` alone fired on any
|
|
125
|
+
* legitimate small page (404s, simple landings, error pages), and generic
|
|
126
|
+
* phrases like "access denied" / "unknown error" / "permission denied"
|
|
127
|
+
* triggered on real HTTP 4xx/5xx pages, kicking hybrid mode into a costly
|
|
128
|
+
* headed fallback for nothing.
|
|
129
|
+
*
|
|
130
|
+
* H9 split: STRONG_PHRASES are essentially-unambiguous challenge UI and
|
|
131
|
+
* fire regardless of page size; WEAK_PHRASES only fire when the page is
|
|
132
|
+
* ALSO tiny (so a legitimate-looking error page with "access denied" in
|
|
133
|
+
* its body doesn't trip the fallback).
|
|
134
|
+
*
|
|
135
|
+
* @param {object} tree - Nested ARIA tree (from buildTree)
|
|
136
|
+
* @param {number} [nodeCount] - Raw CDP node count (from Accessibility.getFullAXTree)
|
|
137
|
+
*/
|
|
138
|
+
export function isChallengePage(tree: object, nodeCount?: number): boolean;
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Click an element by its backendDOMNodeId.
|
|
3
|
+
* Scrolls into view, resolves coordinates, then dispatches mousePressed + mouseReleased.
|
|
4
|
+
*
|
|
5
|
+
* @param {object} session - Session-scoped CDP handle
|
|
6
|
+
* @param {number} backendNodeId - Backend DOM node ID
|
|
7
|
+
*/
|
|
8
|
+
export function click(session: object, backendNodeId: number): Promise<void>;
|
|
9
|
+
/**
|
|
10
|
+
* Type text into an element by its backendDOMNodeId.
|
|
11
|
+
* Default: DOM.focus + Input.insertText (fast, no key events).
|
|
12
|
+
* With { keyEvents: true }: dispatches keyDown/keyUp per character (triggers handlers).
|
|
13
|
+
* With { clear: true }: selects all existing text and deletes it before typing.
|
|
14
|
+
*
|
|
15
|
+
* @param {object} session - Session-scoped CDP handle
|
|
16
|
+
* @param {number} backendNodeId - Backend DOM node ID
|
|
17
|
+
* @param {string} text - Text to type
|
|
18
|
+
* @param {object} [opts]
|
|
19
|
+
* @param {boolean} [opts.keyEvents=false] - Use char-by-char key events
|
|
20
|
+
* @param {boolean} [opts.clear=false] - Clear existing content before typing
|
|
21
|
+
*/
|
|
22
|
+
export function type(session: object, backendNodeId: number, text: string, opts?: {
|
|
23
|
+
keyEvents?: boolean | undefined;
|
|
24
|
+
clear?: boolean | undefined;
|
|
25
|
+
}): Promise<void>;
|
|
26
|
+
/**
|
|
27
|
+
* Press a special key (Enter, Tab, Escape, etc.).
|
|
28
|
+
* Dispatches keyDown + keyUp for the named key.
|
|
29
|
+
*
|
|
30
|
+
* @param {object} session - Session-scoped CDP handle
|
|
31
|
+
* @param {string} key - Key name (e.g. 'Enter', 'Tab', 'Escape', 'ArrowDown')
|
|
32
|
+
*/
|
|
33
|
+
export function press(session: object, key: string): Promise<void>;
|
|
34
|
+
/**
|
|
35
|
+
* Scroll the page via mouseWheel event.
|
|
36
|
+
* Dispatches at viewport center by default, or at given coordinates.
|
|
37
|
+
*
|
|
38
|
+
* @param {object} session - Session-scoped CDP handle
|
|
39
|
+
* @param {number} deltaY - Pixels to scroll (positive = down, negative = up)
|
|
40
|
+
* @param {number} [x=400] - X coordinate for scroll event
|
|
41
|
+
* @param {number} [y=300] - Y coordinate for scroll event
|
|
42
|
+
*/
|
|
43
|
+
export function scroll(session: object, deltaY: number, x?: number, y?: number): Promise<void>;
|
|
44
|
+
/**
|
|
45
|
+
* Hover over an element by its backendDOMNodeId.
|
|
46
|
+
* Scrolls into view, then dispatches mouseMoved at center.
|
|
47
|
+
*
|
|
48
|
+
* @param {object} session - Session-scoped CDP handle
|
|
49
|
+
* @param {number} backendNodeId - Backend DOM node ID
|
|
50
|
+
*/
|
|
51
|
+
export function hover(session: object, backendNodeId: number): Promise<void>;
|
|
52
|
+
/**
|
|
53
|
+
* Select a value in a <select> element or custom dropdown.
|
|
54
|
+
*
|
|
55
|
+
* Strategy 1: Native <select> — set .value + dispatch 'change' event.
|
|
56
|
+
* Strategy 2: Custom dropdown — click to open, find matching option, click it.
|
|
57
|
+
*
|
|
58
|
+
* @param {object} session - Session-scoped CDP handle
|
|
59
|
+
* @param {number} backendNodeId - Backend DOM node ID of the select/combobox
|
|
60
|
+
* @param {string} value - Value or visible text to select
|
|
61
|
+
*/
|
|
62
|
+
export function select(session: object, backendNodeId: number, value: string): Promise<void>;
|
|
63
|
+
/**
|
|
64
|
+
* Drag one element to another.
|
|
65
|
+
* Scrolls source into view, mouse down, move to target center, mouse up.
|
|
66
|
+
*
|
|
67
|
+
* @param {object} session - Session-scoped CDP handle
|
|
68
|
+
* @param {number} fromNodeId - Source element backendDOMNodeId
|
|
69
|
+
* @param {number} toNodeId - Target element backendDOMNodeId
|
|
70
|
+
*/
|
|
71
|
+
export function drag(session: object, fromNodeId: number, toNodeId: number): Promise<void>;
|
|
72
|
+
/**
|
|
73
|
+
* Upload files to a file input element.
|
|
74
|
+
*
|
|
75
|
+
* @param {object} session - Session-scoped CDP handle
|
|
76
|
+
* @param {number} backendNodeId - Backend DOM node ID of the file input
|
|
77
|
+
* @param {string[]} files - Absolute paths to files to upload
|
|
78
|
+
*/
|
|
79
|
+
export function upload(session: object, backendNodeId: number, files: string[]): Promise<void>;
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* network-idle.js — wait until the page's network has been idle for N ms.
|
|
3
|
+
*
|
|
4
|
+
* Tracks in-flight requests by requestId in a Set, so an orphan
|
|
5
|
+
* loadingFinished/Failed (event for a request whose requestWillBeSent
|
|
6
|
+
* arrived before our listener attached) is a harmless no-op instead of
|
|
7
|
+
* driving a counter negative and resolving prematurely.
|
|
8
|
+
*/
|
|
9
|
+
/**
|
|
10
|
+
* @param {object} session - Session-scoped CDP handle whose .on() returns an unsubscribe function
|
|
11
|
+
* @param {object} [opts]
|
|
12
|
+
* @param {number} [opts.timeout=30000] - Max wait time before reject
|
|
13
|
+
* @param {number} [opts.idle=500] - Required idle duration before resolve
|
|
14
|
+
* @returns {Promise<void>}
|
|
15
|
+
*/
|
|
16
|
+
export function waitForNetworkIdle(session: object, opts?: {
|
|
17
|
+
timeout?: number | undefined;
|
|
18
|
+
idle?: number | undefined;
|
|
19
|
+
}): Promise<void>;
|
package/types/prune.d.ts
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Prune an ARIA tree for agent consumption.
|
|
3
|
+
*
|
|
4
|
+
* @param {object} tree - Root node from buildTree() (CDP format)
|
|
5
|
+
* @param {object} [options]
|
|
6
|
+
* @param {'act'|'browse'|'navigate'|'full'|'read'} [options.mode='act'] - Pruning mode ('read' is an alias for 'browse')
|
|
7
|
+
* @param {string} [options.context=''] - Search context for relevance filtering
|
|
8
|
+
* @returns {object|null} Pruned tree
|
|
9
|
+
*/
|
|
10
|
+
export function prune(tree: object, options?: {
|
|
11
|
+
mode?: "act" | "browse" | "navigate" | "full" | "read" | undefined;
|
|
12
|
+
context?: string | undefined;
|
|
13
|
+
}): object | null;
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Read session.json from the output directory.
|
|
3
|
+
* @returns {{ port: number, pid: number, token?: string, startedAt: string } | null}
|
|
4
|
+
*/
|
|
5
|
+
export function readSession(outputDir: any): {
|
|
6
|
+
port: number;
|
|
7
|
+
pid: number;
|
|
8
|
+
token?: string;
|
|
9
|
+
startedAt: string;
|
|
10
|
+
} | null;
|
|
11
|
+
/**
|
|
12
|
+
* Check if the daemon is alive by hitting GET /status.
|
|
13
|
+
*/
|
|
14
|
+
export function isAlive(outputDir: any): Promise<boolean>;
|
|
15
|
+
/**
|
|
16
|
+
* Send a command to the running daemon.
|
|
17
|
+
* @returns {Promise<object>} The daemon's response
|
|
18
|
+
*/
|
|
19
|
+
export function sendCommand(command: any, args: any, outputDir: any): Promise<object>;
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Apply stealth patches to a CDP session.
|
|
3
|
+
* Must be called before any navigation.
|
|
4
|
+
*
|
|
5
|
+
* Splits into two layers:
|
|
6
|
+
* 1. Network.setUserAgentOverride strips "HeadlessChrome" from the UA
|
|
7
|
+
* that ships in HTTP request headers AND that navigator.userAgent
|
|
8
|
+
* reports — `--headless=new` leaves "HeadlessChrome" in there.
|
|
9
|
+
* 2. Page.addScriptToEvaluateOnNewDocument injects the JS-level patches
|
|
10
|
+
* before any page script runs.
|
|
11
|
+
*
|
|
12
|
+
* @param {object} session - Session-scoped CDP handle
|
|
13
|
+
*/
|
|
14
|
+
export function applyStealth(session: object): Promise<void>;
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Throw if `url` is unsafe to navigate to under the given policy.
|
|
3
|
+
* @param {string} url
|
|
4
|
+
* @param {object} [opts]
|
|
5
|
+
* @param {boolean} [opts.allowLocalUrls=false] - permit file:/chrome:/etc.
|
|
6
|
+
* @param {boolean} [opts.blockPrivateNetwork=false] - reject loopback/RFC-1918/metadata.
|
|
7
|
+
*/
|
|
8
|
+
export function assertNavigable(url: string, opts?: {
|
|
9
|
+
allowLocalUrls?: boolean | undefined;
|
|
10
|
+
blockPrivateNetwork?: boolean | undefined;
|
|
11
|
+
}): void;
|
|
12
|
+
/**
|
|
13
|
+
* Throw if any file in `files` resolves outside `uploadDir`. Both the base
|
|
14
|
+
* dir and each file are resolved through realpath, so symlinks (in either the
|
|
15
|
+
* base path — e.g. macOS /tmp → /private/tmp — or the file) can't be used to
|
|
16
|
+
* escape the sandbox or to falsely reject a legitimate file.
|
|
17
|
+
* No-op when `uploadDir` is falsy (no restriction configured).
|
|
18
|
+
* @param {string|string[]} files
|
|
19
|
+
* @param {string|null} uploadDir
|
|
20
|
+
*/
|
|
21
|
+
export function assertUploadAllowed(files: string | string[], uploadDir: string | null): void;
|
|
22
|
+
/**
|
|
23
|
+
* @param {string} host - hostname (no brackets for IPv6)
|
|
24
|
+
* @returns {boolean} true if it names a private/loopback/link-local/internal host
|
|
25
|
+
*/
|
|
26
|
+
export function isPrivateHost(host: string): boolean;
|
|
@@ -1,133 +0,0 @@
|
|
|
1
|
-
---
|
|
2
|
-
name: barebrowse
|
|
3
|
-
description: Browser automation using the user's real browser with real cookies. Handles consent walls, login sessions, and bot detection automatically.
|
|
4
|
-
allowed-tools: Bash(barebrowse:*)
|
|
5
|
-
---
|
|
6
|
-
|
|
7
|
-
# barebrowse CLI — Browser Automation for Agents
|
|
8
|
-
|
|
9
|
-
Browse any URL using the user's real browser with real cookies. Returns pruned ARIA snapshots (40-90% smaller than raw) with `[ref=N]` markers for interaction. Handles cookie consent, login sessions, JS dialogs, and bot detection automatically.
|
|
10
|
-
|
|
11
|
-
## Quick Start
|
|
12
|
-
|
|
13
|
-
```bash
|
|
14
|
-
barebrowse open https://example.com # Start session + navigate
|
|
15
|
-
barebrowse snapshot # Get ARIA snapshot → .barebrowse/page-*.yml
|
|
16
|
-
barebrowse click 8 # Click element with ref=8
|
|
17
|
-
barebrowse snapshot # See result
|
|
18
|
-
barebrowse close # End session
|
|
19
|
-
```
|
|
20
|
-
|
|
21
|
-
All output files go to `.barebrowse/` in the current directory. Read them with the Read tool when needed.
|
|
22
|
-
|
|
23
|
-
## Commands
|
|
24
|
-
|
|
25
|
-
### Session Lifecycle
|
|
26
|
-
|
|
27
|
-
| Command | Description |
|
|
28
|
-
|---------|-------------|
|
|
29
|
-
| `barebrowse open [url] [flags]` | Start browser session. Optionally navigate to URL. |
|
|
30
|
-
| `barebrowse close` | Close session and kill browser. |
|
|
31
|
-
| `barebrowse status` | Check if session is running. |
|
|
32
|
-
|
|
33
|
-
**Open flags:**
|
|
34
|
-
- `--mode=headless|headed|hybrid` — Browser mode (default: headless)
|
|
35
|
-
- `--no-cookies` — Skip cookie injection
|
|
36
|
-
- `--browser=firefox|chromium` — Cookie source
|
|
37
|
-
- `--prune-mode=act|read` — Default pruning mode
|
|
38
|
-
- `--timeout=N` — Navigation timeout in ms
|
|
39
|
-
- `--proxy=URL` — HTTP/SOCKS proxy server
|
|
40
|
-
- `--viewport=WxH` — Viewport size (e.g. 1280x720)
|
|
41
|
-
- `--storage-state=FILE` — Load cookies/localStorage from JSON file
|
|
42
|
-
|
|
43
|
-
### Navigation
|
|
44
|
-
|
|
45
|
-
| Command | Output |
|
|
46
|
-
|---------|--------|
|
|
47
|
-
| `barebrowse goto <url>` | Navigates, waits for load, dismisses consent. Prints "ok". |
|
|
48
|
-
| `barebrowse back` | Go back in browser history. |
|
|
49
|
-
| `barebrowse forward` | Go forward in browser history. |
|
|
50
|
-
| `barebrowse snapshot` | ARIA snapshot → `.barebrowse/page-<timestamp>.yml` |
|
|
51
|
-
| `barebrowse snapshot --mode=read` | Read mode: keeps all text (for content extraction) |
|
|
52
|
-
| `barebrowse screenshot` | Screenshot → `.barebrowse/screenshot-<timestamp>.png` |
|
|
53
|
-
| `barebrowse pdf [--landscape]` | PDF export → `.barebrowse/page-<timestamp>.pdf` |
|
|
54
|
-
|
|
55
|
-
### Interaction
|
|
56
|
-
|
|
57
|
-
| Command | Description |
|
|
58
|
-
|---------|-------------|
|
|
59
|
-
| `barebrowse click <ref>` | Click element (scrolls into view first) |
|
|
60
|
-
| `barebrowse type <ref> <text>` | Type text into element |
|
|
61
|
-
| `barebrowse fill <ref> <text>` | Clear existing content + type new text |
|
|
62
|
-
| `barebrowse press <key>` | Press key: Enter, Tab, Escape, Backspace, Delete, arrows, Space |
|
|
63
|
-
| `barebrowse scroll <deltaY>` | Scroll page (positive=down, negative=up) |
|
|
64
|
-
| `barebrowse hover <ref>` | Hover over element (triggers tooltips) |
|
|
65
|
-
| `barebrowse select <ref> <value>` | Select dropdown option |
|
|
66
|
-
| `barebrowse drag <fromRef> <toRef>` | Drag element to another element |
|
|
67
|
-
| `barebrowse upload <ref> <files..>` | Upload file(s) to a file input element |
|
|
68
|
-
|
|
69
|
-
### Tabs
|
|
70
|
-
|
|
71
|
-
| Command | Description |
|
|
72
|
-
|---------|-------------|
|
|
73
|
-
| `barebrowse tabs` | List open tabs (index, url, title) |
|
|
74
|
-
| `barebrowse tab <index>` | Switch to tab by index |
|
|
75
|
-
|
|
76
|
-
### Debugging
|
|
77
|
-
|
|
78
|
-
| Command | Output |
|
|
79
|
-
|---------|--------|
|
|
80
|
-
| `barebrowse eval <expression>` | Evaluate JS in page, print result |
|
|
81
|
-
| `barebrowse wait-idle` | Wait for network idle (no requests for 500ms) |
|
|
82
|
-
| `barebrowse wait-for [opts]` | Wait for content to appear on page |
|
|
83
|
-
| `barebrowse console-logs` | Console logs → `.barebrowse/console-<timestamp>.json` |
|
|
84
|
-
| `barebrowse network-log` | Network log → `.barebrowse/network-<timestamp>.json` |
|
|
85
|
-
| `barebrowse network-log --failed` | Only failed/4xx/5xx requests |
|
|
86
|
-
| `barebrowse dialog-log` | JS dialog log → `.barebrowse/dialogs-<timestamp>.json` |
|
|
87
|
-
| `barebrowse save-state` | Cookies + localStorage → `.barebrowse/state-<timestamp>.json` |
|
|
88
|
-
|
|
89
|
-
**wait-for flags:**
|
|
90
|
-
- `--text=STRING` — Wait for text to appear in page body
|
|
91
|
-
- `--selector=CSS` — Wait for CSS selector to match
|
|
92
|
-
- `--timeout=N` — Max wait time in ms (default: 30000)
|
|
93
|
-
|
|
94
|
-
## Snapshot Format
|
|
95
|
-
|
|
96
|
-
The snapshot is a YAML-like ARIA tree. Each line is one node:
|
|
97
|
-
|
|
98
|
-
```
|
|
99
|
-
# https://example.com/
|
|
100
|
-
# 379 chars → 45 chars (88% pruned)
|
|
101
|
-
- heading "Example Domain" [level=1] [ref=3]
|
|
102
|
-
```
|
|
103
|
-
|
|
104
|
-
- `[ref=N]` — Use this number with click, type, fill, hover, select, drag, upload
|
|
105
|
-
- Refs change on every snapshot — always take a fresh snapshot before interacting
|
|
106
|
-
- **act mode** (default): interactive elements + labels — for clicking, typing, navigating
|
|
107
|
-
- **read mode**: all text content — for reading articles, extracting data
|
|
108
|
-
|
|
109
|
-
## Workflow Pattern
|
|
110
|
-
|
|
111
|
-
1. `barebrowse open <url>` — start session
|
|
112
|
-
2. `barebrowse snapshot` — observe page (read the .yml file)
|
|
113
|
-
3. Decide action based on snapshot content
|
|
114
|
-
4. `barebrowse click/type/fill/press/scroll/drag/upload <ref>` — act
|
|
115
|
-
5. `barebrowse snapshot` — observe result (refs are now different!)
|
|
116
|
-
6. Repeat 3-5 until goal achieved
|
|
117
|
-
7. `barebrowse close` — clean up
|
|
118
|
-
|
|
119
|
-
## Tips
|
|
120
|
-
|
|
121
|
-
- **Always snapshot before interacting** — refs are ephemeral and change every time
|
|
122
|
-
- **Use `fill` instead of `type`** when replacing existing text in input fields
|
|
123
|
-
- **Use `--mode=read`** for snapshot when you need to extract article content or data
|
|
124
|
-
- **Use `back`/`forward`** to navigate browser history instead of re-entering URLs
|
|
125
|
-
- **Use `upload`** for file inputs — pass absolute paths to the files
|
|
126
|
-
- **Use `wait-for`** when content loads asynchronously — more reliable than `wait-idle`
|
|
127
|
-
- **Check `dialog-log`** if JS alerts/confirms were auto-dismissed during your session
|
|
128
|
-
- **Use `save-state`** to persist cookies/localStorage for later sessions via `--storage-state`
|
|
129
|
-
- **Check `console-logs`** when page behavior seems wrong — JS errors show up there
|
|
130
|
-
- **Check `network-log --failed`** to debug missing content or broken API calls
|
|
131
|
-
- **Use `eval`** as an escape hatch when ARIA tree doesn't show what you need
|
|
132
|
-
- **One session per project** — `.barebrowse/` is project-scoped
|
|
133
|
-
- For bot-detected sites, use `--mode=headed` (requires browser with `--remote-debugging-port=9222`)
|
package/commands/barebrowse.md
DELETED
|
@@ -1,132 +0,0 @@
|
|
|
1
|
-
---
|
|
2
|
-
name: barebrowse
|
|
3
|
-
description: Browser automation using the user's real browser with real cookies. Handles consent walls, login sessions, and bot detection automatically.
|
|
4
|
-
allowed-tools: Bash(barebrowse:*)
|
|
5
|
-
---
|
|
6
|
-
# barebrowse CLI — Browser Automation for Agents
|
|
7
|
-
|
|
8
|
-
Browse any URL using the user's real browser with real cookies. Returns pruned ARIA snapshots (40-90% smaller than raw) with `[ref=N]` markers for interaction. Handles cookie consent, login sessions, JS dialogs, and bot detection automatically.
|
|
9
|
-
|
|
10
|
-
## Quick Start
|
|
11
|
-
|
|
12
|
-
```bash
|
|
13
|
-
barebrowse open https://example.com # Start session + navigate
|
|
14
|
-
barebrowse snapshot # Get ARIA snapshot → .barebrowse/page-*.yml
|
|
15
|
-
barebrowse click 8 # Click element with ref=8
|
|
16
|
-
barebrowse snapshot # See result
|
|
17
|
-
barebrowse close # End session
|
|
18
|
-
```
|
|
19
|
-
|
|
20
|
-
All output files go to `.barebrowse/` in the current directory. Read them with the Read tool when needed.
|
|
21
|
-
|
|
22
|
-
## Commands
|
|
23
|
-
|
|
24
|
-
### Session Lifecycle
|
|
25
|
-
|
|
26
|
-
| Command | Description |
|
|
27
|
-
|---------|-------------|
|
|
28
|
-
| `barebrowse open [url] [flags]` | Start browser session. Optionally navigate to URL. |
|
|
29
|
-
| `barebrowse close` | Close session and kill browser. |
|
|
30
|
-
| `barebrowse status` | Check if session is running. |
|
|
31
|
-
|
|
32
|
-
**Open flags:**
|
|
33
|
-
- `--mode=headless|headed|hybrid` — Browser mode (default: headless)
|
|
34
|
-
- `--no-cookies` — Skip cookie injection
|
|
35
|
-
- `--browser=firefox|chromium` — Cookie source
|
|
36
|
-
- `--prune-mode=act|read` — Default pruning mode
|
|
37
|
-
- `--timeout=N` — Navigation timeout in ms
|
|
38
|
-
- `--proxy=URL` — HTTP/SOCKS proxy server
|
|
39
|
-
- `--viewport=WxH` — Viewport size (e.g. 1280x720)
|
|
40
|
-
- `--storage-state=FILE` — Load cookies/localStorage from JSON file
|
|
41
|
-
|
|
42
|
-
### Navigation
|
|
43
|
-
|
|
44
|
-
| Command | Output |
|
|
45
|
-
|---------|--------|
|
|
46
|
-
| `barebrowse goto <url>` | Navigates, waits for load, dismisses consent. Prints "ok". |
|
|
47
|
-
| `barebrowse back` | Go back in browser history. |
|
|
48
|
-
| `barebrowse forward` | Go forward in browser history. |
|
|
49
|
-
| `barebrowse snapshot` | ARIA snapshot → `.barebrowse/page-<timestamp>.yml` |
|
|
50
|
-
| `barebrowse snapshot --mode=read` | Read mode: keeps all text (for content extraction) |
|
|
51
|
-
| `barebrowse screenshot` | Screenshot → `.barebrowse/screenshot-<timestamp>.png` |
|
|
52
|
-
| `barebrowse pdf [--landscape]` | PDF export → `.barebrowse/page-<timestamp>.pdf` |
|
|
53
|
-
|
|
54
|
-
### Interaction
|
|
55
|
-
|
|
56
|
-
| Command | Description |
|
|
57
|
-
|---------|-------------|
|
|
58
|
-
| `barebrowse click <ref>` | Click element (scrolls into view first) |
|
|
59
|
-
| `barebrowse type <ref> <text>` | Type text into element |
|
|
60
|
-
| `barebrowse fill <ref> <text>` | Clear existing content + type new text |
|
|
61
|
-
| `barebrowse press <key>` | Press key: Enter, Tab, Escape, Backspace, Delete, arrows, Space |
|
|
62
|
-
| `barebrowse scroll <deltaY>` | Scroll page (positive=down, negative=up) |
|
|
63
|
-
| `barebrowse hover <ref>` | Hover over element (triggers tooltips) |
|
|
64
|
-
| `barebrowse select <ref> <value>` | Select dropdown option |
|
|
65
|
-
| `barebrowse drag <fromRef> <toRef>` | Drag element to another element |
|
|
66
|
-
| `barebrowse upload <ref> <files..>` | Upload file(s) to a file input element |
|
|
67
|
-
|
|
68
|
-
### Tabs
|
|
69
|
-
|
|
70
|
-
| Command | Description |
|
|
71
|
-
|---------|-------------|
|
|
72
|
-
| `barebrowse tabs` | List open tabs (index, url, title) |
|
|
73
|
-
| `barebrowse tab <index>` | Switch to tab by index |
|
|
74
|
-
|
|
75
|
-
### Debugging
|
|
76
|
-
|
|
77
|
-
| Command | Output |
|
|
78
|
-
|---------|--------|
|
|
79
|
-
| `barebrowse eval <expression>` | Evaluate JS in page, print result |
|
|
80
|
-
| `barebrowse wait-idle` | Wait for network idle (no requests for 500ms) |
|
|
81
|
-
| `barebrowse wait-for [opts]` | Wait for content to appear on page |
|
|
82
|
-
| `barebrowse console-logs` | Console logs → `.barebrowse/console-<timestamp>.json` |
|
|
83
|
-
| `barebrowse network-log` | Network log → `.barebrowse/network-<timestamp>.json` |
|
|
84
|
-
| `barebrowse network-log --failed` | Only failed/4xx/5xx requests |
|
|
85
|
-
| `barebrowse dialog-log` | JS dialog log → `.barebrowse/dialogs-<timestamp>.json` |
|
|
86
|
-
| `barebrowse save-state` | Cookies + localStorage → `.barebrowse/state-<timestamp>.json` |
|
|
87
|
-
|
|
88
|
-
**wait-for flags:**
|
|
89
|
-
- `--text=STRING` — Wait for text to appear in page body
|
|
90
|
-
- `--selector=CSS` — Wait for CSS selector to match
|
|
91
|
-
- `--timeout=N` — Max wait time in ms (default: 30000)
|
|
92
|
-
|
|
93
|
-
## Snapshot Format
|
|
94
|
-
|
|
95
|
-
The snapshot is a YAML-like ARIA tree. Each line is one node:
|
|
96
|
-
|
|
97
|
-
```
|
|
98
|
-
# https://example.com/
|
|
99
|
-
# 379 chars → 45 chars (88% pruned)
|
|
100
|
-
- heading "Example Domain" [level=1] [ref=3]
|
|
101
|
-
```
|
|
102
|
-
|
|
103
|
-
- `[ref=N]` — Use this number with click, type, fill, hover, select, drag, upload
|
|
104
|
-
- Refs change on every snapshot — always take a fresh snapshot before interacting
|
|
105
|
-
- **act mode** (default): interactive elements + labels — for clicking, typing, navigating
|
|
106
|
-
- **read mode**: all text content — for reading articles, extracting data
|
|
107
|
-
|
|
108
|
-
## Workflow Pattern
|
|
109
|
-
|
|
110
|
-
1. `barebrowse open <url>` — start session
|
|
111
|
-
2. `barebrowse snapshot` — observe page (read the .yml file)
|
|
112
|
-
3. Decide action based on snapshot content
|
|
113
|
-
4. `barebrowse click/type/fill/press/scroll/drag/upload <ref>` — act
|
|
114
|
-
5. `barebrowse snapshot` — observe result (refs are now different!)
|
|
115
|
-
6. Repeat 3-5 until goal achieved
|
|
116
|
-
7. `barebrowse close` — clean up
|
|
117
|
-
|
|
118
|
-
## Tips
|
|
119
|
-
|
|
120
|
-
- **Always snapshot before interacting** — refs are ephemeral and change every time
|
|
121
|
-
- **Use `fill` instead of `type`** when replacing existing text in input fields
|
|
122
|
-
- **Use `--mode=read`** for snapshot when you need to extract article content or data
|
|
123
|
-
- **Use `back`/`forward`** to navigate browser history instead of re-entering URLs
|
|
124
|
-
- **Use `upload`** for file inputs — pass absolute paths to the files
|
|
125
|
-
- **Use `wait-for`** when content loads asynchronously — more reliable than `wait-idle`
|
|
126
|
-
- **Check `dialog-log`** if JS alerts/confirms were auto-dismissed during your session
|
|
127
|
-
- **Use `save-state`** to persist cookies/localStorage for later sessions via `--storage-state`
|
|
128
|
-
- **Check `console-logs`** when page behavior seems wrong — JS errors show up there
|
|
129
|
-
- **Check `network-log --failed`** to debug missing content or broken API calls
|
|
130
|
-
- **Use `eval`** as an escape hatch when ARIA tree doesn't show what you need
|
|
131
|
-
- **One session per project** — `.barebrowse/` is project-scoped
|
|
132
|
-
- For bot-detected sites, use `--mode=headed` (requires browser with `--remote-debugging-port=9222`)
|