barebrowse 0.4.2 → 0.4.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +26 -0
- package/README.md +2 -2
- package/barebrowse.context.md +9 -8
- package/commands/barebrowse/SKILL.md +3 -5
- package/commands/barebrowse.md +3 -5
- package/docs/00-context/system-state.md +6 -5
- package/mcp-server.js +37 -6
- package/package.json +1 -1
- package/src/consent.js +95 -3
- package/src/index.js +5 -3
- package/.idea/barebrowse.iml +0 -10
- package/.idea/inspectionProfiles/profiles_settings.xml +0 -6
- package/.idea/misc.xml +0 -4
- package/.idea/modules.xml +0 -8
- package/.idea/vcs.xml +0 -6
package/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,31 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
+
## 0.4.4
|
|
4
|
+
|
|
5
|
+
Snapshot URL prefix and MCP large-snapshot handling.
|
|
6
|
+
|
|
7
|
+
### Snapshot URL (`src/index.js`)
|
|
8
|
+
- First line of every snapshot is now `# <current-page-url>`
|
|
9
|
+
- Works in both `browse()` (uses the url param) and `connect().snapshot()` (uses `Page.getNavigationHistory`)
|
|
10
|
+
|
|
11
|
+
### MCP maxChars (`mcp-server.js`)
|
|
12
|
+
- `browse` and `snapshot` tools accept `maxChars` param (default 30000)
|
|
13
|
+
- If snapshot exceeds limit: saved to `.barebrowse/page-<timestamp>.yml`, returns file path message
|
|
14
|
+
- If under limit: returned inline as before
|
|
15
|
+
|
|
16
|
+
### Docs
|
|
17
|
+
- barebrowse.context.md: snapshot format updated with URL line, maxChars documented
|
|
18
|
+
- commands/barebrowse.md + SKILL.md: snapshot example updated
|
|
19
|
+
- docs/00-context/system-state.md: pipeline step 8 updated, MCP maxChars noted
|
|
20
|
+
|
|
21
|
+
## 0.4.3
|
|
22
|
+
|
|
23
|
+
Cookie consent expanded to 29 languages.
|
|
24
|
+
|
|
25
|
+
- Added: RU, UK, PL, CS, TR, RO, HU, EL, SV, DA, NO, FI, AR, FA, ZH, JA, KO, VI, TH, HI, ID/MS
|
|
26
|
+
- Dialog hints for 11 more languages
|
|
27
|
+
- `.npmignore`: added `.idea/` (leaked in 0.4.2 tarball)
|
|
28
|
+
|
|
3
29
|
## 0.4.2
|
|
4
30
|
|
|
5
31
|
Authenticated browsing improvements. MCP sessions now auto-inject cookies and fall back to headed mode when bot-detected.
|
package/README.md
CHANGED
|
@@ -16,7 +16,7 @@
|
|
|
16
16
|
|
|
17
17
|
## What this is
|
|
18
18
|
|
|
19
|
-
barebrowse is agentic browsing stripped to the bone. It gives your AI agent eyes and hands on the web -- navigate any page, see what's there, click buttons, fill forms, scroll, and move on. It uses your installed Chromium browser (Chrome, Brave, Edge -- whatever you have), reuses your existing login sessions, and handles all the friction automatically: cookie consent walls, permission prompts, bot detection
|
|
19
|
+
barebrowse is agentic browsing stripped to the bone. It gives your AI agent eyes and hands on the web -- navigate any page, see what's there, click buttons, fill forms, scroll, and move on. It uses your installed Chromium browser (Chrome, Brave, Edge -- whatever you have), reuses your existing login sessions, and handles all the friction automatically: cookie consent walls, permission prompts, and bot detection.
|
|
20
20
|
|
|
21
21
|
Instead of dumping raw DOM or taking screenshots, barebrowse returns a **pruned ARIA snapshot** -- a compact semantic view of what's on the page and what the agent can interact with. Buttons, links, inputs, headings -- labeled with `[ref=N]` markers the agent uses to act. The pruning pipeline is ported from [mcprune](https://github.com/hamr0/mcprune) and cuts 40-90% of tokens compared to raw page output. Every token your agent reads is meaningful.
|
|
22
22
|
|
|
@@ -109,7 +109,7 @@ This is the obstacle course your agent doesn't have to think about:
|
|
|
109
109
|
|
|
110
110
|
| Obstacle | How it's handled | Mode |
|
|
111
111
|
|----------|-----------------|------|
|
|
112
|
-
| **Cookie consent walls**
|
|
112
|
+
| **Cookie consent walls** | ARIA tree scan + jsClick accept button, 29 languages | Both |
|
|
113
113
|
| **Consent in dialog role** | Detect `dialog`/`alertdialog` with consent hints, click accept inside | Both |
|
|
114
114
|
| **Consent outside dialog** (BBC SourcePoint) | Fallback global button scan when dialog has no accept button | Both |
|
|
115
115
|
| **Consent behind iframe overlay** | JS click via DOM.resolveNode bypasses z-index/overlay issues | Both |
|
package/barebrowse.context.md
CHANGED
|
@@ -87,14 +87,13 @@ const snapshot = await browse('https://example.com', {
|
|
|
87
87
|
|
|
88
88
|
## Snapshot format
|
|
89
89
|
|
|
90
|
-
The snapshot is a YAML-like ARIA tree.
|
|
90
|
+
The snapshot is a YAML-like ARIA tree. First line is the page URL, second is pruning stats, then the tree:
|
|
91
91
|
|
|
92
92
|
```
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
- link "More information..." [ref=8]
|
|
93
|
+
# https://example.com/
|
|
94
|
+
# 379 chars → 45 chars (88% pruned)
|
|
95
|
+
- heading "Example Domain" [level=1] [ref=3]
|
|
96
|
+
- link "More information..." [ref=8]
|
|
98
97
|
```
|
|
99
98
|
|
|
100
99
|
Key rules:
|
|
@@ -145,7 +144,7 @@ barebrowse can inject cookies from the user's real browser sessions, bypassing l
|
|
|
145
144
|
|
|
146
145
|
| Obstacle | How | Mode |
|
|
147
146
|
|---|---|---|
|
|
148
|
-
| Cookie consent
|
|
147
|
+
| Cookie consent | ARIA scan + jsClick accept button, 29 languages | Both |
|
|
149
148
|
| Consent behind iframes | JS `.click()` via DOM.resolveNode bypasses overlays | Both |
|
|
150
149
|
| Permission prompts | Launch flags + CDP Browser.setPermission auto-deny | Both |
|
|
151
150
|
| Media autoplay blocked | `--autoplay-policy=no-user-gesture-required` | Both |
|
|
@@ -238,6 +237,8 @@ barebrowse ships an MCP server for direct use with Claude Desktop, Cursor, or an
|
|
|
238
237
|
|
|
239
238
|
Action tools return `'ok'` -- the agent calls `snapshot` explicitly to observe. This avoids double-token output since MCP tool calls are cheap to chain.
|
|
240
239
|
|
|
240
|
+
`browse` and `snapshot` accept a `maxChars` param (default 30000). If the snapshot exceeds the limit, it's saved to `.barebrowse/page-<timestamp>.yml` and a short message with the file path is returned instead.
|
|
241
|
+
|
|
241
242
|
Session runs in hybrid mode (headless with automatic headed fallback on bot detection). `goto` injects cookies from the user's browser before navigation for authenticated access.
|
|
242
243
|
|
|
243
244
|
Session tools share a singleton page, lazy-created on first use.
|
|
@@ -288,7 +289,7 @@ URL -> chromium.js (find/launch browser, permission flags)
|
|
|
288
289
|
|
|
289
290
|
7. **One page per connect().** Each `connect()` call creates one page. For multiple tabs, call `connect()` multiple times.
|
|
290
291
|
|
|
291
|
-
8. **Consent dismiss is best-effort.** It handles 16+ tested sites across
|
|
292
|
+
8. **Consent dismiss is best-effort.** It handles 16+ tested sites across 29 languages but novel consent implementations may need manual handling. Disable with `{ consent: false }`.
|
|
292
293
|
|
|
293
294
|
9. **Screenshot returns base64.** Write to file with `fs.writeFileSync('shot.png', Buffer.from(base64, 'base64'))` or pass directly to a vision model.
|
|
294
295
|
|
|
package/commands/barebrowse/SKILL.md
CHANGED
|
@@ -96,11 +96,9 @@ All output files go to `.barebrowse/` in the current directory. Read them with t
|
|
|
96
96
|
The snapshot is a YAML-like ARIA tree. Each line is one node:
|
|
97
97
|
|
|
98
98
|
```
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
- StaticText "This domain is for use in illustrative examples." [ref=6]
|
|
103
|
-
- link "More information..." [ref=8]
|
|
99
|
+
# https://example.com/
|
|
100
|
+
# 379 chars → 45 chars (88% pruned)
|
|
101
|
+
- heading "Example Domain" [level=1] [ref=3]
|
|
104
102
|
```
|
|
105
103
|
|
|
106
104
|
- `[ref=N]` — Use this number with click, type, fill, hover, select, drag, upload
|
package/commands/barebrowse.md
CHANGED
|
@@ -95,11 +95,9 @@ All output files go to `.barebrowse/` in the current directory. Read them with t
|
|
|
95
95
|
The snapshot is a YAML-like ARIA tree. Each line is one node:
|
|
96
96
|
|
|
97
97
|
```
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
- StaticText "This domain is for use in illustrative examples." [ref=6]
|
|
102
|
-
- link "More information..." [ref=8]
|
|
98
|
+
# https://example.com/
|
|
99
|
+
# 379 chars → 45 chars (88% pruned)
|
|
100
|
+
- heading "Example Domain" [level=1] [ref=3]
|
|
103
101
|
```
|
|
104
102
|
|
|
105
103
|
- `[ref=N]` — Use this number with click, type, fill, hover, select, drag, upload
|
|
package/docs/00-context/system-state.md
CHANGED
|
@@ -64,7 +64,7 @@ Every action returns a **pruned ARIA snapshot** -- the agent's view of the page
|
|
|
64
64
|
|
|
65
65
|
| Obstacle | How It's Handled | Mode |
|
|
66
66
|
|----------|-----------------|------|
|
|
67
|
-
| **Cookie consent walls**
|
|
67
|
+
| **Cookie consent walls** | ARIA tree scan, jsClick accept button. 29 languages | Both |
|
|
68
68
|
| **Consent in dialog role** | Detect `dialog`/`alertdialog` with consent hints, click accept inside | Both |
|
|
69
69
|
| **Consent outside dialog** (BBC SourcePoint) | Fallback global button scan when dialog has no accept button | Both |
|
|
70
70
|
| **Consent behind iframe overlay** | JS `.click()` via `DOM.resolveNode` bypasses z-index/overlay issues | Both |
|
|
@@ -155,7 +155,7 @@ Every action returns a **pruned ARIA snapshot** -- the agent's view of the page
|
|
|
155
155
|
|
|
156
156
|
8. SNAPSHOT Accessibility.getFullAXTree -> nested tree (aria.js)
|
|
157
157
|
prune.js: 9-step pipeline (47-95% token reduction)
|
|
158
|
-
Output: YAML-like text with [ref=N] markers
|
|
158
|
+
Output: URL + pruning stats + YAML-like text with [ref=N] markers
|
|
159
159
|
|
|
160
160
|
9. INTERACT interact.js dispatches real CDP Input events
|
|
161
161
|
click: scrollIntoView -> getBoxModel -> mousePressed/Released
|
|
@@ -181,7 +181,7 @@ Thirteen modules, zero required dependencies.
|
|
|
181
181
|
| `src/auth.js` | 279 | Cookie extraction (Chromium AES + keyring, Firefox), CDP injection |
|
|
182
182
|
| `src/prune.js` | 472 | ARIA pruning pipeline (9-step, ported from mcprune) |
|
|
183
183
|
| `src/interact.js` | 208 | Click, type, press, scroll, hover, select |
|
|
184
|
-
| `src/consent.js` |
|
|
184
|
+
| `src/consent.js` | ~280 | Auto-dismiss cookie consent dialogs, 29 languages |
|
|
185
185
|
| `src/stealth.js` | 51 | Navigator patches for headless anti-detection |
|
|
186
186
|
| `src/bareagent.js` | 161 | Tool adapter for bareagent Loop |
|
|
187
187
|
| `src/daemon.js` | ~230 | Background HTTP server holding connect() session for CLI mode |
|
|
@@ -232,11 +232,11 @@ On `connect()` sessions: `click(ref)`, `type(ref, text, opts)`, `press(key)`, `s
|
|
|
232
232
|
**Real-world tested against:** Google, Wikipedia, GitHub (SPA), Hacker News, DuckDuckGo, YouTube (search + video playback), example.com
|
|
233
233
|
|
|
234
234
|
### Cookie consent auto-dismiss -- done
|
|
235
|
-
Automatically detects and dismisses
|
|
235
|
+
Automatically detects and dismisses cookie consent dialogs after page load.
|
|
236
236
|
- Scans ARIA tree for `dialog`/`alertdialog` with consent-related content
|
|
237
237
|
- Falls back to global button scan for sites that don't use dialog roles (e.g. BBC SourcePoint)
|
|
238
238
|
- Uses JS `.click()` via `DOM.resolveNode` + `Runtime.callFunctionOn` to bypass iframe overlays
|
|
239
|
-
-
|
|
239
|
+
- 29 languages: EN, NL, DE, FR, ES, IT, PT, RU, UK, PL, CS, TR, RO, HU, EL, SV, DA, NO, FI, AR, FA, ZH, JA, KO, VI, TH, HI, ID/MS
|
|
240
240
|
- Opt-out via `{ consent: false }`
|
|
241
241
|
- Works in both headless and headed modes
|
|
242
242
|
|
|
@@ -311,6 +311,7 @@ Raw JSON-RPC 2.0 over stdio. Zero SDK dependencies. `npm install barebrowse` the
|
|
|
311
311
|
|
|
312
312
|
12 tools: browse (one-shot), goto, snapshot, click, type, press, scroll, back, forward, drag, upload, pdf.
|
|
313
313
|
Action tools return `'ok'` -- agent calls `snapshot` explicitly (MCP tool calls are cheap to chain).
|
|
314
|
+
`browse` and `snapshot` accept `maxChars` (default 30000) — large snapshots are saved to `.barebrowse/` and a file path is returned.
|
|
314
315
|
Session runs in hybrid mode (headless + automatic headed fallback on bot detection). `goto` injects cookies from the user's browser before navigation.
|
|
315
316
|
Session tools share a singleton page, lazy-created on first use.
|
|
316
317
|
|
package/mcp-server.js
CHANGED
|
@@ -10,6 +10,19 @@
|
|
|
10
10
|
*/
|
|
11
11
|
|
|
12
12
|
import { browse, connect } from './src/index.js';
|
|
13
|
+
import { mkdirSync, writeFileSync } from 'node:fs';
|
|
14
|
+
import { join } from 'node:path';
|
|
15
|
+
|
|
16
|
+
const MAX_CHARS_DEFAULT = 30000;
|
|
17
|
+
const OUTPUT_DIR = join(process.cwd(), '.barebrowse');
|
|
18
|
+
|
|
19
|
+
function saveSnapshot(text) {
|
|
20
|
+
mkdirSync(OUTPUT_DIR, { recursive: true });
|
|
21
|
+
const ts = new Date().toISOString().replace(/[:.]/g, '-');
|
|
22
|
+
const file = join(OUTPUT_DIR, `page-${ts}.yml`);
|
|
23
|
+
writeFileSync(file, text);
|
|
24
|
+
return file;
|
|
25
|
+
}
|
|
13
26
|
|
|
14
27
|
let _page = null;
|
|
15
28
|
|
|
@@ -27,6 +40,7 @@ const TOOLS = [
|
|
|
27
40
|
properties: {
|
|
28
41
|
url: { type: 'string', description: 'URL to browse' },
|
|
29
42
|
mode: { type: 'string', enum: ['headless', 'headed', 'hybrid'], description: 'Browser mode (default: headless)' },
|
|
43
|
+
maxChars: { type: 'number', description: 'Max chars to return inline. Larger snapshots are saved to .barebrowse/ and a file path is returned instead. Default: 30000.' },
|
|
30
44
|
},
|
|
31
45
|
required: ['url'],
|
|
32
46
|
},
|
|
@@ -45,7 +59,12 @@ const TOOLS = [
|
|
|
45
59
|
{
|
|
46
60
|
name: 'snapshot',
|
|
47
61
|
description: 'Get the current ARIA snapshot of the session page. Returns a YAML-like tree with [ref=N] markers on interactive elements.',
|
|
48
|
-
inputSchema: {
|
|
62
|
+
inputSchema: {
|
|
63
|
+
type: 'object',
|
|
64
|
+
properties: {
|
|
65
|
+
maxChars: { type: 'number', description: 'Max chars to return inline. Larger snapshots are saved to .barebrowse/ and a file path is returned instead. Default: 30000.' },
|
|
66
|
+
},
|
|
67
|
+
},
|
|
49
68
|
},
|
|
50
69
|
{
|
|
51
70
|
name: 'click',
|
|
@@ -141,9 +160,15 @@ const TOOLS = [
|
|
|
141
160
|
|
|
142
161
|
async function handleToolCall(name, args) {
|
|
143
162
|
switch (name) {
|
|
144
|
-
case 'browse':
|
|
145
|
-
|
|
146
|
-
|
|
163
|
+
case 'browse': {
|
|
164
|
+
const text = await browse(args.url, { mode: args.mode });
|
|
165
|
+
const limit = args.maxChars ?? MAX_CHARS_DEFAULT;
|
|
166
|
+
if (text.length > limit) {
|
|
167
|
+
const file = saveSnapshot(text);
|
|
168
|
+
return `Snapshot (${text.length} chars) saved to ${file}`;
|
|
169
|
+
}
|
|
170
|
+
return text;
|
|
171
|
+
}
|
|
147
172
|
case 'goto': {
|
|
148
173
|
const page = await getPage();
|
|
149
174
|
try { await page.injectCookies(args.url); } catch {}
|
|
@@ -152,7 +177,13 @@ async function handleToolCall(name, args) {
|
|
|
152
177
|
}
|
|
153
178
|
case 'snapshot': {
|
|
154
179
|
const page = await getPage();
|
|
155
|
-
|
|
180
|
+
const text = await page.snapshot();
|
|
181
|
+
const limit = args.maxChars ?? MAX_CHARS_DEFAULT;
|
|
182
|
+
if (text.length > limit) {
|
|
183
|
+
const file = saveSnapshot(text);
|
|
184
|
+
return `Snapshot (${text.length} chars) saved to ${file}`;
|
|
185
|
+
}
|
|
186
|
+
return text;
|
|
156
187
|
}
|
|
157
188
|
case 'click': {
|
|
158
189
|
const page = await getPage();
|
|
@@ -218,7 +249,7 @@ async function handleMessage(msg) {
|
|
|
218
249
|
return jsonrpcResponse(id, {
|
|
219
250
|
protocolVersion: '2024-11-05',
|
|
220
251
|
capabilities: { tools: {} },
|
|
221
|
-
serverInfo: { name: 'barebrowse', version: '0.4.
|
|
252
|
+
serverInfo: { name: 'barebrowse', version: '0.4.4' },
|
|
222
253
|
});
|
|
223
254
|
}
|
|
224
255
|
|
package/package.json
CHANGED
package/src/consent.js
CHANGED
|
@@ -38,6 +38,86 @@ const ACCEPT_PATTERNS = [
|
|
|
38
38
|
/\baccetto\b/i,
|
|
39
39
|
// Portuguese
|
|
40
40
|
/\baceitar\s*tudo\b/i,
|
|
41
|
+
// Russian
|
|
42
|
+
/принять\s*все/i,
|
|
43
|
+
/принять/i,
|
|
44
|
+
/согласен/i,
|
|
45
|
+
// Ukrainian
|
|
46
|
+
/прийняти\s*все/i,
|
|
47
|
+
/прийняти/i,
|
|
48
|
+
// Polish
|
|
49
|
+
/zaakceptuj\s*wszystk/i,
|
|
50
|
+
/akceptuj\s*wszystk/i,
|
|
51
|
+
/zgadzam\s*się/i,
|
|
52
|
+
// Czech
|
|
53
|
+
/přijmout\s*vše/i,
|
|
54
|
+
/souhlasím/i,
|
|
55
|
+
// Turkish
|
|
56
|
+
/tümünü\s*kabul\s*et/i,
|
|
57
|
+
/kabul\s*et/i,
|
|
58
|
+
/kabul\s*ediyorum/i,
|
|
59
|
+
// Romanian
|
|
60
|
+
/acceptă\s*tot/i,
|
|
61
|
+
/accept\s*toate/i,
|
|
62
|
+
// Hungarian
|
|
63
|
+
/összes\s*elfogadás/i,
|
|
64
|
+
/elfogad/i,
|
|
65
|
+
// Greek
|
|
66
|
+
/αποδοχή\s*όλων/i,
|
|
67
|
+
/αποδέχομαι/i,
|
|
68
|
+
// Swedish
|
|
69
|
+
/acceptera\s*alla/i,
|
|
70
|
+
/godkänn\s*alla/i,
|
|
71
|
+
// Danish
|
|
72
|
+
/accepter\s*alle/i,
|
|
73
|
+
/acceptér\s*alle/i,
|
|
74
|
+
// Norwegian
|
|
75
|
+
/godta\s*alle/i,
|
|
76
|
+
/aksepter\s*alle/i,
|
|
77
|
+
// Finnish
|
|
78
|
+
/hyväksy\s*kaikki/i,
|
|
79
|
+
/hyväksyn/i,
|
|
80
|
+
// Arabic
|
|
81
|
+
/قبول\s*الكل/,
|
|
82
|
+
/قبول\s*الجميع/,
|
|
83
|
+
/موافق/,
|
|
84
|
+
/قبول/,
|
|
85
|
+
// Persian
|
|
86
|
+
/پذیرش\s*همه/,
|
|
87
|
+
/موافقم/,
|
|
88
|
+
/پذیرش/,
|
|
89
|
+
// Chinese (Simplified + Traditional)
|
|
90
|
+
/全部接受/,
|
|
91
|
+
/接受所有/,
|
|
92
|
+
/接受全部/,
|
|
93
|
+
/同意并继续/,
|
|
94
|
+
/全部接受/,
|
|
95
|
+
/接受/,
|
|
96
|
+
/同意/,
|
|
97
|
+
// Japanese
|
|
98
|
+
/すべて受け入れ/,
|
|
99
|
+
/すべて許可/,
|
|
100
|
+
/同意する/,
|
|
101
|
+
/同意します/,
|
|
102
|
+
// Korean
|
|
103
|
+
/모두\s*수락/,
|
|
104
|
+
/모두\s*동의/,
|
|
105
|
+
/동의합니다/,
|
|
106
|
+
/수락/,
|
|
107
|
+
// Vietnamese
|
|
108
|
+
/chấp\s*nhận\s*tất\s*cả/i,
|
|
109
|
+
/đồng\s*ý\s*tất\s*cả/i,
|
|
110
|
+
/đồng\s*ý/i,
|
|
111
|
+
// Thai
|
|
112
|
+
/ยอมรับทั้งหมด/,
|
|
113
|
+
/ยอมรับ/,
|
|
114
|
+
// Hindi
|
|
115
|
+
/सभी\s*स्वीकार/,
|
|
116
|
+
/स्वीकार\s*करें/,
|
|
117
|
+
/सहमत/,
|
|
118
|
+
// Indonesian / Malay
|
|
119
|
+
/terima\s*semua/i,
|
|
120
|
+
/setuju/i,
|
|
41
121
|
// Generic single-word fallbacks (only matched inside dialogs)
|
|
42
122
|
/^accept$/i,
|
|
43
123
|
/^agree$/i,
|
|
@@ -52,10 +132,22 @@ const CONSENT_DIALOG_HINTS = [
|
|
|
52
132
|
/cookie/i,
|
|
53
133
|
/consent/i,
|
|
54
134
|
/privacy/i,
|
|
55
|
-
/voordat\s*je\s*verdergaat/i, // Dutch: "Before you continue"
|
|
56
135
|
/before\s*you\s*continue/i,
|
|
57
|
-
/
|
|
58
|
-
/
|
|
136
|
+
/voordat\s*je\s*verdergaat/i, // Dutch
|
|
137
|
+
/bevor\s*du\s*fortf/i, // German
|
|
138
|
+
/avant\s*de\s*continuer/i, // French
|
|
139
|
+
/antes\s*de\s*continuar/i, // Spanish / Portuguese
|
|
140
|
+
/prima\s*di\s*continuare/i, // Italian
|
|
141
|
+
/zanim\s*przejdziesz/i, // Polish
|
|
142
|
+
/прежде\s*чем\s*продолжить/i, // Russian
|
|
143
|
+
/devam\s*etmeden\s*önce/i, // Turkish
|
|
144
|
+
/続行する前に/, // Japanese
|
|
145
|
+
/继续前/, // Chinese Simplified
|
|
146
|
+
/繼續前/, // Chinese Traditional
|
|
147
|
+
/계속하기\s*전에/, // Korean
|
|
148
|
+
/trước\s*khi\s*tiếp\s*tục/i, // Vietnamese
|
|
149
|
+
/ملفات\s*تعريف\s*الارتباط/, // Arabic: cookies
|
|
150
|
+
/คุกกี้/, // Thai: cookies
|
|
59
151
|
];
|
|
60
152
|
|
|
61
153
|
/**
|
package/src/index.js
CHANGED
|
@@ -103,7 +103,7 @@ export async function browse(url, opts = {}) {
|
|
|
103
103
|
} else {
|
|
104
104
|
snapshot = raw;
|
|
105
105
|
}
|
|
106
|
-
const stats = `# ${raw.length.toLocaleString()} chars → ${snapshot.length.toLocaleString()} chars (${Math.round((1 - snapshot.length / raw.length) * 100)}% pruned)`;
|
|
106
|
+
const stats = `# ${url}\n# ${raw.length.toLocaleString()} chars → ${snapshot.length.toLocaleString()} chars (${Math.round((1 - snapshot.length / raw.length) * 100)}% pruned)`;
|
|
107
107
|
snapshot = stats + '\n' + snapshot;
|
|
108
108
|
|
|
109
109
|
// Step 7: Clean up
|
|
@@ -221,10 +221,12 @@ export async function connect(opts = {}) {
|
|
|
221
221
|
const result = await ariaTree(page);
|
|
222
222
|
refMap = result.refMap;
|
|
223
223
|
const raw = formatTree(result.tree);
|
|
224
|
-
|
|
224
|
+
const { currentIndex, entries } = await page.session.send('Page.getNavigationHistory');
|
|
225
|
+
const pageUrl = entries[currentIndex]?.url || '';
|
|
226
|
+
if (pruneOpts === false) return `# ${pageUrl}\n` + raw;
|
|
225
227
|
const pruned = pruneTree(result.tree, { mode: pruneOpts?.mode || 'act' });
|
|
226
228
|
const out = formatTree(pruned);
|
|
227
|
-
const stats = `# ${raw.length.toLocaleString()} chars → ${out.length.toLocaleString()} chars (${Math.round((1 - out.length / raw.length) * 100)}% pruned)`;
|
|
229
|
+
const stats = `# ${pageUrl}\n# ${raw.length.toLocaleString()} chars → ${out.length.toLocaleString()} chars (${Math.round((1 - out.length / raw.length) * 100)}% pruned)`;
|
|
228
230
|
return stats + '\n' + out;
|
|
229
231
|
},
|
|
230
232
|
|
package/.idea/barebrowse.iml
DELETED
|
@@ -1,10 +0,0 @@
|
|
|
1
|
-
<?xml version="1.0" encoding="UTF-8"?>
|
|
2
|
-
<module type="PYTHON_MODULE" version="4">
|
|
3
|
-
<component name="NewModuleRootManager">
|
|
4
|
-
<content url="file://$MODULE_DIR$">
|
|
5
|
-
<sourceFolder url="file://$MODULE_DIR$/src" isTestSource="false" />
|
|
6
|
-
</content>
|
|
7
|
-
<orderEntry type="jdk" jdkName="Python 3.14" jdkType="Python SDK" />
|
|
8
|
-
<orderEntry type="sourceFolder" forTests="false" />
|
|
9
|
-
</component>
|
|
10
|
-
</module>
|
package/.idea/misc.xml
DELETED
package/.idea/modules.xml
DELETED
|
@@ -1,8 +0,0 @@
|
|
|
1
|
-
<?xml version="1.0" encoding="UTF-8"?>
|
|
2
|
-
<project version="4">
|
|
3
|
-
<component name="ProjectModuleManager">
|
|
4
|
-
<modules>
|
|
5
|
-
<module fileurl="file://$PROJECT_DIR$/.idea/barebrowse.iml" filepath="$PROJECT_DIR$/.idea/barebrowse.iml" />
|
|
6
|
-
</modules>
|
|
7
|
-
</component>
|
|
8
|
-
</project>
|