barebrowse 0.7.1 → 0.9.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +249 -0
- package/LICENSE +202 -21
- package/NOTICE +8 -0
- package/README.md +39 -10
- package/barebrowse.context.md +45 -18
- package/cli.js +114 -3
- package/mcp-server.js +276 -70
- package/package.json +2 -2
- package/src/bareagent.js +43 -4
- package/src/chromium.js +115 -5
- package/src/consent.js +3 -8
- package/src/daemon.js +13 -0
- package/src/index.js +440 -135
- package/src/network-idle.js +62 -0
- package/src/prune.js +2 -1
- package/src/stealth.js +87 -6
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* network-idle.js — wait until the page's network has been idle for N ms.
|
|
3
|
+
*
|
|
4
|
+
* Tracks in-flight requests by requestId in a Set, so an orphan
|
|
5
|
+
* loadingFinished/Failed (event for a request whose requestWillBeSent
|
|
6
|
+
* arrived before our listener attached) is a harmless no-op instead of
|
|
7
|
+
* driving a counter negative and resolving prematurely.
|
|
8
|
+
*/
|
|
9
|
+
|
|
10
|
+
/**
 * Wait until no tracked network request has been in flight for `idle` ms.
 *
 * Subscribes to Network.requestWillBeSent / loadingFinished / loadingFailed
 * on the session and keeps the ids of in-flight requests in a Set. Whenever
 * the Set becomes empty an idle timer is (re)started; any new request cancels
 * it. All listeners and timers are released exactly once, whether the promise
 * resolves (idle reached) or rejects (deadline reached).
 *
 * @param {object} session - CDP session-scoped handle with .on() returning unsub
 * @param {object} [opts]
 * @param {number} [opts.timeout=30000] - Max wait time before reject
 * @param {number} [opts.idle=500] - Required idle duration before resolve
 * @returns {Promise<void>} Resolves once the network has been idle for `idle`
 *   ms; rejects with an Error if that does not happen within `timeout` ms.
 */
export function waitForNetworkIdle(session, opts = {}) {
  // Falsy values (undefined, null, 0, NaN) fall back to the defaults.
  const timeout = opts.timeout || 30000;
  const idle = opts.idle || 500;

  return new Promise((resolve, reject) => {
    const pending = new Set();
    const unsubs = [];
    let timer = null;
    let deadlineTimer = null;
    let settled = false;

    // Single teardown path shared by resolve and reject. Clearing BOTH timers
    // here matters: if the deadline fires while an idle timer is still armed,
    // that idle timer must not run later and unsubscribe a second time.
    const settle = (finish) => {
      if (settled) return;
      settled = true;
      clearTimeout(timer);
      clearTimeout(deadlineTimer);
      for (const unsub of unsubs) unsub();
      finish();
    };

    const done = () => settle(resolve);

    // (Re)arm the idle timer only when nothing is in flight.
    const check = () => {
      clearTimeout(timer);
      if (!settled && pending.size === 0) {
        timer = setTimeout(done, idle);
      }
    };

    unsubs.push(session.on('Network.requestWillBeSent', (p) => {
      pending.add(p.requestId);
      // A new request means we are no longer idle; cancel the countdown.
      clearTimeout(timer);
    }));
    unsubs.push(session.on('Network.loadingFinished', (p) => {
      // delete() on a Set is a no-op for unknown keys — orphan events from
      // requests started before we attached the listener can't push us negative.
      pending.delete(p.requestId);
      check();
    }));
    unsubs.push(session.on('Network.loadingFailed', (p) => {
      pending.delete(p.requestId);
      check();
    }));

    deadlineTimer = setTimeout(() => {
      settle(() => reject(new Error(`waitForNetworkIdle timed out after ${timeout}ms`)));
    }, timeout);

    // Start check immediately (might already be idle)
    check();
  });
}
|
package/src/prune.js
CHANGED
|
@@ -65,7 +65,8 @@ const SKIP_ROLES = new Set([
|
|
|
65
65
|
* @returns {object|null} Pruned tree
|
|
66
66
|
*/
|
|
67
67
|
export function prune(tree, options = {}) {
|
|
68
|
-
|
|
68
|
+
let { mode = 'act', context = '' } = options;
|
|
69
|
+
if (mode === 'read') mode = 'browse';
|
|
69
70
|
const allowedRegions = MODE_REGIONS[mode] || MODE_REGIONS.act;
|
|
70
71
|
const isBrowse = mode === 'browse';
|
|
71
72
|
const keywords = context
|
package/src/stealth.js
CHANGED
|
@@ -23,28 +23,109 @@ const STEALTH_SCRIPT = `
|
|
|
23
23
|
get: () => ['en-US', 'en'],
|
|
24
24
|
});
|
|
25
25
|
|
|
26
|
-
//
|
|
27
|
-
|
|
28
|
-
|
|
26
|
+
// Realistic CPU + memory. Headless under containers can report 1 or odd
|
|
27
|
+
// values that real desktops rarely have, which is its own fingerprint.
|
|
28
|
+
Object.defineProperty(navigator, 'hardwareConcurrency', { get: () => 8 });
|
|
29
|
+
Object.defineProperty(navigator, 'deviceMemory', { get: () => 8 });
|
|
30
|
+
|
|
31
|
+
// chrome / chrome.runtime — headless either omits the object entirely or
|
|
32
|
+
// gives an empty {}; real Chrome has the enum shapes below even before any
|
|
33
|
+
// extension is installed. Fingerprinters check that chrome.runtime exists
|
|
34
|
+
// AND that these enums are present.
|
|
35
|
+
if (!window.chrome) window.chrome = {};
|
|
36
|
+
if (!window.chrome.runtime) {
|
|
37
|
+
window.chrome.runtime = {
|
|
38
|
+
OnInstalledReason: { CHROME_UPDATE: 'chrome_update', INSTALL: 'install', SHARED_MODULE_UPDATE: 'shared_module_update', UPDATE: 'update' },
|
|
39
|
+
OnRestartRequiredReason: { APP_UPDATE: 'app_update', OS_UPDATE: 'os_update', PERIODIC: 'periodic' },
|
|
40
|
+
PlatformArch: { ARM: 'arm', ARM64: 'arm64', MIPS: 'mips', MIPS64: 'mips64', X86_32: 'x86-32', X86_64: 'x86-64' },
|
|
41
|
+
PlatformNaclArch: { ARM: 'arm', MIPS: 'mips', MIPS64: 'mips64', X86_32: 'x86-32', X86_64: 'x86-64' },
|
|
42
|
+
PlatformOs: { ANDROID: 'android', CROS: 'cros', LINUX: 'linux', MAC: 'mac', OPENBSD: 'openbsd', WIN: 'win' },
|
|
43
|
+
RequestUpdateCheckStatus: { NO_UPDATE: 'no_update', THROTTLED: 'throttled', UPDATE_AVAILABLE: 'update_available' },
|
|
44
|
+
};
|
|
45
|
+
}
|
|
46
|
+
|
|
47
|
+
// Notification — headless Chrome doesn't expose the Notification API at
|
|
48
|
+
// all (even on secure contexts), while real Chrome always does and reports
|
|
49
|
+
// 'default' before any prompt. Fingerprinters check both \`typeof
|
|
50
|
+
// Notification\` and \`Notification.permission\`, so we fake both: the
|
|
51
|
+
// constructor when missing, and only the permission getter when it's
|
|
52
|
+
// present (some Chrome versions ship a non-configurable getter and
|
|
53
|
+
// defineProperty would throw — swallowed so the rest of the script runs).
|
|
54
|
+
if (typeof Notification === 'undefined') {
|
|
55
|
+
window.Notification = function Notification() {};
|
|
56
|
+
window.Notification.permission = 'default';
|
|
57
|
+
window.Notification.requestPermission = () => Promise.resolve('default');
|
|
58
|
+
} else {
|
|
59
|
+
try {
|
|
60
|
+
Object.defineProperty(Notification, 'permission', { get: () => 'default' });
|
|
61
|
+
} catch {}
|
|
29
62
|
}
|
|
30
63
|
|
|
31
|
-
// Permissions.query
|
|
64
|
+
// Permissions.query for notifications: keep it consistent with the
|
|
65
|
+
// Notification.permission override above instead of returning 'prompt'
|
|
66
|
+
// unconditionally (the prior hardcoded value was a tell of its own).
|
|
32
67
|
const origQuery = Permissions.prototype.query;
|
|
33
68
|
Permissions.prototype.query = function(desc) {
|
|
34
|
-
if (desc.name === 'notifications') {
|
|
35
|
-
return Promise.resolve({ state:
|
|
69
|
+
if (desc && desc.name === 'notifications') {
|
|
70
|
+
return Promise.resolve({ state: Notification.permission, onchange: null });
|
|
36
71
|
}
|
|
37
72
|
return origQuery.call(this, desc);
|
|
38
73
|
};
|
|
74
|
+
|
|
75
|
+
// WebGL UNMASKED_VENDOR_WEBGL (37445) and UNMASKED_RENDERER_WEBGL (37446) —
|
|
76
|
+
// headless returns "Google Inc. (Google)" / "Google SwiftShader" which
|
|
77
|
+
// is the single most-used headless fingerprint. Spoof a realistic
|
|
78
|
+
// Intel integrated GPU pair (works on macOS and Linux user agents).
|
|
79
|
+
const SPOOFED_VENDOR = 'Intel Inc.';
|
|
80
|
+
const SPOOFED_RENDERER = 'Intel Iris OpenGL Engine';
|
|
81
|
+
const origGetParam = WebGLRenderingContext.prototype.getParameter;
|
|
82
|
+
WebGLRenderingContext.prototype.getParameter = function(p) {
|
|
83
|
+
if (p === 37445) return SPOOFED_VENDOR;
|
|
84
|
+
if (p === 37446) return SPOOFED_RENDERER;
|
|
85
|
+
return origGetParam.apply(this, arguments);
|
|
86
|
+
};
|
|
87
|
+
if (typeof WebGL2RenderingContext !== 'undefined') {
|
|
88
|
+
const origGetParam2 = WebGL2RenderingContext.prototype.getParameter;
|
|
89
|
+
WebGL2RenderingContext.prototype.getParameter = function(p) {
|
|
90
|
+
if (p === 37445) return SPOOFED_VENDOR;
|
|
91
|
+
if (p === 37446) return SPOOFED_RENDERER;
|
|
92
|
+
return origGetParam2.apply(this, arguments);
|
|
93
|
+
};
|
|
94
|
+
}
|
|
39
95
|
`;
|
|
40
96
|
|
|
41
97
|
/**
|
|
42
98
|
* Apply stealth patches to a CDP session.
|
|
43
99
|
* Must be called before any navigation.
|
|
44
100
|
*
|
|
101
|
+
* Splits into two layers:
|
|
102
|
+
* 1. Network.setUserAgentOverride strips "HeadlessChrome" from the UA
|
|
103
|
+
* that ships in HTTP request headers AND that navigator.userAgent
|
|
104
|
+
* reports — `--headless=new` leaves "HeadlessChrome" in there.
|
|
105
|
+
* 2. Page.addScriptToEvaluateOnNewDocument injects the JS-level patches
|
|
106
|
+
* before any page script runs.
|
|
107
|
+
*
|
|
45
108
|
* @param {object} session - Session-scoped CDP handle
|
|
46
109
|
*/
|
|
47
110
|
export async function applyStealth(session) {
|
|
111
|
+
// 1. UA override — read whatever the running browser actually claims, then
|
|
112
|
+
// rewrite the "Headless" marker out. Doing it this way (vs hardcoding a
|
|
113
|
+
// string) keeps the version + platform fields accurate across Chromium
|
|
114
|
+
// releases. Network.setUserAgentOverride is per-session, so it also
|
|
115
|
+
// cleans up the value navigator.userAgent reports inside the page.
|
|
116
|
+
try {
|
|
117
|
+
const { userAgent } = await session.send('Browser.getVersion');
|
|
118
|
+
if (userAgent && userAgent.includes('HeadlessChrome')) {
|
|
119
|
+
await session.send('Network.setUserAgentOverride', {
|
|
120
|
+
userAgent: userAgent.replace(/HeadlessChrome/g, 'Chrome'),
|
|
121
|
+
});
|
|
122
|
+
}
|
|
123
|
+
} catch {
|
|
124
|
+
// Browser.getVersion not reachable from this session — skip UA override
|
|
125
|
+
// and rely on the JS-level patches alone.
|
|
126
|
+
}
|
|
127
|
+
|
|
128
|
+
// 2. JS-level patches
|
|
48
129
|
await session.send('Page.addScriptToEvaluateOnNewDocument', {
|
|
49
130
|
source: STEALTH_SCRIPT,
|
|
50
131
|
});
|