unbrowse 2.0.4 → 2.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json
CHANGED
|
@@ -6,6 +6,40 @@ import { log } from "../logger.js";
|
|
|
6
6
|
// BUG-GC-012: Use a real Chrome UA — HeadlessChrome is actively blocked by Google and others.
const CHROME_UA = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36";

// Stealth script — hides headless Chrome indicators from bot detection.
// Ported from kuri's cdp/js/stealth.js (commit 4dbbd89).
//
// This is JavaScript source text that gets evaluated inside the page. It is
// wrapped in an IIFE so that:
//   - helper bindings (origQuery, ...) do not leak into the page's global scope, and
//   - evaluating it more than once in the same page context does not fail with
//     an "Identifier has already been declared" SyntaxError from a top-level const.
// CHROME_UA is embedded via JSON.stringify so it is always a valid JS string
// literal, whatever characters the UA contains.
const STEALTH_SCRIPT = `
(() => {
  Object.defineProperty(navigator, 'webdriver', { get: () => false, configurable: true });
  Object.defineProperty(navigator, 'plugins', {
    get: () => {
      const p = [
        { name: 'Chrome PDF Plugin', filename: 'internal-pdf-viewer', description: 'Portable Document Format' },
        { name: 'Chrome PDF Viewer', filename: 'mhjfbmdgcfjbbpaeojofohoefgiehjai', description: '' },
        { name: 'Native Client', filename: 'internal-nacl-plugin', description: '' },
      ];
      return p;
    },
    configurable: true,
  });
  Object.defineProperty(navigator, 'languages', { get: () => ['en-US', 'en'], configurable: true });
  if (!window.chrome) window.chrome = {};
  if (!window.chrome.runtime) window.chrome.runtime = { connect: () => {}, sendMessage: () => {}, id: undefined };
  const permissions = window.navigator.permissions;
  const origQuery = permissions?.query;
  if (origQuery) {
    // Forward with the original receiver: calling a native method unbound throws "Illegal invocation".
    permissions.query = (p) =>
      p.name === 'notifications' ? Promise.resolve({ state: Notification.permission }) : origQuery.call(permissions, p);
  }
  try {
    const d = Object.getOwnPropertyDescriptor(HTMLIFrameElement.prototype, 'contentWindow');
    if (d) Object.defineProperty(HTMLIFrameElement.prototype, 'contentWindow', { get: function() { return d.get.call(this); } });
  } catch {}
  Object.defineProperty(navigator, 'userAgent', {
    get: () => ${JSON.stringify(CHROME_UA)},
    configurable: true,
  });
})();
`;

// Tab semaphore: max 3 concurrent capture tabs
const MAX_CONCURRENT_TABS = 3;
let activeTabs = 0;
|
|
@@ -654,6 +688,11 @@ export async function captureSession(
|
|
|
654
688
|
await injectCookies(tabId, cookies);
|
|
655
689
|
}
|
|
656
690
|
|
|
691
|
+
// Inject stealth patches — hide headless Chrome indicators from bot detection
|
|
692
|
+
try {
|
|
693
|
+
await kuri.evaluate(tabId, STEALTH_SCRIPT);
|
|
694
|
+
} catch { /* best-effort */ }
|
|
695
|
+
|
|
657
696
|
// Start HAR recording
|
|
658
697
|
await kuri.harStart(tabId);
|
|
659
698
|
|
|
@@ -662,16 +701,23 @@ export async function captureSession(
|
|
|
662
701
|
try { pageDomain = getRegistrableDomain(new URL(url).hostname); } catch { /* bad url */ }
|
|
663
702
|
|
|
664
703
|
// Inject fetch/XHR interceptor BEFORE navigation to capture all response bodies
|
|
665
|
-
// Navigate
|
|
666
|
-
//
|
|
667
|
-
|
|
704
|
+
// Navigate to origin first so cookies are applied in the correct domain context
|
|
705
|
+
// before the full page load — required for sites like LinkedIn that check auth on first load.
|
|
706
|
+
try {
|
|
707
|
+
const origin = new URL(url).origin;
|
|
708
|
+
await kuri.navigate(tabId, origin);
|
|
709
|
+
await new Promise((r) => setTimeout(r, 500));
|
|
710
|
+
await kuri.evaluate(tabId, STEALTH_SCRIPT);
|
|
711
|
+
await kuri.evaluate(tabId, INTERCEPTOR_SCRIPT);
|
|
712
|
+
} catch { /* best-effort */ }
|
|
668
713
|
|
|
669
714
|
// Navigate to target URL
|
|
670
715
|
await kuri.navigate(tabId, url);
|
|
671
716
|
|
|
672
|
-
// Re-inject interceptor after navigation (page context resets on navigate)
|
|
717
|
+
// Re-inject stealth + interceptor after navigation (page context resets on navigate)
|
|
673
718
|
try {
|
|
674
719
|
await new Promise((r) => setTimeout(r, 300));
|
|
720
|
+
await kuri.evaluate(tabId, STEALTH_SCRIPT);
|
|
675
721
|
await kuri.evaluate(tabId, INTERCEPTOR_SCRIPT);
|
|
676
722
|
} catch { /* page may not be ready */ }
|
|
677
723
|
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|