unbrowse 2.0.4 → 2.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "unbrowse",
3
- "version": "2.0.4",
3
+ "version": "2.0.5",
4
4
  "description": "Reverse-engineer any website into reusable API skills. npm CLI + local engine.",
5
5
  "type": "module",
6
6
  "bin": {
@@ -6,6 +6,40 @@ import { log } from "../logger.js";
6
6
  // BUG-GC-012: Use a real Chrome UA — HeadlessChrome is actively blocked by Google and others.
7
7
  const CHROME_UA = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36";
8
8
 
9
+ // Stealth script — hides headless Chrome indicators from bot detection.
10
+ // Ported from kuri's cdp/js/stealth.js (commit 4dbbd89).
11
+ const STEALTH_SCRIPT = `
12
+ Object.defineProperty(navigator, 'webdriver', { get: () => false, configurable: true });
13
+ Object.defineProperty(navigator, 'plugins', {
14
+ get: () => {
15
+ const p = [
16
+ { name: 'Chrome PDF Plugin', filename: 'internal-pdf-viewer', description: 'Portable Document Format' },
17
+ { name: 'Chrome PDF Viewer', filename: 'mhjfbmdgcfjbbpaeojofohoefgiehjai', description: '' },
18
+ { name: 'Native Client', filename: 'internal-nacl-plugin', description: '' },
19
+ ];
20
+ p.length = 3;
21
+ return p;
22
+ },
23
+ configurable: true,
24
+ });
25
+ Object.defineProperty(navigator, 'languages', { get: () => ['en-US', 'en'], configurable: true });
26
+ if (!window.chrome) window.chrome = {};
27
+ if (!window.chrome.runtime) window.chrome.runtime = { connect: () => {}, sendMessage: () => {}, id: undefined };
28
+ const origQuery = window.navigator.permissions?.query;
29
+ if (origQuery) {
30
+ window.navigator.permissions.query = (p) =>
31
+ p.name === 'notifications' ? Promise.resolve({ state: Notification.permission }) : origQuery(p);
32
+ }
33
+ try {
34
+ const d = Object.getOwnPropertyDescriptor(HTMLIFrameElement.prototype, 'contentWindow');
35
+ if (d) Object.defineProperty(HTMLIFrameElement.prototype, 'contentWindow', { get: function() { return d.get.call(this); } });
36
+ } catch {}
37
+ Object.defineProperty(navigator, 'userAgent', {
38
+ get: () => '${CHROME_UA}',
39
+ configurable: true,
40
+ });
41
+ `;
42
+
9
43
  // Tab semaphore: max 3 concurrent capture tabs
10
44
  const MAX_CONCURRENT_TABS = 3;
11
45
  let activeTabs = 0;
@@ -654,6 +688,11 @@ export async function captureSession(
654
688
  await injectCookies(tabId, cookies);
655
689
  }
656
690
 
691
+ // Inject stealth patches — hide headless Chrome indicators from bot detection
692
+ try {
693
+ await kuri.evaluate(tabId, STEALTH_SCRIPT);
694
+ } catch { /* best-effort */ }
695
+
657
696
  // Start HAR recording
658
697
  await kuri.harStart(tabId);
659
698
 
@@ -662,16 +701,23 @@ export async function captureSession(
662
701
  try { pageDomain = getRegistrableDomain(new URL(url).hostname); } catch { /* bad url */ }
663
702
 
664
703
  // Inject fetch/XHR interceptor BEFORE navigation to capture all response bodies
665
- // Navigate directly to target URL — skip origin pre-navigation to save 1-2s on heavy SPAs.
666
- // The interceptor is re-injected after navigation anyway (page context resets on navigate).
667
- await kuri.evaluate(tabId, INTERCEPTOR_SCRIPT).catch(() => {});
704
+ // Navigate to origin first so cookies are applied in the correct domain context
705
+ // before the full page load — required for sites like LinkedIn that check auth on first load.
706
+ try {
707
+ const origin = new URL(url).origin;
708
+ await kuri.navigate(tabId, origin);
709
+ await new Promise((r) => setTimeout(r, 500));
710
+ await kuri.evaluate(tabId, STEALTH_SCRIPT);
711
+ await kuri.evaluate(tabId, INTERCEPTOR_SCRIPT);
712
+ } catch { /* best-effort */ }
668
713
 
669
714
  // Navigate to target URL
670
715
  await kuri.navigate(tabId, url);
671
716
 
672
- // Re-inject interceptor after navigation (page context resets on navigate)
717
+ // Re-inject stealth + interceptor after navigation (page context resets on navigate)
673
718
  try {
674
719
  await new Promise((r) => setTimeout(r, 300));
720
+ await kuri.evaluate(tabId, STEALTH_SCRIPT);
675
721
  await kuri.evaluate(tabId, INTERCEPTOR_SCRIPT);
676
722
  } catch { /* page may not be ready */ }
677
723
 
Binary file
Binary file
Binary file
Binary file