unbrowse 2.0.3 → 2.0.4

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "unbrowse",
3
- "version": "2.0.3",
3
+ "version": "2.0.4",
4
4
  "description": "Reverse-engineer any website into reusable API skills. npm CLI + local engine.",
5
5
  "type": "module",
6
6
  "bin": {
@@ -112,10 +112,10 @@ function pickFirefoxProfile(profilesRoot: string, profile?: string): string | nu
112
112
  return existsSync(candidate) ? candidate : null;
113
113
  }
114
114
 
115
- function getFirefoxCookiesPath(profile?: string): string | null {
116
- const profilesRoot = getFirefoxProfilesRoot();
117
- if (!profilesRoot || !existsSync(profilesRoot)) return null;
118
- return pickFirefoxProfile(profilesRoot, profile);
115
+ function getFirefoxCookiesPath(profile?: string, profilesRoot?: string): string | null {
116
+ const root = profilesRoot ?? getFirefoxProfilesRoot();
117
+ if (!root || !existsSync(root)) return null;
118
+ return pickFirefoxProfile(root, profile);
119
119
  }
120
120
 
121
121
  // ---------------------------------------------------------------------------
@@ -335,13 +335,14 @@ export function extractFromChromium(
335
335
 
336
336
  export function extractFromFirefox(
337
337
  domain: string,
338
- opts?: { profile?: string },
338
+ opts?: { profile?: string; profilesRoot?: string },
339
339
  ): ExtractionResult {
340
340
  const warnings: string[] = [];
341
- const dbPath = getFirefoxCookiesPath(opts?.profile);
341
+ const dbPath = getFirefoxCookiesPath(opts?.profile, opts?.profilesRoot);
342
+ const browserLabel = opts?.profilesRoot ? "Zen" : "Firefox";
342
343
 
343
344
  if (!dbPath) {
344
- warnings.push("Firefox cookies DB not found");
345
+ warnings.push(`${browserLabel} cookies DB not found`);
345
346
  return { cookies: [], source: null, warnings };
346
347
  }
347
348
 
@@ -373,14 +374,14 @@ export function extractFromFirefox(
373
374
  return results;
374
375
  });
375
376
 
376
- const source = opts?.profile ? `Firefox profile "${opts.profile}"` : "Firefox default profile";
377
+ const source = opts?.profile ? `${browserLabel} profile "${opts.profile}"` : `${browserLabel} default profile`;
377
378
  if (cookies.length === 0) {
378
379
  warnings.push(`No cookies for ${domain} found in ${source}`);
379
380
  }
380
381
  log("auth", `extracted ${cookies.length} cookies for ${domain} from ${source}`);
381
382
  return { cookies, source: cookies.length > 0 ? source : null, warnings };
382
383
  } catch (err) {
383
- warnings.push(`Firefox extraction failed: ${err instanceof Error ? err.message : err}`);
384
+ warnings.push(`${browserLabel} extraction failed: ${err instanceof Error ? err.message : err}`);
384
385
  return { cookies: [], source: null, warnings };
385
386
  }
386
387
  }
@@ -416,8 +417,62 @@ export function extractBrowserCookies(
416
417
  return chromium;
417
418
  }
418
419
 
419
- // Fall back to Chrome
420
+ // Try Chrome first
420
421
  const chrome = extractFromChrome(domain, { profile: opts?.chromeProfile });
421
- chrome.warnings.push(...ff.warnings);
422
- return chrome;
422
+ if (chrome.cookies.length > 0) {
423
+ chrome.warnings.push(...ff.warnings);
424
+ return chrome;
425
+ }
426
+
427
+ // Auto-discover other Chromium-family browsers
428
+ const home = homedir();
429
+ const chromiumBrowsers: Array<{ name: string; userDataDir: string; safeStorageService: string }> =
430
+ platform() === "darwin"
431
+ ? [
432
+ { name: "Arc", userDataDir: join(home, "Library", "Application Support", "Arc", "User Data"), safeStorageService: "Arc Safe Storage" },
433
+ { name: "Dia", userDataDir: join(home, "Library", "Application Support", "Dia", "User Data"), safeStorageService: "Dia Safe Storage" },
434
+ { name: "Brave", userDataDir: join(home, "Library", "Application Support", "BraveSoftware", "Brave-Browser"), safeStorageService: "Brave Safe Storage" },
435
+ { name: "Edge", userDataDir: join(home, "Library", "Application Support", "Microsoft Edge"), safeStorageService: "Microsoft Edge Safe Storage" },
436
+ { name: "Vivaldi", userDataDir: join(home, "Library", "Application Support", "Vivaldi"), safeStorageService: "Vivaldi Safe Storage" },
437
+ { name: "Chromium", userDataDir: join(home, "Library", "Application Support", "Chromium"), safeStorageService: "Chromium Safe Storage" },
438
+ ]
439
+ : platform() === "linux"
440
+ ? [
441
+ { name: "Brave", userDataDir: join(home, ".config", "BraveSoftware", "Brave-Browser"), safeStorageService: "Brave Safe Storage" },
442
+ { name: "Edge", userDataDir: join(home, ".config", "microsoft-edge"), safeStorageService: "Microsoft Edge Safe Storage" },
443
+ { name: "Vivaldi", userDataDir: join(home, ".config", "vivaldi"), safeStorageService: "Vivaldi Safe Storage" },
444
+ { name: "Chromium", userDataDir: join(home, ".config", "chromium"), safeStorageService: "Chromium Safe Storage" },
445
+ ]
446
+ : [];
447
+
448
+ const allWarnings = [...ff.warnings, ...chrome.warnings];
449
+ for (const browser of chromiumBrowsers) {
450
+ if (!existsSync(browser.userDataDir)) continue;
451
+ const result = extractFromChromium(domain, {
452
+ userDataDir: browser.userDataDir,
453
+ browserName: browser.name,
454
+ safeStorageService: browser.safeStorageService,
455
+ });
456
+ if (result.cookies.length > 0) {
457
+ result.warnings.push(...allWarnings);
458
+ return result;
459
+ }
460
+ allWarnings.push(...result.warnings);
461
+ }
462
+
463
+ // Also try Firefox-based alternatives (Zen)
464
+ const zenPaths = platform() === "darwin"
465
+ ? [join(home, "Library", "Application Support", "zen")]
466
+ : [join(home, ".zen")];
467
+ for (const zenRoot of zenPaths) {
468
+ if (!existsSync(zenRoot)) continue;
469
+ const zenResult = extractFromFirefox(domain, { profilesRoot: zenRoot });
470
+ if (zenResult.cookies.length > 0) {
471
+ zenResult.warnings.push(...allWarnings);
472
+ return zenResult;
473
+ }
474
+ allWarnings.push(...zenResult.warnings);
475
+ }
476
+
477
+ return { cookies: [], source: null, warnings: allWarnings };
423
478
  }
@@ -38,95 +38,43 @@ export async function interactiveLogin(
38
38
  domain?: string,
39
39
  ): Promise<LoginResult> {
40
40
  const targetDomain = domain ?? new URL(url).hostname;
41
- const profileDir = getProfilePath(targetDomain);
42
41
 
43
42
  log("auth", `interactiveLogin — url: ${url}, domain: ${targetDomain}`);
44
43
 
45
- try {
46
- fs.mkdirSync(profileDir, { recursive: true });
47
-
48
- // Start Kuri and get a tab
49
- await kuri.start();
50
- const tabId = await kuri.getDefaultTab();
51
- await kuri.networkEnable(tabId);
52
-
53
- // Navigate to login URL
54
- await kuri.navigate(tabId, url);
55
-
56
- const startTime = Date.now();
57
-
58
- // Snapshot initial cookies
59
- const initialCookies = await kuri.getCookies(tabId);
60
- const initialCookieCount = initialCookies.filter((c) => isDomainMatch(c.domain, targetDomain)).length;
61
- log("auth", `initial cookies for ${targetDomain}: ${initialCookieCount}`);
62
-
63
- // Wait for user to complete login — detect via cookie changes + URL change
64
- let loggedIn = false;
65
- let lastLoggedUrl = "";
66
- while (Date.now() - startTime < LOGIN_TIMEOUT_MS) {
67
- await new Promise((r) => setTimeout(r, POLL_INTERVAL_MS));
68
- const elapsed = Date.now() - startTime;
69
-
70
- try {
71
- const currentUrl = await kuri.getCurrentUrl(tabId);
72
- const currentDomain = new URL(currentUrl).hostname.toLowerCase();
73
- const targetNorm = targetDomain.toLowerCase();
74
-
75
- if (currentUrl !== lastLoggedUrl) {
76
- log("auth", `navigated to: ${currentUrl}`);
77
- lastLoggedUrl = currentUrl;
78
- }
79
-
80
- if (elapsed < MIN_WAIT_MS) continue;
81
-
82
- const isOnTarget = currentDomain === targetNorm || currentDomain.endsWith("." + targetNorm);
83
- if (isOnTarget) {
84
- const isStillLogin = /\/(login|signin|sign-in|sso|auth|oauth|uas\/login|checkpoint)/.test(new URL(currentUrl).pathname);
44
+ // Open URL in the user's default browser (visible, not headless)
45
+ const { exec } = await import("node:child_process");
46
+ const openCmd = process.platform === "darwin" ? "open" : "xdg-open";
47
+ exec(`${openCmd} ${JSON.stringify(url)}`);
48
+ log("auth", `opened ${url} in default browser via ${openCmd}`);
85
49
 
86
- const currentCookies = await kuri.getCookies(tabId);
87
- const currentCookieCount = currentCookies.filter((c) => isDomainMatch(c.domain, targetDomain)).length;
88
- const gotNewCookies = currentCookieCount > initialCookieCount;
50
+ // Poll extractBrowserAuth until cookies appear or timeout
51
+ const startTime = Date.now();
52
+ while (Date.now() - startTime < LOGIN_TIMEOUT_MS) {
53
+ await new Promise((r) => setTimeout(r, POLL_INTERVAL_MS));
89
54
 
90
- if (!isStillLogin && gotNewCookies) {
91
- loggedIn = true;
92
- log("auth", `login complete ${currentUrl} (cookies: ${initialCookieCount} → ${currentCookieCount})`);
93
- break;
94
- }
95
-
96
- if (!isStillLogin && currentCookieCount > 0) {
97
- loggedIn = true;
98
- log("auth", `already logged in — ${currentUrl} (${currentCookieCount} cookies present)`);
99
- break;
100
- }
101
- }
102
- } catch { /* page navigating */ }
103
- }
104
-
105
- if (!loggedIn) {
106
- log("auth", `login wait ended after ${Math.round((Date.now() - startTime) / 1000)}s — capturing cookies anyway`);
55
+ try {
56
+ const result = await extractBrowserAuth(targetDomain);
57
+ if (result.success && result.cookies_stored > 0) {
58
+ log("auth", `login detected — ${result.cookies_stored} cookies captured for ${targetDomain}`);
59
+ return result;
60
+ }
61
+ } catch (err) {
62
+ log("auth", `poll error: ${err instanceof Error ? err.message : err}`);
107
63
  }
108
64
 
109
- // Extract and store cookies
110
- const cookies = await kuri.getCookies(tabId);
111
- const domainCookies = cookies.filter((c) => isDomainMatch(c.domain, targetDomain));
112
-
113
- if (domainCookies.length === 0) {
114
- return { success: false, domain: targetDomain, cookies_stored: 0, error: "No cookies captured for domain" };
65
+ // Log progress every 10s
66
+ const elapsed = Date.now() - startTime;
67
+ if (elapsed % 10_000 < POLL_INTERVAL_MS) {
68
+ log("auth", `waiting for login... ${Math.round(elapsed / 1000)}s elapsed`);
115
69
  }
116
-
117
- const storableCookies = domainCookies.map((c) => ({
118
- name: c.name, value: c.value, domain: c.domain, path: c.path,
119
- secure: c.secure, httpOnly: c.httpOnly, sameSite: c.sameSite, expires: c.expires,
120
- }));
121
-
122
- const vaultKey = `auth:${getRegistrableDomain(targetDomain)}`;
123
- await storeCredential(vaultKey, JSON.stringify({ cookies: storableCookies }));
124
- log("auth", `stored ${storableCookies.length} cookies under ${vaultKey}`);
125
-
126
- return { success: true, domain: targetDomain, cookies_stored: storableCookies.length };
127
- } finally {
128
- // Cleanup handled by Kuri's tab management
129
70
  }
71
+
72
+ return {
73
+ success: false,
74
+ domain: targetDomain,
75
+ cookies_stored: 0,
76
+ error: `Login timed out after ${LOGIN_TIMEOUT_MS / 1000}s — no cookies detected in browser`,
77
+ };
130
78
  }
131
79
 
132
80
  /**
@@ -1792,13 +1792,15 @@ export async function executeEndpoint(
1792
1792
 
1793
1793
  // CSRF token auto-detection (bird pattern): many sites require CSRF tokens
1794
1794
  // as both a cookie AND a header. Detect common patterns and replay them.
1795
- if (!headers["x-csrf-token"] && !headers["x-xsrf-token"]) {
1795
+ if (!headers["x-csrf-token"] && !headers["x-xsrf-token"] && !headers["csrf-token"]) {
1796
1796
  const csrfCookie = cookies.find((c) =>
1797
- /^(ct0|csrf_token|_csrf|csrftoken|XSRF-TOKEN|_xsrf)$/i.test(c.name)
1797
+ /^(ct0|csrf_token|_csrf|csrftoken|XSRF-TOKEN|_xsrf|JSESSIONID)$/i.test(c.name)
1798
1798
  );
1799
1799
  if (csrfCookie) {
1800
1800
  const v = csrfCookie.value.startsWith('"') && csrfCookie.value.endsWith('"') ? csrfCookie.value.slice(1, -1) : csrfCookie.value;
1801
- headers["x-csrf-token"] = v;
1801
+ // LinkedIn uses "csrf-token" header derived from JSESSIONID
1802
+ const headerName = csrfCookie.name === "JSESSIONID" ? "csrf-token" : "x-csrf-token";
1803
+ headers[headerName] = v;
1802
1804
  }
1803
1805
  }
1804
1806
  }
@@ -381,8 +381,30 @@ function inferCsrfPlan(req: RawRequest, parsedBody?: unknown): CsrfPlan | undefi
381
381
  Object.entries(req.request_headers).map(([key, value]) => [key.toLowerCase(), value]),
382
382
  );
383
383
  const cookies = parseCookieHeader(headers["cookie"]);
384
- const csrfCookieNames = Object.keys(cookies).filter((name) => /^(ct0|csrf_token|_csrf|csrftoken|xsrf-token|_xsrf)$/i.test(name));
385
- const headerName = ["x-csrf-token", "x-xsrf-token", "x-csrftoken"].find((name) => typeof headers[name] === "string" && headers[name].length > 0);
384
+ const csrfCookieNames = Object.keys(cookies).filter((name) => /^(ct0|csrf_token|_csrf|csrftoken|xsrf-token|_xsrf|JSESSIONID)$/i.test(name));
385
+ const headerName = ["x-csrf-token", "x-xsrf-token", "x-csrftoken", "csrf-token"].find((name) => typeof headers[name] === "string" && headers[name].length > 0);
386
+
387
+ // Also detect CSRF by value matching: if any cookie value appears as a header value,
388
+ // that's a CSRF token pattern regardless of naming convention
389
+ if (!headerName && csrfCookieNames.length === 0) {
390
+ for (const [cookieName, cookieValue] of Object.entries(cookies)) {
391
+ if (!cookieValue || cookieValue.length < 8) continue;
392
+ const unquoted = cookieValue.startsWith('"') && cookieValue.endsWith('"') ? cookieValue.slice(1, -1) : cookieValue;
393
+ for (const [hName, hValue] of Object.entries(headers)) {
394
+ if (hName === "cookie" || hName === "host" || hName === "content-length") continue;
395
+ const hUnquoted = hValue.startsWith('"') && hValue.endsWith('"') ? hValue.slice(1, -1) : hValue;
396
+ if (unquoted === hUnquoted && unquoted.length >= 8) {
397
+ return {
398
+ source: "cookie",
399
+ param_name: hName,
400
+ refresh_on_401: true,
401
+ extractor_sequence: [cookieName],
402
+ };
403
+ }
404
+ }
405
+ }
406
+ }
407
+
386
408
  if (headerName && csrfCookieNames.length > 0) {
387
409
  return {
388
410
  source: "cookie",