unbrowse 2.0.16 → 2.0.21

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
package/dist/cli.js CHANGED
@@ -560,6 +560,7 @@ async function maybeAutoUpdate(metaUrl, overrides = {}) {
560
560
  import { closeSync, openSync, readFileSync as readFileSync4, statSync, unlinkSync, writeFileSync as writeFileSync2 } from "node:fs";
561
561
  import path3 from "node:path";
562
562
  import { spawn } from "node:child_process";
563
+ import { execFileSync } from "node:child_process";
563
564
 
564
565
  // ../../src/version.ts
565
566
  import { createHash } from "crypto";
@@ -666,6 +667,34 @@ function clearStalePidFile(pidFile) {
666
667
  unlinkSync(pidFile);
667
668
  } catch {}
668
669
  }
670
+ function findListeningPid(baseUrl) {
671
+ try {
672
+ const url = new URL(baseUrl);
673
+ const port = url.port || (url.protocol === "https:" ? "443" : "80");
674
+ const output = execFileSync("lsof", ["-nP", `-iTCP:${port}`, "-sTCP:LISTEN", "-t"], {
675
+ encoding: "utf8",
676
+ stdio: ["ignore", "pipe", "ignore"]
677
+ }).trim();
678
+ const pid = Number(output.split(/\s+/).find(Boolean));
679
+ return Number.isInteger(pid) && pid > 0 ? pid : null;
680
+ } catch {
681
+ return null;
682
+ }
683
+ }
684
+ function readProcessCommand(pid) {
685
+ try {
686
+ return execFileSync("ps", ["-o", "command=", "-p", String(pid)], {
687
+ encoding: "utf8",
688
+ stdio: ["ignore", "pipe", "ignore"]
689
+ }).trim();
690
+ } catch {
691
+ return "";
692
+ }
693
+ }
694
+ function isLikelyUnbrowseServerProcess(pid) {
695
+ const command = readProcessCommand(pid);
696
+ return /\bunbrowse\b|runtime-src\/index\.ts|src\/index\.ts|dist\/index\.js/i.test(command);
697
+ }
669
698
  async function stopManagedServer(pid, pidFile, baseUrl) {
670
699
  try {
671
700
  process.kill(pid, "SIGTERM");
@@ -714,7 +743,13 @@ async function ensureLocalServer(baseUrl, noAutoStart, metaUrl) {
714
743
  } else {
715
744
  if (existing)
716
745
  clearStalePidFile(pidFile);
717
- return;
746
+ const discoveredPid = findListeningPid(baseUrl);
747
+ if (discoveredPid && isLikelyUnbrowseServerProcess(discoveredPid)) {
748
+ await stopManagedServer(discoveredPid, pidFile, baseUrl);
749
+ existing = null;
750
+ } else {
751
+ return;
752
+ }
718
753
  }
719
754
  }
720
755
  if (existing?.pid && isPidAlive(existing.pid)) {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "unbrowse",
3
- "version": "2.0.16",
3
+ "version": "2.0.21",
4
4
  "description": "Reverse-engineer any website into reusable API skills. npm CLI + local engine.",
5
5
  "type": "module",
6
6
  "bin": {
@@ -27,9 +27,20 @@ export interface BrowserCookie {
27
27
  expires: number;
28
28
  }
29
29
 
30
+ export interface BrowserAuthSourceMeta {
31
+ family: "chromium" | "firefox";
32
+ browserName: string;
33
+ source: string;
34
+ userDataDir?: string;
35
+ profile?: string;
36
+ cookieDbPath?: string;
37
+ safeStorageService?: string;
38
+ }
39
+
30
40
  export interface ExtractionResult {
31
41
  cookies: BrowserCookie[];
32
42
  source: string | null;
43
+ sourceMeta?: BrowserAuthSourceMeta | null;
33
44
  warnings: string[];
34
45
  }
35
46
 
@@ -50,6 +61,13 @@ export interface ExtractBrowserCookiesOptions {
50
61
  chromium?: ChromiumCookieSourceOptions;
51
62
  }
52
63
 
64
+ type ChromiumBrowserCandidate = {
65
+ name: string;
66
+ userDataDir: string;
67
+ safeStorageService: string;
68
+ bundleId?: string;
69
+ };
70
+
53
71
  // ---------------------------------------------------------------------------
54
72
  // Path helpers
55
73
  // ---------------------------------------------------------------------------
@@ -66,6 +84,46 @@ function getChromeUserDataDir(): string {
66
84
  return join(home, ".config", "google-chrome");
67
85
  }
68
86
 
87
+ export function extractDefaultBrowserBundleIdFromLaunchServicesData(data: unknown): string | null {
88
+ const handlers = data && typeof data === "object" && Array.isArray((data as { LSHandlers?: unknown[] }).LSHandlers)
89
+ ? (data as { LSHandlers: Array<Record<string, unknown>> }).LSHandlers
90
+ : [];
91
+ for (const scheme of ["https", "http"]) {
92
+ const match = handlers.find((entry) => entry.LSHandlerURLScheme === scheme && typeof entry.LSHandlerRoleAll === "string");
93
+ if (typeof match?.LSHandlerRoleAll === "string" && match.LSHandlerRoleAll.length > 0) {
94
+ return match.LSHandlerRoleAll;
95
+ }
96
+ }
97
+ return null;
98
+ }
99
+
100
+ function getMacDefaultBrowserBundleId(): string | null {
101
+ if (platform() !== "darwin") return null;
102
+ const plist = join(homedir(), "Library", "Preferences", "com.apple.LaunchServices.com.apple.launchservices.secure.plist");
103
+ const fallbackPlist = join(homedir(), "Library", "Preferences", "com.apple.LaunchServices", "com.apple.launchservices.secure.plist");
104
+ const target = existsSync(plist) ? plist : fallbackPlist;
105
+ if (!existsSync(target)) return null;
106
+ try {
107
+ const json = execFileSync("plutil", ["-convert", "json", "-o", "-", target], {
108
+ encoding: "utf8",
109
+ stdio: ["ignore", "pipe", "ignore"],
110
+ });
111
+ return extractDefaultBrowserBundleIdFromLaunchServicesData(JSON.parse(json));
112
+ } catch {
113
+ return null;
114
+ }
115
+ }
116
+
117
+ export function prioritizeChromiumCandidates(
118
+ sources: ChromiumBrowserCandidate[],
119
+ preferredBundleId?: string | null,
120
+ ): ChromiumBrowserCandidate[] {
121
+ if (!preferredBundleId) return [...sources];
122
+ const preferred = sources.find((source) => source.bundleId === preferredBundleId);
123
+ if (!preferred) return [...sources];
124
+ return [preferred, ...sources.filter((source) => source !== preferred)];
125
+ }
126
+
69
127
  export function resolveChromiumCookiesPath(opts?: ChromiumCookieSourceOptions): string | null {
70
128
  if (opts?.cookieDbPath) {
71
129
  return opts.cookieDbPath.replace(/^~\//, homedir() + "/");
@@ -83,6 +141,23 @@ export function resolveChromiumCookiesPath(opts?: ChromiumCookieSourceOptions):
83
141
  return candidates.find((candidate) => existsSync(candidate)) ?? candidates[0] ?? null;
84
142
  }
85
143
 
144
+ function inferChromiumProfileFromPath(
145
+ dbPath: string,
146
+ userDataDir?: string,
147
+ ): string | undefined {
148
+ if (!userDataDir) return undefined;
149
+ const normalizedRoot = userDataDir.replace(/^~\//, homedir() + "/").replace(/\/+$/, "");
150
+ const normalizedDbPath = dbPath.replace(/\/+$/, "");
151
+ if (!normalizedDbPath.startsWith(`${normalizedRoot}/`)) return undefined;
152
+ const rel = normalizedDbPath.slice(normalizedRoot.length + 1);
153
+ const parts = rel.split("/");
154
+ if (parts.length < 2) return undefined;
155
+ if (parts[1] === "Cookies" || (parts[1] === "Network" && parts[2] === "Cookies")) {
156
+ return parts[0];
157
+ }
158
+ return undefined;
159
+ }
160
+
86
161
  function getFirefoxProfilesRoot(): string | null {
87
162
  const home = homedir();
88
163
  if (platform() === "darwin") {
@@ -262,7 +337,9 @@ export function extractFromChrome(
262
337
  ): ExtractionResult {
263
338
  return extractFromChromium(domain, {
264
339
  profile: opts?.profile,
340
+ userDataDir: getChromeUserDataDir(),
265
341
  browserName: "Chrome",
342
+ safeStorageService: "Chrome Safe Storage",
266
343
  });
267
344
  }
268
345
 
@@ -276,10 +353,11 @@ export function extractFromChromium(
276
353
 
277
354
  if (!dbPath || !existsSync(dbPath)) {
278
355
  warnings.push(`${sourceLabel} cookies DB not found${dbPath ? ` at ${dbPath}` : ""}`);
279
- return { cookies: [], source: null, warnings };
356
+ return { cookies: [], source: null, sourceMeta: null, warnings };
280
357
  }
281
358
 
282
359
  try {
360
+ const resolvedProfile = opts?.profile || inferChromiumProfileFromPath(dbPath, opts?.userDataDir);
283
361
  const cookies = withTempCopy(dbPath, (tempDb) => {
284
362
  const where = buildDomainWhereClause(domain, "host_key");
285
363
  const sql = `SELECT name, value, hex(encrypted_value) as ev, host_key, path, is_secure, is_httponly, samesite, expires_utc FROM cookies WHERE ${where};`;
@@ -322,10 +400,21 @@ export function extractFromChromium(
322
400
  warnings.push(`No cookies for ${domain} found in ${source}`);
323
401
  }
324
402
  log("auth", `extracted ${cookies.length} cookies for ${domain} from ${source}`);
325
- return { cookies, source: cookies.length > 0 ? source : null, warnings };
403
+ const sourceMeta: BrowserAuthSourceMeta | null = cookies.length > 0
404
+ ? {
405
+ family: "chromium",
406
+ browserName: sourceLabel,
407
+ source,
408
+ ...(opts?.userDataDir ? { userDataDir: opts.userDataDir } : {}),
409
+ ...(resolvedProfile ? { profile: resolvedProfile } : {}),
410
+ ...(opts?.cookieDbPath ? { cookieDbPath: dbPath } : {}),
411
+ ...(opts?.safeStorageService ? { safeStorageService: opts.safeStorageService } : {}),
412
+ }
413
+ : null;
414
+ return { cookies, source: cookies.length > 0 ? source : null, sourceMeta, warnings };
326
415
  } catch (err) {
327
416
  warnings.push(`${sourceLabel} extraction failed: ${err instanceof Error ? err.message : err}`);
328
- return { cookies: [], source: null, warnings };
417
+ return { cookies: [], source: null, sourceMeta: null, warnings };
329
418
  }
330
419
  }
331
420
 
@@ -343,7 +432,7 @@ export function extractFromFirefox(
343
432
 
344
433
  if (!dbPath) {
345
434
  warnings.push(`${browserLabel} cookies DB not found`);
346
- return { cookies: [], source: null, warnings };
435
+ return { cookies: [], source: null, sourceMeta: null, warnings };
347
436
  }
348
437
 
349
438
  try {
@@ -379,10 +468,18 @@ export function extractFromFirefox(
379
468
  warnings.push(`No cookies for ${domain} found in ${source}`);
380
469
  }
381
470
  log("auth", `extracted ${cookies.length} cookies for ${domain} from ${source}`);
382
- return { cookies, source: cookies.length > 0 ? source : null, warnings };
471
+ const sourceMeta: BrowserAuthSourceMeta | null = cookies.length > 0
472
+ ? {
473
+ family: "firefox",
474
+ browserName: browserLabel,
475
+ source,
476
+ ...(opts?.profile ? { profile: opts.profile } : {}),
477
+ }
478
+ : null;
479
+ return { cookies, source: cookies.length > 0 ? source : null, sourceMeta, warnings };
383
480
  } catch (err) {
384
481
  warnings.push(`${browserLabel} extraction failed: ${err instanceof Error ? err.message : err}`);
385
- return { cookies: [], source: null, warnings };
482
+ return { cookies: [], source: null, sourceMeta: null, warnings };
386
483
  }
387
484
  }
388
485
 
@@ -406,38 +503,27 @@ export function extractBrowserCookies(
406
503
  return extractFromChromium(domain, opts.chromium);
407
504
  }
408
505
 
409
- // Try Firefox first (no decryption needed, more reliable)
410
- const ff = extractFromFirefox(domain, { profile: opts?.firefoxProfile });
411
- if (ff.cookies.length > 0) return ff;
412
-
413
- // If caller provided an explicit Chromium-family source, try that next.
506
+ // If caller provided an explicit Chromium-family source, try that first.
414
507
  if (opts?.chromium?.cookieDbPath || opts?.chromium?.userDataDir) {
415
508
  const chromium = extractFromChromium(domain, opts.chromium);
416
- chromium.warnings.push(...ff.warnings);
417
509
  return chromium;
418
510
  }
419
511
 
420
- // Try Chrome first
421
- const chrome = extractFromChrome(domain, { profile: opts?.chromeProfile });
422
- if (chrome.cookies.length > 0) {
423
- chrome.warnings.push(...ff.warnings);
424
- return chrome;
425
- }
426
-
427
- // Auto-discover other Chromium-family browsers
428
512
  const home = homedir();
429
- const chromiumBrowsers: Array<{ name: string; userDataDir: string; safeStorageService: string }> =
513
+ const chromiumBrowsers: ChromiumBrowserCandidate[] =
430
514
  platform() === "darwin"
431
515
  ? [
432
- { name: "Arc", userDataDir: join(home, "Library", "Application Support", "Arc", "User Data"), safeStorageService: "Arc Safe Storage" },
433
- { name: "Dia", userDataDir: join(home, "Library", "Application Support", "Dia", "User Data"), safeStorageService: "Dia Safe Storage" },
434
- { name: "Brave", userDataDir: join(home, "Library", "Application Support", "BraveSoftware", "Brave-Browser"), safeStorageService: "Brave Safe Storage" },
435
- { name: "Edge", userDataDir: join(home, "Library", "Application Support", "Microsoft Edge"), safeStorageService: "Microsoft Edge Safe Storage" },
436
- { name: "Vivaldi", userDataDir: join(home, "Library", "Application Support", "Vivaldi"), safeStorageService: "Vivaldi Safe Storage" },
437
- { name: "Chromium", userDataDir: join(home, "Library", "Application Support", "Chromium"), safeStorageService: "Chromium Safe Storage" },
516
+ { name: "Chrome", userDataDir: getChromeUserDataDir(), safeStorageService: "Chrome Safe Storage", bundleId: "com.google.chrome" },
517
+ { name: "Arc", userDataDir: join(home, "Library", "Application Support", "Arc", "User Data"), safeStorageService: "Arc Safe Storage", bundleId: "company.thebrowser.Browser" },
518
+ { name: "Dia", userDataDir: join(home, "Library", "Application Support", "Dia", "User Data"), safeStorageService: "Dia Safe Storage", bundleId: "company.thebrowser.dia" },
519
+ { name: "Brave", userDataDir: join(home, "Library", "Application Support", "BraveSoftware", "Brave-Browser"), safeStorageService: "Brave Safe Storage", bundleId: "com.brave.Browser" },
520
+ { name: "Edge", userDataDir: join(home, "Library", "Application Support", "Microsoft Edge"), safeStorageService: "Microsoft Edge Safe Storage", bundleId: "com.microsoft.edgemac" },
521
+ { name: "Vivaldi", userDataDir: join(home, "Library", "Application Support", "Vivaldi"), safeStorageService: "Vivaldi Safe Storage", bundleId: "com.vivaldi.Vivaldi" },
522
+ { name: "Chromium", userDataDir: join(home, "Library", "Application Support", "Chromium"), safeStorageService: "Chromium Safe Storage", bundleId: "org.chromium.Chromium" },
438
523
  ]
439
524
  : platform() === "linux"
440
525
  ? [
526
+ { name: "Chrome", userDataDir: getChromeUserDataDir(), safeStorageService: "Chrome Safe Storage" },
441
527
  { name: "Brave", userDataDir: join(home, ".config", "BraveSoftware", "Brave-Browser"), safeStorageService: "Brave Safe Storage" },
442
528
  { name: "Edge", userDataDir: join(home, ".config", "microsoft-edge"), safeStorageService: "Microsoft Edge Safe Storage" },
443
529
  { name: "Vivaldi", userDataDir: join(home, ".config", "vivaldi"), safeStorageService: "Vivaldi Safe Storage" },
@@ -445,8 +531,33 @@ export function extractBrowserCookies(
445
531
  ]
446
532
  : [];
447
533
 
448
- const allWarnings = [...ff.warnings, ...chrome.warnings];
449
- for (const browser of chromiumBrowsers) {
534
+ const preferredBundleId = getMacDefaultBrowserBundleId();
535
+ const orderedChromiumBrowsers = prioritizeChromiumCandidates(chromiumBrowsers, preferredBundleId);
536
+
537
+ const preferredChromium = preferredBundleId ? orderedChromiumBrowsers[0] : null;
538
+ const accumulatedWarnings: string[] = [];
539
+ if (preferredChromium?.bundleId === preferredBundleId && existsSync(preferredChromium.userDataDir)) {
540
+ const preferredResult = extractFromChromium(domain, {
541
+ userDataDir: preferredChromium.userDataDir,
542
+ browserName: preferredChromium.name,
543
+ safeStorageService: preferredChromium.safeStorageService,
544
+ });
545
+ if (preferredResult.cookies.length > 0) {
546
+ return preferredResult;
547
+ }
548
+ accumulatedWarnings.push(...preferredResult.warnings);
549
+ }
550
+
551
+ // Try Firefox next (no decryption needed, more reliable when it actually has the session)
552
+ const ff = extractFromFirefox(domain, { profile: opts?.firefoxProfile });
553
+ if (ff.cookies.length > 0) {
554
+ ff.warnings.push(...accumulatedWarnings);
555
+ return ff;
556
+ }
557
+
558
+ const allWarnings = [...accumulatedWarnings, ...ff.warnings];
559
+ for (const browser of orderedChromiumBrowsers) {
560
+ if (browser.bundleId && browser.bundleId === preferredBundleId) continue;
450
561
  if (!existsSync(browser.userDataDir)) continue;
451
562
  const result = extractFromChromium(domain, {
452
563
  userDataDir: browser.userDataDir,
@@ -474,5 +585,5 @@ export function extractBrowserCookies(
474
585
  allWarnings.push(...zenResult.warnings);
475
586
  }
476
587
 
477
- return { cookies: [], source: null, warnings: allWarnings };
588
+ return { cookies: [], source: null, sourceMeta: null, warnings: allWarnings };
478
589
  }
@@ -6,6 +6,7 @@ import { log } from "../logger.js";
6
6
  import path from "node:path";
7
7
  import os from "node:os";
8
8
  import fs from "node:fs";
9
+ import type { BrowserAuthSourceMeta } from "./browser-cookies.js";
9
10
 
10
11
  const LOGIN_TIMEOUT_MS = 300_000;
11
12
  const POLL_INTERVAL_MS = 2_000;
@@ -30,6 +31,18 @@ export interface StoredAuthBundle {
30
31
  cookies: AuthCookie[];
31
32
  headers: Record<string, string>;
32
33
  source_keys: string[];
34
+ source_meta?: BrowserAuthSourceMeta | null;
35
+ }
36
+
37
+ export function storedAuthNeedsBrowserRefresh(bundle: StoredAuthBundle | null | undefined): boolean {
38
+ if (!bundle) return true;
39
+ if (bundle.cookies.length === 0 && Object.keys(bundle.headers).length === 0) return true;
40
+ const sourceMeta = bundle.source_meta;
41
+ if (!sourceMeta) return true;
42
+ if (sourceMeta.family === "chromium" && !sourceMeta.userDataDir && !sourceMeta.cookieDbPath) {
43
+ return true;
44
+ }
45
+ return false;
33
46
  }
34
47
 
35
48
  /**
@@ -118,7 +131,7 @@ export async function extractBrowserAuth(
118
131
  const vaultKey = `auth:${getRegistrableDomain(domain)}`;
119
132
  await storeCredential(
120
133
  vaultKey,
121
- JSON.stringify({ cookies: storableCookies })
134
+ JSON.stringify({ cookies: storableCookies, source_meta: result.sourceMeta ?? null })
122
135
  );
123
136
 
124
137
  log("auth", `stored ${storableCookies.length} cookies for ${domain} (key: ${vaultKey}) from ${result.source}`);
@@ -172,6 +185,7 @@ export async function getStoredAuthBundle(
172
185
  const cookies: AuthCookie[] = [];
173
186
  const headers: Record<string, string> = {};
174
187
  const source_keys: string[] = [];
188
+ let source_meta: BrowserAuthSourceMeta | null = null;
175
189
 
176
190
  for (const key of getStoredAuthKeys(domain)) {
177
191
  const stored = await getCredential(key);
@@ -180,6 +194,7 @@ export async function getStoredAuthBundle(
180
194
  const parsed = JSON.parse(stored) as {
181
195
  cookies?: AuthCookie[];
182
196
  headers?: Record<string, string>;
197
+ source_meta?: BrowserAuthSourceMeta | null;
183
198
  };
184
199
  const rawCookies = parsed.cookies ?? [];
185
200
  const validCookies = filterExpired(rawCookies);
@@ -198,6 +213,7 @@ export async function getStoredAuthBundle(
198
213
  }
199
214
  if ((validCookies.length > 0 || Object.keys(parsedHeaders).length > 0) && !source_keys.includes(key)) {
200
215
  source_keys.push(key);
216
+ if (!source_meta && parsed.source_meta) source_meta = parsed.source_meta;
201
217
  }
202
218
  } catch {
203
219
  continue;
@@ -205,7 +221,7 @@ export async function getStoredAuthBundle(
205
221
  }
206
222
 
207
223
  if (cookies.length === 0 && Object.keys(headers).length === 0) return null;
208
- return { cookies, headers, source_keys };
224
+ return { cookies, headers, source_keys, source_meta };
209
225
  }
210
226
 
211
227
  export async function findStoredAuthReference(domain: string): Promise<string | null> {
@@ -234,12 +250,14 @@ export async function getAuthCookies(
234
250
  domain: string,
235
251
  opts?: { autoExtract?: boolean }
236
252
  ): Promise<AuthCookie[] | null> {
237
- const vaultCookies = await getStoredAuth(domain);
238
- if (vaultCookies && vaultCookies.length > 0) return vaultCookies;
253
+ const bundle = await getStoredAuthBundle(domain);
254
+ if (bundle && bundle.cookies.length > 0 && !storedAuthNeedsBrowserRefresh(bundle)) {
255
+ return bundle.cookies;
256
+ }
239
257
 
240
- if (opts?.autoExtract === false) return null;
258
+ if (opts?.autoExtract === false) return bundle?.cookies ?? null;
241
259
 
242
- log("auth", `no vault cookies for ${domain} — auto-extracting from browser`);
260
+ log("auth", `${bundle ? "stored auth lacks usable browser source metadata" : "no vault cookies"} for ${domain} — auto-extracting from browser`);
243
261
  try {
244
262
  const result = await extractBrowserAuth(domain);
245
263
  if (result.success && result.cookies_stored > 0) {
@@ -249,7 +267,7 @@ export async function getAuthCookies(
249
267
  log("auth", `browser auto-extract failed for ${domain}: ${err instanceof Error ? err.message : err}`);
250
268
  }
251
269
 
252
- return null;
270
+ return bundle?.cookies ?? null;
253
271
  }
254
272
 
255
273
  /**