unbrowse 3.4.1 → 3.5.0-preview.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.js CHANGED
@@ -31,7 +31,7 @@ var __promiseAll = (args) => Promise.all(args);
31
31
  var __require = /* @__PURE__ */ createRequire(import.meta.url);
32
32
 
33
33
  // ../../src/build-info.generated.ts
34
- var BUILD_RELEASE_VERSION = "3.4.1", BUILD_GIT_SHA = "7b0ee7aec348", BUILD_CODE_HASH = "3d7c45796360", BUILD_RELEASE_MANIFEST_BASE64 = "eyJzY2hlbWFfdmVyc2lvbiI6MSwicmVsZWFzZV92ZXJzaW9uIjoiMy40LjEiLCJnaXRfc2hhIjoiN2IwZWU3YWVjMzQ4IiwiY29kZV9oYXNoIjoiM2Q3YzQ1Nzk2MzYwIiwidHJhY2VfdmVyc2lvbiI6IjNkN2M0NTc5NjM2MEA3YjBlZTdhZWMzNDgiLCJpc3N1ZWRfYXQiOiIyMDI2LTA0LTA5VDAzOjI3OjIyLjg5MFoifQ", BUILD_RELEASE_MANIFEST_SIGNATURE = "Z2fi5XpHHBZygM_Fc14sq2nq-qXEwAxU6uzgqQSD7Ew", BUILD_DEFAULT_BACKEND_URL = "https://beta-api.unbrowse.ai";
34
+ var BUILD_RELEASE_VERSION = "3.5.0-preview.1", BUILD_GIT_SHA = "98686464d176", BUILD_CODE_HASH = "6712fc4b9fea", BUILD_RELEASE_MANIFEST_BASE64 = "eyJzY2hlbWFfdmVyc2lvbiI6MSwicmVsZWFzZV92ZXJzaW9uIjoiMy41LjAtcHJldmlldy4xIiwiZ2l0X3NoYSI6Ijk4Njg2NDY0ZDE3NiIsImNvZGVfaGFzaCI6IjY3MTJmYzRiOWZlYSIsInRyYWNlX3ZlcnNpb24iOiI2NzEyZmM0YjlmZWFAOTg2ODY0NjRkMTc2IiwiaXNzdWVkX2F0IjoiMjAyNi0wNC0wOVQxNDowNzoyOC40OTBaIn0", BUILD_RELEASE_MANIFEST_SIGNATURE = "3tfUGHc_qHJwOqJwMh4HuRaGiV99sjiiXCIC8B2DV3A", BUILD_DEFAULT_BACKEND_URL = "https://beta-api.unbrowse.ai";
35
35
 
36
36
  // ../../src/version.ts
37
37
  import { createHash } from "crypto";
@@ -519,7 +519,17 @@ var init_marketplace = __esm(() => {
519
519
  });
520
520
 
521
521
  // ../../src/domain.ts
522
- var CC_TLDS;
522
/**
 * Reduce a hostname to the domain used for cookie matching.
 *
 * Returns the last two dot-separated labels, or the last three when the
 * hostname has at least three labels and its final two labels form a suffix
 * listed in CC_TLDS or GEO_TLD_SUFFIXES (e.g. "co.uk").
 *
 * @param {string} hostname2 - Hostname to reduce.
 * @returns {string} The reduced domain.
 */
function getRegistrableDomain(hostname2) {
  const labels = hostname2.split(".");
  const tail = labels.slice(-2).join(".");
  // The suffix sets are only consulted when a third label actually exists.
  const hasTwoLabelSuffix = labels.length >= 3 && (CC_TLDS.has(tail) || GEO_TLD_SUFFIXES.has(tail));
  return hasTwoLabelSuffix ? labels.slice(-3).join(".") : tail;
}
532
+ var CC_TLDS, GEO_TLD_SUFFIXES;
523
533
  var init_domain = __esm(() => {
524
534
  CC_TLDS = new Set([
525
535
  "co.uk",
@@ -539,6 +549,43 @@ var init_domain = __esm(() => {
539
549
  "net.au",
540
550
  "org.au"
541
551
  ]);
552
+ GEO_TLD_SUFFIXES = new Set([
553
+ "com.sg",
554
+ "com.hk",
555
+ "com.my",
556
+ "com.ph",
557
+ "com.vn",
558
+ "com.id",
559
+ "com.th",
560
+ "com.np",
561
+ "com.pk",
562
+ "com.bd",
563
+ "com.lk",
564
+ "com.mm",
565
+ "com.kh",
566
+ "com.eg",
567
+ "com.sa",
568
+ "com.qa",
569
+ "com.ae",
570
+ "com.kw",
571
+ "com.jo",
572
+ "com.lb",
573
+ "com.bh",
574
+ "com.om",
575
+ "com.iq",
576
+ "co.id",
577
+ "co.th",
578
+ "co.nz",
579
+ "co.jp",
580
+ "co.kr",
581
+ "co.in",
582
+ "co.uk",
583
+ "co.za",
584
+ "co.zw",
585
+ "co.ke",
586
+ "co.tz",
587
+ "co.ug"
588
+ ]);
542
589
  });
543
590
 
544
591
  // ../../src/publish-admission.ts
@@ -1013,10 +1060,53 @@ var init_robots = __esm(() => {
1013
1060
  });
1014
1061
 
1015
1062
  // ../../src/site-policy.ts
1016
- var MUTATION_METHODS;
1063
+ var MUTATION_METHODS, KNOWN_SESSION_BOUND_PARAMS;
1017
1064
  var init_site_policy = __esm(() => {
1018
1065
  init_domain();
1019
1066
  MUTATION_METHODS = new Set(["POST", "PUT", "PATCH", "DELETE"]);
1067
+ KNOWN_SESSION_BOUND_PARAMS = new Set([
1068
+ "device_id",
1069
+ "WebIdLastTime",
1070
+ "browser_version",
1071
+ "browser_name",
1072
+ "browser_language",
1073
+ "browser_platform",
1074
+ "browser_online",
1075
+ "os",
1076
+ "os_version",
1077
+ "device_platform",
1078
+ "channel",
1079
+ "app_id",
1080
+ "app_name",
1081
+ "app_version",
1082
+ "fp",
1083
+ "msToken",
1084
+ "tt_webid",
1085
+ "tt_webid_v2",
1086
+ "verifyFp",
1087
+ "X-Bogus",
1088
+ "_signature",
1089
+ "aid",
1090
+ "biz_trace_id",
1091
+ "clientABVersions",
1092
+ "coverFormat",
1093
+ "webcast_sdk_version",
1094
+ "live_id",
1095
+ "enter_from",
1096
+ "enter_method",
1097
+ "from_source",
1098
+ "screen_width",
1099
+ "screen_height",
1100
+ "history_len",
1101
+ "is_fullscreen",
1102
+ "is_page_visible",
1103
+ "focus_state",
1104
+ "is_night_mode",
1105
+ "cookie_enabled",
1106
+ "timezone_name",
1107
+ "timezone_offset",
1108
+ "uid"
1109
+ ]);
1020
1110
  });
1021
1111
 
1022
1112
  // ../../src/workflow/compile.ts
@@ -1376,6 +1466,484 @@ function checkWalletConfigured2() {
1376
1466
  }
1377
1467
  var init_wallet2 = () => {};
1378
1468
 
1469
+ // ../../src/auth/agent-mail.ts
1470
+ var exports_agent_mail = {};
1471
+ __export(exports_agent_mail, {
1472
+ waitForVerificationEmail: () => waitForVerificationEmail,
1473
+ getOrCreateSiteInbox: () => getOrCreateSiteInbox,
1474
+ extractVerificationLink: () => extractVerificationLink,
1475
+ extractOtpFromEmail: () => extractOtpFromEmail,
1476
+ autonomousEmailLogin: () => autonomousEmailLogin
1477
+ });
1478
/**
 * Send a JSON request to the AgentMail API with the configured bearer key.
 *
 * @param {string} method - HTTP method.
 * @param {string} path9 - Path appended to AGENTMAIL_API.
 * @param {object} [body] - Optional payload; serialised as JSON when truthy.
 * @returns {Promise<any>} Parsed JSON response body.
 * @throws {Error} When the response status is not OK; the message carries the
 *   method, path, status and whatever response text could be read.
 */
async function amFetch(method, path9, body) {
  const request = {
    method,
    headers: {
      Authorization: `Bearer ${AGENTMAIL_KEY}`,
      "Content-Type": "application/json"
    }
  };
  if (body) {
    request.body = JSON.stringify(body);
  }
  const response = await fetch(`${AGENTMAIL_API}${path9}`, request);
  if (response.ok) {
    return response.json();
  }
  // Reading the error body is best-effort; an unreadable body becomes "".
  const detail = await response.text().catch(() => "");
  throw new Error(`AgentMail ${method} ${path9}: ${response.status} ${detail}`);
}
1493
/**
 * Obtain an AgentMail inbox dedicated to a site.
 *
 * Tries to create `unbrowse-<domain>@agentmail.to`; if creation fails for any
 * reason it looks the inbox up instead, and if that also fails it returns the
 * shared `unbrowse-agent@agentmail.to` descriptor.
 *
 * @param {string} domain - Site domain; non-alphanumeric characters become "-".
 * @returns {Promise<object>} Inbox object (API response or the shared fallback).
 */
async function getOrCreateSiteInbox(domain) {
  const slug = domain.replace(/[^a-z0-9-]/gi, "-").toLowerCase();
  const username = `unbrowse-${slug}`;
  try {
    return await amFetch("POST", "/inboxes", {
      username,
      domain: "agentmail.to",
      display_name: `Unbrowse Agent - ${domain}`,
      client_id: `unbrowse-login-${slug}`
    });
  } catch {
    // Creation failed; fall through to a lookup of the expected address.
  }
  const address = `${username}@agentmail.to`;
  try {
    return await amFetch("GET", `/inboxes/${encodeURIComponent(address)}`);
  } catch {
    return { inbox_id: "unbrowse-agent@agentmail.to", email: "unbrowse-agent@agentmail.to" };
  }
}
1515
/**
 * Poll an inbox until an unread message from the given sender shows up.
 *
 * Fetches up to five unread messages every three seconds and returns the first
 * one whose `from` field contains `fromDomain` (case-insensitive).
 *
 * @param {string} inboxId - Inbox identifier (URL-encoded into the path).
 * @param {string} fromDomain - Substring expected in the sender field.
 * @param {number} [timeoutMs=60000] - How long to keep polling.
 * @returns {Promise<{subject: string, text: string, html: string} | null>}
 *   The matching message, or null when the timeout elapses first.
 */
async function waitForVerificationEmail(inboxId, fromDomain, timeoutMs = 60000) {
  const pollEveryMs = 3000;
  const startedAt = Date.now();
  const isFromSender = (message) => message.from?.toLowerCase().includes(fromDomain.toLowerCase());
  const pause = (ms) => new Promise((resolve) => setTimeout(resolve, ms));
  while (Date.now() - startedAt < timeoutMs) {
    const page = await amFetch("GET", `/inboxes/${encodeURIComponent(inboxId)}/messages?limit=5&labels=unread`);
    for (const message of page.messages ?? []) {
      if (!isFromSender(message)) {
        continue;
      }
      return {
        subject: message.subject ?? "",
        text: message.extractedText ?? message.text ?? "",
        html: message.extractedHtml ?? message.html ?? ""
      };
    }
    await pause(pollEveryMs);
  }
  return null;
}
1533
/**
 * Pull a one-time code out of the plain-text body of an email.
 *
 * Patterns are tried in priority order and the first hit wins:
 *   1. a standalone 6-digit number,
 *   2. a standalone 4-digit number,
 *   3. 4-8 word characters directly after "code:" / "code ",
 *   4. a standalone 5-8 digit number (fallback for 5-, 7- and 8-digit codes
 *      that are not introduced by "code:", which were previously missed).
 *
 * @param {string} text - Email body text.
 * @returns {string | null} The code, or null when nothing matched or `text`
 *   is not a non-empty string.
 */
function extractOtpFromEmail(text) {
  if (typeof text !== "string" || text.length === 0) {
    return null;
  }
  const patterns = [
    /\b(\d{6})\b/,
    /\b(\d{4})\b/,
    /code[:\s]+(\w{4,8})/i,
    // Only reached when none of the original patterns matched, so every input
    // that used to yield a code still yields the same one.
    /\b(\d{5,8})\b/
  ];
  for (const pattern of patterns) {
    const hit = text.match(pattern);
    if (hit) {
      return hit[1];
    }
  }
  return null;
}
1545
/**
 * Find the most likely verification / magic-login URL in an email.
 *
 * Preference order: an HTML `href` whose URL contains one of the keywords
 * verify|confirm|activate|magic|token|auth, then a keyword URL in the plain
 * text, then the first URL of any kind in the plain text.
 *
 * Fixes over the previous version:
 *   - `&amp;` (and its numeric forms) inside an href is decoded to `&`, so
 *     multi-parameter query strings survive extraction;
 *   - single-quoted href attributes are recognised;
 *   - trailing sentence punctuation / closing brackets are stripped from
 *     links found in plain text;
 *   - a missing `text` or `html` argument is treated as an empty string.
 *
 * @param {string} text - Plain-text body.
 * @param {string} html - HTML body.
 * @returns {string | null} The URL, or null when no link was found.
 */
function extractVerificationLink(text, html) {
  const plain = typeof text === "string" ? text : "";
  const markup = typeof html === "string" ? html : "";
  const decodeAmpersands = (url) => url.replace(/&(?:amp|#38|#x26);/gi, "&");
  const trimTrailingPunctuation = (url) => url.replace(/[.,;:!?)\]>'"]+$/, "");

  // Double-quoted form first (the original behaviour), then single-quoted.
  const hrefHit =
    markup.match(/href="(https?:\/\/[^"]*(?:verify|confirm|activate|magic|token|auth)[^"]*)"/i) ??
    markup.match(/href='(https?:\/\/[^']*(?:verify|confirm|activate|magic|token|auth)[^']*)'/i);
  if (hrefHit) {
    return decodeAmpersands(hrefHit[1]);
  }

  const textHit =
    plain.match(/(https?:\/\/\S*(?:verify|confirm|activate|magic|token|auth)\S*)/i) ??
    plain.match(/(https?:\/\/\S+)/);
  if (textHit) {
    return trimTrailingPunctuation(textHit[1]);
  }
  return null;
}
1557
/**
 * Prepare an email identity for signing in to a site without a human inbox.
 *
 * @param {string} domain - Site domain; also used as the expected sender when
 *   waiting for mail.
 * @returns {Promise<{email: string, inboxId: string,
 *   waitForOtp: () => Promise<string | null>,
 *   waitForLink: () => Promise<string | null>}>}
 *   The inbox address plus two waiters; each polls for up to 90 seconds and
 *   resolves to null when no matching email arrives.
 */
async function autonomousEmailLogin(domain) {
  const inbox = await getOrCreateSiteInbox(domain);
  const nextSiteEmail = () => waitForVerificationEmail(inbox.inbox_id, domain, 90000);
  return {
    email: inbox.email,
    inboxId: inbox.inbox_id,
    waitForOtp: async () => {
      const message = await nextSiteEmail();
      return message ? extractOtpFromEmail(message.text) : null;
    },
    waitForLink: async () => {
      const message = await nextSiteEmail();
      return message ? extractVerificationLink(message.text, message.html) : null;
    }
  };
}
1576
// Base URL of the AgentMail API, and the bearer key filled in by the initialiser.
var AGENTMAIL_API = "https://api.agentmail.to";
var AGENTMAIL_KEY;
// Lazy initialiser: reads the API key from the environment, defaulting to "".
var init_agent_mail = __esm(() => {
  const { AGENTMAIL_API_KEY: configuredKey } = process.env;
  AGENTMAIL_KEY = configuredKey ?? "";
});
1580
+
1581
+ // ../../src/auth/browser-cookies.ts
1582
+ var exports_browser_cookies = {};
1583
+ __export(exports_browser_cookies, {
1584
+ scanAllBrowserSessions: () => scanAllBrowserSessions,
1585
+ resolveChromiumCookiesPath: () => resolveChromiumCookiesPath,
1586
+ findBestBrowserSession: () => findBestBrowserSession,
1587
+ extractFromFirefox: () => extractFromFirefox,
1588
+ extractFromChromium: () => extractFromChromium,
1589
+ extractFromChrome: () => extractFromChrome,
1590
+ extractBrowserCookies: () => extractBrowserCookies,
1591
+ decodeChromiumCookieValue: () => decodeChromiumCookieValue
1592
+ });
1593
+ import { execFileSync as execFileSync4 } from "node:child_process";
1594
+ import { createDecipheriv, pbkdf2Sync } from "node:crypto";
1595
+ import { copyFileSync, existsSync as existsSync15, mkdtempSync, readdirSync as readdirSync4, rmSync } from "node:fs";
1596
+ import { tmpdir, homedir as homedir7, platform } from "node:os";
1597
+ import { join as join12 } from "node:path";
1598
/**
 * Locate Google Chrome's user-data directory for the current platform.
 *
 * @returns {string} The macOS or Windows location when running there
 *   (Windows honours LOCALAPPDATA), otherwise `~/.config/google-chrome`.
 */
function getChromeUserDataDir() {
  const home = homedir7();
  switch (platform()) {
    case "darwin":
      return join12(home, "Library", "Application Support", "Google", "Chrome");
    case "win32": {
      const localAppData = process.env.LOCALAPPDATA ?? join12(home, "AppData", "Local");
      return join12(localAppData, "Google", "Chrome", "User Data");
    }
    default:
      return join12(home, ".config", "google-chrome");
  }
}
1609
/**
 * Work out which Chromium cookie database file to read.
 *
 * An explicit `cookieDbPath` wins outright. Otherwise four layouts under the
 * user-data directory are probed in order and the first existing file is
 * returned; if none exists the first candidate is returned anyway so callers
 * can report where they looked. A leading "~/" is expanded in both
 * `cookieDbPath` and `userDataDir`.
 *
 * @param {{cookieDbPath?: string, userDataDir?: string, profile?: string}} [opts]
 * @returns {string | null} Path to the cookies database.
 */
function resolveChromiumCookiesPath(opts) {
  const expandHome = (p) => p.replace(/^~\//, homedir7() + "/");
  if (opts?.cookieDbPath) {
    return expandHome(opts.cookieDbPath);
  }
  const profile = opts?.profile || "Default";
  const root = expandHome(opts?.userDataDir || getChromeUserDataDir());
  // Sub-paths below the root, most specific first.
  const layouts = [[profile, "Network"], [profile], ["Network"], []];
  const candidates = layouts.map((segments) => join12(root, ...segments, "Cookies"));
  for (const candidate of candidates) {
    if (existsSync15(candidate)) {
      return candidate;
    }
  }
  return candidates[0] ?? null;
}
1623
/**
 * Locate the directory that holds Firefox profiles on this platform.
 *
 * @returns {string | null} The profiles root, or null on an unsupported
 *   platform or when APPDATA is unset on Windows.
 */
function getFirefoxProfilesRoot() {
  const home = homedir7();
  switch (platform()) {
    case "darwin":
      return join12(home, "Library", "Application Support", "Firefox", "Profiles");
    case "linux":
      return join12(home, ".mozilla", "firefox");
    case "win32": {
      const roaming = process.env.APPDATA;
      return roaming ? join12(roaming, "Mozilla", "Firefox", "Profiles") : null;
    }
    default:
      return null;
  }
}
1639
/**
 * Choose a Firefox profile and return the path of its cookie database.
 *
 * With an explicit `profile` only that directory is considered. Otherwise the
 * first directory whose name contains "default-release" is used, falling back
 * to the first directory of any name.
 *
 * @param {string} profilesRoot - Directory containing the profile folders.
 * @param {string} [profile] - Profile directory name to force.
 * @returns {string | null} Path to `cookies.sqlite`, or null when the chosen
 *   profile has no such file or no profile directory exists.
 */
function pickFirefoxProfile(profilesRoot, profile) {
  const cookiesDbIn = (dirName) => {
    const dbFile = join12(profilesRoot, dirName, "cookies.sqlite");
    return existsSync15(dbFile) ? dbFile : null;
  };
  if (profile) {
    return cookiesDbIn(profile);
  }
  const dirNames = readdirSync4(profilesRoot, { withFileTypes: true })
    .filter((entry) => entry.isDirectory())
    .map((entry) => entry.name);
  const chosen = dirNames.find((name) => name.includes("default-release")) ?? dirNames[0];
  return chosen ? cookiesDbIn(chosen) : null;
}
1652
/**
 * Resolve the Firefox cookie database for the given (or default) profile.
 *
 * @param {string} [profile] - Profile directory name to force.
 * @returns {string | null} Path to `cookies.sqlite`, or null when the profiles
 *   root is unknown, missing, or contains no usable profile.
 */
function getFirefoxCookiesPath(profile) {
  const root = getFirefoxProfilesRoot();
  const rootUsable = Boolean(root) && existsSync15(root);
  return rootUsable ? pickFirefoxProfile(root, profile) : null;
}
1658
/**
 * Name of the keychain entry that holds a browser's cookie-encryption secret.
 *
 * @param {{safeStorageService?: string, browserName?: string}} [opts]
 * @returns {string} `opts.safeStorageService` when set, otherwise
 *   "<browserName> Safe Storage" with "Chrome" as the default browser name.
 */
function getChromiumKeychainServiceName(opts) {
  const explicit = opts?.safeStorageService;
  if (explicit) {
    return explicit;
  }
  const browser = opts?.browserName || "Chrome";
  return `${browser} Safe Storage`;
}
1663
/**
 * Derive (and cache per keychain service) the AES key for Chromium cookies.
 *
 * On macOS the secret is read with the `security` CLI and stretched with
 * PBKDF2 (salt "saltysalt", 1003 iterations, 16 bytes, SHA-1). On any other
 * platform, or when the lookup fails or yields nothing, null is returned.
 *
 * @param {{safeStorageService?: string, browserName?: string}} [opts]
 * @returns {Buffer | null} The derived key, or null when unavailable.
 */
function getChromiumDecryptionKey(opts) {
  const service = getChromiumKeychainServiceName(opts);
  const known = _chromiumKeyCache.get(service);
  if (known) {
    return known;
  }
  if (platform() !== "darwin") {
    return null;
  }
  try {
    const secret = execFileSync4(
      "security",
      ["find-generic-password", "-s", service, "-w"],
      { encoding: "utf8", stdio: ["pipe", "pipe", "pipe"] }
    ).trim();
    if (secret) {
      const key = pbkdf2Sync(secret, "saltysalt", 1003, 16, "sha1");
      _chromiumKeyCache.set(service, key);
      return key;
    }
  } catch {
    // Keychain lookup is best-effort; any failure means "no key".
  }
  return null;
}
1681
/**
 * Decrypt a Chromium `encrypted_value` given as a hex string.
 *
 * Values without a "v10"/"v11" prefix are returned as UTF-8 unchanged.
 * Prefixed values are decrypted with AES-128-CBC using the browser key. For
 * payloads of 48 bytes or more, a layout using bytes 16-31 as the IV and the
 * remainder as ciphertext is tried first; otherwise (or if that yields
 * nothing) the whole payload is decrypted with an IV of sixteen 0x20 bytes.
 * Decrypted text is reduced to printable ASCII.
 *
 * @param {string} encryptedHex - Hex-encoded encrypted value.
 * @param {object} [opts] - Passed through to the key lookup.
 * @returns {string | null} Plain value, or null when the input is too short,
 *   no key is available, or decryption fails.
 */
function decryptChromiumValue(encryptedHex, opts) {
  const printableOnly = (bytes) => bytes.toString("utf8").replace(/[^\x20-\x7E]/g, "");
  const aesCbcDecrypt = (key, iv, data) => {
    const decipher = createDecipheriv("aes-128-cbc", key, iv);
    decipher.setAutoPadding(true);
    return Buffer.concat([decipher.update(data), decipher.final()]);
  };
  try {
    const blob = Buffer.from(encryptedHex, "hex");
    if (blob.length < 4) {
      return null;
    }
    const tag = blob.subarray(0, 3).toString("utf8");
    const isEncrypted = tag === "v10" || tag === "v11";
    if (!isEncrypted) {
      return blob.toString("utf8");
    }
    const key = getChromiumDecryptionKey(opts);
    if (!key) {
      return null;
    }
    const payload = blob.subarray(3);
    if (payload.length >= 48) {
      try {
        const candidate = printableOnly(aesCbcDecrypt(key, payload.subarray(16, 32), payload.subarray(32)));
        if (candidate.length > 0) {
          return candidate;
        }
      } catch {
        // First layout did not decrypt; fall back to the fixed-IV layout below.
      }
    }
    return printableOnly(aesCbcDecrypt(key, Buffer.alloc(16, 32), payload));
  } catch {
    return null;
  }
}
1715
/**
 * Pick the usable value of a Chromium cookie row.
 *
 * @param {string} rawValue - Plain `value` column; used as-is when non-empty.
 * @param {string} encryptedHex - Hex of `encrypted_value`; decrypted when the
 *   plain value is empty.
 * @param {object} [opts] - Passed through to decryption.
 * @returns {string | null} Cookie value, or null when neither column is usable.
 */
function decodeChromiumCookieValue(rawValue, encryptedHex, opts) {
  if (rawValue) {
    return rawValue;
  }
  return encryptedHex ? decryptChromiumValue(encryptedHex, opts) : null;
}
1722
/**
 * Run `fn` against a throw-away copy of a SQLite database.
 *
 * The database and any `-wal` / `-shm` companions are copied into a fresh
 * temporary directory, which is removed afterwards whether or not `fn` throws.
 *
 * @template T
 * @param {string} dbPath - Database file to copy.
 * @param {(tempDb: string) => T} fn - Receives the path of the copy.
 * @returns {T} Whatever `fn` returns.
 */
function withTempCopy(dbPath, fn) {
  const scratchDir = mkdtempSync(join12(tmpdir(), "unbrowse-cookies-"));
  const scratchDb = join12(scratchDir, "cookies.db");
  try {
    copyFileSync(dbPath, scratchDb);
    for (const suffix of ["-wal", "-shm"]) {
      const companion = dbPath + suffix;
      if (!existsSync15(companion)) {
        continue;
      }
      copyFileSync(companion, scratchDb + suffix);
    }
    return fn(scratchDb);
  } finally {
    try {
      rmSync(scratchDir, { recursive: true, force: true });
    } catch {
      // Cleanup is best-effort.
    }
  }
}
1739
/**
 * Run one SQL statement through the `sqlite3` command-line tool.
 *
 * @param {string} dbPath - Database file to open.
 * @param {string} sql - Statement to execute.
 * @returns {string} Trimmed stdout; columns are separated by "|".
 * @throws {Error} Whatever `execFileSync` throws (tool missing, non-zero exit,
 *   or output beyond the 4 MiB buffer).
 */
function sqliteQuery(dbPath, sql) {
  const cliArgs = ["-separator", "|", dbPath, sql];
  const stdout = execFileSync4("sqlite3", cliArgs, {
    encoding: "utf8",
    maxBuffer: 4 * 1024 * 1024
  });
  return stdout.trim();
}
1745
/**
 * Build the SQL predicate that selects cookies belonging to a domain.
 *
 * Matches the registrable domain and the given host, each with and without a
 * leading dot, the `www.` host, and every sub-domain of the registrable
 * domain via LIKE.
 *
 * Changes over the previous version:
 *   - the no-op `likeReg` ternary is gone;
 *   - LIKE wildcards (`%`, `_`) and backslashes in the domain are escaped and
 *     an ESCAPE clause is added, so e.g. "my_site.com" no longer matches
 *     "myXsite.com". Domains without those characters produce exactly the
 *     same SQL text as before.
 *
 * @param {string} domain - Host the cookies are wanted for.
 * @param {string} column - Column holding the cookie host.
 * @returns {string} Parenthesised predicate for a WHERE clause.
 * @throws {Error} When a domain variant contains a single quote, which could
 *   otherwise break out of the SQL string literal.
 */
function buildDomainWhereClause(domain, column) {
  const reg = getRegistrableDomain(domain);
  const variants = new Set([
    reg,
    `.${reg}`,
    domain,
    `.${domain}`,
    `www.${reg}`,
    `.www.${reg}`
  ]);
  for (const d of variants) {
    if (d.includes("'")) {
      throw new Error(`Invalid domain for cookie query: ${d}`);
    }
  }
  const inList = [...variants].map((d) => `'${d}'`).join(", ");
  // Only add an ESCAPE clause when it is actually needed, to keep the common
  // case byte-identical to the historical query.
  const needsEscape = /[%_\\]/.test(reg);
  const likePattern = needsEscape
    ? `'%.${reg.replace(/[%_\\]/g, "\\$&")}' ESCAPE '\\'`
    : `'%.${reg}'`;
  return `(${column} IN (${inList}) OR ${column} LIKE ${likePattern})`;
}
1764
/**
 * Extract cookies for a domain from Google Chrome.
 *
 * Thin wrapper over the generic Chromium extractor; only `profile` is carried
 * over from `opts`, and the browser name is fixed to "Chrome".
 *
 * @param {string} domain - Domain to collect cookies for.
 * @param {{profile?: string}} [opts]
 * @returns {{cookies: object[], source: string | null, warnings: string[]}}
 */
function extractFromChrome(domain, opts) {
  const chromeOpts = { profile: opts?.profile, browserName: "Chrome" };
  return extractFromChromium(domain, chromeOpts);
}
1770
/**
 * Read a domain's cookies from a Chromium-family cookie database.
 *
 * The database is copied to a temporary location, queried through the
 * `sqlite3` CLI, and each row is turned into a cookie object. Failures never
 * throw: they are reported through `warnings` with an empty cookie list.
 *
 * Fixes over the previous version:
 *   - text columns (name, value, host, path) are selected as hex and decoded
 *     here, so a "|" or newline inside a value can no longer shift or split
 *     the "|"-separated row;
 *   - `samesite = -1` (Chromium's "unspecified") is reported as "Lax", the
 *     browser's effective default, instead of "Strict";
 *   - CRLF line endings in the tool output are tolerated.
 *
 * @param {string} domain - Domain to collect cookies for.
 * @param {{cookieDbPath?: string, userDataDir?: string, profile?: string,
 *   browserName?: string, safeStorageService?: string}} [opts]
 * @returns {{cookies: object[], source: string | null, warnings: string[]}}
 *   `source` is a human-readable description, or null when nothing was found.
 */
function extractFromChromium(domain, opts) {
  const warnings = [];
  const dbPath = resolveChromiumCookiesPath(opts);
  const sourceLabel = opts?.browserName || "Chromium";
  if (!dbPath || !existsSync15(dbPath)) {
    warnings.push(`${sourceLabel} cookies DB not found${dbPath ? ` at ${dbPath}` : ""}`);
    return { cookies: [], source: null, warnings };
  }
  const fromHex = (hex) => Buffer.from(hex, "hex").toString("utf8");
  const sameSiteNames = new Map([["0", "None"], ["1", "Lax"], ["2", "Strict"]]);
  try {
    const cookies = withTempCopy(dbPath, (tempDb) => {
      const where = buildDomainWhereClause(domain, "host_key");
      const sql = `SELECT hex(name), hex(value), hex(encrypted_value) as ev, hex(host_key), hex(path), is_secure, is_httponly, samesite, expires_utc FROM cookies WHERE ${where};`;
      const rows = sqliteQuery(tempDb, sql);
      if (!rows) {
        return [];
      }
      const results = [];
      for (const line of rows.split(/\r?\n/)) {
        const parts = line.split("|");
        if (parts.length < 9) {
          continue;
        }
        const [nameHex, valueHex, encHex, hostHex, pathHex, secure, httpOnly, sameSite, expiresUtc] = parts;
        const value = decodeChromiumCookieValue(fromHex(valueHex), encHex, opts);
        if (!value) {
          continue;
        }
        results.push({
          name: fromHex(nameHex),
          value,
          domain: fromHex(hostHex),
          path: fromHex(pathHex) || "/",
          secure: secure === "1",
          httpOnly: httpOnly === "1",
          sameSite: sameSiteNames.get(sameSite) ?? "Lax",
          // expires_utc counts microseconds from 1601-01-01; shift to the Unix
          // epoch and convert to seconds. 0 marks a session cookie.
          expires: expiresUtc === "0" ? -1 : Math.floor((Number(expiresUtc) - 11644473600000000) / 1e6)
        });
      }
      return results;
    });
    let source;
    if (opts?.cookieDbPath) {
      source = `${sourceLabel} cookie DB "${dbPath}"`;
    } else if (opts?.userDataDir) {
      source = `${sourceLabel} user data "${opts.userDataDir}"${opts.profile ? ` profile "${opts.profile}"` : ""}`;
    } else if (opts?.profile) {
      source = `${sourceLabel} profile "${opts.profile}"`;
    } else {
      source = `${sourceLabel} default profile`;
    }
    if (cookies.length === 0) {
      warnings.push(`No cookies for ${domain} found in ${source}`);
    }
    log("auth", `extracted ${cookies.length} cookies for ${domain} from ${source}`);
    return { cookies, source: cookies.length > 0 ? source : null, warnings };
  } catch (err) {
    warnings.push(`${sourceLabel} extraction failed: ${err instanceof Error ? err.message : err}`);
    return { cookies: [], source: null, warnings };
  }
}
1819
/**
 * Read a domain's cookies from a Firefox profile's `cookies.sqlite`.
 *
 * The database is copied to a temporary location and queried through the
 * `sqlite3` CLI. Failures never throw: they are reported through `warnings`
 * with an empty cookie list.
 *
 * Fix over the previous version: text columns (name, value, host, path) are
 * selected as hex and decoded here. Firefox stores cookie values in plain
 * text, so a value containing "|" or a newline used to shift or split the
 * "|"-separated row and corrupt every following field. CRLF line endings in
 * the tool output are tolerated as well.
 *
 * @param {string} domain - Domain to collect cookies for.
 * @param {{profile?: string}} [opts]
 * @returns {{cookies: object[], source: string | null, warnings: string[]}}
 *   `source` is a human-readable description, or null when nothing was found.
 */
function extractFromFirefox(domain, opts) {
  const warnings = [];
  const dbPath = getFirefoxCookiesPath(opts?.profile);
  if (!dbPath) {
    warnings.push("Firefox cookies DB not found");
    return { cookies: [], source: null, warnings };
  }
  const fromHex = (hex) => Buffer.from(hex, "hex").toString("utf8");
  try {
    const cookies = withTempCopy(dbPath, (tempDb) => {
      const where = buildDomainWhereClause(domain, "host");
      const sql = `SELECT hex(name), hex(value), hex(host), hex(path), isSecure, isHttpOnly, sameSite, expiry FROM moz_cookies WHERE ${where};`;
      const rows = sqliteQuery(tempDb, sql);
      if (!rows) {
        return [];
      }
      const results = [];
      for (const line of rows.split(/\r?\n/)) {
        const parts = line.split("|");
        if (parts.length < 8) {
          continue;
        }
        const [nameHex, valueHex, hostHex, pathHex, secure, httpOnly, sameSite, expiry] = parts;
        const name = fromHex(nameHex);
        const value = fromHex(valueHex);
        if (!name || !value) {
          continue;
        }
        results.push({
          name,
          value,
          domain: fromHex(hostHex),
          path: fromHex(pathHex) || "/",
          secure: secure === "1",
          httpOnly: httpOnly === "1",
          sameSite: sameSite === "0" ? "None" : sameSite === "1" ? "Lax" : "Strict",
          expires: Number(expiry) || -1
        });
      }
      return results;
    });
    const source = opts?.profile ? `Firefox profile "${opts.profile}"` : "Firefox default profile";
    if (cookies.length === 0) {
      warnings.push(`No cookies for ${domain} found in ${source}`);
    }
    log("auth", `extracted ${cookies.length} cookies for ${domain} from ${source}`);
    return { cookies, source: cookies.length > 0 ? source : null, warnings };
  } catch (err) {
    warnings.push(`Firefox extraction failed: ${err instanceof Error ? err.message : err}`);
    return { cookies: [], source: null, warnings };
  }
}
1866
/**
 * Extract a domain's cookies from the requested browser, or auto-detect.
 *
 * With `opts.browser` set to "firefox", "chrome" or "chromium" only that
 * browser is read. Otherwise Firefox is tried first; if it has no cookies the
 * custom Chromium location (when `opts.chromium` names one) or Chrome is read
 * instead, and the Firefox warnings are appended to that result.
 *
 * @param {string} domain - Domain to collect cookies for.
 * @param {{browser?: string, firefoxProfile?: string, chromeProfile?: string,
 *   chromium?: object}} [opts]
 * @returns {{cookies: object[], source: string | null, warnings: string[]}}
 */
function extractBrowserCookies(domain, opts) {
  const readFirefox = () => extractFromFirefox(domain, { profile: opts?.firefoxProfile });
  const readChrome = () => extractFromChrome(domain, { profile: opts?.chromeProfile });
  const readChromium = () => extractFromChromium(domain, opts.chromium);

  switch (opts?.browser) {
    case "firefox":
      return readFirefox();
    case "chrome":
      return readChrome();
    case "chromium":
      return readChromium();
    default:
      break;
  }

  const firefox = readFirefox();
  if (firefox.cookies.length > 0) {
    return firefox;
  }
  const hasCustomChromium = Boolean(opts?.chromium?.cookieDbPath || opts?.chromium?.userDataDir);
  const fallback = hasCustomChromium ? readChromium() : readChrome();
  fallback.warnings.push(...firefox.warnings);
  return fallback;
}
1888
/**
 * Collect a domain's cookies from every known browser that has any.
 *
 * Each Chromium-family browser in CHROMIUM_BROWSERS whose user-data directory
 * exists is read, then Firefox. Browsers that fail to read are skipped
 * silently. Results are sorted by the number of cookies flagged httpOnly or
 * secure, highest first.
 *
 * NOTE(review): `macPath` is also used to build the Windows and Linux
 * locations (lower-cased on Linux) — confirm those directories match what the
 * browsers actually use on those platforms.
 *
 * @param {string} domain - Domain to look for.
 * @returns {Array<{browser: string, cookies: object[], sessionCookies: number,
 *   source: string | null}>}
 */
function scanAllBrowserSessions(domain) {
  const home = homedir7();
  const os = platform();
  const userDataDirFor = ({ macPath }) => {
    if (os === "darwin") {
      return join12(home, "Library", "Application Support", macPath);
    }
    if (os === "win32") {
      return join12(process.env.LOCALAPPDATA ?? join12(home, "AppData", "Local"), macPath, "User Data");
    }
    return join12(home, ".config", macPath.toLowerCase());
  };
  const summarize = (browserName, extraction) => ({
    browser: browserName,
    cookies: extraction.cookies,
    sessionCookies: extraction.cookies.filter((c) => c.httpOnly || c.secure).length,
    source: extraction.source
  });

  const found = [];
  for (const entry of CHROMIUM_BROWSERS) {
    const userDataDir = userDataDirFor(entry);
    if (!existsSync15(userDataDir)) {
      continue;
    }
    try {
      const extraction = extractFromChromium(domain, { userDataDir, browserName: entry.name });
      if (extraction.cookies.length > 0) {
        found.push(summarize(entry.name, extraction));
      }
    } catch {
      // One unreadable browser must not stop the scan.
    }
  }
  try {
    const extraction = extractFromFirefox(domain);
    if (extraction.cookies.length > 0) {
      found.push(summarize("Firefox", extraction));
    }
  } catch {
    // Same best-effort policy for Firefox.
  }
  found.sort((a, b) => b.sessionCookies - a.sessionCookies);
  return found;
}
1926
/**
 * Return the top-ranked browser session for a domain.
 *
 * @param {string} domain - Domain to look for.
 * @returns {object | null} First entry of the ranked scan, or null when no
 *   browser holds cookies for the domain.
 */
function findBestBrowserSession(domain) {
  const ranked = scanAllBrowserSessions(domain);
  return ranked.length > 0 ? ranked[0] ?? null : null;
}
1930
// Derived-key cache, keyed by keychain service name.
var _chromiumKeyCache;
// Chromium-family browsers to scan, with their user-data sub-path.
var CHROMIUM_BROWSERS;
// Lazy initialiser: runs the logger and domain initialisers, then creates the
// key cache and the browser table.
var init_browser_cookies = __esm(() => {
  init_logger();
  init_domain();
  _chromiumKeyCache = new Map();
  const browserTable = [
    ["Chrome", "Google/Chrome"],
    ["Arc", "Arc/User Data"],
    ["Brave", "BraveSoftware/Brave-Browser"],
    ["Edge", "Microsoft Edge"],
    ["Vivaldi", "Vivaldi"],
    ["Opera", "com.operasoftware.Opera"],
    ["Dia", "Dia/User Data"],
    ["Chromium", "Chromium"]
  ];
  CHROMIUM_BROWSERS = browserTable.map(([name, macPath]) => ({ name, macPath }));
});
1946
+
1379
1947
  // ../../src/cli.ts
1380
1948
  import { config as loadEnv } from "dotenv";
1381
1949
  import { spawn as spawn3 } from "child_process";
@@ -4106,7 +4674,9 @@ var CLI_REFERENCE = {
4106
4674
  { name: "forward", usage: "[--session id]", desc: "Navigate forward" },
4107
4675
  { name: "sync", usage: "[--session id]", desc: "Checkpoint current capture, keep tab open, queue background index + publish, then inspect via skill/publish review" },
4108
4676
  { name: "close", usage: "[--session id]", desc: "Checkpoint capture, queue background index + publish, close browse session, then inspect via skill/publish review" },
4109
- { name: "stats", usage: "[--json] [--pretty]", desc: "Show lifetime time/tokens/cost saved and marketplace earnings/spending" }
4677
+ { name: "stats", usage: "[--json] [--pretty]", desc: "Show lifetime time/tokens/cost saved and marketplace earnings/spending" },
4678
+ { name: "corpus-test", usage: "--url <url> [--id <id>] [--retries N]", desc: "Capture a single URL with retry logic; keeps best result across N attempts" },
4679
+ { name: "corpus-run", usage: "--corpus <file> --out <file> [--retries N]", desc: "Run corpus-test over all cases in a corpus JSON file and write a comparable snapshot" }
4110
4680
  ],
4111
4681
  globalFlags: [
4112
4682
  { flag: "--pretty", desc: "Indented JSON output" },
@@ -4620,8 +5190,270 @@ async function cmdSync(flags) {
4620
5190
  async function cmdClose(flags) {
4621
5191
  output(await api2("POST", "/v1/browse/close", typeof flags.session === "string" ? { session_id: flags.session } : undefined), false);
4622
5192
  }
5193
/**
 * `login-auto` command: create (or reuse) an agent inbox for a site and,
 * optionally, wait for the OTP or verification link the site sends to it.
 *
 * @param {object} flags - Parsed CLI flags. `url` is required; `wait-otp` and
 *   `wait-link` select what to wait for (OTP takes precedence).
 * @returns {Promise<void>} The result is emitted through `output`.
 */
async function cmdLoginAuto(flags) {
  const url = flags.url;
  if (!url) {
    return die("--url is required");
  }
  // Fall back to the raw argument when it does not parse as a URL.
  let domain;
  try {
    domain = new URL(url).hostname.replace(/^www\./, "");
  } catch {
    domain = url;
  }
  info(`[login-auto] creating agent email for ${domain}...`);
  const { autonomousEmailLogin: startEmailLogin } = await Promise.resolve().then(() => (init_agent_mail(), exports_agent_mail));
  const session = await startEmailLogin(domain);
  const { email } = session;
  info(`[login-auto] agent email: ${email}`);
  info(`[login-auto] use this email to register/login on ${domain}`);
  info(`[login-auto] then run: unbrowse login-auto --url ${url} --wait-otp`);
  info(`[login-auto] or: unbrowse login-auto --url ${url} --wait-link`);

  if (flags["wait-otp"]) {
    info(`[login-auto] waiting for OTP email from ${domain}...`);
    const otp = await session.waitForOtp();
    if (!otp) {
      info(`[login-auto] no OTP received within 90 seconds`);
      output({ email: session.email, otp: null, domain, error: "timeout" });
      return;
    }
    info(`[login-auto] OTP received: ${otp}`);
    output({ email: session.email, otp, domain });
    return;
  }

  if (flags["wait-link"]) {
    info(`[login-auto] waiting for verification link from ${domain}...`);
    const link = await session.waitForLink();
    if (!link) {
      info(`[login-auto] no verification email within 90 seconds`);
      output({ email: session.email, link: null, domain, error: "timeout" });
      return;
    }
    info(`[login-auto] verification link: ${link}`);
    output({ email: session.email, link, domain });
    return;
  }

  output({ email: session.email, inbox_id: session.inboxId, domain });
}
5237
/**
 * `sessions-scan` command.
 *
 * With `--domain`, reports which installed browsers hold cookies for that
 * domain and which one looks best. Without it, lists up to 30 hosts that have
 * at least two httpOnly/secure cookies in the default profile of a fixed set
 * of Chromium-family browsers (macOS "Application Support" locations).
 *
 * Fixes over the previous version:
 *   - cookie databases were copied to a predictable `/tmp/unbrowse-scan-*.db`
 *     path and left behind whenever `sqlite3` failed; the copy now lives in a
 *     fresh private temp directory that is always removed;
 *   - only the legacy `Default/Cookies` location was probed; the shared path
 *     resolver is used so `Default/Network/Cookies` is found as well;
 *   - the unused dynamic import and unused bindings are gone, and `parseInt`
 *     is given an explicit radix.
 *
 * @param {object} flags - Parsed CLI flags (`domain` optional).
 * @returns {Promise<void>} The result is emitted through `output`.
 */
async function cmdSessionsScan(flags) {
  const domain = flags.domain;
  const {
    scanAllBrowserSessions: scanSessions,
    resolveChromiumCookiesPath: resolveCookiesPath
  } = await Promise.resolve().then(() => (init_browser_cookies(), exports_browser_cookies));

  if (domain) {
    const sessions = scanSessions(domain);
    if (sessions.length === 0) {
      info(`No browser has a session for ${domain}`);
      output({ domain, sessions: [], best: null });
      return;
    }
    const best = sessions[0];
    info(`Best session for ${domain}: ${best.browser} (${best.sessionCookies} session cookies)`);
    output({
      domain,
      sessions: sessions.map((s) => ({
        browser: s.browser,
        cookies: s.cookies.length,
        session_cookies: s.sessionCookies
      })),
      best: { browser: best.browser, session_cookies: best.sessionCookies }
    });
    return;
  }

  const fs = __require("fs");
  const os = __require("os");
  const { join: join13 } = __require("path");
  const { execFileSync } = __require("child_process");
  const appSupport = join13(os.homedir(), "Library", "Application Support");
  const browsers = [
    { name: "Chrome", dir: "Google/Chrome" },
    { name: "Dia", dir: "Dia/User Data" },
    { name: "Arc", dir: "Arc/User Data" },
    { name: "Brave", dir: "BraveSoftware/Brave-Browser" },
    { name: "Edge", dir: "Microsoft Edge" }
  ]
    .map(({ name, dir }) => ({ name, path: resolveCookiesPath({ userDataDir: join13(appSupport, dir) }) }))
    .filter((b) => b.path && fs.existsSync(b.path));

  const allSessions = [];
  for (const b of browsers) {
    let scratchDir = null;
    try {
      // Query a private copy so the browser's own database is never opened.
      scratchDir = fs.mkdtempSync(join13(os.tmpdir(), "unbrowse-scan-"));
      const tmp = join13(scratchDir, "cookies.db");
      fs.copyFileSync(b.path, tmp);
      const result = execFileSync("sqlite3", [
        tmp,
        `SELECT host_key, COUNT(*) as c FROM cookies WHERE is_httponly=1 OR is_secure=1 GROUP BY host_key HAVING c >= 2 ORDER BY c DESC LIMIT 50;`
      ], { encoding: "utf8" });
      for (const line of result.trim().split("\n")) {
        if (!line) {
          continue;
        }
        const [d, count] = line.split("|");
        // Skip well-known ad/tracking/captcha hosts.
        if (/google|facebook|doubleclick|rubiconproject|demdex|hcaptcha|protechts/.test(d)) {
          continue;
        }
        allSessions.push({ browser: b.name, domain: d, session_cookies: Number.parseInt(count, 10) || 0 });
      }
    } catch {
      // Best effort: an unreadable browser is simply left out of the report.
    } finally {
      if (scratchDir) {
        try {
          fs.rmSync(scratchDir, { recursive: true, force: true });
        } catch {
          // Nothing more can be done if cleanup itself fails.
        }
      }
    }
  }

  // Keep, per host, the browser holding the most session-like cookies.
  const byDomain = new Map();
  for (const s of allSessions) {
    const existing = byDomain.get(s.domain);
    if (!existing || s.session_cookies > existing.session_cookies) {
      byDomain.set(s.domain, s);
    }
  }
  const sorted = [...byDomain.values()].sort((a, b) => b.session_cookies - a.session_cookies);
  info(`Found ${sorted.length} logged-in domains across ${browsers.length} browsers`);
  output({ sessions: sorted.slice(0, 30) });
}
5304
/**
 * Open a URL in a browse session, give it six seconds, then close the session
 * and report what was captured.
 *
 * Never rejects for API failures: an `error` field in either response, or a
 * thrown exception, is folded into the result.
 *
 * @param {string} url - Page to capture.
 * @returns {Promise<{capture: "ok" | "error", endpoints: number,
 *   requests: number, raw?: object, error?: string}>}
 */
async function captureOnce(url) {
  const failed = (reason) => ({ capture: "error", endpoints: 0, requests: 0, error: reason });
  try {
    const opened = await api2("POST", "/v1/browse/go", { url });
    if (opened.error) {
      return failed(String(opened.error));
    }
    // Fixed settle time before the session is closed.
    await new Promise((resolve) => setTimeout(resolve, 6000));
    const closed = await api2("POST", "/v1/browse/close", {});
    if (closed.error) {
      return failed(String(closed.error));
    }
    return {
      capture: "ok",
      endpoints: closed.endpoint_count ?? 0,
      requests: closed.request_count ?? 0,
      raw: closed
    };
  } catch (err) {
    return failed(err.message);
  }
}
5325
/**
 * `corpus-test` command: capture one URL, retrying until an attempt yields at
 * least one endpoint, and print the best attempt.
 *
 * Fixes over the previous version:
 *   - when no attempt found endpoints the failure reason was dropped (the
 *     placeholder "best" result never carried an error); the most recent error
 *     is now included in the output;
 *   - a non-numeric `--retries` produced NaN and therefore zero attempts; it
 *     now falls back to the default of 3, and values below 1 are raised to 1;
 *   - `spawn` reports a missing `pkill` through an "error" event, which
 *     try/catch cannot intercept and which is fatal when unhandled; a no-op
 *     listener keeps the cleanup best-effort;
 *   - an unparsable `--url` without `--id` no longer throws a raw TypeError
 *     while deriving the id; the URL text is used as the id instead.
 *
 * @param {object} flags - Parsed CLI flags: `url` (required), `id`, `retries`,
 *   `pretty`.
 * @returns {Promise<void>} The result is emitted through `output`.
 */
async function cmdCorpusTest(flags) {
  const url = flags.url;
  if (!url) {
    return die("--url is required for corpus-test");
  }
  let id = flags.id;
  if (!id) {
    try {
      id = new URL(url).hostname;
    } catch {
      id = url;
    }
  }
  const requested = flags.retries ? Number.parseInt(flags.retries, 10) : 3;
  const retries = Number.isNaN(requested) ? 3 : Math.max(1, requested);

  let best = { capture: "error", endpoints: 0, requests: 0 };
  let lastError;
  let attempts = 0;
  for (let attempt = 0; attempt < retries; attempt++) {
    attempts++;
    info(`corpus-test [${id}] attempt ${attempt + 1}/${retries}`);
    const result = await captureOnce(url);
    if (result.error) {
      lastError = result.error;
    }
    if (result.endpoints > best.endpoints) {
      best = result;
    }
    if (best.endpoints > 0) {
      break;
    }
    if (attempt < retries - 1) {
      // Clear out stuck browser processes before the next try.
      try {
        const killer = spawn3("pkill", ["-9", "-f", "kuri|chrome-profile"], { stdio: "ignore" });
        killer.on("error", () => {});
      } catch {
        // Cleanup is best-effort.
      }
      await new Promise((r) => setTimeout(r, 2000));
    }
  }
  const error = best.error ?? lastError;
  output({
    id,
    url,
    capture: best.capture,
    endpoints: best.endpoints,
    requests: best.requests,
    attempts,
    best_of: retries,
    ...(error ? { error } : {})
  }, !!flags.pretty);
}
5359
/**
 * `corpus-run` command: run every case of a corpus file through the capture
 * loop, write a JSON snapshot to `--out` after each case and at the end, and
 * print the final snapshot.
 *
 * Flags read: `corpus` (required, path to a JSON file with a non-empty
 * `cases` array), `out` (required, snapshot path), `retries` (defaults to 3),
 * `pretty` (passed to `output` as a boolean).
 *
 * Each case contributes one entry to `results` with `id`, `capture`,
 * `endpoints`, `requests`, `resolve_endpoints`, `verdict` ("pass" only when
 * the capture succeeded with at least one endpoint), `attempts` and `notes`.
 *
 * `die` is assumed not to return (it is used as a terminating guard
 * throughout this function).
 *
 * @param {Record<string, any>} flags - Parsed CLI flags.
 * @returns {Promise<void>}
 */
async function cmdCorpusRun(flags) {
  const corpusPath = flags.corpus;
  const outPath = flags.out;
  if (!corpusPath)
    die("--corpus is required for corpus-run");
  if (!outPath)
    die("--out is required for corpus-run");
  const retries = flags.retries ? Number.parseInt(flags.retries, 10) : 3;
  // NaN or a value below 1 would skip every capture and mark all cases failed.
  if (!Number.isInteger(retries) || retries < 1)
    die("--retries must be a positive integer");
  const fsMod = __require("fs");
  let corpus;
  try {
    corpus = JSON.parse(fsMod.readFileSync(corpusPath, "utf-8"));
  } catch (err) {
    die(`Failed to read corpus file: ${err.message}`);
  }
  // `corpus?.` guards against valid JSON that is not an object (e.g. `null`).
  if (!Array.isArray(corpus?.cases) || corpus.cases.length === 0) {
    die("Corpus file must have a non-empty 'cases' array");
  }
  let gitSha = "unknown";
  try {
    const { execSync: execSync3 } = __require("child_process");
    // stderr is discarded so running outside a git checkout does not print
    // git's "not a git repository" message to the terminal.
    gitSha = execSync3("git rev-parse --short HEAD", {
      encoding: "utf-8",
      stdio: ["ignore", "pipe", "ignore"]
    }).trim() || "unknown";
  } catch {
    // git missing or not a repository: keep "unknown".
  }
  const startTime = Date.now();
  const results = [];
  const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms));
  const buildSnapshot = () => ({
    git_sha: gitSha,
    timestamp: new Date().toISOString(),
    total_runtime_ms: Date.now() - startTime,
    results
  });
  const killBrowsers = () => {
    try {
      // A failed spawn surfaces as an asynchronous 'error' event that
      // try/catch cannot intercept; the listener keeps it from crashing the run.
      spawn3("pkill", ["-9", "-f", "kuri|chrome-profile"], { stdio: "ignore" }).on("error", () => {});
    } catch {
      // Best-effort cleanup; nothing to do on failure.
    }
  };
  // Append one case result, refresh the on-disk snapshot and log the outcome.
  const record = (entry) => {
    results.push(entry);
    try {
      fsMod.writeFileSync(outPath, JSON.stringify(buildSnapshot(), null, 2));
    } catch (err) {
      // Intermediate snapshots are best-effort; the final write below is not.
      info(`corpus-run: could not write snapshot to ${outPath}: ${err?.message ?? err}`);
    }
    info(`corpus-run [${entry.id}] done: endpoints=${entry.endpoints} verdict=${entry.verdict} attempts=${entry.attempts}`);
  };
  info(`corpus-run: ${corpus.cases.length} cases, retries=${retries}`);
  for (const [index, c] of corpus.cases.entries()) {
    let hostname = null;
    try {
      hostname = new URL(c?.url).hostname;
    } catch {
      // Malformed or missing URL; handled below.
    }
    const caseId = c?.id || hostname || `case-${index + 1}`;
    if (!c?.id && hostname === null) {
      // Without an id or a parsable URL the case cannot be run; record it as
      // failed and carry on instead of aborting the remaining cases.
      record({
        id: caseId,
        capture: "error",
        endpoints: 0,
        requests: 0,
        resolve_endpoints: 0,
        verdict: "fail",
        attempts: 0,
        notes: `invalid or missing url: ${String(c?.url)}`
      });
      continue;
    }
    info(`corpus-run [${caseId}] starting`);
    killBrowsers();
    await sleep(1500);
    let best = { capture: "error", endpoints: 0, requests: 0 };
    let attempts = 0;
    for (let attempt = 0; attempt < retries; attempt++) {
      attempts++;
      const result = await captureOnce(c.url);
      // Keep the result with the most endpoints. On a tie, take the newer one
      // unless that would replace a successful capture with a failed one, so
      // `capture` and `notes` reflect a real attempt, not the placeholder.
      const tie = result.endpoints === best.endpoints;
      const downgrade = best.capture === "ok" && result.capture !== "ok";
      if (result.endpoints > best.endpoints || (tie && !downgrade))
        best = result;
      if (best.endpoints > 0)
        break;
      if (attempt < retries - 1) {
        killBrowsers();
        await sleep(2000);
      }
    }
    let resolveEndpoints = 0;
    if (best.capture === "ok" && best.endpoints > 0 && hostname !== null) {
      try {
        const resolveResult = await api2("GET", "/v1/resolve", {
          intent: c.intent ?? `get data from ${caseId}`,
          url: c.url,
          domain: hostname
        });
        const endpoints = resolveResult.endpoints ?? resolveResult.results ?? [];
        resolveEndpoints = Array.isArray(endpoints) ? endpoints.length : 0;
      } catch {
        // Resolve is informational only; a failure leaves the count at 0.
      }
    }
    const verdict = best.capture !== "error" && best.endpoints > 0 ? "pass" : "fail";
    record({
      id: caseId,
      capture: best.capture,
      endpoints: best.endpoints,
      requests: best.requests,
      resolve_endpoints: resolveEndpoints,
      verdict,
      attempts,
      notes: best.error ?? ""
    });
  }
  const pass = results.filter((r) => r.verdict === "pass").length;
  const fail = results.filter((r) => r.verdict === "fail").length;
  const finalSnapshot = buildSnapshot();
  try {
    fsMod.writeFileSync(outPath, JSON.stringify(finalSnapshot, null, 2));
  } catch (err) {
    die(`Failed to write snapshot to ${outPath}: ${err?.message ?? err}`);
  }
  info(`corpus-run complete: ${pass} pass, ${fail} fail of ${results.length} total`);
  output(finalSnapshot, !!flags.pretty);
}
4623
5455
  async function cmdConnectChrome() {
4624
- const { execSync: execSync2, spawn: spawnProc } = __require("child_process");
5456
+ const { execSync: execSync3, spawn: spawnProc } = __require("child_process");
4625
5457
  try {
4626
5458
  const res = await fetch("http://127.0.0.1:9222/json/version", { signal: AbortSignal.timeout(1000) });
4627
5459
  if (res.ok) {
@@ -4634,16 +5466,16 @@ async function cmdConnectChrome() {
4634
5466
  }
4635
5467
  } catch {}
4636
5468
  try {
4637
- execSync2("pkill -f kuri/chrome-profile", { stdio: "ignore" });
5469
+ execSync3("pkill -f kuri/chrome-profile", { stdio: "ignore" });
4638
5470
  } catch {}
4639
5471
  console.log("Quitting Chrome to relaunch with remote debugging...");
4640
5472
  if (process.platform === "darwin") {
4641
5473
  try {
4642
- execSync2('osascript -e "quit app \\"Google Chrome\\""', { stdio: "ignore", timeout: 5000 });
5474
+ execSync3('osascript -e "quit app \\"Google Chrome\\""', { stdio: "ignore", timeout: 5000 });
4643
5475
  } catch {}
4644
5476
  } else {
4645
5477
  try {
4646
- execSync2("pkill -f chrome", { stdio: "ignore" });
5478
+ execSync3("pkill -f chrome", { stdio: "ignore" });
4647
5479
  } catch {}
4648
5480
  }
4649
5481
  await new Promise((r) => setTimeout(r, 2000));
@@ -4695,6 +5527,10 @@ async function main() {
4695
5527
  return cmdConnectChrome();
4696
5528
  if (command === "stats")
4697
5529
  return cmdStats(flags);
5530
+ if (command === "sessions-scan")
5531
+ return cmdSessionsScan(flags);
5532
+ if (command === "login-auto")
5533
+ return cmdLoginAuto(flags);
4698
5534
  const KNOWN_COMMANDS = new Set([
4699
5535
  "health",
4700
5536
  "mcp",
@@ -4740,7 +5576,12 @@ async function main() {
4740
5576
  "sync",
4741
5577
  "close",
4742
5578
  "connect-chrome",
4743
- "stats"
5579
+ "stats",
5580
+ "corpus-test",
5581
+ "corpus-run",
5582
+ "sessions-scan",
5583
+ "cache-clear",
5584
+ "login-auto"
4744
5585
  ]);
4745
5586
  if (!KNOWN_COMMANDS.has(command)) {
4746
5587
  const pack = findSitePack(command);
@@ -4840,6 +5681,14 @@ async function main() {
4840
5681
  return cmdConnectChrome();
4841
5682
  case "stats":
4842
5683
  return cmdStats(flags);
5684
+ case "corpus-test":
5685
+ return cmdCorpusTest(flags);
5686
+ case "corpus-run":
5687
+ return cmdCorpusRun(flags);
5688
+ case "sessions-scan":
5689
+ return cmdSessionsScan(flags);
5690
+ case "login-auto":
5691
+ return cmdLoginAuto(flags);
4843
5692
  default:
4844
5693
  info(`Unknown command: ${command}`);
4845
5694
  printHelp();