unbrowse 2.0.23 → 2.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -122,11 +122,13 @@ Put that in:
122
122
  ```bash
123
123
  unbrowse health
124
124
  unbrowse resolve --intent "get trending searches" --url "https://google.com" --pretty
125
- unbrowse login --url "https://calendar.google.com"
125
+ unbrowse login --url "https://calendar.google.com" --browser chrome
126
126
  unbrowse skills
127
127
  unbrowse search --intent "get stock prices"
128
128
  ```
129
129
 
130
+ `unbrowse login` reuses cookies from a supported local browser profile. On macOS, pass `--browser chrome|arc|dia|brave|edge|vivaldi|chromium|firefox` if your default browser is Safari or another unsupported app.
131
+
130
132
  ## Demo notes
131
133
 
132
134
  - First-time capture/indexing on a site can take 20-80 seconds. That is the slow path; repeats should be much faster.
package/dist/cli.js CHANGED
@@ -707,7 +707,7 @@ function readProcessCommand(pid) {
707
707
  }
708
708
  function isLikelyUnbrowseServerProcess(pid) {
709
709
  const command = readProcessCommand(pid);
710
- return /\bunbrowse\b|runtime-src\/index\.ts|src\/index\.ts|dist\/index\.js/i.test(command);
710
+ return /\bunbrowse\b|runtime-src\/(index|supervisor)\.ts|src\/(index|supervisor)\.ts|dist\/(index|supervisor)\.js/i.test(command);
711
711
  }
712
712
  async function stopManagedServer(pid, pidFile, baseUrl) {
713
713
  try {
@@ -737,15 +737,32 @@ function isStartupLockStale(lockFile) {
737
737
  return true;
738
738
  }
739
739
  }
740
+ function shouldReclaimStartupLock(lockFile, pidFile) {
741
+ if (!isStartupLockStale(lockFile))
742
+ return false;
743
+ const owner = readPidState(pidFile);
744
+ const ownerAlive = owner?.pid ? isPidAlive(owner.pid) : false;
745
+ return !ownerAlive;
746
+ }
740
747
  function deriveListenEnv(baseUrl) {
741
748
  const url = new URL(baseUrl);
742
749
  const host = !url.hostname || url.hostname === "localhost" ? "127.0.0.1" : url.hostname;
743
750
  const port = url.port || (url.protocol === "https:" ? "443" : "80");
744
751
  return { HOST: host, PORT: port, UNBROWSE_URL: baseUrl };
745
752
  }
753
+ function describeListenTarget(baseUrl) {
754
+ const url = new URL(baseUrl);
755
+ const host = !url.hostname || url.hostname === "localhost" ? "127.0.0.1" : url.hostname;
756
+ const port = url.port || (url.protocol === "https:" ? "443" : "80");
757
+ return `${host}:${port}`;
758
+ }
746
759
  async function ensureLocalServer(baseUrl, noAutoStart, metaUrl) {
747
760
  const pidFile = getServerPidFile(baseUrl);
748
761
  const startupLockFile = `${pidFile}.lock`;
762
+ if (shouldReclaimStartupLock(startupLockFile, pidFile)) {
763
+ clearStalePidFile(pidFile);
764
+ clearStaleStartupLockFile(startupLockFile);
765
+ }
749
766
  let existing = readPidState(pidFile);
750
767
  const health = await getServerHealth(baseUrl);
751
768
  if (health.ok) {
@@ -784,6 +801,11 @@ async function ensureLocalServer(baseUrl, noAutoStart, metaUrl) {
784
801
  startupLockFd = openSync(startupLockFile, "wx");
785
802
  } catch (error) {
786
803
  if (error.code === "EEXIST") {
804
+ if (shouldReclaimStartupLock(startupLockFile, pidFile)) {
805
+ clearStalePidFile(pidFile);
806
+ clearStaleStartupLockFile(startupLockFile);
807
+ return ensureLocalServer(baseUrl, noAutoStart, metaUrl);
808
+ }
787
809
  if (await waitForHealthy(baseUrl, 30000))
788
810
  return;
789
811
  const owner = readPidState(pidFile);
@@ -800,7 +822,16 @@ async function ensureLocalServer(baseUrl, noAutoStart, metaUrl) {
800
822
  try {
801
823
  if (await isServerHealthy(baseUrl))
802
824
  return;
803
- const entrypoint = resolveSiblingEntrypoint(metaUrl, "index");
825
+ const discoveredPid = findListeningPid(baseUrl);
826
+ if (discoveredPid) {
827
+ if (isLikelyUnbrowseServerProcess(discoveredPid)) {
828
+ if (await waitForHealthy(baseUrl, 5000))
829
+ return;
830
+ throw new Error(`Port ${describeListenTarget(baseUrl)} already has an unbrowse server (pid ${discoveredPid}), but it did not become healthy.`);
831
+ }
832
+ throw new Error(`Port ${describeListenTarget(baseUrl)} already in use by pid ${discoveredPid}.`);
833
+ }
834
+ const entrypoint = resolveSiblingEntrypoint(metaUrl, "supervisor");
804
835
  const packageRoot = getPackageRoot(metaUrl);
805
836
  const logFile = getServerAutostartLogFile();
806
837
  ensureDir(path3.dirname(logFile));
@@ -1374,6 +1405,17 @@ function detectEntityIndex(data) {
1374
1405
  }
1375
1406
  return best ? buildEntityIndex(best) : null;
1376
1407
  }
1408
+ function unwrapCarrier(data) {
1409
+ if (data == null || typeof data !== "object" || Array.isArray(data))
1410
+ return data;
1411
+ const rec = data;
1412
+ const keys = Object.keys(rec);
1413
+ const isCarrierOnly = keys.every((key) => key === "data" || key === "_extraction");
1414
+ if (isCarrierOnly && "data" in rec && (("_extraction" in rec) || Array.isArray(rec.data) || rec.data != null && typeof rec.data === "object")) {
1415
+ return unwrapCarrier(rec.data);
1416
+ }
1417
+ return data;
1418
+ }
1377
1419
  function resolvePath(obj, path5, entityIndex) {
1378
1420
  if (!path5 || obj == null)
1379
1421
  return obj;
@@ -1483,8 +1525,8 @@ function looksStructuredForDirectOutput(value) {
1483
1525
  return scalarFields >= 2;
1484
1526
  }
1485
1527
  function applyTransforms(result, flags) {
1486
- let data = result;
1487
- const entityIndex = detectEntityIndex(result);
1528
+ let data = unwrapCarrier(result);
1529
+ const entityIndex = detectEntityIndex(data);
1488
1530
  const pathFlag = flags.path;
1489
1531
  if (pathFlag) {
1490
1532
  data = resolvePath(data, pathFlag, entityIndex);
@@ -1611,7 +1653,7 @@ async function cmdResolve(flags) {
1611
1653
  if (flags["force-capture"])
1612
1654
  body.force_capture = true;
1613
1655
  const hasTransforms = !!(flags.path || flags.extract);
1614
- if (flags.raw || hasTransforms)
1656
+ if (flags.raw || flags.schema || hasTransforms)
1615
1657
  body.projection = { raw: true };
1616
1658
  const startedAt = Date.now();
1617
1659
  let result = await withPendingNotice(api2("POST", "/v1/intent/resolve", body), "Still working. First-time capture/indexing for a site can take 20-80s. Waiting is usually better than falling back.");
@@ -1658,7 +1700,7 @@ async function cmdExecute(flags) {
1658
1700
  if (flags["confirm-unsafe"])
1659
1701
  body.confirm_unsafe = true;
1660
1702
  const hasTransforms = !!(flags.path || flags.extract);
1661
- if (flags.raw || hasTransforms)
1703
+ if (flags.raw || flags.schema || hasTransforms)
1662
1704
  body.projection = { raw: true };
1663
1705
  let result = await withPendingNotice(api2("POST", `/v1/skills/${skillId}/execute`, body), "Still working. This endpoint may require browser replay or first-time auth/capture setup.");
1664
1706
  if (flags.schema) {
@@ -1693,7 +1735,12 @@ async function cmdLogin(flags) {
1693
1735
  const url = flags.url;
1694
1736
  if (!url)
1695
1737
  die("--url is required");
1696
- output(await api2("POST", "/v1/auth/login", { url }), !!flags.pretty);
1738
+ const browserLabel = typeof flags.browser === "string" ? flags.browser : "default browser";
1739
+ const result = await withPendingNotice(api2("POST", "/v1/auth/login", {
1740
+ url,
1741
+ ...typeof flags.browser === "string" ? { browser: flags.browser } : {}
1742
+ }), `Opened ${url} in ${browserLabel}. Finish sign-in there; waiting for fresh cookies...`, 1000);
1743
+ output(result, !!flags.pretty);
1697
1744
  }
1698
1745
  async function cmdSkills(flags) {
1699
1746
  output(await api2("GET", "/v1/skills"), !!flags.pretty);
@@ -1728,7 +1775,7 @@ var CLI_REFERENCE = {
1728
1775
  { name: "resolve", usage: '--intent "..." --url "..." [opts]', desc: "Resolve intent \u2192 search/capture/execute" },
1729
1776
  { name: "execute", usage: "--skill ID --endpoint ID [opts]", desc: "Execute a specific endpoint" },
1730
1777
  { name: "feedback", usage: "--skill ID --endpoint ID --rating N", desc: "Submit feedback (mandatory after resolve)" },
1731
- { name: "login", usage: '--url "..."', desc: "Interactive browser login" },
1778
+ { name: "login", usage: '--url "..." [--browser chrome|arc|dia|brave|edge|vivaldi|chromium|firefox]', desc: "Interactive browser login" },
1732
1779
  { name: "skills", usage: "", desc: "List all skills" },
1733
1780
  { name: "skill", usage: "<id>", desc: "Get skill details" },
1734
1781
  { name: "search", usage: '--intent "..." [--domain "..."]', desc: "Search marketplace" },
@@ -1753,6 +1800,7 @@ var CLI_REFERENCE = {
1753
1800
  examples: [
1754
1801
  "unbrowse health",
1755
1802
  'unbrowse resolve --intent "get timeline" --url "https://x.com"',
1803
+ 'unbrowse login --url "https://lu.ma/signin" --browser chrome',
1756
1804
  "unbrowse execute --skill abc --endpoint def --pretty",
1757
1805
  'unbrowse execute --skill abc --endpoint def --extract "user,text,likes" --limit 10',
1758
1806
  'unbrowse execute --skill abc --endpoint def --path "data.included[]" --extract "name:actor.name,text:commentary.text" --limit 20',