unbrowse 2.0.0 → 2.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -15,7 +15,7 @@ One agent learns a site once. Every later agent gets the fast path.
15
15
  npx unbrowse setup
16
16
  ```
17
17
 
18
- `npx unbrowse setup` downloads the CLI on demand, installs browser assets, lets you register with an email-shaped display identity, registers the Open Code `/unbrowse` command when Open Code is detected, and starts the local server.
18
+ `npx unbrowse setup` downloads the CLI on demand, verifies the bundled Kuri runtime, lets you register with an email-shaped display identity, registers the Open Code `/unbrowse` command when Open Code is detected, and starts the local server.
19
19
 
20
20
  For daily use:
21
21
 
@@ -30,6 +30,25 @@ If your agent host uses skills:
30
30
  npx skills add unbrowse-ai/unbrowse
31
31
  ```
32
32
 
33
+ ## Upgrading
34
+
35
+ Unbrowse no longer self-updates at runtime. If you already have Unbrowse installed, upgrade to the latest version after each release; otherwise the new flow may not work on your machine.
36
+
37
+ If you installed the CLI globally:
38
+
39
+ ```bash
40
+ npm install -g unbrowse@latest
41
+ unbrowse setup
42
+ ```
43
+
44
+ If your agent host uses skills, rerun its skill install/update command too:
45
+
46
+ ```bash
47
+ npx skills add unbrowse-ai/unbrowse
48
+ ```
49
+
50
+ Need help or want release updates? Join the Discord: [discord.gg/VWugEeFNsG](https://discord.gg/VWugEeFNsG)
51
+
33
52
  Every CLI command auto-starts the local server on `http://localhost:6969` by default. Override with `UNBROWSE_URL`, `PORT`, or `HOST`. On first startup it auto-registers as an agent with the marketplace and caches credentials in `~/.unbrowse/config.json`. `unbrowse setup` now prompts for an email-shaped identity first; headless setups can provide `UNBROWSE_AGENT_EMAIL`.
34
53
 
35
54
  Works with Claude Code, Open Code, Cursor, Codex, Windsurf, and any agent host that can call a local CLI or skill.
@@ -37,7 +56,7 @@ Works with Claude Code, Open Code, Cursor, Codex, Windsurf, and any agent host t
37
56
  ## What setup does
38
57
 
39
58
  - Checks local prerequisites for the npm/npx flow.
40
- - Installs browser assets needed for live capture.
59
+ - Verifies the bundled Kuri binary, or builds it from the vendored Kuri source when working from repo source with Zig installed.
41
60
  - Registers the Open Code `/unbrowse` command when Open Code is present.
42
61
  - Starts the local Unbrowse server unless `--no-start` is passed.
43
62
 
package/dist/cli.js CHANGED
@@ -1,23 +1,5 @@
1
1
  #!/usr/bin/env node
2
2
  // @bun
3
- import { createRequire } from "node:module";
4
- var __create = Object.create;
5
- var __getProtoOf = Object.getPrototypeOf;
6
- var __defProp = Object.defineProperty;
7
- var __getOwnPropNames = Object.getOwnPropertyNames;
8
- var __hasOwnProp = Object.prototype.hasOwnProperty;
9
- var __toESM = (mod, isNodeMode, target) => {
10
- target = mod != null ? __create(__getProtoOf(mod)) : {};
11
- const to = isNodeMode || !mod || !mod.__esModule ? __defProp(target, "default", { value: mod, enumerable: true }) : target;
12
- for (let key of __getOwnPropNames(mod))
13
- if (!__hasOwnProp.call(to, key))
14
- __defProp(to, key, {
15
- get: () => mod[key],
16
- enumerable: true
17
- });
18
- return to;
19
- };
20
- var __require = /* @__PURE__ */ createRequire(import.meta.url);
21
3
 
22
4
  // ../../src/cli.ts
23
5
  import { config as loadEnv } from "dotenv";
@@ -322,7 +304,7 @@ import { spawn } from "node:child_process";
322
304
  import { existsSync as existsSync2, mkdirSync as mkdirSync2, realpathSync } from "node:fs";
323
305
  import os from "node:os";
324
306
  import path from "node:path";
325
- import { createRequire as createRequire2 } from "node:module";
307
+ import { createRequire } from "node:module";
326
308
  import { fileURLToPath } from "node:url";
327
309
  function getModuleDir(metaUrl) {
328
310
  return path.dirname(fileURLToPath(metaUrl));
@@ -344,7 +326,7 @@ function runtimeArgsForEntrypoint(metaUrl, entrypoint) {
344
326
  if (process.versions.bun)
345
327
  return [entrypoint];
346
328
  try {
347
- const req = createRequire2(metaUrl);
329
+ const req = createRequire(metaUrl);
348
330
  const tsxPkg = req.resolve("tsx/package.json");
349
331
  const tsxLoader = path.join(path.dirname(tsxPkg), "dist", "loader.mjs");
350
332
  if (existsSync2(tsxLoader))
@@ -483,16 +465,88 @@ function isMainModule(metaUrl) {
483
465
  }
484
466
 
485
467
  // ../../src/runtime/setup.ts
486
- import { execFileSync } from "node:child_process";
487
- import { createRequire as createRequire3 } from "node:module";
488
- import { existsSync as existsSync4, mkdirSync as mkdirSync4, writeFileSync as writeFileSync3 } from "node:fs";
489
- import os2 from "node:os";
468
+ import { execFileSync as execFileSync2 } from "node:child_process";
469
+ import { existsSync as existsSync5, mkdirSync as mkdirSync4, writeFileSync as writeFileSync3 } from "node:fs";
470
+ import os3 from "node:os";
471
+ import path6 from "node:path";
472
+
473
+ // ../../src/kuri/client.ts
474
+ import { execFileSync, spawn as spawn2 } from "node:child_process";
475
+ import { existsSync as existsSync4 } from "node:fs";
476
+ import path5 from "node:path";
477
+
478
+ // ../../src/logger.ts
490
479
  import path4 from "node:path";
491
- var req = createRequire3(import.meta.url);
480
+ import os2 from "node:os";
481
+ var LOG_DIR = path4.join(os2.homedir(), ".unbrowse", "logs");
482
+
483
+ // ../../src/kuri/client.ts
484
+ function kuriBinaryName() {
485
+ return process.platform === "win32" ? "kuri.exe" : "kuri";
486
+ }
487
+ function currentBundledKuriTarget() {
488
+ if (process.platform === "darwin" && process.arch === "arm64")
489
+ return "darwin-arm64";
490
+ if (process.platform === "darwin" && process.arch === "x64")
491
+ return "darwin-x64";
492
+ if (process.platform === "linux" && process.arch === "arm64")
493
+ return "linux-arm64";
494
+ if (process.platform === "linux" && process.arch === "x64")
495
+ return "linux-x64";
496
+ return null;
497
+ }
498
+ function resolveBinaryOnPath(name) {
499
+ const checker = process.platform === "win32" ? "where" : "which";
500
+ try {
501
+ const output = execFileSync(checker, [name], { encoding: "utf8", stdio: ["ignore", "pipe", "ignore"] });
502
+ const match = output.split(/\r?\n/).map((line) => line.trim()).find(Boolean);
503
+ return match || null;
504
+ } catch {
505
+ return null;
506
+ }
507
+ }
508
+ function addCandidate(candidates, candidate) {
509
+ if (!candidate)
510
+ return;
511
+ if (!candidates.includes(candidate))
512
+ candidates.push(candidate);
513
+ }
514
+ function getKuriSourceCandidates() {
515
+ const packageRoot = getPackageRoot(import.meta.url);
516
+ const candidates = [];
517
+ addCandidate(candidates, path5.join(packageRoot, "vendor", "kuri-src"));
518
+ addCandidate(candidates, path5.join(packageRoot, "submodules", "kuri"));
519
+ if (process.env.KURI_PATH)
520
+ addCandidate(candidates, process.env.KURI_PATH);
521
+ if (process.env.HOME)
522
+ addCandidate(candidates, path5.join(process.env.HOME, "kuri"));
523
+ return candidates;
524
+ }
525
+ function getKuriBinaryCandidates() {
526
+ const packageRoot = getPackageRoot(import.meta.url);
527
+ const binaryName = kuriBinaryName();
528
+ const target = currentBundledKuriTarget();
529
+ const candidates = [];
530
+ if (target)
531
+ addCandidate(candidates, path5.join(packageRoot, "vendor", "kuri", target, binaryName));
532
+ for (const sourceDir of getKuriSourceCandidates()) {
533
+ addCandidate(candidates, path5.join(sourceDir, "zig-out", "bin", binaryName));
534
+ }
535
+ addCandidate(candidates, resolveBinaryOnPath("kuri"));
536
+ return candidates;
537
+ }
538
+ function findKuriBinary() {
539
+ if (process.env.KURI_BIN)
540
+ return process.env.KURI_BIN;
541
+ const candidates = getKuriBinaryCandidates();
542
+ return candidates.find((candidate) => existsSync4(candidate)) ?? candidates[0] ?? kuriBinaryName();
543
+ }
544
+
545
+ // ../../src/runtime/setup.ts
492
546
  function hasBinary(name) {
493
547
  const checker = process.platform === "win32" ? "where" : "which";
494
548
  try {
495
- execFileSync(checker, [name], { stdio: "ignore" });
549
+ execFileSync2(checker, [name], { stdio: "ignore" });
496
550
  return true;
497
551
  } catch {
498
552
  return false;
@@ -508,18 +562,18 @@ function detectPackageManagers() {
508
562
  }
509
563
  function resolveConfigHome() {
510
564
  if (process.platform === "win32") {
511
- return process.env.APPDATA || path4.join(os2.homedir(), "AppData", "Roaming");
565
+ return process.env.APPDATA || path6.join(os3.homedir(), "AppData", "Roaming");
512
566
  }
513
- return process.env.XDG_CONFIG_HOME || path4.join(os2.homedir(), ".config");
567
+ return process.env.XDG_CONFIG_HOME || path6.join(os3.homedir(), ".config");
514
568
  }
515
569
  function getOpenCodeGlobalCommandsDir() {
516
- return path4.join(resolveConfigHome(), "opencode", "commands");
570
+ return path6.join(resolveConfigHome(), "opencode", "commands");
517
571
  }
518
572
  function getOpenCodeProjectCommandsDir(cwd) {
519
- return path4.join(cwd, ".opencode", "commands");
573
+ return path6.join(cwd, ".opencode", "commands");
520
574
  }
521
575
  function detectOpenCode(cwd) {
522
- return hasBinary("opencode") || existsSync4(path4.join(resolveConfigHome(), "opencode")) || existsSync4(path4.join(cwd, ".opencode"));
576
+ return hasBinary("opencode") || existsSync5(path6.join(resolveConfigHome(), "opencode")) || existsSync5(path6.join(cwd, ".opencode"));
523
577
  }
524
578
  function renderOpenCodeCommand() {
525
579
  return `---
@@ -547,12 +601,12 @@ function writeOpenCodeCommand(scope, cwd) {
547
601
  if (scope === "auto" && !detected) {
548
602
  return { detected: false, action: "not-detected", scope: "off" };
549
603
  }
550
- const resolvedScope = scope === "project" ? "project" : scope === "global" ? "global" : existsSync4(path4.join(cwd, ".opencode")) ? "project" : "global";
604
+ const resolvedScope = scope === "project" ? "project" : scope === "global" ? "global" : existsSync5(path6.join(cwd, ".opencode")) ? "project" : "global";
551
605
  const commandsDir = resolvedScope === "project" ? getOpenCodeProjectCommandsDir(cwd) : getOpenCodeGlobalCommandsDir();
552
- const commandFile = path4.join(ensureDir(commandsDir), "unbrowse.md");
606
+ const commandFile = path6.join(ensureDir(commandsDir), "unbrowse.md");
553
607
  const content = renderOpenCodeCommand();
554
- const action = existsSync4(commandFile) ? "updated" : "installed";
555
- mkdirSync4(path4.dirname(commandFile), { recursive: true });
608
+ const action = existsSync5(commandFile) ? "updated" : "installed";
609
+ mkdirSync4(path6.dirname(commandFile), { recursive: true });
556
610
  writeFileSync3(commandFile, content);
557
611
  return {
558
612
  detected: detected || scope !== "auto",
@@ -562,17 +616,44 @@ function writeOpenCodeCommand(scope, cwd) {
562
616
  };
563
617
  }
564
618
  async function ensureBrowserEngineInstalled() {
619
+ const binary = findKuriBinary();
620
+ if (existsSync5(binary)) {
621
+ return { installed: true, action: "already-installed" };
622
+ }
623
+ const sourceDir = getKuriSourceCandidates().find((candidate) => existsSync5(path6.join(candidate, "build.zig")));
624
+ if (!sourceDir) {
625
+ return {
626
+ installed: false,
627
+ action: "failed",
628
+ message: `Kuri binary not found. Checked ${binary}`
629
+ };
630
+ }
631
+ if (!hasBinary("zig")) {
632
+ return {
633
+ installed: false,
634
+ action: "failed",
635
+ message: `Kuri source found at ${sourceDir}, but Zig is not installed`
636
+ };
637
+ }
565
638
  try {
566
- const { chromium } = await import("playwright-core");
567
- if (existsSync4(chromium.executablePath())) {
568
- return { installed: true, action: "already-installed" };
569
- }
570
- const agentBrowserBin = req.resolve("agent-browser/bin/agent-browser.js");
571
- execFileSync(process.execPath, [agentBrowserBin, "install"], {
639
+ execFileSync2("zig", ["build", "-Doptimize=ReleaseFast"], {
640
+ cwd: sourceDir,
572
641
  stdio: "inherit",
573
642
  timeout: 300000
574
643
  });
575
- return { installed: true, action: "installed" };
644
+ const builtBinary = findKuriBinary();
645
+ if (existsSync5(builtBinary)) {
646
+ return {
647
+ installed: true,
648
+ action: "installed",
649
+ message: `Built Kuri from ${sourceDir}`
650
+ };
651
+ }
652
+ return {
653
+ installed: false,
654
+ action: "failed",
655
+ message: `Kuri build completed but ${builtBinary} was not created`
656
+ };
576
657
  } catch (error) {
577
658
  const message = error instanceof Error ? error.message : String(error);
578
659
  return { installed: false, action: "failed", message };
@@ -584,7 +665,7 @@ async function runSetup(options) {
584
665
  return {
585
666
  os: {
586
667
  platform: process.platform,
587
- release: os2.release(),
668
+ release: os3.release(),
588
669
  arch: process.arch
589
670
  },
590
671
  package_managers: detectPackageManagers(),
@@ -621,8 +702,8 @@ function parseArgs(argv) {
621
702
  }
622
703
  return { command, args: positional, flags };
623
704
  }
624
- async function api2(method, path5, body) {
625
- const res = await fetch(`${BASE_URL}${path5}`, {
705
+ async function api2(method, path7, body) {
706
+ const res = await fetch(`${BASE_URL}${path7}`, {
626
707
  method,
627
708
  headers: {
628
709
  ...body ? { "Content-Type": "application/json" } : {},
@@ -708,10 +789,10 @@ function detectEntityIndex(data) {
708
789
  }
709
790
  return best ? buildEntityIndex(best) : null;
710
791
  }
711
- function resolvePath(obj, path5, entityIndex) {
712
- if (!path5 || obj == null)
792
+ function resolvePath(obj, path7, entityIndex) {
793
+ if (!path7 || obj == null)
713
794
  return obj;
714
- const segments = path5.split(".");
795
+ const segments = path7.split(".");
715
796
  let cur = obj;
716
797
  for (let i = 0;i < segments.length; i++) {
717
798
  if (cur == null)
@@ -750,8 +831,8 @@ function extractFields(data, fields, entityIndex) {
750
831
  for (const f of fields) {
751
832
  const colonIdx = f.indexOf(":");
752
833
  const alias = colonIdx >= 0 ? f.slice(0, colonIdx) : f.split(".").pop();
753
- const path5 = colonIdx >= 0 ? f.slice(colonIdx + 1) : f;
754
- const resolved = resolvePath(item, path5, entityIndex ?? undefined) ?? [];
834
+ const path7 = colonIdx >= 0 ? f.slice(colonIdx + 1) : f;
835
+ const resolved = resolvePath(item, path7, entityIndex ?? undefined) ?? [];
755
836
  out[alias] = Array.isArray(resolved) ? resolved.length === 0 ? null : resolved.length === 1 ? resolved[0] : resolved : resolved;
756
837
  }
757
838
  return out;
@@ -1039,11 +1120,11 @@ async function cmdSearch(flags) {
1039
1120
  if (!intent)
1040
1121
  die("--intent is required");
1041
1122
  const domain = flags.domain;
1042
- const path5 = domain ? "/v1/search/domain" : "/v1/search";
1123
+ const path7 = domain ? "/v1/search/domain" : "/v1/search";
1043
1124
  const body = { intent, k: Number(flags.k) || 5 };
1044
1125
  if (domain)
1045
1126
  body.domain = domain;
1046
- output(await api2("POST", path5, body), !!flags.pretty);
1127
+ output(await api2("POST", path7, body), !!flags.pretty);
1047
1128
  }
1048
1129
  async function cmdSessions(flags) {
1049
1130
  const domain = flags.domain;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "unbrowse",
3
- "version": "2.0.0",
3
+ "version": "2.0.2",
4
4
  "description": "Reverse-engineer any website into reusable API skills. npm CLI + local engine.",
5
5
  "type": "module",
6
6
  "bin": {
@@ -9,6 +9,7 @@
9
9
  "files": [
10
10
  "dist",
11
11
  "runtime-src",
12
+ "vendor/kuri",
12
13
  "README.md",
13
14
  "LICENSE"
14
15
  ],
@@ -25,8 +26,6 @@
25
26
  "cheerio": "^1.2.0",
26
27
  "dotenv": "^17.3.1",
27
28
  "nanoid": "^5.1.6",
28
- "agent-browser": "^0.13.0",
29
- "playwright-core": "^1.58.2",
30
29
  "tsx": "^4.20.6",
31
30
  "ws": "^8.19.0"
32
31
  },
@@ -137,7 +137,9 @@ export function isBlockedAppShell(html?: string): boolean {
137
137
  /switch to a supported browser/i.test(html) ||
138
138
  /Something went wrong, but don.?t fret/i.test(html) ||
139
139
  /class=["']errorContainer["']/i.test(html) ||
140
- /#placeholder,\s*#react-root\s*\{\s*display:\s*none/i.test(html)
140
+ /#placeholder,\s*#react-root\s*\{\s*display:\s*none/i.test(html) ||
141
+ /Attention Required!\s*\|\s*Cloudflare/i.test(html) ||
142
+ /cf-error-details|cf\.errors\.css/i.test(html)
141
143
  );
142
144
  }
143
145
 
@@ -436,7 +438,15 @@ async function waitForContentReady(
436
438
  responseBodies?: Map<string, string>,
437
439
  ): Promise<void> {
438
440
  // Phase 1: Initial settle — let the page start rendering
439
- await new Promise((r) => setTimeout(r, 2000));
441
+ await new Promise((r) => setTimeout(r, 1000));
442
+
443
+ // Early exit: if interceptor already captured API responses, page is loaded enough
444
+ if (responseBodies && responseBodies.size > 0) {
445
+ log("capture", `early exit: ${responseBodies.size} API responses already captured during navigation`);
446
+ // Brief extra settle to catch any trailing responses
447
+ await new Promise((r) => setTimeout(r, 500));
448
+ return;
449
+ }
440
450
 
441
451
  // Phase 2: Cloudflare challenge detection and wait
442
452
  try {
@@ -453,7 +463,21 @@ async function waitForContentReady(
453
463
  }
454
464
 
455
465
  // Phase 3: Wait for document ready state (replaces networkidle)
456
- await waitForReadyState(tabId, 8000);
466
+ await waitForReadyState(tabId, 5000);
467
+
468
+ // Early exit: check again after readyState — SPAs often fire API calls during hydration
469
+ if (responseBodies) {
470
+ const intercepted = await collectInterceptedRequests(tabId);
471
+ for (const entry of intercepted) {
472
+ if (entry.response_body && !entry.is_js) {
473
+ responseBodies.set(entry.url, entry.response_body);
474
+ }
475
+ }
476
+ if (responseBodies.size > 0) {
477
+ log("capture", `early exit after readyState: ${responseBodies.size} API responses captured`);
478
+ return;
479
+ }
480
+ }
457
481
 
458
482
  // Phase 4: Intent-aware API wait — poll intercepted requests for matching API URLs
459
483
  if (captureUrl && responseBodies) {
@@ -464,8 +488,8 @@ async function waitForContentReady(
464
488
  if (wantedHints.length > 0) {
465
489
  log("capture", `intent-aware wait: looking for API matching one of [${wantedHints.join(", ")}] (from ${captureUrl})`);
466
490
  const intentStart = Date.now();
467
- const INTENT_MAX_WAIT = 15000;
468
- const INTENT_POLL_INTERVAL = 1500;
491
+ const INTENT_MAX_WAIT = 8000;
492
+ const INTENT_POLL_INTERVAL = 1000;
469
493
  while (Date.now() - intentStart < INTENT_MAX_WAIT) {
470
494
  await new Promise((r) => setTimeout(r, INTENT_POLL_INTERVAL));
471
495
  // Check newly intercepted requests
@@ -505,7 +529,7 @@ async function waitForContentReady(
505
529
  await new Promise((r) => setTimeout(r, 1200));
506
530
  await kuri.evaluate(tabId, "window.scrollTo(0, 0)");
507
531
  if (responseBodies.size === before) {
508
- await new Promise((r) => setTimeout(r, 2000));
532
+ await new Promise((r) => setTimeout(r, 1500));
509
533
  }
510
534
  } catch {
511
535
  // non-fatal
@@ -638,14 +662,9 @@ export async function captureSession(
638
662
  try { pageDomain = getRegistrableDomain(new URL(url).hostname); } catch { /* bad url */ }
639
663
 
640
664
  // Inject fetch/XHR interceptor BEFORE navigation to capture all response bodies
641
- // Navigate to origin first so the interceptor runs in the correct context
642
- try {
643
- const origin = new URL(url).origin;
644
- await kuri.navigate(tabId, origin);
645
- await new Promise((r) => setTimeout(r, 500));
646
- } catch { /* best-effort */ }
647
-
648
- await kuri.evaluate(tabId, INTERCEPTOR_SCRIPT);
665
+ // Navigate directly to the target URL, skipping the origin pre-navigation, to save 1-2s on heavy SPAs.
666
+ // The interceptor is re-injected after navigation anyway (page context resets on navigate).
667
+ await kuri.evaluate(tabId, INTERCEPTOR_SCRIPT).catch(() => {});
649
668
 
650
669
  // Navigate to target URL
651
670
  await kuri.navigate(tabId, url);
@@ -707,10 +726,14 @@ export async function captureSession(
707
726
  log("capture", `response body captured: ${bodyUrl.substring(0, 150)}`);
708
727
  }
709
728
 
729
+
710
730
  let final_url = url;
711
731
  let html: string | undefined;
712
732
  try {
713
- final_url = await kuri.getCurrentUrl(tabId);
733
+ const rawUrl = await kuri.getCurrentUrl(tabId);
734
+ final_url = typeof rawUrl === "string" ? rawUrl : String(rawUrl ?? url);
735
+ // Validate it's actually a URL, fall back to original if not
736
+ try { new URL(final_url); } catch { final_url = url; }
714
737
  html = await kuri.getPageHtml(tabId);
715
738
  } catch {}
716
739
 
@@ -779,6 +802,14 @@ export async function captureSession(
779
802
  responseBodyCount < 10 &&
780
803
  !hasUsefulCapturedResponses(responseBodies.keys(), url, intent)
781
804
  ) {
805
+ // On ephemeral retry, if still blocked by Cloudflare WAF, throw auth_required
806
+ // so the caller can surface a login prompt instead of retrying forever
807
+ if (options?.forceEphemeral && html && /Cloudflare|cf\.errors\.css|cf-error-details/i.test(html)) {
808
+ throw Object.assign(new Error("cloudflare_waf_block"), {
809
+ code: "auth_required",
810
+ login_url: url,
811
+ });
812
+ }
782
813
  retryFreshTab = true;
783
814
  log("capture", `rendered blocked app shell for ${url}; retrying with fresh tab`);
784
815
  } else {
@@ -807,7 +838,7 @@ export async function captureSession(
807
838
  await resetTab(tabId);
808
839
  releaseTabSlot(tabId);
809
840
  }
810
- if (retryFreshTab) {
841
+ if (retryFreshTab && !options?.forceEphemeral) {
811
842
  return captureSession(url, authHeaders, cookies, intent, { forceEphemeral: true });
812
843
  }
813
844
  if (captureError) throw captureError;
@@ -939,7 +939,7 @@ async function executeBrowserCapture(
939
939
  skill.endpoints.find((endpoint) => typeof endpoint.trigger_url === "string" && endpoint.trigger_url)?.trigger_url ||
940
940
  skill.endpoints.find((endpoint) => !/\{[^}]+\}/.test(endpoint.url_template))?.url_template ||
941
941
  "";
942
- const url = String(params.url ?? fallbackUrl);
942
+ const url = typeof params.url === "string" ? params.url : String(params.url ?? fallbackUrl);
943
943
  const intent = String(params.intent ?? skill.intent_signature);
944
944
  if (!url) throw new Error("browser-capture skill requires params.url");
945
945
 
@@ -981,7 +981,33 @@ async function executeBrowserCapture(
981
981
  usedStoredAuth,
982
982
  );
983
983
  if (documentSeed) return documentSeed;
984
- const captured = await captureSession(url, authHeaders, cookies, intent);
984
+ let captured;
985
+ try {
986
+ captured = await captureSession(url, authHeaders, cookies, intent);
987
+ } catch (captureErr: unknown) {
988
+ const err = captureErr as Error & { code?: string; login_url?: string };
989
+ if (err.code === "auth_required") {
990
+ const trace: ExecutionTrace = stampTrace({
991
+ trace_id: traceId,
992
+ skill_id: skill.skill_id,
993
+ endpoint_id: "browser-capture",
994
+ started_at: startedAt,
995
+ completed_at: new Date().toISOString(),
996
+ success: false,
997
+ error: "auth_required",
998
+ });
999
+ return {
1000
+ trace,
1001
+ result: {
1002
+ error: "auth_required",
1003
+ provider: "cloudflare",
1004
+ login_url: err.login_url ?? url,
1005
+ message: `Site is blocked by Cloudflare WAF. Run: unbrowse login --url "${url}" to authenticate interactively.`,
1006
+ },
1007
+ };
1008
+ }
1009
+ throw captureErr;
1010
+ }
985
1011
 
986
1012
  const finalDomain = (() => {
987
1013
  try { return new URL(captured.final_url).hostname; } catch { return targetDomain; }
@@ -990,7 +1016,7 @@ async function executeBrowserCapture(
990
1016
  const LOGIN_PATHS = /\/(login|signin|sign-in|sso|auth|uas\/login|checkpoint|oauth)/i;
991
1017
 
992
1018
  const redirectedToAuth = finalDomain !== targetDomain && AUTH_PROVIDERS.test(finalDomain);
993
- const redirectedToLogin = captured.final_url !== url && LOGIN_PATHS.test(new URL(captured.final_url).pathname);
1019
+ const redirectedToLogin = captured.final_url !== url && (() => { try { return LOGIN_PATHS.test(new URL(String(captured.final_url)).pathname); } catch { return false; } })();
994
1020
 
995
1021
  if (redirectedToAuth || redirectedToLogin) {
996
1022
  const trace: ExecutionTrace = stampTrace({
@@ -1278,18 +1278,38 @@ export function extractFromDOMWithHint(
1278
1278
  * the best match for the given intent.
1279
1279
  */
1280
1280
  export function extractFromDOM(html: string, intent: string): ExtractionResult {
1281
+ // Cap HTML size to prevent cheerio from hanging on massive pages
1282
+ const MAX_HTML_SIZE = 300_000;
1283
+ let workingHtml = html;
1284
+ if (workingHtml.length > MAX_HTML_SIZE) {
1285
+ // Strip attribute bloat first (class/style/data-* attributes inflate HTML 2-3x)
1286
+ workingHtml = workingHtml
1287
+ .replace(/\s+class="[^"]*"/g, "")
1288
+ .replace(/\s+style="[^"]*"/g, "")
1289
+ .replace(/\s+data-[a-z][-a-z]*="[^"]*"/g, "");
1290
+ // If still too large, truncate keeping body content
1291
+ if (workingHtml.length > MAX_HTML_SIZE) {
1292
+ const bodyStart = workingHtml.indexOf("<body");
1293
+ if (bodyStart > 0) {
1294
+ workingHtml = workingHtml.substring(0, Math.max(MAX_HTML_SIZE, bodyStart + MAX_HTML_SIZE));
1295
+ } else {
1296
+ workingHtml = workingHtml.substring(0, MAX_HTML_SIZE);
1297
+ }
1298
+ }
1299
+ }
1300
+
1281
1301
  // Extract SPA-embedded data from raw HTML BEFORE cleanDOM strips scripts
1282
- const spaStructures = extractSPAData(html);
1283
- const flashStructures = extractFlashNoticeSpecial(html, intent);
1284
- const cleaned = cleanDOM(html);
1285
- const githubStructures = extractGitHubSpecial(html, intent);
1286
- const linkedInStructures = extractLinkedInSpecial(html, intent);
1287
- const packageSearchStructures = extractPackageSearchSpecial(html, intent);
1288
- const xProfileStructures = extractXProfileSpecial(html, intent);
1289
- const postStructures = extractPostSpecial(html, intent);
1290
- const trendStructures = extractTrendSpecial(html, intent);
1291
- const definitionStructures = extractDefinitionSpecial(html, intent);
1292
- const courseStructures = extractCourseSearchSpecial(html, intent);
1302
+ const spaStructures = extractSPAData(workingHtml);
1303
+ const flashStructures = extractFlashNoticeSpecial(workingHtml, intent);
1304
+ const cleaned = cleanDOM(workingHtml);
1305
+ const githubStructures = extractGitHubSpecial(workingHtml, intent);
1306
+ const linkedInStructures = extractLinkedInSpecial(workingHtml, intent);
1307
+ const packageSearchStructures = extractPackageSearchSpecial(workingHtml, intent);
1308
+ const xProfileStructures = extractXProfileSpecial(workingHtml, intent);
1309
+ const postStructures = extractPostSpecial(workingHtml, intent);
1310
+ const trendStructures = extractTrendSpecial(workingHtml, intent);
1311
+ const definitionStructures = extractDefinitionSpecial(workingHtml, intent);
1312
+ const courseStructures = extractCourseSearchSpecial(workingHtml, intent);
1293
1313
  const structures = [...flashStructures, ...githubStructures, ...linkedInStructures, ...packageSearchStructures, ...xProfileStructures, ...postStructures, ...trendStructures, ...definitionStructures, ...courseStructures, ...spaStructures, ...parseStructured(cleaned)]
1294
1314
  .map((structure) => normalizeStructureForIntent(structure, intent));
1295
1315
 
@@ -1306,7 +1326,17 @@ export function extractFromDOM(html: string, intent: string): ExtractionResult {
1306
1326
 
1307
1327
  scored.sort((a, b) => b.score - a.score);
1308
1328
 
1309
- const bestPassing = scored.find((candidate) => assessIntentResult(candidate.structure.data, intent).verdict === "pass");
1329
+ const passing = scored.filter((candidate) => assessIntentResult(candidate.structure.data, intent).verdict === "pass");
1330
+ const bestPassing = (() => {
1331
+ if (passing.length === 0) return undefined;
1332
+ const bestPassingOverall = passing[0];
1333
+ const bestPassingSpa = passing.find((candidate) => candidate.structure.type.startsWith("spa-"));
1334
+ // Prefer cleaner SPA payloads when they're effectively tied with DOM-derived candidates.
1335
+ if (bestPassingSpa && bestPassingOverall && bestPassingSpa.score >= bestPassingOverall.score - 2) {
1336
+ return bestPassingSpa;
1337
+ }
1338
+ return bestPassingOverall;
1339
+ })();
1310
1340
  if (bestPassing) {
1311
1341
  return {
1312
1342
  data: bestPassing.structure.data,
@@ -1325,7 +1355,17 @@ export function extractFromDOM(html: string, intent: string): ExtractionResult {
1325
1355
  selector: best.structure.selector,
1326
1356
  };
1327
1357
  }
1328
- const hasClearWinner = scored.length === 1 || best.score > scored[1].score * 1.5;
1358
+
1359
+ if (scored.length === 1) {
1360
+ return {
1361
+ data: best.structure.data,
1362
+ extraction_method: best.structure.type,
1363
+ confidence: computeConfidence(best.structure, best.score),
1364
+ selector: best.structure.selector,
1365
+ };
1366
+ }
1367
+
1368
+ const hasClearWinner = best.score > scored[1].score * 1.5;
1329
1369
 
1330
1370
  if (hasClearWinner && best.score > 0) {
1331
1371
  return {
@@ -8,8 +8,11 @@
8
8
  * All browser ops go through HTTP — no Playwright, no Node CDP bindings.
9
9
  */
10
10
 
11
- import { spawn, type ChildProcess } from "node:child_process";
11
+ import { execFileSync, spawn, type ChildProcess } from "node:child_process";
12
+ import { existsSync } from "node:fs";
13
+ import path from "node:path";
12
14
  import { log } from "../logger.js";
15
+ import { getPackageRoot } from "../runtime/paths.js";
13
16
 
14
17
  const KURI_DEFAULT_PORT = 7700;
15
18
  const KURI_STARTUP_TIMEOUT_MS = 10_000;
@@ -52,6 +55,58 @@ let kuriPort = KURI_DEFAULT_PORT;
52
55
  let kuriCdpPort: number | null = null;
53
56
  let kuriReady = false;
54
57
 
58
+ function kuriBinaryName(): string {
59
+ return process.platform === "win32" ? "kuri.exe" : "kuri";
60
+ }
61
+
62
+ function currentBundledKuriTarget(): string | null {
63
+ if (process.platform === "darwin" && process.arch === "arm64") return "darwin-arm64";
64
+ if (process.platform === "darwin" && process.arch === "x64") return "darwin-x64";
65
+ if (process.platform === "linux" && process.arch === "arm64") return "linux-arm64";
66
+ if (process.platform === "linux" && process.arch === "x64") return "linux-x64";
67
+ return null;
68
+ }
69
+
70
+ function resolveBinaryOnPath(name: string): string | null {
71
+ const checker = process.platform === "win32" ? "where" : "which";
72
+ try {
73
+ const output = execFileSync(checker, [name], { encoding: "utf8", stdio: ["ignore", "pipe", "ignore"] });
74
+ const match = output.split(/\r?\n/).map((line) => line.trim()).find(Boolean);
75
+ return match || null;
76
+ } catch {
77
+ return null;
78
+ }
79
+ }
80
+
81
+ function addCandidate(candidates: string[], candidate?: string | null): void {
82
+ if (!candidate) return;
83
+ if (!candidates.includes(candidate)) candidates.push(candidate);
84
+ }
85
+
86
+ export function getKuriSourceCandidates(): string[] {
87
+ const packageRoot = getPackageRoot(import.meta.url);
88
+ const candidates: string[] = [];
89
+ addCandidate(candidates, path.join(packageRoot, "vendor", "kuri-src"));
90
+ addCandidate(candidates, path.join(packageRoot, "submodules", "kuri"));
91
+ if (process.env.KURI_PATH) addCandidate(candidates, process.env.KURI_PATH);
92
+ if (process.env.HOME) addCandidate(candidates, path.join(process.env.HOME, "kuri"));
93
+ return candidates;
94
+ }
95
+
96
+ export function getKuriBinaryCandidates(): string[] {
97
+ const packageRoot = getPackageRoot(import.meta.url);
98
+ const binaryName = kuriBinaryName();
99
+ const target = currentBundledKuriTarget();
100
+ const candidates: string[] = [];
101
+
102
+ if (target) addCandidate(candidates, path.join(packageRoot, "vendor", "kuri", target, binaryName));
103
+ for (const sourceDir of getKuriSourceCandidates()) {
104
+ addCandidate(candidates, path.join(sourceDir, "zig-out", "bin", binaryName));
105
+ }
106
+ addCandidate(candidates, resolveBinaryOnPath("kuri"));
107
+ return candidates;
108
+ }
109
+
55
110
  /** Try common CDP ports to find where Chrome is listening. */
56
111
  async function discoverCdpPort(): Promise<void> {
57
112
  const portsToTry = [9222, 9223, 9224, 9225];
@@ -128,10 +183,10 @@ async function kuriPost(path: string, params: Record<string, string>, body: unkn
128
183
  }
129
184
 
130
185
  /** Find the kuri binary — check env, then common build locations. */
131
- function findKuriBinary(): string {
186
+ export function findKuriBinary(): string {
132
187
  if (process.env.KURI_BIN) return process.env.KURI_BIN;
133
- const kuriRepoPath = process.env.KURI_PATH ?? `${process.env.HOME}/kuri`;
134
- return `${kuriRepoPath}/zig-out/bin/kuri`;
188
+ const candidates = getKuriBinaryCandidates();
189
+ return candidates.find((candidate) => existsSync(candidate)) ?? candidates[0] ?? kuriBinaryName();
135
190
  }
136
191
 
137
192
  /**
@@ -160,6 +215,9 @@ export async function start(port?: number): Promise<void> {
160
215
 
161
216
  const binary = findKuriBinary();
162
217
  log("kuri", `starting: ${binary} on port ${kuriPort}`);
218
+ if (!existsSync(binary)) {
219
+ throw new Error(`Kuri binary not found at ${binary}`);
220
+ }
163
221
 
164
222
  // Check if Chrome is already running — if so, pass CDP_URL to connect
165
223
  // If not, omit CDP_URL so Kuri launches its own managed Chrome
@@ -281,12 +339,16 @@ export async function getDefaultTab(): Promise<string> {
281
339
  throw new Error("No tabs available and failed to create one");
282
340
  }
283
341
 
342
/**
 * Ask Kuri's /discover endpoint to sync Chrome tabs into Kuri's registry.
 * Best-effort: any failure is swallowed.
 */
async function ensureTabsDiscovered(): Promise<void> {
  try {
    // Send the CDP URL as a query param so /discover works even when Kuri
    // was started without the CDP_URL env var.
    const query: Record<string, string> = kuriCdpPort
      ? { cdp_url: `ws://127.0.0.1:${kuriCdpPort}` }
      : {};
    await kuriGet("/discover", query);
  } catch {
    // /discover may fail if no Chrome running — that's OK
  }
}
292
354
 
@@ -295,12 +357,35 @@ export async function navigate(tabId: string, url: string): Promise<void> {
295
357
  await kuriGet("/navigate", { tab_id: tabId, url });
296
358
  }
297
359
 
360
  /** Evaluate JavaScript in tab context. */
299
364
  export async function evaluate(tabId: string, expression: string): Promise<unknown> {
300
- const raw = (await kuriGet("/evaluate", { tab_id: tabId, expression })) as {
365
+ let raw: {
301
366
  id?: number;
302
367
  result?: { result?: { type?: string; value?: unknown; description?: string }; exceptionDetails?: unknown };
303
368
  };
369
+ if (expression.length > 2000) {
370
+ // Use POST with raw text body for large expressions to avoid URL length limits
371
+ const url = kuriUrl("/evaluate", { tab_id: tabId });
372
+ const controller = new AbortController();
373
+ const timeout = setTimeout(() => controller.abort(), KURI_REQUEST_TIMEOUT_MS);
374
+ try {
375
+ const res = await fetch(url, {
376
+ method: "POST",
377
+ headers: { "Content-Type": "text/plain" },
378
+ body: expression,
379
+ signal: controller.signal,
380
+ });
381
+ const text = await res.text();
382
+ try { raw = JSON.parse(text); } catch { raw = text as never; }
383
+ } finally {
384
+ clearTimeout(timeout);
385
+ }
386
+ } else {
387
+ raw = (await kuriGet("/evaluate", { tab_id: tabId, expression })) as typeof raw;
388
+ }
304
389
  // CDP Runtime.evaluate response: { id, result: { result: { type, value } } }
305
390
  const inner = raw?.result?.result;
306
391
  if (!inner) return raw;
@@ -425,7 +510,10 @@ export async function hasCloudflareChallenge(tabId: string): Promise<boolean> {
425
510
  var html = document.documentElement.innerHTML;
426
511
  return html.indexOf('challenge-platform') !== -1 ||
427
512
  html.indexOf('cf_chl_opt') !== -1 ||
513
+ html.indexOf('cf-error-details') !== -1 ||
514
+ html.indexOf('cf.errors.css') !== -1 ||
428
515
  document.title === 'Just a moment...' ||
516
+ /Attention Required.*Cloudflare/.test(document.title) ||
429
517
  !!document.querySelector('#challenge-running, #challenge-form, .cf-browser-verification');
430
518
  })()`);
431
519
  return result === true;
@@ -1,11 +1,9 @@
1
1
  import { execFileSync } from "node:child_process";
2
- import { createRequire } from "node:module";
3
2
  import { existsSync, mkdirSync, writeFileSync } from "node:fs";
4
3
  import os from "node:os";
5
4
  import path from "node:path";
6
5
  import { ensureDir } from "./paths.js";
7
-
8
- const req = createRequire(import.meta.url);
6
+ import { findKuriBinary, getKuriSourceCandidates } from "../kuri/client.js";
9
7
 
10
8
  export type SetupScope = "auto" | "global" | "project" | "off";
11
9
 
@@ -128,18 +126,47 @@ function writeOpenCodeCommand(scope: SetupScope, cwd: string): SetupReport["open
128
126
  }
129
127
 
130
128
  export async function ensureBrowserEngineInstalled(): Promise<SetupReport["browser_engine"]> {
131
- try {
132
- const { chromium } = await import("playwright-core");
133
- if (existsSync(chromium.executablePath())) {
134
- return { installed: true, action: "already-installed" };
135
- }
129
+ const binary = findKuriBinary();
130
+ if (existsSync(binary)) {
131
+ return { installed: true, action: "already-installed" };
132
+ }
133
+
134
+ const sourceDir = getKuriSourceCandidates().find((candidate) => existsSync(path.join(candidate, "build.zig")));
135
+ if (!sourceDir) {
136
+ return {
137
+ installed: false,
138
+ action: "failed",
139
+ message: `Kuri binary not found. Checked ${binary}`,
140
+ };
141
+ }
142
+
143
+ if (!hasBinary("zig")) {
144
+ return {
145
+ installed: false,
146
+ action: "failed",
147
+ message: `Kuri source found at ${sourceDir}, but Zig is not installed`,
148
+ };
149
+ }
136
150
 
137
- const agentBrowserBin = req.resolve("agent-browser/bin/agent-browser.js");
138
- execFileSync(process.execPath, [agentBrowserBin, "install"], {
151
+ try {
152
+ execFileSync("zig", ["build", "-Doptimize=ReleaseFast"], {
153
+ cwd: sourceDir,
139
154
  stdio: "inherit",
140
155
  timeout: 300_000,
141
156
  });
142
- return { installed: true, action: "installed" };
157
+ const builtBinary = findKuriBinary();
158
+ if (existsSync(builtBinary)) {
159
+ return {
160
+ installed: true,
161
+ action: "installed",
162
+ message: `Built Kuri from ${sourceDir}`,
163
+ };
164
+ }
165
+ return {
166
+ installed: false,
167
+ action: "failed",
168
+ message: `Kuri build completed but ${builtBinary} was not created`,
169
+ };
143
170
  } catch (error) {
144
171
  const message = error instanceof Error ? error.message : String(error);
145
172
  return { installed: false, action: "failed", message };
Binary file
Binary file
Binary file
Binary file