unbrowse 9.5.0-preview.1 → 9.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "unbrowse",
3
- "version": "9.5.0-preview.1",
3
+ "version": "9.6.0",
4
4
  "repository": {
5
5
  "type": "git",
6
6
  "url": "git+https://github.com/unbrowse-ai/unbrowse.git"
package/runtime/cli.js CHANGED
@@ -2223,7 +2223,7 @@ var init_telemetry = __esm(() => {
2223
2223
  });
2224
2224
 
2225
2225
  // .tmp-runtime-src/build-info.generated.ts
2226
- var BUILD_RELEASE_VERSION = "9.5.0-preview.1", BUILD_GIT_SHA = "77a00a93a3da", BUILD_CODE_HASH = "5d9ebf619c61", BUILD_RELEASE_MANIFEST_BASE64 = "eyJzY2hlbWFfdmVyc2lvbiI6MSwicmVsZWFzZV92ZXJzaW9uIjoiOS41LjAtcHJldmlldy4xIiwiZ2l0X3NoYSI6Ijc3YTAwYTkzYTNkYSIsImNvZGVfaGFzaCI6IjVkOWViZjYxOWM2MSIsInRyYWNlX3ZlcnNpb24iOiI1ZDllYmY2MTljNjFANzdhMDBhOTNhM2RhIiwiaXNzdWVkX2F0IjoiMjAyNi0wNi0xN1QxNjoyMjozNC41MjdaIn0", BUILD_RELEASE_MANIFEST_SIGNATURE = "S6SeRd9EuGt1SLhfbZOdCZbjCUsYF8E0D-Yx-cet33g", BUILD_DEFAULT_BACKEND_URL = "https://beta-api.unbrowse.ai", BUILD_DEFAULT_PROFILE = "";
2226
+ var BUILD_RELEASE_VERSION = "9.6.0", BUILD_GIT_SHA = "5b6b9dc9e8e1", BUILD_CODE_HASH = "5d9ebf619c61", BUILD_RELEASE_MANIFEST_BASE64 = "eyJzY2hlbWFfdmVyc2lvbiI6MSwicmVsZWFzZV92ZXJzaW9uIjoiOS42LjAiLCJnaXRfc2hhIjoiNWI2YjlkYzllOGUxIiwiY29kZV9oYXNoIjoiNWQ5ZWJmNjE5YzYxIiwidHJhY2VfdmVyc2lvbiI6IjVkOWViZjYxOWM2MUA1YjZiOWRjOWU4ZTEiLCJpc3N1ZWRfYXQiOiIyMDI2LTA2LTE3VDIyOjAyOjQ0LjA3MVoifQ", BUILD_RELEASE_MANIFEST_SIGNATURE = "MqIyb1B_GA8W5Nv_Jictwt1jVCCTgxxdIjUFJqLoeAU", BUILD_DEFAULT_BACKEND_URL = "https://beta-api.unbrowse.ai", BUILD_DEFAULT_PROFILE = "";
2227
2227
 
2228
2228
  // .tmp-runtime-src/version.ts
2229
2229
  import { createHash as createHash7 } from "crypto";
@@ -123597,6 +123597,125 @@ var init_fetch_ladder = __esm(() => {
123597
123597
  ];
123598
123598
  });
123599
123599
 
123600
+ // .tmp-runtime-src/execution/tencent-waf-solve.ts
123601
+ function parseCapzyProxy(proxyUrl) {
123602
+ try {
123603
+ const u = new URL(proxyUrl);
123604
+ if (!u.hostname || !u.port)
123605
+ return;
123606
+ const scheme = u.protocol.replace(":", "").toLowerCase();
123607
+ const type = scheme === "socks5" || scheme === "socks4" || scheme === "https" ? scheme : "http";
123608
+ return {
123609
+ type,
123610
+ address: u.hostname,
123611
+ port: Number(u.port),
123612
+ ...u.username ? { login: decodeURIComponent(u.username) } : {},
123613
+ ...u.password ? { password: decodeURIComponent(u.password) } : {}
123614
+ };
123615
+ } catch {
123616
+ return;
123617
+ }
123618
+ }
123619
+
123620
+ // .tmp-runtime-src/execution/capzy-cf-solve.ts
123621
+ function extractCfClearance(solution) {
123622
+ if (!solution || typeof solution !== "object")
123623
+ return null;
123624
+ const s = solution;
123625
+ const ua = typeof s.user_agent === "string" && s.user_agent || typeof s.userAgent === "string" && s.userAgent || undefined;
123626
+ if (typeof s.cf_clearance === "string" && s.cf_clearance) {
123627
+ return { cf_clearance: s.cf_clearance, user_agent: ua || undefined };
123628
+ }
123629
+ if (typeof s.token === "string" && s.token && s.type !== "turnstile") {
123630
+ return { cf_clearance: s.token, user_agent: ua || undefined };
123631
+ }
123632
+ const cookies = s.cookies;
123633
+ if (cookies && typeof cookies === "object" && !Array.isArray(cookies)) {
123634
+ const v = cookies.cf_clearance;
123635
+ if (typeof v === "string" && v)
123636
+ return { cf_clearance: v, user_agent: ua || undefined };
123637
+ }
123638
+ if (Array.isArray(cookies)) {
123639
+ for (const c of cookies) {
123640
+ if (c && typeof c === "object") {
123641
+ const o = c;
123642
+ if (o.name === "cf_clearance" && typeof o.value === "string" && o.value) {
123643
+ return { cf_clearance: o.value, user_agent: ua || undefined };
123644
+ }
123645
+ } else if (typeof c === "string" && c.startsWith("cf_clearance=")) {
123646
+ const v = c.slice("cf_clearance=".length).split(";")[0];
123647
+ if (v)
123648
+ return { cf_clearance: v, user_agent: ua || undefined };
123649
+ }
123650
+ }
123651
+ }
123652
+ return null;
123653
+ }
123654
+ function buildCfTask(websiteURL, proxy) {
123655
+ return {
123656
+ type: "AntiCloudflareTask",
123657
+ websiteURL,
123658
+ proxyType: proxy.type,
123659
+ proxyAddress: proxy.address,
123660
+ proxyPort: proxy.port,
123661
+ ...proxy.login ? { proxyLogin: proxy.login } : {},
123662
+ ...proxy.password ? { proxyPassword: proxy.password } : {}
123663
+ };
123664
+ }
123665
+ async function solveCfViaCapzy(input) {
123666
+ const clientKey = input.clientKey ?? process.env.UNBROWSE_CAPZY_KEY?.trim();
123667
+ if (!clientKey)
123668
+ return null;
123669
+ if (!input.proxy)
123670
+ return null;
123671
+ const apiBase = (input.apiBase ?? process.env.UNBROWSE_CAPZY_URL?.trim() ?? "https://api.capzy.ai").replace(/\/+$/, "");
123672
+ const doFetch = input.fetchImpl ?? fetch;
123673
+ const deadline = Date.now() + (input.timeoutMs ?? 120000);
123674
+ const task = buildCfTask(input.websiteURL, input.proxy);
123675
+ let taskId;
123676
+ try {
123677
+ const created = await doFetch(`${apiBase}/createTask`, {
123678
+ method: "POST",
123679
+ headers: { "Content-Type": "application/json" },
123680
+ body: JSON.stringify({ clientKey, task }),
123681
+ signal: AbortSignal.timeout(Math.max(1, Math.min(20000, deadline - Date.now())))
123682
+ });
123683
+ if (!created.ok)
123684
+ return null;
123685
+ const cj = await created.json().catch(() => null);
123686
+ if (!cj || cj.errorId || !cj.taskId)
123687
+ return null;
123688
+ taskId = cj.taskId;
123689
+ } catch {
123690
+ return null;
123691
+ }
123692
+ while (Date.now() < deadline) {
123693
+ try {
123694
+ const res = await doFetch(`${apiBase}/getTaskResult`, {
123695
+ method: "POST",
123696
+ headers: { "Content-Type": "application/json" },
123697
+ body: JSON.stringify({ clientKey, taskId }),
123698
+ signal: AbortSignal.timeout(Math.max(1, Math.min(15000, deadline - Date.now())))
123699
+ });
123700
+ if (!res.ok)
123701
+ return null;
123702
+ const rj = await res.json().catch(() => null);
123703
+ if (!rj || rj.errorId)
123704
+ return null;
123705
+ if (rj.status === "failed")
123706
+ return null;
123707
+ if (rj.status === "ready") {
123708
+ return extractCfClearance(rj.solution);
123709
+ }
123710
+ } catch {
123711
+ return null;
123712
+ }
123713
+ await new Promise((r) => setTimeout(r, 2500));
123714
+ }
123715
+ return null;
123716
+ }
123717
+ var init_capzy_cf_solve = () => {};
123718
+
123600
123719
  // .tmp-runtime-src/execution/cf-challenge.ts
123601
123720
  var exports_cf_challenge = {};
123602
123721
  __export(exports_cf_challenge, {
@@ -123617,6 +123736,31 @@ function extractCfBundleUrl(body, requestUrl) {
123617
123736
  }
123618
123737
  }
123619
123738
  async function solveCfAndRetry(input) {
123739
+ if (process.env.UNBROWSE_CAPZY_KEY?.trim()) {
123740
+ const viaCapzy = await solveCfViaCapzy({
123741
+ websiteURL: input.url,
123742
+ proxy: input.proxy ? parseCapzyProxy(input.proxy) : undefined,
123743
+ timeoutMs: input.timeoutMs
123744
+ });
123745
+ if (viaCapzy?.cf_clearance) {
123746
+ const merged2 = mergeCookieJar(input.cookies ?? [], [
123747
+ { name: "cf_clearance", value: viaCapzy.cf_clearance }
123748
+ ]);
123749
+ const cookieHeader2 = merged2.map((c) => `${c.name}=${c.value}`).join("; ");
123750
+ try {
123751
+ const retry = await globalThis.fetch(input.url, {
123752
+ method: "GET",
123753
+ headers: {
123754
+ ...cookieHeader2 ? { cookie: cookieHeader2 } : {},
123755
+ ...viaCapzy.user_agent ? { "user-agent": viaCapzy.user_agent } : {}
123756
+ }
123757
+ });
123758
+ if (retry.status >= 200 && retry.status < 400) {
123759
+ return { status: retry.status, html: await retry.text(), cookies: merged2 };
123760
+ }
123761
+ } catch {}
123762
+ }
123763
+ }
123620
123764
  const bundleUrl = extractCfBundleUrl(input.body, input.url);
123621
123765
  if (!bundleUrl)
123622
123766
  return null;
@@ -123698,6 +123842,7 @@ function mergeCookieJar(a, b) {
123698
123842
  var CF_BUNDLE_RE;
123699
123843
  var init_cf_challenge = __esm(() => {
123700
123844
  init_bundle_replay_client();
123845
+ init_capzy_cf_solve();
123701
123846
  CF_BUNDLE_RE = /src=["'](\/cdn-cgi\/challenge-platform\/h\/[gb]\/scripts\/jsd\/[a-f0-9]+\/main\.js)/i;
123702
123847
  });
123703
123848
 
@@ -131983,6 +132128,35 @@ var init_mount = __esm(() => {
131983
132128
  import { existsSync as existsSync33, writeFileSync as writeFileSync20, readFileSync as readFileSync28, mkdirSync as mkdirSync22, readdirSync as readdirSync9 } from "node:fs";
131984
132129
  import { dirname as dirname7, join as join36 } from "node:path";
131985
132130
  import { createHash as createHash19 } from "node:crypto";
132131
+ function registrableHost(u) {
132132
+ if (!u)
132133
+ return null;
132134
+ try {
132135
+ return new URL(u).hostname.replace(/^www\./, "").split(".").slice(-2).join(".");
132136
+ } catch {
132137
+ return null;
132138
+ }
132139
+ }
132140
+ function shouldAutoWalk(requestedUrl, topUrl, topScore, minScore = 0.8) {
132141
+ if (!topUrl)
132142
+ return false;
132143
+ const reqReg = registrableHost(requestedUrl);
132144
+ const topReg = registrableHost(topUrl);
132145
+ return !!reqReg && reqReg === topReg || (topScore ?? 0) >= minScore;
132146
+ }
132147
+ function pickWalkTarget(requestedUrl, ranked, minScore = 0.8) {
132148
+ const eligible = ranked.filter((c) => c?.url && shouldAutoWalk(requestedUrl, c.url, c.score, minScore));
132149
+ if (eligible.length === 0)
132150
+ return null;
132151
+ const hasPath = (u) => {
132152
+ try {
132153
+ return new URL(u).pathname.replace(/\/+$/, "").length > 0;
132154
+ } catch {
132155
+ return false;
132156
+ }
132157
+ };
132158
+ return eligible.find((c) => hasPath(c.url)) ?? eligible[0];
132159
+ }
131986
132160
  function artifactResultWithShortlist(artifact, skillId, triggerUrl) {
131987
132161
  const ep = artifact.endpoint;
131988
132162
  const res = artifact.result;
@@ -132522,6 +132696,19 @@ function promoteResultSnapshot(cacheKey2, skill, endpointId, result, trace) {
132522
132696
  expires: Date.now() + ROUTE_CACHE_TTL
132523
132697
  });
132524
132698
  }
132699
+ function persistWalkedRoute(scope, cacheKey2, walked) {
132700
+ try {
132701
+ const skill = walked.skill;
132702
+ if (!skill)
132703
+ return;
132704
+ const endpointId = walked.trace?.endpoint_id;
132705
+ const isReplayableRoute = (walked.source === "marketplace" || walked.source === "route-cache" || walked.source === "live-capture" || walked.source === "direct-fetch") && !!skill.endpoints?.length;
132706
+ if (isReplayableRoute) {
132707
+ promoteLearnedSkill(scope, cacheKey2, skill, endpointId, undefined);
132708
+ }
132709
+ promoteResultSnapshot(cacheKey2, skill, endpointId, walked.result, walked.trace);
132710
+ } catch {}
132711
+ }
132525
132712
  function buildCachedResultResponse(cached4, source, timing) {
132526
132713
  const now = new Date().toISOString();
132527
132714
  return {
@@ -133667,6 +133854,7 @@ function resolveEndpointTemplateBindings(endpoint, params = {}, contextUrl) {
133667
133854
  }
133668
133855
  async function resolveAndExecute(intent, params = {}, context, projection, options) {
133669
133856
  const t0 = Date.now();
133857
+ const walkDepth = options?.__walkDepth ?? 0;
133670
133858
  const timing = {
133671
133859
  search_ms: 0,
133672
133860
  get_skill_ms: 0,
@@ -135216,7 +135404,7 @@ async function resolveAndExecute(intent, params = {}, context, projection, optio
135216
135404
  }
135217
135405
  } catch {}
135218
135406
  }
135219
- if (exaHits.length > 0) {
135407
+ if (walkDepth === 0 && exaHits.length > 0) {
135220
135408
  const intentTokens = (queryIntent || "").toLowerCase().match(/[a-z0-9]{3,}/g) ?? [];
135221
135409
  const stop2 = new Set(["get", "the", "for", "from", "with", "and", "any", "all", "new", "top", "top1", "top10"]);
135222
135410
  const intentTokenSet = new Set(intentTokens.filter((t) => !stop2.has(t)));
@@ -135244,7 +135432,7 @@ async function resolveAndExecute(intent, params = {}, context, projection, optio
135244
135432
  console.log(`[exa] raw-candidate mode (UNBROWSE_EXA_RAW=1): keeping ${exaHits.length} low-score hits for the agent to judge`);
135245
135433
  }
135246
135434
  }
135247
- if (exaHits.length > 0) {
135435
+ if (walkDepth === 0 && exaHits.length > 0) {
135248
135436
  const richHit = pickAnswerHit(exaHits, raceProbeDomain);
135249
135437
  const candidates = exaHits.map((hit) => ({
135250
135438
  url: hit.url,
@@ -135277,6 +135465,40 @@ async function resolveAndExecute(intent, params = {}, context, projection, optio
135277
135465
  skill_id: "exa-web-search",
135278
135466
  domain: exaSkillDomain
135279
135467
  };
135468
+ const topWalk = pickWalkTarget(raceContextUrl, exaHits);
135469
+ if (topWalk?.url && walkDepth < 1) {
135470
+ try {
135471
+ const walked = await resolveAndExecute(intent, params, { url: topWalk.url, domain: registrableHost(topWalk.url) ?? undefined }, projection, { ...options ?? {}, __walkDepth: walkDepth + 1 });
135472
+ if (walked && walked.source !== "exa" && !walked.result?.exec_unsupported) {
135473
+ console.log(`[exa→walk] resolved candidate ${topWalk.url} via ${walked.source} (pointer pipe d${walkDepth + 1})`);
135474
+ {
135475
+ const wDom = context?.domain ?? (() => {
135476
+ try {
135477
+ return new URL(raceContextUrl).hostname;
135478
+ } catch {
135479
+ return null;
135480
+ }
135481
+ })();
135482
+ persistWalkedRoute(clientScope, scopedCacheKey(clientScope, buildResolveCacheKey(wDom ?? null, intent, raceContextUrl)), walked);
135483
+ }
135484
+ return {
135485
+ ...walked,
135486
+ result: {
135487
+ ...walked.result,
135488
+ walked_from: "exa-web-search",
135489
+ exa_candidates: candidates,
135490
+ run_plan: [
135491
+ { step: "resolve", mode: "web-search", status: "hit", label: "fallback", error: null, source: webProvider ?? "exa" },
135492
+ { step: "walk", mode: walked.source, status: "hit", label: "top-candidate", error: null, source: walked.source }
135493
+ ]
135494
+ }
135495
+ };
135496
+ }
135497
+ console.log(`[exa→walk] candidate ${topWalk.url} produced no ladder rung — candidate leaf`);
135498
+ } catch (e) {
135499
+ console.log(`[exa→walk] walk error (${e.message}) — candidate leaf`);
135500
+ }
135501
+ }
135280
135502
  return {
135281
135503
  result: {
135282
135504
  ...richHit ? {
@@ -136053,7 +136275,7 @@ async function resolveAndExecute(intent, params = {}, context, projection, optio
136053
136275
  console.log(`[ddg] $0 fallback failed: ${err.message}`);
136054
136276
  }
136055
136277
  }
136056
- if (viable.length === 0 && exaResults?.length) {
136278
+ if (walkDepth === 0 && viable.length === 0 && exaResults?.length) {
136057
136279
  const richHit = pickAnswerHit(exaResults, requestedDomain);
136058
136280
  if (richHit) {
136059
136281
  console.log(`[exa] returning highlights answer from ${richHit.url} (${(richHit.highlights ?? []).join(" ").length} chars) + ${exaResults.length} ranked candidate(s)`);
@@ -136076,6 +136298,31 @@ async function resolveAndExecute(intent, params = {}, context, projection, optio
136076
136298
  fetch: `unbrowse fetch --url ${JSON.stringify(hit.url)}`
136077
136299
  }
136078
136300
  }));
136301
+ const topWalk = pickWalkTarget(context?.url, exaResults);
136302
+ if (topWalk?.url && walkDepth < 1) {
136303
+ try {
136304
+ const walked = await resolveAndExecute(intent, params, { url: topWalk.url, domain: registrableHost(topWalk.url) ?? undefined }, projection, { ...options ?? {}, __walkDepth: walkDepth + 1 });
136305
+ if (walked && walked.source !== "exa" && !walked.result?.exec_unsupported) {
136306
+ console.log(`[exa→walk] resolved candidate ${topWalk.url} via ${walked.source} (pointer pipe d${walkDepth + 1})`);
136307
+ persistWalkedRoute(clientScope, cacheKey2, walked);
136308
+ return {
136309
+ ...walked,
136310
+ result: {
136311
+ ...walked.result,
136312
+ walked_from: "exa-web-search",
136313
+ exa_candidates: candidates2,
136314
+ run_plan: [
136315
+ { step: "resolve", mode: "web-search", status: "hit", label: "fallback", error: null, source: serialWebProvider ?? "exa" },
136316
+ { step: "walk", mode: walked.source, status: "hit", label: "top-candidate", error: null, source: walked.source }
136317
+ ]
136318
+ }
136319
+ };
136320
+ }
136321
+ console.log(`[exa→walk] candidate ${topWalk.url} produced no ladder rung — exa leaf`);
136322
+ } catch (e) {
136323
+ console.log(`[exa→walk] walk error (${e.message}) — exa leaf`);
136324
+ }
136325
+ }
136079
136326
  return {
136080
136327
  result: {
136081
136328
  data: richHit.highlights,
@@ -154011,7 +154258,7 @@ var exports_tencent_waf_solve = {};
154011
154258
  __export(exports_tencent_waf_solve, {
154012
154259
  submitWafClearance: () => submitWafClearance,
154013
154260
  solveTencentViaCapzy: () => solveTencentViaCapzy,
154014
- parseCapzyProxy: () => parseCapzyProxy,
154261
+ parseCapzyProxy: () => parseCapzyProxy2,
154015
154262
  mergeCookieHeader: () => mergeCookieHeader,
154016
154263
  extractTencentChallenge: () => extractTencentChallenge,
154017
154264
  clearTencentWafViaCapzy: () => clearTencentWafViaCapzy
@@ -154112,7 +154359,7 @@ async function submitWafClearance(input) {
154112
154359
  return { ok: false, status: 0, setCookies: [] };
154113
154360
  }
154114
154361
  }
154115
- function parseCapzyProxy(proxyUrl) {
154362
+ function parseCapzyProxy2(proxyUrl) {
154116
154363
  try {
154117
154364
  const u = new URL(proxyUrl);
154118
154365
  if (!u.hostname || !u.port)
@@ -154153,7 +154400,7 @@ async function clearTencentWafViaCapzy(input) {
154153
154400
  websiteURL: input.url,
154154
154401
  appId: challenge.appId,
154155
154402
  clientKey: input.capzyKey,
154156
- proxy: input.proxyUrl ? parseCapzyProxy(input.proxyUrl) : undefined,
154403
+ proxy: input.proxyUrl ? parseCapzyProxy2(input.proxyUrl) : undefined,
154157
154404
  timeoutMs: input.timeoutMs,
154158
154405
  fetchImpl: input.fetchImpl
154159
154406
  });
package/runtime/mcp.js CHANGED
@@ -36310,7 +36310,7 @@ var init_cached_resolution = __esm(() => {
36310
36310
  });
36311
36311
 
36312
36312
  // .tmp-runtime-src/build-info.generated.ts
36313
- var BUILD_RELEASE_VERSION = "9.5.0-preview.1", BUILD_GIT_SHA = "77a00a93a3da", BUILD_CODE_HASH = "5d9ebf619c61", BUILD_RELEASE_MANIFEST_BASE64 = "eyJzY2hlbWFfdmVyc2lvbiI6MSwicmVsZWFzZV92ZXJzaW9uIjoiOS41LjAtcHJldmlldy4xIiwiZ2l0X3NoYSI6Ijc3YTAwYTkzYTNkYSIsImNvZGVfaGFzaCI6IjVkOWViZjYxOWM2MSIsInRyYWNlX3ZlcnNpb24iOiI1ZDllYmY2MTljNjFANzdhMDBhOTNhM2RhIiwiaXNzdWVkX2F0IjoiMjAyNi0wNi0xN1QxNjoyMjozNC41MjdaIn0", BUILD_RELEASE_MANIFEST_SIGNATURE = "S6SeRd9EuGt1SLhfbZOdCZbjCUsYF8E0D-Yx-cet33g", BUILD_DEFAULT_BACKEND_URL = "https://beta-api.unbrowse.ai", BUILD_DEFAULT_PROFILE = "";
36313
+ var BUILD_RELEASE_VERSION = "9.6.0", BUILD_GIT_SHA = "5b6b9dc9e8e1", BUILD_CODE_HASH = "5d9ebf619c61", BUILD_RELEASE_MANIFEST_BASE64 = "eyJzY2hlbWFfdmVyc2lvbiI6MSwicmVsZWFzZV92ZXJzaW9uIjoiOS42LjAiLCJnaXRfc2hhIjoiNWI2YjlkYzllOGUxIiwiY29kZV9oYXNoIjoiNWQ5ZWJmNjE5YzYxIiwidHJhY2VfdmVyc2lvbiI6IjVkOWViZjYxOWM2MUA1YjZiOWRjOWU4ZTEiLCJpc3N1ZWRfYXQiOiIyMDI2LTA2LTE3VDIyOjAyOjQ0LjA3MVoifQ", BUILD_RELEASE_MANIFEST_SIGNATURE = "MqIyb1B_GA8W5Nv_Jictwt1jVCCTgxxdIjUFJqLoeAU", BUILD_DEFAULT_BACKEND_URL = "https://beta-api.unbrowse.ai", BUILD_DEFAULT_PROFILE = "";
36314
36314
 
36315
36315
  // .tmp-runtime-src/version.ts
36316
36316
  import { createHash as createHash4 } from "crypto";
@@ -121818,6 +121818,292 @@ var init_fetch_ladder = __esm(() => {
121818
121818
  ];
121819
121819
  });
121820
121820
 
121821
+ // .tmp-runtime-src/execution/tencent-waf-solve.ts
121822
+ var exports_tencent_waf_solve = {};
121823
+ __export(exports_tencent_waf_solve, {
121824
+ submitWafClearance: () => submitWafClearance,
121825
+ solveTencentViaCapzy: () => solveTencentViaCapzy,
121826
+ parseCapzyProxy: () => parseCapzyProxy,
121827
+ mergeCookieHeader: () => mergeCookieHeader,
121828
+ extractTencentChallenge: () => extractTencentChallenge,
121829
+ clearTencentWafViaCapzy: () => clearTencentWafViaCapzy
121830
+ });
121831
+ function extractTencentChallenge(html3) {
121832
+ if (!html3 || typeof html3 !== "string")
121833
+ return null;
121834
+ const appMatch = html3.match(/new\s+Captcha\(\s*["']([0-9]{5,})["']/);
121835
+ const seqMatch = html3.match(/var\s+seqid\s*=\s*["']([^"']+?)["']/);
121836
+ if (!appMatch || !seqMatch)
121837
+ return null;
121838
+ return { appId: appMatch[1], seqid: seqMatch[1] };
121839
+ }
121840
+ async function solveTencentViaCapzy(input) {
121841
+ const clientKey = input.clientKey ?? process.env.UNBROWSE_CAPZY_KEY?.trim();
121842
+ if (!clientKey)
121843
+ return null;
121844
+ const apiBase = (input.apiBase ?? process.env.UNBROWSE_CAPZY_URL?.trim() ?? "https://api.capzy.ai").replace(/\/+$/, "");
121845
+ const doFetch = input.fetchImpl ?? fetch;
121846
+ const deadline = Date.now() + (input.timeoutMs ?? 90000);
121847
+ const task = input.proxy ? {
121848
+ type: "TencentTask",
121849
+ websiteURL: input.websiteURL,
121850
+ websiteKey: input.appId,
121851
+ proxyType: input.proxy.type,
121852
+ proxyAddress: input.proxy.address,
121853
+ proxyPort: input.proxy.port,
121854
+ ...input.proxy.login ? { proxyLogin: input.proxy.login } : {},
121855
+ ...input.proxy.password ? { proxyPassword: input.proxy.password } : {}
121856
+ } : {
121857
+ type: "TencentTaskProxyLess",
121858
+ websiteURL: input.websiteURL,
121859
+ websiteKey: input.appId
121860
+ };
121861
+ let taskId;
121862
+ try {
121863
+ const created = await doFetch(`${apiBase}/createTask`, {
121864
+ method: "POST",
121865
+ headers: { "Content-Type": "application/json" },
121866
+ body: JSON.stringify({ clientKey, task }),
121867
+ signal: AbortSignal.timeout(Math.max(1, Math.min(20000, deadline - Date.now())))
121868
+ });
121869
+ if (!created.ok)
121870
+ return null;
121871
+ const cj = await created.json().catch(() => null);
121872
+ if (!cj || cj.errorId || !cj.taskId)
121873
+ return null;
121874
+ taskId = cj.taskId;
121875
+ } catch {
121876
+ return null;
121877
+ }
121878
+ while (Date.now() < deadline) {
121879
+ try {
121880
+ const res = await doFetch(`${apiBase}/getTaskResult`, {
121881
+ method: "POST",
121882
+ headers: { "Content-Type": "application/json" },
121883
+ body: JSON.stringify({ clientKey, taskId }),
121884
+ signal: AbortSignal.timeout(Math.max(1, Math.min(15000, deadline - Date.now())))
121885
+ });
121886
+ if (!res.ok)
121887
+ return null;
121888
+ const rj = await res.json().catch(() => null);
121889
+ if (!rj || rj.errorId)
121890
+ return null;
121891
+ if (rj.status === "ready") {
121892
+ const s = rj.solution ?? {};
121893
+ if (s.ticket && s.randstr) {
121894
+ return { ticket: s.ticket, randstr: s.randstr, appid: s.appid ?? input.appId };
121895
+ }
121896
+ return null;
121897
+ }
121898
+ } catch {
121899
+ return null;
121900
+ }
121901
+ await new Promise((r) => setTimeout(r, 2000));
121902
+ }
121903
+ return null;
121904
+ }
121905
+ async function submitWafClearance(input) {
121906
+ const doFetch = input.fetchImpl ?? fetch;
121907
+ const ret = input.ret ?? 0;
121908
+ const body = [String(ret), input.ticket, input.randstr, input.seqid].join(`
121909
+ `);
121910
+ const headers = { "Content-Type": "text/plain;charset=UTF-8" };
121911
+ if (input.cookieHeader)
121912
+ headers["Cookie"] = input.cookieHeader;
121913
+ try {
121914
+ const res = await doFetch(input.wafUrl, {
121915
+ method: "POST",
121916
+ headers,
121917
+ body,
121918
+ signal: AbortSignal.timeout(input.timeoutMs ?? 20000),
121919
+ ...input.proxyUrl ? { proxy: input.proxyUrl } : {}
121920
+ });
121921
+ const setCookies2 = typeof res.headers.getSetCookie === "function" ? res.headers.getSetCookie() : res.headers.get("set-cookie") ? [res.headers.get("set-cookie")] : [];
121922
+ return { ok: res.ok, status: res.status, setCookies: setCookies2 };
121923
+ } catch {
121924
+ return { ok: false, status: 0, setCookies: [] };
121925
+ }
121926
+ }
121927
+ function parseCapzyProxy(proxyUrl) {
121928
+ try {
121929
+ const u = new URL(proxyUrl);
121930
+ if (!u.hostname || !u.port)
121931
+ return;
121932
+ const scheme = u.protocol.replace(":", "").toLowerCase();
121933
+ const type = scheme === "socks5" || scheme === "socks4" || scheme === "https" ? scheme : "http";
121934
+ return {
121935
+ type,
121936
+ address: u.hostname,
121937
+ port: Number(u.port),
121938
+ ...u.username ? { login: decodeURIComponent(u.username) } : {},
121939
+ ...u.password ? { password: decodeURIComponent(u.password) } : {}
121940
+ };
121941
+ } catch {
121942
+ return;
121943
+ }
121944
+ }
121945
+ function mergeCookieHeader(prior, setCookies2) {
121946
+ const jar = new Map;
121947
+ for (const pair of (prior ?? "").split(/;\s*/)) {
121948
+ const eq2 = pair.indexOf("=");
121949
+ if (eq2 > 0)
121950
+ jar.set(pair.slice(0, eq2).trim(), pair.slice(eq2 + 1).trim());
121951
+ }
121952
+ for (const sc of setCookies2) {
121953
+ const first2 = sc.split(";")[0] ?? "";
121954
+ const eq2 = first2.indexOf("=");
121955
+ if (eq2 > 0)
121956
+ jar.set(first2.slice(0, eq2).trim(), first2.slice(eq2 + 1).trim());
121957
+ }
121958
+ return Array.from(jar, ([k, v]) => `${k}=${v}`).join("; ");
121959
+ }
121960
+ async function clearTencentWafViaCapzy(input) {
121961
+ const challenge = extractTencentChallenge(input.html);
121962
+ if (!challenge)
121963
+ return null;
121964
+ const solved = await solveTencentViaCapzy({
121965
+ websiteURL: input.url,
121966
+ appId: challenge.appId,
121967
+ clientKey: input.capzyKey,
121968
+ proxy: input.proxyUrl ? parseCapzyProxy(input.proxyUrl) : undefined,
121969
+ timeoutMs: input.timeoutMs,
121970
+ fetchImpl: input.fetchImpl
121971
+ });
121972
+ if (!solved)
121973
+ return null;
121974
+ let origin;
121975
+ try {
121976
+ origin = new URL(input.url).origin;
121977
+ } catch {
121978
+ return null;
121979
+ }
121980
+ const clearance = await submitWafClearance({
121981
+ wafUrl: `${origin}/WafCaptcha`,
121982
+ ticket: solved.ticket,
121983
+ randstr: solved.randstr,
121984
+ seqid: challenge.seqid,
121985
+ cookieHeader: input.cookieHeader,
121986
+ proxyUrl: input.proxyUrl,
121987
+ fetchImpl: input.fetchImpl
121988
+ });
121989
+ if (!clearance.ok)
121990
+ return null;
121991
+ const cookieHeader = mergeCookieHeader(input.cookieHeader, clearance.setCookies);
121992
+ const doFetch = input.fetchImpl ?? fetch;
121993
+ try {
121994
+ const res = await doFetch(input.url, {
121995
+ headers: { Cookie: cookieHeader, Accept: "*/*" },
121996
+ signal: AbortSignal.timeout(input.timeoutMs ?? 30000),
121997
+ ...input.proxyUrl ? { proxy: input.proxyUrl } : {}
121998
+ });
121999
+ const html3 = await res.text();
122000
+ if (!html3 || extractTencentChallenge(html3))
122001
+ return null;
122002
+ return { html: html3, cookieHeader };
122003
+ } catch {
122004
+ return null;
122005
+ }
122006
+ }
122007
+
122008
+ // .tmp-runtime-src/execution/capzy-cf-solve.ts
122009
+ function extractCfClearance(solution) {
122010
+ if (!solution || typeof solution !== "object")
122011
+ return null;
122012
+ const s = solution;
122013
+ const ua = typeof s.user_agent === "string" && s.user_agent || typeof s.userAgent === "string" && s.userAgent || undefined;
122014
+ if (typeof s.cf_clearance === "string" && s.cf_clearance) {
122015
+ return { cf_clearance: s.cf_clearance, user_agent: ua || undefined };
122016
+ }
122017
+ if (typeof s.token === "string" && s.token && s.type !== "turnstile") {
122018
+ return { cf_clearance: s.token, user_agent: ua || undefined };
122019
+ }
122020
+ const cookies = s.cookies;
122021
+ if (cookies && typeof cookies === "object" && !Array.isArray(cookies)) {
122022
+ const v = cookies.cf_clearance;
122023
+ if (typeof v === "string" && v)
122024
+ return { cf_clearance: v, user_agent: ua || undefined };
122025
+ }
122026
+ if (Array.isArray(cookies)) {
122027
+ for (const c of cookies) {
122028
+ if (c && typeof c === "object") {
122029
+ const o = c;
122030
+ if (o.name === "cf_clearance" && typeof o.value === "string" && o.value) {
122031
+ return { cf_clearance: o.value, user_agent: ua || undefined };
122032
+ }
122033
+ } else if (typeof c === "string" && c.startsWith("cf_clearance=")) {
122034
+ const v = c.slice("cf_clearance=".length).split(";")[0];
122035
+ if (v)
122036
+ return { cf_clearance: v, user_agent: ua || undefined };
122037
+ }
122038
+ }
122039
+ }
122040
+ return null;
122041
+ }
122042
+ function buildCfTask(websiteURL, proxy) {
122043
+ return {
122044
+ type: "AntiCloudflareTask",
122045
+ websiteURL,
122046
+ proxyType: proxy.type,
122047
+ proxyAddress: proxy.address,
122048
+ proxyPort: proxy.port,
122049
+ ...proxy.login ? { proxyLogin: proxy.login } : {},
122050
+ ...proxy.password ? { proxyPassword: proxy.password } : {}
122051
+ };
122052
+ }
122053
+ async function solveCfViaCapzy(input) {
122054
+ const clientKey = input.clientKey ?? process.env.UNBROWSE_CAPZY_KEY?.trim();
122055
+ if (!clientKey)
122056
+ return null;
122057
+ if (!input.proxy)
122058
+ return null;
122059
+ const apiBase = (input.apiBase ?? process.env.UNBROWSE_CAPZY_URL?.trim() ?? "https://api.capzy.ai").replace(/\/+$/, "");
122060
+ const doFetch = input.fetchImpl ?? fetch;
122061
+ const deadline = Date.now() + (input.timeoutMs ?? 120000);
122062
+ const task = buildCfTask(input.websiteURL, input.proxy);
122063
+ let taskId;
122064
+ try {
122065
+ const created = await doFetch(`${apiBase}/createTask`, {
122066
+ method: "POST",
122067
+ headers: { "Content-Type": "application/json" },
122068
+ body: JSON.stringify({ clientKey, task }),
122069
+ signal: AbortSignal.timeout(Math.max(1, Math.min(20000, deadline - Date.now())))
122070
+ });
122071
+ if (!created.ok)
122072
+ return null;
122073
+ const cj = await created.json().catch(() => null);
122074
+ if (!cj || cj.errorId || !cj.taskId)
122075
+ return null;
122076
+ taskId = cj.taskId;
122077
+ } catch {
122078
+ return null;
122079
+ }
122080
+ while (Date.now() < deadline) {
122081
+ try {
122082
+ const res = await doFetch(`${apiBase}/getTaskResult`, {
122083
+ method: "POST",
122084
+ headers: { "Content-Type": "application/json" },
122085
+ body: JSON.stringify({ clientKey, taskId }),
122086
+ signal: AbortSignal.timeout(Math.max(1, Math.min(15000, deadline - Date.now())))
122087
+ });
122088
+ if (!res.ok)
122089
+ return null;
122090
+ const rj = await res.json().catch(() => null);
122091
+ if (!rj || rj.errorId)
122092
+ return null;
122093
+ if (rj.status === "failed")
122094
+ return null;
122095
+ if (rj.status === "ready") {
122096
+ return extractCfClearance(rj.solution);
122097
+ }
122098
+ } catch {
122099
+ return null;
122100
+ }
122101
+ await new Promise((r) => setTimeout(r, 2500));
122102
+ }
122103
+ return null;
122104
+ }
122105
+ var init_capzy_cf_solve = () => {};
122106
+
121821
122107
  // .tmp-runtime-src/execution/cf-challenge.ts
121822
122108
  var exports_cf_challenge = {};
121823
122109
  __export(exports_cf_challenge, {
@@ -121838,6 +122124,31 @@ function extractCfBundleUrl(body, requestUrl) {
121838
122124
  }
121839
122125
  }
121840
122126
  async function solveCfAndRetry(input) {
122127
+ if (process.env.UNBROWSE_CAPZY_KEY?.trim()) {
122128
+ const viaCapzy = await solveCfViaCapzy({
122129
+ websiteURL: input.url,
122130
+ proxy: input.proxy ? parseCapzyProxy(input.proxy) : undefined,
122131
+ timeoutMs: input.timeoutMs
122132
+ });
122133
+ if (viaCapzy?.cf_clearance) {
122134
+ const merged2 = mergeCookieJar(input.cookies ?? [], [
122135
+ { name: "cf_clearance", value: viaCapzy.cf_clearance }
122136
+ ]);
122137
+ const cookieHeader2 = merged2.map((c) => `${c.name}=${c.value}`).join("; ");
122138
+ try {
122139
+ const retry = await globalThis.fetch(input.url, {
122140
+ method: "GET",
122141
+ headers: {
122142
+ ...cookieHeader2 ? { cookie: cookieHeader2 } : {},
122143
+ ...viaCapzy.user_agent ? { "user-agent": viaCapzy.user_agent } : {}
122144
+ }
122145
+ });
122146
+ if (retry.status >= 200 && retry.status < 400) {
122147
+ return { status: retry.status, html: await retry.text(), cookies: merged2 };
122148
+ }
122149
+ } catch {}
122150
+ }
122151
+ }
121841
122152
  const bundleUrl = extractCfBundleUrl(input.body, input.url);
121842
122153
  if (!bundleUrl)
121843
122154
  return null;
@@ -121919,6 +122230,7 @@ function mergeCookieJar(a, b) {
121919
122230
  var CF_BUNDLE_RE;
121920
122231
  var init_cf_challenge = __esm(() => {
121921
122232
  init_bundle_replay_client();
122233
+ init_capzy_cf_solve();
121922
122234
  CF_BUNDLE_RE = /src=["'](\/cdn-cgi\/challenge-platform\/h\/[gb]\/scripts\/jsd\/[a-f0-9]+\/main\.js)/i;
121923
122235
  });
121924
122236
 
@@ -130228,6 +130540,35 @@ var init_mount = __esm(() => {
130228
130540
  import { existsSync as existsSync27, writeFileSync as writeFileSync17, readFileSync as readFileSync23, mkdirSync as mkdirSync19, readdirSync as readdirSync8 } from "node:fs";
130229
130541
  import { dirname as dirname6, join as join33 } from "node:path";
130230
130542
  import { createHash as createHash14 } from "node:crypto";
130543
+ function registrableHost(u) {
130544
+ if (!u)
130545
+ return null;
130546
+ try {
130547
+ return new URL(u).hostname.replace(/^www\./, "").split(".").slice(-2).join(".");
130548
+ } catch {
130549
+ return null;
130550
+ }
130551
+ }
130552
+ function shouldAutoWalk(requestedUrl, topUrl, topScore, minScore = 0.8) {
130553
+ if (!topUrl)
130554
+ return false;
130555
+ const reqReg = registrableHost(requestedUrl);
130556
+ const topReg = registrableHost(topUrl);
130557
+ return !!reqReg && reqReg === topReg || (topScore ?? 0) >= minScore;
130558
+ }
130559
+ function pickWalkTarget(requestedUrl, ranked, minScore = 0.8) {
130560
+ const eligible = ranked.filter((c) => c?.url && shouldAutoWalk(requestedUrl, c.url, c.score, minScore));
130561
+ if (eligible.length === 0)
130562
+ return null;
130563
+ const hasPath = (u) => {
130564
+ try {
130565
+ return new URL(u).pathname.replace(/\/+$/, "").length > 0;
130566
+ } catch {
130567
+ return false;
130568
+ }
130569
+ };
130570
+ return eligible.find((c) => hasPath(c.url)) ?? eligible[0];
130571
+ }
130231
130572
  function artifactResultWithShortlist(artifact, skillId, triggerUrl) {
130232
130573
  const ep = artifact.endpoint;
130233
130574
  const res = artifact.result;
@@ -130767,6 +131108,19 @@ function promoteResultSnapshot(cacheKey2, skill, endpointId, result, trace) {
130767
131108
  expires: Date.now() + ROUTE_CACHE_TTL
130768
131109
  });
130769
131110
  }
131111
+ function persistWalkedRoute(scope, cacheKey2, walked) {
131112
+ try {
131113
+ const skill = walked.skill;
131114
+ if (!skill)
131115
+ return;
131116
+ const endpointId = walked.trace?.endpoint_id;
131117
+ const isReplayableRoute = (walked.source === "marketplace" || walked.source === "route-cache" || walked.source === "live-capture" || walked.source === "direct-fetch") && !!skill.endpoints?.length;
131118
+ if (isReplayableRoute) {
131119
+ promoteLearnedSkill(scope, cacheKey2, skill, endpointId, undefined);
131120
+ }
131121
+ promoteResultSnapshot(cacheKey2, skill, endpointId, walked.result, walked.trace);
131122
+ } catch {}
131123
+ }
130770
131124
  function buildCachedResultResponse(cached4, source, timing) {
130771
131125
  const now = new Date().toISOString();
130772
131126
  return {
@@ -131912,6 +132266,7 @@ function resolveEndpointTemplateBindings(endpoint, params = {}, contextUrl) {
131912
132266
  }
131913
132267
  async function resolveAndExecute(intent, params = {}, context, projection, options) {
131914
132268
  const t0 = Date.now();
132269
+ const walkDepth = options?.__walkDepth ?? 0;
131915
132270
  const timing = {
131916
132271
  search_ms: 0,
131917
132272
  get_skill_ms: 0,
@@ -133461,7 +133816,7 @@ async function resolveAndExecute(intent, params = {}, context, projection, optio
133461
133816
  }
133462
133817
  } catch {}
133463
133818
  }
133464
- if (exaHits.length > 0) {
133819
+ if (walkDepth === 0 && exaHits.length > 0) {
133465
133820
  const intentTokens = (queryIntent || "").toLowerCase().match(/[a-z0-9]{3,}/g) ?? [];
133466
133821
  const stop2 = new Set(["get", "the", "for", "from", "with", "and", "any", "all", "new", "top", "top1", "top10"]);
133467
133822
  const intentTokenSet = new Set(intentTokens.filter((t) => !stop2.has(t)));
@@ -133489,7 +133844,7 @@ async function resolveAndExecute(intent, params = {}, context, projection, optio
133489
133844
  console.log(`[exa] raw-candidate mode (UNBROWSE_EXA_RAW=1): keeping ${exaHits.length} low-score hits for the agent to judge`);
133490
133845
  }
133491
133846
  }
133492
- if (exaHits.length > 0) {
133847
+ if (walkDepth === 0 && exaHits.length > 0) {
133493
133848
  const richHit = pickAnswerHit(exaHits, raceProbeDomain);
133494
133849
  const candidates = exaHits.map((hit) => ({
133495
133850
  url: hit.url,
@@ -133522,6 +133877,40 @@ async function resolveAndExecute(intent, params = {}, context, projection, optio
133522
133877
  skill_id: "exa-web-search",
133523
133878
  domain: exaSkillDomain
133524
133879
  };
133880
+ const topWalk = pickWalkTarget(raceContextUrl, exaHits);
133881
+ if (topWalk?.url && walkDepth < 1) {
133882
+ try {
133883
+ const walked = await resolveAndExecute(intent, params, { url: topWalk.url, domain: registrableHost(topWalk.url) ?? undefined }, projection, { ...options ?? {}, __walkDepth: walkDepth + 1 });
133884
+ if (walked && walked.source !== "exa" && !walked.result?.exec_unsupported) {
133885
+ console.log(`[exa→walk] resolved candidate ${topWalk.url} via ${walked.source} (pointer pipe d${walkDepth + 1})`);
133886
+ {
133887
+ const wDom = context?.domain ?? (() => {
133888
+ try {
133889
+ return new URL(raceContextUrl).hostname;
133890
+ } catch {
133891
+ return null;
133892
+ }
133893
+ })();
133894
+ persistWalkedRoute(clientScope, scopedCacheKey(clientScope, buildResolveCacheKey(wDom ?? null, intent, raceContextUrl)), walked);
133895
+ }
133896
+ return {
133897
+ ...walked,
133898
+ result: {
133899
+ ...walked.result,
133900
+ walked_from: "exa-web-search",
133901
+ exa_candidates: candidates,
133902
+ run_plan: [
133903
+ { step: "resolve", mode: "web-search", status: "hit", label: "fallback", error: null, source: webProvider ?? "exa" },
133904
+ { step: "walk", mode: walked.source, status: "hit", label: "top-candidate", error: null, source: walked.source }
133905
+ ]
133906
+ }
133907
+ };
133908
+ }
133909
+ console.log(`[exa→walk] candidate ${topWalk.url} produced no ladder rung — candidate leaf`);
133910
+ } catch (e) {
133911
+ console.log(`[exa→walk] walk error (${e.message}) — candidate leaf`);
133912
+ }
133913
+ }
133525
133914
  return {
133526
133915
  result: {
133527
133916
  ...richHit ? {
@@ -134298,7 +134687,7 @@ async function resolveAndExecute(intent, params = {}, context, projection, optio
134298
134687
  console.log(`[ddg] $0 fallback failed: ${err.message}`);
134299
134688
  }
134300
134689
  }
134301
- if (viable.length === 0 && exaResults?.length) {
134690
+ if (walkDepth === 0 && viable.length === 0 && exaResults?.length) {
134302
134691
  const richHit = pickAnswerHit(exaResults, requestedDomain);
134303
134692
  if (richHit) {
134304
134693
  console.log(`[exa] returning highlights answer from ${richHit.url} (${(richHit.highlights ?? []).join(" ").length} chars) + ${exaResults.length} ranked candidate(s)`);
@@ -134321,6 +134710,31 @@ async function resolveAndExecute(intent, params = {}, context, projection, optio
134321
134710
  fetch: `unbrowse fetch --url ${JSON.stringify(hit.url)}`
134322
134711
  }
134323
134712
  }));
134713
+ const topWalk = pickWalkTarget(context?.url, exaResults);
134714
+ if (topWalk?.url && walkDepth < 1) {
134715
+ try {
134716
+ const walked = await resolveAndExecute(intent, params, { url: topWalk.url, domain: registrableHost(topWalk.url) ?? undefined }, projection, { ...options ?? {}, __walkDepth: walkDepth + 1 });
134717
+ if (walked && walked.source !== "exa" && !walked.result?.exec_unsupported) {
134718
+ console.log(`[exa→walk] resolved candidate ${topWalk.url} via ${walked.source} (pointer pipe d${walkDepth + 1})`);
134719
+ persistWalkedRoute(clientScope, cacheKey2, walked);
134720
+ return {
134721
+ ...walked,
134722
+ result: {
134723
+ ...walked.result,
134724
+ walked_from: "exa-web-search",
134725
+ exa_candidates: candidates2,
134726
+ run_plan: [
134727
+ { step: "resolve", mode: "web-search", status: "hit", label: "fallback", error: null, source: serialWebProvider ?? "exa" },
134728
+ { step: "walk", mode: walked.source, status: "hit", label: "top-candidate", error: null, source: walked.source }
134729
+ ]
134730
+ }
134731
+ };
134732
+ }
134733
+ console.log(`[exa→walk] candidate ${topWalk.url} produced no ladder rung — exa leaf`);
134734
+ } catch (e) {
134735
+ console.log(`[exa→walk] walk error (${e.message}) — exa leaf`);
134736
+ }
134737
+ }
134324
134738
  return {
134325
134739
  result: {
134326
134740
  data: richHit.highlights,
@@ -148674,193 +149088,6 @@ function isGithubBlob(url) {
148674
149088
  return githubBlobToRaw(url) !== null;
148675
149089
  }
148676
149090
 
148677
- // .tmp-runtime-src/execution/tencent-waf-solve.ts
148678
- var exports_tencent_waf_solve = {};
148679
- __export(exports_tencent_waf_solve, {
148680
- submitWafClearance: () => submitWafClearance,
148681
- solveTencentViaCapzy: () => solveTencentViaCapzy,
148682
- parseCapzyProxy: () => parseCapzyProxy,
148683
- mergeCookieHeader: () => mergeCookieHeader,
148684
- extractTencentChallenge: () => extractTencentChallenge,
148685
- clearTencentWafViaCapzy: () => clearTencentWafViaCapzy
148686
- });
148687
- function extractTencentChallenge(html3) {
148688
- if (!html3 || typeof html3 !== "string")
148689
- return null;
148690
- const appMatch = html3.match(/new\s+Captcha\(\s*["']([0-9]{5,})["']/);
148691
- const seqMatch = html3.match(/var\s+seqid\s*=\s*["']([^"']+?)["']/);
148692
- if (!appMatch || !seqMatch)
148693
- return null;
148694
- return { appId: appMatch[1], seqid: seqMatch[1] };
148695
- }
148696
- async function solveTencentViaCapzy(input) {
148697
- const clientKey = input.clientKey ?? process.env.UNBROWSE_CAPZY_KEY?.trim();
148698
- if (!clientKey)
148699
- return null;
148700
- const apiBase = (input.apiBase ?? process.env.UNBROWSE_CAPZY_URL?.trim() ?? "https://api.capzy.ai").replace(/\/+$/, "");
148701
- const doFetch = input.fetchImpl ?? fetch;
148702
- const deadline = Date.now() + (input.timeoutMs ?? 90000);
148703
- const task = input.proxy ? {
148704
- type: "TencentTask",
148705
- websiteURL: input.websiteURL,
148706
- websiteKey: input.appId,
148707
- proxyType: input.proxy.type,
148708
- proxyAddress: input.proxy.address,
148709
- proxyPort: input.proxy.port,
148710
- ...input.proxy.login ? { proxyLogin: input.proxy.login } : {},
148711
- ...input.proxy.password ? { proxyPassword: input.proxy.password } : {}
148712
- } : {
148713
- type: "TencentTaskProxyLess",
148714
- websiteURL: input.websiteURL,
148715
- websiteKey: input.appId
148716
- };
148717
- let taskId;
148718
- try {
148719
- const created = await doFetch(`${apiBase}/createTask`, {
148720
- method: "POST",
148721
- headers: { "Content-Type": "application/json" },
148722
- body: JSON.stringify({ clientKey, task }),
148723
- signal: AbortSignal.timeout(Math.max(1, Math.min(20000, deadline - Date.now())))
148724
- });
148725
- if (!created.ok)
148726
- return null;
148727
- const cj = await created.json().catch(() => null);
148728
- if (!cj || cj.errorId || !cj.taskId)
148729
- return null;
148730
- taskId = cj.taskId;
148731
- } catch {
148732
- return null;
148733
- }
148734
- while (Date.now() < deadline) {
148735
- try {
148736
- const res = await doFetch(`${apiBase}/getTaskResult`, {
148737
- method: "POST",
148738
- headers: { "Content-Type": "application/json" },
148739
- body: JSON.stringify({ clientKey, taskId }),
148740
- signal: AbortSignal.timeout(Math.max(1, Math.min(15000, deadline - Date.now())))
148741
- });
148742
- if (!res.ok)
148743
- return null;
148744
- const rj = await res.json().catch(() => null);
148745
- if (!rj || rj.errorId)
148746
- return null;
148747
- if (rj.status === "ready") {
148748
- const s = rj.solution ?? {};
148749
- if (s.ticket && s.randstr) {
148750
- return { ticket: s.ticket, randstr: s.randstr, appid: s.appid ?? input.appId };
148751
- }
148752
- return null;
148753
- }
148754
- } catch {
148755
- return null;
148756
- }
148757
- await new Promise((r) => setTimeout(r, 2000));
148758
- }
148759
- return null;
148760
- }
148761
- async function submitWafClearance(input) {
148762
- const doFetch = input.fetchImpl ?? fetch;
148763
- const ret = input.ret ?? 0;
148764
- const body = [String(ret), input.ticket, input.randstr, input.seqid].join(`
148765
- `);
148766
- const headers = { "Content-Type": "text/plain;charset=UTF-8" };
148767
- if (input.cookieHeader)
148768
- headers["Cookie"] = input.cookieHeader;
148769
- try {
148770
- const res = await doFetch(input.wafUrl, {
148771
- method: "POST",
148772
- headers,
148773
- body,
148774
- signal: AbortSignal.timeout(input.timeoutMs ?? 20000),
148775
- ...input.proxyUrl ? { proxy: input.proxyUrl } : {}
148776
- });
148777
- const setCookies2 = typeof res.headers.getSetCookie === "function" ? res.headers.getSetCookie() : res.headers.get("set-cookie") ? [res.headers.get("set-cookie")] : [];
148778
- return { ok: res.ok, status: res.status, setCookies: setCookies2 };
148779
- } catch {
148780
- return { ok: false, status: 0, setCookies: [] };
148781
- }
148782
- }
148783
- function parseCapzyProxy(proxyUrl) {
148784
- try {
148785
- const u = new URL(proxyUrl);
148786
- if (!u.hostname || !u.port)
148787
- return;
148788
- const scheme = u.protocol.replace(":", "").toLowerCase();
148789
- const type = scheme === "socks5" || scheme === "socks4" || scheme === "https" ? scheme : "http";
148790
- return {
148791
- type,
148792
- address: u.hostname,
148793
- port: Number(u.port),
148794
- ...u.username ? { login: decodeURIComponent(u.username) } : {},
148795
- ...u.password ? { password: decodeURIComponent(u.password) } : {}
148796
- };
148797
- } catch {
148798
- return;
148799
- }
148800
- }
148801
- function mergeCookieHeader(prior, setCookies2) {
148802
- const jar = new Map;
148803
- for (const pair of (prior ?? "").split(/;\s*/)) {
148804
- const eq2 = pair.indexOf("=");
148805
- if (eq2 > 0)
148806
- jar.set(pair.slice(0, eq2).trim(), pair.slice(eq2 + 1).trim());
148807
- }
148808
- for (const sc of setCookies2) {
148809
- const first2 = sc.split(";")[0] ?? "";
148810
- const eq2 = first2.indexOf("=");
148811
- if (eq2 > 0)
148812
- jar.set(first2.slice(0, eq2).trim(), first2.slice(eq2 + 1).trim());
148813
- }
148814
- return Array.from(jar, ([k, v]) => `${k}=${v}`).join("; ");
148815
- }
148816
- async function clearTencentWafViaCapzy(input) {
148817
- const challenge = extractTencentChallenge(input.html);
148818
- if (!challenge)
148819
- return null;
148820
- const solved = await solveTencentViaCapzy({
148821
- websiteURL: input.url,
148822
- appId: challenge.appId,
148823
- clientKey: input.capzyKey,
148824
- proxy: input.proxyUrl ? parseCapzyProxy(input.proxyUrl) : undefined,
148825
- timeoutMs: input.timeoutMs,
148826
- fetchImpl: input.fetchImpl
148827
- });
148828
- if (!solved)
148829
- return null;
148830
- let origin;
148831
- try {
148832
- origin = new URL(input.url).origin;
148833
- } catch {
148834
- return null;
148835
- }
148836
- const clearance = await submitWafClearance({
148837
- wafUrl: `${origin}/WafCaptcha`,
148838
- ticket: solved.ticket,
148839
- randstr: solved.randstr,
148840
- seqid: challenge.seqid,
148841
- cookieHeader: input.cookieHeader,
148842
- proxyUrl: input.proxyUrl,
148843
- fetchImpl: input.fetchImpl
148844
- });
148845
- if (!clearance.ok)
148846
- return null;
148847
- const cookieHeader = mergeCookieHeader(input.cookieHeader, clearance.setCookies);
148848
- const doFetch = input.fetchImpl ?? fetch;
148849
- try {
148850
- const res = await doFetch(input.url, {
148851
- headers: { Cookie: cookieHeader, Accept: "*/*" },
148852
- signal: AbortSignal.timeout(input.timeoutMs ?? 30000),
148853
- ...input.proxyUrl ? { proxy: input.proxyUrl } : {}
148854
- });
148855
- const html3 = await res.text();
148856
- if (!html3 || extractTencentChallenge(html3))
148857
- return null;
148858
- return { html: html3, cookieHeader };
148859
- } catch {
148860
- return null;
148861
- }
148862
- }
148863
-
148864
149091
  // .tmp-runtime-src/extraction/readability.ts
148865
149092
  function linkDensity($2, $el) {
148866
149093
  const textLen = $el.text().replace(/\s+/g, " ").trim().length || 1;
@@ -235975,11 +236202,13 @@ __export(exports_orchestrator, {
235975
236202
  shouldReuseRouteResultSnapshot: () => shouldReuseRouteResultSnapshot2,
235976
236203
  shouldFallbackToLiveCaptureAfterAutoexecFailure: () => shouldFallbackToLiveCaptureAfterAutoexecFailure2,
235977
236204
  shouldBypassLiveCaptureQueue: () => shouldBypassLiveCaptureQueue2,
236205
+ shouldAutoWalk: () => shouldAutoWalk2,
235978
236206
  selectSkillIdsToHydrate: () => selectSkillIdsToHydrate2,
235979
236207
  selectSearchTermsForExecution: () => selectSearchTermsForExecution2,
235980
236208
  scopedCacheKey: () => scopedCacheKey2,
235981
236209
  resolveEndpointTemplateBindings: () => resolveEndpointTemplateBindings2,
235982
236210
  resolveAndExecute: () => resolveAndExecute2,
236211
+ registrableHost: () => registrableHost2,
235983
236212
  readSkillSnapshot: () => readSkillSnapshot2,
235984
236213
  readComposite: () => readComposite2,
235985
236214
  pruneLocalCacheEntriesForSkill: () => pruneLocalCacheEntriesForSkill2,
@@ -235987,6 +236216,7 @@ __export(exports_orchestrator, {
235987
236216
  probeLooksLikeFetchableHtmlDocument: () => probeLooksLikeFetchableHtmlDocument2,
235988
236217
  probeLooksLikeDirectJsonApi: () => probeLooksLikeDirectJsonApi2,
235989
236218
  planPrereqOrder: () => planPrereqOrder2,
236219
+ pickWalkTarget: () => pickWalkTarget2,
235990
236220
  pickPreferredSkillSnapshot: () => pickPreferredSkillSnapshot2,
235991
236221
  persistDomainCache: () => persistDomainCache2,
235992
236222
  marketplaceSkillMatchesContext: () => marketplaceSkillMatchesContext2,
@@ -236018,6 +236248,35 @@ __export(exports_orchestrator, {
236018
236248
  import { existsSync as existsSync60, writeFileSync as writeFileSync35, readFileSync as readFileSync49, mkdirSync as mkdirSync39, readdirSync as readdirSync17 } from "node:fs";
236019
236249
  import { dirname as dirname15, join as join68 } from "node:path";
236020
236250
  import { createHash as createHash48 } from "node:crypto";
236251
+ function registrableHost2(u) {
236252
+ if (!u)
236253
+ return null;
236254
+ try {
236255
+ return new URL(u).hostname.replace(/^www\./, "").split(".").slice(-2).join(".");
236256
+ } catch {
236257
+ return null;
236258
+ }
236259
+ }
236260
+ function shouldAutoWalk2(requestedUrl, topUrl, topScore, minScore = 0.8) {
236261
+ if (!topUrl)
236262
+ return false;
236263
+ const reqReg = registrableHost2(requestedUrl);
236264
+ const topReg = registrableHost2(topUrl);
236265
+ return !!reqReg && reqReg === topReg || (topScore ?? 0) >= minScore;
236266
+ }
236267
+ function pickWalkTarget2(requestedUrl, ranked, minScore = 0.8) {
236268
+ const eligible = ranked.filter((c) => c?.url && shouldAutoWalk2(requestedUrl, c.url, c.score, minScore));
236269
+ if (eligible.length === 0)
236270
+ return null;
236271
+ const hasPath = (u) => {
236272
+ try {
236273
+ return new URL(u).pathname.replace(/\/+$/, "").length > 0;
236274
+ } catch {
236275
+ return false;
236276
+ }
236277
+ };
236278
+ return eligible.find((c) => hasPath(c.url)) ?? eligible[0];
236279
+ }
236021
236280
  function artifactResultWithShortlist2(artifact, skillId, triggerUrl) {
236022
236281
  const ep = artifact.endpoint;
236023
236282
  const res = artifact.result;
@@ -236557,6 +236816,19 @@ function promoteResultSnapshot2(cacheKey2, skill, endpointId, result, trace) {
236557
236816
  expires: Date.now() + ROUTE_CACHE_TTL2
236558
236817
  });
236559
236818
  }
236819
+ function persistWalkedRoute2(scope, cacheKey2, walked) {
236820
+ try {
236821
+ const skill = walked.skill;
236822
+ if (!skill)
236823
+ return;
236824
+ const endpointId = walked.trace?.endpoint_id;
236825
+ const isReplayableRoute = (walked.source === "marketplace" || walked.source === "route-cache" || walked.source === "live-capture" || walked.source === "direct-fetch") && !!skill.endpoints?.length;
236826
+ if (isReplayableRoute) {
236827
+ promoteLearnedSkill2(scope, cacheKey2, skill, endpointId, undefined);
236828
+ }
236829
+ promoteResultSnapshot2(cacheKey2, skill, endpointId, walked.result, walked.trace);
236830
+ } catch {}
236831
+ }
236560
236832
  function buildCachedResultResponse2(cached5, source, timing) {
236561
236833
  const now = new Date().toISOString();
236562
236834
  return {
@@ -237707,6 +237979,7 @@ function resolveEndpointTemplateBindings2(endpoint, params = {}, contextUrl) {
237707
237979
  }
237708
237980
  async function resolveAndExecute2(intent, params = {}, context, projection, options) {
237709
237981
  const t0 = Date.now();
237982
+ const walkDepth = options?.__walkDepth ?? 0;
237710
237983
  const timing = {
237711
237984
  search_ms: 0,
237712
237985
  get_skill_ms: 0,
@@ -239256,7 +239529,7 @@ async function resolveAndExecute2(intent, params = {}, context, projection, opti
239256
239529
  }
239257
239530
  } catch {}
239258
239531
  }
239259
- if (exaHits.length > 0) {
239532
+ if (walkDepth === 0 && exaHits.length > 0) {
239260
239533
  const intentTokens = (queryIntent || "").toLowerCase().match(/[a-z0-9]{3,}/g) ?? [];
239261
239534
  const stop2 = new Set(["get", "the", "for", "from", "with", "and", "any", "all", "new", "top", "top1", "top10"]);
239262
239535
  const intentTokenSet = new Set(intentTokens.filter((t) => !stop2.has(t)));
@@ -239284,7 +239557,7 @@ async function resolveAndExecute2(intent, params = {}, context, projection, opti
239284
239557
  console.log(`[exa] raw-candidate mode (UNBROWSE_EXA_RAW=1): keeping ${exaHits.length} low-score hits for the agent to judge`);
239285
239558
  }
239286
239559
  }
239287
- if (exaHits.length > 0) {
239560
+ if (walkDepth === 0 && exaHits.length > 0) {
239288
239561
  const richHit = pickAnswerHit(exaHits, raceProbeDomain);
239289
239562
  const candidates = exaHits.map((hit) => ({
239290
239563
  url: hit.url,
@@ -239317,6 +239590,40 @@ async function resolveAndExecute2(intent, params = {}, context, projection, opti
239317
239590
  skill_id: "exa-web-search",
239318
239591
  domain: exaSkillDomain
239319
239592
  };
239593
+ const topWalk = pickWalkTarget2(raceContextUrl, exaHits);
239594
+ if (topWalk?.url && walkDepth < 1) {
239595
+ try {
239596
+ const walked = await resolveAndExecute2(intent, params, { url: topWalk.url, domain: registrableHost2(topWalk.url) ?? undefined }, projection, { ...options ?? {}, __walkDepth: walkDepth + 1 });
239597
+ if (walked && walked.source !== "exa" && !walked.result?.exec_unsupported) {
239598
+ console.log(`[exa→walk] resolved candidate ${topWalk.url} via ${walked.source} (pointer pipe d${walkDepth + 1})`);
239599
+ {
239600
+ const wDom = context?.domain ?? (() => {
239601
+ try {
239602
+ return new URL(raceContextUrl).hostname;
239603
+ } catch {
239604
+ return null;
239605
+ }
239606
+ })();
239607
+ persistWalkedRoute2(clientScope, scopedCacheKey2(clientScope, buildResolveCacheKey2(wDom ?? null, intent, raceContextUrl)), walked);
239608
+ }
239609
+ return {
239610
+ ...walked,
239611
+ result: {
239612
+ ...walked.result,
239613
+ walked_from: "exa-web-search",
239614
+ exa_candidates: candidates,
239615
+ run_plan: [
239616
+ { step: "resolve", mode: "web-search", status: "hit", label: "fallback", error: null, source: webProvider ?? "exa" },
239617
+ { step: "walk", mode: walked.source, status: "hit", label: "top-candidate", error: null, source: walked.source }
239618
+ ]
239619
+ }
239620
+ };
239621
+ }
239622
+ console.log(`[exa→walk] candidate ${topWalk.url} produced no ladder rung — candidate leaf`);
239623
+ } catch (e) {
239624
+ console.log(`[exa→walk] walk error (${e.message}) — candidate leaf`);
239625
+ }
239626
+ }
239320
239627
  return {
239321
239628
  result: {
239322
239629
  ...richHit ? {
@@ -240093,7 +240400,7 @@ async function resolveAndExecute2(intent, params = {}, context, projection, opti
240093
240400
  console.log(`[ddg] $0 fallback failed: ${err.message}`);
240094
240401
  }
240095
240402
  }
240096
- if (viable.length === 0 && exaResults?.length) {
240403
+ if (walkDepth === 0 && viable.length === 0 && exaResults?.length) {
240097
240404
  const richHit = pickAnswerHit(exaResults, requestedDomain);
240098
240405
  if (richHit) {
240099
240406
  console.log(`[exa] returning highlights answer from ${richHit.url} (${(richHit.highlights ?? []).join(" ").length} chars) + ${exaResults.length} ranked candidate(s)`);
@@ -240116,6 +240423,31 @@ async function resolveAndExecute2(intent, params = {}, context, projection, opti
240116
240423
  fetch: `unbrowse fetch --url ${JSON.stringify(hit.url)}`
240117
240424
  }
240118
240425
  }));
240426
+ const topWalk = pickWalkTarget2(context?.url, exaResults);
240427
+ if (topWalk?.url && walkDepth < 1) {
240428
+ try {
240429
+ const walked = await resolveAndExecute2(intent, params, { url: topWalk.url, domain: registrableHost2(topWalk.url) ?? undefined }, projection, { ...options ?? {}, __walkDepth: walkDepth + 1 });
240430
+ if (walked && walked.source !== "exa" && !walked.result?.exec_unsupported) {
240431
+ console.log(`[exa→walk] resolved candidate ${topWalk.url} via ${walked.source} (pointer pipe d${walkDepth + 1})`);
240432
+ persistWalkedRoute2(clientScope, cacheKey2, walked);
240433
+ return {
240434
+ ...walked,
240435
+ result: {
240436
+ ...walked.result,
240437
+ walked_from: "exa-web-search",
240438
+ exa_candidates: candidates2,
240439
+ run_plan: [
240440
+ { step: "resolve", mode: "web-search", status: "hit", label: "fallback", error: null, source: serialWebProvider ?? "exa" },
240441
+ { step: "walk", mode: walked.source, status: "hit", label: "top-candidate", error: null, source: walked.source }
240442
+ ]
240443
+ }
240444
+ };
240445
+ }
240446
+ console.log(`[exa→walk] candidate ${topWalk.url} produced no ladder rung — exa leaf`);
240447
+ } catch (e) {
240448
+ console.log(`[exa→walk] walk error (${e.message}) — exa leaf`);
240449
+ }
240450
+ }
240119
240451
  return {
240120
240452
  result: {
240121
240453
  data: richHit.highlights,
Binary file
@@ -2,7 +2,7 @@
2
2
  "repo_url": "https://github.com/justrach/kuri.git",
3
3
  "branch": "adding-extensions",
4
4
  "source_sha": "149881254046a20778f642b69f20f0c6468f6fb4",
5
- "built_at": "2026-06-17T15:12:51.641Z",
5
+ "built_at": "2026-06-17T21:47:52.749Z",
6
6
  "binaries": {
7
7
  "darwin-arm64": {
8
8
  "zig_target": "aarch64-macos",
@@ -21,11 +21,11 @@
21
21
  },
22
22
  "linux-x64": {
23
23
  "zig_target": "x86_64-linux",
24
- "sha256": "4d009a669b71c2e11d747fb2839da17b1f89b62b4e937a9f5bb7274cc2ee2b4a"
24
+ "sha256": "250b2dfafc912dfda669416984036cf745749a0f5322c85acf0f95d7902e2dff"
25
25
  },
26
26
  "win-x64": {
27
27
  "zig_target": "x86_64-windows-gnu",
28
- "sha256": "4a244ea5f508b7dbeff9227f43458bb03e6db2e11f72abd00ab3c81c0fde973d",
28
+ "sha256": "f920a3a3f95e38c3245c52af11989a046a0c8c87b4091d8b3c0a1638b44b6179",
29
29
  "source": "pre-staged"
30
30
  }
31
31
  },
@@ -33,22 +33,22 @@
33
33
  "darwin-arm64": {
34
34
  "zig_target": "aarch64-macos",
35
35
  "lib": "libkuri_ffi.dylib",
36
- "sha256": "c49a5ad76c782f3fa351a8c8f797f63bf92a2e4f4405a76fc92d59cba9e52dee"
36
+ "sha256": "898a9290964371d9814d98de79bc62678833a0b914c47f765d46d6080ee391d5"
37
37
  },
38
38
  "darwin-x64": {
39
39
  "zig_target": "x86_64-macos",
40
40
  "lib": "libkuri_ffi.dylib",
41
- "sha256": "44ef8a02fa2e3f70f52cfb915cc2cf69e321964770c3fcdaaa9906341044fa5d"
41
+ "sha256": "730a09622d95ee3a5c9f5c41a7971d8ab93e7ca401be717c54c13bbbe498f604"
42
42
  },
43
43
  "linux-arm64": {
44
44
  "zig_target": "aarch64-linux",
45
45
  "lib": "libkuri_ffi.so",
46
- "sha256": "18a8c9834bfd1090d29d9ea20426dc8b798a7eb0c3a64949cefe0ef83551a7e3"
46
+ "sha256": "ef27e02d48f90d159b0bbcbc35b07970354af2924653ba0b00a863d9153c0313"
47
47
  },
48
48
  "linux-x64": {
49
49
  "zig_target": "x86_64-linux",
50
50
  "lib": "libkuri_ffi.so",
51
- "sha256": "9c874005b5d4b32ed52999a68a4f5961b304e072cf152bac28aa86351d662ac7"
51
+ "sha256": "de4075e2444204d2f860d1293577d4389247c6fe55049c7505196d9e8eb5f4fb"
52
52
  }
53
53
  }
54
54
  }
Binary file