unbrowse 2.8.5 → 2.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.js CHANGED
@@ -721,6 +721,51 @@ var init_client = __esm(() => {
721
721
  });
722
722
 
723
723
  // ../../src/transform/index.ts
724
+ var exports_transform = {};
725
+ __export(exports_transform, {
726
+ resolvePath: () => resolvePath,
727
+ project: () => project,
728
+ inferSchema: () => inferSchema,
729
+ detectEntityIndex: () => detectEntityIndex,
730
+ compact: () => compact,
731
+ buildEntityIndex: () => buildEntityIndex,
732
+ applyProjection: () => applyProjection
733
+ });
734
/**
 * Builds a lookup table from `entityUrn` to the item that carries it.
 *
 * Entries that are not objects, or whose `entityUrn` is not a string, are
 * ignored. When several items share a URN the last one wins.
 *
 * @param {Iterable<unknown>} items - Candidate entities.
 * @returns {Map<string, object>} Index keyed by URN; empty when `items` is
 *   null, undefined, or not iterable.
 */
function buildEntityIndex(items) {
  const index = new Map();
  // This helper is exported, so tolerate callers that pass nothing iterable
  // instead of throwing from the for...of below.
  if (items == null || typeof items[Symbol.iterator] !== "function") {
    return index;
  }
  for (const item of items) {
    if (item == null || typeof item !== "object") {
      continue;
    }
    const urn = item.entityUrn;
    if (typeof urn === "string") {
      index.set(urn, item);
    }
  }
  return index;
}
745
/**
 * Looks for an `included` array on the payload itself or under its `data`
 * property and, when it looks like a list of URN-tagged entities, returns an
 * index over it.
 *
 * A candidate array qualifies when it has at least two entries and at least
 * half of its first five entries are objects with a string `entityUrn`.
 *
 * @param {unknown} data - Parsed response payload.
 * @returns {Map<string, object> | null} Index from `buildEntityIndex` for the
 *   first qualifying array, or null when none qualifies.
 */
function detectEntityIndex(data) {
  if (data == null || typeof data !== "object") {
    return null;
  }

  const candidates = [];
  if (Array.isArray(data.included)) {
    candidates.push(data.included);
  }
  const nested = data.data;
  if (nested && typeof nested === "object" && Array.isArray(nested.included)) {
    candidates.push(nested.included);
  }

  for (const items of candidates) {
    if (items.length < 2) {
      continue;
    }
    // Only the head of the array is inspected.
    const sample = items.slice(0, 5);
    let urnCount = 0;
    for (const entry of sample) {
      if (entry != null && typeof entry === "object" && typeof entry.entityUrn === "string") {
        urnCount += 1;
      }
    }
    if (urnCount >= sample.length * 0.5) {
      return buildEntityIndex(items);
    }
  }
  return null;
}
724
769
  function resolvePath(obj, path4, entityIndex) {
725
770
  const parts = path4.split(".");
726
771
  let current = [obj];
@@ -1091,6 +1136,20 @@ function mergeContextTemplateParams(params, urlTemplate, contextUrl) {
1091
1136
  }
1092
1137
 
1093
1138
  // ../../src/graph/index.ts
1139
+ var exports_graph = {};
1140
+ __export(exports_graph, {
1141
+ toAgentSkillChunkView: () => toAgentSkillChunkView,
1142
+ resolveEndpointSemantic: () => resolveEndpointSemantic,
1143
+ operationSoftPenalty: () => operationSoftPenalty,
1144
+ knownBindingsFromInputs: () => knownBindingsFromInputs,
1145
+ isRunnable: () => isRunnable,
1146
+ isOperationHardExcluded: () => isOperationHardExcluded,
1147
+ inferEndpointSemantic: () => inferEndpointSemantic,
1148
+ getSkillChunk: () => getSkillChunk,
1149
+ ensureSkillOperationGraph: () => ensureSkillOperationGraph,
1150
+ computeReachableEndpoints: () => computeReachableEndpoints,
1151
+ buildSkillOperationGraph: () => buildSkillOperationGraph
1152
+ });
1094
1153
  function normalizeTokenText(text) {
1095
1154
  return text.replace(/([a-z0-9])([A-Z])/g, "$1 $2").replace(/([A-Z]+)([A-Z][a-z])/g, "$1 $2").replace(/([a-zA-Z])(\d)/g, "$1 $2").replace(/(\d)([a-zA-Z])/g, "$1 $2");
1096
1155
  }
@@ -2203,6 +2262,12 @@ function extractRscDataEndpoints(body) {
2203
2262
  }
2204
2263
 
2205
2264
  // ../../src/reverse-engineer/index.ts
2265
+ var exports_reverse_engineer = {};
2266
+ __export(exports_reverse_engineer, {
2267
+ minePathTemplates: () => minePathTemplates,
2268
+ extractEndpoints: () => extractEndpoints,
2269
+ extractAuthHeaders: () => extractAuthHeaders
2270
+ });
2206
2271
  import { nanoid as nanoid2 } from "nanoid";
2207
2272
  function compactForSemanticExample(value, depth = 0) {
2208
2273
  if (depth > 2 || value == null)
@@ -3378,6 +3443,7 @@ __export(exports_capture, {
3378
3443
  navigatePageForCapture: () => navigatePageForCapture,
3379
3444
  isBrowserAccessAvailable: () => isBrowserAccessAvailable,
3380
3445
  isBlockedAppShell: () => isBlockedAppShell,
3446
+ injectInterceptor: () => injectInterceptor,
3381
3447
  hasUsefulCapturedResponses: () => hasUsefulCapturedResponses,
3382
3448
  filterFirstPartySessionCookies: () => filterFirstPartySessionCookies,
3383
3449
  executeInBrowser: () => executeInBrowser,
@@ -3596,6 +3662,14 @@ async function collectInterceptedRequests(tabId) {
3596
3662
  } catch {}
3597
3663
  return [];
3598
3664
  }
3665
/**
 * Installs the in-page network interceptor by evaluating three script chunks
 * in order for the given tab.
 *
 * Each chunk is passed to `evaluate(tabId, chunk)`; a rejection from any chunk
 * is swallowed, so the remaining chunks are still attempted.
 * NOTE(review): `evaluate` is defined outside this block; presumably it runs
 * the script in the page identified by `tabId` — confirm.
 *
 * @param {*} tabId - Tab identifier forwarded unchanged to `evaluate`.
 * @returns {Promise<void>} Resolves after all three chunks have been attempted.
 */
async function injectInterceptor(tabId) {
  // One-time setup: bail if the install flag is already set, otherwise set it,
  // reset the `__unbrowse_intercepted` buffer and publish the limits
  // (2 MiB body, 2 MiB JS body, 500 entries) on `window`.
  const SETUP = `(function(){if(window.__unbrowse_interceptor_installed)return;window.__unbrowse_interceptor_installed=true;window.__unbrowse_intercepted=[];window.__UB_MAX=2*1024*1024;window.__UB_MAX_JS=2*1024*1024;window.__UB_MAX_N=500;})()`;
  // Wraps window.fetch. Responses are recorded only while the buffer is below
  // the entry limit, when they look like JS or data (by content-type or URL
  // substring), are not static assets by extension, and fit the size limit.
  const FETCH_PATCH = `(function(){if(!window.__unbrowse_interceptor_installed)return;var M=window.__UB_MAX,MJ=window.__UB_MAX_JS,MN=window.__UB_MAX_N;var oF=window.fetch;window.fetch=function(){var a=arguments,u=typeof a[0]==='string'?a[0]:(a[0]&&a[0].url?a[0].url:''),o=a[1]||{},m=(o.method||'GET').toUpperCase(),rb=o.body?String(o.body).substring(0,M):void 0,rh={};if(o.headers){if(typeof o.headers.forEach==='function')o.headers.forEach(function(v,k){rh[k]=v});else Object.keys(o.headers).forEach(function(k){rh[k]=o.headers[k]})}return oF.apply(this,a).then(function(r){if(window.__unbrowse_intercepted.length>=MN)return r;var ct=r.headers.get('content-type')||'';var isJ=ct.indexOf('javascript')!==-1||/\\.js(\\?|$)/.test(u);var isD=ct.indexOf('json')!==-1||ct.indexOf('x-protobuf')!==-1||ct.indexOf('text/plain')!==-1||u.indexOf('/api/')!==-1||u.indexOf('graphql')!==-1||u.indexOf('voyager')!==-1;if(!isJ&&!isD)return r;if(/\\.(css|woff2?|png|jpg|svg|ico)(\\?|$)/.test(u))return r;var c=r.clone();c.text().then(function(b){var lim=isJ?MJ:M;if(b.length>lim)return;var rr={};r.headers.forEach(function(v,k){rr[k]=v});window.__unbrowse_intercepted.push({url:u,method:m,request_headers:rh,request_body:rb,response_status:r.status,response_headers:rr,response_body:b,content_type:ct,is_js:isJ,timestamp:new Date().toISOString()})}).catch(function(){});return r}).catch(function(e){throw e})}})()`;
  // Wraps XMLHttpRequest.prototype.open/send. `open` remembers method, URL and
  // request headers on the instance; `send` adds a load listener that applies
  // the same filters as the fetch patch. Response headers are recorded as {}.
  const XHR_PATCH = `(function(){if(!window.__unbrowse_interceptor_installed)return;var M=window.__UB_MAX,MJ=window.__UB_MAX_JS,MN=window.__UB_MAX_N;var oO=XMLHttpRequest.prototype.open,oS=XMLHttpRequest.prototype.send;XMLHttpRequest.prototype.open=function(m,u){this.__ub_m=m;this.__ub_u=u;this.__ub_h={};var oSH=this.setRequestHeader.bind(this);this.setRequestHeader=function(k,v){this.__ub_h[k]=v;oSH(k,v)}.bind(this);return oO.apply(this,arguments)};XMLHttpRequest.prototype.send=function(b){var x=this;x.addEventListener('load',function(){if(window.__unbrowse_intercepted.length>=MN)return;var ct=x.getResponseHeader('content-type')||'',u=x.__ub_u||'';var isJ=ct.indexOf('javascript')!==-1||/\\.js(\\?|$)/.test(u);var isD=ct.indexOf('json')!==-1||ct.indexOf('x-protobuf')!==-1||ct.indexOf('text/plain')!==-1||u.indexOf('/api/')!==-1||u.indexOf('graphql')!==-1||u.indexOf('voyager')!==-1;if(!isJ&&!isD)return;if(/\\.(css|woff2?|png|jpg|svg|ico)(\\?|$)/.test(u))return;var rb=x.responseText||'';var lim=isJ?MJ:M;if(rb.length>lim)return;window.__unbrowse_intercepted.push({url:u,method:(x.__ub_m||'GET').toUpperCase(),request_headers:x.__ub_h||{},request_body:b?String(b).substring(0,M):void 0,response_status:x.status,response_headers:{},response_body:rb,content_type:ct,is_js:isJ,timestamp:new Date().toISOString()})});return oS.apply(this,arguments)}})()`;
  // Sequential on purpose: both patches return early unless SETUP has already
  // set the install flag. Failures are ignored (best-effort injection).
  for (const chunk of [SETUP, FETCH_PATCH, XHR_PATCH]) {
    await evaluate(tabId, chunk).catch(() => {});
  }
}
3599
3673
  function mergePassiveCaptureData(intercepted, harEntries, extensionEntries, responseBodies) {
3600
3674
  const seen = new Map;
3601
3675
  for (const entry of intercepted) {
@@ -4385,7 +4459,7 @@ var MAX_CONCURRENT_TABS = 3, activeTabs = 0, waitQueue, activeTabRegistry, inter
4385
4459
  if (window.__unbrowse_interceptor_installed) return;
4386
4460
  window.__unbrowse_interceptor_installed = true;
4387
4461
  window.__unbrowse_intercepted = [];
4388
- var MAX_BODY = 512 * 1024;
4462
+ var MAX_BODY = 2 * 1024 * 1024;
4389
4463
  var MAX_JS_BODY = 2 * 1024 * 1024;
4390
4464
  var MAX_ENTRIES = 500;
4391
4465
 
@@ -4409,9 +4483,10 @@ var MAX_CONCURRENT_TABS = 3, activeTabs = 0, waitQueue, activeTabRegistry, inter
4409
4483
  if (window.__unbrowse_intercepted.length >= MAX_ENTRIES) return response;
4410
4484
  var ct = response.headers.get('content-type') || '';
4411
4485
  var isJs = ct.indexOf('javascript') !== -1 || /\\.js(\\?|$)/.test(url);
4412
- var isData = ct.indexOf('application/json') !== -1 || ct.indexOf('+json') !== -1 ||
4413
- ct.indexOf('application/x-protobuf') !== -1 || ct.indexOf('text/plain') !== -1 ||
4414
- url.indexOf('batchexecute') !== -1 || url.indexOf('/api/') !== -1;
4486
+ var isData = ct.indexOf('json') !== -1 || ct.indexOf('application/x-protobuf') !== -1 ||
4487
+ ct.indexOf('text/plain') !== -1 ||
4488
+ url.indexOf('batchexecute') !== -1 || url.indexOf('/api/') !== -1 ||
4489
+ url.indexOf('graphql') !== -1 || url.indexOf('voyager') !== -1;
4415
4490
  if (!isJs && !isData) return response;
4416
4491
  if (/\\.(css|woff2?|png|jpg|svg|ico)(\\?|$)/.test(url)) return response;
4417
4492
  var clone = response.clone();
@@ -4458,9 +4533,10 @@ var MAX_CONCURRENT_TABS = 3, activeTabs = 0, waitQueue, activeTabRegistry, inter
4458
4533
  var ct = xhr.getResponseHeader('content-type') || '';
4459
4534
  var url = xhr.__unbrowse_url || '';
4460
4535
  var isJs = ct.indexOf('javascript') !== -1 || /\\.js(\\?|$)/.test(url);
4461
- var isData = ct.indexOf('application/json') !== -1 || ct.indexOf('+json') !== -1 ||
4462
- ct.indexOf('application/x-protobuf') !== -1 || ct.indexOf('text/plain') !== -1 ||
4463
- url.indexOf('batchexecute') !== -1 || url.indexOf('/api/') !== -1;
4536
+ var isData = ct.indexOf('json') !== -1 || ct.indexOf('application/x-protobuf') !== -1 ||
4537
+ ct.indexOf('text/plain') !== -1 ||
4538
+ url.indexOf('batchexecute') !== -1 || url.indexOf('/api/') !== -1 ||
4539
+ url.indexOf('graphql') !== -1 || url.indexOf('voyager') !== -1;
4464
4540
  if (!isJs && !isData) return;
4465
4541
  if (/\\.(css|woff2?|png|jpg|svg|ico)(\\?|$)/.test(url)) return;
4466
4542
  var respBody = xhr.responseText || '';
@@ -4547,6 +4623,21 @@ import { join as join3 } from "path";
4547
4623
  import { homedir as homedir2, hostname as hostname2 } from "os";
4548
4624
  import { randomBytes as randomBytes2, createHash as createHash2 } from "crypto";
4549
4625
  import { createInterface as createInterface2 } from "readline";
4626
/**
 * Decodes a Base64-encoded UTF-8 JSON document.
 *
 * Accepts both the standard and the URL-safe Base64 alphabets. Never throws:
 * any decoding or parsing failure yields `undefined`.
 *
 * @param {string} value - Base64 (or base64url) text.
 * @returns {unknown} The parsed JSON value, or `undefined` when `value` is
 *   not a string, is not valid Base64, or does not contain valid JSON.
 */
function decodeBase64Json2(value) {
  if (typeof value !== "string") {
    return undefined;
  }
  try {
    if (typeof globalThis !== "undefined" && typeof globalThis.atob === "function") {
      // atob() only understands the standard alphabet, whereas the Buffer
      // fallback below also accepts "-" and "_". Normalise first so both
      // branches accept the same inputs.
      const normalized = value.replace(/-/g, "+").replace(/_/g, "/");
      const binary = globalThis.atob(normalized);
      // atob() yields one char per byte; re-pack so multi-byte UTF-8 decodes.
      const bytes = Uint8Array.from(binary, (ch) => ch.charCodeAt(0));
      return JSON.parse(new TextDecoder("utf-8").decode(bytes));
    }
    return JSON.parse(Buffer.from(value, "base64").toString("utf8"));
  } catch {
    return undefined;
  }
}
4550
4641
  function scopedSkillKey(skillId, scopeId) {
4551
4642
  return scopeId ? `${scopeId}:${skillId}` : skillId;
4552
4643
  }
@@ -4680,7 +4771,7 @@ async function findUsableApiKey2() {
4680
4771
  async function api2(method, path4, body, opts) {
4681
4772
  const key = opts?.noAuth ? "" : getApiKey2();
4682
4773
  const controller = new AbortController;
4683
- const timer = setTimeout(() => controller.abort(), API_TIMEOUT_MS2);
4774
+ const timer = setTimeout(() => controller.abort(), opts?.timeoutMs ?? API_TIMEOUT_MS2);
4684
4775
  let res;
4685
4776
  try {
4686
4777
  res = await fetch(`${API_URL2}${path4}`, {
@@ -4709,8 +4800,9 @@ async function api2(method, path4, body, opts) {
4709
4800
  throw new Error("ToS update required. Restart unbrowse to accept new terms.");
4710
4801
  }
4711
4802
  if (res.status === 402) {
4712
- const paymentTerms = res.headers.get("X-Payment-Required");
4713
- const terms = paymentTerms ? JSON.parse(paymentTerms) : data.terms;
4803
+ const paymentRequired = res.headers.get("PAYMENT-REQUIRED");
4804
+ const legacyPaymentTerms = res.headers.get("X-Payment-Required");
4805
+ const terms = paymentRequired ? decodeBase64Json2(paymentRequired) : legacyPaymentTerms ? JSON.parse(legacyPaymentTerms) : data.terms;
4714
4806
  const err = new Error(`Payment required: ${data.error ?? "This skill requires payment"}`);
4715
4807
  err.x402 = true;
4716
4808
  err.terms = terms;
@@ -5002,7 +5094,7 @@ async function publishSkill(draft) {
5002
5094
  }
5003
5095
  if (LOCAL_ONLY2)
5004
5096
  throw new Error("local-only mode");
5005
- return api2("POST", "/v1/skills", draft);
5097
+ return api2("POST", "/v1/skills", draft, { timeoutMs: PUBLISH_TIMEOUT_MS2 });
5006
5098
  }
5007
5099
  async function deprecateSkill(skillId) {
5008
5100
  if (LOCAL_ONLY2)
@@ -5194,7 +5286,7 @@ async function getCreatorEarnings(agentId) {
5194
5286
  async function setSkillPrice(skillId, priceUsd) {
5195
5287
  return api2("PATCH", `/v1/skills/${skillId}`, { base_price_usd: priceUsd });
5196
5288
  }
5197
- var API_URL2, PROFILE_NAME2, recentLocalSkills2, LOCAL_ONLY2, EMAIL_RE2, API_TIMEOUT_MS2;
5289
+ var API_URL2, PROFILE_NAME2, recentLocalSkills2, LOCAL_ONLY2, EMAIL_RE2, API_TIMEOUT_MS2, PUBLISH_TIMEOUT_MS2;
5198
5290
  var init_client2 = __esm(() => {
5199
5291
  API_URL2 = process.env.UNBROWSE_BACKEND_URL || "https://beta-api.unbrowse.ai";
5200
5292
  PROFILE_NAME2 = sanitizeProfileName2(process.env.UNBROWSE_PROFILE ?? "");
@@ -5202,6 +5294,7 @@ var init_client2 = __esm(() => {
5202
5294
  LOCAL_ONLY2 = process.env.UNBROWSE_LOCAL_ONLY === "1";
5203
5295
  EMAIL_RE2 = /^[^\s@]+@[^\s@]+\.[^\s@]+$/i;
5204
5296
  API_TIMEOUT_MS2 = parseInt(process.env.UNBROWSE_API_TIMEOUT ?? "8000", 10);
5297
+ PUBLISH_TIMEOUT_MS2 = parseInt(process.env.UNBROWSE_PUBLISH_TIMEOUT ?? "30000", 10);
5205
5298
  });
5206
5299
 
5207
5300
  // ../../src/marketplace/index.ts
@@ -5281,7 +5374,6 @@ var init_marketplace = __esm(() => {
5281
5374
  import { createHash as createHash3 } from "crypto";
5282
5375
  import { readFileSync as readFileSync3, readdirSync as readdirSync3 } from "fs";
5283
5376
  import { dirname, join as join4 } from "path";
5284
- import { execSync } from "child_process";
5285
5377
  import { fileURLToPath as fileURLToPath2 } from "url";
5286
5378
  function collectTsFiles(dir) {
5287
5379
  const results = [];
@@ -5310,11 +5402,7 @@ function computeCodeHash() {
5310
5402
  }
5311
5403
  }
5312
5404
  /**
   * Returns the git revision label for this build.
   *
   * No git lookup is performed here: the function always returns the
   * placeholder string "unknown".
   *
   * @returns {string} Always "unknown".
   */
  function getGitSha() {
    return "unknown";
  }
5319
5407
  var MODULE_DIR, CODE_HASH, GIT_SHA, TRACE_VERSION;
5320
5408
  var init_version = __esm(() => {
@@ -7531,6 +7619,14 @@ function assessIntentResult(data, intent) {
7531
7619
  }
7532
7620
 
7533
7621
  // ../../src/extraction/index.ts
7622
+ var exports_extraction = {};
7623
+ __export(exports_extraction, {
7624
+ parseStructured: () => parseStructured,
7625
+ extractSPAData: () => extractSPAData,
7626
+ extractFromDOMWithHint: () => extractFromDOMWithHint,
7627
+ extractFromDOM: () => extractFromDOM,
7628
+ cleanDOM: () => cleanDOM
7629
+ });
7534
7630
  import * as cheerio from "cheerio";
7535
7631
  function extractFlashNoticeSpecial(html, intent) {
7536
7632
  if (!/\b(flash|message|messages|alert|success|error|warning)\b/i.test(intent))
@@ -9062,6 +9158,11 @@ var init_agent_augment = __esm(() => {
9062
9158
  });
9063
9159
 
9064
9160
  // ../../src/execution/search-forms.ts
9161
+ var exports_search_forms = {};
9162
+ __export(exports_search_forms, {
9163
+ isStructuredSearchForm: () => isStructuredSearchForm,
9164
+ detectSearchForms: () => detectSearchForms
9165
+ });
9065
9166
  function isStructuredSearchForm(spec) {
9066
9167
  return spec.fields.length > 0 && !!spec.submit_selector;
9067
9168
  }
@@ -9375,6 +9476,26 @@ var init_payments = __esm(() => {
9375
9476
  });
9376
9477
 
9377
9478
  // ../../src/execution/index.ts
9479
+ var exports_execution = {};
9480
+ __export(exports_execution, {
9481
+ validateExtractionQuality: () => validateExtractionQuality,
9482
+ templatizeQueryParams: () => templatizeQueryParams,
9483
+ shouldIgnoreLearnedBrowserStrategy: () => shouldIgnoreLearnedBrowserStrategy,
9484
+ resolveExecutionUrlTemplate: () => resolveExecutionUrlTemplate,
9485
+ rankEndpoints: () => rankEndpoints,
9486
+ projectResultForIntent: () => projectResultForIntent,
9487
+ isCanonicalReplayEndpoint: () => isCanonicalReplayEndpoint,
9488
+ isBundleInferredEndpoint: () => isBundleInferredEndpoint,
9489
+ executeSkill: () => executeSkill,
9490
+ executeEndpoint: () => executeEndpoint,
9491
+ deriveStructuredDataReplayUrl: () => deriveStructuredDataReplayUrl,
9492
+ deriveStructuredDataReplayTemplate: () => deriveStructuredDataReplayTemplate,
9493
+ deriveStructuredDataReplayCandidatesFromInputs: () => deriveStructuredDataReplayCandidatesFromInputs,
9494
+ deriveStructuredDataReplayCandidates: () => deriveStructuredDataReplayCandidates,
9495
+ buildStructuredReplayHeaders: () => buildStructuredReplayHeaders,
9496
+ buildPageArtifactCapture: () => buildPageArtifactCapture,
9497
+ buildCanonicalDocumentEndpoint: () => buildCanonicalDocumentEndpoint
9498
+ });
9378
9499
  import { nanoid as nanoid5 } from "nanoid";
9379
9500
  function stampTrace(trace) {
9380
9501
  trace.trace_version = TRACE_VERSION;
@@ -10104,6 +10225,24 @@ async function executeSkill(skill, params = {}, projection, options) {
10104
10225
  const { endpoint_id: _, ...cleanParams } = params;
10105
10226
  return executeEndpoint(skill, target, cleanParams, projection, options);
10106
10227
  }
10228
+ log("exec", `endpoint ${params.endpoint_id} not found in skill ${skill.skill_id} (${skill.endpoints.length} endpoints: ${skill.endpoints.map((e) => e.endpoint_id).join(", ")})`);
10229
+ const trace = {
10230
+ trace_id: nanoid5(),
10231
+ skill_id: skill.skill_id,
10232
+ endpoint_id: String(params.endpoint_id),
10233
+ started_at: new Date().toISOString(),
10234
+ completed_at: new Date().toISOString(),
10235
+ success: false,
10236
+ error: `endpoint_not_found: ${params.endpoint_id} not in skill ${skill.skill_id}`
10237
+ };
10238
+ return {
10239
+ trace,
10240
+ result: {
10241
+ error: "endpoint_not_found",
10242
+ message: `Endpoint ${params.endpoint_id} not found in skill ${skill.skill_id}. Available: ${skill.endpoints.map((e) => `${e.endpoint_id} (${e.description?.slice(0, 50)})`).join(", ")}`,
10243
+ available_endpoints: skill.endpoints.map((e) => ({ endpoint_id: e.endpoint_id, description: e.description }))
10244
+ }
10245
+ };
10107
10246
  }
10108
10247
  const endpoint = selectBestEndpoint(skill.endpoints, options?.intent ?? skill.intent_signature, skill.domain, options?.contextUrl);
10109
10248
  return executeEndpoint(skill, endpoint, params, projection, options);
@@ -10578,6 +10717,22 @@ async function executeDomExtractionEndpoint(endpoint, url, intent, authHeaders,
10578
10717
  console.log(`[ssr-fast] miss, falling back to browser`);
10579
10718
  }
10580
10719
  const captured = await captureSession(url, authHeaders, cookies, intent);
10720
+ if (captured.requests.length > 0) {
10721
+ const { extractEndpoints: extractEps } = await Promise.resolve().then(() => (init_reverse_engineer(), exports_reverse_engineer));
10722
+ const apiEndpoints = extractEps(captured.requests, undefined, { pageUrl: url, finalUrl: captured.final_url });
10723
+ const jsonEndpoints = apiEndpoints.filter((ep) => ep.response_schema && !ep.dom_extraction);
10724
+ if (jsonEndpoints.length > 0) {
10725
+ const best = jsonEndpoints[0];
10726
+ const matchingReq = captured.requests.find((r) => r.url.includes(best.url_template.split("?")[0].split("{")[0]) && r.response_body && r.response_status >= 200 && r.response_status < 400);
10727
+ if (matchingReq?.response_body) {
10728
+ try {
10729
+ const data = JSON.parse(matchingReq.response_body);
10730
+ console.log(`[dom-exec] found API response from browser capture: ${matchingReq.url.substring(0, 80)}`);
10731
+ return { data, status: matchingReq.response_status, trace_id: nanoid5() };
10732
+ } catch {}
10733
+ }
10734
+ }
10735
+ }
10581
10736
  const html = captured.html ?? "";
10582
10737
  const extracted = extractFromDOMWithHint(html, intent, endpoint.dom_extraction);
10583
10738
  if (extracted.data) {
@@ -10952,7 +11107,19 @@ async function executeEndpoint(skill, endpoint, params = {}, projection, options
10952
11107
  if (result.status >= 200 && result.status < 400 && !shouldFallbackToBrowserReplay(result.data, endpoint, options?.intent ?? skill.intent_signature, options?.contextUrl)) {
10953
11108
  strategy = "server";
10954
11109
  } else if (endpoint.trigger_url && isSafe) {
10955
- result = await triggerAndIntercept(endpoint.trigger_url, endpoint.url_template, cookies, authHeaders);
11110
+ let triggerUrl = endpoint.trigger_url;
11111
+ if (Object.keys(mergedParams).length > 0) {
11112
+ try {
11113
+ const tu = new URL(endpoint.trigger_url);
11114
+ for (const [k, v] of Object.entries(mergedParams)) {
11115
+ if (v != null && !reservedMetaParams.has(k)) {
11116
+ tu.searchParams.set(k, String(v));
11117
+ }
11118
+ }
11119
+ triggerUrl = tu.toString();
11120
+ } catch {}
11121
+ }
11122
+ result = await triggerAndIntercept(triggerUrl, endpoint.url_template, cookies, authHeaders);
10956
11123
  strategy = "trigger-intercept";
10957
11124
  } else {
10958
11125
  result = await withRetry(browserCall, (r) => isRetryableStatus(r.status));
@@ -10967,8 +11134,19 @@ async function executeEndpoint(skill, endpoint, params = {}, projection, options
10967
11134
  strategy = "server";
10968
11135
  }
10969
11136
  } else if (endpointStrategy === "trigger-intercept" && endpoint.trigger_url && isSafe) {
10970
- log("exec", `using learned strategy trigger-intercept via ${endpoint.trigger_url}`);
10971
- result = await triggerAndIntercept(endpoint.trigger_url, endpoint.url_template, cookies, authHeaders);
11137
+ let triggerUrl = endpoint.trigger_url;
11138
+ if (Object.keys(mergedParams).length > 0) {
11139
+ try {
11140
+ const tu = new URL(endpoint.trigger_url);
11141
+ for (const [k, v] of Object.entries(mergedParams)) {
11142
+ if (v != null && !reservedMetaParams.has(k))
11143
+ tu.searchParams.set(k, String(v));
11144
+ }
11145
+ triggerUrl = tu.toString();
11146
+ } catch {}
11147
+ }
11148
+ log("exec", `using learned strategy trigger-intercept via ${triggerUrl}`);
11149
+ result = await triggerAndIntercept(triggerUrl, endpoint.url_template, cookies, authHeaders);
10972
11150
  strategy = "trigger-intercept";
10973
11151
  } else if (endpointStrategy === "browser") {
10974
11152
  if (shouldIgnoreLearnedBrowserStrategy(endpoint, url)) {
@@ -10999,7 +11177,18 @@ async function executeEndpoint(skill, endpoint, params = {}, projection, options
10999
11177
  } else {
11000
11178
  log("exec", `server fetch returned ${result.status}, falling back`);
11001
11179
  if (endpoint.trigger_url && isSafe) {
11002
- result = await triggerAndIntercept(endpoint.trigger_url, endpoint.url_template, cookies, authHeaders);
11180
+ let triggerUrl = endpoint.trigger_url;
11181
+ if (Object.keys(mergedParams).length > 0) {
11182
+ try {
11183
+ const tu = new URL(endpoint.trigger_url);
11184
+ for (const [k, v] of Object.entries(mergedParams)) {
11185
+ if (v != null && !reservedMetaParams.has(k))
11186
+ tu.searchParams.set(k, String(v));
11187
+ }
11188
+ triggerUrl = tu.toString();
11189
+ } catch {}
11190
+ }
11191
+ result = await triggerAndIntercept(triggerUrl, endpoint.url_template, cookies, authHeaders);
11003
11192
  strategy = "trigger-intercept";
11004
11193
  } else {
11005
11194
  result = await withRetry(browserCall, (r) => isRetryableStatus(r.status));
@@ -12739,15 +12928,84 @@ import { nanoid as nanoid7 } from "nanoid";
12739
12928
  import { existsSync as existsSync9, writeFileSync as writeFileSync7, readFileSync as readFileSync6, mkdirSync as mkdirSync8, readdirSync as readdirSync5 } from "node:fs";
12740
12929
  import { dirname as dirname2, join as join9 } from "node:path";
12741
12930
  import { createHash as createHash6 } from "node:crypto";
12742
/**
 * Condenses a JSON-schema-like object into a small nested outline of types.
 *
 * Objects become `{ key: summary }`, arrays become a one-element list holding
 * the item summary (or the item type name, "unknown" if absent), and anything
 * else becomes its `type` value. Descent stops after `maxDepth` levels, at
 * which point a node is represented by its `type` alone.
 *
 * @param {object} schema - Schema node with optional `type`, `properties`, `items`.
 * @param {number} [maxDepth=3] - Maximum nesting depth to expand.
 * @returns {object | null} Outline for object schemas, `{ "[]": outline }` for
 *   array schemas, or null for anything else (including null/undefined input).
 */
function summarizeSchema(schema, maxDepth = 3) {
  const walk = (node, depth) => {
    // A missing sub-schema has no type to report; avoid dereferencing it.
    if (node == null) {
      return undefined;
    }
    if (depth <= 0) {
      return node.type;
    }
    if (node.type === "array" && node.items) {
      const inner = walk(node.items, depth - 1);
      // Structured item summaries are kept; scalar items collapse to a name.
      return inner && typeof inner === "object" ? [inner] : [`${node.items.type ?? "unknown"}`];
    }
    if (node.properties) {
      const outline = {};
      for (const [key, child] of Object.entries(node.properties)) {
        outline[key] = walk(child, depth - 1);
      }
      return outline;
    }
    return node.type;
  };

  if (schema == null) {
    return null;
  }
  if (schema.properties) {
    return walk(schema, maxDepth);
  }
  if (schema.type === "array" && schema.items) {
    return { "[]": walk(schema.items, maxDepth - 1) };
  }
  return null;
}
12954
/**
 * Collects a handful of representative leaf values from a sample payload,
 * keyed by dotted path (array hops are written as `[]`).
 *
 * Only the first element of each array is visited. Keys in the ignore list
 * are skipped, as are leaves that are null/undefined, "", 0 or false. Strings
 * longer than 80 characters are cut to 77 characters plus "...". Traversal
 * stops once `maxLeaves` values are recorded or nesting exceeds depth 10.
 *
 * @param {unknown} sample - Sample response body.
 * @param {number} [maxLeaves=12] - Maximum number of leaves to record.
 * @returns {Record<string, unknown> | null} Path-to-value map, or null when
 *   nothing was recorded.
 */
function extractSampleValues(sample, maxLeaves = 12) {
  if (sample == null) {
    return null;
  }

  const ignoredKeys = new Set([
    "__typename",
    "entryType",
    "itemType",
    "clientEventInfo",
    "feedbackInfo",
    "controllerData",
    "injectionType",
    "sortIndex",
    "cursor",
    "cursorType",
    "displayTreatment",
    "socialContext",
    "promotedMetadata",
    "feedbackKeys",
    "tweetDisplayType",
    "element",
    "component",
    "details"
  ]);
  const leaves = {};
  let recorded = 0;

  const isBlankLeaf = (value) => value == null || value === "" || value === 0 || value === false;
  const clip = (value) =>
    typeof value === "string" && value.length > 80 ? value.slice(0, 77) + "..." : value;

  const visit = (node, prefix, depth) => {
    if (recorded >= maxLeaves || depth > 10 || node == null) {
      return;
    }
    if (Array.isArray(node)) {
      // The first element stands in for the whole array.
      if (node.length > 0) {
        visit(node[0], prefix + "[]", depth + 1);
      }
      return;
    }
    if (typeof node !== "object") {
      return;
    }
    for (const [key, value] of Object.entries(node)) {
      if (recorded >= maxLeaves) {
        break;
      }
      if (ignoredKeys.has(key)) {
        continue;
      }
      const childPath = prefix ? `${prefix}.${key}` : key;
      if (value != null && typeof value === "object") {
        visit(value, childPath, depth + 1);
        continue;
      }
      if (isBlankLeaf(value)) {
        continue;
      }
      leaves[childPath] = clip(value);
      recorded += 1;
    }
  };

  visit(sample, "", 0);
  return recorded > 0 ? leaves : null;
}
12751
13009
  function persistDomainCache() {
12752
13010
  try {
12753
13011
  const dir = dirname2(DOMAIN_CACHE_FILE);
@@ -13340,22 +13598,6 @@ function dedupeObservedOverBundle(ranked) {
13340
13598
  }
13341
13599
  return Array.from(byRoute.values()).sort((a, b) => b.score - a.score);
13342
13600
  }
13343
- function extractBinaryVerdict(payload) {
13344
- for (const value of Object.values(payload)) {
13345
- if (typeof value !== "string")
13346
- continue;
13347
- const normalized = value.trim().toLowerCase();
13348
- if (normalized === "pass" || normalized.startsWith("pass "))
13349
- return "pass";
13350
- if (normalized === "fail" || normalized.startsWith("fail "))
13351
- return "fail";
13352
- if (normalized.includes('"pass"'))
13353
- return "pass";
13354
- if (normalized.includes('"fail"'))
13355
- return "fail";
13356
- }
13357
- return "skip";
13358
- }
13359
13601
  function obviousSemanticMismatch(intent, endpoint, result) {
13360
13602
  const haystack = `${intent} ${endpoint.url_template} ${endpoint.description ?? ""}`.toLowerCase();
13361
13603
  const wantsChannels = /\b(channel|channels|guild|guilds|message|messages|thread|threads|dm|chat)\b/.test(intent.toLowerCase());
@@ -13628,88 +13870,7 @@ async function inferParamsFromIntent(urlTemplate, intent, unboundParams, endpoin
13628
13870
  }
13629
13871
  }
13630
13872
  }
13631
- const system = `You extract URL query/path parameter values from a user's natural-language intent.
13632
- Given a URL template with placeholder parameters and the user's intent, return a JSON object mapping parameter names to their values.
13633
-
13634
- Rules:
13635
- - Only fill in parameters where the intent clearly implies a value
13636
- - For search/query parameters, extract the search terms from the intent
13637
- - For filter parameters (location, category, price, date, etc.), extract if mentioned
13638
- - Strip meta-phrases like "search for", "find me", "on amazon" — just return the core value
13639
- - If you can't determine a value for a parameter, omit it from the response
13640
- - Return raw values, not URL-encoded
13641
-
13642
- Examples:
13643
- URL: https://amazon.com/s?k={k}&ref={ref}
13644
- Intent: "search for wireless headphones under $50"
13645
- → {"k": "wireless headphones under $50"}
13646
-
13647
- URL: https://yelp.com/search?find_desc={find_desc}&find_loc={find_loc}
13648
- Intent: "find pizza restaurants in san francisco"
13649
- → {"find_desc": "pizza restaurants", "find_loc": "san francisco"}
13650
-
13651
- URL: https://booking.com/searchresults.html?ss={ss}&checkin={checkin}&checkout={checkout}
13652
- Intent: "hotels in tokyo for march 20 to march 25"
13653
- → {"ss": "tokyo", "checkin": "2026-03-20", "checkout": "2026-03-25"}`;
13654
- const user = `URL template: ${urlTemplate}
13655
- ${endpointDescription ? `Endpoint description: ${endpointDescription}` : ""}
13656
- Unbound parameters: ${unboundParams.join(", ")}
13657
- User intent: ${intent}
13658
-
13659
- Return JSON mapping parameter names to values. Only include parameters you can confidently fill from the intent.`;
13660
- const result = await callJsonAgent(system, user, {});
13661
- const unboundSet = new Set(unboundParams);
13662
- const filtered = {};
13663
- for (const [k, v] of Object.entries(result)) {
13664
- if (unboundSet.has(k) && v != null && v !== "") {
13665
- filtered[k] = String(v);
13666
- }
13667
- }
13668
- return filtered;
13669
- }
13670
- async function callJsonAgent(system, user, fallback) {
13671
- const providers = [
13672
- OPENAI_API_KEY ? { url: OPENAI_CHAT_URL2, key: OPENAI_API_KEY, model: JUDGE_MODEL } : null,
13673
- NEBIUS_API_KEY ? { url: CHAT_URL2, key: NEBIUS_API_KEY, model: JUDGE_MODEL } : null
13674
- ].filter((p) => !!p);
13675
- if (providers.length === 0)
13676
- return fallback;
13677
- const controller = new AbortController;
13678
- const timeout = setTimeout(() => controller.abort(), 8000);
13679
- try {
13680
- for (const provider of providers) {
13681
- const res = await fetch(provider.url, {
13682
- method: "POST",
13683
- headers: {
13684
- "content-type": "application/json",
13685
- Authorization: `Bearer ${provider.key}`
13686
- },
13687
- body: JSON.stringify({
13688
- model: provider.model,
13689
- temperature: 0,
13690
- max_tokens: 400,
13691
- response_format: { type: "json_object" },
13692
- messages: [
13693
- { role: "system", content: system },
13694
- { role: "user", content: user }
13695
- ]
13696
- }),
13697
- signal: controller.signal
13698
- });
13699
- if (!res.ok)
13700
- continue;
13701
- const json = await res.json();
13702
- const content = json.choices?.[0]?.message?.content;
13703
- if (!content)
13704
- continue;
13705
- return JSON.parse(content);
13706
- }
13707
- return fallback;
13708
- } catch {
13709
- return fallback;
13710
- } finally {
13711
- clearTimeout(timeout);
13712
- }
13873
+ return {};
13713
13874
  }
13714
13875
  async function withOpTimeout(label, ms, work) {
13715
13876
  return await Promise.race([
@@ -13858,62 +14019,21 @@ function prioritizeIntentMatchedApis(ranked, intent, contextUrl) {
13858
14019
  ...ranked.filter((candidate) => !preferredIds.has(candidate.endpoint.endpoint_id))
13859
14020
  ];
13860
14021
  }
13861
- async function agentSelectEndpoint(intent, skill, ranked, contextUrl) {
13862
- const topRanked = ranked.slice(0, 5);
13863
- const preferred = inferPreferredEntityTokens(intent);
13864
- const concreteEntityIntent = isConcreteEntityDetailIntent(intent, contextUrl);
13865
- const hasObservedCandidate = topRanked.some((r) => !/inferred from js bundle/i.test(r.endpoint.description ?? ""));
13866
- const narrowedBase = hasObservedCandidate ? topRanked.filter((r) => !/inferred from js bundle/i.test(r.endpoint.description ?? "")) : topRanked;
13867
- const hasPreferredObservedApi = concreteEntityIntent && preferred.length > 0 && narrowedBase.some((candidate) => candidateMatchesPreferredEntity(candidate, preferred) && !isDocumentLikeCandidate(candidate, contextUrl));
13868
- const narrowed = hasPreferredObservedApi ? narrowedBase.filter((candidate) => !isDocumentLikeCandidate(candidate, contextUrl)) : narrowedBase;
13869
- const top = narrowed.map((r) => ({
13870
- endpoint_id: r.endpoint.endpoint_id,
13871
- method: r.endpoint.method,
13872
- url: r.endpoint.url_template,
13873
- description: r.endpoint.description ?? "",
13874
- score: Math.round(r.score * 10) / 10,
13875
- schema: r.endpoint.response_schema ? summarizeSchema(r.endpoint.response_schema) : null,
13876
- dom_extraction: !!r.endpoint.dom_extraction,
13877
- trigger_url: r.endpoint.trigger_url ?? null
13878
- }));
13879
- const fallback = { ordered_endpoint_ids: top.map((r) => r.endpoint_id) };
13880
- const judged = await callJsonAgent("You pick the best endpoint(s) for a website task. Return JSON only.", JSON.stringify({
13881
- task: "rank_endpoints_for_execution",
13882
- intent,
13883
- domain: skill.domain,
13884
- context_url: contextUrl ?? null,
13885
- endpoints: top,
13886
- rules: [
13887
- "Prefer endpoints that directly satisfy the intent, not adjacent metadata.",
13888
- "Prefer final user-visible data over experiments, config, telemetry, auth, status, or affinity endpoints.",
13889
- "If the intent asks for channels/messages/people/documents/listings, reject endpoints that return unrelated experiments or scores.",
13890
- "Return ordered_endpoint_ids best-first. Do not invent ids."
13891
- ]
13892
- }), fallback);
13893
- const orderedRaw = judged.ordered_endpoint_ids ?? judged.endpoint_ids ?? judged.ids ?? [];
13894
- const ordered = orderedRaw.filter((id) => top.some((r) => r.endpoint_id === id));
13895
- return ordered.length > 0 ? ordered : fallback.ordered_endpoint_ids;
13896
- }
13897
- async function agentJudgeExecution(intent, endpoint, result) {
14022
+ async function agentSelectEndpoint(_intent, _skill, _ranked, _contextUrl) {
14023
+ return null;
14024
+ }
14025
+ function agentJudgeExecution(intent, endpoint, result) {
13898
14026
  if (obviousSemanticMismatch(intent, endpoint, result))
13899
14027
  return "fail";
13900
- const verdict = await callJsonAgent("You judge whether returned data satisfies a web data intent. Return JSON only.", JSON.stringify({
13901
- task: "judge_endpoint_result",
13902
- intent,
13903
- endpoint: {
13904
- endpoint_id: endpoint.endpoint_id,
13905
- method: endpoint.method,
13906
- url: endpoint.url_template,
13907
- description: endpoint.description ?? ""
13908
- },
13909
- result,
13910
- rules: [
13911
- "pass only if the returned data directly answers the intent",
13912
- "fail if the data is empty, unrelated, config, experiment, telemetry, status, auth/session, or only a weak proxy",
13913
- "for list/search intents, wrong entity type is fail"
13914
- ]
13915
- }), { verdict: "skip" });
13916
- return verdict.verdict ?? verdict.result ?? verdict.judgment ?? extractBinaryVerdict(verdict);
14028
+ if (result == null)
14029
+ return "fail";
14030
+ if (Array.isArray(result))
14031
+ return result.length > 0 ? "pass" : "fail";
14032
+ if (typeof result === "object")
14033
+ return Object.keys(result).length > 0 ? "pass" : "fail";
14034
+ if (typeof result === "string")
14035
+ return result.length > 0 ? "pass" : "fail";
14036
+ return "skip";
13917
14037
  }
13918
14038
  function normalizeParityRows(data, intent) {
13919
14039
  const projected = projectIntentData(data, intent);
@@ -13988,22 +14108,9 @@ function localParityVerdict(intent, browserBaseline, replayResult) {
13988
14108
  }
13989
14109
  return { verdict: "skip", reason: `low_overlap_${overlapRatio.toFixed(2)}` };
13990
14110
  }
13991
- async function agentJudgeParity(intent, browserBaseline, replayResult) {
13992
- const browserProjected = projectIntentData(browserBaseline, intent);
13993
- const replayProjected = projectIntentData(replayResult, intent);
13994
- const verdict = await callJsonAgent("You judge whether a replay/API result is close enough to the browser-visible result for the same web task. Return JSON only.", JSON.stringify({
13995
- task: "judge_browser_replay_parity",
13996
- intent,
13997
- browser_result: browserProjected,
13998
- replay_result: replayProjected,
13999
- rules: [
14000
- "This is a soft parity check, not strict equality.",
14001
- "Pass when the replay captures substantially the same user-visible entities or records, even if order, counts, or some fields differ.",
14002
- "Fail when the replay is a different entity type, obviously unrelated, or misses almost all visible items.",
14003
- "Skip when evidence is too sparse or ambiguous."
14004
- ]
14005
- }), { verdict: "skip" });
14006
- return verdict.verdict ?? verdict.result ?? verdict.judgment ?? extractBinaryVerdict(verdict);
14111
+ function agentJudgeParity(intent, browserBaseline, replayResult) {
14112
+ const local = localParityVerdict(intent, browserBaseline, replayResult);
14113
+ return local.verdict;
14007
14114
  }
14008
14115
  function resolveEndpointTemplateBindings(endpoint, params = {}, contextUrl) {
14009
14116
  const merged = mergeContextTemplateParams(params, endpoint.url_template, contextUrl);
@@ -14220,8 +14327,18 @@ async function resolveAndExecute(intent, params = {}, context, projection, optio
14220
14327
  url: r.endpoint.url_template.length > 120 ? r.endpoint.url_template.slice(0, 120) + "..." : r.endpoint.url_template,
14221
14328
  score: Math.round(r.score * 10) / 10,
14222
14329
  schema_summary: r.endpoint.response_schema ? summarizeSchema(r.endpoint.response_schema) : null,
14330
+ input_params: r.endpoint.semantic?.requires?.map((b) => ({
14331
+ key: b.key,
14332
+ type: b.type ?? b.semantic_type,
14333
+ required: b.required ?? false,
14334
+ example: b.example_value
14335
+ })) ?? [],
14336
+ description_in: r.endpoint.semantic?.description_in,
14337
+ example_fields: r.endpoint.semantic?.example_fields?.slice(0, 12),
14338
+ sample_values: extractSampleValues(r.endpoint.semantic?.example_response_compact),
14223
14339
  dom_extraction: !!r.endpoint.dom_extraction,
14224
- trigger_url: r.endpoint.trigger_url
14340
+ trigger_url: r.endpoint.trigger_url,
14341
+ needs_params: r.endpoint.semantic?.requires?.some((b) => b.required) ?? false
14225
14342
  })),
14226
14343
  ...extraFields
14227
14344
  },
@@ -14535,7 +14652,7 @@ async function resolveAndExecute(intent, params = {}, context, projection, optio
14535
14652
  continue;
14536
14653
  }
14537
14654
  const trustDomExtraction = candidate.endpoint.dom_extraction && !isCapturedPageArtifact && localAssessment.verdict !== "fail" && candidate.score >= 0;
14538
- const judged = localAssessment.verdict === "pass" || trustDomExtraction ? "pass" : await agentJudgeExecution(intent, candidate.endpoint, execOut.result);
14655
+ const judged = localAssessment.verdict === "pass" || trustDomExtraction ? "pass" : agentJudgeExecution(intent, candidate.endpoint, execOut.result);
14539
14656
  decisionTrace.autoexec_attempts.push({
14540
14657
  endpoint_id: candidate.endpoint.endpoint_id,
14541
14658
  score: Math.round(candidate.score * 10) / 10,
@@ -14845,9 +14962,104 @@ async function resolveAndExecute(intent, params = {}, context, projection, optio
14845
14962
  }
14846
14963
  }
14847
14964
  }
14965
+ const shouldBypassBrowserFirstPass = shouldBypassLiveCaptureQueue(context?.url);
14966
+ if (context?.url && !agentChoseEndpoint && !forceCapture && !shouldBypassBrowserFirstPass) {
14967
+ console.log(`[fast-path] no local cache for ${requestedDomain} — skipping marketplace, going to browser`);
14968
+ (async () => {
14969
+ try {
14970
+ const { domain_results, global_results } = await searchIntentResolve(queryIntent, requestedDomain ?? undefined, MARKETPLACE_DOMAIN_SEARCH_K, MARKETPLACE_GLOBAL_SEARCH_K);
14971
+ const totalResults = domain_results.length + global_results.length;
14972
+ if (totalResults > 0) {
14973
+ console.log(`[fast-path:bg] marketplace found ${totalResults} candidates — will be cached for next resolve`);
14974
+ }
14975
+ } catch {}
14976
+ })();
14977
+ const firstPassResult = await tryFirstPassBrowserAction(intent, params, context.url, { signal: options?.signal, clientScope: options?.client_scope });
14978
+ decisionTrace.first_pass = {
14979
+ intentClass: firstPassResult.intentClass,
14980
+ actionTaken: firstPassResult.actionTaken,
14981
+ hit: firstPassResult.hit,
14982
+ interceptedCount: firstPassResult.interceptedEntries.length,
14983
+ timeMs: firstPassResult.timeMs,
14984
+ fast_path: true
14985
+ };
14986
+ if (firstPassResult.hit && firstPassResult.miniSkill) {
14987
+ const fpNow = new Date().toISOString();
14988
+ const trace2 = {
14989
+ trace_id: nanoid7(),
14990
+ skill_id: firstPassResult.miniSkill.skill_id,
14991
+ endpoint_id: firstPassResult.miniSkill.endpoints[0]?.endpoint_id ?? "",
14992
+ started_at: fpNow,
14993
+ completed_at: fpNow,
14994
+ success: true,
14995
+ network_events: firstPassResult.interceptedEntries
14996
+ };
14997
+ return {
14998
+ result: firstPassResult.result,
14999
+ trace: trace2,
15000
+ source: "first-pass",
15001
+ skill: firstPassResult.miniSkill,
15002
+ timing: finalize("first-pass", firstPassResult.result, firstPassResult.miniSkill.skill_id, firstPassResult.miniSkill, trace2)
15003
+ };
15004
+ }
15005
+ console.log(`[fast-path] first-pass miss — opening browse session for agent`);
15006
+ if (firstPassResult.tabId && context.url) {
15007
+ const tabId = firstPassResult.tabId;
15008
+ const domain = new URL(context.url).hostname.replace(/^www\./, "");
15009
+ try {
15010
+ const { extractBrowserCookies: extractBrowserCookies2 } = await Promise.resolve().then(() => (init_browser_cookies(), exports_browser_cookies));
15011
+ const { cookies } = extractBrowserCookies2(domain);
15012
+ for (const c of cookies)
15013
+ await setCookie(tabId, c).catch(() => {});
15014
+ } catch {}
15015
+ await evaluate(tabId, (await Promise.resolve().then(() => (init_capture(), exports_capture))).INTERCEPTOR_SCRIPT).catch(() => {});
15016
+ await harStart(tabId).catch(() => {});
15017
+ try {
15018
+ const routesModule = await init_routes().then(() => exports_routes);
15019
+ if (typeof routesModule.registerBrowseSession === "function") {
15020
+ routesModule.registerBrowseSession(tabId, context.url, domain);
15021
+ }
15022
+ } catch {}
15023
+ const fpNow = new Date().toISOString();
15024
+ const trace2 = {
15025
+ trace_id: nanoid7(),
15026
+ skill_id: "browse-session",
15027
+ endpoint_id: "",
15028
+ started_at: fpNow,
15029
+ completed_at: fpNow,
15030
+ success: true
15031
+ };
15032
+ return {
15033
+ result: {
15034
+ status: "browse_session_open",
15035
+ tab_id: tabId,
15036
+ url: context.url,
15037
+ domain,
15038
+ next_step: "unbrowse snap",
15039
+ commands: [
15040
+ "unbrowse snap --filter interactive",
15041
+ "unbrowse click <ref>",
15042
+ "unbrowse fill <ref> <value>",
15043
+ "unbrowse close"
15044
+ ]
15045
+ },
15046
+ trace: trace2,
15047
+ source: "browser-action",
15048
+ skill: undefined,
15049
+ timing: finalize("browser-action", null, "browse-session", undefined, trace2)
15050
+ };
15051
+ }
15052
+ }
15053
+ const MARKETPLACE_TIMEOUT_MS = context?.url ? 5000 : 30000;
14848
15054
  if (!forceCapture) {
14849
15055
  const ts0 = Date.now();
14850
- const { domain_results: domainResults, global_results: globalResults } = await searchIntentResolve(queryIntent, requestedDomain ?? undefined, MARKETPLACE_DOMAIN_SEARCH_K, MARKETPLACE_GLOBAL_SEARCH_K).catch(() => ({
15056
+ const { domain_results: domainResults, global_results: globalResults } = await Promise.race([
15057
+ searchIntentResolve(queryIntent, requestedDomain ?? undefined, MARKETPLACE_DOMAIN_SEARCH_K, MARKETPLACE_GLOBAL_SEARCH_K),
15058
+ new Promise((resolve) => setTimeout(() => {
15059
+ console.log(`[marketplace] timeout after ${MARKETPLACE_TIMEOUT_MS}ms — falling through to browser`);
15060
+ resolve({ domain_results: [], global_results: [], skipped_global: true });
15061
+ }, MARKETPLACE_TIMEOUT_MS))
15062
+ ]).catch(() => ({
14851
15063
  domain_results: [],
14852
15064
  global_results: [],
14853
15065
  skipped_global: false
@@ -14982,7 +15194,7 @@ async function resolveAndExecute(intent, params = {}, context, projection, optio
14982
15194
  }
14983
15195
  } catch {}
14984
15196
  }
14985
- if (context?.url && !forceCapture) {
15197
+ if (context?.url && !forceCapture && !shouldBypassBrowserFirstPass) {
14986
15198
  const firstPassResult = await tryFirstPassBrowserAction(intent, params, context.url, { signal: options?.signal, clientScope: options?.client_scope });
14987
15199
  decisionTrace.first_pass = {
14988
15200
  intentClass: firstPassResult.intentClass,
@@ -15098,7 +15310,7 @@ async function resolveAndExecute(intent, params = {}, context, projection, optio
15098
15310
  }
15099
15311
  }
15100
15312
  }
15101
- const bypassLiveCaptureQueue = shouldBypassLiveCaptureQueue(context?.url);
15313
+ const bypassLiveCaptureQueue = shouldBypassBrowserFirstPass;
15102
15314
  const captureLockKey = scopedCacheKey(clientScope, captureDomain);
15103
15315
  let learned_skill;
15104
15316
  let trace;
@@ -15247,12 +15459,7 @@ async function resolveAndExecute(intent, params = {}, context, projection, optio
15247
15459
  function queuePassivePublishIfExecuted(skill, orchestratorResult, browserBaseline) {
15248
15460
  if (!orchestratorResult.trace.success || !orchestratorResult.trace.endpoint_id)
15249
15461
  return;
15250
- const parity = browserBaseline === undefined ? undefined : (async () => {
15251
- const local = localParityVerdict(intent, browserBaseline, orchestratorResult.result);
15252
- if (local.verdict !== "skip")
15253
- return local.verdict;
15254
- return await agentJudgeParity(intent, browserBaseline, orchestratorResult.result);
15255
- })();
15462
+ const parity = browserBaseline === undefined ? undefined : Promise.resolve(agentJudgeParity(intent, browserBaseline, orchestratorResult.result));
15256
15463
  queuePassiveSkillPublish(skill, { parity });
15257
15464
  }
15258
15465
  if (!learned_skill && !trace.success) {
@@ -15461,7 +15668,7 @@ function selectSkillIdsToHydrate(candidates, requestedDomain, limit = MARKETPLAC
15461
15668
  ];
15462
15669
  return [...new Set(prioritizedCandidates.map((c) => extractSkillId(c.metadata)).filter((value) => !!value))].slice(0, limit);
15463
15670
  }
15464
- var CONFIDENCE_THRESHOLD = 0.3, NEBIUS_API_KEY, OPENAI_API_KEY, CHAT_URL2 = "https://api.tokenfactory.nebius.com/v1/chat/completions", OPENAI_CHAT_URL2 = "https://api.openai.com/v1/chat/completions", JUDGE_MODEL, LIVE_CAPTURE_TIMEOUT_MS, BROWSER_CAPTURE_SKILL_ID = "browser-capture", capturedDomainCache, captureInFlight, captureDomainLocks, skillRouteCache, ROUTE_CACHE_FILE, SKILL_SNAPSHOT_DIR, domainSkillCache, DOMAIN_CACHE_FILE, _routeCacheDirty = false, routeCacheFlushTimer, routeResultCache, ROUTE_CACHE_TTL, MARKETPLACE_HYDRATE_LIMIT, MARKETPLACE_GET_SKILL_TIMEOUT_MS, MARKETPLACE_DOMAIN_SEARCH_K, MARKETPLACE_GLOBAL_SEARCH_K, SEARCH_INTENT_STOPWORDS, SEARCH_DIRECTIVE_PREFIX, SEARCH_TRAILING_SITE_HINT, SEARCH_INSTRUCTION_NOISE, SEARCH_PRIORITY_PATTERN;
15671
+ var CONFIDENCE_THRESHOLD = 0.3, LIVE_CAPTURE_TIMEOUT_MS, BROWSER_CAPTURE_SKILL_ID = "browser-capture", capturedDomainCache, captureInFlight, captureDomainLocks, skillRouteCache, ROUTE_CACHE_FILE, SKILL_SNAPSHOT_DIR, domainSkillCache, DOMAIN_CACHE_FILE, _routeCacheDirty = false, routeCacheFlushTimer, routeResultCache, ROUTE_CACHE_TTL, MARKETPLACE_HYDRATE_LIMIT, MARKETPLACE_GET_SKILL_TIMEOUT_MS, MARKETPLACE_DOMAIN_SEARCH_K, MARKETPLACE_GLOBAL_SEARCH_K, SEARCH_INTENT_STOPWORDS, SEARCH_DIRECTIVE_PREFIX, SEARCH_TRAILING_SITE_HINT, SEARCH_INSTRUCTION_NOISE, SEARCH_PRIORITY_PATTERN;
15465
15672
  var init_orchestrator = __esm(async () => {
15466
15673
  init_client2();
15467
15674
  init_client();
@@ -15483,9 +15690,6 @@ var init_orchestrator = __esm(async () => {
15483
15690
  init_execution(),
15484
15691
  init_prefetch()
15485
15692
  ]);
15486
- NEBIUS_API_KEY = process.env.NEBIUS_API_KEY ?? "";
15487
- OPENAI_API_KEY = process.env.OPENAI_API_KEY ?? "";
15488
- JUDGE_MODEL = process.env.UNBROWSE_AGENT_JUDGE_MODEL ?? "gpt-4.1-mini";
15489
15693
  LIVE_CAPTURE_TIMEOUT_MS = Number(process.env.UNBROWSE_LIVE_CAPTURE_TIMEOUT_MS ?? "120000");
15490
15694
  capturedDomainCache = new Map;
15491
15695
  captureInFlight = new Map;
@@ -16433,18 +16637,128 @@ async function registerRoutes(app) {
16433
16637
  const { endpoints: reviews } = req.body;
16434
16638
  if (!reviews?.length)
16435
16639
  return reply.code(400).send({ error: "endpoints[] required" });
16436
- const skill = getRecentLocalSkill(skill_id, clientScope) ?? await getSkill2(skill_id, clientScope);
16640
+ let skill = getRecentLocalSkill(skill_id, clientScope);
16641
+ if (!skill) {
16642
+ for (const [, entry] of domainSkillCache) {
16643
+ if (entry.skillId === skill_id && entry.localSkillPath) {
16644
+ try {
16645
+ skill = JSON.parse(__require("fs").readFileSync(entry.localSkillPath, "utf-8"));
16646
+ } catch {}
16647
+ break;
16648
+ }
16649
+ }
16650
+ }
16651
+ if (!skill)
16652
+ skill = await getSkill2(skill_id, clientScope);
16437
16653
  if (!skill)
16438
16654
  return reply.code(404).send({ error: "Skill not found" });
16439
16655
  const updated = mergeAgentReview(skill.endpoints, reviews);
16440
16656
  skill.endpoints = updated;
16441
16657
  skill.updated_at = new Date().toISOString();
16658
+ try {
16659
+ cachePublishedSkill(skill);
16660
+ } catch {}
16661
+ const domain = skill.domain;
16662
+ if (domain) {
16663
+ const revCacheKey = buildResolveCacheKey(domain, skill.intent_signature ?? `browse ${domain}`, undefined);
16664
+ const revScopedKey = scopedCacheKey(clientScope, revCacheKey);
16665
+ writeSkillSnapshot(revScopedKey, skill);
16666
+ const revDomainKey = getDomainReuseKey(domain);
16667
+ if (revDomainKey) {
16668
+ domainSkillCache.set(revDomainKey, {
16669
+ skillId: skill.skill_id,
16670
+ localSkillPath: snapshotPathForCacheKey(revScopedKey),
16671
+ ts: Date.now()
16672
+ });
16673
+ persistDomainCache();
16674
+ }
16675
+ }
16442
16676
  try {
16443
16677
  await publishSkill2(skill);
16444
- return reply.send({ ok: true, endpoints_updated: reviews.length });
16445
- } catch (err) {
16446
- return reply.code(500).send({ error: err.message });
16678
+ } catch {}
16679
+ try {
16680
+ cachePublishedSkill(skill);
16681
+ } catch {}
16682
+ return reply.send({ ok: true, endpoints_updated: reviews.length });
16683
+ });
16684
+ app.post("/v1/skills/:skill_id/publish", async (req, reply) => {
16685
+ const clientScope = clientScopeFor(req);
16686
+ const { skill_id } = req.params;
16687
+ const { endpoints: reviews } = req.body ?? {};
16688
+ let skill = getRecentLocalSkill(skill_id, clientScope);
16689
+ if (!skill) {
16690
+ for (const [, entry] of domainSkillCache) {
16691
+ if (entry.skillId === skill_id && entry.localSkillPath) {
16692
+ try {
16693
+ skill = JSON.parse(__require("fs").readFileSync(entry.localSkillPath, "utf-8"));
16694
+ } catch {}
16695
+ break;
16696
+ }
16697
+ }
16698
+ }
16699
+ if (!skill)
16700
+ skill = await getSkill2(skill_id, clientScope);
16701
+ if (!skill)
16702
+ return reply.code(404).send({ error: "Skill not found" });
16703
+ if (reviews?.length) {
16704
+ const updated = mergeAgentReview(skill.endpoints, reviews);
16705
+ skill.endpoints = updated;
16706
+ skill.updated_at = new Date().toISOString();
16707
+ try {
16708
+ cachePublishedSkill(skill);
16709
+ } catch {}
16710
+ const domain = skill.domain;
16711
+ if (domain) {
16712
+ const ck = buildResolveCacheKey(domain, skill.intent_signature ?? `browse ${domain}`, undefined);
16713
+ const sk = scopedCacheKey(clientScope, ck);
16714
+ writeSkillSnapshot(sk, skill);
16715
+ const dk = getDomainReuseKey(domain);
16716
+ if (dk) {
16717
+ domainSkillCache.set(dk, {
16718
+ skillId: skill.skill_id,
16719
+ localSkillPath: snapshotPathForCacheKey(sk),
16720
+ ts: Date.now()
16721
+ });
16722
+ persistDomainCache();
16723
+ }
16724
+ }
16725
+ try {
16726
+ await publishSkill2(skill);
16727
+ } catch {}
16728
+ try {
16729
+ cachePublishedSkill(skill);
16730
+ } catch {}
16731
+ return reply.send({
16732
+ ok: true,
16733
+ skill_id: skill.skill_id,
16734
+ endpoints_updated: reviews.length,
16735
+ published: true
16736
+ });
16447
16737
  }
16738
+ const ranked = rankEndpoints(skill.endpoints, skill.intent_signature, skill.domain);
16739
+ const endpoints_to_describe = ranked.map((r) => ({
16740
+ endpoint_id: r.endpoint.endpoint_id,
16741
+ method: r.endpoint.method,
16742
+ url: r.endpoint.url_template.length > 120 ? r.endpoint.url_template.slice(0, 120) + "..." : r.endpoint.url_template,
16743
+ current_description: r.endpoint.description ?? "",
16744
+ schema_summary: r.endpoint.response_schema ? summarizeSchema(r.endpoint.response_schema) : null,
16745
+ sample_values: extractSampleValues(r.endpoint.semantic?.example_response_compact),
16746
+ input_params: r.endpoint.semantic?.requires?.map((b) => ({
16747
+ key: b.key,
16748
+ type: b.type ?? b.semantic_type,
16749
+ required: b.required ?? false,
16750
+ example: b.example_value
16751
+ })) ?? [],
16752
+ dom_extraction: !!r.endpoint.dom_extraction,
16753
+ _fill_description: "DESCRIBE THIS ENDPOINT — what it returns, key params, action type"
16754
+ }));
16755
+ return reply.send({
16756
+ skill_id: skill.skill_id,
16757
+ domain: skill.domain,
16758
+ endpoint_count: skill.endpoints.length,
16759
+ endpoints_to_describe,
16760
+ _next_step: `Fill each endpoint's description, then call: unbrowse publish --skill ${skill.skill_id} --endpoints '[{endpoint_id, description, action_kind, resource_kind}]'`
16761
+ });
16448
16762
  });
16449
16763
  app.post("/v1/skills/:skill_id/chunk", async (req, reply) => {
16450
16764
  const clientScope = clientScopeFor(req);
@@ -16660,7 +16974,7 @@ async function registerRoutes(app) {
16660
16974
  await start().catch(() => {});
16661
16975
  const tabId = await newTab();
16662
16976
  await harStart(tabId).catch(() => {});
16663
- await evaluate(tabId, INTERCEPTOR_SCRIPT).catch(() => {});
16977
+ await injectInterceptor(tabId);
16664
16978
  const session = { tabId, url: "about:blank", harActive: true, domain: "" };
16665
16979
  browseSessions.set("default", session);
16666
16980
  return session;
@@ -16700,7 +17014,7 @@ async function registerRoutes(app) {
16700
17014
  const finalUrl = await getCurrentUrl(session.tabId).catch(() => url);
16701
17015
  session.url = typeof finalUrl === "string" && finalUrl.startsWith("http") ? finalUrl : url;
16702
17016
  session.domain = profileName(session.url);
16703
- await evaluate(session.tabId, INTERCEPTOR_SCRIPT).catch(() => {});
17017
+ await injectInterceptor(session.tabId);
16704
17018
  return reply.send({ ok: true, url: session.url, tab_id: session.tabId, auth_profile: session.domain });
16705
17019
  });
16706
17020
  app.post("/v1/browse/snap", async (req, reply) => {
@@ -16901,6 +17215,88 @@ async function registerRoutes(app) {
16901
17215
  invalidateRouteCacheForDomain(domain);
16902
17216
  console.log(`[passive-index] ${domain}: ${mergedEps.length} endpoints cached synchronously`);
16903
17217
  }
17218
+ } else {
17219
+ let domain2;
17220
+ try {
17221
+ domain2 = new URL(session.url).hostname;
17222
+ } catch {
17223
+ domain2 = session.domain;
17224
+ }
17225
+ try {
17226
+ const html = await getPageHtml(session.tabId);
17227
+ if (html && typeof html === "string" && html.startsWith("<")) {
17228
+ const { extractFromDOM: extractFromDOM2 } = await Promise.resolve().then(() => (init_extraction(), exports_extraction));
17229
+ const { detectSearchForms: detectSearchForms2, isStructuredSearchForm: isStructuredSearchForm2 } = await Promise.resolve().then(() => (init_search_forms(), exports_search_forms));
17230
+ const { inferSchema: inferSchema2 } = await Promise.resolve().then(() => (init_transform(), exports_transform));
17231
+ const { inferEndpointSemantic: inferEndpointSemantic2 } = await Promise.resolve().then(() => (init_graph(), exports_graph));
17232
+ const { templatizeQueryParams: templatizeQueryParams2 } = await init_execution().then(() => exports_execution);
17233
+ const extracted = extractFromDOM2(html, `browse ${domain2}`);
17234
+ const searchForms = detectSearchForms2(html);
17235
+ const validForm = searchForms.find((s) => isStructuredSearchForm2(s));
17236
+ if (extracted.data || validForm) {
17237
+ const urlTemplate = templatizeQueryParams2(session.url);
17238
+ const ep = {
17239
+ endpoint_id: nanoid8(),
17240
+ method: "GET",
17241
+ url_template: urlTemplate,
17242
+ idempotency: "safe",
17243
+ verification_status: "verified",
17244
+ reliability_score: extracted.confidence ?? 0.7,
17245
+ description: validForm ? `Search form for ${domain2}` : `Page content from ${domain2}`,
17246
+ response_schema: extracted.data ? inferSchema2([extracted.data]) : undefined,
17247
+ dom_extraction: {
17248
+ extraction_method: extracted.extraction_method ?? "repeated-elements",
17249
+ confidence: extracted.confidence ?? 0.7,
17250
+ ...extracted.selector ? { selector: extracted.selector } : {},
17251
+ ...validForm ? { search_form: validForm } : {}
17252
+ },
17253
+ trigger_url: session.url
17254
+ };
17255
+ ep.semantic = inferEndpointSemantic2(ep, {
17256
+ sampleResponse: extracted.data,
17257
+ observedAt: new Date().toISOString(),
17258
+ sampleRequestUrl: session.url
17259
+ });
17260
+ const existing = findExistingSkillForDomain(domain2);
17261
+ const allEps = existing ? mergeEndpoints(existing.endpoints, [ep]) : [ep];
17262
+ for (const e of allEps) {
17263
+ if (!e.description)
17264
+ e.description = generateLocalDescription(e);
17265
+ }
17266
+ const skill = {
17267
+ skill_id: existing?.skill_id ?? nanoid8(),
17268
+ version: "1.0.0",
17269
+ schema_version: "1",
17270
+ lifecycle: "active",
17271
+ execution_type: "http",
17272
+ created_at: existing?.created_at ?? new Date().toISOString(),
17273
+ updated_at: new Date().toISOString(),
17274
+ name: domain2,
17275
+ intent_signature: `browse ${domain2}`,
17276
+ domain: domain2,
17277
+ description: `DOM skill for ${domain2}`,
17278
+ owner_type: "agent",
17279
+ endpoints: allEps,
17280
+ intents: [...new Set([...existing?.intents ?? [], `browse ${domain2}`])]
17281
+ };
17282
+ const ck = buildResolveCacheKey(domain2, `browse ${domain2}`, session.url);
17283
+ const sk = scopedCacheKey("global", ck);
17284
+ writeSkillSnapshot(sk, skill);
17285
+ const dk = getDomainReuseKey(session.url ?? domain2);
17286
+ if (dk) {
17287
+ domainSkillCache.set(dk, { skillId: skill.skill_id, localSkillPath: snapshotPathForCacheKey(sk), ts: Date.now() });
17288
+ persistDomainCache();
17289
+ }
17290
+ try {
17291
+ cachePublishedSkill(skill);
17292
+ } catch {}
17293
+ invalidateRouteCacheForDomain(domain2);
17294
+ console.log(`[close] ${domain2}: DOM endpoint created (form=${!!validForm})`);
17295
+ }
17296
+ }
17297
+ } catch (err) {
17298
+ console.log(`[close] DOM fallback failed: ${err instanceof Error ? err.message : err}`);
17299
+ }
16904
17300
  }
16905
17301
  }
16906
17302
  passiveIndexFromRequests(allRequests, session.url);
@@ -16952,7 +17348,7 @@ __export(exports_server, {
16952
17348
  startUnbrowseServer: () => startUnbrowseServer,
16953
17349
  installServerExitCleanup: () => installServerExitCleanup
16954
17350
  });
16955
- import { execSync as execSync3 } from "node:child_process";
17351
+ import { execSync as execSync2 } from "node:child_process";
16956
17352
  import { mkdirSync as mkdirSync10, unlinkSync, writeFileSync as writeFileSync9 } from "node:fs";
16957
17353
  import path5 from "node:path";
16958
17354
  import Fastify from "fastify";
@@ -16982,7 +17378,7 @@ async function startUnbrowseServer(options = {}) {
16982
17378
  const pidFile = options.pidFile ?? process.env.UNBROWSE_PID_FILE;
16983
17379
  updatePidFile(pidFile, host, port);
16984
17380
  try {
16985
- execSync3("pkill -f chrome-headless-shell", { stdio: "ignore" });
17381
+ execSync2("pkill -f chrome-headless-shell", { stdio: "ignore" });
16986
17382
  } catch {}
16987
17383
  await ensureRegistered2();
16988
17384
  const app = Fastify({ logger: options.logger ?? true });
@@ -17030,6 +17426,21 @@ var API_URL = process.env.UNBROWSE_BACKEND_URL || "https://beta-api.unbrowse.ai"
17030
17426
  var PROFILE_NAME = sanitizeProfileName(process.env.UNBROWSE_PROFILE ?? "");
17031
17427
  var recentLocalSkills = new Map;
17032
17428
  var LOCAL_ONLY = process.env.UNBROWSE_LOCAL_ONLY === "1";
17429
+ function decodeBase64Json(value) {
17430
+ try {
17431
+ if (typeof globalThis !== "undefined" && typeof globalThis.atob === "function") {
17432
+ const binary = globalThis.atob(value);
17433
+ const bytes = new Uint8Array(binary.length);
17434
+ for (let i = 0;i < binary.length; i++) {
17435
+ bytes[i] = binary.charCodeAt(i);
17436
+ }
17437
+ return JSON.parse(new TextDecoder("utf-8").decode(bytes));
17438
+ }
17439
+ return JSON.parse(Buffer.from(value, "base64").toString("utf8"));
17440
+ } catch {
17441
+ return;
17442
+ }
17443
+ }
17033
17444
  function getConfigDir() {
17034
17445
  if (process.env.UNBROWSE_CONFIG_DIR)
17035
17446
  return process.env.UNBROWSE_CONFIG_DIR;
@@ -17084,6 +17495,7 @@ function getApiKey() {
17084
17495
  return "";
17085
17496
  }
17086
17497
  var API_TIMEOUT_MS = parseInt(process.env.UNBROWSE_API_TIMEOUT ?? "8000", 10);
17498
+ var PUBLISH_TIMEOUT_MS = parseInt(process.env.UNBROWSE_PUBLISH_TIMEOUT ?? "30000", 10);
17087
17499
  async function validateApiKey(key) {
17088
17500
  const controller = new AbortController;
17089
17501
  const timer = setTimeout(() => controller.abort(), API_TIMEOUT_MS);
@@ -17144,7 +17556,7 @@ async function findUsableApiKey() {
17144
17556
  async function api(method, path, body, opts) {
17145
17557
  const key = opts?.noAuth ? "" : getApiKey();
17146
17558
  const controller = new AbortController;
17147
- const timer = setTimeout(() => controller.abort(), API_TIMEOUT_MS);
17559
+ const timer = setTimeout(() => controller.abort(), opts?.timeoutMs ?? API_TIMEOUT_MS);
17148
17560
  let res;
17149
17561
  try {
17150
17562
  res = await fetch(`${API_URL}${path}`, {
@@ -17173,8 +17585,9 @@ async function api(method, path, body, opts) {
17173
17585
  throw new Error("ToS update required. Restart unbrowse to accept new terms.");
17174
17586
  }
17175
17587
  if (res.status === 402) {
17176
- const paymentTerms = res.headers.get("X-Payment-Required");
17177
- const terms = paymentTerms ? JSON.parse(paymentTerms) : data.terms;
17588
+ const paymentRequired = res.headers.get("PAYMENT-REQUIRED");
17589
+ const legacyPaymentTerms = res.headers.get("X-Payment-Required");
17590
+ const terms = paymentRequired ? decodeBase64Json(paymentRequired) : legacyPaymentTerms ? JSON.parse(legacyPaymentTerms) : data.terms;
17178
17591
  const err = new Error(`Payment required: ${data.error ?? "This skill requires payment"}`);
17179
17592
  err.x402 = true;
17180
17593
  err.terms = terms;
@@ -18122,6 +18535,81 @@ async function cmdResolve(flags) {
18122
18535
  }
18123
18536
  output(result, !!flags.pretty);
18124
18537
  }
18538
/**
 * Drill into `data` along a dot-separated path and collect every value found.
 *
 * Path grammar:
 *   - `a.b.c`    walks object keys; a missing key contributes nothing.
 *   - `key[]`    reads `key`, then flattens the array found there by one level.
 *   - `key[][]`  flattens one further level for every additional `[]` suffix.
 *   - bare `[]`  flattens the current values without reading a key.
 *
 * When a key segment meets an array, the key is read from each element and
 * elements that do not provide it are dropped.
 *
 * @param {*} data - Value to drill into.
 * @param {string} path9 - Dot-separated path, optionally using `[]` suffixes.
 * @returns {Array} Every value reached by the path; empty when nothing matches.
 */
function drillPath(data, path9) {
  const FLATTEN = "[]";
  const segments = path9.split(".").flatMap((part) => {
    let key = part;
    let flattenCount = 0;
    // Peel every trailing "[]" so "rows[][]" becomes ["rows", "[]", "[]"].
    // The length guard keeps a bare "[]" intact as a flatten marker of its own.
    while (key.length > FLATTEN.length && key.endsWith(FLATTEN)) {
      key = key.slice(0, -FLATTEN.length);
      flattenCount += 1;
    }
    return [key, ...Array(flattenCount).fill(FLATTEN)];
  });
  let values = [data];
  for (const seg of segments) {
    // Nothing left to descend into: the path cannot match.
    if (values.length === 0) {
      return [];
    }
    if (seg === FLATTEN) {
      values = values.flatMap((v) => (Array.isArray(v) ? v : [v]));
      continue;
    }
    values = values.flatMap((v) => {
      if (v == null) {
        return [];
      }
      if (Array.isArray(v)) {
        // Implicit fan-out: read the key from every element of the array.
        return v.map((item) => item?.[seg]).filter((x) => x !== undefined);
      }
      if (typeof v === "object") {
        const val = v[seg];
        return val !== undefined ? [val] : [];
      }
      // Primitives have no keys to descend into.
      return [];
    });
  }
  return values;
}
18566
/**
 * Resolve a dot-separated path (e.g. `"a.b.0.c"`) against a value.
 *
 * Only own properties are followed, so inherited members such as
 * `constructor` or `toString` are never reported as data. Array indices and
 * `length` are own properties and keep working.
 *
 * @param {*} obj - Root value to read from.
 * @param {string} path9 - Dot-separated list of keys.
 * @returns {*} The value at the path, or `undefined` when any step is missing
 *   or passes through a non-object.
 */
function resolveDotPath(obj, path9) {
  let cur = obj;
  for (const key of path9.split(".")) {
    if (cur == null || typeof cur !== "object") {
      return undefined;
    }
    // Refuse to walk the prototype chain: "constructor"/"__proto__" are not data.
    if (!Object.hasOwn(cur, key)) {
      return undefined;
    }
    cur = cur[key];
  }
  return cur;
}
18575
/**
 * Project each item onto the fields named in a comma-separated spec.
 *
 * Each field is either `path` (the output key equals the path) or
 * `alias:path`. Whitespace around fields, aliases and paths is ignored and
 * empty fields (e.g. from a trailing comma) are skipped, so
 * `"name, link: meta.url,"` behaves like `"name,link:meta.url"`.
 *
 * @param {Array} items - Values to project.
 * @param {string} extractSpec - Comma-separated `path` / `alias:path` fields.
 * @returns {Array<Object>} One row per item that yielded at least one non-null
 *   field; fields that did not resolve are filled with `null`.
 */
function applyExtract(items, extractSpec) {
  const fields = extractSpec
    .split(",")
    .map((spec) => spec.trim())
    .filter((spec) => spec !== "")
    .map((spec) => {
      const colon = spec.indexOf(":");
      // A colon at index 0 leaves no alias, so the whole spec is used as both.
      if (colon > 0) {
        return {
          alias: spec.slice(0, colon).trim(),
          path: spec.slice(colon + 1).trim(),
        };
      }
      return { alias: spec, path: spec };
    });
  return items
    .map((item) => {
      const row = {};
      let hasValue = false;
      for (const { alias, path: path9 } of fields) {
        const val = resolveDotPath(item, path9);
        row[alias] = val ?? null;
        if (val != null) {
          hasValue = true;
        }
      }
      // Items that matched none of the requested fields are dropped entirely.
      return hasValue ? row : null;
    })
    .filter((row) => row !== null);
}
18594
/**
 * Summarise the shape of a value as a nested schema description.
 *
 * - `null` / `undefined` become the string `"null"`.
 * - Arrays become a one-element array holding the schema of a sample element
 *   (`["unknown"]` when the array is empty). The sample is the first non-null
 *   element, so a leading `null` does not hide the element shape.
 * - Objects become an object mapping each key to the schema of its value, or
 *   the string `"object"` once the depth budget is used up.
 * - Everything else becomes its `typeof` string.
 *
 * @param {*} value - Value to describe.
 * @param {number} [depth=4] - Remaining levels of nesting to expand.
 * @returns {string|Array|Object} Schema description of `value`.
 */
function schemaOf(value, depth = 4) {
  if (value == null) {
    return "null";
  }
  if (Array.isArray(value)) {
    if (value.length === 0) {
      return ["unknown"];
    }
    // All-null arrays yield `undefined` here, which is reported as "null".
    const sample = value.find((element) => element != null);
    return [schemaOf(sample, depth - 1)];
  }
  if (typeof value === "object") {
    if (depth <= 0) {
      return "object";
    }
    const out = {};
    for (const [k, v] of Object.entries(value)) {
      out[k] = schemaOf(v, depth - 1);
    }
    return out;
  }
  return typeof value;
}
18125
18613
  async function cmdExecute(flags) {
18126
18614
  const skillId = flags.skill;
18127
18615
  if (!skillId)
@@ -18146,6 +18634,44 @@ async function cmdExecute(flags) {
18146
18634
  body.projection = { raw: true };
18147
18635
  let result = await withPendingNotice(api3("POST", `/v1/skills/${skillId}/execute`, body), "Still working. This endpoint may require browser replay or first-time auth/capture setup.");
18148
18636
  result = slimTrace(result);
18637
+ const pathFlag = flags.path;
18638
+ const extractFlag = flags.extract;
18639
+ const limitFlag = flags.limit ? Number(flags.limit) : undefined;
18640
+ const schemaFlag = !!flags.schema;
18641
+ const rawFlag = !!flags.raw;
18642
+ if (schemaFlag && !rawFlag) {
18643
+ const data = result.result;
18644
+ output({ trace: result.trace, schema: schemaOf(data) }, !!flags.pretty);
18645
+ return;
18646
+ }
18647
+ if (!rawFlag && (pathFlag || extractFlag || limitFlag)) {
18648
+ let data = pathFlag ? drillPath(result.result, pathFlag) : result.result;
18649
+ const items = Array.isArray(data) ? data : data != null ? [data] : [];
18650
+ const extracted = extractFlag ? applyExtract(items, extractFlag) : items;
18651
+ const limited = limitFlag ? extracted.slice(0, limitFlag) : extracted;
18652
+ const trace = result.trace;
18653
+ const out = { trace: result.trace, data: limited, count: limited.length };
18654
+ if (trace?.skill_id && trace?.endpoint_id && limited.length > 0) {
18655
+ out._review_hint = `After presenting results, improve this endpoint's description: unbrowse review --skill ${trace.skill_id} --endpoints '[{"endpoint_id":"${trace.endpoint_id}","description":"DESCRIBE WHAT THIS RETURNS","action_kind":"ACTION","resource_kind":"RESOURCE"}]'`;
18656
+ }
18657
+ output(out, !!flags.pretty);
18658
+ return;
18659
+ }
18660
+ if (!rawFlag && !pathFlag && !extractFlag && !schemaFlag) {
18661
+ const raw = JSON.stringify(result.result);
18662
+ if (raw && raw.length > 2048) {
18663
+ const schema = schemaOf(result.result);
18664
+ output({
18665
+ trace: result.trace,
18666
+ extraction_hints: {
18667
+ message: "Response is large. Use --path/--extract/--limit to filter, or --schema to see structure, or --raw for full response.",
18668
+ schema_tree: schema,
18669
+ response_bytes: raw.length
18670
+ }
18671
+ }, !!flags.pretty);
18672
+ return;
18673
+ }
18674
+ }
18149
18675
  output(result, !!flags.pretty);
18150
18676
  }
18151
18677
  async function cmdFeedback(flags) {
@@ -18165,6 +18691,32 @@ async function cmdFeedback(flags) {
18165
18691
  body.diagnostics = JSON.parse(flags.diagnostics);
18166
18692
  output(await api3("POST", "/v1/feedback", body), !!flags.pretty);
18167
18693
  }
18694
/**
 * `review` command: POST reviewed endpoint metadata to
 * `/v1/skills/<skill>/review` and print the response.
 *
 * Reads `flags.skill`, `flags.endpoints` (a JSON array encoded as a string)
 * and `flags.pretty`. Every validation failure, including malformed JSON, is
 * reported through `die()` rather than surfacing as a raw `SyntaxError`.
 *
 * NOTE(review): the code relies on `die()` not returning (presumably it exits
 * the process) — confirm against its definition.
 *
 * @param {Object} flags - Parsed command-line flags.
 * @returns {Promise<void>}
 */
async function cmdReview(flags) {
  const skillId = flags.skill;
  if (!skillId) {
    die("--skill is required");
  }
  const endpointsJson = flags.endpoints;
  if (!endpointsJson) {
    die("--endpoints is required (JSON array of {endpoint_id, description?, action_kind?, resource_kind?})");
  }
  let endpoints;
  try {
    endpoints = JSON.parse(endpointsJson);
  } catch (err) {
    // Keep bad input on the same error path as the other flag checks.
    die(`--endpoints must be valid JSON: ${err.message}`);
  }
  if (!Array.isArray(endpoints) || endpoints.length === 0) {
    die("--endpoints must be a non-empty JSON array");
  }
  output(await api3("POST", `/v1/skills/${skillId}/review`, { endpoints }), !!flags.pretty);
}
18706
/**
 * `publish` command: POST to `/v1/skills/<skill>/publish` and print the
 * response.
 *
 * Reads `flags.skill`, the optional `flags.endpoints` (a JSON array encoded as
 * a string) and `flags.pretty`. Without `--endpoints` an empty body is sent;
 * with it, the parsed array is sent as `{ endpoints }`. Every validation
 * failure, including malformed JSON, is reported through `die()` rather than
 * surfacing as a raw `SyntaxError`.
 *
 * NOTE(review): the code relies on `die()` not returning (presumably it exits
 * the process) — confirm against its definition.
 *
 * @param {Object} flags - Parsed command-line flags.
 * @returns {Promise<void>}
 */
async function cmdPublish(flags) {
  const skillId = flags.skill;
  if (!skillId) {
    die("--skill is required");
  }
  const endpointsJson = flags.endpoints;
  const body = {};
  if (endpointsJson) {
    let endpoints;
    try {
      endpoints = JSON.parse(endpointsJson);
    } catch (err) {
      // Keep bad input on the same error path as the other flag checks.
      die(`--endpoints must be valid JSON: ${err.message}`);
    }
    if (!Array.isArray(endpoints) || endpoints.length === 0) {
      die("--endpoints must be a non-empty JSON array");
    }
    body.endpoints = endpoints;
  }
  output(await api3("POST", `/v1/skills/${skillId}/publish`, body), !!flags.pretty);
}
18168
18720
  async function cmdLogin(flags) {
18169
18721
  const url = flags.url;
18170
18722
  if (!url)
@@ -18240,9 +18792,11 @@ var CLI_REFERENCE = {
18240
18792
  commands: [
18241
18793
  { name: "health", usage: "", desc: "Server health check" },
18242
18794
  { name: "setup", usage: "[--opencode auto|global|project|off] [--no-start]", desc: "Bootstrap browser deps + Open Code command" },
18243
- { name: "resolve", usage: '--intent "..." --url "..." [opts]', desc: "Resolve intent \u2192 find skill + execute" },
18795
+ { name: "resolve", usage: '--intent "..." --url "..." [opts]', desc: "Resolve intent \u2192 search/capture/execute" },
18244
18796
  { name: "execute", usage: "--skill ID --endpoint ID [opts]", desc: "Execute a specific endpoint" },
18245
18797
  { name: "feedback", usage: "--skill ID --endpoint ID --rating N", desc: "Submit feedback (mandatory after resolve)" },
18798
+ { name: "review", usage: "--skill ID --endpoints '[...]'", desc: "Push reviewed descriptions/metadata back to skill" },
18799
+ { name: "publish", usage: "--skill ID [--endpoints '[...]']", desc: "Describe + publish skill to marketplace (two-phase)" },
18246
18800
  { name: "login", usage: '--url "..."', desc: "Interactive browser login" },
18247
18801
  { name: "skills", usage: "", desc: "List all skills" },
18248
18802
  { name: "skill", usage: "<id>", desc: "Get skill details" },
@@ -18268,11 +18822,15 @@ var CLI_REFERENCE = {
18268
18822
  globalFlags: [
18269
18823
  { flag: "--pretty", desc: "Indented JSON output" },
18270
18824
  { flag: "--no-auto-start", desc: "Don't auto-start server" },
18825
+ { flag: "--raw", desc: "Return raw response data (skip server-side projection)" },
18271
18826
  { flag: "--skip-browser", desc: "setup: skip browser-engine install" },
18272
18827
  { flag: "--opencode auto|global|project|off", desc: "setup: install /unbrowse command for Open Code" }
18273
18828
  ],
18274
18829
  resolveExecuteFlags: [
18275
- { flag: "--execute", desc: "Auto-pick best endpoint and return data (resolve only)" },
18830
+ { flag: "--schema", desc: "Show response schema + extraction hints only (no data)" },
18831
+ { flag: '--path "data.items[]"', desc: "Drill into result before extract/output" },
18832
+ { flag: '--extract "field1,alias:deep.path.to.val"', desc: "Pick specific fields (no piping needed)" },
18833
+ { flag: "--limit N", desc: "Cap array output to N items" },
18276
18834
  { flag: "--endpoint-id ID", desc: "Pick a specific endpoint" },
18277
18835
  { flag: "--dry-run", desc: "Preview mutations" },
18278
18836
  { flag: "--force-capture", desc: "Bypass caches, re-capture" },
@@ -18283,7 +18841,12 @@ var CLI_REFERENCE = {
18283
18841
  'unbrowse resolve --intent "top stories" --url "https://news.ycombinator.com" --execute',
18284
18842
  'unbrowse resolve --intent "get timeline" --url "https://x.com"',
18285
18843
  "unbrowse execute --skill abc --endpoint def --pretty",
18286
- "unbrowse feedback --skill abc --endpoint def --rating 5"
18844
+ "unbrowse execute --skill abc --endpoint def --schema --pretty",
18845
+ 'unbrowse execute --skill abc --endpoint def --path "data.items[]" --extract "name,url" --limit 10 --pretty',
18846
+ "unbrowse feedback --skill abc --endpoint def --rating 5",
18847
+ `unbrowse review --skill abc --endpoints '[{"endpoint_id":"def","description":"..."}]'`,
18848
+ "unbrowse publish --skill abc --pretty",
18849
+ `unbrowse publish --skill abc --endpoints '[{"endpoint_id":"def","description":"Search court judgments by keywords","action_kind":"search","resource_kind":"judgment"}]'`
18287
18850
  ]
18288
18851
  };
18289
18852
  function printHelp() {
@@ -18337,9 +18900,9 @@ function cmdStop(flags) {
18337
18900
  }
18338
18901
  async function cmdUpgrade(flags) {
18339
18902
  info("Checking for updates...");
18340
- const { execSync: execSync4 } = await import("child_process");
18903
+ const { execSync: execSync3 } = await import("child_process");
18341
18904
  try {
18342
- const result = execSync4("npm view unbrowse version", { encoding: "utf-8", timeout: 1e4 }).trim();
18905
+ const result = execSync3("npm view unbrowse version", { encoding: "utf-8", timeout: 1e4 }).trim();
18343
18906
  const versionInfo = checkServerVersion(BASE_URL, import.meta.url);
18344
18907
  const installed = versionInfo?.installed ?? "unknown";
18345
18908
  if (result === installed) {
@@ -18548,7 +19111,7 @@ async function cmdClose() {
18548
19111
  output(await api3("POST", "/v1/browse/close"), false);
18549
19112
  }
18550
19113
  async function cmdConnectChrome() {
18551
- const { execSync: execSync4, spawn: spawnProc } = __require("child_process");
19114
+ const { execSync: execSync3, spawn: spawnProc } = __require("child_process");
18552
19115
  try {
18553
19116
  const res = await fetch("http://127.0.0.1:9222/json/version", { signal: AbortSignal.timeout(1000) });
18554
19117
  if (res.ok) {
@@ -18561,16 +19124,16 @@ async function cmdConnectChrome() {
18561
19124
  }
18562
19125
  } catch {}
18563
19126
  try {
18564
- execSync4("pkill -f kuri/chrome-profile", { stdio: "ignore" });
19127
+ execSync3("pkill -f kuri/chrome-profile", { stdio: "ignore" });
18565
19128
  } catch {}
18566
19129
  console.log("Quitting Chrome to relaunch with remote debugging...");
18567
19130
  if (process.platform === "darwin") {
18568
19131
  try {
18569
- execSync4('osascript -e "quit app \\"Google Chrome\\""', { stdio: "ignore", timeout: 5000 });
19132
+ execSync3('osascript -e "quit app \\"Google Chrome\\""', { stdio: "ignore", timeout: 5000 });
18570
19133
  } catch {}
18571
19134
  } else {
18572
19135
  try {
18573
- execSync4("pkill -f chrome", { stdio: "ignore" });
19136
+ execSync3("pkill -f chrome", { stdio: "ignore" });
18574
19137
  } catch {}
18575
19138
  }
18576
19139
  await new Promise((r) => setTimeout(r, 2000));
@@ -18626,6 +19189,8 @@ async function main() {
18626
19189
  "exec",
18627
19190
  "feedback",
18628
19191
  "fb",
19192
+ "review",
19193
+ "publish",
18629
19194
  "login",
18630
19195
  "skills",
18631
19196
  "skill",
@@ -18686,6 +19251,10 @@ async function main() {
18686
19251
  case "feedback":
18687
19252
  case "fb":
18688
19253
  return cmdFeedback(flags);
19254
+ case "review":
19255
+ return cmdReview(flags);
19256
+ case "publish":
19257
+ return cmdPublish(flags);
18689
19258
  case "login":
18690
19259
  return cmdLogin(flags);
18691
19260
  case "skills":