unbrowse 2.8.6 → 2.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.js CHANGED
@@ -721,6 +721,51 @@ var init_client = __esm(() => {
721
721
  });
722
722
 
723
723
  // ../../src/transform/index.ts
724
+ var exports_transform = {};
725
+ __export(exports_transform, {
726
+ resolvePath: () => resolvePath,
727
+ project: () => project,
728
+ inferSchema: () => inferSchema,
729
+ detectEntityIndex: () => detectEntityIndex,
730
+ compact: () => compact,
731
+ buildEntityIndex: () => buildEntityIndex,
732
+ applyProjection: () => applyProjection
733
+ });
734
/**
 * Builds a lookup table from `entityUrn` to the item that carries it.
 *
 * Entries that are not objects, or whose `entityUrn` is not a string, are
 * ignored. When several items share the same URN the last one wins.
 *
 * @param {Iterable<unknown> | null | undefined} items - Candidate entities.
 * @returns {Map<string, object>} URN → item map (empty when `items` is
 *   missing or not iterable).
 */
function buildEntityIndex(items) {
  const index = new Map();
  // A missing or non-iterable payload yields an empty index instead of a
  // TypeError from the for...of below.
  if (items == null || typeof items[Symbol.iterator] !== "function") {
    return index;
  }
  for (const item of items) {
    if (item === null || typeof item !== "object") {
      continue;
    }
    const urn = item.entityUrn;
    if (typeof urn === "string") {
      index.set(urn, item);
    }
  }
  return index;
}
745
/**
 * Looks for an `included` array (at the top level, then under `data`) whose
 * entries mostly carry a string `entityUrn`, and indexes the first match.
 *
 * A candidate array is accepted when it has at least two entries and at
 * least half of its first five entries are objects with a string
 * `entityUrn`.
 *
 * @param {unknown} data - Parsed response payload.
 * @returns {Map<string, object> | null} Index built by `buildEntityIndex`,
 *   or `null` when no candidate array qualifies.
 */
function detectEntityIndex(data) {
  if (data === null || data === undefined || typeof data !== "object") {
    return null;
  }

  const carriesUrn = (entry) =>
    entry != null && typeof entry === "object" && typeof entry.entityUrn === "string";

  const nested = data.data;
  const nestedIncluded = nested && typeof nested === "object" ? nested.included : undefined;
  // Top-level `included` is checked before `data.included`.
  const candidateLists = [data.included, nestedIncluded].filter((list) => Array.isArray(list));

  for (const list of candidateLists) {
    if (list.length < 2) {
      continue;
    }
    const probe = list.slice(0, 5);
    let urnCount = 0;
    for (const entry of probe) {
      if (carriesUrn(entry)) {
        urnCount += 1;
      }
    }
    if (urnCount >= probe.length * 0.5) {
      return buildEntityIndex(list);
    }
  }
  return null;
}
724
769
  function resolvePath(obj, path4, entityIndex) {
725
770
  const parts = path4.split(".");
726
771
  let current = [obj];
@@ -1091,6 +1136,20 @@ function mergeContextTemplateParams(params, urlTemplate, contextUrl) {
1091
1136
  }
1092
1137
 
1093
1138
  // ../../src/graph/index.ts
1139
+ var exports_graph = {};
1140
+ __export(exports_graph, {
1141
+ toAgentSkillChunkView: () => toAgentSkillChunkView,
1142
+ resolveEndpointSemantic: () => resolveEndpointSemantic,
1143
+ operationSoftPenalty: () => operationSoftPenalty,
1144
+ knownBindingsFromInputs: () => knownBindingsFromInputs,
1145
+ isRunnable: () => isRunnable,
1146
+ isOperationHardExcluded: () => isOperationHardExcluded,
1147
+ inferEndpointSemantic: () => inferEndpointSemantic,
1148
+ getSkillChunk: () => getSkillChunk,
1149
+ ensureSkillOperationGraph: () => ensureSkillOperationGraph,
1150
+ computeReachableEndpoints: () => computeReachableEndpoints,
1151
+ buildSkillOperationGraph: () => buildSkillOperationGraph
1152
+ });
1094
1153
  function normalizeTokenText(text) {
1095
1154
  return text.replace(/([a-z0-9])([A-Z])/g, "$1 $2").replace(/([A-Z]+)([A-Z][a-z])/g, "$1 $2").replace(/([a-zA-Z])(\d)/g, "$1 $2").replace(/(\d)([a-zA-Z])/g, "$1 $2");
1096
1155
  }
@@ -2203,6 +2262,12 @@ function extractRscDataEndpoints(body) {
2203
2262
  }
2204
2263
 
2205
2264
  // ../../src/reverse-engineer/index.ts
2265
+ var exports_reverse_engineer = {};
2266
+ __export(exports_reverse_engineer, {
2267
+ minePathTemplates: () => minePathTemplates,
2268
+ extractEndpoints: () => extractEndpoints,
2269
+ extractAuthHeaders: () => extractAuthHeaders
2270
+ });
2206
2271
  import { nanoid as nanoid2 } from "nanoid";
2207
2272
  function compactForSemanticExample(value, depth = 0) {
2208
2273
  if (depth > 2 || value == null)
@@ -3378,6 +3443,7 @@ __export(exports_capture, {
3378
3443
  navigatePageForCapture: () => navigatePageForCapture,
3379
3444
  isBrowserAccessAvailable: () => isBrowserAccessAvailable,
3380
3445
  isBlockedAppShell: () => isBlockedAppShell,
3446
+ injectInterceptor: () => injectInterceptor,
3381
3447
  hasUsefulCapturedResponses: () => hasUsefulCapturedResponses,
3382
3448
  filterFirstPartySessionCookies: () => filterFirstPartySessionCookies,
3383
3449
  executeInBrowser: () => executeInBrowser,
@@ -3596,6 +3662,14 @@ async function collectInterceptedRequests(tabId) {
3596
3662
  } catch {}
3597
3663
  return [];
3598
3664
  }
3665
/**
 * Installs the fetch/XHR capture hooks into a tab by evaluating three small
 * scripts in order: shared setup, the fetch wrapper, and the XHR wrapper.
 * Captured exchanges are pushed onto `window.__unbrowse_intercepted`.
 *
 * Each wrapper is guarded so that it is applied at most once per page:
 *  - it only runs when this function's SETUP script initialised the size
 *    limits (`window.__UB_MAX`), so a page already instrumented by another
 *    interceptor that set `__unbrowse_interceptor_installed` is not wrapped
 *    a second time with undefined limits;
 *  - a per-wrapper flag prevents re-wrapping (and duplicate entries) when
 *    this function is called again for the same page.
 *
 * The fetch wrapper also reads the method from a `Request` first argument
 * and the URL from a `URL` first argument; the XHR wrapper stringifies the
 * URL passed to `open` and skips responses whose `responseType` makes
 * `responseText` unreadable.
 *
 * Evaluation failures are deliberately ignored: interception is best-effort.
 *
 * @param {unknown} tabId - Tab identifier forwarded to `evaluate`.
 * @returns {Promise<void>}
 */
async function injectInterceptor(tabId) {
  const SETUP = `(function(){if(window.__unbrowse_interceptor_installed)return;window.__unbrowse_interceptor_installed=true;window.__unbrowse_intercepted=[];window.__UB_MAX=2*1024*1024;window.__UB_MAX_JS=2*1024*1024;window.__UB_MAX_N=500;})()`;
  const FETCH_PATCH = `(function(){if(!window.__unbrowse_interceptor_installed||window.__UB_MAX===void 0||window.__UB_FETCH_PATCHED)return;window.__UB_FETCH_PATCHED=true;var M=window.__UB_MAX,MJ=window.__UB_MAX_JS,MN=window.__UB_MAX_N;var oF=window.fetch;window.fetch=function(){var a=arguments,i=a[0],u=typeof i==='string'?i:(i&&i.url?i.url:(i&&i.href?i.href:'')),o=a[1]||{},m=(o.method||(i&&typeof i.method==='string'&&i.method)||'GET').toUpperCase(),rb=o.body?String(o.body).substring(0,M):void 0,rh={};if(o.headers){if(typeof o.headers.forEach==='function')o.headers.forEach(function(v,k){rh[k]=v});else Object.keys(o.headers).forEach(function(k){rh[k]=o.headers[k]})}return oF.apply(this,a).then(function(r){if(window.__unbrowse_intercepted.length>=MN)return r;var ct=r.headers.get('content-type')||'';var isJ=ct.indexOf('javascript')!==-1||/\\.js(\\?|$)/.test(u);var isD=ct.indexOf('json')!==-1||ct.indexOf('x-protobuf')!==-1||ct.indexOf('text/plain')!==-1||u.indexOf('/api/')!==-1||u.indexOf('graphql')!==-1||u.indexOf('voyager')!==-1;if(!isJ&&!isD)return r;if(/\\.(css|woff2?|png|jpg|svg|ico)(\\?|$)/.test(u))return r;var c=r.clone();c.text().then(function(b){var lim=isJ?MJ:M;if(b.length>lim)return;var rr={};r.headers.forEach(function(v,k){rr[k]=v});window.__unbrowse_intercepted.push({url:u,method:m,request_headers:rh,request_body:rb,response_status:r.status,response_headers:rr,response_body:b,content_type:ct,is_js:isJ,timestamp:new Date().toISOString()})}).catch(function(){});return r}).catch(function(e){throw e})}})()`;
  const XHR_PATCH = `(function(){if(!window.__unbrowse_interceptor_installed||window.__UB_MAX===void 0||window.__UB_XHR_PATCHED)return;window.__UB_XHR_PATCHED=true;var M=window.__UB_MAX,MJ=window.__UB_MAX_JS,MN=window.__UB_MAX_N;var oO=XMLHttpRequest.prototype.open,oS=XMLHttpRequest.prototype.send;XMLHttpRequest.prototype.open=function(m,u){this.__ub_m=m;this.__ub_u=u==null?'':String(u);this.__ub_h={};var oSH=this.setRequestHeader.bind(this);this.setRequestHeader=function(k,v){this.__ub_h[k]=v;oSH(k,v)}.bind(this);return oO.apply(this,arguments)};XMLHttpRequest.prototype.send=function(b){var x=this;x.addEventListener('load',function(){if(window.__unbrowse_intercepted.length>=MN)return;if(x.responseType&&x.responseType!=='text')return;var ct=x.getResponseHeader('content-type')||'',u=x.__ub_u||'';var isJ=ct.indexOf('javascript')!==-1||/\\.js(\\?|$)/.test(u);var isD=ct.indexOf('json')!==-1||ct.indexOf('x-protobuf')!==-1||ct.indexOf('text/plain')!==-1||u.indexOf('/api/')!==-1||u.indexOf('graphql')!==-1||u.indexOf('voyager')!==-1;if(!isJ&&!isD)return;if(/\\.(css|woff2?|png|jpg|svg|ico)(\\?|$)/.test(u))return;var rb=x.responseText||'';var lim=isJ?MJ:M;if(rb.length>lim)return;window.__unbrowse_intercepted.push({url:u,method:(x.__ub_m||'GET').toUpperCase(),request_headers:x.__ub_h||{},request_body:b?String(b).substring(0,M):void 0,response_status:x.status,response_headers:{},response_body:rb,content_type:ct,is_js:isJ,timestamp:new Date().toISOString()})});return oS.apply(this,arguments)}})()`;
  // Order matters: SETUP must have run before either wrapper's guard passes.
  for (const chunk of [SETUP, FETCH_PATCH, XHR_PATCH]) {
    // Best-effort: a failed evaluation must not abort the capture flow.
    await evaluate(tabId, chunk).catch(() => {});
  }
}
3599
3673
  function mergePassiveCaptureData(intercepted, harEntries, extensionEntries, responseBodies) {
3600
3674
  const seen = new Map;
3601
3675
  for (const entry of intercepted) {
@@ -4385,7 +4459,7 @@ var MAX_CONCURRENT_TABS = 3, activeTabs = 0, waitQueue, activeTabRegistry, inter
4385
4459
  if (window.__unbrowse_interceptor_installed) return;
4386
4460
  window.__unbrowse_interceptor_installed = true;
4387
4461
  window.__unbrowse_intercepted = [];
4388
- var MAX_BODY = 512 * 1024;
4462
+ var MAX_BODY = 2 * 1024 * 1024;
4389
4463
  var MAX_JS_BODY = 2 * 1024 * 1024;
4390
4464
  var MAX_ENTRIES = 500;
4391
4465
 
@@ -4409,9 +4483,10 @@ var MAX_CONCURRENT_TABS = 3, activeTabs = 0, waitQueue, activeTabRegistry, inter
4409
4483
  if (window.__unbrowse_intercepted.length >= MAX_ENTRIES) return response;
4410
4484
  var ct = response.headers.get('content-type') || '';
4411
4485
  var isJs = ct.indexOf('javascript') !== -1 || /\\.js(\\?|$)/.test(url);
4412
- var isData = ct.indexOf('application/json') !== -1 || ct.indexOf('+json') !== -1 ||
4413
- ct.indexOf('application/x-protobuf') !== -1 || ct.indexOf('text/plain') !== -1 ||
4414
- url.indexOf('batchexecute') !== -1 || url.indexOf('/api/') !== -1;
4486
+ var isData = ct.indexOf('json') !== -1 || ct.indexOf('application/x-protobuf') !== -1 ||
4487
+ ct.indexOf('text/plain') !== -1 ||
4488
+ url.indexOf('batchexecute') !== -1 || url.indexOf('/api/') !== -1 ||
4489
+ url.indexOf('graphql') !== -1 || url.indexOf('voyager') !== -1;
4415
4490
  if (!isJs && !isData) return response;
4416
4491
  if (/\\.(css|woff2?|png|jpg|svg|ico)(\\?|$)/.test(url)) return response;
4417
4492
  var clone = response.clone();
@@ -4458,9 +4533,10 @@ var MAX_CONCURRENT_TABS = 3, activeTabs = 0, waitQueue, activeTabRegistry, inter
4458
4533
  var ct = xhr.getResponseHeader('content-type') || '';
4459
4534
  var url = xhr.__unbrowse_url || '';
4460
4535
  var isJs = ct.indexOf('javascript') !== -1 || /\\.js(\\?|$)/.test(url);
4461
- var isData = ct.indexOf('application/json') !== -1 || ct.indexOf('+json') !== -1 ||
4462
- ct.indexOf('application/x-protobuf') !== -1 || ct.indexOf('text/plain') !== -1 ||
4463
- url.indexOf('batchexecute') !== -1 || url.indexOf('/api/') !== -1;
4536
+ var isData = ct.indexOf('json') !== -1 || ct.indexOf('application/x-protobuf') !== -1 ||
4537
+ ct.indexOf('text/plain') !== -1 ||
4538
+ url.indexOf('batchexecute') !== -1 || url.indexOf('/api/') !== -1 ||
4539
+ url.indexOf('graphql') !== -1 || url.indexOf('voyager') !== -1;
4464
4540
  if (!isJs && !isData) return;
4465
4541
  if (/\\.(css|woff2?|png|jpg|svg|ico)(\\?|$)/.test(url)) return;
4466
4542
  var respBody = xhr.responseText || '';
@@ -4547,6 +4623,21 @@ import { join as join3 } from "path";
4547
4623
  import { homedir as homedir2, hostname as hostname2 } from "os";
4548
4624
  import { randomBytes as randomBytes2, createHash as createHash2 } from "crypto";
4549
4625
  import { createInterface as createInterface2 } from "readline";
4626
/**
 * Decodes a base64-encoded UTF-8 JSON document.
 *
 * Both the standard alphabet and the URL-safe alphabet (`-`/`_`, optional
 * padding) are accepted; embedded whitespace is ignored. `atob` is used when
 * available, with `Buffer` as the fallback.
 *
 * @param {string} value - Base64 / base64url text.
 * @returns {unknown} The parsed JSON value, or `undefined` when `value` is
 *   not a non-empty string, is not valid base64, or does not contain JSON.
 */
function decodeBase64Json2(value) {
  if (typeof value !== "string" || value.length === 0) {
    return;
  }
  // atob rejects the URL-safe alphabet, so map it back to the standard one
  // and restore any padding that was stripped.
  const compact = value.replace(/\s+/g, "").replace(/-/g, "+").replace(/_/g, "/");
  const normalized = compact + "=".repeat((4 - (compact.length % 4)) % 4);
  try {
    if (typeof globalThis !== "undefined" && typeof globalThis.atob === "function") {
      const binary = globalThis.atob(normalized);
      // atob yields one char per byte; rebuild the bytes so multi-byte
      // UTF-8 sequences decode correctly.
      const bytes = new Uint8Array(binary.length);
      for (let i = 0; i < binary.length; i++) {
        bytes[i] = binary.charCodeAt(i);
      }
      return JSON.parse(new TextDecoder("utf-8").decode(bytes));
    }
    return JSON.parse(Buffer.from(normalized, "base64").toString("utf8"));
  } catch {
    return;
  }
}
4550
4641
  function scopedSkillKey(skillId, scopeId) {
4551
4642
  return scopeId ? `${scopeId}:${skillId}` : skillId;
4552
4643
  }
@@ -4680,7 +4771,7 @@ async function findUsableApiKey2() {
4680
4771
  async function api2(method, path4, body, opts) {
4681
4772
  const key = opts?.noAuth ? "" : getApiKey2();
4682
4773
  const controller = new AbortController;
4683
- const timer = setTimeout(() => controller.abort(), API_TIMEOUT_MS2);
4774
+ const timer = setTimeout(() => controller.abort(), opts?.timeoutMs ?? API_TIMEOUT_MS2);
4684
4775
  let res;
4685
4776
  try {
4686
4777
  res = await fetch(`${API_URL2}${path4}`, {
@@ -4709,8 +4800,9 @@ async function api2(method, path4, body, opts) {
4709
4800
  throw new Error("ToS update required. Restart unbrowse to accept new terms.");
4710
4801
  }
4711
4802
  if (res.status === 402) {
4712
- const paymentTerms = res.headers.get("X-Payment-Required");
4713
- const terms = paymentTerms ? JSON.parse(paymentTerms) : data.terms;
4803
+ const paymentRequired = res.headers.get("PAYMENT-REQUIRED");
4804
+ const legacyPaymentTerms = res.headers.get("X-Payment-Required");
4805
+ const terms = paymentRequired ? decodeBase64Json2(paymentRequired) : legacyPaymentTerms ? JSON.parse(legacyPaymentTerms) : data.terms;
4714
4806
  const err = new Error(`Payment required: ${data.error ?? "This skill requires payment"}`);
4715
4807
  err.x402 = true;
4716
4808
  err.terms = terms;
@@ -5002,7 +5094,7 @@ async function publishSkill(draft) {
5002
5094
  }
5003
5095
  if (LOCAL_ONLY2)
5004
5096
  throw new Error("local-only mode");
5005
- return api2("POST", "/v1/skills", draft);
5097
+ return api2("POST", "/v1/skills", draft, { timeoutMs: PUBLISH_TIMEOUT_MS2 });
5006
5098
  }
5007
5099
  async function deprecateSkill(skillId) {
5008
5100
  if (LOCAL_ONLY2)
@@ -5194,7 +5286,7 @@ async function getCreatorEarnings(agentId) {
5194
5286
  async function setSkillPrice(skillId, priceUsd) {
5195
5287
  return api2("PATCH", `/v1/skills/${skillId}`, { base_price_usd: priceUsd });
5196
5288
  }
5197
- var API_URL2, PROFILE_NAME2, recentLocalSkills2, LOCAL_ONLY2, EMAIL_RE2, API_TIMEOUT_MS2;
5289
+ var API_URL2, PROFILE_NAME2, recentLocalSkills2, LOCAL_ONLY2, EMAIL_RE2, API_TIMEOUT_MS2, PUBLISH_TIMEOUT_MS2;
5198
5290
  var init_client2 = __esm(() => {
5199
5291
  API_URL2 = process.env.UNBROWSE_BACKEND_URL || "https://beta-api.unbrowse.ai";
5200
5292
  PROFILE_NAME2 = sanitizeProfileName2(process.env.UNBROWSE_PROFILE ?? "");
@@ -5202,6 +5294,7 @@ var init_client2 = __esm(() => {
5202
5294
  LOCAL_ONLY2 = process.env.UNBROWSE_LOCAL_ONLY === "1";
5203
5295
  EMAIL_RE2 = /^[^\s@]+@[^\s@]+\.[^\s@]+$/i;
5204
5296
  API_TIMEOUT_MS2 = parseInt(process.env.UNBROWSE_API_TIMEOUT ?? "8000", 10);
5297
+ PUBLISH_TIMEOUT_MS2 = parseInt(process.env.UNBROWSE_PUBLISH_TIMEOUT ?? "30000", 10);
5205
5298
  });
5206
5299
 
5207
5300
  // ../../src/marketplace/index.ts
@@ -7526,6 +7619,14 @@ function assessIntentResult(data, intent) {
7526
7619
  }
7527
7620
 
7528
7621
  // ../../src/extraction/index.ts
7622
+ var exports_extraction = {};
7623
+ __export(exports_extraction, {
7624
+ parseStructured: () => parseStructured,
7625
+ extractSPAData: () => extractSPAData,
7626
+ extractFromDOMWithHint: () => extractFromDOMWithHint,
7627
+ extractFromDOM: () => extractFromDOM,
7628
+ cleanDOM: () => cleanDOM
7629
+ });
7529
7630
  import * as cheerio from "cheerio";
7530
7631
  function extractFlashNoticeSpecial(html, intent) {
7531
7632
  if (!/\b(flash|message|messages|alert|success|error|warning)\b/i.test(intent))
@@ -9057,6 +9158,11 @@ var init_agent_augment = __esm(() => {
9057
9158
  });
9058
9159
 
9059
9160
  // ../../src/execution/search-forms.ts
9161
+ var exports_search_forms = {};
9162
+ __export(exports_search_forms, {
9163
+ isStructuredSearchForm: () => isStructuredSearchForm,
9164
+ detectSearchForms: () => detectSearchForms
9165
+ });
9060
9166
  function isStructuredSearchForm(spec) {
9061
9167
  return spec.fields.length > 0 && !!spec.submit_selector;
9062
9168
  }
@@ -9370,6 +9476,26 @@ var init_payments = __esm(() => {
9370
9476
  });
9371
9477
 
9372
9478
  // ../../src/execution/index.ts
9479
+ var exports_execution = {};
9480
+ __export(exports_execution, {
9481
+ validateExtractionQuality: () => validateExtractionQuality,
9482
+ templatizeQueryParams: () => templatizeQueryParams,
9483
+ shouldIgnoreLearnedBrowserStrategy: () => shouldIgnoreLearnedBrowserStrategy,
9484
+ resolveExecutionUrlTemplate: () => resolveExecutionUrlTemplate,
9485
+ rankEndpoints: () => rankEndpoints,
9486
+ projectResultForIntent: () => projectResultForIntent,
9487
+ isCanonicalReplayEndpoint: () => isCanonicalReplayEndpoint,
9488
+ isBundleInferredEndpoint: () => isBundleInferredEndpoint,
9489
+ executeSkill: () => executeSkill,
9490
+ executeEndpoint: () => executeEndpoint,
9491
+ deriveStructuredDataReplayUrl: () => deriveStructuredDataReplayUrl,
9492
+ deriveStructuredDataReplayTemplate: () => deriveStructuredDataReplayTemplate,
9493
+ deriveStructuredDataReplayCandidatesFromInputs: () => deriveStructuredDataReplayCandidatesFromInputs,
9494
+ deriveStructuredDataReplayCandidates: () => deriveStructuredDataReplayCandidates,
9495
+ buildStructuredReplayHeaders: () => buildStructuredReplayHeaders,
9496
+ buildPageArtifactCapture: () => buildPageArtifactCapture,
9497
+ buildCanonicalDocumentEndpoint: () => buildCanonicalDocumentEndpoint
9498
+ });
9373
9499
  import { nanoid as nanoid5 } from "nanoid";
9374
9500
  function stampTrace(trace) {
9375
9501
  trace.trace_version = TRACE_VERSION;
@@ -10099,6 +10225,24 @@ async function executeSkill(skill, params = {}, projection, options) {
10099
10225
  const { endpoint_id: _, ...cleanParams } = params;
10100
10226
  return executeEndpoint(skill, target, cleanParams, projection, options);
10101
10227
  }
10228
+ log("exec", `endpoint ${params.endpoint_id} not found in skill ${skill.skill_id} (${skill.endpoints.length} endpoints: ${skill.endpoints.map((e) => e.endpoint_id).join(", ")})`);
10229
+ const trace = {
10230
+ trace_id: nanoid5(),
10231
+ skill_id: skill.skill_id,
10232
+ endpoint_id: String(params.endpoint_id),
10233
+ started_at: new Date().toISOString(),
10234
+ completed_at: new Date().toISOString(),
10235
+ success: false,
10236
+ error: `endpoint_not_found: ${params.endpoint_id} not in skill ${skill.skill_id}`
10237
+ };
10238
+ return {
10239
+ trace,
10240
+ result: {
10241
+ error: "endpoint_not_found",
10242
+ message: `Endpoint ${params.endpoint_id} not found in skill ${skill.skill_id}. Available: ${skill.endpoints.map((e) => `${e.endpoint_id} (${e.description?.slice(0, 50)})`).join(", ")}`,
10243
+ available_endpoints: skill.endpoints.map((e) => ({ endpoint_id: e.endpoint_id, description: e.description }))
10244
+ }
10245
+ };
10102
10246
  }
10103
10247
  const endpoint = selectBestEndpoint(skill.endpoints, options?.intent ?? skill.intent_signature, skill.domain, options?.contextUrl);
10104
10248
  return executeEndpoint(skill, endpoint, params, projection, options);
@@ -10573,6 +10717,22 @@ async function executeDomExtractionEndpoint(endpoint, url, intent, authHeaders,
10573
10717
  console.log(`[ssr-fast] miss, falling back to browser`);
10574
10718
  }
10575
10719
  const captured = await captureSession(url, authHeaders, cookies, intent);
10720
+ if (captured.requests.length > 0) {
10721
+ const { extractEndpoints: extractEps } = await Promise.resolve().then(() => (init_reverse_engineer(), exports_reverse_engineer));
10722
+ const apiEndpoints = extractEps(captured.requests, undefined, { pageUrl: url, finalUrl: captured.final_url });
10723
+ const jsonEndpoints = apiEndpoints.filter((ep) => ep.response_schema && !ep.dom_extraction);
10724
+ if (jsonEndpoints.length > 0) {
10725
+ const best = jsonEndpoints[0];
10726
+ const matchingReq = captured.requests.find((r) => r.url.includes(best.url_template.split("?")[0].split("{")[0]) && r.response_body && r.response_status >= 200 && r.response_status < 400);
10727
+ if (matchingReq?.response_body) {
10728
+ try {
10729
+ const data = JSON.parse(matchingReq.response_body);
10730
+ console.log(`[dom-exec] found API response from browser capture: ${matchingReq.url.substring(0, 80)}`);
10731
+ return { data, status: matchingReq.response_status, trace_id: nanoid5() };
10732
+ } catch {}
10733
+ }
10734
+ }
10735
+ }
10576
10736
  const html = captured.html ?? "";
10577
10737
  const extracted = extractFromDOMWithHint(html, intent, endpoint.dom_extraction);
10578
10738
  if (extracted.data) {
@@ -10947,7 +11107,19 @@ async function executeEndpoint(skill, endpoint, params = {}, projection, options
10947
11107
  if (result.status >= 200 && result.status < 400 && !shouldFallbackToBrowserReplay(result.data, endpoint, options?.intent ?? skill.intent_signature, options?.contextUrl)) {
10948
11108
  strategy = "server";
10949
11109
  } else if (endpoint.trigger_url && isSafe) {
10950
- result = await triggerAndIntercept(endpoint.trigger_url, endpoint.url_template, cookies, authHeaders);
11110
+ let triggerUrl = endpoint.trigger_url;
11111
+ if (Object.keys(mergedParams).length > 0) {
11112
+ try {
11113
+ const tu = new URL(endpoint.trigger_url);
11114
+ for (const [k, v] of Object.entries(mergedParams)) {
11115
+ if (v != null && !reservedMetaParams.has(k)) {
11116
+ tu.searchParams.set(k, String(v));
11117
+ }
11118
+ }
11119
+ triggerUrl = tu.toString();
11120
+ } catch {}
11121
+ }
11122
+ result = await triggerAndIntercept(triggerUrl, endpoint.url_template, cookies, authHeaders);
10951
11123
  strategy = "trigger-intercept";
10952
11124
  } else {
10953
11125
  result = await withRetry(browserCall, (r) => isRetryableStatus(r.status));
@@ -10962,8 +11134,19 @@ async function executeEndpoint(skill, endpoint, params = {}, projection, options
10962
11134
  strategy = "server";
10963
11135
  }
10964
11136
  } else if (endpointStrategy === "trigger-intercept" && endpoint.trigger_url && isSafe) {
10965
- log("exec", `using learned strategy trigger-intercept via ${endpoint.trigger_url}`);
10966
- result = await triggerAndIntercept(endpoint.trigger_url, endpoint.url_template, cookies, authHeaders);
11137
+ let triggerUrl = endpoint.trigger_url;
11138
+ if (Object.keys(mergedParams).length > 0) {
11139
+ try {
11140
+ const tu = new URL(endpoint.trigger_url);
11141
+ for (const [k, v] of Object.entries(mergedParams)) {
11142
+ if (v != null && !reservedMetaParams.has(k))
11143
+ tu.searchParams.set(k, String(v));
11144
+ }
11145
+ triggerUrl = tu.toString();
11146
+ } catch {}
11147
+ }
11148
+ log("exec", `using learned strategy trigger-intercept via ${triggerUrl}`);
11149
+ result = await triggerAndIntercept(triggerUrl, endpoint.url_template, cookies, authHeaders);
10967
11150
  strategy = "trigger-intercept";
10968
11151
  } else if (endpointStrategy === "browser") {
10969
11152
  if (shouldIgnoreLearnedBrowserStrategy(endpoint, url)) {
@@ -10994,7 +11177,18 @@ async function executeEndpoint(skill, endpoint, params = {}, projection, options
10994
11177
  } else {
10995
11178
  log("exec", `server fetch returned ${result.status}, falling back`);
10996
11179
  if (endpoint.trigger_url && isSafe) {
10997
- result = await triggerAndIntercept(endpoint.trigger_url, endpoint.url_template, cookies, authHeaders);
11180
+ let triggerUrl = endpoint.trigger_url;
11181
+ if (Object.keys(mergedParams).length > 0) {
11182
+ try {
11183
+ const tu = new URL(endpoint.trigger_url);
11184
+ for (const [k, v] of Object.entries(mergedParams)) {
11185
+ if (v != null && !reservedMetaParams.has(k))
11186
+ tu.searchParams.set(k, String(v));
11187
+ }
11188
+ triggerUrl = tu.toString();
11189
+ } catch {}
11190
+ }
11191
+ result = await triggerAndIntercept(triggerUrl, endpoint.url_template, cookies, authHeaders);
10998
11192
  strategy = "trigger-intercept";
10999
11193
  } else {
11000
11194
  result = await withRetry(browserCall, (r) => isRetryableStatus(r.status));
@@ -13404,22 +13598,6 @@ function dedupeObservedOverBundle(ranked) {
13404
13598
  }
13405
13599
  return Array.from(byRoute.values()).sort((a, b) => b.score - a.score);
13406
13600
  }
13407
- function extractBinaryVerdict(payload) {
13408
- for (const value of Object.values(payload)) {
13409
- if (typeof value !== "string")
13410
- continue;
13411
- const normalized = value.trim().toLowerCase();
13412
- if (normalized === "pass" || normalized.startsWith("pass "))
13413
- return "pass";
13414
- if (normalized === "fail" || normalized.startsWith("fail "))
13415
- return "fail";
13416
- if (normalized.includes('"pass"'))
13417
- return "pass";
13418
- if (normalized.includes('"fail"'))
13419
- return "fail";
13420
- }
13421
- return "skip";
13422
- }
13423
13601
  function obviousSemanticMismatch(intent, endpoint, result) {
13424
13602
  const haystack = `${intent} ${endpoint.url_template} ${endpoint.description ?? ""}`.toLowerCase();
13425
13603
  const wantsChannels = /\b(channel|channels|guild|guilds|message|messages|thread|threads|dm|chat)\b/.test(intent.toLowerCase());
@@ -13692,88 +13870,7 @@ async function inferParamsFromIntent(urlTemplate, intent, unboundParams, endpoin
13692
13870
  }
13693
13871
  }
13694
13872
  }
13695
- const system = `You extract URL query/path parameter values from a user's natural-language intent.
13696
- Given a URL template with placeholder parameters and the user's intent, return a JSON object mapping parameter names to their values.
13697
-
13698
- Rules:
13699
- - Only fill in parameters where the intent clearly implies a value
13700
- - For search/query parameters, extract the search terms from the intent
13701
- - For filter parameters (location, category, price, date, etc.), extract if mentioned
13702
- - Strip meta-phrases like "search for", "find me", "on amazon" — just return the core value
13703
- - If you can't determine a value for a parameter, omit it from the response
13704
- - Return raw values, not URL-encoded
13705
-
13706
- Examples:
13707
- URL: https://amazon.com/s?k={k}&ref={ref}
13708
- Intent: "search for wireless headphones under $50"
13709
- → {"k": "wireless headphones under $50"}
13710
-
13711
- URL: https://yelp.com/search?find_desc={find_desc}&find_loc={find_loc}
13712
- Intent: "find pizza restaurants in san francisco"
13713
- → {"find_desc": "pizza restaurants", "find_loc": "san francisco"}
13714
-
13715
- URL: https://booking.com/searchresults.html?ss={ss}&checkin={checkin}&checkout={checkout}
13716
- Intent: "hotels in tokyo for march 20 to march 25"
13717
- → {"ss": "tokyo", "checkin": "2026-03-20", "checkout": "2026-03-25"}`;
13718
- const user = `URL template: ${urlTemplate}
13719
- ${endpointDescription ? `Endpoint description: ${endpointDescription}` : ""}
13720
- Unbound parameters: ${unboundParams.join(", ")}
13721
- User intent: ${intent}
13722
-
13723
- Return JSON mapping parameter names to values. Only include parameters you can confidently fill from the intent.`;
13724
- const result = await callJsonAgent(system, user, {});
13725
- const unboundSet = new Set(unboundParams);
13726
- const filtered = {};
13727
- for (const [k, v] of Object.entries(result)) {
13728
- if (unboundSet.has(k) && v != null && v !== "") {
13729
- filtered[k] = String(v);
13730
- }
13731
- }
13732
- return filtered;
13733
- }
13734
- async function callJsonAgent(system, user, fallback) {
13735
- const providers = [
13736
- OPENAI_API_KEY ? { url: OPENAI_CHAT_URL2, key: OPENAI_API_KEY, model: JUDGE_MODEL } : null,
13737
- NEBIUS_API_KEY ? { url: CHAT_URL2, key: NEBIUS_API_KEY, model: JUDGE_MODEL } : null
13738
- ].filter((p) => !!p);
13739
- if (providers.length === 0)
13740
- return fallback;
13741
- const controller = new AbortController;
13742
- const timeout = setTimeout(() => controller.abort(), 8000);
13743
- try {
13744
- for (const provider of providers) {
13745
- const res = await fetch(provider.url, {
13746
- method: "POST",
13747
- headers: {
13748
- "content-type": "application/json",
13749
- Authorization: `Bearer ${provider.key}`
13750
- },
13751
- body: JSON.stringify({
13752
- model: provider.model,
13753
- temperature: 0,
13754
- max_tokens: 400,
13755
- response_format: { type: "json_object" },
13756
- messages: [
13757
- { role: "system", content: system },
13758
- { role: "user", content: user }
13759
- ]
13760
- }),
13761
- signal: controller.signal
13762
- });
13763
- if (!res.ok)
13764
- continue;
13765
- const json = await res.json();
13766
- const content = json.choices?.[0]?.message?.content;
13767
- if (!content)
13768
- continue;
13769
- return JSON.parse(content);
13770
- }
13771
- return fallback;
13772
- } catch {
13773
- return fallback;
13774
- } finally {
13775
- clearTimeout(timeout);
13776
- }
13873
+ return {};
13777
13874
  }
13778
13875
  async function withOpTimeout(label, ms, work) {
13779
13876
  return await Promise.race([
@@ -13922,62 +14019,21 @@ function prioritizeIntentMatchedApis(ranked, intent, contextUrl) {
13922
14019
  ...ranked.filter((candidate) => !preferredIds.has(candidate.endpoint.endpoint_id))
13923
14020
  ];
13924
14021
  }
13925
- async function agentSelectEndpoint(intent, skill, ranked, contextUrl) {
13926
- const topRanked = ranked.slice(0, 5);
13927
- const preferred = inferPreferredEntityTokens(intent);
13928
- const concreteEntityIntent = isConcreteEntityDetailIntent(intent, contextUrl);
13929
- const hasObservedCandidate = topRanked.some((r) => !/inferred from js bundle/i.test(r.endpoint.description ?? ""));
13930
- const narrowedBase = hasObservedCandidate ? topRanked.filter((r) => !/inferred from js bundle/i.test(r.endpoint.description ?? "")) : topRanked;
13931
- const hasPreferredObservedApi = concreteEntityIntent && preferred.length > 0 && narrowedBase.some((candidate) => candidateMatchesPreferredEntity(candidate, preferred) && !isDocumentLikeCandidate(candidate, contextUrl));
13932
- const narrowed = hasPreferredObservedApi ? narrowedBase.filter((candidate) => !isDocumentLikeCandidate(candidate, contextUrl)) : narrowedBase;
13933
- const top = narrowed.map((r) => ({
13934
- endpoint_id: r.endpoint.endpoint_id,
13935
- method: r.endpoint.method,
13936
- url: r.endpoint.url_template,
13937
- description: r.endpoint.description ?? "",
13938
- score: Math.round(r.score * 10) / 10,
13939
- schema: r.endpoint.response_schema ? summarizeSchema(r.endpoint.response_schema) : null,
13940
- dom_extraction: !!r.endpoint.dom_extraction,
13941
- trigger_url: r.endpoint.trigger_url ?? null
13942
- }));
13943
- const fallback = { ordered_endpoint_ids: top.map((r) => r.endpoint_id) };
13944
- const judged = await callJsonAgent("You pick the best endpoint(s) for a website task. Return JSON only.", JSON.stringify({
13945
- task: "rank_endpoints_for_execution",
13946
- intent,
13947
- domain: skill.domain,
13948
- context_url: contextUrl ?? null,
13949
- endpoints: top,
13950
- rules: [
13951
- "Prefer endpoints that directly satisfy the intent, not adjacent metadata.",
13952
- "Prefer final user-visible data over experiments, config, telemetry, auth, status, or affinity endpoints.",
13953
- "If the intent asks for channels/messages/people/documents/listings, reject endpoints that return unrelated experiments or scores.",
13954
- "Return ordered_endpoint_ids best-first. Do not invent ids."
13955
- ]
13956
- }), fallback);
13957
- const orderedRaw = judged.ordered_endpoint_ids ?? judged.endpoint_ids ?? judged.ids ?? [];
13958
- const ordered = orderedRaw.filter((id) => top.some((r) => r.endpoint_id === id));
13959
- return ordered.length > 0 ? ordered : fallback.ordered_endpoint_ids;
13960
- }
13961
- async function agentJudgeExecution(intent, endpoint, result) {
14022
+ async function agentSelectEndpoint(_intent, _skill, _ranked, _contextUrl) {
14023
+ return null;
14024
+ }
14025
+ function agentJudgeExecution(intent, endpoint, result) {
13962
14026
  if (obviousSemanticMismatch(intent, endpoint, result))
13963
14027
  return "fail";
13964
- const verdict = await callJsonAgent("You judge whether returned data satisfies a web data intent. Return JSON only.", JSON.stringify({
13965
- task: "judge_endpoint_result",
13966
- intent,
13967
- endpoint: {
13968
- endpoint_id: endpoint.endpoint_id,
13969
- method: endpoint.method,
13970
- url: endpoint.url_template,
13971
- description: endpoint.description ?? ""
13972
- },
13973
- result,
13974
- rules: [
13975
- "pass only if the returned data directly answers the intent",
13976
- "fail if the data is empty, unrelated, config, experiment, telemetry, status, auth/session, or only a weak proxy",
13977
- "for list/search intents, wrong entity type is fail"
13978
- ]
13979
- }), { verdict: "skip" });
13980
- return verdict.verdict ?? verdict.result ?? verdict.judgment ?? extractBinaryVerdict(verdict);
14028
+ if (result == null)
14029
+ return "fail";
14030
+ if (Array.isArray(result))
14031
+ return result.length > 0 ? "pass" : "fail";
14032
+ if (typeof result === "object")
14033
+ return Object.keys(result).length > 0 ? "pass" : "fail";
14034
+ if (typeof result === "string")
14035
+ return result.length > 0 ? "pass" : "fail";
14036
+ return "skip";
13981
14037
  }
13982
14038
  function normalizeParityRows(data, intent) {
13983
14039
  const projected = projectIntentData(data, intent);
@@ -14052,22 +14108,9 @@ function localParityVerdict(intent, browserBaseline, replayResult) {
14052
14108
  }
14053
14109
  return { verdict: "skip", reason: `low_overlap_${overlapRatio.toFixed(2)}` };
14054
14110
  }
14055
- async function agentJudgeParity(intent, browserBaseline, replayResult) {
14056
- const browserProjected = projectIntentData(browserBaseline, intent);
14057
- const replayProjected = projectIntentData(replayResult, intent);
14058
- const verdict = await callJsonAgent("You judge whether a replay/API result is close enough to the browser-visible result for the same web task. Return JSON only.", JSON.stringify({
14059
- task: "judge_browser_replay_parity",
14060
- intent,
14061
- browser_result: browserProjected,
14062
- replay_result: replayProjected,
14063
- rules: [
14064
- "This is a soft parity check, not strict equality.",
14065
- "Pass when the replay captures substantially the same user-visible entities or records, even if order, counts, or some fields differ.",
14066
- "Fail when the replay is a different entity type, obviously unrelated, or misses almost all visible items.",
14067
- "Skip when evidence is too sparse or ambiguous."
14068
- ]
14069
- }), { verdict: "skip" });
14070
- return verdict.verdict ?? verdict.result ?? verdict.judgment ?? extractBinaryVerdict(verdict);
14111
+ function agentJudgeParity(intent, browserBaseline, replayResult) {
14112
+ const local = localParityVerdict(intent, browserBaseline, replayResult);
14113
+ return local.verdict;
14071
14114
  }
14072
14115
  function resolveEndpointTemplateBindings(endpoint, params = {}, contextUrl) {
14073
14116
  const merged = mergeContextTemplateParams(params, endpoint.url_template, contextUrl);
@@ -14294,7 +14337,8 @@ async function resolveAndExecute(intent, params = {}, context, projection, optio
14294
14337
  example_fields: r.endpoint.semantic?.example_fields?.slice(0, 12),
14295
14338
  sample_values: extractSampleValues(r.endpoint.semantic?.example_response_compact),
14296
14339
  dom_extraction: !!r.endpoint.dom_extraction,
14297
- trigger_url: r.endpoint.trigger_url
14340
+ trigger_url: r.endpoint.trigger_url,
14341
+ needs_params: r.endpoint.semantic?.requires?.some((b) => b.required) ?? false
14298
14342
  })),
14299
14343
  ...extraFields
14300
14344
  },
@@ -14608,7 +14652,7 @@ async function resolveAndExecute(intent, params = {}, context, projection, optio
14608
14652
  continue;
14609
14653
  }
14610
14654
  const trustDomExtraction = candidate.endpoint.dom_extraction && !isCapturedPageArtifact && localAssessment.verdict !== "fail" && candidate.score >= 0;
14611
- const judged = localAssessment.verdict === "pass" || trustDomExtraction ? "pass" : await agentJudgeExecution(intent, candidate.endpoint, execOut.result);
14655
+ const judged = localAssessment.verdict === "pass" || trustDomExtraction ? "pass" : agentJudgeExecution(intent, candidate.endpoint, execOut.result);
14612
14656
  decisionTrace.autoexec_attempts.push({
14613
14657
  endpoint_id: candidate.endpoint.endpoint_id,
14614
14658
  score: Math.round(candidate.score * 10) / 10,
@@ -14918,9 +14962,104 @@ async function resolveAndExecute(intent, params = {}, context, projection, optio
14918
14962
  }
14919
14963
  }
14920
14964
  }
14965
+ const shouldBypassBrowserFirstPass = shouldBypassLiveCaptureQueue(context?.url);
14966
+ if (context?.url && !agentChoseEndpoint && !forceCapture && !shouldBypassBrowserFirstPass) {
14967
+ console.log(`[fast-path] no local cache for ${requestedDomain} — skipping marketplace, going to browser`);
14968
+ (async () => {
14969
+ try {
14970
+ const { domain_results, global_results } = await searchIntentResolve(queryIntent, requestedDomain ?? undefined, MARKETPLACE_DOMAIN_SEARCH_K, MARKETPLACE_GLOBAL_SEARCH_K);
14971
+ const totalResults = domain_results.length + global_results.length;
14972
+ if (totalResults > 0) {
14973
+ console.log(`[fast-path:bg] marketplace found ${totalResults} candidates — will be cached for next resolve`);
14974
+ }
14975
+ } catch {}
14976
+ })();
14977
+ const firstPassResult = await tryFirstPassBrowserAction(intent, params, context.url, { signal: options?.signal, clientScope: options?.client_scope });
14978
+ decisionTrace.first_pass = {
14979
+ intentClass: firstPassResult.intentClass,
14980
+ actionTaken: firstPassResult.actionTaken,
14981
+ hit: firstPassResult.hit,
14982
+ interceptedCount: firstPassResult.interceptedEntries.length,
14983
+ timeMs: firstPassResult.timeMs,
14984
+ fast_path: true
14985
+ };
14986
+ if (firstPassResult.hit && firstPassResult.miniSkill) {
14987
+ const fpNow = new Date().toISOString();
14988
+ const trace2 = {
14989
+ trace_id: nanoid7(),
14990
+ skill_id: firstPassResult.miniSkill.skill_id,
14991
+ endpoint_id: firstPassResult.miniSkill.endpoints[0]?.endpoint_id ?? "",
14992
+ started_at: fpNow,
14993
+ completed_at: fpNow,
14994
+ success: true,
14995
+ network_events: firstPassResult.interceptedEntries
14996
+ };
14997
+ return {
14998
+ result: firstPassResult.result,
14999
+ trace: trace2,
15000
+ source: "first-pass",
15001
+ skill: firstPassResult.miniSkill,
15002
+ timing: finalize("first-pass", firstPassResult.result, firstPassResult.miniSkill.skill_id, firstPassResult.miniSkill, trace2)
15003
+ };
15004
+ }
15005
+ console.log(`[fast-path] first-pass miss — opening browse session for agent`);
15006
+ if (firstPassResult.tabId && context.url) {
15007
+ const tabId = firstPassResult.tabId;
15008
+ const domain = new URL(context.url).hostname.replace(/^www\./, "");
15009
+ try {
15010
+ const { extractBrowserCookies: extractBrowserCookies2 } = await Promise.resolve().then(() => (init_browser_cookies(), exports_browser_cookies));
15011
+ const { cookies } = extractBrowserCookies2(domain);
15012
+ for (const c of cookies)
15013
+ await setCookie(tabId, c).catch(() => {});
15014
+ } catch {}
15015
+ await evaluate(tabId, (await Promise.resolve().then(() => (init_capture(), exports_capture))).INTERCEPTOR_SCRIPT).catch(() => {});
15016
+ await harStart(tabId).catch(() => {});
15017
+ try {
15018
+ const routesModule = await init_routes().then(() => exports_routes);
15019
+ if (typeof routesModule.registerBrowseSession === "function") {
15020
+ routesModule.registerBrowseSession(tabId, context.url, domain);
15021
+ }
15022
+ } catch {}
15023
+ const fpNow = new Date().toISOString();
15024
+ const trace2 = {
15025
+ trace_id: nanoid7(),
15026
+ skill_id: "browse-session",
15027
+ endpoint_id: "",
15028
+ started_at: fpNow,
15029
+ completed_at: fpNow,
15030
+ success: true
15031
+ };
15032
+ return {
15033
+ result: {
15034
+ status: "browse_session_open",
15035
+ tab_id: tabId,
15036
+ url: context.url,
15037
+ domain,
15038
+ next_step: "unbrowse snap",
15039
+ commands: [
15040
+ "unbrowse snap --filter interactive",
15041
+ "unbrowse click <ref>",
15042
+ "unbrowse fill <ref> <value>",
15043
+ "unbrowse close"
15044
+ ]
15045
+ },
15046
+ trace: trace2,
15047
+ source: "browser-action",
15048
+ skill: undefined,
15049
+ timing: finalize("browser-action", null, "browse-session", undefined, trace2)
15050
+ };
15051
+ }
15052
+ }
15053
+ const MARKETPLACE_TIMEOUT_MS = context?.url ? 5000 : 30000;
14921
15054
  if (!forceCapture) {
14922
15055
  const ts0 = Date.now();
14923
- const { domain_results: domainResults, global_results: globalResults } = await searchIntentResolve(queryIntent, requestedDomain ?? undefined, MARKETPLACE_DOMAIN_SEARCH_K, MARKETPLACE_GLOBAL_SEARCH_K).catch(() => ({
15056
+ const { domain_results: domainResults, global_results: globalResults } = await Promise.race([
15057
+ searchIntentResolve(queryIntent, requestedDomain ?? undefined, MARKETPLACE_DOMAIN_SEARCH_K, MARKETPLACE_GLOBAL_SEARCH_K),
15058
+ new Promise((resolve) => setTimeout(() => {
15059
+ console.log(`[marketplace] timeout after ${MARKETPLACE_TIMEOUT_MS}ms — falling through to browser`);
15060
+ resolve({ domain_results: [], global_results: [], skipped_global: true });
15061
+ }, MARKETPLACE_TIMEOUT_MS))
15062
+ ]).catch(() => ({
14924
15063
  domain_results: [],
14925
15064
  global_results: [],
14926
15065
  skipped_global: false
@@ -15055,7 +15194,7 @@ async function resolveAndExecute(intent, params = {}, context, projection, optio
15055
15194
  }
15056
15195
  } catch {}
15057
15196
  }
15058
- if (context?.url && !forceCapture) {
15197
+ if (context?.url && !forceCapture && !shouldBypassBrowserFirstPass) {
15059
15198
  const firstPassResult = await tryFirstPassBrowserAction(intent, params, context.url, { signal: options?.signal, clientScope: options?.client_scope });
15060
15199
  decisionTrace.first_pass = {
15061
15200
  intentClass: firstPassResult.intentClass,
@@ -15171,7 +15310,7 @@ async function resolveAndExecute(intent, params = {}, context, projection, optio
15171
15310
  }
15172
15311
  }
15173
15312
  }
15174
- const bypassLiveCaptureQueue = shouldBypassLiveCaptureQueue(context?.url);
15313
+ const bypassLiveCaptureQueue = shouldBypassBrowserFirstPass;
15175
15314
  const captureLockKey = scopedCacheKey(clientScope, captureDomain);
15176
15315
  let learned_skill;
15177
15316
  let trace;
@@ -15320,12 +15459,7 @@ async function resolveAndExecute(intent, params = {}, context, projection, optio
15320
15459
  function queuePassivePublishIfExecuted(skill, orchestratorResult, browserBaseline) {
15321
15460
  if (!orchestratorResult.trace.success || !orchestratorResult.trace.endpoint_id)
15322
15461
  return;
15323
- const parity = browserBaseline === undefined ? undefined : (async () => {
15324
- const local = localParityVerdict(intent, browserBaseline, orchestratorResult.result);
15325
- if (local.verdict !== "skip")
15326
- return local.verdict;
15327
- return await agentJudgeParity(intent, browserBaseline, orchestratorResult.result);
15328
- })();
15462
+ const parity = browserBaseline === undefined ? undefined : Promise.resolve(agentJudgeParity(intent, browserBaseline, orchestratorResult.result));
15329
15463
  queuePassiveSkillPublish(skill, { parity });
15330
15464
  }
15331
15465
  if (!learned_skill && !trace.success) {
@@ -15534,7 +15668,7 @@ function selectSkillIdsToHydrate(candidates, requestedDomain, limit = MARKETPLAC
15534
15668
  ];
15535
15669
  return [...new Set(prioritizedCandidates.map((c) => extractSkillId(c.metadata)).filter((value) => !!value))].slice(0, limit);
15536
15670
  }
15537
- var CONFIDENCE_THRESHOLD = 0.3, NEBIUS_API_KEY, OPENAI_API_KEY, CHAT_URL2 = "https://api.tokenfactory.nebius.com/v1/chat/completions", OPENAI_CHAT_URL2 = "https://api.openai.com/v1/chat/completions", JUDGE_MODEL, LIVE_CAPTURE_TIMEOUT_MS, BROWSER_CAPTURE_SKILL_ID = "browser-capture", capturedDomainCache, captureInFlight, captureDomainLocks, skillRouteCache, ROUTE_CACHE_FILE, SKILL_SNAPSHOT_DIR, domainSkillCache, DOMAIN_CACHE_FILE, _routeCacheDirty = false, routeCacheFlushTimer, routeResultCache, ROUTE_CACHE_TTL, MARKETPLACE_HYDRATE_LIMIT, MARKETPLACE_GET_SKILL_TIMEOUT_MS, MARKETPLACE_DOMAIN_SEARCH_K, MARKETPLACE_GLOBAL_SEARCH_K, SEARCH_INTENT_STOPWORDS, SEARCH_DIRECTIVE_PREFIX, SEARCH_TRAILING_SITE_HINT, SEARCH_INSTRUCTION_NOISE, SEARCH_PRIORITY_PATTERN;
15671
+ var CONFIDENCE_THRESHOLD = 0.3, LIVE_CAPTURE_TIMEOUT_MS, BROWSER_CAPTURE_SKILL_ID = "browser-capture", capturedDomainCache, captureInFlight, captureDomainLocks, skillRouteCache, ROUTE_CACHE_FILE, SKILL_SNAPSHOT_DIR, domainSkillCache, DOMAIN_CACHE_FILE, _routeCacheDirty = false, routeCacheFlushTimer, routeResultCache, ROUTE_CACHE_TTL, MARKETPLACE_HYDRATE_LIMIT, MARKETPLACE_GET_SKILL_TIMEOUT_MS, MARKETPLACE_DOMAIN_SEARCH_K, MARKETPLACE_GLOBAL_SEARCH_K, SEARCH_INTENT_STOPWORDS, SEARCH_DIRECTIVE_PREFIX, SEARCH_TRAILING_SITE_HINT, SEARCH_INSTRUCTION_NOISE, SEARCH_PRIORITY_PATTERN;
15538
15672
  var init_orchestrator = __esm(async () => {
15539
15673
  init_client2();
15540
15674
  init_client();
@@ -15556,9 +15690,6 @@ var init_orchestrator = __esm(async () => {
15556
15690
  init_execution(),
15557
15691
  init_prefetch()
15558
15692
  ]);
15559
- NEBIUS_API_KEY = process.env.NEBIUS_API_KEY ?? "";
15560
- OPENAI_API_KEY = process.env.OPENAI_API_KEY ?? "";
15561
- JUDGE_MODEL = process.env.UNBROWSE_AGENT_JUDGE_MODEL ?? "gpt-4.1-mini";
15562
15693
  LIVE_CAPTURE_TIMEOUT_MS = Number(process.env.UNBROWSE_LIVE_CAPTURE_TIMEOUT_MS ?? "120000");
15563
15694
  capturedDomainCache = new Map;
15564
15695
  captureInFlight = new Map;
@@ -16545,8 +16676,90 @@ async function registerRoutes(app) {
16545
16676
  try {
16546
16677
  await publishSkill2(skill);
16547
16678
  } catch {}
16679
+ try {
16680
+ cachePublishedSkill(skill);
16681
+ } catch {}
16548
16682
  return reply.send({ ok: true, endpoints_updated: reviews.length });
16549
16683
  });
16684
+ app.post("/v1/skills/:skill_id/publish", async (req, reply) => {
16685
+ const clientScope = clientScopeFor(req);
16686
+ const { skill_id } = req.params;
16687
+ const { endpoints: reviews } = req.body ?? {};
16688
+ let skill = getRecentLocalSkill(skill_id, clientScope);
16689
+ if (!skill) {
16690
+ for (const [, entry] of domainSkillCache) {
16691
+ if (entry.skillId === skill_id && entry.localSkillPath) {
16692
+ try {
16693
+ skill = JSON.parse(__require("fs").readFileSync(entry.localSkillPath, "utf-8"));
16694
+ } catch {}
16695
+ break;
16696
+ }
16697
+ }
16698
+ }
16699
+ if (!skill)
16700
+ skill = await getSkill2(skill_id, clientScope);
16701
+ if (!skill)
16702
+ return reply.code(404).send({ error: "Skill not found" });
16703
+ if (reviews?.length) {
16704
+ const updated = mergeAgentReview(skill.endpoints, reviews);
16705
+ skill.endpoints = updated;
16706
+ skill.updated_at = new Date().toISOString();
16707
+ try {
16708
+ cachePublishedSkill(skill);
16709
+ } catch {}
16710
+ const domain = skill.domain;
16711
+ if (domain) {
16712
+ const ck = buildResolveCacheKey(domain, skill.intent_signature ?? `browse ${domain}`, undefined);
16713
+ const sk = scopedCacheKey(clientScope, ck);
16714
+ writeSkillSnapshot(sk, skill);
16715
+ const dk = getDomainReuseKey(domain);
16716
+ if (dk) {
16717
+ domainSkillCache.set(dk, {
16718
+ skillId: skill.skill_id,
16719
+ localSkillPath: snapshotPathForCacheKey(sk),
16720
+ ts: Date.now()
16721
+ });
16722
+ persistDomainCache();
16723
+ }
16724
+ }
16725
+ try {
16726
+ await publishSkill2(skill);
16727
+ } catch {}
16728
+ try {
16729
+ cachePublishedSkill(skill);
16730
+ } catch {}
16731
+ return reply.send({
16732
+ ok: true,
16733
+ skill_id: skill.skill_id,
16734
+ endpoints_updated: reviews.length,
16735
+ published: true
16736
+ });
16737
+ }
16738
+ const ranked = rankEndpoints(skill.endpoints, skill.intent_signature, skill.domain);
16739
+ const endpoints_to_describe = ranked.map((r) => ({
16740
+ endpoint_id: r.endpoint.endpoint_id,
16741
+ method: r.endpoint.method,
16742
+ url: r.endpoint.url_template.length > 120 ? r.endpoint.url_template.slice(0, 120) + "..." : r.endpoint.url_template,
16743
+ current_description: r.endpoint.description ?? "",
16744
+ schema_summary: r.endpoint.response_schema ? summarizeSchema(r.endpoint.response_schema) : null,
16745
+ sample_values: extractSampleValues(r.endpoint.semantic?.example_response_compact),
16746
+ input_params: r.endpoint.semantic?.requires?.map((b) => ({
16747
+ key: b.key,
16748
+ type: b.type ?? b.semantic_type,
16749
+ required: b.required ?? false,
16750
+ example: b.example_value
16751
+ })) ?? [],
16752
+ dom_extraction: !!r.endpoint.dom_extraction,
16753
+ _fill_description: "DESCRIBE THIS ENDPOINT — what it returns, key params, action type"
16754
+ }));
16755
+ return reply.send({
16756
+ skill_id: skill.skill_id,
16757
+ domain: skill.domain,
16758
+ endpoint_count: skill.endpoints.length,
16759
+ endpoints_to_describe,
16760
+ _next_step: `Fill each endpoint's description, then call: unbrowse publish --skill ${skill.skill_id} --endpoints '[{endpoint_id, description, action_kind, resource_kind}]'`
16761
+ });
16762
+ });
16550
16763
  app.post("/v1/skills/:skill_id/chunk", async (req, reply) => {
16551
16764
  const clientScope = clientScopeFor(req);
16552
16765
  const { skill_id } = req.params;
@@ -16761,7 +16974,7 @@ async function registerRoutes(app) {
16761
16974
  await start().catch(() => {});
16762
16975
  const tabId = await newTab();
16763
16976
  await harStart(tabId).catch(() => {});
16764
- await evaluate(tabId, INTERCEPTOR_SCRIPT).catch(() => {});
16977
+ await injectInterceptor(tabId);
16765
16978
  const session = { tabId, url: "about:blank", harActive: true, domain: "" };
16766
16979
  browseSessions.set("default", session);
16767
16980
  return session;
@@ -16801,7 +17014,7 @@ async function registerRoutes(app) {
16801
17014
  const finalUrl = await getCurrentUrl(session.tabId).catch(() => url);
16802
17015
  session.url = typeof finalUrl === "string" && finalUrl.startsWith("http") ? finalUrl : url;
16803
17016
  session.domain = profileName(session.url);
16804
- await evaluate(session.tabId, INTERCEPTOR_SCRIPT).catch(() => {});
17017
+ await injectInterceptor(session.tabId);
16805
17018
  return reply.send({ ok: true, url: session.url, tab_id: session.tabId, auth_profile: session.domain });
16806
17019
  });
16807
17020
  app.post("/v1/browse/snap", async (req, reply) => {
@@ -17002,6 +17215,88 @@ async function registerRoutes(app) {
17002
17215
  invalidateRouteCacheForDomain(domain);
17003
17216
  console.log(`[passive-index] ${domain}: ${mergedEps.length} endpoints cached synchronously`);
17004
17217
  }
17218
+ } else {
17219
+ let domain2;
17220
+ try {
17221
+ domain2 = new URL(session.url).hostname;
17222
+ } catch {
17223
+ domain2 = session.domain;
17224
+ }
17225
+ try {
17226
+ const html = await getPageHtml(session.tabId);
17227
+ if (html && typeof html === "string" && html.startsWith("<")) {
17228
+ const { extractFromDOM: extractFromDOM2 } = await Promise.resolve().then(() => (init_extraction(), exports_extraction));
17229
+ const { detectSearchForms: detectSearchForms2, isStructuredSearchForm: isStructuredSearchForm2 } = await Promise.resolve().then(() => (init_search_forms(), exports_search_forms));
17230
+ const { inferSchema: inferSchema2 } = await Promise.resolve().then(() => (init_transform(), exports_transform));
17231
+ const { inferEndpointSemantic: inferEndpointSemantic2 } = await Promise.resolve().then(() => (init_graph(), exports_graph));
17232
+ const { templatizeQueryParams: templatizeQueryParams2 } = await init_execution().then(() => exports_execution);
17233
+ const extracted = extractFromDOM2(html, `browse ${domain2}`);
17234
+ const searchForms = detectSearchForms2(html);
17235
+ const validForm = searchForms.find((s) => isStructuredSearchForm2(s));
17236
+ if (extracted.data || validForm) {
17237
+ const urlTemplate = templatizeQueryParams2(session.url);
17238
+ const ep = {
17239
+ endpoint_id: nanoid8(),
17240
+ method: "GET",
17241
+ url_template: urlTemplate,
17242
+ idempotency: "safe",
17243
+ verification_status: "verified",
17244
+ reliability_score: extracted.confidence ?? 0.7,
17245
+ description: validForm ? `Search form for ${domain2}` : `Page content from ${domain2}`,
17246
+ response_schema: extracted.data ? inferSchema2([extracted.data]) : undefined,
17247
+ dom_extraction: {
17248
+ extraction_method: extracted.extraction_method ?? "repeated-elements",
17249
+ confidence: extracted.confidence ?? 0.7,
17250
+ ...extracted.selector ? { selector: extracted.selector } : {},
17251
+ ...validForm ? { search_form: validForm } : {}
17252
+ },
17253
+ trigger_url: session.url
17254
+ };
17255
+ ep.semantic = inferEndpointSemantic2(ep, {
17256
+ sampleResponse: extracted.data,
17257
+ observedAt: new Date().toISOString(),
17258
+ sampleRequestUrl: session.url
17259
+ });
17260
+ const existing = findExistingSkillForDomain(domain2);
17261
+ const allEps = existing ? mergeEndpoints(existing.endpoints, [ep]) : [ep];
17262
+ for (const e of allEps) {
17263
+ if (!e.description)
17264
+ e.description = generateLocalDescription(e);
17265
+ }
17266
+ const skill = {
17267
+ skill_id: existing?.skill_id ?? nanoid8(),
17268
+ version: "1.0.0",
17269
+ schema_version: "1",
17270
+ lifecycle: "active",
17271
+ execution_type: "http",
17272
+ created_at: existing?.created_at ?? new Date().toISOString(),
17273
+ updated_at: new Date().toISOString(),
17274
+ name: domain2,
17275
+ intent_signature: `browse ${domain2}`,
17276
+ domain: domain2,
17277
+ description: `DOM skill for ${domain2}`,
17278
+ owner_type: "agent",
17279
+ endpoints: allEps,
17280
+ intents: [...new Set([...existing?.intents ?? [], `browse ${domain2}`])]
17281
+ };
17282
+ const ck = buildResolveCacheKey(domain2, `browse ${domain2}`, session.url);
17283
+ const sk = scopedCacheKey("global", ck);
17284
+ writeSkillSnapshot(sk, skill);
17285
+ const dk = getDomainReuseKey(session.url ?? domain2);
17286
+ if (dk) {
17287
+ domainSkillCache.set(dk, { skillId: skill.skill_id, localSkillPath: snapshotPathForCacheKey(sk), ts: Date.now() });
17288
+ persistDomainCache();
17289
+ }
17290
+ try {
17291
+ cachePublishedSkill(skill);
17292
+ } catch {}
17293
+ invalidateRouteCacheForDomain(domain2);
17294
+ console.log(`[close] ${domain2}: DOM endpoint created (form=${!!validForm})`);
17295
+ }
17296
+ }
17297
+ } catch (err) {
17298
+ console.log(`[close] DOM fallback failed: ${err instanceof Error ? err.message : err}`);
17299
+ }
17005
17300
  }
17006
17301
  }
17007
17302
  passiveIndexFromRequests(allRequests, session.url);
@@ -17131,6 +17426,21 @@ var API_URL = process.env.UNBROWSE_BACKEND_URL || "https://beta-api.unbrowse.ai"
17131
17426
  var PROFILE_NAME = sanitizeProfileName(process.env.UNBROWSE_PROFILE ?? "");
17132
17427
  var recentLocalSkills = new Map;
17133
17428
  var LOCAL_ONLY = process.env.UNBROWSE_LOCAL_ONLY === "1";
17429
+ function decodeBase64Json(value) {
17430
+ try {
17431
+ if (typeof globalThis !== "undefined" && typeof globalThis.atob === "function") {
17432
+ const binary = globalThis.atob(value);
17433
+ const bytes = new Uint8Array(binary.length);
17434
+ for (let i = 0;i < binary.length; i++) {
17435
+ bytes[i] = binary.charCodeAt(i);
17436
+ }
17437
+ return JSON.parse(new TextDecoder("utf-8").decode(bytes));
17438
+ }
17439
+ return JSON.parse(Buffer.from(value, "base64").toString("utf8"));
17440
+ } catch {
17441
+ return;
17442
+ }
17443
+ }
17134
17444
  function getConfigDir() {
17135
17445
  if (process.env.UNBROWSE_CONFIG_DIR)
17136
17446
  return process.env.UNBROWSE_CONFIG_DIR;
@@ -17185,6 +17495,7 @@ function getApiKey() {
17185
17495
  return "";
17186
17496
  }
17187
17497
  var API_TIMEOUT_MS = parseInt(process.env.UNBROWSE_API_TIMEOUT ?? "8000", 10);
17498
+ var PUBLISH_TIMEOUT_MS = parseInt(process.env.UNBROWSE_PUBLISH_TIMEOUT ?? "30000", 10);
17188
17499
  async function validateApiKey(key) {
17189
17500
  const controller = new AbortController;
17190
17501
  const timer = setTimeout(() => controller.abort(), API_TIMEOUT_MS);
@@ -17245,7 +17556,7 @@ async function findUsableApiKey() {
17245
17556
  async function api(method, path, body, opts) {
17246
17557
  const key = opts?.noAuth ? "" : getApiKey();
17247
17558
  const controller = new AbortController;
17248
- const timer = setTimeout(() => controller.abort(), API_TIMEOUT_MS);
17559
+ const timer = setTimeout(() => controller.abort(), opts?.timeoutMs ?? API_TIMEOUT_MS);
17249
17560
  let res;
17250
17561
  try {
17251
17562
  res = await fetch(`${API_URL}${path}`, {
@@ -17274,8 +17585,9 @@ async function api(method, path, body, opts) {
17274
17585
  throw new Error("ToS update required. Restart unbrowse to accept new terms.");
17275
17586
  }
17276
17587
  if (res.status === 402) {
17277
- const paymentTerms = res.headers.get("X-Payment-Required");
17278
- const terms = paymentTerms ? JSON.parse(paymentTerms) : data.terms;
17588
+ const paymentRequired = res.headers.get("PAYMENT-REQUIRED");
17589
+ const legacyPaymentTerms = res.headers.get("X-Payment-Required");
17590
+ const terms = paymentRequired ? decodeBase64Json(paymentRequired) : legacyPaymentTerms ? JSON.parse(legacyPaymentTerms) : data.terms;
17279
17591
  const err = new Error(`Payment required: ${data.error ?? "This skill requires payment"}`);
17280
17592
  err.x402 = true;
17281
17593
  err.terms = terms;
@@ -18337,7 +18649,12 @@ async function cmdExecute(flags) {
18337
18649
  const items = Array.isArray(data) ? data : data != null ? [data] : [];
18338
18650
  const extracted = extractFlag ? applyExtract(items, extractFlag) : items;
18339
18651
  const limited = limitFlag ? extracted.slice(0, limitFlag) : extracted;
18340
- output({ trace: result.trace, data: limited, count: limited.length }, !!flags.pretty);
18652
+ const trace = result.trace;
18653
+ const out = { trace: result.trace, data: limited, count: limited.length };
18654
+ if (trace?.skill_id && trace?.endpoint_id && limited.length > 0) {
18655
+ out._review_hint = `After presenting results, improve this endpoint's description: unbrowse review --skill ${trace.skill_id} --endpoints '[{"endpoint_id":"${trace.endpoint_id}","description":"DESCRIBE WHAT THIS RETURNS","action_kind":"ACTION","resource_kind":"RESOURCE"}]'`;
18656
+ }
18657
+ output(out, !!flags.pretty);
18341
18658
  return;
18342
18659
  }
18343
18660
  if (!rawFlag && !pathFlag && !extractFlag && !schemaFlag) {
@@ -18386,6 +18703,20 @@ async function cmdReview(flags) {
18386
18703
  die("--endpoints must be a non-empty JSON array");
18387
18704
  output(await api3("POST", `/v1/skills/${skillId}/review`, { endpoints }), !!flags.pretty);
18388
18705
  }
18706
+ async function cmdPublish(flags) {
18707
+ const skillId = flags.skill;
18708
+ if (!skillId)
18709
+ die("--skill is required");
18710
+ const endpointsJson = flags.endpoints;
18711
+ if (endpointsJson) {
18712
+ const endpoints = JSON.parse(endpointsJson);
18713
+ if (!Array.isArray(endpoints) || endpoints.length === 0)
18714
+ die("--endpoints must be a non-empty JSON array");
18715
+ output(await api3("POST", `/v1/skills/${skillId}/publish`, { endpoints }), !!flags.pretty);
18716
+ } else {
18717
+ output(await api3("POST", `/v1/skills/${skillId}/publish`, {}), !!flags.pretty);
18718
+ }
18719
+ }
18389
18720
  async function cmdLogin(flags) {
18390
18721
  const url = flags.url;
18391
18722
  if (!url)
@@ -18465,6 +18796,7 @@ var CLI_REFERENCE = {
18465
18796
  { name: "execute", usage: "--skill ID --endpoint ID [opts]", desc: "Execute a specific endpoint" },
18466
18797
  { name: "feedback", usage: "--skill ID --endpoint ID --rating N", desc: "Submit feedback (mandatory after resolve)" },
18467
18798
  { name: "review", usage: "--skill ID --endpoints '[...]'", desc: "Push reviewed descriptions/metadata back to skill" },
18799
+ { name: "publish", usage: "--skill ID [--endpoints '[...]']", desc: "Describe + publish skill to marketplace (two-phase)" },
18468
18800
  { name: "login", usage: '--url "..."', desc: "Interactive browser login" },
18469
18801
  { name: "skills", usage: "", desc: "List all skills" },
18470
18802
  { name: "skill", usage: "<id>", desc: "Get skill details" },
@@ -18512,7 +18844,9 @@ var CLI_REFERENCE = {
18512
18844
  "unbrowse execute --skill abc --endpoint def --schema --pretty",
18513
18845
  'unbrowse execute --skill abc --endpoint def --path "data.items[]" --extract "name,url" --limit 10 --pretty',
18514
18846
  "unbrowse feedback --skill abc --endpoint def --rating 5",
18515
- `unbrowse review --skill abc --endpoints '[{"endpoint_id":"def","description":"..."}]'`
18847
+ `unbrowse review --skill abc --endpoints '[{"endpoint_id":"def","description":"..."}]'`,
18848
+ "unbrowse publish --skill abc --pretty",
18849
+ `unbrowse publish --skill abc --endpoints '[{"endpoint_id":"def","description":"Search court judgments by keywords","action_kind":"search","resource_kind":"judgment"}]'`
18516
18850
  ]
18517
18851
  };
18518
18852
  function printHelp() {
@@ -18856,6 +19190,7 @@ async function main() {
18856
19190
  "feedback",
18857
19191
  "fb",
18858
19192
  "review",
19193
+ "publish",
18859
19194
  "login",
18860
19195
  "skills",
18861
19196
  "skill",
@@ -18918,6 +19253,8 @@ async function main() {
18918
19253
  return cmdFeedback(flags);
18919
19254
  case "review":
18920
19255
  return cmdReview(flags);
19256
+ case "publish":
19257
+ return cmdPublish(flags);
18921
19258
  case "login":
18922
19259
  return cmdLogin(flags);
18923
19260
  case "skills":