unbrowse 3.1.0-experiments.e16f194 → 3.1.0-experiments.ef43417

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.js CHANGED
@@ -31,7 +31,7 @@ var __promiseAll = (args) => Promise.all(args);
31
31
  var __require = /* @__PURE__ */ createRequire(import.meta.url);
32
32
 
33
33
  // ../../src/build-info.generated.ts
34
- var BUILD_RELEASE_VERSION = "3.1.0-experiments.e16f194", BUILD_GIT_SHA = "e16f194a388c", BUILD_CODE_HASH = "1488fc1d92b7", BUILD_RELEASE_MANIFEST_BASE64 = "eyJzY2hlbWFfdmVyc2lvbiI6MSwicmVsZWFzZV92ZXJzaW9uIjoiMy4xLjAtZXhwZXJpbWVudHMuZTE2ZjE5NCIsImdpdF9zaGEiOiJlMTZmMTk0YTM4OGMiLCJjb2RlX2hhc2giOiIxNDg4ZmMxZDkyYjciLCJ0cmFjZV92ZXJzaW9uIjoiMTQ4OGZjMWQ5MmI3QGUxNmYxOTRhMzg4YyIsImlzc3VlZF9hdCI6IjIwMjYtMDQtMDVUMTQ6MzY6MjMuNDY3WiJ9", BUILD_RELEASE_MANIFEST_SIGNATURE = "FextqttPXF5moOI2DNGvrN-yPrwuleNkbadmvTLmmPc", BUILD_DEFAULT_BACKEND_URL = "https://unbrowse-backend-experiments.lewis-6d8.workers.dev";
34
+ var BUILD_RELEASE_VERSION = "3.1.0-experiments.ef43417", BUILD_GIT_SHA = "ef434171bbc8", BUILD_CODE_HASH = "1488fc1d92b7", BUILD_RELEASE_MANIFEST_BASE64 = "eyJzY2hlbWFfdmVyc2lvbiI6MSwicmVsZWFzZV92ZXJzaW9uIjoiMy4xLjAtZXhwZXJpbWVudHMuZWY0MzQxNyIsImdpdF9zaGEiOiJlZjQzNDE3MWJiYzgiLCJjb2RlX2hhc2giOiIxNDg4ZmMxZDkyYjciLCJ0cmFjZV92ZXJzaW9uIjoiMTQ4OGZjMWQ5MmI3QGVmNDM0MTcxYmJjOCIsImlzc3VlZF9hdCI6IjIwMjYtMDQtMDZUMDI6NTQ6MTAuMjU3WiJ9", BUILD_RELEASE_MANIFEST_SIGNATURE = "TWn8YbnQaAVxkiuNsLHcxl8-9saAN2wWFxDFtz9Xc0c", BUILD_DEFAULT_BACKEND_URL = "https://unbrowse-backend-experiments.lewis-6d8.workers.dev";
35
35
 
36
36
  // ../../src/version.ts
37
37
  import { createHash } from "crypto";
@@ -899,18 +899,6 @@ var init_extraction = __esm(() => {
899
899
  CHROME_TAGS = new Set(["nav", "footer", "header"]);
900
900
  });
901
901
 
902
- // ../../src/graph/agent-augment.ts
903
- var DEFAULT_MODEL, ENABLED, AUGMENT_TIMEOUT_MS, MAX_AUGMENT_ENDPOINTS, MAX_AUGMENT_PAYLOAD_CHARS, GENERIC_SEMANTIC_TYPES;
904
- var init_agent_augment = __esm(() => {
905
- init_graph();
906
- DEFAULT_MODEL = process.env.UNBROWSE_AGENT_SEMANTIC_MODEL ?? process.env.UNBROWSE_AGENT_JUDGE_MODEL ?? "gpt-4.1-mini";
907
- ENABLED = process.env.UNBROWSE_AGENT_SEMANTIC_AUGMENT !== "0";
908
- AUGMENT_TIMEOUT_MS = Number(process.env.UNBROWSE_AGENT_SEMANTIC_TIMEOUT_MS ?? 8000);
909
- MAX_AUGMENT_ENDPOINTS = Math.max(1, Number(process.env.UNBROWSE_AGENT_SEMANTIC_MAX_ENDPOINTS ?? 6));
910
- MAX_AUGMENT_PAYLOAD_CHARS = Math.max(4000, Number(process.env.UNBROWSE_AGENT_SEMANTIC_MAX_PAYLOAD_CHARS ?? 24000));
911
- GENERIC_SEMANTIC_TYPES = new Set(["identifier", "input", "resource", "entity", "item"]);
912
- });
913
-
914
902
  // ../../src/execution/search-forms.ts
915
903
  var SEARCH_FIELD_NAMES, LOGIN_FIELD_NAMES, SUPPORTED_INPUT_TYPES;
916
904
  var init_search_forms = __esm(() => {
@@ -1036,7 +1024,6 @@ var init_execution = __esm(async () => {
1036
1024
  init_domain();
1037
1025
  init_extraction();
1038
1026
  init_graph();
1039
- init_agent_augment();
1040
1027
  init_logger();
1041
1028
  init_version();
1042
1029
  init_search_forms();
@@ -1382,7 +1369,7 @@ init_wallet();
1382
1369
  init_telemetry_attribution();
1383
1370
  import { readFileSync as readFileSync3, writeFileSync, existsSync as existsSync4, mkdirSync, readdirSync as readdirSync2 } from "fs";
1384
1371
  import { join as join4 } from "path";
1385
- import { homedir as homedir3, hostname } from "os";
1372
+ import { homedir as homedir3, hostname, release as osRelease } from "os";
1386
1373
  import { randomBytes, createHash as createHash2 } from "crypto";
1387
1374
  import { createInterface } from "readline";
1388
1375
  var API_URL = process.env.UNBROWSE_BACKEND_URL || DEFAULT_BACKEND_URL;
@@ -1575,9 +1562,19 @@ async function recordInstallTelemetryEvent(source, options) {
1575
1562
  skill_version: options?.skillVersion,
1576
1563
  status: options?.status ?? "installed",
1577
1564
  created_at: createdAt,
1578
- properties: mergeTelemetryProperties(options?.properties, getTelemetryAttribution())
1565
+ properties: mergeTelemetryProperties({ ...getRuntimeContext(), ...options?.properties }, getTelemetryAttribution())
1579
1566
  });
1580
1567
  }
1568
+ function getRuntimeContext() {
1569
+ return {
1570
+ cli_version: PACKAGE_VERSION,
1571
+ code_hash: CODE_HASH,
1572
+ node_version: process.version,
1573
+ platform: process.platform,
1574
+ arch: process.arch,
1575
+ os_release: osRelease()
1576
+ };
1577
+ }
1581
1578
  async function recordFunnelTelemetryEvent(name, options) {
1582
1579
  const createdAt = options?.createdAt ?? new Date().toISOString();
1583
1580
  const landingToken = getLandingToken();
@@ -1589,7 +1586,7 @@ async function recordFunnelTelemetryEvent(name, options) {
1589
1586
  source: options?.source ?? "cli",
1590
1587
  host_type: options?.hostType ?? detectTelemetryHostType(),
1591
1588
  created_at: createdAt,
1592
- properties: mergeTelemetryProperties(options?.properties, getTelemetryAttribution())
1589
+ properties: mergeTelemetryProperties({ ...getRuntimeContext(), ...options?.properties }, getTelemetryAttribution())
1593
1590
  });
1594
1591
  }
1595
1592
  var EMAIL_RE = /^[^\s@]+@[^\s@]+\.[^\s@]+$/i;
package/dist/mcp.js CHANGED
@@ -225,11 +225,11 @@ import { dirname, join, parse } from "path";
225
225
  import { fileURLToPath as fileURLToPath2 } from "url";
226
226
 
227
227
  // ../../src/build-info.generated.ts
228
- var BUILD_RELEASE_VERSION = "3.1.0-experiments.e16f194";
229
- var BUILD_GIT_SHA = "e16f194a388c";
228
+ var BUILD_RELEASE_VERSION = "3.1.0-experiments.ef43417";
229
+ var BUILD_GIT_SHA = "ef434171bbc8";
230
230
  var BUILD_CODE_HASH = "1488fc1d92b7";
231
- var BUILD_RELEASE_MANIFEST_BASE64 = "eyJzY2hlbWFfdmVyc2lvbiI6MSwicmVsZWFzZV92ZXJzaW9uIjoiMy4xLjAtZXhwZXJpbWVudHMuZTE2ZjE5NCIsImdpdF9zaGEiOiJlMTZmMTk0YTM4OGMiLCJjb2RlX2hhc2giOiIxNDg4ZmMxZDkyYjciLCJ0cmFjZV92ZXJzaW9uIjoiMTQ4OGZjMWQ5MmI3QGUxNmYxOTRhMzg4YyIsImlzc3VlZF9hdCI6IjIwMjYtMDQtMDVUMTQ6MzY6MjMuNDY3WiJ9";
232
- var BUILD_RELEASE_MANIFEST_SIGNATURE = "FextqttPXF5moOI2DNGvrN-yPrwuleNkbadmvTLmmPc";
231
+ var BUILD_RELEASE_MANIFEST_BASE64 = "eyJzY2hlbWFfdmVyc2lvbiI6MSwicmVsZWFzZV92ZXJzaW9uIjoiMy4xLjAtZXhwZXJpbWVudHMuZWY0MzQxNyIsImdpdF9zaGEiOiJlZjQzNDE3MWJiYzgiLCJjb2RlX2hhc2giOiIxNDg4ZmMxZDkyYjciLCJ0cmFjZV92ZXJzaW9uIjoiMTQ4OGZjMWQ5MmI3QGVmNDM0MTcxYmJjOCIsImlzc3VlZF9hdCI6IjIwMjYtMDQtMDZUMDI6NTQ6MTAuMjU3WiJ9";
232
+ var BUILD_RELEASE_MANIFEST_SIGNATURE = "TWn8YbnQaAVxkiuNsLHcxl8-9saAN2wWFxDFtz9Xc0c";
233
233
  var BUILD_DEFAULT_BACKEND_URL = "https://unbrowse-backend-experiments.lewis-6d8.workers.dev";
234
234
 
235
235
  // ../../src/version.ts
@@ -743,7 +743,7 @@ function readImpactSummary() {
743
743
  // ../../src/client/index.ts
744
744
  import { readFileSync as readFileSync5, writeFileSync as writeFileSync3, existsSync as existsSync6, mkdirSync as mkdirSync4, readdirSync as readdirSync3 } from "fs";
745
745
  import { join as join5 } from "path";
746
- import { homedir as homedir4, hostname } from "os";
746
+ import { homedir as homedir4, hostname, release as osRelease } from "os";
747
747
 
748
748
  // ../../src/payments/cascade.ts
749
749
  import bs58 from "bs58";
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "unbrowse",
3
- "version": "3.1.0-experiments.e16f194",
3
+ "version": "3.1.0-experiments.ef43417",
4
4
  "description": "Reverse-engineer any website into reusable API skills. Zero-dep single binary with embedded browser engine.",
5
5
  "type": "module",
6
6
  "bin": {
@@ -1,5 +1,6 @@
1
1
  import { nanoid } from "nanoid";
2
2
  import { readFileSync } from "node:fs";
3
+ import { log } from "../logger.js";
3
4
  import { extractEndpoints, extractAuthHeaders } from "../reverse-engineer/index.js";
4
5
  import { enrichEndpointsWithTokenSources } from "../reverse-engineer/token-sources.js";
5
6
  import { buildSkillOperationGraph, inferEndpointSemantic } from "../graph/index.js";
@@ -168,6 +169,10 @@ export async function cacheBrowseRequests(params: {
168
169
  // so serverFetch can replay them. Use registrable domain for vault key
169
170
  // so ads.x.com and ads-api.x.com share the same session.
170
171
  const capturedAuthHeaders = extractAuthHeaders(requests);
172
+ // Filter out [REDACTED] placeholders from cookie-inferred auth headers
173
+ for (const [k, v] of Object.entries(capturedAuthHeaders)) {
174
+ if (v === "[REDACTED]") delete capturedAuthHeaders[k];
175
+ }
171
176
  if (Object.keys(capturedAuthHeaders).length > 0) {
172
177
  const sessionKey = `${getRegistrableDomain(domain)}-session`;
173
178
  await storeCredential(sessionKey, JSON.stringify({ headers: capturedAuthHeaders })).catch(() => {});
@@ -196,6 +201,7 @@ export async function cacheBrowseRequests(params: {
196
201
  if (!existingSkill || mergedEndpoints.length >= existingSkill.endpoints.length) {
197
202
  for (const endpoint of mergedEndpoints) {
198
203
  if (!endpoint.description) endpoint.description = generateLocalDescription(endpoint);
204
+ if (!endpoint.semantic) endpoint.semantic = inferEndpointSemantic(endpoint);
199
205
  }
200
206
  const quickSkill: SkillManifest = {
201
207
  skill_id: existingSkill?.skill_id ?? nanoid(),
@@ -221,9 +227,11 @@ export async function cacheBrowseRequests(params: {
221
227
  try {
222
228
  const html = getPageHtml ? await getPageHtml() : undefined;
223
229
  if (html && html.startsWith("<")) {
224
- enrichEndpointsWithTokenSources(quickSkill.endpoints, requests, html, jsBundles);
230
+ const preCheck = requests.filter(r => r.request_headers["authorization"] || r.request_headers["x-csrf-token"]).length;
231
+ const enriched = enrichEndpointsWithTokenSources(quickSkill.endpoints, requests, html, jsBundles);
232
+ log("browse-index", `token enrichment: ${enriched} bindings, ${preCheck} auth-reqs pre-call, ${quickSkill.endpoints.length} eps`);
225
233
  }
226
- } catch { /* best-effort */ }
234
+ } catch (e) { log("browse-index", `token enrichment failed: ${e}`); }
227
235
 
228
236
  const cacheKey = buildResolveCacheKey(domain, intent, sessionUrl);
229
237
  const scopedKey = scopedCacheKey("global", cacheKey);
@@ -8,7 +8,6 @@ import { nanoid } from "nanoid";
8
8
  import type { ExecutionTrace, OrchestrationTiming, ProjectionOptions, SkillManifest } from "../types/index.js";
9
9
  import { mergeEndpoints } from "../marketplace/index.js";
10
10
  import { buildSkillOperationGraph, getEndpointDescriptionMetadata, getSkillChunk, toAgentSkillChunkView } from "../graph/index.js";
11
- import { augmentEndpointsWithAgent } from "../graph/agent-augment.js";
12
11
  import { findExistingSkillForDomain, cachePublishedSkill } from "../client/index.js";
13
12
  import { storeCredential } from "../vault/index.js";
14
13
  import { getRegistrableDomain } from "../domain.js";
@@ -1179,10 +1178,16 @@ export async function registerRoutes(app: FastifyInstance) {
1179
1178
 
1180
1179
  async function restartBrowseCapture(session: BrowseSession): Promise<void> {
1181
1180
  const broker = brokerForSession(session);
1182
- const load = await broker.waitForLoad(session.tabId, 2_000).catch(() => null);
1183
- if (load && load.status === "timeout") {
1184
- session.harActive = false;
1185
- return;
1181
+ // Ensure Kuri is actually responding before starting HAR.
1182
+ // Cold starts on packaged CLI can have ConnectionRefused retries —
1183
+ // if we start HAR during instability, it gets lost on restart.
1184
+ for (let attempt = 0; attempt < 5; attempt++) {
1185
+ try {
1186
+ await broker.waitForLoad(session.tabId, 2_000);
1187
+ break;
1188
+ } catch {
1189
+ if (attempt < 4) await new Promise((r) => setTimeout(r, 1_000));
1190
+ }
1186
1191
  }
1187
1192
  await broker.networkEnable(session.tabId).catch(() => {});
1188
1193
  await broker.harStart(session.tabId).catch(() => {});
@@ -7,7 +7,6 @@ import type { RawRequest } from "../capture/index.js";
7
7
  import { queueBackgroundIndex } from "../indexer/index.js";
8
8
  import { mergeEndpoints } from "../marketplace/index.js";
9
9
  import { buildSkillOperationGraph } from "../graph/index.js";
10
- import { augmentEndpointsWithAgent } from "../graph/agent-augment.js";
11
10
  import { findExistingSkillForDomain, cachePublishedSkill } from "../client/index.js";
12
11
  import { storeCredential } from "../vault/index.js";
13
12
  import { getRegistrableDomain } from "../domain.js";
@@ -1,6 +1,6 @@
1
- export const BUILD_RELEASE_VERSION = "3.1.0-experiments.e16f194";
2
- export const BUILD_GIT_SHA = "e16f194a388c";
1
+ export const BUILD_RELEASE_VERSION = "3.1.0-experiments.ef43417";
2
+ export const BUILD_GIT_SHA = "ef434171bbc8";
3
3
  export const BUILD_CODE_HASH = "1488fc1d92b7";
4
- export const BUILD_RELEASE_MANIFEST_BASE64 = "eyJzY2hlbWFfdmVyc2lvbiI6MSwicmVsZWFzZV92ZXJzaW9uIjoiMy4xLjAtZXhwZXJpbWVudHMuZTE2ZjE5NCIsImdpdF9zaGEiOiJlMTZmMTk0YTM4OGMiLCJjb2RlX2hhc2giOiIxNDg4ZmMxZDkyYjciLCJ0cmFjZV92ZXJzaW9uIjoiMTQ4OGZjMWQ5MmI3QGUxNmYxOTRhMzg4YyIsImlzc3VlZF9hdCI6IjIwMjYtMDQtMDVUMTQ6MzY6MjMuNDY3WiJ9";
5
- export const BUILD_RELEASE_MANIFEST_SIGNATURE = "FextqttPXF5moOI2DNGvrN-yPrwuleNkbadmvTLmmPc";
4
+ export const BUILD_RELEASE_MANIFEST_BASE64 = "eyJzY2hlbWFfdmVyc2lvbiI6MSwicmVsZWFzZV92ZXJzaW9uIjoiMy4xLjAtZXhwZXJpbWVudHMuZWY0MzQxNyIsImdpdF9zaGEiOiJlZjQzNDE3MWJiYzgiLCJjb2RlX2hhc2giOiIxNDg4ZmMxZDkyYjciLCJ0cmFjZV92ZXJzaW9uIjoiMTQ4OGZjMWQ5MmI3QGVmNDM0MTcxYmJjOCIsImlzc3VlZF9hdCI6IjIwMjYtMDQtMDZUMDI6NTQ6MTAuMjU3WiJ9";
5
+ export const BUILD_RELEASE_MANIFEST_SIGNATURE = "TWn8YbnQaAVxkiuNsLHcxl8-9saAN2wWFxDFtz9Xc0c";
6
6
  export const BUILD_DEFAULT_BACKEND_URL = "https://unbrowse-backend-experiments.lewis-6d8.workers.dev";
@@ -670,13 +670,21 @@ export function mergePassiveCaptureData(
670
670
  });
671
671
  }
672
672
 
673
- // Priority 3: Extension entries (URL+headers supplement, no bodies)
673
+ // Priority 3: Extension entries (chrome.webRequest — has full auth headers HAR strips)
674
674
  for (const entry of extensionEntries) {
675
- if (seen.has(entry.url)) continue;
676
675
  const reqHeaders: Record<string, string> = {};
677
676
  for (const h of entry.requestHeaders ?? []) reqHeaders[h.name] = h.value;
678
677
  const respHeaders: Record<string, string> = {};
679
678
  for (const h of entry.responseHeaders ?? []) respHeaders[h.name] = h.value;
679
+
680
+ const existing = seen.get(entry.url);
681
+ if (existing) {
682
+ // Merge auth headers from extension into existing entry (HAR strips them)
683
+ for (const [k, v] of Object.entries(reqHeaders)) {
684
+ if (!existing.request_headers[k]) existing.request_headers[k] = v;
685
+ }
686
+ continue;
687
+ }
680
688
  seen.set(entry.url, {
681
689
  url: entry.url,
682
690
  method: entry.method,
@@ -936,22 +944,36 @@ export async function enrichPassiveCaptureRequests(params: {
936
944
  ...request,
937
945
  url: normalizeCapturedUrl(request.url, captureUrl),
938
946
  }));
947
+
948
+ // HAR strips auth headers. Infer their presence from cookies so
949
+ // enrichEndpointsWithTokenSources can create DAG bindings.
950
+ if (extensionEntries.length === 0) {
951
+ const tabCookies = await kuri.getCookies(tabId).catch(() => []) as Array<{ name: string; value: string }>;
952
+ const hasAuthCookie = tabCookies.some((c) => /^(ct0|csrf_token|_csrf|csrftoken|XSRF-TOKEN|auth_token)$/i.test(c.name));
953
+ if (hasAuthCookie) {
954
+ for (const req of requests) {
955
+ if (/\/(api|graphql|v\d+)\b/i.test(req.url) || /ads-api|voyager/i.test(req.url)) {
956
+ if (!req.request_headers["authorization"]) req.request_headers["authorization"] = "[REDACTED]";
957
+ if (!req.request_headers["x-csrf-token"]) req.request_headers["x-csrf-token"] = "[REDACTED]";
958
+ }
959
+ }
960
+ }
961
+ }
962
+
939
963
  log("capture", `browse-checkpoint tracked ${harEntries.length} HAR, ${intercepted.length} intercepted, ${extensionEntries.length} extension, ${responseBodies.size} bodies → ${requests.length} merged`);
940
964
  return requests;
941
965
  }
942
-
943
966
  /**
944
967
  * Collect network requests observed by kuri's builtin extension (chrome.webRequest).
945
968
  * Gracefully returns [] if the extension relay is not yet wired.
946
969
  */
947
970
  async function collectExtensionRequests(tabId: string): Promise<ExtensionEntry[]> {
948
971
  try {
949
- // Query the builtin extension's network log via the agent bridge
950
972
  const raw = await kuri.evaluate(tabId, `
951
973
  (function() {
952
974
  if (!window.__kuri || !window.__kuri._networkLog) return '[]';
953
975
  var log = window.__kuri._networkLog;
954
- window.__kuri._networkLog = []; // drain
976
+ window.__kuri._networkLog = [];
955
977
  return JSON.stringify(log);
956
978
  })()
957
979
  `);
@@ -1,6 +1,6 @@
1
1
  import { readFileSync, writeFileSync, existsSync, mkdirSync, readdirSync } from "fs";
2
2
  import { join } from "path";
3
- import { homedir, hostname } from "os";
3
+ import { homedir, hostname, release as osRelease } from "os";
4
4
  import { randomBytes, createHash } from "crypto";
5
5
  import { createInterface } from "readline";
6
6
  import type {
@@ -16,6 +16,7 @@ import {
16
16
  CODE_HASH,
17
17
  DEFAULT_BACKEND_URL,
18
18
  GIT_SHA,
19
+ PACKAGE_VERSION,
19
20
  RELEASE_MANIFEST_BASE64,
20
21
  RELEASE_MANIFEST_SIGNATURE,
21
22
  TRACE_VERSION,
@@ -302,10 +303,21 @@ export async function recordInstallTelemetryEvent(
302
303
  skill_version: options?.skillVersion,
303
304
  status: options?.status ?? "installed",
304
305
  created_at: createdAt,
305
- properties: mergeTelemetryProperties(options?.properties, getTelemetryAttribution()),
306
+ properties: mergeTelemetryProperties({ ...getRuntimeContext(), ...options?.properties }, getTelemetryAttribution()),
306
307
  });
307
308
  }
308
309
 
310
+ function getRuntimeContext(): Record<string, unknown> {
311
+ return {
312
+ cli_version: PACKAGE_VERSION,
313
+ code_hash: CODE_HASH,
314
+ node_version: process.version,
315
+ platform: process.platform,
316
+ arch: process.arch,
317
+ os_release: osRelease(),
318
+ };
319
+ }
320
+
309
321
  export async function recordFunnelTelemetryEvent(
310
322
  name: string,
311
323
  options?: {
@@ -326,7 +338,7 @@ export async function recordFunnelTelemetryEvent(
326
338
  source: options?.source ?? "cli",
327
339
  host_type: options?.hostType ?? detectTelemetryHostType(),
328
340
  created_at: createdAt,
329
- properties: mergeTelemetryProperties(options?.properties, getTelemetryAttribution()),
341
+ properties: mergeTelemetryProperties({ ...getRuntimeContext(), ...options?.properties }, getTelemetryAttribution()),
330
342
  });
331
343
  }
332
344
 
@@ -19,7 +19,6 @@ import { nanoid } from "nanoid";
19
19
  import { getRegistrableDomain } from "../domain.js";
20
20
  import { extractFromDOM, extractFromDOMWithHint } from "../extraction/index.js";
21
21
  import { buildSkillOperationGraph, getEndpointDescriptionMetadata, inferEndpointSemantic, resolveEndpointSemantic } from "../graph/index.js";
22
- import { augmentEndpointsWithAgent } from "../graph/agent-augment.js";
23
22
  import { log } from "../logger.js";
24
23
  import { TRACE_VERSION } from "../version.js";
25
24
  import { buildQueryBindingMap, extractTemplateQueryBindings, mergeContextTemplateParams } from "../template-params.js";
@@ -190,11 +189,10 @@ function normalizeEndpointForManifest(endpoint: EndpointDescriptor): EndpointDes
190
189
 
191
190
  async function prepareLearnedEndpoints(
192
191
  endpoints: EndpointDescriptor[],
193
- intent: string,
194
- domain: string,
192
+ _intent: string,
193
+ _domain: string,
195
194
  ): Promise<EndpointDescriptor[]> {
196
- const normalized = endpoints.map(normalizeEndpointForManifest);
197
- return augmentEndpointsWithAgent(normalized, { intent, domain });
195
+ return endpoints.map(normalizeEndpointForManifest);
198
196
  }
199
197
 
200
198
  function intentWantsStructuredRecords(intent?: string): boolean {
@@ -2017,14 +2015,16 @@ export async function executeEndpoint(
2017
2015
  const epDomain = (() => { try { return new URL(endpoint.url_template).hostname; } catch { return skill.domain; } })();
2018
2016
  await reloadExecutionAuthState(skill, epDomain, authHeaders, cookies);
2019
2017
 
2020
- // If endpoint has auth_tokens bindings and vault didn't provide the needed headers,
2021
- // resolve them from the page source (meta tags, inline scripts, JS bundles)
2022
- if (endpoint.auth_tokens?.length && Object.keys(authHeaders).length === 0) {
2018
+ // If endpoint has auth_tokens bindings, always resolve fresh tokens.
2019
+ // Vault headers may be stale — the DAG knows how to get fresh ones.
2020
+ log("exec", `auth_tokens check: ${endpoint.auth_tokens?.length ?? 0} bindings on ${endpoint.endpoint_id}`);
2021
+ if (endpoint.auth_tokens?.length) {
2023
2022
  try {
2024
2023
  const { resolveAuthTokens } = await import("./token-resolver.js");
2025
2024
  const resolved = await resolveAuthTokens(endpoint, cookies, authHeaders);
2025
+ log("exec", `token resolver returned ${Object.keys(resolved).length} headers: ${Object.keys(resolved).join(",") || "none"} auth=${(resolved.authorization || "").substring(0, 40)}`);
2026
2026
  Object.assign(authHeaders, resolved);
2027
- } catch { /* token resolution is best-effort */ }
2027
+ } catch (e) { log("exec", `token resolver failed: ${e}`); }
2028
2028
  }
2029
2029
 
2030
2030
  log("exec", `endpoint ${endpoint.endpoint_id}: cookies=${cookies.length} authHeaders=${Object.keys(authHeaders).length} hasAuth=${cookies.length > 0 || Object.keys(authHeaders).length > 0}`);
@@ -2241,7 +2241,7 @@ export async function executeEndpoint(
2241
2241
 
2242
2242
  for (const replayUrl of replayUrls) {
2243
2243
  const replayHeaders = buildStructuredReplayHeaders(url, replayUrl, headers);
2244
- log("exec", `server-fetch: ${endpoint.method} ${replayUrl.substring(0, 80)} csrf=${replayHeaders["x-csrf-token"]?.substring(0, 10)}... cookies=${(replayHeaders["cookie"]?.length ?? 0)}chars`);
2244
+ log("exec", `server-fetch: ${endpoint.method} ${replayUrl.substring(0, 80)} auth=${(replayHeaders["authorization"] || "none").substring(0, 50)} csrf=${replayHeaders["x-csrf-token"]?.substring(0, 10)}... cookies=${(replayHeaders["cookie"]?.length ?? 0)}chars`);
2245
2245
  const res = await fetch(replayUrl, {
2246
2246
  method: endpoint.method,
2247
2247
  headers: replayHeaders,
@@ -3165,6 +3165,7 @@ export function rankEndpoints(endpoints: EndpointDescriptor[], intent?: string,
3165
3165
  }
3166
3166
  }
3167
3167
  score += ep.reliability_score * 5;
3168
+ if (ep.verification_status === "verified") score += 15;
3168
3169
  if (ep.method === "WS" && ep.response_schema) score += 3;
3169
3170
 
3170
3171
  // === Domain affinity ===
@@ -34,10 +34,8 @@ export async function resolveAuthTokens(
34
34
  const triggerUrl = endpoint.trigger_url;
35
35
  if (!triggerUrl) return {};
36
36
 
37
- // Only resolve header bindings that aren't already satisfied
38
- const headerBindings = bindings.filter(
39
- (b) => b.param_location === "header" && !existingAuthHeaders[b.param_name],
40
- );
37
+ // Resolve ALL header bindings — DAG sources are authoritative over vault cache
38
+ const headerBindings = bindings.filter((b) => b.param_location === "header");
41
39
  if (headerBindings.length === 0) return {};
42
40
 
43
41
  const resolved: Record<string, string> = {};
@@ -56,7 +54,14 @@ export async function resolveAuthTokens(
56
54
  }
57
55
 
58
56
  for (const binding of headerBindings) {
59
- const value = resolveBinding(binding, html);
57
+ let value = await resolveBinding(binding, html, cookies);
58
+
59
+ // If stored sources didn't resolve (e.g. bearer in a different bundle),
60
+ // scan all loaded script resources via Performance API
61
+ if (!value && binding.sources.some((s) => s.kind === "js-bundle")) {
62
+ value = await scanAllScriptResources(tabId, binding);
63
+ }
64
+
60
65
  if (value) {
61
66
  resolved[binding.param_name] = binding.param_name.toLowerCase() === "authorization"
62
67
  ? (value.startsWith("Bearer ") ? value : `Bearer ${value}`)
@@ -73,21 +78,36 @@ export async function resolveAuthTokens(
73
78
  return resolved;
74
79
  }
75
80
 
76
- function resolveBinding(binding: AuthTokenBinding, html: string): string | undefined {
81
+ async function resolveBinding(
82
+ binding: AuthTokenBinding,
83
+ html: string,
84
+ cookies: Array<{ name: string; value: string; domain: string }>,
85
+ ): Promise<string | undefined> {
77
86
  for (const source of binding.sources) {
78
87
  let value: string | undefined;
79
88
 
80
- if (source.kind === "html-meta" || source.kind === "html-inline-script") {
89
+ if (source.kind === "cookie" && source.cookie_names?.length) {
90
+ // Resolve from cookies — CSRF tokens typically live here
91
+ for (const name of source.cookie_names) {
92
+ const cookie = cookies.find((c) => c.name === name);
93
+ if (cookie?.value) { value = cookie.value; break; }
94
+ }
95
+ } else if (source.kind === "html-meta" || source.kind === "html-inline-script") {
81
96
  value = extractTokenFromHtml(source, html);
97
+ } else if (source.kind === "js-bundle" && source.bundle_url_pattern) {
98
+ try {
99
+ const resp = await fetch(source.bundle_url_pattern);
100
+ if (resp.ok) {
101
+ const body = await resp.text();
102
+ value = extractTokenFromBundle(source, body);
103
+ }
104
+ } catch { /* fetch failed — try next source */ }
82
105
  }
83
- // JS bundle resolution would require fetching the bundle URL —
84
- // skip for now, HTML sources cover most cases (CSRF, inline tokens)
85
106
 
86
107
  if (value && value.length >= 8) return value;
87
108
  }
88
109
  return undefined;
89
110
  }
90
-
91
111
  async function openResolverTab(
92
112
  url: string,
93
113
  cookies: Array<{ name: string; value: string; domain: string }>,
@@ -120,3 +140,35 @@ async function waitForLoad(tabId: string): Promise<void> {
120
140
  await new Promise((r) => setTimeout(r, 500));
121
141
  }
122
142
  }
143
+
144
+ /**
145
+ * Scan all script resources loaded by the page via Performance API.
146
+ * Fetches each bundle and searches for a token matching the binding pattern.
147
+ * This handles cases where the enrichment captured the wrong bundle URLs.
148
+ */
149
+ async function scanAllScriptResources(tabId: string, binding: AuthTokenBinding): Promise<string | undefined> {
150
+ try {
151
+ const raw = await kuri.evaluate(tabId, `
152
+ JSON.stringify(
153
+ performance.getEntriesByType('resource')
154
+ .filter(function(e) { return e.initiatorType === 'script'; })
155
+ .map(function(e) { return e.name; })
156
+ )
157
+ `);
158
+ if (typeof raw !== "string" || !raw.startsWith("[")) return undefined;
159
+ const urls: string[] = JSON.parse(raw);
160
+
161
+ const tokenPattern = /AAAAAAAAAAAAAAAAAAA[A-Za-z0-9+/=_%-]{20,}/;
162
+
163
+ for (const url of urls) {
164
+ try {
165
+ const resp = await fetch(url);
166
+ if (!resp.ok) continue;
167
+ const body = await resp.text();
168
+ const m = body.match(tokenPattern);
169
+ if (m && m[0].length >= 20) return m[0];
170
+ } catch { /* fetch failed, try next */ }
171
+ }
172
+ } catch { /* evaluate failed */ }
173
+ return undefined;
174
+ }
@@ -15,7 +15,6 @@ import { extractBrowserCookies } from "../auth/browser-cookies.js";
15
15
  import { queueBackgroundIndex } from "../indexer/index.js";
16
16
  import { mergeEndpoints } from "../marketplace/index.js";
17
17
  import { buildSkillOperationGraph } from "../graph/index.js";
18
- import { augmentEndpointsWithAgent } from "../graph/agent-augment.js";
19
18
  import { findExistingSkillForDomain, cachePublishedSkill } from "../client/index.js";
20
19
  import { storeCredential } from "../vault/index.js";
21
20
  import { getRegistrableDomain } from "../domain.js";
@@ -289,8 +289,6 @@ export function enrichEndpointsWithTokenSources(
289
289
  html: string | undefined,
290
290
  jsBundles: Map<string, string> | undefined,
291
291
  ): number {
292
- if (!html && (!jsBundles || jsBundles.size === 0)) return 0;
293
-
294
292
  let enriched = 0;
295
293
  for (const req of requests) {
296
294
  const matching = endpoints.filter((ep) => endpointMatchesRequest(ep, req));
@@ -298,15 +296,35 @@ export function enrichEndpointsWithTokenSources(
298
296
 
299
297
  for (const [headerName, headerValue] of Object.entries(req.request_headers)) {
300
298
  if (!TOKEN_HEADER_PATTERN.test(headerName)) continue;
301
- // Bearer tokens are prefixed — strip before scanning
302
299
  const tokenValue = extractTokenValue(headerName, headerValue);
303
300
  if (!tokenValue) continue;
304
301
 
305
- const sources = findTokenSources(tokenValue, html, jsBundles);
302
+ const sources = (html || (jsBundles && jsBundles.size > 0))
303
+ ? findTokenSources(tokenValue, html, jsBundles)
304
+ : [];
305
+
306
+ const lowerName = headerName.toLowerCase();
307
+ if (sources.length === 0) {
308
+ if (/csrf|xsrf/i.test(lowerName)) {
309
+ sources.push({ kind: "cookie", cookie_names: ["ct0", "csrf_token", "_csrf", "csrftoken", "XSRF-TOKEN"] });
310
+ } else if (lowerName === "authorization") {
311
+ if (html) {
312
+ const scriptSrcRe = /<script[^>]+src=["']([^"']+)["']/gi;
313
+ let m: RegExpExecArray | null;
314
+ while ((m = scriptSrcRe.exec(html)) !== null) {
315
+ if (/main|app|client|bundle|vendor/i.test(m[1])) {
316
+ sources.push({ kind: "js-bundle", bundle_url_pattern: m[1] });
317
+ if (sources.length >= 3) break;
318
+ }
319
+ }
320
+ }
321
+ }
322
+ }
323
+
306
324
  if (sources.length === 0) continue;
307
325
 
308
326
  const binding: AuthTokenBinding = {
309
- param_name: headerName.toLowerCase(),
327
+ param_name: lowerName,
310
328
  param_location: "header",
311
329
  sources,
312
330
  refresh_on_401: true,
@@ -314,9 +332,13 @@ export function enrichEndpointsWithTokenSources(
314
332
 
315
333
  for (const ep of matching) {
316
334
  if (!ep.auth_tokens) ep.auth_tokens = [];
317
- // Dedupe: skip if a binding for the same param already exists
318
- if (ep.auth_tokens.some((b) => b.param_name === binding.param_name)) continue;
319
- ep.auth_tokens.push(binding);
335
+ // Always replace with fresh binding — stale sources from cached merges get overwritten
336
+ const idx = ep.auth_tokens.findIndex((b) => b.param_name === binding.param_name);
337
+ if (idx >= 0) {
338
+ ep.auth_tokens[idx] = binding;
339
+ } else {
340
+ ep.auth_tokens.push(binding);
341
+ }
320
342
  enriched++;
321
343
  }
322
344
  }
Binary file
Binary file
Binary file
Binary file
@@ -1,28 +1,25 @@
1
1
  {
2
2
  "repo_url": "https://github.com/justrach/kuri.git",
3
3
  "branch": "adding-extensions",
4
- "source_sha": "08eecbe3740f046a46f656eed7ebfc66c1bad9bb",
5
- "built_at": "2026-04-05T06:43:57.212Z",
4
+ "source_sha": "eadfaa5f921f7152e1762aed5ed64b3a4fbefbf3",
5
+ "built_at": "2026-04-06T02:54:10.305Z",
6
6
  "binaries": {
7
7
  "darwin-arm64": {
8
8
  "zig_target": "aarch64-macos",
9
- "sha256": "1553633e722d18059dedffa8a52d55ed6c052e4961fd2753ee0b62be60b241bf"
9
+ "sha256": "1796501e393403016723c6b69266b834e2db04ba2559f51c84c957bd85c3927b",
10
+ "source": "prebuilt"
10
11
  },
11
12
  "darwin-x64": {
12
13
  "zig_target": "x86_64-macos",
13
- "sha256": "b5eb07e631c6ddad64019c8d0c86c32cb76a74ff0791ac5611a3aa3550767ec8"
14
+ "sha256": "82db8a0f3596d1f9335785eca8009e257bf379197e725f29aaedd6f9c2456267"
14
15
  },
15
16
  "linux-arm64": {
16
17
  "zig_target": "aarch64-linux",
17
- "sha256": "ea88a26f7b335d5842b0c1d83bfa4066bed0a119284560f6bd3833f1d240cce2"
18
+ "sha256": "8b53f4944274cb8930488ef822b0052f121e824424501863c399f251d827386b"
18
19
  },
19
20
  "linux-x64": {
20
21
  "zig_target": "x86_64-linux",
21
- "sha256": "175a7c59e458e952a26974f0fb5c2ce374e56f2c4c352903b481b5aa5a16978f"
22
- },
23
- "win-x64": {
24
- "zig_target": "x86_64-windows",
25
- "sha256": "176291ad9827a183ba7322ddb56cc1fa5edc7c214a264ecdf8a1d5d18366d686"
22
+ "sha256": "ac00c41f2a8c706de9f0dcce1087fcdccb53484303c05f101bb461f7b9dceb48"
26
23
  }
27
24
  }
28
25
  }
Binary file