unbrowse 3.1.0-experiments.5e7a7bb → 3.1.0-experiments.995f8bb

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.js CHANGED
@@ -31,7 +31,7 @@ var __promiseAll = (args) => Promise.all(args);
31
31
  var __require = /* @__PURE__ */ createRequire(import.meta.url);
32
32
 
33
33
  // ../../src/build-info.generated.ts
34
- var BUILD_RELEASE_VERSION = "3.1.0-experiments.5e7a7bb", BUILD_GIT_SHA = "5e7a7bb949c1", BUILD_CODE_HASH = "1488fc1d92b7", BUILD_RELEASE_MANIFEST_BASE64 = "eyJzY2hlbWFfdmVyc2lvbiI6MSwicmVsZWFzZV92ZXJzaW9uIjoiMy4xLjAtZXhwZXJpbWVudHMuNWU3YTdiYiIsImdpdF9zaGEiOiI1ZTdhN2JiOTQ5YzEiLCJjb2RlX2hhc2giOiIxNDg4ZmMxZDkyYjciLCJ0cmFjZV92ZXJzaW9uIjoiMTQ4OGZjMWQ5MmI3QDVlN2E3YmI5NDljMSIsImlzc3VlZF9hdCI6IjIwMjYtMDQtMDVUMTQ6NTY6MjkuNjY2WiJ9", BUILD_RELEASE_MANIFEST_SIGNATURE = "OuZD9NeemoStAyT3-MgMS3V3eeatbRMKkVY_J4_6nsM", BUILD_DEFAULT_BACKEND_URL = "https://unbrowse-backend-experiments.lewis-6d8.workers.dev";
34
+ var BUILD_RELEASE_VERSION = "3.1.0-experiments.995f8bb", BUILD_GIT_SHA = "995f8bbf54ac", BUILD_CODE_HASH = "1488fc1d92b7", BUILD_RELEASE_MANIFEST_BASE64 = "eyJzY2hlbWFfdmVyc2lvbiI6MSwicmVsZWFzZV92ZXJzaW9uIjoiMy4xLjAtZXhwZXJpbWVudHMuOTk1ZjhiYiIsImdpdF9zaGEiOiI5OTVmOGJiZjU0YWMiLCJjb2RlX2hhc2giOiIxNDg4ZmMxZDkyYjciLCJ0cmFjZV92ZXJzaW9uIjoiMTQ4OGZjMWQ5MmI3QDk5NWY4YmJmNTRhYyIsImlzc3VlZF9hdCI6IjIwMjYtMDQtMDVUMjM6NDc6MDguMzk3WiJ9", BUILD_RELEASE_MANIFEST_SIGNATURE = "fsY0FlwZTGKoZwvzv3jow-t3ri874GEziuzW8i3aupw", BUILD_DEFAULT_BACKEND_URL = "https://unbrowse-backend-experiments.lewis-6d8.workers.dev";
35
35
 
36
36
  // ../../src/version.ts
37
37
  import { createHash } from "crypto";
@@ -899,18 +899,6 @@ var init_extraction = __esm(() => {
899
899
  CHROME_TAGS = new Set(["nav", "footer", "header"]);
900
900
  });
901
901
 
902
- // ../../src/graph/agent-augment.ts
903
- var DEFAULT_MODEL, ENABLED, AUGMENT_TIMEOUT_MS, MAX_AUGMENT_ENDPOINTS, MAX_AUGMENT_PAYLOAD_CHARS, GENERIC_SEMANTIC_TYPES;
904
- var init_agent_augment = __esm(() => {
905
- init_graph();
906
- DEFAULT_MODEL = process.env.UNBROWSE_AGENT_SEMANTIC_MODEL ?? process.env.UNBROWSE_AGENT_JUDGE_MODEL ?? "gpt-4.1-mini";
907
- ENABLED = process.env.UNBROWSE_AGENT_SEMANTIC_AUGMENT !== "0";
908
- AUGMENT_TIMEOUT_MS = Number(process.env.UNBROWSE_AGENT_SEMANTIC_TIMEOUT_MS ?? 8000);
909
- MAX_AUGMENT_ENDPOINTS = Math.max(1, Number(process.env.UNBROWSE_AGENT_SEMANTIC_MAX_ENDPOINTS ?? 6));
910
- MAX_AUGMENT_PAYLOAD_CHARS = Math.max(4000, Number(process.env.UNBROWSE_AGENT_SEMANTIC_MAX_PAYLOAD_CHARS ?? 24000));
911
- GENERIC_SEMANTIC_TYPES = new Set(["identifier", "input", "resource", "entity", "item"]);
912
- });
913
-
914
902
  // ../../src/execution/search-forms.ts
915
903
  var SEARCH_FIELD_NAMES, LOGIN_FIELD_NAMES, SUPPORTED_INPUT_TYPES;
916
904
  var init_search_forms = __esm(() => {
@@ -1036,7 +1024,6 @@ var init_execution = __esm(async () => {
1036
1024
  init_domain();
1037
1025
  init_extraction();
1038
1026
  init_graph();
1039
- init_agent_augment();
1040
1027
  init_logger();
1041
1028
  init_version();
1042
1029
  init_search_forms();
package/dist/mcp.js CHANGED
@@ -225,11 +225,11 @@ import { dirname, join, parse } from "path";
225
225
  import { fileURLToPath as fileURLToPath2 } from "url";
226
226
 
227
227
  // ../../src/build-info.generated.ts
228
- var BUILD_RELEASE_VERSION = "3.1.0-experiments.5e7a7bb";
229
- var BUILD_GIT_SHA = "5e7a7bb949c1";
228
+ var BUILD_RELEASE_VERSION = "3.1.0-experiments.995f8bb";
229
+ var BUILD_GIT_SHA = "995f8bbf54ac";
230
230
  var BUILD_CODE_HASH = "1488fc1d92b7";
231
- var BUILD_RELEASE_MANIFEST_BASE64 = "eyJzY2hlbWFfdmVyc2lvbiI6MSwicmVsZWFzZV92ZXJzaW9uIjoiMy4xLjAtZXhwZXJpbWVudHMuNWU3YTdiYiIsImdpdF9zaGEiOiI1ZTdhN2JiOTQ5YzEiLCJjb2RlX2hhc2giOiIxNDg4ZmMxZDkyYjciLCJ0cmFjZV92ZXJzaW9uIjoiMTQ4OGZjMWQ5MmI3QDVlN2E3YmI5NDljMSIsImlzc3VlZF9hdCI6IjIwMjYtMDQtMDVUMTQ6NTY6MjkuNjY2WiJ9";
232
- var BUILD_RELEASE_MANIFEST_SIGNATURE = "OuZD9NeemoStAyT3-MgMS3V3eeatbRMKkVY_J4_6nsM";
231
+ var BUILD_RELEASE_MANIFEST_BASE64 = "eyJzY2hlbWFfdmVyc2lvbiI6MSwicmVsZWFzZV92ZXJzaW9uIjoiMy4xLjAtZXhwZXJpbWVudHMuOTk1ZjhiYiIsImdpdF9zaGEiOiI5OTVmOGJiZjU0YWMiLCJjb2RlX2hhc2giOiIxNDg4ZmMxZDkyYjciLCJ0cmFjZV92ZXJzaW9uIjoiMTQ4OGZjMWQ5MmI3QDk5NWY4YmJmNTRhYyIsImlzc3VlZF9hdCI6IjIwMjYtMDQtMDVUMjM6NDc6MDguMzk3WiJ9";
232
+ var BUILD_RELEASE_MANIFEST_SIGNATURE = "fsY0FlwZTGKoZwvzv3jow-t3ri874GEziuzW8i3aupw";
233
233
  var BUILD_DEFAULT_BACKEND_URL = "https://unbrowse-backend-experiments.lewis-6d8.workers.dev";
234
234
 
235
235
  // ../../src/version.ts
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "unbrowse",
3
- "version": "3.1.0-experiments.5e7a7bb",
3
+ "version": "3.1.0-experiments.995f8bb",
4
4
  "description": "Reverse-engineer any website into reusable API skills. Zero-dep single binary with embedded browser engine.",
5
5
  "type": "module",
6
6
  "bin": {
@@ -1,5 +1,6 @@
1
1
  import { nanoid } from "nanoid";
2
2
  import { readFileSync } from "node:fs";
3
+ import { log } from "../logger.js";
3
4
  import { extractEndpoints, extractAuthHeaders } from "../reverse-engineer/index.js";
4
5
  import { enrichEndpointsWithTokenSources } from "../reverse-engineer/token-sources.js";
5
6
  import { buildSkillOperationGraph, inferEndpointSemantic } from "../graph/index.js";
@@ -196,6 +197,7 @@ export async function cacheBrowseRequests(params: {
196
197
  if (!existingSkill || mergedEndpoints.length >= existingSkill.endpoints.length) {
197
198
  for (const endpoint of mergedEndpoints) {
198
199
  if (!endpoint.description) endpoint.description = generateLocalDescription(endpoint);
200
+ if (!endpoint.semantic) endpoint.semantic = inferEndpointSemantic(endpoint);
199
201
  }
200
202
  const quickSkill: SkillManifest = {
201
203
  skill_id: existingSkill?.skill_id ?? nanoid(),
@@ -221,9 +223,11 @@ export async function cacheBrowseRequests(params: {
221
223
  try {
222
224
  const html = getPageHtml ? await getPageHtml() : undefined;
223
225
  if (html && html.startsWith("<")) {
224
- enrichEndpointsWithTokenSources(quickSkill.endpoints, requests, html, jsBundles);
226
+ const preCheck = requests.filter(r => r.request_headers["authorization"] || r.request_headers["x-csrf-token"]).length;
227
+ const enriched = enrichEndpointsWithTokenSources(quickSkill.endpoints, requests, html, jsBundles);
228
+ log("browse-index", `token enrichment: ${enriched} bindings, ${preCheck} auth-reqs pre-call, ${quickSkill.endpoints.length} eps`);
225
229
  }
226
- } catch { /* best-effort */ }
230
+ } catch (e) { log("browse-index", `token enrichment failed: ${e}`); }
227
231
 
228
232
  const cacheKey = buildResolveCacheKey(domain, intent, sessionUrl);
229
233
  const scopedKey = scopedCacheKey("global", cacheKey);
@@ -8,7 +8,6 @@ import { nanoid } from "nanoid";
8
8
  import type { ExecutionTrace, OrchestrationTiming, ProjectionOptions, SkillManifest } from "../types/index.js";
9
9
  import { mergeEndpoints } from "../marketplace/index.js";
10
10
  import { buildSkillOperationGraph, getEndpointDescriptionMetadata, getSkillChunk, toAgentSkillChunkView } from "../graph/index.js";
11
- import { augmentEndpointsWithAgent } from "../graph/agent-augment.js";
12
11
  import { findExistingSkillForDomain, cachePublishedSkill } from "../client/index.js";
13
12
  import { storeCredential } from "../vault/index.js";
14
13
  import { getRegistrableDomain } from "../domain.js";
@@ -7,7 +7,6 @@ import type { RawRequest } from "../capture/index.js";
7
7
  import { queueBackgroundIndex } from "../indexer/index.js";
8
8
  import { mergeEndpoints } from "../marketplace/index.js";
9
9
  import { buildSkillOperationGraph } from "../graph/index.js";
10
- import { augmentEndpointsWithAgent } from "../graph/agent-augment.js";
11
10
  import { findExistingSkillForDomain, cachePublishedSkill } from "../client/index.js";
12
11
  import { storeCredential } from "../vault/index.js";
13
12
  import { getRegistrableDomain } from "../domain.js";
@@ -1,6 +1,6 @@
1
- export const BUILD_RELEASE_VERSION = "3.1.0-experiments.5e7a7bb";
2
- export const BUILD_GIT_SHA = "5e7a7bb949c1";
1
+ export const BUILD_RELEASE_VERSION = "3.1.0-experiments.995f8bb";
2
+ export const BUILD_GIT_SHA = "995f8bbf54ac";
3
3
  export const BUILD_CODE_HASH = "1488fc1d92b7";
4
- export const BUILD_RELEASE_MANIFEST_BASE64 = "eyJzY2hlbWFfdmVyc2lvbiI6MSwicmVsZWFzZV92ZXJzaW9uIjoiMy4xLjAtZXhwZXJpbWVudHMuNWU3YTdiYiIsImdpdF9zaGEiOiI1ZTdhN2JiOTQ5YzEiLCJjb2RlX2hhc2giOiIxNDg4ZmMxZDkyYjciLCJ0cmFjZV92ZXJzaW9uIjoiMTQ4OGZjMWQ5MmI3QDVlN2E3YmI5NDljMSIsImlzc3VlZF9hdCI6IjIwMjYtMDQtMDVUMTQ6NTY6MjkuNjY2WiJ9";
5
- export const BUILD_RELEASE_MANIFEST_SIGNATURE = "OuZD9NeemoStAyT3-MgMS3V3eeatbRMKkVY_J4_6nsM";
4
+ export const BUILD_RELEASE_MANIFEST_BASE64 = "eyJzY2hlbWFfdmVyc2lvbiI6MSwicmVsZWFzZV92ZXJzaW9uIjoiMy4xLjAtZXhwZXJpbWVudHMuOTk1ZjhiYiIsImdpdF9zaGEiOiI5OTVmOGJiZjU0YWMiLCJjb2RlX2hhc2giOiIxNDg4ZmMxZDkyYjciLCJ0cmFjZV92ZXJzaW9uIjoiMTQ4OGZjMWQ5MmI3QDk5NWY4YmJmNTRhYyIsImlzc3VlZF9hdCI6IjIwMjYtMDQtMDVUMjM6NDc6MDguMzk3WiJ9";
5
+ export const BUILD_RELEASE_MANIFEST_SIGNATURE = "fsY0FlwZTGKoZwvzv3jow-t3ri874GEziuzW8i3aupw";
6
6
  export const BUILD_DEFAULT_BACKEND_URL = "https://unbrowse-backend-experiments.lewis-6d8.workers.dev";
@@ -670,13 +670,21 @@ export function mergePassiveCaptureData(
670
670
  });
671
671
  }
672
672
 
673
- // Priority 3: Extension entries (URL+headers supplement, no bodies)
673
+ // Priority 3: Extension entries (chrome.webRequest — has full auth headers HAR strips)
674
674
  for (const entry of extensionEntries) {
675
- if (seen.has(entry.url)) continue;
676
675
  const reqHeaders: Record<string, string> = {};
677
676
  for (const h of entry.requestHeaders ?? []) reqHeaders[h.name] = h.value;
678
677
  const respHeaders: Record<string, string> = {};
679
678
  for (const h of entry.responseHeaders ?? []) respHeaders[h.name] = h.value;
679
+
680
+ const existing = seen.get(entry.url);
681
+ if (existing) {
682
+ // Merge auth headers from extension into existing entry (HAR strips them)
683
+ for (const [k, v] of Object.entries(reqHeaders)) {
684
+ if (!existing.request_headers[k]) existing.request_headers[k] = v;
685
+ }
686
+ continue;
687
+ }
680
688
  seen.set(entry.url, {
681
689
  url: entry.url,
682
690
  method: entry.method,
@@ -936,22 +944,36 @@ export async function enrichPassiveCaptureRequests(params: {
936
944
  ...request,
937
945
  url: normalizeCapturedUrl(request.url, captureUrl),
938
946
  }));
947
+
948
+ // HAR strips auth headers. Infer their presence from cookies so
949
+ // enrichEndpointsWithTokenSources can create DAG bindings.
950
+ if (extensionEntries.length === 0) {
951
+ const tabCookies = await kuri.getCookies(tabId).catch(() => []) as Array<{ name: string; value: string }>;
952
+ const hasAuthCookie = tabCookies.some((c) => /^(ct0|csrf_token|_csrf|csrftoken|XSRF-TOKEN|auth_token)$/i.test(c.name));
953
+ if (hasAuthCookie) {
954
+ for (const req of requests) {
955
+ if (/\/(api|graphql|v\d+)\b/i.test(req.url) || /ads-api|voyager/i.test(req.url)) {
956
+ if (!req.request_headers["authorization"]) req.request_headers["authorization"] = "[REDACTED]";
957
+ if (!req.request_headers["x-csrf-token"]) req.request_headers["x-csrf-token"] = "[REDACTED]";
958
+ }
959
+ }
960
+ }
961
+ }
962
+
939
963
  log("capture", `browse-checkpoint tracked ${harEntries.length} HAR, ${intercepted.length} intercepted, ${extensionEntries.length} extension, ${responseBodies.size} bodies → ${requests.length} merged`);
940
964
  return requests;
941
965
  }
942
-
943
966
  /**
944
967
  * Collect network requests observed by kuri's builtin extension (chrome.webRequest).
945
968
  * Gracefully returns [] if the extension relay is not yet wired.
946
969
  */
947
970
  async function collectExtensionRequests(tabId: string): Promise<ExtensionEntry[]> {
948
971
  try {
949
- // Query the builtin extension's network log via the agent bridge
950
972
  const raw = await kuri.evaluate(tabId, `
951
973
  (function() {
952
974
  if (!window.__kuri || !window.__kuri._networkLog) return '[]';
953
975
  var log = window.__kuri._networkLog;
954
- window.__kuri._networkLog = []; // drain
976
+ window.__kuri._networkLog = [];
955
977
  return JSON.stringify(log);
956
978
  })()
957
979
  `);
@@ -19,7 +19,6 @@ import { nanoid } from "nanoid";
19
19
  import { getRegistrableDomain } from "../domain.js";
20
20
  import { extractFromDOM, extractFromDOMWithHint } from "../extraction/index.js";
21
21
  import { buildSkillOperationGraph, getEndpointDescriptionMetadata, inferEndpointSemantic, resolveEndpointSemantic } from "../graph/index.js";
22
- import { augmentEndpointsWithAgent } from "../graph/agent-augment.js";
23
22
  import { log } from "../logger.js";
24
23
  import { TRACE_VERSION } from "../version.js";
25
24
  import { buildQueryBindingMap, extractTemplateQueryBindings, mergeContextTemplateParams } from "../template-params.js";
@@ -190,11 +189,10 @@ function normalizeEndpointForManifest(endpoint: EndpointDescriptor): EndpointDes
190
189
 
191
190
  async function prepareLearnedEndpoints(
192
191
  endpoints: EndpointDescriptor[],
193
- intent: string,
194
- domain: string,
192
+ _intent: string,
193
+ _domain: string,
195
194
  ): Promise<EndpointDescriptor[]> {
196
- const normalized = endpoints.map(normalizeEndpointForManifest);
197
- return augmentEndpointsWithAgent(normalized, { intent, domain });
195
+ return endpoints.map(normalizeEndpointForManifest);
198
196
  }
199
197
 
200
198
  function intentWantsStructuredRecords(intent?: string): boolean {
@@ -2017,14 +2015,16 @@ export async function executeEndpoint(
2017
2015
  const epDomain = (() => { try { return new URL(endpoint.url_template).hostname; } catch { return skill.domain; } })();
2018
2016
  await reloadExecutionAuthState(skill, epDomain, authHeaders, cookies);
2019
2017
 
2020
- // If endpoint has auth_tokens bindings and vault didn't provide the needed headers,
2021
- // resolve them from the page source (meta tags, inline scripts, JS bundles)
2022
- if (endpoint.auth_tokens?.length && Object.keys(authHeaders).length === 0) {
2018
+ // If endpoint has auth_tokens bindings, always resolve fresh tokens.
2019
+ // Vault headers may be stale — the DAG knows how to get fresh ones.
2020
+ log("exec", `auth_tokens check: ${endpoint.auth_tokens?.length ?? 0} bindings on ${endpoint.endpoint_id}`);
2021
+ if (endpoint.auth_tokens?.length) {
2023
2022
  try {
2024
2023
  const { resolveAuthTokens } = await import("./token-resolver.js");
2025
2024
  const resolved = await resolveAuthTokens(endpoint, cookies, authHeaders);
2025
+ log("exec", `token resolver returned ${Object.keys(resolved).length} headers: ${Object.keys(resolved).join(",") || "none"}`);
2026
2026
  Object.assign(authHeaders, resolved);
2027
- } catch { /* token resolution is best-effort */ }
2027
+ } catch (e) { log("exec", `token resolver failed: ${e}`); }
2028
2028
  }
2029
2029
 
2030
2030
  log("exec", `endpoint ${endpoint.endpoint_id}: cookies=${cookies.length} authHeaders=${Object.keys(authHeaders).length} hasAuth=${cookies.length > 0 || Object.keys(authHeaders).length > 0}`);
@@ -34,10 +34,8 @@ export async function resolveAuthTokens(
34
34
  const triggerUrl = endpoint.trigger_url;
35
35
  if (!triggerUrl) return {};
36
36
 
37
- // Only resolve header bindings that aren't already satisfied
38
- const headerBindings = bindings.filter(
39
- (b) => b.param_location === "header" && !existingAuthHeaders[b.param_name],
40
- );
37
+ // Resolve ALL header bindings — DAG sources are authoritative over vault cache
38
+ const headerBindings = bindings.filter((b) => b.param_location === "header");
41
39
  if (headerBindings.length === 0) return {};
42
40
 
43
41
  const resolved: Record<string, string> = {};
@@ -56,7 +54,7 @@ export async function resolveAuthTokens(
56
54
  }
57
55
 
58
56
  for (const binding of headerBindings) {
59
- const value = resolveBinding(binding, html);
57
+ const value = await resolveBinding(binding, html, cookies);
60
58
  if (value) {
61
59
  resolved[binding.param_name] = binding.param_name.toLowerCase() === "authorization"
62
60
  ? (value.startsWith("Bearer ") ? value : `Bearer ${value}`)
@@ -73,21 +71,36 @@ export async function resolveAuthTokens(
73
71
  return resolved;
74
72
  }
75
73
 
76
- function resolveBinding(binding: AuthTokenBinding, html: string): string | undefined {
74
+ async function resolveBinding(
75
+ binding: AuthTokenBinding,
76
+ html: string,
77
+ cookies: Array<{ name: string; value: string; domain: string }>,
78
+ ): Promise<string | undefined> {
77
79
  for (const source of binding.sources) {
78
80
  let value: string | undefined;
79
81
 
80
- if (source.kind === "html-meta" || source.kind === "html-inline-script") {
82
+ if (source.kind === "cookie" && source.cookie_names?.length) {
83
+ // Resolve from cookies — CSRF tokens typically live here
84
+ for (const name of source.cookie_names) {
85
+ const cookie = cookies.find((c) => c.name === name);
86
+ if (cookie?.value) { value = cookie.value; break; }
87
+ }
88
+ } else if (source.kind === "html-meta" || source.kind === "html-inline-script") {
81
89
  value = extractTokenFromHtml(source, html);
90
+ } else if (source.kind === "js-bundle" && source.bundle_url_pattern) {
91
+ try {
92
+ const resp = await fetch(source.bundle_url_pattern);
93
+ if (resp.ok) {
94
+ const body = await resp.text();
95
+ value = extractTokenFromBundle(source, body);
96
+ }
97
+ } catch { /* fetch failed — try next source */ }
82
98
  }
83
- // JS bundle resolution would require fetching the bundle URL —
84
- // skip for now, HTML sources cover most cases (CSRF, inline tokens)
85
99
 
86
100
  if (value && value.length >= 8) return value;
87
101
  }
88
102
  return undefined;
89
103
  }
90
-
91
104
  async function openResolverTab(
92
105
  url: string,
93
106
  cookies: Array<{ name: string; value: string; domain: string }>,
@@ -15,7 +15,6 @@ import { extractBrowserCookies } from "../auth/browser-cookies.js";
15
15
  import { queueBackgroundIndex } from "../indexer/index.js";
16
16
  import { mergeEndpoints } from "../marketplace/index.js";
17
17
  import { buildSkillOperationGraph } from "../graph/index.js";
18
- import { augmentEndpointsWithAgent } from "../graph/agent-augment.js";
19
18
  import { findExistingSkillForDomain, cachePublishedSkill } from "../client/index.js";
20
19
  import { storeCredential } from "../vault/index.js";
21
20
  import { getRegistrableDomain } from "../domain.js";
@@ -289,8 +289,6 @@ export function enrichEndpointsWithTokenSources(
289
289
  html: string | undefined,
290
290
  jsBundles: Map<string, string> | undefined,
291
291
  ): number {
292
- if (!html && (!jsBundles || jsBundles.size === 0)) return 0;
293
-
294
292
  let enriched = 0;
295
293
  for (const req of requests) {
296
294
  const matching = endpoints.filter((ep) => endpointMatchesRequest(ep, req));
@@ -298,15 +296,35 @@ export function enrichEndpointsWithTokenSources(
298
296
 
299
297
  for (const [headerName, headerValue] of Object.entries(req.request_headers)) {
300
298
  if (!TOKEN_HEADER_PATTERN.test(headerName)) continue;
301
- // Bearer tokens are prefixed — strip before scanning
302
299
  const tokenValue = extractTokenValue(headerName, headerValue);
303
300
  if (!tokenValue) continue;
304
301
 
305
- const sources = findTokenSources(tokenValue, html, jsBundles);
302
+ const sources = (html || (jsBundles && jsBundles.size > 0))
303
+ ? findTokenSources(tokenValue, html, jsBundles)
304
+ : [];
305
+
306
+ const lowerName = headerName.toLowerCase();
307
+ if (sources.length === 0) {
308
+ if (/csrf|xsrf/i.test(lowerName)) {
309
+ sources.push({ kind: "cookie", cookie_names: ["ct0", "csrf_token", "_csrf", "csrftoken", "XSRF-TOKEN"] });
310
+ } else if (lowerName === "authorization") {
311
+ if (html) {
312
+ const scriptSrcRe = /<script[^>]+src=["']([^"']+)["']/gi;
313
+ let m: RegExpExecArray | null;
314
+ while ((m = scriptSrcRe.exec(html)) !== null) {
315
+ if (/main|app|client|bundle|vendor/i.test(m[1])) {
316
+ sources.push({ kind: "js-bundle", bundle_url_pattern: m[1] });
317
+ if (sources.length >= 3) break;
318
+ }
319
+ }
320
+ }
321
+ }
322
+ }
323
+
306
324
  if (sources.length === 0) continue;
307
325
 
308
326
  const binding: AuthTokenBinding = {
309
- param_name: headerName.toLowerCase(),
327
+ param_name: lowerName,
310
328
  param_location: "header",
311
329
  sources,
312
330
  refresh_on_401: true,
@@ -314,9 +332,13 @@ export function enrichEndpointsWithTokenSources(
314
332
 
315
333
  for (const ep of matching) {
316
334
  if (!ep.auth_tokens) ep.auth_tokens = [];
317
- // Dedupe: skip if a binding for the same param already exists
318
- if (ep.auth_tokens.some((b) => b.param_name === binding.param_name)) continue;
319
- ep.auth_tokens.push(binding);
335
+ // Always replace with fresh binding — stale sources from cached merges get overwritten
336
+ const idx = ep.auth_tokens.findIndex((b) => b.param_name === binding.param_name);
337
+ if (idx >= 0) {
338
+ ep.auth_tokens[idx] = binding;
339
+ } else {
340
+ ep.auth_tokens.push(binding);
341
+ }
320
342
  enriched++;
321
343
  }
322
344
  }