unbrowse 3.2.0 → 3.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.js CHANGED
@@ -31,7 +31,7 @@ var __promiseAll = (args) => Promise.all(args);
31
31
  var __require = /* @__PURE__ */ createRequire(import.meta.url);
32
32
 
33
33
  // ../../src/build-info.generated.ts
34
- var BUILD_RELEASE_VERSION = "3.2.0", BUILD_GIT_SHA = "c3fc3f822751", BUILD_CODE_HASH = "1488fc1d92b7", BUILD_RELEASE_MANIFEST_BASE64 = "eyJzY2hlbWFfdmVyc2lvbiI6MSwicmVsZWFzZV92ZXJzaW9uIjoiMy4yLjAiLCJnaXRfc2hhIjoiYzNmYzNmODIyNzUxIiwiY29kZV9oYXNoIjoiMTQ4OGZjMWQ5MmI3IiwidHJhY2VfdmVyc2lvbiI6IjE0ODhmYzFkOTJiN0BjM2ZjM2Y4MjI3NTEiLCJpc3N1ZWRfYXQiOiIyMDI2LTA0LTA2VDA1OjA2OjAxLjIwMFoifQ", BUILD_RELEASE_MANIFEST_SIGNATURE = "xCBEHEB2UsniVYLfzuTpoXlcomZHL2pXht-7Ii1e7mM", BUILD_DEFAULT_BACKEND_URL = "https://beta-api.unbrowse.ai";
34
+ var BUILD_RELEASE_VERSION = "3.2.2", BUILD_GIT_SHA = "150cce0d751e", BUILD_CODE_HASH = "1488fc1d92b7", BUILD_RELEASE_MANIFEST_BASE64 = "eyJzY2hlbWFfdmVyc2lvbiI6MSwicmVsZWFzZV92ZXJzaW9uIjoiMy4yLjIiLCJnaXRfc2hhIjoiMTUwY2NlMGQ3NTFlIiwiY29kZV9oYXNoIjoiMTQ4OGZjMWQ5MmI3IiwidHJhY2VfdmVyc2lvbiI6IjE0ODhmYzFkOTJiN0AxNTBjY2UwZDc1MWUiLCJpc3N1ZWRfYXQiOiIyMDI2LTA0LTA2VDA4OjQ2OjM5LjIwNVoifQ", BUILD_RELEASE_MANIFEST_SIGNATURE = "ZS7C10DGuwyn1m02shMHyz6cN5UbTHfa8yqnkELg_L8", BUILD_DEFAULT_BACKEND_URL = "https://beta-api.unbrowse.ai";
35
35
 
36
36
  // ../../src/version.ts
37
37
  import { createHash } from "crypto";
@@ -802,7 +802,6 @@ var init_token_sources = () => {};
802
802
  // ../../src/execution/token-resolver.ts
803
803
  var init_token_resolver = __esm(() => {
804
804
  init_token_sources();
805
- init_client2();
806
805
  });
807
806
 
808
807
  // ../../src/vault/index.ts
package/dist/mcp.js CHANGED
@@ -225,11 +225,11 @@ import { dirname, join, parse } from "path";
225
225
  import { fileURLToPath as fileURLToPath2 } from "url";
226
226
 
227
227
  // ../../src/build-info.generated.ts
228
- var BUILD_RELEASE_VERSION = "3.2.0";
229
- var BUILD_GIT_SHA = "c3fc3f822751";
228
+ var BUILD_RELEASE_VERSION = "3.2.2";
229
+ var BUILD_GIT_SHA = "150cce0d751e";
230
230
  var BUILD_CODE_HASH = "1488fc1d92b7";
231
- var BUILD_RELEASE_MANIFEST_BASE64 = "eyJzY2hlbWFfdmVyc2lvbiI6MSwicmVsZWFzZV92ZXJzaW9uIjoiMy4yLjAiLCJnaXRfc2hhIjoiYzNmYzNmODIyNzUxIiwiY29kZV9oYXNoIjoiMTQ4OGZjMWQ5MmI3IiwidHJhY2VfdmVyc2lvbiI6IjE0ODhmYzFkOTJiN0BjM2ZjM2Y4MjI3NTEiLCJpc3N1ZWRfYXQiOiIyMDI2LTA0LTA2VDA1OjA2OjAxLjIwMFoifQ";
232
- var BUILD_RELEASE_MANIFEST_SIGNATURE = "xCBEHEB2UsniVYLfzuTpoXlcomZHL2pXht-7Ii1e7mM";
231
+ var BUILD_RELEASE_MANIFEST_BASE64 = "eyJzY2hlbWFfdmVyc2lvbiI6MSwicmVsZWFzZV92ZXJzaW9uIjoiMy4yLjIiLCJnaXRfc2hhIjoiMTUwY2NlMGQ3NTFlIiwiY29kZV9oYXNoIjoiMTQ4OGZjMWQ5MmI3IiwidHJhY2VfdmVyc2lvbiI6IjE0ODhmYzFkOTJiN0AxNTBjY2UwZDc1MWUiLCJpc3N1ZWRfYXQiOiIyMDI2LTA0LTA2VDA4OjQ2OjM5LjIwNVoifQ";
232
+ var BUILD_RELEASE_MANIFEST_SIGNATURE = "ZS7C10DGuwyn1m02shMHyz6cN5UbTHfa8yqnkELg_L8";
233
233
  var BUILD_DEFAULT_BACKEND_URL = "https://beta-api.unbrowse.ai";
234
234
 
235
235
  // ../../src/version.ts
package/dist/server.js CHANGED
@@ -76,6 +76,88 @@ function getPackageRoot(metaUrl) {
76
76
  var init_paths = () => {};
77
77
 
78
78
  // ../../src/kuri/client.ts
79
+ var exports_client = {};
80
+ __export(exports_client, {
81
+ waitForSelector: () => waitForSelector,
82
+ waitForLoad: () => waitForLoad,
83
+ waitForCloudflare: () => waitForCloudflare,
84
+ stop: () => stop,
85
+ start: () => start,
86
+ snapshot: () => snapshot,
87
+ shouldReuseManagedChrome: () => shouldReuseManagedChrome,
88
+ setViewport: () => setViewport,
89
+ setUserAgent: () => setUserAgent,
90
+ setHeaders: () => setHeaders,
91
+ setCredentials: () => setCredentials,
92
+ setCookies: () => setCookies,
93
+ setCookie: () => setCookie,
94
+ setCdpPortForTests: () => setCdpPortForTests,
95
+ sessionSave: () => sessionSave,
96
+ sessionLoad: () => sessionLoad,
97
+ sessionList: () => sessionList,
98
+ select: () => select,
99
+ scrollIntoView: () => scrollIntoView,
100
+ scroll: () => scroll,
101
+ scriptInject: () => scriptInject,
102
+ screenshot: () => screenshot,
103
+ reuseHealthyBrokerIfPossible: () => reuseHealthyBrokerIfPossible,
104
+ resolveKuriPort: () => resolveKuriPort,
105
+ resolveKuriLaunchConfig: () => resolveKuriLaunchConfig,
106
+ reload: () => reload,
107
+ press: () => press,
108
+ newTab: () => newTab,
109
+ networkEnable: () => networkEnable,
110
+ navigate: () => navigate,
111
+ keyboardType: () => keyboardType,
112
+ keyboardInsertText: () => keyboardInsertText,
113
+ keyUp: () => keyUp,
114
+ keyDown: () => keyDown,
115
+ isReady: () => isReady,
116
+ interceptStart: () => interceptStart,
117
+ health: () => health,
118
+ hasCloudflareChallenge: () => hasCloudflareChallenge,
119
+ harStop: () => harStop,
120
+ harStart: () => harStart,
121
+ goForward: () => goForward,
122
+ goBack: () => goBack,
123
+ getText: () => getText,
124
+ getPort: () => getPort,
125
+ getPerfLcp: () => getPerfLcp,
126
+ getPageHtml: () => getPageHtml,
127
+ getNetworkEvents: () => getNetworkEvents,
128
+ getMarkdown: () => getMarkdown,
129
+ getLinks: () => getLinks,
130
+ getKuriSourceCandidates: () => getKuriSourceCandidates,
131
+ getKuriErrorMessage: () => getKuriErrorMessage,
132
+ getKuriClient: () => getKuriClient,
133
+ getKuriBinaryCandidates: () => getKuriBinaryCandidates,
134
+ getErrors: () => getErrors,
135
+ getDefaultTab: () => getDefaultTab,
136
+ getCurrentUrl: () => getCurrentUrl,
137
+ getCookies: () => getCookies,
138
+ getConsole: () => getConsole,
139
+ getCdpPort: () => getCdpPort,
140
+ findText: () => findText,
141
+ findKuriBinary: () => findKuriBinary,
142
+ fill: () => fill,
143
+ extractLoadPluginsFromHtml: () => extractLoadPluginsFromHtml,
144
+ extractLoadPlugins: () => extractLoadPlugins,
145
+ executeInPageFetch: () => executeInPageFetch,
146
+ evaluate: () => evaluate,
147
+ drag: () => drag,
148
+ domQuery: () => domQuery,
149
+ domHtml: () => domHtml,
150
+ domAttributes: () => domAttributes,
151
+ discoverTabs: () => discoverTabs,
152
+ closeTab: () => closeTab,
153
+ click: () => click,
154
+ bestEffortRehydratePlugins: () => bestEffortRehydratePlugins,
155
+ authProfileSave: () => authProfileSave,
156
+ authProfileLoad: () => authProfileLoad,
157
+ authProfileList: () => authProfileList,
158
+ authProfileDelete: () => authProfileDelete,
159
+ action: () => action
160
+ });
79
161
  import { execFileSync, spawn } from "node:child_process";
80
162
  import { existsSync as existsSync2 } from "node:fs";
81
163
  import net from "node:net";
@@ -977,6 +1059,19 @@ async function getPageHtml(tabId, state = defaultBrokerState) {
977
1059
  const result = await evaluate(tabId, "document.documentElement.outerHTML", state);
978
1060
  return String(result ?? "");
979
1061
  }
1062
+ function extractLoadPlugins(value) {
1063
+ if (typeof value !== "string")
1064
+ return [];
1065
+ return Array.from(new Set(value.split(/[\s,;]+/).map((part) => part.trim()).filter(Boolean)));
1066
+ }
1067
+ function extractLoadPluginsFromHtml(html) {
1068
+ const modules = [];
1069
+ const pattern = /data-load-plugins=(["'])(.*?)\1/gi;
1070
+ for (const match of html.matchAll(pattern)) {
1071
+ modules.push(...extractLoadPlugins(match[2]));
1072
+ }
1073
+ return Array.from(new Set(modules));
1074
+ }
980
1075
  async function bestEffortRehydratePlugins(tabId, state = defaultBrokerState) {
981
1076
  const result = await evaluate(tabId, `(async function() {
982
1077
  function splitPlugins(value) {
@@ -1118,6 +1213,9 @@ function getPort(state = defaultBrokerState) {
1118
1213
  function getCdpPort() {
1119
1214
  return kuriCdpPort;
1120
1215
  }
1216
+ function setCdpPortForTests(port) {
1217
+ kuriCdpPort = port;
1218
+ }
1121
1219
  function isReady(state = defaultBrokerState) {
1122
1220
  return state.ready;
1123
1221
  }
@@ -4112,7 +4210,7 @@ function extractEndpoints(requests, wsMessages, context) {
4112
4210
  return "";
4113
4211
  }
4114
4212
  })();
4115
- const isApiUrl = /\/(api|graphql)\b/i.test(urlPath) || /\.(json)(\?|$)/.test(req.url);
4213
+ const isApiUrl = /\/(api|graphql|youtubei|__ssr_data__)\b/i.test(urlPath) || /\.(json)(\?|$)/.test(req.url);
4116
4214
  let graphqlOpName;
4117
4215
  if (/graphql/i.test(req.url)) {
4118
4216
  if (req.request_body) {
@@ -4848,7 +4946,7 @@ var init_reverse_engineer = __esm(() => {
4848
4946
  SKIP_HOSTS = /(cloudflare\.com|google-analytics\.com|doubleclick\.net|gstatic\.com|accounts\.google\.com|login\.microsoftonline\.com|auth0\.com|cognito-idp\.|appleid\.apple\.com|github\.com\/login|facebook\.com\/login|protechts\.net|demdex\.net|litms|platform-telemetry|datadoghq\.com|fullstory\.com|launchdarkly\.com|intercom\.io|privy\.io|mypinata\.cloud|sentry\.io|segment\.io|amplitude\.com|mixpanel\.com|hotjar\.com|clarity\.ms|googletagmanager\.com|walletconnect\.com|imagedelivery\.net|cloudflareinsights\.com)/i;
4849
4947
  SKIP_TELEMETRY_HOSTS = /(waa-pa\.|signaler-pa\.|appsgrowthpromo-pa\.|ogads-pa\.|peoplestackwebexperiments-pa\.)/i;
4850
4948
  SKIP_TELEMETRY_PATHS = /\/(log|logging|telemetry|analytics|beacon|ping|heartbeat|metrics)(\/|$)/i;
4851
- RPC_HINTS = /(\/$rpc\/|\/rpc\/|graphql|trending|search|feed|results|batchexecute|\/api\/)/i;
4949
+ RPC_HINTS = /(\/$rpc\/|\/rpc\/|graphql|trending|search|feed|results|batchexecute|\/api\/|youtubei|__ssr_data__)/i;
4852
4950
  ALLOWED_METHODS = new Set(["GET", "POST", "PUT", "PATCH", "DELETE"]);
4853
4951
  STRIP_HEADERS = new Set([
4854
4952
  "cookie",
@@ -4926,7 +5024,7 @@ var init_reverse_engineer = __esm(() => {
4926
5024
  "adsize",
4927
5025
  "lineitemid"
4928
5026
  ]);
4929
- ON_DOMAIN_NOISE = /\/(recaptcha|captcha|update-recaptcha|csrf|consent|data-protection|badge|drawer|header-action|geolocation|onboarding|wana\/bids|prebid|bids\/request|ads\/|pixel|beacon|collect|impression|click-tracking|heartbeat|webConfig|config\.json|manifest\.json|service-worker|sw\.js|favicon|robots\.txt|sitemap|opensearch|partial\/[a-zA-Z]+\/mod-|logging|csp-report|gen_204|generate_204|sodar|__|devvit-|user-drawer|action-item)/i;
5027
+ ON_DOMAIN_NOISE = /\/(recaptcha|captcha|update-recaptcha|csrf|consent|data-protection|badge|drawer|header-action|geolocation|onboarding|wana\/bids|prebid|bids\/request|ads\/|pixel|beacon|collect|impression|click-tracking|heartbeat|webConfig|config\.json|manifest\.json|service-worker|sw\.js|favicon|robots\.txt|sitemap|opensearch|partial\/[a-zA-Z]+\/mod-|logging|csp-report|gen_204|generate_204|sodar|__webpack|__next|devvit-|user-drawer|action-item)/i;
4930
5028
  });
4931
5029
 
4932
5030
  // ../../src/runtime/browser-access.ts
@@ -5854,7 +5952,7 @@ async function captureSession(url, authHeaders, cookies, intent, options) {
5854
5952
  if (status < 200 || status >= 400)
5855
5953
  continue;
5856
5954
  const ct = (entry.response?.headers ?? []).find((h) => h.name.toLowerCase() === "content-type")?.value ?? "";
5857
- if (!HAR_REPLAY_CT2.test(ct) && !harUrl.includes("/api/") && !harUrl.includes("_search") && !harUrl.includes("graphql"))
5955
+ if (!HAR_REPLAY_CT2.test(ct) && !harUrl.includes("/api/") && !harUrl.includes("_search") && !harUrl.includes("graphql") && !harUrl.includes("youtubei"))
5858
5956
  continue;
5859
5957
  if (harReplayCount >= 20)
5860
5958
  break;
@@ -5889,6 +5987,70 @@ async function captureSession(url, authHeaders, cookies, intent, options) {
5889
5987
  }
5890
5988
  html = await phase("getPageHtml", () => getPageHtml(tabId));
5891
5989
  } catch {}
5990
+ const SSR_DATA_EXTRACTORS = [
5991
+ { name: "ytmusic", script: `(function(){try{var d=ytcfg.get('YTMUSIC_INITIAL_DATA');if(!d||!d.length)return null;var out={};d.forEach(function(x){if(x.path&&x.data)out[x.path]=x.data});return JSON.stringify(out)}catch(e){return null}})()` },
5992
+ { name: "youtube", script: `(function(){try{return typeof ytInitialData!=='undefined'?JSON.stringify(ytInitialData):null}catch(e){return null}})()` },
5993
+ { name: "nextjs", script: `(function(){try{return window.__NEXT_DATA__?JSON.stringify(window.__NEXT_DATA__):null}catch(e){return null}})()` },
5994
+ { name: "nuxt", script: `(function(){try{return window.__NUXT__?JSON.stringify(window.__NUXT__):null}catch(e){return null}})()` }
5995
+ ];
5996
+ let embeddedDataCount = 0;
5997
+ for (const extractor of SSR_DATA_EXTRACTORS) {
5998
+ try {
5999
+ const raw = await phase(`ssr:${extractor.name}`, () => evaluate(tabId, extractor.script));
6000
+ if (typeof raw !== "string" || !raw || raw === "null")
6001
+ continue;
6002
+ if (extractor.name === "ytmusic") {
6003
+ const paths = JSON.parse(raw);
6004
+ for (const [path4, data] of Object.entries(paths)) {
6005
+ if (!data || typeof data !== "object")
6006
+ continue;
6007
+ const cleaned = { ...data };
6008
+ delete cleaned.responseContext;
6009
+ delete cleaned.trackingParams;
6010
+ delete cleaned.header;
6011
+ delete cleaned.background;
6012
+ let bodyStr = JSON.stringify(cleaned);
6013
+ if (bodyStr.length > 1e4) {
6014
+ bodyStr = bodyStr.substring(0, 1e4) + '"}]}';
6015
+ try {
6016
+ JSON.parse(bodyStr);
6017
+ } catch {
6018
+ bodyStr = JSON.stringify(cleaned).substring(0, 1e4);
6019
+ }
6020
+ }
6021
+ if (bodyStr.length < 100)
6022
+ continue;
6023
+ const origin = new URL(url).origin;
6024
+ const contextParams = new URL(url).searchParams;
6025
+ const queryStr = path4.includes("search") && contextParams.toString() ? `?${contextParams.toString()}&prettyPrint=false` : "?prettyPrint=false";
6026
+ const syntheticUrl = `${origin}/youtubei/v1${path4}${queryStr}`;
6027
+ responseBodies.set(syntheticUrl, bodyStr);
6028
+ harEntries.push({
6029
+ startedDateTime: new Date().toISOString(),
6030
+ request: { method: "POST", url: syntheticUrl, headers: [{ name: "content-type", value: "application/json" }], postData: { text: "{}" } },
6031
+ response: { status: 200, headers: [{ name: "content-type", value: "application/json" }], content: { text: bodyStr, mimeType: "application/json" } }
6032
+ });
6033
+ embeddedDataCount++;
6034
+ log("capture", `ssr:${extractor.name} extracted ${path4} (${bodyStr.length}B)`);
6035
+ }
6036
+ } else {
6037
+ if (raw.length < 100)
6038
+ continue;
6039
+ const syntheticUrl = `${new URL(url).origin}/__ssr_data__/${extractor.name}`;
6040
+ responseBodies.set(syntheticUrl, raw);
6041
+ harEntries.push({
6042
+ startedDateTime: new Date().toISOString(),
6043
+ request: { method: "GET", url: syntheticUrl, headers: [] },
6044
+ response: { status: 200, headers: [{ name: "content-type", value: "application/json" }], content: { text: raw, mimeType: "application/json" } }
6045
+ });
6046
+ embeddedDataCount++;
6047
+ log("capture", `ssr:${extractor.name} extracted (${raw.length}B)`);
6048
+ }
6049
+ } catch {}
6050
+ }
6051
+ if (embeddedDataCount > 0) {
6052
+ log("capture", `embedded SSR data: ${embeddedDataCount} synthetic endpoints injected`);
6053
+ }
5892
6054
  const requests = mergePassiveCaptureData(intercepted, harEntries, extensionEntries, responseBodies, performanceUrls);
5893
6055
  log("capture", `tracked ${harEntries.length} HAR, ${intercepted.length} intercepted, ${extensionEntries.length} extension, ${responseBodies.size} bodies → ${requests.length} merged`);
5894
6056
  const rawCookies = await phase("extractCookies", () => extractCookiesFromPage(tabId, url));
@@ -6287,7 +6449,8 @@ var MAX_CONCURRENT_TABS = 3, activeTabs = 0, waitQueue, activeTabRegistry, inter
6287
6449
  var isData = ct.indexOf('json') !== -1 || ct.indexOf('application/x-protobuf') !== -1 ||
6288
6450
  ct.indexOf('text/plain') !== -1 ||
6289
6451
  url.indexOf('batchexecute') !== -1 || url.indexOf('/api/') !== -1 ||
6290
- url.indexOf('graphql') !== -1 || url.indexOf('voyager') !== -1;
6452
+ url.indexOf('graphql') !== -1 || url.indexOf('voyager') !== -1 ||
6453
+ url.indexOf('youtubei') !== -1;
6291
6454
  if (!isJs && !isData) return response;
6292
6455
  if (/\\.(css|woff2?|png|jpg|svg|ico)(\\?|$)/.test(url)) return response;
6293
6456
  var clone = response.clone();
@@ -6337,7 +6500,8 @@ var MAX_CONCURRENT_TABS = 3, activeTabs = 0, waitQueue, activeTabRegistry, inter
6337
6500
  var isData = ct.indexOf('json') !== -1 || ct.indexOf('application/x-protobuf') !== -1 ||
6338
6501
  ct.indexOf('text/plain') !== -1 ||
6339
6502
  url.indexOf('batchexecute') !== -1 || url.indexOf('/api/') !== -1 ||
6340
- url.indexOf('graphql') !== -1 || url.indexOf('voyager') !== -1;
6503
+ url.indexOf('graphql') !== -1 || url.indexOf('voyager') !== -1 ||
6504
+ url.indexOf('youtubei') !== -1;
6341
6505
  if (!isJs && !isData) return;
6342
6506
  if (/\\.(css|woff2?|png|jpg|svg|ico)(\\?|$)/.test(url)) return;
6343
6507
  var respBody = xhr.responseText || '';
@@ -6380,7 +6544,7 @@ var init_capture = __esm(() => {
6380
6544
  });
6381
6545
 
6382
6546
  // ../../src/build-info.generated.ts
6383
- var BUILD_RELEASE_VERSION = "3.2.0", BUILD_GIT_SHA = "c3fc3f822751", BUILD_CODE_HASH = "1488fc1d92b7", BUILD_RELEASE_MANIFEST_BASE64 = "eyJzY2hlbWFfdmVyc2lvbiI6MSwicmVsZWFzZV92ZXJzaW9uIjoiMy4yLjAiLCJnaXRfc2hhIjoiYzNmYzNmODIyNzUxIiwiY29kZV9oYXNoIjoiMTQ4OGZjMWQ5MmI3IiwidHJhY2VfdmVyc2lvbiI6IjE0ODhmYzFkOTJiN0BjM2ZjM2Y4MjI3NTEiLCJpc3N1ZWRfYXQiOiIyMDI2LTA0LTA2VDA1OjA2OjAxLjIwMFoifQ", BUILD_RELEASE_MANIFEST_SIGNATURE = "xCBEHEB2UsniVYLfzuTpoXlcomZHL2pXht-7Ii1e7mM", BUILD_DEFAULT_BACKEND_URL = "https://beta-api.unbrowse.ai";
6547
+ var BUILD_RELEASE_VERSION = "3.2.2", BUILD_GIT_SHA = "150cce0d751e", BUILD_CODE_HASH = "1488fc1d92b7", BUILD_RELEASE_MANIFEST_BASE64 = "eyJzY2hlbWFfdmVyc2lvbiI6MSwicmVsZWFzZV92ZXJzaW9uIjoiMy4yLjIiLCJnaXRfc2hhIjoiMTUwY2NlMGQ3NTFlIiwiY29kZV9oYXNoIjoiMTQ4OGZjMWQ5MmI3IiwidHJhY2VfdmVyc2lvbiI6IjE0ODhmYzFkOTJiN0AxNTBjY2UwZDc1MWUiLCJpc3N1ZWRfYXQiOiIyMDI2LTA0LTA2VDA4OjQ2OjM5LjIwNVoifQ", BUILD_RELEASE_MANIFEST_SIGNATURE = "ZS7C10DGuwyn1m02shMHyz6cN5UbTHfa8yqnkELg_L8", BUILD_DEFAULT_BACKEND_URL = "https://beta-api.unbrowse.ai";
6384
6548
 
6385
6549
  // ../../src/version.ts
6386
6550
  import { createHash } from "crypto";
@@ -8818,121 +8982,206 @@ async function resolveAuthTokens(endpoint, cookies, existingAuthHeaders) {
8818
8982
  const triggerUrl = endpoint.trigger_url;
8819
8983
  if (!triggerUrl)
8820
8984
  return {};
8821
- const headerBindings = bindings.filter((b) => b.param_location === "header");
8985
+ const headerBindings = bindings.filter((b) => b.param_location === "header" && !existingAuthHeaders[b.param_name]);
8822
8986
  if (headerBindings.length === 0)
8823
8987
  return {};
8824
8988
  const resolved = {};
8825
- try {
8826
- const tabId = await openResolverTab(triggerUrl, cookies);
8827
- if (!tabId)
8828
- return {};
8829
- try {
8830
- await waitForLoad2(tabId);
8831
- const html = await getPageHtml(tabId).catch(() => "");
8832
- if (typeof html !== "string" || !html.startsWith("<")) {
8833
- return {};
8834
- }
8835
- for (const binding of headerBindings) {
8836
- let value = await resolveBinding(binding, html, cookies);
8837
- if (!value && binding.sources.some((s) => s.kind === "js-bundle")) {
8838
- value = await scanAllScriptResources(tabId, binding);
8839
- }
8840
- if (value) {
8841
- resolved[binding.param_name] = binding.param_name.toLowerCase() === "authorization" ? value.startsWith("Bearer ") ? value : `Bearer ${value}` : value;
8989
+ for (const binding of headerBindings) {
8990
+ const cookieSources = binding.sources.filter((s) => s.kind === "cookie");
8991
+ if (cookieSources.length === 0)
8992
+ continue;
8993
+ for (const source of cookieSources) {
8994
+ const names = source.cookie_names ?? [];
8995
+ for (const name of names) {
8996
+ const cookie = cookies.find((c) => c.name === name);
8997
+ if (cookie?.value && cookie.value.length >= 8) {
8998
+ resolved[binding.param_name] = cookie.value;
8999
+ break;
8842
9000
  }
8843
9001
  }
8844
- } finally {
8845
- await closeTab(tabId).catch(() => {});
9002
+ if (resolved[binding.param_name])
9003
+ break;
8846
9004
  }
8847
- } catch {}
9005
+ }
9006
+ const remaining = headerBindings.filter((b) => !resolved[b.param_name]);
9007
+ if (remaining.length === 0)
9008
+ return formatHeaders(resolved);
9009
+ if (!hasResolvableSources(remaining))
9010
+ return formatHeaders(resolved);
9011
+ const html = await fetchHtml(triggerUrl, cookies);
9012
+ if (html) {
9013
+ const fromHtml = await resolveFromHtml(remaining, html);
9014
+ Object.assign(resolved, fromHtml);
9015
+ const stillMissing = remaining.filter((b) => !resolved[b.param_name]);
9016
+ if (stillMissing.length === 0)
9017
+ return formatHeaders(resolved);
9018
+ if (Object.keys(fromHtml).length > 0 && hasHtmlResolvableSources(stillMissing)) {
9019
+ const browserHtml = await fetchHtmlViaBrowser(triggerUrl, cookies);
9020
+ if (browserHtml) {
9021
+ Object.assign(resolved, await resolveFromHtml(stillMissing, browserHtml));
9022
+ }
9023
+ }
9024
+ return formatHeaders(resolved);
9025
+ }
9026
+ if (hasHtmlResolvableSources(remaining)) {
9027
+ const browserHtml = await fetchHtmlViaBrowser(triggerUrl, cookies);
9028
+ if (browserHtml) {
9029
+ Object.assign(resolved, await resolveFromHtml(remaining, browserHtml));
9030
+ }
9031
+ }
9032
+ return formatHeaders(resolved);
9033
+ }
9034
+ function hasResolvableSources(bindings) {
9035
+ for (const b of bindings) {
9036
+ for (const s of b.sources) {
9037
+ if (s.kind === "html-meta" || s.kind === "html-inline-script")
9038
+ return true;
9039
+ if (s.kind === "js-bundle" && s.bundle_url_pattern && s.bundle_regex)
9040
+ return true;
9041
+ }
9042
+ }
9043
+ return false;
9044
+ }
9045
+ function hasHtmlResolvableSources(bindings) {
9046
+ for (const b of bindings) {
9047
+ for (const s of b.sources) {
9048
+ if (s.kind === "html-meta" || s.kind === "html-inline-script")
9049
+ return true;
9050
+ }
9051
+ }
9052
+ return false;
9053
+ }
9054
+ function formatHeaders(raw) {
9055
+ const out = {};
9056
+ for (const [k, v] of Object.entries(raw)) {
9057
+ if (k.toLowerCase() === "authorization" && !v.startsWith("Bearer ")) {
9058
+ out[k] = `Bearer ${v}`;
9059
+ } else {
9060
+ out[k] = v;
9061
+ }
9062
+ }
9063
+ return out;
9064
+ }
9065
+ async function resolveFromHtml(bindings, html) {
9066
+ const resolved = {};
9067
+ for (const binding of bindings) {
9068
+ const value = await resolveBinding(binding, html);
9069
+ if (value)
9070
+ resolved[binding.param_name] = value;
9071
+ }
8848
9072
  return resolved;
8849
9073
  }
8850
- async function resolveBinding(binding, html, cookies) {
9074
+ async function resolveBinding(binding, html) {
8851
9075
  for (const source of binding.sources) {
8852
9076
  let value;
8853
- if (source.kind === "cookie" && source.cookie_names?.length) {
8854
- for (const name of source.cookie_names) {
8855
- const cookie = cookies.find((c) => c.name === name);
8856
- if (cookie?.value) {
8857
- value = cookie.value;
8858
- break;
8859
- }
8860
- }
8861
- } else if (source.kind === "html-meta" || source.kind === "html-inline-script") {
9077
+ if (source.kind === "html-meta" || source.kind === "html-inline-script") {
8862
9078
  value = extractTokenFromHtml(source, html);
8863
- } else if (source.kind === "js-bundle" && source.bundle_url_pattern) {
8864
- try {
8865
- const resp = await fetch(source.bundle_url_pattern);
8866
- if (resp.ok) {
8867
- const body = await resp.text();
8868
- value = extractTokenFromBundle(source, body);
8869
- }
8870
- } catch {}
9079
+ } else if (source.kind === "js-bundle" && source.bundle_url_pattern && source.bundle_regex) {
9080
+ value = await resolveJsBundle(source, html);
8871
9081
  }
8872
9082
  if (value && value.length >= 8)
8873
9083
  return value;
8874
9084
  }
8875
9085
  return;
8876
9086
  }
8877
- async function openResolverTab(url, cookies) {
9087
+ async function resolveJsBundle(source, html) {
9088
+ const pattern = source.bundle_url_pattern;
9089
+ const scriptSrcRe = /<script[^>]+src=["']([^"']+)["']/gi;
9090
+ let match;
9091
+ while ((match = scriptSrcRe.exec(html)) !== null) {
9092
+ const src = match[1];
9093
+ if (!src.includes(pattern))
9094
+ continue;
9095
+ try {
9096
+ const url = src.startsWith("http") ? src : `https:${src}`;
9097
+ const controller = new AbortController;
9098
+ const timeout = setTimeout(() => controller.abort(), FETCH_TIMEOUT_MS);
9099
+ const res = await fetch(url, {
9100
+ headers: { "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36" },
9101
+ signal: controller.signal
9102
+ });
9103
+ clearTimeout(timeout);
9104
+ if (!res.ok)
9105
+ continue;
9106
+ const bundleContent = await res.text();
9107
+ const extracted = extractTokenFromBundle(source, bundleContent);
9108
+ if (extracted && extracted.length >= 8)
9109
+ return extracted;
9110
+ } catch {}
9111
+ }
9112
+ return;
9113
+ }
9114
+ async function fetchHtml(url, cookies) {
8878
9115
  try {
8879
- const tab = await newTab(url);
8880
- const tabId = typeof tab === "string" ? tab : tab?.tab_id;
8881
- if (!tabId)
8882
- return;
8883
- if (cookies.length > 0) {
8884
- for (const c of cookies) {
8885
- await setCookie(tabId, c.name, c.value, c.domain).catch(() => {});
8886
- }
8887
- await navigate(tabId, url).catch(() => {});
8888
- }
8889
- return tabId;
9116
+ const cookieHeader = cookies.map((c) => `${c.name}=${c.value}`).join("; ");
9117
+ const controller = new AbortController;
9118
+ const timeout = setTimeout(() => controller.abort(), FETCH_TIMEOUT_MS);
9119
+ const res = await fetch(url, {
9120
+ headers: {
9121
+ Cookie: cookieHeader,
9122
+ "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36",
9123
+ Accept: "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
9124
+ "Accept-Language": "en-US,en;q=0.9"
9125
+ },
9126
+ redirect: "follow",
9127
+ signal: controller.signal
9128
+ });
9129
+ clearTimeout(timeout);
9130
+ if (!res.ok)
9131
+ return null;
9132
+ const ct = res.headers.get("content-type") ?? "";
9133
+ if (!ct.includes("text/html") && !ct.includes("text/plain") && !ct.includes("application/xhtml"))
9134
+ return null;
9135
+ const html = await res.text();
9136
+ if (!html || html.length < 200 || !html.includes("<"))
9137
+ return null;
9138
+ return html;
8890
9139
  } catch {
8891
- return;
9140
+ return null;
8892
9141
  }
8893
9142
  }
8894
- async function waitForLoad2(tabId) {
8895
- const start2 = Date.now();
8896
- while (Date.now() - start2 < RESOLVE_TIMEOUT_MS) {
8897
- try {
8898
- const state = await evaluate(tabId, "document.readyState");
8899
- if (state === "complete" || state === "interactive")
8900
- return;
8901
- } catch {}
8902
- await new Promise((r) => setTimeout(r, 500));
9143
+ async function fetchHtmlViaBrowser(url, cookies) {
9144
+ let kuri;
9145
+ try {
9146
+ kuri = await Promise.resolve().then(() => (init_client(), exports_client));
9147
+ } catch {
9148
+ return null;
8903
9149
  }
8904
- }
8905
- async function scanAllScriptResources(tabId, binding) {
9150
+ let tabId;
8906
9151
  try {
8907
- const raw = await evaluate(tabId, `
8908
- JSON.stringify(
8909
- performance.getEntriesByType('resource')
8910
- .filter(function(e) { return e.initiatorType === 'script'; })
8911
- .map(function(e) { return e.name; })
8912
- )
8913
- `);
8914
- if (typeof raw !== "string" || !raw.startsWith("["))
8915
- return;
8916
- const urls = JSON.parse(raw);
8917
- const tokenPattern = /AAAAAAAAAAAAAAAAAAA[A-Za-z0-9+/=_%-]{20,}/;
8918
- for (const url of urls) {
9152
+ const tab = await kuri.newTab(url);
9153
+ tabId = typeof tab === "string" ? tab : tab?.tab_id;
9154
+ if (!tabId)
9155
+ return null;
9156
+ if (cookies.length > 0) {
9157
+ for (const c of cookies) {
9158
+ await kuri.setCookie(tabId, c.name, c.value, c.domain).catch(() => {});
9159
+ }
9160
+ await kuri.navigate(tabId, url).catch(() => {});
9161
+ }
9162
+ const start2 = Date.now();
9163
+ while (Date.now() - start2 < 12000) {
8919
9164
  try {
8920
- const resp = await fetch(url);
8921
- if (!resp.ok)
8922
- continue;
8923
- const body = await resp.text();
8924
- const m = body.match(tokenPattern);
8925
- if (m && m[0].length >= 20)
8926
- return m[0];
9165
+ const state = await kuri.evaluate(tabId, "document.readyState");
9166
+ if (state === "complete" || state === "interactive")
9167
+ break;
8927
9168
  } catch {}
9169
+ await new Promise((r) => setTimeout(r, 500));
8928
9170
  }
8929
- } catch {}
8930
- return;
9171
+ const html = await kuri.getPageHtml(tabId).catch(() => "");
9172
+ if (typeof html !== "string" || !html.startsWith("<"))
9173
+ return null;
9174
+ return html;
9175
+ } catch {
9176
+ return null;
9177
+ } finally {
9178
+ if (tabId)
9179
+ await kuri.closeTab(tabId).catch(() => {});
9180
+ }
8931
9181
  }
8932
- var RESOLVE_TIMEOUT_MS = 12000;
9182
+ var FETCH_TIMEOUT_MS = 8000;
8933
9183
  var init_token_resolver = __esm(() => {
8934
9184
  init_token_sources();
8935
- init_client();
8936
9185
  });
8937
9186
 
8938
9187
  // ../../src/vault/index.ts
@@ -16526,6 +16775,17 @@ function rankEndpoints(endpoints, intent, skillDomain, contextUrl) {
16526
16775
  }
16527
16776
  score += matches * 100;
16528
16777
  }
16778
+ if (rawTokens.length > 0 && pathname) {
16779
+ const pathLower = pathname.toLowerCase();
16780
+ const pathSegs = pathLower.split("/").filter(Boolean);
16781
+ for (const token of rawTokens) {
16782
+ const stemmed = stem(token);
16783
+ if (pathSegs.some((seg) => seg === stemmed || seg === token || seg.includes(token))) {
16784
+ score += 150;
16785
+ break;
16786
+ }
16787
+ }
16788
+ }
16529
16789
  if (ep.dom_extraction)
16530
16790
  score += 25;
16531
16791
  if (descriptionMeta.needs_review && ep.dom_extraction)
@@ -18385,6 +18645,13 @@ function isCachedSkillRelevantForIntent(skill, intent, contextUrl) {
18385
18645
  const hasStructuredSearchEndpoint = candidateSkill.endpoints.some((endpoint) => endpointHasSearchBindings(endpoint) && (!!endpoint.dom_extraction || !!endpoint.response_schema) && endpointMatchesContextOrigin(endpoint, contextUrl) && endpointMatchesExplicitSearchContext(endpoint, contextUrl));
18386
18646
  if (hasStructuredSearchEndpoint)
18387
18647
  return true;
18648
+ if (top && top.score >= 0) {
18649
+ try {
18650
+ const topPath = new URL(top.endpoint.url_template).pathname.toLowerCase();
18651
+ if (/\/(search|find|query|browse|explore)\b/.test(topPath))
18652
+ return true;
18653
+ } catch {}
18654
+ }
18388
18655
  if (collectExplicitSearchContextBindingKeys(contextUrl).size > 0)
18389
18656
  return false;
18390
18657
  }
@@ -20592,7 +20859,7 @@ async function resolveAndExecute(intent, params = {}, context, projection, optio
20592
20859
  }
20593
20860
  } catch {}
20594
20861
  }
20595
- if (context?.url && !forceCapture) {
20862
+ if (process.env.UNBROWSE_LOCAL_ONLY === "1" && !forceCapture) {
20596
20863
  return buildNoCachedMatch();
20597
20864
  }
20598
20865
  if (!context?.url) {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "unbrowse",
3
- "version": "3.2.0",
3
+ "version": "3.2.2",
4
4
  "description": "Reverse-engineer any website into reusable API skills. Zero-dep single binary with embedded browser engine.",
5
5
  "type": "module",
6
6
  "bin": {
Binary file
Binary file
Binary file
Binary file
@@ -2,24 +2,27 @@
2
2
  "repo_url": "https://github.com/justrach/kuri.git",
3
3
  "branch": "adding-extensions",
4
4
  "source_sha": "eadfaa5f921f7152e1762aed5ed64b3a4fbefbf3",
5
- "built_at": "2026-04-06T05:01:20.543Z",
5
+ "built_at": "2026-04-05T06:43:57.212Z",
6
6
  "binaries": {
7
7
  "darwin-arm64": {
8
8
  "zig_target": "aarch64-macos",
9
- "sha256": "1796501e393403016723c6b69266b834e2db04ba2559f51c84c957bd85c3927b",
10
- "source": "prebuilt"
9
+ "sha256": "1553633e722d18059dedffa8a52d55ed6c052e4961fd2753ee0b62be60b241bf"
11
10
  },
12
11
  "darwin-x64": {
13
12
  "zig_target": "x86_64-macos",
14
- "sha256": "f9adbebad3b17c10fc359b8125a33eda6890ec728cb2b6c625b36b895ef7c97f"
13
+ "sha256": "b5eb07e631c6ddad64019c8d0c86c32cb76a74ff0791ac5611a3aa3550767ec8"
15
14
  },
16
15
  "linux-arm64": {
17
16
  "zig_target": "aarch64-linux",
18
- "sha256": "30d1da652d589e5dffa4520615f958db3acf063bd831da9662c97afd50969699"
17
+ "sha256": "ea88a26f7b335d5842b0c1d83bfa4066bed0a119284560f6bd3833f1d240cce2"
19
18
  },
20
19
  "linux-x64": {
21
20
  "zig_target": "x86_64-linux",
22
- "sha256": "90a8d60715a5c1723b7dae98d90a565b92a781b16ab8721fd546a26f9d86f39f"
21
+ "sha256": "175a7c59e458e952a26974f0fb5c2ce374e56f2c4c352903b481b5aa5a16978f"
22
+ },
23
+ "win-x64": {
24
+ "zig_target": "x86_64-windows",
25
+ "sha256": "176291ad9827a183ba7322ddb56cc1fa5edc7c214a264ecdf8a1d5d18366d686"
23
26
  }
24
27
  }
25
28
  }
Binary file