unbrowse 3.2.1 → 3.2.2

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
package/dist/cli.js CHANGED
@@ -31,7 +31,7 @@ var __promiseAll = (args) => Promise.all(args);
31
31
  var __require = /* @__PURE__ */ createRequire(import.meta.url);
32
32
 
33
33
  // ../../src/build-info.generated.ts
34
- var BUILD_RELEASE_VERSION = "3.2.1", BUILD_GIT_SHA = "150cce0d751e", BUILD_CODE_HASH = "1488fc1d92b7", BUILD_RELEASE_MANIFEST_BASE64 = "eyJzY2hlbWFfdmVyc2lvbiI6MSwicmVsZWFzZV92ZXJzaW9uIjoiMy4yLjEiLCJnaXRfc2hhIjoiMTUwY2NlMGQ3NTFlIiwiY29kZV9oYXNoIjoiMTQ4OGZjMWQ5MmI3IiwidHJhY2VfdmVyc2lvbiI6IjE0ODhmYzFkOTJiN0AxNTBjY2UwZDc1MWUiLCJpc3N1ZWRfYXQiOiIyMDI2LTA0LTA2VDA3OjM0OjU3LjQ1NFoifQ", BUILD_RELEASE_MANIFEST_SIGNATURE = "2iiYVQS4ow2XkpkyCm072lmrjIIGvkAPjkO1s_LI_Do", BUILD_DEFAULT_BACKEND_URL = "https://beta-api.unbrowse.ai";
34
+ var BUILD_RELEASE_VERSION = "3.2.2", BUILD_GIT_SHA = "150cce0d751e", BUILD_CODE_HASH = "1488fc1d92b7", BUILD_RELEASE_MANIFEST_BASE64 = "eyJzY2hlbWFfdmVyc2lvbiI6MSwicmVsZWFzZV92ZXJzaW9uIjoiMy4yLjIiLCJnaXRfc2hhIjoiMTUwY2NlMGQ3NTFlIiwiY29kZV9oYXNoIjoiMTQ4OGZjMWQ5MmI3IiwidHJhY2VfdmVyc2lvbiI6IjE0ODhmYzFkOTJiN0AxNTBjY2UwZDc1MWUiLCJpc3N1ZWRfYXQiOiIyMDI2LTA0LTA2VDA4OjQ2OjM5LjIwNVoifQ", BUILD_RELEASE_MANIFEST_SIGNATURE = "ZS7C10DGuwyn1m02shMHyz6cN5UbTHfa8yqnkELg_L8", BUILD_DEFAULT_BACKEND_URL = "https://beta-api.unbrowse.ai";
35
35
 
36
36
  // ../../src/version.ts
37
37
  import { createHash } from "crypto";
package/dist/mcp.js CHANGED
@@ -225,11 +225,11 @@ import { dirname, join, parse } from "path";
225
225
  import { fileURLToPath as fileURLToPath2 } from "url";
226
226
 
227
227
  // ../../src/build-info.generated.ts
228
- var BUILD_RELEASE_VERSION = "3.2.1";
228
+ var BUILD_RELEASE_VERSION = "3.2.2";
229
229
  var BUILD_GIT_SHA = "150cce0d751e";
230
230
  var BUILD_CODE_HASH = "1488fc1d92b7";
231
- var BUILD_RELEASE_MANIFEST_BASE64 = "eyJzY2hlbWFfdmVyc2lvbiI6MSwicmVsZWFzZV92ZXJzaW9uIjoiMy4yLjEiLCJnaXRfc2hhIjoiMTUwY2NlMGQ3NTFlIiwiY29kZV9oYXNoIjoiMTQ4OGZjMWQ5MmI3IiwidHJhY2VfdmVyc2lvbiI6IjE0ODhmYzFkOTJiN0AxNTBjY2UwZDc1MWUiLCJpc3N1ZWRfYXQiOiIyMDI2LTA0LTA2VDA3OjM0OjU3LjQ1NFoifQ";
232
- var BUILD_RELEASE_MANIFEST_SIGNATURE = "2iiYVQS4ow2XkpkyCm072lmrjIIGvkAPjkO1s_LI_Do";
231
+ var BUILD_RELEASE_MANIFEST_BASE64 = "eyJzY2hlbWFfdmVyc2lvbiI6MSwicmVsZWFzZV92ZXJzaW9uIjoiMy4yLjIiLCJnaXRfc2hhIjoiMTUwY2NlMGQ3NTFlIiwiY29kZV9oYXNoIjoiMTQ4OGZjMWQ5MmI3IiwidHJhY2VfdmVyc2lvbiI6IjE0ODhmYzFkOTJiN0AxNTBjY2UwZDc1MWUiLCJpc3N1ZWRfYXQiOiIyMDI2LTA0LTA2VDA4OjQ2OjM5LjIwNVoifQ";
232
+ var BUILD_RELEASE_MANIFEST_SIGNATURE = "ZS7C10DGuwyn1m02shMHyz6cN5UbTHfa8yqnkELg_L8";
233
233
  var BUILD_DEFAULT_BACKEND_URL = "https://beta-api.unbrowse.ai";
234
234
 
235
235
  // ../../src/version.ts
package/dist/server.js CHANGED
@@ -4210,7 +4210,7 @@ function extractEndpoints(requests, wsMessages, context) {
4210
4210
  return "";
4211
4211
  }
4212
4212
  })();
4213
- const isApiUrl = /\/(api|graphql)\b/i.test(urlPath) || /\.(json)(\?|$)/.test(req.url);
4213
+ const isApiUrl = /\/(api|graphql|youtubei|__ssr_data__)\b/i.test(urlPath) || /\.(json)(\?|$)/.test(req.url);
4214
4214
  let graphqlOpName;
4215
4215
  if (/graphql/i.test(req.url)) {
4216
4216
  if (req.request_body) {
@@ -4946,7 +4946,7 @@ var init_reverse_engineer = __esm(() => {
4946
4946
  SKIP_HOSTS = /(cloudflare\.com|google-analytics\.com|doubleclick\.net|gstatic\.com|accounts\.google\.com|login\.microsoftonline\.com|auth0\.com|cognito-idp\.|appleid\.apple\.com|github\.com\/login|facebook\.com\/login|protechts\.net|demdex\.net|litms|platform-telemetry|datadoghq\.com|fullstory\.com|launchdarkly\.com|intercom\.io|privy\.io|mypinata\.cloud|sentry\.io|segment\.io|amplitude\.com|mixpanel\.com|hotjar\.com|clarity\.ms|googletagmanager\.com|walletconnect\.com|imagedelivery\.net|cloudflareinsights\.com)/i;
4947
4947
  SKIP_TELEMETRY_HOSTS = /(waa-pa\.|signaler-pa\.|appsgrowthpromo-pa\.|ogads-pa\.|peoplestackwebexperiments-pa\.)/i;
4948
4948
  SKIP_TELEMETRY_PATHS = /\/(log|logging|telemetry|analytics|beacon|ping|heartbeat|metrics)(\/|$)/i;
4949
- RPC_HINTS = /(\/$rpc\/|\/rpc\/|graphql|trending|search|feed|results|batchexecute|\/api\/)/i;
4949
+ RPC_HINTS = /(\/$rpc\/|\/rpc\/|graphql|trending|search|feed|results|batchexecute|\/api\/|youtubei|__ssr_data__)/i;
4950
4950
  ALLOWED_METHODS = new Set(["GET", "POST", "PUT", "PATCH", "DELETE"]);
4951
4951
  STRIP_HEADERS = new Set([
4952
4952
  "cookie",
@@ -5024,7 +5024,7 @@ var init_reverse_engineer = __esm(() => {
5024
5024
  "adsize",
5025
5025
  "lineitemid"
5026
5026
  ]);
5027
- ON_DOMAIN_NOISE = /\/(recaptcha|captcha|update-recaptcha|csrf|consent|data-protection|badge|drawer|header-action|geolocation|onboarding|wana\/bids|prebid|bids\/request|ads\/|pixel|beacon|collect|impression|click-tracking|heartbeat|webConfig|config\.json|manifest\.json|service-worker|sw\.js|favicon|robots\.txt|sitemap|opensearch|partial\/[a-zA-Z]+\/mod-|logging|csp-report|gen_204|generate_204|sodar|__|devvit-|user-drawer|action-item)/i;
5027
+ ON_DOMAIN_NOISE = /\/(recaptcha|captcha|update-recaptcha|csrf|consent|data-protection|badge|drawer|header-action|geolocation|onboarding|wana\/bids|prebid|bids\/request|ads\/|pixel|beacon|collect|impression|click-tracking|heartbeat|webConfig|config\.json|manifest\.json|service-worker|sw\.js|favicon|robots\.txt|sitemap|opensearch|partial\/[a-zA-Z]+\/mod-|logging|csp-report|gen_204|generate_204|sodar|__webpack|__next|devvit-|user-drawer|action-item)/i;
5028
5028
  });
5029
5029
 
5030
5030
  // ../../src/runtime/browser-access.ts
@@ -5952,7 +5952,7 @@ async function captureSession(url, authHeaders, cookies, intent, options) {
5952
5952
  if (status < 200 || status >= 400)
5953
5953
  continue;
5954
5954
  const ct = (entry.response?.headers ?? []).find((h) => h.name.toLowerCase() === "content-type")?.value ?? "";
5955
- if (!HAR_REPLAY_CT2.test(ct) && !harUrl.includes("/api/") && !harUrl.includes("_search") && !harUrl.includes("graphql"))
5955
+ if (!HAR_REPLAY_CT2.test(ct) && !harUrl.includes("/api/") && !harUrl.includes("_search") && !harUrl.includes("graphql") && !harUrl.includes("youtubei"))
5956
5956
  continue;
5957
5957
  if (harReplayCount >= 20)
5958
5958
  break;
@@ -5987,6 +5987,70 @@ async function captureSession(url, authHeaders, cookies, intent, options) {
5987
5987
  }
5988
5988
  html = await phase("getPageHtml", () => getPageHtml(tabId));
5989
5989
  } catch {}
5990
+ const SSR_DATA_EXTRACTORS = [
5991
+ { name: "ytmusic", script: `(function(){try{var d=ytcfg.get('YTMUSIC_INITIAL_DATA');if(!d||!d.length)return null;var out={};d.forEach(function(x){if(x.path&&x.data)out[x.path]=x.data});return JSON.stringify(out)}catch(e){return null}})()` },
5992
+ { name: "youtube", script: `(function(){try{return typeof ytInitialData!=='undefined'?JSON.stringify(ytInitialData):null}catch(e){return null}})()` },
5993
+ { name: "nextjs", script: `(function(){try{return window.__NEXT_DATA__?JSON.stringify(window.__NEXT_DATA__):null}catch(e){return null}})()` },
5994
+ { name: "nuxt", script: `(function(){try{return window.__NUXT__?JSON.stringify(window.__NUXT__):null}catch(e){return null}})()` }
5995
+ ];
5996
+ let embeddedDataCount = 0;
5997
+ for (const extractor of SSR_DATA_EXTRACTORS) {
5998
+ try {
5999
+ const raw = await phase(`ssr:${extractor.name}`, () => evaluate(tabId, extractor.script));
6000
+ if (typeof raw !== "string" || !raw || raw === "null")
6001
+ continue;
6002
+ if (extractor.name === "ytmusic") {
6003
+ const paths = JSON.parse(raw);
6004
+ for (const [path4, data] of Object.entries(paths)) {
6005
+ if (!data || typeof data !== "object")
6006
+ continue;
6007
+ const cleaned = { ...data };
6008
+ delete cleaned.responseContext;
6009
+ delete cleaned.trackingParams;
6010
+ delete cleaned.header;
6011
+ delete cleaned.background;
6012
+ let bodyStr = JSON.stringify(cleaned);
6013
+ if (bodyStr.length > 1e4) {
6014
+ bodyStr = bodyStr.substring(0, 1e4) + '"}]}';
6015
+ try {
6016
+ JSON.parse(bodyStr);
6017
+ } catch {
6018
+ bodyStr = JSON.stringify(cleaned).substring(0, 1e4);
6019
+ }
6020
+ }
6021
+ if (bodyStr.length < 100)
6022
+ continue;
6023
+ const origin = new URL(url).origin;
6024
+ const contextParams = new URL(url).searchParams;
6025
+ const queryStr = path4.includes("search") && contextParams.toString() ? `?${contextParams.toString()}&prettyPrint=false` : "?prettyPrint=false";
6026
+ const syntheticUrl = `${origin}/youtubei/v1${path4}${queryStr}`;
6027
+ responseBodies.set(syntheticUrl, bodyStr);
6028
+ harEntries.push({
6029
+ startedDateTime: new Date().toISOString(),
6030
+ request: { method: "POST", url: syntheticUrl, headers: [{ name: "content-type", value: "application/json" }], postData: { text: "{}" } },
6031
+ response: { status: 200, headers: [{ name: "content-type", value: "application/json" }], content: { text: bodyStr, mimeType: "application/json" } }
6032
+ });
6033
+ embeddedDataCount++;
6034
+ log("capture", `ssr:${extractor.name} extracted ${path4} (${bodyStr.length}B)`);
6035
+ }
6036
+ } else {
6037
+ if (raw.length < 100)
6038
+ continue;
6039
+ const syntheticUrl = `${new URL(url).origin}/__ssr_data__/${extractor.name}`;
6040
+ responseBodies.set(syntheticUrl, raw);
6041
+ harEntries.push({
6042
+ startedDateTime: new Date().toISOString(),
6043
+ request: { method: "GET", url: syntheticUrl, headers: [] },
6044
+ response: { status: 200, headers: [{ name: "content-type", value: "application/json" }], content: { text: raw, mimeType: "application/json" } }
6045
+ });
6046
+ embeddedDataCount++;
6047
+ log("capture", `ssr:${extractor.name} extracted (${raw.length}B)`);
6048
+ }
6049
+ } catch {}
6050
+ }
6051
+ if (embeddedDataCount > 0) {
6052
+ log("capture", `embedded SSR data: ${embeddedDataCount} synthetic endpoints injected`);
6053
+ }
5990
6054
  const requests = mergePassiveCaptureData(intercepted, harEntries, extensionEntries, responseBodies, performanceUrls);
5991
6055
  log("capture", `tracked ${harEntries.length} HAR, ${intercepted.length} intercepted, ${extensionEntries.length} extension, ${responseBodies.size} bodies → ${requests.length} merged`);
5992
6056
  const rawCookies = await phase("extractCookies", () => extractCookiesFromPage(tabId, url));
@@ -6385,7 +6449,8 @@ var MAX_CONCURRENT_TABS = 3, activeTabs = 0, waitQueue, activeTabRegistry, inter
6385
6449
  var isData = ct.indexOf('json') !== -1 || ct.indexOf('application/x-protobuf') !== -1 ||
6386
6450
  ct.indexOf('text/plain') !== -1 ||
6387
6451
  url.indexOf('batchexecute') !== -1 || url.indexOf('/api/') !== -1 ||
6388
- url.indexOf('graphql') !== -1 || url.indexOf('voyager') !== -1;
6452
+ url.indexOf('graphql') !== -1 || url.indexOf('voyager') !== -1 ||
6453
+ url.indexOf('youtubei') !== -1;
6389
6454
  if (!isJs && !isData) return response;
6390
6455
  if (/\\.(css|woff2?|png|jpg|svg|ico)(\\?|$)/.test(url)) return response;
6391
6456
  var clone = response.clone();
@@ -6435,7 +6500,8 @@ var MAX_CONCURRENT_TABS = 3, activeTabs = 0, waitQueue, activeTabRegistry, inter
6435
6500
  var isData = ct.indexOf('json') !== -1 || ct.indexOf('application/x-protobuf') !== -1 ||
6436
6501
  ct.indexOf('text/plain') !== -1 ||
6437
6502
  url.indexOf('batchexecute') !== -1 || url.indexOf('/api/') !== -1 ||
6438
- url.indexOf('graphql') !== -1 || url.indexOf('voyager') !== -1;
6503
+ url.indexOf('graphql') !== -1 || url.indexOf('voyager') !== -1 ||
6504
+ url.indexOf('youtubei') !== -1;
6439
6505
  if (!isJs && !isData) return;
6440
6506
  if (/\\.(css|woff2?|png|jpg|svg|ico)(\\?|$)/.test(url)) return;
6441
6507
  var respBody = xhr.responseText || '';
@@ -6478,7 +6544,7 @@ var init_capture = __esm(() => {
6478
6544
  });
6479
6545
 
6480
6546
  // ../../src/build-info.generated.ts
6481
- var BUILD_RELEASE_VERSION = "3.2.1", BUILD_GIT_SHA = "150cce0d751e", BUILD_CODE_HASH = "1488fc1d92b7", BUILD_RELEASE_MANIFEST_BASE64 = "eyJzY2hlbWFfdmVyc2lvbiI6MSwicmVsZWFzZV92ZXJzaW9uIjoiMy4yLjEiLCJnaXRfc2hhIjoiMTUwY2NlMGQ3NTFlIiwiY29kZV9oYXNoIjoiMTQ4OGZjMWQ5MmI3IiwidHJhY2VfdmVyc2lvbiI6IjE0ODhmYzFkOTJiN0AxNTBjY2UwZDc1MWUiLCJpc3N1ZWRfYXQiOiIyMDI2LTA0LTA2VDA3OjM0OjU3LjQ1NFoifQ", BUILD_RELEASE_MANIFEST_SIGNATURE = "2iiYVQS4ow2XkpkyCm072lmrjIIGvkAPjkO1s_LI_Do", BUILD_DEFAULT_BACKEND_URL = "https://beta-api.unbrowse.ai";
6547
+ var BUILD_RELEASE_VERSION = "3.2.2", BUILD_GIT_SHA = "150cce0d751e", BUILD_CODE_HASH = "1488fc1d92b7", BUILD_RELEASE_MANIFEST_BASE64 = "eyJzY2hlbWFfdmVyc2lvbiI6MSwicmVsZWFzZV92ZXJzaW9uIjoiMy4yLjIiLCJnaXRfc2hhIjoiMTUwY2NlMGQ3NTFlIiwiY29kZV9oYXNoIjoiMTQ4OGZjMWQ5MmI3IiwidHJhY2VfdmVyc2lvbiI6IjE0ODhmYzFkOTJiN0AxNTBjY2UwZDc1MWUiLCJpc3N1ZWRfYXQiOiIyMDI2LTA0LTA2VDA4OjQ2OjM5LjIwNVoifQ", BUILD_RELEASE_MANIFEST_SIGNATURE = "ZS7C10DGuwyn1m02shMHyz6cN5UbTHfa8yqnkELg_L8", BUILD_DEFAULT_BACKEND_URL = "https://beta-api.unbrowse.ai";
6482
6548
 
6483
6549
  // ../../src/version.ts
6484
6550
  import { createHash } from "crypto";
@@ -16709,6 +16775,17 @@ function rankEndpoints(endpoints, intent, skillDomain, contextUrl) {
16709
16775
  }
16710
16776
  score += matches * 100;
16711
16777
  }
16778
+ if (rawTokens.length > 0 && pathname) {
16779
+ const pathLower = pathname.toLowerCase();
16780
+ const pathSegs = pathLower.split("/").filter(Boolean);
16781
+ for (const token of rawTokens) {
16782
+ const stemmed = stem(token);
16783
+ if (pathSegs.some((seg) => seg === stemmed || seg === token || seg.includes(token))) {
16784
+ score += 150;
16785
+ break;
16786
+ }
16787
+ }
16788
+ }
16712
16789
  if (ep.dom_extraction)
16713
16790
  score += 25;
16714
16791
  if (descriptionMeta.needs_review && ep.dom_extraction)
@@ -18568,6 +18645,13 @@ function isCachedSkillRelevantForIntent(skill, intent, contextUrl) {
18568
18645
  const hasStructuredSearchEndpoint = candidateSkill.endpoints.some((endpoint) => endpointHasSearchBindings(endpoint) && (!!endpoint.dom_extraction || !!endpoint.response_schema) && endpointMatchesContextOrigin(endpoint, contextUrl) && endpointMatchesExplicitSearchContext(endpoint, contextUrl));
18569
18646
  if (hasStructuredSearchEndpoint)
18570
18647
  return true;
18648
+ if (top && top.score >= 0) {
18649
+ try {
18650
+ const topPath = new URL(top.endpoint.url_template).pathname.toLowerCase();
18651
+ if (/\/(search|find|query|browse|explore)\b/.test(topPath))
18652
+ return true;
18653
+ } catch {}
18654
+ }
18571
18655
  if (collectExplicitSearchContextBindingKeys(contextUrl).size > 0)
18572
18656
  return false;
18573
18657
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "unbrowse",
3
- "version": "3.2.1",
3
+ "version": "3.2.2",
4
4
  "description": "Reverse-engineer any website into reusable API skills. Zero-dep single binary with embedded browser engine.",
5
5
  "type": "module",
6
6
  "bin": {