unbrowse 3.2.1 → 3.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli.js +1 -1
- package/dist/mcp.js +3 -3
- package/dist/server.js +91 -7
- package/package.json +1 -1
package/dist/cli.js
CHANGED
|
@@ -31,7 +31,7 @@ var __promiseAll = (args) => Promise.all(args);
|
|
|
31
31
|
var __require = /* @__PURE__ */ createRequire(import.meta.url);
|
|
32
32
|
|
|
33
33
|
// ../../src/build-info.generated.ts
|
|
34
|
-
var BUILD_RELEASE_VERSION = "3.2.
|
|
34
|
+
var BUILD_RELEASE_VERSION = "3.2.2", BUILD_GIT_SHA = "150cce0d751e", BUILD_CODE_HASH = "1488fc1d92b7", BUILD_RELEASE_MANIFEST_BASE64 = "eyJzY2hlbWFfdmVyc2lvbiI6MSwicmVsZWFzZV92ZXJzaW9uIjoiMy4yLjIiLCJnaXRfc2hhIjoiMTUwY2NlMGQ3NTFlIiwiY29kZV9oYXNoIjoiMTQ4OGZjMWQ5MmI3IiwidHJhY2VfdmVyc2lvbiI6IjE0ODhmYzFkOTJiN0AxNTBjY2UwZDc1MWUiLCJpc3N1ZWRfYXQiOiIyMDI2LTA0LTA2VDA4OjQ2OjM5LjIwNVoifQ", BUILD_RELEASE_MANIFEST_SIGNATURE = "ZS7C10DGuwyn1m02shMHyz6cN5UbTHfa8yqnkELg_L8", BUILD_DEFAULT_BACKEND_URL = "https://beta-api.unbrowse.ai";
|
|
35
35
|
|
|
36
36
|
// ../../src/version.ts
|
|
37
37
|
import { createHash } from "crypto";
|
package/dist/mcp.js
CHANGED
|
@@ -225,11 +225,11 @@ import { dirname, join, parse } from "path";
|
|
|
225
225
|
import { fileURLToPath as fileURLToPath2 } from "url";
|
|
226
226
|
|
|
227
227
|
// ../../src/build-info.generated.ts
|
|
228
|
-
var BUILD_RELEASE_VERSION = "3.2.
|
|
228
|
+
var BUILD_RELEASE_VERSION = "3.2.2";
|
|
229
229
|
var BUILD_GIT_SHA = "150cce0d751e";
|
|
230
230
|
var BUILD_CODE_HASH = "1488fc1d92b7";
|
|
231
|
-
var BUILD_RELEASE_MANIFEST_BASE64 = "
|
|
232
|
-
var BUILD_RELEASE_MANIFEST_SIGNATURE = "
|
|
231
|
+
var BUILD_RELEASE_MANIFEST_BASE64 = "eyJzY2hlbWFfdmVyc2lvbiI6MSwicmVsZWFzZV92ZXJzaW9uIjoiMy4yLjIiLCJnaXRfc2hhIjoiMTUwY2NlMGQ3NTFlIiwiY29kZV9oYXNoIjoiMTQ4OGZjMWQ5MmI3IiwidHJhY2VfdmVyc2lvbiI6IjE0ODhmYzFkOTJiN0AxNTBjY2UwZDc1MWUiLCJpc3N1ZWRfYXQiOiIyMDI2LTA0LTA2VDA4OjQ2OjM5LjIwNVoifQ";
|
|
232
|
+
var BUILD_RELEASE_MANIFEST_SIGNATURE = "ZS7C10DGuwyn1m02shMHyz6cN5UbTHfa8yqnkELg_L8";
|
|
233
233
|
var BUILD_DEFAULT_BACKEND_URL = "https://beta-api.unbrowse.ai";
|
|
234
234
|
|
|
235
235
|
// ../../src/version.ts
|
package/dist/server.js
CHANGED
|
@@ -4210,7 +4210,7 @@ function extractEndpoints(requests, wsMessages, context) {
|
|
|
4210
4210
|
return "";
|
|
4211
4211
|
}
|
|
4212
4212
|
})();
|
|
4213
|
-
const isApiUrl = /\/(api|graphql)\b/i.test(urlPath) || /\.(json)(\?|$)/.test(req.url);
|
|
4213
|
+
const isApiUrl = /\/(api|graphql|youtubei|__ssr_data__)\b/i.test(urlPath) || /\.(json)(\?|$)/.test(req.url);
|
|
4214
4214
|
let graphqlOpName;
|
|
4215
4215
|
if (/graphql/i.test(req.url)) {
|
|
4216
4216
|
if (req.request_body) {
|
|
@@ -4946,7 +4946,7 @@ var init_reverse_engineer = __esm(() => {
|
|
|
4946
4946
|
SKIP_HOSTS = /(cloudflare\.com|google-analytics\.com|doubleclick\.net|gstatic\.com|accounts\.google\.com|login\.microsoftonline\.com|auth0\.com|cognito-idp\.|appleid\.apple\.com|github\.com\/login|facebook\.com\/login|protechts\.net|demdex\.net|litms|platform-telemetry|datadoghq\.com|fullstory\.com|launchdarkly\.com|intercom\.io|privy\.io|mypinata\.cloud|sentry\.io|segment\.io|amplitude\.com|mixpanel\.com|hotjar\.com|clarity\.ms|googletagmanager\.com|walletconnect\.com|imagedelivery\.net|cloudflareinsights\.com)/i;
|
|
4947
4947
|
SKIP_TELEMETRY_HOSTS = /(waa-pa\.|signaler-pa\.|appsgrowthpromo-pa\.|ogads-pa\.|peoplestackwebexperiments-pa\.)/i;
|
|
4948
4948
|
SKIP_TELEMETRY_PATHS = /\/(log|logging|telemetry|analytics|beacon|ping|heartbeat|metrics)(\/|$)/i;
|
|
4949
|
-
RPC_HINTS = /(\/$rpc\/|\/rpc\/|graphql|trending|search|feed|results|batchexecute|\/api
|
|
4949
|
+
RPC_HINTS = /(\/$rpc\/|\/rpc\/|graphql|trending|search|feed|results|batchexecute|\/api\/|youtubei|__ssr_data__)/i;
|
|
4950
4950
|
ALLOWED_METHODS = new Set(["GET", "POST", "PUT", "PATCH", "DELETE"]);
|
|
4951
4951
|
STRIP_HEADERS = new Set([
|
|
4952
4952
|
"cookie",
|
|
@@ -5024,7 +5024,7 @@ var init_reverse_engineer = __esm(() => {
|
|
|
5024
5024
|
"adsize",
|
|
5025
5025
|
"lineitemid"
|
|
5026
5026
|
]);
|
|
5027
|
-
ON_DOMAIN_NOISE = /\/(recaptcha|captcha|update-recaptcha|csrf|consent|data-protection|badge|drawer|header-action|geolocation|onboarding|wana\/bids|prebid|bids\/request|ads\/|pixel|beacon|collect|impression|click-tracking|heartbeat|webConfig|config\.json|manifest\.json|service-worker|sw\.js|favicon|robots\.txt|sitemap|opensearch|partial\/[a-zA-Z]+\/mod-|logging|csp-report|gen_204|generate_204|sodar|
|
|
5027
|
+
ON_DOMAIN_NOISE = /\/(recaptcha|captcha|update-recaptcha|csrf|consent|data-protection|badge|drawer|header-action|geolocation|onboarding|wana\/bids|prebid|bids\/request|ads\/|pixel|beacon|collect|impression|click-tracking|heartbeat|webConfig|config\.json|manifest\.json|service-worker|sw\.js|favicon|robots\.txt|sitemap|opensearch|partial\/[a-zA-Z]+\/mod-|logging|csp-report|gen_204|generate_204|sodar|__webpack|__next|devvit-|user-drawer|action-item)/i;
|
|
5028
5028
|
});
|
|
5029
5029
|
|
|
5030
5030
|
// ../../src/runtime/browser-access.ts
|
|
@@ -5952,7 +5952,7 @@ async function captureSession(url, authHeaders, cookies, intent, options) {
|
|
|
5952
5952
|
if (status < 200 || status >= 400)
|
|
5953
5953
|
continue;
|
|
5954
5954
|
const ct = (entry.response?.headers ?? []).find((h) => h.name.toLowerCase() === "content-type")?.value ?? "";
|
|
5955
|
-
if (!HAR_REPLAY_CT2.test(ct) && !harUrl.includes("/api/") && !harUrl.includes("_search") && !harUrl.includes("graphql"))
|
|
5955
|
+
if (!HAR_REPLAY_CT2.test(ct) && !harUrl.includes("/api/") && !harUrl.includes("_search") && !harUrl.includes("graphql") && !harUrl.includes("youtubei"))
|
|
5956
5956
|
continue;
|
|
5957
5957
|
if (harReplayCount >= 20)
|
|
5958
5958
|
break;
|
|
@@ -5987,6 +5987,70 @@ async function captureSession(url, authHeaders, cookies, intent, options) {
|
|
|
5987
5987
|
}
|
|
5988
5988
|
html = await phase("getPageHtml", () => getPageHtml(tabId));
|
|
5989
5989
|
} catch {}
|
|
5990
|
+
const SSR_DATA_EXTRACTORS = [
|
|
5991
|
+
{ name: "ytmusic", script: `(function(){try{var d=ytcfg.get('YTMUSIC_INITIAL_DATA');if(!d||!d.length)return null;var out={};d.forEach(function(x){if(x.path&&x.data)out[x.path]=x.data});return JSON.stringify(out)}catch(e){return null}})()` },
|
|
5992
|
+
{ name: "youtube", script: `(function(){try{return typeof ytInitialData!=='undefined'?JSON.stringify(ytInitialData):null}catch(e){return null}})()` },
|
|
5993
|
+
{ name: "nextjs", script: `(function(){try{return window.__NEXT_DATA__?JSON.stringify(window.__NEXT_DATA__):null}catch(e){return null}})()` },
|
|
5994
|
+
{ name: "nuxt", script: `(function(){try{return window.__NUXT__?JSON.stringify(window.__NUXT__):null}catch(e){return null}})()` }
|
|
5995
|
+
];
|
|
5996
|
+
let embeddedDataCount = 0;
|
|
5997
|
+
for (const extractor of SSR_DATA_EXTRACTORS) {
|
|
5998
|
+
try {
|
|
5999
|
+
const raw = await phase(`ssr:${extractor.name}`, () => evaluate(tabId, extractor.script));
|
|
6000
|
+
if (typeof raw !== "string" || !raw || raw === "null")
|
|
6001
|
+
continue;
|
|
6002
|
+
if (extractor.name === "ytmusic") {
|
|
6003
|
+
const paths = JSON.parse(raw);
|
|
6004
|
+
for (const [path4, data] of Object.entries(paths)) {
|
|
6005
|
+
if (!data || typeof data !== "object")
|
|
6006
|
+
continue;
|
|
6007
|
+
const cleaned = { ...data };
|
|
6008
|
+
delete cleaned.responseContext;
|
|
6009
|
+
delete cleaned.trackingParams;
|
|
6010
|
+
delete cleaned.header;
|
|
6011
|
+
delete cleaned.background;
|
|
6012
|
+
let bodyStr = JSON.stringify(cleaned);
|
|
6013
|
+
if (bodyStr.length > 1e4) {
|
|
6014
|
+
bodyStr = bodyStr.substring(0, 1e4) + '"}]}';
|
|
6015
|
+
try {
|
|
6016
|
+
JSON.parse(bodyStr);
|
|
6017
|
+
} catch {
|
|
6018
|
+
bodyStr = JSON.stringify(cleaned).substring(0, 1e4);
|
|
6019
|
+
}
|
|
6020
|
+
}
|
|
6021
|
+
if (bodyStr.length < 100)
|
|
6022
|
+
continue;
|
|
6023
|
+
const origin = new URL(url).origin;
|
|
6024
|
+
const contextParams = new URL(url).searchParams;
|
|
6025
|
+
const queryStr = path4.includes("search") && contextParams.toString() ? `?${contextParams.toString()}&prettyPrint=false` : "?prettyPrint=false";
|
|
6026
|
+
const syntheticUrl = `${origin}/youtubei/v1${path4}${queryStr}`;
|
|
6027
|
+
responseBodies.set(syntheticUrl, bodyStr);
|
|
6028
|
+
harEntries.push({
|
|
6029
|
+
startedDateTime: new Date().toISOString(),
|
|
6030
|
+
request: { method: "POST", url: syntheticUrl, headers: [{ name: "content-type", value: "application/json" }], postData: { text: "{}" } },
|
|
6031
|
+
response: { status: 200, headers: [{ name: "content-type", value: "application/json" }], content: { text: bodyStr, mimeType: "application/json" } }
|
|
6032
|
+
});
|
|
6033
|
+
embeddedDataCount++;
|
|
6034
|
+
log("capture", `ssr:${extractor.name} extracted ${path4} (${bodyStr.length}B)`);
|
|
6035
|
+
}
|
|
6036
|
+
} else {
|
|
6037
|
+
if (raw.length < 100)
|
|
6038
|
+
continue;
|
|
6039
|
+
const syntheticUrl = `${new URL(url).origin}/__ssr_data__/${extractor.name}`;
|
|
6040
|
+
responseBodies.set(syntheticUrl, raw);
|
|
6041
|
+
harEntries.push({
|
|
6042
|
+
startedDateTime: new Date().toISOString(),
|
|
6043
|
+
request: { method: "GET", url: syntheticUrl, headers: [] },
|
|
6044
|
+
response: { status: 200, headers: [{ name: "content-type", value: "application/json" }], content: { text: raw, mimeType: "application/json" } }
|
|
6045
|
+
});
|
|
6046
|
+
embeddedDataCount++;
|
|
6047
|
+
log("capture", `ssr:${extractor.name} extracted (${raw.length}B)`);
|
|
6048
|
+
}
|
|
6049
|
+
} catch {}
|
|
6050
|
+
}
|
|
6051
|
+
if (embeddedDataCount > 0) {
|
|
6052
|
+
log("capture", `embedded SSR data: ${embeddedDataCount} synthetic endpoints injected`);
|
|
6053
|
+
}
|
|
5990
6054
|
const requests = mergePassiveCaptureData(intercepted, harEntries, extensionEntries, responseBodies, performanceUrls);
|
|
5991
6055
|
log("capture", `tracked ${harEntries.length} HAR, ${intercepted.length} intercepted, ${extensionEntries.length} extension, ${responseBodies.size} bodies → ${requests.length} merged`);
|
|
5992
6056
|
const rawCookies = await phase("extractCookies", () => extractCookiesFromPage(tabId, url));
|
|
@@ -6385,7 +6449,8 @@ var MAX_CONCURRENT_TABS = 3, activeTabs = 0, waitQueue, activeTabRegistry, inter
|
|
|
6385
6449
|
var isData = ct.indexOf('json') !== -1 || ct.indexOf('application/x-protobuf') !== -1 ||
|
|
6386
6450
|
ct.indexOf('text/plain') !== -1 ||
|
|
6387
6451
|
url.indexOf('batchexecute') !== -1 || url.indexOf('/api/') !== -1 ||
|
|
6388
|
-
url.indexOf('graphql') !== -1 || url.indexOf('voyager') !== -1
|
|
6452
|
+
url.indexOf('graphql') !== -1 || url.indexOf('voyager') !== -1 ||
|
|
6453
|
+
url.indexOf('youtubei') !== -1;
|
|
6389
6454
|
if (!isJs && !isData) return response;
|
|
6390
6455
|
if (/\\.(css|woff2?|png|jpg|svg|ico)(\\?|$)/.test(url)) return response;
|
|
6391
6456
|
var clone = response.clone();
|
|
@@ -6435,7 +6500,8 @@ var MAX_CONCURRENT_TABS = 3, activeTabs = 0, waitQueue, activeTabRegistry, inter
|
|
|
6435
6500
|
var isData = ct.indexOf('json') !== -1 || ct.indexOf('application/x-protobuf') !== -1 ||
|
|
6436
6501
|
ct.indexOf('text/plain') !== -1 ||
|
|
6437
6502
|
url.indexOf('batchexecute') !== -1 || url.indexOf('/api/') !== -1 ||
|
|
6438
|
-
url.indexOf('graphql') !== -1 || url.indexOf('voyager') !== -1
|
|
6503
|
+
url.indexOf('graphql') !== -1 || url.indexOf('voyager') !== -1 ||
|
|
6504
|
+
url.indexOf('youtubei') !== -1;
|
|
6439
6505
|
if (!isJs && !isData) return;
|
|
6440
6506
|
if (/\\.(css|woff2?|png|jpg|svg|ico)(\\?|$)/.test(url)) return;
|
|
6441
6507
|
var respBody = xhr.responseText || '';
|
|
@@ -6478,7 +6544,7 @@ var init_capture = __esm(() => {
|
|
|
6478
6544
|
});
|
|
6479
6545
|
|
|
6480
6546
|
// ../../src/build-info.generated.ts
|
|
6481
|
-
var BUILD_RELEASE_VERSION = "3.2.
|
|
6547
|
+
var BUILD_RELEASE_VERSION = "3.2.2", BUILD_GIT_SHA = "150cce0d751e", BUILD_CODE_HASH = "1488fc1d92b7", BUILD_RELEASE_MANIFEST_BASE64 = "eyJzY2hlbWFfdmVyc2lvbiI6MSwicmVsZWFzZV92ZXJzaW9uIjoiMy4yLjIiLCJnaXRfc2hhIjoiMTUwY2NlMGQ3NTFlIiwiY29kZV9oYXNoIjoiMTQ4OGZjMWQ5MmI3IiwidHJhY2VfdmVyc2lvbiI6IjE0ODhmYzFkOTJiN0AxNTBjY2UwZDc1MWUiLCJpc3N1ZWRfYXQiOiIyMDI2LTA0LTA2VDA4OjQ2OjM5LjIwNVoifQ", BUILD_RELEASE_MANIFEST_SIGNATURE = "ZS7C10DGuwyn1m02shMHyz6cN5UbTHfa8yqnkELg_L8", BUILD_DEFAULT_BACKEND_URL = "https://beta-api.unbrowse.ai";
|
|
6482
6548
|
|
|
6483
6549
|
// ../../src/version.ts
|
|
6484
6550
|
import { createHash } from "crypto";
|
|
@@ -16709,6 +16775,17 @@ function rankEndpoints(endpoints, intent, skillDomain, contextUrl) {
|
|
|
16709
16775
|
}
|
|
16710
16776
|
score += matches * 100;
|
|
16711
16777
|
}
|
|
16778
|
+
if (rawTokens.length > 0 && pathname) {
|
|
16779
|
+
const pathLower = pathname.toLowerCase();
|
|
16780
|
+
const pathSegs = pathLower.split("/").filter(Boolean);
|
|
16781
|
+
for (const token of rawTokens) {
|
|
16782
|
+
const stemmed = stem(token);
|
|
16783
|
+
if (pathSegs.some((seg) => seg === stemmed || seg === token || seg.includes(token))) {
|
|
16784
|
+
score += 150;
|
|
16785
|
+
break;
|
|
16786
|
+
}
|
|
16787
|
+
}
|
|
16788
|
+
}
|
|
16712
16789
|
if (ep.dom_extraction)
|
|
16713
16790
|
score += 25;
|
|
16714
16791
|
if (descriptionMeta.needs_review && ep.dom_extraction)
|
|
@@ -18568,6 +18645,13 @@ function isCachedSkillRelevantForIntent(skill, intent, contextUrl) {
|
|
|
18568
18645
|
const hasStructuredSearchEndpoint = candidateSkill.endpoints.some((endpoint) => endpointHasSearchBindings(endpoint) && (!!endpoint.dom_extraction || !!endpoint.response_schema) && endpointMatchesContextOrigin(endpoint, contextUrl) && endpointMatchesExplicitSearchContext(endpoint, contextUrl));
|
|
18569
18646
|
if (hasStructuredSearchEndpoint)
|
|
18570
18647
|
return true;
|
|
18648
|
+
if (top && top.score >= 0) {
|
|
18649
|
+
try {
|
|
18650
|
+
const topPath = new URL(top.endpoint.url_template).pathname.toLowerCase();
|
|
18651
|
+
if (/\/(search|find|query|browse|explore)\b/.test(topPath))
|
|
18652
|
+
return true;
|
|
18653
|
+
} catch {}
|
|
18654
|
+
}
|
|
18571
18655
|
if (collectExplicitSearchContextBindingKeys(contextUrl).size > 0)
|
|
18572
18656
|
return false;
|
|
18573
18657
|
}
|