unbrowse 6.1.3 → 6.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli.js +1 -1
- package/dist/mcp.js +4 -4
- package/dist/server.js +74 -4
- package/package.json +1 -1
package/dist/cli.js
CHANGED
|
@@ -31,7 +31,7 @@ var __promiseAll = (args) => Promise.all(args);
|
|
|
31
31
|
var __require = /* @__PURE__ */ createRequire(import.meta.url);
|
|
32
32
|
|
|
33
33
|
// ../../src/build-info.generated.ts
|
|
34
|
-
var BUILD_RELEASE_VERSION = "6.
|
|
34
|
+
var BUILD_RELEASE_VERSION = "6.2.0", BUILD_GIT_SHA = "d39e0773367a", BUILD_CODE_HASH = "5d9ebf619c61", BUILD_RELEASE_MANIFEST_BASE64 = "eyJzY2hlbWFfdmVyc2lvbiI6MSwicmVsZWFzZV92ZXJzaW9uIjoiNi4yLjAiLCJnaXRfc2hhIjoiZDM5ZTA3NzMzNjdhIiwiY29kZV9oYXNoIjoiNWQ5ZWJmNjE5YzYxIiwidHJhY2VfdmVyc2lvbiI6IjVkOWViZjYxOWM2MUBkMzllMDc3MzM2N2EiLCJpc3N1ZWRfYXQiOiIyMDI2LTA1LTAxVDAyOjA5OjIwLjQ1N1oifQ", BUILD_RELEASE_MANIFEST_SIGNATURE = "R7RjaEXQD0TV-UY0ZOZwLAJ7iFqZc3n-ZIfmrZHKtec", BUILD_DEFAULT_BACKEND_URL = "https://beta-api.unbrowse.ai";
|
|
35
35
|
|
|
36
36
|
// ../../src/version.ts
|
|
37
37
|
import { createHash } from "crypto";
|
package/dist/mcp.js
CHANGED
|
@@ -226,11 +226,11 @@ import { dirname, join, parse } from "path";
|
|
|
226
226
|
import { fileURLToPath as fileURLToPath2 } from "url";
|
|
227
227
|
|
|
228
228
|
// ../../src/build-info.generated.ts
|
|
229
|
-
var BUILD_RELEASE_VERSION = "6.
|
|
230
|
-
var BUILD_GIT_SHA = "
|
|
229
|
+
var BUILD_RELEASE_VERSION = "6.2.0";
|
|
230
|
+
var BUILD_GIT_SHA = "d39e0773367a";
|
|
231
231
|
var BUILD_CODE_HASH = "5d9ebf619c61";
|
|
232
|
-
var BUILD_RELEASE_MANIFEST_BASE64 = "
|
|
233
|
-
var BUILD_RELEASE_MANIFEST_SIGNATURE = "
|
|
232
|
+
var BUILD_RELEASE_MANIFEST_BASE64 = "eyJzY2hlbWFfdmVyc2lvbiI6MSwicmVsZWFzZV92ZXJzaW9uIjoiNi4yLjAiLCJnaXRfc2hhIjoiZDM5ZTA3NzMzNjdhIiwiY29kZV9oYXNoIjoiNWQ5ZWJmNjE5YzYxIiwidHJhY2VfdmVyc2lvbiI6IjVkOWViZjYxOWM2MUBkMzllMDc3MzM2N2EiLCJpc3N1ZWRfYXQiOiIyMDI2LTA1LTAxVDAyOjA5OjIwLjQ1N1oifQ";
|
|
233
|
+
var BUILD_RELEASE_MANIFEST_SIGNATURE = "R7RjaEXQD0TV-UY0ZOZwLAJ7iFqZc3n-ZIfmrZHKtec";
|
|
234
234
|
var BUILD_DEFAULT_BACKEND_URL = "https://beta-api.unbrowse.ai";
|
|
235
235
|
|
|
236
236
|
// ../../src/version.ts
|
package/dist/server.js
CHANGED
|
@@ -7285,7 +7285,7 @@ var init_capture = __esm(async () => {
|
|
|
7285
7285
|
});
|
|
7286
7286
|
|
|
7287
7287
|
// ../../src/build-info.generated.ts
|
|
7288
|
-
var BUILD_RELEASE_VERSION = "6.
|
|
7288
|
+
var BUILD_RELEASE_VERSION = "6.2.0", BUILD_GIT_SHA = "d39e0773367a", BUILD_CODE_HASH = "5d9ebf619c61", BUILD_RELEASE_MANIFEST_BASE64 = "eyJzY2hlbWFfdmVyc2lvbiI6MSwicmVsZWFzZV92ZXJzaW9uIjoiNi4yLjAiLCJnaXRfc2hhIjoiZDM5ZTA3NzMzNjdhIiwiY29kZV9oYXNoIjoiNWQ5ZWJmNjE5YzYxIiwidHJhY2VfdmVyc2lvbiI6IjVkOWViZjYxOWM2MUBkMzllMDc3MzM2N2EiLCJpc3N1ZWRfYXQiOiIyMDI2LTA1LTAxVDAyOjA5OjIwLjQ1N1oifQ", BUILD_RELEASE_MANIFEST_SIGNATURE = "R7RjaEXQD0TV-UY0ZOZwLAJ7iFqZc3n-ZIfmrZHKtec", BUILD_DEFAULT_BACKEND_URL = "https://beta-api.unbrowse.ai";
|
|
7289
7289
|
|
|
7290
7290
|
// ../../src/version.ts
|
|
7291
7291
|
import { createHash as createHash2 } from "crypto";
|
|
@@ -12880,6 +12880,67 @@ function extractDefinitionSpecial(html, intent) {
|
|
|
12880
12880
|
element_count: 1
|
|
12881
12881
|
}];
|
|
12882
12882
|
}
|
|
12883
|
+
/**
 * Extracts a long-form article (title, lead summary and headed sections) from
 * an HTML document.
 *
 * The extractor only runs when the intent mentions an article-like keyword or
 * the markup carries MediaWiki content markers. It yields at most one
 * structure of type "article".
 *
 * @param {string} html - Raw HTML of the page.
 * @param {string} [intent] - Free-text user intent. A missing or non-string
 *   intent is treated as "no article intent" instead of throwing.
 * @returns {Array<{type: string, data: object, element_count: number}>}
 *   An empty array when the page does not qualify; otherwise a single-element
 *   array describing the article.
 */
function extractArticleBodySpecial(html, intent) {
  const MIN_SUMMARY_CHARS = 80;
  const MIN_PART_CHARS = 20;
  const MAX_SECTION_CHARS = 1500;
  const MAX_SECTIONS = 12;

  // The sibling extractors pass `intent` straight to RegExp#test, which copes
  // with a missing intent; calling .toLowerCase() on undefined/null would throw
  // and abort the whole extraction pass, so guard it here.
  const intentLower = typeof intent === "string" ? intent.toLowerCase() : "";
  // NOTE(review): keywords match as plain substrings, so "read" also matches
  // inside words such as "thread" — confirm whether word boundaries are wanted.
  const articleIntent = /(wikipedia|article|wiki page|page on|read|content of|body of|summary of|about )/i.test(intentLower);
  const looksWikipedia = /id="mw-content-text"|class="mw-parser-output"/i.test(html);
  if (!articleIntent && !looksWikipedia) {
    return [];
  }

  const $ = cheerio.load(html);

  // Pick the content root: MediaWiki body first, then <article>, then <main>.
  let root;
  if (looksWikipedia) {
    root = $(".mw-parser-output").first();
  } else {
    const articleEl = $("article, [role='article']").first();
    root = articleEl.length > 0 ? articleEl : $("main").first();
  }
  if (!root.length) {
    return [];
  }

  // The title is taken from the first <h1> of the whole document, not the root.
  const title = cleanText($("h1").first().text());
  if (!title) {
    return [];
  }

  // Drop page chrome (edit links, citations, nav boxes, ...) before reading text.
  root.find(".mw-editsection, .reference, .references, .navbox, .infobox, .reflist, .hatnote, .ambox, .toc, sup.reference, style, script, .sidebar").remove();

  // Summary = first paragraph whose cleaned text is long enough.
  let summary = "";
  root.find("> p, .mw-parser-output > p, p").each((_, el) => {
    const paragraph = cleanText($(el).text());
    if (paragraph.length >= MIN_SUMMARY_CHARS) {
      summary = paragraph;
      return false; // returning false stops the cheerio iteration
    }
    return undefined;
  });

  const sections = [];
  let current = null;

  // Pushes the section being accumulated, if it collected any text.
  const flushCurrent = () => {
    if (current && current.parts.length) {
      sections.push({
        heading: current.heading,
        text: current.parts.join("\n").slice(0, MAX_SECTION_CHARS)
      });
    }
  };

  root.find("h2, h3, p, li").each((_, el) => {
    const tag = el.tagName?.toLowerCase?.() ?? el.name ?? "";
    const txt = cleanText($(el).text());
    if (!txt) {
      return;
    }
    if (tag === "h2" || tag === "h3") {
      flushCurrent();
      // Boilerplate tail sections are skipped: text under them is ignored
      // until the next heading starts a new section.
      const isBoilerplate = /^(references?|external links?|see also|notes?|further reading|bibliography|sources?)$/i.test(txt);
      current = isBoilerplate ? null : { heading: txt, parts: [] };
      return;
    }
    // Text that appears before the first heading has no section to join.
    if (!current || (tag !== "p" && tag !== "li") || txt.length <= MIN_PART_CHARS) {
      return;
    }
    // Stop collecting once the section already holds the maximum kept length.
    if (current.parts.join("\n").length < MAX_SECTION_CHARS) {
      current.parts.push(txt);
    }
  });
  flushCurrent();

  if (!summary && sections.length === 0) {
    return [];
  }

  return [{
    type: "article",
    data: {
      title,
      summary: summary || undefined,
      sections: sections.slice(0, MAX_SECTIONS),
      // Counts report every section found, not just the ones returned.
      section_count: sections.length
    },
    element_count: 1 + sections.length
  }];
}
|
|
12883
12944
|
function extractCourseSearchSpecial(html, intent) {
|
|
12884
12945
|
if (!/\b(course|courses)\b/i.test(intent))
|
|
12885
12946
|
return [];
|
|
@@ -13440,7 +13501,8 @@ function extractFromDOM(html, intent) {
|
|
|
13440
13501
|
const trendStructures = extractTrendSpecial(workingHtml, intent);
|
|
13441
13502
|
const definitionStructures = extractDefinitionSpecial(workingHtml, intent);
|
|
13442
13503
|
const courseStructures = extractCourseSearchSpecial(workingHtml, intent);
|
|
13443
|
-
const
|
|
13504
|
+
const articleStructures = extractArticleBodySpecial(workingHtml, intent);
|
|
13505
|
+
const structures = [...flashStructures, ...githubStructures, ...linkedInStructures, ...packageSearchStructures, ...xProfileStructures, ...postStructures, ...trendStructures, ...definitionStructures, ...courseStructures, ...articleStructures, ...spaStructures, ...parseStructured(cleaned)].map((structure) => normalizeStructureForIntent(structure, intent));
|
|
13444
13506
|
if (structures.length === 0) {
|
|
13445
13507
|
return { data: null, extraction_method: "none", confidence: 0 };
|
|
13446
13508
|
}
|
|
@@ -17425,6 +17487,13 @@ async function executeEndpoint(skill, endpoint, params = {}, projection, options
|
|
|
17425
17487
|
if (mergedParams.features == null || mergedParams.features === "{features}") {
|
|
17426
17488
|
mergedParams.features = encodeURIComponent(__gqlEnc.features);
|
|
17427
17489
|
}
|
|
17490
|
+
if (endpoint.body && typeof endpoint.body === "object" && !Array.isArray(endpoint.body)) {
|
|
17491
|
+
const b = endpoint.body;
|
|
17492
|
+
if ("variables" in b)
|
|
17493
|
+
b.variables = __gqlEnc.variables;
|
|
17494
|
+
if ("features" in b)
|
|
17495
|
+
b.features = __gqlEnc.features;
|
|
17496
|
+
}
|
|
17428
17497
|
}
|
|
17429
17498
|
let url = interpolate(urlTemplate, mergedParams);
|
|
17430
17499
|
const __callerUrl = typeof mergedParams.url === "string" && mergedParams.url ? mergedParams.url : endpoint.trigger_url ?? "";
|
|
@@ -17871,7 +17940,8 @@ async function executeEndpoint(skill, endpoint, params = {}, projection, options
|
|
|
17871
17940
|
});
|
|
17872
17941
|
if (!trace.success) {
|
|
17873
17942
|
trace.error = status === 0 ? `HTTP 0 — network failure or browser fetch was blocked (DNS, TLS, CORS, anti-bot, or kuri tab error). Try \`unbrowse go\` to open a live session, then re-run.` : status === 404 ? `HTTP 404 — endpoint may be stale. Re-run via POST /v1/intent/resolve to get fresh endpoints.` : `HTTP ${status}`;
|
|
17874
|
-
|
|
17943
|
+
const isEmptyData = data == null || typeof data === "object" && !Array.isArray(data) && Object.keys(data).length === 0;
|
|
17944
|
+
if (isEmptyData) {
|
|
17875
17945
|
data = {
|
|
17876
17946
|
error: status === 0 ? "network_failure" : `http_${status}`,
|
|
17877
17947
|
message: trace.error,
|
|
@@ -21904,7 +21974,7 @@ async function resolveAndExecute(intent, params = {}, context, projection, optio
|
|
|
21904
21974
|
requiredRecovery: false
|
|
21905
21975
|
});
|
|
21906
21976
|
const isSameHostResolve = !!context?.url && !!endpointScopedSkill.domain;
|
|
21907
|
-
const hostMatches = isSameHostResolve && new URL(context.url).hostname === endpointScopedSkill.domain;
|
|
21977
|
+
const hostMatches = isSameHostResolve && (new URL(context.url).hostname === endpointScopedSkill.domain || getRegistrableDomain(new URL(context.url).hostname) === getRegistrableDomain(endpointScopedSkill.domain));
|
|
21908
21978
|
const allNegative = epRanked.length > 0 && epRanked.every((r) => r.score < 0);
|
|
21909
21979
|
if ((epRanked.length === 0 || allNegative) && hostMatches) {
|
|
21910
21980
|
return {
|