unbrowse 6.1.3 → 6.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.js CHANGED
@@ -31,7 +31,7 @@ var __promiseAll = (args) => Promise.all(args);
31
31
  var __require = /* @__PURE__ */ createRequire(import.meta.url);
32
32
 
33
33
  // ../../src/build-info.generated.ts
34
- var BUILD_RELEASE_VERSION = "6.1.3", BUILD_GIT_SHA = "2efa82e8f6a7", BUILD_CODE_HASH = "5d9ebf619c61", BUILD_RELEASE_MANIFEST_BASE64 = "eyJzY2hlbWFfdmVyc2lvbiI6MSwicmVsZWFzZV92ZXJzaW9uIjoiNi4xLjMiLCJnaXRfc2hhIjoiMmVmYTgyZThmNmE3IiwiY29kZV9oYXNoIjoiNWQ5ZWJmNjE5YzYxIiwidHJhY2VfdmVyc2lvbiI6IjVkOWViZjYxOWM2MUAyZWZhODJlOGY2YTciLCJpc3N1ZWRfYXQiOiIyMDI2LTA1LTAxVDAxOjA3OjIxLjM5NFoifQ", BUILD_RELEASE_MANIFEST_SIGNATURE = "bzwIl25aiAhrxXJfAHiTDAFUYKkOUntait0ZAKFE2Ys", BUILD_DEFAULT_BACKEND_URL = "https://beta-api.unbrowse.ai";
34
+ var BUILD_RELEASE_VERSION = "6.2.0", BUILD_GIT_SHA = "d39e0773367a", BUILD_CODE_HASH = "5d9ebf619c61", BUILD_RELEASE_MANIFEST_BASE64 = "eyJzY2hlbWFfdmVyc2lvbiI6MSwicmVsZWFzZV92ZXJzaW9uIjoiNi4yLjAiLCJnaXRfc2hhIjoiZDM5ZTA3NzMzNjdhIiwiY29kZV9oYXNoIjoiNWQ5ZWJmNjE5YzYxIiwidHJhY2VfdmVyc2lvbiI6IjVkOWViZjYxOWM2MUBkMzllMDc3MzM2N2EiLCJpc3N1ZWRfYXQiOiIyMDI2LTA1LTAxVDAyOjA5OjIwLjQ1N1oifQ", BUILD_RELEASE_MANIFEST_SIGNATURE = "R7RjaEXQD0TV-UY0ZOZwLAJ7iFqZc3n-ZIfmrZHKtec", BUILD_DEFAULT_BACKEND_URL = "https://beta-api.unbrowse.ai";
35
35
 
36
36
  // ../../src/version.ts
37
37
  import { createHash } from "crypto";
package/dist/mcp.js CHANGED
@@ -226,11 +226,11 @@ import { dirname, join, parse } from "path";
226
226
  import { fileURLToPath as fileURLToPath2 } from "url";
227
227
 
228
228
  // ../../src/build-info.generated.ts
229
- var BUILD_RELEASE_VERSION = "6.1.3";
230
- var BUILD_GIT_SHA = "2efa82e8f6a7";
229
+ var BUILD_RELEASE_VERSION = "6.2.0";
230
+ var BUILD_GIT_SHA = "d39e0773367a";
231
231
  var BUILD_CODE_HASH = "5d9ebf619c61";
232
- var BUILD_RELEASE_MANIFEST_BASE64 = "eyJzY2hlbWFfdmVyc2lvbiI6MSwicmVsZWFzZV92ZXJzaW9uIjoiNi4xLjMiLCJnaXRfc2hhIjoiMmVmYTgyZThmNmE3IiwiY29kZV9oYXNoIjoiNWQ5ZWJmNjE5YzYxIiwidHJhY2VfdmVyc2lvbiI6IjVkOWViZjYxOWM2MUAyZWZhODJlOGY2YTciLCJpc3N1ZWRfYXQiOiIyMDI2LTA1LTAxVDAxOjA3OjIxLjM5NFoifQ";
233
- var BUILD_RELEASE_MANIFEST_SIGNATURE = "bzwIl25aiAhrxXJfAHiTDAFUYKkOUntait0ZAKFE2Ys";
232
+ var BUILD_RELEASE_MANIFEST_BASE64 = "eyJzY2hlbWFfdmVyc2lvbiI6MSwicmVsZWFzZV92ZXJzaW9uIjoiNi4yLjAiLCJnaXRfc2hhIjoiZDM5ZTA3NzMzNjdhIiwiY29kZV9oYXNoIjoiNWQ5ZWJmNjE5YzYxIiwidHJhY2VfdmVyc2lvbiI6IjVkOWViZjYxOWM2MUBkMzllMDc3MzM2N2EiLCJpc3N1ZWRfYXQiOiIyMDI2LTA1LTAxVDAyOjA5OjIwLjQ1N1oifQ";
233
+ var BUILD_RELEASE_MANIFEST_SIGNATURE = "R7RjaEXQD0TV-UY0ZOZwLAJ7iFqZc3n-ZIfmrZHKtec";
234
234
  var BUILD_DEFAULT_BACKEND_URL = "https://beta-api.unbrowse.ai";
235
235
 
236
236
  // ../../src/version.ts
package/dist/server.js CHANGED
@@ -7285,7 +7285,7 @@ var init_capture = __esm(async () => {
7285
7285
  });
7286
7286
 
7287
7287
  // ../../src/build-info.generated.ts
7288
- var BUILD_RELEASE_VERSION = "6.1.3", BUILD_GIT_SHA = "2efa82e8f6a7", BUILD_CODE_HASH = "5d9ebf619c61", BUILD_RELEASE_MANIFEST_BASE64 = "eyJzY2hlbWFfdmVyc2lvbiI6MSwicmVsZWFzZV92ZXJzaW9uIjoiNi4xLjMiLCJnaXRfc2hhIjoiMmVmYTgyZThmNmE3IiwiY29kZV9oYXNoIjoiNWQ5ZWJmNjE5YzYxIiwidHJhY2VfdmVyc2lvbiI6IjVkOWViZjYxOWM2MUAyZWZhODJlOGY2YTciLCJpc3N1ZWRfYXQiOiIyMDI2LTA1LTAxVDAxOjA3OjIxLjM5NFoifQ", BUILD_RELEASE_MANIFEST_SIGNATURE = "bzwIl25aiAhrxXJfAHiTDAFUYKkOUntait0ZAKFE2Ys", BUILD_DEFAULT_BACKEND_URL = "https://beta-api.unbrowse.ai";
7288
+ var BUILD_RELEASE_VERSION = "6.2.0", BUILD_GIT_SHA = "d39e0773367a", BUILD_CODE_HASH = "5d9ebf619c61", BUILD_RELEASE_MANIFEST_BASE64 = "eyJzY2hlbWFfdmVyc2lvbiI6MSwicmVsZWFzZV92ZXJzaW9uIjoiNi4yLjAiLCJnaXRfc2hhIjoiZDM5ZTA3NzMzNjdhIiwiY29kZV9oYXNoIjoiNWQ5ZWJmNjE5YzYxIiwidHJhY2VfdmVyc2lvbiI6IjVkOWViZjYxOWM2MUBkMzllMDc3MzM2N2EiLCJpc3N1ZWRfYXQiOiIyMDI2LTA1LTAxVDAyOjA5OjIwLjQ1N1oifQ", BUILD_RELEASE_MANIFEST_SIGNATURE = "R7RjaEXQD0TV-UY0ZOZwLAJ7iFqZc3n-ZIfmrZHKtec", BUILD_DEFAULT_BACKEND_URL = "https://beta-api.unbrowse.ai";
7289
7289
 
7290
7290
  // ../../src/version.ts
7291
7291
  import { createHash as createHash2 } from "crypto";
@@ -12880,6 +12880,67 @@ function extractDefinitionSpecial(html, intent) {
12880
12880
  element_count: 1
12881
12881
  }];
12882
12882
  }
12883
/**
 * Extracts a long-form article (MediaWiki-style page or a generic
 * <article>/<main> container) from raw HTML as a single "article" structure.
 *
 * The extractor runs when either the intent text mentions an article-like
 * request or the HTML carries MediaWiki content markers. It returns an empty
 * array whenever no usable container, <h1> title, summary, or section is found.
 *
 * @param {string} html - Raw page HTML.
 * @param {string} [intent] - Free-text user intent; a missing or non-string
 *   value is treated as an empty intent instead of throwing.
 * @returns {Array<{type: string, data: object, element_count: number}>}
 *   Either `[]` or a one-element array holding the article structure with
 *   `title`, optional `summary`, up to 12 `sections` and the total
 *   `section_count`.
 */
function extractArticleBodySpecial(html, intent) {
  const SUMMARY_MIN_CHARS = 80;
  const SECTION_CHAR_LIMIT = 1500;
  const MAX_SECTIONS = 12;

  // Sibling extractors call `regex.test(intent)`, which tolerates an undefined
  // intent; coerce here so this extractor cannot crash the whole DOM pass.
  const intentLower = String(intent ?? "").toLowerCase();
  // NOTE(review): these are substring matches, so "read" also fires on words
  // such as "thread" — confirm whether word boundaries are wanted.
  const articleIntent = /(wikipedia|article|wiki page|page on|read|content of|body of|summary of|about )/i.test(intentLower);
  const looksWikipedia = /id="mw-content-text"|class="mw-parser-output"/i.test(html);
  if (!articleIntent && !looksWikipedia)
    return [];

  const $ = cheerio.load(html);

  // Detection accepts the `mw-content-text` id on its own, so selection must
  // be able to fall back to it (and then to the generic containers) when the
  // `.mw-parser-output` wrapper is not present.
  const rootSelectors = looksWikipedia
    ? [".mw-parser-output", "#mw-content-text", "article, [role='article']", "main"]
    : ["article, [role='article']", "main"];
  let root = null;
  for (const selector of rootSelectors) {
    const candidate = $(selector).first();
    if (candidate.length) {
      root = candidate;
      break;
    }
  }
  if (!root)
    return [];

  const title = cleanText($("h1").first().text());
  if (!title)
    return [];

  // Strip page chrome (edit links, citations, nav/info boxes, TOC, inline
  // style/script) before reading any text out of the container.
  root.find(".mw-editsection, .reference, .references, .navbox, .infobox, .reflist, .hatnote, .ambox, .toc, sup.reference, style, script, .sidebar").remove();

  // Summary: the first paragraph whose cleaned text is long enough.
  let summary = "";
  root.find("> p, .mw-parser-output > p, p").each((_, el) => {
    if (summary.length >= SUMMARY_MIN_CHARS)
      return false; // returning false stops cheerio's .each()
    const paragraph = cleanText($(el).text());
    if (paragraph.length >= SUMMARY_MIN_CHARS)
      summary = paragraph;
  });

  const sections = [];
  let current = null;

  // Pushes the section being built, if it collected any text.
  const flushCurrent = () => {
    if (current && current.parts.length)
      sections.push({ heading: current.heading, text: current.parts.join("\n").slice(0, SECTION_CHAR_LIMIT) });
  };

  root.find("h2, h3, p, li").each((_, el) => {
    const tag = el.tagName?.toLowerCase?.() ?? el.name ?? "";
    const txt = cleanText($(el).text());
    if (!txt)
      return;
    if (tag === "h2" || tag === "h3") {
      flushCurrent();
      // Boilerplate trailing sections are dropped: text under them is ignored
      // until the next heading starts a new section.
      const isBoilerplate = /^(references?|external links?|see also|notes?|further reading|bibliography|sources?)$/i.test(txt);
      current = isBoilerplate ? null : { heading: txt, parts: [], size: 0 };
      return;
    }
    if (current && (tag === "p" || tag === "li") && txt.length > 20) {
      // `size` mirrors the length of parts.join("\n") so the text is not
      // re-joined for every paragraph.
      if (current.size < SECTION_CHAR_LIMIT) {
        current.size += txt.length + (current.parts.length > 0 ? 1 : 0);
        current.parts.push(txt);
      }
    }
  });
  flushCurrent();

  if (!summary && sections.length === 0)
    return [];

  return [{
    type: "article",
    data: {
      title,
      summary: summary || undefined,
      sections: sections.slice(0, MAX_SECTIONS),
      section_count: sections.length
    },
    element_count: 1 + sections.length
  }];
}
12883
12944
  function extractCourseSearchSpecial(html, intent) {
12884
12945
  if (!/\b(course|courses)\b/i.test(intent))
12885
12946
  return [];
@@ -13440,7 +13501,8 @@ function extractFromDOM(html, intent) {
13440
13501
  const trendStructures = extractTrendSpecial(workingHtml, intent);
13441
13502
  const definitionStructures = extractDefinitionSpecial(workingHtml, intent);
13442
13503
  const courseStructures = extractCourseSearchSpecial(workingHtml, intent);
13443
- const structures = [...flashStructures, ...githubStructures, ...linkedInStructures, ...packageSearchStructures, ...xProfileStructures, ...postStructures, ...trendStructures, ...definitionStructures, ...courseStructures, ...spaStructures, ...parseStructured(cleaned)].map((structure) => normalizeStructureForIntent(structure, intent));
13504
+ const articleStructures = extractArticleBodySpecial(workingHtml, intent);
13505
+ const structures = [...flashStructures, ...githubStructures, ...linkedInStructures, ...packageSearchStructures, ...xProfileStructures, ...postStructures, ...trendStructures, ...definitionStructures, ...courseStructures, ...articleStructures, ...spaStructures, ...parseStructured(cleaned)].map((structure) => normalizeStructureForIntent(structure, intent));
13444
13506
  if (structures.length === 0) {
13445
13507
  return { data: null, extraction_method: "none", confidence: 0 };
13446
13508
  }
@@ -17425,6 +17487,13 @@ async function executeEndpoint(skill, endpoint, params = {}, projection, options
17425
17487
  if (mergedParams.features == null || mergedParams.features === "{features}") {
17426
17488
  mergedParams.features = encodeURIComponent(__gqlEnc.features);
17427
17489
  }
17490
+ if (endpoint.body && typeof endpoint.body === "object" && !Array.isArray(endpoint.body)) {
17491
+ const b = endpoint.body;
17492
+ if ("variables" in b)
17493
+ b.variables = __gqlEnc.variables;
17494
+ if ("features" in b)
17495
+ b.features = __gqlEnc.features;
17496
+ }
17428
17497
  }
17429
17498
  let url = interpolate(urlTemplate, mergedParams);
17430
17499
  const __callerUrl = typeof mergedParams.url === "string" && mergedParams.url ? mergedParams.url : endpoint.trigger_url ?? "";
@@ -17871,7 +17940,8 @@ async function executeEndpoint(skill, endpoint, params = {}, projection, options
17871
17940
  });
17872
17941
  if (!trace.success) {
17873
17942
  trace.error = status === 0 ? `HTTP 0 — network failure or browser fetch was blocked (DNS, TLS, CORS, anti-bot, or kuri tab error). Try \`unbrowse go\` to open a live session, then re-run.` : status === 404 ? `HTTP 404 — endpoint may be stale. Re-run via POST /v1/intent/resolve to get fresh endpoints.` : `HTTP ${status}`;
17874
- if (data == null) {
17943
+ const isEmptyData = data == null || typeof data === "object" && !Array.isArray(data) && Object.keys(data).length === 0;
17944
+ if (isEmptyData) {
17875
17945
  data = {
17876
17946
  error: status === 0 ? "network_failure" : `http_${status}`,
17877
17947
  message: trace.error,
@@ -21904,7 +21974,7 @@ async function resolveAndExecute(intent, params = {}, context, projection, optio
21904
21974
  requiredRecovery: false
21905
21975
  });
21906
21976
  const isSameHostResolve = !!context?.url && !!endpointScopedSkill.domain;
21907
- const hostMatches = isSameHostResolve && new URL(context.url).hostname === endpointScopedSkill.domain;
21977
+ const hostMatches = isSameHostResolve && (new URL(context.url).hostname === endpointScopedSkill.domain || getRegistrableDomain(new URL(context.url).hostname) === getRegistrableDomain(endpointScopedSkill.domain));
21908
21978
  const allNegative = epRanked.length > 0 && epRanked.every((r) => r.score < 0);
21909
21979
  if ((epRanked.length === 0 || allNegative) && hostMatches) {
21910
21980
  return {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "unbrowse",
3
- "version": "6.1.3",
3
+ "version": "6.2.0",
4
4
  "description": "Reverse-engineer any website into reusable API skills. Zero-dep single binary with embedded browser engine.",
5
5
  "type": "module",
6
6
  "bin": {