unbrowse 3.8.0-preview.1 → 3.8.0-preview.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.js CHANGED
@@ -31,7 +31,7 @@ var __promiseAll = (args) => Promise.all(args);
31
31
  var __require = /* @__PURE__ */ createRequire(import.meta.url);
32
32
 
33
33
  // ../../src/build-info.generated.ts
34
- var BUILD_RELEASE_VERSION = "3.8.0-preview.1", BUILD_GIT_SHA = "5b9633fd977c", BUILD_CODE_HASH = "5d9ebf619c61", BUILD_RELEASE_MANIFEST_BASE64 = "eyJzY2hlbWFfdmVyc2lvbiI6MSwicmVsZWFzZV92ZXJzaW9uIjoiMy44LjAtcHJldmlldy4xIiwiZ2l0X3NoYSI6IjViOTYzM2ZkOTc3YyIsImNvZGVfaGFzaCI6IjVkOWViZjYxOWM2MSIsInRyYWNlX3ZlcnNpb24iOiI1ZDllYmY2MTljNjFANWI5NjMzZmQ5NzdjIiwiaXNzdWVkX2F0IjoiMjAyNi0wNC0xMFQxNjo0NjoxMy4yMTFaIn0", BUILD_RELEASE_MANIFEST_SIGNATURE = "CR3byJyWyomilicbi2WBO0kBuXqbATk49Z5ZP9B0lz8", BUILD_DEFAULT_BACKEND_URL = "https://beta-api.unbrowse.ai";
34
+ var BUILD_RELEASE_VERSION = "3.8.0-preview.2", BUILD_GIT_SHA = "efefee1f85df", BUILD_CODE_HASH = "5d9ebf619c61", BUILD_RELEASE_MANIFEST_BASE64 = "eyJzY2hlbWFfdmVyc2lvbiI6MSwicmVsZWFzZV92ZXJzaW9uIjoiMy44LjAtcHJldmlldy4yIiwiZ2l0X3NoYSI6ImVmZWZlZTFmODVkZiIsImNvZGVfaGFzaCI6IjVkOWViZjYxOWM2MSIsInRyYWNlX3ZlcnNpb24iOiI1ZDllYmY2MTljNjFAZWZlZmVlMWY4NWRmIiwiaXNzdWVkX2F0IjoiMjAyNi0wNC0xMFQxNzowOTo0OS44NzNaIn0", BUILD_RELEASE_MANIFEST_SIGNATURE = "nVliy0ydfucv7W56hI5CiuHzYJ92ve4oJqgzV1NFASg", BUILD_DEFAULT_BACKEND_URL = "https://beta-api.unbrowse.ai";
35
35
 
36
36
  // ../../src/version.ts
37
37
  import { createHash } from "crypto";
package/dist/mcp.js CHANGED
@@ -225,11 +225,11 @@ import { dirname, join, parse } from "path";
225
225
  import { fileURLToPath as fileURLToPath2 } from "url";
226
226
 
227
227
  // ../../src/build-info.generated.ts
228
- var BUILD_RELEASE_VERSION = "3.8.0-preview.1";
229
- var BUILD_GIT_SHA = "5b9633fd977c";
228
+ var BUILD_RELEASE_VERSION = "3.8.0-preview.2";
229
+ var BUILD_GIT_SHA = "efefee1f85df";
230
230
  var BUILD_CODE_HASH = "5d9ebf619c61";
231
- var BUILD_RELEASE_MANIFEST_BASE64 = "eyJzY2hlbWFfdmVyc2lvbiI6MSwicmVsZWFzZV92ZXJzaW9uIjoiMy44LjAtcHJldmlldy4xIiwiZ2l0X3NoYSI6IjViOTYzM2ZkOTc3YyIsImNvZGVfaGFzaCI6IjVkOWViZjYxOWM2MSIsInRyYWNlX3ZlcnNpb24iOiI1ZDllYmY2MTljNjFANWI5NjMzZmQ5NzdjIiwiaXNzdWVkX2F0IjoiMjAyNi0wNC0xMFQxNjo0NjoxMy4yMTFaIn0";
232
- var BUILD_RELEASE_MANIFEST_SIGNATURE = "CR3byJyWyomilicbi2WBO0kBuXqbATk49Z5ZP9B0lz8";
231
+ var BUILD_RELEASE_MANIFEST_BASE64 = "eyJzY2hlbWFfdmVyc2lvbiI6MSwicmVsZWFzZV92ZXJzaW9uIjoiMy44LjAtcHJldmlldy4yIiwiZ2l0X3NoYSI6ImVmZWZlZTFmODVkZiIsImNvZGVfaGFzaCI6IjVkOWViZjYxOWM2MSIsInRyYWNlX3ZlcnNpb24iOiI1ZDllYmY2MTljNjFAZWZlZmVlMWY4NWRmIiwiaXNzdWVkX2F0IjoiMjAyNi0wNC0xMFQxNzowOTo0OS44NzNaIn0";
232
+ var BUILD_RELEASE_MANIFEST_SIGNATURE = "nVliy0ydfucv7W56hI5CiuHzYJ92ve4oJqgzV1NFASg";
233
233
  var BUILD_DEFAULT_BACKEND_URL = "https://beta-api.unbrowse.ai";
234
234
 
235
235
  // ../../src/version.ts
package/dist/server.js CHANGED
@@ -7120,7 +7120,7 @@ var init_capture = __esm(async () => {
7120
7120
  });
7121
7121
 
7122
7122
  // ../../src/build-info.generated.ts
7123
- var BUILD_RELEASE_VERSION = "3.8.0-preview.1", BUILD_GIT_SHA = "5b9633fd977c", BUILD_CODE_HASH = "5d9ebf619c61", BUILD_RELEASE_MANIFEST_BASE64 = "eyJzY2hlbWFfdmVyc2lvbiI6MSwicmVsZWFzZV92ZXJzaW9uIjoiMy44LjAtcHJldmlldy4xIiwiZ2l0X3NoYSI6IjViOTYzM2ZkOTc3YyIsImNvZGVfaGFzaCI6IjVkOWViZjYxOWM2MSIsInRyYWNlX3ZlcnNpb24iOiI1ZDllYmY2MTljNjFANWI5NjMzZmQ5NzdjIiwiaXNzdWVkX2F0IjoiMjAyNi0wNC0xMFQxNjo0NjoxMy4yMTFaIn0", BUILD_RELEASE_MANIFEST_SIGNATURE = "CR3byJyWyomilicbi2WBO0kBuXqbATk49Z5ZP9B0lz8", BUILD_DEFAULT_BACKEND_URL = "https://beta-api.unbrowse.ai";
7123
+ var BUILD_RELEASE_VERSION = "3.8.0-preview.2", BUILD_GIT_SHA = "efefee1f85df", BUILD_CODE_HASH = "5d9ebf619c61", BUILD_RELEASE_MANIFEST_BASE64 = "eyJzY2hlbWFfdmVyc2lvbiI6MSwicmVsZWFzZV92ZXJzaW9uIjoiMy44LjAtcHJldmlldy4yIiwiZ2l0X3NoYSI6ImVmZWZlZTFmODVkZiIsImNvZGVfaGFzaCI6IjVkOWViZjYxOWM2MSIsInRyYWNlX3ZlcnNpb24iOiI1ZDllYmY2MTljNjFAZWZlZmVlMWY4NWRmIiwiaXNzdWVkX2F0IjoiMjAyNi0wNC0xMFQxNzowOTo0OS44NzNaIn0", BUILD_RELEASE_MANIFEST_SIGNATURE = "nVliy0ydfucv7W56hI5CiuHzYJ92ve4oJqgzV1NFASg", BUILD_DEFAULT_BACKEND_URL = "https://beta-api.unbrowse.ai";
7124
7124
 
7125
7125
  // ../../src/version.ts
7126
7126
  import { createHash as createHash2 } from "crypto";
@@ -16839,56 +16839,30 @@ async function executeBrowserCapture(skill, params, options) {
16839
16839
  }
16840
16840
  };
16841
16841
  }
16842
- const antiBotSignal = (() => {
16842
+ const capturedMeta = (() => {
16843
16843
  const html = captured.html ?? "";
16844
- if (!html)
16845
- return "empty_capture";
16846
- const lower = html.toLowerCase();
16847
- const titleMatch = lower.match(/<title[^>]*>([^<]{0,200})<\/title>/);
16844
+ const titleMatch = html.toLowerCase().match(/<title[^>]*>([^<]{0,200})<\/title>/);
16848
16845
  const title = titleMatch ? titleMatch[1].trim() : "";
16849
- const blockTitles = [
16850
- "robot check",
16851
- "access denied",
16852
- "just a moment",
16853
- "attention required",
16854
- "please verify",
16855
- "security check",
16856
- "are you a robot"
16857
- ];
16858
- for (const marker of blockTitles) {
16859
- if (title.includes(marker))
16860
- return `anti_bot_title:${marker}`;
16861
- }
16862
- const authTitles = ["log in", "sign in", "log in to", "sign in to"];
16863
- if (authTitles.some((t) => title === t || title.startsWith(t + " ")) || title === "login") {
16864
- return `auth_wall_title:${title}`;
16865
- }
16866
- if (lower.includes("g-recaptcha") || lower.includes("hcaptcha.com") || lower.includes("cf-challenge-form")) {
16867
- return "captcha_present";
16868
- }
16869
- if (html.length < 5000 && !lower.includes("<script")) {
16870
- return `tiny_body:${html.length}b`;
16846
+ const stripped = html.replace(/<script[^>]*>[\s\S]*?<\/script>/gi, "").replace(/<style[^>]*>[\s\S]*?<\/style>/gi, "");
16847
+ const text = stripped.replace(/<[^>]+>/g, " ").replace(/\s+/g, " ").trim();
16848
+ let intentVerdict = "skip";
16849
+ let intentReason = "no_semantic_assessment";
16850
+ if (text && intent) {
16851
+ try {
16852
+ const assessment = assessIntentResult(text, intent);
16853
+ intentVerdict = assessment.verdict;
16854
+ intentReason = assessment.reason;
16855
+ } catch {}
16871
16856
  }
16872
- return null;
16873
- })();
16874
- if (antiBotSignal) {
16875
- const trace3 = stampTrace({
16876
- trace_id: traceId,
16877
- skill_id: skill.skill_id,
16878
- endpoint_id: "browser-capture",
16879
- started_at: startedAt,
16880
- completed_at: new Date().toISOString(),
16881
- success: false,
16882
- error: "browser_block"
16883
- });
16884
16857
  return {
16885
- trace: trace3,
16886
- result: {
16887
- error: "browser_block",
16888
- message: `Browser capture detected a block signal at ${url}: ${antiBotSignal}. The site served a degraded/challenge page to the headless browser. Real-browser cookies via autoExtract may be required.`
16889
- }
16858
+ html_bytes: html.length,
16859
+ title,
16860
+ text_bytes: text.length,
16861
+ observed_api_calls: captured.requests?.length ?? 0,
16862
+ intent_verdict: intentVerdict,
16863
+ intent_reason: intentReason
16890
16864
  };
16891
- }
16865
+ })();
16892
16866
  const trace2 = stampTrace({
16893
16867
  trace_id: traceId,
16894
16868
  skill_id: skill.skill_id,
@@ -16902,7 +16876,8 @@ async function executeBrowserCapture(skill, params, options) {
16902
16876
  trace: trace2,
16903
16877
  result: {
16904
16878
  error: "no_endpoints",
16905
- message: `No API endpoints or structured DOM data found at ${url}. The site may require authentication or may not expose machine-readable data from this page.`
16879
+ message: `No API endpoints or structured DOM data found at ${url}. The site may require authentication or may not expose machine-readable data from this page.`,
16880
+ captured_meta: capturedMeta
16906
16881
  }
16907
16882
  };
16908
16883
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "unbrowse",
3
- "version": "3.8.0-preview.1",
3
+ "version": "3.8.0-preview.2",
4
4
  "description": "Reverse-engineer any website into reusable API skills. Zero-dep single binary with embedded browser engine.",
5
5
  "type": "module",
6
6
  "bin": {