unbrowse 3.7.0-preview.2 → 3.7.0-preview.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.js CHANGED
@@ -31,7 +31,7 @@ var __promiseAll = (args) => Promise.all(args);
31
31
  var __require = /* @__PURE__ */ createRequire(import.meta.url);
32
32
 
33
33
  // ../../src/build-info.generated.ts
34
- var BUILD_RELEASE_VERSION = "3.7.0-preview.2", BUILD_GIT_SHA = "c3b7d2a563b0", BUILD_CODE_HASH = "5d9ebf619c61", BUILD_RELEASE_MANIFEST_BASE64 = "eyJzY2hlbWFfdmVyc2lvbiI6MSwicmVsZWFzZV92ZXJzaW9uIjoiMy43LjAtcHJldmlldy4yIiwiZ2l0X3NoYSI6ImMzYjdkMmE1NjNiMCIsImNvZGVfaGFzaCI6IjVkOWViZjYxOWM2MSIsInRyYWNlX3ZlcnNpb24iOiI1ZDllYmY2MTljNjFAYzNiN2QyYTU2M2IwIiwiaXNzdWVkX2F0IjoiMjAyNi0wNC0xMFQwNTo0OTo0OS45OTBaIn0", BUILD_RELEASE_MANIFEST_SIGNATURE = "Y_s0r-BxEwdCh5t91laYK58bdu_st2vyzaQdPhhepGY", BUILD_DEFAULT_BACKEND_URL = "https://beta-api.unbrowse.ai";
34
+ var BUILD_RELEASE_VERSION = "3.7.0-preview.3", BUILD_GIT_SHA = "652904c236e5", BUILD_CODE_HASH = "5d9ebf619c61", BUILD_RELEASE_MANIFEST_BASE64 = "eyJzY2hlbWFfdmVyc2lvbiI6MSwicmVsZWFzZV92ZXJzaW9uIjoiMy43LjAtcHJldmlldy4zIiwiZ2l0X3NoYSI6IjY1MjkwNGMyMzZlNSIsImNvZGVfaGFzaCI6IjVkOWViZjYxOWM2MSIsInRyYWNlX3ZlcnNpb24iOiI1ZDllYmY2MTljNjFANjUyOTA0YzIzNmU1IiwiaXNzdWVkX2F0IjoiMjAyNi0wNC0xMFQwNzoyNzozNS44ODZaIn0", BUILD_RELEASE_MANIFEST_SIGNATURE = "IXw8XfBaII9eVNJCmgvwRyesbKiWJVNMurzf5P2mRb0", BUILD_DEFAULT_BACKEND_URL = "https://beta-api.unbrowse.ai";
35
35
 
36
36
  // ../../src/version.ts
37
37
  import { createHash } from "crypto";
package/dist/mcp.js CHANGED
@@ -225,11 +225,11 @@ import { dirname, join, parse } from "path";
225
225
  import { fileURLToPath as fileURLToPath2 } from "url";
226
226
 
227
227
  // ../../src/build-info.generated.ts
228
- var BUILD_RELEASE_VERSION = "3.7.0-preview.2";
229
- var BUILD_GIT_SHA = "c3b7d2a563b0";
228
+ var BUILD_RELEASE_VERSION = "3.7.0-preview.3";
229
+ var BUILD_GIT_SHA = "652904c236e5";
230
230
  var BUILD_CODE_HASH = "5d9ebf619c61";
231
- var BUILD_RELEASE_MANIFEST_BASE64 = "eyJzY2hlbWFfdmVyc2lvbiI6MSwicmVsZWFzZV92ZXJzaW9uIjoiMy43LjAtcHJldmlldy4yIiwiZ2l0X3NoYSI6ImMzYjdkMmE1NjNiMCIsImNvZGVfaGFzaCI6IjVkOWViZjYxOWM2MSIsInRyYWNlX3ZlcnNpb24iOiI1ZDllYmY2MTljNjFAYzNiN2QyYTU2M2IwIiwiaXNzdWVkX2F0IjoiMjAyNi0wNC0xMFQwNTo0OTo0OS45OTBaIn0";
232
- var BUILD_RELEASE_MANIFEST_SIGNATURE = "Y_s0r-BxEwdCh5t91laYK58bdu_st2vyzaQdPhhepGY";
231
+ var BUILD_RELEASE_MANIFEST_BASE64 = "eyJzY2hlbWFfdmVyc2lvbiI6MSwicmVsZWFzZV92ZXJzaW9uIjoiMy43LjAtcHJldmlldy4zIiwiZ2l0X3NoYSI6IjY1MjkwNGMyMzZlNSIsImNvZGVfaGFzaCI6IjVkOWViZjYxOWM2MSIsInRyYWNlX3ZlcnNpb24iOiI1ZDllYmY2MTljNjFANjUyOTA0YzIzNmU1IiwiaXNzdWVkX2F0IjoiMjAyNi0wNC0xMFQwNzoyNzozNS44ODZaIn0";
232
+ var BUILD_RELEASE_MANIFEST_SIGNATURE = "IXw8XfBaII9eVNJCmgvwRyesbKiWJVNMurzf5P2mRb0";
233
233
  var BUILD_DEFAULT_BACKEND_URL = "https://beta-api.unbrowse.ai";
234
234
 
235
235
  // ../../src/version.ts
package/dist/server.js CHANGED
@@ -7120,7 +7120,7 @@ var init_capture = __esm(async () => {
7120
7120
  });
7121
7121
 
7122
7122
  // ../../src/build-info.generated.ts
7123
- var BUILD_RELEASE_VERSION = "3.7.0-preview.2", BUILD_GIT_SHA = "c3b7d2a563b0", BUILD_CODE_HASH = "5d9ebf619c61", BUILD_RELEASE_MANIFEST_BASE64 = "eyJzY2hlbWFfdmVyc2lvbiI6MSwicmVsZWFzZV92ZXJzaW9uIjoiMy43LjAtcHJldmlldy4yIiwiZ2l0X3NoYSI6ImMzYjdkMmE1NjNiMCIsImNvZGVfaGFzaCI6IjVkOWViZjYxOWM2MSIsInRyYWNlX3ZlcnNpb24iOiI1ZDllYmY2MTljNjFAYzNiN2QyYTU2M2IwIiwiaXNzdWVkX2F0IjoiMjAyNi0wNC0xMFQwNTo0OTo0OS45OTBaIn0", BUILD_RELEASE_MANIFEST_SIGNATURE = "Y_s0r-BxEwdCh5t91laYK58bdu_st2vyzaQdPhhepGY", BUILD_DEFAULT_BACKEND_URL = "https://beta-api.unbrowse.ai";
7123
+ var BUILD_RELEASE_VERSION = "3.7.0-preview.3", BUILD_GIT_SHA = "652904c236e5", BUILD_CODE_HASH = "5d9ebf619c61", BUILD_RELEASE_MANIFEST_BASE64 = "eyJzY2hlbWFfdmVyc2lvbiI6MSwicmVsZWFzZV92ZXJzaW9uIjoiMy43LjAtcHJldmlldy4zIiwiZ2l0X3NoYSI6IjY1MjkwNGMyMzZlNSIsImNvZGVfaGFzaCI6IjVkOWViZjYxOWM2MSIsInRyYWNlX3ZlcnNpb24iOiI1ZDllYmY2MTljNjFANjUyOTA0YzIzNmU1IiwiaXNzdWVkX2F0IjoiMjAyNi0wNC0xMFQwNzoyNzozNS44ODZaIn0", BUILD_RELEASE_MANIFEST_SIGNATURE = "IXw8XfBaII9eVNJCmgvwRyesbKiWJVNMurzf5P2mRb0", BUILD_DEFAULT_BACKEND_URL = "https://beta-api.unbrowse.ai";
7124
7124
 
7125
7125
  // ../../src/version.ts
7126
7126
  import { createHash as createHash2 } from "crypto";
@@ -12481,7 +12481,12 @@ function cleanDOM(html) {
12481
12481
  const $ = cheerio.load(html);
12482
12482
  for (const tag of STRIP_TAGS) {
12483
12483
  if (tag === "script") {
12484
- $("script").not('[type="application/ld+json"]').remove();
12484
+ $("script").each((_, el) => {
12485
+ const type = $(el).attr("type") ?? "";
12486
+ if (type !== "application/ld+json") {
12487
+ $(el).remove();
12488
+ }
12489
+ });
12485
12490
  } else {
12486
12491
  $(tag).remove();
12487
12492
  }
@@ -16300,6 +16305,65 @@ async function trySeedPublicDocumentFetchSkill(skill, url, intent, targetDomain,
16300
16305
  redirect: "follow"
16301
16306
  });
16302
16307
  const html = await response.text();
16308
+ const contentType = (response.headers.get("content-type") || "").toLowerCase();
16309
+ if (response.ok && (contentType.includes("application/json") || contentType.includes("text/json"))) {
16310
+ try {
16311
+ const parsed = JSON.parse(html);
16312
+ const urlObj = new URL(response.url || url);
16313
+ const pathTemplate = `${urlObj.origin}${urlObj.pathname}`;
16314
+ const responseSchema = inferSchema([parsed]);
16315
+ const endpoint = {
16316
+ endpoint_id: stableEndpointId2("GET", pathTemplate),
16317
+ method: "GET",
16318
+ url_template: pathTemplate,
16319
+ idempotency: "safe",
16320
+ verification_status: "verified",
16321
+ reliability_score: 0.95,
16322
+ description: `Direct JSON API for ${intent}`,
16323
+ response_schema: responseSchema
16324
+ };
16325
+ endpoint.semantic = inferEndpointSemantic(endpoint, {
16326
+ sampleResponse: parsed,
16327
+ observedAt: new Date().toISOString(),
16328
+ sampleRequestUrl: url
16329
+ });
16330
+ const domain2 = getRegistrableDomain(targetDomain);
16331
+ const existingSkill2 = findExistingSkillForDomain(domain2, intent);
16332
+ const localEndpoints2 = await prepareLearnedEndpoints(existingSkill2 ? mergeEndpoints(existingSkill2.endpoints, [endpoint]) : [endpoint], intent, domain2);
16333
+ const localDraft2 = {
16334
+ skill_id: existingSkill2?.skill_id ?? nanoid6(),
16335
+ version: "1.0.0",
16336
+ schema_version: "1",
16337
+ lifecycle: "active",
16338
+ execution_type: "http",
16339
+ created_at: existingSkill2?.created_at ?? new Date().toISOString(),
16340
+ updated_at: new Date().toISOString(),
16341
+ name: domain2,
16342
+ intent_signature: intent,
16343
+ domain: domain2,
16344
+ description: `API skill for ${domain2}`,
16345
+ owner_type: "agent",
16346
+ endpoints: localEndpoints2,
16347
+ operation_graph: buildSkillOperationGraph(localEndpoints2),
16348
+ intents: [intent]
16349
+ };
16350
+ try {
16351
+ cachePublishedSkill(localDraft2);
16352
+ } catch {}
16353
+ return {
16354
+ trace: stampTrace({
16355
+ trace_id: nanoid6(),
16356
+ skill_id: localDraft2.skill_id,
16357
+ endpoint_id: endpoint.endpoint_id,
16358
+ started_at: new Date().toISOString(),
16359
+ completed_at: new Date().toISOString(),
16360
+ success: true,
16361
+ status_code: response.status
16362
+ }),
16363
+ result: parsed
16364
+ };
16365
+ } catch {}
16366
+ }
16303
16367
  if (!isHtml(html) || isSpaShell(html))
16304
16368
  return;
16305
16369
  const built = buildPageArtifactCapture(response.url || url, intent, html, usedStoredAuth);
@@ -22267,15 +22331,15 @@ async function resolveAndExecute(intent, params = {}, context, projection, optio
22267
22331
  }
22268
22332
  }
22269
22333
  }
22270
- if (context?.url && (/\.(json|xml)(\?|$)|\/api\/|\/v\d+\//.test(context.url) || /[?&]format=(j\d*|json)\b/i.test(context.url) || /^https?:\/\/api\./i.test(context.url))) {
22334
+ if (context?.url) {
22271
22335
  try {
22272
22336
  const directRes = await fetch(context.url, {
22273
- headers: { Accept: "application/json", "User-Agent": "unbrowse/1.0" },
22274
- signal: AbortSignal.timeout(5000),
22337
+ headers: { Accept: "application/json, text/html;q=0.5", "User-Agent": "unbrowse/1.0" },
22338
+ signal: AbortSignal.timeout(15000),
22275
22339
  redirect: "follow"
22276
22340
  });
22277
22341
  const ct = directRes.headers.get("content-type") ?? "";
22278
- if (directRes.ok && (ct.includes("json") || ct.includes("+json"))) {
22342
+ if (directRes.ok && (ct.includes("application/json") || ct.includes("+json") || ct.includes("text/json"))) {
22279
22343
  const data = await directRes.json();
22280
22344
  const trace2 = {
22281
22345
  trace_id: nanoid9(),
@@ -22295,7 +22359,10 @@ async function resolveAndExecute(intent, params = {}, context, projection, optio
22295
22359
  timing: t
22296
22360
  };
22297
22361
  }
22298
- } catch {}
22362
+ } catch (err) {
22363
+ const msg = err instanceof Error ? err.message : String(err);
22364
+ console.log(`[direct-fetch] ${context.url} skipped: ${msg.slice(0, 100)}`);
22365
+ }
22299
22366
  }
22300
22367
  if (process.env.UNBROWSE_LOCAL_ONLY === "1" && !forceCapture) {
22301
22368
  return buildNoCachedMatch();
@@ -22454,6 +22521,32 @@ async function resolveAndExecute(intent, params = {}, context, projection, optio
22454
22521
  throw error;
22455
22522
  }
22456
22523
  timing.execute_ms = Date.now() - te0;
22524
+ const captureErrCheck = result?.error;
22525
+ if (captureErrCheck === "connection_failed" || captureErrCheck === "capture_failed") {
22526
+ console.warn(`[capture] ${captureErrCheck} detected — restarting Kuri and retrying once`);
22527
+ try {
22528
+ const kuri = await Promise.resolve().then(() => (init_client(), exports_client));
22529
+ await kuri.stop().catch(() => {});
22530
+ await new Promise((r) => setTimeout(r, 500));
22531
+ } catch {}
22532
+ try {
22533
+ const retryCaptureSkill = await getOrCreateBrowserCaptureSkill();
22534
+ const retryOut = await withAbortableOpTimeout("live_capture_retry", LIVE_CAPTURE_TIMEOUT_MS, (signal) => executeSkill(retryCaptureSkill, { ...params, url: context.url, intent }, undefined, {
22535
+ ...options,
22536
+ intent,
22537
+ contextUrl: context?.url,
22538
+ signal
22539
+ }));
22540
+ if (retryOut.trace.success || !retryOut.result?.error) {
22541
+ trace = retryOut.trace;
22542
+ result = retryOut.result;
22543
+ learned_skill = retryOut.learned_skill;
22544
+ console.log(`[capture] retry after Kuri restart succeeded`);
22545
+ }
22546
+ } catch (retryErr) {
22547
+ console.warn(`[capture] retry failed: ${retryErr instanceof Error ? retryErr.message : retryErr}`);
22548
+ }
22549
+ }
22457
22550
  const captureResult = result;
22458
22551
  const authRecommended = captureResult?.auth_recommended === true;
22459
22552
  const directDomCaptureResult = trace.success && trace.endpoint_id !== "browser-capture" && !!result && typeof result === "object" && "_extraction" in result;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "unbrowse",
3
- "version": "3.7.0-preview.2",
3
+ "version": "3.7.0-preview.3",
4
4
  "description": "Reverse-engineer any website into reusable API skills. Zero-dep single binary with embedded browser engine.",
5
5
  "type": "module",
6
6
  "bin": {