unbrowse 3.7.0-preview.1 → 3.7.0-preview.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli.js +1 -1
- package/dist/mcp.js +4 -4
- package/dist/server.js +116 -7
- package/package.json +1 -1
package/dist/cli.js
CHANGED
|
@@ -31,7 +31,7 @@ var __promiseAll = (args) => Promise.all(args);
|
|
|
31
31
|
var __require = /* @__PURE__ */ createRequire(import.meta.url);
|
|
32
32
|
|
|
33
33
|
// ../../src/build-info.generated.ts
|
|
34
|
-
var BUILD_RELEASE_VERSION = "3.7.0-preview.
|
|
34
|
+
var BUILD_RELEASE_VERSION = "3.7.0-preview.3", BUILD_GIT_SHA = "652904c236e5", BUILD_CODE_HASH = "5d9ebf619c61", BUILD_RELEASE_MANIFEST_BASE64 = "eyJzY2hlbWFfdmVyc2lvbiI6MSwicmVsZWFzZV92ZXJzaW9uIjoiMy43LjAtcHJldmlldy4zIiwiZ2l0X3NoYSI6IjY1MjkwNGMyMzZlNSIsImNvZGVfaGFzaCI6IjVkOWViZjYxOWM2MSIsInRyYWNlX3ZlcnNpb24iOiI1ZDllYmY2MTljNjFANjUyOTA0YzIzNmU1IiwiaXNzdWVkX2F0IjoiMjAyNi0wNC0xMFQwNzoyNzozNS44ODZaIn0", BUILD_RELEASE_MANIFEST_SIGNATURE = "IXw8XfBaII9eVNJCmgvwRyesbKiWJVNMurzf5P2mRb0", BUILD_DEFAULT_BACKEND_URL = "https://beta-api.unbrowse.ai";
|
|
35
35
|
|
|
36
36
|
// ../../src/version.ts
|
|
37
37
|
import { createHash } from "crypto";
|
package/dist/mcp.js
CHANGED
|
@@ -225,11 +225,11 @@ import { dirname, join, parse } from "path";
|
|
|
225
225
|
import { fileURLToPath as fileURLToPath2 } from "url";
|
|
226
226
|
|
|
227
227
|
// ../../src/build-info.generated.ts
|
|
228
|
-
var BUILD_RELEASE_VERSION = "3.7.0-preview.
|
|
229
|
-
var BUILD_GIT_SHA = "
|
|
228
|
+
var BUILD_RELEASE_VERSION = "3.7.0-preview.3";
|
|
229
|
+
var BUILD_GIT_SHA = "652904c236e5";
|
|
230
230
|
var BUILD_CODE_HASH = "5d9ebf619c61";
|
|
231
|
-
var BUILD_RELEASE_MANIFEST_BASE64 = "
|
|
232
|
-
var BUILD_RELEASE_MANIFEST_SIGNATURE = "
|
|
231
|
+
var BUILD_RELEASE_MANIFEST_BASE64 = "eyJzY2hlbWFfdmVyc2lvbiI6MSwicmVsZWFzZV92ZXJzaW9uIjoiMy43LjAtcHJldmlldy4zIiwiZ2l0X3NoYSI6IjY1MjkwNGMyMzZlNSIsImNvZGVfaGFzaCI6IjVkOWViZjYxOWM2MSIsInRyYWNlX3ZlcnNpb24iOiI1ZDllYmY2MTljNjFANjUyOTA0YzIzNmU1IiwiaXNzdWVkX2F0IjoiMjAyNi0wNC0xMFQwNzoyNzozNS44ODZaIn0";
|
|
232
|
+
var BUILD_RELEASE_MANIFEST_SIGNATURE = "IXw8XfBaII9eVNJCmgvwRyesbKiWJVNMurzf5P2mRb0";
|
|
233
233
|
var BUILD_DEFAULT_BACKEND_URL = "https://beta-api.unbrowse.ai";
|
|
234
234
|
|
|
235
235
|
// ../../src/version.ts
|
package/dist/server.js
CHANGED
|
@@ -1910,6 +1910,22 @@ function deriveTemplateParamsFromContextUrl(urlTemplate, contextUrl) {
|
|
|
1910
1910
|
const actualValue = actualUrl.searchParams.get(key);
|
|
1911
1911
|
if (actualValue != null && actualValue !== "") {
|
|
1912
1912
|
out[placeholder] = actualValue;
|
|
1913
|
+
continue;
|
|
1914
|
+
}
|
|
1915
|
+
const byPlaceholderName = actualUrl.searchParams.get(placeholder);
|
|
1916
|
+
if (byPlaceholderName != null && byPlaceholderName !== "") {
|
|
1917
|
+
out[placeholder] = byPlaceholderName;
|
|
1918
|
+
continue;
|
|
1919
|
+
}
|
|
1920
|
+
const lowerPlaceholder = placeholder.toLowerCase();
|
|
1921
|
+
if (/^(q|query|search|text|keyword|keywords|term|terms)$/.test(lowerPlaceholder)) {
|
|
1922
|
+
for (const alias of ["q", "query", "search", "text", "keyword", "keywords", "term"]) {
|
|
1923
|
+
const v = actualUrl.searchParams.get(alias);
|
|
1924
|
+
if (v != null && v !== "") {
|
|
1925
|
+
out[placeholder] = v;
|
|
1926
|
+
break;
|
|
1927
|
+
}
|
|
1928
|
+
}
|
|
1913
1929
|
}
|
|
1914
1930
|
}
|
|
1915
1931
|
return out;
|
|
@@ -7104,7 +7120,7 @@ var init_capture = __esm(async () => {
|
|
|
7104
7120
|
});
|
|
7105
7121
|
|
|
7106
7122
|
// ../../src/build-info.generated.ts
|
|
7107
|
-
var BUILD_RELEASE_VERSION = "3.7.0-preview.
|
|
7123
|
+
var BUILD_RELEASE_VERSION = "3.7.0-preview.3", BUILD_GIT_SHA = "652904c236e5", BUILD_CODE_HASH = "5d9ebf619c61", BUILD_RELEASE_MANIFEST_BASE64 = "eyJzY2hlbWFfdmVyc2lvbiI6MSwicmVsZWFzZV92ZXJzaW9uIjoiMy43LjAtcHJldmlldy4zIiwiZ2l0X3NoYSI6IjY1MjkwNGMyMzZlNSIsImNvZGVfaGFzaCI6IjVkOWViZjYxOWM2MSIsInRyYWNlX3ZlcnNpb24iOiI1ZDllYmY2MTljNjFANjUyOTA0YzIzNmU1IiwiaXNzdWVkX2F0IjoiMjAyNi0wNC0xMFQwNzoyNzozNS44ODZaIn0", BUILD_RELEASE_MANIFEST_SIGNATURE = "IXw8XfBaII9eVNJCmgvwRyesbKiWJVNMurzf5P2mRb0", BUILD_DEFAULT_BACKEND_URL = "https://beta-api.unbrowse.ai";
|
|
7108
7124
|
|
|
7109
7125
|
// ../../src/version.ts
|
|
7110
7126
|
import { createHash as createHash2 } from "crypto";
|
|
@@ -12465,7 +12481,12 @@ function cleanDOM(html) {
|
|
|
12465
12481
|
const $ = cheerio.load(html);
|
|
12466
12482
|
for (const tag of STRIP_TAGS) {
|
|
12467
12483
|
if (tag === "script") {
|
|
12468
|
-
$("script").
|
|
12484
|
+
$("script").each((_, el) => {
|
|
12485
|
+
const type = $(el).attr("type") ?? "";
|
|
12486
|
+
if (type !== "application/ld+json") {
|
|
12487
|
+
$(el).remove();
|
|
12488
|
+
}
|
|
12489
|
+
});
|
|
12469
12490
|
} else {
|
|
12470
12491
|
$(tag).remove();
|
|
12471
12492
|
}
|
|
@@ -16284,6 +16305,65 @@ async function trySeedPublicDocumentFetchSkill(skill, url, intent, targetDomain,
|
|
|
16284
16305
|
redirect: "follow"
|
|
16285
16306
|
});
|
|
16286
16307
|
const html = await response.text();
|
|
16308
|
+
const contentType = (response.headers.get("content-type") || "").toLowerCase();
|
|
16309
|
+
if (response.ok && (contentType.includes("application/json") || contentType.includes("text/json"))) {
|
|
16310
|
+
try {
|
|
16311
|
+
const parsed = JSON.parse(html);
|
|
16312
|
+
const urlObj = new URL(response.url || url);
|
|
16313
|
+
const pathTemplate = `${urlObj.origin}${urlObj.pathname}`;
|
|
16314
|
+
const responseSchema = inferSchema([parsed]);
|
|
16315
|
+
const endpoint = {
|
|
16316
|
+
endpoint_id: stableEndpointId2("GET", pathTemplate),
|
|
16317
|
+
method: "GET",
|
|
16318
|
+
url_template: pathTemplate,
|
|
16319
|
+
idempotency: "safe",
|
|
16320
|
+
verification_status: "verified",
|
|
16321
|
+
reliability_score: 0.95,
|
|
16322
|
+
description: `Direct JSON API for ${intent}`,
|
|
16323
|
+
response_schema: responseSchema
|
|
16324
|
+
};
|
|
16325
|
+
endpoint.semantic = inferEndpointSemantic(endpoint, {
|
|
16326
|
+
sampleResponse: parsed,
|
|
16327
|
+
observedAt: new Date().toISOString(),
|
|
16328
|
+
sampleRequestUrl: url
|
|
16329
|
+
});
|
|
16330
|
+
const domain2 = getRegistrableDomain(targetDomain);
|
|
16331
|
+
const existingSkill2 = findExistingSkillForDomain(domain2, intent);
|
|
16332
|
+
const localEndpoints2 = await prepareLearnedEndpoints(existingSkill2 ? mergeEndpoints(existingSkill2.endpoints, [endpoint]) : [endpoint], intent, domain2);
|
|
16333
|
+
const localDraft2 = {
|
|
16334
|
+
skill_id: existingSkill2?.skill_id ?? nanoid6(),
|
|
16335
|
+
version: "1.0.0",
|
|
16336
|
+
schema_version: "1",
|
|
16337
|
+
lifecycle: "active",
|
|
16338
|
+
execution_type: "http",
|
|
16339
|
+
created_at: existingSkill2?.created_at ?? new Date().toISOString(),
|
|
16340
|
+
updated_at: new Date().toISOString(),
|
|
16341
|
+
name: domain2,
|
|
16342
|
+
intent_signature: intent,
|
|
16343
|
+
domain: domain2,
|
|
16344
|
+
description: `API skill for ${domain2}`,
|
|
16345
|
+
owner_type: "agent",
|
|
16346
|
+
endpoints: localEndpoints2,
|
|
16347
|
+
operation_graph: buildSkillOperationGraph(localEndpoints2),
|
|
16348
|
+
intents: [intent]
|
|
16349
|
+
};
|
|
16350
|
+
try {
|
|
16351
|
+
cachePublishedSkill(localDraft2);
|
|
16352
|
+
} catch {}
|
|
16353
|
+
return {
|
|
16354
|
+
trace: stampTrace({
|
|
16355
|
+
trace_id: nanoid6(),
|
|
16356
|
+
skill_id: localDraft2.skill_id,
|
|
16357
|
+
endpoint_id: endpoint.endpoint_id,
|
|
16358
|
+
started_at: new Date().toISOString(),
|
|
16359
|
+
completed_at: new Date().toISOString(),
|
|
16360
|
+
success: true,
|
|
16361
|
+
status_code: response.status
|
|
16362
|
+
}),
|
|
16363
|
+
result: parsed
|
|
16364
|
+
};
|
|
16365
|
+
} catch {}
|
|
16366
|
+
}
|
|
16287
16367
|
if (!isHtml(html) || isSpaShell(html))
|
|
16288
16368
|
return;
|
|
16289
16369
|
const built = buildPageArtifactCapture(response.url || url, intent, html, usedStoredAuth);
|
|
@@ -22251,15 +22331,15 @@ async function resolveAndExecute(intent, params = {}, context, projection, optio
|
|
|
22251
22331
|
}
|
|
22252
22332
|
}
|
|
22253
22333
|
}
|
|
22254
|
-
if (context?.url
|
|
22334
|
+
if (context?.url) {
|
|
22255
22335
|
try {
|
|
22256
22336
|
const directRes = await fetch(context.url, {
|
|
22257
|
-
headers: { Accept: "application/json", "User-Agent": "unbrowse/1.0" },
|
|
22258
|
-
signal: AbortSignal.timeout(
|
|
22337
|
+
headers: { Accept: "application/json, text/html;q=0.5", "User-Agent": "unbrowse/1.0" },
|
|
22338
|
+
signal: AbortSignal.timeout(15000),
|
|
22259
22339
|
redirect: "follow"
|
|
22260
22340
|
});
|
|
22261
22341
|
const ct = directRes.headers.get("content-type") ?? "";
|
|
22262
|
-
if (directRes.ok && (ct.includes("json") || ct.includes("+json"))) {
|
|
22342
|
+
if (directRes.ok && (ct.includes("application/json") || ct.includes("+json") || ct.includes("text/json"))) {
|
|
22263
22343
|
const data = await directRes.json();
|
|
22264
22344
|
const trace2 = {
|
|
22265
22345
|
trace_id: nanoid9(),
|
|
@@ -22279,7 +22359,10 @@ async function resolveAndExecute(intent, params = {}, context, projection, optio
|
|
|
22279
22359
|
timing: t
|
|
22280
22360
|
};
|
|
22281
22361
|
}
|
|
22282
|
-
} catch {
|
|
22362
|
+
} catch (err) {
|
|
22363
|
+
const msg = err instanceof Error ? err.message : String(err);
|
|
22364
|
+
console.log(`[direct-fetch] ${context.url} skipped: ${msg.slice(0, 100)}`);
|
|
22365
|
+
}
|
|
22283
22366
|
}
|
|
22284
22367
|
if (process.env.UNBROWSE_LOCAL_ONLY === "1" && !forceCapture) {
|
|
22285
22368
|
return buildNoCachedMatch();
|
|
@@ -22438,6 +22521,32 @@ async function resolveAndExecute(intent, params = {}, context, projection, optio
|
|
|
22438
22521
|
throw error;
|
|
22439
22522
|
}
|
|
22440
22523
|
timing.execute_ms = Date.now() - te0;
|
|
22524
|
+
const captureErrCheck = result?.error;
|
|
22525
|
+
if (captureErrCheck === "connection_failed" || captureErrCheck === "capture_failed") {
|
|
22526
|
+
console.warn(`[capture] ${captureErrCheck} detected — restarting Kuri and retrying once`);
|
|
22527
|
+
try {
|
|
22528
|
+
const kuri = await Promise.resolve().then(() => (init_client(), exports_client));
|
|
22529
|
+
await kuri.stop().catch(() => {});
|
|
22530
|
+
await new Promise((r) => setTimeout(r, 500));
|
|
22531
|
+
} catch {}
|
|
22532
|
+
try {
|
|
22533
|
+
const retryCaptureSkill = await getOrCreateBrowserCaptureSkill();
|
|
22534
|
+
const retryOut = await withAbortableOpTimeout("live_capture_retry", LIVE_CAPTURE_TIMEOUT_MS, (signal) => executeSkill(retryCaptureSkill, { ...params, url: context.url, intent }, undefined, {
|
|
22535
|
+
...options,
|
|
22536
|
+
intent,
|
|
22537
|
+
contextUrl: context?.url,
|
|
22538
|
+
signal
|
|
22539
|
+
}));
|
|
22540
|
+
if (retryOut.trace.success || !retryOut.result?.error) {
|
|
22541
|
+
trace = retryOut.trace;
|
|
22542
|
+
result = retryOut.result;
|
|
22543
|
+
learned_skill = retryOut.learned_skill;
|
|
22544
|
+
console.log(`[capture] retry after Kuri restart succeeded`);
|
|
22545
|
+
}
|
|
22546
|
+
} catch (retryErr) {
|
|
22547
|
+
console.warn(`[capture] retry failed: ${retryErr instanceof Error ? retryErr.message : retryErr}`);
|
|
22548
|
+
}
|
|
22549
|
+
}
|
|
22441
22550
|
const captureResult = result;
|
|
22442
22551
|
const authRecommended = captureResult?.auth_recommended === true;
|
|
22443
22552
|
const directDomCaptureResult = trace.success && trace.endpoint_id !== "browser-capture" && !!result && typeof result === "object" && "_extraction" in result;
|
package/package.json
CHANGED