unbrowse 9.6.2 → 9.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/runtime/cli.js +299 -190
- package/runtime/mcp.js +299 -190
- package/vendor/kuri/darwin-arm64/libkuri_ffi.dylib +0 -0
- package/vendor/kuri/darwin-x64/libkuri_ffi.dylib +0 -0
- package/vendor/kuri/linux-arm64/libkuri_ffi.so +0 -0
- package/vendor/kuri/linux-x64/kuri +0 -0
- package/vendor/kuri/linux-x64/libkuri_ffi.so +0 -0
- package/vendor/kuri/manifest.json +7 -7
- package/vendor/kuri/win-x64/kuri.exe +0 -0
package/package.json
CHANGED
package/runtime/cli.js
CHANGED
|
@@ -2350,7 +2350,7 @@ var init_telemetry = __esm(() => {
|
|
|
2350
2350
|
});
|
|
2351
2351
|
|
|
2352
2352
|
// .tmp-runtime-src/build-info.generated.ts
|
|
2353
|
-
var BUILD_RELEASE_VERSION = "9.
|
|
2353
|
+
var BUILD_RELEASE_VERSION = "9.8.0", BUILD_GIT_SHA = "255142bb4c25", BUILD_CODE_HASH = "5d9ebf619c61", BUILD_RELEASE_MANIFEST_BASE64 = "eyJzY2hlbWFfdmVyc2lvbiI6MSwicmVsZWFzZV92ZXJzaW9uIjoiOS44LjAiLCJnaXRfc2hhIjoiMjU1MTQyYmI0YzI1IiwiY29kZV9oYXNoIjoiNWQ5ZWJmNjE5YzYxIiwidHJhY2VfdmVyc2lvbiI6IjVkOWViZjYxOWM2MUAyNTUxNDJiYjRjMjUiLCJpc3N1ZWRfYXQiOiIyMDI2LTA2LTE4VDA2OjA4OjA2LjY3MVoifQ", BUILD_RELEASE_MANIFEST_SIGNATURE = "Tw3ScHlFYGaEtPwKLhcPI_lgQUgjAZmhWKSi4fDFMw4", BUILD_DEFAULT_BACKEND_URL = "https://beta-api.unbrowse.ai", BUILD_DEFAULT_PROFILE = "";
|
|
2354
2354
|
|
|
2355
2355
|
// .tmp-runtime-src/version.ts
|
|
2356
2356
|
import { createHash as createHash7 } from "crypto";
|
|
@@ -46122,6 +46122,42 @@ function urlPathLooksListLike2(contextUrl) {
|
|
|
46122
46122
|
return false;
|
|
46123
46123
|
}
|
|
46124
46124
|
}
|
|
46125
|
+
/**
 * Reduce a link target to a coarse path "shape" in which identifier-looking
 * segments are replaced by the literal `{id}`.
 *
 * A segment is treated as an identifier when it contains a run of three or
 * more digits, is longer than 30 characters, consists only of eight or more
 * hex digits/dashes, or ends in a dash followed by two or more digits.
 *
 * @param {string} href - Absolute or relative link target.
 * @returns {string|null} The first three lower-cased segments joined by "/",
 *   or null when the path is empty, no segment looks like an identifier, or
 *   the link does not use http(s).
 */
function entityPointerTemplate(href) {
  let pathname;
  try {
    // The placeholder base only exists so relative hrefs parse; the host is never used.
    const parsed = new URL(href, "https://_");
    // tel:/mailto:/javascript: targets are not pages; without this guard a
    // phone number would be read as an "{id}" path segment.
    if (parsed.protocol !== "http:" && parsed.protocol !== "https:") {
      return null;
    }
    pathname = parsed.pathname;
  } catch {
    // Unparseable href: fall back to the raw text without query or fragment.
    pathname = String(href).split(/[?#]/)[0];
  }
  const segments = pathname.split("/").filter(Boolean);
  if (segments.length === 0) {
    return null;
  }
  let hasId = false;
  const shape = segments.map((segment) => {
    const low = segment.toLowerCase();
    const looksLikeId =
      /\d{3,}/.test(low) ||
      low.length > 30 ||
      /^[0-9a-f-]{8,}$/.test(low) ||
      /-\d{2,}$/.test(low);
    if (!looksLikeId) {
      return low;
    }
    hasId = true;
    return "{id}";
  });
  // hasId is computed over ALL segments, while the shape is cut to three.
  return hasId ? shape.slice(0, 3).join("/") : null;
}
|
|
46148
|
+
/**
 * Report whether a set of links contains at least `min` distinct hrefs that
 * share one entity-pointer template (see entityPointerTemplate).
 *
 * Exact duplicate hrefs are counted once, so a single link repeated in
 * several places on a page cannot pass for a collection on its own.
 *
 * @param {Iterable<string>} hrefs - Link targets to inspect.
 * @param {number} [min=4] - Number of links that must share a template.
 * @returns {boolean} True as soon as one template reaches `min` links.
 */
function linksFormEntityCollection(hrefs, min = 4) {
  const countsByTemplate = new Map();
  const seenHrefs = new Set();
  for (const href of hrefs) {
    if (seenHrefs.has(href)) {
      continue;
    }
    seenHrefs.add(href);
    const template = entityPointerTemplate(href);
    if (!template) {
      continue;
    }
    const count = (countsByTemplate.get(template) ?? 0) + 1;
    if (count >= min) {
      return true;
    }
    countsByTemplate.set(template, count);
  }
  return false;
}
|
|
46125
46161
|
function cardinalityMatches2(intent, subject, opts) {
|
|
46126
46162
|
const wantsMany = isListLikeIntent2(intent) || urlPathLooksListLike2(opts?.contextUrl);
|
|
46127
46163
|
if (!wantsMany)
|
|
@@ -46694,6 +46730,7 @@ __export(exports_capture, {
|
|
|
46694
46730
|
tagRequestProvenance: () => tagRequestProvenance,
|
|
46695
46731
|
shutdownAllBrowsers: () => shutdownAllBrowsers,
|
|
46696
46732
|
shouldStopHydrationWait: () => shouldStopHydrationWait,
|
|
46733
|
+
shouldScrollStimulate: () => shouldScrollStimulate,
|
|
46697
46734
|
selectPerformanceReplayCandidates: () => selectPerformanceReplayCandidates,
|
|
46698
46735
|
registerDocumentStartScript: () => registerDocumentStartScript,
|
|
46699
46736
|
navigatePageForCapture: () => navigatePageForCapture,
|
|
@@ -46956,6 +46993,9 @@ function extractRouteHint(url) {
|
|
|
46956
46993
|
} catch {}
|
|
46957
46994
|
return null;
|
|
46958
46995
|
}
|
|
46996
|
+
/**
 * Decide whether a capture should scroll the page: true when the intent is
 * list-like or the capture URL's path looks like a listing.
 *
 * @param {string} captureUrl - URL being captured.
 * @param {string} intent - Intent text for the capture.
 * @returns {*} Result of `isListLikeIntent2(intent) || urlPathLooksListLike2(captureUrl)`.
 */
function shouldScrollStimulate(captureUrl, intent) {
  const intentWantsList = isListLikeIntent2(intent);
  // The URL heuristic is only consulted when the intent check is falsy.
  return intentWantsList || urlPathLooksListLike2(captureUrl);
}
|
|
46959
46999
|
function deriveIntentHints(captureUrl, intent) {
|
|
46960
47000
|
const derivedHints = new Set;
|
|
46961
47001
|
if (captureUrl) {
|
|
@@ -47583,8 +47623,7 @@ async function waitForContentReady(tabId, captureUrl, intent, responseBodies) {
|
|
|
47583
47623
|
log("capture", `intent-aware wait: already captured API matching one of [${[...derivedHints].join(", ")}], skipping`);
|
|
47584
47624
|
}
|
|
47585
47625
|
}
|
|
47586
|
-
|
|
47587
|
-
if (captureUrl && responseBodies && (/search|explore|trending|tabs|discover/i.test(captureUrl) || /\b(person|people|profile|profiles|user|users|member|members|company|companies|organization|organisations|business|post|posts|tweet|tweets|status|statuses)\b/.test(lowerIntent))) {
|
|
47626
|
+
if (captureUrl && responseBodies && shouldScrollStimulate(captureUrl, intent)) {
|
|
47588
47627
|
try {
|
|
47589
47628
|
const before = responseBodies.size;
|
|
47590
47629
|
await evaluate(tabId, "window.scrollTo(0, Math.max(window.innerHeight, Math.min(document.body.scrollHeight, window.innerHeight * 2)))");
|
|
@@ -48658,6 +48697,7 @@ var init_capture = __esm(async () => {
|
|
|
48658
48697
|
init_domain();
|
|
48659
48698
|
init_logger();
|
|
48660
48699
|
init_header_classify();
|
|
48700
|
+
init_cardinality2();
|
|
48661
48701
|
init_browser_access();
|
|
48662
48702
|
await init_vault();
|
|
48663
48703
|
waitQueue = [];
|
|
@@ -56326,6 +56366,215 @@ var init_curl_impersonate_fallback = __esm(() => {
|
|
|
56326
56366
|
};
|
|
56327
56367
|
});
|
|
56328
56368
|
|
|
56369
|
+
// .tmp-runtime-src/execution/search-forms.ts
|
|
56370
|
+
var exports_search_forms = {};
|
|
56371
|
+
__export(exports_search_forms, {
|
|
56372
|
+
isStructuredSearchForm: () => isStructuredSearchForm,
|
|
56373
|
+
fillSearchRoute: () => fillSearchRoute,
|
|
56374
|
+
detectSearchForms: () => detectSearchForms,
|
|
56375
|
+
deriveSearchRouteTemplates: () => deriveSearchRouteTemplates
|
|
56376
|
+
});
|
|
56377
|
+
/**
 * Infer path-based "search route" templates from the links in an HTML page.
 *
 * Every root-relative href with one to four path segments is considered. Each
 * slug-like segment (a letter followed by 1-40 letters, digits or dashes, with
 * no run of three or more digits) is swapped in turn for `{query}`; templates
 * whose slot was filled by at least `minDistinct` different values are kept.
 *
 * Protocol-relative links (`//host/path`) are skipped: they point at another
 * host and must not be read as paths on the current origin.
 *
 * @param {string} html - Raw HTML to scan.
 * @param {number} [minDistinct=4] - Distinct slot values a template needs.
 * @returns {{template: string, samples: string[], count: number}[]} Templates
 *   ordered by descending distinct-value count; `samples` holds up to five
 *   lower-cased values.
 */
function deriveSearchRouteTemplates(html, minDistinct = 4) {
  const hrefs = new Set();
  // hrefs carrying a query string or fragment never match this pattern.
  for (const match of html.matchAll(/href\s*=\s*["'](\/(?!\/)[^"'?#\s]+)["']/gi)) {
    hrefs.add(match[1]);
  }
  const valuesByTemplate = new Map();
  for (const href of hrefs) {
    const segs = href.split("/").filter(Boolean);
    if (segs.length < 1 || segs.length > 4) {
      continue;
    }
    const trailing = href.endsWith("/") ? "/" : "";
    segs.forEach((val, i) => {
      // The slug pattern has no ".", so file-like segments are excluded too.
      if (!/^[a-z][a-z0-9-]{1,40}$/i.test(val) || /\d{3,}/.test(val)) {
        return;
      }
      const shape = segs.map((s, j) => (j === i ? "{query}" : s)).join("/");
      const key = `/${shape}${trailing}`;
      let values = valuesByTemplate.get(key);
      if (!values) {
        values = new Set();
        valuesByTemplate.set(key, values);
      }
      values.add(val.toLowerCase());
    });
  }
  return [...valuesByTemplate]
    .filter(([, values]) => values.size >= minDistinct)
    .map(([template, values]) => ({
      template,
      samples: [...values].slice(0, 5),
      count: values.size,
    }))
    .sort((a, b) => b.count - a.count);
}
|
|
56405
|
+
/**
 * Build a concrete URL from an origin, a route template and a query.
 *
 * The query is trimmed, lower-cased and percent-encoded, then substituted for
 * the first `{query}` placeholder in the template. Trailing slashes on the
 * origin are removed before the two parts are joined.
 *
 * @param {string} origin - Site origin, e.g. "https://example.com".
 * @param {string} template - Path template containing `{query}`.
 * @param {string} query - Free-text query to insert.
 * @returns {string} The filled-in URL.
 */
function fillSearchRoute(origin, template, query) {
  const normalizedQuery = query.trim().toLowerCase();
  const encodedQuery = encodeURIComponent(normalizedQuery);
  const base = origin.replace(/\/+$/, "");
  const path = template.replace("{query}", encodedQuery);
  return `${base}${path}`;
}
|
|
56409
|
+
/**
 * Check that a form spec has at least one field and a submit selector.
 *
 * @param {{fields: Array, submit_selector?: string}} spec - Form description.
 * @returns {boolean} True when both conditions hold.
 */
function isStructuredSearchForm(spec) {
  const hasFields = spec.fields.length > 0;
  return hasFields && Boolean(spec.submit_selector);
}
|
|
56412
|
+
/**
 * Build a CSS selector for a <form> from its parsed attributes.
 *
 * Preference order: id, name, action, then position. Attribute values are
 * escaped for use inside a quoted CSS string, and an id that is not a plain
 * identifier (e.g. "main.search" or "form:q") is matched with `[id="..."]`
 * instead of `#id`, which would otherwise be an invalid or wrong selector.
 *
 * @param {Object<string, string>} attribs - Parsed attributes of the form tag.
 * @param {number} index - Zero-based position of the form in the document.
 * @returns {string} A CSS selector for the form.
 */
function formSelectorFromElement(attribs, index) {
  // Escape backslashes, double quotes and newlines for a double-quoted CSS string.
  const quote = (value) =>
    `"${String(value).replace(/["\\]/g, "\\$&").replace(/\n/g, "\\a ")}"`;
  const { id, name, action } = attribs;
  if (id) {
    return /^-?[A-Za-z_][\w-]*$/.test(id) ? `form#${id}` : `form[id=${quote(id)}]`;
  }
  if (name) {
    return `form[name=${quote(name)}]`;
  }
  if (action) {
    return `form[action=${quote(action)}]`;
  }
  // NOTE(review): :nth-of-type counts siblings under one parent, while `index`
  // appears to be a document-wide form counter — confirm against callers.
  return `form:nth-of-type(${index + 1})`;
}
|
|
56424
|
+
/**
 * Build a CSS selector for a form control from its parsed attributes.
 *
 * Preference order: id, name, then the bare tag name. Values are escaped for
 * use inside a quoted CSS string, and an id that is not a plain identifier is
 * matched with `[id="..."]` rather than `#id`.
 *
 * @param {Object<string, string>} attribs - Parsed attributes of the control.
 * @param {string} tagName - Lower-cased tag name ("input", "select", "textarea").
 * @returns {string} A CSS selector for the control.
 */
function inputSelectorFromElement(attribs, tagName) {
  // Escape backslashes, double quotes and newlines for a double-quoted CSS string.
  const quote = (value) =>
    `"${String(value).replace(/["\\]/g, "\\$&").replace(/\n/g, "\\a ")}"`;
  const { id, name } = attribs;
  if (id) {
    return /^-?[A-Za-z_][\w-]*$/.test(id) ? `#${id}` : `[id=${quote(id)}]`;
  }
  if (name) {
    return `${tagName}[name=${quote(name)}]`;
  }
  return tagName;
}
|
|
56433
|
+
/**
 * Map an HTML control to the field type used in form specs.
 *
 * @param {string|undefined} typeAttr - Value of the `type` attribute, if any.
 * @param {string} tagName - Lower-cased tag name of the control.
 * @returns {"select"|"text"|"radio"|"checkbox"|"date"|"hidden"|null} The field
 *   type, or null for controls that are never treated as fields (submit,
 *   button, image, reset, password, file).
 */
function mapInputType(typeAttr, tagName) {
  if (tagName === "select") {
    return "select";
  }
  if (tagName === "textarea") {
    return "text";
  }
  const type = (typeAttr ?? "text").toLowerCase();
  switch (type) {
    case "radio":
    case "checkbox":
    case "date":
    case "hidden":
      return type;
    case "submit":
    case "button":
    case "image":
    case "reset":
    case "password":
    case "file":
      return null;
    default:
      // Every remaining type is handled as free text. The previous lookup in
      // SUPPORTED_INPUT_TYPES returned "text" on both branches, so it is dropped.
      return "text";
  }
}
|
|
56455
|
+
/**
 * Parse the attribute portion of an HTML start tag into a plain object.
 *
 * Handles double-quoted, single-quoted and unquoted values; an attribute
 * without a value maps to "". Attribute names are lower-cased, because HTML
 * attribute names are case-insensitive and callers read `name`, `type` and
 * `id` in lower case. When an attribute is repeated, the last value wins.
 *
 * @param {string} attrStr - Text between the tag name and the closing ">".
 * @returns {Object<string, string>} Attribute name to value.
 */
function parseAttrs(attrStr) {
  const attrs = {};
  const attrRegex = /(\w[\w-]*)(?:\s*=\s*(?:"([^"]*)"|'([^']*)'|(\S+)))?/g;
  for (const [, rawName, doubleQuoted, singleQuoted, bare] of attrStr.matchAll(attrRegex)) {
    attrs[rawName.toLowerCase()] = doubleQuoted ?? singleQuoted ?? bare ?? "";
  }
  return attrs;
}
|
|
56464
|
+
/**
 * Scan raw HTML with regular expressions and describe every form that has a
 * submit control, at least one non-hidden field and no login-style field.
 *
 * Fixes over the previous version:
 *  - a <select> only receives the <option> values found between its own
 *    opening tag and the next </select> (or next <select>), instead of every
 *    option in the whole form;
 *  - option values and the submit `type` may be single-quoted or unquoted;
 *  - `type="PASSWORD"` (any case) marks the form as a login form.
 *
 * @param {string} html - Raw HTML to scan.
 * @returns {{form_selector: string, submit_selector: string, fields: Object[]}[]}
 *   One entry per accepted form. Each field has `name`, `type`, `selector`,
 *   `required`, and `options` for selects that have any.
 */
function detectSearchForms(html) {
  const results = [];
  let formIndex = 0;
  for (const formMatch of html.matchAll(/<form([^>]*)>([\s\S]*?)<\/form>/gi)) {
    const formElAttrs = parseAttrs(formMatch[1]);
    const formBody = formMatch[2];
    const fields = [];
    const seenNames = new Set();
    let hasLoginField = false;
    let hasSearchLikeField = false;

    for (const fieldMatch of formBody.matchAll(/<(input|select|textarea)([^>]*)\/?>/gi)) {
      const tagName = fieldMatch[1].toLowerCase();
      const fieldAttrs = parseAttrs(fieldMatch[2]);
      const name = fieldAttrs.name ?? "";
      const typeAttr = fieldAttrs.type;
      const lowerName = name.toLowerCase();
      if (LOGIN_FIELD_NAMES.has(lowerName) || (typeAttr ?? "").toLowerCase() === "password") {
        hasLoginField = true;
      }
      if (SEARCH_FIELD_NAMES.has(lowerName)) {
        hasSearchLikeField = true;
      }
      const mappedType = mapInputType(typeAttr, tagName);
      if (!mappedType) {
        continue;
      }
      // Only free-text controls may be unnamed.
      if (!name && mappedType !== "text") {
        continue;
      }
      // Radio groups legitimately repeat a name; everything else is de-duplicated.
      if (seenNames.has(name) && mappedType !== "radio") {
        continue;
      }
      if (name) {
        seenNames.add(name);
      }

      let options;
      if (tagName === "select") {
        // Restrict the option scan to this select's own content.
        const afterOpenTag = formBody.slice(fieldMatch.index + fieldMatch[0].length);
        const endAt = afterOpenTag.search(/<\/select\s*>|<select\b/i);
        const selectBody = endAt === -1 ? afterOpenTag : afterOpenTag.slice(0, endAt);
        const optionRegex = /<option\b[^>]*?\svalue\s*=\s*(?:"([^"]*)"|'([^']*)'|([^\s>]+))/gi;
        const values = Array.from(selectBody.matchAll(optionRegex), (m) => m[1] ?? m[2] ?? m[3]);
        if (values.length > 0) {
          options = values;
        }
      }

      fields.push({
        name: name || `unnamed_${fields.length}`,
        type: mappedType,
        selector: inputSelectorFromElement(fieldAttrs, tagName),
        ...(options ? { options } : {}),
        required: fieldAttrs.required !== undefined,
      });
    }

    let submitSelector = "";
    if (/<button[^>]*type\s*=\s*["']?submit\b/i.test(formBody)) {
      submitSelector = "button[type=submit]";
    } else if (/<input[^>]*type\s*=\s*["']?submit\b/i.test(formBody)) {
      submitSelector = 'input[type="submit"]';
    } else if (/<button/i.test(formBody)) {
      submitSelector = "button";
    }

    const nonHiddenFields = fields.filter((f) => f.type !== "hidden");
    // NOTE(review): the last clause is always true once nonHiddenFields is
    // non-empty, so hasSearchLikeField does not currently affect the outcome.
    if (
      !hasLoginField &&
      nonHiddenFields.length > 0 &&
      submitSelector &&
      (hasSearchLikeField || nonHiddenFields.length >= 1)
    ) {
      results.push({
        form_selector: formSelectorFromElement(formElAttrs, formIndex),
        submit_selector: submitSelector,
        fields,
      });
    }
    formIndex++;
  }
  return results;
}
|
|
56539
|
+
// Lookup sets used by the search-form helpers. They are declared up front and
// filled in when the bundler runs this module's initializer.
var SEARCH_FIELD_NAMES, LOGIN_FIELD_NAMES, SUPPORTED_INPUT_TYPES;
var init_search_forms = __esm(() => {
  const setOf = (...entries) => new Set(entries);
  // Field names that suggest a free-text search box.
  SEARCH_FIELD_NAMES = setOf(
    "q", "query", "search", "keyword", "keywords", "term", "terms",
    "find", "lookup", "filter", "s", "text", "input"
  );
  // Field names that mark a form as a login/credential form.
  LOGIN_FIELD_NAMES = setOf(
    "password", "passwd", "pass", "pwd", "confirm_password",
    "username", "email", "login", "user"
  );
  // Input `type` values listed as supported.
  SUPPORTED_INPUT_TYPES = setOf(
    "text", "search", "hidden", "date", "number", "tel", "email"
  );
});
|
|
56577
|
+
|
|
56329
56578
|
// node_modules/.bun/@mixmark-io+domino@2.2.0/node_modules/@mixmark-io/domino/lib/Event.js
|
|
56330
56579
|
var require_Event = __commonJS((exports, module) => {
|
|
56331
56580
|
module.exports = Event2;
|
|
@@ -72977,20 +73226,24 @@ function buildDirectDocumentResult(url, html, contentType, intent) {
|
|
|
72977
73226
|
const hits = intentTokens.filter((tok) => haystack.includes(tok));
|
|
72978
73227
|
const hitRate = hits.length / intentTokens.length;
|
|
72979
73228
|
if (hitRate < 0.34) {
|
|
72980
|
-
|
|
72981
|
-
|
|
72982
|
-
|
|
72983
|
-
|
|
72984
|
-
|
|
72985
|
-
|
|
72986
|
-
|
|
72987
|
-
|
|
72988
|
-
|
|
72989
|
-
|
|
73229
|
+
const isCollection = isListLikeIntent2(intent) && linksFormEntityCollection(Array.from(html.matchAll(/href\s*=\s*["']([^"']+)["']/gi), (m) => m[1]));
|
|
73230
|
+
if (!isCollection) {
|
|
73231
|
+
return {
|
|
73232
|
+
rejected: true,
|
|
73233
|
+
reason: "intent_mismatch",
|
|
73234
|
+
evidence: {
|
|
73235
|
+
intent_tokens: intentTokens,
|
|
73236
|
+
response_token_hits: hits,
|
|
73237
|
+
response_token_hit_rate: hitRate,
|
|
73238
|
+
html_bytes: html.length
|
|
73239
|
+
}
|
|
73240
|
+
};
|
|
73241
|
+
}
|
|
72990
73242
|
}
|
|
72991
73243
|
}
|
|
72992
73244
|
}
|
|
72993
73245
|
const { url_template, input_params, path_params, query } = extractHtmlHoles(url);
|
|
73246
|
+
const routing_candidates = buildSearchRouteCandidates(html, url, intent);
|
|
72994
73247
|
return {
|
|
72995
73248
|
rejected: false,
|
|
72996
73249
|
title,
|
|
@@ -73004,12 +73257,40 @@ function buildDirectDocumentResult(url, html, contentType, intent) {
|
|
|
73004
73257
|
text_excerpt: bodyText.slice(0, MARKDOWN_BUDGET),
|
|
73005
73258
|
markdown: htmlToMarkdownSafe(html, bodyText),
|
|
73006
73259
|
tables: extractTables(html),
|
|
73260
|
+
...routing_candidates.length > 0 ? { routing_candidates } : {},
|
|
73007
73261
|
extraction: {
|
|
73008
73262
|
source: "direct-document",
|
|
73009
73263
|
rejected: false
|
|
73010
73264
|
}
|
|
73011
73265
|
};
|
|
73012
73266
|
}
|
|
73267
|
+
/**
 * Extract a search term from an intent sentence.
 *
 * Words are runs of a letter followed by at least two letters or digits, taken
 * from the lower-cased intent. Words found in QUERY_STOPWORDS, or equal to a
 * dot/dash-separated piece of the URL's hostname, are dropped; the remaining
 * words are de-duplicated in order of first appearance.
 *
 * @param {string} intent - Free-text intent.
 * @param {string} url - Page URL; an unparseable URL contributes no host tokens.
 * @returns {string} Remaining words joined by single spaces (possibly "").
 */
function intentQueryTerm(intent, url) {
  let hostTokens;
  try {
    hostTokens = new Set(new URL(url).hostname.toLowerCase().split(/[.-]/));
  } catch {
    hostTokens = new Set();
  }
  const words = intent.toLowerCase().match(/[a-z][a-z0-9]{2,}/g) ?? [];
  const kept = new Set();
  for (const word of words) {
    if (QUERY_STOPWORDS.has(word) || hostTokens.has(word)) {
      continue;
    }
    kept.add(word);
  }
  return [...kept].join(" ").trim();
}
|
|
73275
|
+
/**
 * Propose up to three concrete search URLs for a list-like intent, built from
 * the route templates that can be derived from the page's own links.
 *
 * @param {string} html - Raw HTML of the fetched page.
 * @param {string} url - URL the HTML was fetched from.
 * @param {string} intent - Free-text intent.
 * @returns {{url: string, template: string, query: string, samples: string[]}[]}
 *   Candidates in template order; empty when the intent is missing or not
 *   list-like, no query term remains, or `url` cannot be parsed.
 */
function buildSearchRouteCandidates(html, url, intent) {
  if (!intent) {
    return [];
  }
  if (!isListLikeIntent2(intent)) {
    return [];
  }
  const queryTerm = intentQueryTerm(intent, url);
  if (!queryTerm) {
    return [];
  }
  let origin;
  try {
    ({ origin } = new URL(url));
  } catch {
    return [];
  }
  const candidates = [];
  for (const { template, samples } of deriveSearchRouteTemplates(html).slice(0, 3)) {
    candidates.push({
      url: fillSearchRoute(origin, template, queryTerm),
      template,
      query: queryTerm,
      samples,
    });
  }
  return candidates;
}
|
|
73013
73294
|
async function fetchDirectDocument(url) {
|
|
73014
73295
|
if (!isDirectDocumentEligibleUrl(url))
|
|
73015
73296
|
return null;
|
|
@@ -73197,10 +73478,12 @@ function cellText(html) {
|
|
|
73197
73478
|
/**
 * Decode a small set of HTML character references in text: nbsp, amp, lt, gt,
 * quot, and the apostrophe forms (apos, #39, #x27).
 *
 * NOTE(review): the entity patterns in this function had been decoded into
 * their literal characters, which turned every replace into a no-op; the set
 * handled here is reconstructed from the replacement values — confirm against
 * the upstream source.
 *
 * Decoding happens in one pass, so text that spells out an escaped entity
 * (an ampersand reference followed by "lt;") is decoded one level only.
 *
 * @param {string} input - Text that may contain character references.
 * @returns {string} Text with the supported references replaced.
 */
function decodeHtmlEntityText(input) {
  const replacements = {
    nbsp: " ",
    amp: "&",
    lt: "<",
    gt: ">",
    quot: '"',
    apos: "'",
    "#39": "'",
    "#x27": "'",
  };
  return input.replace(/&(nbsp|amp|lt|gt|quot|apos|#39|#x27);/g, (_, name) => replacements[name]);
}
|
|
73200
|
-
var HTML_RE, MIN_DIRECT_DOCUMENT_HTML_BYTES = 5000, CHALLENGE_RE, INTERSTITIAL_RE, MIN_DIRECT_DOCUMENT_BODY_TEXT = 500, SPA_HYDRATION_RE, SPA_HYDRATION_BODY_TEXT_FLOOR = 2000, SPA_ROOT_CONTAINER_RE, PARKED_RE, INTENT_STOPWORDS, MARKDOWN_BUDGET, MAX_TABLES = 10, MAX_TABLE_ROWS = 50, buildBloombergDirectDocumentResult;
|
|
73481
|
+
var HTML_RE, MIN_DIRECT_DOCUMENT_HTML_BYTES = 5000, CHALLENGE_RE, INTERSTITIAL_RE, MIN_DIRECT_DOCUMENT_BODY_TEXT = 500, SPA_HYDRATION_RE, SPA_HYDRATION_BODY_TEXT_FLOOR = 2000, SPA_ROOT_CONTAINER_RE, PARKED_RE, INTENT_STOPWORDS, MARKDOWN_BUDGET, MAX_TABLES = 10, MAX_TABLE_ROWS = 50, QUERY_STOPWORDS, buildBloombergDirectDocumentResult;
|
|
73201
73482
|
var init_direct_document = __esm(() => {
|
|
73202
73483
|
init_curl_impersonate_fallback();
|
|
73203
73484
|
init_proxy_fetch();
|
|
73485
|
+
init_cardinality2();
|
|
73486
|
+
init_search_forms();
|
|
73204
73487
|
HTML_RE = /text\/html|application\/xhtml\+xml/i;
|
|
73205
73488
|
CHALLENGE_RE = /\b(access denied|are you a robot|captcha|just a moment|pardon our interruption|robot check|unusual traffic|verify you are human)\b/i;
|
|
73206
73489
|
INTERSTITIAL_RE = /\b(please wait for verification|just a moment|cf-mitigated|datadome|akamai bot|perimeterx|sign in to continue|log in to (?:continue|access)|javascript is not available)\b/i;
|
|
@@ -73283,6 +73566,7 @@ var init_direct_document = __esm(() => {
|
|
|
73283
73566
|
"look"
|
|
73284
73567
|
]);
|
|
73285
73568
|
MARKDOWN_BUDGET = Math.max(1000, Number(process.env.UNBROWSE_MARKDOWN_BUDGET ?? "12000") || 12000);
|
|
73569
|
+
QUERY_STOPWORDS = new Set(("resolve unbrowse execute run walk go fetch open view want need please " + "find search browse list lookup discover show get me a an the on of for in to " + "with and or all my your this that some good best top new latest cheap near").split(" "));
|
|
73286
73570
|
buildBloombergDirectDocumentResult = buildDirectDocumentResult;
|
|
73287
73571
|
});
|
|
73288
73572
|
|
|
@@ -121955,181 +122239,6 @@ function clampToFloor(score, demotion, floor) {
|
|
|
121955
122239
|
}
|
|
121956
122240
|
var HARD_NEGATIVE_FLOOR = -2000, WEAK_NEGATIVE_FLOOR = -400, PAGE_ARTIFACT_DEMOTION = 800, EMPTY_ENTITY_BAG_DEMOTION = 650, EMPTY_ENTITY_BAG_FLOOR = -700;
|
|
121957
122241
|
|
|
121958
|
-
// .tmp-runtime-src/execution/search-forms.ts
|
|
121959
|
-
var exports_search_forms = {};
|
|
121960
|
-
__export(exports_search_forms, {
|
|
121961
|
-
isStructuredSearchForm: () => isStructuredSearchForm,
|
|
121962
|
-
detectSearchForms: () => detectSearchForms
|
|
121963
|
-
});
|
|
121964
|
-
function isStructuredSearchForm(spec) {
|
|
121965
|
-
return spec.fields.length > 0 && !!spec.submit_selector;
|
|
121966
|
-
}
|
|
121967
|
-
function formSelectorFromElement(attribs, index2) {
|
|
121968
|
-
const id = attribs.id;
|
|
121969
|
-
if (id)
|
|
121970
|
-
return `form#${id}`;
|
|
121971
|
-
const name = attribs.name;
|
|
121972
|
-
if (name)
|
|
121973
|
-
return `form[name="${name}"]`;
|
|
121974
|
-
const action2 = attribs.action;
|
|
121975
|
-
if (action2)
|
|
121976
|
-
return `form[action="${action2}"]`;
|
|
121977
|
-
return `form:nth-of-type(${index2 + 1})`;
|
|
121978
|
-
}
|
|
121979
|
-
function inputSelectorFromElement(attribs, tagName) {
|
|
121980
|
-
const id = attribs.id;
|
|
121981
|
-
if (id)
|
|
121982
|
-
return `#${id}`;
|
|
121983
|
-
const name = attribs.name;
|
|
121984
|
-
if (name)
|
|
121985
|
-
return `${tagName}[name="${name}"]`;
|
|
121986
|
-
return tagName;
|
|
121987
|
-
}
|
|
121988
|
-
function mapInputType(typeAttr, tagName) {
|
|
121989
|
-
if (tagName === "select")
|
|
121990
|
-
return "select";
|
|
121991
|
-
if (tagName === "textarea")
|
|
121992
|
-
return "text";
|
|
121993
|
-
const t = (typeAttr ?? "text").toLowerCase();
|
|
121994
|
-
if (t === "radio")
|
|
121995
|
-
return "radio";
|
|
121996
|
-
if (t === "checkbox")
|
|
121997
|
-
return "checkbox";
|
|
121998
|
-
if (t === "date")
|
|
121999
|
-
return "date";
|
|
122000
|
-
if (t === "hidden")
|
|
122001
|
-
return "hidden";
|
|
122002
|
-
if (t === "submit" || t === "button" || t === "image" || t === "reset")
|
|
122003
|
-
return null;
|
|
122004
|
-
if (t === "password" || t === "file")
|
|
122005
|
-
return null;
|
|
122006
|
-
if (SUPPORTED_INPUT_TYPES.has(t))
|
|
122007
|
-
return "text";
|
|
122008
|
-
return "text";
|
|
122009
|
-
}
|
|
122010
|
-
function parseAttrs(attrStr) {
|
|
122011
|
-
const attrs = {};
|
|
122012
|
-
const attrRegex = /(\w[\w-]*)(?:\s*=\s*(?:"([^"]*)"|'([^']*)'|(\S+)))?/g;
|
|
122013
|
-
let m;
|
|
122014
|
-
while ((m = attrRegex.exec(attrStr)) !== null) {
|
|
122015
|
-
attrs[m[1]] = m[2] ?? m[3] ?? m[4] ?? "";
|
|
122016
|
-
}
|
|
122017
|
-
return attrs;
|
|
122018
|
-
}
|
|
122019
|
-
function detectSearchForms(html3) {
|
|
122020
|
-
const results = [];
|
|
122021
|
-
const formRegex = /<form([^>]*)>([\s\S]*?)<\/form>/gi;
|
|
122022
|
-
let formMatch;
|
|
122023
|
-
let formIndex = 0;
|
|
122024
|
-
while ((formMatch = formRegex.exec(html3)) !== null) {
|
|
122025
|
-
const formAttrs = formMatch[1];
|
|
122026
|
-
const formBody = formMatch[2];
|
|
122027
|
-
const formElAttrs = parseAttrs(formAttrs);
|
|
122028
|
-
const fieldRegex = /<(input|select|textarea)([^>]*)\/?>/gi;
|
|
122029
|
-
let fieldMatch;
|
|
122030
|
-
const fields = [];
|
|
122031
|
-
const seenNames = new Set;
|
|
122032
|
-
let hasLoginField = false;
|
|
122033
|
-
let hasSearchLikeField = false;
|
|
122034
|
-
while ((fieldMatch = fieldRegex.exec(formBody)) !== null) {
|
|
122035
|
-
const tagName = fieldMatch[1].toLowerCase();
|
|
122036
|
-
const fieldAttrs = parseAttrs(fieldMatch[2]);
|
|
122037
|
-
const name = fieldAttrs.name ?? "";
|
|
122038
|
-
const typeAttr = fieldAttrs.type;
|
|
122039
|
-
if (LOGIN_FIELD_NAMES.has(name.toLowerCase()) || typeAttr === "password") {
|
|
122040
|
-
hasLoginField = true;
|
|
122041
|
-
}
|
|
122042
|
-
if (SEARCH_FIELD_NAMES.has(name.toLowerCase())) {
|
|
122043
|
-
hasSearchLikeField = true;
|
|
122044
|
-
}
|
|
122045
|
-
const mappedType = mapInputType(typeAttr, tagName);
|
|
122046
|
-
if (!mappedType)
|
|
122047
|
-
continue;
|
|
122048
|
-
if (!name && mappedType !== "text")
|
|
122049
|
-
continue;
|
|
122050
|
-
if (seenNames.has(name) && mappedType !== "radio")
|
|
122051
|
-
continue;
|
|
122052
|
-
if (name)
|
|
122053
|
-
seenNames.add(name);
|
|
122054
|
-
let options;
|
|
122055
|
-
if (tagName === "select") {
|
|
122056
|
-
const optRegex = /<option[^>]*value="([^"]*)"[^>]*>/gi;
|
|
122057
|
-
let optMatch;
|
|
122058
|
-
options = [];
|
|
122059
|
-
while ((optMatch = optRegex.exec(formBody)) !== null) {
|
|
122060
|
-
options.push(optMatch[1]);
|
|
122061
|
-
}
|
|
122062
|
-
if (options.length === 0)
|
|
122063
|
-
options = undefined;
|
|
122064
|
-
}
|
|
122065
|
-
fields.push({
|
|
122066
|
-
name: name || `unnamed_${fields.length}`,
|
|
122067
|
-
type: mappedType,
|
|
122068
|
-
selector: inputSelectorFromElement(fieldAttrs, tagName),
|
|
122069
|
-
...options ? { options } : {},
|
|
122070
|
-
required: fieldAttrs.required !== undefined
|
|
122071
|
-
});
|
|
122072
|
-
}
|
|
122073
|
-
let submitSelector = "";
|
|
122074
|
-
if (/<button[^>]*type\s*=\s*"submit"/i.test(formBody)) {
|
|
122075
|
-
submitSelector = "button[type=submit]";
|
|
122076
|
-
} else if (/<input[^>]*type\s*=\s*"submit"/i.test(formBody)) {
|
|
122077
|
-
submitSelector = 'input[type="submit"]';
|
|
122078
|
-
} else if (/<button/i.test(formBody)) {
|
|
122079
|
-
submitSelector = "button";
|
|
122080
|
-
}
|
|
122081
|
-
const nonHiddenFields = fields.filter((f) => f.type !== "hidden");
|
|
122082
|
-
if (!hasLoginField && nonHiddenFields.length > 0 && submitSelector && (hasSearchLikeField || nonHiddenFields.length >= 1)) {
|
|
122083
|
-
const formSelector = formSelectorFromElement(formElAttrs, formIndex);
|
|
122084
|
-
results.push({
|
|
122085
|
-
form_selector: formSelector,
|
|
122086
|
-
submit_selector: submitSelector,
|
|
122087
|
-
fields
|
|
122088
|
-
});
|
|
122089
|
-
}
|
|
122090
|
-
formIndex++;
|
|
122091
|
-
}
|
|
122092
|
-
return results;
|
|
122093
|
-
}
|
|
122094
|
-
var SEARCH_FIELD_NAMES, LOGIN_FIELD_NAMES, SUPPORTED_INPUT_TYPES;
|
|
122095
|
-
var init_search_forms = __esm(() => {
|
|
122096
|
-
SEARCH_FIELD_NAMES = new Set([
|
|
122097
|
-
"q",
|
|
122098
|
-
"query",
|
|
122099
|
-
"search",
|
|
122100
|
-
"keyword",
|
|
122101
|
-
"keywords",
|
|
122102
|
-
"term",
|
|
122103
|
-
"terms",
|
|
122104
|
-
"find",
|
|
122105
|
-
"lookup",
|
|
122106
|
-
"filter",
|
|
122107
|
-
"s",
|
|
122108
|
-
"text",
|
|
122109
|
-
"input"
|
|
122110
|
-
]);
|
|
122111
|
-
LOGIN_FIELD_NAMES = new Set([
|
|
122112
|
-
"password",
|
|
122113
|
-
"passwd",
|
|
122114
|
-
"pass",
|
|
122115
|
-
"pwd",
|
|
122116
|
-
"confirm_password",
|
|
122117
|
-
"username",
|
|
122118
|
-
"email",
|
|
122119
|
-
"login",
|
|
122120
|
-
"user"
|
|
122121
|
-
]);
|
|
122122
|
-
SUPPORTED_INPUT_TYPES = new Set([
|
|
122123
|
-
"text",
|
|
122124
|
-
"search",
|
|
122125
|
-
"hidden",
|
|
122126
|
-
"date",
|
|
122127
|
-
"number",
|
|
122128
|
-
"tel",
|
|
122129
|
-
"email"
|
|
122130
|
-
]);
|
|
122131
|
-
});
|
|
122132
|
-
|
|
122133
122242
|
// .tmp-runtime-src/state/stateless.ts
|
|
122134
122243
|
function isStateless() {
|
|
122135
122244
|
const v = process.env.UNBROWSE_STATELESS;
|
|
@@ -123558,7 +123667,7 @@ function isProtobufContentType(contentType) {
|
|
|
123558
123667
|
/**
 * Report whether an endpoint looks like it serves protobuf: either the content
 * type is recognised by isProtobufContentType, or the URL contains "proto" or
 * "protobuf" preceded by "-" or "/" and followed by "/", "-" or the end of the
 * string.
 *
 * @param {string} url - Endpoint URL.
 * @param {string} contentType - Response content type.
 * @returns {boolean} True when either check matches.
 */
function isProtobufLikeEndpoint(url, contentType) {
  return isProtobufContentType(contentType)
    ? true
    : /[-/](proto|protobuf)(\/|$|-)/i.test(url);
}
|
|
123563
123672
|
function decodeProtobufBytes(bytes) {
|
|
123564
123673
|
return decodeBytes(bytes, "bytes");
|
package/runtime/mcp.js
CHANGED
|
@@ -36310,7 +36310,7 @@ var init_cached_resolution = __esm(() => {
|
|
|
36310
36310
|
});
|
|
36311
36311
|
|
|
36312
36312
|
// .tmp-runtime-src/build-info.generated.ts
|
|
36313
|
-
var BUILD_RELEASE_VERSION = "9.
|
|
36313
|
+
var BUILD_RELEASE_VERSION = "9.8.0", BUILD_GIT_SHA = "255142bb4c25", BUILD_CODE_HASH = "5d9ebf619c61", BUILD_RELEASE_MANIFEST_BASE64 = "eyJzY2hlbWFfdmVyc2lvbiI6MSwicmVsZWFzZV92ZXJzaW9uIjoiOS44LjAiLCJnaXRfc2hhIjoiMjU1MTQyYmI0YzI1IiwiY29kZV9oYXNoIjoiNWQ5ZWJmNjE5YzYxIiwidHJhY2VfdmVyc2lvbiI6IjVkOWViZjYxOWM2MUAyNTUxNDJiYjRjMjUiLCJpc3N1ZWRfYXQiOiIyMDI2LTA2LTE4VDA2OjA4OjA2LjY3MVoifQ", BUILD_RELEASE_MANIFEST_SIGNATURE = "Tw3ScHlFYGaEtPwKLhcPI_lgQUgjAZmhWKSi4fDFMw4", BUILD_DEFAULT_BACKEND_URL = "https://beta-api.unbrowse.ai", BUILD_DEFAULT_PROFILE = "";
|
|
36314
36314
|
|
|
36315
36315
|
// .tmp-runtime-src/version.ts
|
|
36316
36316
|
import { createHash as createHash4 } from "crypto";
|
|
@@ -43252,6 +43252,42 @@ function urlPathLooksListLike(contextUrl) {
|
|
|
43252
43252
|
return false;
|
|
43253
43253
|
}
|
|
43254
43254
|
}
|
|
43255
|
+
/**
 * Reduce a link target to a coarse path "shape" in which identifier-looking
 * segments are replaced by the literal `{id}`.
 *
 * A segment is treated as an identifier when it contains a run of three or
 * more digits, is longer than 30 characters, consists only of eight or more
 * hex digits/dashes, or ends in a dash followed by two or more digits.
 *
 * @param {string} href - Absolute or relative link target.
 * @returns {string|null} The first three lower-cased segments joined by "/",
 *   or null when the path is empty, no segment looks like an identifier, or
 *   the link does not use http(s).
 */
function entityPointerTemplate(href) {
  let pathname;
  try {
    // The placeholder base only exists so relative hrefs parse; the host is never used.
    const parsed = new URL(href, "https://_");
    // tel:/mailto:/javascript: targets are not pages; without this guard a
    // phone number would be read as an "{id}" path segment.
    if (parsed.protocol !== "http:" && parsed.protocol !== "https:") {
      return null;
    }
    pathname = parsed.pathname;
  } catch {
    // Unparseable href: fall back to the raw text without query or fragment.
    pathname = String(href).split(/[?#]/)[0];
  }
  const segments = pathname.split("/").filter(Boolean);
  if (segments.length === 0) {
    return null;
  }
  let hasId = false;
  const shape = segments.map((segment) => {
    const low = segment.toLowerCase();
    const looksLikeId =
      /\d{3,}/.test(low) ||
      low.length > 30 ||
      /^[0-9a-f-]{8,}$/.test(low) ||
      /-\d{2,}$/.test(low);
    if (!looksLikeId) {
      return low;
    }
    hasId = true;
    return "{id}";
  });
  // hasId is computed over ALL segments, while the shape is cut to three.
  return hasId ? shape.slice(0, 3).join("/") : null;
}
|
|
43278
|
+
/**
 * Report whether a set of links contains at least `min` distinct hrefs that
 * share one entity-pointer template (see entityPointerTemplate).
 *
 * Exact duplicate hrefs are counted once, so a single link repeated in
 * several places on a page cannot pass for a collection on its own.
 *
 * @param {Iterable<string>} hrefs - Link targets to inspect.
 * @param {number} [min=4] - Number of links that must share a template.
 * @returns {boolean} True as soon as one template reaches `min` links.
 */
function linksFormEntityCollection(hrefs, min = 4) {
  const countsByTemplate = new Map();
  const seenHrefs = new Set();
  for (const href of hrefs) {
    if (seenHrefs.has(href)) {
      continue;
    }
    seenHrefs.add(href);
    const template = entityPointerTemplate(href);
    if (!template) {
      continue;
    }
    const count = (countsByTemplate.get(template) ?? 0) + 1;
    if (count >= min) {
      return true;
    }
    countsByTemplate.set(template, count);
  }
  return false;
}
|
|
43255
43291
|
function cardinalityMatches(intent, subject, opts) {
|
|
43256
43292
|
const wantsMany = isListLikeIntent(intent) || urlPathLooksListLike(opts?.contextUrl);
|
|
43257
43293
|
if (!wantsMany)
|
|
@@ -43827,6 +43863,7 @@ __export(exports_capture, {
|
|
|
43827
43863
|
tagRequestProvenance: () => tagRequestProvenance,
|
|
43828
43864
|
shutdownAllBrowsers: () => shutdownAllBrowsers,
|
|
43829
43865
|
shouldStopHydrationWait: () => shouldStopHydrationWait,
|
|
43866
|
+
shouldScrollStimulate: () => shouldScrollStimulate,
|
|
43830
43867
|
selectPerformanceReplayCandidates: () => selectPerformanceReplayCandidates,
|
|
43831
43868
|
registerDocumentStartScript: () => registerDocumentStartScript,
|
|
43832
43869
|
navigatePageForCapture: () => navigatePageForCapture,
|
|
@@ -44089,6 +44126,9 @@ function extractRouteHint(url) {
|
|
|
44089
44126
|
} catch {}
|
|
44090
44127
|
return null;
|
|
44091
44128
|
}
|
|
44129
|
+
/**
 * Decide whether a capture should scroll the page: true when the intent is
 * list-like or the capture URL's path looks like a listing.
 *
 * @param {string} captureUrl - URL being captured.
 * @param {string} intent - Intent text for the capture.
 * @returns {*} Result of `isListLikeIntent(intent) || urlPathLooksListLike(captureUrl)`.
 */
function shouldScrollStimulate(captureUrl, intent) {
  const intentWantsList = isListLikeIntent(intent);
  // The URL heuristic is only consulted when the intent check is falsy.
  return intentWantsList || urlPathLooksListLike(captureUrl);
}
|
|
44092
44132
|
function deriveIntentHints(captureUrl, intent) {
|
|
44093
44133
|
const derivedHints = new Set;
|
|
44094
44134
|
if (captureUrl) {
|
|
@@ -44716,8 +44756,7 @@ async function waitForContentReady(tabId, captureUrl, intent, responseBodies) {
|
|
|
44716
44756
|
log("capture", `intent-aware wait: already captured API matching one of [${[...derivedHints].join(", ")}], skipping`);
|
|
44717
44757
|
}
|
|
44718
44758
|
}
|
|
44719
|
-
|
|
44720
|
-
if (captureUrl && responseBodies && (/search|explore|trending|tabs|discover/i.test(captureUrl) || /\b(person|people|profile|profiles|user|users|member|members|company|companies|organization|organisations|business|post|posts|tweet|tweets|status|statuses)\b/.test(lowerIntent))) {
|
|
44759
|
+
if (captureUrl && responseBodies && shouldScrollStimulate(captureUrl, intent)) {
|
|
44721
44760
|
try {
|
|
44722
44761
|
const before = responseBodies.size;
|
|
44723
44762
|
await evaluate(tabId, "window.scrollTo(0, Math.max(window.innerHeight, Math.min(document.body.scrollHeight, window.innerHeight * 2)))");
|
|
@@ -45791,6 +45830,7 @@ var init_capture = __esm(async () => {
|
|
|
45791
45830
|
init_domain();
|
|
45792
45831
|
init_logger();
|
|
45793
45832
|
init_header_classify();
|
|
45833
|
+
init_cardinality();
|
|
45794
45834
|
init_browser_access();
|
|
45795
45835
|
await init_vault();
|
|
45796
45836
|
waitQueue = [];
|
|
@@ -54383,6 +54423,215 @@ var init_curl_impersonate_fallback = __esm(() => {
|
|
|
54383
54423
|
};
|
|
54384
54424
|
});
|
|
54385
54425
|
|
|
54426
|
+
// .tmp-runtime-src/execution/search-forms.ts
|
|
54427
|
+
var exports_search_forms = {};
|
|
54428
|
+
__export(exports_search_forms, {
|
|
54429
|
+
isStructuredSearchForm: () => isStructuredSearchForm,
|
|
54430
|
+
fillSearchRoute: () => fillSearchRoute,
|
|
54431
|
+
detectSearchForms: () => detectSearchForms,
|
|
54432
|
+
deriveSearchRouteTemplates: () => deriveSearchRouteTemplates
|
|
54433
|
+
});
|
|
54434
|
+
/**
 * Infer path-style route templates (e.g. "/category/{query}") from the
 * root-relative links found in an HTML document.
 *
 * Every distinct href of one to four path segments is examined. Each slug-like
 * segment is replaced in turn by "{query}", and the hrefs that share the
 * resulting shape are grouped. A shape is reported once at least `minDistinct`
 * different (lower-cased) values were seen in the placeholder position.
 *
 * @param {string} html - Raw HTML to scan for `href` attributes.
 * @param {number} [minDistinct=4] - Minimum number of distinct values a shape needs.
 * @returns {{template: string, samples: string[], count: number}[]} Templates sorted
 *   by descending `count`; `samples` holds up to five of the lower-cased values.
 *   Empty when `html` is not a non-empty string.
 */
function deriveSearchRouteTemplates(html, minDistinct = 4) {
  if (typeof html !== "string" || html.length === 0) {
    return [];
  }

  // Root-relative hrefs without query or fragment. The (?!\/) lookahead rejects
  // protocol-relative URLs ("//cdn.example.com/x"): they point at another host,
  // and treating the host as a path segment produced bogus same-origin templates.
  const hrefPattern = /href\s*=\s*["'](\/(?!\/)[^"'?#\s]+)["']/gi;
  // A slug: starts with a letter, 2-41 chars of letters/digits/hyphens. Dots are
  // excluded by this pattern, so no separate file-extension test is required.
  const slugPattern = /^[a-z][a-z0-9-]{1,40}$/i;
  // Runs of three or more digits look like ids rather than search terms.
  const idLikePattern = /\d{3,}/;

  const hrefs = new Set();
  for (const match of html.matchAll(hrefPattern)) {
    hrefs.add(match[1]);
  }

  const groups = new Map();
  for (const href of hrefs) {
    const segs = href.split("/").filter(Boolean);
    if (segs.length < 1 || segs.length > 4) {
      continue;
    }
    // Preserve the link's trailing slash so filled URLs match the site's convention.
    const trailing = href.endsWith("/") ? "/" : "";
    for (let i = 0; i < segs.length; i++) {
      const val = segs[i];
      if (!slugPattern.test(val) || idLikePattern.test(val)) {
        continue;
      }
      const shape = segs.map((seg, j) => (j === i ? "{query}" : seg)).join("/");
      const key = `/${shape}${trailing}`;
      if (!groups.has(key)) {
        groups.set(key, new Set());
      }
      groups.get(key).add(val.toLowerCase());
    }
  }

  const out = [];
  for (const [template, vals] of groups) {
    if (vals.size >= minDistinct) {
      out.push({ template, samples: [...vals].slice(0, 5), count: vals.size });
    }
  }
  return out.sort((a, b) => b.count - a.count);
}
|
|
54462
|
+
/**
 * Build a concrete URL from an origin, a route template and a search term.
 *
 * @param {string} origin - Site origin; any trailing slashes are removed.
 * @param {string} template - Path template containing a "{query}" placeholder.
 * @param {string} query - Search term; trimmed, lower-cased and URI-component encoded.
 * @returns {string} The origin followed by the template with its first "{query}"
 *   occurrence replaced by the encoded term.
 */
function fillSearchRoute(origin, template, query) {
  const normalizedTerm = query.trim().toLowerCase();
  const encodedTerm = encodeURIComponent(normalizedTerm);
  const base = origin.replace(/\/+$/, "");
  const path = template.replace("{query}", encodedTerm);
  return base + path;
}
|
|
54466
|
+
/**
 * Report whether a detected form spec is usable as a structured search form.
 *
 * @param {{fields: Array, submit_selector: string}} spec - Form description.
 * @returns {boolean} True when the spec has at least one field and a non-empty
 *   submit selector.
 */
function isStructuredSearchForm(spec) {
  const hasFields = spec.fields.length > 0;
  if (!hasFields) {
    return false;
  }
  return Boolean(spec.submit_selector);
}
|
|
54469
|
+
/**
 * Produce a CSS selector for a `<form>` from its parsed attributes.
 *
 * Preference order: id, then name, then action, then the form's position.
 * Attribute values come from scraped HTML, so they are escaped before being
 * placed in the selector. An id is emitted as `form#id` only when it is a
 * plain identifier; otherwise an attribute selector is used so that ids such
 * as "search.form" or "2col" still yield a valid, correctly-targeted selector.
 *
 * @param {Object<string, string>} attribs - Parsed attributes of the form tag.
 * @param {number} index - Zero-based ordinal of the form, used as a last resort.
 * @returns {string} CSS selector for the form.
 */
function formSelectorFromElement(attribs, index) {
  // Serialize a value as a double-quoted CSS string: escape quotes and
  // backslashes, and hex-escape line breaks (not allowed raw in CSS strings).
  const cssString = (value) => {
    const escaped = String(value)
      .replace(/["\\]/g, "\\$&")
      .replace(/[\n\r\f]/g, (ch) => `\\${ch.charCodeAt(0).toString(16)} `);
    return `"${escaped}"`;
  };

  const id = attribs.id;
  if (id) {
    return /^[A-Za-z_][\w-]*$/.test(id) ? `form#${id}` : `form[id=${cssString(id)}]`;
  }
  const name = attribs.name;
  if (name) {
    return `form[name=${cssString(name)}]`;
  }
  const action2 = attribs.action;
  if (action2) {
    return `form[action=${cssString(action2)}]`;
  }
  // NOTE(review): :nth-of-type counts siblings under one parent, whereas `index`
  // appears to be a document-wide ordinal - confirm this fallback with callers.
  return `form:nth-of-type(${index + 1})`;
}
|
|
54481
|
+
/**
 * Produce a CSS selector for a form control from its parsed attributes.
 *
 * Preference order: id, then name, then the bare tag name. Values originate
 * from scraped HTML and are escaped; an id that is not a plain identifier is
 * expressed as an attribute selector instead of `#id`, which would be invalid
 * or would target something else (e.g. "a.b" reads as id "a" with class "b").
 *
 * @param {Object<string, string>} attribs - Parsed attributes of the control.
 * @param {string} tagName - Lower-cased tag name ("input", "select", "textarea").
 * @returns {string} CSS selector for the control.
 */
function inputSelectorFromElement(attribs, tagName) {
  // Serialize a value as a double-quoted CSS string: escape quotes and
  // backslashes, and hex-escape line breaks (not allowed raw in CSS strings).
  const cssString = (value) => {
    const escaped = String(value)
      .replace(/["\\]/g, "\\$&")
      .replace(/[\n\r\f]/g, (ch) => `\\${ch.charCodeAt(0).toString(16)} `);
    return `"${escaped}"`;
  };

  const id = attribs.id;
  if (id) {
    return /^[A-Za-z_][\w-]*$/.test(id) ? `#${id}` : `${tagName}[id=${cssString(id)}]`;
  }
  const name = attribs.name;
  if (name) {
    return `${tagName}[name=${cssString(name)}]`;
  }
  return tagName;
}
|
|
54490
|
+
/**
 * Map an HTML control to the field type recorded in a search-form spec.
 *
 * @param {string|undefined} typeAttr - The control's `type` attribute, if any
 *   (compared case-insensitively; a missing attribute is treated as "text").
 * @param {string} tagName - Lower-cased tag name of the control.
 * @returns {"select"|"text"|"radio"|"checkbox"|"date"|"hidden"|null} The mapped
 *   type, or null for controls that are never fillable fields: buttons
 *   (submit/button/image/reset) and password/file inputs.
 */
function mapInputType(typeAttr, tagName) {
  if (tagName === "select") {
    return "select";
  }
  if (tagName === "textarea") {
    return "text";
  }
  const t = (typeAttr ?? "text").toLowerCase();
  switch (t) {
    case "radio":
    case "checkbox":
    case "date":
    case "hidden":
      return t;
    case "submit":
    case "button":
    case "image":
    case "reset":
    case "password":
    case "file":
      return null;
    default:
      // Every remaining type (search, number, tel, email, unknown values, ...)
      // is handled as free text. The former SUPPORTED_INPUT_TYPES lookup
      // returned "text" on both of its branches, so it was removed; this also
      // lets the function run before the module-level sets are initialised.
      return "text";
  }
}
|
|
54512
|
+
/**
 * Parse the attribute portion of an HTML start tag into a plain object.
 *
 * Handles double-quoted, single-quoted, unquoted and value-less attributes.
 * Attribute names are lower-cased because HTML attribute names are
 * case-insensitive and callers look them up by lower-case key (`name`, `type`,
 * `id`, ...). When a name is repeated the first occurrence is kept, matching
 * how HTML parsers treat duplicate attributes.
 *
 * @param {string} attrStr - Text between the tag name and the closing ">".
 * @returns {Object<string, string>} Attribute values keyed by lower-cased name;
 *   value-less attributes map to the empty string.
 */
function parseAttrs(attrStr) {
  const attrs = {};
  // name, optionally followed by = and a "double", 'single' or bare value.
  const attrRegex = /(\w[\w-]*)(?:\s*=\s*(?:"([^"]*)"|'([^']*)'|(\S+)))?/g;
  for (const m of attrStr.matchAll(attrRegex)) {
    const key = m[1].toLowerCase();
    if (Object.prototype.hasOwnProperty.call(attrs, key)) {
      continue;
    }
    attrs[key] = m[2] ?? m[3] ?? m[4] ?? "";
  }
  return attrs;
}
|
|
54521
|
+
/**
 * Scan raw HTML for forms that could be driven as search forms.
 *
 * A form is reported when it has at least one non-hidden usable field, some
 * submit control, and no login-style field (a name listed in
 * LOGIN_FIELD_NAMES or a password input). Parsing is regex-based, so nested or
 * malformed markup is handled on a best-effort basis only.
 *
 * @param {string} html - Raw HTML document or fragment.
 * @returns {{form_selector: string, submit_selector: string, fields: Array<{
 *   name: string, type: string, selector: string, options?: string[], required: boolean
 * }>}[]} One entry per qualifying form, in document order. `fields` includes
 *   hidden inputs; `options` is present only for selects that declare values.
 */
function detectSearchForms(html) {
  const results = [];
  const formRegex = /<form([^>]*)>([\s\S]*?)<\/form>/gi;
  // Submit controls may quote `type` with either quote style or not at all.
  const submitButtonRegex = /<button\b[^>]*\stype\s*=\s*(?:"submit"|'submit'|submit(?=[\s/>]))/i;
  const submitInputRegex = /<input\b[^>]*\stype\s*=\s*(?:"submit"|'submit'|submit(?=[\s/>]))/i;
  let formMatch;
  let formIndex = 0;
  while ((formMatch = formRegex.exec(html)) !== null) {
    const formElAttrs = parseAttrs(formMatch[1]);
    const formBody = formMatch[2];
    const fieldRegex = /<(input|select|textarea)([^>]*)\/?>/gi;
    let fieldMatch;
    const fields = [];
    const seenNames = new Set();
    let hasLoginField = false;
    while ((fieldMatch = fieldRegex.exec(formBody)) !== null) {
      const tagName = fieldMatch[1].toLowerCase();
      const fieldAttrs = parseAttrs(fieldMatch[2]);
      const name = fieldAttrs.name ?? "";
      const typeAttr = fieldAttrs.type;
      // `type` is case-insensitive in HTML, so "Password" must count as well.
      if (LOGIN_FIELD_NAMES.has(name.toLowerCase()) || (typeAttr ?? "").toLowerCase() === "password") {
        hasLoginField = true;
      }
      const mappedType = mapInputType(typeAttr, tagName);
      if (!mappedType) {
        continue;
      }
      // Unnamed controls are kept only when they are free-text inputs.
      if (!name && mappedType !== "text") {
        continue;
      }
      // Radio groups legitimately repeat a name; anything else is de-duplicated.
      if (seenNames.has(name) && mappedType !== "radio") {
        continue;
      }
      if (name) {
        seenNames.add(name);
      }
      let options;
      if (tagName === "select") {
        // Restrict the option scan to this select's own content. Scanning the
        // whole form body gave every select the options of all selects.
        const rest = formBody.slice(fieldMatch.index + fieldMatch[0].length);
        const endAt = rest.search(/<\/select\s*>|<select\b/i);
        const selectBody = endAt === -1 ? rest : rest.slice(0, endAt);
        const optRegex = /<option\b(?:[^>]*?\s)?value\s*=\s*(?:"([^"]*)"|'([^']*)'|([^\s>]+))/gi;
        const values = Array.from(selectBody.matchAll(optRegex), (m) => m[1] ?? m[2] ?? m[3]);
        if (values.length > 0) {
          options = values;
        }
      }
      fields.push({
        name: name || `unnamed_${fields.length}`,
        type: mappedType,
        selector: inputSelectorFromElement(fieldAttrs, tagName),
        ...(options ? { options } : {}),
        required: fieldAttrs.required !== undefined
      });
    }
    let submitSelector = "";
    if (submitButtonRegex.test(formBody)) {
      submitSelector = "button[type=submit]";
    } else if (submitInputRegex.test(formBody)) {
      submitSelector = 'input[type="submit"]';
    } else if (/<button/i.test(formBody)) {
      // A <button> without an explicit type still submits its form by default.
      submitSelector = "button";
    }
    const nonHiddenFields = fields.filter((f) => f.type !== "hidden");
    // NOTE(review): the previous condition also OR-ed in a "has a search-like
    // field name" flag, but that clause was always satisfied once a non-hidden
    // field existed, so it never changed the outcome and has been dropped.
    if (!hasLoginField && nonHiddenFields.length > 0 && submitSelector) {
      results.push({
        form_selector: formSelectorFromElement(formElAttrs, formIndex),
        submit_selector: submitSelector,
        fields
      });
    }
    formIndex++;
  }
  return results;
}
|
|
54596
|
+
// Lookup sets used by the search-form detector. They are declared here and
// populated when the init_search_forms callback runs.
var SEARCH_FIELD_NAMES, LOGIN_FIELD_NAMES, SUPPORTED_INPUT_TYPES;
var init_search_forms = __esm(() => {
  // Field names that conventionally denote a search box.
  SEARCH_FIELD_NAMES = new Set("q query search keyword keywords term terms find lookup filter s text input".split(" "));
  // Field names that mark a form as a login/credential form.
  LOGIN_FIELD_NAMES = new Set("password passwd pass pwd confirm_password username email login user".split(" "));
  // Input `type` values listed as supported.
  SUPPORTED_INPUT_TYPES = new Set("text search hidden date number tel email".split(" "));
});
|
|
54634
|
+
|
|
54386
54635
|
// node_modules/.bun/@mixmark-io+domino@2.2.0/node_modules/@mixmark-io/domino/lib/Event.js
|
|
54387
54636
|
var require_Event = __commonJS((exports, module) => {
|
|
54388
54637
|
module.exports = Event2;
|
|
@@ -71034,20 +71283,24 @@ function buildDirectDocumentResult(url, html, contentType, intent) {
|
|
|
71034
71283
|
const hits = intentTokens.filter((tok) => haystack.includes(tok));
|
|
71035
71284
|
const hitRate = hits.length / intentTokens.length;
|
|
71036
71285
|
if (hitRate < 0.34) {
|
|
71037
|
-
|
|
71038
|
-
|
|
71039
|
-
|
|
71040
|
-
|
|
71041
|
-
|
|
71042
|
-
|
|
71043
|
-
|
|
71044
|
-
|
|
71045
|
-
|
|
71046
|
-
|
|
71286
|
+
const isCollection = isListLikeIntent(intent) && linksFormEntityCollection(Array.from(html.matchAll(/href\s*=\s*["']([^"']+)["']/gi), (m) => m[1]));
|
|
71287
|
+
if (!isCollection) {
|
|
71288
|
+
return {
|
|
71289
|
+
rejected: true,
|
|
71290
|
+
reason: "intent_mismatch",
|
|
71291
|
+
evidence: {
|
|
71292
|
+
intent_tokens: intentTokens,
|
|
71293
|
+
response_token_hits: hits,
|
|
71294
|
+
response_token_hit_rate: hitRate,
|
|
71295
|
+
html_bytes: html.length
|
|
71296
|
+
}
|
|
71297
|
+
};
|
|
71298
|
+
}
|
|
71047
71299
|
}
|
|
71048
71300
|
}
|
|
71049
71301
|
}
|
|
71050
71302
|
const { url_template, input_params, path_params, query } = extractHtmlHoles(url);
|
|
71303
|
+
const routing_candidates = buildSearchRouteCandidates(html, url, intent);
|
|
71051
71304
|
return {
|
|
71052
71305
|
rejected: false,
|
|
71053
71306
|
title,
|
|
@@ -71061,12 +71314,40 @@ function buildDirectDocumentResult(url, html, contentType, intent) {
|
|
|
71061
71314
|
text_excerpt: bodyText.slice(0, MARKDOWN_BUDGET),
|
|
71062
71315
|
markdown: htmlToMarkdownSafe(html, bodyText),
|
|
71063
71316
|
tables: extractTables(html),
|
|
71317
|
+
...routing_candidates.length > 0 ? { routing_candidates } : {},
|
|
71064
71318
|
extraction: {
|
|
71065
71319
|
source: "direct-document",
|
|
71066
71320
|
rejected: false
|
|
71067
71321
|
}
|
|
71068
71322
|
};
|
|
71069
71323
|
}
|
|
71324
|
+
/**
 * Reduce a natural-language intent to the words worth using as a search term.
 *
 * Words are lower-cased runs that start with a letter and are at least three
 * characters long. Words found in QUERY_STOPWORDS, or equal to a label of the
 * URL's hostname (split on "." and "-"), are discarded; duplicates collapse to
 * their first occurrence.
 *
 * @param {string} intent - Free-form intent text.
 * @param {string} url - Page URL; only its hostname is used. An unparsable URL
 *   simply contributes no hostname labels.
 * @returns {string} Remaining words joined by single spaces ("" when none remain).
 */
function intentQueryTerm(intent, url) {
  let domTokens = new Set();
  try {
    const hostLabels = new URL(url).hostname.toLowerCase().split(/[.-]/);
    domTokens = new Set(hostLabels);
  } catch {
    // Deliberate best-effort: fall back to the empty label set.
  }
  const words = intent.toLowerCase().match(/[a-z][a-z0-9]{2,}/g) ?? [];
  const kept = new Set();
  for (const word of words) {
    if (QUERY_STOPWORDS.has(word) || domTokens.has(word)) {
      continue;
    }
    kept.add(word);
  }
  return Array.from(kept).join(" ").trim();
}
|
|
71332
|
+
/**
 * Propose up to three concrete search URLs for a list-like intent, based on
 * route templates derived from the page's own links.
 *
 * @param {string} html - HTML of the fetched document.
 * @param {string} url - URL of the document; supplies the origin for candidates.
 * @param {string} intent - Natural-language intent.
 * @returns {{url: string, template: string, query: string, samples: string[]}[]}
 *   Candidate routes, or an empty array when the intent is missing or not
 *   list-like, no query term remains, or `url` cannot be parsed.
 */
function buildSearchRouteCandidates(html, url, intent) {
  if (!(intent && isListLikeIntent(intent))) {
    return [];
  }
  const queryTerm = intentQueryTerm(intent, url);
  if (!queryTerm) {
    return [];
  }
  let origin;
  try {
    origin = new URL(url).origin;
  } catch {
    return [];
  }
  const candidates = [];
  // Only the three best-supported templates are turned into candidates.
  for (const { template, samples } of deriveSearchRouteTemplates(html).slice(0, 3)) {
    candidates.push({
      url: fillSearchRoute(origin, template, queryTerm),
      template,
      query: queryTerm,
      samples
    });
  }
  return candidates;
}
|
|
71070
71351
|
async function fetchDirectDocument(url) {
|
|
71071
71352
|
if (!isDirectDocumentEligibleUrl(url))
|
|
71072
71353
|
return null;
|
|
@@ -71254,10 +71535,12 @@ function cellText(html) {
|
|
|
71254
71535
|
/**
 * Decode the handful of HTML character references that commonly appear in
 * extracted text: nbsp, amp, lt, gt, quot, and the apostrophe forms
 * (apos, #39, #x27).
 *
 * The previous chain replaced each character with itself and therefore decoded
 * nothing. Decoding is done in a single pass so that an escaped ampersand
 * followed by "lt;" yields the literal entity text rather than being decoded
 * twice into "<".
 *
 * @param {string} input - Text possibly containing character references.
 * @returns {string} Text with the supported references replaced; a
 *   non-breaking space becomes an ordinary space.
 */
function decodeHtmlEntityText(input) {
  const replacements = {
    nbsp: " ",
    amp: "&",
    lt: "<",
    gt: ">",
    quot: '"',
    apos: "'",
    "#39": "'",
    "#x27": "'"
  };
  return input.replace(
    /&(nbsp|amp|lt|gt|quot|apos|#39|#x27);/gi,
    (whole, name) => replacements[name.toLowerCase()]
  );
}
|
|
71257
|
-
var HTML_RE, MIN_DIRECT_DOCUMENT_HTML_BYTES = 5000, CHALLENGE_RE, INTERSTITIAL_RE, MIN_DIRECT_DOCUMENT_BODY_TEXT = 500, SPA_HYDRATION_RE, SPA_HYDRATION_BODY_TEXT_FLOOR = 2000, SPA_ROOT_CONTAINER_RE, PARKED_RE, INTENT_STOPWORDS, MARKDOWN_BUDGET, MAX_TABLES = 10, MAX_TABLE_ROWS = 50, buildBloombergDirectDocumentResult;
|
|
71538
|
+
var HTML_RE, MIN_DIRECT_DOCUMENT_HTML_BYTES = 5000, CHALLENGE_RE, INTERSTITIAL_RE, MIN_DIRECT_DOCUMENT_BODY_TEXT = 500, SPA_HYDRATION_RE, SPA_HYDRATION_BODY_TEXT_FLOOR = 2000, SPA_ROOT_CONTAINER_RE, PARKED_RE, INTENT_STOPWORDS, MARKDOWN_BUDGET, MAX_TABLES = 10, MAX_TABLE_ROWS = 50, QUERY_STOPWORDS, buildBloombergDirectDocumentResult;
|
|
71258
71539
|
var init_direct_document = __esm(() => {
|
|
71259
71540
|
init_curl_impersonate_fallback();
|
|
71260
71541
|
init_proxy_fetch();
|
|
71542
|
+
init_cardinality();
|
|
71543
|
+
init_search_forms();
|
|
71261
71544
|
HTML_RE = /text\/html|application\/xhtml\+xml/i;
|
|
71262
71545
|
CHALLENGE_RE = /\b(access denied|are you a robot|captcha|just a moment|pardon our interruption|robot check|unusual traffic|verify you are human)\b/i;
|
|
71263
71546
|
INTERSTITIAL_RE = /\b(please wait for verification|just a moment|cf-mitigated|datadome|akamai bot|perimeterx|sign in to continue|log in to (?:continue|access)|javascript is not available)\b/i;
|
|
@@ -71340,6 +71623,7 @@ var init_direct_document = __esm(() => {
|
|
|
71340
71623
|
"look"
|
|
71341
71624
|
]);
|
|
71342
71625
|
MARKDOWN_BUDGET = Math.max(1000, Number(process.env.UNBROWSE_MARKDOWN_BUDGET ?? "12000") || 12000);
|
|
71626
|
+
QUERY_STOPWORDS = new Set(("resolve unbrowse execute run walk go fetch open view want need please " + "find search browse list lookup discover show get me a an the on of for in to " + "with and or all my your this that some good best top new latest cheap near").split(" "));
|
|
71343
71627
|
buildBloombergDirectDocumentResult = buildDirectDocumentResult;
|
|
71344
71628
|
});
|
|
71345
71629
|
|
|
@@ -120052,181 +120336,6 @@ function clampToFloor(score, demotion, floor) {
|
|
|
120052
120336
|
}
|
|
120053
120337
|
var HARD_NEGATIVE_FLOOR = -2000, WEAK_NEGATIVE_FLOOR = -400, PAGE_ARTIFACT_DEMOTION = 800, EMPTY_ENTITY_BAG_DEMOTION = 650, EMPTY_ENTITY_BAG_FLOOR = -700;
|
|
120054
120338
|
|
|
120055
|
-
// .tmp-runtime-src/execution/search-forms.ts
|
|
120056
|
-
var exports_search_forms = {};
|
|
120057
|
-
__export(exports_search_forms, {
|
|
120058
|
-
isStructuredSearchForm: () => isStructuredSearchForm,
|
|
120059
|
-
detectSearchForms: () => detectSearchForms
|
|
120060
|
-
});
|
|
120061
|
-
function isStructuredSearchForm(spec) {
|
|
120062
|
-
return spec.fields.length > 0 && !!spec.submit_selector;
|
|
120063
|
-
}
|
|
120064
|
-
function formSelectorFromElement(attribs, index2) {
|
|
120065
|
-
const id = attribs.id;
|
|
120066
|
-
if (id)
|
|
120067
|
-
return `form#${id}`;
|
|
120068
|
-
const name = attribs.name;
|
|
120069
|
-
if (name)
|
|
120070
|
-
return `form[name="${name}"]`;
|
|
120071
|
-
const action2 = attribs.action;
|
|
120072
|
-
if (action2)
|
|
120073
|
-
return `form[action="${action2}"]`;
|
|
120074
|
-
return `form:nth-of-type(${index2 + 1})`;
|
|
120075
|
-
}
|
|
120076
|
-
function inputSelectorFromElement(attribs, tagName) {
|
|
120077
|
-
const id = attribs.id;
|
|
120078
|
-
if (id)
|
|
120079
|
-
return `#${id}`;
|
|
120080
|
-
const name = attribs.name;
|
|
120081
|
-
if (name)
|
|
120082
|
-
return `${tagName}[name="${name}"]`;
|
|
120083
|
-
return tagName;
|
|
120084
|
-
}
|
|
120085
|
-
function mapInputType(typeAttr, tagName) {
|
|
120086
|
-
if (tagName === "select")
|
|
120087
|
-
return "select";
|
|
120088
|
-
if (tagName === "textarea")
|
|
120089
|
-
return "text";
|
|
120090
|
-
const t = (typeAttr ?? "text").toLowerCase();
|
|
120091
|
-
if (t === "radio")
|
|
120092
|
-
return "radio";
|
|
120093
|
-
if (t === "checkbox")
|
|
120094
|
-
return "checkbox";
|
|
120095
|
-
if (t === "date")
|
|
120096
|
-
return "date";
|
|
120097
|
-
if (t === "hidden")
|
|
120098
|
-
return "hidden";
|
|
120099
|
-
if (t === "submit" || t === "button" || t === "image" || t === "reset")
|
|
120100
|
-
return null;
|
|
120101
|
-
if (t === "password" || t === "file")
|
|
120102
|
-
return null;
|
|
120103
|
-
if (SUPPORTED_INPUT_TYPES.has(t))
|
|
120104
|
-
return "text";
|
|
120105
|
-
return "text";
|
|
120106
|
-
}
|
|
120107
|
-
function parseAttrs(attrStr) {
|
|
120108
|
-
const attrs = {};
|
|
120109
|
-
const attrRegex = /(\w[\w-]*)(?:\s*=\s*(?:"([^"]*)"|'([^']*)'|(\S+)))?/g;
|
|
120110
|
-
let m;
|
|
120111
|
-
while ((m = attrRegex.exec(attrStr)) !== null) {
|
|
120112
|
-
attrs[m[1]] = m[2] ?? m[3] ?? m[4] ?? "";
|
|
120113
|
-
}
|
|
120114
|
-
return attrs;
|
|
120115
|
-
}
|
|
120116
|
-
function detectSearchForms(html3) {
|
|
120117
|
-
const results = [];
|
|
120118
|
-
const formRegex = /<form([^>]*)>([\s\S]*?)<\/form>/gi;
|
|
120119
|
-
let formMatch;
|
|
120120
|
-
let formIndex = 0;
|
|
120121
|
-
while ((formMatch = formRegex.exec(html3)) !== null) {
|
|
120122
|
-
const formAttrs = formMatch[1];
|
|
120123
|
-
const formBody = formMatch[2];
|
|
120124
|
-
const formElAttrs = parseAttrs(formAttrs);
|
|
120125
|
-
const fieldRegex = /<(input|select|textarea)([^>]*)\/?>/gi;
|
|
120126
|
-
let fieldMatch;
|
|
120127
|
-
const fields = [];
|
|
120128
|
-
const seenNames = new Set;
|
|
120129
|
-
let hasLoginField = false;
|
|
120130
|
-
let hasSearchLikeField = false;
|
|
120131
|
-
while ((fieldMatch = fieldRegex.exec(formBody)) !== null) {
|
|
120132
|
-
const tagName = fieldMatch[1].toLowerCase();
|
|
120133
|
-
const fieldAttrs = parseAttrs(fieldMatch[2]);
|
|
120134
|
-
const name = fieldAttrs.name ?? "";
|
|
120135
|
-
const typeAttr = fieldAttrs.type;
|
|
120136
|
-
if (LOGIN_FIELD_NAMES.has(name.toLowerCase()) || typeAttr === "password") {
|
|
120137
|
-
hasLoginField = true;
|
|
120138
|
-
}
|
|
120139
|
-
if (SEARCH_FIELD_NAMES.has(name.toLowerCase())) {
|
|
120140
|
-
hasSearchLikeField = true;
|
|
120141
|
-
}
|
|
120142
|
-
const mappedType = mapInputType(typeAttr, tagName);
|
|
120143
|
-
if (!mappedType)
|
|
120144
|
-
continue;
|
|
120145
|
-
if (!name && mappedType !== "text")
|
|
120146
|
-
continue;
|
|
120147
|
-
if (seenNames.has(name) && mappedType !== "radio")
|
|
120148
|
-
continue;
|
|
120149
|
-
if (name)
|
|
120150
|
-
seenNames.add(name);
|
|
120151
|
-
let options;
|
|
120152
|
-
if (tagName === "select") {
|
|
120153
|
-
const optRegex = /<option[^>]*value="([^"]*)"[^>]*>/gi;
|
|
120154
|
-
let optMatch;
|
|
120155
|
-
options = [];
|
|
120156
|
-
while ((optMatch = optRegex.exec(formBody)) !== null) {
|
|
120157
|
-
options.push(optMatch[1]);
|
|
120158
|
-
}
|
|
120159
|
-
if (options.length === 0)
|
|
120160
|
-
options = undefined;
|
|
120161
|
-
}
|
|
120162
|
-
fields.push({
|
|
120163
|
-
name: name || `unnamed_${fields.length}`,
|
|
120164
|
-
type: mappedType,
|
|
120165
|
-
selector: inputSelectorFromElement(fieldAttrs, tagName),
|
|
120166
|
-
...options ? { options } : {},
|
|
120167
|
-
required: fieldAttrs.required !== undefined
|
|
120168
|
-
});
|
|
120169
|
-
}
|
|
120170
|
-
let submitSelector = "";
|
|
120171
|
-
if (/<button[^>]*type\s*=\s*"submit"/i.test(formBody)) {
|
|
120172
|
-
submitSelector = "button[type=submit]";
|
|
120173
|
-
} else if (/<input[^>]*type\s*=\s*"submit"/i.test(formBody)) {
|
|
120174
|
-
submitSelector = 'input[type="submit"]';
|
|
120175
|
-
} else if (/<button/i.test(formBody)) {
|
|
120176
|
-
submitSelector = "button";
|
|
120177
|
-
}
|
|
120178
|
-
const nonHiddenFields = fields.filter((f) => f.type !== "hidden");
|
|
120179
|
-
if (!hasLoginField && nonHiddenFields.length > 0 && submitSelector && (hasSearchLikeField || nonHiddenFields.length >= 1)) {
|
|
120180
|
-
const formSelector = formSelectorFromElement(formElAttrs, formIndex);
|
|
120181
|
-
results.push({
|
|
120182
|
-
form_selector: formSelector,
|
|
120183
|
-
submit_selector: submitSelector,
|
|
120184
|
-
fields
|
|
120185
|
-
});
|
|
120186
|
-
}
|
|
120187
|
-
formIndex++;
|
|
120188
|
-
}
|
|
120189
|
-
return results;
|
|
120190
|
-
}
|
|
120191
|
-
var SEARCH_FIELD_NAMES, LOGIN_FIELD_NAMES, SUPPORTED_INPUT_TYPES;
|
|
120192
|
-
var init_search_forms = __esm(() => {
|
|
120193
|
-
SEARCH_FIELD_NAMES = new Set([
|
|
120194
|
-
"q",
|
|
120195
|
-
"query",
|
|
120196
|
-
"search",
|
|
120197
|
-
"keyword",
|
|
120198
|
-
"keywords",
|
|
120199
|
-
"term",
|
|
120200
|
-
"terms",
|
|
120201
|
-
"find",
|
|
120202
|
-
"lookup",
|
|
120203
|
-
"filter",
|
|
120204
|
-
"s",
|
|
120205
|
-
"text",
|
|
120206
|
-
"input"
|
|
120207
|
-
]);
|
|
120208
|
-
LOGIN_FIELD_NAMES = new Set([
|
|
120209
|
-
"password",
|
|
120210
|
-
"passwd",
|
|
120211
|
-
"pass",
|
|
120212
|
-
"pwd",
|
|
120213
|
-
"confirm_password",
|
|
120214
|
-
"username",
|
|
120215
|
-
"email",
|
|
120216
|
-
"login",
|
|
120217
|
-
"user"
|
|
120218
|
-
]);
|
|
120219
|
-
SUPPORTED_INPUT_TYPES = new Set([
|
|
120220
|
-
"text",
|
|
120221
|
-
"search",
|
|
120222
|
-
"hidden",
|
|
120223
|
-
"date",
|
|
120224
|
-
"number",
|
|
120225
|
-
"tel",
|
|
120226
|
-
"email"
|
|
120227
|
-
]);
|
|
120228
|
-
});
|
|
120229
|
-
|
|
120230
120339
|
// .tmp-runtime-src/state/stateless.ts
|
|
120231
120340
|
function isStateless() {
|
|
120232
120341
|
const v = process.env.UNBROWSE_STATELESS;
|
|
@@ -121655,7 +121764,7 @@ function isProtobufContentType(contentType) {
|
|
|
121655
121764
|
/**
 * Guess whether an endpoint speaks protobuf.
 *
 * @param {string} url - Endpoint URL.
 * @param {string} contentType - Response content type.
 * @returns {boolean} True when `isProtobufContentType` accepts the content type,
 *   or when the URL contains "proto"/"protobuf" preceded by "-" or "/" and
 *   followed by "/", "-" or the end of the string (case-insensitive).
 */
function isProtobufLikeEndpoint(url, contentType) {
  const urlHintsProtobuf = () => /[-/](proto|protobuf)(\/|$|-)/i.test(url);
  // The URL is only inspected when the content type is not conclusive.
  return isProtobufContentType(contentType) ? true : urlHintsProtobuf();
}
|
|
121660
121769
|
function decodeProtobufBytes(bytes) {
|
|
121661
121770
|
return decodeBytes(bytes, "bytes");
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
"repo_url": "https://github.com/justrach/kuri.git",
|
|
3
3
|
"branch": "adding-extensions",
|
|
4
4
|
"source_sha": "149881254046a20778f642b69f20f0c6468f6fb4",
|
|
5
|
-
"built_at": "2026-06-
|
|
5
|
+
"built_at": "2026-06-18T05:53:02.585Z",
|
|
6
6
|
"binaries": {
|
|
7
7
|
"darwin-arm64": {
|
|
8
8
|
"zig_target": "aarch64-macos",
|
|
@@ -21,11 +21,11 @@
|
|
|
21
21
|
},
|
|
22
22
|
"linux-x64": {
|
|
23
23
|
"zig_target": "x86_64-linux",
|
|
24
|
-
"sha256": "
|
|
24
|
+
"sha256": "f39955d73d86150fba2a4bec6393e7745feb42f5152870b6c27fd68a5cff3a6e"
|
|
25
25
|
},
|
|
26
26
|
"win-x64": {
|
|
27
27
|
"zig_target": "x86_64-windows-gnu",
|
|
28
|
-
"sha256": "
|
|
28
|
+
"sha256": "376a34f508ea6a4e140150f9f6ddc00519f8bb0894ee2dd7a60bc0e7613d89b0",
|
|
29
29
|
"source": "pre-staged"
|
|
30
30
|
}
|
|
31
31
|
},
|
|
@@ -33,22 +33,22 @@
|
|
|
33
33
|
"darwin-arm64": {
|
|
34
34
|
"zig_target": "aarch64-macos",
|
|
35
35
|
"lib": "libkuri_ffi.dylib",
|
|
36
|
-
"sha256": "
|
|
36
|
+
"sha256": "2ca1be4d477f28c4a4ab1dd993cfe6766b9f6858c42befda3f503a3e2940bf6f"
|
|
37
37
|
},
|
|
38
38
|
"darwin-x64": {
|
|
39
39
|
"zig_target": "x86_64-macos",
|
|
40
40
|
"lib": "libkuri_ffi.dylib",
|
|
41
|
-
"sha256": "
|
|
41
|
+
"sha256": "80e27865e521b4bc6a79dfcb4fd481535d70e03226125ce10d9625ba02320f47"
|
|
42
42
|
},
|
|
43
43
|
"linux-arm64": {
|
|
44
44
|
"zig_target": "aarch64-linux",
|
|
45
45
|
"lib": "libkuri_ffi.so",
|
|
46
|
-
"sha256": "
|
|
46
|
+
"sha256": "6fa04fc6b505212e5ae9cfdc0d21eb2f9b2eb97dc41850e02ba3f4112e252e9d"
|
|
47
47
|
},
|
|
48
48
|
"linux-x64": {
|
|
49
49
|
"zig_target": "x86_64-linux",
|
|
50
50
|
"lib": "libkuri_ffi.so",
|
|
51
|
-
"sha256": "
|
|
51
|
+
"sha256": "fa520df6cca6eab9260bd43ed7a60d665c746645f12f6fb4e3ecddbb026bdb05"
|
|
52
52
|
}
|
|
53
53
|
}
|
|
54
54
|
}
|
|
Binary file
|