unbrowse 9.6.1 → 9.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/runtime/cli.js +286 -116
- package/runtime/mcp.js +222 -152
- package/vendor/kuri/darwin-arm64/libkuri_ffi.dylib +0 -0
- package/vendor/kuri/darwin-x64/libkuri_ffi.dylib +0 -0
- package/vendor/kuri/linux-arm64/libkuri_ffi.so +0 -0
- package/vendor/kuri/linux-x64/kuri +0 -0
- package/vendor/kuri/linux-x64/libkuri_ffi.so +0 -0
- package/vendor/kuri/manifest.json +7 -7
- package/vendor/kuri/win-x64/kuri.exe +0 -0
package/package.json
CHANGED
package/runtime/cli.js
CHANGED
|
@@ -1752,10 +1752,74 @@ function valueLooksLikeSingleItem(value) {
|
|
|
1752
1752
|
const hasPriceish = "offers" in obj || "price" in obj || "sku" in obj;
|
|
1753
1753
|
return isItemType || hasName && hasPriceish;
|
|
1754
1754
|
}
|
|
1755
|
-
function
|
|
1756
|
-
if (!
|
|
1755
|
+
function schemaLooksLikeSingleItem(rs) {
|
|
1756
|
+
if (!rs || typeof rs !== "object")
|
|
1757
|
+
return false;
|
|
1758
|
+
const schema = rs;
|
|
1759
|
+
if (schema.type === "array")
|
|
1760
|
+
return false;
|
|
1761
|
+
const props = schema.properties ?? {};
|
|
1762
|
+
for (const key of COLLECTION_KEYS) {
|
|
1763
|
+
if (key in props)
|
|
1764
|
+
return false;
|
|
1765
|
+
}
|
|
1766
|
+
for (const value of Object.values(props)) {
|
|
1767
|
+
if (value && typeof value === "object" && value.type === "array" && value.items?.type === "object") {
|
|
1768
|
+
return false;
|
|
1769
|
+
}
|
|
1770
|
+
}
|
|
1771
|
+
if (schema.type !== "object")
|
|
1772
|
+
return false;
|
|
1773
|
+
const hasType = "@type" in props;
|
|
1774
|
+
const hasName = "name" in props || "title" in props;
|
|
1775
|
+
const hasPriceish = "offers" in props || "price" in props || "sku" in props;
|
|
1776
|
+
return hasType || hasName && hasPriceish;
|
|
1777
|
+
}
|
|
1778
|
+
function routeLooksLikeSingleItem(route) {
|
|
1779
|
+
const tmpl = route.url_template ?? "";
|
|
1780
|
+
let pathAndQuery = tmpl;
|
|
1781
|
+
try {
|
|
1782
|
+
const u = new URL(tmpl);
|
|
1783
|
+
pathAndQuery = `${u.pathname}${u.search}`;
|
|
1784
|
+
} catch {}
|
|
1785
|
+
const lower = pathAndQuery.toLowerCase();
|
|
1786
|
+
if (/\/(?:search|q|categories?|browse|results?|listings|explore|discover|feed|catalog(?:ue)?|collections?|shop|all)\b/.test(lower) || /[?&](?:q|query|keyword|keywords|search|term|category|cat|page)=/.test(lower)) {
|
|
1787
|
+
return false;
|
|
1788
|
+
}
|
|
1789
|
+
if (/\/(?:p|product|products|item|items|listing|detail|details|dp|pd|sku)\/[^/]+/.test(lower))
|
|
1790
|
+
return true;
|
|
1791
|
+
const lastSeg = lower.split("?")[0].replace(/\/+$/, "").split("/").pop() ?? "";
|
|
1792
|
+
if (/-\d{3,}$/.test(lastSeg) || /^\d{3,}$/.test(lastSeg))
|
|
1757
1793
|
return true;
|
|
1758
|
-
|
|
1794
|
+
if (/\{[^}]+\}/.test(lower))
|
|
1795
|
+
return false;
|
|
1796
|
+
return schemaLooksLikeSingleItem(route.response_schema);
|
|
1797
|
+
}
|
|
1798
|
+
function urlPathLooksListLike(contextUrl) {
|
|
1799
|
+
if (!contextUrl)
|
|
1800
|
+
return false;
|
|
1801
|
+
try {
|
|
1802
|
+
const pathname = new URL(contextUrl).pathname.toLowerCase();
|
|
1803
|
+
return /\/(?:search|basic-search|result-page|results?|discover|browse|categories?|q|listings|feed|catalog(?:ue)?)\b/.test(pathname);
|
|
1804
|
+
} catch {
|
|
1805
|
+
return false;
|
|
1806
|
+
}
|
|
1807
|
+
}
|
|
1808
|
+
function cardinalityMatches(intent, subject, opts) {
|
|
1809
|
+
const wantsMany = isListLikeIntent(intent) || urlPathLooksListLike(opts?.contextUrl);
|
|
1810
|
+
if (!wantsMany)
|
|
1811
|
+
return true;
|
|
1812
|
+
switch (subject.kind) {
|
|
1813
|
+
case "value":
|
|
1814
|
+
return !valueLooksLikeSingleItem(subject.value);
|
|
1815
|
+
case "schema":
|
|
1816
|
+
return !schemaLooksLikeSingleItem(subject.schema);
|
|
1817
|
+
case "route":
|
|
1818
|
+
return !routeLooksLikeSingleItem(subject.route);
|
|
1819
|
+
}
|
|
1820
|
+
}
|
|
1821
|
+
function resolutionCardinalityMatches(intent, data) {
|
|
1822
|
+
return cardinalityMatches(intent, { kind: "value", value: data });
|
|
1759
1823
|
}
|
|
1760
1824
|
var LIST_INTENT_RE, ITEM_SCHEMA_TYPES, COLLECTION_KEYS;
|
|
1761
1825
|
var init_cardinality = __esm(() => {
|
|
@@ -2286,7 +2350,7 @@ var init_telemetry = __esm(() => {
|
|
|
2286
2350
|
});
|
|
2287
2351
|
|
|
2288
2352
|
// .tmp-runtime-src/build-info.generated.ts
|
|
2289
|
-
var BUILD_RELEASE_VERSION = "9.
|
|
2353
|
+
var BUILD_RELEASE_VERSION = "9.7.0", BUILD_GIT_SHA = "98fa4d4472e2", BUILD_CODE_HASH = "5d9ebf619c61", BUILD_RELEASE_MANIFEST_BASE64 = "eyJzY2hlbWFfdmVyc2lvbiI6MSwicmVsZWFzZV92ZXJzaW9uIjoiOS43LjAiLCJnaXRfc2hhIjoiOThmYTRkNDQ3MmUyIiwiY29kZV9oYXNoIjoiNWQ5ZWJmNjE5YzYxIiwidHJhY2VfdmVyc2lvbiI6IjVkOWViZjYxOWM2MUA5OGZhNGQ0NDcyZTIiLCJpc3N1ZWRfYXQiOiIyMDI2LTA2LTE4VDA0OjU4OjQwLjQ3NFoifQ", BUILD_RELEASE_MANIFEST_SIGNATURE = "-b2L4xhYhJ-mLJeo39RL19HSAKgjhZ2_D3ezoXjvQLQ", BUILD_DEFAULT_BACKEND_URL = "https://beta-api.unbrowse.ai", BUILD_DEFAULT_PROFILE = "";
|
|
2290
2354
|
|
|
2291
2355
|
// .tmp-runtime-src/version.ts
|
|
2292
2356
|
import { createHash as createHash7 } from "crypto";
|
|
@@ -45983,6 +46047,130 @@ var init_header_classify = __esm(() => {
|
|
|
45983
46047
|
SENSITIVE_HEADER_PATTERN = /token|key|secret|credential|password|session/i;
|
|
45984
46048
|
});
|
|
45985
46049
|
|
|
46050
|
+
// .tmp-runtime-src/values/cardinality.ts
|
|
46051
|
+
function isListLikeIntent2(intent) {
|
|
46052
|
+
return LIST_INTENT_RE2.test(intent ?? "");
|
|
46053
|
+
}
|
|
46054
|
+
function valueLooksLikeSingleItem2(value) {
|
|
46055
|
+
if (value == null || Array.isArray(value) || typeof value !== "object")
|
|
46056
|
+
return false;
|
|
46057
|
+
const obj = value;
|
|
46058
|
+
for (const key of COLLECTION_KEYS2) {
|
|
46059
|
+
if (Array.isArray(obj[key]))
|
|
46060
|
+
return false;
|
|
46061
|
+
}
|
|
46062
|
+
for (const v of Object.values(obj)) {
|
|
46063
|
+
if (Array.isArray(v) && v.some((x) => x !== null && typeof x === "object"))
|
|
46064
|
+
return false;
|
|
46065
|
+
}
|
|
46066
|
+
const atType = typeof obj["@type"] === "string" ? obj["@type"].toLowerCase() : "";
|
|
46067
|
+
const isItemType = ITEM_SCHEMA_TYPES2.has(atType);
|
|
46068
|
+
const hasName = "name" in obj || "title" in obj || "headline" in obj;
|
|
46069
|
+
const hasPriceish = "offers" in obj || "price" in obj || "sku" in obj;
|
|
46070
|
+
return isItemType || hasName && hasPriceish;
|
|
46071
|
+
}
|
|
46072
|
+
function schemaLooksLikeSingleItem2(rs) {
|
|
46073
|
+
if (!rs || typeof rs !== "object")
|
|
46074
|
+
return false;
|
|
46075
|
+
const schema = rs;
|
|
46076
|
+
if (schema.type === "array")
|
|
46077
|
+
return false;
|
|
46078
|
+
const props = schema.properties ?? {};
|
|
46079
|
+
for (const key of COLLECTION_KEYS2) {
|
|
46080
|
+
if (key in props)
|
|
46081
|
+
return false;
|
|
46082
|
+
}
|
|
46083
|
+
for (const value of Object.values(props)) {
|
|
46084
|
+
if (value && typeof value === "object" && value.type === "array" && value.items?.type === "object") {
|
|
46085
|
+
return false;
|
|
46086
|
+
}
|
|
46087
|
+
}
|
|
46088
|
+
if (schema.type !== "object")
|
|
46089
|
+
return false;
|
|
46090
|
+
const hasType = "@type" in props;
|
|
46091
|
+
const hasName = "name" in props || "title" in props;
|
|
46092
|
+
const hasPriceish = "offers" in props || "price" in props || "sku" in props;
|
|
46093
|
+
return hasType || hasName && hasPriceish;
|
|
46094
|
+
}
|
|
46095
|
+
function routeLooksLikeSingleItem2(route) {
|
|
46096
|
+
const tmpl = route.url_template ?? "";
|
|
46097
|
+
let pathAndQuery = tmpl;
|
|
46098
|
+
try {
|
|
46099
|
+
const u = new URL(tmpl);
|
|
46100
|
+
pathAndQuery = `${u.pathname}${u.search}`;
|
|
46101
|
+
} catch {}
|
|
46102
|
+
const lower = pathAndQuery.toLowerCase();
|
|
46103
|
+
if (/\/(?:search|q|categories?|browse|results?|listings|explore|discover|feed|catalog(?:ue)?|collections?|shop|all)\b/.test(lower) || /[?&](?:q|query|keyword|keywords|search|term|category|cat|page)=/.test(lower)) {
|
|
46104
|
+
return false;
|
|
46105
|
+
}
|
|
46106
|
+
if (/\/(?:p|product|products|item|items|listing|detail|details|dp|pd|sku)\/[^/]+/.test(lower))
|
|
46107
|
+
return true;
|
|
46108
|
+
const lastSeg = lower.split("?")[0].replace(/\/+$/, "").split("/").pop() ?? "";
|
|
46109
|
+
if (/-\d{3,}$/.test(lastSeg) || /^\d{3,}$/.test(lastSeg))
|
|
46110
|
+
return true;
|
|
46111
|
+
if (/\{[^}]+\}/.test(lower))
|
|
46112
|
+
return false;
|
|
46113
|
+
return schemaLooksLikeSingleItem2(route.response_schema);
|
|
46114
|
+
}
|
|
46115
|
+
function urlPathLooksListLike2(contextUrl) {
|
|
46116
|
+
if (!contextUrl)
|
|
46117
|
+
return false;
|
|
46118
|
+
try {
|
|
46119
|
+
const pathname = new URL(contextUrl).pathname.toLowerCase();
|
|
46120
|
+
return /\/(?:search|basic-search|result-page|results?|discover|browse|categories?|q|listings|feed|catalog(?:ue)?)\b/.test(pathname);
|
|
46121
|
+
} catch {
|
|
46122
|
+
return false;
|
|
46123
|
+
}
|
|
46124
|
+
}
|
|
46125
|
+
function cardinalityMatches2(intent, subject, opts) {
|
|
46126
|
+
const wantsMany = isListLikeIntent2(intent) || urlPathLooksListLike2(opts?.contextUrl);
|
|
46127
|
+
if (!wantsMany)
|
|
46128
|
+
return true;
|
|
46129
|
+
switch (subject.kind) {
|
|
46130
|
+
case "value":
|
|
46131
|
+
return !valueLooksLikeSingleItem2(subject.value);
|
|
46132
|
+
case "schema":
|
|
46133
|
+
return !schemaLooksLikeSingleItem2(subject.schema);
|
|
46134
|
+
case "route":
|
|
46135
|
+
return !routeLooksLikeSingleItem2(subject.route);
|
|
46136
|
+
}
|
|
46137
|
+
}
|
|
46138
|
+
var LIST_INTENT_RE2, ITEM_SCHEMA_TYPES2, COLLECTION_KEYS2;
|
|
46139
|
+
var init_cardinality2 = __esm(() => {
|
|
46140
|
+
LIST_INTENT_RE2 = /\b(search|find|lookup|browse|discover|list(?:ings?)?|feed|catalog(?:ue)?)\b/i;
|
|
46141
|
+
ITEM_SCHEMA_TYPES2 = new Set([
|
|
46142
|
+
"product",
|
|
46143
|
+
"offer",
|
|
46144
|
+
"article",
|
|
46145
|
+
"newsarticle",
|
|
46146
|
+
"blogposting",
|
|
46147
|
+
"recipe",
|
|
46148
|
+
"event",
|
|
46149
|
+
"place",
|
|
46150
|
+
"localbusiness",
|
|
46151
|
+
"jobposting",
|
|
46152
|
+
"book",
|
|
46153
|
+
"movie",
|
|
46154
|
+
"creativework",
|
|
46155
|
+
"person",
|
|
46156
|
+
"organization"
|
|
46157
|
+
]);
|
|
46158
|
+
COLLECTION_KEYS2 = [
|
|
46159
|
+
"itemListElement",
|
|
46160
|
+
"items",
|
|
46161
|
+
"results",
|
|
46162
|
+
"products",
|
|
46163
|
+
"listings",
|
|
46164
|
+
"data",
|
|
46165
|
+
"edges",
|
|
46166
|
+
"hits",
|
|
46167
|
+
"records",
|
|
46168
|
+
"entries",
|
|
46169
|
+
"rows",
|
|
46170
|
+
"nodes"
|
|
46171
|
+
];
|
|
46172
|
+
});
|
|
46173
|
+
|
|
45986
46174
|
// node_modules/.bun/nanoid@5.1.11/node_modules/nanoid/url-alphabet/index.js
|
|
45987
46175
|
var urlAlphabet = "useandom-26T198340PX75pxJACKVERYMINDBUSHWOLF_GQZbfghjklqvwyzrict";
|
|
45988
46176
|
|
|
@@ -46506,6 +46694,7 @@ __export(exports_capture, {
|
|
|
46506
46694
|
tagRequestProvenance: () => tagRequestProvenance,
|
|
46507
46695
|
shutdownAllBrowsers: () => shutdownAllBrowsers,
|
|
46508
46696
|
shouldStopHydrationWait: () => shouldStopHydrationWait,
|
|
46697
|
+
shouldScrollStimulate: () => shouldScrollStimulate,
|
|
46509
46698
|
selectPerformanceReplayCandidates: () => selectPerformanceReplayCandidates,
|
|
46510
46699
|
registerDocumentStartScript: () => registerDocumentStartScript,
|
|
46511
46700
|
navigatePageForCapture: () => navigatePageForCapture,
|
|
@@ -46768,6 +46957,9 @@ function extractRouteHint(url) {
|
|
|
46768
46957
|
} catch {}
|
|
46769
46958
|
return null;
|
|
46770
46959
|
}
|
|
46960
|
+
function shouldScrollStimulate(captureUrl, intent) {
|
|
46961
|
+
return isListLikeIntent2(intent) || urlPathLooksListLike2(captureUrl);
|
|
46962
|
+
}
|
|
46771
46963
|
function deriveIntentHints(captureUrl, intent) {
|
|
46772
46964
|
const derivedHints = new Set;
|
|
46773
46965
|
if (captureUrl) {
|
|
@@ -47395,8 +47587,7 @@ async function waitForContentReady(tabId, captureUrl, intent, responseBodies) {
|
|
|
47395
47587
|
log("capture", `intent-aware wait: already captured API matching one of [${[...derivedHints].join(", ")}], skipping`);
|
|
47396
47588
|
}
|
|
47397
47589
|
}
|
|
47398
|
-
|
|
47399
|
-
if (captureUrl && responseBodies && (/search|explore|trending|tabs|discover/i.test(captureUrl) || /\b(person|people|profile|profiles|user|users|member|members|company|companies|organization|organisations|business|post|posts|tweet|tweets|status|statuses)\b/.test(lowerIntent))) {
|
|
47590
|
+
if (captureUrl && responseBodies && shouldScrollStimulate(captureUrl, intent)) {
|
|
47400
47591
|
try {
|
|
47401
47592
|
const before = responseBodies.size;
|
|
47402
47593
|
await evaluate(tabId, "window.scrollTo(0, Math.max(window.innerHeight, Math.min(document.body.scrollHeight, window.innerHeight * 2)))");
|
|
@@ -48470,6 +48661,7 @@ var init_capture = __esm(async () => {
|
|
|
48470
48661
|
init_domain();
|
|
48471
48662
|
init_logger();
|
|
48472
48663
|
init_header_classify();
|
|
48664
|
+
init_cardinality2();
|
|
48473
48665
|
init_browser_access();
|
|
48474
48666
|
await init_vault();
|
|
48475
48667
|
waitQueue = [];
|
|
@@ -117291,7 +117483,7 @@ __export(exports_extraction, {
|
|
|
117291
117483
|
cleanDOM: () => cleanDOM,
|
|
117292
117484
|
buildStructuredDataHeader: () => buildStructuredDataHeader
|
|
117293
117485
|
});
|
|
117294
|
-
function extractHtmlMetadataFallback(html3) {
|
|
117486
|
+
function extractHtmlMetadataFallback(html3, intent) {
|
|
117295
117487
|
if (!html3 || html3.length < 100)
|
|
117296
117488
|
return null;
|
|
117297
117489
|
try {
|
|
@@ -117323,8 +117515,9 @@ function extractHtmlMetadataFallback(html3) {
|
|
|
117323
117515
|
jsonLdBlocks.push(parsed);
|
|
117324
117516
|
} catch {}
|
|
117325
117517
|
});
|
|
117326
|
-
|
|
117327
|
-
|
|
117518
|
+
const usableJsonLd = isListLikeIntent2(intent) ? jsonLdBlocks.filter((b) => !valueLooksLikeSingleItem2(b)) : jsonLdBlocks;
|
|
117519
|
+
if (usableJsonLd.length > 0)
|
|
117520
|
+
out.json_ld = usableJsonLd;
|
|
117328
117521
|
const headings = [];
|
|
117329
117522
|
$2("h1, h2").each((_, el) => {
|
|
117330
117523
|
const text3 = cleanText($2(el).text());
|
|
@@ -119647,6 +119840,16 @@ function scoreSiteMetaJsonLdDemotion(structure, intent) {
|
|
|
119647
119840
|
return 0;
|
|
119648
119841
|
return -200;
|
|
119649
119842
|
}
|
|
119843
|
+
function isSingleItemStructureForList(structure, intent) {
|
|
119844
|
+
if (!TINY_RESULT_LIST_INTENT.test(intent.toLowerCase()))
|
|
119845
|
+
return false;
|
|
119846
|
+
if (structure.type === "repeated-elements")
|
|
119847
|
+
return false;
|
|
119848
|
+
return valueLooksLikeSingleItem2(structure.data);
|
|
119849
|
+
}
|
|
119850
|
+
function scoreSingleItemListMismatch(structure, intent) {
|
|
119851
|
+
return isSingleItemStructureForList(structure, intent) ? -200 : 0;
|
|
119852
|
+
}
|
|
119650
119853
|
function looksLikeTinyContentReadResult(data2, intent) {
|
|
119651
119854
|
if (data2 == null)
|
|
119652
119855
|
return { tiny: false, bytes: 0, stringLeafChars: 0 };
|
|
@@ -119800,9 +120003,9 @@ function extractFromDOM(html3, intent, contextUrl) {
|
|
|
119800
120003
|
const articleStructures = extractArticleBodySpecial(html3.length > 600000 ? html3.slice(0, 600000) : html3, intent);
|
|
119801
120004
|
const allStructures = [...flashStructures, ...githubStructures, ...repeatedPersonStructures, ...packageSearchStructures, ...xProfileStructures, ...postStructures, ...repeatedArticleStructures, ...trendStructures, ...definitionStructures, ...packageDetailStructures, ...arxivAbstractStructures, ...courseStructures, ...articleStructures, ...spaStructures, ...parseStructured(cleaned)].map((structure) => normalizeStructureForIntent(structure, intent));
|
|
119802
120005
|
const isListIntent = TINY_RESULT_LIST_INTENT.test(intent.toLowerCase());
|
|
119803
|
-
const structures = allStructures.filter((s) => !(s.type === "repeated-elements" && looksLikeDegenerateRowArray(s.data)) && !looksLikeConfigShape(s.data) && !looksLikeEmptyContainer(s.data) && !(isListIntent && looksLikeSiteMetaJsonLd(s.data)));
|
|
120006
|
+
const structures = allStructures.filter((s) => !(s.type === "repeated-elements" && looksLikeDegenerateRowArray(s.data)) && !looksLikeConfigShape(s.data) && !looksLikeEmptyContainer(s.data) && !(isListIntent && looksLikeSiteMetaJsonLd(s.data)) && !isSingleItemStructureForList(s, intent));
|
|
119804
120007
|
if (structures.length === 0) {
|
|
119805
|
-
const fallback2 = extractHtmlMetadataFallback(html3);
|
|
120008
|
+
const fallback2 = extractHtmlMetadataFallback(html3, intent);
|
|
119806
120009
|
if (fallback2) {
|
|
119807
120010
|
return _finalize({ data: fallback2, extraction_method: "html_metadata_fallback", confidence: 0.4 });
|
|
119808
120011
|
}
|
|
@@ -119811,7 +120014,7 @@ function extractFromDOM(html3, intent, contextUrl) {
|
|
|
119811
120014
|
const intentWords = intent.toLowerCase().split(/\s+/).filter(Boolean);
|
|
119812
120015
|
const scored = structures.map((s) => ({
|
|
119813
120016
|
structure: s,
|
|
119814
|
-
score: scoreRelevance(s, intentWords) + scoreSemanticFit(s, intent) + scoreSparseLinkList(s) + scoreFieldRichness(s) + scoreConfigShapeDemotion(s) + scoreDegenerateRowDemotion(s) + scoreDuplicateRowDemotion(s) + scoreEmptyContainerDemotion(s) + scoreSiteMetaJsonLdDemotion(s, intent) + scoreTableIntentOverlapDemotion(s, intent, contextUrl, structures)
|
|
120017
|
+
score: scoreRelevance(s, intentWords) + scoreSemanticFit(s, intent) + scoreSparseLinkList(s) + scoreFieldRichness(s) + scoreConfigShapeDemotion(s) + scoreDegenerateRowDemotion(s) + scoreDuplicateRowDemotion(s) + scoreEmptyContainerDemotion(s) + scoreSiteMetaJsonLdDemotion(s, intent) + scoreSingleItemListMismatch(s, intent) + scoreTableIntentOverlapDemotion(s, intent, contextUrl, structures)
|
|
119815
120018
|
}));
|
|
119816
120019
|
scored.sort((a, b) => b.score - a.score);
|
|
119817
120020
|
const passing = scored.filter((candidate) => assessIntentResult(candidate.structure.data, intent).verdict === "pass");
|
|
@@ -120427,6 +120630,7 @@ function sanitizeExtractionToJson(data2, depth = 0) {
|
|
|
120427
120630
|
var STRIP_TAGS, CHROME_TAGS, AD_PATTERNS, HIDDEN_ATTRS, CONTENT_SELECTORS, CARD_SELECTORS, CONFIG_TOP_LEVEL_KEYS, CONFIG_CHUNK_VALUE_KEYS, INTENT_OVERLAP_STOPWORDS, SITE_META_LD_TYPES, TINY_RESULT_LIST_INTENT, TINY_RESULT_DETAIL_INTENT, STRUCTURED_DATA_HIGHLIGHT_TYPES, HTML_TAG_RE, TABLE_RE;
|
|
120428
120631
|
var init_extraction = __esm(() => {
|
|
120429
120632
|
init_esm11();
|
|
120633
|
+
init_cardinality2();
|
|
120430
120634
|
STRIP_TAGS = new Set(["script", "style", "noscript", "svg", "iframe"]);
|
|
120431
120635
|
CHROME_TAGS = new Set(["nav", "footer", "header"]);
|
|
120432
120636
|
AD_PATTERNS = /\b(ad|ads|advert|advertisement|tracking|tracker|cookie-banner|cookie-consent|cookie-notice|popup|modal-overlay|gdpr|consent|banner-promo)\b/i;
|
|
@@ -123358,7 +123562,7 @@ function isProtobufContentType(contentType) {
|
|
|
123358
123562
|
function isProtobufLikeEndpoint(url, contentType) {
|
|
123359
123563
|
if (isProtobufContentType(contentType))
|
|
123360
123564
|
return true;
|
|
123361
|
-
return
|
|
123565
|
+
return /[-/](proto|protobuf)(\/|$|-)/i.test(url);
|
|
123362
123566
|
}
|
|
123363
123567
|
function decodeProtobufBytes(bytes) {
|
|
123364
123568
|
return decodeBytes(bytes, "bytes");
|
|
@@ -129952,7 +130156,8 @@ function selectBestEndpoint(endpoints, intent, skillDomain, contextUrl) {
|
|
|
129952
130156
|
const ranked = rankEndpoints(endpoints, intent, skillDomain, contextUrl);
|
|
129953
130157
|
if (ranked.length === 0)
|
|
129954
130158
|
throw new Error("All endpoints are disabled");
|
|
129955
|
-
|
|
130159
|
+
const preferred = ranked.find((r) => cardinalityMatches2(intent, { kind: "route", route: r.endpoint }, { contextUrl }));
|
|
130160
|
+
return (preferred ?? ranked[0]).endpoint;
|
|
129956
130161
|
}
|
|
129957
130162
|
function isHtml2(text3) {
|
|
129958
130163
|
const trimmed = text3.trimStart().slice(0, 200).toLowerCase();
|
|
@@ -129999,6 +130204,7 @@ var init_execution = __esm(async () => {
|
|
|
129999
130204
|
init_reveng_server_first();
|
|
130000
130205
|
init_header_classify();
|
|
130001
130206
|
init_storage_hole_bindings();
|
|
130207
|
+
init_cardinality2();
|
|
130002
130208
|
init_sealed_blob_store();
|
|
130003
130209
|
init_signer();
|
|
130004
130210
|
init_bundle_scanner();
|
|
@@ -130551,69 +130757,6 @@ function bindingGraphFromOperationGraph(og) {
|
|
|
130551
130757
|
return { endpoints, edges };
|
|
130552
130758
|
}
|
|
130553
130759
|
|
|
130554
|
-
// .tmp-runtime-src/values/cardinality.ts
|
|
130555
|
-
function isListLikeIntent2(intent) {
|
|
130556
|
-
return LIST_INTENT_RE2.test(intent ?? "");
|
|
130557
|
-
}
|
|
130558
|
-
function schemaLooksLikeSingleItem(rs) {
|
|
130559
|
-
if (!rs || typeof rs !== "object")
|
|
130560
|
-
return false;
|
|
130561
|
-
const schema = rs;
|
|
130562
|
-
if (schema.type === "array")
|
|
130563
|
-
return false;
|
|
130564
|
-
const props = schema.properties ?? {};
|
|
130565
|
-
for (const key2 of COLLECTION_KEYS2) {
|
|
130566
|
-
if (key2 in props)
|
|
130567
|
-
return false;
|
|
130568
|
-
}
|
|
130569
|
-
for (const value of Object.values(props)) {
|
|
130570
|
-
if (value && typeof value === "object" && value.type === "array" && value.items?.type === "object") {
|
|
130571
|
-
return false;
|
|
130572
|
-
}
|
|
130573
|
-
}
|
|
130574
|
-
if (schema.type !== "object")
|
|
130575
|
-
return false;
|
|
130576
|
-
const hasType = "@type" in props;
|
|
130577
|
-
const hasName = "name" in props || "title" in props;
|
|
130578
|
-
const hasPriceish = "offers" in props || "price" in props || "sku" in props;
|
|
130579
|
-
return hasType || hasName && hasPriceish;
|
|
130580
|
-
}
|
|
130581
|
-
var LIST_INTENT_RE2, ITEM_SCHEMA_TYPES2, COLLECTION_KEYS2;
|
|
130582
|
-
var init_cardinality2 = __esm(() => {
|
|
130583
|
-
LIST_INTENT_RE2 = /\b(search|find|lookup|browse|discover|list(?:ings?)?|feed|catalog(?:ue)?)\b/i;
|
|
130584
|
-
ITEM_SCHEMA_TYPES2 = new Set([
|
|
130585
|
-
"product",
|
|
130586
|
-
"offer",
|
|
130587
|
-
"article",
|
|
130588
|
-
"newsarticle",
|
|
130589
|
-
"blogposting",
|
|
130590
|
-
"recipe",
|
|
130591
|
-
"event",
|
|
130592
|
-
"place",
|
|
130593
|
-
"localbusiness",
|
|
130594
|
-
"jobposting",
|
|
130595
|
-
"book",
|
|
130596
|
-
"movie",
|
|
130597
|
-
"creativework",
|
|
130598
|
-
"person",
|
|
130599
|
-
"organization"
|
|
130600
|
-
]);
|
|
130601
|
-
COLLECTION_KEYS2 = [
|
|
130602
|
-
"itemListElement",
|
|
130603
|
-
"items",
|
|
130604
|
-
"results",
|
|
130605
|
-
"products",
|
|
130606
|
-
"listings",
|
|
130607
|
-
"data",
|
|
130608
|
-
"edges",
|
|
130609
|
-
"hits",
|
|
130610
|
-
"records",
|
|
130611
|
-
"entries",
|
|
130612
|
-
"rows",
|
|
130613
|
-
"nodes"
|
|
130614
|
-
];
|
|
130615
|
-
});
|
|
130616
|
-
|
|
130617
130760
|
// .tmp-runtime-src/values/yield-safety.ts
|
|
130618
130761
|
function tokenizeKey(key2) {
|
|
130619
130762
|
return key2.replace(/([a-z0-9])([A-Z])/g, "$1 $2").split(/[_\-.\s]+/).map((s) => s.toLowerCase()).filter(Boolean);
|
|
@@ -132745,26 +132888,6 @@ function endpointTargetsMismatchedLocalReplayHost(endpoint, contextUrl) {
|
|
|
132745
132888
|
function endpointHasNegativeTag(endpoint, tag) {
|
|
132746
132889
|
return (endpoint.semantic?.negative_tags ?? []).some((candidate) => candidate.trim().toLowerCase() === tag.trim().toLowerCase());
|
|
132747
132890
|
}
|
|
132748
|
-
function looksLikeSingleItemRoute(endpoint) {
|
|
132749
|
-
const tmpl = endpoint.url_template ?? "";
|
|
132750
|
-
let pathAndQuery = tmpl;
|
|
132751
|
-
try {
|
|
132752
|
-
const u = new URL(tmpl);
|
|
132753
|
-
pathAndQuery = `${u.pathname}${u.search}`;
|
|
132754
|
-
} catch {}
|
|
132755
|
-
const lower = pathAndQuery.toLowerCase();
|
|
132756
|
-
if (/\/(?:search|q|categories?|browse|results?|listings|explore|discover|feed|catalog(?:ue)?|collections?|shop|all)\b/.test(lower) || /[?&](?:q|query|keyword|keywords|search|term|category|cat|page)=/.test(lower)) {
|
|
132757
|
-
return false;
|
|
132758
|
-
}
|
|
132759
|
-
if (/\/(?:p|product|products|item|items|listing|detail|details|dp|pd|sku)\/[^/]+/.test(lower))
|
|
132760
|
-
return true;
|
|
132761
|
-
const lastSeg = lower.split("?")[0].replace(/\/+$/, "").split("/").pop() ?? "";
|
|
132762
|
-
if (/-\d{3,}$/.test(lastSeg) || /^\d{3,}$/.test(lastSeg))
|
|
132763
|
-
return true;
|
|
132764
|
-
if (/\{[^}]+\}/.test(lower))
|
|
132765
|
-
return false;
|
|
132766
|
-
return schemaLooksLikeSingleItem(endpoint.response_schema);
|
|
132767
|
-
}
|
|
132768
132891
|
function isResolveUsableEndpointForIntent(endpoint, intent, contextUrl) {
|
|
132769
132892
|
if (endpointTargetsMismatchedLocalReplayHost(endpoint, contextUrl))
|
|
132770
132893
|
return false;
|
|
@@ -132774,7 +132897,7 @@ function isResolveUsableEndpointForIntent(endpoint, intent, contextUrl) {
|
|
|
132774
132897
|
if (isFeedTimelineIntent(intent, contextUrl) && endpointHasNegativeTag(endpoint, "helper")) {
|
|
132775
132898
|
return false;
|
|
132776
132899
|
}
|
|
132777
|
-
if (
|
|
132900
|
+
if (!cardinalityMatches2(intent, { kind: "route", route: endpoint }, { contextUrl })) {
|
|
132778
132901
|
return false;
|
|
132779
132902
|
}
|
|
132780
132903
|
return true;
|
|
@@ -133791,7 +133914,20 @@ function inferPreferredEntityTokens(intent) {
|
|
|
133791
133914
|
return [];
|
|
133792
133915
|
}
|
|
133793
133916
|
function isAcceptableIntentResult(result, intent) {
|
|
133794
|
-
|
|
133917
|
+
if (assessIntentResult(result, intent).verdict === "fail")
|
|
133918
|
+
return false;
|
|
133919
|
+
if (!cardinalityMatches2(intent, { kind: "value", value: unwrapResultPayload(result) }))
|
|
133920
|
+
return false;
|
|
133921
|
+
return true;
|
|
133922
|
+
}
|
|
133923
|
+
function unwrapResultPayload(result) {
|
|
133924
|
+
if (result == null || typeof result !== "object" || Array.isArray(result))
|
|
133925
|
+
return result;
|
|
133926
|
+
const rec = { ...result };
|
|
133927
|
+
for (const k of ["available_endpoints", "available_operations", "shortlist_for_judgment", "workflow_dag", "walked_from", "exa_candidates", "run_plan"]) {
|
|
133928
|
+
delete rec[k];
|
|
133929
|
+
}
|
|
133930
|
+
return rec;
|
|
133795
133931
|
}
|
|
133796
133932
|
function candidateMatchesPreferredEntity(candidate, preferredTokens) {
|
|
133797
133933
|
if (preferredTokens.length === 0)
|
|
@@ -144132,9 +144268,14 @@ async function registerRoutes(app) {
|
|
|
144132
144268
|
recovered = true;
|
|
144133
144269
|
} else if (errResult.available_endpoints?.length === 1) {
|
|
144134
144270
|
const only = errResult.available_endpoints[0].endpoint_id;
|
|
144135
|
-
|
|
144136
|
-
|
|
144137
|
-
|
|
144271
|
+
const onlyEp = (skill.endpoints ?? []).find((e) => e.endpoint_id === only);
|
|
144272
|
+
if (!onlyEp || cardinalityMatches2(intent, { kind: "route", route: onlyEp }, { contextUrl: context_url })) {
|
|
144273
|
+
console.log(`[exec] D7 single-endpoint skill: rewriting endpoint_id ${want} → ${only}`);
|
|
144274
|
+
execParams.endpoint_id = only;
|
|
144275
|
+
recovered = true;
|
|
144276
|
+
} else {
|
|
144277
|
+
console.log(`[exec] D7 single-endpoint skill: only endpoint ${only} is a single-item route for a list intent — not forcing (cardinality gate)`);
|
|
144278
|
+
}
|
|
144138
144279
|
}
|
|
144139
144280
|
}
|
|
144140
144281
|
if (!recovered && (skill.endpoints?.length ?? 0) >= 2 && intent) {
|
|
@@ -145362,6 +145503,7 @@ var init_routes = __esm(async () => {
|
|
|
145362
145503
|
init_client3();
|
|
145363
145504
|
init_reveng_server_first();
|
|
145364
145505
|
init_header_classify();
|
|
145506
|
+
init_cardinality2();
|
|
145365
145507
|
init_capture_spool();
|
|
145366
145508
|
init_nanoid();
|
|
145367
145509
|
init_marketplace();
|
|
@@ -150755,7 +150897,7 @@ __export(exports_extraction2, {
|
|
|
150755
150897
|
cleanDOM: () => cleanDOM2,
|
|
150756
150898
|
buildStructuredDataHeader: () => buildStructuredDataHeader2
|
|
150757
150899
|
});
|
|
150758
|
-
function extractHtmlMetadataFallback2(html3) {
|
|
150900
|
+
function extractHtmlMetadataFallback2(html3, intent) {
|
|
150759
150901
|
if (!html3 || html3.length < 100)
|
|
150760
150902
|
return null;
|
|
150761
150903
|
try {
|
|
@@ -150787,8 +150929,9 @@ function extractHtmlMetadataFallback2(html3) {
|
|
|
150787
150929
|
jsonLdBlocks.push(parsed);
|
|
150788
150930
|
} catch {}
|
|
150789
150931
|
});
|
|
150790
|
-
|
|
150791
|
-
|
|
150932
|
+
const usableJsonLd = isListLikeIntent2(intent) ? jsonLdBlocks.filter((b) => !valueLooksLikeSingleItem2(b)) : jsonLdBlocks;
|
|
150933
|
+
if (usableJsonLd.length > 0)
|
|
150934
|
+
out.json_ld = usableJsonLd;
|
|
150792
150935
|
const headings = [];
|
|
150793
150936
|
$2("h1, h2").each((_, el) => {
|
|
150794
150937
|
const text3 = cleanText2($2(el).text());
|
|
@@ -153111,6 +153254,16 @@ function scoreSiteMetaJsonLdDemotion2(structure, intent) {
|
|
|
153111
153254
|
return 0;
|
|
153112
153255
|
return -200;
|
|
153113
153256
|
}
|
|
153257
|
+
function isSingleItemStructureForList2(structure, intent) {
|
|
153258
|
+
if (!TINY_RESULT_LIST_INTENT2.test(intent.toLowerCase()))
|
|
153259
|
+
return false;
|
|
153260
|
+
if (structure.type === "repeated-elements")
|
|
153261
|
+
return false;
|
|
153262
|
+
return valueLooksLikeSingleItem2(structure.data);
|
|
153263
|
+
}
|
|
153264
|
+
function scoreSingleItemListMismatch2(structure, intent) {
|
|
153265
|
+
return isSingleItemStructureForList2(structure, intent) ? -200 : 0;
|
|
153266
|
+
}
|
|
153114
153267
|
function looksLikeTinyContentReadResult2(data2, intent) {
|
|
153115
153268
|
if (data2 == null)
|
|
153116
153269
|
return { tiny: false, bytes: 0, stringLeafChars: 0 };
|
|
@@ -153264,9 +153417,9 @@ function extractFromDOM2(html3, intent, contextUrl) {
|
|
|
153264
153417
|
const articleStructures = extractArticleBodySpecial2(html3.length > 600000 ? html3.slice(0, 600000) : html3, intent);
|
|
153265
153418
|
const allStructures = [...flashStructures, ...githubStructures, ...repeatedPersonStructures, ...packageSearchStructures, ...xProfileStructures, ...postStructures, ...repeatedArticleStructures, ...trendStructures, ...definitionStructures, ...packageDetailStructures, ...arxivAbstractStructures, ...courseStructures, ...articleStructures, ...spaStructures, ...parseStructured2(cleaned)].map((structure) => normalizeStructureForIntent2(structure, intent));
|
|
153266
153419
|
const isListIntent = TINY_RESULT_LIST_INTENT2.test(intent.toLowerCase());
|
|
153267
|
-
const structures = allStructures.filter((s) => !(s.type === "repeated-elements" && looksLikeDegenerateRowArray2(s.data)) && !looksLikeConfigShape2(s.data) && !looksLikeEmptyContainer2(s.data) && !(isListIntent && looksLikeSiteMetaJsonLd2(s.data)));
|
|
153420
|
+
const structures = allStructures.filter((s) => !(s.type === "repeated-elements" && looksLikeDegenerateRowArray2(s.data)) && !looksLikeConfigShape2(s.data) && !looksLikeEmptyContainer2(s.data) && !(isListIntent && looksLikeSiteMetaJsonLd2(s.data)) && !isSingleItemStructureForList2(s, intent));
|
|
153268
153421
|
if (structures.length === 0) {
|
|
153269
|
-
const fallback2 = extractHtmlMetadataFallback2(html3);
|
|
153422
|
+
const fallback2 = extractHtmlMetadataFallback2(html3, intent);
|
|
153270
153423
|
if (fallback2) {
|
|
153271
153424
|
return _finalize({ data: fallback2, extraction_method: "html_metadata_fallback", confidence: 0.4 });
|
|
153272
153425
|
}
|
|
@@ -153275,7 +153428,7 @@ function extractFromDOM2(html3, intent, contextUrl) {
|
|
|
153275
153428
|
const intentWords = intent.toLowerCase().split(/\s+/).filter(Boolean);
|
|
153276
153429
|
const scored = structures.map((s) => ({
|
|
153277
153430
|
structure: s,
|
|
153278
|
-
score: scoreRelevance2(s, intentWords) + scoreSemanticFit2(s, intent) + scoreSparseLinkList2(s) + scoreFieldRichness2(s) + scoreConfigShapeDemotion2(s) + scoreDegenerateRowDemotion2(s) + scoreDuplicateRowDemotion2(s) + scoreEmptyContainerDemotion2(s) + scoreSiteMetaJsonLdDemotion2(s, intent) + scoreTableIntentOverlapDemotion2(s, intent, contextUrl, structures)
|
|
153431
|
+
score: scoreRelevance2(s, intentWords) + scoreSemanticFit2(s, intent) + scoreSparseLinkList2(s) + scoreFieldRichness2(s) + scoreConfigShapeDemotion2(s) + scoreDegenerateRowDemotion2(s) + scoreDuplicateRowDemotion2(s) + scoreEmptyContainerDemotion2(s) + scoreSiteMetaJsonLdDemotion2(s, intent) + scoreSingleItemListMismatch2(s, intent) + scoreTableIntentOverlapDemotion2(s, intent, contextUrl, structures)
|
|
153279
153432
|
}));
|
|
153280
153433
|
scored.sort((a, b) => b.score - a.score);
|
|
153281
153434
|
const passing = scored.filter((candidate) => assessIntentResult(candidate.structure.data, intent).verdict === "pass");
|
|
@@ -153891,6 +154044,7 @@ function sanitizeExtractionToJson2(data2, depth = 0) {
|
|
|
153891
154044
|
var STRIP_TAGS2, CHROME_TAGS2, AD_PATTERNS2, HIDDEN_ATTRS2, CONTENT_SELECTORS2, CARD_SELECTORS2, CONFIG_TOP_LEVEL_KEYS2, CONFIG_CHUNK_VALUE_KEYS2, INTENT_OVERLAP_STOPWORDS2, SITE_META_LD_TYPES2, TINY_RESULT_LIST_INTENT2, TINY_RESULT_DETAIL_INTENT2, STRUCTURED_DATA_HIGHLIGHT_TYPES2, HTML_TAG_RE2, TABLE_RE2;
|
|
153892
154045
|
var init_extraction2 = __esm(() => {
|
|
153893
154046
|
init_esm11();
|
|
154047
|
+
init_cardinality2();
|
|
153894
154048
|
STRIP_TAGS2 = new Set(["script", "style", "noscript", "svg", "iframe"]);
|
|
153895
154049
|
CHROME_TAGS2 = new Set(["nav", "footer", "header"]);
|
|
153896
154050
|
AD_PATTERNS2 = /\b(ad|ads|advert|advertisement|tracking|tracker|cookie-banner|cookie-consent|cookie-notice|popup|modal-overlay|gdpr|consent|banner-promo)\b/i;
|
|
@@ -240035,14 +240189,30 @@ async function cmdRun(args, flags, verb = "run") {
|
|
|
240035
240189
|
} else if (explicitEndpointId || !bestEndpoint || endpointIsSafeToAutoExecute(bestEndpoint)) {
|
|
240036
240190
|
runPlan.push({ step: "execute", mode: "direct_api", status: "started", endpoint_id: endpointToExecute });
|
|
240037
240191
|
const resolvedSource = typeof result.source === "string" ? result.source : undefined;
|
|
240038
|
-
|
|
240039
|
-
|
|
240040
|
-
|
|
240041
|
-
|
|
240042
|
-
|
|
240043
|
-
|
|
240044
|
-
|
|
240045
|
-
|
|
240192
|
+
const deferralResult = result;
|
|
240193
|
+
const executed = await withPendingNotice(api4("POST", `/v1/skills/${skillId}/execute`, execBody(endpointToExecute)), "Executing best endpoint...");
|
|
240194
|
+
if (!explicitEndpointId && isResolveSuccessResult(executed) && !resolutionCardinalityMatches(intent, executed.result ?? executed.data)) {
|
|
240195
|
+
runPlan[runPlan.length - 1] = {
|
|
240196
|
+
...runPlan[runPlan.length - 1],
|
|
240197
|
+
status: "skipped",
|
|
240198
|
+
reason: "cardinality_mismatch_single_item"
|
|
240199
|
+
};
|
|
240200
|
+
deferralResult.next_action = {
|
|
240201
|
+
title: "List intent returned a single item",
|
|
240202
|
+
command: `unbrowse execute --skill ${skillId} --endpoint ${endpointToExecute}`,
|
|
240203
|
+
why: "Auto-execute yielded a single item for a list/search intent; the page's listings are likely JS-rendered behind an internal API. Returning the route shortlist instead of one item."
|
|
240204
|
+
};
|
|
240205
|
+
result = deferralResult;
|
|
240206
|
+
} else {
|
|
240207
|
+
result = executed;
|
|
240208
|
+
if (resolvedSource && typeof result.source !== "string")
|
|
240209
|
+
result.source = resolvedSource;
|
|
240210
|
+
runPlan[runPlan.length - 1] = {
|
|
240211
|
+
...runPlan[runPlan.length - 1],
|
|
240212
|
+
status: isResolveSuccessResult(result) ? "complete" : "error",
|
|
240213
|
+
error: resolveResultError(result) ?? null
|
|
240214
|
+
};
|
|
240215
|
+
}
|
|
240046
240216
|
} else {
|
|
240047
240217
|
runPlan.push({
|
|
240048
240218
|
step: "execute",
|
package/runtime/mcp.js
CHANGED
|
@@ -36310,7 +36310,7 @@ var init_cached_resolution = __esm(() => {
|
|
|
36310
36310
|
});
|
|
36311
36311
|
|
|
36312
36312
|
// .tmp-runtime-src/build-info.generated.ts
|
|
36313
|
-
var BUILD_RELEASE_VERSION = "9.
|
|
36313
|
+
var BUILD_RELEASE_VERSION = "9.7.0", BUILD_GIT_SHA = "98fa4d4472e2", BUILD_CODE_HASH = "5d9ebf619c61", BUILD_RELEASE_MANIFEST_BASE64 = "eyJzY2hlbWFfdmVyc2lvbiI6MSwicmVsZWFzZV92ZXJzaW9uIjoiOS43LjAiLCJnaXRfc2hhIjoiOThmYTRkNDQ3MmUyIiwiY29kZV9oYXNoIjoiNWQ5ZWJmNjE5YzYxIiwidHJhY2VfdmVyc2lvbiI6IjVkOWViZjYxOWM2MUA5OGZhNGQ0NDcyZTIiLCJpc3N1ZWRfYXQiOiIyMDI2LTA2LTE4VDA0OjU4OjQwLjQ3NFoifQ", BUILD_RELEASE_MANIFEST_SIGNATURE = "-b2L4xhYhJ-mLJeo39RL19HSAKgjhZ2_D3ezoXjvQLQ", BUILD_DEFAULT_BACKEND_URL = "https://beta-api.unbrowse.ai", BUILD_DEFAULT_PROFILE = "";
|
|
36314
36314
|
|
|
36315
36315
|
// .tmp-runtime-src/version.ts
|
|
36316
36316
|
import { createHash as createHash4 } from "crypto";
|
|
@@ -43177,6 +43177,133 @@ var init_header_classify = __esm(() => {
|
|
|
43177
43177
|
SENSITIVE_HEADER_PATTERN = /token|key|secret|credential|password|session/i;
|
|
43178
43178
|
});
|
|
43179
43179
|
|
|
43180
|
+
// .tmp-runtime-src/values/cardinality.ts
|
|
43181
|
+
function isListLikeIntent(intent) {
|
|
43182
|
+
return LIST_INTENT_RE.test(intent ?? "");
|
|
43183
|
+
}
|
|
43184
|
+
function valueLooksLikeSingleItem(value) {
|
|
43185
|
+
if (value == null || Array.isArray(value) || typeof value !== "object")
|
|
43186
|
+
return false;
|
|
43187
|
+
const obj = value;
|
|
43188
|
+
for (const key of COLLECTION_KEYS) {
|
|
43189
|
+
if (Array.isArray(obj[key]))
|
|
43190
|
+
return false;
|
|
43191
|
+
}
|
|
43192
|
+
for (const v of Object.values(obj)) {
|
|
43193
|
+
if (Array.isArray(v) && v.some((x) => x !== null && typeof x === "object"))
|
|
43194
|
+
return false;
|
|
43195
|
+
}
|
|
43196
|
+
const atType = typeof obj["@type"] === "string" ? obj["@type"].toLowerCase() : "";
|
|
43197
|
+
const isItemType = ITEM_SCHEMA_TYPES.has(atType);
|
|
43198
|
+
const hasName = "name" in obj || "title" in obj || "headline" in obj;
|
|
43199
|
+
const hasPriceish = "offers" in obj || "price" in obj || "sku" in obj;
|
|
43200
|
+
return isItemType || hasName && hasPriceish;
|
|
43201
|
+
}
|
|
43202
|
+
function schemaLooksLikeSingleItem(rs) {
|
|
43203
|
+
if (!rs || typeof rs !== "object")
|
|
43204
|
+
return false;
|
|
43205
|
+
const schema = rs;
|
|
43206
|
+
if (schema.type === "array")
|
|
43207
|
+
return false;
|
|
43208
|
+
const props = schema.properties ?? {};
|
|
43209
|
+
for (const key of COLLECTION_KEYS) {
|
|
43210
|
+
if (key in props)
|
|
43211
|
+
return false;
|
|
43212
|
+
}
|
|
43213
|
+
for (const value of Object.values(props)) {
|
|
43214
|
+
if (value && typeof value === "object" && value.type === "array" && value.items?.type === "object") {
|
|
43215
|
+
return false;
|
|
43216
|
+
}
|
|
43217
|
+
}
|
|
43218
|
+
if (schema.type !== "object")
|
|
43219
|
+
return false;
|
|
43220
|
+
const hasType = "@type" in props;
|
|
43221
|
+
const hasName = "name" in props || "title" in props;
|
|
43222
|
+
const hasPriceish = "offers" in props || "price" in props || "sku" in props;
|
|
43223
|
+
return hasType || hasName && hasPriceish;
|
|
43224
|
+
}
|
|
43225
|
+
function routeLooksLikeSingleItem(route) {
|
|
43226
|
+
const tmpl = route.url_template ?? "";
|
|
43227
|
+
let pathAndQuery = tmpl;
|
|
43228
|
+
try {
|
|
43229
|
+
const u = new URL(tmpl);
|
|
43230
|
+
pathAndQuery = `${u.pathname}${u.search}`;
|
|
43231
|
+
} catch {}
|
|
43232
|
+
const lower = pathAndQuery.toLowerCase();
|
|
43233
|
+
if (/\/(?:search|q|categories?|browse|results?|listings|explore|discover|feed|catalog(?:ue)?|collections?|shop|all)\b/.test(lower) || /[?&](?:q|query|keyword|keywords|search|term|category|cat|page)=/.test(lower)) {
|
|
43234
|
+
return false;
|
|
43235
|
+
}
|
|
43236
|
+
if (/\/(?:p|product|products|item|items|listing|detail|details|dp|pd|sku)\/[^/]+/.test(lower))
|
|
43237
|
+
return true;
|
|
43238
|
+
const lastSeg = lower.split("?")[0].replace(/\/+$/, "").split("/").pop() ?? "";
|
|
43239
|
+
if (/-\d{3,}$/.test(lastSeg) || /^\d{3,}$/.test(lastSeg))
|
|
43240
|
+
return true;
|
|
43241
|
+
if (/\{[^}]+\}/.test(lower))
|
|
43242
|
+
return false;
|
|
43243
|
+
return schemaLooksLikeSingleItem(route.response_schema);
|
|
43244
|
+
}
|
|
43245
|
+
function urlPathLooksListLike(contextUrl) {
|
|
43246
|
+
if (!contextUrl)
|
|
43247
|
+
return false;
|
|
43248
|
+
try {
|
|
43249
|
+
const pathname = new URL(contextUrl).pathname.toLowerCase();
|
|
43250
|
+
return /\/(?:search|basic-search|result-page|results?|discover|browse|categories?|q|listings|feed|catalog(?:ue)?)\b/.test(pathname);
|
|
43251
|
+
} catch {
|
|
43252
|
+
return false;
|
|
43253
|
+
}
|
|
43254
|
+
}
|
|
43255
|
+
function cardinalityMatches(intent, subject, opts) {
|
|
43256
|
+
const wantsMany = isListLikeIntent(intent) || urlPathLooksListLike(opts?.contextUrl);
|
|
43257
|
+
if (!wantsMany)
|
|
43258
|
+
return true;
|
|
43259
|
+
switch (subject.kind) {
|
|
43260
|
+
case "value":
|
|
43261
|
+
return !valueLooksLikeSingleItem(subject.value);
|
|
43262
|
+
case "schema":
|
|
43263
|
+
return !schemaLooksLikeSingleItem(subject.schema);
|
|
43264
|
+
case "route":
|
|
43265
|
+
return !routeLooksLikeSingleItem(subject.route);
|
|
43266
|
+
}
|
|
43267
|
+
}
|
|
43268
|
+
function resolutionCardinalityMatches(intent, data) {
|
|
43269
|
+
return cardinalityMatches(intent, { kind: "value", value: data });
|
|
43270
|
+
}
|
|
43271
|
+
var LIST_INTENT_RE, ITEM_SCHEMA_TYPES, COLLECTION_KEYS;
|
|
43272
|
+
var init_cardinality = __esm(() => {
|
|
43273
|
+
LIST_INTENT_RE = /\b(search|find|lookup|browse|discover|list(?:ings?)?|feed|catalog(?:ue)?)\b/i;
|
|
43274
|
+
ITEM_SCHEMA_TYPES = new Set([
|
|
43275
|
+
"product",
|
|
43276
|
+
"offer",
|
|
43277
|
+
"article",
|
|
43278
|
+
"newsarticle",
|
|
43279
|
+
"blogposting",
|
|
43280
|
+
"recipe",
|
|
43281
|
+
"event",
|
|
43282
|
+
"place",
|
|
43283
|
+
"localbusiness",
|
|
43284
|
+
"jobposting",
|
|
43285
|
+
"book",
|
|
43286
|
+
"movie",
|
|
43287
|
+
"creativework",
|
|
43288
|
+
"person",
|
|
43289
|
+
"organization"
|
|
43290
|
+
]);
|
|
43291
|
+
COLLECTION_KEYS = [
|
|
43292
|
+
"itemListElement",
|
|
43293
|
+
"items",
|
|
43294
|
+
"results",
|
|
43295
|
+
"products",
|
|
43296
|
+
"listings",
|
|
43297
|
+
"data",
|
|
43298
|
+
"edges",
|
|
43299
|
+
"hits",
|
|
43300
|
+
"records",
|
|
43301
|
+
"entries",
|
|
43302
|
+
"rows",
|
|
43303
|
+
"nodes"
|
|
43304
|
+
];
|
|
43305
|
+
});
|
|
43306
|
+
|
|
43180
43307
|
// node_modules/.bun/nanoid@5.1.11/node_modules/nanoid/url-alphabet/index.js
|
|
43181
43308
|
var urlAlphabet = "useandom-26T198340PX75pxJACKVERYMINDBUSHWOLF_GQZbfghjklqvwyzrict";
|
|
43182
43309
|
|
|
@@ -43700,6 +43827,7 @@ __export(exports_capture, {
|
|
|
43700
43827
|
tagRequestProvenance: () => tagRequestProvenance,
|
|
43701
43828
|
shutdownAllBrowsers: () => shutdownAllBrowsers,
|
|
43702
43829
|
shouldStopHydrationWait: () => shouldStopHydrationWait,
|
|
43830
|
+
shouldScrollStimulate: () => shouldScrollStimulate,
|
|
43703
43831
|
selectPerformanceReplayCandidates: () => selectPerformanceReplayCandidates,
|
|
43704
43832
|
registerDocumentStartScript: () => registerDocumentStartScript,
|
|
43705
43833
|
navigatePageForCapture: () => navigatePageForCapture,
|
|
@@ -43962,6 +44090,9 @@ function extractRouteHint(url) {
|
|
|
43962
44090
|
} catch {}
|
|
43963
44091
|
return null;
|
|
43964
44092
|
}
|
|
44093
|
+
function shouldScrollStimulate(captureUrl, intent) {
|
|
44094
|
+
return isListLikeIntent(intent) || urlPathLooksListLike(captureUrl);
|
|
44095
|
+
}
|
|
43965
44096
|
function deriveIntentHints(captureUrl, intent) {
|
|
43966
44097
|
const derivedHints = new Set;
|
|
43967
44098
|
if (captureUrl) {
|
|
@@ -44589,8 +44720,7 @@ async function waitForContentReady(tabId, captureUrl, intent, responseBodies) {
|
|
|
44589
44720
|
log("capture", `intent-aware wait: already captured API matching one of [${[...derivedHints].join(", ")}], skipping`);
|
|
44590
44721
|
}
|
|
44591
44722
|
}
|
|
44592
|
-
|
|
44593
|
-
if (captureUrl && responseBodies && (/search|explore|trending|tabs|discover/i.test(captureUrl) || /\b(person|people|profile|profiles|user|users|member|members|company|companies|organization|organisations|business|post|posts|tweet|tweets|status|statuses)\b/.test(lowerIntent))) {
|
|
44723
|
+
if (captureUrl && responseBodies && shouldScrollStimulate(captureUrl, intent)) {
|
|
44594
44724
|
try {
|
|
44595
44725
|
const before = responseBodies.size;
|
|
44596
44726
|
await evaluate(tabId, "window.scrollTo(0, Math.max(window.innerHeight, Math.min(document.body.scrollHeight, window.innerHeight * 2)))");
|
|
@@ -45664,6 +45794,7 @@ var init_capture = __esm(async () => {
|
|
|
45664
45794
|
init_domain();
|
|
45665
45795
|
init_logger();
|
|
45666
45796
|
init_header_classify();
|
|
45797
|
+
init_cardinality();
|
|
45667
45798
|
init_browser_access();
|
|
45668
45799
|
await init_vault();
|
|
45669
45800
|
waitQueue = [];
|
|
@@ -115440,7 +115571,7 @@ __export(exports_extraction, {
|
|
|
115440
115571
|
cleanDOM: () => cleanDOM,
|
|
115441
115572
|
buildStructuredDataHeader: () => buildStructuredDataHeader
|
|
115442
115573
|
});
|
|
115443
|
-
function extractHtmlMetadataFallback(html3) {
|
|
115574
|
+
function extractHtmlMetadataFallback(html3, intent) {
|
|
115444
115575
|
if (!html3 || html3.length < 100)
|
|
115445
115576
|
return null;
|
|
115446
115577
|
try {
|
|
@@ -115472,8 +115603,9 @@ function extractHtmlMetadataFallback(html3) {
|
|
|
115472
115603
|
jsonLdBlocks.push(parsed);
|
|
115473
115604
|
} catch {}
|
|
115474
115605
|
});
|
|
115475
|
-
|
|
115476
|
-
|
|
115606
|
+
const usableJsonLd = isListLikeIntent(intent) ? jsonLdBlocks.filter((b) => !valueLooksLikeSingleItem(b)) : jsonLdBlocks;
|
|
115607
|
+
if (usableJsonLd.length > 0)
|
|
115608
|
+
out.json_ld = usableJsonLd;
|
|
115477
115609
|
const headings = [];
|
|
115478
115610
|
$2("h1, h2").each((_, el) => {
|
|
115479
115611
|
const text3 = cleanText($2(el).text());
|
|
@@ -117796,6 +117928,16 @@ function scoreSiteMetaJsonLdDemotion(structure, intent) {
|
|
|
117796
117928
|
return 0;
|
|
117797
117929
|
return -200;
|
|
117798
117930
|
}
|
|
117931
|
+
function isSingleItemStructureForList(structure, intent) {
|
|
117932
|
+
if (!TINY_RESULT_LIST_INTENT.test(intent.toLowerCase()))
|
|
117933
|
+
return false;
|
|
117934
|
+
if (structure.type === "repeated-elements")
|
|
117935
|
+
return false;
|
|
117936
|
+
return valueLooksLikeSingleItem(structure.data);
|
|
117937
|
+
}
|
|
117938
|
+
function scoreSingleItemListMismatch(structure, intent) {
|
|
117939
|
+
return isSingleItemStructureForList(structure, intent) ? -200 : 0;
|
|
117940
|
+
}
|
|
117799
117941
|
function looksLikeTinyContentReadResult(data2, intent) {
|
|
117800
117942
|
if (data2 == null)
|
|
117801
117943
|
return { tiny: false, bytes: 0, stringLeafChars: 0 };
|
|
@@ -117949,9 +118091,9 @@ function extractFromDOM(html3, intent, contextUrl) {
|
|
|
117949
118091
|
const articleStructures = extractArticleBodySpecial(html3.length > 600000 ? html3.slice(0, 600000) : html3, intent);
|
|
117950
118092
|
const allStructures = [...flashStructures, ...githubStructures, ...repeatedPersonStructures, ...packageSearchStructures, ...xProfileStructures, ...postStructures, ...repeatedArticleStructures, ...trendStructures, ...definitionStructures, ...packageDetailStructures, ...arxivAbstractStructures, ...courseStructures, ...articleStructures, ...spaStructures, ...parseStructured(cleaned)].map((structure) => normalizeStructureForIntent(structure, intent));
|
|
117951
118093
|
const isListIntent = TINY_RESULT_LIST_INTENT.test(intent.toLowerCase());
|
|
117952
|
-
const structures = allStructures.filter((s) => !(s.type === "repeated-elements" && looksLikeDegenerateRowArray(s.data)) && !looksLikeConfigShape(s.data) && !looksLikeEmptyContainer(s.data) && !(isListIntent && looksLikeSiteMetaJsonLd(s.data)));
|
|
118094
|
+
const structures = allStructures.filter((s) => !(s.type === "repeated-elements" && looksLikeDegenerateRowArray(s.data)) && !looksLikeConfigShape(s.data) && !looksLikeEmptyContainer(s.data) && !(isListIntent && looksLikeSiteMetaJsonLd(s.data)) && !isSingleItemStructureForList(s, intent));
|
|
117953
118095
|
if (structures.length === 0) {
|
|
117954
|
-
const fallback2 = extractHtmlMetadataFallback(html3);
|
|
118096
|
+
const fallback2 = extractHtmlMetadataFallback(html3, intent);
|
|
117955
118097
|
if (fallback2) {
|
|
117956
118098
|
return _finalize({ data: fallback2, extraction_method: "html_metadata_fallback", confidence: 0.4 });
|
|
117957
118099
|
}
|
|
@@ -117960,7 +118102,7 @@ function extractFromDOM(html3, intent, contextUrl) {
|
|
|
117960
118102
|
const intentWords = intent.toLowerCase().split(/\s+/).filter(Boolean);
|
|
117961
118103
|
const scored = structures.map((s) => ({
|
|
117962
118104
|
structure: s,
|
|
117963
|
-
score: scoreRelevance(s, intentWords) + scoreSemanticFit(s, intent) + scoreSparseLinkList(s) + scoreFieldRichness(s) + scoreConfigShapeDemotion(s) + scoreDegenerateRowDemotion(s) + scoreDuplicateRowDemotion(s) + scoreEmptyContainerDemotion(s) + scoreSiteMetaJsonLdDemotion(s, intent) + scoreTableIntentOverlapDemotion(s, intent, contextUrl, structures)
|
|
118105
|
+
score: scoreRelevance(s, intentWords) + scoreSemanticFit(s, intent) + scoreSparseLinkList(s) + scoreFieldRichness(s) + scoreConfigShapeDemotion(s) + scoreDegenerateRowDemotion(s) + scoreDuplicateRowDemotion(s) + scoreEmptyContainerDemotion(s) + scoreSiteMetaJsonLdDemotion(s, intent) + scoreSingleItemListMismatch(s, intent) + scoreTableIntentOverlapDemotion(s, intent, contextUrl, structures)
|
|
117964
118106
|
}));
|
|
117965
118107
|
scored.sort((a, b) => b.score - a.score);
|
|
117966
118108
|
const passing = scored.filter((candidate) => assessIntentResult(candidate.structure.data, intent).verdict === "pass");
|
|
@@ -118576,6 +118718,7 @@ function sanitizeExtractionToJson(data2, depth = 0) {
|
|
|
118576
118718
|
var STRIP_TAGS, CHROME_TAGS, AD_PATTERNS, HIDDEN_ATTRS, CONTENT_SELECTORS, CARD_SELECTORS, CONFIG_TOP_LEVEL_KEYS, CONFIG_CHUNK_VALUE_KEYS, INTENT_OVERLAP_STOPWORDS, SITE_META_LD_TYPES, TINY_RESULT_LIST_INTENT, TINY_RESULT_DETAIL_INTENT, STRUCTURED_DATA_HIGHLIGHT_TYPES, HTML_TAG_RE, TABLE_RE;
|
|
118577
118719
|
var init_extraction = __esm(() => {
|
|
118578
118720
|
init_esm11();
|
|
118721
|
+
init_cardinality();
|
|
118579
118722
|
STRIP_TAGS = new Set(["script", "style", "noscript", "svg", "iframe"]);
|
|
118580
118723
|
CHROME_TAGS = new Set(["nav", "footer", "header"]);
|
|
118581
118724
|
AD_PATTERNS = /\b(ad|ads|advert|advertisement|tracking|tracker|cookie-banner|cookie-consent|cookie-notice|popup|modal-overlay|gdpr|consent|banner-promo)\b/i;
|
|
@@ -121516,7 +121659,7 @@ function isProtobufContentType(contentType) {
|
|
|
121516
121659
|
function isProtobufLikeEndpoint(url, contentType) {
|
|
121517
121660
|
if (isProtobufContentType(contentType))
|
|
121518
121661
|
return true;
|
|
121519
|
-
return
|
|
121662
|
+
return /[-/](proto|protobuf)(\/|$|-)/i.test(url);
|
|
121520
121663
|
}
|
|
121521
121664
|
function decodeProtobufBytes(bytes) {
|
|
121522
121665
|
return decodeBytes(bytes, "bytes");
|
|
@@ -128277,7 +128420,8 @@ function selectBestEndpoint(endpoints, intent, skillDomain, contextUrl) {
|
|
|
128277
128420
|
const ranked = rankEndpoints(endpoints, intent, skillDomain, contextUrl);
|
|
128278
128421
|
if (ranked.length === 0)
|
|
128279
128422
|
throw new Error("All endpoints are disabled");
|
|
128280
|
-
|
|
128423
|
+
const preferred = ranked.find((r) => cardinalityMatches(intent, { kind: "route", route: r.endpoint }, { contextUrl }));
|
|
128424
|
+
return (preferred ?? ranked[0]).endpoint;
|
|
128281
128425
|
}
|
|
128282
128426
|
function isHtml2(text3) {
|
|
128283
128427
|
const trimmed = text3.trimStart().slice(0, 200).toLowerCase();
|
|
@@ -128324,6 +128468,7 @@ var init_execution = __esm(async () => {
|
|
|
128324
128468
|
init_reveng_server_first();
|
|
128325
128469
|
init_header_classify();
|
|
128326
128470
|
init_storage_hole_bindings();
|
|
128471
|
+
init_cardinality();
|
|
128327
128472
|
init_sealed_blob_store();
|
|
128328
128473
|
init_signer();
|
|
128329
128474
|
init_bundle_scanner();
|
|
@@ -128876,92 +129021,6 @@ function bindingGraphFromOperationGraph(og) {
|
|
|
128876
129021
|
return { endpoints, edges };
|
|
128877
129022
|
}
|
|
128878
129023
|
|
|
128879
|
-
// .tmp-runtime-src/values/cardinality.ts
|
|
128880
|
-
function isListLikeIntent(intent) {
|
|
128881
|
-
return LIST_INTENT_RE.test(intent ?? "");
|
|
128882
|
-
}
|
|
128883
|
-
function valueLooksLikeSingleItem(value) {
|
|
128884
|
-
if (value == null || Array.isArray(value) || typeof value !== "object")
|
|
128885
|
-
return false;
|
|
128886
|
-
const obj = value;
|
|
128887
|
-
for (const key2 of COLLECTION_KEYS) {
|
|
128888
|
-
if (Array.isArray(obj[key2]))
|
|
128889
|
-
return false;
|
|
128890
|
-
}
|
|
128891
|
-
for (const v of Object.values(obj)) {
|
|
128892
|
-
if (Array.isArray(v) && v.some((x) => x !== null && typeof x === "object"))
|
|
128893
|
-
return false;
|
|
128894
|
-
}
|
|
128895
|
-
const atType = typeof obj["@type"] === "string" ? obj["@type"].toLowerCase() : "";
|
|
128896
|
-
const isItemType = ITEM_SCHEMA_TYPES.has(atType);
|
|
128897
|
-
const hasName = "name" in obj || "title" in obj || "headline" in obj;
|
|
128898
|
-
const hasPriceish = "offers" in obj || "price" in obj || "sku" in obj;
|
|
128899
|
-
return isItemType || hasName && hasPriceish;
|
|
128900
|
-
}
|
|
128901
|
-
function schemaLooksLikeSingleItem(rs) {
|
|
128902
|
-
if (!rs || typeof rs !== "object")
|
|
128903
|
-
return false;
|
|
128904
|
-
const schema = rs;
|
|
128905
|
-
if (schema.type === "array")
|
|
128906
|
-
return false;
|
|
128907
|
-
const props = schema.properties ?? {};
|
|
128908
|
-
for (const key2 of COLLECTION_KEYS) {
|
|
128909
|
-
if (key2 in props)
|
|
128910
|
-
return false;
|
|
128911
|
-
}
|
|
128912
|
-
for (const value of Object.values(props)) {
|
|
128913
|
-
if (value && typeof value === "object" && value.type === "array" && value.items?.type === "object") {
|
|
128914
|
-
return false;
|
|
128915
|
-
}
|
|
128916
|
-
}
|
|
128917
|
-
if (schema.type !== "object")
|
|
128918
|
-
return false;
|
|
128919
|
-
const hasType = "@type" in props;
|
|
128920
|
-
const hasName = "name" in props || "title" in props;
|
|
128921
|
-
const hasPriceish = "offers" in props || "price" in props || "sku" in props;
|
|
128922
|
-
return hasType || hasName && hasPriceish;
|
|
128923
|
-
}
|
|
128924
|
-
function resolutionCardinalityMatches(intent, data2) {
|
|
128925
|
-
if (!isListLikeIntent(intent))
|
|
128926
|
-
return true;
|
|
128927
|
-
return !valueLooksLikeSingleItem(data2);
|
|
128928
|
-
}
|
|
128929
|
-
var LIST_INTENT_RE, ITEM_SCHEMA_TYPES, COLLECTION_KEYS;
|
|
128930
|
-
var init_cardinality = __esm(() => {
|
|
128931
|
-
LIST_INTENT_RE = /\b(search|find|lookup|browse|discover|list(?:ings?)?|feed|catalog(?:ue)?)\b/i;
|
|
128932
|
-
ITEM_SCHEMA_TYPES = new Set([
|
|
128933
|
-
"product",
|
|
128934
|
-
"offer",
|
|
128935
|
-
"article",
|
|
128936
|
-
"newsarticle",
|
|
128937
|
-
"blogposting",
|
|
128938
|
-
"recipe",
|
|
128939
|
-
"event",
|
|
128940
|
-
"place",
|
|
128941
|
-
"localbusiness",
|
|
128942
|
-
"jobposting",
|
|
128943
|
-
"book",
|
|
128944
|
-
"movie",
|
|
128945
|
-
"creativework",
|
|
128946
|
-
"person",
|
|
128947
|
-
"organization"
|
|
128948
|
-
]);
|
|
128949
|
-
COLLECTION_KEYS = [
|
|
128950
|
-
"itemListElement",
|
|
128951
|
-
"items",
|
|
128952
|
-
"results",
|
|
128953
|
-
"products",
|
|
128954
|
-
"listings",
|
|
128955
|
-
"data",
|
|
128956
|
-
"edges",
|
|
128957
|
-
"hits",
|
|
128958
|
-
"records",
|
|
128959
|
-
"entries",
|
|
128960
|
-
"rows",
|
|
128961
|
-
"nodes"
|
|
128962
|
-
];
|
|
128963
|
-
});
|
|
128964
|
-
|
|
128965
129024
|
// .tmp-runtime-src/values/yield-safety.ts
|
|
128966
129025
|
function tokenizeKey(key2) {
|
|
128967
129026
|
return key2.replace(/([a-z0-9])([A-Z])/g, "$1 $2").split(/[_\-.\s]+/).map((s) => s.toLowerCase()).filter(Boolean);
|
|
@@ -131117,26 +131176,6 @@ function endpointTargetsMismatchedLocalReplayHost(endpoint, contextUrl) {
|
|
|
131117
131176
|
function endpointHasNegativeTag(endpoint, tag) {
|
|
131118
131177
|
return (endpoint.semantic?.negative_tags ?? []).some((candidate) => candidate.trim().toLowerCase() === tag.trim().toLowerCase());
|
|
131119
131178
|
}
|
|
131120
|
-
function looksLikeSingleItemRoute(endpoint) {
|
|
131121
|
-
const tmpl = endpoint.url_template ?? "";
|
|
131122
|
-
let pathAndQuery = tmpl;
|
|
131123
|
-
try {
|
|
131124
|
-
const u = new URL(tmpl);
|
|
131125
|
-
pathAndQuery = `${u.pathname}${u.search}`;
|
|
131126
|
-
} catch {}
|
|
131127
|
-
const lower = pathAndQuery.toLowerCase();
|
|
131128
|
-
if (/\/(?:search|q|categories?|browse|results?|listings|explore|discover|feed|catalog(?:ue)?|collections?|shop|all)\b/.test(lower) || /[?&](?:q|query|keyword|keywords|search|term|category|cat|page)=/.test(lower)) {
|
|
131129
|
-
return false;
|
|
131130
|
-
}
|
|
131131
|
-
if (/\/(?:p|product|products|item|items|listing|detail|details|dp|pd|sku)\/[^/]+/.test(lower))
|
|
131132
|
-
return true;
|
|
131133
|
-
const lastSeg = lower.split("?")[0].replace(/\/+$/, "").split("/").pop() ?? "";
|
|
131134
|
-
if (/-\d{3,}$/.test(lastSeg) || /^\d{3,}$/.test(lastSeg))
|
|
131135
|
-
return true;
|
|
131136
|
-
if (/\{[^}]+\}/.test(lower))
|
|
131137
|
-
return false;
|
|
131138
|
-
return schemaLooksLikeSingleItem(endpoint.response_schema);
|
|
131139
|
-
}
|
|
131140
131179
|
function isResolveUsableEndpointForIntent(endpoint, intent, contextUrl) {
|
|
131141
131180
|
if (endpointTargetsMismatchedLocalReplayHost(endpoint, contextUrl))
|
|
131142
131181
|
return false;
|
|
@@ -131146,7 +131185,7 @@ function isResolveUsableEndpointForIntent(endpoint, intent, contextUrl) {
|
|
|
131146
131185
|
if (isFeedTimelineIntent(intent, contextUrl) && endpointHasNegativeTag(endpoint, "helper")) {
|
|
131147
131186
|
return false;
|
|
131148
131187
|
}
|
|
131149
|
-
if (
|
|
131188
|
+
if (!cardinalityMatches(intent, { kind: "route", route: endpoint }, { contextUrl })) {
|
|
131150
131189
|
return false;
|
|
131151
131190
|
}
|
|
131152
131191
|
return true;
|
|
@@ -132163,7 +132202,20 @@ function inferPreferredEntityTokens(intent) {
|
|
|
132163
132202
|
return [];
|
|
132164
132203
|
}
|
|
132165
132204
|
function isAcceptableIntentResult(result, intent) {
|
|
132166
|
-
|
|
132205
|
+
if (assessIntentResult(result, intent).verdict === "fail")
|
|
132206
|
+
return false;
|
|
132207
|
+
if (!cardinalityMatches(intent, { kind: "value", value: unwrapResultPayload(result) }))
|
|
132208
|
+
return false;
|
|
132209
|
+
return true;
|
|
132210
|
+
}
|
|
132211
|
+
function unwrapResultPayload(result) {
|
|
132212
|
+
if (result == null || typeof result !== "object" || Array.isArray(result))
|
|
132213
|
+
return result;
|
|
132214
|
+
const rec = { ...result };
|
|
132215
|
+
for (const k of ["available_endpoints", "available_operations", "shortlist_for_judgment", "workflow_dag", "walked_from", "exa_candidates", "run_plan"]) {
|
|
132216
|
+
delete rec[k];
|
|
132217
|
+
}
|
|
132218
|
+
return rec;
|
|
132167
132219
|
}
|
|
132168
132220
|
function candidateMatchesPreferredEntity(candidate, preferredTokens) {
|
|
132169
132221
|
if (preferredTokens.length === 0)
|
|
@@ -142761,9 +142813,14 @@ async function registerRoutes(app) {
|
|
|
142761
142813
|
recovered = true;
|
|
142762
142814
|
} else if (errResult.available_endpoints?.length === 1) {
|
|
142763
142815
|
const only = errResult.available_endpoints[0].endpoint_id;
|
|
142764
|
-
|
|
142765
|
-
|
|
142766
|
-
|
|
142816
|
+
const onlyEp = (skill.endpoints ?? []).find((e) => e.endpoint_id === only);
|
|
142817
|
+
if (!onlyEp || cardinalityMatches(intent, { kind: "route", route: onlyEp }, { contextUrl: context_url })) {
|
|
142818
|
+
console.log(`[exec] D7 single-endpoint skill: rewriting endpoint_id ${want} → ${only}`);
|
|
142819
|
+
execParams.endpoint_id = only;
|
|
142820
|
+
recovered = true;
|
|
142821
|
+
} else {
|
|
142822
|
+
console.log(`[exec] D7 single-endpoint skill: only endpoint ${only} is a single-item route for a list intent — not forcing (cardinality gate)`);
|
|
142823
|
+
}
|
|
142767
142824
|
}
|
|
142768
142825
|
}
|
|
142769
142826
|
if (!recovered && (skill.endpoints?.length ?? 0) >= 2 && intent) {
|
|
@@ -143991,6 +144048,7 @@ var init_routes = __esm(async () => {
|
|
|
143991
144048
|
init_client();
|
|
143992
144049
|
init_reveng_server_first();
|
|
143993
144050
|
init_header_classify();
|
|
144051
|
+
init_cardinality();
|
|
143994
144052
|
init_capture_spool();
|
|
143995
144053
|
init_nanoid();
|
|
143996
144054
|
init_marketplace();
|
|
@@ -233199,14 +233257,30 @@ async function cmdRun(args, flags, verb = "run") {
|
|
|
233199
233257
|
} else if (explicitEndpointId || !bestEndpoint || endpointIsSafeToAutoExecute(bestEndpoint)) {
|
|
233200
233258
|
runPlan.push({ step: "execute", mode: "direct_api", status: "started", endpoint_id: endpointToExecute });
|
|
233201
233259
|
const resolvedSource = typeof result.source === "string" ? result.source : undefined;
|
|
233202
|
-
|
|
233203
|
-
|
|
233204
|
-
|
|
233205
|
-
|
|
233206
|
-
|
|
233207
|
-
|
|
233208
|
-
|
|
233209
|
-
|
|
233260
|
+
const deferralResult = result;
|
|
233261
|
+
const executed = await withPendingNotice(api4("POST", `/v1/skills/${skillId}/execute`, execBody(endpointToExecute)), "Executing best endpoint...");
|
|
233262
|
+
if (!explicitEndpointId && isResolveSuccessResult(executed) && !resolutionCardinalityMatches(intent, executed.result ?? executed.data)) {
|
|
233263
|
+
runPlan[runPlan.length - 1] = {
|
|
233264
|
+
...runPlan[runPlan.length - 1],
|
|
233265
|
+
status: "skipped",
|
|
233266
|
+
reason: "cardinality_mismatch_single_item"
|
|
233267
|
+
};
|
|
233268
|
+
deferralResult.next_action = {
|
|
233269
|
+
title: "List intent returned a single item",
|
|
233270
|
+
command: `unbrowse execute --skill ${skillId} --endpoint ${endpointToExecute}`,
|
|
233271
|
+
why: "Auto-execute yielded a single item for a list/search intent; the page's listings are likely JS-rendered behind an internal API. Returning the route shortlist instead of one item."
|
|
233272
|
+
};
|
|
233273
|
+
result = deferralResult;
|
|
233274
|
+
} else {
|
|
233275
|
+
result = executed;
|
|
233276
|
+
if (resolvedSource && typeof result.source !== "string")
|
|
233277
|
+
result.source = resolvedSource;
|
|
233278
|
+
runPlan[runPlan.length - 1] = {
|
|
233279
|
+
...runPlan[runPlan.length - 1],
|
|
233280
|
+
status: isResolveSuccessResult(result) ? "complete" : "error",
|
|
233281
|
+
error: resolveResultError(result) ?? null
|
|
233282
|
+
};
|
|
233283
|
+
}
|
|
233210
233284
|
} else {
|
|
233211
233285
|
runPlan.push({
|
|
233212
233286
|
step: "execute",
|
|
@@ -236336,7 +236410,7 @@ __export(exports_orchestrator, {
|
|
|
236336
236410
|
pickPreferredSkillSnapshot: () => pickPreferredSkillSnapshot2,
|
|
236337
236411
|
persistDomainCache: () => persistDomainCache2,
|
|
236338
236412
|
marketplaceSkillMatchesContext: () => marketplaceSkillMatchesContext2,
|
|
236339
|
-
looksLikeSingleItemRoute: () =>
|
|
236413
|
+
looksLikeSingleItemRoute: () => looksLikeSingleItemRoute,
|
|
236340
236414
|
isRouteCacheEntryStale: () => isRouteCacheEntryStale2,
|
|
236341
236415
|
isResolveUsableEndpointForIntent: () => isResolveUsableEndpointForIntent2,
|
|
236342
236416
|
isCachedSkillRelevantForIntent: () => isCachedSkillRelevantForIntent2,
|
|
@@ -236857,25 +236931,8 @@ function endpointTargetsMismatchedLocalReplayHost2(endpoint, contextUrl) {
|
|
|
236857
236931
|
/**
 * Report whether `endpoint` carries `tag` among its semantic negative tags.
 *
 * Both sides are compared after trimming surrounding whitespace and
 * lower-casing, so " Helper " matches "helper".
 *
 * @param {object} endpoint - Endpoint record; `endpoint.semantic.negative_tags`
 *   is read when present.
 * @param {string} tag - Tag to look for.
 * @returns {boolean} True when at least one negative tag equals `tag` after
 *   normalization; false when there are no negative tags, no match, or `tag`
 *   is not a string.
 */
function endpointHasNegativeTag2(endpoint, tag) {
  const negativeTags = endpoint.semantic?.negative_tags;
  if (!Array.isArray(negativeTags) || typeof tag !== "string") {
    return false;
  }
  // Normalize the needle once instead of once per candidate.
  const wanted = tag.trim().toLowerCase();
  // Non-string entries can never match a tag; skip them rather than letting
  // `.trim()` throw and abort the caller's endpoint filtering.
  return negativeTags.some(
    (candidate) => typeof candidate === "string" && candidate.trim().toLowerCase() === wanted
  );
}
|
|
236860
|
-
function
|
|
236861
|
-
|
|
236862
|
-
let pathAndQuery = tmpl;
|
|
236863
|
-
try {
|
|
236864
|
-
const u = new URL(tmpl);
|
|
236865
|
-
pathAndQuery = `${u.pathname}${u.search}`;
|
|
236866
|
-
} catch {}
|
|
236867
|
-
const lower = pathAndQuery.toLowerCase();
|
|
236868
|
-
if (/\/(?:search|q|categories?|browse|results?|listings|explore|discover|feed|catalog(?:ue)?|collections?|shop|all)\b/.test(lower) || /[?&](?:q|query|keyword|keywords|search|term|category|cat|page)=/.test(lower)) {
|
|
236869
|
-
return false;
|
|
236870
|
-
}
|
|
236871
|
-
if (/\/(?:p|product|products|item|items|listing|detail|details|dp|pd|sku)\/[^/]+/.test(lower))
|
|
236872
|
-
return true;
|
|
236873
|
-
const lastSeg = lower.split("?")[0].replace(/\/+$/, "").split("/").pop() ?? "";
|
|
236874
|
-
if (/-\d{3,}$/.test(lastSeg) || /^\d{3,}$/.test(lastSeg))
|
|
236875
|
-
return true;
|
|
236876
|
-
if (/\{[^}]+\}/.test(lower))
|
|
236877
|
-
return false;
|
|
236878
|
-
return schemaLooksLikeSingleItem(endpoint.response_schema);
|
|
236934
|
+
/**
 * Orchestrator-facing name for the single-item route check.
 *
 * Thin delegate: forwards `endpoint` to `routeLooksLikeSingleItem` and hands
 * back whatever that helper returns, unchanged.
 *
 * @param {object} endpoint - Endpoint record to classify.
 * @returns {*} The result of `routeLooksLikeSingleItem(endpoint)`
 *   (presumably a boolean — confirm against that helper).
 */
function looksLikeSingleItemRoute(endpoint) {
  const verdict = routeLooksLikeSingleItem(endpoint);
  return verdict;
}
|
|
236880
236937
|
function isResolveUsableEndpointForIntent2(endpoint, intent, contextUrl) {
|
|
236881
236938
|
if (endpointTargetsMismatchedLocalReplayHost2(endpoint, contextUrl))
|
|
@@ -236886,7 +236943,7 @@ function isResolveUsableEndpointForIntent2(endpoint, intent, contextUrl) {
|
|
|
236886
236943
|
if (isFeedTimelineIntent2(intent, contextUrl) && endpointHasNegativeTag2(endpoint, "helper")) {
|
|
236887
236944
|
return false;
|
|
236888
236945
|
}
|
|
236889
|
-
if (
|
|
236946
|
+
if (!cardinalityMatches(intent, { kind: "route", route: endpoint }, { contextUrl })) {
|
|
236890
236947
|
return false;
|
|
236891
236948
|
}
|
|
236892
236949
|
return true;
|
|
@@ -237908,7 +237965,20 @@ function inferPreferredEntityTokens2(intent) {
|
|
|
237908
237965
|
return [];
|
|
237909
237966
|
}
|
|
237910
237967
|
/**
 * Decide whether `result` is acceptable for `intent`.
 *
 * Two gates are applied in order, and both must pass:
 *   1. `assessIntentResult(result, intent)` must not report the verdict "fail".
 *   2. `cardinalityMatches` must accept the payload produced by
 *      `unwrapResultPayload2(result)`, passed as a `{ kind: "value" }` subject.
 *
 * The payload is only computed when the first gate passes.
 *
 * @param {*} result - Resolve/execute result to judge.
 * @param {*} intent - Intent the result is judged against.
 * @returns {boolean} True only when both gates pass.
 */
function isAcceptableIntentResult2(result, intent) {
  const { verdict } = assessIntentResult(result, intent);
  if (verdict === "fail") {
    return false;
  }
  const payload = unwrapResultPayload2(result);
  const cardinalityOk = cardinalityMatches(intent, { kind: "value", value: payload });
  return Boolean(cardinalityOk);
}
|
|
237974
|
+
/**
 * Produce a shallow copy of `result` with a fixed set of top-level keys removed.
 *
 * Anything that is not a non-array object (null, undefined, primitives,
 * arrays) is returned as-is. For objects, the input itself is never mutated;
 * the keys are removed from a shallow copy only.
 *
 * @param {*} result - Value to strip.
 * @returns {*} The same value for non-records, otherwise a new object without
 *   the listed keys.
 */
function unwrapResultPayload2(result) {
  const isRecord = result !== null && typeof result === "object" && !Array.isArray(result);
  if (!isRecord) {
    return result;
  }
  const strippedKeys = [
    "available_endpoints",
    "available_operations",
    "shortlist_for_judgment",
    "workflow_dag",
    "walked_from",
    "exa_candidates",
    "run_plan"
  ];
  // Spread (not Object.assign) so an own "__proto__" key is copied as data.
  const payload = { ...result };
  strippedKeys.forEach((key) => {
    delete payload[key];
  });
  return payload;
}
|
|
237913
237983
|
function candidateMatchesPreferredEntity2(candidate, preferredTokens) {
|
|
237914
237984
|
if (preferredTokens.length === 0)
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
"repo_url": "https://github.com/justrach/kuri.git",
|
|
3
3
|
"branch": "adding-extensions",
|
|
4
4
|
"source_sha": "149881254046a20778f642b69f20f0c6468f6fb4",
|
|
5
|
-
"built_at": "2026-06-
|
|
5
|
+
"built_at": "2026-06-18T04:40:25.593Z",
|
|
6
6
|
"binaries": {
|
|
7
7
|
"darwin-arm64": {
|
|
8
8
|
"zig_target": "aarch64-macos",
|
|
@@ -21,11 +21,11 @@
|
|
|
21
21
|
},
|
|
22
22
|
"linux-x64": {
|
|
23
23
|
"zig_target": "x86_64-linux",
|
|
24
|
-
"sha256": "
|
|
24
|
+
"sha256": "b505ed7fd67c24c58d666b3d868bd5d0eb6c44033f0d6c52a5ad3f4abfcbedf7"
|
|
25
25
|
},
|
|
26
26
|
"win-x64": {
|
|
27
27
|
"zig_target": "x86_64-windows-gnu",
|
|
28
|
-
"sha256": "
|
|
28
|
+
"sha256": "5cb9e912772b7a80126ef358721b05a99d28259ffc03e75df2715569ed799b80",
|
|
29
29
|
"source": "pre-staged"
|
|
30
30
|
}
|
|
31
31
|
},
|
|
@@ -33,22 +33,22 @@
|
|
|
33
33
|
"darwin-arm64": {
|
|
34
34
|
"zig_target": "aarch64-macos",
|
|
35
35
|
"lib": "libkuri_ffi.dylib",
|
|
36
|
-
"sha256": "
|
|
36
|
+
"sha256": "01b68bd41b030c8d70ba6c8e4858ad1b2f578511709bb44affb60266766e089a"
|
|
37
37
|
},
|
|
38
38
|
"darwin-x64": {
|
|
39
39
|
"zig_target": "x86_64-macos",
|
|
40
40
|
"lib": "libkuri_ffi.dylib",
|
|
41
|
-
"sha256": "
|
|
41
|
+
"sha256": "bbf6543f8dc9490a1f0e84b877c03499b1836b0462b119b678483b890eadadfa"
|
|
42
42
|
},
|
|
43
43
|
"linux-arm64": {
|
|
44
44
|
"zig_target": "aarch64-linux",
|
|
45
45
|
"lib": "libkuri_ffi.so",
|
|
46
|
-
"sha256": "
|
|
46
|
+
"sha256": "3ff8184062706577cbdba34700c853bbabb7de40953fd13d94ab12d4fd470424"
|
|
47
47
|
},
|
|
48
48
|
"linux-x64": {
|
|
49
49
|
"zig_target": "x86_64-linux",
|
|
50
50
|
"lib": "libkuri_ffi.so",
|
|
51
|
-
"sha256": "
|
|
51
|
+
"sha256": "b44805644d94e1cd2d8613d5c0a0474e72fb9550e2067b750c967845c483e3e5"
|
|
52
52
|
}
|
|
53
53
|
}
|
|
54
54
|
}
|
|
Binary file
|