unbrowse 9.6.1 → 9.6.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/runtime/cli.js +279 -113
- package/runtime/mcp.js +215 -149
- package/vendor/kuri/darwin-arm64/libkuri_ffi.dylib +0 -0
- package/vendor/kuri/darwin-x64/libkuri_ffi.dylib +0 -0
- package/vendor/kuri/linux-arm64/libkuri_ffi.so +0 -0
- package/vendor/kuri/linux-x64/kuri +0 -0
- package/vendor/kuri/linux-x64/libkuri_ffi.so +0 -0
- package/vendor/kuri/manifest.json +7 -7
- package/vendor/kuri/win-x64/kuri.exe +0 -0
package/package.json
CHANGED
package/runtime/cli.js
CHANGED
|
@@ -1752,10 +1752,74 @@ function valueLooksLikeSingleItem(value) {
|
|
|
1752
1752
|
const hasPriceish = "offers" in obj || "price" in obj || "sku" in obj;
|
|
1753
1753
|
return isItemType || hasName && hasPriceish;
|
|
1754
1754
|
}
|
|
1755
|
-
function
|
|
1756
|
-
if (!
|
|
1755
|
+
function schemaLooksLikeSingleItem(rs) {
|
|
1756
|
+
if (!rs || typeof rs !== "object")
|
|
1757
|
+
return false;
|
|
1758
|
+
const schema = rs;
|
|
1759
|
+
if (schema.type === "array")
|
|
1760
|
+
return false;
|
|
1761
|
+
const props = schema.properties ?? {};
|
|
1762
|
+
for (const key of COLLECTION_KEYS) {
|
|
1763
|
+
if (key in props)
|
|
1764
|
+
return false;
|
|
1765
|
+
}
|
|
1766
|
+
for (const value of Object.values(props)) {
|
|
1767
|
+
if (value && typeof value === "object" && value.type === "array" && value.items?.type === "object") {
|
|
1768
|
+
return false;
|
|
1769
|
+
}
|
|
1770
|
+
}
|
|
1771
|
+
if (schema.type !== "object")
|
|
1772
|
+
return false;
|
|
1773
|
+
const hasType = "@type" in props;
|
|
1774
|
+
const hasName = "name" in props || "title" in props;
|
|
1775
|
+
const hasPriceish = "offers" in props || "price" in props || "sku" in props;
|
|
1776
|
+
return hasType || hasName && hasPriceish;
|
|
1777
|
+
}
|
|
1778
|
+
function routeLooksLikeSingleItem(route) {
|
|
1779
|
+
const tmpl = route.url_template ?? "";
|
|
1780
|
+
let pathAndQuery = tmpl;
|
|
1781
|
+
try {
|
|
1782
|
+
const u = new URL(tmpl);
|
|
1783
|
+
pathAndQuery = `${u.pathname}${u.search}`;
|
|
1784
|
+
} catch {}
|
|
1785
|
+
const lower = pathAndQuery.toLowerCase();
|
|
1786
|
+
if (/\/(?:search|q|categories?|browse|results?|listings|explore|discover|feed|catalog(?:ue)?|collections?|shop|all)\b/.test(lower) || /[?&](?:q|query|keyword|keywords|search|term|category|cat|page)=/.test(lower)) {
|
|
1787
|
+
return false;
|
|
1788
|
+
}
|
|
1789
|
+
if (/\/(?:p|product|products|item|items|listing|detail|details|dp|pd|sku)\/[^/]+/.test(lower))
|
|
1757
1790
|
return true;
|
|
1758
|
-
|
|
1791
|
+
const lastSeg = lower.split("?")[0].replace(/\/+$/, "").split("/").pop() ?? "";
|
|
1792
|
+
if (/-\d{3,}$/.test(lastSeg) || /^\d{3,}$/.test(lastSeg))
|
|
1793
|
+
return true;
|
|
1794
|
+
if (/\{[^}]+\}/.test(lower))
|
|
1795
|
+
return false;
|
|
1796
|
+
return schemaLooksLikeSingleItem(route.response_schema);
|
|
1797
|
+
}
|
|
1798
|
+
function urlPathLooksListLike(contextUrl) {
|
|
1799
|
+
if (!contextUrl)
|
|
1800
|
+
return false;
|
|
1801
|
+
try {
|
|
1802
|
+
const pathname = new URL(contextUrl).pathname.toLowerCase();
|
|
1803
|
+
return /\/(?:search|basic-search|result-page|results?|discover|browse|categories?|q|listings|feed|catalog(?:ue)?)\b/.test(pathname);
|
|
1804
|
+
} catch {
|
|
1805
|
+
return false;
|
|
1806
|
+
}
|
|
1807
|
+
}
|
|
1808
|
+
function cardinalityMatches(intent, subject, opts) {
|
|
1809
|
+
const wantsMany = isListLikeIntent(intent) || urlPathLooksListLike(opts?.contextUrl);
|
|
1810
|
+
if (!wantsMany)
|
|
1811
|
+
return true;
|
|
1812
|
+
switch (subject.kind) {
|
|
1813
|
+
case "value":
|
|
1814
|
+
return !valueLooksLikeSingleItem(subject.value);
|
|
1815
|
+
case "schema":
|
|
1816
|
+
return !schemaLooksLikeSingleItem(subject.schema);
|
|
1817
|
+
case "route":
|
|
1818
|
+
return !routeLooksLikeSingleItem(subject.route);
|
|
1819
|
+
}
|
|
1820
|
+
}
|
|
1821
|
+
function resolutionCardinalityMatches(intent, data) {
|
|
1822
|
+
return cardinalityMatches(intent, { kind: "value", value: data });
|
|
1759
1823
|
}
|
|
1760
1824
|
var LIST_INTENT_RE, ITEM_SCHEMA_TYPES, COLLECTION_KEYS;
|
|
1761
1825
|
var init_cardinality = __esm(() => {
|
|
@@ -2286,7 +2350,7 @@ var init_telemetry = __esm(() => {
|
|
|
2286
2350
|
});
|
|
2287
2351
|
|
|
2288
2352
|
// .tmp-runtime-src/build-info.generated.ts
|
|
2289
|
-
var BUILD_RELEASE_VERSION = "9.6.
|
|
2353
|
+
var BUILD_RELEASE_VERSION = "9.6.2", BUILD_GIT_SHA = "d2d14a6629a0", BUILD_CODE_HASH = "5d9ebf619c61", BUILD_RELEASE_MANIFEST_BASE64 = "eyJzY2hlbWFfdmVyc2lvbiI6MSwicmVsZWFzZV92ZXJzaW9uIjoiOS42LjIiLCJnaXRfc2hhIjoiZDJkMTRhNjYyOWEwIiwiY29kZV9oYXNoIjoiNWQ5ZWJmNjE5YzYxIiwidHJhY2VfdmVyc2lvbiI6IjVkOWViZjYxOWM2MUBkMmQxNGE2NjI5YTAiLCJpc3N1ZWRfYXQiOiIyMDI2LTA2LTE4VDA0OjE3OjAzLjg1M1oifQ", BUILD_RELEASE_MANIFEST_SIGNATURE = "OefvyW2iLVVPQ-0HMg9Mz-bciCmP8LE5u7fVEss_98E", BUILD_DEFAULT_BACKEND_URL = "https://beta-api.unbrowse.ai", BUILD_DEFAULT_PROFILE = "";
|
|
2290
2354
|
|
|
2291
2355
|
// .tmp-runtime-src/version.ts
|
|
2292
2356
|
import { createHash as createHash7 } from "crypto";
|
|
@@ -45983,6 +46047,130 @@ var init_header_classify = __esm(() => {
|
|
|
45983
46047
|
SENSITIVE_HEADER_PATTERN = /token|key|secret|credential|password|session/i;
|
|
45984
46048
|
});
|
|
45985
46049
|
|
|
46050
|
+
// .tmp-runtime-src/values/cardinality.ts
|
|
46051
|
+
function isListLikeIntent2(intent) {
|
|
46052
|
+
return LIST_INTENT_RE2.test(intent ?? "");
|
|
46053
|
+
}
|
|
46054
|
+
function valueLooksLikeSingleItem2(value) {
|
|
46055
|
+
if (value == null || Array.isArray(value) || typeof value !== "object")
|
|
46056
|
+
return false;
|
|
46057
|
+
const obj = value;
|
|
46058
|
+
for (const key of COLLECTION_KEYS2) {
|
|
46059
|
+
if (Array.isArray(obj[key]))
|
|
46060
|
+
return false;
|
|
46061
|
+
}
|
|
46062
|
+
for (const v of Object.values(obj)) {
|
|
46063
|
+
if (Array.isArray(v) && v.some((x) => x !== null && typeof x === "object"))
|
|
46064
|
+
return false;
|
|
46065
|
+
}
|
|
46066
|
+
const atType = typeof obj["@type"] === "string" ? obj["@type"].toLowerCase() : "";
|
|
46067
|
+
const isItemType = ITEM_SCHEMA_TYPES2.has(atType);
|
|
46068
|
+
const hasName = "name" in obj || "title" in obj || "headline" in obj;
|
|
46069
|
+
const hasPriceish = "offers" in obj || "price" in obj || "sku" in obj;
|
|
46070
|
+
return isItemType || hasName && hasPriceish;
|
|
46071
|
+
}
|
|
46072
|
+
function schemaLooksLikeSingleItem2(rs) {
|
|
46073
|
+
if (!rs || typeof rs !== "object")
|
|
46074
|
+
return false;
|
|
46075
|
+
const schema = rs;
|
|
46076
|
+
if (schema.type === "array")
|
|
46077
|
+
return false;
|
|
46078
|
+
const props = schema.properties ?? {};
|
|
46079
|
+
for (const key of COLLECTION_KEYS2) {
|
|
46080
|
+
if (key in props)
|
|
46081
|
+
return false;
|
|
46082
|
+
}
|
|
46083
|
+
for (const value of Object.values(props)) {
|
|
46084
|
+
if (value && typeof value === "object" && value.type === "array" && value.items?.type === "object") {
|
|
46085
|
+
return false;
|
|
46086
|
+
}
|
|
46087
|
+
}
|
|
46088
|
+
if (schema.type !== "object")
|
|
46089
|
+
return false;
|
|
46090
|
+
const hasType = "@type" in props;
|
|
46091
|
+
const hasName = "name" in props || "title" in props;
|
|
46092
|
+
const hasPriceish = "offers" in props || "price" in props || "sku" in props;
|
|
46093
|
+
return hasType || hasName && hasPriceish;
|
|
46094
|
+
}
|
|
46095
|
+
function routeLooksLikeSingleItem2(route) {
|
|
46096
|
+
const tmpl = route.url_template ?? "";
|
|
46097
|
+
let pathAndQuery = tmpl;
|
|
46098
|
+
try {
|
|
46099
|
+
const u = new URL(tmpl);
|
|
46100
|
+
pathAndQuery = `${u.pathname}${u.search}`;
|
|
46101
|
+
} catch {}
|
|
46102
|
+
const lower = pathAndQuery.toLowerCase();
|
|
46103
|
+
if (/\/(?:search|q|categories?|browse|results?|listings|explore|discover|feed|catalog(?:ue)?|collections?|shop|all)\b/.test(lower) || /[?&](?:q|query|keyword|keywords|search|term|category|cat|page)=/.test(lower)) {
|
|
46104
|
+
return false;
|
|
46105
|
+
}
|
|
46106
|
+
if (/\/(?:p|product|products|item|items|listing|detail|details|dp|pd|sku)\/[^/]+/.test(lower))
|
|
46107
|
+
return true;
|
|
46108
|
+
const lastSeg = lower.split("?")[0].replace(/\/+$/, "").split("/").pop() ?? "";
|
|
46109
|
+
if (/-\d{3,}$/.test(lastSeg) || /^\d{3,}$/.test(lastSeg))
|
|
46110
|
+
return true;
|
|
46111
|
+
if (/\{[^}]+\}/.test(lower))
|
|
46112
|
+
return false;
|
|
46113
|
+
return schemaLooksLikeSingleItem2(route.response_schema);
|
|
46114
|
+
}
|
|
46115
|
+
function urlPathLooksListLike2(contextUrl) {
|
|
46116
|
+
if (!contextUrl)
|
|
46117
|
+
return false;
|
|
46118
|
+
try {
|
|
46119
|
+
const pathname = new URL(contextUrl).pathname.toLowerCase();
|
|
46120
|
+
return /\/(?:search|basic-search|result-page|results?|discover|browse|categories?|q|listings|feed|catalog(?:ue)?)\b/.test(pathname);
|
|
46121
|
+
} catch {
|
|
46122
|
+
return false;
|
|
46123
|
+
}
|
|
46124
|
+
}
|
|
46125
|
+
function cardinalityMatches2(intent, subject, opts) {
|
|
46126
|
+
const wantsMany = isListLikeIntent2(intent) || urlPathLooksListLike2(opts?.contextUrl);
|
|
46127
|
+
if (!wantsMany)
|
|
46128
|
+
return true;
|
|
46129
|
+
switch (subject.kind) {
|
|
46130
|
+
case "value":
|
|
46131
|
+
return !valueLooksLikeSingleItem2(subject.value);
|
|
46132
|
+
case "schema":
|
|
46133
|
+
return !schemaLooksLikeSingleItem2(subject.schema);
|
|
46134
|
+
case "route":
|
|
46135
|
+
return !routeLooksLikeSingleItem2(subject.route);
|
|
46136
|
+
}
|
|
46137
|
+
}
|
|
46138
|
+
var LIST_INTENT_RE2, ITEM_SCHEMA_TYPES2, COLLECTION_KEYS2;
|
|
46139
|
+
var init_cardinality2 = __esm(() => {
|
|
46140
|
+
LIST_INTENT_RE2 = /\b(search|find|lookup|browse|discover|list(?:ings?)?|feed|catalog(?:ue)?)\b/i;
|
|
46141
|
+
ITEM_SCHEMA_TYPES2 = new Set([
|
|
46142
|
+
"product",
|
|
46143
|
+
"offer",
|
|
46144
|
+
"article",
|
|
46145
|
+
"newsarticle",
|
|
46146
|
+
"blogposting",
|
|
46147
|
+
"recipe",
|
|
46148
|
+
"event",
|
|
46149
|
+
"place",
|
|
46150
|
+
"localbusiness",
|
|
46151
|
+
"jobposting",
|
|
46152
|
+
"book",
|
|
46153
|
+
"movie",
|
|
46154
|
+
"creativework",
|
|
46155
|
+
"person",
|
|
46156
|
+
"organization"
|
|
46157
|
+
]);
|
|
46158
|
+
COLLECTION_KEYS2 = [
|
|
46159
|
+
"itemListElement",
|
|
46160
|
+
"items",
|
|
46161
|
+
"results",
|
|
46162
|
+
"products",
|
|
46163
|
+
"listings",
|
|
46164
|
+
"data",
|
|
46165
|
+
"edges",
|
|
46166
|
+
"hits",
|
|
46167
|
+
"records",
|
|
46168
|
+
"entries",
|
|
46169
|
+
"rows",
|
|
46170
|
+
"nodes"
|
|
46171
|
+
];
|
|
46172
|
+
});
|
|
46173
|
+
|
|
45986
46174
|
// node_modules/.bun/nanoid@5.1.11/node_modules/nanoid/url-alphabet/index.js
|
|
45987
46175
|
var urlAlphabet = "useandom-26T198340PX75pxJACKVERYMINDBUSHWOLF_GQZbfghjklqvwyzrict";
|
|
45988
46176
|
|
|
@@ -117291,7 +117479,7 @@ __export(exports_extraction, {
|
|
|
117291
117479
|
cleanDOM: () => cleanDOM,
|
|
117292
117480
|
buildStructuredDataHeader: () => buildStructuredDataHeader
|
|
117293
117481
|
});
|
|
117294
|
-
function extractHtmlMetadataFallback(html3) {
|
|
117482
|
+
function extractHtmlMetadataFallback(html3, intent) {
|
|
117295
117483
|
if (!html3 || html3.length < 100)
|
|
117296
117484
|
return null;
|
|
117297
117485
|
try {
|
|
@@ -117323,8 +117511,9 @@ function extractHtmlMetadataFallback(html3) {
|
|
|
117323
117511
|
jsonLdBlocks.push(parsed);
|
|
117324
117512
|
} catch {}
|
|
117325
117513
|
});
|
|
117326
|
-
|
|
117327
|
-
|
|
117514
|
+
const usableJsonLd = isListLikeIntent2(intent) ? jsonLdBlocks.filter((b) => !valueLooksLikeSingleItem2(b)) : jsonLdBlocks;
|
|
117515
|
+
if (usableJsonLd.length > 0)
|
|
117516
|
+
out.json_ld = usableJsonLd;
|
|
117328
117517
|
const headings = [];
|
|
117329
117518
|
$2("h1, h2").each((_, el) => {
|
|
117330
117519
|
const text3 = cleanText($2(el).text());
|
|
@@ -119647,6 +119836,16 @@ function scoreSiteMetaJsonLdDemotion(structure, intent) {
|
|
|
119647
119836
|
return 0;
|
|
119648
119837
|
return -200;
|
|
119649
119838
|
}
|
|
119839
|
+
function isSingleItemStructureForList(structure, intent) {
|
|
119840
|
+
if (!TINY_RESULT_LIST_INTENT.test(intent.toLowerCase()))
|
|
119841
|
+
return false;
|
|
119842
|
+
if (structure.type === "repeated-elements")
|
|
119843
|
+
return false;
|
|
119844
|
+
return valueLooksLikeSingleItem2(structure.data);
|
|
119845
|
+
}
|
|
119846
|
+
function scoreSingleItemListMismatch(structure, intent) {
|
|
119847
|
+
return isSingleItemStructureForList(structure, intent) ? -200 : 0;
|
|
119848
|
+
}
|
|
119650
119849
|
function looksLikeTinyContentReadResult(data2, intent) {
|
|
119651
119850
|
if (data2 == null)
|
|
119652
119851
|
return { tiny: false, bytes: 0, stringLeafChars: 0 };
|
|
@@ -119800,9 +119999,9 @@ function extractFromDOM(html3, intent, contextUrl) {
|
|
|
119800
119999
|
const articleStructures = extractArticleBodySpecial(html3.length > 600000 ? html3.slice(0, 600000) : html3, intent);
|
|
119801
120000
|
const allStructures = [...flashStructures, ...githubStructures, ...repeatedPersonStructures, ...packageSearchStructures, ...xProfileStructures, ...postStructures, ...repeatedArticleStructures, ...trendStructures, ...definitionStructures, ...packageDetailStructures, ...arxivAbstractStructures, ...courseStructures, ...articleStructures, ...spaStructures, ...parseStructured(cleaned)].map((structure) => normalizeStructureForIntent(structure, intent));
|
|
119802
120001
|
const isListIntent = TINY_RESULT_LIST_INTENT.test(intent.toLowerCase());
|
|
119803
|
-
const structures = allStructures.filter((s) => !(s.type === "repeated-elements" && looksLikeDegenerateRowArray(s.data)) && !looksLikeConfigShape(s.data) && !looksLikeEmptyContainer(s.data) && !(isListIntent && looksLikeSiteMetaJsonLd(s.data)));
|
|
120002
|
+
const structures = allStructures.filter((s) => !(s.type === "repeated-elements" && looksLikeDegenerateRowArray(s.data)) && !looksLikeConfigShape(s.data) && !looksLikeEmptyContainer(s.data) && !(isListIntent && looksLikeSiteMetaJsonLd(s.data)) && !isSingleItemStructureForList(s, intent));
|
|
119804
120003
|
if (structures.length === 0) {
|
|
119805
|
-
const fallback2 = extractHtmlMetadataFallback(html3);
|
|
120004
|
+
const fallback2 = extractHtmlMetadataFallback(html3, intent);
|
|
119806
120005
|
if (fallback2) {
|
|
119807
120006
|
return _finalize({ data: fallback2, extraction_method: "html_metadata_fallback", confidence: 0.4 });
|
|
119808
120007
|
}
|
|
@@ -119811,7 +120010,7 @@ function extractFromDOM(html3, intent, contextUrl) {
|
|
|
119811
120010
|
const intentWords = intent.toLowerCase().split(/\s+/).filter(Boolean);
|
|
119812
120011
|
const scored = structures.map((s) => ({
|
|
119813
120012
|
structure: s,
|
|
119814
|
-
score: scoreRelevance(s, intentWords) + scoreSemanticFit(s, intent) + scoreSparseLinkList(s) + scoreFieldRichness(s) + scoreConfigShapeDemotion(s) + scoreDegenerateRowDemotion(s) + scoreDuplicateRowDemotion(s) + scoreEmptyContainerDemotion(s) + scoreSiteMetaJsonLdDemotion(s, intent) + scoreTableIntentOverlapDemotion(s, intent, contextUrl, structures)
|
|
120013
|
+
score: scoreRelevance(s, intentWords) + scoreSemanticFit(s, intent) + scoreSparseLinkList(s) + scoreFieldRichness(s) + scoreConfigShapeDemotion(s) + scoreDegenerateRowDemotion(s) + scoreDuplicateRowDemotion(s) + scoreEmptyContainerDemotion(s) + scoreSiteMetaJsonLdDemotion(s, intent) + scoreSingleItemListMismatch(s, intent) + scoreTableIntentOverlapDemotion(s, intent, contextUrl, structures)
|
|
119815
120014
|
}));
|
|
119816
120015
|
scored.sort((a, b) => b.score - a.score);
|
|
119817
120016
|
const passing = scored.filter((candidate) => assessIntentResult(candidate.structure.data, intent).verdict === "pass");
|
|
@@ -120427,6 +120626,7 @@ function sanitizeExtractionToJson(data2, depth = 0) {
|
|
|
120427
120626
|
var STRIP_TAGS, CHROME_TAGS, AD_PATTERNS, HIDDEN_ATTRS, CONTENT_SELECTORS, CARD_SELECTORS, CONFIG_TOP_LEVEL_KEYS, CONFIG_CHUNK_VALUE_KEYS, INTENT_OVERLAP_STOPWORDS, SITE_META_LD_TYPES, TINY_RESULT_LIST_INTENT, TINY_RESULT_DETAIL_INTENT, STRUCTURED_DATA_HIGHLIGHT_TYPES, HTML_TAG_RE, TABLE_RE;
|
|
120428
120627
|
var init_extraction = __esm(() => {
|
|
120429
120628
|
init_esm11();
|
|
120629
|
+
init_cardinality2();
|
|
120430
120630
|
STRIP_TAGS = new Set(["script", "style", "noscript", "svg", "iframe"]);
|
|
120431
120631
|
CHROME_TAGS = new Set(["nav", "footer", "header"]);
|
|
120432
120632
|
AD_PATTERNS = /\b(ad|ads|advert|advertisement|tracking|tracker|cookie-banner|cookie-consent|cookie-notice|popup|modal-overlay|gdpr|consent|banner-promo)\b/i;
|
|
@@ -129952,7 +130152,8 @@ function selectBestEndpoint(endpoints, intent, skillDomain, contextUrl) {
|
|
|
129952
130152
|
const ranked = rankEndpoints(endpoints, intent, skillDomain, contextUrl);
|
|
129953
130153
|
if (ranked.length === 0)
|
|
129954
130154
|
throw new Error("All endpoints are disabled");
|
|
129955
|
-
|
|
130155
|
+
const preferred = ranked.find((r) => cardinalityMatches2(intent, { kind: "route", route: r.endpoint }, { contextUrl }));
|
|
130156
|
+
return (preferred ?? ranked[0]).endpoint;
|
|
129956
130157
|
}
|
|
129957
130158
|
function isHtml2(text3) {
|
|
129958
130159
|
const trimmed = text3.trimStart().slice(0, 200).toLowerCase();
|
|
@@ -129999,6 +130200,7 @@ var init_execution = __esm(async () => {
|
|
|
129999
130200
|
init_reveng_server_first();
|
|
130000
130201
|
init_header_classify();
|
|
130001
130202
|
init_storage_hole_bindings();
|
|
130203
|
+
init_cardinality2();
|
|
130002
130204
|
init_sealed_blob_store();
|
|
130003
130205
|
init_signer();
|
|
130004
130206
|
init_bundle_scanner();
|
|
@@ -130551,69 +130753,6 @@ function bindingGraphFromOperationGraph(og) {
|
|
|
130551
130753
|
return { endpoints, edges };
|
|
130552
130754
|
}
|
|
130553
130755
|
|
|
130554
|
-
// .tmp-runtime-src/values/cardinality.ts
|
|
130555
|
-
function isListLikeIntent2(intent) {
|
|
130556
|
-
return LIST_INTENT_RE2.test(intent ?? "");
|
|
130557
|
-
}
|
|
130558
|
-
function schemaLooksLikeSingleItem(rs) {
|
|
130559
|
-
if (!rs || typeof rs !== "object")
|
|
130560
|
-
return false;
|
|
130561
|
-
const schema = rs;
|
|
130562
|
-
if (schema.type === "array")
|
|
130563
|
-
return false;
|
|
130564
|
-
const props = schema.properties ?? {};
|
|
130565
|
-
for (const key2 of COLLECTION_KEYS2) {
|
|
130566
|
-
if (key2 in props)
|
|
130567
|
-
return false;
|
|
130568
|
-
}
|
|
130569
|
-
for (const value of Object.values(props)) {
|
|
130570
|
-
if (value && typeof value === "object" && value.type === "array" && value.items?.type === "object") {
|
|
130571
|
-
return false;
|
|
130572
|
-
}
|
|
130573
|
-
}
|
|
130574
|
-
if (schema.type !== "object")
|
|
130575
|
-
return false;
|
|
130576
|
-
const hasType = "@type" in props;
|
|
130577
|
-
const hasName = "name" in props || "title" in props;
|
|
130578
|
-
const hasPriceish = "offers" in props || "price" in props || "sku" in props;
|
|
130579
|
-
return hasType || hasName && hasPriceish;
|
|
130580
|
-
}
|
|
130581
|
-
var LIST_INTENT_RE2, ITEM_SCHEMA_TYPES2, COLLECTION_KEYS2;
|
|
130582
|
-
var init_cardinality2 = __esm(() => {
|
|
130583
|
-
LIST_INTENT_RE2 = /\b(search|find|lookup|browse|discover|list(?:ings?)?|feed|catalog(?:ue)?)\b/i;
|
|
130584
|
-
ITEM_SCHEMA_TYPES2 = new Set([
|
|
130585
|
-
"product",
|
|
130586
|
-
"offer",
|
|
130587
|
-
"article",
|
|
130588
|
-
"newsarticle",
|
|
130589
|
-
"blogposting",
|
|
130590
|
-
"recipe",
|
|
130591
|
-
"event",
|
|
130592
|
-
"place",
|
|
130593
|
-
"localbusiness",
|
|
130594
|
-
"jobposting",
|
|
130595
|
-
"book",
|
|
130596
|
-
"movie",
|
|
130597
|
-
"creativework",
|
|
130598
|
-
"person",
|
|
130599
|
-
"organization"
|
|
130600
|
-
]);
|
|
130601
|
-
COLLECTION_KEYS2 = [
|
|
130602
|
-
"itemListElement",
|
|
130603
|
-
"items",
|
|
130604
|
-
"results",
|
|
130605
|
-
"products",
|
|
130606
|
-
"listings",
|
|
130607
|
-
"data",
|
|
130608
|
-
"edges",
|
|
130609
|
-
"hits",
|
|
130610
|
-
"records",
|
|
130611
|
-
"entries",
|
|
130612
|
-
"rows",
|
|
130613
|
-
"nodes"
|
|
130614
|
-
];
|
|
130615
|
-
});
|
|
130616
|
-
|
|
130617
130756
|
// .tmp-runtime-src/values/yield-safety.ts
|
|
130618
130757
|
function tokenizeKey(key2) {
|
|
130619
130758
|
return key2.replace(/([a-z0-9])([A-Z])/g, "$1 $2").split(/[_\-.\s]+/).map((s) => s.toLowerCase()).filter(Boolean);
|
|
@@ -132745,26 +132884,6 @@ function endpointTargetsMismatchedLocalReplayHost(endpoint, contextUrl) {
|
|
|
132745
132884
|
function endpointHasNegativeTag(endpoint, tag) {
|
|
132746
132885
|
return (endpoint.semantic?.negative_tags ?? []).some((candidate) => candidate.trim().toLowerCase() === tag.trim().toLowerCase());
|
|
132747
132886
|
}
|
|
132748
|
-
function looksLikeSingleItemRoute(endpoint) {
|
|
132749
|
-
const tmpl = endpoint.url_template ?? "";
|
|
132750
|
-
let pathAndQuery = tmpl;
|
|
132751
|
-
try {
|
|
132752
|
-
const u = new URL(tmpl);
|
|
132753
|
-
pathAndQuery = `${u.pathname}${u.search}`;
|
|
132754
|
-
} catch {}
|
|
132755
|
-
const lower = pathAndQuery.toLowerCase();
|
|
132756
|
-
if (/\/(?:search|q|categories?|browse|results?|listings|explore|discover|feed|catalog(?:ue)?|collections?|shop|all)\b/.test(lower) || /[?&](?:q|query|keyword|keywords|search|term|category|cat|page)=/.test(lower)) {
|
|
132757
|
-
return false;
|
|
132758
|
-
}
|
|
132759
|
-
if (/\/(?:p|product|products|item|items|listing|detail|details|dp|pd|sku)\/[^/]+/.test(lower))
|
|
132760
|
-
return true;
|
|
132761
|
-
const lastSeg = lower.split("?")[0].replace(/\/+$/, "").split("/").pop() ?? "";
|
|
132762
|
-
if (/-\d{3,}$/.test(lastSeg) || /^\d{3,}$/.test(lastSeg))
|
|
132763
|
-
return true;
|
|
132764
|
-
if (/\{[^}]+\}/.test(lower))
|
|
132765
|
-
return false;
|
|
132766
|
-
return schemaLooksLikeSingleItem(endpoint.response_schema);
|
|
132767
|
-
}
|
|
132768
132887
|
function isResolveUsableEndpointForIntent(endpoint, intent, contextUrl) {
|
|
132769
132888
|
if (endpointTargetsMismatchedLocalReplayHost(endpoint, contextUrl))
|
|
132770
132889
|
return false;
|
|
@@ -132774,7 +132893,7 @@ function isResolveUsableEndpointForIntent(endpoint, intent, contextUrl) {
|
|
|
132774
132893
|
if (isFeedTimelineIntent(intent, contextUrl) && endpointHasNegativeTag(endpoint, "helper")) {
|
|
132775
132894
|
return false;
|
|
132776
132895
|
}
|
|
132777
|
-
if (
|
|
132896
|
+
if (!cardinalityMatches2(intent, { kind: "route", route: endpoint }, { contextUrl })) {
|
|
132778
132897
|
return false;
|
|
132779
132898
|
}
|
|
132780
132899
|
return true;
|
|
@@ -133791,7 +133910,20 @@ function inferPreferredEntityTokens(intent) {
|
|
|
133791
133910
|
return [];
|
|
133792
133911
|
}
|
|
133793
133912
|
function isAcceptableIntentResult(result, intent) {
|
|
133794
|
-
|
|
133913
|
+
if (assessIntentResult(result, intent).verdict === "fail")
|
|
133914
|
+
return false;
|
|
133915
|
+
if (!cardinalityMatches2(intent, { kind: "value", value: unwrapResultPayload(result) }))
|
|
133916
|
+
return false;
|
|
133917
|
+
return true;
|
|
133918
|
+
}
|
|
133919
|
+
function unwrapResultPayload(result) {
|
|
133920
|
+
if (result == null || typeof result !== "object" || Array.isArray(result))
|
|
133921
|
+
return result;
|
|
133922
|
+
const rec = { ...result };
|
|
133923
|
+
for (const k of ["available_endpoints", "available_operations", "shortlist_for_judgment", "workflow_dag", "walked_from", "exa_candidates", "run_plan"]) {
|
|
133924
|
+
delete rec[k];
|
|
133925
|
+
}
|
|
133926
|
+
return rec;
|
|
133795
133927
|
}
|
|
133796
133928
|
function candidateMatchesPreferredEntity(candidate, preferredTokens) {
|
|
133797
133929
|
if (preferredTokens.length === 0)
|
|
@@ -144132,9 +144264,14 @@ async function registerRoutes(app) {
|
|
|
144132
144264
|
recovered = true;
|
|
144133
144265
|
} else if (errResult.available_endpoints?.length === 1) {
|
|
144134
144266
|
const only = errResult.available_endpoints[0].endpoint_id;
|
|
144135
|
-
|
|
144136
|
-
|
|
144137
|
-
|
|
144267
|
+
const onlyEp = (skill.endpoints ?? []).find((e) => e.endpoint_id === only);
|
|
144268
|
+
if (!onlyEp || cardinalityMatches2(intent, { kind: "route", route: onlyEp }, { contextUrl: context_url })) {
|
|
144269
|
+
console.log(`[exec] D7 single-endpoint skill: rewriting endpoint_id ${want} → ${only}`);
|
|
144270
|
+
execParams.endpoint_id = only;
|
|
144271
|
+
recovered = true;
|
|
144272
|
+
} else {
|
|
144273
|
+
console.log(`[exec] D7 single-endpoint skill: only endpoint ${only} is a single-item route for a list intent — not forcing (cardinality gate)`);
|
|
144274
|
+
}
|
|
144138
144275
|
}
|
|
144139
144276
|
}
|
|
144140
144277
|
if (!recovered && (skill.endpoints?.length ?? 0) >= 2 && intent) {
|
|
@@ -145362,6 +145499,7 @@ var init_routes = __esm(async () => {
|
|
|
145362
145499
|
init_client3();
|
|
145363
145500
|
init_reveng_server_first();
|
|
145364
145501
|
init_header_classify();
|
|
145502
|
+
init_cardinality2();
|
|
145365
145503
|
init_capture_spool();
|
|
145366
145504
|
init_nanoid();
|
|
145367
145505
|
init_marketplace();
|
|
@@ -150755,7 +150893,7 @@ __export(exports_extraction2, {
|
|
|
150755
150893
|
cleanDOM: () => cleanDOM2,
|
|
150756
150894
|
buildStructuredDataHeader: () => buildStructuredDataHeader2
|
|
150757
150895
|
});
|
|
150758
|
-
function extractHtmlMetadataFallback2(html3) {
|
|
150896
|
+
function extractHtmlMetadataFallback2(html3, intent) {
|
|
150759
150897
|
if (!html3 || html3.length < 100)
|
|
150760
150898
|
return null;
|
|
150761
150899
|
try {
|
|
@@ -150787,8 +150925,9 @@ function extractHtmlMetadataFallback2(html3) {
|
|
|
150787
150925
|
jsonLdBlocks.push(parsed);
|
|
150788
150926
|
} catch {}
|
|
150789
150927
|
});
|
|
150790
|
-
|
|
150791
|
-
|
|
150928
|
+
const usableJsonLd = isListLikeIntent2(intent) ? jsonLdBlocks.filter((b) => !valueLooksLikeSingleItem2(b)) : jsonLdBlocks;
|
|
150929
|
+
if (usableJsonLd.length > 0)
|
|
150930
|
+
out.json_ld = usableJsonLd;
|
|
150792
150931
|
const headings = [];
|
|
150793
150932
|
$2("h1, h2").each((_, el) => {
|
|
150794
150933
|
const text3 = cleanText2($2(el).text());
|
|
@@ -153111,6 +153250,16 @@ function scoreSiteMetaJsonLdDemotion2(structure, intent) {
|
|
|
153111
153250
|
return 0;
|
|
153112
153251
|
return -200;
|
|
153113
153252
|
}
|
|
153253
|
+
function isSingleItemStructureForList2(structure, intent) {
|
|
153254
|
+
if (!TINY_RESULT_LIST_INTENT2.test(intent.toLowerCase()))
|
|
153255
|
+
return false;
|
|
153256
|
+
if (structure.type === "repeated-elements")
|
|
153257
|
+
return false;
|
|
153258
|
+
return valueLooksLikeSingleItem2(structure.data);
|
|
153259
|
+
}
|
|
153260
|
+
function scoreSingleItemListMismatch2(structure, intent) {
|
|
153261
|
+
return isSingleItemStructureForList2(structure, intent) ? -200 : 0;
|
|
153262
|
+
}
|
|
153114
153263
|
function looksLikeTinyContentReadResult2(data2, intent) {
|
|
153115
153264
|
if (data2 == null)
|
|
153116
153265
|
return { tiny: false, bytes: 0, stringLeafChars: 0 };
|
|
@@ -153264,9 +153413,9 @@ function extractFromDOM2(html3, intent, contextUrl) {
|
|
|
153264
153413
|
const articleStructures = extractArticleBodySpecial2(html3.length > 600000 ? html3.slice(0, 600000) : html3, intent);
|
|
153265
153414
|
const allStructures = [...flashStructures, ...githubStructures, ...repeatedPersonStructures, ...packageSearchStructures, ...xProfileStructures, ...postStructures, ...repeatedArticleStructures, ...trendStructures, ...definitionStructures, ...packageDetailStructures, ...arxivAbstractStructures, ...courseStructures, ...articleStructures, ...spaStructures, ...parseStructured2(cleaned)].map((structure) => normalizeStructureForIntent2(structure, intent));
|
|
153266
153415
|
const isListIntent = TINY_RESULT_LIST_INTENT2.test(intent.toLowerCase());
|
|
153267
|
-
const structures = allStructures.filter((s) => !(s.type === "repeated-elements" && looksLikeDegenerateRowArray2(s.data)) && !looksLikeConfigShape2(s.data) && !looksLikeEmptyContainer2(s.data) && !(isListIntent && looksLikeSiteMetaJsonLd2(s.data)));
|
|
153416
|
+
const structures = allStructures.filter((s) => !(s.type === "repeated-elements" && looksLikeDegenerateRowArray2(s.data)) && !looksLikeConfigShape2(s.data) && !looksLikeEmptyContainer2(s.data) && !(isListIntent && looksLikeSiteMetaJsonLd2(s.data)) && !isSingleItemStructureForList2(s, intent));
|
|
153268
153417
|
if (structures.length === 0) {
|
|
153269
|
-
const fallback2 = extractHtmlMetadataFallback2(html3);
|
|
153418
|
+
const fallback2 = extractHtmlMetadataFallback2(html3, intent);
|
|
153270
153419
|
if (fallback2) {
|
|
153271
153420
|
return _finalize({ data: fallback2, extraction_method: "html_metadata_fallback", confidence: 0.4 });
|
|
153272
153421
|
}
|
|
@@ -153275,7 +153424,7 @@ function extractFromDOM2(html3, intent, contextUrl) {
|
|
|
153275
153424
|
const intentWords = intent.toLowerCase().split(/\s+/).filter(Boolean);
|
|
153276
153425
|
const scored = structures.map((s) => ({
|
|
153277
153426
|
structure: s,
|
|
153278
|
-
score: scoreRelevance2(s, intentWords) + scoreSemanticFit2(s, intent) + scoreSparseLinkList2(s) + scoreFieldRichness2(s) + scoreConfigShapeDemotion2(s) + scoreDegenerateRowDemotion2(s) + scoreDuplicateRowDemotion2(s) + scoreEmptyContainerDemotion2(s) + scoreSiteMetaJsonLdDemotion2(s, intent) + scoreTableIntentOverlapDemotion2(s, intent, contextUrl, structures)
|
|
153427
|
+
score: scoreRelevance2(s, intentWords) + scoreSemanticFit2(s, intent) + scoreSparseLinkList2(s) + scoreFieldRichness2(s) + scoreConfigShapeDemotion2(s) + scoreDegenerateRowDemotion2(s) + scoreDuplicateRowDemotion2(s) + scoreEmptyContainerDemotion2(s) + scoreSiteMetaJsonLdDemotion2(s, intent) + scoreSingleItemListMismatch2(s, intent) + scoreTableIntentOverlapDemotion2(s, intent, contextUrl, structures)
|
|
153279
153428
|
}));
|
|
153280
153429
|
scored.sort((a, b) => b.score - a.score);
|
|
153281
153430
|
const passing = scored.filter((candidate) => assessIntentResult(candidate.structure.data, intent).verdict === "pass");
|
|
@@ -153891,6 +154040,7 @@ function sanitizeExtractionToJson2(data2, depth = 0) {
|
|
|
153891
154040
|
var STRIP_TAGS2, CHROME_TAGS2, AD_PATTERNS2, HIDDEN_ATTRS2, CONTENT_SELECTORS2, CARD_SELECTORS2, CONFIG_TOP_LEVEL_KEYS2, CONFIG_CHUNK_VALUE_KEYS2, INTENT_OVERLAP_STOPWORDS2, SITE_META_LD_TYPES2, TINY_RESULT_LIST_INTENT2, TINY_RESULT_DETAIL_INTENT2, STRUCTURED_DATA_HIGHLIGHT_TYPES2, HTML_TAG_RE2, TABLE_RE2;
|
|
153892
154041
|
var init_extraction2 = __esm(() => {
|
|
153893
154042
|
init_esm11();
|
|
154043
|
+
init_cardinality2();
|
|
153894
154044
|
STRIP_TAGS2 = new Set(["script", "style", "noscript", "svg", "iframe"]);
|
|
153895
154045
|
CHROME_TAGS2 = new Set(["nav", "footer", "header"]);
|
|
153896
154046
|
AD_PATTERNS2 = /\b(ad|ads|advert|advertisement|tracking|tracker|cookie-banner|cookie-consent|cookie-notice|popup|modal-overlay|gdpr|consent|banner-promo)\b/i;
|
|
@@ -240035,14 +240185,30 @@ async function cmdRun(args, flags, verb = "run") {
|
|
|
240035
240185
|
} else if (explicitEndpointId || !bestEndpoint || endpointIsSafeToAutoExecute(bestEndpoint)) {
|
|
240036
240186
|
runPlan.push({ step: "execute", mode: "direct_api", status: "started", endpoint_id: endpointToExecute });
|
|
240037
240187
|
const resolvedSource = typeof result.source === "string" ? result.source : undefined;
|
|
240038
|
-
|
|
240039
|
-
|
|
240040
|
-
|
|
240041
|
-
|
|
240042
|
-
|
|
240043
|
-
|
|
240044
|
-
|
|
240045
|
-
|
|
240188
|
+
const deferralResult = result;
|
|
240189
|
+
const executed = await withPendingNotice(api4("POST", `/v1/skills/${skillId}/execute`, execBody(endpointToExecute)), "Executing best endpoint...");
|
|
240190
|
+
if (!explicitEndpointId && isResolveSuccessResult(executed) && !resolutionCardinalityMatches(intent, executed.result ?? executed.data)) {
|
|
240191
|
+
runPlan[runPlan.length - 1] = {
|
|
240192
|
+
...runPlan[runPlan.length - 1],
|
|
240193
|
+
status: "skipped",
|
|
240194
|
+
reason: "cardinality_mismatch_single_item"
|
|
240195
|
+
};
|
|
240196
|
+
deferralResult.next_action = {
|
|
240197
|
+
title: "List intent returned a single item",
|
|
240198
|
+
command: `unbrowse execute --skill ${skillId} --endpoint ${endpointToExecute}`,
|
|
240199
|
+
why: "Auto-execute yielded a single item for a list/search intent; the page's listings are likely JS-rendered behind an internal API. Returning the route shortlist instead of one item."
|
|
240200
|
+
};
|
|
240201
|
+
result = deferralResult;
|
|
240202
|
+
} else {
|
|
240203
|
+
result = executed;
|
|
240204
|
+
if (resolvedSource && typeof result.source !== "string")
|
|
240205
|
+
result.source = resolvedSource;
|
|
240206
|
+
runPlan[runPlan.length - 1] = {
|
|
240207
|
+
...runPlan[runPlan.length - 1],
|
|
240208
|
+
status: isResolveSuccessResult(result) ? "complete" : "error",
|
|
240209
|
+
error: resolveResultError(result) ?? null
|
|
240210
|
+
};
|
|
240211
|
+
}
|
|
240046
240212
|
} else {
|
|
240047
240213
|
runPlan.push({
|
|
240048
240214
|
step: "execute",
|
package/runtime/mcp.js
CHANGED
|
@@ -36310,7 +36310,7 @@ var init_cached_resolution = __esm(() => {
|
|
|
36310
36310
|
});
|
|
36311
36311
|
|
|
36312
36312
|
// .tmp-runtime-src/build-info.generated.ts
|
|
36313
|
-
var BUILD_RELEASE_VERSION = "9.6.
|
|
36313
|
+
var BUILD_RELEASE_VERSION = "9.6.2", BUILD_GIT_SHA = "d2d14a6629a0", BUILD_CODE_HASH = "5d9ebf619c61", BUILD_RELEASE_MANIFEST_BASE64 = "eyJzY2hlbWFfdmVyc2lvbiI6MSwicmVsZWFzZV92ZXJzaW9uIjoiOS42LjIiLCJnaXRfc2hhIjoiZDJkMTRhNjYyOWEwIiwiY29kZV9oYXNoIjoiNWQ5ZWJmNjE5YzYxIiwidHJhY2VfdmVyc2lvbiI6IjVkOWViZjYxOWM2MUBkMmQxNGE2NjI5YTAiLCJpc3N1ZWRfYXQiOiIyMDI2LTA2LTE4VDA0OjE3OjAzLjg1M1oifQ", BUILD_RELEASE_MANIFEST_SIGNATURE = "OefvyW2iLVVPQ-0HMg9Mz-bciCmP8LE5u7fVEss_98E", BUILD_DEFAULT_BACKEND_URL = "https://beta-api.unbrowse.ai", BUILD_DEFAULT_PROFILE = "";
|
|
36314
36314
|
|
|
36315
36315
|
// .tmp-runtime-src/version.ts
|
|
36316
36316
|
import { createHash as createHash4 } from "crypto";
|
|
@@ -43177,6 +43177,133 @@ var init_header_classify = __esm(() => {
|
|
|
43177
43177
|
SENSITIVE_HEADER_PATTERN = /token|key|secret|credential|password|session/i;
|
|
43178
43178
|
});
|
|
43179
43179
|
|
|
43180
|
+
// .tmp-runtime-src/values/cardinality.ts
|
|
43181
|
+
function isListLikeIntent(intent) {
|
|
43182
|
+
return LIST_INTENT_RE.test(intent ?? "");
|
|
43183
|
+
}
|
|
43184
|
+
function valueLooksLikeSingleItem(value) {
|
|
43185
|
+
if (value == null || Array.isArray(value) || typeof value !== "object")
|
|
43186
|
+
return false;
|
|
43187
|
+
const obj = value;
|
|
43188
|
+
for (const key of COLLECTION_KEYS) {
|
|
43189
|
+
if (Array.isArray(obj[key]))
|
|
43190
|
+
return false;
|
|
43191
|
+
}
|
|
43192
|
+
for (const v of Object.values(obj)) {
|
|
43193
|
+
if (Array.isArray(v) && v.some((x) => x !== null && typeof x === "object"))
|
|
43194
|
+
return false;
|
|
43195
|
+
}
|
|
43196
|
+
const atType = typeof obj["@type"] === "string" ? obj["@type"].toLowerCase() : "";
|
|
43197
|
+
const isItemType = ITEM_SCHEMA_TYPES.has(atType);
|
|
43198
|
+
const hasName = "name" in obj || "title" in obj || "headline" in obj;
|
|
43199
|
+
const hasPriceish = "offers" in obj || "price" in obj || "sku" in obj;
|
|
43200
|
+
return isItemType || hasName && hasPriceish;
|
|
43201
|
+
}
|
|
43202
|
+
function schemaLooksLikeSingleItem(rs) {
|
|
43203
|
+
if (!rs || typeof rs !== "object")
|
|
43204
|
+
return false;
|
|
43205
|
+
const schema = rs;
|
|
43206
|
+
if (schema.type === "array")
|
|
43207
|
+
return false;
|
|
43208
|
+
const props = schema.properties ?? {};
|
|
43209
|
+
for (const key of COLLECTION_KEYS) {
|
|
43210
|
+
if (key in props)
|
|
43211
|
+
return false;
|
|
43212
|
+
}
|
|
43213
|
+
for (const value of Object.values(props)) {
|
|
43214
|
+
if (value && typeof value === "object" && value.type === "array" && value.items?.type === "object") {
|
|
43215
|
+
return false;
|
|
43216
|
+
}
|
|
43217
|
+
}
|
|
43218
|
+
if (schema.type !== "object")
|
|
43219
|
+
return false;
|
|
43220
|
+
const hasType = "@type" in props;
|
|
43221
|
+
const hasName = "name" in props || "title" in props;
|
|
43222
|
+
const hasPriceish = "offers" in props || "price" in props || "sku" in props;
|
|
43223
|
+
return hasType || hasName && hasPriceish;
|
|
43224
|
+
}
|
|
43225
|
+
function routeLooksLikeSingleItem(route) {
|
|
43226
|
+
const tmpl = route.url_template ?? "";
|
|
43227
|
+
let pathAndQuery = tmpl;
|
|
43228
|
+
try {
|
|
43229
|
+
const u = new URL(tmpl);
|
|
43230
|
+
pathAndQuery = `${u.pathname}${u.search}`;
|
|
43231
|
+
} catch {}
|
|
43232
|
+
const lower = pathAndQuery.toLowerCase();
|
|
43233
|
+
if (/\/(?:search|q|categories?|browse|results?|listings|explore|discover|feed|catalog(?:ue)?|collections?|shop|all)\b/.test(lower) || /[?&](?:q|query|keyword|keywords|search|term|category|cat|page)=/.test(lower)) {
|
|
43234
|
+
return false;
|
|
43235
|
+
}
|
|
43236
|
+
if (/\/(?:p|product|products|item|items|listing|detail|details|dp|pd|sku)\/[^/]+/.test(lower))
|
|
43237
|
+
return true;
|
|
43238
|
+
const lastSeg = lower.split("?")[0].replace(/\/+$/, "").split("/").pop() ?? "";
|
|
43239
|
+
if (/-\d{3,}$/.test(lastSeg) || /^\d{3,}$/.test(lastSeg))
|
|
43240
|
+
return true;
|
|
43241
|
+
if (/\{[^}]+\}/.test(lower))
|
|
43242
|
+
return false;
|
|
43243
|
+
return schemaLooksLikeSingleItem(route.response_schema);
|
|
43244
|
+
}
|
|
43245
|
+
function urlPathLooksListLike(contextUrl) {
|
|
43246
|
+
if (!contextUrl)
|
|
43247
|
+
return false;
|
|
43248
|
+
try {
|
|
43249
|
+
const pathname = new URL(contextUrl).pathname.toLowerCase();
|
|
43250
|
+
return /\/(?:search|basic-search|result-page|results?|discover|browse|categories?|q|listings|feed|catalog(?:ue)?)\b/.test(pathname);
|
|
43251
|
+
} catch {
|
|
43252
|
+
return false;
|
|
43253
|
+
}
|
|
43254
|
+
}
|
|
43255
|
+
function cardinalityMatches(intent, subject, opts) {
|
|
43256
|
+
const wantsMany = isListLikeIntent(intent) || urlPathLooksListLike(opts?.contextUrl);
|
|
43257
|
+
if (!wantsMany)
|
|
43258
|
+
return true;
|
|
43259
|
+
switch (subject.kind) {
|
|
43260
|
+
case "value":
|
|
43261
|
+
return !valueLooksLikeSingleItem(subject.value);
|
|
43262
|
+
case "schema":
|
|
43263
|
+
return !schemaLooksLikeSingleItem(subject.schema);
|
|
43264
|
+
case "route":
|
|
43265
|
+
return !routeLooksLikeSingleItem(subject.route);
|
|
43266
|
+
}
|
|
43267
|
+
}
|
|
43268
|
+
function resolutionCardinalityMatches(intent, data) {
|
|
43269
|
+
return cardinalityMatches(intent, { kind: "value", value: data });
|
|
43270
|
+
}
|
|
43271
|
+
var LIST_INTENT_RE, ITEM_SCHEMA_TYPES, COLLECTION_KEYS;
|
|
43272
|
+
var init_cardinality = __esm(() => {
|
|
43273
|
+
LIST_INTENT_RE = /\b(search|find|lookup|browse|discover|list(?:ings?)?|feed|catalog(?:ue)?)\b/i;
|
|
43274
|
+
ITEM_SCHEMA_TYPES = new Set([
|
|
43275
|
+
"product",
|
|
43276
|
+
"offer",
|
|
43277
|
+
"article",
|
|
43278
|
+
"newsarticle",
|
|
43279
|
+
"blogposting",
|
|
43280
|
+
"recipe",
|
|
43281
|
+
"event",
|
|
43282
|
+
"place",
|
|
43283
|
+
"localbusiness",
|
|
43284
|
+
"jobposting",
|
|
43285
|
+
"book",
|
|
43286
|
+
"movie",
|
|
43287
|
+
"creativework",
|
|
43288
|
+
"person",
|
|
43289
|
+
"organization"
|
|
43290
|
+
]);
|
|
43291
|
+
COLLECTION_KEYS = [
|
|
43292
|
+
"itemListElement",
|
|
43293
|
+
"items",
|
|
43294
|
+
"results",
|
|
43295
|
+
"products",
|
|
43296
|
+
"listings",
|
|
43297
|
+
"data",
|
|
43298
|
+
"edges",
|
|
43299
|
+
"hits",
|
|
43300
|
+
"records",
|
|
43301
|
+
"entries",
|
|
43302
|
+
"rows",
|
|
43303
|
+
"nodes"
|
|
43304
|
+
];
|
|
43305
|
+
});
|
|
43306
|
+
|
|
43180
43307
|
// node_modules/.bun/nanoid@5.1.11/node_modules/nanoid/url-alphabet/index.js
|
|
43181
43308
|
var urlAlphabet = "useandom-26T198340PX75pxJACKVERYMINDBUSHWOLF_GQZbfghjklqvwyzrict";
|
|
43182
43309
|
|
|
@@ -115440,7 +115567,7 @@ __export(exports_extraction, {
|
|
|
115440
115567
|
cleanDOM: () => cleanDOM,
|
|
115441
115568
|
buildStructuredDataHeader: () => buildStructuredDataHeader
|
|
115442
115569
|
});
|
|
115443
|
-
function extractHtmlMetadataFallback(html3) {
|
|
115570
|
+
function extractHtmlMetadataFallback(html3, intent) {
|
|
115444
115571
|
if (!html3 || html3.length < 100)
|
|
115445
115572
|
return null;
|
|
115446
115573
|
try {
|
|
@@ -115472,8 +115599,9 @@ function extractHtmlMetadataFallback(html3) {
|
|
|
115472
115599
|
jsonLdBlocks.push(parsed);
|
|
115473
115600
|
} catch {}
|
|
115474
115601
|
});
|
|
115475
|
-
|
|
115476
|
-
|
|
115602
|
+
const usableJsonLd = isListLikeIntent(intent) ? jsonLdBlocks.filter((b) => !valueLooksLikeSingleItem(b)) : jsonLdBlocks;
|
|
115603
|
+
if (usableJsonLd.length > 0)
|
|
115604
|
+
out.json_ld = usableJsonLd;
|
|
115477
115605
|
const headings = [];
|
|
115478
115606
|
$2("h1, h2").each((_, el) => {
|
|
115479
115607
|
const text3 = cleanText($2(el).text());
|
|
@@ -117796,6 +117924,16 @@ function scoreSiteMetaJsonLdDemotion(structure, intent) {
|
|
|
117796
117924
|
return 0;
|
|
117797
117925
|
return -200;
|
|
117798
117926
|
}
|
|
117927
|
+
function isSingleItemStructureForList(structure, intent) {
|
|
117928
|
+
if (!TINY_RESULT_LIST_INTENT.test(intent.toLowerCase()))
|
|
117929
|
+
return false;
|
|
117930
|
+
if (structure.type === "repeated-elements")
|
|
117931
|
+
return false;
|
|
117932
|
+
return valueLooksLikeSingleItem(structure.data);
|
|
117933
|
+
}
|
|
117934
|
+
function scoreSingleItemListMismatch(structure, intent) {
|
|
117935
|
+
return isSingleItemStructureForList(structure, intent) ? -200 : 0;
|
|
117936
|
+
}
|
|
117799
117937
|
function looksLikeTinyContentReadResult(data2, intent) {
|
|
117800
117938
|
if (data2 == null)
|
|
117801
117939
|
return { tiny: false, bytes: 0, stringLeafChars: 0 };
|
|
@@ -117949,9 +118087,9 @@ function extractFromDOM(html3, intent, contextUrl) {
|
|
|
117949
118087
|
const articleStructures = extractArticleBodySpecial(html3.length > 600000 ? html3.slice(0, 600000) : html3, intent);
|
|
117950
118088
|
const allStructures = [...flashStructures, ...githubStructures, ...repeatedPersonStructures, ...packageSearchStructures, ...xProfileStructures, ...postStructures, ...repeatedArticleStructures, ...trendStructures, ...definitionStructures, ...packageDetailStructures, ...arxivAbstractStructures, ...courseStructures, ...articleStructures, ...spaStructures, ...parseStructured(cleaned)].map((structure) => normalizeStructureForIntent(structure, intent));
|
|
117951
118089
|
const isListIntent = TINY_RESULT_LIST_INTENT.test(intent.toLowerCase());
|
|
117952
|
-
const structures = allStructures.filter((s) => !(s.type === "repeated-elements" && looksLikeDegenerateRowArray(s.data)) && !looksLikeConfigShape(s.data) && !looksLikeEmptyContainer(s.data) && !(isListIntent && looksLikeSiteMetaJsonLd(s.data)));
|
|
118090
|
+
const structures = allStructures.filter((s) => !(s.type === "repeated-elements" && looksLikeDegenerateRowArray(s.data)) && !looksLikeConfigShape(s.data) && !looksLikeEmptyContainer(s.data) && !(isListIntent && looksLikeSiteMetaJsonLd(s.data)) && !isSingleItemStructureForList(s, intent));
|
|
117953
118091
|
if (structures.length === 0) {
|
|
117954
|
-
const fallback2 = extractHtmlMetadataFallback(html3);
|
|
118092
|
+
const fallback2 = extractHtmlMetadataFallback(html3, intent);
|
|
117955
118093
|
if (fallback2) {
|
|
117956
118094
|
return _finalize({ data: fallback2, extraction_method: "html_metadata_fallback", confidence: 0.4 });
|
|
117957
118095
|
}
|
|
@@ -117960,7 +118098,7 @@ function extractFromDOM(html3, intent, contextUrl) {
|
|
|
117960
118098
|
const intentWords = intent.toLowerCase().split(/\s+/).filter(Boolean);
|
|
117961
118099
|
const scored = structures.map((s) => ({
|
|
117962
118100
|
structure: s,
|
|
117963
|
-
score: scoreRelevance(s, intentWords) + scoreSemanticFit(s, intent) + scoreSparseLinkList(s) + scoreFieldRichness(s) + scoreConfigShapeDemotion(s) + scoreDegenerateRowDemotion(s) + scoreDuplicateRowDemotion(s) + scoreEmptyContainerDemotion(s) + scoreSiteMetaJsonLdDemotion(s, intent) + scoreTableIntentOverlapDemotion(s, intent, contextUrl, structures)
|
|
118101
|
+
score: scoreRelevance(s, intentWords) + scoreSemanticFit(s, intent) + scoreSparseLinkList(s) + scoreFieldRichness(s) + scoreConfigShapeDemotion(s) + scoreDegenerateRowDemotion(s) + scoreDuplicateRowDemotion(s) + scoreEmptyContainerDemotion(s) + scoreSiteMetaJsonLdDemotion(s, intent) + scoreSingleItemListMismatch(s, intent) + scoreTableIntentOverlapDemotion(s, intent, contextUrl, structures)
|
|
117964
118102
|
}));
|
|
117965
118103
|
scored.sort((a, b) => b.score - a.score);
|
|
117966
118104
|
const passing = scored.filter((candidate) => assessIntentResult(candidate.structure.data, intent).verdict === "pass");
|
|
@@ -118576,6 +118714,7 @@ function sanitizeExtractionToJson(data2, depth = 0) {
|
|
|
118576
118714
|
var STRIP_TAGS, CHROME_TAGS, AD_PATTERNS, HIDDEN_ATTRS, CONTENT_SELECTORS, CARD_SELECTORS, CONFIG_TOP_LEVEL_KEYS, CONFIG_CHUNK_VALUE_KEYS, INTENT_OVERLAP_STOPWORDS, SITE_META_LD_TYPES, TINY_RESULT_LIST_INTENT, TINY_RESULT_DETAIL_INTENT, STRUCTURED_DATA_HIGHLIGHT_TYPES, HTML_TAG_RE, TABLE_RE;
|
|
118577
118715
|
var init_extraction = __esm(() => {
|
|
118578
118716
|
init_esm11();
|
|
118717
|
+
init_cardinality();
|
|
118579
118718
|
STRIP_TAGS = new Set(["script", "style", "noscript", "svg", "iframe"]);
|
|
118580
118719
|
CHROME_TAGS = new Set(["nav", "footer", "header"]);
|
|
118581
118720
|
AD_PATTERNS = /\b(ad|ads|advert|advertisement|tracking|tracker|cookie-banner|cookie-consent|cookie-notice|popup|modal-overlay|gdpr|consent|banner-promo)\b/i;
|
|
@@ -128277,7 +128416,8 @@ function selectBestEndpoint(endpoints, intent, skillDomain, contextUrl) {
|
|
|
128277
128416
|
const ranked = rankEndpoints(endpoints, intent, skillDomain, contextUrl);
|
|
128278
128417
|
if (ranked.length === 0)
|
|
128279
128418
|
throw new Error("All endpoints are disabled");
|
|
128280
|
-
|
|
128419
|
+
const preferred = ranked.find((r) => cardinalityMatches(intent, { kind: "route", route: r.endpoint }, { contextUrl }));
|
|
128420
|
+
return (preferred ?? ranked[0]).endpoint;
|
|
128281
128421
|
}
|
|
128282
128422
|
function isHtml2(text3) {
|
|
128283
128423
|
const trimmed = text3.trimStart().slice(0, 200).toLowerCase();
|
|
@@ -128324,6 +128464,7 @@ var init_execution = __esm(async () => {
|
|
|
128324
128464
|
init_reveng_server_first();
|
|
128325
128465
|
init_header_classify();
|
|
128326
128466
|
init_storage_hole_bindings();
|
|
128467
|
+
init_cardinality();
|
|
128327
128468
|
init_sealed_blob_store();
|
|
128328
128469
|
init_signer();
|
|
128329
128470
|
init_bundle_scanner();
|
|
@@ -128876,92 +129017,6 @@ function bindingGraphFromOperationGraph(og) {
|
|
|
128876
129017
|
return { endpoints, edges };
|
|
128877
129018
|
}
|
|
128878
129019
|
|
|
128879
|
-
// .tmp-runtime-src/values/cardinality.ts
|
|
128880
|
-
function isListLikeIntent(intent) {
|
|
128881
|
-
return LIST_INTENT_RE.test(intent ?? "");
|
|
128882
|
-
}
|
|
128883
|
-
function valueLooksLikeSingleItem(value) {
|
|
128884
|
-
if (value == null || Array.isArray(value) || typeof value !== "object")
|
|
128885
|
-
return false;
|
|
128886
|
-
const obj = value;
|
|
128887
|
-
for (const key2 of COLLECTION_KEYS) {
|
|
128888
|
-
if (Array.isArray(obj[key2]))
|
|
128889
|
-
return false;
|
|
128890
|
-
}
|
|
128891
|
-
for (const v of Object.values(obj)) {
|
|
128892
|
-
if (Array.isArray(v) && v.some((x) => x !== null && typeof x === "object"))
|
|
128893
|
-
return false;
|
|
128894
|
-
}
|
|
128895
|
-
const atType = typeof obj["@type"] === "string" ? obj["@type"].toLowerCase() : "";
|
|
128896
|
-
const isItemType = ITEM_SCHEMA_TYPES.has(atType);
|
|
128897
|
-
const hasName = "name" in obj || "title" in obj || "headline" in obj;
|
|
128898
|
-
const hasPriceish = "offers" in obj || "price" in obj || "sku" in obj;
|
|
128899
|
-
return isItemType || hasName && hasPriceish;
|
|
128900
|
-
}
|
|
128901
|
-
function schemaLooksLikeSingleItem(rs) {
|
|
128902
|
-
if (!rs || typeof rs !== "object")
|
|
128903
|
-
return false;
|
|
128904
|
-
const schema = rs;
|
|
128905
|
-
if (schema.type === "array")
|
|
128906
|
-
return false;
|
|
128907
|
-
const props = schema.properties ?? {};
|
|
128908
|
-
for (const key2 of COLLECTION_KEYS) {
|
|
128909
|
-
if (key2 in props)
|
|
128910
|
-
return false;
|
|
128911
|
-
}
|
|
128912
|
-
for (const value of Object.values(props)) {
|
|
128913
|
-
if (value && typeof value === "object" && value.type === "array" && value.items?.type === "object") {
|
|
128914
|
-
return false;
|
|
128915
|
-
}
|
|
128916
|
-
}
|
|
128917
|
-
if (schema.type !== "object")
|
|
128918
|
-
return false;
|
|
128919
|
-
const hasType = "@type" in props;
|
|
128920
|
-
const hasName = "name" in props || "title" in props;
|
|
128921
|
-
const hasPriceish = "offers" in props || "price" in props || "sku" in props;
|
|
128922
|
-
return hasType || hasName && hasPriceish;
|
|
128923
|
-
}
|
|
128924
|
-
function resolutionCardinalityMatches(intent, data2) {
|
|
128925
|
-
if (!isListLikeIntent(intent))
|
|
128926
|
-
return true;
|
|
128927
|
-
return !valueLooksLikeSingleItem(data2);
|
|
128928
|
-
}
|
|
128929
|
-
var LIST_INTENT_RE, ITEM_SCHEMA_TYPES, COLLECTION_KEYS;
|
|
128930
|
-
var init_cardinality = __esm(() => {
|
|
128931
|
-
LIST_INTENT_RE = /\b(search|find|lookup|browse|discover|list(?:ings?)?|feed|catalog(?:ue)?)\b/i;
|
|
128932
|
-
ITEM_SCHEMA_TYPES = new Set([
|
|
128933
|
-
"product",
|
|
128934
|
-
"offer",
|
|
128935
|
-
"article",
|
|
128936
|
-
"newsarticle",
|
|
128937
|
-
"blogposting",
|
|
128938
|
-
"recipe",
|
|
128939
|
-
"event",
|
|
128940
|
-
"place",
|
|
128941
|
-
"localbusiness",
|
|
128942
|
-
"jobposting",
|
|
128943
|
-
"book",
|
|
128944
|
-
"movie",
|
|
128945
|
-
"creativework",
|
|
128946
|
-
"person",
|
|
128947
|
-
"organization"
|
|
128948
|
-
]);
|
|
128949
|
-
COLLECTION_KEYS = [
|
|
128950
|
-
"itemListElement",
|
|
128951
|
-
"items",
|
|
128952
|
-
"results",
|
|
128953
|
-
"products",
|
|
128954
|
-
"listings",
|
|
128955
|
-
"data",
|
|
128956
|
-
"edges",
|
|
128957
|
-
"hits",
|
|
128958
|
-
"records",
|
|
128959
|
-
"entries",
|
|
128960
|
-
"rows",
|
|
128961
|
-
"nodes"
|
|
128962
|
-
];
|
|
128963
|
-
});
|
|
128964
|
-
|
|
128965
129020
|
// .tmp-runtime-src/values/yield-safety.ts
|
|
128966
129021
|
function tokenizeKey(key2) {
|
|
128967
129022
|
return key2.replace(/([a-z0-9])([A-Z])/g, "$1 $2").split(/[_\-.\s]+/).map((s) => s.toLowerCase()).filter(Boolean);
|
|
@@ -131117,26 +131172,6 @@ function endpointTargetsMismatchedLocalReplayHost(endpoint, contextUrl) {
|
|
|
131117
131172
|
function endpointHasNegativeTag(endpoint, tag) {
|
|
131118
131173
|
return (endpoint.semantic?.negative_tags ?? []).some((candidate) => candidate.trim().toLowerCase() === tag.trim().toLowerCase());
|
|
131119
131174
|
}
|
|
131120
|
-
function looksLikeSingleItemRoute(endpoint) {
|
|
131121
|
-
const tmpl = endpoint.url_template ?? "";
|
|
131122
|
-
let pathAndQuery = tmpl;
|
|
131123
|
-
try {
|
|
131124
|
-
const u = new URL(tmpl);
|
|
131125
|
-
pathAndQuery = `${u.pathname}${u.search}`;
|
|
131126
|
-
} catch {}
|
|
131127
|
-
const lower = pathAndQuery.toLowerCase();
|
|
131128
|
-
if (/\/(?:search|q|categories?|browse|results?|listings|explore|discover|feed|catalog(?:ue)?|collections?|shop|all)\b/.test(lower) || /[?&](?:q|query|keyword|keywords|search|term|category|cat|page)=/.test(lower)) {
|
|
131129
|
-
return false;
|
|
131130
|
-
}
|
|
131131
|
-
if (/\/(?:p|product|products|item|items|listing|detail|details|dp|pd|sku)\/[^/]+/.test(lower))
|
|
131132
|
-
return true;
|
|
131133
|
-
const lastSeg = lower.split("?")[0].replace(/\/+$/, "").split("/").pop() ?? "";
|
|
131134
|
-
if (/-\d{3,}$/.test(lastSeg) || /^\d{3,}$/.test(lastSeg))
|
|
131135
|
-
return true;
|
|
131136
|
-
if (/\{[^}]+\}/.test(lower))
|
|
131137
|
-
return false;
|
|
131138
|
-
return schemaLooksLikeSingleItem(endpoint.response_schema);
|
|
131139
|
-
}
|
|
131140
131175
|
function isResolveUsableEndpointForIntent(endpoint, intent, contextUrl) {
|
|
131141
131176
|
if (endpointTargetsMismatchedLocalReplayHost(endpoint, contextUrl))
|
|
131142
131177
|
return false;
|
|
@@ -131146,7 +131181,7 @@ function isResolveUsableEndpointForIntent(endpoint, intent, contextUrl) {
|
|
|
131146
131181
|
if (isFeedTimelineIntent(intent, contextUrl) && endpointHasNegativeTag(endpoint, "helper")) {
|
|
131147
131182
|
return false;
|
|
131148
131183
|
}
|
|
131149
|
-
if (
|
|
131184
|
+
if (!cardinalityMatches(intent, { kind: "route", route: endpoint }, { contextUrl })) {
|
|
131150
131185
|
return false;
|
|
131151
131186
|
}
|
|
131152
131187
|
return true;
|
|
@@ -132163,7 +132198,20 @@ function inferPreferredEntityTokens(intent) {
|
|
|
132163
132198
|
return [];
|
|
132164
132199
|
}
|
|
132165
132200
|
function isAcceptableIntentResult(result, intent) {
|
|
132166
|
-
|
|
132201
|
+
if (assessIntentResult(result, intent).verdict === "fail")
|
|
132202
|
+
return false;
|
|
132203
|
+
if (!cardinalityMatches(intent, { kind: "value", value: unwrapResultPayload(result) }))
|
|
132204
|
+
return false;
|
|
132205
|
+
return true;
|
|
132206
|
+
}
|
|
132207
|
+
function unwrapResultPayload(result) {
|
|
132208
|
+
if (result == null || typeof result !== "object" || Array.isArray(result))
|
|
132209
|
+
return result;
|
|
132210
|
+
const rec = { ...result };
|
|
132211
|
+
for (const k of ["available_endpoints", "available_operations", "shortlist_for_judgment", "workflow_dag", "walked_from", "exa_candidates", "run_plan"]) {
|
|
132212
|
+
delete rec[k];
|
|
132213
|
+
}
|
|
132214
|
+
return rec;
|
|
132167
132215
|
}
|
|
132168
132216
|
function candidateMatchesPreferredEntity(candidate, preferredTokens) {
|
|
132169
132217
|
if (preferredTokens.length === 0)
|
|
@@ -142761,9 +142809,14 @@ async function registerRoutes(app) {
|
|
|
142761
142809
|
recovered = true;
|
|
142762
142810
|
} else if (errResult.available_endpoints?.length === 1) {
|
|
142763
142811
|
const only = errResult.available_endpoints[0].endpoint_id;
|
|
142764
|
-
|
|
142765
|
-
|
|
142766
|
-
|
|
142812
|
+
const onlyEp = (skill.endpoints ?? []).find((e) => e.endpoint_id === only);
|
|
142813
|
+
if (!onlyEp || cardinalityMatches(intent, { kind: "route", route: onlyEp }, { contextUrl: context_url })) {
|
|
142814
|
+
console.log(`[exec] D7 single-endpoint skill: rewriting endpoint_id ${want} → ${only}`);
|
|
142815
|
+
execParams.endpoint_id = only;
|
|
142816
|
+
recovered = true;
|
|
142817
|
+
} else {
|
|
142818
|
+
console.log(`[exec] D7 single-endpoint skill: only endpoint ${only} is a single-item route for a list intent — not forcing (cardinality gate)`);
|
|
142819
|
+
}
|
|
142767
142820
|
}
|
|
142768
142821
|
}
|
|
142769
142822
|
if (!recovered && (skill.endpoints?.length ?? 0) >= 2 && intent) {
|
|
@@ -143991,6 +144044,7 @@ var init_routes = __esm(async () => {
|
|
|
143991
144044
|
init_client();
|
|
143992
144045
|
init_reveng_server_first();
|
|
143993
144046
|
init_header_classify();
|
|
144047
|
+
init_cardinality();
|
|
143994
144048
|
init_capture_spool();
|
|
143995
144049
|
init_nanoid();
|
|
143996
144050
|
init_marketplace();
|
|
@@ -233199,14 +233253,30 @@ async function cmdRun(args, flags, verb = "run") {
|
|
|
233199
233253
|
} else if (explicitEndpointId || !bestEndpoint || endpointIsSafeToAutoExecute(bestEndpoint)) {
|
|
233200
233254
|
runPlan.push({ step: "execute", mode: "direct_api", status: "started", endpoint_id: endpointToExecute });
|
|
233201
233255
|
const resolvedSource = typeof result.source === "string" ? result.source : undefined;
|
|
233202
|
-
|
|
233203
|
-
|
|
233204
|
-
|
|
233205
|
-
|
|
233206
|
-
|
|
233207
|
-
|
|
233208
|
-
|
|
233209
|
-
|
|
233256
|
+
const deferralResult = result;
|
|
233257
|
+
const executed = await withPendingNotice(api4("POST", `/v1/skills/${skillId}/execute`, execBody(endpointToExecute)), "Executing best endpoint...");
|
|
233258
|
+
if (!explicitEndpointId && isResolveSuccessResult(executed) && !resolutionCardinalityMatches(intent, executed.result ?? executed.data)) {
|
|
233259
|
+
runPlan[runPlan.length - 1] = {
|
|
233260
|
+
...runPlan[runPlan.length - 1],
|
|
233261
|
+
status: "skipped",
|
|
233262
|
+
reason: "cardinality_mismatch_single_item"
|
|
233263
|
+
};
|
|
233264
|
+
deferralResult.next_action = {
|
|
233265
|
+
title: "List intent returned a single item",
|
|
233266
|
+
command: `unbrowse execute --skill ${skillId} --endpoint ${endpointToExecute}`,
|
|
233267
|
+
why: "Auto-execute yielded a single item for a list/search intent; the page's listings are likely JS-rendered behind an internal API. Returning the route shortlist instead of one item."
|
|
233268
|
+
};
|
|
233269
|
+
result = deferralResult;
|
|
233270
|
+
} else {
|
|
233271
|
+
result = executed;
|
|
233272
|
+
if (resolvedSource && typeof result.source !== "string")
|
|
233273
|
+
result.source = resolvedSource;
|
|
233274
|
+
runPlan[runPlan.length - 1] = {
|
|
233275
|
+
...runPlan[runPlan.length - 1],
|
|
233276
|
+
status: isResolveSuccessResult(result) ? "complete" : "error",
|
|
233277
|
+
error: resolveResultError(result) ?? null
|
|
233278
|
+
};
|
|
233279
|
+
}
|
|
233210
233280
|
} else {
|
|
233211
233281
|
runPlan.push({
|
|
233212
233282
|
step: "execute",
|
|
@@ -236336,7 +236406,7 @@ __export(exports_orchestrator, {
|
|
|
236336
236406
|
pickPreferredSkillSnapshot: () => pickPreferredSkillSnapshot2,
|
|
236337
236407
|
persistDomainCache: () => persistDomainCache2,
|
|
236338
236408
|
marketplaceSkillMatchesContext: () => marketplaceSkillMatchesContext2,
|
|
236339
|
-
looksLikeSingleItemRoute: () =>
|
|
236409
|
+
looksLikeSingleItemRoute: () => looksLikeSingleItemRoute,
|
|
236340
236410
|
isRouteCacheEntryStale: () => isRouteCacheEntryStale2,
|
|
236341
236411
|
isResolveUsableEndpointForIntent: () => isResolveUsableEndpointForIntent2,
|
|
236342
236412
|
isCachedSkillRelevantForIntent: () => isCachedSkillRelevantForIntent2,
|
|
@@ -236857,25 +236927,8 @@ function endpointTargetsMismatchedLocalReplayHost2(endpoint, contextUrl) {
|
|
|
236857
236927
|
function endpointHasNegativeTag2(endpoint, tag) {
|
|
236858
236928
|
return (endpoint.semantic?.negative_tags ?? []).some((candidate) => candidate.trim().toLowerCase() === tag.trim().toLowerCase());
|
|
236859
236929
|
}
|
|
236860
|
-
function
|
|
236861
|
-
|
|
236862
|
-
let pathAndQuery = tmpl;
|
|
236863
|
-
try {
|
|
236864
|
-
const u = new URL(tmpl);
|
|
236865
|
-
pathAndQuery = `${u.pathname}${u.search}`;
|
|
236866
|
-
} catch {}
|
|
236867
|
-
const lower = pathAndQuery.toLowerCase();
|
|
236868
|
-
if (/\/(?:search|q|categories?|browse|results?|listings|explore|discover|feed|catalog(?:ue)?|collections?|shop|all)\b/.test(lower) || /[?&](?:q|query|keyword|keywords|search|term|category|cat|page)=/.test(lower)) {
|
|
236869
|
-
return false;
|
|
236870
|
-
}
|
|
236871
|
-
if (/\/(?:p|product|products|item|items|listing|detail|details|dp|pd|sku)\/[^/]+/.test(lower))
|
|
236872
|
-
return true;
|
|
236873
|
-
const lastSeg = lower.split("?")[0].replace(/\/+$/, "").split("/").pop() ?? "";
|
|
236874
|
-
if (/-\d{3,}$/.test(lastSeg) || /^\d{3,}$/.test(lastSeg))
|
|
236875
|
-
return true;
|
|
236876
|
-
if (/\{[^}]+\}/.test(lower))
|
|
236877
|
-
return false;
|
|
236878
|
-
return schemaLooksLikeSingleItem(endpoint.response_schema);
|
|
236930
|
+
function looksLikeSingleItemRoute(endpoint) {
|
|
236931
|
+
return routeLooksLikeSingleItem(endpoint);
|
|
236879
236932
|
}
|
|
236880
236933
|
function isResolveUsableEndpointForIntent2(endpoint, intent, contextUrl) {
|
|
236881
236934
|
if (endpointTargetsMismatchedLocalReplayHost2(endpoint, contextUrl))
|
|
@@ -236886,7 +236939,7 @@ function isResolveUsableEndpointForIntent2(endpoint, intent, contextUrl) {
|
|
|
236886
236939
|
if (isFeedTimelineIntent2(intent, contextUrl) && endpointHasNegativeTag2(endpoint, "helper")) {
|
|
236887
236940
|
return false;
|
|
236888
236941
|
}
|
|
236889
|
-
if (
|
|
236942
|
+
if (!cardinalityMatches(intent, { kind: "route", route: endpoint }, { contextUrl })) {
|
|
236890
236943
|
return false;
|
|
236891
236944
|
}
|
|
236892
236945
|
return true;
|
|
@@ -237908,7 +237961,20 @@ function inferPreferredEntityTokens2(intent) {
|
|
|
237908
237961
|
return [];
|
|
237909
237962
|
}
|
|
237910
237963
|
function isAcceptableIntentResult2(result, intent) {
|
|
237911
|
-
|
|
237964
|
+
if (assessIntentResult(result, intent).verdict === "fail")
|
|
237965
|
+
return false;
|
|
237966
|
+
if (!cardinalityMatches(intent, { kind: "value", value: unwrapResultPayload2(result) }))
|
|
237967
|
+
return false;
|
|
237968
|
+
return true;
|
|
237969
|
+
}
|
|
237970
|
+
function unwrapResultPayload2(result) {
|
|
237971
|
+
if (result == null || typeof result !== "object" || Array.isArray(result))
|
|
237972
|
+
return result;
|
|
237973
|
+
const rec = { ...result };
|
|
237974
|
+
for (const k of ["available_endpoints", "available_operations", "shortlist_for_judgment", "workflow_dag", "walked_from", "exa_candidates", "run_plan"]) {
|
|
237975
|
+
delete rec[k];
|
|
237976
|
+
}
|
|
237977
|
+
return rec;
|
|
237912
237978
|
}
|
|
237913
237979
|
function candidateMatchesPreferredEntity2(candidate, preferredTokens) {
|
|
237914
237980
|
if (preferredTokens.length === 0)
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
"repo_url": "https://github.com/justrach/kuri.git",
|
|
3
3
|
"branch": "adding-extensions",
|
|
4
4
|
"source_sha": "149881254046a20778f642b69f20f0c6468f6fb4",
|
|
5
|
-
"built_at": "2026-06-
|
|
5
|
+
"built_at": "2026-06-18T03:58:10.362Z",
|
|
6
6
|
"binaries": {
|
|
7
7
|
"darwin-arm64": {
|
|
8
8
|
"zig_target": "aarch64-macos",
|
|
@@ -21,11 +21,11 @@
|
|
|
21
21
|
},
|
|
22
22
|
"linux-x64": {
|
|
23
23
|
"zig_target": "x86_64-linux",
|
|
24
|
-
"sha256": "
|
|
24
|
+
"sha256": "e73aecfbf07001ba0be5032118790eb253ad5d8d12caca6a1dd5ad3ccab44b9e"
|
|
25
25
|
},
|
|
26
26
|
"win-x64": {
|
|
27
27
|
"zig_target": "x86_64-windows-gnu",
|
|
28
|
-
"sha256": "
|
|
28
|
+
"sha256": "9ecbc82be646e755e4664051cf345d54dde3c6610e457d763deff67895047963",
|
|
29
29
|
"source": "pre-staged"
|
|
30
30
|
}
|
|
31
31
|
},
|
|
@@ -33,22 +33,22 @@
|
|
|
33
33
|
"darwin-arm64": {
|
|
34
34
|
"zig_target": "aarch64-macos",
|
|
35
35
|
"lib": "libkuri_ffi.dylib",
|
|
36
|
-
"sha256": "
|
|
36
|
+
"sha256": "6c72cf383df4fa3f870b745da43d64eae8f67e58c6f971214ac29602fb649939"
|
|
37
37
|
},
|
|
38
38
|
"darwin-x64": {
|
|
39
39
|
"zig_target": "x86_64-macos",
|
|
40
40
|
"lib": "libkuri_ffi.dylib",
|
|
41
|
-
"sha256": "
|
|
41
|
+
"sha256": "82480772ddc8e44c8e34e70b80d7dc0969004942f77276587af450b62d3d2750"
|
|
42
42
|
},
|
|
43
43
|
"linux-arm64": {
|
|
44
44
|
"zig_target": "aarch64-linux",
|
|
45
45
|
"lib": "libkuri_ffi.so",
|
|
46
|
-
"sha256": "
|
|
46
|
+
"sha256": "ef8dfa2b634f04294f93a94472d9856ba777681afaab2d4213f0e29821882e07"
|
|
47
47
|
},
|
|
48
48
|
"linux-x64": {
|
|
49
49
|
"zig_target": "x86_64-linux",
|
|
50
50
|
"lib": "libkuri_ffi.so",
|
|
51
|
-
"sha256": "
|
|
51
|
+
"sha256": "fb29ad2b71186d176306321d17e88074a67fea139991faef9aa4862333942c9e"
|
|
52
52
|
}
|
|
53
53
|
}
|
|
54
54
|
}
|
|
Binary file
|