unbrowse 9.6.0 → 9.6.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/runtime/cli.js +351 -30
- package/runtime/mcp.js +239 -27
- package/vendor/kuri/darwin-arm64/libkuri_ffi.dylib +0 -0
- package/vendor/kuri/darwin-x64/libkuri_ffi.dylib +0 -0
- package/vendor/kuri/linux-arm64/libkuri_ffi.so +0 -0
- package/vendor/kuri/linux-x64/kuri +0 -0
- package/vendor/kuri/linux-x64/libkuri_ffi.so +0 -0
- package/vendor/kuri/manifest.json +7 -7
- package/vendor/kuri/win-x64/kuri.exe +0 -0
package/package.json
CHANGED
package/runtime/cli.js
CHANGED
|
@@ -1730,6 +1730,133 @@ var init_cached_resolution = __esm(() => {
|
|
|
1730
1730
|
init_principal_scope();
|
|
1731
1731
|
});
|
|
1732
1732
|
|
|
1733
|
+
// .tmp-runtime-src/values/cardinality.ts
|
|
1734
|
+
/**
 * Reports whether a free-text intent reads like a list/search request.
 *
 * @param {string | null | undefined} intent - Intent text; a nullish value is treated as "".
 * @returns {boolean} True when LIST_INTENT_RE matches the intent text.
 */
function isListLikeIntent(intent) {
  const text = intent ?? "";
  return LIST_INTENT_RE.test(text);
}
|
|
1737
|
+
/**
 * Heuristically decides whether a payload describes exactly one item
 * (one product, one article, ...) rather than a collection of items.
 *
 * @param {unknown} value - Parsed payload to inspect.
 * @returns {boolean} True when the payload looks like a single item.
 */
function valueLooksLikeSingleItem(value) {
  // Only plain (non-array) objects can be a single item.
  if (value == null || Array.isArray(value) || typeof value !== "object")
    return false;
  const obj = value;
  // A well-known collection key holding an array marks a collection.
  for (const key of COLLECTION_KEYS) {
    if (Array.isArray(obj[key]))
      return false;
  }
  // So does any property that holds an array containing objects.
  for (const v of Object.values(obj)) {
    if (Array.isArray(v) && v.some((x) => x !== null && typeof x === "object"))
      return false;
  }
  // JSON-LD allows "@type" to be one string or an array of strings; accept
  // both so a multi-typed node such as ["Product", "Thing"] is recognised.
  const rawType = obj["@type"];
  const typeNames = (Array.isArray(rawType) ? rawType : [rawType]).filter((t) => typeof t === "string");
  const isItemType = typeNames.some((t) => ITEM_SCHEMA_TYPES.has(t.toLowerCase()));
  const hasName = "name" in obj || "title" in obj || "headline" in obj;
  const hasPriceish = "offers" in obj || "price" in obj || "sku" in obj;
  // Either an explicit item type, or the name + price/sku shape of an item.
  return isItemType || (hasName && hasPriceish);
}
|
|
1755
|
+
/**
 * Heuristically decides whether a response schema describes one item
 * rather than a collection.
 *
 * @param {unknown} rs - Response schema; read for `type` and `properties`.
 * @returns {boolean} True when the schema looks like a single item.
 */
function schemaLooksLikeSingleItem(rs) {
  if (!rs || typeof rs !== "object")
    return false;
  const schema = rs;
  if (schema.type === "array")
    return false;
  // The `in` operator throws on primitives, so treat a malformed
  // (non-object) `properties` value as "no properties".
  const rawProps = schema.properties;
  const props = rawProps && typeof rawProps === "object" ? rawProps : {};
  // A well-known collection key among the properties marks a collection.
  for (const key of COLLECTION_KEYS) {
    if (key in props)
      return false;
  }
  // So does any property declared as an array of objects.
  for (const value of Object.values(props)) {
    if (value && typeof value === "object" && value.type === "array" && value.items?.type === "object") {
      return false;
    }
  }
  if (schema.type !== "object")
    return false;
  const hasType = "@type" in props;
  // "headline" is accepted to agree with the value-based check in this module.
  const hasName = "name" in props || "title" in props || "headline" in props;
  const hasPriceish = "offers" in props || "price" in props || "sku" in props;
  return hasType || (hasName && hasPriceish);
}
|
|
1778
|
+
/**
 * Heuristically decides whether a route (endpoint) returns a single item,
 * judging by its URL template and, as a last resort, its response schema.
 *
 * @param {{ url_template?: string, response_schema?: unknown }} route - Route descriptor.
 * @returns {boolean} True when the route looks like a single-item (detail) route.
 */
function routeLooksLikeSingleItem(route) {
  const tmpl = route.url_template ?? "";
  let pathAndQuery = tmpl;
  try {
    const u = new URL(tmpl);
    // The URL parser percent-encodes "{" and "}" in the pathname. Restore them
    // so an absolute template like https://host/users/{id} is checked the same
    // way as the relative template /users/{id}.
    const pathname = u.pathname.replace(/%7b/gi, "{").replace(/%7d/gi, "}");
    pathAndQuery = `${pathname}${u.search}`;
  } catch {
    // Not parseable as an absolute URL: keep matching against the raw template.
  }
  const lower = pathAndQuery.toLowerCase();
  // List/search path segments or search-style query parameters: not a single item.
  if (/\/(?:search|q|categories?|browse|results?|listings|explore|discover|feed|catalog(?:ue)?|collections?|shop|all)\b/.test(lower) || /[?&](?:q|query|keyword|keywords|search|term|category|cat|page)=/.test(lower)) {
    return false;
  }
  // Detail-style segment followed by an identifier: single item.
  if (/\/(?:p|product|products|item|items|listing|detail|details|dp|pd|sku)\/[^/]+/.test(lower))
    return true;
  // Last path segment that is, or ends in, a numeric id of 3+ digits: single item.
  const lastSeg = lower.split("?")[0].replace(/\/+$/, "").split("/").pop() ?? "";
  if (/-\d{3,}$/.test(lastSeg) || /^\d{3,}$/.test(lastSeg))
    return true;
  // Any other templated route is not treated as a single-item route.
  if (/\{[^}]+\}/.test(lower))
    return false;
  return schemaLooksLikeSingleItem(route.response_schema);
}
|
|
1798
|
+
/**
 * Reports whether the path of a context URL contains a list/search-style segment.
 *
 * @param {string | null | undefined} contextUrl - Absolute URL of the current page, if any.
 * @returns {boolean} True when the pathname matches a list-like segment;
 *   false for a missing or unparseable URL.
 */
function urlPathLooksListLike(contextUrl) {
  if (!contextUrl)
    return false;
  let path;
  try {
    path = new URL(contextUrl).pathname;
  } catch {
    // Unparseable context URL: no list evidence.
    return false;
  }
  return /\/(?:search|basic-search|result-page|results?|discover|browse|categories?|q|listings|feed|catalog(?:ue)?)\b/.test(path.toLowerCase());
}
|
|
1808
|
+
/**
 * Checks that a candidate (value, schema or route) is compatible with the
 * cardinality the caller asked for. Only one mismatch is detected: a
 * list/search request answered by something that looks like a single item.
 *
 * @param {string | null | undefined} intent - Free-text intent.
 * @param {{ kind: "value", value: unknown } | { kind: "schema", schema: unknown } | { kind: "route", route: object }} subject
 *   Candidate to check, tagged by kind.
 * @param {{ contextUrl?: string }} [opts] - Optional context URL; a list-like path also counts as a list request.
 * @returns {boolean} False only when many items are wanted and the subject looks like a single item.
 */
function cardinalityMatches(intent, subject, opts) {
  const wantsMany = isListLikeIntent(intent) || urlPathLooksListLike(opts?.contextUrl);
  if (!wantsMany)
    return true;
  switch (subject.kind) {
    case "value":
      return !valueLooksLikeSingleItem(subject.value);
    case "schema":
      return !schemaLooksLikeSingleItem(subject.schema);
    case "route":
      return !routeLooksLikeSingleItem(subject.route);
    default:
      // An unknown kind gives no evidence of a mismatch. Return a real boolean
      // instead of falling through to `undefined`, which callers that negate
      // the result would read as a mismatch.
      return true;
  }
}
|
|
1821
|
+
/**
 * Runs the cardinality check on a resolution payload, treating it as a plain value.
 *
 * @param {string | null | undefined} intent - Free-text intent.
 * @param {unknown} data - Resolution payload.
 * @returns {boolean} Result of cardinalityMatches for a "value" subject.
 */
function resolutionCardinalityMatches(intent, data) {
  const subject = { kind: "value", value: data };
  return cardinalityMatches(intent, subject);
}
|
|
1824
|
+
// Lookup tables shared by the cardinality heuristics; they are assigned when
// the __esm-wrapped initializer below runs.
var LIST_INTENT_RE, ITEM_SCHEMA_TYPES, COLLECTION_KEYS;
var init_cardinality = __esm(() => {
  // Words in an intent that signal a request for many items.
  LIST_INTENT_RE = /\b(search|find|lookup|browse|discover|list(?:ings?)?|feed|catalog(?:ue)?)\b/i;
  // Lower-cased "@type" values that identify a single item.
  ITEM_SCHEMA_TYPES = new Set([
    "product", "offer",
    "article", "newsarticle", "blogposting",
    "recipe", "event",
    "place", "localbusiness",
    "jobposting",
    "book", "movie", "creativework",
    "person", "organization"
  ]);
  // Property names that conventionally hold a collection of items.
  COLLECTION_KEYS = [
    "itemListElement", "items", "results", "products",
    "listings", "data", "edges", "hits",
    "records", "entries", "rows", "nodes"
  ];
});
|
|
1859
|
+
|
|
1733
1860
|
// .tmp-runtime-src/values/cache-key.ts
|
|
1734
1861
|
function requestCacheKey(parts) {
|
|
1735
1862
|
const method = (parts.method ?? "GET").toUpperCase();
|
|
@@ -2223,7 +2350,7 @@ var init_telemetry = __esm(() => {
|
|
|
2223
2350
|
});
|
|
2224
2351
|
|
|
2225
2352
|
// .tmp-runtime-src/build-info.generated.ts
|
|
2226
|
-
var BUILD_RELEASE_VERSION = "9.6.
|
|
2353
|
+
var BUILD_RELEASE_VERSION = "9.6.2", BUILD_GIT_SHA = "d2d14a6629a0", BUILD_CODE_HASH = "5d9ebf619c61", BUILD_RELEASE_MANIFEST_BASE64 = "eyJzY2hlbWFfdmVyc2lvbiI6MSwicmVsZWFzZV92ZXJzaW9uIjoiOS42LjIiLCJnaXRfc2hhIjoiZDJkMTRhNjYyOWEwIiwiY29kZV9oYXNoIjoiNWQ5ZWJmNjE5YzYxIiwidHJhY2VfdmVyc2lvbiI6IjVkOWViZjYxOWM2MUBkMmQxNGE2NjI5YTAiLCJpc3N1ZWRfYXQiOiIyMDI2LTA2LTE4VDA0OjE3OjAzLjg1M1oifQ", BUILD_RELEASE_MANIFEST_SIGNATURE = "OefvyW2iLVVPQ-0HMg9Mz-bciCmP8LE5u7fVEss_98E", BUILD_DEFAULT_BACKEND_URL = "https://beta-api.unbrowse.ai", BUILD_DEFAULT_PROFILE = "";
|
|
2227
2354
|
|
|
2228
2355
|
// .tmp-runtime-src/version.ts
|
|
2229
2356
|
import { createHash as createHash7 } from "crypto";
|
|
@@ -45920,6 +46047,130 @@ var init_header_classify = __esm(() => {
|
|
|
45920
46047
|
SENSITIVE_HEADER_PATTERN = /token|key|secret|credential|password|session/i;
|
|
45921
46048
|
});
|
|
45922
46049
|
|
|
46050
|
+
// .tmp-runtime-src/values/cardinality.ts
|
|
46051
|
+
/**
 * Reports whether a free-text intent reads like a list/search request.
 *
 * @param {string | null | undefined} intent - Intent text; a nullish value is treated as "".
 * @returns {boolean} True when LIST_INTENT_RE2 matches the intent text.
 */
function isListLikeIntent2(intent) {
  const text = intent ?? "";
  return LIST_INTENT_RE2.test(text);
}
|
|
46054
|
+
/**
 * Heuristically decides whether a payload describes exactly one item
 * (one product, one article, ...) rather than a collection of items.
 *
 * @param {unknown} value - Parsed payload to inspect.
 * @returns {boolean} True when the payload looks like a single item.
 */
function valueLooksLikeSingleItem2(value) {
  // Only plain (non-array) objects can be a single item.
  if (value == null || Array.isArray(value) || typeof value !== "object")
    return false;
  const obj = value;
  // A well-known collection key holding an array marks a collection.
  for (const key of COLLECTION_KEYS2) {
    if (Array.isArray(obj[key]))
      return false;
  }
  // So does any property that holds an array containing objects.
  for (const v of Object.values(obj)) {
    if (Array.isArray(v) && v.some((x) => x !== null && typeof x === "object"))
      return false;
  }
  // JSON-LD allows "@type" to be one string or an array of strings; accept
  // both so a multi-typed node such as ["Product", "Thing"] is recognised.
  const rawType = obj["@type"];
  const typeNames = (Array.isArray(rawType) ? rawType : [rawType]).filter((t) => typeof t === "string");
  const isItemType = typeNames.some((t) => ITEM_SCHEMA_TYPES2.has(t.toLowerCase()));
  const hasName = "name" in obj || "title" in obj || "headline" in obj;
  const hasPriceish = "offers" in obj || "price" in obj || "sku" in obj;
  // Either an explicit item type, or the name + price/sku shape of an item.
  return isItemType || (hasName && hasPriceish);
}
|
|
46072
|
+
/**
 * Heuristically decides whether a response schema describes one item
 * rather than a collection.
 *
 * @param {unknown} rs - Response schema; read for `type` and `properties`.
 * @returns {boolean} True when the schema looks like a single item.
 */
function schemaLooksLikeSingleItem2(rs) {
  if (!rs || typeof rs !== "object")
    return false;
  const schema = rs;
  if (schema.type === "array")
    return false;
  // The `in` operator throws on primitives, so treat a malformed
  // (non-object) `properties` value as "no properties".
  const rawProps = schema.properties;
  const props = rawProps && typeof rawProps === "object" ? rawProps : {};
  // A well-known collection key among the properties marks a collection.
  for (const key of COLLECTION_KEYS2) {
    if (key in props)
      return false;
  }
  // So does any property declared as an array of objects.
  for (const value of Object.values(props)) {
    if (value && typeof value === "object" && value.type === "array" && value.items?.type === "object") {
      return false;
    }
  }
  if (schema.type !== "object")
    return false;
  const hasType = "@type" in props;
  // "headline" is accepted to agree with the value-based check in this module.
  const hasName = "name" in props || "title" in props || "headline" in props;
  const hasPriceish = "offers" in props || "price" in props || "sku" in props;
  return hasType || (hasName && hasPriceish);
}
|
|
46095
|
+
/**
 * Heuristically decides whether a route (endpoint) returns a single item,
 * judging by its URL template and, as a last resort, its response schema.
 *
 * @param {{ url_template?: string, response_schema?: unknown }} route - Route descriptor.
 * @returns {boolean} True when the route looks like a single-item (detail) route.
 */
function routeLooksLikeSingleItem2(route) {
  const tmpl = route.url_template ?? "";
  let pathAndQuery = tmpl;
  try {
    const u = new URL(tmpl);
    // The URL parser percent-encodes "{" and "}" in the pathname. Restore them
    // so an absolute template like https://host/users/{id} is checked the same
    // way as the relative template /users/{id}.
    const pathname = u.pathname.replace(/%7b/gi, "{").replace(/%7d/gi, "}");
    pathAndQuery = `${pathname}${u.search}`;
  } catch {
    // Not parseable as an absolute URL: keep matching against the raw template.
  }
  const lower = pathAndQuery.toLowerCase();
  // List/search path segments or search-style query parameters: not a single item.
  if (/\/(?:search|q|categories?|browse|results?|listings|explore|discover|feed|catalog(?:ue)?|collections?|shop|all)\b/.test(lower) || /[?&](?:q|query|keyword|keywords|search|term|category|cat|page)=/.test(lower)) {
    return false;
  }
  // Detail-style segment followed by an identifier: single item.
  if (/\/(?:p|product|products|item|items|listing|detail|details|dp|pd|sku)\/[^/]+/.test(lower))
    return true;
  // Last path segment that is, or ends in, a numeric id of 3+ digits: single item.
  const lastSeg = lower.split("?")[0].replace(/\/+$/, "").split("/").pop() ?? "";
  if (/-\d{3,}$/.test(lastSeg) || /^\d{3,}$/.test(lastSeg))
    return true;
  // Any other templated route is not treated as a single-item route.
  if (/\{[^}]+\}/.test(lower))
    return false;
  return schemaLooksLikeSingleItem2(route.response_schema);
}
|
|
46115
|
+
function urlPathLooksListLike2(contextUrl) {
|
|
46116
|
+
if (!contextUrl)
|
|
46117
|
+
return false;
|
|
46118
|
+
try {
|
|
46119
|
+
const pathname = new URL(contextUrl).pathname.toLowerCase();
|
|
46120
|
+
return /\/(?:search|basic-search|result-page|results?|discover|browse|categories?|q|listings|feed|catalog(?:ue)?)\b/.test(pathname);
|
|
46121
|
+
} catch {
|
|
46122
|
+
return false;
|
|
46123
|
+
}
|
|
46124
|
+
}
|
|
46125
|
+
/**
 * Checks that a candidate (value, schema or route) is compatible with the
 * cardinality the caller asked for. Only one mismatch is detected: a
 * list/search request answered by something that looks like a single item.
 *
 * @param {string | null | undefined} intent - Free-text intent.
 * @param {{ kind: "value", value: unknown } | { kind: "schema", schema: unknown } | { kind: "route", route: object }} subject
 *   Candidate to check, tagged by kind.
 * @param {{ contextUrl?: string }} [opts] - Optional context URL; a list-like path also counts as a list request.
 * @returns {boolean} False only when many items are wanted and the subject looks like a single item.
 */
function cardinalityMatches2(intent, subject, opts) {
  const wantsMany = isListLikeIntent2(intent) || urlPathLooksListLike2(opts?.contextUrl);
  if (!wantsMany)
    return true;
  switch (subject.kind) {
    case "value":
      return !valueLooksLikeSingleItem2(subject.value);
    case "schema":
      return !schemaLooksLikeSingleItem2(subject.schema);
    case "route":
      return !routeLooksLikeSingleItem2(subject.route);
    default:
      // An unknown kind gives no evidence of a mismatch. Return a real boolean
      // instead of falling through to `undefined`, which callers that negate
      // the result would read as a mismatch.
      return true;
  }
}
|
|
46138
|
+
// Lookup tables shared by the cardinality heuristics; they are assigned when
// the __esm-wrapped initializer below runs.
var LIST_INTENT_RE2, ITEM_SCHEMA_TYPES2, COLLECTION_KEYS2;
var init_cardinality2 = __esm(() => {
  // Words in an intent that signal a request for many items.
  LIST_INTENT_RE2 = /\b(search|find|lookup|browse|discover|list(?:ings?)?|feed|catalog(?:ue)?)\b/i;
  // Lower-cased "@type" values that identify a single item.
  ITEM_SCHEMA_TYPES2 = new Set([
    "product", "offer",
    "article", "newsarticle", "blogposting",
    "recipe", "event",
    "place", "localbusiness",
    "jobposting",
    "book", "movie", "creativework",
    "person", "organization"
  ]);
  // Property names that conventionally hold a collection of items.
  COLLECTION_KEYS2 = [
    "itemListElement", "items", "results", "products",
    "listings", "data", "edges", "hits",
    "records", "entries", "rows", "nodes"
  ];
});
|
|
46173
|
+
|
|
45923
46174
|
// node_modules/.bun/nanoid@5.1.11/node_modules/nanoid/url-alphabet/index.js
|
|
45924
46175
|
var urlAlphabet = "useandom-26T198340PX75pxJACKVERYMINDBUSHWOLF_GQZbfghjklqvwyzrict";
|
|
45925
46176
|
|
|
@@ -117228,7 +117479,7 @@ __export(exports_extraction, {
|
|
|
117228
117479
|
cleanDOM: () => cleanDOM,
|
|
117229
117480
|
buildStructuredDataHeader: () => buildStructuredDataHeader
|
|
117230
117481
|
});
|
|
117231
|
-
function extractHtmlMetadataFallback(html3) {
|
|
117482
|
+
function extractHtmlMetadataFallback(html3, intent) {
|
|
117232
117483
|
if (!html3 || html3.length < 100)
|
|
117233
117484
|
return null;
|
|
117234
117485
|
try {
|
|
@@ -117260,8 +117511,9 @@ function extractHtmlMetadataFallback(html3) {
|
|
|
117260
117511
|
jsonLdBlocks.push(parsed);
|
|
117261
117512
|
} catch {}
|
|
117262
117513
|
});
|
|
117263
|
-
|
|
117264
|
-
|
|
117514
|
+
const usableJsonLd = isListLikeIntent2(intent) ? jsonLdBlocks.filter((b) => !valueLooksLikeSingleItem2(b)) : jsonLdBlocks;
|
|
117515
|
+
if (usableJsonLd.length > 0)
|
|
117516
|
+
out.json_ld = usableJsonLd;
|
|
117265
117517
|
const headings = [];
|
|
117266
117518
|
$2("h1, h2").each((_, el) => {
|
|
117267
117519
|
const text3 = cleanText($2(el).text());
|
|
@@ -119584,6 +119836,16 @@ function scoreSiteMetaJsonLdDemotion(structure, intent) {
|
|
|
119584
119836
|
return 0;
|
|
119585
119837
|
return -200;
|
|
119586
119838
|
}
|
|
119839
|
+
/**
 * Reports whether an extracted structure is a single item although the intent
 * asks for a list.
 *
 * @param {{ type: string, data: unknown }} structure - Extracted structure candidate.
 * @param {string} intent - Free-text intent.
 * @returns {boolean} True when the intent is list-like, the structure is not
 *   "repeated-elements", and its data looks like a single item.
 */
function isSingleItemStructureForList(structure, intent) {
  const wantsList = TINY_RESULT_LIST_INTENT.test(intent.toLowerCase());
  // Repeated-element structures are never flagged as single items.
  if (!wantsList || structure.type === "repeated-elements")
    return false;
  return valueLooksLikeSingleItem2(structure.data);
}
|
|
119846
|
+
/**
 * Score adjustment for a single-item structure offered for a list intent.
 *
 * @param {{ type: string, data: unknown }} structure - Extracted structure candidate.
 * @param {string} intent - Free-text intent.
 * @returns {number} -200 when isSingleItemStructureForList flags the structure, otherwise 0.
 */
function scoreSingleItemListMismatch(structure, intent) {
  if (isSingleItemStructureForList(structure, intent))
    return -200;
  return 0;
}
|
|
119587
119849
|
function looksLikeTinyContentReadResult(data2, intent) {
|
|
119588
119850
|
if (data2 == null)
|
|
119589
119851
|
return { tiny: false, bytes: 0, stringLeafChars: 0 };
|
|
@@ -119737,9 +119999,9 @@ function extractFromDOM(html3, intent, contextUrl) {
|
|
|
119737
119999
|
const articleStructures = extractArticleBodySpecial(html3.length > 600000 ? html3.slice(0, 600000) : html3, intent);
|
|
119738
120000
|
const allStructures = [...flashStructures, ...githubStructures, ...repeatedPersonStructures, ...packageSearchStructures, ...xProfileStructures, ...postStructures, ...repeatedArticleStructures, ...trendStructures, ...definitionStructures, ...packageDetailStructures, ...arxivAbstractStructures, ...courseStructures, ...articleStructures, ...spaStructures, ...parseStructured(cleaned)].map((structure) => normalizeStructureForIntent(structure, intent));
|
|
119739
120001
|
const isListIntent = TINY_RESULT_LIST_INTENT.test(intent.toLowerCase());
|
|
119740
|
-
const structures = allStructures.filter((s) => !(s.type === "repeated-elements" && looksLikeDegenerateRowArray(s.data)) && !looksLikeConfigShape(s.data) && !looksLikeEmptyContainer(s.data) && !(isListIntent && looksLikeSiteMetaJsonLd(s.data)));
|
|
120002
|
+
const structures = allStructures.filter((s) => !(s.type === "repeated-elements" && looksLikeDegenerateRowArray(s.data)) && !looksLikeConfigShape(s.data) && !looksLikeEmptyContainer(s.data) && !(isListIntent && looksLikeSiteMetaJsonLd(s.data)) && !isSingleItemStructureForList(s, intent));
|
|
119741
120003
|
if (structures.length === 0) {
|
|
119742
|
-
const fallback2 = extractHtmlMetadataFallback(html3);
|
|
120004
|
+
const fallback2 = extractHtmlMetadataFallback(html3, intent);
|
|
119743
120005
|
if (fallback2) {
|
|
119744
120006
|
return _finalize({ data: fallback2, extraction_method: "html_metadata_fallback", confidence: 0.4 });
|
|
119745
120007
|
}
|
|
@@ -119748,7 +120010,7 @@ function extractFromDOM(html3, intent, contextUrl) {
|
|
|
119748
120010
|
const intentWords = intent.toLowerCase().split(/\s+/).filter(Boolean);
|
|
119749
120011
|
const scored = structures.map((s) => ({
|
|
119750
120012
|
structure: s,
|
|
119751
|
-
score: scoreRelevance(s, intentWords) + scoreSemanticFit(s, intent) + scoreSparseLinkList(s) + scoreFieldRichness(s) + scoreConfigShapeDemotion(s) + scoreDegenerateRowDemotion(s) + scoreDuplicateRowDemotion(s) + scoreEmptyContainerDemotion(s) + scoreSiteMetaJsonLdDemotion(s, intent) + scoreTableIntentOverlapDemotion(s, intent, contextUrl, structures)
|
|
120013
|
+
score: scoreRelevance(s, intentWords) + scoreSemanticFit(s, intent) + scoreSparseLinkList(s) + scoreFieldRichness(s) + scoreConfigShapeDemotion(s) + scoreDegenerateRowDemotion(s) + scoreDuplicateRowDemotion(s) + scoreEmptyContainerDemotion(s) + scoreSiteMetaJsonLdDemotion(s, intent) + scoreSingleItemListMismatch(s, intent) + scoreTableIntentOverlapDemotion(s, intent, contextUrl, structures)
|
|
119752
120014
|
}));
|
|
119753
120015
|
scored.sort((a, b) => b.score - a.score);
|
|
119754
120016
|
const passing = scored.filter((candidate) => assessIntentResult(candidate.structure.data, intent).verdict === "pass");
|
|
@@ -120364,6 +120626,7 @@ function sanitizeExtractionToJson(data2, depth = 0) {
|
|
|
120364
120626
|
var STRIP_TAGS, CHROME_TAGS, AD_PATTERNS, HIDDEN_ATTRS, CONTENT_SELECTORS, CARD_SELECTORS, CONFIG_TOP_LEVEL_KEYS, CONFIG_CHUNK_VALUE_KEYS, INTENT_OVERLAP_STOPWORDS, SITE_META_LD_TYPES, TINY_RESULT_LIST_INTENT, TINY_RESULT_DETAIL_INTENT, STRUCTURED_DATA_HIGHLIGHT_TYPES, HTML_TAG_RE, TABLE_RE;
|
|
120365
120627
|
var init_extraction = __esm(() => {
|
|
120366
120628
|
init_esm11();
|
|
120629
|
+
init_cardinality2();
|
|
120367
120630
|
STRIP_TAGS = new Set(["script", "style", "noscript", "svg", "iframe"]);
|
|
120368
120631
|
CHROME_TAGS = new Set(["nav", "footer", "header"]);
|
|
120369
120632
|
AD_PATTERNS = /\b(ad|ads|advert|advertisement|tracking|tracker|cookie-banner|cookie-consent|cookie-notice|popup|modal-overlay|gdpr|consent|banner-promo)\b/i;
|
|
@@ -129889,7 +130152,8 @@ function selectBestEndpoint(endpoints, intent, skillDomain, contextUrl) {
|
|
|
129889
130152
|
const ranked = rankEndpoints(endpoints, intent, skillDomain, contextUrl);
|
|
129890
130153
|
if (ranked.length === 0)
|
|
129891
130154
|
throw new Error("All endpoints are disabled");
|
|
129892
|
-
|
|
130155
|
+
const preferred = ranked.find((r) => cardinalityMatches2(intent, { kind: "route", route: r.endpoint }, { contextUrl }));
|
|
130156
|
+
return (preferred ?? ranked[0]).endpoint;
|
|
129893
130157
|
}
|
|
129894
130158
|
function isHtml2(text3) {
|
|
129895
130159
|
const trimmed = text3.trimStart().slice(0, 200).toLowerCase();
|
|
@@ -129936,6 +130200,7 @@ var init_execution = __esm(async () => {
|
|
|
129936
130200
|
init_reveng_server_first();
|
|
129937
130201
|
init_header_classify();
|
|
129938
130202
|
init_storage_hole_bindings();
|
|
130203
|
+
init_cardinality2();
|
|
129939
130204
|
init_sealed_blob_store();
|
|
129940
130205
|
init_signer();
|
|
129941
130206
|
init_bundle_scanner();
|
|
@@ -132155,7 +132420,10 @@ function pickWalkTarget(requestedUrl, ranked, minScore = 0.8) {
|
|
|
132155
132420
|
return false;
|
|
132156
132421
|
}
|
|
132157
132422
|
};
|
|
132158
|
-
|
|
132423
|
+
const reqReg = registrableHost(requestedUrl);
|
|
132424
|
+
const sameDomain = reqReg ? eligible.filter((c) => registrableHost(c.url) === reqReg) : [];
|
|
132425
|
+
const pool2 = sameDomain.length > 0 ? sameDomain : eligible;
|
|
132426
|
+
return pool2.find((c) => hasPath(c.url)) ?? pool2[0];
|
|
132159
132427
|
}
|
|
132160
132428
|
function artifactResultWithShortlist(artifact, skillId, triggerUrl) {
|
|
132161
132429
|
const ep = artifact.endpoint;
|
|
@@ -132625,6 +132893,9 @@ function isResolveUsableEndpointForIntent(endpoint, intent, contextUrl) {
|
|
|
132625
132893
|
if (isFeedTimelineIntent(intent, contextUrl) && endpointHasNegativeTag(endpoint, "helper")) {
|
|
132626
132894
|
return false;
|
|
132627
132895
|
}
|
|
132896
|
+
if (!cardinalityMatches2(intent, { kind: "route", route: endpoint }, { contextUrl })) {
|
|
132897
|
+
return false;
|
|
132898
|
+
}
|
|
132628
132899
|
return true;
|
|
132629
132900
|
}
|
|
132630
132901
|
function normalizeRouteContext(url) {
|
|
@@ -132767,7 +133038,7 @@ function withContextReplayEndpoint(skill, _intent, _contextUrl) {
|
|
|
132767
133038
|
return skill;
|
|
132768
133039
|
}
|
|
132769
133040
|
function isSearchLikeIntent(intent, contextUrl) {
|
|
132770
|
-
if (
|
|
133041
|
+
if (isListLikeIntent2(intent))
|
|
132771
133042
|
return true;
|
|
132772
133043
|
try {
|
|
132773
133044
|
const pathname = contextUrl ? new URL(contextUrl).pathname.toLowerCase() : "";
|
|
@@ -133639,7 +133910,20 @@ function inferPreferredEntityTokens(intent) {
|
|
|
133639
133910
|
return [];
|
|
133640
133911
|
}
|
|
133641
133912
|
/**
 * Decides whether a result is acceptable for the intent: its assessment must
 * not be "fail", and its payload must pass the cardinality check.
 *
 * @param {unknown} result - Result envelope to judge.
 * @param {string} intent - Free-text intent.
 * @returns {boolean} True when both checks pass.
 */
function isAcceptableIntentResult(result, intent) {
  const { verdict } = assessIntentResult(result, intent);
  if (verdict === "fail")
    return false;
  // Cardinality is judged on the payload with the envelope fields removed.
  const payload = unwrapResultPayload(result);
  return Boolean(cardinalityMatches2(intent, { kind: "value", value: payload }));
}
|
|
133919
|
+
/**
 * Returns a shallow copy of a result object with the envelope fields listed
 * below removed. Non-objects and arrays are returned unchanged.
 *
 * @param {unknown} result - Result envelope.
 * @returns {unknown} The stripped copy, or `result` itself when it is not a plain object.
 */
function unwrapResultPayload(result) {
  const isRecord = result != null && typeof result === "object" && !Array.isArray(result);
  if (!isRecord)
    return result;
  const envelopeKeys = ["available_endpoints", "available_operations", "shortlist_for_judgment", "workflow_dag", "walked_from", "exa_candidates", "run_plan"];
  // Copy first so the caller's object is never mutated.
  const payload = { ...result };
  envelopeKeys.forEach((key) => {
    delete payload[key];
  });
  return payload;
}
|
|
133644
133928
|
function candidateMatchesPreferredEntity(candidate, preferredTokens) {
|
|
133645
133929
|
if (preferredTokens.length === 0)
|
|
@@ -137481,6 +137765,8 @@ var init_orchestrator = __esm(async () => {
|
|
|
137481
137765
|
init_search_forms();
|
|
137482
137766
|
init_ddg_search();
|
|
137483
137767
|
init_cached_resolution2();
|
|
137768
|
+
init_cardinality2();
|
|
137769
|
+
init_cardinality2();
|
|
137484
137770
|
init_principal_scope();
|
|
137485
137771
|
init_yield_safety();
|
|
137486
137772
|
init_trace_store();
|
|
@@ -143978,9 +144264,14 @@ async function registerRoutes(app) {
|
|
|
143978
144264
|
recovered = true;
|
|
143979
144265
|
} else if (errResult.available_endpoints?.length === 1) {
|
|
143980
144266
|
const only = errResult.available_endpoints[0].endpoint_id;
|
|
143981
|
-
|
|
143982
|
-
|
|
143983
|
-
|
|
144267
|
+
const onlyEp = (skill.endpoints ?? []).find((e) => e.endpoint_id === only);
|
|
144268
|
+
if (!onlyEp || cardinalityMatches2(intent, { kind: "route", route: onlyEp }, { contextUrl: context_url })) {
|
|
144269
|
+
console.log(`[exec] D7 single-endpoint skill: rewriting endpoint_id ${want} → ${only}`);
|
|
144270
|
+
execParams.endpoint_id = only;
|
|
144271
|
+
recovered = true;
|
|
144272
|
+
} else {
|
|
144273
|
+
console.log(`[exec] D7 single-endpoint skill: only endpoint ${only} is a single-item route for a list intent — not forcing (cardinality gate)`);
|
|
144274
|
+
}
|
|
143984
144275
|
}
|
|
143985
144276
|
}
|
|
143986
144277
|
if (!recovered && (skill.endpoints?.length ?? 0) >= 2 && intent) {
|
|
@@ -145208,6 +145499,7 @@ var init_routes = __esm(async () => {
|
|
|
145208
145499
|
init_client3();
|
|
145209
145500
|
init_reveng_server_first();
|
|
145210
145501
|
init_header_classify();
|
|
145502
|
+
init_cardinality2();
|
|
145211
145503
|
init_capture_spool();
|
|
145212
145504
|
init_nanoid();
|
|
145213
145505
|
init_marketplace();
|
|
@@ -150601,7 +150893,7 @@ __export(exports_extraction2, {
|
|
|
150601
150893
|
cleanDOM: () => cleanDOM2,
|
|
150602
150894
|
buildStructuredDataHeader: () => buildStructuredDataHeader2
|
|
150603
150895
|
});
|
|
150604
|
-
function extractHtmlMetadataFallback2(html3) {
|
|
150896
|
+
function extractHtmlMetadataFallback2(html3, intent) {
|
|
150605
150897
|
if (!html3 || html3.length < 100)
|
|
150606
150898
|
return null;
|
|
150607
150899
|
try {
|
|
@@ -150633,8 +150925,9 @@ function extractHtmlMetadataFallback2(html3) {
|
|
|
150633
150925
|
jsonLdBlocks.push(parsed);
|
|
150634
150926
|
} catch {}
|
|
150635
150927
|
});
|
|
150636
|
-
|
|
150637
|
-
|
|
150928
|
+
const usableJsonLd = isListLikeIntent2(intent) ? jsonLdBlocks.filter((b) => !valueLooksLikeSingleItem2(b)) : jsonLdBlocks;
|
|
150929
|
+
if (usableJsonLd.length > 0)
|
|
150930
|
+
out.json_ld = usableJsonLd;
|
|
150638
150931
|
const headings = [];
|
|
150639
150932
|
$2("h1, h2").each((_, el) => {
|
|
150640
150933
|
const text3 = cleanText2($2(el).text());
|
|
@@ -152957,6 +153250,16 @@ function scoreSiteMetaJsonLdDemotion2(structure, intent) {
|
|
|
152957
153250
|
return 0;
|
|
152958
153251
|
return -200;
|
|
152959
153252
|
}
|
|
153253
|
+
/**
 * Reports whether an extracted structure is a single item although the intent
 * asks for a list.
 *
 * @param {{ type: string, data: unknown }} structure - Extracted structure candidate.
 * @param {string} intent - Free-text intent.
 * @returns {boolean} True when the intent is list-like, the structure is not
 *   "repeated-elements", and its data looks like a single item.
 */
function isSingleItemStructureForList2(structure, intent) {
  const wantsList = TINY_RESULT_LIST_INTENT2.test(intent.toLowerCase());
  // Repeated-element structures are never flagged as single items.
  if (!wantsList || structure.type === "repeated-elements")
    return false;
  return valueLooksLikeSingleItem2(structure.data);
}
|
|
153260
|
+
/**
 * Score adjustment for a single-item structure offered for a list intent.
 *
 * @param {{ type: string, data: unknown }} structure - Extracted structure candidate.
 * @param {string} intent - Free-text intent.
 * @returns {number} -200 when isSingleItemStructureForList2 flags the structure, otherwise 0.
 */
function scoreSingleItemListMismatch2(structure, intent) {
  if (isSingleItemStructureForList2(structure, intent))
    return -200;
  return 0;
}
|
|
152960
153263
|
function looksLikeTinyContentReadResult2(data2, intent) {
|
|
152961
153264
|
if (data2 == null)
|
|
152962
153265
|
return { tiny: false, bytes: 0, stringLeafChars: 0 };
|
|
@@ -153110,9 +153413,9 @@ function extractFromDOM2(html3, intent, contextUrl) {
|
|
|
153110
153413
|
const articleStructures = extractArticleBodySpecial2(html3.length > 600000 ? html3.slice(0, 600000) : html3, intent);
|
|
153111
153414
|
const allStructures = [...flashStructures, ...githubStructures, ...repeatedPersonStructures, ...packageSearchStructures, ...xProfileStructures, ...postStructures, ...repeatedArticleStructures, ...trendStructures, ...definitionStructures, ...packageDetailStructures, ...arxivAbstractStructures, ...courseStructures, ...articleStructures, ...spaStructures, ...parseStructured2(cleaned)].map((structure) => normalizeStructureForIntent2(structure, intent));
|
|
153112
153415
|
const isListIntent = TINY_RESULT_LIST_INTENT2.test(intent.toLowerCase());
|
|
153113
|
-
const structures = allStructures.filter((s) => !(s.type === "repeated-elements" && looksLikeDegenerateRowArray2(s.data)) && !looksLikeConfigShape2(s.data) && !looksLikeEmptyContainer2(s.data) && !(isListIntent && looksLikeSiteMetaJsonLd2(s.data)));
|
|
153416
|
+
const structures = allStructures.filter((s) => !(s.type === "repeated-elements" && looksLikeDegenerateRowArray2(s.data)) && !looksLikeConfigShape2(s.data) && !looksLikeEmptyContainer2(s.data) && !(isListIntent && looksLikeSiteMetaJsonLd2(s.data)) && !isSingleItemStructureForList2(s, intent));
|
|
153114
153417
|
if (structures.length === 0) {
|
|
153115
|
-
const fallback2 = extractHtmlMetadataFallback2(html3);
|
|
153418
|
+
const fallback2 = extractHtmlMetadataFallback2(html3, intent);
|
|
153116
153419
|
if (fallback2) {
|
|
153117
153420
|
return _finalize({ data: fallback2, extraction_method: "html_metadata_fallback", confidence: 0.4 });
|
|
153118
153421
|
}
|
|
@@ -153121,7 +153424,7 @@ function extractFromDOM2(html3, intent, contextUrl) {
|
|
|
153121
153424
|
const intentWords = intent.toLowerCase().split(/\s+/).filter(Boolean);
|
|
153122
153425
|
const scored = structures.map((s) => ({
|
|
153123
153426
|
structure: s,
|
|
153124
|
-
score: scoreRelevance2(s, intentWords) + scoreSemanticFit2(s, intent) + scoreSparseLinkList2(s) + scoreFieldRichness2(s) + scoreConfigShapeDemotion2(s) + scoreDegenerateRowDemotion2(s) + scoreDuplicateRowDemotion2(s) + scoreEmptyContainerDemotion2(s) + scoreSiteMetaJsonLdDemotion2(s, intent) + scoreTableIntentOverlapDemotion2(s, intent, contextUrl, structures)
|
|
153427
|
+
score: scoreRelevance2(s, intentWords) + scoreSemanticFit2(s, intent) + scoreSparseLinkList2(s) + scoreFieldRichness2(s) + scoreConfigShapeDemotion2(s) + scoreDegenerateRowDemotion2(s) + scoreDuplicateRowDemotion2(s) + scoreEmptyContainerDemotion2(s) + scoreSiteMetaJsonLdDemotion2(s, intent) + scoreSingleItemListMismatch2(s, intent) + scoreTableIntentOverlapDemotion2(s, intent, contextUrl, structures)
|
|
153125
153428
|
}));
|
|
153126
153429
|
scored.sort((a, b) => b.score - a.score);
|
|
153127
153430
|
const passing = scored.filter((candidate) => assessIntentResult(candidate.structure.data, intent).verdict === "pass");
|
|
@@ -153737,6 +154040,7 @@ function sanitizeExtractionToJson2(data2, depth = 0) {
|
|
|
153737
154040
|
var STRIP_TAGS2, CHROME_TAGS2, AD_PATTERNS2, HIDDEN_ATTRS2, CONTENT_SELECTORS2, CARD_SELECTORS2, CONFIG_TOP_LEVEL_KEYS2, CONFIG_CHUNK_VALUE_KEYS2, INTENT_OVERLAP_STOPWORDS2, SITE_META_LD_TYPES2, TINY_RESULT_LIST_INTENT2, TINY_RESULT_DETAIL_INTENT2, STRUCTURED_DATA_HIGHLIGHT_TYPES2, HTML_TAG_RE2, TABLE_RE2;
|
|
153738
154041
|
var init_extraction2 = __esm(() => {
|
|
153739
154042
|
init_esm11();
|
|
154043
|
+
init_cardinality2();
|
|
153740
154044
|
STRIP_TAGS2 = new Set(["script", "style", "noscript", "svg", "iframe"]);
|
|
153741
154045
|
CHROME_TAGS2 = new Set(["nav", "footer", "header"]);
|
|
153742
154046
|
AD_PATTERNS2 = /\b(ad|ads|advert|advertisement|tracking|tracker|cookie-banner|cookie-consent|cookie-notice|popup|modal-overlay|gdpr|consent|banner-promo)\b/i;
|
|
@@ -239361,7 +239665,7 @@ async function cmdResolve(flags) {
|
|
|
239361
239665
|
}
|
|
239362
239666
|
if (resolveCacheSafe(flags)) {
|
|
239363
239667
|
const cachedHit = peekResolution(resolveCacheKeyFor(flags, intent), resolveCacheTtlMs());
|
|
239364
|
-
if (cachedHit) {
|
|
239668
|
+
if (cachedHit && resolutionCardinalityMatches(intent, cachedHit.result ?? cachedHit.data)) {
|
|
239365
239669
|
const replay = markResolveCacheReplay(cachedHit);
|
|
239366
239670
|
const hostType2 = detectTelemetryHostType2();
|
|
239367
239671
|
if (process.env.UNBROWSE_LANDING_TOKEN || process.env.UNBROWSE_ATTRIBUTION_B64) {
|
|
@@ -239581,7 +239885,7 @@ async function cmdResolve(flags) {
|
|
|
239581
239885
|
if (skill?.skill_id && trace) {
|
|
239582
239886
|
result._feedback = `unbrowse feedback --skill ${skill.skill_id} --endpoint ${trace.endpoint_id || "?"} --rating <1-5>`;
|
|
239583
239887
|
}
|
|
239584
|
-
if (resolveCacheSafe(flags) && isResolveSuccessResult(result)) {
|
|
239888
|
+
if (resolveCacheSafe(flags) && isResolveSuccessResult(result) && resolutionCardinalityMatches(intent, result.result ?? result.data)) {
|
|
239585
239889
|
storeResolution(resolveCacheKeyFor(flags, intent), result, resolveCacheTtlMs());
|
|
239586
239890
|
}
|
|
239587
239891
|
output(result, !!flags.pretty);
|
|
@@ -239881,14 +240185,30 @@ async function cmdRun(args, flags, verb = "run") {
|
|
|
239881
240185
|
} else if (explicitEndpointId || !bestEndpoint || endpointIsSafeToAutoExecute(bestEndpoint)) {
|
|
239882
240186
|
runPlan.push({ step: "execute", mode: "direct_api", status: "started", endpoint_id: endpointToExecute });
|
|
239883
240187
|
const resolvedSource = typeof result.source === "string" ? result.source : undefined;
|
|
239884
|
-
|
|
239885
|
-
|
|
239886
|
-
|
|
239887
|
-
|
|
239888
|
-
|
|
239889
|
-
|
|
239890
|
-
|
|
239891
|
-
|
|
240188
|
+
const deferralResult = result;
|
|
240189
|
+
const executed = await withPendingNotice(api4("POST", `/v1/skills/${skillId}/execute`, execBody(endpointToExecute)), "Executing best endpoint...");
|
|
240190
|
+
if (!explicitEndpointId && isResolveSuccessResult(executed) && !resolutionCardinalityMatches(intent, executed.result ?? executed.data)) {
|
|
240191
|
+
runPlan[runPlan.length - 1] = {
|
|
240192
|
+
...runPlan[runPlan.length - 1],
|
|
240193
|
+
status: "skipped",
|
|
240194
|
+
reason: "cardinality_mismatch_single_item"
|
|
240195
|
+
};
|
|
240196
|
+
deferralResult.next_action = {
|
|
240197
|
+
title: "List intent returned a single item",
|
|
240198
|
+
command: `unbrowse execute --skill ${skillId} --endpoint ${endpointToExecute}`,
|
|
240199
|
+
why: "Auto-execute yielded a single item for a list/search intent; the page's listings are likely JS-rendered behind an internal API. Returning the route shortlist instead of one item."
|
|
240200
|
+
};
|
|
240201
|
+
result = deferralResult;
|
|
240202
|
+
} else {
|
|
240203
|
+
result = executed;
|
|
240204
|
+
if (resolvedSource && typeof result.source !== "string")
|
|
240205
|
+
result.source = resolvedSource;
|
|
240206
|
+
runPlan[runPlan.length - 1] = {
|
|
240207
|
+
...runPlan[runPlan.length - 1],
|
|
240208
|
+
status: isResolveSuccessResult(result) ? "complete" : "error",
|
|
240209
|
+
error: resolveResultError(result) ?? null
|
|
240210
|
+
};
|
|
240211
|
+
}
|
|
239892
240212
|
} else {
|
|
239893
240213
|
runPlan.push({
|
|
239894
240214
|
step: "execute",
|
|
@@ -241712,6 +242032,7 @@ var init_cli = __esm(async () => {
|
|
|
241712
242032
|
init_extract_auth_header();
|
|
241713
242033
|
init_kuri_proxy_bridge();
|
|
241714
242034
|
init_cached_resolution();
|
|
242035
|
+
init_cardinality();
|
|
241715
242036
|
init_issue();
|
|
241716
242037
|
init_client2();
|
|
241717
242038
|
init_impact_log();
|
package/runtime/mcp.js
CHANGED
|
@@ -36310,7 +36310,7 @@ var init_cached_resolution = __esm(() => {
|
|
|
36310
36310
|
});
|
|
36311
36311
|
|
|
36312
36312
|
// .tmp-runtime-src/build-info.generated.ts
|
|
36313
|
-
var BUILD_RELEASE_VERSION = "9.6.
|
|
36313
|
+
var BUILD_RELEASE_VERSION = "9.6.2", BUILD_GIT_SHA = "d2d14a6629a0", BUILD_CODE_HASH = "5d9ebf619c61", BUILD_RELEASE_MANIFEST_BASE64 = "eyJzY2hlbWFfdmVyc2lvbiI6MSwicmVsZWFzZV92ZXJzaW9uIjoiOS42LjIiLCJnaXRfc2hhIjoiZDJkMTRhNjYyOWEwIiwiY29kZV9oYXNoIjoiNWQ5ZWJmNjE5YzYxIiwidHJhY2VfdmVyc2lvbiI6IjVkOWViZjYxOWM2MUBkMmQxNGE2NjI5YTAiLCJpc3N1ZWRfYXQiOiIyMDI2LTA2LTE4VDA0OjE3OjAzLjg1M1oifQ", BUILD_RELEASE_MANIFEST_SIGNATURE = "OefvyW2iLVVPQ-0HMg9Mz-bciCmP8LE5u7fVEss_98E", BUILD_DEFAULT_BACKEND_URL = "https://beta-api.unbrowse.ai", BUILD_DEFAULT_PROFILE = "";
|
|
36314
36314
|
|
|
36315
36315
|
// .tmp-runtime-src/version.ts
|
|
36316
36316
|
import { createHash as createHash4 } from "crypto";
|
|
@@ -43177,6 +43177,133 @@ var init_header_classify = __esm(() => {
|
|
|
43177
43177
|
SENSITIVE_HEADER_PATTERN = /token|key|secret|credential|password|session/i;
|
|
43178
43178
|
});
|
|
43179
43179
|
|
|
43180
|
+
// .tmp-runtime-src/values/cardinality.ts
|
|
43181
|
+
function isListLikeIntent(intent) {
|
|
43182
|
+
return LIST_INTENT_RE.test(intent ?? "");
|
|
43183
|
+
}
|
|
43184
|
+
function valueLooksLikeSingleItem(value) {
|
|
43185
|
+
if (value == null || Array.isArray(value) || typeof value !== "object")
|
|
43186
|
+
return false;
|
|
43187
|
+
const obj = value;
|
|
43188
|
+
for (const key of COLLECTION_KEYS) {
|
|
43189
|
+
if (Array.isArray(obj[key]))
|
|
43190
|
+
return false;
|
|
43191
|
+
}
|
|
43192
|
+
for (const v of Object.values(obj)) {
|
|
43193
|
+
if (Array.isArray(v) && v.some((x) => x !== null && typeof x === "object"))
|
|
43194
|
+
return false;
|
|
43195
|
+
}
|
|
43196
|
+
const atType = typeof obj["@type"] === "string" ? obj["@type"].toLowerCase() : "";
|
|
43197
|
+
const isItemType = ITEM_SCHEMA_TYPES.has(atType);
|
|
43198
|
+
const hasName = "name" in obj || "title" in obj || "headline" in obj;
|
|
43199
|
+
const hasPriceish = "offers" in obj || "price" in obj || "sku" in obj;
|
|
43200
|
+
return isItemType || hasName && hasPriceish;
|
|
43201
|
+
}
|
|
43202
|
+
function schemaLooksLikeSingleItem(rs) {
|
|
43203
|
+
if (!rs || typeof rs !== "object")
|
|
43204
|
+
return false;
|
|
43205
|
+
const schema = rs;
|
|
43206
|
+
if (schema.type === "array")
|
|
43207
|
+
return false;
|
|
43208
|
+
const props = schema.properties ?? {};
|
|
43209
|
+
for (const key of COLLECTION_KEYS) {
|
|
43210
|
+
if (key in props)
|
|
43211
|
+
return false;
|
|
43212
|
+
}
|
|
43213
|
+
for (const value of Object.values(props)) {
|
|
43214
|
+
if (value && typeof value === "object" && value.type === "array" && value.items?.type === "object") {
|
|
43215
|
+
return false;
|
|
43216
|
+
}
|
|
43217
|
+
}
|
|
43218
|
+
if (schema.type !== "object")
|
|
43219
|
+
return false;
|
|
43220
|
+
const hasType = "@type" in props;
|
|
43221
|
+
const hasName = "name" in props || "title" in props;
|
|
43222
|
+
const hasPriceish = "offers" in props || "price" in props || "sku" in props;
|
|
43223
|
+
return hasType || hasName && hasPriceish;
|
|
43224
|
+
}
|
|
43225
|
+
function routeLooksLikeSingleItem(route) {
|
|
43226
|
+
const tmpl = route.url_template ?? "";
|
|
43227
|
+
let pathAndQuery = tmpl;
|
|
43228
|
+
try {
|
|
43229
|
+
const u = new URL(tmpl);
|
|
43230
|
+
pathAndQuery = `${u.pathname}${u.search}`;
|
|
43231
|
+
} catch {}
|
|
43232
|
+
const lower = pathAndQuery.toLowerCase();
|
|
43233
|
+
if (/\/(?:search|q|categories?|browse|results?|listings|explore|discover|feed|catalog(?:ue)?|collections?|shop|all)\b/.test(lower) || /[?&](?:q|query|keyword|keywords|search|term|category|cat|page)=/.test(lower)) {
|
|
43234
|
+
return false;
|
|
43235
|
+
}
|
|
43236
|
+
if (/\/(?:p|product|products|item|items|listing|detail|details|dp|pd|sku)\/[^/]+/.test(lower))
|
|
43237
|
+
return true;
|
|
43238
|
+
const lastSeg = lower.split("?")[0].replace(/\/+$/, "").split("/").pop() ?? "";
|
|
43239
|
+
if (/-\d{3,}$/.test(lastSeg) || /^\d{3,}$/.test(lastSeg))
|
|
43240
|
+
return true;
|
|
43241
|
+
if (/\{[^}]+\}/.test(lower))
|
|
43242
|
+
return false;
|
|
43243
|
+
return schemaLooksLikeSingleItem(route.response_schema);
|
|
43244
|
+
}
|
|
43245
|
+
function urlPathLooksListLike(contextUrl) {
|
|
43246
|
+
if (!contextUrl)
|
|
43247
|
+
return false;
|
|
43248
|
+
try {
|
|
43249
|
+
const pathname = new URL(contextUrl).pathname.toLowerCase();
|
|
43250
|
+
return /\/(?:search|basic-search|result-page|results?|discover|browse|categories?|q|listings|feed|catalog(?:ue)?)\b/.test(pathname);
|
|
43251
|
+
} catch {
|
|
43252
|
+
return false;
|
|
43253
|
+
}
|
|
43254
|
+
}
|
|
43255
|
+
function cardinalityMatches(intent, subject, opts) {
|
|
43256
|
+
const wantsMany = isListLikeIntent(intent) || urlPathLooksListLike(opts?.contextUrl);
|
|
43257
|
+
if (!wantsMany)
|
|
43258
|
+
return true;
|
|
43259
|
+
switch (subject.kind) {
|
|
43260
|
+
case "value":
|
|
43261
|
+
return !valueLooksLikeSingleItem(subject.value);
|
|
43262
|
+
case "schema":
|
|
43263
|
+
return !schemaLooksLikeSingleItem(subject.schema);
|
|
43264
|
+
case "route":
|
|
43265
|
+
return !routeLooksLikeSingleItem(subject.route);
|
|
43266
|
+
}
|
|
43267
|
+
}
|
|
43268
|
+
function resolutionCardinalityMatches(intent, data) {
|
|
43269
|
+
return cardinalityMatches(intent, { kind: "value", value: data });
|
|
43270
|
+
}
|
|
43271
|
+
var LIST_INTENT_RE, ITEM_SCHEMA_TYPES, COLLECTION_KEYS;
|
|
43272
|
+
var init_cardinality = __esm(() => {
|
|
43273
|
+
LIST_INTENT_RE = /\b(search|find|lookup|browse|discover|list(?:ings?)?|feed|catalog(?:ue)?)\b/i;
|
|
43274
|
+
ITEM_SCHEMA_TYPES = new Set([
|
|
43275
|
+
"product",
|
|
43276
|
+
"offer",
|
|
43277
|
+
"article",
|
|
43278
|
+
"newsarticle",
|
|
43279
|
+
"blogposting",
|
|
43280
|
+
"recipe",
|
|
43281
|
+
"event",
|
|
43282
|
+
"place",
|
|
43283
|
+
"localbusiness",
|
|
43284
|
+
"jobposting",
|
|
43285
|
+
"book",
|
|
43286
|
+
"movie",
|
|
43287
|
+
"creativework",
|
|
43288
|
+
"person",
|
|
43289
|
+
"organization"
|
|
43290
|
+
]);
|
|
43291
|
+
COLLECTION_KEYS = [
|
|
43292
|
+
"itemListElement",
|
|
43293
|
+
"items",
|
|
43294
|
+
"results",
|
|
43295
|
+
"products",
|
|
43296
|
+
"listings",
|
|
43297
|
+
"data",
|
|
43298
|
+
"edges",
|
|
43299
|
+
"hits",
|
|
43300
|
+
"records",
|
|
43301
|
+
"entries",
|
|
43302
|
+
"rows",
|
|
43303
|
+
"nodes"
|
|
43304
|
+
];
|
|
43305
|
+
});
|
|
43306
|
+
|
|
43180
43307
|
// node_modules/.bun/nanoid@5.1.11/node_modules/nanoid/url-alphabet/index.js
|
|
43181
43308
|
var urlAlphabet = "useandom-26T198340PX75pxJACKVERYMINDBUSHWOLF_GQZbfghjklqvwyzrict";
|
|
43182
43309
|
|
|
@@ -115440,7 +115567,7 @@ __export(exports_extraction, {
|
|
|
115440
115567
|
cleanDOM: () => cleanDOM,
|
|
115441
115568
|
buildStructuredDataHeader: () => buildStructuredDataHeader
|
|
115442
115569
|
});
|
|
115443
|
-
function extractHtmlMetadataFallback(html3) {
|
|
115570
|
+
function extractHtmlMetadataFallback(html3, intent) {
|
|
115444
115571
|
if (!html3 || html3.length < 100)
|
|
115445
115572
|
return null;
|
|
115446
115573
|
try {
|
|
@@ -115472,8 +115599,9 @@ function extractHtmlMetadataFallback(html3) {
|
|
|
115472
115599
|
jsonLdBlocks.push(parsed);
|
|
115473
115600
|
} catch {}
|
|
115474
115601
|
});
|
|
115475
|
-
|
|
115476
|
-
|
|
115602
|
+
const usableJsonLd = isListLikeIntent(intent) ? jsonLdBlocks.filter((b) => !valueLooksLikeSingleItem(b)) : jsonLdBlocks;
|
|
115603
|
+
if (usableJsonLd.length > 0)
|
|
115604
|
+
out.json_ld = usableJsonLd;
|
|
115477
115605
|
const headings = [];
|
|
115478
115606
|
$2("h1, h2").each((_, el) => {
|
|
115479
115607
|
const text3 = cleanText($2(el).text());
|
|
@@ -117796,6 +117924,16 @@ function scoreSiteMetaJsonLdDemotion(structure, intent) {
|
|
|
117796
117924
|
return 0;
|
|
117797
117925
|
return -200;
|
|
117798
117926
|
}
|
|
117927
|
+
function isSingleItemStructureForList(structure, intent) {
|
|
117928
|
+
if (!TINY_RESULT_LIST_INTENT.test(intent.toLowerCase()))
|
|
117929
|
+
return false;
|
|
117930
|
+
if (structure.type === "repeated-elements")
|
|
117931
|
+
return false;
|
|
117932
|
+
return valueLooksLikeSingleItem(structure.data);
|
|
117933
|
+
}
|
|
117934
|
+
function scoreSingleItemListMismatch(structure, intent) {
|
|
117935
|
+
return isSingleItemStructureForList(structure, intent) ? -200 : 0;
|
|
117936
|
+
}
|
|
117799
117937
|
function looksLikeTinyContentReadResult(data2, intent) {
|
|
117800
117938
|
if (data2 == null)
|
|
117801
117939
|
return { tiny: false, bytes: 0, stringLeafChars: 0 };
|
|
@@ -117949,9 +118087,9 @@ function extractFromDOM(html3, intent, contextUrl) {
|
|
|
117949
118087
|
const articleStructures = extractArticleBodySpecial(html3.length > 600000 ? html3.slice(0, 600000) : html3, intent);
|
|
117950
118088
|
const allStructures = [...flashStructures, ...githubStructures, ...repeatedPersonStructures, ...packageSearchStructures, ...xProfileStructures, ...postStructures, ...repeatedArticleStructures, ...trendStructures, ...definitionStructures, ...packageDetailStructures, ...arxivAbstractStructures, ...courseStructures, ...articleStructures, ...spaStructures, ...parseStructured(cleaned)].map((structure) => normalizeStructureForIntent(structure, intent));
|
|
117951
118089
|
const isListIntent = TINY_RESULT_LIST_INTENT.test(intent.toLowerCase());
|
|
117952
|
-
const structures = allStructures.filter((s) => !(s.type === "repeated-elements" && looksLikeDegenerateRowArray(s.data)) && !looksLikeConfigShape(s.data) && !looksLikeEmptyContainer(s.data) && !(isListIntent && looksLikeSiteMetaJsonLd(s.data)));
|
|
118090
|
+
const structures = allStructures.filter((s) => !(s.type === "repeated-elements" && looksLikeDegenerateRowArray(s.data)) && !looksLikeConfigShape(s.data) && !looksLikeEmptyContainer(s.data) && !(isListIntent && looksLikeSiteMetaJsonLd(s.data)) && !isSingleItemStructureForList(s, intent));
|
|
117953
118091
|
if (structures.length === 0) {
|
|
117954
|
-
const fallback2 = extractHtmlMetadataFallback(html3);
|
|
118092
|
+
const fallback2 = extractHtmlMetadataFallback(html3, intent);
|
|
117955
118093
|
if (fallback2) {
|
|
117956
118094
|
return _finalize({ data: fallback2, extraction_method: "html_metadata_fallback", confidence: 0.4 });
|
|
117957
118095
|
}
|
|
@@ -117960,7 +118098,7 @@ function extractFromDOM(html3, intent, contextUrl) {
|
|
|
117960
118098
|
const intentWords = intent.toLowerCase().split(/\s+/).filter(Boolean);
|
|
117961
118099
|
const scored = structures.map((s) => ({
|
|
117962
118100
|
structure: s,
|
|
117963
|
-
score: scoreRelevance(s, intentWords) + scoreSemanticFit(s, intent) + scoreSparseLinkList(s) + scoreFieldRichness(s) + scoreConfigShapeDemotion(s) + scoreDegenerateRowDemotion(s) + scoreDuplicateRowDemotion(s) + scoreEmptyContainerDemotion(s) + scoreSiteMetaJsonLdDemotion(s, intent) + scoreTableIntentOverlapDemotion(s, intent, contextUrl, structures)
|
|
118101
|
+
score: scoreRelevance(s, intentWords) + scoreSemanticFit(s, intent) + scoreSparseLinkList(s) + scoreFieldRichness(s) + scoreConfigShapeDemotion(s) + scoreDegenerateRowDemotion(s) + scoreDuplicateRowDemotion(s) + scoreEmptyContainerDemotion(s) + scoreSiteMetaJsonLdDemotion(s, intent) + scoreSingleItemListMismatch(s, intent) + scoreTableIntentOverlapDemotion(s, intent, contextUrl, structures)
|
|
117964
118102
|
}));
|
|
117965
118103
|
scored.sort((a, b) => b.score - a.score);
|
|
117966
118104
|
const passing = scored.filter((candidate) => assessIntentResult(candidate.structure.data, intent).verdict === "pass");
|
|
@@ -118576,6 +118714,7 @@ function sanitizeExtractionToJson(data2, depth = 0) {
|
|
|
118576
118714
|
var STRIP_TAGS, CHROME_TAGS, AD_PATTERNS, HIDDEN_ATTRS, CONTENT_SELECTORS, CARD_SELECTORS, CONFIG_TOP_LEVEL_KEYS, CONFIG_CHUNK_VALUE_KEYS, INTENT_OVERLAP_STOPWORDS, SITE_META_LD_TYPES, TINY_RESULT_LIST_INTENT, TINY_RESULT_DETAIL_INTENT, STRUCTURED_DATA_HIGHLIGHT_TYPES, HTML_TAG_RE, TABLE_RE;
|
|
118577
118715
|
var init_extraction = __esm(() => {
|
|
118578
118716
|
init_esm11();
|
|
118717
|
+
init_cardinality();
|
|
118579
118718
|
STRIP_TAGS = new Set(["script", "style", "noscript", "svg", "iframe"]);
|
|
118580
118719
|
CHROME_TAGS = new Set(["nav", "footer", "header"]);
|
|
118581
118720
|
AD_PATTERNS = /\b(ad|ads|advert|advertisement|tracking|tracker|cookie-banner|cookie-consent|cookie-notice|popup|modal-overlay|gdpr|consent|banner-promo)\b/i;
|
|
@@ -128277,7 +128416,8 @@ function selectBestEndpoint(endpoints, intent, skillDomain, contextUrl) {
|
|
|
128277
128416
|
const ranked = rankEndpoints(endpoints, intent, skillDomain, contextUrl);
|
|
128278
128417
|
if (ranked.length === 0)
|
|
128279
128418
|
throw new Error("All endpoints are disabled");
|
|
128280
|
-
|
|
128419
|
+
const preferred = ranked.find((r) => cardinalityMatches(intent, { kind: "route", route: r.endpoint }, { contextUrl }));
|
|
128420
|
+
return (preferred ?? ranked[0]).endpoint;
|
|
128281
128421
|
}
|
|
128282
128422
|
function isHtml2(text3) {
|
|
128283
128423
|
const trimmed = text3.trimStart().slice(0, 200).toLowerCase();
|
|
@@ -128324,6 +128464,7 @@ var init_execution = __esm(async () => {
|
|
|
128324
128464
|
init_reveng_server_first();
|
|
128325
128465
|
init_header_classify();
|
|
128326
128466
|
init_storage_hole_bindings();
|
|
128467
|
+
init_cardinality();
|
|
128327
128468
|
init_sealed_blob_store();
|
|
128328
128469
|
init_signer();
|
|
128329
128470
|
init_bundle_scanner();
|
|
@@ -130567,7 +130708,10 @@ function pickWalkTarget(requestedUrl, ranked, minScore = 0.8) {
|
|
|
130567
130708
|
return false;
|
|
130568
130709
|
}
|
|
130569
130710
|
};
|
|
130570
|
-
|
|
130711
|
+
const reqReg = registrableHost(requestedUrl);
|
|
130712
|
+
const sameDomain = reqReg ? eligible.filter((c) => registrableHost(c.url) === reqReg) : [];
|
|
130713
|
+
const pool2 = sameDomain.length > 0 ? sameDomain : eligible;
|
|
130714
|
+
return pool2.find((c) => hasPath(c.url)) ?? pool2[0];
|
|
130571
130715
|
}
|
|
130572
130716
|
function artifactResultWithShortlist(artifact, skillId, triggerUrl) {
|
|
130573
130717
|
const ep = artifact.endpoint;
|
|
@@ -131037,6 +131181,9 @@ function isResolveUsableEndpointForIntent(endpoint, intent, contextUrl) {
|
|
|
131037
131181
|
if (isFeedTimelineIntent(intent, contextUrl) && endpointHasNegativeTag(endpoint, "helper")) {
|
|
131038
131182
|
return false;
|
|
131039
131183
|
}
|
|
131184
|
+
if (!cardinalityMatches(intent, { kind: "route", route: endpoint }, { contextUrl })) {
|
|
131185
|
+
return false;
|
|
131186
|
+
}
|
|
131040
131187
|
return true;
|
|
131041
131188
|
}
|
|
131042
131189
|
function normalizeRouteContext(url) {
|
|
@@ -131179,7 +131326,7 @@ function withContextReplayEndpoint(skill, _intent, _contextUrl) {
|
|
|
131179
131326
|
return skill;
|
|
131180
131327
|
}
|
|
131181
131328
|
function isSearchLikeIntent(intent, contextUrl) {
|
|
131182
|
-
if (
|
|
131329
|
+
if (isListLikeIntent(intent))
|
|
131183
131330
|
return true;
|
|
131184
131331
|
try {
|
|
131185
131332
|
const pathname = contextUrl ? new URL(contextUrl).pathname.toLowerCase() : "";
|
|
@@ -132051,7 +132198,20 @@ function inferPreferredEntityTokens(intent) {
|
|
|
132051
132198
|
return [];
|
|
132052
132199
|
}
|
|
132053
132200
|
function isAcceptableIntentResult(result, intent) {
|
|
132054
|
-
|
|
132201
|
+
if (assessIntentResult(result, intent).verdict === "fail")
|
|
132202
|
+
return false;
|
|
132203
|
+
if (!cardinalityMatches(intent, { kind: "value", value: unwrapResultPayload(result) }))
|
|
132204
|
+
return false;
|
|
132205
|
+
return true;
|
|
132206
|
+
}
|
|
132207
|
+
function unwrapResultPayload(result) {
|
|
132208
|
+
if (result == null || typeof result !== "object" || Array.isArray(result))
|
|
132209
|
+
return result;
|
|
132210
|
+
const rec = { ...result };
|
|
132211
|
+
for (const k of ["available_endpoints", "available_operations", "shortlist_for_judgment", "workflow_dag", "walked_from", "exa_candidates", "run_plan"]) {
|
|
132212
|
+
delete rec[k];
|
|
132213
|
+
}
|
|
132214
|
+
return rec;
|
|
132055
132215
|
}
|
|
132056
132216
|
function candidateMatchesPreferredEntity(candidate, preferredTokens) {
|
|
132057
132217
|
if (preferredTokens.length === 0)
|
|
@@ -135893,6 +136053,8 @@ var init_orchestrator = __esm(async () => {
|
|
|
135893
136053
|
init_search_forms();
|
|
135894
136054
|
init_ddg_search();
|
|
135895
136055
|
init_cached_resolution();
|
|
136056
|
+
init_cardinality();
|
|
136057
|
+
init_cardinality();
|
|
135896
136058
|
init_principal_scope();
|
|
135897
136059
|
init_yield_safety();
|
|
135898
136060
|
init_trace_store();
|
|
@@ -142647,9 +142809,14 @@ async function registerRoutes(app) {
|
|
|
142647
142809
|
recovered = true;
|
|
142648
142810
|
} else if (errResult.available_endpoints?.length === 1) {
|
|
142649
142811
|
const only = errResult.available_endpoints[0].endpoint_id;
|
|
142650
|
-
|
|
142651
|
-
|
|
142652
|
-
|
|
142812
|
+
const onlyEp = (skill.endpoints ?? []).find((e) => e.endpoint_id === only);
|
|
142813
|
+
if (!onlyEp || cardinalityMatches(intent, { kind: "route", route: onlyEp }, { contextUrl: context_url })) {
|
|
142814
|
+
console.log(`[exec] D7 single-endpoint skill: rewriting endpoint_id ${want} → ${only}`);
|
|
142815
|
+
execParams.endpoint_id = only;
|
|
142816
|
+
recovered = true;
|
|
142817
|
+
} else {
|
|
142818
|
+
console.log(`[exec] D7 single-endpoint skill: only endpoint ${only} is a single-item route for a list intent — not forcing (cardinality gate)`);
|
|
142819
|
+
}
|
|
142653
142820
|
}
|
|
142654
142821
|
}
|
|
142655
142822
|
if (!recovered && (skill.endpoints?.length ?? 0) >= 2 && intent) {
|
|
@@ -143877,6 +144044,7 @@ var init_routes = __esm(async () => {
|
|
|
143877
144044
|
init_client();
|
|
143878
144045
|
init_reveng_server_first();
|
|
143879
144046
|
init_header_classify();
|
|
144047
|
+
init_cardinality();
|
|
143880
144048
|
init_capture_spool();
|
|
143881
144049
|
init_nanoid();
|
|
143882
144050
|
init_marketplace();
|
|
@@ -232579,7 +232747,7 @@ async function cmdResolve(flags) {
|
|
|
232579
232747
|
}
|
|
232580
232748
|
if (resolveCacheSafe(flags)) {
|
|
232581
232749
|
const cachedHit = peekResolution(resolveCacheKeyFor(flags, intent), resolveCacheTtlMs());
|
|
232582
|
-
if (cachedHit) {
|
|
232750
|
+
if (cachedHit && resolutionCardinalityMatches(intent, cachedHit.result ?? cachedHit.data)) {
|
|
232583
232751
|
const replay = markResolveCacheReplay(cachedHit);
|
|
232584
232752
|
const hostType2 = detectTelemetryHostType();
|
|
232585
232753
|
if (process.env.UNBROWSE_LANDING_TOKEN || process.env.UNBROWSE_ATTRIBUTION_B64) {
|
|
@@ -232799,7 +232967,7 @@ async function cmdResolve(flags) {
|
|
|
232799
232967
|
if (skill?.skill_id && trace) {
|
|
232800
232968
|
result._feedback = `unbrowse feedback --skill ${skill.skill_id} --endpoint ${trace.endpoint_id || "?"} --rating <1-5>`;
|
|
232801
232969
|
}
|
|
232802
|
-
if (resolveCacheSafe(flags) && isResolveSuccessResult(result)) {
|
|
232970
|
+
if (resolveCacheSafe(flags) && isResolveSuccessResult(result) && resolutionCardinalityMatches(intent, result.result ?? result.data)) {
|
|
232803
232971
|
storeResolution(resolveCacheKeyFor(flags, intent), result, resolveCacheTtlMs());
|
|
232804
232972
|
}
|
|
232805
232973
|
output(result, !!flags.pretty);
|
|
@@ -233085,14 +233253,30 @@ async function cmdRun(args, flags, verb = "run") {
|
|
|
233085
233253
|
} else if (explicitEndpointId || !bestEndpoint || endpointIsSafeToAutoExecute(bestEndpoint)) {
|
|
233086
233254
|
runPlan.push({ step: "execute", mode: "direct_api", status: "started", endpoint_id: endpointToExecute });
|
|
233087
233255
|
const resolvedSource = typeof result.source === "string" ? result.source : undefined;
|
|
233088
|
-
|
|
233089
|
-
|
|
233090
|
-
|
|
233091
|
-
|
|
233092
|
-
|
|
233093
|
-
|
|
233094
|
-
|
|
233095
|
-
|
|
233256
|
+
const deferralResult = result;
|
|
233257
|
+
const executed = await withPendingNotice(api4("POST", `/v1/skills/${skillId}/execute`, execBody(endpointToExecute)), "Executing best endpoint...");
|
|
233258
|
+
if (!explicitEndpointId && isResolveSuccessResult(executed) && !resolutionCardinalityMatches(intent, executed.result ?? executed.data)) {
|
|
233259
|
+
runPlan[runPlan.length - 1] = {
|
|
233260
|
+
...runPlan[runPlan.length - 1],
|
|
233261
|
+
status: "skipped",
|
|
233262
|
+
reason: "cardinality_mismatch_single_item"
|
|
233263
|
+
};
|
|
233264
|
+
deferralResult.next_action = {
|
|
233265
|
+
title: "List intent returned a single item",
|
|
233266
|
+
command: `unbrowse execute --skill ${skillId} --endpoint ${endpointToExecute}`,
|
|
233267
|
+
why: "Auto-execute yielded a single item for a list/search intent; the page's listings are likely JS-rendered behind an internal API. Returning the route shortlist instead of one item."
|
|
233268
|
+
};
|
|
233269
|
+
result = deferralResult;
|
|
233270
|
+
} else {
|
|
233271
|
+
result = executed;
|
|
233272
|
+
if (resolvedSource && typeof result.source !== "string")
|
|
233273
|
+
result.source = resolvedSource;
|
|
233274
|
+
runPlan[runPlan.length - 1] = {
|
|
233275
|
+
...runPlan[runPlan.length - 1],
|
|
233276
|
+
status: isResolveSuccessResult(result) ? "complete" : "error",
|
|
233277
|
+
error: resolveResultError(result) ?? null
|
|
233278
|
+
};
|
|
233279
|
+
}
|
|
233096
233280
|
} else {
|
|
233097
233281
|
runPlan.push({
|
|
233098
233282
|
step: "execute",
|
|
@@ -234916,6 +235100,7 @@ var init_cli = __esm(async () => {
|
|
|
234916
235100
|
init_extract_auth_header();
|
|
234917
235101
|
init_kuri_proxy_bridge();
|
|
234918
235102
|
init_cached_resolution();
|
|
235103
|
+
init_cardinality();
|
|
234919
235104
|
init_issue();
|
|
234920
235105
|
init_client2();
|
|
234921
235106
|
init_impact_log();
|
|
@@ -236206,6 +236391,7 @@ __export(exports_orchestrator, {
|
|
|
236206
236391
|
selectSkillIdsToHydrate: () => selectSkillIdsToHydrate2,
|
|
236207
236392
|
selectSearchTermsForExecution: () => selectSearchTermsForExecution2,
|
|
236208
236393
|
scopedCacheKey: () => scopedCacheKey2,
|
|
236394
|
+
schemaLooksLikeSingleItem: () => schemaLooksLikeSingleItem,
|
|
236209
236395
|
resolveEndpointTemplateBindings: () => resolveEndpointTemplateBindings2,
|
|
236210
236396
|
resolveAndExecute: () => resolveAndExecute2,
|
|
236211
236397
|
registrableHost: () => registrableHost2,
|
|
@@ -236220,7 +236406,9 @@ __export(exports_orchestrator, {
|
|
|
236220
236406
|
pickPreferredSkillSnapshot: () => pickPreferredSkillSnapshot2,
|
|
236221
236407
|
persistDomainCache: () => persistDomainCache2,
|
|
236222
236408
|
marketplaceSkillMatchesContext: () => marketplaceSkillMatchesContext2,
|
|
236409
|
+
looksLikeSingleItemRoute: () => looksLikeSingleItemRoute,
|
|
236223
236410
|
isRouteCacheEntryStale: () => isRouteCacheEntryStale2,
|
|
236411
|
+
isResolveUsableEndpointForIntent: () => isResolveUsableEndpointForIntent2,
|
|
236224
236412
|
isCachedSkillRelevantForIntent: () => isCachedSkillRelevantForIntent2,
|
|
236225
236413
|
invalidateRouteCacheForDomain: () => invalidateRouteCacheForDomain3,
|
|
236226
236414
|
inferSearchParamOverrides: () => inferSearchParamOverrides2,
|
|
@@ -236275,7 +236463,10 @@ function pickWalkTarget2(requestedUrl, ranked, minScore = 0.8) {
|
|
|
236275
236463
|
return false;
|
|
236276
236464
|
}
|
|
236277
236465
|
};
|
|
236278
|
-
|
|
236466
|
+
const reqReg = registrableHost2(requestedUrl);
|
|
236467
|
+
const sameDomain = reqReg ? eligible.filter((c) => registrableHost2(c.url) === reqReg) : [];
|
|
236468
|
+
const pool2 = sameDomain.length > 0 ? sameDomain : eligible;
|
|
236469
|
+
return pool2.find((c) => hasPath(c.url)) ?? pool2[0];
|
|
236279
236470
|
}
|
|
236280
236471
|
function artifactResultWithShortlist2(artifact, skillId, triggerUrl) {
|
|
236281
236472
|
const ep = artifact.endpoint;
|
|
@@ -236736,6 +236927,9 @@ function endpointTargetsMismatchedLocalReplayHost2(endpoint, contextUrl) {
|
|
|
236736
236927
|
function endpointHasNegativeTag2(endpoint, tag) {
|
|
236737
236928
|
return (endpoint.semantic?.negative_tags ?? []).some((candidate) => candidate.trim().toLowerCase() === tag.trim().toLowerCase());
|
|
236738
236929
|
}
|
|
236930
|
+
function looksLikeSingleItemRoute(endpoint) {
|
|
236931
|
+
return routeLooksLikeSingleItem(endpoint);
|
|
236932
|
+
}
|
|
236739
236933
|
function isResolveUsableEndpointForIntent2(endpoint, intent, contextUrl) {
|
|
236740
236934
|
if (endpointTargetsMismatchedLocalReplayHost2(endpoint, contextUrl))
|
|
236741
236935
|
return false;
|
|
@@ -236745,6 +236939,9 @@ function isResolveUsableEndpointForIntent2(endpoint, intent, contextUrl) {
|
|
|
236745
236939
|
if (isFeedTimelineIntent2(intent, contextUrl) && endpointHasNegativeTag2(endpoint, "helper")) {
|
|
236746
236940
|
return false;
|
|
236747
236941
|
}
|
|
236942
|
+
if (!cardinalityMatches(intent, { kind: "route", route: endpoint }, { contextUrl })) {
|
|
236943
|
+
return false;
|
|
236944
|
+
}
|
|
236748
236945
|
return true;
|
|
236749
236946
|
}
|
|
236750
236947
|
function normalizeRouteContext2(url) {
|
|
@@ -236887,7 +237084,7 @@ function withContextReplayEndpoint2(skill, _intent, _contextUrl) {
|
|
|
236887
237084
|
return skill;
|
|
236888
237085
|
}
|
|
236889
237086
|
function isSearchLikeIntent2(intent, contextUrl) {
|
|
236890
|
-
if (
|
|
237087
|
+
if (isListLikeIntent(intent))
|
|
236891
237088
|
return true;
|
|
236892
237089
|
try {
|
|
236893
237090
|
const pathname = contextUrl ? new URL(contextUrl).pathname.toLowerCase() : "";
|
|
@@ -237764,7 +237961,20 @@ function inferPreferredEntityTokens2(intent) {
|
|
|
237764
237961
|
return [];
|
|
237765
237962
|
}
|
|
237766
237963
|
function isAcceptableIntentResult2(result, intent) {
|
|
237767
|
-
|
|
237964
|
+
if (assessIntentResult(result, intent).verdict === "fail")
|
|
237965
|
+
return false;
|
|
237966
|
+
if (!cardinalityMatches(intent, { kind: "value", value: unwrapResultPayload2(result) }))
|
|
237967
|
+
return false;
|
|
237968
|
+
return true;
|
|
237969
|
+
}
|
|
237970
|
+
function unwrapResultPayload2(result) {
|
|
237971
|
+
if (result == null || typeof result !== "object" || Array.isArray(result))
|
|
237972
|
+
return result;
|
|
237973
|
+
const rec = { ...result };
|
|
237974
|
+
for (const k of ["available_endpoints", "available_operations", "shortlist_for_judgment", "workflow_dag", "walked_from", "exa_candidates", "run_plan"]) {
|
|
237975
|
+
delete rec[k];
|
|
237976
|
+
}
|
|
237977
|
+
return rec;
|
|
237768
237978
|
}
|
|
237769
237979
|
function candidateMatchesPreferredEntity2(candidate, preferredTokens) {
|
|
237770
237980
|
if (preferredTokens.length === 0)
|
|
@@ -241606,6 +241816,8 @@ var init_orchestrator2 = __esm(async () => {
|
|
|
241606
241816
|
init_search_forms();
|
|
241607
241817
|
init_ddg_search();
|
|
241608
241818
|
init_cached_resolution();
|
|
241819
|
+
init_cardinality();
|
|
241820
|
+
init_cardinality();
|
|
241609
241821
|
init_principal_scope();
|
|
241610
241822
|
init_yield_safety();
|
|
241611
241823
|
init_trace_store();
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
"repo_url": "https://github.com/justrach/kuri.git",
|
|
3
3
|
"branch": "adding-extensions",
|
|
4
4
|
"source_sha": "149881254046a20778f642b69f20f0c6468f6fb4",
|
|
5
|
-
"built_at": "2026-06-
|
|
5
|
+
"built_at": "2026-06-18T03:58:10.362Z",
|
|
6
6
|
"binaries": {
|
|
7
7
|
"darwin-arm64": {
|
|
8
8
|
"zig_target": "aarch64-macos",
|
|
@@ -21,11 +21,11 @@
|
|
|
21
21
|
},
|
|
22
22
|
"linux-x64": {
|
|
23
23
|
"zig_target": "x86_64-linux",
|
|
24
|
-
"sha256": "
|
|
24
|
+
"sha256": "e73aecfbf07001ba0be5032118790eb253ad5d8d12caca6a1dd5ad3ccab44b9e"
|
|
25
25
|
},
|
|
26
26
|
"win-x64": {
|
|
27
27
|
"zig_target": "x86_64-windows-gnu",
|
|
28
|
-
"sha256": "
|
|
28
|
+
"sha256": "9ecbc82be646e755e4664051cf345d54dde3c6610e457d763deff67895047963",
|
|
29
29
|
"source": "pre-staged"
|
|
30
30
|
}
|
|
31
31
|
},
|
|
@@ -33,22 +33,22 @@
|
|
|
33
33
|
"darwin-arm64": {
|
|
34
34
|
"zig_target": "aarch64-macos",
|
|
35
35
|
"lib": "libkuri_ffi.dylib",
|
|
36
|
-
"sha256": "
|
|
36
|
+
"sha256": "6c72cf383df4fa3f870b745da43d64eae8f67e58c6f971214ac29602fb649939"
|
|
37
37
|
},
|
|
38
38
|
"darwin-x64": {
|
|
39
39
|
"zig_target": "x86_64-macos",
|
|
40
40
|
"lib": "libkuri_ffi.dylib",
|
|
41
|
-
"sha256": "
|
|
41
|
+
"sha256": "82480772ddc8e44c8e34e70b80d7dc0969004942f77276587af450b62d3d2750"
|
|
42
42
|
},
|
|
43
43
|
"linux-arm64": {
|
|
44
44
|
"zig_target": "aarch64-linux",
|
|
45
45
|
"lib": "libkuri_ffi.so",
|
|
46
|
-
"sha256": "
|
|
46
|
+
"sha256": "ef8dfa2b634f04294f93a94472d9856ba777681afaab2d4213f0e29821882e07"
|
|
47
47
|
},
|
|
48
48
|
"linux-x64": {
|
|
49
49
|
"zig_target": "x86_64-linux",
|
|
50
50
|
"lib": "libkuri_ffi.so",
|
|
51
|
-
"sha256": "
|
|
51
|
+
"sha256": "fb29ad2b71186d176306321d17e88074a67fea139991faef9aa4862333942c9e"
|
|
52
52
|
}
|
|
53
53
|
}
|
|
54
54
|
}
|
|
Binary file
|