unbrowse 9.6.0 → 9.6.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "unbrowse",
3
- "version": "9.6.0",
3
+ "version": "9.6.2",
4
4
  "repository": {
5
5
  "type": "git",
6
6
  "url": "git+https://github.com/unbrowse-ai/unbrowse.git"
package/runtime/cli.js CHANGED
@@ -1730,6 +1730,133 @@ var init_cached_resolution = __esm(() => {
1730
1730
  init_principal_scope();
1731
1731
  });
1732
1732
 
1733
+ // .tmp-runtime-src/values/cardinality.ts
1734
+ function isListLikeIntent(intent) {
1735
+ return LIST_INTENT_RE.test(intent ?? "");
1736
+ }
1737
+ function valueLooksLikeSingleItem(value) {
1738
+ if (value == null || Array.isArray(value) || typeof value !== "object")
1739
+ return false;
1740
+ const obj = value;
1741
+ for (const key of COLLECTION_KEYS) {
1742
+ if (Array.isArray(obj[key]))
1743
+ return false;
1744
+ }
1745
+ for (const v of Object.values(obj)) {
1746
+ if (Array.isArray(v) && v.some((x) => x !== null && typeof x === "object"))
1747
+ return false;
1748
+ }
1749
+ const atType = typeof obj["@type"] === "string" ? obj["@type"].toLowerCase() : "";
1750
+ const isItemType = ITEM_SCHEMA_TYPES.has(atType);
1751
+ const hasName = "name" in obj || "title" in obj || "headline" in obj;
1752
+ const hasPriceish = "offers" in obj || "price" in obj || "sku" in obj;
1753
+ return isItemType || hasName && hasPriceish;
1754
+ }
1755
+ function schemaLooksLikeSingleItem(rs) {
1756
+ if (!rs || typeof rs !== "object")
1757
+ return false;
1758
+ const schema = rs;
1759
+ if (schema.type === "array")
1760
+ return false;
1761
+ const props = schema.properties ?? {};
1762
+ for (const key of COLLECTION_KEYS) {
1763
+ if (key in props)
1764
+ return false;
1765
+ }
1766
+ for (const value of Object.values(props)) {
1767
+ if (value && typeof value === "object" && value.type === "array" && value.items?.type === "object") {
1768
+ return false;
1769
+ }
1770
+ }
1771
+ if (schema.type !== "object")
1772
+ return false;
1773
+ const hasType = "@type" in props;
1774
+ const hasName = "name" in props || "title" in props;
1775
+ const hasPriceish = "offers" in props || "price" in props || "sku" in props;
1776
+ return hasType || hasName && hasPriceish;
1777
+ }
1778
+ function routeLooksLikeSingleItem(route) {
1779
+ const tmpl = route.url_template ?? "";
1780
+ let pathAndQuery = tmpl;
1781
+ try {
1782
+ const u = new URL(tmpl);
1783
+ pathAndQuery = `${u.pathname}${u.search}`;
1784
+ } catch {}
1785
+ const lower = pathAndQuery.toLowerCase();
1786
+ if (/\/(?:search|q|categories?|browse|results?|listings|explore|discover|feed|catalog(?:ue)?|collections?|shop|all)\b/.test(lower) || /[?&](?:q|query|keyword|keywords|search|term|category|cat|page)=/.test(lower)) {
1787
+ return false;
1788
+ }
1789
+ if (/\/(?:p|product|products|item|items|listing|detail|details|dp|pd|sku)\/[^/]+/.test(lower))
1790
+ return true;
1791
+ const lastSeg = lower.split("?")[0].replace(/\/+$/, "").split("/").pop() ?? "";
1792
+ if (/-\d{3,}$/.test(lastSeg) || /^\d{3,}$/.test(lastSeg))
1793
+ return true;
1794
+ if (/\{[^}]+\}/.test(lower))
1795
+ return false;
1796
+ return schemaLooksLikeSingleItem(route.response_schema);
1797
+ }
1798
+ function urlPathLooksListLike(contextUrl) {
1799
+ if (!contextUrl)
1800
+ return false;
1801
+ try {
1802
+ const pathname = new URL(contextUrl).pathname.toLowerCase();
1803
+ return /\/(?:search|basic-search|result-page|results?|discover|browse|categories?|q|listings|feed|catalog(?:ue)?)\b/.test(pathname);
1804
+ } catch {
1805
+ return false;
1806
+ }
1807
+ }
1808
+ function cardinalityMatches(intent, subject, opts) {
1809
+ const wantsMany = isListLikeIntent(intent) || urlPathLooksListLike(opts?.contextUrl);
1810
+ if (!wantsMany)
1811
+ return true;
1812
+ switch (subject.kind) {
1813
+ case "value":
1814
+ return !valueLooksLikeSingleItem(subject.value);
1815
+ case "schema":
1816
+ return !schemaLooksLikeSingleItem(subject.schema);
1817
+ case "route":
1818
+ return !routeLooksLikeSingleItem(subject.route);
1819
+ }
1820
+ }
1821
+ function resolutionCardinalityMatches(intent, data) {
1822
+ return cardinalityMatches(intent, { kind: "value", value: data });
1823
+ }
1824
+ var LIST_INTENT_RE, ITEM_SCHEMA_TYPES, COLLECTION_KEYS;
1825
+ var init_cardinality = __esm(() => {
1826
+ LIST_INTENT_RE = /\b(search|find|lookup|browse|discover|list(?:ings?)?|feed|catalog(?:ue)?)\b/i;
1827
+ ITEM_SCHEMA_TYPES = new Set([
1828
+ "product",
1829
+ "offer",
1830
+ "article",
1831
+ "newsarticle",
1832
+ "blogposting",
1833
+ "recipe",
1834
+ "event",
1835
+ "place",
1836
+ "localbusiness",
1837
+ "jobposting",
1838
+ "book",
1839
+ "movie",
1840
+ "creativework",
1841
+ "person",
1842
+ "organization"
1843
+ ]);
1844
+ COLLECTION_KEYS = [
1845
+ "itemListElement",
1846
+ "items",
1847
+ "results",
1848
+ "products",
1849
+ "listings",
1850
+ "data",
1851
+ "edges",
1852
+ "hits",
1853
+ "records",
1854
+ "entries",
1855
+ "rows",
1856
+ "nodes"
1857
+ ];
1858
+ });
1859
+
1733
1860
  // .tmp-runtime-src/values/cache-key.ts
1734
1861
  function requestCacheKey(parts) {
1735
1862
  const method = (parts.method ?? "GET").toUpperCase();
@@ -2223,7 +2350,7 @@ var init_telemetry = __esm(() => {
2223
2350
  });
2224
2351
 
2225
2352
  // .tmp-runtime-src/build-info.generated.ts
2226
- var BUILD_RELEASE_VERSION = "9.6.0", BUILD_GIT_SHA = "5b6b9dc9e8e1", BUILD_CODE_HASH = "5d9ebf619c61", BUILD_RELEASE_MANIFEST_BASE64 = "eyJzY2hlbWFfdmVyc2lvbiI6MSwicmVsZWFzZV92ZXJzaW9uIjoiOS42LjAiLCJnaXRfc2hhIjoiNWI2YjlkYzllOGUxIiwiY29kZV9oYXNoIjoiNWQ5ZWJmNjE5YzYxIiwidHJhY2VfdmVyc2lvbiI6IjVkOWViZjYxOWM2MUA1YjZiOWRjOWU4ZTEiLCJpc3N1ZWRfYXQiOiIyMDI2LTA2LTE3VDIyOjAyOjQ0LjA3MVoifQ", BUILD_RELEASE_MANIFEST_SIGNATURE = "MqIyb1B_GA8W5Nv_Jictwt1jVCCTgxxdIjUFJqLoeAU", BUILD_DEFAULT_BACKEND_URL = "https://beta-api.unbrowse.ai", BUILD_DEFAULT_PROFILE = "";
2353
+ var BUILD_RELEASE_VERSION = "9.6.2", BUILD_GIT_SHA = "d2d14a6629a0", BUILD_CODE_HASH = "5d9ebf619c61", BUILD_RELEASE_MANIFEST_BASE64 = "eyJzY2hlbWFfdmVyc2lvbiI6MSwicmVsZWFzZV92ZXJzaW9uIjoiOS42LjIiLCJnaXRfc2hhIjoiZDJkMTRhNjYyOWEwIiwiY29kZV9oYXNoIjoiNWQ5ZWJmNjE5YzYxIiwidHJhY2VfdmVyc2lvbiI6IjVkOWViZjYxOWM2MUBkMmQxNGE2NjI5YTAiLCJpc3N1ZWRfYXQiOiIyMDI2LTA2LTE4VDA0OjE3OjAzLjg1M1oifQ", BUILD_RELEASE_MANIFEST_SIGNATURE = "OefvyW2iLVVPQ-0HMg9Mz-bciCmP8LE5u7fVEss_98E", BUILD_DEFAULT_BACKEND_URL = "https://beta-api.unbrowse.ai", BUILD_DEFAULT_PROFILE = "";
2227
2354
 
2228
2355
  // .tmp-runtime-src/version.ts
2229
2356
  import { createHash as createHash7 } from "crypto";
@@ -45920,6 +46047,130 @@ var init_header_classify = __esm(() => {
45920
46047
  SENSITIVE_HEADER_PATTERN = /token|key|secret|credential|password|session/i;
45921
46048
  });
45922
46049
 
46050
+ // .tmp-runtime-src/values/cardinality.ts
46051
+ function isListLikeIntent2(intent) {
46052
+ return LIST_INTENT_RE2.test(intent ?? "");
46053
+ }
46054
+ function valueLooksLikeSingleItem2(value) {
46055
+ if (value == null || Array.isArray(value) || typeof value !== "object")
46056
+ return false;
46057
+ const obj = value;
46058
+ for (const key of COLLECTION_KEYS2) {
46059
+ if (Array.isArray(obj[key]))
46060
+ return false;
46061
+ }
46062
+ for (const v of Object.values(obj)) {
46063
+ if (Array.isArray(v) && v.some((x) => x !== null && typeof x === "object"))
46064
+ return false;
46065
+ }
46066
+ const atType = typeof obj["@type"] === "string" ? obj["@type"].toLowerCase() : "";
46067
+ const isItemType = ITEM_SCHEMA_TYPES2.has(atType);
46068
+ const hasName = "name" in obj || "title" in obj || "headline" in obj;
46069
+ const hasPriceish = "offers" in obj || "price" in obj || "sku" in obj;
46070
+ return isItemType || hasName && hasPriceish;
46071
+ }
46072
+ function schemaLooksLikeSingleItem2(rs) {
46073
+ if (!rs || typeof rs !== "object")
46074
+ return false;
46075
+ const schema = rs;
46076
+ if (schema.type === "array")
46077
+ return false;
46078
+ const props = schema.properties ?? {};
46079
+ for (const key of COLLECTION_KEYS2) {
46080
+ if (key in props)
46081
+ return false;
46082
+ }
46083
+ for (const value of Object.values(props)) {
46084
+ if (value && typeof value === "object" && value.type === "array" && value.items?.type === "object") {
46085
+ return false;
46086
+ }
46087
+ }
46088
+ if (schema.type !== "object")
46089
+ return false;
46090
+ const hasType = "@type" in props;
46091
+ const hasName = "name" in props || "title" in props;
46092
+ const hasPriceish = "offers" in props || "price" in props || "sku" in props;
46093
+ return hasType || hasName && hasPriceish;
46094
+ }
46095
+ function routeLooksLikeSingleItem2(route) {
46096
+ const tmpl = route.url_template ?? "";
46097
+ let pathAndQuery = tmpl;
46098
+ try {
46099
+ const u = new URL(tmpl);
46100
+ pathAndQuery = `${u.pathname}${u.search}`;
46101
+ } catch {}
46102
+ const lower = pathAndQuery.toLowerCase();
46103
+ if (/\/(?:search|q|categories?|browse|results?|listings|explore|discover|feed|catalog(?:ue)?|collections?|shop|all)\b/.test(lower) || /[?&](?:q|query|keyword|keywords|search|term|category|cat|page)=/.test(lower)) {
46104
+ return false;
46105
+ }
46106
+ if (/\/(?:p|product|products|item|items|listing|detail|details|dp|pd|sku)\/[^/]+/.test(lower))
46107
+ return true;
46108
+ const lastSeg = lower.split("?")[0].replace(/\/+$/, "").split("/").pop() ?? "";
46109
+ if (/-\d{3,}$/.test(lastSeg) || /^\d{3,}$/.test(lastSeg))
46110
+ return true;
46111
+ if (/\{[^}]+\}/.test(lower))
46112
+ return false;
46113
+ return schemaLooksLikeSingleItem2(route.response_schema);
46114
+ }
46115
+ function urlPathLooksListLike2(contextUrl) {
46116
+ if (!contextUrl)
46117
+ return false;
46118
+ try {
46119
+ const pathname = new URL(contextUrl).pathname.toLowerCase();
46120
+ return /\/(?:search|basic-search|result-page|results?|discover|browse|categories?|q|listings|feed|catalog(?:ue)?)\b/.test(pathname);
46121
+ } catch {
46122
+ return false;
46123
+ }
46124
+ }
46125
+ function cardinalityMatches2(intent, subject, opts) {
46126
+ const wantsMany = isListLikeIntent2(intent) || urlPathLooksListLike2(opts?.contextUrl);
46127
+ if (!wantsMany)
46128
+ return true;
46129
+ switch (subject.kind) {
46130
+ case "value":
46131
+ return !valueLooksLikeSingleItem2(subject.value);
46132
+ case "schema":
46133
+ return !schemaLooksLikeSingleItem2(subject.schema);
46134
+ case "route":
46135
+ return !routeLooksLikeSingleItem2(subject.route);
46136
+ }
46137
+ }
46138
+ var LIST_INTENT_RE2, ITEM_SCHEMA_TYPES2, COLLECTION_KEYS2;
46139
+ var init_cardinality2 = __esm(() => {
46140
+ LIST_INTENT_RE2 = /\b(search|find|lookup|browse|discover|list(?:ings?)?|feed|catalog(?:ue)?)\b/i;
46141
+ ITEM_SCHEMA_TYPES2 = new Set([
46142
+ "product",
46143
+ "offer",
46144
+ "article",
46145
+ "newsarticle",
46146
+ "blogposting",
46147
+ "recipe",
46148
+ "event",
46149
+ "place",
46150
+ "localbusiness",
46151
+ "jobposting",
46152
+ "book",
46153
+ "movie",
46154
+ "creativework",
46155
+ "person",
46156
+ "organization"
46157
+ ]);
46158
+ COLLECTION_KEYS2 = [
46159
+ "itemListElement",
46160
+ "items",
46161
+ "results",
46162
+ "products",
46163
+ "listings",
46164
+ "data",
46165
+ "edges",
46166
+ "hits",
46167
+ "records",
46168
+ "entries",
46169
+ "rows",
46170
+ "nodes"
46171
+ ];
46172
+ });
46173
+
45923
46174
  // node_modules/.bun/nanoid@5.1.11/node_modules/nanoid/url-alphabet/index.js
45924
46175
  var urlAlphabet = "useandom-26T198340PX75pxJACKVERYMINDBUSHWOLF_GQZbfghjklqvwyzrict";
45925
46176
 
@@ -117228,7 +117479,7 @@ __export(exports_extraction, {
117228
117479
  cleanDOM: () => cleanDOM,
117229
117480
  buildStructuredDataHeader: () => buildStructuredDataHeader
117230
117481
  });
117231
- function extractHtmlMetadataFallback(html3) {
117482
+ function extractHtmlMetadataFallback(html3, intent) {
117232
117483
  if (!html3 || html3.length < 100)
117233
117484
  return null;
117234
117485
  try {
@@ -117260,8 +117511,9 @@ function extractHtmlMetadataFallback(html3) {
117260
117511
  jsonLdBlocks.push(parsed);
117261
117512
  } catch {}
117262
117513
  });
117263
- if (jsonLdBlocks.length > 0)
117264
- out.json_ld = jsonLdBlocks;
117514
+ const usableJsonLd = isListLikeIntent2(intent) ? jsonLdBlocks.filter((b) => !valueLooksLikeSingleItem2(b)) : jsonLdBlocks;
117515
+ if (usableJsonLd.length > 0)
117516
+ out.json_ld = usableJsonLd;
117265
117517
  const headings = [];
117266
117518
  $2("h1, h2").each((_, el) => {
117267
117519
  const text3 = cleanText($2(el).text());
@@ -119584,6 +119836,16 @@ function scoreSiteMetaJsonLdDemotion(structure, intent) {
119584
119836
  return 0;
119585
119837
  return -200;
119586
119838
  }
119839
+ function isSingleItemStructureForList(structure, intent) {
119840
+ if (!TINY_RESULT_LIST_INTENT.test(intent.toLowerCase()))
119841
+ return false;
119842
+ if (structure.type === "repeated-elements")
119843
+ return false;
119844
+ return valueLooksLikeSingleItem2(structure.data);
119845
+ }
119846
+ function scoreSingleItemListMismatch(structure, intent) {
119847
+ return isSingleItemStructureForList(structure, intent) ? -200 : 0;
119848
+ }
119587
119849
  function looksLikeTinyContentReadResult(data2, intent) {
119588
119850
  if (data2 == null)
119589
119851
  return { tiny: false, bytes: 0, stringLeafChars: 0 };
@@ -119737,9 +119999,9 @@ function extractFromDOM(html3, intent, contextUrl) {
119737
119999
  const articleStructures = extractArticleBodySpecial(html3.length > 600000 ? html3.slice(0, 600000) : html3, intent);
119738
120000
  const allStructures = [...flashStructures, ...githubStructures, ...repeatedPersonStructures, ...packageSearchStructures, ...xProfileStructures, ...postStructures, ...repeatedArticleStructures, ...trendStructures, ...definitionStructures, ...packageDetailStructures, ...arxivAbstractStructures, ...courseStructures, ...articleStructures, ...spaStructures, ...parseStructured(cleaned)].map((structure) => normalizeStructureForIntent(structure, intent));
119739
120001
  const isListIntent = TINY_RESULT_LIST_INTENT.test(intent.toLowerCase());
119740
- const structures = allStructures.filter((s) => !(s.type === "repeated-elements" && looksLikeDegenerateRowArray(s.data)) && !looksLikeConfigShape(s.data) && !looksLikeEmptyContainer(s.data) && !(isListIntent && looksLikeSiteMetaJsonLd(s.data)));
120002
+ const structures = allStructures.filter((s) => !(s.type === "repeated-elements" && looksLikeDegenerateRowArray(s.data)) && !looksLikeConfigShape(s.data) && !looksLikeEmptyContainer(s.data) && !(isListIntent && looksLikeSiteMetaJsonLd(s.data)) && !isSingleItemStructureForList(s, intent));
119741
120003
  if (structures.length === 0) {
119742
- const fallback2 = extractHtmlMetadataFallback(html3);
120004
+ const fallback2 = extractHtmlMetadataFallback(html3, intent);
119743
120005
  if (fallback2) {
119744
120006
  return _finalize({ data: fallback2, extraction_method: "html_metadata_fallback", confidence: 0.4 });
119745
120007
  }
@@ -119748,7 +120010,7 @@ function extractFromDOM(html3, intent, contextUrl) {
119748
120010
  const intentWords = intent.toLowerCase().split(/\s+/).filter(Boolean);
119749
120011
  const scored = structures.map((s) => ({
119750
120012
  structure: s,
119751
- score: scoreRelevance(s, intentWords) + scoreSemanticFit(s, intent) + scoreSparseLinkList(s) + scoreFieldRichness(s) + scoreConfigShapeDemotion(s) + scoreDegenerateRowDemotion(s) + scoreDuplicateRowDemotion(s) + scoreEmptyContainerDemotion(s) + scoreSiteMetaJsonLdDemotion(s, intent) + scoreTableIntentOverlapDemotion(s, intent, contextUrl, structures)
120013
+ score: scoreRelevance(s, intentWords) + scoreSemanticFit(s, intent) + scoreSparseLinkList(s) + scoreFieldRichness(s) + scoreConfigShapeDemotion(s) + scoreDegenerateRowDemotion(s) + scoreDuplicateRowDemotion(s) + scoreEmptyContainerDemotion(s) + scoreSiteMetaJsonLdDemotion(s, intent) + scoreSingleItemListMismatch(s, intent) + scoreTableIntentOverlapDemotion(s, intent, contextUrl, structures)
119752
120014
  }));
119753
120015
  scored.sort((a, b) => b.score - a.score);
119754
120016
  const passing = scored.filter((candidate) => assessIntentResult(candidate.structure.data, intent).verdict === "pass");
@@ -120364,6 +120626,7 @@ function sanitizeExtractionToJson(data2, depth = 0) {
120364
120626
  var STRIP_TAGS, CHROME_TAGS, AD_PATTERNS, HIDDEN_ATTRS, CONTENT_SELECTORS, CARD_SELECTORS, CONFIG_TOP_LEVEL_KEYS, CONFIG_CHUNK_VALUE_KEYS, INTENT_OVERLAP_STOPWORDS, SITE_META_LD_TYPES, TINY_RESULT_LIST_INTENT, TINY_RESULT_DETAIL_INTENT, STRUCTURED_DATA_HIGHLIGHT_TYPES, HTML_TAG_RE, TABLE_RE;
120365
120627
  var init_extraction = __esm(() => {
120366
120628
  init_esm11();
120629
+ init_cardinality2();
120367
120630
  STRIP_TAGS = new Set(["script", "style", "noscript", "svg", "iframe"]);
120368
120631
  CHROME_TAGS = new Set(["nav", "footer", "header"]);
120369
120632
  AD_PATTERNS = /\b(ad|ads|advert|advertisement|tracking|tracker|cookie-banner|cookie-consent|cookie-notice|popup|modal-overlay|gdpr|consent|banner-promo)\b/i;
@@ -129889,7 +130152,8 @@ function selectBestEndpoint(endpoints, intent, skillDomain, contextUrl) {
129889
130152
  const ranked = rankEndpoints(endpoints, intent, skillDomain, contextUrl);
129890
130153
  if (ranked.length === 0)
129891
130154
  throw new Error("All endpoints are disabled");
129892
- return ranked[0].endpoint;
130155
+ const preferred = ranked.find((r) => cardinalityMatches2(intent, { kind: "route", route: r.endpoint }, { contextUrl }));
130156
+ return (preferred ?? ranked[0]).endpoint;
129893
130157
  }
129894
130158
  function isHtml2(text3) {
129895
130159
  const trimmed = text3.trimStart().slice(0, 200).toLowerCase();
@@ -129936,6 +130200,7 @@ var init_execution = __esm(async () => {
129936
130200
  init_reveng_server_first();
129937
130201
  init_header_classify();
129938
130202
  init_storage_hole_bindings();
130203
+ init_cardinality2();
129939
130204
  init_sealed_blob_store();
129940
130205
  init_signer();
129941
130206
  init_bundle_scanner();
@@ -132155,7 +132420,10 @@ function pickWalkTarget(requestedUrl, ranked, minScore = 0.8) {
132155
132420
  return false;
132156
132421
  }
132157
132422
  };
132158
- return eligible.find((c) => hasPath(c.url)) ?? eligible[0];
132423
+ const reqReg = registrableHost(requestedUrl);
132424
+ const sameDomain = reqReg ? eligible.filter((c) => registrableHost(c.url) === reqReg) : [];
132425
+ const pool2 = sameDomain.length > 0 ? sameDomain : eligible;
132426
+ return pool2.find((c) => hasPath(c.url)) ?? pool2[0];
132159
132427
  }
132160
132428
  function artifactResultWithShortlist(artifact, skillId, triggerUrl) {
132161
132429
  const ep = artifact.endpoint;
@@ -132625,6 +132893,9 @@ function isResolveUsableEndpointForIntent(endpoint, intent, contextUrl) {
132625
132893
  if (isFeedTimelineIntent(intent, contextUrl) && endpointHasNegativeTag(endpoint, "helper")) {
132626
132894
  return false;
132627
132895
  }
132896
+ if (!cardinalityMatches2(intent, { kind: "route", route: endpoint }, { contextUrl })) {
132897
+ return false;
132898
+ }
132628
132899
  return true;
132629
132900
  }
132630
132901
  function normalizeRouteContext(url) {
@@ -132767,7 +133038,7 @@ function withContextReplayEndpoint(skill, _intent, _contextUrl) {
132767
133038
  return skill;
132768
133039
  }
132769
133040
  function isSearchLikeIntent(intent, contextUrl) {
132770
- if (/\b(search|find|lookup|browse|discover)\b/i.test(intent ?? ""))
133041
+ if (isListLikeIntent2(intent))
132771
133042
  return true;
132772
133043
  try {
132773
133044
  const pathname = contextUrl ? new URL(contextUrl).pathname.toLowerCase() : "";
@@ -133639,7 +133910,20 @@ function inferPreferredEntityTokens(intent) {
133639
133910
  return [];
133640
133911
  }
133641
133912
  function isAcceptableIntentResult(result, intent) {
133642
- return assessIntentResult(result, intent).verdict !== "fail";
133913
+ if (assessIntentResult(result, intent).verdict === "fail")
133914
+ return false;
133915
+ if (!cardinalityMatches2(intent, { kind: "value", value: unwrapResultPayload(result) }))
133916
+ return false;
133917
+ return true;
133918
+ }
133919
+ function unwrapResultPayload(result) {
133920
+ if (result == null || typeof result !== "object" || Array.isArray(result))
133921
+ return result;
133922
+ const rec = { ...result };
133923
+ for (const k of ["available_endpoints", "available_operations", "shortlist_for_judgment", "workflow_dag", "walked_from", "exa_candidates", "run_plan"]) {
133924
+ delete rec[k];
133925
+ }
133926
+ return rec;
133643
133927
  }
133644
133928
  function candidateMatchesPreferredEntity(candidate, preferredTokens) {
133645
133929
  if (preferredTokens.length === 0)
@@ -137481,6 +137765,8 @@ var init_orchestrator = __esm(async () => {
137481
137765
  init_search_forms();
137482
137766
  init_ddg_search();
137483
137767
  init_cached_resolution2();
137768
+ init_cardinality2();
137769
+ init_cardinality2();
137484
137770
  init_principal_scope();
137485
137771
  init_yield_safety();
137486
137772
  init_trace_store();
@@ -143978,9 +144264,14 @@ async function registerRoutes(app) {
143978
144264
  recovered = true;
143979
144265
  } else if (errResult.available_endpoints?.length === 1) {
143980
144266
  const only = errResult.available_endpoints[0].endpoint_id;
143981
- console.log(`[exec] D7 single-endpoint skill: rewriting endpoint_id ${want} → ${only}`);
143982
- execParams.endpoint_id = only;
143983
- recovered = true;
144267
+ const onlyEp = (skill.endpoints ?? []).find((e) => e.endpoint_id === only);
144268
+ if (!onlyEp || cardinalityMatches2(intent, { kind: "route", route: onlyEp }, { contextUrl: context_url })) {
144269
+ console.log(`[exec] D7 single-endpoint skill: rewriting endpoint_id ${want} → ${only}`);
144270
+ execParams.endpoint_id = only;
144271
+ recovered = true;
144272
+ } else {
144273
+ console.log(`[exec] D7 single-endpoint skill: only endpoint ${only} is a single-item route for a list intent — not forcing (cardinality gate)`);
144274
+ }
143984
144275
  }
143985
144276
  }
143986
144277
  if (!recovered && (skill.endpoints?.length ?? 0) >= 2 && intent) {
@@ -145208,6 +145499,7 @@ var init_routes = __esm(async () => {
145208
145499
  init_client3();
145209
145500
  init_reveng_server_first();
145210
145501
  init_header_classify();
145502
+ init_cardinality2();
145211
145503
  init_capture_spool();
145212
145504
  init_nanoid();
145213
145505
  init_marketplace();
@@ -150601,7 +150893,7 @@ __export(exports_extraction2, {
150601
150893
  cleanDOM: () => cleanDOM2,
150602
150894
  buildStructuredDataHeader: () => buildStructuredDataHeader2
150603
150895
  });
150604
- function extractHtmlMetadataFallback2(html3) {
150896
+ function extractHtmlMetadataFallback2(html3, intent) {
150605
150897
  if (!html3 || html3.length < 100)
150606
150898
  return null;
150607
150899
  try {
@@ -150633,8 +150925,9 @@ function extractHtmlMetadataFallback2(html3) {
150633
150925
  jsonLdBlocks.push(parsed);
150634
150926
  } catch {}
150635
150927
  });
150636
- if (jsonLdBlocks.length > 0)
150637
- out.json_ld = jsonLdBlocks;
150928
+ const usableJsonLd = isListLikeIntent2(intent) ? jsonLdBlocks.filter((b) => !valueLooksLikeSingleItem2(b)) : jsonLdBlocks;
150929
+ if (usableJsonLd.length > 0)
150930
+ out.json_ld = usableJsonLd;
150638
150931
  const headings = [];
150639
150932
  $2("h1, h2").each((_, el) => {
150640
150933
  const text3 = cleanText2($2(el).text());
@@ -152957,6 +153250,16 @@ function scoreSiteMetaJsonLdDemotion2(structure, intent) {
152957
153250
  return 0;
152958
153251
  return -200;
152959
153252
  }
153253
+ function isSingleItemStructureForList2(structure, intent) {
153254
+ if (!TINY_RESULT_LIST_INTENT2.test(intent.toLowerCase()))
153255
+ return false;
153256
+ if (structure.type === "repeated-elements")
153257
+ return false;
153258
+ return valueLooksLikeSingleItem2(structure.data);
153259
+ }
153260
+ function scoreSingleItemListMismatch2(structure, intent) {
153261
+ return isSingleItemStructureForList2(structure, intent) ? -200 : 0;
153262
+ }
152960
153263
  function looksLikeTinyContentReadResult2(data2, intent) {
152961
153264
  if (data2 == null)
152962
153265
  return { tiny: false, bytes: 0, stringLeafChars: 0 };
@@ -153110,9 +153413,9 @@ function extractFromDOM2(html3, intent, contextUrl) {
153110
153413
  const articleStructures = extractArticleBodySpecial2(html3.length > 600000 ? html3.slice(0, 600000) : html3, intent);
153111
153414
  const allStructures = [...flashStructures, ...githubStructures, ...repeatedPersonStructures, ...packageSearchStructures, ...xProfileStructures, ...postStructures, ...repeatedArticleStructures, ...trendStructures, ...definitionStructures, ...packageDetailStructures, ...arxivAbstractStructures, ...courseStructures, ...articleStructures, ...spaStructures, ...parseStructured2(cleaned)].map((structure) => normalizeStructureForIntent2(structure, intent));
153112
153415
  const isListIntent = TINY_RESULT_LIST_INTENT2.test(intent.toLowerCase());
153113
- const structures = allStructures.filter((s) => !(s.type === "repeated-elements" && looksLikeDegenerateRowArray2(s.data)) && !looksLikeConfigShape2(s.data) && !looksLikeEmptyContainer2(s.data) && !(isListIntent && looksLikeSiteMetaJsonLd2(s.data)));
153416
+ const structures = allStructures.filter((s) => !(s.type === "repeated-elements" && looksLikeDegenerateRowArray2(s.data)) && !looksLikeConfigShape2(s.data) && !looksLikeEmptyContainer2(s.data) && !(isListIntent && looksLikeSiteMetaJsonLd2(s.data)) && !isSingleItemStructureForList2(s, intent));
153114
153417
  if (structures.length === 0) {
153115
- const fallback2 = extractHtmlMetadataFallback2(html3);
153418
+ const fallback2 = extractHtmlMetadataFallback2(html3, intent);
153116
153419
  if (fallback2) {
153117
153420
  return _finalize({ data: fallback2, extraction_method: "html_metadata_fallback", confidence: 0.4 });
153118
153421
  }
@@ -153121,7 +153424,7 @@ function extractFromDOM2(html3, intent, contextUrl) {
153121
153424
  const intentWords = intent.toLowerCase().split(/\s+/).filter(Boolean);
153122
153425
  const scored = structures.map((s) => ({
153123
153426
  structure: s,
153124
- score: scoreRelevance2(s, intentWords) + scoreSemanticFit2(s, intent) + scoreSparseLinkList2(s) + scoreFieldRichness2(s) + scoreConfigShapeDemotion2(s) + scoreDegenerateRowDemotion2(s) + scoreDuplicateRowDemotion2(s) + scoreEmptyContainerDemotion2(s) + scoreSiteMetaJsonLdDemotion2(s, intent) + scoreTableIntentOverlapDemotion2(s, intent, contextUrl, structures)
153427
+ score: scoreRelevance2(s, intentWords) + scoreSemanticFit2(s, intent) + scoreSparseLinkList2(s) + scoreFieldRichness2(s) + scoreConfigShapeDemotion2(s) + scoreDegenerateRowDemotion2(s) + scoreDuplicateRowDemotion2(s) + scoreEmptyContainerDemotion2(s) + scoreSiteMetaJsonLdDemotion2(s, intent) + scoreSingleItemListMismatch2(s, intent) + scoreTableIntentOverlapDemotion2(s, intent, contextUrl, structures)
153125
153428
  }));
153126
153429
  scored.sort((a, b) => b.score - a.score);
153127
153430
  const passing = scored.filter((candidate) => assessIntentResult(candidate.structure.data, intent).verdict === "pass");
@@ -153737,6 +154040,7 @@ function sanitizeExtractionToJson2(data2, depth = 0) {
153737
154040
  var STRIP_TAGS2, CHROME_TAGS2, AD_PATTERNS2, HIDDEN_ATTRS2, CONTENT_SELECTORS2, CARD_SELECTORS2, CONFIG_TOP_LEVEL_KEYS2, CONFIG_CHUNK_VALUE_KEYS2, INTENT_OVERLAP_STOPWORDS2, SITE_META_LD_TYPES2, TINY_RESULT_LIST_INTENT2, TINY_RESULT_DETAIL_INTENT2, STRUCTURED_DATA_HIGHLIGHT_TYPES2, HTML_TAG_RE2, TABLE_RE2;
153738
154041
  var init_extraction2 = __esm(() => {
153739
154042
  init_esm11();
154043
+ init_cardinality2();
153740
154044
  STRIP_TAGS2 = new Set(["script", "style", "noscript", "svg", "iframe"]);
153741
154045
  CHROME_TAGS2 = new Set(["nav", "footer", "header"]);
153742
154046
  AD_PATTERNS2 = /\b(ad|ads|advert|advertisement|tracking|tracker|cookie-banner|cookie-consent|cookie-notice|popup|modal-overlay|gdpr|consent|banner-promo)\b/i;
@@ -239361,7 +239665,7 @@ async function cmdResolve(flags) {
239361
239665
  }
239362
239666
  if (resolveCacheSafe(flags)) {
239363
239667
  const cachedHit = peekResolution(resolveCacheKeyFor(flags, intent), resolveCacheTtlMs());
239364
- if (cachedHit) {
239668
+ if (cachedHit && resolutionCardinalityMatches(intent, cachedHit.result ?? cachedHit.data)) {
239365
239669
  const replay = markResolveCacheReplay(cachedHit);
239366
239670
  const hostType2 = detectTelemetryHostType2();
239367
239671
  if (process.env.UNBROWSE_LANDING_TOKEN || process.env.UNBROWSE_ATTRIBUTION_B64) {
@@ -239581,7 +239885,7 @@ async function cmdResolve(flags) {
239581
239885
  if (skill?.skill_id && trace) {
239582
239886
  result._feedback = `unbrowse feedback --skill ${skill.skill_id} --endpoint ${trace.endpoint_id || "?"} --rating <1-5>`;
239583
239887
  }
239584
- if (resolveCacheSafe(flags) && isResolveSuccessResult(result)) {
239888
+ if (resolveCacheSafe(flags) && isResolveSuccessResult(result) && resolutionCardinalityMatches(intent, result.result ?? result.data)) {
239585
239889
  storeResolution(resolveCacheKeyFor(flags, intent), result, resolveCacheTtlMs());
239586
239890
  }
239587
239891
  output(result, !!flags.pretty);
@@ -239881,14 +240185,30 @@ async function cmdRun(args, flags, verb = "run") {
239881
240185
  } else if (explicitEndpointId || !bestEndpoint || endpointIsSafeToAutoExecute(bestEndpoint)) {
239882
240186
  runPlan.push({ step: "execute", mode: "direct_api", status: "started", endpoint_id: endpointToExecute });
239883
240187
  const resolvedSource = typeof result.source === "string" ? result.source : undefined;
239884
- result = await withPendingNotice(api4("POST", `/v1/skills/${skillId}/execute`, execBody(endpointToExecute)), "Executing best endpoint...");
239885
- if (resolvedSource && typeof result.source !== "string")
239886
- result.source = resolvedSource;
239887
- runPlan[runPlan.length - 1] = {
239888
- ...runPlan[runPlan.length - 1],
239889
- status: isResolveSuccessResult(result) ? "complete" : "error",
239890
- error: resolveResultError(result) ?? null
239891
- };
240188
+ const deferralResult = result;
240189
+ const executed = await withPendingNotice(api4("POST", `/v1/skills/${skillId}/execute`, execBody(endpointToExecute)), "Executing best endpoint...");
240190
+ if (!explicitEndpointId && isResolveSuccessResult(executed) && !resolutionCardinalityMatches(intent, executed.result ?? executed.data)) {
240191
+ runPlan[runPlan.length - 1] = {
240192
+ ...runPlan[runPlan.length - 1],
240193
+ status: "skipped",
240194
+ reason: "cardinality_mismatch_single_item"
240195
+ };
240196
+ deferralResult.next_action = {
240197
+ title: "List intent returned a single item",
240198
+ command: `unbrowse execute --skill ${skillId} --endpoint ${endpointToExecute}`,
240199
+ why: "Auto-execute yielded a single item for a list/search intent; the page's listings are likely JS-rendered behind an internal API. Returning the route shortlist instead of one item."
240200
+ };
240201
+ result = deferralResult;
240202
+ } else {
240203
+ result = executed;
240204
+ if (resolvedSource && typeof result.source !== "string")
240205
+ result.source = resolvedSource;
240206
+ runPlan[runPlan.length - 1] = {
240207
+ ...runPlan[runPlan.length - 1],
240208
+ status: isResolveSuccessResult(result) ? "complete" : "error",
240209
+ error: resolveResultError(result) ?? null
240210
+ };
240211
+ }
239892
240212
  } else {
239893
240213
  runPlan.push({
239894
240214
  step: "execute",
@@ -241712,6 +242032,7 @@ var init_cli = __esm(async () => {
241712
242032
  init_extract_auth_header();
241713
242033
  init_kuri_proxy_bridge();
241714
242034
  init_cached_resolution();
242035
+ init_cardinality();
241715
242036
  init_issue();
241716
242037
  init_client2();
241717
242038
  init_impact_log();
package/runtime/mcp.js CHANGED
@@ -36310,7 +36310,7 @@ var init_cached_resolution = __esm(() => {
36310
36310
  });
36311
36311
 
36312
36312
  // .tmp-runtime-src/build-info.generated.ts
36313
- var BUILD_RELEASE_VERSION = "9.6.0", BUILD_GIT_SHA = "5b6b9dc9e8e1", BUILD_CODE_HASH = "5d9ebf619c61", BUILD_RELEASE_MANIFEST_BASE64 = "eyJzY2hlbWFfdmVyc2lvbiI6MSwicmVsZWFzZV92ZXJzaW9uIjoiOS42LjAiLCJnaXRfc2hhIjoiNWI2YjlkYzllOGUxIiwiY29kZV9oYXNoIjoiNWQ5ZWJmNjE5YzYxIiwidHJhY2VfdmVyc2lvbiI6IjVkOWViZjYxOWM2MUA1YjZiOWRjOWU4ZTEiLCJpc3N1ZWRfYXQiOiIyMDI2LTA2LTE3VDIyOjAyOjQ0LjA3MVoifQ", BUILD_RELEASE_MANIFEST_SIGNATURE = "MqIyb1B_GA8W5Nv_Jictwt1jVCCTgxxdIjUFJqLoeAU", BUILD_DEFAULT_BACKEND_URL = "https://beta-api.unbrowse.ai", BUILD_DEFAULT_PROFILE = "";
36313
+ var BUILD_RELEASE_VERSION = "9.6.2", BUILD_GIT_SHA = "d2d14a6629a0", BUILD_CODE_HASH = "5d9ebf619c61", BUILD_RELEASE_MANIFEST_BASE64 = "eyJzY2hlbWFfdmVyc2lvbiI6MSwicmVsZWFzZV92ZXJzaW9uIjoiOS42LjIiLCJnaXRfc2hhIjoiZDJkMTRhNjYyOWEwIiwiY29kZV9oYXNoIjoiNWQ5ZWJmNjE5YzYxIiwidHJhY2VfdmVyc2lvbiI6IjVkOWViZjYxOWM2MUBkMmQxNGE2NjI5YTAiLCJpc3N1ZWRfYXQiOiIyMDI2LTA2LTE4VDA0OjE3OjAzLjg1M1oifQ", BUILD_RELEASE_MANIFEST_SIGNATURE = "OefvyW2iLVVPQ-0HMg9Mz-bciCmP8LE5u7fVEss_98E", BUILD_DEFAULT_BACKEND_URL = "https://beta-api.unbrowse.ai", BUILD_DEFAULT_PROFILE = "";
36314
36314
 
36315
36315
  // .tmp-runtime-src/version.ts
36316
36316
  import { createHash as createHash4 } from "crypto";
@@ -43177,6 +43177,133 @@ var init_header_classify = __esm(() => {
43177
43177
  SENSITIVE_HEADER_PATTERN = /token|key|secret|credential|password|session/i;
43178
43178
  });
43179
43179
 
43180
+ // .tmp-runtime-src/values/cardinality.ts
43181
+ function isListLikeIntent(intent) {
43182
+ return LIST_INTENT_RE.test(intent ?? "");
43183
+ }
43184
+ function valueLooksLikeSingleItem(value) {
43185
+ if (value == null || Array.isArray(value) || typeof value !== "object")
43186
+ return false;
43187
+ const obj = value;
43188
+ for (const key of COLLECTION_KEYS) {
43189
+ if (Array.isArray(obj[key]))
43190
+ return false;
43191
+ }
43192
+ for (const v of Object.values(obj)) {
43193
+ if (Array.isArray(v) && v.some((x) => x !== null && typeof x === "object"))
43194
+ return false;
43195
+ }
43196
+ const atType = typeof obj["@type"] === "string" ? obj["@type"].toLowerCase() : "";
43197
+ const isItemType = ITEM_SCHEMA_TYPES.has(atType);
43198
+ const hasName = "name" in obj || "title" in obj || "headline" in obj;
43199
+ const hasPriceish = "offers" in obj || "price" in obj || "sku" in obj;
43200
+ return isItemType || hasName && hasPriceish;
43201
+ }
43202
+ function schemaLooksLikeSingleItem(rs) {
43203
+ if (!rs || typeof rs !== "object")
43204
+ return false;
43205
+ const schema = rs;
43206
+ if (schema.type === "array")
43207
+ return false;
43208
+ const props = schema.properties ?? {};
43209
+ for (const key of COLLECTION_KEYS) {
43210
+ if (key in props)
43211
+ return false;
43212
+ }
43213
+ for (const value of Object.values(props)) {
43214
+ if (value && typeof value === "object" && value.type === "array" && value.items?.type === "object") {
43215
+ return false;
43216
+ }
43217
+ }
43218
+ if (schema.type !== "object")
43219
+ return false;
43220
+ const hasType = "@type" in props;
43221
+ const hasName = "name" in props || "title" in props;
43222
+ const hasPriceish = "offers" in props || "price" in props || "sku" in props;
43223
+ return hasType || hasName && hasPriceish;
43224
+ }
43225
+ function routeLooksLikeSingleItem(route) {
43226
+ const tmpl = route.url_template ?? "";
43227
+ let pathAndQuery = tmpl;
43228
+ try {
43229
+ const u = new URL(tmpl);
43230
+ pathAndQuery = `${u.pathname}${u.search}`;
43231
+ } catch {}
43232
+ const lower = pathAndQuery.toLowerCase();
43233
+ if (/\/(?:search|q|categories?|browse|results?|listings|explore|discover|feed|catalog(?:ue)?|collections?|shop|all)\b/.test(lower) || /[?&](?:q|query|keyword|keywords|search|term|category|cat|page)=/.test(lower)) {
43234
+ return false;
43235
+ }
43236
+ if (/\/(?:p|product|products|item|items|listing|detail|details|dp|pd|sku)\/[^/]+/.test(lower))
43237
+ return true;
43238
+ const lastSeg = lower.split("?")[0].replace(/\/+$/, "").split("/").pop() ?? "";
43239
+ if (/-\d{3,}$/.test(lastSeg) || /^\d{3,}$/.test(lastSeg))
43240
+ return true;
43241
+ if (/\{[^}]+\}/.test(lower))
43242
+ return false;
43243
+ return schemaLooksLikeSingleItem(route.response_schema);
43244
+ }
43245
+ function urlPathLooksListLike(contextUrl) {
43246
+ if (!contextUrl)
43247
+ return false;
43248
+ try {
43249
+ const pathname = new URL(contextUrl).pathname.toLowerCase();
43250
+ return /\/(?:search|basic-search|result-page|results?|discover|browse|categories?|q|listings|feed|catalog(?:ue)?)\b/.test(pathname);
43251
+ } catch {
43252
+ return false;
43253
+ }
43254
+ }
43255
+ function cardinalityMatches(intent, subject, opts) {
43256
+ const wantsMany = isListLikeIntent(intent) || urlPathLooksListLike(opts?.contextUrl);
43257
+ if (!wantsMany)
43258
+ return true;
43259
+ switch (subject.kind) {
43260
+ case "value":
43261
+ return !valueLooksLikeSingleItem(subject.value);
43262
+ case "schema":
43263
+ return !schemaLooksLikeSingleItem(subject.schema);
43264
+ case "route":
43265
+ return !routeLooksLikeSingleItem(subject.route);
43266
+ }
43267
+ }
43268
+ function resolutionCardinalityMatches(intent, data) {
43269
+ return cardinalityMatches(intent, { kind: "value", value: data });
43270
+ }
43271
+ var LIST_INTENT_RE, ITEM_SCHEMA_TYPES, COLLECTION_KEYS;
43272
+ var init_cardinality = __esm(() => {
43273
+ LIST_INTENT_RE = /\b(search|find|lookup|browse|discover|list(?:ings?)?|feed|catalog(?:ue)?)\b/i;
43274
+ ITEM_SCHEMA_TYPES = new Set([
43275
+ "product",
43276
+ "offer",
43277
+ "article",
43278
+ "newsarticle",
43279
+ "blogposting",
43280
+ "recipe",
43281
+ "event",
43282
+ "place",
43283
+ "localbusiness",
43284
+ "jobposting",
43285
+ "book",
43286
+ "movie",
43287
+ "creativework",
43288
+ "person",
43289
+ "organization"
43290
+ ]);
43291
+ COLLECTION_KEYS = [
43292
+ "itemListElement",
43293
+ "items",
43294
+ "results",
43295
+ "products",
43296
+ "listings",
43297
+ "data",
43298
+ "edges",
43299
+ "hits",
43300
+ "records",
43301
+ "entries",
43302
+ "rows",
43303
+ "nodes"
43304
+ ];
43305
+ });
43306
+
43180
43307
  // node_modules/.bun/nanoid@5.1.11/node_modules/nanoid/url-alphabet/index.js
43181
43308
  var urlAlphabet = "useandom-26T198340PX75pxJACKVERYMINDBUSHWOLF_GQZbfghjklqvwyzrict";
43182
43309
 
@@ -115440,7 +115567,7 @@ __export(exports_extraction, {
115440
115567
  cleanDOM: () => cleanDOM,
115441
115568
  buildStructuredDataHeader: () => buildStructuredDataHeader
115442
115569
  });
115443
- function extractHtmlMetadataFallback(html3) {
115570
+ function extractHtmlMetadataFallback(html3, intent) {
115444
115571
  if (!html3 || html3.length < 100)
115445
115572
  return null;
115446
115573
  try {
@@ -115472,8 +115599,9 @@ function extractHtmlMetadataFallback(html3) {
115472
115599
  jsonLdBlocks.push(parsed);
115473
115600
  } catch {}
115474
115601
  });
115475
- if (jsonLdBlocks.length > 0)
115476
- out.json_ld = jsonLdBlocks;
115602
+ const usableJsonLd = isListLikeIntent(intent) ? jsonLdBlocks.filter((b) => !valueLooksLikeSingleItem(b)) : jsonLdBlocks;
115603
+ if (usableJsonLd.length > 0)
115604
+ out.json_ld = usableJsonLd;
115477
115605
  const headings = [];
115478
115606
  $2("h1, h2").each((_, el) => {
115479
115607
  const text3 = cleanText($2(el).text());
@@ -117796,6 +117924,16 @@ function scoreSiteMetaJsonLdDemotion(structure, intent) {
117796
117924
  return 0;
117797
117925
  return -200;
117798
117926
  }
117927
+ function isSingleItemStructureForList(structure, intent) {
117928
+ if (!TINY_RESULT_LIST_INTENT.test(intent.toLowerCase()))
117929
+ return false;
117930
+ if (structure.type === "repeated-elements")
117931
+ return false;
117932
+ return valueLooksLikeSingleItem(structure.data);
117933
+ }
117934
+ function scoreSingleItemListMismatch(structure, intent) {
117935
+ return isSingleItemStructureForList(structure, intent) ? -200 : 0;
117936
+ }
117799
117937
  function looksLikeTinyContentReadResult(data2, intent) {
117800
117938
  if (data2 == null)
117801
117939
  return { tiny: false, bytes: 0, stringLeafChars: 0 };
@@ -117949,9 +118087,9 @@ function extractFromDOM(html3, intent, contextUrl) {
117949
118087
  const articleStructures = extractArticleBodySpecial(html3.length > 600000 ? html3.slice(0, 600000) : html3, intent);
117950
118088
  const allStructures = [...flashStructures, ...githubStructures, ...repeatedPersonStructures, ...packageSearchStructures, ...xProfileStructures, ...postStructures, ...repeatedArticleStructures, ...trendStructures, ...definitionStructures, ...packageDetailStructures, ...arxivAbstractStructures, ...courseStructures, ...articleStructures, ...spaStructures, ...parseStructured(cleaned)].map((structure) => normalizeStructureForIntent(structure, intent));
117951
118089
  const isListIntent = TINY_RESULT_LIST_INTENT.test(intent.toLowerCase());
117952
- const structures = allStructures.filter((s) => !(s.type === "repeated-elements" && looksLikeDegenerateRowArray(s.data)) && !looksLikeConfigShape(s.data) && !looksLikeEmptyContainer(s.data) && !(isListIntent && looksLikeSiteMetaJsonLd(s.data)));
118090
+ const structures = allStructures.filter((s) => !(s.type === "repeated-elements" && looksLikeDegenerateRowArray(s.data)) && !looksLikeConfigShape(s.data) && !looksLikeEmptyContainer(s.data) && !(isListIntent && looksLikeSiteMetaJsonLd(s.data)) && !isSingleItemStructureForList(s, intent));
117953
118091
  if (structures.length === 0) {
117954
- const fallback2 = extractHtmlMetadataFallback(html3);
118092
+ const fallback2 = extractHtmlMetadataFallback(html3, intent);
117955
118093
  if (fallback2) {
117956
118094
  return _finalize({ data: fallback2, extraction_method: "html_metadata_fallback", confidence: 0.4 });
117957
118095
  }
@@ -117960,7 +118098,7 @@ function extractFromDOM(html3, intent, contextUrl) {
117960
118098
  const intentWords = intent.toLowerCase().split(/\s+/).filter(Boolean);
117961
118099
  const scored = structures.map((s) => ({
117962
118100
  structure: s,
117963
- score: scoreRelevance(s, intentWords) + scoreSemanticFit(s, intent) + scoreSparseLinkList(s) + scoreFieldRichness(s) + scoreConfigShapeDemotion(s) + scoreDegenerateRowDemotion(s) + scoreDuplicateRowDemotion(s) + scoreEmptyContainerDemotion(s) + scoreSiteMetaJsonLdDemotion(s, intent) + scoreTableIntentOverlapDemotion(s, intent, contextUrl, structures)
118101
+ score: scoreRelevance(s, intentWords) + scoreSemanticFit(s, intent) + scoreSparseLinkList(s) + scoreFieldRichness(s) + scoreConfigShapeDemotion(s) + scoreDegenerateRowDemotion(s) + scoreDuplicateRowDemotion(s) + scoreEmptyContainerDemotion(s) + scoreSiteMetaJsonLdDemotion(s, intent) + scoreSingleItemListMismatch(s, intent) + scoreTableIntentOverlapDemotion(s, intent, contextUrl, structures)
117964
118102
  }));
117965
118103
  scored.sort((a, b) => b.score - a.score);
117966
118104
  const passing = scored.filter((candidate) => assessIntentResult(candidate.structure.data, intent).verdict === "pass");
@@ -118576,6 +118714,7 @@ function sanitizeExtractionToJson(data2, depth = 0) {
118576
118714
  var STRIP_TAGS, CHROME_TAGS, AD_PATTERNS, HIDDEN_ATTRS, CONTENT_SELECTORS, CARD_SELECTORS, CONFIG_TOP_LEVEL_KEYS, CONFIG_CHUNK_VALUE_KEYS, INTENT_OVERLAP_STOPWORDS, SITE_META_LD_TYPES, TINY_RESULT_LIST_INTENT, TINY_RESULT_DETAIL_INTENT, STRUCTURED_DATA_HIGHLIGHT_TYPES, HTML_TAG_RE, TABLE_RE;
118577
118715
  var init_extraction = __esm(() => {
118578
118716
  init_esm11();
118717
+ init_cardinality();
118579
118718
  STRIP_TAGS = new Set(["script", "style", "noscript", "svg", "iframe"]);
118580
118719
  CHROME_TAGS = new Set(["nav", "footer", "header"]);
118581
118720
  AD_PATTERNS = /\b(ad|ads|advert|advertisement|tracking|tracker|cookie-banner|cookie-consent|cookie-notice|popup|modal-overlay|gdpr|consent|banner-promo)\b/i;
@@ -128277,7 +128416,8 @@ function selectBestEndpoint(endpoints, intent, skillDomain, contextUrl) {
128277
128416
  const ranked = rankEndpoints(endpoints, intent, skillDomain, contextUrl);
128278
128417
  if (ranked.length === 0)
128279
128418
  throw new Error("All endpoints are disabled");
128280
- return ranked[0].endpoint;
128419
+ const preferred = ranked.find((r) => cardinalityMatches(intent, { kind: "route", route: r.endpoint }, { contextUrl }));
128420
+ return (preferred ?? ranked[0]).endpoint;
128281
128421
  }
128282
128422
  function isHtml2(text3) {
128283
128423
  const trimmed = text3.trimStart().slice(0, 200).toLowerCase();
@@ -128324,6 +128464,7 @@ var init_execution = __esm(async () => {
128324
128464
  init_reveng_server_first();
128325
128465
  init_header_classify();
128326
128466
  init_storage_hole_bindings();
128467
+ init_cardinality();
128327
128468
  init_sealed_blob_store();
128328
128469
  init_signer();
128329
128470
  init_bundle_scanner();
@@ -130567,7 +130708,10 @@ function pickWalkTarget(requestedUrl, ranked, minScore = 0.8) {
130567
130708
  return false;
130568
130709
  }
130569
130710
  };
130570
- return eligible.find((c) => hasPath(c.url)) ?? eligible[0];
130711
+ const reqReg = registrableHost(requestedUrl);
130712
+ const sameDomain = reqReg ? eligible.filter((c) => registrableHost(c.url) === reqReg) : [];
130713
+ const pool2 = sameDomain.length > 0 ? sameDomain : eligible;
130714
+ return pool2.find((c) => hasPath(c.url)) ?? pool2[0];
130571
130715
  }
130572
130716
  function artifactResultWithShortlist(artifact, skillId, triggerUrl) {
130573
130717
  const ep = artifact.endpoint;
@@ -131037,6 +131181,9 @@ function isResolveUsableEndpointForIntent(endpoint, intent, contextUrl) {
131037
131181
  if (isFeedTimelineIntent(intent, contextUrl) && endpointHasNegativeTag(endpoint, "helper")) {
131038
131182
  return false;
131039
131183
  }
131184
+ if (!cardinalityMatches(intent, { kind: "route", route: endpoint }, { contextUrl })) {
131185
+ return false;
131186
+ }
131040
131187
  return true;
131041
131188
  }
131042
131189
  function normalizeRouteContext(url) {
@@ -131179,7 +131326,7 @@ function withContextReplayEndpoint(skill, _intent, _contextUrl) {
131179
131326
  return skill;
131180
131327
  }
131181
131328
  function isSearchLikeIntent(intent, contextUrl) {
131182
- if (/\b(search|find|lookup|browse|discover)\b/i.test(intent ?? ""))
131329
+ if (isListLikeIntent(intent))
131183
131330
  return true;
131184
131331
  try {
131185
131332
  const pathname = contextUrl ? new URL(contextUrl).pathname.toLowerCase() : "";
@@ -132051,7 +132198,20 @@ function inferPreferredEntityTokens(intent) {
132051
132198
  return [];
132052
132199
  }
132053
132200
  function isAcceptableIntentResult(result, intent) {
132054
- return assessIntentResult(result, intent).verdict !== "fail";
132201
+ if (assessIntentResult(result, intent).verdict === "fail")
132202
+ return false;
132203
+ if (!cardinalityMatches(intent, { kind: "value", value: unwrapResultPayload(result) }))
132204
+ return false;
132205
+ return true;
132206
+ }
132207
+ function unwrapResultPayload(result) {
132208
+ if (result == null || typeof result !== "object" || Array.isArray(result))
132209
+ return result;
132210
+ const rec = { ...result };
132211
+ for (const k of ["available_endpoints", "available_operations", "shortlist_for_judgment", "workflow_dag", "walked_from", "exa_candidates", "run_plan"]) {
132212
+ delete rec[k];
132213
+ }
132214
+ return rec;
132055
132215
  }
132056
132216
  function candidateMatchesPreferredEntity(candidate, preferredTokens) {
132057
132217
  if (preferredTokens.length === 0)
@@ -135893,6 +136053,8 @@ var init_orchestrator = __esm(async () => {
135893
136053
  init_search_forms();
135894
136054
  init_ddg_search();
135895
136055
  init_cached_resolution();
136056
+ init_cardinality();
136057
+ init_cardinality();
135896
136058
  init_principal_scope();
135897
136059
  init_yield_safety();
135898
136060
  init_trace_store();
@@ -142647,9 +142809,14 @@ async function registerRoutes(app) {
142647
142809
  recovered = true;
142648
142810
  } else if (errResult.available_endpoints?.length === 1) {
142649
142811
  const only = errResult.available_endpoints[0].endpoint_id;
142650
- console.log(`[exec] D7 single-endpoint skill: rewriting endpoint_id ${want} → ${only}`);
142651
- execParams.endpoint_id = only;
142652
- recovered = true;
142812
+ const onlyEp = (skill.endpoints ?? []).find((e) => e.endpoint_id === only);
142813
+ if (!onlyEp || cardinalityMatches(intent, { kind: "route", route: onlyEp }, { contextUrl: context_url })) {
142814
+ console.log(`[exec] D7 single-endpoint skill: rewriting endpoint_id ${want} → ${only}`);
142815
+ execParams.endpoint_id = only;
142816
+ recovered = true;
142817
+ } else {
142818
+ console.log(`[exec] D7 single-endpoint skill: only endpoint ${only} is a single-item route for a list intent — not forcing (cardinality gate)`);
142819
+ }
142653
142820
  }
142654
142821
  }
142655
142822
  if (!recovered && (skill.endpoints?.length ?? 0) >= 2 && intent) {
@@ -143877,6 +144044,7 @@ var init_routes = __esm(async () => {
143877
144044
  init_client();
143878
144045
  init_reveng_server_first();
143879
144046
  init_header_classify();
144047
+ init_cardinality();
143880
144048
  init_capture_spool();
143881
144049
  init_nanoid();
143882
144050
  init_marketplace();
@@ -232579,7 +232747,7 @@ async function cmdResolve(flags) {
232579
232747
  }
232580
232748
  if (resolveCacheSafe(flags)) {
232581
232749
  const cachedHit = peekResolution(resolveCacheKeyFor(flags, intent), resolveCacheTtlMs());
232582
- if (cachedHit) {
232750
+ if (cachedHit && resolutionCardinalityMatches(intent, cachedHit.result ?? cachedHit.data)) {
232583
232751
  const replay = markResolveCacheReplay(cachedHit);
232584
232752
  const hostType2 = detectTelemetryHostType();
232585
232753
  if (process.env.UNBROWSE_LANDING_TOKEN || process.env.UNBROWSE_ATTRIBUTION_B64) {
@@ -232799,7 +232967,7 @@ async function cmdResolve(flags) {
232799
232967
  if (skill?.skill_id && trace) {
232800
232968
  result._feedback = `unbrowse feedback --skill ${skill.skill_id} --endpoint ${trace.endpoint_id || "?"} --rating <1-5>`;
232801
232969
  }
232802
- if (resolveCacheSafe(flags) && isResolveSuccessResult(result)) {
232970
+ if (resolveCacheSafe(flags) && isResolveSuccessResult(result) && resolutionCardinalityMatches(intent, result.result ?? result.data)) {
232803
232971
  storeResolution(resolveCacheKeyFor(flags, intent), result, resolveCacheTtlMs());
232804
232972
  }
232805
232973
  output(result, !!flags.pretty);
@@ -233085,14 +233253,30 @@ async function cmdRun(args, flags, verb = "run") {
233085
233253
  } else if (explicitEndpointId || !bestEndpoint || endpointIsSafeToAutoExecute(bestEndpoint)) {
233086
233254
  runPlan.push({ step: "execute", mode: "direct_api", status: "started", endpoint_id: endpointToExecute });
233087
233255
  const resolvedSource = typeof result.source === "string" ? result.source : undefined;
233088
- result = await withPendingNotice(api4("POST", `/v1/skills/${skillId}/execute`, execBody(endpointToExecute)), "Executing best endpoint...");
233089
- if (resolvedSource && typeof result.source !== "string")
233090
- result.source = resolvedSource;
233091
- runPlan[runPlan.length - 1] = {
233092
- ...runPlan[runPlan.length - 1],
233093
- status: isResolveSuccessResult(result) ? "complete" : "error",
233094
- error: resolveResultError(result) ?? null
233095
- };
233256
+ const deferralResult = result;
233257
+ const executed = await withPendingNotice(api4("POST", `/v1/skills/${skillId}/execute`, execBody(endpointToExecute)), "Executing best endpoint...");
233258
+ if (!explicitEndpointId && isResolveSuccessResult(executed) && !resolutionCardinalityMatches(intent, executed.result ?? executed.data)) {
233259
+ runPlan[runPlan.length - 1] = {
233260
+ ...runPlan[runPlan.length - 1],
233261
+ status: "skipped",
233262
+ reason: "cardinality_mismatch_single_item"
233263
+ };
233264
+ deferralResult.next_action = {
233265
+ title: "List intent returned a single item",
233266
+ command: `unbrowse execute --skill ${skillId} --endpoint ${endpointToExecute}`,
233267
+ why: "Auto-execute yielded a single item for a list/search intent; the page's listings are likely JS-rendered behind an internal API. Returning the route shortlist instead of one item."
233268
+ };
233269
+ result = deferralResult;
233270
+ } else {
233271
+ result = executed;
233272
+ if (resolvedSource && typeof result.source !== "string")
233273
+ result.source = resolvedSource;
233274
+ runPlan[runPlan.length - 1] = {
233275
+ ...runPlan[runPlan.length - 1],
233276
+ status: isResolveSuccessResult(result) ? "complete" : "error",
233277
+ error: resolveResultError(result) ?? null
233278
+ };
233279
+ }
233096
233280
  } else {
233097
233281
  runPlan.push({
233098
233282
  step: "execute",
@@ -234916,6 +235100,7 @@ var init_cli = __esm(async () => {
234916
235100
  init_extract_auth_header();
234917
235101
  init_kuri_proxy_bridge();
234918
235102
  init_cached_resolution();
235103
+ init_cardinality();
234919
235104
  init_issue();
234920
235105
  init_client2();
234921
235106
  init_impact_log();
@@ -236206,6 +236391,7 @@ __export(exports_orchestrator, {
236206
236391
  selectSkillIdsToHydrate: () => selectSkillIdsToHydrate2,
236207
236392
  selectSearchTermsForExecution: () => selectSearchTermsForExecution2,
236208
236393
  scopedCacheKey: () => scopedCacheKey2,
236394
+ schemaLooksLikeSingleItem: () => schemaLooksLikeSingleItem,
236209
236395
  resolveEndpointTemplateBindings: () => resolveEndpointTemplateBindings2,
236210
236396
  resolveAndExecute: () => resolveAndExecute2,
236211
236397
  registrableHost: () => registrableHost2,
@@ -236220,7 +236406,9 @@ __export(exports_orchestrator, {
236220
236406
  pickPreferredSkillSnapshot: () => pickPreferredSkillSnapshot2,
236221
236407
  persistDomainCache: () => persistDomainCache2,
236222
236408
  marketplaceSkillMatchesContext: () => marketplaceSkillMatchesContext2,
236409
+ looksLikeSingleItemRoute: () => looksLikeSingleItemRoute,
236223
236410
  isRouteCacheEntryStale: () => isRouteCacheEntryStale2,
236411
+ isResolveUsableEndpointForIntent: () => isResolveUsableEndpointForIntent2,
236224
236412
  isCachedSkillRelevantForIntent: () => isCachedSkillRelevantForIntent2,
236225
236413
  invalidateRouteCacheForDomain: () => invalidateRouteCacheForDomain3,
236226
236414
  inferSearchParamOverrides: () => inferSearchParamOverrides2,
@@ -236275,7 +236463,10 @@ function pickWalkTarget2(requestedUrl, ranked, minScore = 0.8) {
236275
236463
  return false;
236276
236464
  }
236277
236465
  };
236278
- return eligible.find((c) => hasPath(c.url)) ?? eligible[0];
236466
+ const reqReg = registrableHost2(requestedUrl);
236467
+ const sameDomain = reqReg ? eligible.filter((c) => registrableHost2(c.url) === reqReg) : [];
236468
+ const pool2 = sameDomain.length > 0 ? sameDomain : eligible;
236469
+ return pool2.find((c) => hasPath(c.url)) ?? pool2[0];
236279
236470
  }
236280
236471
  function artifactResultWithShortlist2(artifact, skillId, triggerUrl) {
236281
236472
  const ep = artifact.endpoint;
@@ -236736,6 +236927,9 @@ function endpointTargetsMismatchedLocalReplayHost2(endpoint, contextUrl) {
236736
236927
  function endpointHasNegativeTag2(endpoint, tag) {
236737
236928
  return (endpoint.semantic?.negative_tags ?? []).some((candidate) => candidate.trim().toLowerCase() === tag.trim().toLowerCase());
236738
236929
  }
236930
+ function looksLikeSingleItemRoute(endpoint) {
236931
+ return routeLooksLikeSingleItem(endpoint);
236932
+ }
236739
236933
  function isResolveUsableEndpointForIntent2(endpoint, intent, contextUrl) {
236740
236934
  if (endpointTargetsMismatchedLocalReplayHost2(endpoint, contextUrl))
236741
236935
  return false;
@@ -236745,6 +236939,9 @@ function isResolveUsableEndpointForIntent2(endpoint, intent, contextUrl) {
236745
236939
  if (isFeedTimelineIntent2(intent, contextUrl) && endpointHasNegativeTag2(endpoint, "helper")) {
236746
236940
  return false;
236747
236941
  }
236942
+ if (!cardinalityMatches(intent, { kind: "route", route: endpoint }, { contextUrl })) {
236943
+ return false;
236944
+ }
236748
236945
  return true;
236749
236946
  }
236750
236947
  function normalizeRouteContext2(url) {
@@ -236887,7 +237084,7 @@ function withContextReplayEndpoint2(skill, _intent, _contextUrl) {
236887
237084
  return skill;
236888
237085
  }
236889
237086
  function isSearchLikeIntent2(intent, contextUrl) {
236890
- if (/\b(search|find|lookup|browse|discover)\b/i.test(intent ?? ""))
237087
+ if (isListLikeIntent(intent))
236891
237088
  return true;
236892
237089
  try {
236893
237090
  const pathname = contextUrl ? new URL(contextUrl).pathname.toLowerCase() : "";
@@ -237764,7 +237961,20 @@ function inferPreferredEntityTokens2(intent) {
237764
237961
  return [];
237765
237962
  }
237766
237963
  function isAcceptableIntentResult2(result, intent) {
237767
- return assessIntentResult(result, intent).verdict !== "fail";
237964
+ if (assessIntentResult(result, intent).verdict === "fail")
237965
+ return false;
237966
+ if (!cardinalityMatches(intent, { kind: "value", value: unwrapResultPayload2(result) }))
237967
+ return false;
237968
+ return true;
237969
+ }
237970
+ function unwrapResultPayload2(result) {
237971
+ if (result == null || typeof result !== "object" || Array.isArray(result))
237972
+ return result;
237973
+ const rec = { ...result };
237974
+ for (const k of ["available_endpoints", "available_operations", "shortlist_for_judgment", "workflow_dag", "walked_from", "exa_candidates", "run_plan"]) {
237975
+ delete rec[k];
237976
+ }
237977
+ return rec;
237768
237978
  }
237769
237979
  function candidateMatchesPreferredEntity2(candidate, preferredTokens) {
237770
237980
  if (preferredTokens.length === 0)
@@ -241606,6 +241816,8 @@ var init_orchestrator2 = __esm(async () => {
241606
241816
  init_search_forms();
241607
241817
  init_ddg_search();
241608
241818
  init_cached_resolution();
241819
+ init_cardinality();
241820
+ init_cardinality();
241609
241821
  init_principal_scope();
241610
241822
  init_yield_safety();
241611
241823
  init_trace_store();
Binary file
@@ -2,7 +2,7 @@
2
2
  "repo_url": "https://github.com/justrach/kuri.git",
3
3
  "branch": "adding-extensions",
4
4
  "source_sha": "149881254046a20778f642b69f20f0c6468f6fb4",
5
- "built_at": "2026-06-17T21:47:52.749Z",
5
+ "built_at": "2026-06-18T03:58:10.362Z",
6
6
  "binaries": {
7
7
  "darwin-arm64": {
8
8
  "zig_target": "aarch64-macos",
@@ -21,11 +21,11 @@
21
21
  },
22
22
  "linux-x64": {
23
23
  "zig_target": "x86_64-linux",
24
- "sha256": "250b2dfafc912dfda669416984036cf745749a0f5322c85acf0f95d7902e2dff"
24
+ "sha256": "e73aecfbf07001ba0be5032118790eb253ad5d8d12caca6a1dd5ad3ccab44b9e"
25
25
  },
26
26
  "win-x64": {
27
27
  "zig_target": "x86_64-windows-gnu",
28
- "sha256": "f920a3a3f95e38c3245c52af11989a046a0c8c87b4091d8b3c0a1638b44b6179",
28
+ "sha256": "9ecbc82be646e755e4664051cf345d54dde3c6610e457d763deff67895047963",
29
29
  "source": "pre-staged"
30
30
  }
31
31
  },
@@ -33,22 +33,22 @@
33
33
  "darwin-arm64": {
34
34
  "zig_target": "aarch64-macos",
35
35
  "lib": "libkuri_ffi.dylib",
36
- "sha256": "898a9290964371d9814d98de79bc62678833a0b914c47f765d46d6080ee391d5"
36
+ "sha256": "6c72cf383df4fa3f870b745da43d64eae8f67e58c6f971214ac29602fb649939"
37
37
  },
38
38
  "darwin-x64": {
39
39
  "zig_target": "x86_64-macos",
40
40
  "lib": "libkuri_ffi.dylib",
41
- "sha256": "730a09622d95ee3a5c9f5c41a7971d8ab93e7ca401be717c54c13bbbe498f604"
41
+ "sha256": "82480772ddc8e44c8e34e70b80d7dc0969004942f77276587af450b62d3d2750"
42
42
  },
43
43
  "linux-arm64": {
44
44
  "zig_target": "aarch64-linux",
45
45
  "lib": "libkuri_ffi.so",
46
- "sha256": "ef27e02d48f90d159b0bbcbc35b07970354af2924653ba0b00a863d9153c0313"
46
+ "sha256": "ef8dfa2b634f04294f93a94472d9856ba777681afaab2d4213f0e29821882e07"
47
47
  },
48
48
  "linux-x64": {
49
49
  "zig_target": "x86_64-linux",
50
50
  "lib": "libkuri_ffi.so",
51
- "sha256": "de4075e2444204d2f860d1293577d4389247c6fe55049c7505196d9e8eb5f4fb"
51
+ "sha256": "fb29ad2b71186d176306321d17e88074a67fea139991faef9aa4862333942c9e"
52
52
  }
53
53
  }
54
54
  }
Binary file