unbrowse 9.6.1 → 9.6.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "unbrowse",
3
- "version": "9.6.1",
3
+ "version": "9.6.2",
4
4
  "repository": {
5
5
  "type": "git",
6
6
  "url": "git+https://github.com/unbrowse-ai/unbrowse.git"
package/runtime/cli.js CHANGED
@@ -1752,10 +1752,74 @@ function valueLooksLikeSingleItem(value) {
1752
1752
  const hasPriceish = "offers" in obj || "price" in obj || "sku" in obj;
1753
1753
  return isItemType || hasName && hasPriceish;
1754
1754
  }
1755
- function resolutionCardinalityMatches(intent, data) {
1756
- if (!isListLikeIntent(intent))
1755
+ function schemaLooksLikeSingleItem(rs) {
1756
+ if (!rs || typeof rs !== "object")
1757
+ return false;
1758
+ const schema = rs;
1759
+ if (schema.type === "array")
1760
+ return false;
1761
+ const props = schema.properties ?? {};
1762
+ for (const key of COLLECTION_KEYS) {
1763
+ if (key in props)
1764
+ return false;
1765
+ }
1766
+ for (const value of Object.values(props)) {
1767
+ if (value && typeof value === "object" && value.type === "array" && value.items?.type === "object") {
1768
+ return false;
1769
+ }
1770
+ }
1771
+ if (schema.type !== "object")
1772
+ return false;
1773
+ const hasType = "@type" in props;
1774
+ const hasName = "name" in props || "title" in props;
1775
+ const hasPriceish = "offers" in props || "price" in props || "sku" in props;
1776
+ return hasType || hasName && hasPriceish;
1777
+ }
1778
+ function routeLooksLikeSingleItem(route) {
1779
+ const tmpl = route.url_template ?? "";
1780
+ let pathAndQuery = tmpl;
1781
+ try {
1782
+ const u = new URL(tmpl);
1783
+ pathAndQuery = `${u.pathname}${u.search}`;
1784
+ } catch {}
1785
+ const lower = pathAndQuery.toLowerCase();
1786
+ if (/\/(?:search|q|categories?|browse|results?|listings|explore|discover|feed|catalog(?:ue)?|collections?|shop|all)\b/.test(lower) || /[?&](?:q|query|keyword|keywords|search|term|category|cat|page)=/.test(lower)) {
1787
+ return false;
1788
+ }
1789
+ if (/\/(?:p|product|products|item|items|listing|detail|details|dp|pd|sku)\/[^/]+/.test(lower))
1757
1790
  return true;
1758
- return !valueLooksLikeSingleItem(data);
1791
+ const lastSeg = lower.split("?")[0].replace(/\/+$/, "").split("/").pop() ?? "";
1792
+ if (/-\d{3,}$/.test(lastSeg) || /^\d{3,}$/.test(lastSeg))
1793
+ return true;
1794
+ if (/\{[^}]+\}/.test(lower))
1795
+ return false;
1796
+ return schemaLooksLikeSingleItem(route.response_schema);
1797
+ }
1798
+ function urlPathLooksListLike(contextUrl) {
1799
+ if (!contextUrl)
1800
+ return false;
1801
+ try {
1802
+ const pathname = new URL(contextUrl).pathname.toLowerCase();
1803
+ return /\/(?:search|basic-search|result-page|results?|discover|browse|categories?|q|listings|feed|catalog(?:ue)?)\b/.test(pathname);
1804
+ } catch {
1805
+ return false;
1806
+ }
1807
+ }
1808
+ function cardinalityMatches(intent, subject, opts) {
1809
+ const wantsMany = isListLikeIntent(intent) || urlPathLooksListLike(opts?.contextUrl);
1810
+ if (!wantsMany)
1811
+ return true;
1812
+ switch (subject.kind) {
1813
+ case "value":
1814
+ return !valueLooksLikeSingleItem(subject.value);
1815
+ case "schema":
1816
+ return !schemaLooksLikeSingleItem(subject.schema);
1817
+ case "route":
1818
+ return !routeLooksLikeSingleItem(subject.route);
1819
+ }
1820
+ }
1821
+ function resolutionCardinalityMatches(intent, data) {
1822
+ return cardinalityMatches(intent, { kind: "value", value: data });
1759
1823
  }
1760
1824
  var LIST_INTENT_RE, ITEM_SCHEMA_TYPES, COLLECTION_KEYS;
1761
1825
  var init_cardinality = __esm(() => {
@@ -2286,7 +2350,7 @@ var init_telemetry = __esm(() => {
2286
2350
  });
2287
2351
 
2288
2352
  // .tmp-runtime-src/build-info.generated.ts
2289
- var BUILD_RELEASE_VERSION = "9.6.1", BUILD_GIT_SHA = "7c8049ccfb77", BUILD_CODE_HASH = "5d9ebf619c61", BUILD_RELEASE_MANIFEST_BASE64 = "eyJzY2hlbWFfdmVyc2lvbiI6MSwicmVsZWFzZV92ZXJzaW9uIjoiOS42LjEiLCJnaXRfc2hhIjoiN2M4MDQ5Y2NmYjc3IiwiY29kZV9oYXNoIjoiNWQ5ZWJmNjE5YzYxIiwidHJhY2VfdmVyc2lvbiI6IjVkOWViZjYxOWM2MUA3YzgwNDljY2ZiNzciLCJpc3N1ZWRfYXQiOiIyMDI2LTA2LTE4VDAzOjA1OjI0LjExOVoifQ", BUILD_RELEASE_MANIFEST_SIGNATURE = "EvwEtBNSpI-heDzj2LEUfGnS7PdM_EEZif1VhvQBaz8", BUILD_DEFAULT_BACKEND_URL = "https://beta-api.unbrowse.ai", BUILD_DEFAULT_PROFILE = "";
2353
+ var BUILD_RELEASE_VERSION = "9.6.2", BUILD_GIT_SHA = "d2d14a6629a0", BUILD_CODE_HASH = "5d9ebf619c61", BUILD_RELEASE_MANIFEST_BASE64 = "eyJzY2hlbWFfdmVyc2lvbiI6MSwicmVsZWFzZV92ZXJzaW9uIjoiOS42LjIiLCJnaXRfc2hhIjoiZDJkMTRhNjYyOWEwIiwiY29kZV9oYXNoIjoiNWQ5ZWJmNjE5YzYxIiwidHJhY2VfdmVyc2lvbiI6IjVkOWViZjYxOWM2MUBkMmQxNGE2NjI5YTAiLCJpc3N1ZWRfYXQiOiIyMDI2LTA2LTE4VDA0OjE3OjAzLjg1M1oifQ", BUILD_RELEASE_MANIFEST_SIGNATURE = "OefvyW2iLVVPQ-0HMg9Mz-bciCmP8LE5u7fVEss_98E", BUILD_DEFAULT_BACKEND_URL = "https://beta-api.unbrowse.ai", BUILD_DEFAULT_PROFILE = "";
2290
2354
 
2291
2355
  // .tmp-runtime-src/version.ts
2292
2356
  import { createHash as createHash7 } from "crypto";
@@ -45983,6 +46047,130 @@ var init_header_classify = __esm(() => {
45983
46047
  SENSITIVE_HEADER_PATTERN = /token|key|secret|credential|password|session/i;
45984
46048
  });
45985
46049
 
46050
+ // .tmp-runtime-src/values/cardinality.ts
46051
+ function isListLikeIntent2(intent) {
46052
+ return LIST_INTENT_RE2.test(intent ?? "");
46053
+ }
46054
+ function valueLooksLikeSingleItem2(value) {
46055
+ if (value == null || Array.isArray(value) || typeof value !== "object")
46056
+ return false;
46057
+ const obj = value;
46058
+ for (const key of COLLECTION_KEYS2) {
46059
+ if (Array.isArray(obj[key]))
46060
+ return false;
46061
+ }
46062
+ for (const v of Object.values(obj)) {
46063
+ if (Array.isArray(v) && v.some((x) => x !== null && typeof x === "object"))
46064
+ return false;
46065
+ }
46066
+ const atType = typeof obj["@type"] === "string" ? obj["@type"].toLowerCase() : "";
46067
+ const isItemType = ITEM_SCHEMA_TYPES2.has(atType);
46068
+ const hasName = "name" in obj || "title" in obj || "headline" in obj;
46069
+ const hasPriceish = "offers" in obj || "price" in obj || "sku" in obj;
46070
+ return isItemType || hasName && hasPriceish;
46071
+ }
46072
+ function schemaLooksLikeSingleItem2(rs) {
46073
+ if (!rs || typeof rs !== "object")
46074
+ return false;
46075
+ const schema = rs;
46076
+ if (schema.type === "array")
46077
+ return false;
46078
+ const props = schema.properties ?? {};
46079
+ for (const key of COLLECTION_KEYS2) {
46080
+ if (key in props)
46081
+ return false;
46082
+ }
46083
+ for (const value of Object.values(props)) {
46084
+ if (value && typeof value === "object" && value.type === "array" && value.items?.type === "object") {
46085
+ return false;
46086
+ }
46087
+ }
46088
+ if (schema.type !== "object")
46089
+ return false;
46090
+ const hasType = "@type" in props;
46091
+ const hasName = "name" in props || "title" in props;
46092
+ const hasPriceish = "offers" in props || "price" in props || "sku" in props;
46093
+ return hasType || hasName && hasPriceish;
46094
+ }
46095
+ function routeLooksLikeSingleItem2(route) {
46096
+ const tmpl = route.url_template ?? "";
46097
+ let pathAndQuery = tmpl;
46098
+ try {
46099
+ const u = new URL(tmpl);
46100
+ pathAndQuery = `${u.pathname}${u.search}`;
46101
+ } catch {}
46102
+ const lower = pathAndQuery.toLowerCase();
46103
+ if (/\/(?:search|q|categories?|browse|results?|listings|explore|discover|feed|catalog(?:ue)?|collections?|shop|all)\b/.test(lower) || /[?&](?:q|query|keyword|keywords|search|term|category|cat|page)=/.test(lower)) {
46104
+ return false;
46105
+ }
46106
+ if (/\/(?:p|product|products|item|items|listing|detail|details|dp|pd|sku)\/[^/]+/.test(lower))
46107
+ return true;
46108
+ const lastSeg = lower.split("?")[0].replace(/\/+$/, "").split("/").pop() ?? "";
46109
+ if (/-\d{3,}$/.test(lastSeg) || /^\d{3,}$/.test(lastSeg))
46110
+ return true;
46111
+ if (/\{[^}]+\}/.test(lower))
46112
+ return false;
46113
+ return schemaLooksLikeSingleItem2(route.response_schema);
46114
+ }
46115
+ function urlPathLooksListLike2(contextUrl) {
46116
+ if (!contextUrl)
46117
+ return false;
46118
+ try {
46119
+ const pathname = new URL(contextUrl).pathname.toLowerCase();
46120
+ return /\/(?:search|basic-search|result-page|results?|discover|browse|categories?|q|listings|feed|catalog(?:ue)?)\b/.test(pathname);
46121
+ } catch {
46122
+ return false;
46123
+ }
46124
+ }
46125
+ function cardinalityMatches2(intent, subject, opts) {
46126
+ const wantsMany = isListLikeIntent2(intent) || urlPathLooksListLike2(opts?.contextUrl);
46127
+ if (!wantsMany)
46128
+ return true;
46129
+ switch (subject.kind) {
46130
+ case "value":
46131
+ return !valueLooksLikeSingleItem2(subject.value);
46132
+ case "schema":
46133
+ return !schemaLooksLikeSingleItem2(subject.schema);
46134
+ case "route":
46135
+ return !routeLooksLikeSingleItem2(subject.route);
46136
+ }
46137
+ }
46138
+ var LIST_INTENT_RE2, ITEM_SCHEMA_TYPES2, COLLECTION_KEYS2;
46139
+ var init_cardinality2 = __esm(() => {
46140
+ LIST_INTENT_RE2 = /\b(search|find|lookup|browse|discover|list(?:ings?)?|feed|catalog(?:ue)?)\b/i;
46141
+ ITEM_SCHEMA_TYPES2 = new Set([
46142
+ "product",
46143
+ "offer",
46144
+ "article",
46145
+ "newsarticle",
46146
+ "blogposting",
46147
+ "recipe",
46148
+ "event",
46149
+ "place",
46150
+ "localbusiness",
46151
+ "jobposting",
46152
+ "book",
46153
+ "movie",
46154
+ "creativework",
46155
+ "person",
46156
+ "organization"
46157
+ ]);
46158
+ COLLECTION_KEYS2 = [
46159
+ "itemListElement",
46160
+ "items",
46161
+ "results",
46162
+ "products",
46163
+ "listings",
46164
+ "data",
46165
+ "edges",
46166
+ "hits",
46167
+ "records",
46168
+ "entries",
46169
+ "rows",
46170
+ "nodes"
46171
+ ];
46172
+ });
46173
+
45986
46174
  // node_modules/.bun/nanoid@5.1.11/node_modules/nanoid/url-alphabet/index.js
45987
46175
  var urlAlphabet = "useandom-26T198340PX75pxJACKVERYMINDBUSHWOLF_GQZbfghjklqvwyzrict";
45988
46176
 
@@ -117291,7 +117479,7 @@ __export(exports_extraction, {
117291
117479
  cleanDOM: () => cleanDOM,
117292
117480
  buildStructuredDataHeader: () => buildStructuredDataHeader
117293
117481
  });
117294
- function extractHtmlMetadataFallback(html3) {
117482
+ function extractHtmlMetadataFallback(html3, intent) {
117295
117483
  if (!html3 || html3.length < 100)
117296
117484
  return null;
117297
117485
  try {
@@ -117323,8 +117511,9 @@ function extractHtmlMetadataFallback(html3) {
117323
117511
  jsonLdBlocks.push(parsed);
117324
117512
  } catch {}
117325
117513
  });
117326
- if (jsonLdBlocks.length > 0)
117327
- out.json_ld = jsonLdBlocks;
117514
+ const usableJsonLd = isListLikeIntent2(intent) ? jsonLdBlocks.filter((b) => !valueLooksLikeSingleItem2(b)) : jsonLdBlocks;
117515
+ if (usableJsonLd.length > 0)
117516
+ out.json_ld = usableJsonLd;
117328
117517
  const headings = [];
117329
117518
  $2("h1, h2").each((_, el) => {
117330
117519
  const text3 = cleanText($2(el).text());
@@ -119647,6 +119836,16 @@ function scoreSiteMetaJsonLdDemotion(structure, intent) {
119647
119836
  return 0;
119648
119837
  return -200;
119649
119838
  }
119839
+ function isSingleItemStructureForList(structure, intent) {
119840
+ if (!TINY_RESULT_LIST_INTENT.test(intent.toLowerCase()))
119841
+ return false;
119842
+ if (structure.type === "repeated-elements")
119843
+ return false;
119844
+ return valueLooksLikeSingleItem2(structure.data);
119845
+ }
119846
+ function scoreSingleItemListMismatch(structure, intent) {
119847
+ return isSingleItemStructureForList(structure, intent) ? -200 : 0;
119848
+ }
119650
119849
  function looksLikeTinyContentReadResult(data2, intent) {
119651
119850
  if (data2 == null)
119652
119851
  return { tiny: false, bytes: 0, stringLeafChars: 0 };
@@ -119800,9 +119999,9 @@ function extractFromDOM(html3, intent, contextUrl) {
119800
119999
  const articleStructures = extractArticleBodySpecial(html3.length > 600000 ? html3.slice(0, 600000) : html3, intent);
119801
120000
  const allStructures = [...flashStructures, ...githubStructures, ...repeatedPersonStructures, ...packageSearchStructures, ...xProfileStructures, ...postStructures, ...repeatedArticleStructures, ...trendStructures, ...definitionStructures, ...packageDetailStructures, ...arxivAbstractStructures, ...courseStructures, ...articleStructures, ...spaStructures, ...parseStructured(cleaned)].map((structure) => normalizeStructureForIntent(structure, intent));
119802
120001
  const isListIntent = TINY_RESULT_LIST_INTENT.test(intent.toLowerCase());
119803
- const structures = allStructures.filter((s) => !(s.type === "repeated-elements" && looksLikeDegenerateRowArray(s.data)) && !looksLikeConfigShape(s.data) && !looksLikeEmptyContainer(s.data) && !(isListIntent && looksLikeSiteMetaJsonLd(s.data)));
120002
+ const structures = allStructures.filter((s) => !(s.type === "repeated-elements" && looksLikeDegenerateRowArray(s.data)) && !looksLikeConfigShape(s.data) && !looksLikeEmptyContainer(s.data) && !(isListIntent && looksLikeSiteMetaJsonLd(s.data)) && !isSingleItemStructureForList(s, intent));
119804
120003
  if (structures.length === 0) {
119805
- const fallback2 = extractHtmlMetadataFallback(html3);
120004
+ const fallback2 = extractHtmlMetadataFallback(html3, intent);
119806
120005
  if (fallback2) {
119807
120006
  return _finalize({ data: fallback2, extraction_method: "html_metadata_fallback", confidence: 0.4 });
119808
120007
  }
@@ -119811,7 +120010,7 @@ function extractFromDOM(html3, intent, contextUrl) {
119811
120010
  const intentWords = intent.toLowerCase().split(/\s+/).filter(Boolean);
119812
120011
  const scored = structures.map((s) => ({
119813
120012
  structure: s,
119814
- score: scoreRelevance(s, intentWords) + scoreSemanticFit(s, intent) + scoreSparseLinkList(s) + scoreFieldRichness(s) + scoreConfigShapeDemotion(s) + scoreDegenerateRowDemotion(s) + scoreDuplicateRowDemotion(s) + scoreEmptyContainerDemotion(s) + scoreSiteMetaJsonLdDemotion(s, intent) + scoreTableIntentOverlapDemotion(s, intent, contextUrl, structures)
120013
+ score: scoreRelevance(s, intentWords) + scoreSemanticFit(s, intent) + scoreSparseLinkList(s) + scoreFieldRichness(s) + scoreConfigShapeDemotion(s) + scoreDegenerateRowDemotion(s) + scoreDuplicateRowDemotion(s) + scoreEmptyContainerDemotion(s) + scoreSiteMetaJsonLdDemotion(s, intent) + scoreSingleItemListMismatch(s, intent) + scoreTableIntentOverlapDemotion(s, intent, contextUrl, structures)
119815
120014
  }));
119816
120015
  scored.sort((a, b) => b.score - a.score);
119817
120016
  const passing = scored.filter((candidate) => assessIntentResult(candidate.structure.data, intent).verdict === "pass");
@@ -120427,6 +120626,7 @@ function sanitizeExtractionToJson(data2, depth = 0) {
120427
120626
  var STRIP_TAGS, CHROME_TAGS, AD_PATTERNS, HIDDEN_ATTRS, CONTENT_SELECTORS, CARD_SELECTORS, CONFIG_TOP_LEVEL_KEYS, CONFIG_CHUNK_VALUE_KEYS, INTENT_OVERLAP_STOPWORDS, SITE_META_LD_TYPES, TINY_RESULT_LIST_INTENT, TINY_RESULT_DETAIL_INTENT, STRUCTURED_DATA_HIGHLIGHT_TYPES, HTML_TAG_RE, TABLE_RE;
120428
120627
  var init_extraction = __esm(() => {
120429
120628
  init_esm11();
120629
+ init_cardinality2();
120430
120630
  STRIP_TAGS = new Set(["script", "style", "noscript", "svg", "iframe"]);
120431
120631
  CHROME_TAGS = new Set(["nav", "footer", "header"]);
120432
120632
  AD_PATTERNS = /\b(ad|ads|advert|advertisement|tracking|tracker|cookie-banner|cookie-consent|cookie-notice|popup|modal-overlay|gdpr|consent|banner-promo)\b/i;
@@ -129952,7 +130152,8 @@ function selectBestEndpoint(endpoints, intent, skillDomain, contextUrl) {
129952
130152
  const ranked = rankEndpoints(endpoints, intent, skillDomain, contextUrl);
129953
130153
  if (ranked.length === 0)
129954
130154
  throw new Error("All endpoints are disabled");
129955
- return ranked[0].endpoint;
130155
+ const preferred = ranked.find((r) => cardinalityMatches2(intent, { kind: "route", route: r.endpoint }, { contextUrl }));
130156
+ return (preferred ?? ranked[0]).endpoint;
129956
130157
  }
129957
130158
  function isHtml2(text3) {
129958
130159
  const trimmed = text3.trimStart().slice(0, 200).toLowerCase();
@@ -129999,6 +130200,7 @@ var init_execution = __esm(async () => {
129999
130200
  init_reveng_server_first();
130000
130201
  init_header_classify();
130001
130202
  init_storage_hole_bindings();
130203
+ init_cardinality2();
130002
130204
  init_sealed_blob_store();
130003
130205
  init_signer();
130004
130206
  init_bundle_scanner();
@@ -130551,69 +130753,6 @@ function bindingGraphFromOperationGraph(og) {
130551
130753
  return { endpoints, edges };
130552
130754
  }
130553
130755
 
130554
- // .tmp-runtime-src/values/cardinality.ts
130555
- function isListLikeIntent2(intent) {
130556
- return LIST_INTENT_RE2.test(intent ?? "");
130557
- }
130558
- function schemaLooksLikeSingleItem(rs) {
130559
- if (!rs || typeof rs !== "object")
130560
- return false;
130561
- const schema = rs;
130562
- if (schema.type === "array")
130563
- return false;
130564
- const props = schema.properties ?? {};
130565
- for (const key2 of COLLECTION_KEYS2) {
130566
- if (key2 in props)
130567
- return false;
130568
- }
130569
- for (const value of Object.values(props)) {
130570
- if (value && typeof value === "object" && value.type === "array" && value.items?.type === "object") {
130571
- return false;
130572
- }
130573
- }
130574
- if (schema.type !== "object")
130575
- return false;
130576
- const hasType = "@type" in props;
130577
- const hasName = "name" in props || "title" in props;
130578
- const hasPriceish = "offers" in props || "price" in props || "sku" in props;
130579
- return hasType || hasName && hasPriceish;
130580
- }
130581
- var LIST_INTENT_RE2, ITEM_SCHEMA_TYPES2, COLLECTION_KEYS2;
130582
- var init_cardinality2 = __esm(() => {
130583
- LIST_INTENT_RE2 = /\b(search|find|lookup|browse|discover|list(?:ings?)?|feed|catalog(?:ue)?)\b/i;
130584
- ITEM_SCHEMA_TYPES2 = new Set([
130585
- "product",
130586
- "offer",
130587
- "article",
130588
- "newsarticle",
130589
- "blogposting",
130590
- "recipe",
130591
- "event",
130592
- "place",
130593
- "localbusiness",
130594
- "jobposting",
130595
- "book",
130596
- "movie",
130597
- "creativework",
130598
- "person",
130599
- "organization"
130600
- ]);
130601
- COLLECTION_KEYS2 = [
130602
- "itemListElement",
130603
- "items",
130604
- "results",
130605
- "products",
130606
- "listings",
130607
- "data",
130608
- "edges",
130609
- "hits",
130610
- "records",
130611
- "entries",
130612
- "rows",
130613
- "nodes"
130614
- ];
130615
- });
130616
-
130617
130756
  // .tmp-runtime-src/values/yield-safety.ts
130618
130757
  function tokenizeKey(key2) {
130619
130758
  return key2.replace(/([a-z0-9])([A-Z])/g, "$1 $2").split(/[_\-.\s]+/).map((s) => s.toLowerCase()).filter(Boolean);
@@ -132745,26 +132884,6 @@ function endpointTargetsMismatchedLocalReplayHost(endpoint, contextUrl) {
132745
132884
  function endpointHasNegativeTag(endpoint, tag) {
132746
132885
  return (endpoint.semantic?.negative_tags ?? []).some((candidate) => candidate.trim().toLowerCase() === tag.trim().toLowerCase());
132747
132886
  }
132748
- function looksLikeSingleItemRoute(endpoint) {
132749
- const tmpl = endpoint.url_template ?? "";
132750
- let pathAndQuery = tmpl;
132751
- try {
132752
- const u = new URL(tmpl);
132753
- pathAndQuery = `${u.pathname}${u.search}`;
132754
- } catch {}
132755
- const lower = pathAndQuery.toLowerCase();
132756
- if (/\/(?:search|q|categories?|browse|results?|listings|explore|discover|feed|catalog(?:ue)?|collections?|shop|all)\b/.test(lower) || /[?&](?:q|query|keyword|keywords|search|term|category|cat|page)=/.test(lower)) {
132757
- return false;
132758
- }
132759
- if (/\/(?:p|product|products|item|items|listing|detail|details|dp|pd|sku)\/[^/]+/.test(lower))
132760
- return true;
132761
- const lastSeg = lower.split("?")[0].replace(/\/+$/, "").split("/").pop() ?? "";
132762
- if (/-\d{3,}$/.test(lastSeg) || /^\d{3,}$/.test(lastSeg))
132763
- return true;
132764
- if (/\{[^}]+\}/.test(lower))
132765
- return false;
132766
- return schemaLooksLikeSingleItem(endpoint.response_schema);
132767
- }
132768
132887
  function isResolveUsableEndpointForIntent(endpoint, intent, contextUrl) {
132769
132888
  if (endpointTargetsMismatchedLocalReplayHost(endpoint, contextUrl))
132770
132889
  return false;
@@ -132774,7 +132893,7 @@ function isResolveUsableEndpointForIntent(endpoint, intent, contextUrl) {
132774
132893
  if (isFeedTimelineIntent(intent, contextUrl) && endpointHasNegativeTag(endpoint, "helper")) {
132775
132894
  return false;
132776
132895
  }
132777
- if (isSearchLikeIntent(intent, contextUrl) && looksLikeSingleItemRoute(endpoint)) {
132896
+ if (!cardinalityMatches2(intent, { kind: "route", route: endpoint }, { contextUrl })) {
132778
132897
  return false;
132779
132898
  }
132780
132899
  return true;
@@ -133791,7 +133910,20 @@ function inferPreferredEntityTokens(intent) {
133791
133910
  return [];
133792
133911
  }
133793
133912
  function isAcceptableIntentResult(result, intent) {
133794
- return assessIntentResult(result, intent).verdict !== "fail";
133913
+ if (assessIntentResult(result, intent).verdict === "fail")
133914
+ return false;
133915
+ if (!cardinalityMatches2(intent, { kind: "value", value: unwrapResultPayload(result) }))
133916
+ return false;
133917
+ return true;
133918
+ }
133919
+ function unwrapResultPayload(result) {
133920
+ if (result == null || typeof result !== "object" || Array.isArray(result))
133921
+ return result;
133922
+ const rec = { ...result };
133923
+ for (const k of ["available_endpoints", "available_operations", "shortlist_for_judgment", "workflow_dag", "walked_from", "exa_candidates", "run_plan"]) {
133924
+ delete rec[k];
133925
+ }
133926
+ return rec;
133795
133927
  }
133796
133928
  function candidateMatchesPreferredEntity(candidate, preferredTokens) {
133797
133929
  if (preferredTokens.length === 0)
@@ -144132,9 +144264,14 @@ async function registerRoutes(app) {
144132
144264
  recovered = true;
144133
144265
  } else if (errResult.available_endpoints?.length === 1) {
144134
144266
  const only = errResult.available_endpoints[0].endpoint_id;
144135
- console.log(`[exec] D7 single-endpoint skill: rewriting endpoint_id ${want} → ${only}`);
144136
- execParams.endpoint_id = only;
144137
- recovered = true;
144267
+ const onlyEp = (skill.endpoints ?? []).find((e) => e.endpoint_id === only);
144268
+ if (!onlyEp || cardinalityMatches2(intent, { kind: "route", route: onlyEp }, { contextUrl: context_url })) {
144269
+ console.log(`[exec] D7 single-endpoint skill: rewriting endpoint_id ${want} → ${only}`);
144270
+ execParams.endpoint_id = only;
144271
+ recovered = true;
144272
+ } else {
144273
+ console.log(`[exec] D7 single-endpoint skill: only endpoint ${only} is a single-item route for a list intent — not forcing (cardinality gate)`);
144274
+ }
144138
144275
  }
144139
144276
  }
144140
144277
  if (!recovered && (skill.endpoints?.length ?? 0) >= 2 && intent) {
@@ -145362,6 +145499,7 @@ var init_routes = __esm(async () => {
145362
145499
  init_client3();
145363
145500
  init_reveng_server_first();
145364
145501
  init_header_classify();
145502
+ init_cardinality2();
145365
145503
  init_capture_spool();
145366
145504
  init_nanoid();
145367
145505
  init_marketplace();
@@ -150755,7 +150893,7 @@ __export(exports_extraction2, {
150755
150893
  cleanDOM: () => cleanDOM2,
150756
150894
  buildStructuredDataHeader: () => buildStructuredDataHeader2
150757
150895
  });
150758
- function extractHtmlMetadataFallback2(html3) {
150896
+ function extractHtmlMetadataFallback2(html3, intent) {
150759
150897
  if (!html3 || html3.length < 100)
150760
150898
  return null;
150761
150899
  try {
@@ -150787,8 +150925,9 @@ function extractHtmlMetadataFallback2(html3) {
150787
150925
  jsonLdBlocks.push(parsed);
150788
150926
  } catch {}
150789
150927
  });
150790
- if (jsonLdBlocks.length > 0)
150791
- out.json_ld = jsonLdBlocks;
150928
+ const usableJsonLd = isListLikeIntent2(intent) ? jsonLdBlocks.filter((b) => !valueLooksLikeSingleItem2(b)) : jsonLdBlocks;
150929
+ if (usableJsonLd.length > 0)
150930
+ out.json_ld = usableJsonLd;
150792
150931
  const headings = [];
150793
150932
  $2("h1, h2").each((_, el) => {
150794
150933
  const text3 = cleanText2($2(el).text());
@@ -153111,6 +153250,16 @@ function scoreSiteMetaJsonLdDemotion2(structure, intent) {
153111
153250
  return 0;
153112
153251
  return -200;
153113
153252
  }
153253
+ function isSingleItemStructureForList2(structure, intent) {
153254
+ if (!TINY_RESULT_LIST_INTENT2.test(intent.toLowerCase()))
153255
+ return false;
153256
+ if (structure.type === "repeated-elements")
153257
+ return false;
153258
+ return valueLooksLikeSingleItem2(structure.data);
153259
+ }
153260
+ function scoreSingleItemListMismatch2(structure, intent) {
153261
+ return isSingleItemStructureForList2(structure, intent) ? -200 : 0;
153262
+ }
153114
153263
  function looksLikeTinyContentReadResult2(data2, intent) {
153115
153264
  if (data2 == null)
153116
153265
  return { tiny: false, bytes: 0, stringLeafChars: 0 };
@@ -153264,9 +153413,9 @@ function extractFromDOM2(html3, intent, contextUrl) {
153264
153413
  const articleStructures = extractArticleBodySpecial2(html3.length > 600000 ? html3.slice(0, 600000) : html3, intent);
153265
153414
  const allStructures = [...flashStructures, ...githubStructures, ...repeatedPersonStructures, ...packageSearchStructures, ...xProfileStructures, ...postStructures, ...repeatedArticleStructures, ...trendStructures, ...definitionStructures, ...packageDetailStructures, ...arxivAbstractStructures, ...courseStructures, ...articleStructures, ...spaStructures, ...parseStructured2(cleaned)].map((structure) => normalizeStructureForIntent2(structure, intent));
153266
153415
  const isListIntent = TINY_RESULT_LIST_INTENT2.test(intent.toLowerCase());
153267
- const structures = allStructures.filter((s) => !(s.type === "repeated-elements" && looksLikeDegenerateRowArray2(s.data)) && !looksLikeConfigShape2(s.data) && !looksLikeEmptyContainer2(s.data) && !(isListIntent && looksLikeSiteMetaJsonLd2(s.data)));
153416
+ const structures = allStructures.filter((s) => !(s.type === "repeated-elements" && looksLikeDegenerateRowArray2(s.data)) && !looksLikeConfigShape2(s.data) && !looksLikeEmptyContainer2(s.data) && !(isListIntent && looksLikeSiteMetaJsonLd2(s.data)) && !isSingleItemStructureForList2(s, intent));
153268
153417
  if (structures.length === 0) {
153269
- const fallback2 = extractHtmlMetadataFallback2(html3);
153418
+ const fallback2 = extractHtmlMetadataFallback2(html3, intent);
153270
153419
  if (fallback2) {
153271
153420
  return _finalize({ data: fallback2, extraction_method: "html_metadata_fallback", confidence: 0.4 });
153272
153421
  }
@@ -153275,7 +153424,7 @@ function extractFromDOM2(html3, intent, contextUrl) {
153275
153424
  const intentWords = intent.toLowerCase().split(/\s+/).filter(Boolean);
153276
153425
  const scored = structures.map((s) => ({
153277
153426
  structure: s,
153278
- score: scoreRelevance2(s, intentWords) + scoreSemanticFit2(s, intent) + scoreSparseLinkList2(s) + scoreFieldRichness2(s) + scoreConfigShapeDemotion2(s) + scoreDegenerateRowDemotion2(s) + scoreDuplicateRowDemotion2(s) + scoreEmptyContainerDemotion2(s) + scoreSiteMetaJsonLdDemotion2(s, intent) + scoreTableIntentOverlapDemotion2(s, intent, contextUrl, structures)
153427
+ score: scoreRelevance2(s, intentWords) + scoreSemanticFit2(s, intent) + scoreSparseLinkList2(s) + scoreFieldRichness2(s) + scoreConfigShapeDemotion2(s) + scoreDegenerateRowDemotion2(s) + scoreDuplicateRowDemotion2(s) + scoreEmptyContainerDemotion2(s) + scoreSiteMetaJsonLdDemotion2(s, intent) + scoreSingleItemListMismatch2(s, intent) + scoreTableIntentOverlapDemotion2(s, intent, contextUrl, structures)
153279
153428
  }));
153280
153429
  scored.sort((a, b) => b.score - a.score);
153281
153430
  const passing = scored.filter((candidate) => assessIntentResult(candidate.structure.data, intent).verdict === "pass");
@@ -153891,6 +154040,7 @@ function sanitizeExtractionToJson2(data2, depth = 0) {
153891
154040
  var STRIP_TAGS2, CHROME_TAGS2, AD_PATTERNS2, HIDDEN_ATTRS2, CONTENT_SELECTORS2, CARD_SELECTORS2, CONFIG_TOP_LEVEL_KEYS2, CONFIG_CHUNK_VALUE_KEYS2, INTENT_OVERLAP_STOPWORDS2, SITE_META_LD_TYPES2, TINY_RESULT_LIST_INTENT2, TINY_RESULT_DETAIL_INTENT2, STRUCTURED_DATA_HIGHLIGHT_TYPES2, HTML_TAG_RE2, TABLE_RE2;
153892
154041
  var init_extraction2 = __esm(() => {
153893
154042
  init_esm11();
154043
+ init_cardinality2();
153894
154044
  STRIP_TAGS2 = new Set(["script", "style", "noscript", "svg", "iframe"]);
153895
154045
  CHROME_TAGS2 = new Set(["nav", "footer", "header"]);
153896
154046
  AD_PATTERNS2 = /\b(ad|ads|advert|advertisement|tracking|tracker|cookie-banner|cookie-consent|cookie-notice|popup|modal-overlay|gdpr|consent|banner-promo)\b/i;
@@ -240035,14 +240185,30 @@ async function cmdRun(args, flags, verb = "run") {
240035
240185
  } else if (explicitEndpointId || !bestEndpoint || endpointIsSafeToAutoExecute(bestEndpoint)) {
240036
240186
  runPlan.push({ step: "execute", mode: "direct_api", status: "started", endpoint_id: endpointToExecute });
240037
240187
  const resolvedSource = typeof result.source === "string" ? result.source : undefined;
240038
- result = await withPendingNotice(api4("POST", `/v1/skills/${skillId}/execute`, execBody(endpointToExecute)), "Executing best endpoint...");
240039
- if (resolvedSource && typeof result.source !== "string")
240040
- result.source = resolvedSource;
240041
- runPlan[runPlan.length - 1] = {
240042
- ...runPlan[runPlan.length - 1],
240043
- status: isResolveSuccessResult(result) ? "complete" : "error",
240044
- error: resolveResultError(result) ?? null
240045
- };
240188
+ const deferralResult = result;
240189
+ const executed = await withPendingNotice(api4("POST", `/v1/skills/${skillId}/execute`, execBody(endpointToExecute)), "Executing best endpoint...");
240190
+ if (!explicitEndpointId && isResolveSuccessResult(executed) && !resolutionCardinalityMatches(intent, executed.result ?? executed.data)) {
240191
+ runPlan[runPlan.length - 1] = {
240192
+ ...runPlan[runPlan.length - 1],
240193
+ status: "skipped",
240194
+ reason: "cardinality_mismatch_single_item"
240195
+ };
240196
+ deferralResult.next_action = {
240197
+ title: "List intent returned a single item",
240198
+ command: `unbrowse execute --skill ${skillId} --endpoint ${endpointToExecute}`,
240199
+ why: "Auto-execute yielded a single item for a list/search intent; the page's listings are likely JS-rendered behind an internal API. Returning the route shortlist instead of one item."
240200
+ };
240201
+ result = deferralResult;
240202
+ } else {
240203
+ result = executed;
240204
+ if (resolvedSource && typeof result.source !== "string")
240205
+ result.source = resolvedSource;
240206
+ runPlan[runPlan.length - 1] = {
240207
+ ...runPlan[runPlan.length - 1],
240208
+ status: isResolveSuccessResult(result) ? "complete" : "error",
240209
+ error: resolveResultError(result) ?? null
240210
+ };
240211
+ }
240046
240212
  } else {
240047
240213
  runPlan.push({
240048
240214
  step: "execute",
package/runtime/mcp.js CHANGED
@@ -36310,7 +36310,7 @@ var init_cached_resolution = __esm(() => {
36310
36310
  });
36311
36311
 
36312
36312
  // .tmp-runtime-src/build-info.generated.ts
36313
- var BUILD_RELEASE_VERSION = "9.6.1", BUILD_GIT_SHA = "7c8049ccfb77", BUILD_CODE_HASH = "5d9ebf619c61", BUILD_RELEASE_MANIFEST_BASE64 = "eyJzY2hlbWFfdmVyc2lvbiI6MSwicmVsZWFzZV92ZXJzaW9uIjoiOS42LjEiLCJnaXRfc2hhIjoiN2M4MDQ5Y2NmYjc3IiwiY29kZV9oYXNoIjoiNWQ5ZWJmNjE5YzYxIiwidHJhY2VfdmVyc2lvbiI6IjVkOWViZjYxOWM2MUA3YzgwNDljY2ZiNzciLCJpc3N1ZWRfYXQiOiIyMDI2LTA2LTE4VDAzOjA1OjI0LjExOVoifQ", BUILD_RELEASE_MANIFEST_SIGNATURE = "EvwEtBNSpI-heDzj2LEUfGnS7PdM_EEZif1VhvQBaz8", BUILD_DEFAULT_BACKEND_URL = "https://beta-api.unbrowse.ai", BUILD_DEFAULT_PROFILE = "";
36313
+ var BUILD_RELEASE_VERSION = "9.6.2", BUILD_GIT_SHA = "d2d14a6629a0", BUILD_CODE_HASH = "5d9ebf619c61", BUILD_RELEASE_MANIFEST_BASE64 = "eyJzY2hlbWFfdmVyc2lvbiI6MSwicmVsZWFzZV92ZXJzaW9uIjoiOS42LjIiLCJnaXRfc2hhIjoiZDJkMTRhNjYyOWEwIiwiY29kZV9oYXNoIjoiNWQ5ZWJmNjE5YzYxIiwidHJhY2VfdmVyc2lvbiI6IjVkOWViZjYxOWM2MUBkMmQxNGE2NjI5YTAiLCJpc3N1ZWRfYXQiOiIyMDI2LTA2LTE4VDA0OjE3OjAzLjg1M1oifQ", BUILD_RELEASE_MANIFEST_SIGNATURE = "OefvyW2iLVVPQ-0HMg9Mz-bciCmP8LE5u7fVEss_98E", BUILD_DEFAULT_BACKEND_URL = "https://beta-api.unbrowse.ai", BUILD_DEFAULT_PROFILE = "";
36314
36314
 
36315
36315
  // .tmp-runtime-src/version.ts
36316
36316
  import { createHash as createHash4 } from "crypto";
@@ -43177,6 +43177,133 @@ var init_header_classify = __esm(() => {
43177
43177
  SENSITIVE_HEADER_PATTERN = /token|key|secret|credential|password|session/i;
43178
43178
  });
43179
43179
 
43180
+ // .tmp-runtime-src/values/cardinality.ts
43181
+ function isListLikeIntent(intent) {
43182
+ return LIST_INTENT_RE.test(intent ?? "");
43183
+ }
43184
+ function valueLooksLikeSingleItem(value) {
43185
+ if (value == null || Array.isArray(value) || typeof value !== "object")
43186
+ return false;
43187
+ const obj = value;
43188
+ for (const key of COLLECTION_KEYS) {
43189
+ if (Array.isArray(obj[key]))
43190
+ return false;
43191
+ }
43192
+ for (const v of Object.values(obj)) {
43193
+ if (Array.isArray(v) && v.some((x) => x !== null && typeof x === "object"))
43194
+ return false;
43195
+ }
43196
+ const atType = typeof obj["@type"] === "string" ? obj["@type"].toLowerCase() : "";
43197
+ const isItemType = ITEM_SCHEMA_TYPES.has(atType);
43198
+ const hasName = "name" in obj || "title" in obj || "headline" in obj;
43199
+ const hasPriceish = "offers" in obj || "price" in obj || "sku" in obj;
43200
+ return isItemType || hasName && hasPriceish;
43201
+ }
43202
+ function schemaLooksLikeSingleItem(rs) {
43203
+ if (!rs || typeof rs !== "object")
43204
+ return false;
43205
+ const schema = rs;
43206
+ if (schema.type === "array")
43207
+ return false;
43208
+ const props = schema.properties ?? {};
43209
+ for (const key of COLLECTION_KEYS) {
43210
+ if (key in props)
43211
+ return false;
43212
+ }
43213
+ for (const value of Object.values(props)) {
43214
+ if (value && typeof value === "object" && value.type === "array" && value.items?.type === "object") {
43215
+ return false;
43216
+ }
43217
+ }
43218
+ if (schema.type !== "object")
43219
+ return false;
43220
+ const hasType = "@type" in props;
43221
+ const hasName = "name" in props || "title" in props;
43222
+ const hasPriceish = "offers" in props || "price" in props || "sku" in props;
43223
+ return hasType || hasName && hasPriceish;
43224
+ }
43225
+ function routeLooksLikeSingleItem(route) {
43226
+ const tmpl = route.url_template ?? "";
43227
+ let pathAndQuery = tmpl;
43228
+ try {
43229
+ const u = new URL(tmpl);
43230
+ pathAndQuery = `${u.pathname}${u.search}`;
43231
+ } catch {}
43232
+ const lower = pathAndQuery.toLowerCase();
43233
+ if (/\/(?:search|q|categories?|browse|results?|listings|explore|discover|feed|catalog(?:ue)?|collections?|shop|all)\b/.test(lower) || /[?&](?:q|query|keyword|keywords|search|term|category|cat|page)=/.test(lower)) {
43234
+ return false;
43235
+ }
43236
+ if (/\/(?:p|product|products|item|items|listing|detail|details|dp|pd|sku)\/[^/]+/.test(lower))
43237
+ return true;
43238
+ const lastSeg = lower.split("?")[0].replace(/\/+$/, "").split("/").pop() ?? "";
43239
+ if (/-\d{3,}$/.test(lastSeg) || /^\d{3,}$/.test(lastSeg))
43240
+ return true;
43241
+ if (/\{[^}]+\}/.test(lower))
43242
+ return false;
43243
+ return schemaLooksLikeSingleItem(route.response_schema);
43244
+ }
43245
+ function urlPathLooksListLike(contextUrl) {
43246
+ if (!contextUrl)
43247
+ return false;
43248
+ try {
43249
+ const pathname = new URL(contextUrl).pathname.toLowerCase();
43250
+ return /\/(?:search|basic-search|result-page|results?|discover|browse|categories?|q|listings|feed|catalog(?:ue)?)\b/.test(pathname);
43251
+ } catch {
43252
+ return false;
43253
+ }
43254
+ }
43255
+ function cardinalityMatches(intent, subject, opts) {
43256
+ const wantsMany = isListLikeIntent(intent) || urlPathLooksListLike(opts?.contextUrl);
43257
+ if (!wantsMany)
43258
+ return true;
43259
+ switch (subject.kind) {
43260
+ case "value":
43261
+ return !valueLooksLikeSingleItem(subject.value);
43262
+ case "schema":
43263
+ return !schemaLooksLikeSingleItem(subject.schema);
43264
+ case "route":
43265
+ return !routeLooksLikeSingleItem(subject.route);
43266
+ }
43267
+ }
43268
+ function resolutionCardinalityMatches(intent, data) {
43269
+ return cardinalityMatches(intent, { kind: "value", value: data });
43270
+ }
43271
+ var LIST_INTENT_RE, ITEM_SCHEMA_TYPES, COLLECTION_KEYS;
43272
+ var init_cardinality = __esm(() => {
43273
+ LIST_INTENT_RE = /\b(search|find|lookup|browse|discover|list(?:ings?)?|feed|catalog(?:ue)?)\b/i;
43274
+ ITEM_SCHEMA_TYPES = new Set([
43275
+ "product",
43276
+ "offer",
43277
+ "article",
43278
+ "newsarticle",
43279
+ "blogposting",
43280
+ "recipe",
43281
+ "event",
43282
+ "place",
43283
+ "localbusiness",
43284
+ "jobposting",
43285
+ "book",
43286
+ "movie",
43287
+ "creativework",
43288
+ "person",
43289
+ "organization"
43290
+ ]);
43291
+ COLLECTION_KEYS = [
43292
+ "itemListElement",
43293
+ "items",
43294
+ "results",
43295
+ "products",
43296
+ "listings",
43297
+ "data",
43298
+ "edges",
43299
+ "hits",
43300
+ "records",
43301
+ "entries",
43302
+ "rows",
43303
+ "nodes"
43304
+ ];
43305
+ });
43306
+
43180
43307
  // node_modules/.bun/nanoid@5.1.11/node_modules/nanoid/url-alphabet/index.js
43181
43308
  var urlAlphabet = "useandom-26T198340PX75pxJACKVERYMINDBUSHWOLF_GQZbfghjklqvwyzrict";
43182
43309
 
@@ -115440,7 +115567,7 @@ __export(exports_extraction, {
115440
115567
  cleanDOM: () => cleanDOM,
115441
115568
  buildStructuredDataHeader: () => buildStructuredDataHeader
115442
115569
  });
115443
- function extractHtmlMetadataFallback(html3) {
115570
+ function extractHtmlMetadataFallback(html3, intent) {
115444
115571
  if (!html3 || html3.length < 100)
115445
115572
  return null;
115446
115573
  try {
@@ -115472,8 +115599,9 @@ function extractHtmlMetadataFallback(html3) {
115472
115599
  jsonLdBlocks.push(parsed);
115473
115600
  } catch {}
115474
115601
  });
115475
- if (jsonLdBlocks.length > 0)
115476
- out.json_ld = jsonLdBlocks;
115602
+ const usableJsonLd = isListLikeIntent(intent) ? jsonLdBlocks.filter((b) => !valueLooksLikeSingleItem(b)) : jsonLdBlocks;
115603
+ if (usableJsonLd.length > 0)
115604
+ out.json_ld = usableJsonLd;
115477
115605
  const headings = [];
115478
115606
  $2("h1, h2").each((_, el) => {
115479
115607
  const text3 = cleanText($2(el).text());
@@ -117796,6 +117924,16 @@ function scoreSiteMetaJsonLdDemotion(structure, intent) {
117796
117924
  return 0;
117797
117925
  return -200;
117798
117926
  }
117927
+ function isSingleItemStructureForList(structure, intent) {
117928
+ if (!TINY_RESULT_LIST_INTENT.test(intent.toLowerCase()))
117929
+ return false;
117930
+ if (structure.type === "repeated-elements")
117931
+ return false;
117932
+ return valueLooksLikeSingleItem(structure.data);
117933
+ }
117934
+ function scoreSingleItemListMismatch(structure, intent) {
117935
+ return isSingleItemStructureForList(structure, intent) ? -200 : 0;
117936
+ }
117799
117937
  function looksLikeTinyContentReadResult(data2, intent) {
117800
117938
  if (data2 == null)
117801
117939
  return { tiny: false, bytes: 0, stringLeafChars: 0 };
@@ -117949,9 +118087,9 @@ function extractFromDOM(html3, intent, contextUrl) {
117949
118087
  const articleStructures = extractArticleBodySpecial(html3.length > 600000 ? html3.slice(0, 600000) : html3, intent);
117950
118088
  const allStructures = [...flashStructures, ...githubStructures, ...repeatedPersonStructures, ...packageSearchStructures, ...xProfileStructures, ...postStructures, ...repeatedArticleStructures, ...trendStructures, ...definitionStructures, ...packageDetailStructures, ...arxivAbstractStructures, ...courseStructures, ...articleStructures, ...spaStructures, ...parseStructured(cleaned)].map((structure) => normalizeStructureForIntent(structure, intent));
117951
118089
  const isListIntent = TINY_RESULT_LIST_INTENT.test(intent.toLowerCase());
117952
- const structures = allStructures.filter((s) => !(s.type === "repeated-elements" && looksLikeDegenerateRowArray(s.data)) && !looksLikeConfigShape(s.data) && !looksLikeEmptyContainer(s.data) && !(isListIntent && looksLikeSiteMetaJsonLd(s.data)));
118090
+ const structures = allStructures.filter((s) => !(s.type === "repeated-elements" && looksLikeDegenerateRowArray(s.data)) && !looksLikeConfigShape(s.data) && !looksLikeEmptyContainer(s.data) && !(isListIntent && looksLikeSiteMetaJsonLd(s.data)) && !isSingleItemStructureForList(s, intent));
117953
118091
  if (structures.length === 0) {
117954
- const fallback2 = extractHtmlMetadataFallback(html3);
118092
+ const fallback2 = extractHtmlMetadataFallback(html3, intent);
117955
118093
  if (fallback2) {
117956
118094
  return _finalize({ data: fallback2, extraction_method: "html_metadata_fallback", confidence: 0.4 });
117957
118095
  }
@@ -117960,7 +118098,7 @@ function extractFromDOM(html3, intent, contextUrl) {
117960
118098
  const intentWords = intent.toLowerCase().split(/\s+/).filter(Boolean);
117961
118099
  const scored = structures.map((s) => ({
117962
118100
  structure: s,
117963
- score: scoreRelevance(s, intentWords) + scoreSemanticFit(s, intent) + scoreSparseLinkList(s) + scoreFieldRichness(s) + scoreConfigShapeDemotion(s) + scoreDegenerateRowDemotion(s) + scoreDuplicateRowDemotion(s) + scoreEmptyContainerDemotion(s) + scoreSiteMetaJsonLdDemotion(s, intent) + scoreTableIntentOverlapDemotion(s, intent, contextUrl, structures)
118101
+ score: scoreRelevance(s, intentWords) + scoreSemanticFit(s, intent) + scoreSparseLinkList(s) + scoreFieldRichness(s) + scoreConfigShapeDemotion(s) + scoreDegenerateRowDemotion(s) + scoreDuplicateRowDemotion(s) + scoreEmptyContainerDemotion(s) + scoreSiteMetaJsonLdDemotion(s, intent) + scoreSingleItemListMismatch(s, intent) + scoreTableIntentOverlapDemotion(s, intent, contextUrl, structures)
117964
118102
  }));
117965
118103
  scored.sort((a, b) => b.score - a.score);
117966
118104
  const passing = scored.filter((candidate) => assessIntentResult(candidate.structure.data, intent).verdict === "pass");
@@ -118576,6 +118714,7 @@ function sanitizeExtractionToJson(data2, depth = 0) {
118576
118714
  var STRIP_TAGS, CHROME_TAGS, AD_PATTERNS, HIDDEN_ATTRS, CONTENT_SELECTORS, CARD_SELECTORS, CONFIG_TOP_LEVEL_KEYS, CONFIG_CHUNK_VALUE_KEYS, INTENT_OVERLAP_STOPWORDS, SITE_META_LD_TYPES, TINY_RESULT_LIST_INTENT, TINY_RESULT_DETAIL_INTENT, STRUCTURED_DATA_HIGHLIGHT_TYPES, HTML_TAG_RE, TABLE_RE;
118577
118715
  var init_extraction = __esm(() => {
118578
118716
  init_esm11();
118717
+ init_cardinality();
118579
118718
  STRIP_TAGS = new Set(["script", "style", "noscript", "svg", "iframe"]);
118580
118719
  CHROME_TAGS = new Set(["nav", "footer", "header"]);
118581
118720
  AD_PATTERNS = /\b(ad|ads|advert|advertisement|tracking|tracker|cookie-banner|cookie-consent|cookie-notice|popup|modal-overlay|gdpr|consent|banner-promo)\b/i;
@@ -128277,7 +128416,8 @@ function selectBestEndpoint(endpoints, intent, skillDomain, contextUrl) {
128277
128416
  const ranked = rankEndpoints(endpoints, intent, skillDomain, contextUrl);
128278
128417
  if (ranked.length === 0)
128279
128418
  throw new Error("All endpoints are disabled");
128280
- return ranked[0].endpoint;
128419
+ const preferred = ranked.find((r) => cardinalityMatches(intent, { kind: "route", route: r.endpoint }, { contextUrl }));
128420
+ return (preferred ?? ranked[0]).endpoint;
128281
128421
  }
128282
128422
  function isHtml2(text3) {
128283
128423
  const trimmed = text3.trimStart().slice(0, 200).toLowerCase();
@@ -128324,6 +128464,7 @@ var init_execution = __esm(async () => {
128324
128464
  init_reveng_server_first();
128325
128465
  init_header_classify();
128326
128466
  init_storage_hole_bindings();
128467
+ init_cardinality();
128327
128468
  init_sealed_blob_store();
128328
128469
  init_signer();
128329
128470
  init_bundle_scanner();
@@ -128876,92 +129017,6 @@ function bindingGraphFromOperationGraph(og) {
128876
129017
  return { endpoints, edges };
128877
129018
  }
128878
129019
 
128879
- // .tmp-runtime-src/values/cardinality.ts
128880
- function isListLikeIntent(intent) {
128881
- return LIST_INTENT_RE.test(intent ?? "");
128882
- }
128883
- function valueLooksLikeSingleItem(value) {
128884
- if (value == null || Array.isArray(value) || typeof value !== "object")
128885
- return false;
128886
- const obj = value;
128887
- for (const key2 of COLLECTION_KEYS) {
128888
- if (Array.isArray(obj[key2]))
128889
- return false;
128890
- }
128891
- for (const v of Object.values(obj)) {
128892
- if (Array.isArray(v) && v.some((x) => x !== null && typeof x === "object"))
128893
- return false;
128894
- }
128895
- const atType = typeof obj["@type"] === "string" ? obj["@type"].toLowerCase() : "";
128896
- const isItemType = ITEM_SCHEMA_TYPES.has(atType);
128897
- const hasName = "name" in obj || "title" in obj || "headline" in obj;
128898
- const hasPriceish = "offers" in obj || "price" in obj || "sku" in obj;
128899
- return isItemType || hasName && hasPriceish;
128900
- }
128901
- function schemaLooksLikeSingleItem(rs) {
128902
- if (!rs || typeof rs !== "object")
128903
- return false;
128904
- const schema = rs;
128905
- if (schema.type === "array")
128906
- return false;
128907
- const props = schema.properties ?? {};
128908
- for (const key2 of COLLECTION_KEYS) {
128909
- if (key2 in props)
128910
- return false;
128911
- }
128912
- for (const value of Object.values(props)) {
128913
- if (value && typeof value === "object" && value.type === "array" && value.items?.type === "object") {
128914
- return false;
128915
- }
128916
- }
128917
- if (schema.type !== "object")
128918
- return false;
128919
- const hasType = "@type" in props;
128920
- const hasName = "name" in props || "title" in props;
128921
- const hasPriceish = "offers" in props || "price" in props || "sku" in props;
128922
- return hasType || hasName && hasPriceish;
128923
- }
128924
- function resolutionCardinalityMatches(intent, data2) {
128925
- if (!isListLikeIntent(intent))
128926
- return true;
128927
- return !valueLooksLikeSingleItem(data2);
128928
- }
128929
- var LIST_INTENT_RE, ITEM_SCHEMA_TYPES, COLLECTION_KEYS;
128930
- var init_cardinality = __esm(() => {
128931
- LIST_INTENT_RE = /\b(search|find|lookup|browse|discover|list(?:ings?)?|feed|catalog(?:ue)?)\b/i;
128932
- ITEM_SCHEMA_TYPES = new Set([
128933
- "product",
128934
- "offer",
128935
- "article",
128936
- "newsarticle",
128937
- "blogposting",
128938
- "recipe",
128939
- "event",
128940
- "place",
128941
- "localbusiness",
128942
- "jobposting",
128943
- "book",
128944
- "movie",
128945
- "creativework",
128946
- "person",
128947
- "organization"
128948
- ]);
128949
- COLLECTION_KEYS = [
128950
- "itemListElement",
128951
- "items",
128952
- "results",
128953
- "products",
128954
- "listings",
128955
- "data",
128956
- "edges",
128957
- "hits",
128958
- "records",
128959
- "entries",
128960
- "rows",
128961
- "nodes"
128962
- ];
128963
- });
128964
-
128965
129020
  // .tmp-runtime-src/values/yield-safety.ts
128966
129021
  function tokenizeKey(key2) {
128967
129022
  return key2.replace(/([a-z0-9])([A-Z])/g, "$1 $2").split(/[_\-.\s]+/).map((s) => s.toLowerCase()).filter(Boolean);
@@ -131117,26 +131172,6 @@ function endpointTargetsMismatchedLocalReplayHost(endpoint, contextUrl) {
131117
131172
  function endpointHasNegativeTag(endpoint, tag) {
131118
131173
  return (endpoint.semantic?.negative_tags ?? []).some((candidate) => candidate.trim().toLowerCase() === tag.trim().toLowerCase());
131119
131174
  }
131120
- function looksLikeSingleItemRoute(endpoint) {
131121
- const tmpl = endpoint.url_template ?? "";
131122
- let pathAndQuery = tmpl;
131123
- try {
131124
- const u = new URL(tmpl);
131125
- pathAndQuery = `${u.pathname}${u.search}`;
131126
- } catch {}
131127
- const lower = pathAndQuery.toLowerCase();
131128
- if (/\/(?:search|q|categories?|browse|results?|listings|explore|discover|feed|catalog(?:ue)?|collections?|shop|all)\b/.test(lower) || /[?&](?:q|query|keyword|keywords|search|term|category|cat|page)=/.test(lower)) {
131129
- return false;
131130
- }
131131
- if (/\/(?:p|product|products|item|items|listing|detail|details|dp|pd|sku)\/[^/]+/.test(lower))
131132
- return true;
131133
- const lastSeg = lower.split("?")[0].replace(/\/+$/, "").split("/").pop() ?? "";
131134
- if (/-\d{3,}$/.test(lastSeg) || /^\d{3,}$/.test(lastSeg))
131135
- return true;
131136
- if (/\{[^}]+\}/.test(lower))
131137
- return false;
131138
- return schemaLooksLikeSingleItem(endpoint.response_schema);
131139
- }
131140
131175
  function isResolveUsableEndpointForIntent(endpoint, intent, contextUrl) {
131141
131176
  if (endpointTargetsMismatchedLocalReplayHost(endpoint, contextUrl))
131142
131177
  return false;
@@ -131146,7 +131181,7 @@ function isResolveUsableEndpointForIntent(endpoint, intent, contextUrl) {
131146
131181
  if (isFeedTimelineIntent(intent, contextUrl) && endpointHasNegativeTag(endpoint, "helper")) {
131147
131182
  return false;
131148
131183
  }
131149
- if (isSearchLikeIntent(intent, contextUrl) && looksLikeSingleItemRoute(endpoint)) {
131184
+ if (!cardinalityMatches(intent, { kind: "route", route: endpoint }, { contextUrl })) {
131150
131185
  return false;
131151
131186
  }
131152
131187
  return true;
@@ -132163,7 +132198,20 @@ function inferPreferredEntityTokens(intent) {
132163
132198
  return [];
132164
132199
  }
132165
132200
  function isAcceptableIntentResult(result, intent) {
132166
- return assessIntentResult(result, intent).verdict !== "fail";
132201
+ if (assessIntentResult(result, intent).verdict === "fail")
132202
+ return false;
132203
+ if (!cardinalityMatches(intent, { kind: "value", value: unwrapResultPayload(result) }))
132204
+ return false;
132205
+ return true;
132206
+ }
132207
+ function unwrapResultPayload(result) {
132208
+ if (result == null || typeof result !== "object" || Array.isArray(result))
132209
+ return result;
132210
+ const rec = { ...result };
132211
+ for (const k of ["available_endpoints", "available_operations", "shortlist_for_judgment", "workflow_dag", "walked_from", "exa_candidates", "run_plan"]) {
132212
+ delete rec[k];
132213
+ }
132214
+ return rec;
132167
132215
  }
132168
132216
  function candidateMatchesPreferredEntity(candidate, preferredTokens) {
132169
132217
  if (preferredTokens.length === 0)
@@ -142761,9 +142809,14 @@ async function registerRoutes(app) {
142761
142809
  recovered = true;
142762
142810
  } else if (errResult.available_endpoints?.length === 1) {
142763
142811
  const only = errResult.available_endpoints[0].endpoint_id;
142764
- console.log(`[exec] D7 single-endpoint skill: rewriting endpoint_id ${want} → ${only}`);
142765
- execParams.endpoint_id = only;
142766
- recovered = true;
142812
+ const onlyEp = (skill.endpoints ?? []).find((e) => e.endpoint_id === only);
142813
+ if (!onlyEp || cardinalityMatches(intent, { kind: "route", route: onlyEp }, { contextUrl: context_url })) {
142814
+ console.log(`[exec] D7 single-endpoint skill: rewriting endpoint_id ${want} → ${only}`);
142815
+ execParams.endpoint_id = only;
142816
+ recovered = true;
142817
+ } else {
142818
+ console.log(`[exec] D7 single-endpoint skill: only endpoint ${only} is a single-item route for a list intent — not forcing (cardinality gate)`);
142819
+ }
142767
142820
  }
142768
142821
  }
142769
142822
  if (!recovered && (skill.endpoints?.length ?? 0) >= 2 && intent) {
@@ -143991,6 +144044,7 @@ var init_routes = __esm(async () => {
143991
144044
  init_client();
143992
144045
  init_reveng_server_first();
143993
144046
  init_header_classify();
144047
+ init_cardinality();
143994
144048
  init_capture_spool();
143995
144049
  init_nanoid();
143996
144050
  init_marketplace();
@@ -233199,14 +233253,30 @@ async function cmdRun(args, flags, verb = "run") {
233199
233253
  } else if (explicitEndpointId || !bestEndpoint || endpointIsSafeToAutoExecute(bestEndpoint)) {
233200
233254
  runPlan.push({ step: "execute", mode: "direct_api", status: "started", endpoint_id: endpointToExecute });
233201
233255
  const resolvedSource = typeof result.source === "string" ? result.source : undefined;
233202
- result = await withPendingNotice(api4("POST", `/v1/skills/${skillId}/execute`, execBody(endpointToExecute)), "Executing best endpoint...");
233203
- if (resolvedSource && typeof result.source !== "string")
233204
- result.source = resolvedSource;
233205
- runPlan[runPlan.length - 1] = {
233206
- ...runPlan[runPlan.length - 1],
233207
- status: isResolveSuccessResult(result) ? "complete" : "error",
233208
- error: resolveResultError(result) ?? null
233209
- };
233256
+ const deferralResult = result;
233257
+ const executed = await withPendingNotice(api4("POST", `/v1/skills/${skillId}/execute`, execBody(endpointToExecute)), "Executing best endpoint...");
233258
+ if (!explicitEndpointId && isResolveSuccessResult(executed) && !resolutionCardinalityMatches(intent, executed.result ?? executed.data)) {
233259
+ runPlan[runPlan.length - 1] = {
233260
+ ...runPlan[runPlan.length - 1],
233261
+ status: "skipped",
233262
+ reason: "cardinality_mismatch_single_item"
233263
+ };
233264
+ deferralResult.next_action = {
233265
+ title: "List intent returned a single item",
233266
+ command: `unbrowse execute --skill ${skillId} --endpoint ${endpointToExecute}`,
233267
+ why: "Auto-execute yielded a single item for a list/search intent; the page's listings are likely JS-rendered behind an internal API. Returning the route shortlist instead of one item."
233268
+ };
233269
+ result = deferralResult;
233270
+ } else {
233271
+ result = executed;
233272
+ if (resolvedSource && typeof result.source !== "string")
233273
+ result.source = resolvedSource;
233274
+ runPlan[runPlan.length - 1] = {
233275
+ ...runPlan[runPlan.length - 1],
233276
+ status: isResolveSuccessResult(result) ? "complete" : "error",
233277
+ error: resolveResultError(result) ?? null
233278
+ };
233279
+ }
233210
233280
  } else {
233211
233281
  runPlan.push({
233212
233282
  step: "execute",
@@ -236336,7 +236406,7 @@ __export(exports_orchestrator, {
236336
236406
  pickPreferredSkillSnapshot: () => pickPreferredSkillSnapshot2,
236337
236407
  persistDomainCache: () => persistDomainCache2,
236338
236408
  marketplaceSkillMatchesContext: () => marketplaceSkillMatchesContext2,
236339
- looksLikeSingleItemRoute: () => looksLikeSingleItemRoute2,
236409
+ looksLikeSingleItemRoute: () => looksLikeSingleItemRoute,
236340
236410
  isRouteCacheEntryStale: () => isRouteCacheEntryStale2,
236341
236411
  isResolveUsableEndpointForIntent: () => isResolveUsableEndpointForIntent2,
236342
236412
  isCachedSkillRelevantForIntent: () => isCachedSkillRelevantForIntent2,
@@ -236857,25 +236927,8 @@ function endpointTargetsMismatchedLocalReplayHost2(endpoint, contextUrl) {
236857
236927
  function endpointHasNegativeTag2(endpoint, tag) {
236858
236928
  return (endpoint.semantic?.negative_tags ?? []).some((candidate) => candidate.trim().toLowerCase() === tag.trim().toLowerCase());
236859
236929
  }
236860
- function looksLikeSingleItemRoute2(endpoint) {
236861
- const tmpl = endpoint.url_template ?? "";
236862
- let pathAndQuery = tmpl;
236863
- try {
236864
- const u = new URL(tmpl);
236865
- pathAndQuery = `${u.pathname}${u.search}`;
236866
- } catch {}
236867
- const lower = pathAndQuery.toLowerCase();
236868
- if (/\/(?:search|q|categories?|browse|results?|listings|explore|discover|feed|catalog(?:ue)?|collections?|shop|all)\b/.test(lower) || /[?&](?:q|query|keyword|keywords|search|term|category|cat|page)=/.test(lower)) {
236869
- return false;
236870
- }
236871
- if (/\/(?:p|product|products|item|items|listing|detail|details|dp|pd|sku)\/[^/]+/.test(lower))
236872
- return true;
236873
- const lastSeg = lower.split("?")[0].replace(/\/+$/, "").split("/").pop() ?? "";
236874
- if (/-\d{3,}$/.test(lastSeg) || /^\d{3,}$/.test(lastSeg))
236875
- return true;
236876
- if (/\{[^}]+\}/.test(lower))
236877
- return false;
236878
- return schemaLooksLikeSingleItem(endpoint.response_schema);
236930
+ function looksLikeSingleItemRoute(endpoint) {
236931
+ return routeLooksLikeSingleItem(endpoint);
236879
236932
  }
236880
236933
  function isResolveUsableEndpointForIntent2(endpoint, intent, contextUrl) {
236881
236934
  if (endpointTargetsMismatchedLocalReplayHost2(endpoint, contextUrl))
@@ -236886,7 +236939,7 @@ function isResolveUsableEndpointForIntent2(endpoint, intent, contextUrl) {
236886
236939
  if (isFeedTimelineIntent2(intent, contextUrl) && endpointHasNegativeTag2(endpoint, "helper")) {
236887
236940
  return false;
236888
236941
  }
236889
- if (isSearchLikeIntent2(intent, contextUrl) && looksLikeSingleItemRoute2(endpoint)) {
236942
+ if (!cardinalityMatches(intent, { kind: "route", route: endpoint }, { contextUrl })) {
236890
236943
  return false;
236891
236944
  }
236892
236945
  return true;
@@ -237908,7 +237961,20 @@ function inferPreferredEntityTokens2(intent) {
237908
237961
  return [];
237909
237962
  }
237910
237963
  function isAcceptableIntentResult2(result, intent) {
237911
- return assessIntentResult(result, intent).verdict !== "fail";
237964
+ if (assessIntentResult(result, intent).verdict === "fail")
237965
+ return false;
237966
+ if (!cardinalityMatches(intent, { kind: "value", value: unwrapResultPayload2(result) }))
237967
+ return false;
237968
+ return true;
237969
+ }
237970
+ function unwrapResultPayload2(result) {
237971
+ if (result == null || typeof result !== "object" || Array.isArray(result))
237972
+ return result;
237973
+ const rec = { ...result };
237974
+ for (const k of ["available_endpoints", "available_operations", "shortlist_for_judgment", "workflow_dag", "walked_from", "exa_candidates", "run_plan"]) {
237975
+ delete rec[k];
237976
+ }
237977
+ return rec;
237912
237978
  }
237913
237979
  function candidateMatchesPreferredEntity2(candidate, preferredTokens) {
237914
237980
  if (preferredTokens.length === 0)
Binary file
@@ -2,7 +2,7 @@
2
2
  "repo_url": "https://github.com/justrach/kuri.git",
3
3
  "branch": "adding-extensions",
4
4
  "source_sha": "149881254046a20778f642b69f20f0c6468f6fb4",
5
- "built_at": "2026-06-18T02:47:19.984Z",
5
+ "built_at": "2026-06-18T03:58:10.362Z",
6
6
  "binaries": {
7
7
  "darwin-arm64": {
8
8
  "zig_target": "aarch64-macos",
@@ -21,11 +21,11 @@
21
21
  },
22
22
  "linux-x64": {
23
23
  "zig_target": "x86_64-linux",
24
- "sha256": "ae7633e7140f4d98633aa28f3bcff33e1398f3caf709a530c6cd2b36c251d113"
24
+ "sha256": "e73aecfbf07001ba0be5032118790eb253ad5d8d12caca6a1dd5ad3ccab44b9e"
25
25
  },
26
26
  "win-x64": {
27
27
  "zig_target": "x86_64-windows-gnu",
28
- "sha256": "54a2d73ab37bd056e5e469c9c8aeb5c992645962aee10465f1452a33fab447db",
28
+ "sha256": "9ecbc82be646e755e4664051cf345d54dde3c6610e457d763deff67895047963",
29
29
  "source": "pre-staged"
30
30
  }
31
31
  },
@@ -33,22 +33,22 @@
33
33
  "darwin-arm64": {
34
34
  "zig_target": "aarch64-macos",
35
35
  "lib": "libkuri_ffi.dylib",
36
- "sha256": "774ff424fb6a4abb2634b7a891b10fa9760474851e2e7d25ebd6701917d959db"
36
+ "sha256": "6c72cf383df4fa3f870b745da43d64eae8f67e58c6f971214ac29602fb649939"
37
37
  },
38
38
  "darwin-x64": {
39
39
  "zig_target": "x86_64-macos",
40
40
  "lib": "libkuri_ffi.dylib",
41
- "sha256": "116afae77447b5036978d883541d264edbe3a2413788e904cf77b9123c0b79c3"
41
+ "sha256": "82480772ddc8e44c8e34e70b80d7dc0969004942f77276587af450b62d3d2750"
42
42
  },
43
43
  "linux-arm64": {
44
44
  "zig_target": "aarch64-linux",
45
45
  "lib": "libkuri_ffi.so",
46
- "sha256": "4558df5a2057910dd646491a9de9d1e8b81e958e0d830f1b33a1d30d0d91e95f"
46
+ "sha256": "ef8dfa2b634f04294f93a94472d9856ba777681afaab2d4213f0e29821882e07"
47
47
  },
48
48
  "linux-x64": {
49
49
  "zig_target": "x86_64-linux",
50
50
  "lib": "libkuri_ffi.so",
51
- "sha256": "b9bbb67c58aadfacbdf5304cdc7c1cea9014c6c3bb83dda7eb8661277f6c8b36"
51
+ "sha256": "fb29ad2b71186d176306321d17e88074a67fea139991faef9aa4862333942c9e"
52
52
  }
53
53
  }
54
54
  }
Binary file