soustack 0.1.2 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.mjs CHANGED
@@ -133,8 +133,8 @@ function flattenInstructions(items) {
133
133
  // src/schema.json
134
134
  var schema_default = {
135
135
  $schema: "http://json-schema.org/draft-07/schema#",
136
- $id: "http://soustack.org/schema/v0.1",
137
- title: "Soustack Recipe Schema v0.1",
136
+ $id: "http://soustack.org/schema/v0.2",
137
+ title: "Soustack Recipe Schema v0.2",
138
138
  description: "A portable, scalable, interoperable recipe format.",
139
139
  type: "object",
140
140
  required: ["name", "ingredients", "instructions"],
@@ -164,8 +164,21 @@ var schema_default = {
164
164
  items: { type: "string" }
165
165
  },
166
166
  image: {
167
- type: "string",
168
- format: "uri"
167
+ description: "Recipe-level hero image(s)",
168
+ anyOf: [
169
+ {
170
+ type: "string",
171
+ format: "uri"
172
+ },
173
+ {
174
+ type: "array",
175
+ minItems: 1,
176
+ items: {
177
+ type: "string",
178
+ format: "uri"
179
+ }
180
+ }
181
+ ]
169
182
  },
170
183
  dateAdded: {
171
184
  type: "string",
@@ -330,6 +343,11 @@ var schema_default = {
330
343
  properties: {
331
344
  id: { type: "string" },
332
345
  text: { type: "string" },
346
+ image: {
347
+ type: "string",
348
+ format: "uri",
349
+ description: "Optional image that illustrates this instruction"
350
+ },
333
351
  destination: { type: "string" },
334
352
  dependsOn: {
335
353
  type: "array",
@@ -1227,6 +1245,40 @@ function smartParseDuration(input) {
1227
1245
  return parseHumanDuration(input);
1228
1246
  }
1229
1247
 
1248
+ // src/utils/image.ts
1249
+ function normalizeImage(image) {
1250
+ if (!image) {
1251
+ return void 0;
1252
+ }
1253
+ if (typeof image === "string") {
1254
+ const trimmed = image.trim();
1255
+ return trimmed || void 0;
1256
+ }
1257
+ if (Array.isArray(image)) {
1258
+ const urls = image.map((entry) => typeof entry === "string" ? entry.trim() : extractUrl(entry)).filter((url) => typeof url === "string" && Boolean(url));
1259
+ if (urls.length === 0) {
1260
+ return void 0;
1261
+ }
1262
+ if (urls.length === 1) {
1263
+ return urls[0];
1264
+ }
1265
+ return urls;
1266
+ }
1267
+ return extractUrl(image);
1268
+ }
1269
+ function extractUrl(value) {
1270
+ if (!value || typeof value !== "object") {
1271
+ return void 0;
1272
+ }
1273
+ const record = value;
1274
+ const candidate = typeof record.url === "string" ? record.url : typeof record.contentUrl === "string" ? record.contentUrl : void 0;
1275
+ if (!candidate) {
1276
+ return void 0;
1277
+ }
1278
+ const trimmed = candidate.trim();
1279
+ return trimmed || void 0;
1280
+ }
1281
+
1230
1282
  // src/fromSchemaOrg.ts
1231
1283
  function fromSchemaOrg(input) {
1232
1284
  const recipeNode = extractRecipeNode(input);
@@ -1239,13 +1291,12 @@ function fromSchemaOrg(input) {
1239
1291
  const recipeYield = parseYield(recipeNode.recipeYield);
1240
1292
  const tags = collectTags(recipeNode.recipeCuisine, recipeNode.keywords);
1241
1293
  const category = extractFirst(recipeNode.recipeCategory);
1242
- const image = convertImage(recipeNode.image);
1243
1294
  const source = convertSource(recipeNode);
1244
1295
  const nutrition = recipeNode.nutrition && typeof recipeNode.nutrition === "object" ? recipeNode.nutrition : void 0;
1245
1296
  return {
1246
1297
  name: recipeNode.name.trim(),
1247
1298
  description: recipeNode.description?.trim() || void 0,
1248
- image,
1299
+ image: normalizeImage(recipeNode.image),
1249
1300
  category,
1250
1301
  tags: tags.length ? tags : void 0,
1251
1302
  source,
@@ -1328,9 +1379,9 @@ function convertInstructions(value) {
1328
1379
  continue;
1329
1380
  }
1330
1381
  if (isHowToStep(entry)) {
1331
- const text = extractInstructionText(entry);
1332
- if (text) {
1333
- result.push(text);
1382
+ const parsed = convertHowToStep(entry);
1383
+ if (parsed) {
1384
+ result.push(parsed);
1334
1385
  }
1335
1386
  }
1336
1387
  }
@@ -1348,9 +1399,9 @@ function extractSectionItems(items = []) {
1348
1399
  continue;
1349
1400
  }
1350
1401
  if (isHowToStep(item)) {
1351
- const text = extractInstructionText(item);
1352
- if (text) {
1353
- result.push(text);
1402
+ const parsed = convertHowToStep(item);
1403
+ if (parsed) {
1404
+ result.push(parsed);
1354
1405
  }
1355
1406
  continue;
1356
1407
  }
@@ -1364,6 +1415,17 @@ function extractInstructionText(value) {
1364
1415
  const text = typeof value.text === "string" ? value.text : value.name;
1365
1416
  return typeof text === "string" ? text.trim() || void 0 : void 0;
1366
1417
  }
1418
+ function convertHowToStep(step) {
1419
+ const text = extractInstructionText(step);
1420
+ if (!text) {
1421
+ return void 0;
1422
+ }
1423
+ const normalizedImage = normalizeImage(step.image);
1424
+ if (typeof normalizedImage === "string") {
1425
+ return { text, image: normalizedImage };
1426
+ }
1427
+ return text;
1428
+ }
1367
1429
  function isHowToStep(value) {
1368
1430
  return Boolean(value) && typeof value === "object" && value["@type"] === "HowToStep";
1369
1431
  }
@@ -1405,26 +1467,6 @@ function extractFirst(value) {
1405
1467
  const arr = flattenStrings(value);
1406
1468
  return arr.length ? arr[0] : void 0;
1407
1469
  }
1408
- function convertImage(value) {
1409
- if (!value) return void 0;
1410
- if (typeof value === "string") {
1411
- return value;
1412
- }
1413
- if (Array.isArray(value)) {
1414
- for (const item of value) {
1415
- const url = typeof item === "string" ? item : extractImageUrl(item);
1416
- if (url) return url;
1417
- }
1418
- return void 0;
1419
- }
1420
- return extractImageUrl(value);
1421
- }
1422
- function extractImageUrl(value) {
1423
- if (!value || typeof value !== "object") return void 0;
1424
- const record = value;
1425
- const candidate = typeof record.url === "string" ? record.url : typeof record.contentUrl === "string" ? record.contentUrl : void 0;
1426
- return candidate?.trim() || void 0;
1427
- }
1428
1470
  function convertSource(recipe) {
1429
1471
  const author = extractEntityName(recipe.author);
1430
1472
  const publisher = extractEntityName(recipe.publisher);
@@ -1520,7 +1562,7 @@ function convertInstruction(entry) {
1520
1562
  return createHowToStep(entry);
1521
1563
  }
1522
1564
  if ("subsection" in entry) {
1523
- const steps = entry.items.map((item) => typeof item === "string" ? createHowToStep(item) : createHowToStep(item.text)).filter((step) => Boolean(step));
1565
+ const steps = entry.items.map((item) => createHowToStep(item)).filter((step) => Boolean(step));
1524
1566
  if (!steps.length) {
1525
1567
  return null;
1526
1568
  }
@@ -1531,18 +1573,34 @@ function convertInstruction(entry) {
1531
1573
  };
1532
1574
  }
1533
1575
  if ("text" in entry) {
1534
- return createHowToStep(entry.text);
1576
+ return createHowToStep(entry);
1535
1577
  }
1536
1578
  return createHowToStep(String(entry));
1537
1579
  }
1538
- function createHowToStep(text) {
1539
- if (!text) return null;
1540
- const trimmed = text.trim();
1541
- if (!trimmed) return null;
1542
- return {
1580
+ function createHowToStep(entry) {
1581
+ if (!entry) return null;
1582
+ if (typeof entry === "string") {
1583
+ const trimmed2 = entry.trim();
1584
+ if (!trimmed2) {
1585
+ return null;
1586
+ }
1587
+ return {
1588
+ "@type": "HowToStep",
1589
+ text: trimmed2
1590
+ };
1591
+ }
1592
+ const trimmed = entry.text?.trim();
1593
+ if (!trimmed) {
1594
+ return null;
1595
+ }
1596
+ const step = {
1543
1597
  "@type": "HowToStep",
1544
1598
  text: trimmed
1545
1599
  };
1600
+ if (entry.image) {
1601
+ step.image = entry.image;
1602
+ }
1603
+ return step;
1546
1604
  }
1547
1605
  function convertTime2(time) {
1548
1606
  if (!time) {
@@ -1705,7 +1763,7 @@ async function fetchPage(url, options = {}) {
1705
1763
  };
1706
1764
  const response = await resolvedFetch(url, requestInit);
1707
1765
  clearTimeout(timeoutId);
1708
- if (response && (typeof process === "undefined" || process.env.NODE_ENV !== "test")) {
1766
+ if (response && typeof process !== "undefined" && process.env.NODE_ENV !== "test") {
1709
1767
  try {
1710
1768
  const globalFetch = typeof globalThis !== "undefined" && typeof globalThis.fetch !== "undefined" ? globalThis.fetch : null;
1711
1769
  if (globalFetch) {
@@ -1723,7 +1781,7 @@ async function fetchPage(url, options = {}) {
1723
1781
  throw error;
1724
1782
  }
1725
1783
  const html = await response.text();
1726
- if (typeof process === "undefined" || process.env.NODE_ENV !== "test") {
1784
+ if (typeof process !== "undefined" && process.env.NODE_ENV !== "test") {
1727
1785
  try {
1728
1786
  const globalFetch = typeof globalThis !== "undefined" && typeof globalThis.fetch !== "undefined" ? globalThis.fetch : null;
1729
1787
  if (globalFetch) {
@@ -1983,14 +2041,30 @@ function extractRecipe(html) {
1983
2041
  return extractRecipeBrowser(html);
1984
2042
  }
1985
2043
  const jsonLdRecipe = extractJsonLd(html);
1986
- fetch("http://127.0.0.1:7243/ingest/7225c3b5-9ac2-4c94-b561-807ca9003b66", { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify({ location: "scraper/extractors/index.ts:6", message: "JSON-LD extraction result", data: { hasJsonLd: !!jsonLdRecipe }, timestamp: Date.now(), sessionId: "debug-session", runId: "run1", hypothesisId: "C,D" }) }).catch(() => {
1987
- });
2044
+ if (typeof process !== "undefined" && process.env.NODE_ENV !== "test") {
2045
+ try {
2046
+ const globalFetch = typeof globalThis !== "undefined" && typeof globalThis.fetch !== "undefined" ? globalThis.fetch : null;
2047
+ if (globalFetch) {
2048
+ globalFetch("http://127.0.0.1:7243/ingest/7225c3b5-9ac2-4c94-b561-807ca9003b66", { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify({ location: "scraper/extractors/index.ts:6", message: "JSON-LD extraction result", data: { hasJsonLd: !!jsonLdRecipe }, timestamp: Date.now(), sessionId: "debug-session", runId: "run1", hypothesisId: "C,D" }) }).catch(() => {
2049
+ });
2050
+ }
2051
+ } catch {
2052
+ }
2053
+ }
1988
2054
  if (jsonLdRecipe) {
1989
2055
  return { recipe: jsonLdRecipe, source: "jsonld" };
1990
2056
  }
1991
2057
  const microdataRecipe = extractMicrodata(html);
1992
- fetch("http://127.0.0.1:7243/ingest/7225c3b5-9ac2-4c94-b561-807ca9003b66", { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify({ location: "scraper/extractors/index.ts:12", message: "Microdata extraction result", data: { hasMicrodata: !!microdataRecipe }, timestamp: Date.now(), sessionId: "debug-session", runId: "run1", hypothesisId: "D" }) }).catch(() => {
1993
- });
2058
+ if (typeof process !== "undefined" && process.env.NODE_ENV !== "test") {
2059
+ try {
2060
+ const globalFetch = typeof globalThis !== "undefined" && typeof globalThis.fetch !== "undefined" ? globalThis.fetch : null;
2061
+ if (globalFetch) {
2062
+ globalFetch("http://127.0.0.1:7243/ingest/7225c3b5-9ac2-4c94-b561-807ca9003b66", { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify({ location: "scraper/extractors/index.ts:12", message: "Microdata extraction result", data: { hasMicrodata: !!microdataRecipe }, timestamp: Date.now(), sessionId: "debug-session", runId: "run1", hypothesisId: "D" }) }).catch(() => {
2063
+ });
2064
+ }
2065
+ } catch {
2066
+ }
2067
+ }
1994
2068
  if (microdataRecipe) {
1995
2069
  return { recipe: microdataRecipe, source: "microdata" };
1996
2070
  }
@@ -1999,20 +2073,52 @@ function extractRecipe(html) {
1999
2073
 
2000
2074
  // src/scraper/index.ts
2001
2075
  async function scrapeRecipe(url, options = {}) {
2002
- fetch("http://127.0.0.1:7243/ingest/7225c3b5-9ac2-4c94-b561-807ca9003b66", { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify({ location: "scraper/index.ts:7", message: "scrapeRecipe entry", data: { url, hasOptions: !!options }, timestamp: Date.now(), sessionId: "debug-session", runId: "run1", hypothesisId: "A,B,C,D,E" }) }).catch(() => {
2003
- });
2076
+ if (typeof process !== "undefined" && process.env.NODE_ENV !== "test") {
2077
+ try {
2078
+ const globalFetch = typeof globalThis !== "undefined" && typeof globalThis.fetch !== "undefined" ? globalThis.fetch : null;
2079
+ if (globalFetch) {
2080
+ globalFetch("http://127.0.0.1:7243/ingest/7225c3b5-9ac2-4c94-b561-807ca9003b66", { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify({ location: "scraper/index.ts:7", message: "scrapeRecipe entry", data: { url, hasOptions: !!options }, timestamp: Date.now(), sessionId: "debug-session", runId: "run1", hypothesisId: "A,B,C,D,E" }) }).catch(() => {
2081
+ });
2082
+ }
2083
+ } catch {
2084
+ }
2085
+ }
2004
2086
  const html = await fetchPage(url, options);
2005
- fetch("http://127.0.0.1:7243/ingest/7225c3b5-9ac2-4c94-b561-807ca9003b66", { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify({ location: "scraper/index.ts:9", message: "HTML fetched", data: { htmlLength: html?.length, htmlPreview: html?.substring(0, 200) }, timestamp: Date.now(), sessionId: "debug-session", runId: "run1", hypothesisId: "B" }) }).catch(() => {
2006
- });
2087
+ if (typeof process !== "undefined" && process.env.NODE_ENV !== "test") {
2088
+ try {
2089
+ const globalFetch = typeof globalThis !== "undefined" && typeof globalThis.fetch !== "undefined" ? globalThis.fetch : null;
2090
+ if (globalFetch) {
2091
+ globalFetch("http://127.0.0.1:7243/ingest/7225c3b5-9ac2-4c94-b561-807ca9003b66", { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify({ location: "scraper/index.ts:9", message: "HTML fetched", data: { htmlLength: html?.length, htmlPreview: html?.substring(0, 200) }, timestamp: Date.now(), sessionId: "debug-session", runId: "run1", hypothesisId: "B" }) }).catch(() => {
2092
+ });
2093
+ }
2094
+ } catch {
2095
+ }
2096
+ }
2007
2097
  const { recipe } = extractRecipe(html);
2008
- fetch("http://127.0.0.1:7243/ingest/7225c3b5-9ac2-4c94-b561-807ca9003b66", { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify({ location: "scraper/index.ts:11", message: "extractRecipe result", data: { hasRecipe: !!recipe, recipeType: recipe?.["@type"], recipeName: recipe?.name }, timestamp: Date.now(), sessionId: "debug-session", runId: "run1", hypothesisId: "A,C,D" }) }).catch(() => {
2009
- });
2098
+ if (typeof process !== "undefined" && process.env.NODE_ENV !== "test") {
2099
+ try {
2100
+ const globalFetch = typeof globalThis !== "undefined" && typeof globalThis.fetch !== "undefined" ? globalThis.fetch : null;
2101
+ if (globalFetch) {
2102
+ globalFetch("http://127.0.0.1:7243/ingest/7225c3b5-9ac2-4c94-b561-807ca9003b66", { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify({ location: "scraper/index.ts:11", message: "extractRecipe result", data: { hasRecipe: !!recipe, recipeType: recipe?.["@type"], recipeName: recipe?.name }, timestamp: Date.now(), sessionId: "debug-session", runId: "run1", hypothesisId: "A,C,D" }) }).catch(() => {
2103
+ });
2104
+ }
2105
+ } catch {
2106
+ }
2107
+ }
2010
2108
  if (!recipe) {
2011
2109
  throw new Error("No Schema.org recipe data found in page");
2012
2110
  }
2013
2111
  const soustackRecipe = fromSchemaOrg(recipe);
2014
- fetch("http://127.0.0.1:7243/ingest/7225c3b5-9ac2-4c94-b561-807ca9003b66", { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify({ location: "scraper/index.ts:17", message: "fromSchemaOrg result", data: { hasSoustackRecipe: !!soustackRecipe, soustackRecipeName: soustackRecipe?.name }, timestamp: Date.now(), sessionId: "debug-session", runId: "run1", hypothesisId: "A" }) }).catch(() => {
2015
- });
2112
+ if (typeof process !== "undefined" && process.env.NODE_ENV !== "test") {
2113
+ try {
2114
+ const globalFetch = typeof globalThis !== "undefined" && typeof globalThis.fetch !== "undefined" ? globalThis.fetch : null;
2115
+ if (globalFetch) {
2116
+ globalFetch("http://127.0.0.1:7243/ingest/7225c3b5-9ac2-4c94-b561-807ca9003b66", { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify({ location: "scraper/index.ts:17", message: "fromSchemaOrg result", data: { hasSoustackRecipe: !!soustackRecipe, soustackRecipeName: soustackRecipe?.name }, timestamp: Date.now(), sessionId: "debug-session", runId: "run1", hypothesisId: "A" }) }).catch(() => {
2117
+ });
2118
+ }
2119
+ } catch {
2120
+ }
2121
+ }
2016
2122
  if (!soustackRecipe) {
2017
2123
  throw new Error("Schema.org data did not include a valid recipe");
2018
2124
  }
@@ -2029,6 +2135,10 @@ function extractRecipeFromHTML(html) {
2029
2135
  }
2030
2136
  return soustackRecipe;
2031
2137
  }
2138
+ function extractSchemaOrgRecipeFromHTML(html) {
2139
+ const { recipe } = extractRecipe(html);
2140
+ return recipe;
2141
+ }
2032
2142
 
2033
2143
  // src/parsers/yield.ts
2034
2144
  var RANGE_PATTERN = /^(\d+)(?:\s*(?:[-–—]|to)\s*)(\d+)\s+(.+)$/i;
@@ -2272,6 +2382,6 @@ function wordToNumber(word) {
2272
2382
  return null;
2273
2383
  }
2274
2384
 
2275
- export { extractRecipeFromHTML, formatDuration, formatYield2 as formatYield, fromSchemaOrg, normalizeIngredientInput, normalizeYield, parseDuration, parseHumanDuration, parseIngredient, parseIngredientLine, parseIngredients, parseYield2 as parseYield, scaleRecipe, scrapeRecipe, smartParseDuration, toSchemaOrg, validateRecipe };
2385
+ export { extractRecipeFromHTML, extractSchemaOrgRecipeFromHTML, formatDuration, formatYield2 as formatYield, fromSchemaOrg, normalizeImage, normalizeIngredientInput, normalizeYield, parseDuration, parseHumanDuration, parseIngredient, parseIngredientLine, parseIngredients, parseYield2 as parseYield, scaleRecipe, scrapeRecipe, smartParseDuration, toSchemaOrg, validateRecipe };
2276
2386
  //# sourceMappingURL=index.mjs.map
2277
2387
  //# sourceMappingURL=index.mjs.map