soustack 0.1.3 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -35,10 +35,10 @@ Soustack is **computational**—it understands _how_ a recipe behaves.
35
35
  npm install soustack
36
36
  ```
37
37
 
38
- ## Whats Included
38
+ ## What's Included
39
39
 
40
40
  - **Validation**: `validateRecipe()` validates Soustack JSON against the bundled schema.
41
- - **Scaling & Computation**: `scaleRecipe()` produces a flat, UI-ready computed recipe (scaled ingredients + aggregated timing).
41
+ - **Scaling & Computation**: `scaleRecipe()` produces a flat, UI-ready "computed recipe" (scaled ingredients + aggregated timing).
42
42
  - **Parsers**:
43
43
  - Ingredient parsing (`parseIngredient`, `parseIngredientLine`)
44
44
  - Duration parsing (`smartParseDuration`)
@@ -46,6 +46,11 @@ npm install soustack
46
46
  - **Schema.org Conversion**:
47
47
  - `fromSchemaOrg()` (Schema.org JSON-LD → Soustack)
48
48
  - `toSchemaOrg()` (Soustack → Schema.org JSON-LD)
49
+ - `normalizeImage()` utility for converting Schema.org image formats to Soustack format
50
+ - **Image Support**:
51
+ - Recipe-level images: single URL or array of URLs
52
+ - Instruction-level images: optional image URL per step
53
+ - Automatic normalization from Schema.org ImageObject formats
49
54
  - **Web Scraping**:
50
55
  - `scrapeRecipe()` fetches a recipe page and extracts Schema.org recipe data (Node.js only)
51
56
  - `extractRecipeFromHTML()` extracts recipe data from HTML string, returns Soustack format (browser & Node.js compatible)
@@ -63,6 +68,7 @@ import {
63
68
  toSchemaOrg,
64
69
  validateRecipe,
65
70
  scaleRecipe,
71
+ normalizeImage,
66
72
  } from 'soustack';
67
73
 
68
74
  // Validate a Soustack recipe JSON object
@@ -90,17 +96,52 @@ const soustack = fromSchemaOrg(schemaOrgJsonLd);
90
96
 
91
97
  // Convert Soustack → Schema.org
92
98
  const jsonLd = toSchemaOrg(recipe);
99
+
100
+ // Normalize Schema.org image formats (strings, arrays, ImageObjects)
101
+ const normalized = normalizeImage(schemaOrgRecipe.image);
102
+ // Returns: string | string[] | undefined
93
103
  ```
94
104
 
95
105
  ## 🔁 Schema.org Conversion
96
106
 
97
- Use the new helpers to move between Schema.org JSON-LD and Soustack's structured recipe format.
107
+ Use the helpers to move between Schema.org JSON-LD and Soustack's structured recipe format. The conversion automatically handles image normalization, supporting multiple image formats from Schema.org.
98
108
 
99
109
  ```ts
100
- import { fromSchemaOrg, toSchemaOrg } from 'soustack';
110
+ import { fromSchemaOrg, toSchemaOrg, normalizeImage } from 'soustack';
101
111
 
112
+ // Convert Schema.org → Soustack (automatically normalizes images)
102
113
  const soustackRecipe = fromSchemaOrg(schemaOrgJsonLd);
114
+ // Recipe images: string | string[] | undefined
115
+ // Instruction images: optional image URL per step
116
+
117
+ // Convert Soustack → Schema.org (preserves images)
103
118
  const schemaOrgRecipe = toSchemaOrg(soustackRecipe);
119
+
120
+ // Manual image normalization (if needed)
121
+ const normalized = normalizeImage(schemaOrgImage);
122
+ // Handles: strings, arrays, ImageObjects with url/contentUrl
123
+ ```
124
+
125
+ ### Image Format Support
126
+
127
+ Soustack supports flexible image formats:
128
+
129
+ - **Recipe-level images**: Single URL (`string`) or multiple URLs (`string[]`)
130
+ - **Instruction-level images**: Optional `image` property on instruction objects
131
+ - **Automatic normalization**: Schema.org ImageObjects are automatically converted to URLs during import
132
+
133
+ Example recipe with images:
134
+
135
+ ```ts
136
+ const recipe = {
137
+ name: "Chocolate Cake",
138
+ image: ["https://example.com/hero.jpg", "https://example.com/gallery.jpg"],
139
+ instructions: [
140
+ "Mix dry ingredients",
141
+ { text: "Decorate the cake", image: "https://example.com/decorate.jpg" },
142
+ "Serve"
143
+ ]
144
+ };
104
145
  ```
105
146
 
106
147
  ## 🧰 Web Scraping
package/dist/cli/index.js CHANGED
@@ -163,8 +163,8 @@ function flattenInstructions(items) {
163
163
  // src/schema.json
164
164
  var schema_default = {
165
165
  $schema: "http://json-schema.org/draft-07/schema#",
166
- $id: "http://soustack.org/schema/v0.1",
167
- title: "Soustack Recipe Schema v0.1",
166
+ $id: "http://soustack.org/schema/v0.2",
167
+ title: "Soustack Recipe Schema v0.2",
168
168
  description: "A portable, scalable, interoperable recipe format.",
169
169
  type: "object",
170
170
  required: ["name", "ingredients", "instructions"],
@@ -194,8 +194,21 @@ var schema_default = {
194
194
  items: { type: "string" }
195
195
  },
196
196
  image: {
197
- type: "string",
198
- format: "uri"
197
+ description: "Recipe-level hero image(s)",
198
+ anyOf: [
199
+ {
200
+ type: "string",
201
+ format: "uri"
202
+ },
203
+ {
204
+ type: "array",
205
+ minItems: 1,
206
+ items: {
207
+ type: "string",
208
+ format: "uri"
209
+ }
210
+ }
211
+ ]
199
212
  },
200
213
  dateAdded: {
201
214
  type: "string",
@@ -360,6 +373,11 @@ var schema_default = {
360
373
  properties: {
361
374
  id: { type: "string" },
362
375
  text: { type: "string" },
376
+ image: {
377
+ type: "string",
378
+ format: "uri",
379
+ description: "Optional image that illustrates this instruction"
380
+ },
363
381
  destination: { type: "string" },
364
382
  dependsOn: {
365
383
  type: "array",
@@ -1250,6 +1268,40 @@ function smartParseDuration(input) {
1250
1268
  return parseHumanDuration(input);
1251
1269
  }
1252
1270
 
1271
+ // src/utils/image.ts
1272
/**
 * Normalize a Schema.org `image` value into a plain URL or list of URLs.
 *
 * Accepted inputs: a URL string, an ImageObject-like record exposing `url`
 * and/or `contentUrl`, or an array mixing both forms. Strings are trimmed and
 * empty/unusable entries are discarded.
 *
 * @param {unknown} image - Raw `image` value taken from a Schema.org node.
 * @returns {string | string[] | undefined} A single URL when exactly one
 *   usable URL is found, an array when several are found, otherwise
 *   `undefined`.
 */
function normalizeImage(image) {
  if (!image) {
    return void 0;
  }
  if (typeof image === "string") {
    const trimmed = image.trim();
    return trimmed || void 0;
  }
  if (Array.isArray(image)) {
    const urls = [];
    for (const entry of image) {
      const url = typeof entry === "string" ? entry.trim() : extractUrl(entry);
      if (typeof url === "string" && url) {
        urls.push(url);
      }
    }
    if (urls.length === 0) {
      return void 0;
    }
    // A one-element list collapses to a bare string.
    return urls.length === 1 ? urls[0] : urls;
  }
  return extractUrl(image);
}

/**
 * Pull a usable URL out of an ImageObject-like record.
 *
 * `url` is preferred; `contentUrl` is used when `url` is missing, not a
 * string, or blank after trimming, so that an empty `url` no longer hides a
 * valid `contentUrl`.
 *
 * @param {unknown} value - Candidate record.
 * @returns {string | undefined} The trimmed URL, or `undefined` when neither
 *   property holds a non-empty string.
 */
function extractUrl(value) {
  if (!value || typeof value !== "object") {
    return void 0;
  }
  const record = value;
  for (const key of ["url", "contentUrl"]) {
    const candidate = record[key];
    if (typeof candidate === "string") {
      const trimmed = candidate.trim();
      if (trimmed) {
        return trimmed;
      }
    }
  }
  return void 0;
}
1304
+
1253
1305
  // src/fromSchemaOrg.ts
1254
1306
  function fromSchemaOrg(input) {
1255
1307
  const recipeNode = extractRecipeNode(input);
@@ -1262,13 +1314,12 @@ function fromSchemaOrg(input) {
1262
1314
  const recipeYield = parseYield(recipeNode.recipeYield);
1263
1315
  const tags = collectTags(recipeNode.recipeCuisine, recipeNode.keywords);
1264
1316
  const category = extractFirst(recipeNode.recipeCategory);
1265
- const image = convertImage(recipeNode.image);
1266
1317
  const source = convertSource(recipeNode);
1267
1318
  const nutrition = recipeNode.nutrition && typeof recipeNode.nutrition === "object" ? recipeNode.nutrition : void 0;
1268
1319
  return {
1269
1320
  name: recipeNode.name.trim(),
1270
1321
  description: recipeNode.description?.trim() || void 0,
1271
- image,
1322
+ image: normalizeImage(recipeNode.image),
1272
1323
  category,
1273
1324
  tags: tags.length ? tags : void 0,
1274
1325
  source,
@@ -1351,9 +1402,9 @@ function convertInstructions(value) {
1351
1402
  continue;
1352
1403
  }
1353
1404
  if (isHowToStep(entry)) {
1354
- const text = extractInstructionText(entry);
1355
- if (text) {
1356
- result.push(text);
1405
+ const parsed = convertHowToStep(entry);
1406
+ if (parsed) {
1407
+ result.push(parsed);
1357
1408
  }
1358
1409
  }
1359
1410
  }
@@ -1371,9 +1422,9 @@ function extractSectionItems(items = []) {
1371
1422
  continue;
1372
1423
  }
1373
1424
  if (isHowToStep(item)) {
1374
- const text = extractInstructionText(item);
1375
- if (text) {
1376
- result.push(text);
1425
+ const parsed = convertHowToStep(item);
1426
+ if (parsed) {
1427
+ result.push(parsed);
1377
1428
  }
1378
1429
  continue;
1379
1430
  }
@@ -1387,6 +1438,17 @@ function extractInstructionText(value) {
1387
1438
  const text = typeof value.text === "string" ? value.text : value.name;
1388
1439
  return typeof text === "string" ? text.trim() || void 0 : void 0;
1389
1440
  }
1441
/**
 * Convert a Schema.org HowToStep into a Soustack instruction.
 *
 * @param {object} step - HowToStep node; its text is read through
 *   `extractInstructionText` and its `image` through `normalizeImage`.
 * @returns {string | { text: string, image: string } | undefined} The bare
 *   instruction text, a `{ text, image }` object when the step has an image,
 *   or `undefined` when the step has no usable text.
 */
function convertHowToStep(step) {
  const text = extractInstructionText(step);
  if (!text) {
    return void 0;
  }
  const normalizedImage = normalizeImage(step.image);
  // A Soustack instruction holds a single image URL. When the step lists
  // several images, keep the first one instead of dropping them all.
  const image = Array.isArray(normalizedImage) ? normalizedImage[0] : normalizedImage;
  if (typeof image === "string") {
    return { text, image };
  }
  return text;
}
1390
1452
  function isHowToStep(value) {
1391
1453
  return Boolean(value) && typeof value === "object" && value["@type"] === "HowToStep";
1392
1454
  }
@@ -1428,26 +1490,6 @@ function extractFirst(value) {
1428
1490
  const arr = flattenStrings(value);
1429
1491
  return arr.length ? arr[0] : void 0;
1430
1492
  }
1431
- function convertImage(value) {
1432
- if (!value) return void 0;
1433
- if (typeof value === "string") {
1434
- return value;
1435
- }
1436
- if (Array.isArray(value)) {
1437
- for (const item of value) {
1438
- const url = typeof item === "string" ? item : extractImageUrl(item);
1439
- if (url) return url;
1440
- }
1441
- return void 0;
1442
- }
1443
- return extractImageUrl(value);
1444
- }
1445
- function extractImageUrl(value) {
1446
- if (!value || typeof value !== "object") return void 0;
1447
- const record = value;
1448
- const candidate = typeof record.url === "string" ? record.url : typeof record.contentUrl === "string" ? record.contentUrl : void 0;
1449
- return candidate?.trim() || void 0;
1450
- }
1451
1493
  function convertSource(recipe) {
1452
1494
  const author = extractEntityName(recipe.author);
1453
1495
  const publisher = extractEntityName(recipe.publisher);
@@ -1543,7 +1585,7 @@ function convertInstruction(entry) {
1543
1585
  return createHowToStep(entry);
1544
1586
  }
1545
1587
  if ("subsection" in entry) {
1546
- const steps = entry.items.map((item) => typeof item === "string" ? createHowToStep(item) : createHowToStep(item.text)).filter((step) => Boolean(step));
1588
+ const steps = entry.items.map((item) => createHowToStep(item)).filter((step) => Boolean(step));
1547
1589
  if (!steps.length) {
1548
1590
  return null;
1549
1591
  }
@@ -1554,18 +1596,34 @@ function convertInstruction(entry) {
1554
1596
  };
1555
1597
  }
1556
1598
  if ("text" in entry) {
1557
- return createHowToStep(entry.text);
1599
+ return createHowToStep(entry);
1558
1600
  }
1559
1601
  return createHowToStep(String(entry));
1560
1602
  }
1561
/**
 * Build a Schema.org HowToStep from a Soustack instruction.
 *
 * @param {string | { text?: unknown, image?: unknown } | null | undefined} entry
 *   Either the instruction text itself or an instruction object carrying
 *   `text` and an optional `image` URL.
 * @returns {{ "@type": "HowToStep", text: string, image?: string } | null}
 *   The step, or `null` when no non-empty text is available.
 */
function createHowToStep(entry) {
  if (!entry) return null;
  const rawText = typeof entry === "string" ? entry : entry.text;
  // Guard on the type rather than on nullishness: a non-string `text`
  // (e.g. a number) would otherwise throw when `.trim()` is called.
  if (typeof rawText !== "string") {
    return null;
  }
  const text = rawText.trim();
  if (!text) {
    return null;
  }
  const step = {
    "@type": "HowToStep",
    text
  };
  // Only forward a real, non-blank URL string so malformed image values do
  // not leak into the generated JSON-LD.
  if (typeof entry !== "string" && typeof entry.image === "string") {
    const image = entry.image.trim();
    if (image) {
      step.image = image;
    }
  }
  return step;
}
1570
1628
  function convertTime2(time) {
1571
1629
  if (!time) {
@@ -1728,7 +1786,7 @@ async function fetchPage(url, options = {}) {
1728
1786
  };
1729
1787
  const response = await resolvedFetch(url, requestInit);
1730
1788
  clearTimeout(timeoutId);
1731
- if (response && (typeof process === "undefined" || process.env.NODE_ENV !== "test")) {
1789
+ if (response && typeof process !== "undefined" && process.env.NODE_ENV !== "test") {
1732
1790
  try {
1733
1791
  const globalFetch = typeof globalThis !== "undefined" && typeof globalThis.fetch !== "undefined" ? globalThis.fetch : null;
1734
1792
  if (globalFetch) {
@@ -1746,7 +1804,7 @@ async function fetchPage(url, options = {}) {
1746
1804
  throw error;
1747
1805
  }
1748
1806
  const html = await response.text();
1749
- if (typeof process === "undefined" || process.env.NODE_ENV !== "test") {
1807
+ if (typeof process !== "undefined" && process.env.NODE_ENV !== "test") {
1750
1808
  try {
1751
1809
  const globalFetch = typeof globalThis !== "undefined" && typeof globalThis.fetch !== "undefined" ? globalThis.fetch : null;
1752
1810
  if (globalFetch) {
@@ -2006,7 +2064,7 @@ function extractRecipe(html) {
2006
2064
  return extractRecipeBrowser(html);
2007
2065
  }
2008
2066
  const jsonLdRecipe = extractJsonLd(html);
2009
- if (typeof process === "undefined" || process.env.NODE_ENV !== "test") {
2067
+ if (typeof process !== "undefined" && process.env.NODE_ENV !== "test") {
2010
2068
  try {
2011
2069
  const globalFetch = typeof globalThis !== "undefined" && typeof globalThis.fetch !== "undefined" ? globalThis.fetch : null;
2012
2070
  if (globalFetch) {
@@ -2020,7 +2078,7 @@ function extractRecipe(html) {
2020
2078
  return { recipe: jsonLdRecipe, source: "jsonld" };
2021
2079
  }
2022
2080
  const microdataRecipe = extractMicrodata(html);
2023
- if (typeof process === "undefined" || process.env.NODE_ENV !== "test") {
2081
+ if (typeof process !== "undefined" && process.env.NODE_ENV !== "test") {
2024
2082
  try {
2025
2083
  const globalFetch = typeof globalThis !== "undefined" && typeof globalThis.fetch !== "undefined" ? globalThis.fetch : null;
2026
2084
  if (globalFetch) {
@@ -2038,7 +2096,7 @@ function extractRecipe(html) {
2038
2096
 
2039
2097
  // src/scraper/index.ts
2040
2098
  async function scrapeRecipe(url, options = {}) {
2041
- if (typeof process === "undefined" || process.env.NODE_ENV !== "test") {
2099
+ if (typeof process !== "undefined" && process.env.NODE_ENV !== "test") {
2042
2100
  try {
2043
2101
  const globalFetch = typeof globalThis !== "undefined" && typeof globalThis.fetch !== "undefined" ? globalThis.fetch : null;
2044
2102
  if (globalFetch) {
@@ -2049,7 +2107,7 @@ async function scrapeRecipe(url, options = {}) {
2049
2107
  }
2050
2108
  }
2051
2109
  const html = await fetchPage(url, options);
2052
- if (typeof process === "undefined" || process.env.NODE_ENV !== "test") {
2110
+ if (typeof process !== "undefined" && process.env.NODE_ENV !== "test") {
2053
2111
  try {
2054
2112
  const globalFetch = typeof globalThis !== "undefined" && typeof globalThis.fetch !== "undefined" ? globalThis.fetch : null;
2055
2113
  if (globalFetch) {
@@ -2060,7 +2118,7 @@ async function scrapeRecipe(url, options = {}) {
2060
2118
  }
2061
2119
  }
2062
2120
  const { recipe } = extractRecipe(html);
2063
- if (typeof process === "undefined" || process.env.NODE_ENV !== "test") {
2121
+ if (typeof process !== "undefined" && process.env.NODE_ENV !== "test") {
2064
2122
  try {
2065
2123
  const globalFetch = typeof globalThis !== "undefined" && typeof globalThis.fetch !== "undefined" ? globalThis.fetch : null;
2066
2124
  if (globalFetch) {
@@ -2074,7 +2132,7 @@ async function scrapeRecipe(url, options = {}) {
2074
2132
  throw new Error("No Schema.org recipe data found in page");
2075
2133
  }
2076
2134
  const soustackRecipe = fromSchemaOrg(recipe);
2077
- if (typeof process === "undefined" || process.env.NODE_ENV !== "test") {
2135
+ if (typeof process !== "undefined" && process.env.NODE_ENV !== "test") {
2078
2136
  try {
2079
2137
  const globalFetch = typeof globalThis !== "undefined" && typeof globalThis.fetch !== "undefined" ? globalThis.fetch : null;
2080
2138
  if (globalFetch) {