soustack 0.3.0 → 0.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (148) hide show
  1. package/README.md +44 -27
  2. package/dist/cli/index.js +5225 -992
  3. package/dist/cli/index.js.map +1 -1
  4. package/dist/index.d.mts +163 -91
  5. package/dist/index.d.ts +163 -91
  6. package/dist/index.js +5077 -1007
  7. package/dist/index.js.map +1 -1
  8. package/dist/index.mjs +5076 -1007
  9. package/dist/index.mjs.map +1 -1
  10. package/dist/{scrape.d.mts → scrape/index.d.mts} +88 -74
  11. package/dist/{scrape.d.ts → scrape/index.d.ts} +88 -74
  12. package/dist/{scrape.js → scrape/index.js} +255 -124
  13. package/dist/scrape/index.js.map +1 -0
  14. package/dist/{scrape.mjs → scrape/index.mjs} +255 -124
  15. package/dist/scrape/index.mjs.map +1 -0
  16. package/package.json +21 -9
  17. package/spec/.sync-meta.json +149 -0
  18. package/spec/SOUSTACK_SPEC_VERSION +1 -0
  19. package/spec/defs/common.schema.json +46 -0
  20. package/spec/defs/duration.schema.json +33 -0
  21. package/spec/defs/entities.schema.json +111 -0
  22. package/spec/defs/ingredientQuantified.schema.json +9 -0
  23. package/spec/defs/quantity.schema.json +16 -0
  24. package/spec/defs/scalingRule.schema.json +127 -0
  25. package/spec/defs/temperature.schema.json +63 -0
  26. package/spec/fixtures/content/illustrated-step.valid.json +24 -0
  27. package/spec/fixtures/invalid/equipment-unknown-reference.invalid.json +38 -0
  28. package/spec/fixtures/invalid/mise-en-place-unknown-equipment.invalid.json +37 -0
  29. package/spec/fixtures/invalid/mise-en-place-unknown-input.invalid.json +41 -0
  30. package/spec/fixtures/invalid/storage-leftovers-missing-method.invalid.json +31 -0
  31. package/spec/fixtures/invalid/storage-leftovers-wrong-type.invalid.json +23 -0
  32. package/spec/fixtures/level/base-full.valid.json +162 -0
  33. package/spec/fixtures/level/base-missing-yield.invalid.json +12 -0
  34. package/spec/fixtures/level/lite-min.valid.json +14 -0
  35. package/spec/fixtures/profile/profile-base.valid.json +20 -0
  36. package/spec/fixtures/profile/profile-equipped.valid.json +28 -0
  37. package/spec/fixtures/profile/profile-illustrated.valid.json +28 -0
  38. package/spec/fixtures/profile/profile-lite.valid.json +13 -0
  39. package/spec/fixtures/profile/profile-prepped.valid.json +31 -0
  40. package/spec/fixtures/profile/profile-scalable-missing-scaling.invalid.json +29 -0
  41. package/spec/fixtures/profile/profile-scalable.valid.json +49 -0
  42. package/spec/fixtures/profile/profile-timed-missing-structured.invalid.json +30 -0
  43. package/spec/fixtures/scaling/bakers-percent-missing-ref.invalid.json +41 -0
  44. package/spec/fixtures/scaling/bakers-percent.valid.json +51 -0
  45. package/spec/fixtures/scaling/discrete-range.invalid.json +36 -0
  46. package/spec/fixtures/scaling/missing-quantified.invalid.json +40 -0
  47. package/spec/fixtures/scaling/reject-bakersPercentage.invalid.json +50 -0
  48. package/spec/fixtures/stacks/compute-missing-timed.invalid.json +32 -0
  49. package/spec/fixtures/stacks/dietary-no-signal.invalid.json +16 -0
  50. package/spec/fixtures/stacks/illustrated-empty.invalid.json +13 -0
  51. package/spec/fixtures/stacks/quantified-string.invalid.json +22 -0
  52. package/spec/fixtures/stacks/referenced-missing-input.invalid.json +32 -0
  53. package/spec/fixtures/stacks/storage-min.valid.json +20 -0
  54. package/spec/fixtures/stacks/storage-no-duration.invalid.json +16 -0
  55. package/spec/fixtures/stacks/timed-implies-structured.valid.json +50 -0
  56. package/spec/fixtures/stacks/timed-range.invalid.json +33 -0
  57. package/spec/fixtures/valid/equipment-scaling-rules.valid.json +76 -0
  58. package/spec/fixtures/valid/equipment-strings.valid.json +31 -0
  59. package/spec/fixtures/valid/equipment-structured-uses.valid.json +47 -0
  60. package/spec/fixtures/valid/mise-en-place-basic.valid.json +31 -0
  61. package/spec/fixtures/valid/mise-en-place-referenced-equipment.valid.json +51 -0
  62. package/spec/fixtures/valid/prep-ingredient-strings.valid.json +48 -0
  63. package/spec/fixtures/valid/prep-ingredient-structured.valid.json +45 -0
  64. package/spec/fixtures/valid/profile-equipped.valid.json +29 -0
  65. package/spec/fixtures/valid/profile-prepped.valid.json +32 -0
  66. package/spec/fixtures/valid/quantified-nested-ingredient-sections.valid.json +61 -0
  67. package/spec/fixtures/valid/referenced-scaling.valid.json +67 -0
  68. package/spec/fixtures/valid/storage-leftovers-simple.valid.json +27 -0
  69. package/spec/fixtures/valid/storage-leftovers-structured.valid.json +43 -0
  70. package/spec/fixtures/valid/structured-nested-step-sections.valid.json +84 -0
  71. package/spec/schemas/stacks-registry.schema.json +108 -0
  72. package/spec/soustack.schema.json +2379 -0
  73. package/spec/stacks/compute.schema.json +7 -0
  74. package/spec/stacks/compute@1.md +22 -0
  75. package/spec/stacks/dietary.schema.json +45 -0
  76. package/spec/stacks/dietary@1.md +24 -0
  77. package/spec/stacks/equipment.schema.json +98 -0
  78. package/spec/stacks/equipment@1.md +244 -0
  79. package/spec/stacks/illustrated.schema.json +54 -0
  80. package/spec/stacks/illustrated@1.md +24 -0
  81. package/spec/stacks/prep.schema.json +76 -0
  82. package/spec/stacks/prep@1.md +276 -0
  83. package/spec/stacks/quantified.schema.json +74 -0
  84. package/spec/stacks/quantified@1.md +24 -0
  85. package/spec/stacks/referenced.schema.json +96 -0
  86. package/spec/stacks/referenced@1.md +23 -0
  87. package/spec/stacks/registry.json +112 -0
  88. package/spec/stacks/scaling.schema.json +99 -0
  89. package/spec/stacks/scaling@1.md +238 -0
  90. package/spec/stacks/storage.schema.json +132 -0
  91. package/spec/stacks/storage@1.md +256 -0
  92. package/spec/stacks/structured.schema.json +48 -0
  93. package/spec/stacks/structured@1.md +24 -0
  94. package/spec/stacks/substitutions.schema.json +43 -0
  95. package/spec/stacks/substitutions@1.md +24 -0
  96. package/spec/stacks/techniques.schema.json +28 -0
  97. package/spec/stacks/techniques@1.md +23 -0
  98. package/spec/stacks/timed.schema.json +60 -0
  99. package/spec/stacks/timed@1.md +23 -0
  100. package/src/defs/common.schema.json +46 -0
  101. package/src/defs/duration.schema.json +33 -0
  102. package/src/defs/entities.schema.json +111 -0
  103. package/src/defs/ingredientQuantified.schema.json +9 -0
  104. package/src/defs/quantity.schema.json +16 -0
  105. package/src/defs/scalingRule.schema.json +127 -0
  106. package/src/defs/temperature.schema.json +63 -0
  107. package/src/profiles/base.schema.json +2 -2
  108. package/src/profiles/equipped.schema.json +10 -0
  109. package/src/profiles/illustrated.schema.json +4 -4
  110. package/src/profiles/lite.schema.json +10 -0
  111. package/src/profiles/prepped.schema.json +10 -0
  112. package/src/profiles/scalable.schema.json +6 -6
  113. package/src/profiles/timed.schema.json +10 -0
  114. package/src/schema.json +2271 -248
  115. package/src/schemas/stacks-registry.schema.json +108 -0
  116. package/src/soustack.schema.json +2271 -248
  117. package/src/stacks/compute.schema.json +7 -0
  118. package/src/stacks/compute@1.md +22 -0
  119. package/src/stacks/dietary.schema.json +45 -0
  120. package/src/stacks/dietary@1.md +24 -0
  121. package/src/stacks/equipment.schema.json +98 -0
  122. package/src/stacks/equipment@1.md +244 -0
  123. package/src/stacks/illustrated.schema.json +54 -0
  124. package/src/stacks/illustrated@1.md +24 -0
  125. package/src/stacks/prep.schema.json +76 -0
  126. package/src/stacks/prep@1.md +276 -0
  127. package/src/stacks/quantified.schema.json +74 -0
  128. package/src/stacks/quantified@1.md +24 -0
  129. package/src/stacks/referenced.schema.json +96 -0
  130. package/src/stacks/referenced@1.md +23 -0
  131. package/src/stacks/registry.json +112 -0
  132. package/src/stacks/scaling.schema.json +99 -0
  133. package/src/stacks/scaling@1.md +238 -0
  134. package/src/stacks/storage.schema.json +132 -0
  135. package/src/stacks/storage@1.md +256 -0
  136. package/src/stacks/structured.schema.json +48 -0
  137. package/src/stacks/structured@1.md +24 -0
  138. package/src/stacks/substitutions.schema.json +43 -0
  139. package/src/stacks/substitutions@1.md +24 -0
  140. package/src/stacks/techniques.schema.json +28 -0
  141. package/src/stacks/techniques@1.md +23 -0
  142. package/src/stacks/timed.schema.json +60 -0
  143. package/src/stacks/timed@1.md +23 -0
  144. package/dist/scrape.js.map +0 -1
  145. package/dist/scrape.mjs.map +0 -1
  146. package/src/profiles/cookable.schema.json +0 -18
  147. package/src/profiles/quantified.schema.json +0 -43
  148. package/src/profiles/schedulable.schema.json +0 -43
@@ -128,6 +128,138 @@ function extractUrl(value) {
128
128
  return trimmed || void 0;
129
129
  }
130
130
 
131
+ // src/normalize.ts
132
/**
 * Produce a normalized deep copy of a recipe document.
 *
 * The input is never mutated: all work happens on a JSON round-tripped copy.
 *
 * @param {object} input - Recipe document (a plain, JSON-serializable object).
 * @returns {{ recipe: object, warnings: string[] }} The normalized copy and
 *   any non-fatal notices collected along the way.
 * @throws {Error} If `input` is not a plain object, or if it still carries
 *   the legacy field that `stacks` replaced.
 */
function normalizeRecipe(input) {
  // Arrays are typeof "object" too, but a recipe must be a keyed object.
  if (!input || typeof input !== "object" || Array.isArray(input)) {
    throw new Error("Recipe input must be an object");
  }
  const recipe = JSON.parse(JSON.stringify(input));
  // Objects with a custom toJSON (e.g. Date) serialize to a non-object;
  // reject them here instead of failing later with an opaque TypeError.
  if (!recipe || typeof recipe !== "object" || Array.isArray(recipe)) {
    throw new Error("Recipe input must be an object");
  }
  const warnings = [];
  const legacyField = ["mod", "ules"].join("");
  if (legacyField in recipe) {
    throw new Error("The legacy field is no longer supported. Use `stacks` instead.");
  }
  // normalizeStacks always assigns an object to recipe.stacks.
  normalizeStacks(recipe, warnings);
  if (typeof recipe.version === "string" && !recipe.recipeVersion) {
    recipe.recipeVersion = recipe.version;
    delete recipe.version;
    warnings.push("'version' is deprecated; mapped to 'recipeVersion'.");
  }
  normalizeTime(recipe);
  return {
    recipe,
    warnings
  };
}
157
/**
 * Rewrite `recipe.stacks` in place as a `{ name: version }` map.
 *
 * Two input shapes are understood:
 *  - an object map, whose entries are kept only when the value is an
 *    integer >= 1 (anything else produces a warning);
 *  - an array of `name@version` strings, where the highest version seen for
 *    a name wins, unparsable strings produce a warning, and non-string
 *    entries are skipped.
 * Any other value (including a missing field) yields an empty map.
 *
 * @param {object} recipe - Recipe being normalized; its `stacks` is replaced.
 * @param {string[]} warnings - Collector that receives non-fatal notices.
 * @returns {void}
 */
function normalizeStacks(recipe, warnings) {
  const stacks = {};
  const declared = recipe.stacks;
  // Own-property test: a truthiness test on stacks[name] would also see
  // inherited members, so a stack named "constructor" or "toString" would
  // be mistaken for an existing entry and silently dropped.
  const isRecorded = (name) => Object.prototype.hasOwnProperty.call(stacks, name);
  if (Array.isArray(declared)) {
    for (const identifier of declared) {
      if (typeof identifier !== "string") continue;
      const parsed = parseStackIdentifier(identifier);
      if (!parsed) {
        warnings.push(`Invalid stack identifier '${identifier}': expected format 'name@version' (e.g., 'scaling@1')`);
        continue;
      }
      const { name, version } = parsed;
      if (!isRecorded(name) || stacks[name] < version) {
        stacks[name] = version;
      }
    }
  } else if (declared && typeof declared === "object") {
    for (const [key, value] of Object.entries(declared)) {
      if (typeof value === "number" && Number.isInteger(value) && value >= 1) {
        stacks[key] = value;
      } else {
        warnings.push(`Invalid stack version for '${key}': expected positive integer, got ${value}`);
      }
    }
  }
  recipe.stacks = stacks;
}
184
/**
 * Parse a `name@version` stack identifier such as `scaling@1`.
 *
 * Surrounding whitespace is ignored. The name may contain letters, digits,
 * `_` and `-`; the version must be a run of decimal digits.
 *
 * @param {unknown} identifier - Candidate identifier.
 * @returns {{ name: string, version: number } | null} The parsed parts, or
 *   `null` when the value is not a string, does not match the format, or the
 *   version is not a safe integer >= 1.
 */
function parseStackIdentifier(identifier) {
  if (typeof identifier !== "string") {
    return null;
  }
  const match = identifier.trim().match(/^([a-z0-9_-]+)@(\d+)$/i);
  if (!match) {
    return null;
  }
  const [, name, versionStr] = match;
  const version = Number.parseInt(versionStr, 10);
  // The pattern guarantees digits, so NaN cannot occur; what can occur is a
  // digit run too long to be represented exactly, which must not be accepted
  // as a version number.
  if (!Number.isSafeInteger(version) || version < 1) {
    return null;
  }
  return { name, version };
}
199
/**
 * Convert the structured duration fields of `recipe.time` in place.
 *
 * For each of `prep`, `active`, `passive` and `total`, a value that is not
 * already a number is handed to `parseDuration`; the field is overwritten
 * only when that call returns something other than `null`. Nothing happens
 * when `recipe.time` is missing, is not an object, or is an array.
 *
 * @param {object} recipe - Recipe whose `time` object is updated.
 * @returns {void}
 */
function normalizeTime(recipe) {
  const time = recipe?.time;
  const isTimeRecord = Boolean(time) && typeof time === "object" && !Array.isArray(time);
  if (!isTimeRecord) {
    return;
  }
  for (const field of ["prep", "active", "passive", "total"]) {
    const current = time[field];
    if (typeof current === "number") {
      continue;
    }
    const converted = parseDuration(current);
    if (converted !== null) {
      time[field] = converted;
    }
  }
}
217
+
218
+ // src/specVersion.ts
219
+ var SOUSTACK_SPEC_VERSION = "0.0.2";
220
+
221
+ // src/schemaMetadata.ts
222
+ var CANONICAL_SCHEMA_ID = "https://soustack.spec/soustack.schema.json";
223
+ var LEGACY_SCHEMA_ID = `http://soustack.org/schema/v${SOUSTACK_SPEC_VERSION}`;
224
+ var RAW_SPEC_BASE = "https://raw.githubusercontent.com/soustack/soustack-spec";
225
+ var RAW_SPEC_FORK_BASE = "https://raw.githubusercontent.com/RichardHerold/soustack-spec";
226
+ var SCHEMA_ALIAS_MAP = /* @__PURE__ */ new Map([
227
+ [CANONICAL_SCHEMA_ID, CANONICAL_SCHEMA_ID],
228
+ [LEGACY_SCHEMA_ID, CANONICAL_SCHEMA_ID],
229
+ [`${LEGACY_SCHEMA_ID}/`, CANONICAL_SCHEMA_ID],
230
+ ["https://soustack.org/schema/v0.0.2", CANONICAL_SCHEMA_ID],
231
+ ["https://soustack.org/schema/v0.0.2/", CANONICAL_SCHEMA_ID],
232
+ [`${RAW_SPEC_BASE}/main/soustack.schema.json`, CANONICAL_SCHEMA_ID],
233
+ [`${RAW_SPEC_BASE}/v${SOUSTACK_SPEC_VERSION}/soustack.schema.json`, CANONICAL_SCHEMA_ID],
234
+ [`${RAW_SPEC_FORK_BASE}/main/soustack.schema.json`, CANONICAL_SCHEMA_ID],
235
+ [`${RAW_SPEC_FORK_BASE}/v${SOUSTACK_SPEC_VERSION}/soustack.schema.json`, CANONICAL_SCHEMA_ID]
236
+ ]);
237
/**
 * Resolve a `$schema` hint against the known schema aliases.
 *
 * A single trailing `#` is stripped before lookup. The result reports the
 * mapped id (or the stripped hint when there is no mapping), whether the
 * hint is recognized as a Soustack schema (listed in the alias map or
 * starting with one of the known URL prefixes), and whether the hint is
 * listed in the alias map.
 *
 * @param {unknown} value - Raw `$schema` value.
 * @returns {{ canonicalId: (string|undefined), isSoustackSchema: boolean, wasAlias: boolean }}
 */
function resolveSchemaHint(value) {
  const isUsable = typeof value === "string" && Boolean(value);
  if (!isUsable) {
    return { canonicalId: void 0, isSoustackSchema: false, wasAlias: false };
  }
  const hint = value.replace(/#$/, "");
  const isListed = SCHEMA_ALIAS_MAP.has(hint);
  const canonicalId = SCHEMA_ALIAS_MAP.get(hint) ?? hint;
  const knownPrefixes = [
    "http://soustack.org/schema",
    "https://soustack.org/schema",
    "https://soustack.spec/",
    "https://soustack.org/schemas/"
  ];
  const hasKnownPrefix = knownPrefixes.some((prefix) => canonicalId.startsWith(prefix));
  return {
    canonicalId,
    isSoustackSchema: isListed || hasKnownPrefix,
    // A hint that maps to a different id is necessarily listed, so map
    // membership alone decides this flag.
    wasAlias: isListed
  };
}
250
/**
 * Return a shallow copy of `value` whose `$schema` is a Soustack schema id.
 *
 * Values that are falsy, not objects, or arrays are returned untouched. An
 * existing string `$schema` is kept in its resolved form only when
 * `resolveSchemaHint` reports it as a Soustack schema; in every other case
 * the canonical schema id is used.
 *
 * @param {unknown} value - Document to stamp.
 * @returns {unknown} The stamped copy, or `value` itself when it is not a
 *   non-array object.
 */
function withCanonicalSchema(value) {
  const isRecord = Boolean(value) && typeof value === "object" && !Array.isArray(value);
  if (!isRecord) {
    return value;
  }
  const hint = typeof value.$schema === "string" ? value.$schema : void 0;
  const { isSoustackSchema, canonicalId } = resolveSchemaHint(hint);
  let $schema = CANONICAL_SCHEMA_ID;
  if (isSoustackSchema) {
    $schema = canonicalId ?? CANONICAL_SCHEMA_ID;
  }
  return { ...value, $schema };
}
262
+
131
263
  // src/fromSchemaOrg.ts
132
264
  function fromSchemaOrg(input) {
133
265
  const recipeNode = extractRecipeNode(input);
@@ -143,23 +275,18 @@ function fromSchemaOrg(input) {
143
275
  const source = convertSource(recipeNode);
144
276
  const dateModified = recipeNode.dateModified || void 0;
145
277
  const nutrition = convertNutrition(recipeNode.nutrition);
146
- const attribution = convertAttribution(recipeNode);
147
- const taxonomy = convertTaxonomy(tags, category, extractFirst(recipeNode.recipeCuisine));
148
- const media = convertMedia(recipeNode.image, recipeNode.video);
149
- const times = convertTimes(time);
150
- const modules = [];
151
- if (attribution) modules.push("attribution@1");
152
- if (taxonomy) modules.push("taxonomy@1");
153
- if (media) modules.push("media@1");
154
- if (nutrition) modules.push("nutrition@1");
155
- if (times) modules.push("times@1");
156
- return {
278
+ const images = toArray(normalizeImage(recipeNode.image));
279
+ const videos = normalizeMediaList(recipeNode.video);
280
+ const profile = recipeYield && time ? "base" : "lite";
281
+ const stacks = {};
282
+ const rawRecipe = {
157
283
  "@type": "Recipe",
158
- profile: "minimal",
159
- modules: modules.sort(),
284
+ profile,
285
+ stacks,
160
286
  name: recipeNode.name.trim(),
161
287
  description: recipeNode.description?.trim() || void 0,
162
- image: normalizeImage(recipeNode.image),
288
+ images: images.length ? images : void 0,
289
+ videos: videos.length ? videos : void 0,
163
290
  category,
164
291
  tags: tags.length ? tags : void 0,
165
292
  source,
@@ -169,12 +296,10 @@ function fromSchemaOrg(input) {
169
296
  ingredients,
170
297
  instructions,
171
298
  ...dateModified ? { dateModified } : {},
172
- ...nutrition ? { nutrition } : {},
173
- ...attribution ? { attribution } : {},
174
- ...taxonomy ? { taxonomy } : {},
175
- ...media ? { media } : {},
176
- ...times ? { times } : {}
299
+ ...nutrition ? { nutrition } : {}
177
300
  };
301
+ const { recipe } = normalizeRecipe(rawRecipe);
302
+ return withCanonicalSchema(recipe);
178
303
  }
179
304
  function extractRecipeNode(input) {
180
305
  if (!input) return null;
@@ -218,7 +343,10 @@ function isValidName(name) {
218
343
  function convertIngredients(value) {
219
344
  if (!value) return [];
220
345
  const normalized = Array.isArray(value) ? value : [value];
221
- return normalized.map((item) => typeof item === "string" ? item.trim() : "").filter(Boolean);
346
+ return normalized.map((item) => typeof item === "string" ? item.trim() : "").filter(Boolean).map((name) => ({
347
+ name,
348
+ scaling: { mode: "linear" }
349
+ }));
222
350
  }
223
351
  function convertInstructions(value) {
224
352
  if (!value) return [];
@@ -237,8 +365,8 @@ function convertInstructions(value) {
237
365
  const subsectionItems = extractSectionItems(entry.itemListElement);
238
366
  if (subsectionItems.length) {
239
367
  result.push({
240
- subsection: entry.name?.trim() || "Section",
241
- items: subsectionItems
368
+ section: entry.name?.trim() || "Section",
369
+ steps: subsectionItems
242
370
  });
243
371
  }
244
372
  continue;
@@ -294,7 +422,7 @@ function convertHowToStep(step) {
294
422
  }
295
423
  const instruction = { text };
296
424
  if (id) instruction.id = id;
297
- if (image) instruction.image = image;
425
+ if (image) instruction.images = Array.isArray(image) ? image : [image];
298
426
  if (timing) instruction.timing = timing;
299
427
  return instruction;
300
428
  }
@@ -304,7 +432,13 @@ function extractInstructionTiming(step) {
304
432
  return void 0;
305
433
  }
306
434
  const parsed = smartParseDuration(duration);
307
- return { duration: parsed ?? duration, type: "active" };
435
+ if (parsed === null || parsed === void 0) {
436
+ return void 0;
437
+ }
438
+ return {
439
+ activity: "active",
440
+ duration: { minutes: parsed }
441
+ };
308
442
  }
309
443
  function extractInstructionId(step) {
310
444
  const raw = step["@id"] || step.id || step.url;
@@ -321,14 +455,22 @@ function isHowToSection(value) {
321
455
  return Boolean(value) && typeof value === "object" && value["@type"] === "HowToSection" && Array.isArray(value.itemListElement);
322
456
  }
323
457
  function convertTime(recipe) {
458
+ const total = smartParseDuration(recipe.totalTime ?? "");
324
459
  const prep = smartParseDuration(recipe.prepTime ?? "");
325
460
  const cook = smartParseDuration(recipe.cookTime ?? "");
326
- const total = smartParseDuration(recipe.totalTime ?? "");
327
- const structured = {};
328
- if (prep !== null && prep !== void 0) structured.prep = prep;
329
- if (cook !== null && cook !== void 0) structured.active = cook;
330
- if (total !== null && total !== void 0) structured.total = total;
331
- return Object.keys(structured).length ? structured : void 0;
461
+ const minutes = isPositiveDuration(total) ? total : [prep, cook].filter(isPositiveDuration).reduce((sum, value) => {
462
+ if (sum === null) return value;
463
+ return sum + value;
464
+ }, null);
465
+ if (!isPositiveDuration(minutes)) {
466
+ return void 0;
467
+ }
468
+ return {
469
+ total: { minutes }
470
+ };
471
+ }
472
/**
 * Tell whether a value is a finite number strictly greater than zero.
 *
 * @param {unknown} value - Candidate duration.
 * @returns {boolean}
 */
function isPositiveDuration(value) {
  // Number.isFinite does not coerce, so it is false for every non-number.
  if (!Number.isFinite(value)) {
    return false;
  }
  return value > 0;
}
333
475
  function collectTags(cuisine, keywords) {
334
476
  const tags = /* @__PURE__ */ new Set();
@@ -386,23 +528,6 @@ function extractEntityName(value) {
386
528
  }
387
529
  return void 0;
388
530
  }
389
- function convertAttribution(recipe) {
390
- const attribution = {};
391
- const url = (recipe.url || recipe.mainEntityOfPage)?.trim();
392
- const author = extractEntityName(recipe.author);
393
- const datePublished = recipe.datePublished?.trim();
394
- if (url) attribution.url = url;
395
- if (author) attribution.author = author;
396
- if (datePublished) attribution.datePublished = datePublished;
397
- return Object.keys(attribution).length ? attribution : void 0;
398
- }
399
- function convertTaxonomy(keywords, category, cuisine) {
400
- const taxonomy = {};
401
- if (keywords.length) taxonomy.keywords = keywords;
402
- if (category) taxonomy.category = category;
403
- if (cuisine) taxonomy.cuisine = cuisine;
404
- return Object.keys(taxonomy).length ? taxonomy : void 0;
405
- }
406
531
  function normalizeMediaList(value) {
407
532
  if (!value) return [];
408
533
  if (typeof value === "string") return [value.trim()].filter(Boolean);
@@ -413,28 +538,18 @@ function normalizeMediaList(value) {
413
538
  return url ? [url] : [];
414
539
  }
415
540
  function extractMediaUrl(value) {
416
- if (value && typeof value === "object" && "url" in value && typeof value.url === "string") {
417
- const trimmed = value.url.trim();
418
- return trimmed || void 0;
541
+ if (value && typeof value === "object") {
542
+ const urlValue = typeof value.url === "string" ? value.url : typeof value.contentUrl === "string" ? value.contentUrl : void 0;
543
+ if (typeof urlValue === "string") {
544
+ const trimmed = urlValue.trim();
545
+ return trimmed || void 0;
546
+ }
419
547
  }
420
548
  return void 0;
421
549
  }
422
- function convertMedia(image, video) {
423
- const normalizedImage = normalizeImage(image);
424
- const images = normalizedImage ? Array.isArray(normalizedImage) ? normalizedImage : [normalizedImage] : [];
425
- const videos = normalizeMediaList(video);
426
- const media = {};
427
- if (images.length) media.images = images;
428
- if (videos.length) media.videos = videos;
429
- return Object.keys(media).length ? media : void 0;
430
- }
431
- function convertTimes(time) {
432
- if (!time) return void 0;
433
- const times = {};
434
- if (typeof time.prep === "number") times.prepMinutes = time.prep;
435
- if (typeof time.active === "number") times.cookMinutes = time.active;
436
- if (typeof time.total === "number") times.totalMinutes = time.total;
437
- return Object.keys(times).length ? times : void 0;
550
/**
 * Wrap a value in an array unless it already is one.
 *
 * @param {unknown} value - Any value.
 * @returns {Array} `value` itself when it is an array, `[]` when it is
 *   falsy, otherwise a one-element array containing it.
 */
function toArray(value) {
  if (Array.isArray(value)) {
    return value;
  }
  return value ? [value] : [];
}
439
554
  function convertNutrition(nutrition) {
440
555
  if (!nutrition || typeof nutrition !== "object") {
@@ -535,13 +650,16 @@ async function fetchPage(url, options = {}) {
535
650
  const response = await resolvedFetch(url, requestInit);
536
651
  clearTimeout(timeoutId);
537
652
  if (response && typeof process !== "undefined" && process.env.NODE_ENV !== "test") {
538
- try {
539
- const globalFetch = typeof globalThis !== "undefined" && typeof globalThis.fetch !== "undefined" ? globalThis.fetch : null;
540
- if (globalFetch) {
541
- globalFetch("http://127.0.0.1:7243/ingest/7225c3b5-9ac2-4c94-b561-807ca9003b66", { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify({ location: "scraper/fetch.ts:63", message: "fetch response", data: { url, status: response.status, statusText: response.statusText, ok: response.ok, isNYTimes: url.includes("nytimes.com") }, timestamp: Date.now(), sessionId: "debug-session", runId: "run1", hypothesisId: "B" }) }).catch(() => {
542
- });
653
+ const ingestUrl = process.env.SOUSTACK_DEBUG_INGEST_URL;
654
+ if (ingestUrl) {
655
+ try {
656
+ const globalFetch = typeof globalThis !== "undefined" && typeof globalThis.fetch !== "undefined" ? globalThis.fetch : null;
657
+ if (globalFetch) {
658
+ globalFetch(ingestUrl, { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify({ location: "scraper/fetch.ts:63", message: "fetch response", data: { url, status: response.status, statusText: response.statusText, ok: response.ok, isNYTimes: url.includes("nytimes.com") }, timestamp: Date.now(), sessionId: "debug-session", runId: "run1", hypothesisId: "B" }) }).catch(() => {
659
+ });
660
+ }
661
+ } catch {
543
662
  }
544
- } catch {
545
663
  }
546
664
  }
547
665
  if (!response.ok) {
@@ -553,13 +671,16 @@ async function fetchPage(url, options = {}) {
553
671
  }
554
672
  const html = await response.text();
555
673
  if (typeof process !== "undefined" && process.env.NODE_ENV !== "test") {
556
- try {
557
- const globalFetch = typeof globalThis !== "undefined" && typeof globalThis.fetch !== "undefined" ? globalThis.fetch : null;
558
- if (globalFetch) {
559
- globalFetch("http://127.0.0.1:7243/ingest/7225c3b5-9ac2-4c94-b561-807ca9003b66", { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify({ location: "scraper/fetch.ts:75", message: "HTML received", data: { htmlLength: html.length, hasLoginPage: html.toLowerCase().includes("login") || html.toLowerCase().includes("sign in"), hasRecipeData: html.includes("application/ld+json") || html.includes("schema.org/Recipe") }, timestamp: Date.now(), sessionId: "debug-session", runId: "run1", hypothesisId: "B,D" }) }).catch(() => {
560
- });
674
+ const ingestUrl = process.env.SOUSTACK_DEBUG_INGEST_URL;
675
+ if (ingestUrl) {
676
+ try {
677
+ const globalFetch = typeof globalThis !== "undefined" && typeof globalThis.fetch !== "undefined" ? globalThis.fetch : null;
678
+ if (globalFetch) {
679
+ globalFetch(ingestUrl, { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify({ location: "scraper/fetch.ts:75", message: "HTML received", data: { htmlLength: html.length, hasLoginPage: html.toLowerCase().includes("login") || html.toLowerCase().includes("sign in"), hasRecipeData: html.includes("application/ld+json") || html.includes("schema.org/Recipe") }, timestamp: Date.now(), sessionId: "debug-session", runId: "run1", hypothesisId: "B,D" }) }).catch(() => {
680
+ });
681
+ }
682
+ } catch {
561
683
  }
562
- } catch {
563
684
  }
564
685
  }
565
686
  return html;
@@ -589,8 +710,6 @@ function isRecipeNode(value) {
589
710
  return false;
590
711
  }
591
712
  const type = value["@type"];
592
- fetch("http://127.0.0.1:7243/ingest/7225c3b5-9ac2-4c94-b561-807ca9003b66", { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify({ location: "scraper/extractors/utils.ts:14", message: "isRecipeNode check", data: { type, typeLower: typeof type === "string" ? type.toLowerCase() : Array.isArray(type) ? type.map((t) => typeof t === "string" ? t.toLowerCase() : t) : void 0, isMatch: typeof type === "string" ? RECIPE_TYPES.has(type.toLowerCase()) : Array.isArray(type) ? type.some((e) => typeof e === "string" && RECIPE_TYPES.has(e.toLowerCase())) : false }, timestamp: Date.now(), sessionId: "debug-session", runId: "run1", hypothesisId: "A" }) }).catch(() => {
593
- });
594
713
  if (typeof type === "string") {
595
714
  return RECIPE_TYPES.has(type.toLowerCase());
596
715
  }
@@ -618,20 +737,14 @@ function normalizeText(value) {
618
737
  function extractJsonLd(html) {
619
738
  const $ = load(html);
620
739
  const scripts = $('script[type="application/ld+json"]');
621
- fetch("http://127.0.0.1:7243/ingest/7225c3b5-9ac2-4c94-b561-807ca9003b66", { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify({ location: "scraper/extractors/jsonld.ts:8", message: "JSON-LD scripts found", data: { scriptCount: scripts.length }, timestamp: Date.now(), sessionId: "debug-session", runId: "run1", hypothesisId: "C,D" }) }).catch(() => {
622
- });
623
740
  const candidates = [];
624
741
  scripts.each((_, element) => {
625
742
  const content = $(element).html();
626
743
  if (!content) return;
627
744
  const parsed = safeJsonParse(content);
628
745
  if (!parsed) return;
629
- fetch("http://127.0.0.1:7243/ingest/7225c3b5-9ac2-4c94-b561-807ca9003b66", { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify({ location: "scraper/extractors/jsonld.ts:18", message: "JSON-LD parsed", data: { hasGraph: !!(parsed && typeof parsed === "object" && "@graph" in parsed), type: parsed && typeof parsed === "object" && "@type" in parsed ? parsed["@type"] : void 0 }, timestamp: Date.now(), sessionId: "debug-session", runId: "run1", hypothesisId: "A,C" }) }).catch(() => {
630
- });
631
746
  collectCandidates(parsed, candidates);
632
747
  });
633
- fetch("http://127.0.0.1:7243/ingest/7225c3b5-9ac2-4c94-b561-807ca9003b66", { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify({ location: "scraper/extractors/jsonld.ts:22", message: "JSON-LD candidates", data: { candidateCount: candidates.length, candidateTypes: candidates.map((c) => c["@type"]) }, timestamp: Date.now(), sessionId: "debug-session", runId: "run1", hypothesisId: "A,C" }) }).catch(() => {
634
- });
635
748
  return candidates[0] ?? null;
636
749
  }
637
750
  function collectCandidates(payload, bucket) {
@@ -813,13 +926,16 @@ function extractRecipe(html) {
813
926
  }
814
927
  const jsonLdRecipe = extractJsonLd(html);
815
928
  if (typeof process !== "undefined" && process.env.NODE_ENV !== "test") {
816
- try {
817
- const globalFetch = typeof globalThis !== "undefined" && typeof globalThis.fetch !== "undefined" ? globalThis.fetch : null;
818
- if (globalFetch) {
819
- globalFetch("http://127.0.0.1:7243/ingest/7225c3b5-9ac2-4c94-b561-807ca9003b66", { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify({ location: "scraper/extractors/index.ts:6", message: "JSON-LD extraction result", data: { hasJsonLd: !!jsonLdRecipe }, timestamp: Date.now(), sessionId: "debug-session", runId: "run1", hypothesisId: "C,D" }) }).catch(() => {
820
- });
929
+ const ingestUrl = process.env.SOUSTACK_DEBUG_INGEST_URL;
930
+ if (ingestUrl) {
931
+ try {
932
+ const globalFetch = typeof globalThis !== "undefined" && typeof globalThis.fetch !== "undefined" ? globalThis.fetch : null;
933
+ if (globalFetch) {
934
+ globalFetch(ingestUrl, { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify({ location: "scraper/extractors/index.ts:6", message: "JSON-LD extraction result", data: { hasJsonLd: !!jsonLdRecipe }, timestamp: Date.now(), sessionId: "debug-session", runId: "run1", hypothesisId: "C,D" }) }).catch(() => {
935
+ });
936
+ }
937
+ } catch {
821
938
  }
822
- } catch {
823
939
  }
824
940
  }
825
941
  if (jsonLdRecipe) {
@@ -827,13 +943,16 @@ function extractRecipe(html) {
827
943
  }
828
944
  const microdataRecipe = extractMicrodata(html);
829
945
  if (typeof process !== "undefined" && process.env.NODE_ENV !== "test") {
830
- try {
831
- const globalFetch = typeof globalThis !== "undefined" && typeof globalThis.fetch !== "undefined" ? globalThis.fetch : null;
832
- if (globalFetch) {
833
- globalFetch("http://127.0.0.1:7243/ingest/7225c3b5-9ac2-4c94-b561-807ca9003b66", { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify({ location: "scraper/extractors/index.ts:12", message: "Microdata extraction result", data: { hasMicrodata: !!microdataRecipe }, timestamp: Date.now(), sessionId: "debug-session", runId: "run1", hypothesisId: "D" }) }).catch(() => {
834
- });
946
+ const ingestUrl = process.env.SOUSTACK_DEBUG_INGEST_URL;
947
+ if (ingestUrl) {
948
+ try {
949
+ const globalFetch = typeof globalThis !== "undefined" && typeof globalThis.fetch !== "undefined" ? globalThis.fetch : null;
950
+ if (globalFetch) {
951
+ globalFetch(ingestUrl, { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify({ location: "scraper/extractors/index.ts:12", message: "Microdata extraction result", data: { hasMicrodata: !!microdataRecipe }, timestamp: Date.now(), sessionId: "debug-session", runId: "run1", hypothesisId: "D" }) }).catch(() => {
952
+ });
953
+ }
954
+ } catch {
835
955
  }
836
- } catch {
837
956
  }
838
957
  }
839
958
  if (microdataRecipe) {
@@ -845,35 +964,44 @@ function extractRecipe(html) {
845
964
  // src/scraper/index.ts
846
965
  async function scrapeRecipe(url, options = {}) {
847
966
  if (typeof process !== "undefined" && process.env.NODE_ENV !== "test") {
848
- try {
849
- const globalFetch = typeof globalThis !== "undefined" && typeof globalThis.fetch !== "undefined" ? globalThis.fetch : null;
850
- if (globalFetch) {
851
- globalFetch("http://127.0.0.1:7243/ingest/7225c3b5-9ac2-4c94-b561-807ca9003b66", { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify({ location: "scraper/index.ts:7", message: "scrapeRecipe entry", data: { url, hasOptions: !!options }, timestamp: Date.now(), sessionId: "debug-session", runId: "run1", hypothesisId: "A,B,C,D,E" }) }).catch(() => {
852
- });
967
+ const ingestUrl = process.env.SOUSTACK_DEBUG_INGEST_URL;
968
+ if (ingestUrl) {
969
+ try {
970
+ const globalFetch = typeof globalThis !== "undefined" && typeof globalThis.fetch !== "undefined" ? globalThis.fetch : null;
971
+ if (globalFetch) {
972
+ globalFetch(ingestUrl, { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify({ location: "scraper/index.ts:7", message: "scrapeRecipe entry", data: { url, hasOptions: !!options }, timestamp: Date.now(), sessionId: "debug-session", runId: "run1", hypothesisId: "A,B,C,D,E" }) }).catch(() => {
973
+ });
974
+ }
975
+ } catch {
853
976
  }
854
- } catch {
855
977
  }
856
978
  }
857
979
  const html = await fetchPage(url, options);
858
980
  if (typeof process !== "undefined" && process.env.NODE_ENV !== "test") {
859
- try {
860
- const globalFetch = typeof globalThis !== "undefined" && typeof globalThis.fetch !== "undefined" ? globalThis.fetch : null;
861
- if (globalFetch) {
862
- globalFetch("http://127.0.0.1:7243/ingest/7225c3b5-9ac2-4c94-b561-807ca9003b66", { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify({ location: "scraper/index.ts:9", message: "HTML fetched", data: { htmlLength: html?.length, htmlPreview: html?.substring(0, 200) }, timestamp: Date.now(), sessionId: "debug-session", runId: "run1", hypothesisId: "B" }) }).catch(() => {
863
- });
981
+ const ingestUrl = process.env.SOUSTACK_DEBUG_INGEST_URL;
982
+ if (ingestUrl) {
983
+ try {
984
+ const globalFetch = typeof globalThis !== "undefined" && typeof globalThis.fetch !== "undefined" ? globalThis.fetch : null;
985
+ if (globalFetch) {
986
+ globalFetch(ingestUrl, { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify({ location: "scraper/index.ts:9", message: "HTML fetched", data: { htmlLength: html?.length, htmlPreview: html?.substring(0, 200) }, timestamp: Date.now(), sessionId: "debug-session", runId: "run1", hypothesisId: "B" }) }).catch(() => {
987
+ });
988
+ }
989
+ } catch {
864
990
  }
865
- } catch {
866
991
  }
867
992
  }
868
993
  const { recipe } = extractRecipe(html);
869
994
  if (typeof process !== "undefined" && process.env.NODE_ENV !== "test") {
870
- try {
871
- const globalFetch = typeof globalThis !== "undefined" && typeof globalThis.fetch !== "undefined" ? globalThis.fetch : null;
872
- if (globalFetch) {
873
- globalFetch("http://127.0.0.1:7243/ingest/7225c3b5-9ac2-4c94-b561-807ca9003b66", { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify({ location: "scraper/index.ts:11", message: "extractRecipe result", data: { hasRecipe: !!recipe, recipeType: recipe?.["@type"], recipeName: recipe?.name }, timestamp: Date.now(), sessionId: "debug-session", runId: "run1", hypothesisId: "A,C,D" }) }).catch(() => {
874
- });
995
+ const ingestUrl = process.env.SOUSTACK_DEBUG_INGEST_URL;
996
+ if (ingestUrl) {
997
+ try {
998
+ const globalFetch = typeof globalThis !== "undefined" && typeof globalThis.fetch !== "undefined" ? globalThis.fetch : null;
999
+ if (globalFetch) {
1000
+ globalFetch(ingestUrl, { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify({ location: "scraper/index.ts:11", message: "extractRecipe result", data: { hasRecipe: !!recipe, recipeType: recipe?.["@type"], recipeName: recipe?.name }, timestamp: Date.now(), sessionId: "debug-session", runId: "run1", hypothesisId: "A,C,D" }) }).catch(() => {
1001
+ });
1002
+ }
1003
+ } catch {
875
1004
  }
876
- } catch {
877
1005
  }
878
1006
  }
879
1007
  if (!recipe) {
@@ -881,13 +1009,16 @@ async function scrapeRecipe(url, options = {}) {
881
1009
  }
882
1010
  const soustackRecipe = fromSchemaOrg(recipe);
883
1011
  if (typeof process !== "undefined" && process.env.NODE_ENV !== "test") {
884
- try {
885
- const globalFetch = typeof globalThis !== "undefined" && typeof globalThis.fetch !== "undefined" ? globalThis.fetch : null;
886
- if (globalFetch) {
887
- globalFetch("http://127.0.0.1:7243/ingest/7225c3b5-9ac2-4c94-b561-807ca9003b66", { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify({ location: "scraper/index.ts:17", message: "fromSchemaOrg result", data: { hasSoustackRecipe: !!soustackRecipe, soustackRecipeName: soustackRecipe?.name }, timestamp: Date.now(), sessionId: "debug-session", runId: "run1", hypothesisId: "A" }) }).catch(() => {
888
- });
1012
+ const ingestUrl = process.env.SOUSTACK_DEBUG_INGEST_URL;
1013
+ if (ingestUrl) {
1014
+ try {
1015
+ const globalFetch = typeof globalThis !== "undefined" && typeof globalThis.fetch !== "undefined" ? globalThis.fetch : null;
1016
+ if (globalFetch) {
1017
+ globalFetch(ingestUrl, { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify({ location: "scraper/index.ts:17", message: "fromSchemaOrg result", data: { hasSoustackRecipe: !!soustackRecipe, soustackRecipeName: soustackRecipe?.name }, timestamp: Date.now(), sessionId: "debug-session", runId: "run1", hypothesisId: "A" }) }).catch(() => {
1018
+ });
1019
+ }
1020
+ } catch {
889
1021
  }
890
- } catch {
891
1022
  }
892
1023
  }
893
1024
  if (!soustackRecipe) {
@@ -912,5 +1043,5 @@ function extractSchemaOrgRecipeFromHTML(html) {
912
1043
  }
913
1044
 
914
1045
  export { extractRecipeFromHTML, extractSchemaOrgRecipeFromHTML, fetchPage, scrapeRecipe };
915
- //# sourceMappingURL=scrape.mjs.map
916
- //# sourceMappingURL=scrape.mjs.map
1046
+ //# sourceMappingURL=index.mjs.map
1047
+ //# sourceMappingURL=index.mjs.map