soustack 0.3.0 → 0.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (148) hide show
  1. package/README.md +44 -27
  2. package/dist/cli/index.js +5225 -992
  3. package/dist/cli/index.js.map +1 -1
  4. package/dist/index.d.mts +163 -91
  5. package/dist/index.d.ts +163 -91
  6. package/dist/index.js +5077 -1007
  7. package/dist/index.js.map +1 -1
  8. package/dist/index.mjs +5076 -1007
  9. package/dist/index.mjs.map +1 -1
  10. package/dist/{scrape.d.mts → scrape/index.d.mts} +88 -74
  11. package/dist/{scrape.d.ts → scrape/index.d.ts} +88 -74
  12. package/dist/{scrape.js → scrape/index.js} +255 -124
  13. package/dist/scrape/index.js.map +1 -0
  14. package/dist/{scrape.mjs → scrape/index.mjs} +255 -124
  15. package/dist/scrape/index.mjs.map +1 -0
  16. package/package.json +21 -9
  17. package/spec/.sync-meta.json +149 -0
  18. package/spec/SOUSTACK_SPEC_VERSION +1 -0
  19. package/spec/defs/common.schema.json +46 -0
  20. package/spec/defs/duration.schema.json +33 -0
  21. package/spec/defs/entities.schema.json +111 -0
  22. package/spec/defs/ingredientQuantified.schema.json +9 -0
  23. package/spec/defs/quantity.schema.json +16 -0
  24. package/spec/defs/scalingRule.schema.json +127 -0
  25. package/spec/defs/temperature.schema.json +63 -0
  26. package/spec/fixtures/content/illustrated-step.valid.json +24 -0
  27. package/spec/fixtures/invalid/equipment-unknown-reference.invalid.json +38 -0
  28. package/spec/fixtures/invalid/mise-en-place-unknown-equipment.invalid.json +37 -0
  29. package/spec/fixtures/invalid/mise-en-place-unknown-input.invalid.json +41 -0
  30. package/spec/fixtures/invalid/storage-leftovers-missing-method.invalid.json +31 -0
  31. package/spec/fixtures/invalid/storage-leftovers-wrong-type.invalid.json +23 -0
  32. package/spec/fixtures/level/base-full.valid.json +162 -0
  33. package/spec/fixtures/level/base-missing-yield.invalid.json +12 -0
  34. package/spec/fixtures/level/lite-min.valid.json +14 -0
  35. package/spec/fixtures/profile/profile-base.valid.json +20 -0
  36. package/spec/fixtures/profile/profile-equipped.valid.json +28 -0
  37. package/spec/fixtures/profile/profile-illustrated.valid.json +28 -0
  38. package/spec/fixtures/profile/profile-lite.valid.json +13 -0
  39. package/spec/fixtures/profile/profile-prepped.valid.json +31 -0
  40. package/spec/fixtures/profile/profile-scalable-missing-scaling.invalid.json +29 -0
  41. package/spec/fixtures/profile/profile-scalable.valid.json +49 -0
  42. package/spec/fixtures/profile/profile-timed-missing-structured.invalid.json +30 -0
  43. package/spec/fixtures/scaling/bakers-percent-missing-ref.invalid.json +41 -0
  44. package/spec/fixtures/scaling/bakers-percent.valid.json +51 -0
  45. package/spec/fixtures/scaling/discrete-range.invalid.json +36 -0
  46. package/spec/fixtures/scaling/missing-quantified.invalid.json +40 -0
  47. package/spec/fixtures/scaling/reject-bakersPercentage.invalid.json +50 -0
  48. package/spec/fixtures/stacks/compute-missing-timed.invalid.json +32 -0
  49. package/spec/fixtures/stacks/dietary-no-signal.invalid.json +16 -0
  50. package/spec/fixtures/stacks/illustrated-empty.invalid.json +13 -0
  51. package/spec/fixtures/stacks/quantified-string.invalid.json +22 -0
  52. package/spec/fixtures/stacks/referenced-missing-input.invalid.json +32 -0
  53. package/spec/fixtures/stacks/storage-min.valid.json +20 -0
  54. package/spec/fixtures/stacks/storage-no-duration.invalid.json +16 -0
  55. package/spec/fixtures/stacks/timed-implies-structured.valid.json +50 -0
  56. package/spec/fixtures/stacks/timed-range.invalid.json +33 -0
  57. package/spec/fixtures/valid/equipment-scaling-rules.valid.json +76 -0
  58. package/spec/fixtures/valid/equipment-strings.valid.json +31 -0
  59. package/spec/fixtures/valid/equipment-structured-uses.valid.json +47 -0
  60. package/spec/fixtures/valid/mise-en-place-basic.valid.json +31 -0
  61. package/spec/fixtures/valid/mise-en-place-referenced-equipment.valid.json +51 -0
  62. package/spec/fixtures/valid/prep-ingredient-strings.valid.json +48 -0
  63. package/spec/fixtures/valid/prep-ingredient-structured.valid.json +45 -0
  64. package/spec/fixtures/valid/profile-equipped.valid.json +29 -0
  65. package/spec/fixtures/valid/profile-prepped.valid.json +32 -0
  66. package/spec/fixtures/valid/quantified-nested-ingredient-sections.valid.json +61 -0
  67. package/spec/fixtures/valid/referenced-scaling.valid.json +67 -0
  68. package/spec/fixtures/valid/storage-leftovers-simple.valid.json +27 -0
  69. package/spec/fixtures/valid/storage-leftovers-structured.valid.json +43 -0
  70. package/spec/fixtures/valid/structured-nested-step-sections.valid.json +84 -0
  71. package/spec/schemas/stacks-registry.schema.json +108 -0
  72. package/spec/soustack.schema.json +2379 -0
  73. package/spec/stacks/compute.schema.json +7 -0
  74. package/spec/stacks/compute@1.md +22 -0
  75. package/spec/stacks/dietary.schema.json +45 -0
  76. package/spec/stacks/dietary@1.md +24 -0
  77. package/spec/stacks/equipment.schema.json +98 -0
  78. package/spec/stacks/equipment@1.md +244 -0
  79. package/spec/stacks/illustrated.schema.json +54 -0
  80. package/spec/stacks/illustrated@1.md +24 -0
  81. package/spec/stacks/prep.schema.json +76 -0
  82. package/spec/stacks/prep@1.md +276 -0
  83. package/spec/stacks/quantified.schema.json +74 -0
  84. package/spec/stacks/quantified@1.md +24 -0
  85. package/spec/stacks/referenced.schema.json +96 -0
  86. package/spec/stacks/referenced@1.md +23 -0
  87. package/spec/stacks/registry.json +112 -0
  88. package/spec/stacks/scaling.schema.json +99 -0
  89. package/spec/stacks/scaling@1.md +238 -0
  90. package/spec/stacks/storage.schema.json +132 -0
  91. package/spec/stacks/storage@1.md +256 -0
  92. package/spec/stacks/structured.schema.json +48 -0
  93. package/spec/stacks/structured@1.md +24 -0
  94. package/spec/stacks/substitutions.schema.json +43 -0
  95. package/spec/stacks/substitutions@1.md +24 -0
  96. package/spec/stacks/techniques.schema.json +28 -0
  97. package/spec/stacks/techniques@1.md +23 -0
  98. package/spec/stacks/timed.schema.json +60 -0
  99. package/spec/stacks/timed@1.md +23 -0
  100. package/src/defs/common.schema.json +46 -0
  101. package/src/defs/duration.schema.json +33 -0
  102. package/src/defs/entities.schema.json +111 -0
  103. package/src/defs/ingredientQuantified.schema.json +9 -0
  104. package/src/defs/quantity.schema.json +16 -0
  105. package/src/defs/scalingRule.schema.json +127 -0
  106. package/src/defs/temperature.schema.json +63 -0
  107. package/src/profiles/base.schema.json +2 -2
  108. package/src/profiles/equipped.schema.json +10 -0
  109. package/src/profiles/illustrated.schema.json +4 -4
  110. package/src/profiles/lite.schema.json +10 -0
  111. package/src/profiles/prepped.schema.json +10 -0
  112. package/src/profiles/scalable.schema.json +6 -6
  113. package/src/profiles/timed.schema.json +10 -0
  114. package/src/schema.json +2271 -248
  115. package/src/schemas/stacks-registry.schema.json +108 -0
  116. package/src/soustack.schema.json +2271 -248
  117. package/src/stacks/compute.schema.json +7 -0
  118. package/src/stacks/compute@1.md +22 -0
  119. package/src/stacks/dietary.schema.json +45 -0
  120. package/src/stacks/dietary@1.md +24 -0
  121. package/src/stacks/equipment.schema.json +98 -0
  122. package/src/stacks/equipment@1.md +244 -0
  123. package/src/stacks/illustrated.schema.json +54 -0
  124. package/src/stacks/illustrated@1.md +24 -0
  125. package/src/stacks/prep.schema.json +76 -0
  126. package/src/stacks/prep@1.md +276 -0
  127. package/src/stacks/quantified.schema.json +74 -0
  128. package/src/stacks/quantified@1.md +24 -0
  129. package/src/stacks/referenced.schema.json +96 -0
  130. package/src/stacks/referenced@1.md +23 -0
  131. package/src/stacks/registry.json +112 -0
  132. package/src/stacks/scaling.schema.json +99 -0
  133. package/src/stacks/scaling@1.md +238 -0
  134. package/src/stacks/storage.schema.json +132 -0
  135. package/src/stacks/storage@1.md +256 -0
  136. package/src/stacks/structured.schema.json +48 -0
  137. package/src/stacks/structured@1.md +24 -0
  138. package/src/stacks/substitutions.schema.json +43 -0
  139. package/src/stacks/substitutions@1.md +24 -0
  140. package/src/stacks/techniques.schema.json +28 -0
  141. package/src/stacks/techniques@1.md +23 -0
  142. package/src/stacks/timed.schema.json +60 -0
  143. package/src/stacks/timed@1.md +23 -0
  144. package/dist/scrape.js.map +0 -1
  145. package/dist/scrape.mjs.map +0 -1
  146. package/src/profiles/cookable.schema.json +0 -18
  147. package/src/profiles/quantified.schema.json +0 -43
  148. package/src/profiles/schedulable.schema.json +0 -43
@@ -130,6 +130,138 @@ function extractUrl(value) {
130
130
  return trimmed || void 0;
131
131
  }
132
132
 
133
+ // src/normalize.ts
134
/**
 * Produce a normalized deep copy of a recipe document.
 *
 * The input is cloned through a JSON round trip, so the caller's object is
 * never mutated and anything JSON cannot represent is dropped from the copy.
 * The copy then has its `stacks` normalized, a string `version` migrated to
 * `recipeVersion` (when `recipeVersion` is not already set), and its `time`
 * entries normalized.
 *
 * @param {object} input - Recipe document; must be a non-array object whose
 *   JSON form is also a non-array object.
 * @returns {{ recipe: object, warnings: string[] }} The normalized copy and
 *   any non-fatal messages collected while normalizing.
 * @throws {Error} If the input is not a recipe object, or if it still carries
 *   the retired legacy field.
 */
function normalizeRecipe(input) {
  const isRecord = (candidate) =>
    candidate !== null && typeof candidate === "object" && !Array.isArray(candidate);

  // `typeof [] === "object"`, so arrays must be rejected explicitly.
  if (!isRecord(input)) {
    throw new Error("Recipe input must be an object");
  }

  // JSON.stringify returns undefined when a custom toJSON() yields nothing
  // serializable; JSON.parse(undefined) would then throw an unrelated
  // SyntaxError, so that case is mapped onto the same validation error.
  const serialized = JSON.stringify(input);
  const recipe = serialized === void 0 ? void 0 : JSON.parse(serialized);

  // Objects with a toJSON() hook (e.g. Date) can serialize to a primitive or
  // an array; those are not recipe documents either.
  if (!isRecord(recipe)) {
    throw new Error("Recipe input must be an object");
  }

  const warnings = [];

  // NOTE(review): the field name is assembled from two pieces rather than
  // written as one literal — presumably to keep the retired name out of
  // source searches; confirm before simplifying.
  const legacyField = ["mod", "ules"].join("");
  if (legacyField in recipe) {
    throw new Error("The legacy field is no longer supported. Use `stacks` instead.");
  }

  normalizeStacks(recipe, warnings);
  // Defensive fallback so callers can always rely on `recipe.stacks`.
  if (!recipe.stacks) {
    recipe.stacks = {};
  }

  // Migrate the deprecated string `version`, but never overwrite an
  // explicitly provided `recipeVersion`.
  if ("version" in recipe && !recipe.recipeVersion && typeof recipe.version === "string") {
    recipe.recipeVersion = recipe.version;
    delete recipe.version;
    warnings.push("'version' is deprecated; mapped to 'recipeVersion'.");
  }

  normalizeTime(recipe);

  return { recipe, warnings };
}
159
/**
 * Rewrite `recipe.stacks` in place as a plain `{ name: version }` map.
 *
 * Two input shapes are understood:
 *  - an object map: entries whose value is an integer >= 1 are kept, every
 *    other entry is dropped with a warning;
 *  - an array of `"name@version"` strings: each is parsed with
 *    `parseStackIdentifier`; when a name repeats, the highest version wins.
 *    Non-string array entries are skipped without a warning.
 * Any other value (including a missing `stacks`) results in an empty map.
 *
 * @param {object} recipe - Recipe object; its `stacks` property is replaced.
 * @param {string[]} warnings - Collector that receives non-fatal messages.
 * @returns {void}
 */
function normalizeStacks(recipe, warnings) {
  const stacks = {};
  const declared = recipe.stacks;

  // Only own keys count as "already recorded". A plain truthiness test on
  // stacks[name] would also see inherited members such as `constructor` or
  // `toString` and silently discard stacks that happen to share those names.
  const isRecorded = (name) => Object.prototype.hasOwnProperty.call(stacks, name);

  if (Array.isArray(declared)) {
    for (const identifier of declared) {
      if (typeof identifier !== "string") {
        continue;
      }
      const parsed = parseStackIdentifier(identifier);
      if (!parsed) {
        warnings.push(`Invalid stack identifier '${identifier}': expected format 'name@version' (e.g., 'scaling@1')`);
        continue;
      }
      const { name, version } = parsed;
      if (!isRecorded(name) || stacks[name] < version) {
        stacks[name] = version;
      }
    }
  } else if (declared && typeof declared === "object") {
    for (const [key, value] of Object.entries(declared)) {
      if (typeof value === "number" && Number.isInteger(value) && value >= 1) {
        stacks[key] = value;
      } else {
        warnings.push(`Invalid stack version for '${key}': expected positive integer, got ${value}`);
      }
    }
  }

  recipe.stacks = stacks;
}
186
/**
 * Parse a stack identifier of the form `name@version` (e.g. `scaling@1`).
 *
 * Surrounding whitespace is ignored. The name may contain ASCII letters
 * (either case), digits, `_` and `-`; the version must be a run of decimal
 * digits denoting an integer >= 1 that JavaScript can represent exactly.
 *
 * @param {unknown} identifier - Candidate identifier.
 * @returns {{ name: string, version: number } | null} The parsed parts, or
 *   `null` when the identifier is not a well-formed `name@version` string.
 */
function parseStackIdentifier(identifier) {
  if (typeof identifier !== "string") {
    return null;
  }

  const match = /^([a-z0-9_-]+)@(\d+)$/i.exec(identifier.trim());
  if (!match) {
    return null;
  }

  const [, name, versionDigits] = match;
  const version = Number.parseInt(versionDigits, 10);

  // The regex guarantees digits, so NaN cannot occur here. What can occur is
  // a digit run too large to represent exactly; parseInt would silently round
  // it, so such versions are rejected instead of being stored imprecisely.
  if (!Number.isSafeInteger(version) || version < 1) {
    return null;
  }

  return { name, version };
}
201
/**
 * Normalize the well-known entries of `recipe.time` in place.
 *
 * For each of `prep`, `active`, `passive` and `total`, a value that is not
 * already a number is handed to `parseDuration`; whenever that call returns
 * anything other than `null`, the result replaces the entry. Other keys of
 * `time` are left alone, and nothing happens when `time` is missing, is not
 * an object, or is an array.
 *
 * @param {object} recipe - Recipe whose `time` object is updated in place.
 * @returns {void}
 */
function normalizeTime(recipe) {
  const time = recipe?.time;
  const isTimeRecord = Boolean(time) && typeof time === "object" && !Array.isArray(time);
  if (!isTimeRecord) {
    return;
  }

  for (const key of ["prep", "active", "passive", "total"]) {
    const current = time[key];
    // Numbers are already in their final form.
    if (typeof current === "number") {
      continue;
    }
    const converted = parseDuration(current);
    if (converted !== null) {
      time[key] = converted;
    }
  }
}
219
+
220
+ // src/specVersion.ts
221
/** Version of the Soustack spec these identifiers are derived from. */
const SOUSTACK_SPEC_VERSION = "0.0.2";

// src/schemaMetadata.ts

/** Identifier that every recognized schema alias resolves to. */
const CANONICAL_SCHEMA_ID = "https://soustack.spec/soustack.schema.json";

/** Versioned `http://soustack.org` identifier, built from the spec version. */
const LEGACY_SCHEMA_ID = `http://soustack.org/schema/v${SOUSTACK_SPEC_VERSION}`;

/** Base URLs under which raw copies of the spec repository are addressed. */
const RAW_SPEC_BASE = "https://raw.githubusercontent.com/soustack/soustack-spec";
const RAW_SPEC_FORK_BASE = "https://raw.githubusercontent.com/RichardHerold/soustack-spec";

/**
 * Lookup from every accepted schema identifier to the canonical one.
 * The canonical identifier maps to itself, so a plain `has()` check answers
 * "is this identifier known?".
 */
const SCHEMA_ALIAS_MAP = /* @__PURE__ */ new Map([
  // Canonical identifier.
  [CANONICAL_SCHEMA_ID, CANONICAL_SCHEMA_ID],
  // soustack.org identifiers, with and without a trailing slash.
  [LEGACY_SCHEMA_ID, CANONICAL_SCHEMA_ID],
  [`${LEGACY_SCHEMA_ID}/`, CANONICAL_SCHEMA_ID],
  ["https://soustack.org/schema/v0.0.2", CANONICAL_SCHEMA_ID],
  ["https://soustack.org/schema/v0.0.2/", CANONICAL_SCHEMA_ID],
  // Raw repository URLs: `main` and the version-named ref, for both bases.
  [`${RAW_SPEC_BASE}/main/soustack.schema.json`, CANONICAL_SCHEMA_ID],
  [`${RAW_SPEC_BASE}/v${SOUSTACK_SPEC_VERSION}/soustack.schema.json`, CANONICAL_SCHEMA_ID],
  [`${RAW_SPEC_FORK_BASE}/main/soustack.schema.json`, CANONICAL_SCHEMA_ID],
  [`${RAW_SPEC_FORK_BASE}/v${SOUSTACK_SPEC_VERSION}/soustack.schema.json`, CANONICAL_SCHEMA_ID]
]);
239
/**
 * Interpret a `$schema`-style hint.
 *
 * A single trailing `#` is removed before lookup. No other trimming is done.
 *
 * @param {unknown} value - Candidate schema identifier.
 * @returns {{ canonicalId: string | undefined, isSoustackSchema: boolean, wasAlias: boolean }}
 *   - `canonicalId`: the mapped identifier when the hint is a key of
 *     `SCHEMA_ALIAS_MAP`, otherwise the hint itself (minus the trailing `#`);
 *     `undefined` for non-string or empty input.
 *   - `isSoustackSchema`: true for map keys and for identifiers starting with
 *     one of the recognized Soustack URL prefixes.
 *   - `wasAlias`: true exactly when the hint is a key of `SCHEMA_ALIAS_MAP`;
 *     this includes the canonical identifier, which is a key of the map.
 */
function resolveSchemaHint(value) {
  if (typeof value !== "string" || value === "") {
    return { canonicalId: void 0, isSoustackSchema: false, wasAlias: false };
  }

  const hint = value.replace(/#$/, "");
  const isKnown = SCHEMA_ALIAS_MAP.has(hint);
  const canonicalId = SCHEMA_ALIAS_MAP.get(hint) ?? hint;

  const soustackPrefixes = [
    "http://soustack.org/schema",
    "https://soustack.org/schema",
    "https://soustack.spec/",
    "https://soustack.org/schemas/"
  ];
  const isSoustackSchema = isKnown || soustackPrefixes.some((prefix) => canonicalId.startsWith(prefix));

  return {
    canonicalId,
    isSoustackSchema,
    wasAlias: isKnown
  };
}
252
/**
 * Return a shallow copy of `value` whose `$schema` is a Soustack schema id.
 *
 * When the existing `$schema` is a string that `resolveSchemaHint` recognizes
 * as a Soustack schema, its resolved `canonicalId` is used; in every other
 * case `$schema` is set to `CANONICAL_SCHEMA_ID`. Values that are not
 * non-array objects are returned unchanged (same reference).
 *
 * @param {unknown} value - Document to stamp.
 * @returns {unknown} A new object with `$schema` set, or `value` itself when
 *   it is not a non-array object.
 */
function withCanonicalSchema(value) {
  const isRecord = Boolean(value) && typeof value === "object" && !Array.isArray(value);
  if (!isRecord) {
    return value;
  }

  const declared = typeof value.$schema === "string" ? value.$schema : void 0;
  const hint = resolveSchemaHint(declared);

  let schemaId = CANONICAL_SCHEMA_ID;
  if (hint.isSoustackSchema) {
    // Fall back to the canonical id should the resolver report no id.
    schemaId = hint.canonicalId ?? CANONICAL_SCHEMA_ID;
  }

  return { ...value, $schema: schemaId };
}
264
+
133
265
  // src/fromSchemaOrg.ts
134
266
  function fromSchemaOrg(input) {
135
267
  const recipeNode = extractRecipeNode(input);
@@ -145,23 +277,18 @@ function fromSchemaOrg(input) {
145
277
  const source = convertSource(recipeNode);
146
278
  const dateModified = recipeNode.dateModified || void 0;
147
279
  const nutrition = convertNutrition(recipeNode.nutrition);
148
- const attribution = convertAttribution(recipeNode);
149
- const taxonomy = convertTaxonomy(tags, category, extractFirst(recipeNode.recipeCuisine));
150
- const media = convertMedia(recipeNode.image, recipeNode.video);
151
- const times = convertTimes(time);
152
- const modules = [];
153
- if (attribution) modules.push("attribution@1");
154
- if (taxonomy) modules.push("taxonomy@1");
155
- if (media) modules.push("media@1");
156
- if (nutrition) modules.push("nutrition@1");
157
- if (times) modules.push("times@1");
158
- return {
280
+ const images = toArray(normalizeImage(recipeNode.image));
281
+ const videos = normalizeMediaList(recipeNode.video);
282
+ const profile = recipeYield && time ? "base" : "lite";
283
+ const stacks = {};
284
+ const rawRecipe = {
159
285
  "@type": "Recipe",
160
- profile: "minimal",
161
- modules: modules.sort(),
286
+ profile,
287
+ stacks,
162
288
  name: recipeNode.name.trim(),
163
289
  description: recipeNode.description?.trim() || void 0,
164
- image: normalizeImage(recipeNode.image),
290
+ images: images.length ? images : void 0,
291
+ videos: videos.length ? videos : void 0,
165
292
  category,
166
293
  tags: tags.length ? tags : void 0,
167
294
  source,
@@ -171,12 +298,10 @@ function fromSchemaOrg(input) {
171
298
  ingredients,
172
299
  instructions,
173
300
  ...dateModified ? { dateModified } : {},
174
- ...nutrition ? { nutrition } : {},
175
- ...attribution ? { attribution } : {},
176
- ...taxonomy ? { taxonomy } : {},
177
- ...media ? { media } : {},
178
- ...times ? { times } : {}
301
+ ...nutrition ? { nutrition } : {}
179
302
  };
303
+ const { recipe } = normalizeRecipe(rawRecipe);
304
+ return withCanonicalSchema(recipe);
180
305
  }
181
306
  function extractRecipeNode(input) {
182
307
  if (!input) return null;
@@ -220,7 +345,10 @@ function isValidName(name) {
220
345
  function convertIngredients(value) {
221
346
  if (!value) return [];
222
347
  const normalized = Array.isArray(value) ? value : [value];
223
- return normalized.map((item) => typeof item === "string" ? item.trim() : "").filter(Boolean);
348
+ return normalized.map((item) => typeof item === "string" ? item.trim() : "").filter(Boolean).map((name) => ({
349
+ name,
350
+ scaling: { mode: "linear" }
351
+ }));
224
352
  }
225
353
  function convertInstructions(value) {
226
354
  if (!value) return [];
@@ -239,8 +367,8 @@ function convertInstructions(value) {
239
367
  const subsectionItems = extractSectionItems(entry.itemListElement);
240
368
  if (subsectionItems.length) {
241
369
  result.push({
242
- subsection: entry.name?.trim() || "Section",
243
- items: subsectionItems
370
+ section: entry.name?.trim() || "Section",
371
+ steps: subsectionItems
244
372
  });
245
373
  }
246
374
  continue;
@@ -296,7 +424,7 @@ function convertHowToStep(step) {
296
424
  }
297
425
  const instruction = { text };
298
426
  if (id) instruction.id = id;
299
- if (image) instruction.image = image;
427
+ if (image) instruction.images = Array.isArray(image) ? image : [image];
300
428
  if (timing) instruction.timing = timing;
301
429
  return instruction;
302
430
  }
@@ -306,7 +434,13 @@ function extractInstructionTiming(step) {
306
434
  return void 0;
307
435
  }
308
436
  const parsed = smartParseDuration(duration);
309
- return { duration: parsed ?? duration, type: "active" };
437
+ if (parsed === null || parsed === void 0) {
438
+ return void 0;
439
+ }
440
+ return {
441
+ activity: "active",
442
+ duration: { minutes: parsed }
443
+ };
310
444
  }
311
445
  function extractInstructionId(step) {
312
446
  const raw = step["@id"] || step.id || step.url;
@@ -323,14 +457,22 @@ function isHowToSection(value) {
323
457
  return Boolean(value) && typeof value === "object" && value["@type"] === "HowToSection" && Array.isArray(value.itemListElement);
324
458
  }
325
459
  function convertTime(recipe) {
460
+ const total = smartParseDuration(recipe.totalTime ?? "");
326
461
  const prep = smartParseDuration(recipe.prepTime ?? "");
327
462
  const cook = smartParseDuration(recipe.cookTime ?? "");
328
- const total = smartParseDuration(recipe.totalTime ?? "");
329
- const structured = {};
330
- if (prep !== null && prep !== void 0) structured.prep = prep;
331
- if (cook !== null && cook !== void 0) structured.active = cook;
332
- if (total !== null && total !== void 0) structured.total = total;
333
- return Object.keys(structured).length ? structured : void 0;
463
+ const minutes = isPositiveDuration(total) ? total : [prep, cook].filter(isPositiveDuration).reduce((sum, value) => {
464
+ if (sum === null) return value;
465
+ return sum + value;
466
+ }, null);
467
+ if (!isPositiveDuration(minutes)) {
468
+ return void 0;
469
+ }
470
+ return {
471
+ total: { minutes }
472
+ };
473
+ }
474
/**
 * Tell whether `value` is a finite number strictly greater than zero.
 *
 * @param {unknown} value - Candidate duration.
 * @returns {boolean} `true` only for finite numbers above zero.
 */
function isPositiveDuration(value) {
  // Number.isFinite performs no coercion: it is false for every non-number,
  // as well as for NaN and the infinities.
  if (!Number.isFinite(value)) {
    return false;
  }
  return value > 0;
}
335
477
  function collectTags(cuisine, keywords) {
336
478
  const tags = /* @__PURE__ */ new Set();
@@ -388,23 +530,6 @@ function extractEntityName(value) {
388
530
  }
389
531
  return void 0;
390
532
  }
391
- function convertAttribution(recipe) {
392
- const attribution = {};
393
- const url = (recipe.url || recipe.mainEntityOfPage)?.trim();
394
- const author = extractEntityName(recipe.author);
395
- const datePublished = recipe.datePublished?.trim();
396
- if (url) attribution.url = url;
397
- if (author) attribution.author = author;
398
- if (datePublished) attribution.datePublished = datePublished;
399
- return Object.keys(attribution).length ? attribution : void 0;
400
- }
401
- function convertTaxonomy(keywords, category, cuisine) {
402
- const taxonomy = {};
403
- if (keywords.length) taxonomy.keywords = keywords;
404
- if (category) taxonomy.category = category;
405
- if (cuisine) taxonomy.cuisine = cuisine;
406
- return Object.keys(taxonomy).length ? taxonomy : void 0;
407
- }
408
533
  function normalizeMediaList(value) {
409
534
  if (!value) return [];
410
535
  if (typeof value === "string") return [value.trim()].filter(Boolean);
@@ -415,28 +540,18 @@ function normalizeMediaList(value) {
415
540
  return url ? [url] : [];
416
541
  }
417
542
  function extractMediaUrl(value) {
418
- if (value && typeof value === "object" && "url" in value && typeof value.url === "string") {
419
- const trimmed = value.url.trim();
420
- return trimmed || void 0;
543
+ if (value && typeof value === "object") {
544
+ const urlValue = typeof value.url === "string" ? value.url : typeof value.contentUrl === "string" ? value.contentUrl : void 0;
545
+ if (typeof urlValue === "string") {
546
+ const trimmed = urlValue.trim();
547
+ return trimmed || void 0;
548
+ }
421
549
  }
422
550
  return void 0;
423
551
  }
424
- function convertMedia(image, video) {
425
- const normalizedImage = normalizeImage(image);
426
- const images = normalizedImage ? Array.isArray(normalizedImage) ? normalizedImage : [normalizedImage] : [];
427
- const videos = normalizeMediaList(video);
428
- const media = {};
429
- if (images.length) media.images = images;
430
- if (videos.length) media.videos = videos;
431
- return Object.keys(media).length ? media : void 0;
432
- }
433
- function convertTimes(time) {
434
- if (!time) return void 0;
435
- const times = {};
436
- if (typeof time.prep === "number") times.prepMinutes = time.prep;
437
- if (typeof time.active === "number") times.cookMinutes = time.active;
438
- if (typeof time.total === "number") times.totalMinutes = time.total;
439
- return Object.keys(times).length ? times : void 0;
552
/**
 * Coerce a value into an array.
 *
 * @param {unknown} value - Anything.
 * @returns {Array} The value itself when it already is an array, an empty
 *   array for any falsy value, and a one-element array otherwise.
 */
function toArray(value) {
  if (Array.isArray(value)) {
    return value;
  }
  return value ? [value] : [];
}
441
556
  function convertNutrition(nutrition) {
442
557
  if (!nutrition || typeof nutrition !== "object") {
@@ -537,13 +652,16 @@ async function fetchPage(url, options = {}) {
537
652
  const response = await resolvedFetch(url, requestInit);
538
653
  clearTimeout(timeoutId);
539
654
  if (response && typeof process !== "undefined" && process.env.NODE_ENV !== "test") {
540
- try {
541
- const globalFetch = typeof globalThis !== "undefined" && typeof globalThis.fetch !== "undefined" ? globalThis.fetch : null;
542
- if (globalFetch) {
543
- globalFetch("http://127.0.0.1:7243/ingest/7225c3b5-9ac2-4c94-b561-807ca9003b66", { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify({ location: "scraper/fetch.ts:63", message: "fetch response", data: { url, status: response.status, statusText: response.statusText, ok: response.ok, isNYTimes: url.includes("nytimes.com") }, timestamp: Date.now(), sessionId: "debug-session", runId: "run1", hypothesisId: "B" }) }).catch(() => {
544
- });
655
+ const ingestUrl = process.env.SOUSTACK_DEBUG_INGEST_URL;
656
+ if (ingestUrl) {
657
+ try {
658
+ const globalFetch = typeof globalThis !== "undefined" && typeof globalThis.fetch !== "undefined" ? globalThis.fetch : null;
659
+ if (globalFetch) {
660
+ globalFetch(ingestUrl, { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify({ location: "scraper/fetch.ts:63", message: "fetch response", data: { url, status: response.status, statusText: response.statusText, ok: response.ok, isNYTimes: url.includes("nytimes.com") }, timestamp: Date.now(), sessionId: "debug-session", runId: "run1", hypothesisId: "B" }) }).catch(() => {
661
+ });
662
+ }
663
+ } catch {
545
664
  }
546
- } catch {
547
665
  }
548
666
  }
549
667
  if (!response.ok) {
@@ -555,13 +673,16 @@ async function fetchPage(url, options = {}) {
555
673
  }
556
674
  const html = await response.text();
557
675
  if (typeof process !== "undefined" && process.env.NODE_ENV !== "test") {
558
- try {
559
- const globalFetch = typeof globalThis !== "undefined" && typeof globalThis.fetch !== "undefined" ? globalThis.fetch : null;
560
- if (globalFetch) {
561
- globalFetch("http://127.0.0.1:7243/ingest/7225c3b5-9ac2-4c94-b561-807ca9003b66", { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify({ location: "scraper/fetch.ts:75", message: "HTML received", data: { htmlLength: html.length, hasLoginPage: html.toLowerCase().includes("login") || html.toLowerCase().includes("sign in"), hasRecipeData: html.includes("application/ld+json") || html.includes("schema.org/Recipe") }, timestamp: Date.now(), sessionId: "debug-session", runId: "run1", hypothesisId: "B,D" }) }).catch(() => {
562
- });
676
+ const ingestUrl = process.env.SOUSTACK_DEBUG_INGEST_URL;
677
+ if (ingestUrl) {
678
+ try {
679
+ const globalFetch = typeof globalThis !== "undefined" && typeof globalThis.fetch !== "undefined" ? globalThis.fetch : null;
680
+ if (globalFetch) {
681
+ globalFetch(ingestUrl, { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify({ location: "scraper/fetch.ts:75", message: "HTML received", data: { htmlLength: html.length, hasLoginPage: html.toLowerCase().includes("login") || html.toLowerCase().includes("sign in"), hasRecipeData: html.includes("application/ld+json") || html.includes("schema.org/Recipe") }, timestamp: Date.now(), sessionId: "debug-session", runId: "run1", hypothesisId: "B,D" }) }).catch(() => {
682
+ });
683
+ }
684
+ } catch {
563
685
  }
564
- } catch {
565
686
  }
566
687
  }
567
688
  return html;
@@ -591,8 +712,6 @@ function isRecipeNode(value) {
591
712
  return false;
592
713
  }
593
714
  const type = value["@type"];
594
- fetch("http://127.0.0.1:7243/ingest/7225c3b5-9ac2-4c94-b561-807ca9003b66", { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify({ location: "scraper/extractors/utils.ts:14", message: "isRecipeNode check", data: { type, typeLower: typeof type === "string" ? type.toLowerCase() : Array.isArray(type) ? type.map((t) => typeof t === "string" ? t.toLowerCase() : t) : void 0, isMatch: typeof type === "string" ? RECIPE_TYPES.has(type.toLowerCase()) : Array.isArray(type) ? type.some((e) => typeof e === "string" && RECIPE_TYPES.has(e.toLowerCase())) : false }, timestamp: Date.now(), sessionId: "debug-session", runId: "run1", hypothesisId: "A" }) }).catch(() => {
595
- });
596
715
  if (typeof type === "string") {
597
716
  return RECIPE_TYPES.has(type.toLowerCase());
598
717
  }
@@ -620,20 +739,14 @@ function normalizeText(value) {
620
739
  function extractJsonLd(html) {
621
740
  const $ = cheerio.load(html);
622
741
  const scripts = $('script[type="application/ld+json"]');
623
- fetch("http://127.0.0.1:7243/ingest/7225c3b5-9ac2-4c94-b561-807ca9003b66", { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify({ location: "scraper/extractors/jsonld.ts:8", message: "JSON-LD scripts found", data: { scriptCount: scripts.length }, timestamp: Date.now(), sessionId: "debug-session", runId: "run1", hypothesisId: "C,D" }) }).catch(() => {
624
- });
625
742
  const candidates = [];
626
743
  scripts.each((_, element) => {
627
744
  const content = $(element).html();
628
745
  if (!content) return;
629
746
  const parsed = safeJsonParse(content);
630
747
  if (!parsed) return;
631
- fetch("http://127.0.0.1:7243/ingest/7225c3b5-9ac2-4c94-b561-807ca9003b66", { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify({ location: "scraper/extractors/jsonld.ts:18", message: "JSON-LD parsed", data: { hasGraph: !!(parsed && typeof parsed === "object" && "@graph" in parsed), type: parsed && typeof parsed === "object" && "@type" in parsed ? parsed["@type"] : void 0 }, timestamp: Date.now(), sessionId: "debug-session", runId: "run1", hypothesisId: "A,C" }) }).catch(() => {
632
- });
633
748
  collectCandidates(parsed, candidates);
634
749
  });
635
- fetch("http://127.0.0.1:7243/ingest/7225c3b5-9ac2-4c94-b561-807ca9003b66", { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify({ location: "scraper/extractors/jsonld.ts:22", message: "JSON-LD candidates", data: { candidateCount: candidates.length, candidateTypes: candidates.map((c) => c["@type"]) }, timestamp: Date.now(), sessionId: "debug-session", runId: "run1", hypothesisId: "A,C" }) }).catch(() => {
636
- });
637
750
  return candidates[0] ?? null;
638
751
  }
639
752
  function collectCandidates(payload, bucket) {
@@ -815,13 +928,16 @@ function extractRecipe(html) {
815
928
  }
816
929
  const jsonLdRecipe = extractJsonLd(html);
817
930
  if (typeof process !== "undefined" && process.env.NODE_ENV !== "test") {
818
- try {
819
- const globalFetch = typeof globalThis !== "undefined" && typeof globalThis.fetch !== "undefined" ? globalThis.fetch : null;
820
- if (globalFetch) {
821
- globalFetch("http://127.0.0.1:7243/ingest/7225c3b5-9ac2-4c94-b561-807ca9003b66", { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify({ location: "scraper/extractors/index.ts:6", message: "JSON-LD extraction result", data: { hasJsonLd: !!jsonLdRecipe }, timestamp: Date.now(), sessionId: "debug-session", runId: "run1", hypothesisId: "C,D" }) }).catch(() => {
822
- });
931
+ const ingestUrl = process.env.SOUSTACK_DEBUG_INGEST_URL;
932
+ if (ingestUrl) {
933
+ try {
934
+ const globalFetch = typeof globalThis !== "undefined" && typeof globalThis.fetch !== "undefined" ? globalThis.fetch : null;
935
+ if (globalFetch) {
936
+ globalFetch(ingestUrl, { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify({ location: "scraper/extractors/index.ts:6", message: "JSON-LD extraction result", data: { hasJsonLd: !!jsonLdRecipe }, timestamp: Date.now(), sessionId: "debug-session", runId: "run1", hypothesisId: "C,D" }) }).catch(() => {
937
+ });
938
+ }
939
+ } catch {
823
940
  }
824
- } catch {
825
941
  }
826
942
  }
827
943
  if (jsonLdRecipe) {
@@ -829,13 +945,16 @@ function extractRecipe(html) {
829
945
  }
830
946
  const microdataRecipe = extractMicrodata(html);
831
947
  if (typeof process !== "undefined" && process.env.NODE_ENV !== "test") {
832
- try {
833
- const globalFetch = typeof globalThis !== "undefined" && typeof globalThis.fetch !== "undefined" ? globalThis.fetch : null;
834
- if (globalFetch) {
835
- globalFetch("http://127.0.0.1:7243/ingest/7225c3b5-9ac2-4c94-b561-807ca9003b66", { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify({ location: "scraper/extractors/index.ts:12", message: "Microdata extraction result", data: { hasMicrodata: !!microdataRecipe }, timestamp: Date.now(), sessionId: "debug-session", runId: "run1", hypothesisId: "D" }) }).catch(() => {
836
- });
948
+ const ingestUrl = process.env.SOUSTACK_DEBUG_INGEST_URL;
949
+ if (ingestUrl) {
950
+ try {
951
+ const globalFetch = typeof globalThis !== "undefined" && typeof globalThis.fetch !== "undefined" ? globalThis.fetch : null;
952
+ if (globalFetch) {
953
+ globalFetch(ingestUrl, { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify({ location: "scraper/extractors/index.ts:12", message: "Microdata extraction result", data: { hasMicrodata: !!microdataRecipe }, timestamp: Date.now(), sessionId: "debug-session", runId: "run1", hypothesisId: "D" }) }).catch(() => {
954
+ });
955
+ }
956
+ } catch {
837
957
  }
838
- } catch {
839
958
  }
840
959
  }
841
960
  if (microdataRecipe) {
@@ -847,35 +966,44 @@ function extractRecipe(html) {
847
966
  // src/scraper/index.ts
848
967
  async function scrapeRecipe(url, options = {}) {
849
968
  if (typeof process !== "undefined" && process.env.NODE_ENV !== "test") {
850
- try {
851
- const globalFetch = typeof globalThis !== "undefined" && typeof globalThis.fetch !== "undefined" ? globalThis.fetch : null;
852
- if (globalFetch) {
853
- globalFetch("http://127.0.0.1:7243/ingest/7225c3b5-9ac2-4c94-b561-807ca9003b66", { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify({ location: "scraper/index.ts:7", message: "scrapeRecipe entry", data: { url, hasOptions: !!options }, timestamp: Date.now(), sessionId: "debug-session", runId: "run1", hypothesisId: "A,B,C,D,E" }) }).catch(() => {
854
- });
969
+ const ingestUrl = process.env.SOUSTACK_DEBUG_INGEST_URL;
970
+ if (ingestUrl) {
971
+ try {
972
+ const globalFetch = typeof globalThis !== "undefined" && typeof globalThis.fetch !== "undefined" ? globalThis.fetch : null;
973
+ if (globalFetch) {
974
+ globalFetch(ingestUrl, { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify({ location: "scraper/index.ts:7", message: "scrapeRecipe entry", data: { url, hasOptions: !!options }, timestamp: Date.now(), sessionId: "debug-session", runId: "run1", hypothesisId: "A,B,C,D,E" }) }).catch(() => {
975
+ });
976
+ }
977
+ } catch {
855
978
  }
856
- } catch {
857
979
  }
858
980
  }
859
981
  const html = await fetchPage(url, options);
860
982
  if (typeof process !== "undefined" && process.env.NODE_ENV !== "test") {
861
- try {
862
- const globalFetch = typeof globalThis !== "undefined" && typeof globalThis.fetch !== "undefined" ? globalThis.fetch : null;
863
- if (globalFetch) {
864
- globalFetch("http://127.0.0.1:7243/ingest/7225c3b5-9ac2-4c94-b561-807ca9003b66", { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify({ location: "scraper/index.ts:9", message: "HTML fetched", data: { htmlLength: html?.length, htmlPreview: html?.substring(0, 200) }, timestamp: Date.now(), sessionId: "debug-session", runId: "run1", hypothesisId: "B" }) }).catch(() => {
865
- });
983
+ const ingestUrl = process.env.SOUSTACK_DEBUG_INGEST_URL;
984
+ if (ingestUrl) {
985
+ try {
986
+ const globalFetch = typeof globalThis !== "undefined" && typeof globalThis.fetch !== "undefined" ? globalThis.fetch : null;
987
+ if (globalFetch) {
988
+ globalFetch(ingestUrl, { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify({ location: "scraper/index.ts:9", message: "HTML fetched", data: { htmlLength: html?.length, htmlPreview: html?.substring(0, 200) }, timestamp: Date.now(), sessionId: "debug-session", runId: "run1", hypothesisId: "B" }) }).catch(() => {
989
+ });
990
+ }
991
+ } catch {
866
992
  }
867
- } catch {
868
993
  }
869
994
  }
870
995
  const { recipe } = extractRecipe(html);
871
996
  if (typeof process !== "undefined" && process.env.NODE_ENV !== "test") {
872
- try {
873
- const globalFetch = typeof globalThis !== "undefined" && typeof globalThis.fetch !== "undefined" ? globalThis.fetch : null;
874
- if (globalFetch) {
875
- globalFetch("http://127.0.0.1:7243/ingest/7225c3b5-9ac2-4c94-b561-807ca9003b66", { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify({ location: "scraper/index.ts:11", message: "extractRecipe result", data: { hasRecipe: !!recipe, recipeType: recipe?.["@type"], recipeName: recipe?.name }, timestamp: Date.now(), sessionId: "debug-session", runId: "run1", hypothesisId: "A,C,D" }) }).catch(() => {
876
- });
997
+ const ingestUrl = process.env.SOUSTACK_DEBUG_INGEST_URL;
998
+ if (ingestUrl) {
999
+ try {
1000
+ const globalFetch = typeof globalThis !== "undefined" && typeof globalThis.fetch !== "undefined" ? globalThis.fetch : null;
1001
+ if (globalFetch) {
1002
+ globalFetch(ingestUrl, { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify({ location: "scraper/index.ts:11", message: "extractRecipe result", data: { hasRecipe: !!recipe, recipeType: recipe?.["@type"], recipeName: recipe?.name }, timestamp: Date.now(), sessionId: "debug-session", runId: "run1", hypothesisId: "A,C,D" }) }).catch(() => {
1003
+ });
1004
+ }
1005
+ } catch {
877
1006
  }
878
- } catch {
879
1007
  }
880
1008
  }
881
1009
  if (!recipe) {
@@ -883,13 +1011,16 @@ async function scrapeRecipe(url, options = {}) {
883
1011
  }
884
1012
  const soustackRecipe = fromSchemaOrg(recipe);
885
1013
  if (typeof process !== "undefined" && process.env.NODE_ENV !== "test") {
886
- try {
887
- const globalFetch = typeof globalThis !== "undefined" && typeof globalThis.fetch !== "undefined" ? globalThis.fetch : null;
888
- if (globalFetch) {
889
- globalFetch("http://127.0.0.1:7243/ingest/7225c3b5-9ac2-4c94-b561-807ca9003b66", { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify({ location: "scraper/index.ts:17", message: "fromSchemaOrg result", data: { hasSoustackRecipe: !!soustackRecipe, soustackRecipeName: soustackRecipe?.name }, timestamp: Date.now(), sessionId: "debug-session", runId: "run1", hypothesisId: "A" }) }).catch(() => {
890
- });
1014
+ const ingestUrl = process.env.SOUSTACK_DEBUG_INGEST_URL;
1015
+ if (ingestUrl) {
1016
+ try {
1017
+ const globalFetch = typeof globalThis !== "undefined" && typeof globalThis.fetch !== "undefined" ? globalThis.fetch : null;
1018
+ if (globalFetch) {
1019
+ globalFetch(ingestUrl, { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify({ location: "scraper/index.ts:17", message: "fromSchemaOrg result", data: { hasSoustackRecipe: !!soustackRecipe, soustackRecipeName: soustackRecipe?.name }, timestamp: Date.now(), sessionId: "debug-session", runId: "run1", hypothesisId: "A" }) }).catch(() => {
1020
+ });
1021
+ }
1022
+ } catch {
891
1023
  }
892
- } catch {
893
1024
  }
894
1025
  }
895
1026
  if (!soustackRecipe) {
@@ -917,5 +1048,5 @@ exports.extractRecipeFromHTML = extractRecipeFromHTML;
917
1048
  exports.extractSchemaOrgRecipeFromHTML = extractSchemaOrgRecipeFromHTML;
918
1049
  exports.fetchPage = fetchPage;
919
1050
  exports.scrapeRecipe = scrapeRecipe;
920
- //# sourceMappingURL=scrape.js.map
921
- //# sourceMappingURL=scrape.js.map
1051
+ //# sourceMappingURL=index.js.map
1052
+ //# sourceMappingURL=index.js.map