soustack 0.3.0 → 0.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +44 -27
- package/dist/cli/index.js +5225 -992
- package/dist/cli/index.js.map +1 -1
- package/dist/index.d.mts +163 -91
- package/dist/index.d.ts +163 -91
- package/dist/index.js +5077 -1007
- package/dist/index.js.map +1 -1
- package/dist/index.mjs +5076 -1007
- package/dist/index.mjs.map +1 -1
- package/dist/{scrape.d.mts → scrape/index.d.mts} +88 -74
- package/dist/{scrape.d.ts → scrape/index.d.ts} +88 -74
- package/dist/{scrape.js → scrape/index.js} +255 -124
- package/dist/scrape/index.js.map +1 -0
- package/dist/{scrape.mjs → scrape/index.mjs} +255 -124
- package/dist/scrape/index.mjs.map +1 -0
- package/package.json +21 -9
- package/spec/.sync-meta.json +149 -0
- package/spec/SOUSTACK_SPEC_VERSION +1 -0
- package/spec/defs/common.schema.json +46 -0
- package/spec/defs/duration.schema.json +33 -0
- package/spec/defs/entities.schema.json +111 -0
- package/spec/defs/ingredientQuantified.schema.json +9 -0
- package/spec/defs/quantity.schema.json +16 -0
- package/spec/defs/scalingRule.schema.json +127 -0
- package/spec/defs/temperature.schema.json +63 -0
- package/spec/fixtures/content/illustrated-step.valid.json +24 -0
- package/spec/fixtures/invalid/equipment-unknown-reference.invalid.json +38 -0
- package/spec/fixtures/invalid/mise-en-place-unknown-equipment.invalid.json +37 -0
- package/spec/fixtures/invalid/mise-en-place-unknown-input.invalid.json +41 -0
- package/spec/fixtures/invalid/storage-leftovers-missing-method.invalid.json +31 -0
- package/spec/fixtures/invalid/storage-leftovers-wrong-type.invalid.json +23 -0
- package/spec/fixtures/level/base-full.valid.json +162 -0
- package/spec/fixtures/level/base-missing-yield.invalid.json +12 -0
- package/spec/fixtures/level/lite-min.valid.json +14 -0
- package/spec/fixtures/profile/profile-base.valid.json +20 -0
- package/spec/fixtures/profile/profile-equipped.valid.json +28 -0
- package/spec/fixtures/profile/profile-illustrated.valid.json +28 -0
- package/spec/fixtures/profile/profile-lite.valid.json +13 -0
- package/spec/fixtures/profile/profile-prepped.valid.json +31 -0
- package/spec/fixtures/profile/profile-scalable-missing-scaling.invalid.json +29 -0
- package/spec/fixtures/profile/profile-scalable.valid.json +49 -0
- package/spec/fixtures/profile/profile-timed-missing-structured.invalid.json +30 -0
- package/spec/fixtures/scaling/bakers-percent-missing-ref.invalid.json +41 -0
- package/spec/fixtures/scaling/bakers-percent.valid.json +51 -0
- package/spec/fixtures/scaling/discrete-range.invalid.json +36 -0
- package/spec/fixtures/scaling/missing-quantified.invalid.json +40 -0
- package/spec/fixtures/scaling/reject-bakersPercentage.invalid.json +50 -0
- package/spec/fixtures/stacks/compute-missing-timed.invalid.json +32 -0
- package/spec/fixtures/stacks/dietary-no-signal.invalid.json +16 -0
- package/spec/fixtures/stacks/illustrated-empty.invalid.json +13 -0
- package/spec/fixtures/stacks/quantified-string.invalid.json +22 -0
- package/spec/fixtures/stacks/referenced-missing-input.invalid.json +32 -0
- package/spec/fixtures/stacks/storage-min.valid.json +20 -0
- package/spec/fixtures/stacks/storage-no-duration.invalid.json +16 -0
- package/spec/fixtures/stacks/timed-implies-structured.valid.json +50 -0
- package/spec/fixtures/stacks/timed-range.invalid.json +33 -0
- package/spec/fixtures/valid/equipment-scaling-rules.valid.json +76 -0
- package/spec/fixtures/valid/equipment-strings.valid.json +31 -0
- package/spec/fixtures/valid/equipment-structured-uses.valid.json +47 -0
- package/spec/fixtures/valid/mise-en-place-basic.valid.json +31 -0
- package/spec/fixtures/valid/mise-en-place-referenced-equipment.valid.json +51 -0
- package/spec/fixtures/valid/prep-ingredient-strings.valid.json +48 -0
- package/spec/fixtures/valid/prep-ingredient-structured.valid.json +45 -0
- package/spec/fixtures/valid/profile-equipped.valid.json +29 -0
- package/spec/fixtures/valid/profile-prepped.valid.json +32 -0
- package/spec/fixtures/valid/quantified-nested-ingredient-sections.valid.json +61 -0
- package/spec/fixtures/valid/referenced-scaling.valid.json +67 -0
- package/spec/fixtures/valid/storage-leftovers-simple.valid.json +27 -0
- package/spec/fixtures/valid/storage-leftovers-structured.valid.json +43 -0
- package/spec/fixtures/valid/structured-nested-step-sections.valid.json +84 -0
- package/spec/schemas/stacks-registry.schema.json +108 -0
- package/spec/soustack.schema.json +2379 -0
- package/spec/stacks/compute.schema.json +7 -0
- package/spec/stacks/compute@1.md +22 -0
- package/spec/stacks/dietary.schema.json +45 -0
- package/spec/stacks/dietary@1.md +24 -0
- package/spec/stacks/equipment.schema.json +98 -0
- package/spec/stacks/equipment@1.md +244 -0
- package/spec/stacks/illustrated.schema.json +54 -0
- package/spec/stacks/illustrated@1.md +24 -0
- package/spec/stacks/prep.schema.json +76 -0
- package/spec/stacks/prep@1.md +276 -0
- package/spec/stacks/quantified.schema.json +74 -0
- package/spec/stacks/quantified@1.md +24 -0
- package/spec/stacks/referenced.schema.json +96 -0
- package/spec/stacks/referenced@1.md +23 -0
- package/spec/stacks/registry.json +112 -0
- package/spec/stacks/scaling.schema.json +99 -0
- package/spec/stacks/scaling@1.md +238 -0
- package/spec/stacks/storage.schema.json +132 -0
- package/spec/stacks/storage@1.md +256 -0
- package/spec/stacks/structured.schema.json +48 -0
- package/spec/stacks/structured@1.md +24 -0
- package/spec/stacks/substitutions.schema.json +43 -0
- package/spec/stacks/substitutions@1.md +24 -0
- package/spec/stacks/techniques.schema.json +28 -0
- package/spec/stacks/techniques@1.md +23 -0
- package/spec/stacks/timed.schema.json +60 -0
- package/spec/stacks/timed@1.md +23 -0
- package/src/defs/common.schema.json +46 -0
- package/src/defs/duration.schema.json +33 -0
- package/src/defs/entities.schema.json +111 -0
- package/src/defs/ingredientQuantified.schema.json +9 -0
- package/src/defs/quantity.schema.json +16 -0
- package/src/defs/scalingRule.schema.json +127 -0
- package/src/defs/temperature.schema.json +63 -0
- package/src/profiles/base.schema.json +2 -2
- package/src/profiles/equipped.schema.json +10 -0
- package/src/profiles/illustrated.schema.json +4 -4
- package/src/profiles/lite.schema.json +10 -0
- package/src/profiles/prepped.schema.json +10 -0
- package/src/profiles/scalable.schema.json +6 -6
- package/src/profiles/timed.schema.json +10 -0
- package/src/schema.json +2271 -248
- package/src/schemas/stacks-registry.schema.json +108 -0
- package/src/soustack.schema.json +2271 -248
- package/src/stacks/compute.schema.json +7 -0
- package/src/stacks/compute@1.md +22 -0
- package/src/stacks/dietary.schema.json +45 -0
- package/src/stacks/dietary@1.md +24 -0
- package/src/stacks/equipment.schema.json +98 -0
- package/src/stacks/equipment@1.md +244 -0
- package/src/stacks/illustrated.schema.json +54 -0
- package/src/stacks/illustrated@1.md +24 -0
- package/src/stacks/prep.schema.json +76 -0
- package/src/stacks/prep@1.md +276 -0
- package/src/stacks/quantified.schema.json +74 -0
- package/src/stacks/quantified@1.md +24 -0
- package/src/stacks/referenced.schema.json +96 -0
- package/src/stacks/referenced@1.md +23 -0
- package/src/stacks/registry.json +112 -0
- package/src/stacks/scaling.schema.json +99 -0
- package/src/stacks/scaling@1.md +238 -0
- package/src/stacks/storage.schema.json +132 -0
- package/src/stacks/storage@1.md +256 -0
- package/src/stacks/structured.schema.json +48 -0
- package/src/stacks/structured@1.md +24 -0
- package/src/stacks/substitutions.schema.json +43 -0
- package/src/stacks/substitutions@1.md +24 -0
- package/src/stacks/techniques.schema.json +28 -0
- package/src/stacks/techniques@1.md +23 -0
- package/src/stacks/timed.schema.json +60 -0
- package/src/stacks/timed@1.md +23 -0
- package/dist/scrape.js.map +0 -1
- package/dist/scrape.mjs.map +0 -1
- package/src/profiles/cookable.schema.json +0 -18
- package/src/profiles/quantified.schema.json +0 -43
- package/src/profiles/schedulable.schema.json +0 -43
|
@@ -128,6 +128,138 @@ function extractUrl(value) {
|
|
|
128
128
|
return trimmed || void 0;
|
|
129
129
|
}
|
|
130
130
|
|
|
131
|
+
// src/normalize.ts
|
|
132
|
+
/**
 * Build a normalized deep copy of a recipe together with any warnings
 * produced while normalizing it.
 *
 * The caller's object is never mutated: the input is cloned through a JSON
 * round trip, which also drops `undefined`-valued properties from the copy.
 *
 * @param {object} input - Recipe object (must be a non-array object).
 * @returns {{ recipe: object, warnings: string[] }} The normalized copy and
 *   the warnings collected along the way.
 * @throws {Error} If `input` (or its JSON clone) is not a non-array object,
 *   or if the recipe still carries the removed legacy field.
 */
function normalizeRecipe(input) {
  const isRecord = (candidate) =>
    Boolean(candidate) && typeof candidate === "object" && !Array.isArray(candidate);

  // Arrays are objects to `typeof`, so they must be rejected explicitly.
  if (!isRecord(input)) {
    throw new Error("Recipe input must be an object");
  }

  const recipe = JSON.parse(JSON.stringify(input));
  // A custom `toJSON` can make the clone a primitive or an array; re-validate
  // so the `in` check below cannot fail with an unrelated TypeError.
  if (!isRecord(recipe)) {
    throw new Error("Recipe input must be an object");
  }

  const warnings = [];

  // NOTE(review): the key is assembled at runtime rather than written as a
  // literal — presumably to keep the literal word out of the bundle; confirm.
  const legacyField = ["mod", "ules"].join("");
  if (legacyField in recipe) {
    throw new Error("The legacy field is no longer supported. Use `stacks` instead.");
  }

  normalizeStacks(recipe, warnings);
  if (!recipe.stacks) {
    recipe.stacks = {};
  }

  // Map the deprecated string `version` onto `recipeVersion` unless the
  // latter is already set.
  if (typeof recipe.version === "string" && !recipe.recipeVersion) {
    recipe.recipeVersion = recipe.version;
    delete recipe.version;
    warnings.push("'version' is deprecated; mapped to 'recipeVersion'.");
  }

  normalizeTime(recipe);

  return { recipe, warnings };
}
|
|
157
|
+
/**
 * Replace `recipe.stacks` with a plain `{ name: version }` map.
 *
 * Two input shapes are understood:
 *  - an object map: entries whose value is a positive integer are kept,
 *    every other entry produces a warning;
 *  - an array: string entries are parsed with `parseStackIdentifier`
 *    (`name@version`); unparseable strings produce a warning, non-string
 *    entries are skipped, and the highest version wins for a repeated name.
 *
 * Any other value (including a missing `stacks`) results in an empty map.
 *
 * @param {object} recipe - Recipe to update in place.
 * @param {string[]} warnings - Receives one message per rejected entry.
 * @returns {void}
 */
function normalizeStacks(recipe, warnings) {
  const stacks = {};
  const declared = recipe.stacks;

  // Own-property test: a truthiness check on `stacks[name]` would also see
  // inherited members such as `constructor` or `toString`, which made stacks
  // with those names disappear without a warning.
  const alreadySet = (name) => Object.prototype.hasOwnProperty.call(stacks, name);

  if (Array.isArray(declared)) {
    for (const identifier of declared) {
      if (typeof identifier !== "string") {
        continue;
      }
      const parsed = parseStackIdentifier(identifier);
      if (!parsed) {
        warnings.push(`Invalid stack identifier '${identifier}': expected format 'name@version' (e.g., 'scaling@1')`);
        continue;
      }
      const { name, version } = parsed;
      if (!alreadySet(name) || stacks[name] < version) {
        stacks[name] = version;
      }
    }
  } else if (declared && typeof declared === "object") {
    for (const [key, value] of Object.entries(declared)) {
      if (typeof value === "number" && Number.isInteger(value) && value >= 1) {
        stacks[key] = value;
      } else {
        warnings.push(`Invalid stack version for '${key}': expected positive integer, got ${value}`);
      }
    }
  }

  recipe.stacks = stacks;
}
|
|
184
|
+
/**
 * Parse a stack identifier of the form `name@version`.
 *
 * Surrounding whitespace is ignored. `name` may contain ASCII letters (any
 * case), digits, `_` and `-`; `version` must be a decimal integer >= 1 that
 * fits in a safe integer, so it cannot be silently rounded.
 *
 * @param {unknown} identifier - Candidate identifier, e.g. `"scaling@1"`.
 * @returns {{ name: string, version: number } | null} The parsed parts, or
 *   `null` when the input is not a valid identifier.
 */
function parseStackIdentifier(identifier) {
  if (typeof identifier !== "string") {
    return null;
  }
  // Blank input simply fails the pattern, so no separate emptiness check.
  const match = /^([a-z0-9_-]+)@(\d+)$/i.exec(identifier.trim());
  if (!match) {
    return null;
  }
  const [, name, versionStr] = match;
  const version = Number.parseInt(versionStr, 10);
  // Very long digit runs parse to an imprecise number; reject them rather
  // than return a version different from the one that was written.
  if (!Number.isSafeInteger(version) || version < 1) {
    return null;
  }
  return { name, version };
}
|
|
199
|
+
/**
 * Normalize the `prep`, `active`, `passive` and `total` entries of
 * `recipe.time` in place.
 *
 * Entries that are already numbers are left alone. Every other entry is
 * handed to `parseDuration`, and the entry is overwritten with the result
 * unless that result is `null`. Nothing happens when `recipe.time` is
 * missing, not an object, or an array.
 *
 * @param {object} recipe - Recipe whose `time` object is updated in place.
 * @returns {void}
 */
function normalizeTime(recipe) {
  const timeBlock = recipe?.time;
  const isUsable = Boolean(timeBlock) && typeof timeBlock === "object" && !Array.isArray(timeBlock);
  if (!isUsable) {
    return;
  }

  for (const field of ["prep", "active", "passive", "total"]) {
    const current = timeBlock[field];
    if (typeof current === "number") {
      continue;
    }
    const converted = parseDuration(current);
    if (converted === null) {
      continue;
    }
    timeBlock[field] = converted;
  }
}
|
|
217
|
+
|
|
218
|
+
// src/specVersion.ts
|
|
219
|
+
var SOUSTACK_SPEC_VERSION = "0.0.2";

// src/schemaMetadata.ts
// Identifier that every recognized schema URL below resolves to.
var CANONICAL_SCHEMA_ID = "https://soustack.spec/soustack.schema.json";
var LEGACY_SCHEMA_ID = "http://soustack.org/schema/v" + SOUSTACK_SPEC_VERSION;
var RAW_SPEC_BASE = "https://raw.githubusercontent.com/soustack/soustack-spec";
var RAW_SPEC_FORK_BASE = "https://raw.githubusercontent.com/RichardHerold/soustack-spec";
// Lookup from each accepted schema URL (the canonical id included) to the
// canonical id. Insertion order matches the order of the list.
var SCHEMA_ALIAS_MAP = /* @__PURE__ */ new Map(
  [
    CANONICAL_SCHEMA_ID,
    LEGACY_SCHEMA_ID,
    LEGACY_SCHEMA_ID + "/",
    "https://soustack.org/schema/v0.0.2",
    "https://soustack.org/schema/v0.0.2/",
    RAW_SPEC_BASE + "/main/soustack.schema.json",
    RAW_SPEC_BASE + "/v" + SOUSTACK_SPEC_VERSION + "/soustack.schema.json",
    RAW_SPEC_FORK_BASE + "/main/soustack.schema.json",
    RAW_SPEC_FORK_BASE + "/v" + SOUSTACK_SPEC_VERSION + "/soustack.schema.json"
  ].map((alias) => [alias, CANONICAL_SCHEMA_ID])
);
|
|
237
|
+
/**
 * Resolve a `$schema`-style hint against the known schema aliases.
 *
 * A single trailing `#` is removed before the lookup. The result reports the
 * id the hint maps to (the hint itself when it is not in the alias map),
 * whether it is recognized as a Soustack schema (listed in the map or
 * starting with one of the Soustack URL prefixes), and whether it was found
 * in the alias map.
 *
 * @param {unknown} value - Candidate schema URL.
 * @returns {{ canonicalId: (string|undefined), isSoustackSchema: boolean, wasAlias: boolean }}
 *   All-negative result when `value` is not a non-empty string.
 */
function resolveSchemaHint(value) {
  if (typeof value !== "string" || value === "") {
    return { canonicalId: void 0, isSoustackSchema: false, wasAlias: false };
  }

  const hint = value.replace(/#$/, "");
  const listed = SCHEMA_ALIAS_MAP.has(hint);
  const canonicalId = SCHEMA_ALIAS_MAP.get(hint) ?? hint;

  const soustackPrefixes = [
    "http://soustack.org/schema",
    "https://soustack.org/schema",
    "https://soustack.spec/",
    "https://soustack.org/schemas/"
  ];
  const isSoustackSchema = listed || soustackPrefixes.some((prefix) => canonicalId.startsWith(prefix));

  return {
    canonicalId,
    isSoustackSchema,
    // True when the lookup changed the id or the hint is listed in the map.
    wasAlias: canonicalId !== hint || listed
  };
}
|
|
250
|
+
/**
 * Return a shallow copy of `value` whose `$schema` is a Soustack schema id.
 *
 * If the existing `$schema` string is recognized by `resolveSchemaHint` as a
 * Soustack schema, the id it resolves to is used; in every other case
 * `$schema` is set to `CANONICAL_SCHEMA_ID`. Values that are not non-array
 * objects are returned unchanged.
 *
 * @param {unknown} value - Recipe-like object.
 * @returns {unknown} A new object carrying `$schema`, or `value` itself when
 *   it is not a non-array object.
 */
function withCanonicalSchema(value) {
  const isRecord = Boolean(value) && typeof value === "object" && !Array.isArray(value);
  if (!isRecord) {
    return value;
  }

  const declared = typeof value.$schema === "string" ? value.$schema : void 0;
  const hint = resolveSchemaHint(declared);

  let $schema = CANONICAL_SCHEMA_ID;
  if (hint.isSoustackSchema) {
    // Fall back to the canonical id if the resolver reports no id.
    $schema = hint.canonicalId ?? CANONICAL_SCHEMA_ID;
  }

  return { ...value, $schema };
}
|
|
262
|
+
|
|
131
263
|
// src/fromSchemaOrg.ts
|
|
132
264
|
function fromSchemaOrg(input) {
|
|
133
265
|
const recipeNode = extractRecipeNode(input);
|
|
@@ -143,23 +275,18 @@ function fromSchemaOrg(input) {
|
|
|
143
275
|
const source = convertSource(recipeNode);
|
|
144
276
|
const dateModified = recipeNode.dateModified || void 0;
|
|
145
277
|
const nutrition = convertNutrition(recipeNode.nutrition);
|
|
146
|
-
const
|
|
147
|
-
const
|
|
148
|
-
const
|
|
149
|
-
const
|
|
150
|
-
const
|
|
151
|
-
if (attribution) modules.push("attribution@1");
|
|
152
|
-
if (taxonomy) modules.push("taxonomy@1");
|
|
153
|
-
if (media) modules.push("media@1");
|
|
154
|
-
if (nutrition) modules.push("nutrition@1");
|
|
155
|
-
if (times) modules.push("times@1");
|
|
156
|
-
return {
|
|
278
|
+
const images = toArray(normalizeImage(recipeNode.image));
|
|
279
|
+
const videos = normalizeMediaList(recipeNode.video);
|
|
280
|
+
const profile = recipeYield && time ? "base" : "lite";
|
|
281
|
+
const stacks = {};
|
|
282
|
+
const rawRecipe = {
|
|
157
283
|
"@type": "Recipe",
|
|
158
|
-
profile
|
|
159
|
-
|
|
284
|
+
profile,
|
|
285
|
+
stacks,
|
|
160
286
|
name: recipeNode.name.trim(),
|
|
161
287
|
description: recipeNode.description?.trim() || void 0,
|
|
162
|
-
|
|
288
|
+
images: images.length ? images : void 0,
|
|
289
|
+
videos: videos.length ? videos : void 0,
|
|
163
290
|
category,
|
|
164
291
|
tags: tags.length ? tags : void 0,
|
|
165
292
|
source,
|
|
@@ -169,12 +296,10 @@ function fromSchemaOrg(input) {
|
|
|
169
296
|
ingredients,
|
|
170
297
|
instructions,
|
|
171
298
|
...dateModified ? { dateModified } : {},
|
|
172
|
-
...nutrition ? { nutrition } : {}
|
|
173
|
-
...attribution ? { attribution } : {},
|
|
174
|
-
...taxonomy ? { taxonomy } : {},
|
|
175
|
-
...media ? { media } : {},
|
|
176
|
-
...times ? { times } : {}
|
|
299
|
+
...nutrition ? { nutrition } : {}
|
|
177
300
|
};
|
|
301
|
+
const { recipe } = normalizeRecipe(rawRecipe);
|
|
302
|
+
return withCanonicalSchema(recipe);
|
|
178
303
|
}
|
|
179
304
|
function extractRecipeNode(input) {
|
|
180
305
|
if (!input) return null;
|
|
@@ -218,7 +343,10 @@ function isValidName(name) {
|
|
|
218
343
|
/**
 * Convert an ingredient value (a single entry or a list) into ingredient
 * objects.
 *
 * Only string entries are used; each is trimmed, and entries that are empty
 * after trimming are dropped. Every resulting ingredient is given linear
 * scaling.
 *
 * @param {unknown} value - A single ingredient or an array of ingredients.
 * @returns {Array<{ name: string, scaling: { mode: string } }>} One object
 *   per usable string, in input order; empty when `value` is falsy.
 */
function convertIngredients(value) {
  if (!value) {
    return [];
  }

  const entries = Array.isArray(value) ? value : [value];
  const ingredients = [];
  for (const entry of entries) {
    if (typeof entry !== "string") {
      continue;
    }
    const name = entry.trim();
    if (name === "") {
      continue;
    }
    ingredients.push({ name, scaling: { mode: "linear" } });
  }
  return ingredients;
}
|
|
223
351
|
function convertInstructions(value) {
|
|
224
352
|
if (!value) return [];
|
|
@@ -237,8 +365,8 @@ function convertInstructions(value) {
|
|
|
237
365
|
const subsectionItems = extractSectionItems(entry.itemListElement);
|
|
238
366
|
if (subsectionItems.length) {
|
|
239
367
|
result.push({
|
|
240
|
-
|
|
241
|
-
|
|
368
|
+
section: entry.name?.trim() || "Section",
|
|
369
|
+
steps: subsectionItems
|
|
242
370
|
});
|
|
243
371
|
}
|
|
244
372
|
continue;
|
|
@@ -294,7 +422,7 @@ function convertHowToStep(step) {
|
|
|
294
422
|
}
|
|
295
423
|
const instruction = { text };
|
|
296
424
|
if (id) instruction.id = id;
|
|
297
|
-
if (image) instruction.
|
|
425
|
+
if (image) instruction.images = Array.isArray(image) ? image : [image];
|
|
298
426
|
if (timing) instruction.timing = timing;
|
|
299
427
|
return instruction;
|
|
300
428
|
}
|
|
@@ -304,7 +432,13 @@ function extractInstructionTiming(step) {
|
|
|
304
432
|
return void 0;
|
|
305
433
|
}
|
|
306
434
|
const parsed = smartParseDuration(duration);
|
|
307
|
-
|
|
435
|
+
if (parsed === null || parsed === void 0) {
|
|
436
|
+
return void 0;
|
|
437
|
+
}
|
|
438
|
+
return {
|
|
439
|
+
activity: "active",
|
|
440
|
+
duration: { minutes: parsed }
|
|
441
|
+
};
|
|
308
442
|
}
|
|
309
443
|
function extractInstructionId(step) {
|
|
310
444
|
const raw = step["@id"] || step.id || step.url;
|
|
@@ -321,14 +455,22 @@ function isHowToSection(value) {
|
|
|
321
455
|
return Boolean(value) && typeof value === "object" && value["@type"] === "HowToSection" && Array.isArray(value.itemListElement);
|
|
322
456
|
}
|
|
323
457
|
/**
 * Derive a total duration from a recipe node's `totalTime`, `prepTime` and
 * `cookTime` fields.
 *
 * Each field is parsed with `smartParseDuration` (a missing field is parsed
 * as an empty string). A positive `totalTime` is used as-is; otherwise the
 * positive values among `prepTime` and `cookTime` are summed.
 *
 * @param {object} recipe - Recipe node carrying the time fields.
 * @returns {{ total: { minutes: number } } | undefined} The total, or
 *   `undefined` when no positive duration could be derived.
 */
function convertTime(recipe) {
  const parsedTotal = smartParseDuration(recipe.totalTime ?? "");
  const parsedPrep = smartParseDuration(recipe.prepTime ?? "");
  const parsedCook = smartParseDuration(recipe.cookTime ?? "");

  let minutes = null;
  if (isPositiveDuration(parsedTotal)) {
    minutes = parsedTotal;
  } else {
    for (const part of [parsedPrep, parsedCook]) {
      if (!isPositiveDuration(part)) {
        continue;
      }
      minutes = minutes === null ? part : minutes + part;
    }
  }

  if (isPositiveDuration(minutes)) {
    return { total: { minutes } };
  }
  return void 0;
}
|
|
472
|
+
/**
 * Tell whether `value` is a finite number strictly greater than zero.
 *
 * @param {unknown} value - Candidate duration.
 * @returns {boolean} `false` for non-numbers, `NaN`, infinities, zero and
 *   negative numbers.
 */
function isPositiveDuration(value) {
  // Number.isFinite does not coerce, so it already rejects non-number input.
  if (!Number.isFinite(value)) {
    return false;
  }
  return value > 0;
}
|
|
333
475
|
function collectTags(cuisine, keywords) {
|
|
334
476
|
const tags = /* @__PURE__ */ new Set();
|
|
@@ -386,23 +528,6 @@ function extractEntityName(value) {
|
|
|
386
528
|
}
|
|
387
529
|
return void 0;
|
|
388
530
|
}
|
|
389
|
-
function convertAttribution(recipe) {
|
|
390
|
-
const attribution = {};
|
|
391
|
-
const url = (recipe.url || recipe.mainEntityOfPage)?.trim();
|
|
392
|
-
const author = extractEntityName(recipe.author);
|
|
393
|
-
const datePublished = recipe.datePublished?.trim();
|
|
394
|
-
if (url) attribution.url = url;
|
|
395
|
-
if (author) attribution.author = author;
|
|
396
|
-
if (datePublished) attribution.datePublished = datePublished;
|
|
397
|
-
return Object.keys(attribution).length ? attribution : void 0;
|
|
398
|
-
}
|
|
399
|
-
function convertTaxonomy(keywords, category, cuisine) {
|
|
400
|
-
const taxonomy = {};
|
|
401
|
-
if (keywords.length) taxonomy.keywords = keywords;
|
|
402
|
-
if (category) taxonomy.category = category;
|
|
403
|
-
if (cuisine) taxonomy.cuisine = cuisine;
|
|
404
|
-
return Object.keys(taxonomy).length ? taxonomy : void 0;
|
|
405
|
-
}
|
|
406
531
|
function normalizeMediaList(value) {
|
|
407
532
|
if (!value) return [];
|
|
408
533
|
if (typeof value === "string") return [value.trim()].filter(Boolean);
|
|
@@ -413,28 +538,18 @@ function normalizeMediaList(value) {
|
|
|
413
538
|
return url ? [url] : [];
|
|
414
539
|
}
|
|
415
540
|
/**
 * Extract a URL from a media object.
 *
 * `url` is preferred over `contentUrl`. A candidate is used only when it is
 * a string that is non-empty after trimming, so a blank `url` no longer
 * hides a usable `contentUrl`.
 *
 * @param {unknown} value - Media object with optional `url` / `contentUrl`.
 * @returns {string | undefined} The trimmed URL, or `undefined` when
 *   `value` is not an object or carries no usable URL.
 */
function extractMediaUrl(value) {
  if (!value || typeof value !== "object") {
    return void 0;
  }
  for (const candidate of [value.url, value.contentUrl]) {
    if (typeof candidate !== "string") {
      continue;
    }
    const trimmed = candidate.trim();
    if (trimmed) {
      return trimmed;
    }
  }
  return void 0;
}
|
|
422
|
-
function
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
const videos = normalizeMediaList(video);
|
|
426
|
-
const media = {};
|
|
427
|
-
if (images.length) media.images = images;
|
|
428
|
-
if (videos.length) media.videos = videos;
|
|
429
|
-
return Object.keys(media).length ? media : void 0;
|
|
430
|
-
}
|
|
431
|
-
function convertTimes(time) {
|
|
432
|
-
if (!time) return void 0;
|
|
433
|
-
const times = {};
|
|
434
|
-
if (typeof time.prep === "number") times.prepMinutes = time.prep;
|
|
435
|
-
if (typeof time.active === "number") times.cookMinutes = time.active;
|
|
436
|
-
if (typeof time.total === "number") times.totalMinutes = time.total;
|
|
437
|
-
return Object.keys(times).length ? times : void 0;
|
|
550
|
+
/**
 * Coerce a value into an array.
 *
 * Arrays are returned as-is (same reference), any other truthy value is
 * wrapped in a one-element array, and every falsy value (including `0` and
 * `""`) yields an empty array.
 *
 * @param {unknown} value - Value to coerce.
 * @returns {Array} The resulting array.
 */
function toArray(value) {
  if (Array.isArray(value)) {
    return value;
  }
  return value ? [value] : [];
}
|
|
439
554
|
function convertNutrition(nutrition) {
|
|
440
555
|
if (!nutrition || typeof nutrition !== "object") {
|
|
@@ -535,13 +650,16 @@ async function fetchPage(url, options = {}) {
|
|
|
535
650
|
const response = await resolvedFetch(url, requestInit);
|
|
536
651
|
clearTimeout(timeoutId);
|
|
537
652
|
if (response && typeof process !== "undefined" && process.env.NODE_ENV !== "test") {
|
|
538
|
-
|
|
539
|
-
|
|
540
|
-
|
|
541
|
-
globalFetch
|
|
542
|
-
|
|
653
|
+
const ingestUrl = process.env.SOUSTACK_DEBUG_INGEST_URL;
|
|
654
|
+
if (ingestUrl) {
|
|
655
|
+
try {
|
|
656
|
+
const globalFetch = typeof globalThis !== "undefined" && typeof globalThis.fetch !== "undefined" ? globalThis.fetch : null;
|
|
657
|
+
if (globalFetch) {
|
|
658
|
+
globalFetch(ingestUrl, { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify({ location: "scraper/fetch.ts:63", message: "fetch response", data: { url, status: response.status, statusText: response.statusText, ok: response.ok, isNYTimes: url.includes("nytimes.com") }, timestamp: Date.now(), sessionId: "debug-session", runId: "run1", hypothesisId: "B" }) }).catch(() => {
|
|
659
|
+
});
|
|
660
|
+
}
|
|
661
|
+
} catch {
|
|
543
662
|
}
|
|
544
|
-
} catch {
|
|
545
663
|
}
|
|
546
664
|
}
|
|
547
665
|
if (!response.ok) {
|
|
@@ -553,13 +671,16 @@ async function fetchPage(url, options = {}) {
|
|
|
553
671
|
}
|
|
554
672
|
const html = await response.text();
|
|
555
673
|
if (typeof process !== "undefined" && process.env.NODE_ENV !== "test") {
|
|
556
|
-
|
|
557
|
-
|
|
558
|
-
|
|
559
|
-
globalFetch
|
|
560
|
-
|
|
674
|
+
const ingestUrl = process.env.SOUSTACK_DEBUG_INGEST_URL;
|
|
675
|
+
if (ingestUrl) {
|
|
676
|
+
try {
|
|
677
|
+
const globalFetch = typeof globalThis !== "undefined" && typeof globalThis.fetch !== "undefined" ? globalThis.fetch : null;
|
|
678
|
+
if (globalFetch) {
|
|
679
|
+
globalFetch(ingestUrl, { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify({ location: "scraper/fetch.ts:75", message: "HTML received", data: { htmlLength: html.length, hasLoginPage: html.toLowerCase().includes("login") || html.toLowerCase().includes("sign in"), hasRecipeData: html.includes("application/ld+json") || html.includes("schema.org/Recipe") }, timestamp: Date.now(), sessionId: "debug-session", runId: "run1", hypothesisId: "B,D" }) }).catch(() => {
|
|
680
|
+
});
|
|
681
|
+
}
|
|
682
|
+
} catch {
|
|
561
683
|
}
|
|
562
|
-
} catch {
|
|
563
684
|
}
|
|
564
685
|
}
|
|
565
686
|
return html;
|
|
@@ -589,8 +710,6 @@ function isRecipeNode(value) {
|
|
|
589
710
|
return false;
|
|
590
711
|
}
|
|
591
712
|
const type = value["@type"];
|
|
592
|
-
fetch("http://127.0.0.1:7243/ingest/7225c3b5-9ac2-4c94-b561-807ca9003b66", { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify({ location: "scraper/extractors/utils.ts:14", message: "isRecipeNode check", data: { type, typeLower: typeof type === "string" ? type.toLowerCase() : Array.isArray(type) ? type.map((t) => typeof t === "string" ? t.toLowerCase() : t) : void 0, isMatch: typeof type === "string" ? RECIPE_TYPES.has(type.toLowerCase()) : Array.isArray(type) ? type.some((e) => typeof e === "string" && RECIPE_TYPES.has(e.toLowerCase())) : false }, timestamp: Date.now(), sessionId: "debug-session", runId: "run1", hypothesisId: "A" }) }).catch(() => {
|
|
593
|
-
});
|
|
594
713
|
if (typeof type === "string") {
|
|
595
714
|
return RECIPE_TYPES.has(type.toLowerCase());
|
|
596
715
|
}
|
|
@@ -618,20 +737,14 @@ function normalizeText(value) {
|
|
|
618
737
|
/**
 * Find the first recipe candidate in a page's JSON-LD script tags.
 *
 * Every `<script type="application/ld+json">` element is read in document
 * order. Its content is parsed with `safeJsonParse`, and each truthy result
 * is passed to `collectCandidates`, which fills the shared candidate list.
 * Scripts with empty content or a falsy parse result are skipped.
 *
 * @param {string} html - Page markup.
 * @returns {object | null} The first collected candidate, or `null` when
 *   none was collected.
 */
function extractJsonLd(html) {
  const $ = load(html);
  const found = [];

  $('script[type="application/ld+json"]').each((_index, node) => {
    const raw = $(node).html();
    const payload = raw ? safeJsonParse(raw) : null;
    if (payload) {
      collectCandidates(payload, found);
    }
  });

  const [first] = found;
  return first ?? null;
}
|
|
637
750
|
function collectCandidates(payload, bucket) {
|
|
@@ -813,13 +926,16 @@ function extractRecipe(html) {
|
|
|
813
926
|
}
|
|
814
927
|
const jsonLdRecipe = extractJsonLd(html);
|
|
815
928
|
if (typeof process !== "undefined" && process.env.NODE_ENV !== "test") {
|
|
816
|
-
|
|
817
|
-
|
|
818
|
-
|
|
819
|
-
globalFetch
|
|
820
|
-
|
|
929
|
+
const ingestUrl = process.env.SOUSTACK_DEBUG_INGEST_URL;
|
|
930
|
+
if (ingestUrl) {
|
|
931
|
+
try {
|
|
932
|
+
const globalFetch = typeof globalThis !== "undefined" && typeof globalThis.fetch !== "undefined" ? globalThis.fetch : null;
|
|
933
|
+
if (globalFetch) {
|
|
934
|
+
globalFetch(ingestUrl, { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify({ location: "scraper/extractors/index.ts:6", message: "JSON-LD extraction result", data: { hasJsonLd: !!jsonLdRecipe }, timestamp: Date.now(), sessionId: "debug-session", runId: "run1", hypothesisId: "C,D" }) }).catch(() => {
|
|
935
|
+
});
|
|
936
|
+
}
|
|
937
|
+
} catch {
|
|
821
938
|
}
|
|
822
|
-
} catch {
|
|
823
939
|
}
|
|
824
940
|
}
|
|
825
941
|
if (jsonLdRecipe) {
|
|
@@ -827,13 +943,16 @@ function extractRecipe(html) {
|
|
|
827
943
|
}
|
|
828
944
|
const microdataRecipe = extractMicrodata(html);
|
|
829
945
|
if (typeof process !== "undefined" && process.env.NODE_ENV !== "test") {
|
|
830
|
-
|
|
831
|
-
|
|
832
|
-
|
|
833
|
-
globalFetch
|
|
834
|
-
|
|
946
|
+
const ingestUrl = process.env.SOUSTACK_DEBUG_INGEST_URL;
|
|
947
|
+
if (ingestUrl) {
|
|
948
|
+
try {
|
|
949
|
+
const globalFetch = typeof globalThis !== "undefined" && typeof globalThis.fetch !== "undefined" ? globalThis.fetch : null;
|
|
950
|
+
if (globalFetch) {
|
|
951
|
+
globalFetch(ingestUrl, { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify({ location: "scraper/extractors/index.ts:12", message: "Microdata extraction result", data: { hasMicrodata: !!microdataRecipe }, timestamp: Date.now(), sessionId: "debug-session", runId: "run1", hypothesisId: "D" }) }).catch(() => {
|
|
952
|
+
});
|
|
953
|
+
}
|
|
954
|
+
} catch {
|
|
835
955
|
}
|
|
836
|
-
} catch {
|
|
837
956
|
}
|
|
838
957
|
}
|
|
839
958
|
if (microdataRecipe) {
|
|
@@ -845,35 +964,44 @@ function extractRecipe(html) {
|
|
|
845
964
|
// src/scraper/index.ts
|
|
846
965
|
async function scrapeRecipe(url, options = {}) {
|
|
847
966
|
if (typeof process !== "undefined" && process.env.NODE_ENV !== "test") {
|
|
848
|
-
|
|
849
|
-
|
|
850
|
-
|
|
851
|
-
globalFetch
|
|
852
|
-
|
|
967
|
+
const ingestUrl = process.env.SOUSTACK_DEBUG_INGEST_URL;
|
|
968
|
+
if (ingestUrl) {
|
|
969
|
+
try {
|
|
970
|
+
const globalFetch = typeof globalThis !== "undefined" && typeof globalThis.fetch !== "undefined" ? globalThis.fetch : null;
|
|
971
|
+
if (globalFetch) {
|
|
972
|
+
globalFetch(ingestUrl, { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify({ location: "scraper/index.ts:7", message: "scrapeRecipe entry", data: { url, hasOptions: !!options }, timestamp: Date.now(), sessionId: "debug-session", runId: "run1", hypothesisId: "A,B,C,D,E" }) }).catch(() => {
|
|
973
|
+
});
|
|
974
|
+
}
|
|
975
|
+
} catch {
|
|
853
976
|
}
|
|
854
|
-
} catch {
|
|
855
977
|
}
|
|
856
978
|
}
|
|
857
979
|
const html = await fetchPage(url, options);
|
|
858
980
|
if (typeof process !== "undefined" && process.env.NODE_ENV !== "test") {
|
|
859
|
-
|
|
860
|
-
|
|
861
|
-
|
|
862
|
-
globalFetch
|
|
863
|
-
|
|
981
|
+
const ingestUrl = process.env.SOUSTACK_DEBUG_INGEST_URL;
|
|
982
|
+
if (ingestUrl) {
|
|
983
|
+
try {
|
|
984
|
+
const globalFetch = typeof globalThis !== "undefined" && typeof globalThis.fetch !== "undefined" ? globalThis.fetch : null;
|
|
985
|
+
if (globalFetch) {
|
|
986
|
+
globalFetch(ingestUrl, { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify({ location: "scraper/index.ts:9", message: "HTML fetched", data: { htmlLength: html?.length, htmlPreview: html?.substring(0, 200) }, timestamp: Date.now(), sessionId: "debug-session", runId: "run1", hypothesisId: "B" }) }).catch(() => {
|
|
987
|
+
});
|
|
988
|
+
}
|
|
989
|
+
} catch {
|
|
864
990
|
}
|
|
865
|
-
} catch {
|
|
866
991
|
}
|
|
867
992
|
}
|
|
868
993
|
const { recipe } = extractRecipe(html);
|
|
869
994
|
if (typeof process !== "undefined" && process.env.NODE_ENV !== "test") {
|
|
870
|
-
|
|
871
|
-
|
|
872
|
-
|
|
873
|
-
globalFetch
|
|
874
|
-
|
|
995
|
+
const ingestUrl = process.env.SOUSTACK_DEBUG_INGEST_URL;
|
|
996
|
+
if (ingestUrl) {
|
|
997
|
+
try {
|
|
998
|
+
const globalFetch = typeof globalThis !== "undefined" && typeof globalThis.fetch !== "undefined" ? globalThis.fetch : null;
|
|
999
|
+
if (globalFetch) {
|
|
1000
|
+
globalFetch(ingestUrl, { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify({ location: "scraper/index.ts:11", message: "extractRecipe result", data: { hasRecipe: !!recipe, recipeType: recipe?.["@type"], recipeName: recipe?.name }, timestamp: Date.now(), sessionId: "debug-session", runId: "run1", hypothesisId: "A,C,D" }) }).catch(() => {
|
|
1001
|
+
});
|
|
1002
|
+
}
|
|
1003
|
+
} catch {
|
|
875
1004
|
}
|
|
876
|
-
} catch {
|
|
877
1005
|
}
|
|
878
1006
|
}
|
|
879
1007
|
if (!recipe) {
|
|
@@ -881,13 +1009,16 @@ async function scrapeRecipe(url, options = {}) {
|
|
|
881
1009
|
}
|
|
882
1010
|
const soustackRecipe = fromSchemaOrg(recipe);
|
|
883
1011
|
if (typeof process !== "undefined" && process.env.NODE_ENV !== "test") {
|
|
884
|
-
|
|
885
|
-
|
|
886
|
-
|
|
887
|
-
globalFetch
|
|
888
|
-
|
|
1012
|
+
const ingestUrl = process.env.SOUSTACK_DEBUG_INGEST_URL;
|
|
1013
|
+
if (ingestUrl) {
|
|
1014
|
+
try {
|
|
1015
|
+
const globalFetch = typeof globalThis !== "undefined" && typeof globalThis.fetch !== "undefined" ? globalThis.fetch : null;
|
|
1016
|
+
if (globalFetch) {
|
|
1017
|
+
globalFetch(ingestUrl, { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify({ location: "scraper/index.ts:17", message: "fromSchemaOrg result", data: { hasSoustackRecipe: !!soustackRecipe, soustackRecipeName: soustackRecipe?.name }, timestamp: Date.now(), sessionId: "debug-session", runId: "run1", hypothesisId: "A" }) }).catch(() => {
|
|
1018
|
+
});
|
|
1019
|
+
}
|
|
1020
|
+
} catch {
|
|
889
1021
|
}
|
|
890
|
-
} catch {
|
|
891
1022
|
}
|
|
892
1023
|
}
|
|
893
1024
|
if (!soustackRecipe) {
|
|
@@ -912,5 +1043,5 @@ function extractSchemaOrgRecipeFromHTML(html) {
|
|
|
912
1043
|
}
|
|
913
1044
|
|
|
914
1045
|
export { extractRecipeFromHTML, extractSchemaOrgRecipeFromHTML, fetchPage, scrapeRecipe };
|
|
915
|
-
//# sourceMappingURL=
|
|
916
|
-
//# sourceMappingURL=
|
|
1046
|
+
//# sourceMappingURL=index.mjs.map
|
|
1047
|
+
//# sourceMappingURL=index.mjs.map
|