soustack 0.3.0 → 0.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +44 -27
- package/dist/cli/index.js +5225 -992
- package/dist/cli/index.js.map +1 -1
- package/dist/index.d.mts +163 -91
- package/dist/index.d.ts +163 -91
- package/dist/index.js +5077 -1007
- package/dist/index.js.map +1 -1
- package/dist/index.mjs +5076 -1007
- package/dist/index.mjs.map +1 -1
- package/dist/{scrape.d.mts → scrape/index.d.mts} +88 -74
- package/dist/{scrape.d.ts → scrape/index.d.ts} +88 -74
- package/dist/{scrape.js → scrape/index.js} +255 -124
- package/dist/scrape/index.js.map +1 -0
- package/dist/{scrape.mjs → scrape/index.mjs} +255 -124
- package/dist/scrape/index.mjs.map +1 -0
- package/package.json +21 -9
- package/spec/.sync-meta.json +149 -0
- package/spec/SOUSTACK_SPEC_VERSION +1 -0
- package/spec/defs/common.schema.json +46 -0
- package/spec/defs/duration.schema.json +33 -0
- package/spec/defs/entities.schema.json +111 -0
- package/spec/defs/ingredientQuantified.schema.json +9 -0
- package/spec/defs/quantity.schema.json +16 -0
- package/spec/defs/scalingRule.schema.json +127 -0
- package/spec/defs/temperature.schema.json +63 -0
- package/spec/fixtures/content/illustrated-step.valid.json +24 -0
- package/spec/fixtures/invalid/equipment-unknown-reference.invalid.json +38 -0
- package/spec/fixtures/invalid/mise-en-place-unknown-equipment.invalid.json +37 -0
- package/spec/fixtures/invalid/mise-en-place-unknown-input.invalid.json +41 -0
- package/spec/fixtures/invalid/storage-leftovers-missing-method.invalid.json +31 -0
- package/spec/fixtures/invalid/storage-leftovers-wrong-type.invalid.json +23 -0
- package/spec/fixtures/level/base-full.valid.json +162 -0
- package/spec/fixtures/level/base-missing-yield.invalid.json +12 -0
- package/spec/fixtures/level/lite-min.valid.json +14 -0
- package/spec/fixtures/profile/profile-base.valid.json +20 -0
- package/spec/fixtures/profile/profile-equipped.valid.json +28 -0
- package/spec/fixtures/profile/profile-illustrated.valid.json +28 -0
- package/spec/fixtures/profile/profile-lite.valid.json +13 -0
- package/spec/fixtures/profile/profile-prepped.valid.json +31 -0
- package/spec/fixtures/profile/profile-scalable-missing-scaling.invalid.json +29 -0
- package/spec/fixtures/profile/profile-scalable.valid.json +49 -0
- package/spec/fixtures/profile/profile-timed-missing-structured.invalid.json +30 -0
- package/spec/fixtures/scaling/bakers-percent-missing-ref.invalid.json +41 -0
- package/spec/fixtures/scaling/bakers-percent.valid.json +51 -0
- package/spec/fixtures/scaling/discrete-range.invalid.json +36 -0
- package/spec/fixtures/scaling/missing-quantified.invalid.json +40 -0
- package/spec/fixtures/scaling/reject-bakersPercentage.invalid.json +50 -0
- package/spec/fixtures/stacks/compute-missing-timed.invalid.json +32 -0
- package/spec/fixtures/stacks/dietary-no-signal.invalid.json +16 -0
- package/spec/fixtures/stacks/illustrated-empty.invalid.json +13 -0
- package/spec/fixtures/stacks/quantified-string.invalid.json +22 -0
- package/spec/fixtures/stacks/referenced-missing-input.invalid.json +32 -0
- package/spec/fixtures/stacks/storage-min.valid.json +20 -0
- package/spec/fixtures/stacks/storage-no-duration.invalid.json +16 -0
- package/spec/fixtures/stacks/timed-implies-structured.valid.json +50 -0
- package/spec/fixtures/stacks/timed-range.invalid.json +33 -0
- package/spec/fixtures/valid/equipment-scaling-rules.valid.json +76 -0
- package/spec/fixtures/valid/equipment-strings.valid.json +31 -0
- package/spec/fixtures/valid/equipment-structured-uses.valid.json +47 -0
- package/spec/fixtures/valid/mise-en-place-basic.valid.json +31 -0
- package/spec/fixtures/valid/mise-en-place-referenced-equipment.valid.json +51 -0
- package/spec/fixtures/valid/prep-ingredient-strings.valid.json +48 -0
- package/spec/fixtures/valid/prep-ingredient-structured.valid.json +45 -0
- package/spec/fixtures/valid/profile-equipped.valid.json +29 -0
- package/spec/fixtures/valid/profile-prepped.valid.json +32 -0
- package/spec/fixtures/valid/quantified-nested-ingredient-sections.valid.json +61 -0
- package/spec/fixtures/valid/referenced-scaling.valid.json +67 -0
- package/spec/fixtures/valid/storage-leftovers-simple.valid.json +27 -0
- package/spec/fixtures/valid/storage-leftovers-structured.valid.json +43 -0
- package/spec/fixtures/valid/structured-nested-step-sections.valid.json +84 -0
- package/spec/schemas/stacks-registry.schema.json +108 -0
- package/spec/soustack.schema.json +2379 -0
- package/spec/stacks/compute.schema.json +7 -0
- package/spec/stacks/compute@1.md +22 -0
- package/spec/stacks/dietary.schema.json +45 -0
- package/spec/stacks/dietary@1.md +24 -0
- package/spec/stacks/equipment.schema.json +98 -0
- package/spec/stacks/equipment@1.md +244 -0
- package/spec/stacks/illustrated.schema.json +54 -0
- package/spec/stacks/illustrated@1.md +24 -0
- package/spec/stacks/prep.schema.json +76 -0
- package/spec/stacks/prep@1.md +276 -0
- package/spec/stacks/quantified.schema.json +74 -0
- package/spec/stacks/quantified@1.md +24 -0
- package/spec/stacks/referenced.schema.json +96 -0
- package/spec/stacks/referenced@1.md +23 -0
- package/spec/stacks/registry.json +112 -0
- package/spec/stacks/scaling.schema.json +99 -0
- package/spec/stacks/scaling@1.md +238 -0
- package/spec/stacks/storage.schema.json +132 -0
- package/spec/stacks/storage@1.md +256 -0
- package/spec/stacks/structured.schema.json +48 -0
- package/spec/stacks/structured@1.md +24 -0
- package/spec/stacks/substitutions.schema.json +43 -0
- package/spec/stacks/substitutions@1.md +24 -0
- package/spec/stacks/techniques.schema.json +28 -0
- package/spec/stacks/techniques@1.md +23 -0
- package/spec/stacks/timed.schema.json +60 -0
- package/spec/stacks/timed@1.md +23 -0
- package/src/defs/common.schema.json +46 -0
- package/src/defs/duration.schema.json +33 -0
- package/src/defs/entities.schema.json +111 -0
- package/src/defs/ingredientQuantified.schema.json +9 -0
- package/src/defs/quantity.schema.json +16 -0
- package/src/defs/scalingRule.schema.json +127 -0
- package/src/defs/temperature.schema.json +63 -0
- package/src/profiles/base.schema.json +2 -2
- package/src/profiles/equipped.schema.json +10 -0
- package/src/profiles/illustrated.schema.json +4 -4
- package/src/profiles/lite.schema.json +10 -0
- package/src/profiles/prepped.schema.json +10 -0
- package/src/profiles/scalable.schema.json +6 -6
- package/src/profiles/timed.schema.json +10 -0
- package/src/schema.json +2271 -248
- package/src/schemas/stacks-registry.schema.json +108 -0
- package/src/soustack.schema.json +2271 -248
- package/src/stacks/compute.schema.json +7 -0
- package/src/stacks/compute@1.md +22 -0
- package/src/stacks/dietary.schema.json +45 -0
- package/src/stacks/dietary@1.md +24 -0
- package/src/stacks/equipment.schema.json +98 -0
- package/src/stacks/equipment@1.md +244 -0
- package/src/stacks/illustrated.schema.json +54 -0
- package/src/stacks/illustrated@1.md +24 -0
- package/src/stacks/prep.schema.json +76 -0
- package/src/stacks/prep@1.md +276 -0
- package/src/stacks/quantified.schema.json +74 -0
- package/src/stacks/quantified@1.md +24 -0
- package/src/stacks/referenced.schema.json +96 -0
- package/src/stacks/referenced@1.md +23 -0
- package/src/stacks/registry.json +112 -0
- package/src/stacks/scaling.schema.json +99 -0
- package/src/stacks/scaling@1.md +238 -0
- package/src/stacks/storage.schema.json +132 -0
- package/src/stacks/storage@1.md +256 -0
- package/src/stacks/structured.schema.json +48 -0
- package/src/stacks/structured@1.md +24 -0
- package/src/stacks/substitutions.schema.json +43 -0
- package/src/stacks/substitutions@1.md +24 -0
- package/src/stacks/techniques.schema.json +28 -0
- package/src/stacks/techniques@1.md +23 -0
- package/src/stacks/timed.schema.json +60 -0
- package/src/stacks/timed@1.md +23 -0
- package/dist/scrape.js.map +0 -1
- package/dist/scrape.mjs.map +0 -1
- package/src/profiles/cookable.schema.json +0 -18
- package/src/profiles/quantified.schema.json +0 -43
- package/src/profiles/schedulable.schema.json +0 -43
|
@@ -130,6 +130,138 @@ function extractUrl(value) {
|
|
|
130
130
|
return trimmed || void 0;
|
|
131
131
|
}
|
|
132
132
|
|
|
133
|
+
// src/normalize.ts
|
|
134
|
+
function normalizeRecipe(input) {
|
|
135
|
+
if (!input || typeof input !== "object") {
|
|
136
|
+
throw new Error("Recipe input must be an object");
|
|
137
|
+
}
|
|
138
|
+
const recipe = JSON.parse(JSON.stringify(input));
|
|
139
|
+
const warnings = [];
|
|
140
|
+
const legacyField = ["mod", "ules"].join("");
|
|
141
|
+
if (legacyField in recipe) {
|
|
142
|
+
throw new Error("The legacy field is no longer supported. Use `stacks` instead.");
|
|
143
|
+
}
|
|
144
|
+
normalizeStacks(recipe, warnings);
|
|
145
|
+
if (!recipe.stacks) {
|
|
146
|
+
recipe.stacks = {};
|
|
147
|
+
}
|
|
148
|
+
if (recipe && typeof recipe === "object" && "version" in recipe && !recipe.recipeVersion && typeof recipe.version === "string") {
|
|
149
|
+
recipe.recipeVersion = recipe.version;
|
|
150
|
+
delete recipe.version;
|
|
151
|
+
warnings.push("'version' is deprecated; mapped to 'recipeVersion'.");
|
|
152
|
+
}
|
|
153
|
+
normalizeTime(recipe);
|
|
154
|
+
return {
|
|
155
|
+
recipe,
|
|
156
|
+
warnings
|
|
157
|
+
};
|
|
158
|
+
}
|
|
159
|
+
function normalizeStacks(recipe, warnings) {
|
|
160
|
+
let stacks = {};
|
|
161
|
+
if (recipe.stacks && typeof recipe.stacks === "object" && !Array.isArray(recipe.stacks)) {
|
|
162
|
+
for (const [key, value] of Object.entries(recipe.stacks)) {
|
|
163
|
+
if (typeof value === "number" && Number.isInteger(value) && value >= 1) {
|
|
164
|
+
stacks[key] = value;
|
|
165
|
+
} else {
|
|
166
|
+
warnings.push(`Invalid stack version for '${key}': expected positive integer, got ${value}`);
|
|
167
|
+
}
|
|
168
|
+
}
|
|
169
|
+
}
|
|
170
|
+
if (Array.isArray(recipe.stacks)) {
|
|
171
|
+
const stackIdentifiers = recipe.stacks.filter((s) => typeof s === "string");
|
|
172
|
+
for (const identifier of stackIdentifiers) {
|
|
173
|
+
const parsed = parseStackIdentifier(identifier);
|
|
174
|
+
if (parsed) {
|
|
175
|
+
const { name, version } = parsed;
|
|
176
|
+
if (!stacks[name] || stacks[name] < version) {
|
|
177
|
+
stacks[name] = version;
|
|
178
|
+
}
|
|
179
|
+
} else {
|
|
180
|
+
warnings.push(`Invalid stack identifier '${identifier}': expected format 'name@version' (e.g., 'scaling@1')`);
|
|
181
|
+
}
|
|
182
|
+
}
|
|
183
|
+
}
|
|
184
|
+
recipe.stacks = stacks;
|
|
185
|
+
}
|
|
186
|
+
function parseStackIdentifier(identifier) {
|
|
187
|
+
if (typeof identifier !== "string" || !identifier.trim()) {
|
|
188
|
+
return null;
|
|
189
|
+
}
|
|
190
|
+
const match = identifier.trim().match(/^([a-z0-9_-]+)@(\d+)$/i);
|
|
191
|
+
if (!match) {
|
|
192
|
+
return null;
|
|
193
|
+
}
|
|
194
|
+
const [, name, versionStr] = match;
|
|
195
|
+
const version = parseInt(versionStr, 10);
|
|
196
|
+
if (isNaN(version) || version < 1) {
|
|
197
|
+
return null;
|
|
198
|
+
}
|
|
199
|
+
return { name, version };
|
|
200
|
+
}
|
|
201
|
+
function normalizeTime(recipe) {
|
|
202
|
+
const time = recipe?.time;
|
|
203
|
+
if (!time || typeof time !== "object" || Array.isArray(time)) return;
|
|
204
|
+
const structuredKeys = [
|
|
205
|
+
"prep",
|
|
206
|
+
"active",
|
|
207
|
+
"passive",
|
|
208
|
+
"total"
|
|
209
|
+
];
|
|
210
|
+
structuredKeys.forEach((key) => {
|
|
211
|
+
const value = time[key];
|
|
212
|
+
if (typeof value === "number") return;
|
|
213
|
+
const parsed = parseDuration(value);
|
|
214
|
+
if (parsed !== null) {
|
|
215
|
+
time[key] = parsed;
|
|
216
|
+
}
|
|
217
|
+
});
|
|
218
|
+
}
|
|
219
|
+
|
|
220
|
+
// src/specVersion.ts
|
|
221
|
+
var SOUSTACK_SPEC_VERSION = "0.0.2";
|
|
222
|
+
|
|
223
|
+
// src/schemaMetadata.ts
|
|
224
|
+
var CANONICAL_SCHEMA_ID = "https://soustack.spec/soustack.schema.json";
|
|
225
|
+
var LEGACY_SCHEMA_ID = `http://soustack.org/schema/v${SOUSTACK_SPEC_VERSION}`;
|
|
226
|
+
var RAW_SPEC_BASE = "https://raw.githubusercontent.com/soustack/soustack-spec";
|
|
227
|
+
var RAW_SPEC_FORK_BASE = "https://raw.githubusercontent.com/RichardHerold/soustack-spec";
|
|
228
|
+
var SCHEMA_ALIAS_MAP = /* @__PURE__ */ new Map([
|
|
229
|
+
[CANONICAL_SCHEMA_ID, CANONICAL_SCHEMA_ID],
|
|
230
|
+
[LEGACY_SCHEMA_ID, CANONICAL_SCHEMA_ID],
|
|
231
|
+
[`${LEGACY_SCHEMA_ID}/`, CANONICAL_SCHEMA_ID],
|
|
232
|
+
["https://soustack.org/schema/v0.0.2", CANONICAL_SCHEMA_ID],
|
|
233
|
+
["https://soustack.org/schema/v0.0.2/", CANONICAL_SCHEMA_ID],
|
|
234
|
+
[`${RAW_SPEC_BASE}/main/soustack.schema.json`, CANONICAL_SCHEMA_ID],
|
|
235
|
+
[`${RAW_SPEC_BASE}/v${SOUSTACK_SPEC_VERSION}/soustack.schema.json`, CANONICAL_SCHEMA_ID],
|
|
236
|
+
[`${RAW_SPEC_FORK_BASE}/main/soustack.schema.json`, CANONICAL_SCHEMA_ID],
|
|
237
|
+
[`${RAW_SPEC_FORK_BASE}/v${SOUSTACK_SPEC_VERSION}/soustack.schema.json`, CANONICAL_SCHEMA_ID]
|
|
238
|
+
]);
|
|
239
|
+
function resolveSchemaHint(value) {
|
|
240
|
+
if (typeof value !== "string" || !value) {
|
|
241
|
+
return { canonicalId: void 0, isSoustackSchema: false, wasAlias: false };
|
|
242
|
+
}
|
|
243
|
+
const trimmed = value.replace(/#$/, "");
|
|
244
|
+
const mapped = SCHEMA_ALIAS_MAP.get(trimmed) ?? trimmed;
|
|
245
|
+
const isSoustackSchema = SCHEMA_ALIAS_MAP.has(trimmed) || mapped.startsWith("http://soustack.org/schema") || mapped.startsWith("https://soustack.org/schema") || mapped.startsWith("https://soustack.spec/") || mapped.startsWith("https://soustack.org/schemas/");
|
|
246
|
+
return {
|
|
247
|
+
canonicalId: mapped,
|
|
248
|
+
isSoustackSchema,
|
|
249
|
+
wasAlias: mapped !== trimmed || SCHEMA_ALIAS_MAP.has(trimmed)
|
|
250
|
+
};
|
|
251
|
+
}
|
|
252
|
+
function withCanonicalSchema(value) {
|
|
253
|
+
if (!value || typeof value !== "object" || Array.isArray(value)) {
|
|
254
|
+
return value;
|
|
255
|
+
}
|
|
256
|
+
const existing = typeof value.$schema === "string" ? value.$schema : void 0;
|
|
257
|
+
const resolved = resolveSchemaHint(existing);
|
|
258
|
+
const schemaId = resolved.isSoustackSchema ? resolved.canonicalId : CANONICAL_SCHEMA_ID;
|
|
259
|
+
return {
|
|
260
|
+
...value,
|
|
261
|
+
$schema: schemaId ?? CANONICAL_SCHEMA_ID
|
|
262
|
+
};
|
|
263
|
+
}
|
|
264
|
+
|
|
133
265
|
// src/fromSchemaOrg.ts
|
|
134
266
|
function fromSchemaOrg(input) {
|
|
135
267
|
const recipeNode = extractRecipeNode(input);
|
|
@@ -145,23 +277,18 @@ function fromSchemaOrg(input) {
|
|
|
145
277
|
const source = convertSource(recipeNode);
|
|
146
278
|
const dateModified = recipeNode.dateModified || void 0;
|
|
147
279
|
const nutrition = convertNutrition(recipeNode.nutrition);
|
|
148
|
-
const
|
|
149
|
-
const
|
|
150
|
-
const
|
|
151
|
-
const
|
|
152
|
-
const
|
|
153
|
-
if (attribution) modules.push("attribution@1");
|
|
154
|
-
if (taxonomy) modules.push("taxonomy@1");
|
|
155
|
-
if (media) modules.push("media@1");
|
|
156
|
-
if (nutrition) modules.push("nutrition@1");
|
|
157
|
-
if (times) modules.push("times@1");
|
|
158
|
-
return {
|
|
280
|
+
const images = toArray(normalizeImage(recipeNode.image));
|
|
281
|
+
const videos = normalizeMediaList(recipeNode.video);
|
|
282
|
+
const profile = recipeYield && time ? "base" : "lite";
|
|
283
|
+
const stacks = {};
|
|
284
|
+
const rawRecipe = {
|
|
159
285
|
"@type": "Recipe",
|
|
160
|
-
profile
|
|
161
|
-
|
|
286
|
+
profile,
|
|
287
|
+
stacks,
|
|
162
288
|
name: recipeNode.name.trim(),
|
|
163
289
|
description: recipeNode.description?.trim() || void 0,
|
|
164
|
-
|
|
290
|
+
images: images.length ? images : void 0,
|
|
291
|
+
videos: videos.length ? videos : void 0,
|
|
165
292
|
category,
|
|
166
293
|
tags: tags.length ? tags : void 0,
|
|
167
294
|
source,
|
|
@@ -171,12 +298,10 @@ function fromSchemaOrg(input) {
|
|
|
171
298
|
ingredients,
|
|
172
299
|
instructions,
|
|
173
300
|
...dateModified ? { dateModified } : {},
|
|
174
|
-
...nutrition ? { nutrition } : {}
|
|
175
|
-
...attribution ? { attribution } : {},
|
|
176
|
-
...taxonomy ? { taxonomy } : {},
|
|
177
|
-
...media ? { media } : {},
|
|
178
|
-
...times ? { times } : {}
|
|
301
|
+
...nutrition ? { nutrition } : {}
|
|
179
302
|
};
|
|
303
|
+
const { recipe } = normalizeRecipe(rawRecipe);
|
|
304
|
+
return withCanonicalSchema(recipe);
|
|
180
305
|
}
|
|
181
306
|
function extractRecipeNode(input) {
|
|
182
307
|
if (!input) return null;
|
|
@@ -220,7 +345,10 @@ function isValidName(name) {
|
|
|
220
345
|
function convertIngredients(value) {
|
|
221
346
|
if (!value) return [];
|
|
222
347
|
const normalized = Array.isArray(value) ? value : [value];
|
|
223
|
-
return normalized.map((item) => typeof item === "string" ? item.trim() : "").filter(Boolean)
|
|
348
|
+
return normalized.map((item) => typeof item === "string" ? item.trim() : "").filter(Boolean).map((name) => ({
|
|
349
|
+
name,
|
|
350
|
+
scaling: { mode: "linear" }
|
|
351
|
+
}));
|
|
224
352
|
}
|
|
225
353
|
function convertInstructions(value) {
|
|
226
354
|
if (!value) return [];
|
|
@@ -239,8 +367,8 @@ function convertInstructions(value) {
|
|
|
239
367
|
const subsectionItems = extractSectionItems(entry.itemListElement);
|
|
240
368
|
if (subsectionItems.length) {
|
|
241
369
|
result.push({
|
|
242
|
-
|
|
243
|
-
|
|
370
|
+
section: entry.name?.trim() || "Section",
|
|
371
|
+
steps: subsectionItems
|
|
244
372
|
});
|
|
245
373
|
}
|
|
246
374
|
continue;
|
|
@@ -296,7 +424,7 @@ function convertHowToStep(step) {
|
|
|
296
424
|
}
|
|
297
425
|
const instruction = { text };
|
|
298
426
|
if (id) instruction.id = id;
|
|
299
|
-
if (image) instruction.
|
|
427
|
+
if (image) instruction.images = Array.isArray(image) ? image : [image];
|
|
300
428
|
if (timing) instruction.timing = timing;
|
|
301
429
|
return instruction;
|
|
302
430
|
}
|
|
@@ -306,7 +434,13 @@ function extractInstructionTiming(step) {
|
|
|
306
434
|
return void 0;
|
|
307
435
|
}
|
|
308
436
|
const parsed = smartParseDuration(duration);
|
|
309
|
-
|
|
437
|
+
if (parsed === null || parsed === void 0) {
|
|
438
|
+
return void 0;
|
|
439
|
+
}
|
|
440
|
+
return {
|
|
441
|
+
activity: "active",
|
|
442
|
+
duration: { minutes: parsed }
|
|
443
|
+
};
|
|
310
444
|
}
|
|
311
445
|
function extractInstructionId(step) {
|
|
312
446
|
const raw = step["@id"] || step.id || step.url;
|
|
@@ -323,14 +457,22 @@ function isHowToSection(value) {
|
|
|
323
457
|
return Boolean(value) && typeof value === "object" && value["@type"] === "HowToSection" && Array.isArray(value.itemListElement);
|
|
324
458
|
}
|
|
325
459
|
function convertTime(recipe) {
|
|
460
|
+
const total = smartParseDuration(recipe.totalTime ?? "");
|
|
326
461
|
const prep = smartParseDuration(recipe.prepTime ?? "");
|
|
327
462
|
const cook = smartParseDuration(recipe.cookTime ?? "");
|
|
328
|
-
const
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
if (
|
|
333
|
-
|
|
463
|
+
const minutes = isPositiveDuration(total) ? total : [prep, cook].filter(isPositiveDuration).reduce((sum, value) => {
|
|
464
|
+
if (sum === null) return value;
|
|
465
|
+
return sum + value;
|
|
466
|
+
}, null);
|
|
467
|
+
if (!isPositiveDuration(minutes)) {
|
|
468
|
+
return void 0;
|
|
469
|
+
}
|
|
470
|
+
return {
|
|
471
|
+
total: { minutes }
|
|
472
|
+
};
|
|
473
|
+
}
|
|
474
|
+
function isPositiveDuration(value) {
|
|
475
|
+
return typeof value === "number" && Number.isFinite(value) && value > 0;
|
|
334
476
|
}
|
|
335
477
|
function collectTags(cuisine, keywords) {
|
|
336
478
|
const tags = /* @__PURE__ */ new Set();
|
|
@@ -388,23 +530,6 @@ function extractEntityName(value) {
|
|
|
388
530
|
}
|
|
389
531
|
return void 0;
|
|
390
532
|
}
|
|
391
|
-
function convertAttribution(recipe) {
|
|
392
|
-
const attribution = {};
|
|
393
|
-
const url = (recipe.url || recipe.mainEntityOfPage)?.trim();
|
|
394
|
-
const author = extractEntityName(recipe.author);
|
|
395
|
-
const datePublished = recipe.datePublished?.trim();
|
|
396
|
-
if (url) attribution.url = url;
|
|
397
|
-
if (author) attribution.author = author;
|
|
398
|
-
if (datePublished) attribution.datePublished = datePublished;
|
|
399
|
-
return Object.keys(attribution).length ? attribution : void 0;
|
|
400
|
-
}
|
|
401
|
-
function convertTaxonomy(keywords, category, cuisine) {
|
|
402
|
-
const taxonomy = {};
|
|
403
|
-
if (keywords.length) taxonomy.keywords = keywords;
|
|
404
|
-
if (category) taxonomy.category = category;
|
|
405
|
-
if (cuisine) taxonomy.cuisine = cuisine;
|
|
406
|
-
return Object.keys(taxonomy).length ? taxonomy : void 0;
|
|
407
|
-
}
|
|
408
533
|
function normalizeMediaList(value) {
|
|
409
534
|
if (!value) return [];
|
|
410
535
|
if (typeof value === "string") return [value.trim()].filter(Boolean);
|
|
@@ -415,28 +540,18 @@ function normalizeMediaList(value) {
|
|
|
415
540
|
return url ? [url] : [];
|
|
416
541
|
}
|
|
417
542
|
function extractMediaUrl(value) {
|
|
418
|
-
if (value && typeof value === "object"
|
|
419
|
-
const
|
|
420
|
-
|
|
543
|
+
if (value && typeof value === "object") {
|
|
544
|
+
const urlValue = typeof value.url === "string" ? value.url : typeof value.contentUrl === "string" ? value.contentUrl : void 0;
|
|
545
|
+
if (typeof urlValue === "string") {
|
|
546
|
+
const trimmed = urlValue.trim();
|
|
547
|
+
return trimmed || void 0;
|
|
548
|
+
}
|
|
421
549
|
}
|
|
422
550
|
return void 0;
|
|
423
551
|
}
|
|
424
|
-
function
|
|
425
|
-
|
|
426
|
-
|
|
427
|
-
const videos = normalizeMediaList(video);
|
|
428
|
-
const media = {};
|
|
429
|
-
if (images.length) media.images = images;
|
|
430
|
-
if (videos.length) media.videos = videos;
|
|
431
|
-
return Object.keys(media).length ? media : void 0;
|
|
432
|
-
}
|
|
433
|
-
function convertTimes(time) {
|
|
434
|
-
if (!time) return void 0;
|
|
435
|
-
const times = {};
|
|
436
|
-
if (typeof time.prep === "number") times.prepMinutes = time.prep;
|
|
437
|
-
if (typeof time.active === "number") times.cookMinutes = time.active;
|
|
438
|
-
if (typeof time.total === "number") times.totalMinutes = time.total;
|
|
439
|
-
return Object.keys(times).length ? times : void 0;
|
|
552
|
+
function toArray(value) {
|
|
553
|
+
if (!value) return [];
|
|
554
|
+
return Array.isArray(value) ? value : [value];
|
|
440
555
|
}
|
|
441
556
|
function convertNutrition(nutrition) {
|
|
442
557
|
if (!nutrition || typeof nutrition !== "object") {
|
|
@@ -537,13 +652,16 @@ async function fetchPage(url, options = {}) {
|
|
|
537
652
|
const response = await resolvedFetch(url, requestInit);
|
|
538
653
|
clearTimeout(timeoutId);
|
|
539
654
|
if (response && typeof process !== "undefined" && process.env.NODE_ENV !== "test") {
|
|
540
|
-
|
|
541
|
-
|
|
542
|
-
|
|
543
|
-
globalFetch
|
|
544
|
-
|
|
655
|
+
const ingestUrl = process.env.SOUSTACK_DEBUG_INGEST_URL;
|
|
656
|
+
if (ingestUrl) {
|
|
657
|
+
try {
|
|
658
|
+
const globalFetch = typeof globalThis !== "undefined" && typeof globalThis.fetch !== "undefined" ? globalThis.fetch : null;
|
|
659
|
+
if (globalFetch) {
|
|
660
|
+
globalFetch(ingestUrl, { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify({ location: "scraper/fetch.ts:63", message: "fetch response", data: { url, status: response.status, statusText: response.statusText, ok: response.ok, isNYTimes: url.includes("nytimes.com") }, timestamp: Date.now(), sessionId: "debug-session", runId: "run1", hypothesisId: "B" }) }).catch(() => {
|
|
661
|
+
});
|
|
662
|
+
}
|
|
663
|
+
} catch {
|
|
545
664
|
}
|
|
546
|
-
} catch {
|
|
547
665
|
}
|
|
548
666
|
}
|
|
549
667
|
if (!response.ok) {
|
|
@@ -555,13 +673,16 @@ async function fetchPage(url, options = {}) {
|
|
|
555
673
|
}
|
|
556
674
|
const html = await response.text();
|
|
557
675
|
if (typeof process !== "undefined" && process.env.NODE_ENV !== "test") {
|
|
558
|
-
|
|
559
|
-
|
|
560
|
-
|
|
561
|
-
globalFetch
|
|
562
|
-
|
|
676
|
+
const ingestUrl = process.env.SOUSTACK_DEBUG_INGEST_URL;
|
|
677
|
+
if (ingestUrl) {
|
|
678
|
+
try {
|
|
679
|
+
const globalFetch = typeof globalThis !== "undefined" && typeof globalThis.fetch !== "undefined" ? globalThis.fetch : null;
|
|
680
|
+
if (globalFetch) {
|
|
681
|
+
globalFetch(ingestUrl, { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify({ location: "scraper/fetch.ts:75", message: "HTML received", data: { htmlLength: html.length, hasLoginPage: html.toLowerCase().includes("login") || html.toLowerCase().includes("sign in"), hasRecipeData: html.includes("application/ld+json") || html.includes("schema.org/Recipe") }, timestamp: Date.now(), sessionId: "debug-session", runId: "run1", hypothesisId: "B,D" }) }).catch(() => {
|
|
682
|
+
});
|
|
683
|
+
}
|
|
684
|
+
} catch {
|
|
563
685
|
}
|
|
564
|
-
} catch {
|
|
565
686
|
}
|
|
566
687
|
}
|
|
567
688
|
return html;
|
|
@@ -591,8 +712,6 @@ function isRecipeNode(value) {
|
|
|
591
712
|
return false;
|
|
592
713
|
}
|
|
593
714
|
const type = value["@type"];
|
|
594
|
-
fetch("http://127.0.0.1:7243/ingest/7225c3b5-9ac2-4c94-b561-807ca9003b66", { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify({ location: "scraper/extractors/utils.ts:14", message: "isRecipeNode check", data: { type, typeLower: typeof type === "string" ? type.toLowerCase() : Array.isArray(type) ? type.map((t) => typeof t === "string" ? t.toLowerCase() : t) : void 0, isMatch: typeof type === "string" ? RECIPE_TYPES.has(type.toLowerCase()) : Array.isArray(type) ? type.some((e) => typeof e === "string" && RECIPE_TYPES.has(e.toLowerCase())) : false }, timestamp: Date.now(), sessionId: "debug-session", runId: "run1", hypothesisId: "A" }) }).catch(() => {
|
|
595
|
-
});
|
|
596
715
|
if (typeof type === "string") {
|
|
597
716
|
return RECIPE_TYPES.has(type.toLowerCase());
|
|
598
717
|
}
|
|
@@ -620,20 +739,14 @@ function normalizeText(value) {
|
|
|
620
739
|
function extractJsonLd(html) {
|
|
621
740
|
const $ = cheerio.load(html);
|
|
622
741
|
const scripts = $('script[type="application/ld+json"]');
|
|
623
|
-
fetch("http://127.0.0.1:7243/ingest/7225c3b5-9ac2-4c94-b561-807ca9003b66", { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify({ location: "scraper/extractors/jsonld.ts:8", message: "JSON-LD scripts found", data: { scriptCount: scripts.length }, timestamp: Date.now(), sessionId: "debug-session", runId: "run1", hypothesisId: "C,D" }) }).catch(() => {
|
|
624
|
-
});
|
|
625
742
|
const candidates = [];
|
|
626
743
|
scripts.each((_, element) => {
|
|
627
744
|
const content = $(element).html();
|
|
628
745
|
if (!content) return;
|
|
629
746
|
const parsed = safeJsonParse(content);
|
|
630
747
|
if (!parsed) return;
|
|
631
|
-
fetch("http://127.0.0.1:7243/ingest/7225c3b5-9ac2-4c94-b561-807ca9003b66", { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify({ location: "scraper/extractors/jsonld.ts:18", message: "JSON-LD parsed", data: { hasGraph: !!(parsed && typeof parsed === "object" && "@graph" in parsed), type: parsed && typeof parsed === "object" && "@type" in parsed ? parsed["@type"] : void 0 }, timestamp: Date.now(), sessionId: "debug-session", runId: "run1", hypothesisId: "A,C" }) }).catch(() => {
|
|
632
|
-
});
|
|
633
748
|
collectCandidates(parsed, candidates);
|
|
634
749
|
});
|
|
635
|
-
fetch("http://127.0.0.1:7243/ingest/7225c3b5-9ac2-4c94-b561-807ca9003b66", { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify({ location: "scraper/extractors/jsonld.ts:22", message: "JSON-LD candidates", data: { candidateCount: candidates.length, candidateTypes: candidates.map((c) => c["@type"]) }, timestamp: Date.now(), sessionId: "debug-session", runId: "run1", hypothesisId: "A,C" }) }).catch(() => {
|
|
636
|
-
});
|
|
637
750
|
return candidates[0] ?? null;
|
|
638
751
|
}
|
|
639
752
|
function collectCandidates(payload, bucket) {
|
|
@@ -815,13 +928,16 @@ function extractRecipe(html) {
|
|
|
815
928
|
}
|
|
816
929
|
const jsonLdRecipe = extractJsonLd(html);
|
|
817
930
|
if (typeof process !== "undefined" && process.env.NODE_ENV !== "test") {
|
|
818
|
-
|
|
819
|
-
|
|
820
|
-
|
|
821
|
-
globalFetch
|
|
822
|
-
|
|
931
|
+
const ingestUrl = process.env.SOUSTACK_DEBUG_INGEST_URL;
|
|
932
|
+
if (ingestUrl) {
|
|
933
|
+
try {
|
|
934
|
+
const globalFetch = typeof globalThis !== "undefined" && typeof globalThis.fetch !== "undefined" ? globalThis.fetch : null;
|
|
935
|
+
if (globalFetch) {
|
|
936
|
+
globalFetch(ingestUrl, { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify({ location: "scraper/extractors/index.ts:6", message: "JSON-LD extraction result", data: { hasJsonLd: !!jsonLdRecipe }, timestamp: Date.now(), sessionId: "debug-session", runId: "run1", hypothesisId: "C,D" }) }).catch(() => {
|
|
937
|
+
});
|
|
938
|
+
}
|
|
939
|
+
} catch {
|
|
823
940
|
}
|
|
824
|
-
} catch {
|
|
825
941
|
}
|
|
826
942
|
}
|
|
827
943
|
if (jsonLdRecipe) {
|
|
@@ -829,13 +945,16 @@ function extractRecipe(html) {
|
|
|
829
945
|
}
|
|
830
946
|
const microdataRecipe = extractMicrodata(html);
|
|
831
947
|
if (typeof process !== "undefined" && process.env.NODE_ENV !== "test") {
|
|
832
|
-
|
|
833
|
-
|
|
834
|
-
|
|
835
|
-
globalFetch
|
|
836
|
-
|
|
948
|
+
const ingestUrl = process.env.SOUSTACK_DEBUG_INGEST_URL;
|
|
949
|
+
if (ingestUrl) {
|
|
950
|
+
try {
|
|
951
|
+
const globalFetch = typeof globalThis !== "undefined" && typeof globalThis.fetch !== "undefined" ? globalThis.fetch : null;
|
|
952
|
+
if (globalFetch) {
|
|
953
|
+
globalFetch(ingestUrl, { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify({ location: "scraper/extractors/index.ts:12", message: "Microdata extraction result", data: { hasMicrodata: !!microdataRecipe }, timestamp: Date.now(), sessionId: "debug-session", runId: "run1", hypothesisId: "D" }) }).catch(() => {
|
|
954
|
+
});
|
|
955
|
+
}
|
|
956
|
+
} catch {
|
|
837
957
|
}
|
|
838
|
-
} catch {
|
|
839
958
|
}
|
|
840
959
|
}
|
|
841
960
|
if (microdataRecipe) {
|
|
@@ -847,35 +966,44 @@ function extractRecipe(html) {
|
|
|
847
966
|
// src/scraper/index.ts
|
|
848
967
|
async function scrapeRecipe(url, options = {}) {
|
|
849
968
|
if (typeof process !== "undefined" && process.env.NODE_ENV !== "test") {
|
|
850
|
-
|
|
851
|
-
|
|
852
|
-
|
|
853
|
-
globalFetch
|
|
854
|
-
|
|
969
|
+
const ingestUrl = process.env.SOUSTACK_DEBUG_INGEST_URL;
|
|
970
|
+
if (ingestUrl) {
|
|
971
|
+
try {
|
|
972
|
+
const globalFetch = typeof globalThis !== "undefined" && typeof globalThis.fetch !== "undefined" ? globalThis.fetch : null;
|
|
973
|
+
if (globalFetch) {
|
|
974
|
+
globalFetch(ingestUrl, { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify({ location: "scraper/index.ts:7", message: "scrapeRecipe entry", data: { url, hasOptions: !!options }, timestamp: Date.now(), sessionId: "debug-session", runId: "run1", hypothesisId: "A,B,C,D,E" }) }).catch(() => {
|
|
975
|
+
});
|
|
976
|
+
}
|
|
977
|
+
} catch {
|
|
855
978
|
}
|
|
856
|
-
} catch {
|
|
857
979
|
}
|
|
858
980
|
}
|
|
859
981
|
const html = await fetchPage(url, options);
|
|
860
982
|
if (typeof process !== "undefined" && process.env.NODE_ENV !== "test") {
|
|
861
|
-
|
|
862
|
-
|
|
863
|
-
|
|
864
|
-
globalFetch
|
|
865
|
-
|
|
983
|
+
const ingestUrl = process.env.SOUSTACK_DEBUG_INGEST_URL;
|
|
984
|
+
if (ingestUrl) {
|
|
985
|
+
try {
|
|
986
|
+
const globalFetch = typeof globalThis !== "undefined" && typeof globalThis.fetch !== "undefined" ? globalThis.fetch : null;
|
|
987
|
+
if (globalFetch) {
|
|
988
|
+
globalFetch(ingestUrl, { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify({ location: "scraper/index.ts:9", message: "HTML fetched", data: { htmlLength: html?.length, htmlPreview: html?.substring(0, 200) }, timestamp: Date.now(), sessionId: "debug-session", runId: "run1", hypothesisId: "B" }) }).catch(() => {
|
|
989
|
+
});
|
|
990
|
+
}
|
|
991
|
+
} catch {
|
|
866
992
|
}
|
|
867
|
-
} catch {
|
|
868
993
|
}
|
|
869
994
|
}
|
|
870
995
|
const { recipe } = extractRecipe(html);
|
|
871
996
|
if (typeof process !== "undefined" && process.env.NODE_ENV !== "test") {
|
|
872
|
-
|
|
873
|
-
|
|
874
|
-
|
|
875
|
-
globalFetch
|
|
876
|
-
|
|
997
|
+
const ingestUrl = process.env.SOUSTACK_DEBUG_INGEST_URL;
|
|
998
|
+
if (ingestUrl) {
|
|
999
|
+
try {
|
|
1000
|
+
const globalFetch = typeof globalThis !== "undefined" && typeof globalThis.fetch !== "undefined" ? globalThis.fetch : null;
|
|
1001
|
+
if (globalFetch) {
|
|
1002
|
+
globalFetch(ingestUrl, { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify({ location: "scraper/index.ts:11", message: "extractRecipe result", data: { hasRecipe: !!recipe, recipeType: recipe?.["@type"], recipeName: recipe?.name }, timestamp: Date.now(), sessionId: "debug-session", runId: "run1", hypothesisId: "A,C,D" }) }).catch(() => {
|
|
1003
|
+
});
|
|
1004
|
+
}
|
|
1005
|
+
} catch {
|
|
877
1006
|
}
|
|
878
|
-
} catch {
|
|
879
1007
|
}
|
|
880
1008
|
}
|
|
881
1009
|
if (!recipe) {
|
|
@@ -883,13 +1011,16 @@ async function scrapeRecipe(url, options = {}) {
|
|
|
883
1011
|
}
|
|
884
1012
|
const soustackRecipe = fromSchemaOrg(recipe);
|
|
885
1013
|
if (typeof process !== "undefined" && process.env.NODE_ENV !== "test") {
|
|
886
|
-
|
|
887
|
-
|
|
888
|
-
|
|
889
|
-
globalFetch
|
|
890
|
-
|
|
1014
|
+
const ingestUrl = process.env.SOUSTACK_DEBUG_INGEST_URL;
|
|
1015
|
+
if (ingestUrl) {
|
|
1016
|
+
try {
|
|
1017
|
+
const globalFetch = typeof globalThis !== "undefined" && typeof globalThis.fetch !== "undefined" ? globalThis.fetch : null;
|
|
1018
|
+
if (globalFetch) {
|
|
1019
|
+
globalFetch(ingestUrl, { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify({ location: "scraper/index.ts:17", message: "fromSchemaOrg result", data: { hasSoustackRecipe: !!soustackRecipe, soustackRecipeName: soustackRecipe?.name }, timestamp: Date.now(), sessionId: "debug-session", runId: "run1", hypothesisId: "A" }) }).catch(() => {
|
|
1020
|
+
});
|
|
1021
|
+
}
|
|
1022
|
+
} catch {
|
|
891
1023
|
}
|
|
892
|
-
} catch {
|
|
893
1024
|
}
|
|
894
1025
|
}
|
|
895
1026
|
if (!soustackRecipe) {
|
|
@@ -917,5 +1048,5 @@ exports.extractRecipeFromHTML = extractRecipeFromHTML;
|
|
|
917
1048
|
exports.extractSchemaOrgRecipeFromHTML = extractSchemaOrgRecipeFromHTML;
|
|
918
1049
|
exports.fetchPage = fetchPage;
|
|
919
1050
|
exports.scrapeRecipe = scrapeRecipe;
|
|
920
|
-
//# sourceMappingURL=
|
|
921
|
-
//# sourceMappingURL=
|
|
1051
|
+
//# sourceMappingURL=index.js.map
|
|
1052
|
+
//# sourceMappingURL=index.js.map
|