soustack 0.1.2 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +86 -10
- package/dist/cli/index.js +159 -53
- package/dist/cli/index.js.map +1 -1
- package/dist/index.d.mts +75 -11
- package/dist/index.d.ts +75 -11
- package/dist/index.js +165 -53
- package/dist/index.js.map +1 -1
- package/dist/index.mjs +164 -54
- package/dist/index.mjs.map +1 -1
- package/package.json +2 -2
- package/src/schema.json +22 -4
package/dist/index.mjs
CHANGED
|
@@ -133,8 +133,8 @@ function flattenInstructions(items) {
|
|
|
133
133
|
// src/schema.json
|
|
134
134
|
var schema_default = {
|
|
135
135
|
$schema: "http://json-schema.org/draft-07/schema#",
|
|
136
|
-
$id: "http://soustack.org/schema/v0.
|
|
137
|
-
title: "Soustack Recipe Schema v0.
|
|
136
|
+
$id: "http://soustack.org/schema/v0.2",
|
|
137
|
+
title: "Soustack Recipe Schema v0.2",
|
|
138
138
|
description: "A portable, scalable, interoperable recipe format.",
|
|
139
139
|
type: "object",
|
|
140
140
|
required: ["name", "ingredients", "instructions"],
|
|
@@ -164,8 +164,21 @@ var schema_default = {
|
|
|
164
164
|
items: { type: "string" }
|
|
165
165
|
},
|
|
166
166
|
image: {
|
|
167
|
-
|
|
168
|
-
|
|
167
|
+
description: "Recipe-level hero image(s)",
|
|
168
|
+
anyOf: [
|
|
169
|
+
{
|
|
170
|
+
type: "string",
|
|
171
|
+
format: "uri"
|
|
172
|
+
},
|
|
173
|
+
{
|
|
174
|
+
type: "array",
|
|
175
|
+
minItems: 1,
|
|
176
|
+
items: {
|
|
177
|
+
type: "string",
|
|
178
|
+
format: "uri"
|
|
179
|
+
}
|
|
180
|
+
}
|
|
181
|
+
]
|
|
169
182
|
},
|
|
170
183
|
dateAdded: {
|
|
171
184
|
type: "string",
|
|
@@ -330,6 +343,11 @@ var schema_default = {
|
|
|
330
343
|
properties: {
|
|
331
344
|
id: { type: "string" },
|
|
332
345
|
text: { type: "string" },
|
|
346
|
+
image: {
|
|
347
|
+
type: "string",
|
|
348
|
+
format: "uri",
|
|
349
|
+
description: "Optional image that illustrates this instruction"
|
|
350
|
+
},
|
|
333
351
|
destination: { type: "string" },
|
|
334
352
|
dependsOn: {
|
|
335
353
|
type: "array",
|
|
@@ -1227,6 +1245,40 @@ function smartParseDuration(input) {
|
|
|
1227
1245
|
return parseHumanDuration(input);
|
|
1228
1246
|
}
|
|
1229
1247
|
|
|
1248
|
+
// src/utils/image.ts
|
|
1249
|
+
/**
 * Collapse a loosely-typed image value into a single URL, a list of URLs,
 * or nothing.
 *
 * How each input shape is handled:
 *   - falsy value  -> undefined
 *   - string       -> the trimmed string, or undefined when it is blank
 *   - array        -> string entries are trimmed, every other entry is passed
 *                     to extractUrl; entries that end up empty are dropped
 *   - anything else -> the result of extractUrl(image)
 *
 * @param {*} image Raw image value.
 * @returns {string|string[]|undefined} One URL when exactly one survives,
 *   an array when two or more survive, undefined when none do.
 */
function normalizeImage(image) {
  if (!image) {
    return void 0;
  }
  if (typeof image === "string") {
    const single = image.trim();
    return single ? single : void 0;
  }
  if (!Array.isArray(image)) {
    return extractUrl(image);
  }
  const collected = [];
  for (const item of image) {
    const url = typeof item === "string" ? item.trim() : extractUrl(item);
    if (typeof url === "string" && url) {
      collected.push(url);
    }
  }
  if (collected.length === 0) {
    return void 0;
  }
  // A lone survivor is returned bare rather than wrapped in an array.
  return collected.length === 1 ? collected[0] : collected;
}
|
|
1269
|
+
/**
 * Pull a usable URL out of an object carrying `url` and/or `contentUrl`
 * (the property names used by a Schema.org ImageObject).
 *
 * `url` is tried first and `contentUrl` second. A property only counts when
 * it is a string that still has content after trimming, so a blank `url`
 * does not hide a valid `contentUrl`.
 *
 * @param {*} value Candidate object; anything falsy or non-object yields undefined.
 * @returns {string|undefined} The trimmed URL, or undefined when neither
 *   property holds a non-empty string.
 */
function extractUrl(value) {
  if (!value || typeof value !== "object") {
    return void 0;
  }
  for (const key of ["url", "contentUrl"]) {
    const candidate = value[key];
    if (typeof candidate !== "string") {
      continue;
    }
    const trimmed = candidate.trim();
    if (trimmed) {
      return trimmed;
    }
  }
  return void 0;
}
|
|
1281
|
+
|
|
1230
1282
|
// src/fromSchemaOrg.ts
|
|
1231
1283
|
function fromSchemaOrg(input) {
|
|
1232
1284
|
const recipeNode = extractRecipeNode(input);
|
|
@@ -1239,13 +1291,12 @@ function fromSchemaOrg(input) {
|
|
|
1239
1291
|
const recipeYield = parseYield(recipeNode.recipeYield);
|
|
1240
1292
|
const tags = collectTags(recipeNode.recipeCuisine, recipeNode.keywords);
|
|
1241
1293
|
const category = extractFirst(recipeNode.recipeCategory);
|
|
1242
|
-
const image = convertImage(recipeNode.image);
|
|
1243
1294
|
const source = convertSource(recipeNode);
|
|
1244
1295
|
const nutrition = recipeNode.nutrition && typeof recipeNode.nutrition === "object" ? recipeNode.nutrition : void 0;
|
|
1245
1296
|
return {
|
|
1246
1297
|
name: recipeNode.name.trim(),
|
|
1247
1298
|
description: recipeNode.description?.trim() || void 0,
|
|
1248
|
-
image,
|
|
1299
|
+
image: normalizeImage(recipeNode.image),
|
|
1249
1300
|
category,
|
|
1250
1301
|
tags: tags.length ? tags : void 0,
|
|
1251
1302
|
source,
|
|
@@ -1328,9 +1379,9 @@ function convertInstructions(value) {
|
|
|
1328
1379
|
continue;
|
|
1329
1380
|
}
|
|
1330
1381
|
if (isHowToStep(entry)) {
|
|
1331
|
-
const
|
|
1332
|
-
if (
|
|
1333
|
-
result.push(
|
|
1382
|
+
const parsed = convertHowToStep(entry);
|
|
1383
|
+
if (parsed) {
|
|
1384
|
+
result.push(parsed);
|
|
1334
1385
|
}
|
|
1335
1386
|
}
|
|
1336
1387
|
}
|
|
@@ -1348,9 +1399,9 @@ function extractSectionItems(items = []) {
|
|
|
1348
1399
|
continue;
|
|
1349
1400
|
}
|
|
1350
1401
|
if (isHowToStep(item)) {
|
|
1351
|
-
const
|
|
1352
|
-
if (
|
|
1353
|
-
result.push(
|
|
1402
|
+
const parsed = convertHowToStep(item);
|
|
1403
|
+
if (parsed) {
|
|
1404
|
+
result.push(parsed);
|
|
1354
1405
|
}
|
|
1355
1406
|
continue;
|
|
1356
1407
|
}
|
|
@@ -1364,6 +1415,17 @@ function extractInstructionText(value) {
|
|
|
1364
1415
|
const text = typeof value.text === "string" ? value.text : value.name;
|
|
1365
1416
|
return typeof text === "string" ? text.trim() || void 0 : void 0;
|
|
1366
1417
|
}
|
|
1418
|
+
/**
 * Convert a HowToStep node into an instruction entry.
 *
 * The step's text comes from extractInstructionText; a step without usable
 * text is discarded. When the step carries an image, the instruction is
 * returned as `{ text, image }`; otherwise the bare text string is returned.
 *
 * @param {object} step Node the caller has already identified as a HowToStep.
 * @returns {string|{text: string, image: string}|undefined} The instruction,
 *   or undefined when the step has no text.
 */
function convertHowToStep(step) {
  const text = extractInstructionText(step);
  if (!text) {
    return void 0;
  }
  const normalizedImage = normalizeImage(step.image);
  // normalizeImage yields an array when a step lists several images. An
  // instruction holds a single image URI, so keep the first one instead of
  // dropping them all.
  const image = Array.isArray(normalizedImage) ? normalizedImage[0] : normalizedImage;
  if (typeof image === "string") {
    return { text, image };
  }
  return text;
}
|
|
1367
1429
|
function isHowToStep(value) {
|
|
1368
1430
|
return Boolean(value) && typeof value === "object" && value["@type"] === "HowToStep";
|
|
1369
1431
|
}
|
|
@@ -1405,26 +1467,6 @@ function extractFirst(value) {
|
|
|
1405
1467
|
const arr = flattenStrings(value);
|
|
1406
1468
|
return arr.length ? arr[0] : void 0;
|
|
1407
1469
|
}
|
|
1408
|
-
function convertImage(value) {
|
|
1409
|
-
if (!value) return void 0;
|
|
1410
|
-
if (typeof value === "string") {
|
|
1411
|
-
return value;
|
|
1412
|
-
}
|
|
1413
|
-
if (Array.isArray(value)) {
|
|
1414
|
-
for (const item of value) {
|
|
1415
|
-
const url = typeof item === "string" ? item : extractImageUrl(item);
|
|
1416
|
-
if (url) return url;
|
|
1417
|
-
}
|
|
1418
|
-
return void 0;
|
|
1419
|
-
}
|
|
1420
|
-
return extractImageUrl(value);
|
|
1421
|
-
}
|
|
1422
|
-
function extractImageUrl(value) {
|
|
1423
|
-
if (!value || typeof value !== "object") return void 0;
|
|
1424
|
-
const record = value;
|
|
1425
|
-
const candidate = typeof record.url === "string" ? record.url : typeof record.contentUrl === "string" ? record.contentUrl : void 0;
|
|
1426
|
-
return candidate?.trim() || void 0;
|
|
1427
|
-
}
|
|
1428
1470
|
function convertSource(recipe) {
|
|
1429
1471
|
const author = extractEntityName(recipe.author);
|
|
1430
1472
|
const publisher = extractEntityName(recipe.publisher);
|
|
@@ -1520,7 +1562,7 @@ function convertInstruction(entry) {
|
|
|
1520
1562
|
return createHowToStep(entry);
|
|
1521
1563
|
}
|
|
1522
1564
|
if ("subsection" in entry) {
|
|
1523
|
-
const steps = entry.items.map((item) =>
|
|
1565
|
+
const steps = entry.items.map((item) => createHowToStep(item)).filter((step) => Boolean(step));
|
|
1524
1566
|
if (!steps.length) {
|
|
1525
1567
|
return null;
|
|
1526
1568
|
}
|
|
@@ -1531,18 +1573,34 @@ function convertInstruction(entry) {
|
|
|
1531
1573
|
};
|
|
1532
1574
|
}
|
|
1533
1575
|
if ("text" in entry) {
|
|
1534
|
-
return createHowToStep(entry
|
|
1576
|
+
return createHowToStep(entry);
|
|
1535
1577
|
}
|
|
1536
1578
|
return createHowToStep(String(entry));
|
|
1537
1579
|
}
|
|
1538
|
-
function createHowToStep(
|
|
1539
|
-
if (!
|
|
1540
|
-
|
|
1541
|
-
|
|
1542
|
-
|
|
1580
|
+
/**
 * Build a `HowToStep` object from an instruction entry.
 *
 * @param {string|{text?: *, image?: *}|null|undefined} entry Either the step
 *   text itself or an object whose `text` property holds it.
 * @returns {{"@type": "HowToStep", text: string, image?: *}|null} The step, or
 *   null when the entry is falsy, its text is not a string, or the text is
 *   blank after trimming.
 */
function createHowToStep(entry) {
  if (!entry) return null;
  const isPlainText = typeof entry === "string";
  const rawText = isPlainText ? entry : entry.text;
  // A non-string `text` (number, array, ...) has no trim(); treat it as
  // unusable instead of throwing.
  if (typeof rawText !== "string") {
    return null;
  }
  const text = rawText.trim();
  if (!text) {
    return null;
  }
  const step = {
    "@type": "HowToStep",
    text
  };
  // The image is copied through unchanged, and only for object entries.
  if (!isPlainText && entry.image) {
    step.image = entry.image;
  }
  return step;
}
|
|
1547
1605
|
function convertTime2(time) {
|
|
1548
1606
|
if (!time) {
|
|
@@ -1705,7 +1763,7 @@ async function fetchPage(url, options = {}) {
|
|
|
1705
1763
|
};
|
|
1706
1764
|
const response = await resolvedFetch(url, requestInit);
|
|
1707
1765
|
clearTimeout(timeoutId);
|
|
1708
|
-
if (response &&
|
|
1766
|
+
if (response && typeof process !== "undefined" && process.env.NODE_ENV !== "test") {
|
|
1709
1767
|
try {
|
|
1710
1768
|
const globalFetch = typeof globalThis !== "undefined" && typeof globalThis.fetch !== "undefined" ? globalThis.fetch : null;
|
|
1711
1769
|
if (globalFetch) {
|
|
@@ -1723,7 +1781,7 @@ async function fetchPage(url, options = {}) {
|
|
|
1723
1781
|
throw error;
|
|
1724
1782
|
}
|
|
1725
1783
|
const html = await response.text();
|
|
1726
|
-
if (typeof process
|
|
1784
|
+
if (typeof process !== "undefined" && process.env.NODE_ENV !== "test") {
|
|
1727
1785
|
try {
|
|
1728
1786
|
const globalFetch = typeof globalThis !== "undefined" && typeof globalThis.fetch !== "undefined" ? globalThis.fetch : null;
|
|
1729
1787
|
if (globalFetch) {
|
|
@@ -1983,14 +2041,30 @@ function extractRecipe(html) {
|
|
|
1983
2041
|
return extractRecipeBrowser(html);
|
|
1984
2042
|
}
|
|
1985
2043
|
const jsonLdRecipe = extractJsonLd(html);
|
|
1986
|
-
|
|
1987
|
-
|
|
2044
|
+
if (typeof process !== "undefined" && process.env.NODE_ENV !== "test") {
|
|
2045
|
+
try {
|
|
2046
|
+
const globalFetch = typeof globalThis !== "undefined" && typeof globalThis.fetch !== "undefined" ? globalThis.fetch : null;
|
|
2047
|
+
if (globalFetch) {
|
|
2048
|
+
globalFetch("http://127.0.0.1:7243/ingest/7225c3b5-9ac2-4c94-b561-807ca9003b66", { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify({ location: "scraper/extractors/index.ts:6", message: "JSON-LD extraction result", data: { hasJsonLd: !!jsonLdRecipe }, timestamp: Date.now(), sessionId: "debug-session", runId: "run1", hypothesisId: "C,D" }) }).catch(() => {
|
|
2049
|
+
});
|
|
2050
|
+
}
|
|
2051
|
+
} catch {
|
|
2052
|
+
}
|
|
2053
|
+
}
|
|
1988
2054
|
if (jsonLdRecipe) {
|
|
1989
2055
|
return { recipe: jsonLdRecipe, source: "jsonld" };
|
|
1990
2056
|
}
|
|
1991
2057
|
const microdataRecipe = extractMicrodata(html);
|
|
1992
|
-
|
|
1993
|
-
|
|
2058
|
+
if (typeof process !== "undefined" && process.env.NODE_ENV !== "test") {
|
|
2059
|
+
try {
|
|
2060
|
+
const globalFetch = typeof globalThis !== "undefined" && typeof globalThis.fetch !== "undefined" ? globalThis.fetch : null;
|
|
2061
|
+
if (globalFetch) {
|
|
2062
|
+
globalFetch("http://127.0.0.1:7243/ingest/7225c3b5-9ac2-4c94-b561-807ca9003b66", { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify({ location: "scraper/extractors/index.ts:12", message: "Microdata extraction result", data: { hasMicrodata: !!microdataRecipe }, timestamp: Date.now(), sessionId: "debug-session", runId: "run1", hypothesisId: "D" }) }).catch(() => {
|
|
2063
|
+
});
|
|
2064
|
+
}
|
|
2065
|
+
} catch {
|
|
2066
|
+
}
|
|
2067
|
+
}
|
|
1994
2068
|
if (microdataRecipe) {
|
|
1995
2069
|
return { recipe: microdataRecipe, source: "microdata" };
|
|
1996
2070
|
}
|
|
@@ -1999,20 +2073,52 @@ function extractRecipe(html) {
|
|
|
1999
2073
|
|
|
2000
2074
|
// src/scraper/index.ts
|
|
2001
2075
|
async function scrapeRecipe(url, options = {}) {
|
|
2002
|
-
|
|
2003
|
-
|
|
2076
|
+
if (typeof process !== "undefined" && process.env.NODE_ENV !== "test") {
|
|
2077
|
+
try {
|
|
2078
|
+
const globalFetch = typeof globalThis !== "undefined" && typeof globalThis.fetch !== "undefined" ? globalThis.fetch : null;
|
|
2079
|
+
if (globalFetch) {
|
|
2080
|
+
globalFetch("http://127.0.0.1:7243/ingest/7225c3b5-9ac2-4c94-b561-807ca9003b66", { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify({ location: "scraper/index.ts:7", message: "scrapeRecipe entry", data: { url, hasOptions: !!options }, timestamp: Date.now(), sessionId: "debug-session", runId: "run1", hypothesisId: "A,B,C,D,E" }) }).catch(() => {
|
|
2081
|
+
});
|
|
2082
|
+
}
|
|
2083
|
+
} catch {
|
|
2084
|
+
}
|
|
2085
|
+
}
|
|
2004
2086
|
const html = await fetchPage(url, options);
|
|
2005
|
-
|
|
2006
|
-
|
|
2087
|
+
if (typeof process !== "undefined" && process.env.NODE_ENV !== "test") {
|
|
2088
|
+
try {
|
|
2089
|
+
const globalFetch = typeof globalThis !== "undefined" && typeof globalThis.fetch !== "undefined" ? globalThis.fetch : null;
|
|
2090
|
+
if (globalFetch) {
|
|
2091
|
+
globalFetch("http://127.0.0.1:7243/ingest/7225c3b5-9ac2-4c94-b561-807ca9003b66", { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify({ location: "scraper/index.ts:9", message: "HTML fetched", data: { htmlLength: html?.length, htmlPreview: html?.substring(0, 200) }, timestamp: Date.now(), sessionId: "debug-session", runId: "run1", hypothesisId: "B" }) }).catch(() => {
|
|
2092
|
+
});
|
|
2093
|
+
}
|
|
2094
|
+
} catch {
|
|
2095
|
+
}
|
|
2096
|
+
}
|
|
2007
2097
|
const { recipe } = extractRecipe(html);
|
|
2008
|
-
|
|
2009
|
-
|
|
2098
|
+
if (typeof process !== "undefined" && process.env.NODE_ENV !== "test") {
|
|
2099
|
+
try {
|
|
2100
|
+
const globalFetch = typeof globalThis !== "undefined" && typeof globalThis.fetch !== "undefined" ? globalThis.fetch : null;
|
|
2101
|
+
if (globalFetch) {
|
|
2102
|
+
globalFetch("http://127.0.0.1:7243/ingest/7225c3b5-9ac2-4c94-b561-807ca9003b66", { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify({ location: "scraper/index.ts:11", message: "extractRecipe result", data: { hasRecipe: !!recipe, recipeType: recipe?.["@type"], recipeName: recipe?.name }, timestamp: Date.now(), sessionId: "debug-session", runId: "run1", hypothesisId: "A,C,D" }) }).catch(() => {
|
|
2103
|
+
});
|
|
2104
|
+
}
|
|
2105
|
+
} catch {
|
|
2106
|
+
}
|
|
2107
|
+
}
|
|
2010
2108
|
if (!recipe) {
|
|
2011
2109
|
throw new Error("No Schema.org recipe data found in page");
|
|
2012
2110
|
}
|
|
2013
2111
|
const soustackRecipe = fromSchemaOrg(recipe);
|
|
2014
|
-
|
|
2015
|
-
|
|
2112
|
+
if (typeof process !== "undefined" && process.env.NODE_ENV !== "test") {
|
|
2113
|
+
try {
|
|
2114
|
+
const globalFetch = typeof globalThis !== "undefined" && typeof globalThis.fetch !== "undefined" ? globalThis.fetch : null;
|
|
2115
|
+
if (globalFetch) {
|
|
2116
|
+
globalFetch("http://127.0.0.1:7243/ingest/7225c3b5-9ac2-4c94-b561-807ca9003b66", { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify({ location: "scraper/index.ts:17", message: "fromSchemaOrg result", data: { hasSoustackRecipe: !!soustackRecipe, soustackRecipeName: soustackRecipe?.name }, timestamp: Date.now(), sessionId: "debug-session", runId: "run1", hypothesisId: "A" }) }).catch(() => {
|
|
2117
|
+
});
|
|
2118
|
+
}
|
|
2119
|
+
} catch {
|
|
2120
|
+
}
|
|
2121
|
+
}
|
|
2016
2122
|
if (!soustackRecipe) {
|
|
2017
2123
|
throw new Error("Schema.org data did not include a valid recipe");
|
|
2018
2124
|
}
|
|
@@ -2029,6 +2135,10 @@ function extractRecipeFromHTML(html) {
|
|
|
2029
2135
|
}
|
|
2030
2136
|
return soustackRecipe;
|
|
2031
2137
|
}
|
|
2138
|
+
/**
 * Run extractRecipe over an HTML string and return the `recipe` field of its
 * result as-is, with no further conversion.
 *
 * @param {string} html Page markup to search.
 * @returns {*} Whatever extractRecipe reported as `recipe`.
 */
function extractSchemaOrgRecipeFromHTML(html) {
  const extraction = extractRecipe(html);
  return extraction.recipe;
}
|
|
2032
2142
|
|
|
2033
2143
|
// src/parsers/yield.ts
|
|
2034
2144
|
var RANGE_PATTERN = /^(\d+)(?:\s*(?:[-–—]|to)\s*)(\d+)\s+(.+)$/i;
|
|
@@ -2272,6 +2382,6 @@ function wordToNumber(word) {
|
|
|
2272
2382
|
return null;
|
|
2273
2383
|
}
|
|
2274
2384
|
|
|
2275
|
-
export { extractRecipeFromHTML, formatDuration, formatYield2 as formatYield, fromSchemaOrg, normalizeIngredientInput, normalizeYield, parseDuration, parseHumanDuration, parseIngredient, parseIngredientLine, parseIngredients, parseYield2 as parseYield, scaleRecipe, scrapeRecipe, smartParseDuration, toSchemaOrg, validateRecipe };
|
|
2385
|
+
export { extractRecipeFromHTML, extractSchemaOrgRecipeFromHTML, formatDuration, formatYield2 as formatYield, fromSchemaOrg, normalizeImage, normalizeIngredientInput, normalizeYield, parseDuration, parseHumanDuration, parseIngredient, parseIngredientLine, parseIngredients, parseYield2 as parseYield, scaleRecipe, scrapeRecipe, smartParseDuration, toSchemaOrg, validateRecipe };
|
|
2276
2386
|
//# sourceMappingURL=index.mjs.map
|
|
2277
2387
|
//# sourceMappingURL=index.mjs.map
|