soustack 0.1.2 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +86 -10
- package/dist/cli/index.js +159 -53
- package/dist/cli/index.js.map +1 -1
- package/dist/index.d.mts +75 -11
- package/dist/index.d.ts +75 -11
- package/dist/index.js +165 -53
- package/dist/index.js.map +1 -1
- package/dist/index.mjs +164 -54
- package/dist/index.mjs.map +1 -1
- package/package.json +2 -2
- package/src/schema.json +22 -4
package/README.md
CHANGED
|
@@ -35,10 +35,10 @@ Soustack is **computational**—it understands _how_ a recipe behaves.
|
|
|
35
35
|
npm install soustack
|
|
36
36
|
```
|
|
37
37
|
|
|
38
|
-
## What
|
|
38
|
+
## What's Included
|
|
39
39
|
|
|
40
40
|
- **Validation**: `validateRecipe()` validates Soustack JSON against the bundled schema.
|
|
41
|
-
- **Scaling & Computation**: `scaleRecipe()` produces a flat, UI-ready
|
|
41
|
+
- **Scaling & Computation**: `scaleRecipe()` produces a flat, UI-ready "computed recipe" (scaled ingredients + aggregated timing).
|
|
42
42
|
- **Parsers**:
|
|
43
43
|
- Ingredient parsing (`parseIngredient`, `parseIngredientLine`)
|
|
44
44
|
- Duration parsing (`smartParseDuration`)
|
|
@@ -46,9 +46,15 @@ npm install soustack
|
|
|
46
46
|
- **Schema.org Conversion**:
|
|
47
47
|
- `fromSchemaOrg()` (Schema.org JSON-LD → Soustack)
|
|
48
48
|
- `toSchemaOrg()` (Soustack → Schema.org JSON-LD)
|
|
49
|
-
-
|
|
49
|
+
- `normalizeImage()` utility for converting Schema.org image formats to Soustack format
|
|
50
|
+
- **Image Support**:
|
|
51
|
+
- Recipe-level images: single URL or array of URLs
|
|
52
|
+
- Instruction-level images: optional image URL per step
|
|
53
|
+
- Automatic normalization from Schema.org ImageObject formats
|
|
54
|
+
- **Web Scraping**:
|
|
50
55
|
- `scrapeRecipe()` fetches a recipe page and extracts Schema.org recipe data (Node.js only)
|
|
51
|
-
- `extractRecipeFromHTML()` extracts recipe data from HTML string (browser & Node.js compatible)
|
|
56
|
+
- `extractRecipeFromHTML()` extracts recipe data from HTML string, returns Soustack format (browser & Node.js compatible)
|
|
57
|
+
- `extractSchemaOrgRecipeFromHTML()` extracts raw Schema.org recipe data from HTML string (browser & Node.js compatible)
|
|
52
58
|
- Supports JSON-LD (`<script type="application/ld+json">`) and Microdata (`itemscope/itemtype`)
|
|
53
59
|
|
|
54
60
|
## Programmatic Usage
|
|
@@ -57,10 +63,12 @@ npm install soustack
|
|
|
57
63
|
import {
|
|
58
64
|
scrapeRecipe,
|
|
59
65
|
extractRecipeFromHTML,
|
|
66
|
+
extractSchemaOrgRecipeFromHTML,
|
|
60
67
|
fromSchemaOrg,
|
|
61
68
|
toSchemaOrg,
|
|
62
69
|
validateRecipe,
|
|
63
70
|
scaleRecipe,
|
|
71
|
+
normalizeImage,
|
|
64
72
|
} from 'soustack';
|
|
65
73
|
|
|
66
74
|
// Validate a Soustack recipe JSON object
|
|
@@ -73,25 +81,67 @@ const computed = scaleRecipe(recipe, 2);
|
|
|
73
81
|
const scraped = await scrapeRecipe('https://example.com/recipe');
|
|
74
82
|
|
|
75
83
|
// Extract recipe from HTML string (browser & Node.js compatible)
|
|
84
|
+
// Option 1: Get Soustack format directly
|
|
76
85
|
const html = await fetch('https://example.com/recipe').then((r) => r.text());
|
|
77
86
|
const recipe = extractRecipeFromHTML(html);
|
|
78
87
|
|
|
88
|
+
// Option 2: Get Schema.org format first (for inspection/modification)
|
|
89
|
+
const schemaOrgRecipe = extractSchemaOrgRecipeFromHTML(html);
|
|
90
|
+
if (schemaOrgRecipe) {
|
|
91
|
+
const soustackRecipe = fromSchemaOrg(schemaOrgRecipe);
|
|
92
|
+
}
|
|
93
|
+
|
|
79
94
|
// Convert Schema.org → Soustack
|
|
80
95
|
const soustack = fromSchemaOrg(schemaOrgJsonLd);
|
|
81
96
|
|
|
82
97
|
// Convert Soustack → Schema.org
|
|
83
98
|
const jsonLd = toSchemaOrg(recipe);
|
|
99
|
+
|
|
100
|
+
// Normalize Schema.org image formats (strings, arrays, ImageObjects)
|
|
101
|
+
const normalized = normalizeImage(schemaOrgRecipe.image);
|
|
102
|
+
// Returns: string | string[] | undefined
|
|
84
103
|
```
|
|
85
104
|
|
|
86
105
|
## 🔁 Schema.org Conversion
|
|
87
106
|
|
|
88
|
-
Use the
|
|
107
|
+
Use the helpers to move between Schema.org JSON-LD and Soustack's structured recipe format. The conversion automatically handles image normalization, supporting multiple image formats from Schema.org.
|
|
89
108
|
|
|
90
109
|
```ts
|
|
91
|
-
import { fromSchemaOrg, toSchemaOrg } from 'soustack';
|
|
110
|
+
import { fromSchemaOrg, toSchemaOrg, normalizeImage } from 'soustack';
|
|
92
111
|
|
|
112
|
+
// Convert Schema.org → Soustack (automatically normalizes images)
|
|
93
113
|
const soustackRecipe = fromSchemaOrg(schemaOrgJsonLd);
|
|
114
|
+
// Recipe images: string | string[] | undefined
|
|
115
|
+
// Instruction images: optional image URL per step
|
|
116
|
+
|
|
117
|
+
// Convert Soustack → Schema.org (preserves images)
|
|
94
118
|
const schemaOrgRecipe = toSchemaOrg(soustackRecipe);
|
|
119
|
+
|
|
120
|
+
// Manual image normalization (if needed)
|
|
121
|
+
const normalized = normalizeImage(schemaOrgImage);
|
|
122
|
+
// Handles: strings, arrays, ImageObjects with url/contentUrl
|
|
123
|
+
```
|
|
124
|
+
|
|
125
|
+
### Image Format Support
|
|
126
|
+
|
|
127
|
+
Soustack supports flexible image formats:
|
|
128
|
+
|
|
129
|
+
- **Recipe-level images**: Single URL (`string`) or multiple URLs (`string[]`)
|
|
130
|
+
- **Instruction-level images**: Optional `image` property on instruction objects
|
|
131
|
+
- **Automatic normalization**: Schema.org ImageObjects are automatically converted to URLs during import
|
|
132
|
+
|
|
133
|
+
Example recipe with images:
|
|
134
|
+
|
|
135
|
+
```ts
|
|
136
|
+
const recipe = {
|
|
137
|
+
name: "Chocolate Cake",
|
|
138
|
+
image: ["https://example.com/hero.jpg", "https://example.com/gallery.jpg"],
|
|
139
|
+
instructions: [
|
|
140
|
+
"Mix dry ingredients",
|
|
141
|
+
{ text: "Decorate the cake", image: "https://example.com/decorate.jpg" },
|
|
142
|
+
"Serve"
|
|
143
|
+
]
|
|
144
|
+
};
|
|
95
145
|
```
|
|
96
146
|
|
|
97
147
|
## 🧰 Web Scraping
|
|
@@ -115,9 +165,11 @@ const recipe = await scrapeRecipe('https://example.com/recipe', {
|
|
|
115
165
|
});
|
|
116
166
|
```
|
|
117
167
|
|
|
118
|
-
### Browser: `extractRecipeFromHTML()`
|
|
168
|
+
### Browser: `extractRecipeFromHTML()` and `extractSchemaOrgRecipeFromHTML()`
|
|
169
|
+
|
|
170
|
+
#### `extractRecipeFromHTML()` - Returns Soustack Format
|
|
119
171
|
|
|
120
|
-
`extractRecipeFromHTML(html)` extracts recipe data from an HTML string. **Works in both browser and Node.js**. Perfect for browser usage where you fetch HTML yourself (with cookies/session for authenticated content).
|
|
172
|
+
`extractRecipeFromHTML(html)` extracts recipe data from an HTML string and returns it in Soustack format. **Works in both browser and Node.js**. Perfect for browser usage where you fetch HTML yourself (with cookies/session for authenticated content).
|
|
121
173
|
|
|
122
174
|
```ts
|
|
123
175
|
import { extractRecipeFromHTML } from 'soustack';
|
|
@@ -125,15 +177,39 @@ import { extractRecipeFromHTML } from 'soustack';
|
|
|
125
177
|
// In browser: fetch HTML yourself (bypasses CORS, uses your cookies/session)
|
|
126
178
|
const response = await fetch('https://example.com/recipe');
|
|
127
179
|
const html = await response.text();
|
|
128
|
-
const recipe = extractRecipeFromHTML(html);
|
|
180
|
+
const recipe = extractRecipeFromHTML(html); // Already in Soustack format
|
|
181
|
+
```
|
|
182
|
+
|
|
183
|
+
#### `extractSchemaOrgRecipeFromHTML()` - Returns Schema.org Format
|
|
184
|
+
|
|
185
|
+
`extractSchemaOrgRecipeFromHTML(html)` extracts the raw Schema.org recipe data from HTML. Returns `null` if no recipe is found. Use this when you need to inspect, debug, or modify the Schema.org data before converting to Soustack format.
|
|
186
|
+
|
|
187
|
+
```ts
|
|
188
|
+
import { extractSchemaOrgRecipeFromHTML, fromSchemaOrg } from 'soustack';
|
|
189
|
+
|
|
190
|
+
// In browser: fetch HTML yourself
|
|
191
|
+
const response = await fetch('https://example.com/recipe');
|
|
192
|
+
const html = await response.text();
|
|
193
|
+
|
|
194
|
+
// Extract Schema.org format (for inspection/modification)
|
|
195
|
+
const schemaOrgRecipe = extractSchemaOrgRecipeFromHTML(html);
|
|
196
|
+
|
|
197
|
+
if (schemaOrgRecipe) {
|
|
198
|
+
// Inspect or modify Schema.org data before converting
|
|
199
|
+
console.log('Found recipe:', schemaOrgRecipe.name);
|
|
200
|
+
|
|
201
|
+
// Convert to Soustack format when ready
|
|
202
|
+
const soustackRecipe = fromSchemaOrg(schemaOrgRecipe);
|
|
203
|
+
}
|
|
129
204
|
```
|
|
130
205
|
|
|
131
|
-
**Why use
|
|
206
|
+
**Why use these functions in browsers?**
|
|
132
207
|
|
|
133
208
|
- ✅ No CORS issues — you fetch HTML yourself
|
|
134
209
|
- ✅ Works with authenticated/paywalled content — uses browser cookies
|
|
135
210
|
- ✅ Smaller bundle — no Node.js dependencies
|
|
136
211
|
- ✅ Universal — works in both browser and Node.js environments
|
|
212
|
+
- ✅ Flexible — choose Schema.org format for inspection/modification, or Soustack format for direct use
|
|
137
213
|
|
|
138
214
|
### CLI
|
|
139
215
|
|
package/dist/cli/index.js
CHANGED
|
@@ -163,8 +163,8 @@ function flattenInstructions(items) {
|
|
|
163
163
|
// src/schema.json
|
|
164
164
|
var schema_default = {
|
|
165
165
|
$schema: "http://json-schema.org/draft-07/schema#",
|
|
166
|
-
$id: "http://soustack.org/schema/v0.
|
|
167
|
-
title: "Soustack Recipe Schema v0.
|
|
166
|
+
$id: "http://soustack.org/schema/v0.2",
|
|
167
|
+
title: "Soustack Recipe Schema v0.2",
|
|
168
168
|
description: "A portable, scalable, interoperable recipe format.",
|
|
169
169
|
type: "object",
|
|
170
170
|
required: ["name", "ingredients", "instructions"],
|
|
@@ -194,8 +194,21 @@ var schema_default = {
|
|
|
194
194
|
items: { type: "string" }
|
|
195
195
|
},
|
|
196
196
|
image: {
|
|
197
|
-
|
|
198
|
-
|
|
197
|
+
description: "Recipe-level hero image(s)",
|
|
198
|
+
anyOf: [
|
|
199
|
+
{
|
|
200
|
+
type: "string",
|
|
201
|
+
format: "uri"
|
|
202
|
+
},
|
|
203
|
+
{
|
|
204
|
+
type: "array",
|
|
205
|
+
minItems: 1,
|
|
206
|
+
items: {
|
|
207
|
+
type: "string",
|
|
208
|
+
format: "uri"
|
|
209
|
+
}
|
|
210
|
+
}
|
|
211
|
+
]
|
|
199
212
|
},
|
|
200
213
|
dateAdded: {
|
|
201
214
|
type: "string",
|
|
@@ -360,6 +373,11 @@ var schema_default = {
|
|
|
360
373
|
properties: {
|
|
361
374
|
id: { type: "string" },
|
|
362
375
|
text: { type: "string" },
|
|
376
|
+
image: {
|
|
377
|
+
type: "string",
|
|
378
|
+
format: "uri",
|
|
379
|
+
description: "Optional image that illustrates this instruction"
|
|
380
|
+
},
|
|
363
381
|
destination: { type: "string" },
|
|
364
382
|
dependsOn: {
|
|
365
383
|
type: "array",
|
|
@@ -1250,6 +1268,40 @@ function smartParseDuration(input) {
|
|
|
1250
1268
|
return parseHumanDuration(input);
|
|
1251
1269
|
}
|
|
1252
1270
|
|
|
1271
|
+
// src/utils/image.ts
|
|
1272
|
+
function normalizeImage(image) {
|
|
1273
|
+
if (!image) {
|
|
1274
|
+
return void 0;
|
|
1275
|
+
}
|
|
1276
|
+
if (typeof image === "string") {
|
|
1277
|
+
const trimmed = image.trim();
|
|
1278
|
+
return trimmed || void 0;
|
|
1279
|
+
}
|
|
1280
|
+
if (Array.isArray(image)) {
|
|
1281
|
+
const urls = image.map((entry) => typeof entry === "string" ? entry.trim() : extractUrl(entry)).filter((url) => typeof url === "string" && Boolean(url));
|
|
1282
|
+
if (urls.length === 0) {
|
|
1283
|
+
return void 0;
|
|
1284
|
+
}
|
|
1285
|
+
if (urls.length === 1) {
|
|
1286
|
+
return urls[0];
|
|
1287
|
+
}
|
|
1288
|
+
return urls;
|
|
1289
|
+
}
|
|
1290
|
+
return extractUrl(image);
|
|
1291
|
+
}
|
|
1292
|
+
function extractUrl(value) {
|
|
1293
|
+
if (!value || typeof value !== "object") {
|
|
1294
|
+
return void 0;
|
|
1295
|
+
}
|
|
1296
|
+
const record = value;
|
|
1297
|
+
const candidate = typeof record.url === "string" ? record.url : typeof record.contentUrl === "string" ? record.contentUrl : void 0;
|
|
1298
|
+
if (!candidate) {
|
|
1299
|
+
return void 0;
|
|
1300
|
+
}
|
|
1301
|
+
const trimmed = candidate.trim();
|
|
1302
|
+
return trimmed || void 0;
|
|
1303
|
+
}
|
|
1304
|
+
|
|
1253
1305
|
// src/fromSchemaOrg.ts
|
|
1254
1306
|
function fromSchemaOrg(input) {
|
|
1255
1307
|
const recipeNode = extractRecipeNode(input);
|
|
@@ -1262,13 +1314,12 @@ function fromSchemaOrg(input) {
|
|
|
1262
1314
|
const recipeYield = parseYield(recipeNode.recipeYield);
|
|
1263
1315
|
const tags = collectTags(recipeNode.recipeCuisine, recipeNode.keywords);
|
|
1264
1316
|
const category = extractFirst(recipeNode.recipeCategory);
|
|
1265
|
-
const image = convertImage(recipeNode.image);
|
|
1266
1317
|
const source = convertSource(recipeNode);
|
|
1267
1318
|
const nutrition = recipeNode.nutrition && typeof recipeNode.nutrition === "object" ? recipeNode.nutrition : void 0;
|
|
1268
1319
|
return {
|
|
1269
1320
|
name: recipeNode.name.trim(),
|
|
1270
1321
|
description: recipeNode.description?.trim() || void 0,
|
|
1271
|
-
image,
|
|
1322
|
+
image: normalizeImage(recipeNode.image),
|
|
1272
1323
|
category,
|
|
1273
1324
|
tags: tags.length ? tags : void 0,
|
|
1274
1325
|
source,
|
|
@@ -1351,9 +1402,9 @@ function convertInstructions(value) {
|
|
|
1351
1402
|
continue;
|
|
1352
1403
|
}
|
|
1353
1404
|
if (isHowToStep(entry)) {
|
|
1354
|
-
const
|
|
1355
|
-
if (
|
|
1356
|
-
result.push(
|
|
1405
|
+
const parsed = convertHowToStep(entry);
|
|
1406
|
+
if (parsed) {
|
|
1407
|
+
result.push(parsed);
|
|
1357
1408
|
}
|
|
1358
1409
|
}
|
|
1359
1410
|
}
|
|
@@ -1371,9 +1422,9 @@ function extractSectionItems(items = []) {
|
|
|
1371
1422
|
continue;
|
|
1372
1423
|
}
|
|
1373
1424
|
if (isHowToStep(item)) {
|
|
1374
|
-
const
|
|
1375
|
-
if (
|
|
1376
|
-
result.push(
|
|
1425
|
+
const parsed = convertHowToStep(item);
|
|
1426
|
+
if (parsed) {
|
|
1427
|
+
result.push(parsed);
|
|
1377
1428
|
}
|
|
1378
1429
|
continue;
|
|
1379
1430
|
}
|
|
@@ -1387,6 +1438,17 @@ function extractInstructionText(value) {
|
|
|
1387
1438
|
const text = typeof value.text === "string" ? value.text : value.name;
|
|
1388
1439
|
return typeof text === "string" ? text.trim() || void 0 : void 0;
|
|
1389
1440
|
}
|
|
1441
|
+
function convertHowToStep(step) {
|
|
1442
|
+
const text = extractInstructionText(step);
|
|
1443
|
+
if (!text) {
|
|
1444
|
+
return void 0;
|
|
1445
|
+
}
|
|
1446
|
+
const normalizedImage = normalizeImage(step.image);
|
|
1447
|
+
if (typeof normalizedImage === "string") {
|
|
1448
|
+
return { text, image: normalizedImage };
|
|
1449
|
+
}
|
|
1450
|
+
return text;
|
|
1451
|
+
}
|
|
1390
1452
|
function isHowToStep(value) {
|
|
1391
1453
|
return Boolean(value) && typeof value === "object" && value["@type"] === "HowToStep";
|
|
1392
1454
|
}
|
|
@@ -1428,26 +1490,6 @@ function extractFirst(value) {
|
|
|
1428
1490
|
const arr = flattenStrings(value);
|
|
1429
1491
|
return arr.length ? arr[0] : void 0;
|
|
1430
1492
|
}
|
|
1431
|
-
function convertImage(value) {
|
|
1432
|
-
if (!value) return void 0;
|
|
1433
|
-
if (typeof value === "string") {
|
|
1434
|
-
return value;
|
|
1435
|
-
}
|
|
1436
|
-
if (Array.isArray(value)) {
|
|
1437
|
-
for (const item of value) {
|
|
1438
|
-
const url = typeof item === "string" ? item : extractImageUrl(item);
|
|
1439
|
-
if (url) return url;
|
|
1440
|
-
}
|
|
1441
|
-
return void 0;
|
|
1442
|
-
}
|
|
1443
|
-
return extractImageUrl(value);
|
|
1444
|
-
}
|
|
1445
|
-
function extractImageUrl(value) {
|
|
1446
|
-
if (!value || typeof value !== "object") return void 0;
|
|
1447
|
-
const record = value;
|
|
1448
|
-
const candidate = typeof record.url === "string" ? record.url : typeof record.contentUrl === "string" ? record.contentUrl : void 0;
|
|
1449
|
-
return candidate?.trim() || void 0;
|
|
1450
|
-
}
|
|
1451
1493
|
function convertSource(recipe) {
|
|
1452
1494
|
const author = extractEntityName(recipe.author);
|
|
1453
1495
|
const publisher = extractEntityName(recipe.publisher);
|
|
@@ -1543,7 +1585,7 @@ function convertInstruction(entry) {
|
|
|
1543
1585
|
return createHowToStep(entry);
|
|
1544
1586
|
}
|
|
1545
1587
|
if ("subsection" in entry) {
|
|
1546
|
-
const steps = entry.items.map((item) =>
|
|
1588
|
+
const steps = entry.items.map((item) => createHowToStep(item)).filter((step) => Boolean(step));
|
|
1547
1589
|
if (!steps.length) {
|
|
1548
1590
|
return null;
|
|
1549
1591
|
}
|
|
@@ -1554,18 +1596,34 @@ function convertInstruction(entry) {
|
|
|
1554
1596
|
};
|
|
1555
1597
|
}
|
|
1556
1598
|
if ("text" in entry) {
|
|
1557
|
-
return createHowToStep(entry
|
|
1599
|
+
return createHowToStep(entry);
|
|
1558
1600
|
}
|
|
1559
1601
|
return createHowToStep(String(entry));
|
|
1560
1602
|
}
|
|
1561
|
-
function createHowToStep(
|
|
1562
|
-
if (!
|
|
1563
|
-
|
|
1564
|
-
|
|
1565
|
-
|
|
1603
|
+
function createHowToStep(entry) {
|
|
1604
|
+
if (!entry) return null;
|
|
1605
|
+
if (typeof entry === "string") {
|
|
1606
|
+
const trimmed2 = entry.trim();
|
|
1607
|
+
if (!trimmed2) {
|
|
1608
|
+
return null;
|
|
1609
|
+
}
|
|
1610
|
+
return {
|
|
1611
|
+
"@type": "HowToStep",
|
|
1612
|
+
text: trimmed2
|
|
1613
|
+
};
|
|
1614
|
+
}
|
|
1615
|
+
const trimmed = entry.text?.trim();
|
|
1616
|
+
if (!trimmed) {
|
|
1617
|
+
return null;
|
|
1618
|
+
}
|
|
1619
|
+
const step = {
|
|
1566
1620
|
"@type": "HowToStep",
|
|
1567
1621
|
text: trimmed
|
|
1568
1622
|
};
|
|
1623
|
+
if (entry.image) {
|
|
1624
|
+
step.image = entry.image;
|
|
1625
|
+
}
|
|
1626
|
+
return step;
|
|
1569
1627
|
}
|
|
1570
1628
|
function convertTime2(time) {
|
|
1571
1629
|
if (!time) {
|
|
@@ -1728,7 +1786,7 @@ async function fetchPage(url, options = {}) {
|
|
|
1728
1786
|
};
|
|
1729
1787
|
const response = await resolvedFetch(url, requestInit);
|
|
1730
1788
|
clearTimeout(timeoutId);
|
|
1731
|
-
if (response &&
|
|
1789
|
+
if (response && typeof process !== "undefined" && process.env.NODE_ENV !== "test") {
|
|
1732
1790
|
try {
|
|
1733
1791
|
const globalFetch = typeof globalThis !== "undefined" && typeof globalThis.fetch !== "undefined" ? globalThis.fetch : null;
|
|
1734
1792
|
if (globalFetch) {
|
|
@@ -1746,7 +1804,7 @@ async function fetchPage(url, options = {}) {
|
|
|
1746
1804
|
throw error;
|
|
1747
1805
|
}
|
|
1748
1806
|
const html = await response.text();
|
|
1749
|
-
if (typeof process
|
|
1807
|
+
if (typeof process !== "undefined" && process.env.NODE_ENV !== "test") {
|
|
1750
1808
|
try {
|
|
1751
1809
|
const globalFetch = typeof globalThis !== "undefined" && typeof globalThis.fetch !== "undefined" ? globalThis.fetch : null;
|
|
1752
1810
|
if (globalFetch) {
|
|
@@ -2006,14 +2064,30 @@ function extractRecipe(html) {
|
|
|
2006
2064
|
return extractRecipeBrowser(html);
|
|
2007
2065
|
}
|
|
2008
2066
|
const jsonLdRecipe = extractJsonLd(html);
|
|
2009
|
-
|
|
2010
|
-
|
|
2067
|
+
if (typeof process !== "undefined" && process.env.NODE_ENV !== "test") {
|
|
2068
|
+
try {
|
|
2069
|
+
const globalFetch = typeof globalThis !== "undefined" && typeof globalThis.fetch !== "undefined" ? globalThis.fetch : null;
|
|
2070
|
+
if (globalFetch) {
|
|
2071
|
+
globalFetch("http://127.0.0.1:7243/ingest/7225c3b5-9ac2-4c94-b561-807ca9003b66", { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify({ location: "scraper/extractors/index.ts:6", message: "JSON-LD extraction result", data: { hasJsonLd: !!jsonLdRecipe }, timestamp: Date.now(), sessionId: "debug-session", runId: "run1", hypothesisId: "C,D" }) }).catch(() => {
|
|
2072
|
+
});
|
|
2073
|
+
}
|
|
2074
|
+
} catch {
|
|
2075
|
+
}
|
|
2076
|
+
}
|
|
2011
2077
|
if (jsonLdRecipe) {
|
|
2012
2078
|
return { recipe: jsonLdRecipe, source: "jsonld" };
|
|
2013
2079
|
}
|
|
2014
2080
|
const microdataRecipe = extractMicrodata(html);
|
|
2015
|
-
|
|
2016
|
-
|
|
2081
|
+
if (typeof process !== "undefined" && process.env.NODE_ENV !== "test") {
|
|
2082
|
+
try {
|
|
2083
|
+
const globalFetch = typeof globalThis !== "undefined" && typeof globalThis.fetch !== "undefined" ? globalThis.fetch : null;
|
|
2084
|
+
if (globalFetch) {
|
|
2085
|
+
globalFetch("http://127.0.0.1:7243/ingest/7225c3b5-9ac2-4c94-b561-807ca9003b66", { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify({ location: "scraper/extractors/index.ts:12", message: "Microdata extraction result", data: { hasMicrodata: !!microdataRecipe }, timestamp: Date.now(), sessionId: "debug-session", runId: "run1", hypothesisId: "D" }) }).catch(() => {
|
|
2086
|
+
});
|
|
2087
|
+
}
|
|
2088
|
+
} catch {
|
|
2089
|
+
}
|
|
2090
|
+
}
|
|
2017
2091
|
if (microdataRecipe) {
|
|
2018
2092
|
return { recipe: microdataRecipe, source: "microdata" };
|
|
2019
2093
|
}
|
|
@@ -2022,20 +2096,52 @@ function extractRecipe(html) {
|
|
|
2022
2096
|
|
|
2023
2097
|
// src/scraper/index.ts
|
|
2024
2098
|
async function scrapeRecipe(url, options = {}) {
|
|
2025
|
-
|
|
2026
|
-
|
|
2099
|
+
if (typeof process !== "undefined" && process.env.NODE_ENV !== "test") {
|
|
2100
|
+
try {
|
|
2101
|
+
const globalFetch = typeof globalThis !== "undefined" && typeof globalThis.fetch !== "undefined" ? globalThis.fetch : null;
|
|
2102
|
+
if (globalFetch) {
|
|
2103
|
+
globalFetch("http://127.0.0.1:7243/ingest/7225c3b5-9ac2-4c94-b561-807ca9003b66", { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify({ location: "scraper/index.ts:7", message: "scrapeRecipe entry", data: { url, hasOptions: !!options }, timestamp: Date.now(), sessionId: "debug-session", runId: "run1", hypothesisId: "A,B,C,D,E" }) }).catch(() => {
|
|
2104
|
+
});
|
|
2105
|
+
}
|
|
2106
|
+
} catch {
|
|
2107
|
+
}
|
|
2108
|
+
}
|
|
2027
2109
|
const html = await fetchPage(url, options);
|
|
2028
|
-
|
|
2029
|
-
|
|
2110
|
+
if (typeof process !== "undefined" && process.env.NODE_ENV !== "test") {
|
|
2111
|
+
try {
|
|
2112
|
+
const globalFetch = typeof globalThis !== "undefined" && typeof globalThis.fetch !== "undefined" ? globalThis.fetch : null;
|
|
2113
|
+
if (globalFetch) {
|
|
2114
|
+
globalFetch("http://127.0.0.1:7243/ingest/7225c3b5-9ac2-4c94-b561-807ca9003b66", { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify({ location: "scraper/index.ts:9", message: "HTML fetched", data: { htmlLength: html?.length, htmlPreview: html?.substring(0, 200) }, timestamp: Date.now(), sessionId: "debug-session", runId: "run1", hypothesisId: "B" }) }).catch(() => {
|
|
2115
|
+
});
|
|
2116
|
+
}
|
|
2117
|
+
} catch {
|
|
2118
|
+
}
|
|
2119
|
+
}
|
|
2030
2120
|
const { recipe } = extractRecipe(html);
|
|
2031
|
-
|
|
2032
|
-
|
|
2121
|
+
if (typeof process !== "undefined" && process.env.NODE_ENV !== "test") {
|
|
2122
|
+
try {
|
|
2123
|
+
const globalFetch = typeof globalThis !== "undefined" && typeof globalThis.fetch !== "undefined" ? globalThis.fetch : null;
|
|
2124
|
+
if (globalFetch) {
|
|
2125
|
+
globalFetch("http://127.0.0.1:7243/ingest/7225c3b5-9ac2-4c94-b561-807ca9003b66", { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify({ location: "scraper/index.ts:11", message: "extractRecipe result", data: { hasRecipe: !!recipe, recipeType: recipe?.["@type"], recipeName: recipe?.name }, timestamp: Date.now(), sessionId: "debug-session", runId: "run1", hypothesisId: "A,C,D" }) }).catch(() => {
|
|
2126
|
+
});
|
|
2127
|
+
}
|
|
2128
|
+
} catch {
|
|
2129
|
+
}
|
|
2130
|
+
}
|
|
2033
2131
|
if (!recipe) {
|
|
2034
2132
|
throw new Error("No Schema.org recipe data found in page");
|
|
2035
2133
|
}
|
|
2036
2134
|
const soustackRecipe = fromSchemaOrg(recipe);
|
|
2037
|
-
|
|
2038
|
-
|
|
2135
|
+
if (typeof process !== "undefined" && process.env.NODE_ENV !== "test") {
|
|
2136
|
+
try {
|
|
2137
|
+
const globalFetch = typeof globalThis !== "undefined" && typeof globalThis.fetch !== "undefined" ? globalThis.fetch : null;
|
|
2138
|
+
if (globalFetch) {
|
|
2139
|
+
globalFetch("http://127.0.0.1:7243/ingest/7225c3b5-9ac2-4c94-b561-807ca9003b66", { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify({ location: "scraper/index.ts:17", message: "fromSchemaOrg result", data: { hasSoustackRecipe: !!soustackRecipe, soustackRecipeName: soustackRecipe?.name }, timestamp: Date.now(), sessionId: "debug-session", runId: "run1", hypothesisId: "A" }) }).catch(() => {
|
|
2140
|
+
});
|
|
2141
|
+
}
|
|
2142
|
+
} catch {
|
|
2143
|
+
}
|
|
2144
|
+
}
|
|
2039
2145
|
if (!soustackRecipe) {
|
|
2040
2146
|
throw new Error("Schema.org data did not include a valid recipe");
|
|
2041
2147
|
}
|