recipe-scrapers-js 0.1.0-alpha.4 → 0.1.0-alpha.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +7 -4
- package/dist/index.js +137 -24
- package/package.json +20 -9
- package/src/__tests__/abstract-extractor-plugin.test.ts +0 -234
- package/src/__tests__/abstract-scraper.test.ts +0 -201
- package/src/__tests__/logger.test.ts +0 -318
- package/src/__tests__/plugin-manager.test.ts +0 -64
- package/src/__tests__/recipe-extractor.test.ts +0 -103
- package/src/__tests__/scraper-diagnostics.test.ts +0 -102
- package/src/__tests__/setup.ts +0 -1
- package/src/abstract-extractor-plugin.ts +0 -16
- package/src/abstract-plugin.ts +0 -11
- package/src/abstract-postprocessor-plugin.ts +0 -13
- package/src/abstract-scraper.ts +0 -222
- package/src/constants.ts +0 -19
- package/src/exceptions/__tests__/index.test.ts +0 -44
- package/src/exceptions/index.ts +0 -33
- package/src/index.ts +0 -24
- package/src/logger.ts +0 -45
- package/src/plugin-manager.ts +0 -33
- package/src/plugins/__tests__/html-stripper.processor.test.ts +0 -63
- package/src/plugins/__tests__/opengraph.extractor.test.ts +0 -106
- package/src/plugins/html-stripper.processor.ts +0 -80
- package/src/plugins/opengraph.extractor.ts +0 -61
- package/src/plugins/schema-org.extractor/__tests__/index.test.ts +0 -136
- package/src/plugins/schema-org.extractor/__tests__/type-predicates.test.ts +0 -116
- package/src/plugins/schema-org.extractor/index.ts +0 -622
- package/src/plugins/schema-org.extractor/schema-org.interface.ts +0 -25
- package/src/plugins/schema-org.extractor/type-predicates.ts +0 -79
- package/src/recipe-extractor.ts +0 -93
- package/src/scraper-diagnostics.ts +0 -87
- package/src/scrapers/__tests__/scrapers.test.ts +0 -94
- package/src/scrapers/_index.ts +0 -19
- package/src/scrapers/allrecipes.ts +0 -9
- package/src/scrapers/bbcgoodfood.ts +0 -43
- package/src/scrapers/epicurious.ts +0 -17
- package/src/scrapers/nytimes.ts +0 -43
- package/src/scrapers/seriouseats.ts +0 -9
- package/src/scrapers/simplyrecipes.ts +0 -37
- package/src/types/recipe.interface.ts +0 -247
- package/src/types/scraper.interface.ts +0 -34
- package/src/utils/__tests__/index.test.ts +0 -128
- package/src/utils/__tests__/ingredients.test.ts +0 -439
- package/src/utils/__tests__/instructions.test.ts +0 -44
- package/src/utils/__tests__/microdata.test.ts +0 -93
- package/src/utils/__tests__/parse-yields.test.ts +0 -30
- package/src/utils/__tests__/parsing.test.ts +0 -69
- package/src/utils/fractions.ts +0 -60
- package/src/utils/index.ts +0 -40
- package/src/utils/ingredients.ts +0 -212
- package/src/utils/instructions.ts +0 -45
- package/src/utils/microdata.ts +0 -162
- package/src/utils/parse-yields.ts +0 -103
- package/src/utils/parsing.ts +0 -43
package/README.md
CHANGED
|
@@ -19,14 +19,16 @@ A TypeScript/JavaScript library for scraping recipe data from various cooking we
|
|
|
19
19
|
|
|
20
20
|
## Installation
|
|
21
21
|
|
|
22
|
+
Add the `recipe-scrapers-js` package and its peer dependencies.
|
|
23
|
+
|
|
22
24
|
```bash
|
|
23
|
-
npm install recipe-scrapers-js
|
|
25
|
+
npm install recipe-scrapers-js cheerio zod
|
|
24
26
|
# or
|
|
25
|
-
yarn add recipe-scrapers-js
|
|
27
|
+
yarn add recipe-scrapers-js cheerio zod
|
|
26
28
|
# or
|
|
27
|
-
pnpm add recipe-scrapers-js
|
|
29
|
+
pnpm add recipe-scrapers-js cheerio zod
|
|
28
30
|
# or
|
|
29
|
-
bun add recipe-scrapers-js
|
|
31
|
+
bun add recipe-scrapers-js cheerio zod
|
|
30
32
|
```
|
|
31
33
|
|
|
32
34
|
## Usage
|
|
@@ -195,6 +197,7 @@ This project is licensed under the MIT License - see the [LICENSE](LICENSE) file
|
|
|
195
197
|
- Original [recipe-scrapers](https://github.com/hhursev/recipe-scrapers) Python library by [hhursev](https://github.com/hhursev)
|
|
196
198
|
- [Schema.org Recipe specification](https://schema.org/Recipe)
|
|
197
199
|
- [Cheerio](https://cheerio.js.org/) for HTML parsing
|
|
200
|
+
- [Zod](https://zod.dev/) for schema validation
|
|
198
201
|
|
|
199
202
|
## Copyright and Usage
|
|
200
203
|
|
package/dist/index.js
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import * as cheerio from "cheerio";
|
|
2
2
|
import { parse, toSeconds } from "iso8601-duration";
|
|
3
|
+
import z from "zod/v4";
|
|
3
4
|
|
|
4
5
|
//#region src/utils/index.ts
|
|
5
6
|
function isDefined(value) {
|
|
@@ -145,7 +146,7 @@ function parseMinutes(value) {
|
|
|
145
146
|
|
|
146
147
|
//#endregion
|
|
147
148
|
//#region src/utils/ingredients.ts
|
|
148
|
-
const
|
|
149
|
+
const DEFAULT_INGREDIENTS_GROUP_NAME = "Ingredients";
|
|
149
150
|
const DEFAULT_GROUPING_SELECTORS = {
|
|
150
151
|
wprm: {
|
|
151
152
|
headingSelectors: [".wprm-recipe-ingredient-group h4", ".wprm-recipe-group-name"],
|
|
@@ -227,13 +228,13 @@ function groupIngredients($, ingredientsList, headingSelector, itemSelector) {
|
|
|
227
228
|
const $el = $(el);
|
|
228
229
|
if ($el.is(groupNameSelector)) {
|
|
229
230
|
const headingText = normalizeString($el.text());
|
|
230
|
-
currentHeading = headingText ||
|
|
231
|
+
currentHeading = headingText || DEFAULT_INGREDIENTS_GROUP_NAME;
|
|
231
232
|
if (!groupings.has(currentHeading)) groupings.set(currentHeading, /* @__PURE__ */ new Set());
|
|
232
233
|
} else if ($el.is(ingredientSelector)) {
|
|
233
234
|
const text = normalizeString($el.text());
|
|
234
235
|
if (!text) continue;
|
|
235
236
|
const matched = bestMatch(text, ingredients);
|
|
236
|
-
const heading = currentHeading ||
|
|
237
|
+
const heading = currentHeading || DEFAULT_INGREDIENTS_GROUP_NAME;
|
|
237
238
|
if (!groupings.has(heading)) groupings.set(heading, /* @__PURE__ */ new Set());
|
|
238
239
|
groupings.get(heading)?.add(matched);
|
|
239
240
|
}
|
|
@@ -652,7 +653,7 @@ var SchemaOrgPlugin = class SchemaOrgPlugin extends ExtractorPlugin {
|
|
|
652
653
|
"title",
|
|
653
654
|
"@id"
|
|
654
655
|
]) {
|
|
655
|
-
let text
|
|
656
|
+
let text;
|
|
656
657
|
if (isString(value)) text = value;
|
|
657
658
|
else if (isNumber(value)) text = value.toString();
|
|
658
659
|
else if (Array.isArray(value)) text = this.getSchemaTextValue(value[0], props);
|
|
@@ -1076,6 +1077,127 @@ var AllRecipes = class extends AbstractScraper {
|
|
|
1076
1077
|
extractors = {};
|
|
1077
1078
|
};
|
|
1078
1079
|
|
|
1080
|
+
//#endregion
|
|
1081
|
+
//#region src/scrapers/americastestkitchen.ts
|
|
1082
|
+
const recipeIngredientItemSchema = z.object({ fields: z.object({
|
|
1083
|
+
qty: z.string(),
|
|
1084
|
+
preText: z.string(),
|
|
1085
|
+
postText: z.string(),
|
|
1086
|
+
measurement: z.string().nullable(),
|
|
1087
|
+
pluralIngredient: z.boolean(),
|
|
1088
|
+
ingredient: z.object({
|
|
1089
|
+
contentType: z.string(),
|
|
1090
|
+
fields: z.object({
|
|
1091
|
+
title: z.string(),
|
|
1092
|
+
pluralTitle: z.string(),
|
|
1093
|
+
kind: z.string()
|
|
1094
|
+
})
|
|
1095
|
+
})
|
|
1096
|
+
}) });
|
|
1097
|
+
const recipeIngredientGroupSchema = z.object({ fields: z.object({
|
|
1098
|
+
title: z.string(),
|
|
1099
|
+
recipeIngredientItems: z.array(recipeIngredientItemSchema)
|
|
1100
|
+
}) });
|
|
1101
|
+
const recipeInstructionSchema = z.object({ fields: z.object({ content: z.string() }) });
|
|
1102
|
+
const recipeDataSchema = z.object({
|
|
1103
|
+
totalCookTime: z.number(),
|
|
1104
|
+
recipeTimeNote: z.string().optional(),
|
|
1105
|
+
ingredientGroups: z.array(recipeIngredientGroupSchema),
|
|
1106
|
+
headnote: z.string().optional(),
|
|
1107
|
+
instructions: z.array(recipeInstructionSchema)
|
|
1108
|
+
});
|
|
1109
|
+
const pagePropsDataSchema = z.object({ props: z.object({ pageProps: z.object({ data: recipeDataSchema }) }) });
|
|
1110
|
+
var AmericasTestKitchen = class extends AbstractScraper {
|
|
1111
|
+
data = null;
|
|
1112
|
+
static host() {
|
|
1113
|
+
return "americastestkitchen.com";
|
|
1114
|
+
}
|
|
1115
|
+
extractors = {
|
|
1116
|
+
ingredients: this.ingredients.bind(this),
|
|
1117
|
+
instructions: this.instructions.bind(this),
|
|
1118
|
+
siteName: this.siteName.bind(this)
|
|
1119
|
+
};
|
|
1120
|
+
siteName() {
|
|
1121
|
+
return "America's Test Kitchen";
|
|
1122
|
+
}
|
|
1123
|
+
ingredients(prevValue) {
|
|
1124
|
+
let ingredients = this.parseIngredients();
|
|
1125
|
+
if (!ingredients) ingredients = this.parseHtmlIngredients(prevValue);
|
|
1126
|
+
if (!ingredients) throw new Error("Failed to extract ingredients");
|
|
1127
|
+
return ingredients;
|
|
1128
|
+
}
|
|
1129
|
+
instructions(prevValue) {
|
|
1130
|
+
const data = this.getRecipeData();
|
|
1131
|
+
if (!data) {
|
|
1132
|
+
if (prevValue) return prevValue;
|
|
1133
|
+
throw new Error("Failed to extract instructions");
|
|
1134
|
+
}
|
|
1135
|
+
const { headnote } = data;
|
|
1136
|
+
let headnoteText = "";
|
|
1137
|
+
if (headnote) headnoteText = `Note: ${normalizeString(headnote)}`;
|
|
1138
|
+
const instructionTexts = [];
|
|
1139
|
+
for (const instruction of data.instructions) instructionTexts.push(normalizeString(instruction.fields.content));
|
|
1140
|
+
return new Set([headnoteText, ...instructionTexts]);
|
|
1141
|
+
}
|
|
1142
|
+
parseHtmlIngredients(prevValue) {
|
|
1143
|
+
const headingSelector = "[class*=\"RecipeIngredientGroups_group\"] > span";
|
|
1144
|
+
const ingredientSelector = "[class*=\"RecipeIngredient\"] label";
|
|
1145
|
+
if (isList(prevValue) && prevValue.size > 0) {
|
|
1146
|
+
const result = groupIngredients(this.$, prevValue, headingSelector, ingredientSelector);
|
|
1147
|
+
return result;
|
|
1148
|
+
}
|
|
1149
|
+
return null;
|
|
1150
|
+
}
|
|
1151
|
+
getRecipeData() {
|
|
1152
|
+
if (this.data === null) {
|
|
1153
|
+
const jsonElement = this.$("script[type=\"application/json\"]");
|
|
1154
|
+
const jsonString = jsonElement.html();
|
|
1155
|
+
if (!jsonString) {
|
|
1156
|
+
this.logger.warn("Could not find JSON data script tag");
|
|
1157
|
+
return null;
|
|
1158
|
+
}
|
|
1159
|
+
try {
|
|
1160
|
+
const parsed = pagePropsDataSchema.parse(JSON.parse(jsonString));
|
|
1161
|
+
this.data = parsed.props.pageProps.data;
|
|
1162
|
+
} catch (error) {
|
|
1163
|
+
this.logger.error("Failed to parse JSON data:", error);
|
|
1164
|
+
return null;
|
|
1165
|
+
}
|
|
1166
|
+
}
|
|
1167
|
+
return this.data;
|
|
1168
|
+
}
|
|
1169
|
+
parseIngredientItem(ingredientItem) {
|
|
1170
|
+
const { fields } = ingredientItem;
|
|
1171
|
+
const fragments = [
|
|
1172
|
+
fields.qty || "",
|
|
1173
|
+
fields.measurement || "",
|
|
1174
|
+
fields.ingredient.fields.title || "",
|
|
1175
|
+
fields.postText || ""
|
|
1176
|
+
];
|
|
1177
|
+
const filteredFragments = [];
|
|
1178
|
+
for (const fragment of fragments) if (fragment) filteredFragments.push(fragment.trimEnd());
|
|
1179
|
+
return filteredFragments.join(" ").trimEnd().replace(" ,", ",");
|
|
1180
|
+
}
|
|
1181
|
+
parseIngredients() {
|
|
1182
|
+
const data = this.getRecipeData();
|
|
1183
|
+
if (!data) return null;
|
|
1184
|
+
const { ingredientGroups } = data;
|
|
1185
|
+
if (ingredientGroups.length === 1) {
|
|
1186
|
+
const ingredientSet = /* @__PURE__ */ new Set();
|
|
1187
|
+
for (const item of ingredientGroups[0].fields.recipeIngredientItems) ingredientSet.add(this.parseIngredientItem(item));
|
|
1188
|
+
return ingredientSet;
|
|
1189
|
+
}
|
|
1190
|
+
const ingredientMap = /* @__PURE__ */ new Map();
|
|
1191
|
+
for (const group of ingredientGroups) {
|
|
1192
|
+
const groupTitle = group.fields.title || DEFAULT_INGREDIENTS_GROUP_NAME;
|
|
1193
|
+
const ingredientSet = /* @__PURE__ */ new Set();
|
|
1194
|
+
for (const item of group.fields.recipeIngredientItems) ingredientSet.add(this.parseIngredientItem(item));
|
|
1195
|
+
ingredientMap.set(groupTitle, ingredientSet);
|
|
1196
|
+
}
|
|
1197
|
+
return ingredientMap;
|
|
1198
|
+
}
|
|
1199
|
+
};
|
|
1200
|
+
|
|
1079
1201
|
//#endregion
|
|
1080
1202
|
//#region src/scrapers/bbcgoodfood.ts
|
|
1081
1203
|
var BBCGoodFood = class extends AbstractScraper {
|
|
@@ -1083,16 +1205,6 @@ var BBCGoodFood = class extends AbstractScraper {
|
|
|
1083
1205
|
return "bbcgoodfood.com";
|
|
1084
1206
|
}
|
|
1085
1207
|
extractors = { ingredients: this.ingredients.bind(this) };
|
|
1086
|
-
/**
|
|
1087
|
-
* The NYTimes website appears to auto generate it's CSS class names,
|
|
1088
|
-
* which results in them ending with a string a random characters.
|
|
1089
|
-
* Matching the exact class name is likely to break fairly quickly
|
|
1090
|
-
* so instead we are going to match on a partial class name.
|
|
1091
|
-
* For example, h3[class*='ingredientgroup_name'] matches an h3 element
|
|
1092
|
-
* with a class that contains the value 'ingredient_groupname' at least once
|
|
1093
|
-
* anywhere in the element class attribute.
|
|
1094
|
-
* @link https://developer.mozilla.org/en-US/docs/Web/CSS/Attribute_selectors
|
|
1095
|
-
*/
|
|
1096
1208
|
ingredients(prevValue) {
|
|
1097
1209
|
const headingSelector = ".recipe__ingredients h3";
|
|
1098
1210
|
const ingredientSelector = ".recipe__ingredients li";
|
|
@@ -1104,6 +1216,15 @@ var BBCGoodFood = class extends AbstractScraper {
|
|
|
1104
1216
|
}
|
|
1105
1217
|
};
|
|
1106
1218
|
|
|
1219
|
+
//#endregion
|
|
1220
|
+
//#region src/scrapers/eatingwell.ts
|
|
1221
|
+
var EatingWell = class extends AbstractScraper {
|
|
1222
|
+
static host() {
|
|
1223
|
+
return "eatingwell.com";
|
|
1224
|
+
}
|
|
1225
|
+
extractors = {};
|
|
1226
|
+
};
|
|
1227
|
+
|
|
1107
1228
|
//#endregion
|
|
1108
1229
|
//#region src/scrapers/epicurious.ts
|
|
1109
1230
|
var Epicurious = class extends AbstractScraper {
|
|
@@ -1124,16 +1245,6 @@ var NYTimes = class extends AbstractScraper {
|
|
|
1124
1245
|
return "cooking.nytimes.com";
|
|
1125
1246
|
}
|
|
1126
1247
|
extractors = { ingredients: this.ingredients.bind(this) };
|
|
1127
|
-
/**
|
|
1128
|
-
* The NYTimes website appears to auto generate it's CSS class names,
|
|
1129
|
-
* which results in them ending with a string a random characters.
|
|
1130
|
-
* Matching the exact class name is likely to break fairly quickly
|
|
1131
|
-
* so instead we are going to match on a partial class name.
|
|
1132
|
-
* For example, h3[class*='ingredientgroup_name'] matches an h3 element
|
|
1133
|
-
* with a class that contains the value 'ingredient_groupname' at least once
|
|
1134
|
-
* anywhere in the element class attribute.
|
|
1135
|
-
* @link https://developer.mozilla.org/en-US/docs/Web/CSS/Attribute_selectors
|
|
1136
|
-
*/
|
|
1137
1248
|
ingredients(prevValue) {
|
|
1138
1249
|
const headingSelector = "h3[class*=\"ingredientgroup_name\"]";
|
|
1139
1250
|
const ingredientSelector = "li[class*=\"ingredient\"]";
|
|
@@ -1184,7 +1295,9 @@ var SimplyRecipes = class extends AbstractScraper {
|
|
|
1184
1295
|
*/
|
|
1185
1296
|
const scrapers = {
|
|
1186
1297
|
[AllRecipes.host()]: AllRecipes,
|
|
1298
|
+
[AmericasTestKitchen.host()]: AmericasTestKitchen,
|
|
1187
1299
|
[BBCGoodFood.host()]: BBCGoodFood,
|
|
1300
|
+
[EatingWell.host()]: EatingWell,
|
|
1188
1301
|
[Epicurious.host()]: Epicurious,
|
|
1189
1302
|
[SeriousEats.host()]: SeriousEats,
|
|
1190
1303
|
[SimplyRecipes.host()]: SimplyRecipes,
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "recipe-scrapers-js",
|
|
3
|
-
"version": "0.1.0-alpha.
|
|
3
|
+
"version": "0.1.0-alpha.6",
|
|
4
4
|
"license": "MIT",
|
|
5
5
|
"description": "A recipe scrapers library",
|
|
6
6
|
"author": {
|
|
@@ -12,11 +12,21 @@
|
|
|
12
12
|
"url": "git+https://github.com/nerdstep/recipe-scrapers-js.git"
|
|
13
13
|
},
|
|
14
14
|
"type": "module",
|
|
15
|
-
"module": "
|
|
15
|
+
"module": "dist/index.js",
|
|
16
16
|
"main": "dist/index.js",
|
|
17
17
|
"types": "dist/index.d.ts",
|
|
18
|
-
"files": [
|
|
19
|
-
|
|
18
|
+
"files": [
|
|
19
|
+
"dist",
|
|
20
|
+
"README.md",
|
|
21
|
+
"LICENSE"
|
|
22
|
+
],
|
|
23
|
+
"keywords": [
|
|
24
|
+
"recipe",
|
|
25
|
+
"scraper",
|
|
26
|
+
"parser",
|
|
27
|
+
"food",
|
|
28
|
+
"cooking"
|
|
29
|
+
],
|
|
20
30
|
"scripts": {
|
|
21
31
|
"build": "tsdown src/index.ts --outdir dist",
|
|
22
32
|
"test": "bun test",
|
|
@@ -29,17 +39,18 @@
|
|
|
29
39
|
"prepublishOnly": "bun run lint && bun run build"
|
|
30
40
|
},
|
|
31
41
|
"peerDependencies": {
|
|
32
|
-
"cheerio": "^1.
|
|
42
|
+
"cheerio": "^1.1.0",
|
|
43
|
+
"zod": "^3.25.74"
|
|
33
44
|
},
|
|
34
45
|
"dependencies": {
|
|
35
46
|
"iso8601-duration": "^2.1.2",
|
|
36
47
|
"schema-dts": "^1.1.5"
|
|
37
48
|
},
|
|
38
49
|
"devDependencies": {
|
|
39
|
-
"@biomejs/biome": "^
|
|
40
|
-
"@types/bun": "^1.2.
|
|
41
|
-
"cheerio": "^1.
|
|
42
|
-
"tsdown": "^0.12.
|
|
50
|
+
"@biomejs/biome": "^2.0.6",
|
|
51
|
+
"@types/bun": "^1.2.17",
|
|
52
|
+
"cheerio": "^1.1.0",
|
|
53
|
+
"tsdown": "^0.12.9",
|
|
43
54
|
"typescript": "^5.8.3"
|
|
44
55
|
}
|
|
45
56
|
}
|
|
@@ -1,234 +0,0 @@
|
|
|
1
|
-
import { beforeEach, describe, expect, it } from 'bun:test'
|
|
2
|
-
import {
|
|
3
|
-
NotImplementedException,
|
|
4
|
-
UnsupportedFieldException,
|
|
5
|
-
} from '@/exceptions'
|
|
6
|
-
import { load } from 'cheerio'
|
|
7
|
-
import { ExtractorPlugin } from '../abstract-extractor-plugin'
|
|
8
|
-
import type { RecipeFields } from '../types/recipe.interface'
|
|
9
|
-
|
|
10
|
-
class MockExtractorPlugin extends ExtractorPlugin {
|
|
11
|
-
name = 'MockExtractorPlugin'
|
|
12
|
-
priority = 100
|
|
13
|
-
|
|
14
|
-
private supportedFields: Set<keyof RecipeFields>
|
|
15
|
-
|
|
16
|
-
constructor(supportedFields: (keyof RecipeFields)[] = []) {
|
|
17
|
-
const $ = load('<html><body></body></html>')
|
|
18
|
-
super($)
|
|
19
|
-
this.supportedFields = new Set(supportedFields)
|
|
20
|
-
}
|
|
21
|
-
|
|
22
|
-
supports(field: keyof RecipeFields): boolean {
|
|
23
|
-
return this.supportedFields.has(field)
|
|
24
|
-
}
|
|
25
|
-
|
|
26
|
-
extract<Key extends keyof RecipeFields>(field: Key): RecipeFields[Key] {
|
|
27
|
-
if (!this.supports(field)) {
|
|
28
|
-
throw new UnsupportedFieldException(field)
|
|
29
|
-
}
|
|
30
|
-
|
|
31
|
-
// Mock extraction logic
|
|
32
|
-
switch (field) {
|
|
33
|
-
case 'title':
|
|
34
|
-
return 'Mock Recipe Title' as RecipeFields[Key]
|
|
35
|
-
case 'description':
|
|
36
|
-
return 'Mock Recipe Description' as RecipeFields[Key]
|
|
37
|
-
case 'ingredients':
|
|
38
|
-
return new Set(['ingredient 1', 'ingredient 2']) as RecipeFields[Key]
|
|
39
|
-
case 'instructions':
|
|
40
|
-
return new Set(['step 1', 'step 2']) as RecipeFields[Key]
|
|
41
|
-
case 'prepTime':
|
|
42
|
-
return 15 as RecipeFields[Key]
|
|
43
|
-
case 'cookTime':
|
|
44
|
-
return 30 as RecipeFields[Key]
|
|
45
|
-
case 'totalTime':
|
|
46
|
-
return 45 as RecipeFields[Key]
|
|
47
|
-
case 'yields':
|
|
48
|
-
return '4 servings' as RecipeFields[Key]
|
|
49
|
-
default:
|
|
50
|
-
throw new NotImplementedException(field)
|
|
51
|
-
}
|
|
52
|
-
}
|
|
53
|
-
}
|
|
54
|
-
|
|
55
|
-
class AsyncMockExtractorPlugin extends ExtractorPlugin {
|
|
56
|
-
name = 'AsyncMockExtractorPlugin'
|
|
57
|
-
priority = 100
|
|
58
|
-
|
|
59
|
-
constructor() {
|
|
60
|
-
const $ = load('<html><body></body></html>')
|
|
61
|
-
super($)
|
|
62
|
-
}
|
|
63
|
-
|
|
64
|
-
supports(field: keyof RecipeFields): boolean {
|
|
65
|
-
return ['title', 'description'].includes(field)
|
|
66
|
-
}
|
|
67
|
-
|
|
68
|
-
async extract<Key extends keyof RecipeFields>(
|
|
69
|
-
field: Key,
|
|
70
|
-
): Promise<RecipeFields[Key]> {
|
|
71
|
-
await new Promise((resolve) => setTimeout(resolve, 10))
|
|
72
|
-
|
|
73
|
-
if (!this.supports(field)) {
|
|
74
|
-
throw new UnsupportedFieldException(field)
|
|
75
|
-
}
|
|
76
|
-
|
|
77
|
-
switch (field) {
|
|
78
|
-
case 'title':
|
|
79
|
-
return 'Async Recipe Title' as RecipeFields[Key]
|
|
80
|
-
case 'description':
|
|
81
|
-
return 'Async Recipe Description' as RecipeFields[Key]
|
|
82
|
-
default:
|
|
83
|
-
throw new NotImplementedException(field)
|
|
84
|
-
}
|
|
85
|
-
}
|
|
86
|
-
}
|
|
87
|
-
|
|
88
|
-
class ThrowingExtractorPlugin extends ExtractorPlugin {
|
|
89
|
-
name = 'ThrowingExtractorPlugin'
|
|
90
|
-
priority = 100
|
|
91
|
-
|
|
92
|
-
constructor() {
|
|
93
|
-
const $ = load('<html><body></body></html>')
|
|
94
|
-
super($)
|
|
95
|
-
}
|
|
96
|
-
|
|
97
|
-
supports(field: keyof RecipeFields): boolean {
|
|
98
|
-
return true
|
|
99
|
-
}
|
|
100
|
-
|
|
101
|
-
extract<Key extends keyof RecipeFields>(field: Key): RecipeFields[Key] {
|
|
102
|
-
throw new Error(`Extraction failed for field: ${String(field)}`)
|
|
103
|
-
}
|
|
104
|
-
}
|
|
105
|
-
|
|
106
|
-
describe('ExtractorPlugin', () => {
|
|
107
|
-
let plugin: MockExtractorPlugin
|
|
108
|
-
|
|
109
|
-
beforeEach(() => {
|
|
110
|
-
plugin = new MockExtractorPlugin([
|
|
111
|
-
'title',
|
|
112
|
-
'description',
|
|
113
|
-
'ingredients',
|
|
114
|
-
'prepTime',
|
|
115
|
-
])
|
|
116
|
-
})
|
|
117
|
-
|
|
118
|
-
describe('inheritance', () => {
|
|
119
|
-
it('should extend AbstractPlugin', () => {
|
|
120
|
-
expect(plugin).toBeInstanceOf(ExtractorPlugin)
|
|
121
|
-
})
|
|
122
|
-
|
|
123
|
-
it('should have access to cheerio instance from parent', () => {
|
|
124
|
-
expect(plugin.$).toBeDefined()
|
|
125
|
-
expect(typeof plugin.$).toBe('function')
|
|
126
|
-
})
|
|
127
|
-
})
|
|
128
|
-
|
|
129
|
-
describe('supports method', () => {
|
|
130
|
-
it('should return true for supported fields', () => {
|
|
131
|
-
expect(plugin.supports('title')).toBe(true)
|
|
132
|
-
expect(plugin.supports('description')).toBe(true)
|
|
133
|
-
expect(plugin.supports('ingredients')).toBe(true)
|
|
134
|
-
expect(plugin.supports('prepTime')).toBe(true)
|
|
135
|
-
})
|
|
136
|
-
|
|
137
|
-
it('should return false for unsupported fields', () => {
|
|
138
|
-
expect(plugin.supports('cookTime')).toBe(false)
|
|
139
|
-
expect(plugin.supports('totalTime')).toBe(false)
|
|
140
|
-
expect(plugin.supports('yields')).toBe(false)
|
|
141
|
-
expect(plugin.supports('author')).toBe(false)
|
|
142
|
-
})
|
|
143
|
-
|
|
144
|
-
it('should handle empty supported fields', () => {
|
|
145
|
-
const emptyPlugin = new MockExtractorPlugin([])
|
|
146
|
-
expect(emptyPlugin.supports('title')).toBe(false)
|
|
147
|
-
expect(emptyPlugin.supports('description')).toBe(false)
|
|
148
|
-
})
|
|
149
|
-
|
|
150
|
-
it('should handle all fields as supported', () => {
|
|
151
|
-
const allFieldsPlugin = new MockExtractorPlugin([
|
|
152
|
-
'title',
|
|
153
|
-
'description',
|
|
154
|
-
'ingredients',
|
|
155
|
-
'instructions',
|
|
156
|
-
'prepTime',
|
|
157
|
-
'cookTime',
|
|
158
|
-
'totalTime',
|
|
159
|
-
'yields',
|
|
160
|
-
])
|
|
161
|
-
|
|
162
|
-
expect(allFieldsPlugin.supports('title')).toBe(true)
|
|
163
|
-
expect(allFieldsPlugin.supports('cookTime')).toBe(true)
|
|
164
|
-
expect(allFieldsPlugin.supports('yields')).toBe(true)
|
|
165
|
-
})
|
|
166
|
-
})
|
|
167
|
-
|
|
168
|
-
describe('extract method', () => {
|
|
169
|
-
it('should extract supported fields', () => {
|
|
170
|
-
expect(plugin.extract('title')).toBe('Mock Recipe Title')
|
|
171
|
-
expect(plugin.extract('description')).toBe('Mock Recipe Description')
|
|
172
|
-
expect(plugin.extract('prepTime')).toBe(15)
|
|
173
|
-
expect(plugin.extract('ingredients')).toEqual(
|
|
174
|
-
new Set(['ingredient 1', 'ingredient 2']),
|
|
175
|
-
)
|
|
176
|
-
})
|
|
177
|
-
|
|
178
|
-
it('should throw error for unsupported fields', () => {
|
|
179
|
-
expect(() => plugin.extract('cookTime')).toThrow(
|
|
180
|
-
'Extraction not supported for field: cookTime',
|
|
181
|
-
)
|
|
182
|
-
expect(() => plugin.extract('totalTime')).toThrow(
|
|
183
|
-
'Extraction not supported for field: totalTime',
|
|
184
|
-
)
|
|
185
|
-
})
|
|
186
|
-
})
|
|
187
|
-
|
|
188
|
-
describe('async extraction', () => {
|
|
189
|
-
let asyncPlugin: AsyncMockExtractorPlugin
|
|
190
|
-
|
|
191
|
-
beforeEach(() => {
|
|
192
|
-
asyncPlugin = new AsyncMockExtractorPlugin()
|
|
193
|
-
})
|
|
194
|
-
|
|
195
|
-
it('should handle async extraction', async () => {
|
|
196
|
-
const title = await asyncPlugin.extract('title')
|
|
197
|
-
expect(title).toBe('Async Recipe Title')
|
|
198
|
-
const description = await asyncPlugin.extract('description')
|
|
199
|
-
expect(description).toBe('Async Recipe Description')
|
|
200
|
-
})
|
|
201
|
-
|
|
202
|
-
it('should throw error for unsupported fields in async mode', async () => {
|
|
203
|
-
await expect(asyncPlugin.extract('cookTime')).rejects.toThrow(
|
|
204
|
-
'Extraction not supported for field: cookTime',
|
|
205
|
-
)
|
|
206
|
-
})
|
|
207
|
-
})
|
|
208
|
-
|
|
209
|
-
describe('error handling', () => {
|
|
210
|
-
let throwingPlugin: ThrowingExtractorPlugin
|
|
211
|
-
|
|
212
|
-
beforeEach(() => {
|
|
213
|
-
throwingPlugin = new ThrowingExtractorPlugin()
|
|
214
|
-
})
|
|
215
|
-
|
|
216
|
-
it('should propagate extraction errors', () => {
|
|
217
|
-
expect(() => throwingPlugin.extract('title')).toThrow(
|
|
218
|
-
'Extraction failed for field: title',
|
|
219
|
-
)
|
|
220
|
-
expect(() => throwingPlugin.extract('description')).toThrow(
|
|
221
|
-
'Extraction failed for field: description',
|
|
222
|
-
)
|
|
223
|
-
})
|
|
224
|
-
})
|
|
225
|
-
|
|
226
|
-
describe('edge cases', () => {
|
|
227
|
-
it('should throw on undefined extractor', () => {
|
|
228
|
-
const plugin = new MockExtractorPlugin(['author'])
|
|
229
|
-
expect(() => plugin.extract('author')).toThrow(
|
|
230
|
-
'Method should be implemented: author',
|
|
231
|
-
)
|
|
232
|
-
})
|
|
233
|
-
})
|
|
234
|
-
})
|