recipe-scrapers-js 0.1.0-alpha.5 → 0.1.0-alpha.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3)
  1. package/README.md +7 -4
  2. package/dist/index.js +136 -23
  3. package/package.json +3 -2
package/README.md CHANGED
@@ -19,14 +19,16 @@ A TypeScript/JavaScript library for scraping recipe data from various cooking we
19
19
 
20
20
  ## Installation
21
21
 
22
+ Add the `recipe-scrapers-js` package and its peer dependencies.
23
+
22
24
  ```bash
23
- npm install recipe-scrapers-js
25
+ npm install recipe-scrapers-js cheerio zod
24
26
  # or
25
- yarn add recipe-scrapers-js
27
+ yarn add recipe-scrapers-js cheerio zod
26
28
  # or
27
- pnpm add recipe-scrapers-js
29
+ pnpm add recipe-scrapers-js cheerio zod
28
30
  # or
29
- bun add recipe-scrapers-js
31
+ bun add recipe-scrapers-js cheerio zod
30
32
  ```
31
33
 
32
34
  ## Usage
@@ -195,6 +197,7 @@ This project is licensed under the MIT License - see the [LICENSE](LICENSE) file
195
197
  - Original [recipe-scrapers](https://github.com/hhursev/recipe-scrapers) Python library by [hhursev](https://github.com/hhursev)
196
198
  - [Schema.org Recipe specification](https://schema.org/Recipe)
197
199
  - [Cheerio](https://cheerio.js.org/) for HTML parsing
200
+ - [Zod](https://zod.dev/) for schema validation
198
201
 
199
202
  ## Copyright and Usage
200
203
 
package/dist/index.js CHANGED
@@ -1,5 +1,6 @@
1
1
  import * as cheerio from "cheerio";
2
2
  import { parse, toSeconds } from "iso8601-duration";
3
+ import z from "zod/v4";
3
4
 
4
5
  //#region src/utils/index.ts
5
6
  function isDefined(value) {
@@ -145,7 +146,7 @@ function parseMinutes(value) {
145
146
 
146
147
  //#endregion
147
148
  //#region src/utils/ingredients.ts
148
- const DEFAULT_GROUP_NAME = "Ingredients";
149
+ const DEFAULT_INGREDIENTS_GROUP_NAME = "Ingredients";
149
150
  const DEFAULT_GROUPING_SELECTORS = {
150
151
  wprm: {
151
152
  headingSelectors: [".wprm-recipe-ingredient-group h4", ".wprm-recipe-group-name"],
@@ -227,13 +228,13 @@ function groupIngredients($, ingredientsList, headingSelector, itemSelector) {
227
228
  const $el = $(el);
228
229
  if ($el.is(groupNameSelector)) {
229
230
  const headingText = normalizeString($el.text());
230
- currentHeading = headingText || DEFAULT_GROUP_NAME;
231
+ currentHeading = headingText || DEFAULT_INGREDIENTS_GROUP_NAME;
231
232
  if (!groupings.has(currentHeading)) groupings.set(currentHeading, /* @__PURE__ */ new Set());
232
233
  } else if ($el.is(ingredientSelector)) {
233
234
  const text = normalizeString($el.text());
234
235
  if (!text) continue;
235
236
  const matched = bestMatch(text, ingredients);
236
- const heading = currentHeading || DEFAULT_GROUP_NAME;
237
+ const heading = currentHeading || DEFAULT_INGREDIENTS_GROUP_NAME;
237
238
  if (!groupings.has(heading)) groupings.set(heading, /* @__PURE__ */ new Set());
238
239
  groupings.get(heading)?.add(matched);
239
240
  }
@@ -1076,6 +1077,127 @@ var AllRecipes = class extends AbstractScraper {
1076
1077
  extractors = {};
1077
1078
  };
1078
1079
 
1080
+ //#endregion
1081
+ //#region src/scrapers/americastestkitchen.ts
1082
+ const recipeIngredientItemSchema = z.object({ fields: z.object({
1083
+ qty: z.string(),
1084
+ preText: z.string(),
1085
+ postText: z.string(),
1086
+ measurement: z.string().nullable(),
1087
+ pluralIngredient: z.boolean(),
1088
+ ingredient: z.object({
1089
+ contentType: z.string(),
1090
+ fields: z.object({
1091
+ title: z.string(),
1092
+ pluralTitle: z.string(),
1093
+ kind: z.string()
1094
+ })
1095
+ })
1096
+ }) });
1097
+ const recipeIngredientGroupSchema = z.object({ fields: z.object({
1098
+ title: z.string(),
1099
+ recipeIngredientItems: z.array(recipeIngredientItemSchema)
1100
+ }) });
1101
+ const recipeInstructionSchema = z.object({ fields: z.object({ content: z.string() }) });
1102
+ const recipeDataSchema = z.object({
1103
+ totalCookTime: z.number(),
1104
+ recipeTimeNote: z.string().optional(),
1105
+ ingredientGroups: z.array(recipeIngredientGroupSchema),
1106
+ headnote: z.string().optional(),
1107
+ instructions: z.array(recipeInstructionSchema)
1108
+ });
1109
+ const pagePropsDataSchema = z.object({ props: z.object({ pageProps: z.object({ data: recipeDataSchema }) }) });
1110
+ var AmericasTestKitchen = class extends AbstractScraper {
1111
+ data = null;
1112
+ static host() {
1113
+ return "americastestkitchen.com";
1114
+ }
1115
+ extractors = {
1116
+ ingredients: this.ingredients.bind(this),
1117
+ instructions: this.instructions.bind(this),
1118
+ siteName: this.siteName.bind(this)
1119
+ };
1120
+ siteName() {
1121
+ return "America's Test Kitchen";
1122
+ }
1123
+ ingredients(prevValue) {
1124
+ let ingredients = this.parseIngredients();
1125
+ if (!ingredients) ingredients = this.parseHtmlIngredients(prevValue);
1126
+ if (!ingredients) throw new Error("Failed to extract ingredients");
1127
+ return ingredients;
1128
+ }
1129
+ instructions(prevValue) {
1130
+ const data = this.getRecipeData();
1131
+ if (!data) {
1132
+ if (prevValue) return prevValue;
1133
+ throw new Error("Failed to extract instructions");
1134
+ }
1135
+ const { headnote } = data;
1136
+ let headnoteText = "";
1137
+ if (headnote) headnoteText = `Note: ${normalizeString(headnote)}`;
1138
+ const instructionTexts = [];
1139
+ for (const instruction of data.instructions) instructionTexts.push(normalizeString(instruction.fields.content));
1140
+ return new Set([headnoteText, ...instructionTexts]);
1141
+ }
1142
+ parseHtmlIngredients(prevValue) {
1143
+ const headingSelector = "[class*=\"RecipeIngredientGroups_group\"] > span";
1144
+ const ingredientSelector = "[class*=\"RecipeIngredient\"] label";
1145
+ if (isList(prevValue) && prevValue.size > 0) {
1146
+ const result = groupIngredients(this.$, prevValue, headingSelector, ingredientSelector);
1147
+ return result;
1148
+ }
1149
+ return null;
1150
+ }
1151
+ getRecipeData() {
1152
+ if (this.data === null) {
1153
+ const jsonElement = this.$("script[type=\"application/json\"]");
1154
+ const jsonString = jsonElement.html();
1155
+ if (!jsonString) {
1156
+ this.logger.warn("Could not find JSON data script tag");
1157
+ return null;
1158
+ }
1159
+ try {
1160
+ const parsed = pagePropsDataSchema.parse(JSON.parse(jsonString));
1161
+ this.data = parsed.props.pageProps.data;
1162
+ } catch (error) {
1163
+ this.logger.error("Failed to parse JSON data:", error);
1164
+ return null;
1165
+ }
1166
+ }
1167
+ return this.data;
1168
+ }
1169
+ parseIngredientItem(ingredientItem) {
1170
+ const { fields } = ingredientItem;
1171
+ const fragments = [
1172
+ fields.qty || "",
1173
+ fields.measurement || "",
1174
+ fields.ingredient.fields.title || "",
1175
+ fields.postText || ""
1176
+ ];
1177
+ const filteredFragments = [];
1178
+ for (const fragment of fragments) if (fragment) filteredFragments.push(fragment.trimEnd());
1179
+ return filteredFragments.join(" ").trimEnd().replace(" ,", ",");
1180
+ }
1181
+ parseIngredients() {
1182
+ const data = this.getRecipeData();
1183
+ if (!data) return null;
1184
+ const { ingredientGroups } = data;
1185
+ if (ingredientGroups.length === 1) {
1186
+ const ingredientSet = /* @__PURE__ */ new Set();
1187
+ for (const item of ingredientGroups[0].fields.recipeIngredientItems) ingredientSet.add(this.parseIngredientItem(item));
1188
+ return ingredientSet;
1189
+ }
1190
+ const ingredientMap = /* @__PURE__ */ new Map();
1191
+ for (const group of ingredientGroups) {
1192
+ const groupTitle = group.fields.title || DEFAULT_INGREDIENTS_GROUP_NAME;
1193
+ const ingredientSet = /* @__PURE__ */ new Set();
1194
+ for (const item of group.fields.recipeIngredientItems) ingredientSet.add(this.parseIngredientItem(item));
1195
+ ingredientMap.set(groupTitle, ingredientSet);
1196
+ }
1197
+ return ingredientMap;
1198
+ }
1199
+ };
1200
+
1079
1201
  //#endregion
1080
1202
  //#region src/scrapers/bbcgoodfood.ts
1081
1203
  var BBCGoodFood = class extends AbstractScraper {
@@ -1083,16 +1205,6 @@ var BBCGoodFood = class extends AbstractScraper {
1083
1205
  return "bbcgoodfood.com";
1084
1206
  }
1085
1207
  extractors = { ingredients: this.ingredients.bind(this) };
1086
- /**
1087
- * The NYTimes website appears to auto generate it's CSS class names,
1088
- * which results in them ending with a string a random characters.
1089
- * Matching the exact class name is likely to break fairly quickly
1090
- * so instead we are going to match on a partial class name.
1091
- * For example, h3[class*='ingredientgroup_name'] matches an h3 element
1092
- * with a class that contains the value 'ingredient_groupname' at least once
1093
- * anywhere in the element class attribute.
1094
- * @link https://developer.mozilla.org/en-US/docs/Web/CSS/Attribute_selectors
1095
- */
1096
1208
  ingredients(prevValue) {
1097
1209
  const headingSelector = ".recipe__ingredients h3";
1098
1210
  const ingredientSelector = ".recipe__ingredients li";
@@ -1104,6 +1216,15 @@ var BBCGoodFood = class extends AbstractScraper {
1104
1216
  }
1105
1217
  };
1106
1218
 
1219
+ //#endregion
1220
+ //#region src/scrapers/eatingwell.ts
1221
+ var EatingWell = class extends AbstractScraper {
1222
+ static host() {
1223
+ return "eatingwell.com";
1224
+ }
1225
+ extractors = {};
1226
+ };
1227
+
1107
1228
  //#endregion
1108
1229
  //#region src/scrapers/epicurious.ts
1109
1230
  var Epicurious = class extends AbstractScraper {
@@ -1124,16 +1245,6 @@ var NYTimes = class extends AbstractScraper {
1124
1245
  return "cooking.nytimes.com";
1125
1246
  }
1126
1247
  extractors = { ingredients: this.ingredients.bind(this) };
1127
- /**
1128
- * The NYTimes website appears to auto generate it's CSS class names,
1129
- * which results in them ending with a string a random characters.
1130
- * Matching the exact class name is likely to break fairly quickly
1131
- * so instead we are going to match on a partial class name.
1132
- * For example, h3[class*='ingredientgroup_name'] matches an h3 element
1133
- * with a class that contains the value 'ingredient_groupname' at least once
1134
- * anywhere in the element class attribute.
1135
- * @link https://developer.mozilla.org/en-US/docs/Web/CSS/Attribute_selectors
1136
- */
1137
1248
  ingredients(prevValue) {
1138
1249
  const headingSelector = "h3[class*=\"ingredientgroup_name\"]";
1139
1250
  const ingredientSelector = "li[class*=\"ingredient\"]";
@@ -1184,7 +1295,9 @@ var SimplyRecipes = class extends AbstractScraper {
1184
1295
  */
1185
1296
  const scrapers = {
1186
1297
  [AllRecipes.host()]: AllRecipes,
1298
+ [AmericasTestKitchen.host()]: AmericasTestKitchen,
1187
1299
  [BBCGoodFood.host()]: BBCGoodFood,
1300
+ [EatingWell.host()]: EatingWell,
1188
1301
  [Epicurious.host()]: Epicurious,
1189
1302
  [SeriousEats.host()]: SeriousEats,
1190
1303
  [SimplyRecipes.host()]: SimplyRecipes,
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "recipe-scrapers-js",
3
- "version": "0.1.0-alpha.5",
3
+ "version": "0.1.0-alpha.6",
4
4
  "license": "MIT",
5
5
  "description": "A recipe scrapers library",
6
6
  "author": {
@@ -39,7 +39,8 @@
39
39
  "prepublishOnly": "bun run lint && bun run build"
40
40
  },
41
41
  "peerDependencies": {
42
- "cheerio": "^1.1.0"
42
+ "cheerio": "^1.1.0",
43
+ "zod": "^3.25.74"
43
44
  },
44
45
  "dependencies": {
45
46
  "iso8601-duration": "^2.1.2",