npm - recipe-scrapers-js - Versions diffs - 0.1.0-alpha.5 → 0.1.0-alpha.7 - Mend

recipe-scrapers-js 0.1.0-alpha.5 → 0.1.0-alpha.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (3)

package/README.md CHANGED

@@ -19,14 +19,16 @@ A TypeScript/JavaScript library for scraping recipe data from various cooking we
 ## Installation
+Add the `recipe-scrapers-js` package and its peer dependencies.
 ```bash
-npm install recipe-scrapers-js
+npm install recipe-scrapers-js cheerio zod
 # or
-yarn add recipe-scrapers-js
+yarn add recipe-scrapers-js cheerio zod
 # or
-pnpm add recipe-scrapers-js
+pnpm add recipe-scrapers-js cheerio zod
 # or
-bun add recipe-scrapers-js
+bun add recipe-scrapers-js cheerio zod
 ```
 ## Usage
@@ -195,6 +197,7 @@ This project is licensed under the MIT License - see the [LICENSE](LICENSE) file
 - Original [recipe-scrapers](https://github.com/hhursev/recipe-scrapers) Python library by [hhursev](https://github.com/hhursev)
 - [Schema.org Recipe specification](https://schema.org/Recipe)
 - [Cheerio](https://cheerio.js.org/) for HTML parsing
+- [Zod](https://zod.dev/) for schema validation
 ## Copyright and Usage

package/dist/index.js CHANGED

@@ -1,5 +1,6 @@
 import * as cheerio from "cheerio";
 import { parse, toSeconds } from "iso8601-duration";
+import z from "zod/v4";
 //#region src/utils/index.ts
 function isDefined(value) {
@@ -145,7 +146,7 @@ function parseMinutes(value) {
 //#endregion
 //#region src/utils/ingredients.ts
-const DEFAULT_GROUP_NAME = "Ingredients";
+const DEFAULT_INGREDIENTS_GROUP_NAME = "Ingredients";
 const DEFAULT_GROUPING_SELECTORS = {
 	wprm: {
 		headingSelectors: [".wprm-recipe-ingredient-group h4", ".wprm-recipe-group-name"],
@@ -227,13 +228,13 @@ function groupIngredients($, ingredientsList, headingSelector, itemSelector) {
 		const $el = $(el);
 		if ($el.is(groupNameSelector)) {
 			const headingText = normalizeString($el.text());
-			currentHeading = headingText || DEFAULT_GROUP_NAME;
+			currentHeading = headingText || DEFAULT_INGREDIENTS_GROUP_NAME;
 			if (!groupings.has(currentHeading)) groupings.set(currentHeading, /* @__PURE__ */ new Set());
 		} else if ($el.is(ingredientSelector)) {
 			const text = normalizeString($el.text());
 			if (!text) continue;
 			const matched = bestMatch(text, ingredients);
-			const heading = currentHeading || DEFAULT_GROUP_NAME;
+			const heading = currentHeading || DEFAULT_INGREDIENTS_GROUP_NAME;
 			if (!groupings.has(heading)) groupings.set(heading, /* @__PURE__ */ new Set());
 			groupings.get(heading)?.add(matched);
 		}
@@ -1076,6 +1077,137 @@ var AllRecipes = class extends AbstractScraper {
 	extractors = {};
 };
+//#endregion
+//#region src/scrapers/americastestkitchen.ts
+const recipeIngredientItemSchema = z.object({ fields: z.object({
+	qty: z.string(),
+	preText: z.string(),
+	postText: z.string(),
+	measurement: z.string().nullable(),
+	pluralIngredient: z.boolean(),
+	ingredient: z.object({
+		contentType: z.string(),
+		fields: z.object({
+			title: z.string(),
+			pluralTitle: z.string(),
+			kind: z.string()
+		})
+	})
+}) });
+const recipeIngredientGroupSchema = z.object({ fields: z.object({
+	title: z.string(),
+	recipeIngredientItems: z.array(recipeIngredientItemSchema)
+}) });
+const recipeInstructionSchema = z.object({ fields: z.object({ content: z.string() }) });
+const recipeDataSchema = z.object({
+	totalCookTime: z.number(),
+	recipeTimeNote: z.string().optional(),
+	ingredientGroups: z.array(recipeIngredientGroupSchema),
+	headnote: z.string().optional(),
+	instructions: z.array(recipeInstructionSchema),
+	metaData: z.object({ fields: z.object({ photo: z.object({ url: z.url() }) }) })
+});
+const pagePropsDataSchema = z.object({ props: z.object({ pageProps: z.object({ data: recipeDataSchema }) }) });
+var AmericasTestKitchen = class extends AbstractScraper {
+	data = null;
+	static host() {
+		return "americastestkitchen.com";
+	}
+	extractors = {
+		image: this.image.bind(this),
+		ingredients: this.ingredients.bind(this),
+		instructions: this.instructions.bind(this),
+		siteName: this.siteName.bind(this)
+	};
+	siteName() {
+		return "America's Test Kitchen";
+	}
+	image(prevValue) {
+		const data = this.getRecipeData();
+		if (!data) {
+			if (prevValue) return prevValue;
+			throw new Error("Failed to extract image");
+		}
+		return data.metaData.fields.photo.url;
+	}
+	ingredients(prevValue) {
+		let ingredients = this.parseIngredients();
+		if (!ingredients) ingredients = this.parseHtmlIngredients(prevValue);
+		if (!ingredients) throw new Error("Failed to extract ingredients");
+		return ingredients;
+	}
+	instructions(prevValue) {
+		const data = this.getRecipeData();
+		if (!data) {
+			if (prevValue) return prevValue;
+			throw new Error("Failed to extract instructions");
+		}
+		const { headnote } = data;
+		let headnoteText = "";
+		if (headnote) headnoteText = `Note: ${normalizeString(headnote)}`;
+		const instructionTexts = [];
+		for (const instruction of data.instructions) instructionTexts.push(normalizeString(instruction.fields.content));
+		return new Set([headnoteText, ...instructionTexts]);
+	}
+	parseHtmlIngredients(prevValue) {
+		const headingSelector = "[class*=\"RecipeIngredientGroups_group\"] > span";
+		const ingredientSelector = "[class*=\"RecipeIngredient\"] label";
+		if (isList(prevValue) && prevValue.size > 0) {
+			const result = groupIngredients(this.$, prevValue, headingSelector, ingredientSelector);
+			return result;
+		}
+		return null;
+	}
+	getRecipeData() {
+		if (this.data === null) {
+			const jsonElement = this.$("script[type=\"application/json\"]");
+			const jsonString = jsonElement.html();
+			if (!jsonString) {
+				this.logger.warn("Could not find JSON data script tag");
+				return null;
+			}
+			try {
+				const parsed = pagePropsDataSchema.parse(JSON.parse(jsonString));
+				this.data = parsed.props.pageProps.data;
+			} catch (error) {
+				this.logger.error("Failed to parse JSON data:", error);
+				return null;
+			}
+		}
+		return this.data;
+	}
+	parseIngredientItem(ingredientItem) {
+		const { fields } = ingredientItem;
+		const fragments = [
+			fields.qty || "",
+			fields.measurement || "",
+			fields.ingredient.fields.title || "",
+			fields.postText || ""
+		];
+		const filteredFragments = [];
+		for (const fragment of fragments) if (fragment) filteredFragments.push(fragment.trimEnd());
+		return filteredFragments.join(" ").trimEnd().replace(" ,", ",");
+	}
+	parseIngredients() {
+		const data = this.getRecipeData();
+		if (!data) return null;
+		const { ingredientGroups } = data;
+		if (ingredientGroups.length === 1) {
+			const ingredientSet = /* @__PURE__ */ new Set();
+			for (const item of ingredientGroups[0].fields.recipeIngredientItems) ingredientSet.add(this.parseIngredientItem(item));
+			return ingredientSet;
+		}
+		const ingredientMap = /* @__PURE__ */ new Map();
+		for (const group of ingredientGroups) {
+			const groupTitle = group.fields.title || DEFAULT_INGREDIENTS_GROUP_NAME;
+			const ingredientSet = /* @__PURE__ */ new Set();
+			for (const item of group.fields.recipeIngredientItems) ingredientSet.add(this.parseIngredientItem(item));
+			ingredientMap.set(groupTitle, ingredientSet);
+		}
+		return ingredientMap;
+	}
+};
 //#endregion
 //#region src/scrapers/bbcgoodfood.ts
 var BBCGoodFood = class extends AbstractScraper {
@@ -1083,16 +1215,6 @@ var BBCGoodFood = class extends AbstractScraper {
 		return "bbcgoodfood.com";
 	}
 	extractors = { ingredients: this.ingredients.bind(this) };
-	/**
-	* The NYTimes website appears to auto generate it's CSS class names,
-	* which results in them ending with a string a random characters.
-	* Matching the exact class name is likely to break fairly quickly
-	* so instead we are going to match on a partial class name.
-	* For example, h3[class*='ingredientgroup_name'] matches an h3 element
-	* with a class that contains the value 'ingredient_groupname' at least once
-	* anywhere in the element class attribute.
-	* @link https://developer.mozilla.org/en-US/docs/Web/CSS/Attribute_selectors
-	*/
 	ingredients(prevValue) {
 		const headingSelector = ".recipe__ingredients h3";
 		const ingredientSelector = ".recipe__ingredients li";
@@ -1104,6 +1226,15 @@ var BBCGoodFood = class extends AbstractScraper {
 	}
 };
+//#endregion
+//#region src/scrapers/eatingwell.ts
+var EatingWell = class extends AbstractScraper {
+	static host() {
+		return "eatingwell.com";
+	}
+	extractors = {};
+};
 //#endregion
 //#region src/scrapers/epicurious.ts
 var Epicurious = class extends AbstractScraper {
@@ -1124,16 +1255,6 @@ var NYTimes = class extends AbstractScraper {
 		return "cooking.nytimes.com";
 	}
 	extractors = { ingredients: this.ingredients.bind(this) };
-	/**
-	* The NYTimes website appears to auto generate it's CSS class names,
-	* which results in them ending with a string a random characters.
-	* Matching the exact class name is likely to break fairly quickly
-	* so instead we are going to match on a partial class name.
-	* For example, h3[class*='ingredientgroup_name'] matches an h3 element
-	* with a class that contains the value 'ingredient_groupname' at least once
-	* anywhere in the element class attribute.
-	* @link https://developer.mozilla.org/en-US/docs/Web/CSS/Attribute_selectors
-	*/
 	ingredients(prevValue) {
 		const headingSelector = "h3[class*=\"ingredientgroup_name\"]";
 		const ingredientSelector = "li[class*=\"ingredient\"]";
@@ -1184,7 +1305,9 @@ var SimplyRecipes = class extends AbstractScraper {
 */
 const scrapers = {
 	[AllRecipes.host()]: AllRecipes,
+	[AmericasTestKitchen.host()]: AmericasTestKitchen,
 	[BBCGoodFood.host()]: BBCGoodFood,
+	[EatingWell.host()]: EatingWell,
 	[Epicurious.host()]: Epicurious,
 	[SeriousEats.host()]: SeriousEats,
 	[SimplyRecipes.host()]: SimplyRecipes,

package/package.json CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "recipe-scrapers-js",
-  "version": "0.1.0-alpha.5",
+  "version": "0.1.0-alpha.7",
   "license": "MIT",
   "description": "A recipe scrapers library",
   "author": {
@@ -39,15 +39,16 @@
     "prepublishOnly": "bun run lint && bun run build"
   },
   "peerDependencies": {
-    "cheerio": "^1.1.0"
+    "cheerio": "^1.1.0",
+    "zod": "^3.25.76"
   },
   "dependencies": {
     "iso8601-duration": "^2.1.2",
     "schema-dts": "^1.1.5"
   },
   "devDependencies": {
-    "@biomejs/biome": "^2.0.6",
-    "@types/bun": "^1.2.17",
+    "@biomejs/biome": "^2.1.1",
+    "@types/bun": "^1.2.18",
     "cheerio": "^1.1.0",
     "tsdown": "^0.12.9",
     "typescript": "^5.8.3"