recipe-scrapers-js 0.1.0-alpha.6 → 0.1.0

This diff shows the changes between two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
Files changed (2)
  1. package/dist/index.js +31 -50
  2. package/package.json +9 -9
package/dist/index.js CHANGED
@@ -25,8 +25,7 @@ function isString(value) {
25
25
  */
26
26
  function getHostName(value) {
27
27
  try {
28
- const url = new URL(value.replace("www.", ""));
29
- return url.host;
28
+ return new URL(value.replace("www.", "")).host;
30
29
  } catch {
31
30
  throw new Error(`Invalid URL: ${value}`);
32
31
  }
@@ -139,8 +138,7 @@ function splitToList(value, separator) {
139
138
  * @TODO Implement [Temporal.Duration](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Temporal/Duration) once it lands.
140
139
  */
141
140
  function parseMinutes(value) {
142
- const duration = parse(value);
143
- const totalSeconds = toSeconds(duration);
141
+ const totalSeconds = toSeconds(parse(value));
144
142
  return Math.round(totalSeconds / 60);
145
143
  }
146
144
 
@@ -181,8 +179,7 @@ function scoreSentenceSimilarity(first, second) {
181
179
  const bigrams = (s) => new Set(Array.from({ length: s.length - 1 }, (_, i) => s.slice(i, i + 2)));
182
180
  const firstBigrams = bigrams(first);
183
181
  const secondBigrams = bigrams(second);
184
- const intersectionSize = [...firstBigrams].filter((b) => secondBigrams.has(b)).length;
185
- return 2 * intersectionSize / (firstBigrams.size + secondBigrams.size);
182
+ return 2 * [...firstBigrams].filter((b) => secondBigrams.has(b)).length / (firstBigrams.size + secondBigrams.size);
186
183
  }
187
184
  function bestMatch(testString, targetStrings) {
188
185
  if (targetStrings.length === 0) throw new Error("targetStrings cannot be empty");
@@ -227,8 +224,7 @@ function groupIngredients($, ingredientsList, headingSelector, itemSelector) {
227
224
  for (const el of elements) {
228
225
  const $el = $(el);
229
226
  if ($el.is(groupNameSelector)) {
230
- const headingText = normalizeString($el.text());
231
- currentHeading = headingText || DEFAULT_INGREDIENTS_GROUP_NAME;
227
+ currentHeading = normalizeString($el.text()) || DEFAULT_INGREDIENTS_GROUP_NAME;
232
228
  if (!groupings.has(currentHeading)) groupings.set(currentHeading, /* @__PURE__ */ new Set());
233
229
  } else if ($el.is(ingredientSelector)) {
234
230
  const text = normalizeString($el.text());
@@ -383,8 +379,7 @@ const extractValueFromElement = (element) => {
383
379
  return element.text().trim();
384
380
  };
385
381
  const extractSchemaType = (itemType) => {
386
- const typeMatch = itemType.match(/schema\.org\/(\w+)/);
387
- return typeMatch?.[1];
382
+ return itemType.match(/schema\.org\/(\w+)/)?.[1];
388
383
  };
389
384
  /**
390
385
  * Extracts microdata from HTML elements using itemtype and itemprop attributes
@@ -395,8 +390,7 @@ const extractSchemaType = (itemType) => {
395
390
  */
396
391
  function extractMicrodata($, selector) {
397
392
  const results = [];
398
- const elements = $(selector);
399
- elements.each((_, el) => {
393
+ $(selector).each((_, el) => {
400
394
  const $element = $(el);
401
395
  const itemType = $element.attr("itemtype");
402
396
  const rootObject = {};
@@ -406,13 +400,11 @@ function extractMicrodata($, selector) {
406
400
  }
407
401
  const allProps = $element.find("[itemprop]").addBack("[itemprop]");
408
402
  const nestedItemTypes = $element.find("[itemtype]");
409
- const rootLevelProps = allProps.filter((_$1, propEl) => {
403
+ allProps.filter((_$1, propEl) => {
410
404
  const $prop = $(propEl);
411
405
  if ($prop.attr("itemtype")) return true;
412
- const isInsideNestedType = nestedItemTypes.toArray().some((nestedEl) => $(nestedEl).find($prop).length > 0);
413
- return !isInsideNestedType;
414
- });
415
- rootLevelProps.each((_$1, propEl) => {
406
+ return !nestedItemTypes.toArray().some((nestedEl) => $(nestedEl).find($prop).length > 0);
407
+ }).each((_$1, propEl) => {
416
408
  const $prop = $(propEl);
417
409
  const propName = $prop.attr("itemprop");
418
410
  if (!propName) return;
@@ -497,8 +489,7 @@ function parseYields(element) {
497
489
  const splitMatch = serveText.match(SERVE_REGEX_TO);
498
490
  if (splitMatch && splitMatch.index !== void 0) serveText = serveText.slice(splitMatch.index + splitMatch[0].length).trim();
499
491
  }
500
- const match = serveText.match(SERVE_REGEX_NUMBER);
501
- const matched = match?.groups?.items || "0";
492
+ const matched = serveText.match(SERVE_REGEX_NUMBER)?.groups?.items || "0";
502
493
  const serveTextLower = serveText.toLowerCase();
503
494
  let bestMatch$1 = null;
504
495
  let bestMatchLength = 0;
@@ -535,8 +526,7 @@ function isSchemaOrgData(obj) {
535
526
  }
536
527
  function isThingType(obj, type) {
537
528
  if (!isBaseType(obj)) return false;
538
- const thingType = Array.isArray(obj["@type"]) ? obj["@type"][0] : obj["@type"];
539
- return thingType === type;
529
+ return (Array.isArray(obj["@type"]) ? obj["@type"][0] : obj["@type"]) === type;
540
530
  }
541
531
  function isAggregateRating(obj) {
542
532
  return isThingType(obj, "AggregateRating");
@@ -645,7 +635,6 @@ var SchemaOrgPlugin = class SchemaOrgPlugin extends ExtractorPlugin {
645
635
  pickFromObject(obj, props) {
646
636
  if (!isPlainObject(obj)) return void 0;
647
637
  for (const prop of props) if (isString(obj[prop])) return obj[prop];
648
- return void 0;
649
638
  }
650
639
  getSchemaTextValue(value, props = [
651
640
  "textValue",
@@ -714,10 +703,7 @@ var SchemaOrgPlugin = class SchemaOrgPlugin extends ExtractorPlugin {
714
703
  return value;
715
704
  }
716
705
  if (isString(value)) return parseMinutes(value);
717
- if (isBaseType(value) && "maxValue" in value) {
718
- const maxValue = this.getSchemaTextValue(value.maxValue);
719
- return parseMinutes(maxValue);
720
- }
706
+ if (isBaseType(value) && "maxValue" in value) return parseMinutes(this.getSchemaTextValue(value.maxValue));
721
707
  return null;
722
708
  }
723
709
  parseInstructions(value) {
@@ -921,8 +907,7 @@ var RecipeExtractor = class RecipeExtractor {
921
907
  this.logger.debug(`Extracting field: ${field}`);
922
908
  for (const plugin of this.plugins) {
923
909
  const pluginLogger = new Logger(this.getContext(plugin.name), this.options.logLevel);
924
- const isSupported = plugin.supports(field);
925
- if (isSupported && !isDefined(result)) try {
910
+ if (plugin.supports(field) && !isDefined(result)) try {
926
911
  result = await plugin.extract(field);
927
912
  } catch (err) {
928
913
  if (err instanceof ExtractionFailedException) pluginLogger.verbose(err.message);
@@ -964,9 +949,7 @@ var AbstractScraper = class {
964
949
  const { extraExtractors = [], extraPostProcessors = [], logLevel = LogLevel.WARN } = options;
965
950
  this.logger = new Logger(this.constructor.name, logLevel);
966
951
  this.$ = cheerio.load(html);
967
- const baseExtractors = [new OpenGraphPlugin(this.$), new SchemaOrgPlugin(this.$, logLevel)];
968
- const basePostProcessors = [new HtmlStripperPlugin()];
969
- this.pluginManager = new PluginManager(baseExtractors, basePostProcessors, extraExtractors, extraPostProcessors);
952
+ this.pluginManager = new PluginManager([new OpenGraphPlugin(this.$), new SchemaOrgPlugin(this.$, logLevel)], [new HtmlStripperPlugin()], extraExtractors, extraPostProcessors);
970
953
  this.recipeExtractor = new RecipeExtractor(this.pluginManager.getExtractors(), this.constructor.name, { logLevel });
971
954
  }
972
955
  /**
@@ -1104,7 +1087,8 @@ const recipeDataSchema = z.object({
1104
1087
  recipeTimeNote: z.string().optional(),
1105
1088
  ingredientGroups: z.array(recipeIngredientGroupSchema),
1106
1089
  headnote: z.string().optional(),
1107
- instructions: z.array(recipeInstructionSchema)
1090
+ instructions: z.array(recipeInstructionSchema),
1091
+ metaData: z.object({ fields: z.object({ photo: z.object({ url: z.url() }) }) })
1108
1092
  });
1109
1093
  const pagePropsDataSchema = z.object({ props: z.object({ pageProps: z.object({ data: recipeDataSchema }) }) });
1110
1094
  var AmericasTestKitchen = class extends AbstractScraper {
@@ -1113,6 +1097,7 @@ var AmericasTestKitchen = class extends AbstractScraper {
1113
1097
  return "americastestkitchen.com";
1114
1098
  }
1115
1099
  extractors = {
1100
+ image: this.image.bind(this),
1116
1101
  ingredients: this.ingredients.bind(this),
1117
1102
  instructions: this.instructions.bind(this),
1118
1103
  siteName: this.siteName.bind(this)
@@ -1120,6 +1105,14 @@ var AmericasTestKitchen = class extends AbstractScraper {
1120
1105
  siteName() {
1121
1106
  return "America's Test Kitchen";
1122
1107
  }
1108
+ image(prevValue) {
1109
+ const data = this.getRecipeData();
1110
+ if (!data) {
1111
+ if (prevValue) return prevValue;
1112
+ throw new Error("Failed to extract image");
1113
+ }
1114
+ return data.metaData.fields.photo.url;
1115
+ }
1123
1116
  ingredients(prevValue) {
1124
1117
  let ingredients = this.parseIngredients();
1125
1118
  if (!ingredients) ingredients = this.parseHtmlIngredients(prevValue);
@@ -1142,23 +1135,18 @@ var AmericasTestKitchen = class extends AbstractScraper {
1142
1135
  parseHtmlIngredients(prevValue) {
1143
1136
  const headingSelector = "[class*=\"RecipeIngredientGroups_group\"] > span";
1144
1137
  const ingredientSelector = "[class*=\"RecipeIngredient\"] label";
1145
- if (isList(prevValue) && prevValue.size > 0) {
1146
- const result = groupIngredients(this.$, prevValue, headingSelector, ingredientSelector);
1147
- return result;
1148
- }
1138
+ if (isList(prevValue) && prevValue.size > 0) return groupIngredients(this.$, prevValue, headingSelector, ingredientSelector);
1149
1139
  return null;
1150
1140
  }
1151
1141
  getRecipeData() {
1152
1142
  if (this.data === null) {
1153
- const jsonElement = this.$("script[type=\"application/json\"]");
1154
- const jsonString = jsonElement.html();
1143
+ const jsonString = this.$("script[type=\"application/json\"]").html();
1155
1144
  if (!jsonString) {
1156
1145
  this.logger.warn("Could not find JSON data script tag");
1157
1146
  return null;
1158
1147
  }
1159
1148
  try {
1160
- const parsed = pagePropsDataSchema.parse(JSON.parse(jsonString));
1161
- this.data = parsed.props.pageProps.data;
1149
+ this.data = pagePropsDataSchema.parse(JSON.parse(jsonString)).props.pageProps.data;
1162
1150
  } catch (error) {
1163
1151
  this.logger.error("Failed to parse JSON data:", error);
1164
1152
  return null;
@@ -1208,10 +1196,7 @@ var BBCGoodFood = class extends AbstractScraper {
1208
1196
  ingredients(prevValue) {
1209
1197
  const headingSelector = ".recipe__ingredients h3";
1210
1198
  const ingredientSelector = ".recipe__ingredients li";
1211
- if (isList(prevValue) && prevValue.size > 0) {
1212
- const result = groupIngredients(this.$, prevValue, headingSelector, ingredientSelector);
1213
- return result;
1214
- }
1199
+ if (isList(prevValue) && prevValue.size > 0) return groupIngredients(this.$, prevValue, headingSelector, ingredientSelector);
1215
1200
  throw new Error("No ingredients found to group");
1216
1201
  }
1217
1202
  };
@@ -1233,8 +1218,7 @@ var Epicurious = class extends AbstractScraper {
1233
1218
  }
1234
1219
  extractors = { author: this.author.bind(this) };
1235
1220
  author() {
1236
- const author = this.$("a[itemprop=\"author\"]").text().trim();
1237
- return author;
1221
+ return this.$("a[itemprop=\"author\"]").text().trim();
1238
1222
  }
1239
1223
  };
1240
1224
 
@@ -1248,10 +1232,7 @@ var NYTimes = class extends AbstractScraper {
1248
1232
  ingredients(prevValue) {
1249
1233
  const headingSelector = "h3[class*=\"ingredientgroup_name\"]";
1250
1234
  const ingredientSelector = "li[class*=\"ingredient\"]";
1251
- if (isList(prevValue) && prevValue.size > 0) {
1252
- const result = groupIngredients(this.$, prevValue, headingSelector, ingredientSelector);
1253
- return result;
1254
- }
1235
+ if (isList(prevValue) && prevValue.size > 0) return groupIngredients(this.$, prevValue, headingSelector, ingredientSelector);
1255
1236
  throw new Error("No ingredients found to group");
1256
1237
  }
1257
1238
  };
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "recipe-scrapers-js",
3
- "version": "0.1.0-alpha.6",
3
+ "version": "0.1.0",
4
4
  "license": "MIT",
5
5
  "description": "A recipe scrapers library",
6
6
  "author": {
@@ -39,18 +39,18 @@
39
39
  "prepublishOnly": "bun run lint && bun run build"
40
40
  },
41
41
  "peerDependencies": {
42
- "cheerio": "^1.1.0",
43
- "zod": "^3.25.74"
42
+ "cheerio": "^1.1.2",
43
+ "zod": "^4.1.12"
44
44
  },
45
45
  "dependencies": {
46
- "iso8601-duration": "^2.1.2",
46
+ "iso8601-duration": "^2.1.3",
47
47
  "schema-dts": "^1.1.5"
48
48
  },
49
49
  "devDependencies": {
50
- "@biomejs/biome": "^2.0.6",
51
- "@types/bun": "^1.2.17",
52
- "cheerio": "^1.1.0",
53
- "tsdown": "^0.12.9",
54
- "typescript": "^5.8.3"
50
+ "@biomejs/biome": "^2.2.6",
51
+ "@types/bun": "^1.3.0",
52
+ "cheerio": "^1.1.2",
53
+ "tsdown": "^0.15.7",
54
+ "typescript": "^5.9.3"
55
55
  }
56
56
  }