recipe-scrapers-js 0.1.0-alpha.7 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2)
  1. package/dist/index.js +20 -49
  2. package/package.json +9 -9
package/dist/index.js CHANGED
@@ -25,8 +25,7 @@ function isString(value) {
25
25
  */
26
26
  function getHostName(value) {
27
27
  try {
28
- const url = new URL(value.replace("www.", ""));
29
- return url.host;
28
+ return new URL(value.replace("www.", "")).host;
30
29
  } catch {
31
30
  throw new Error(`Invalid URL: ${value}`);
32
31
  }
@@ -139,8 +138,7 @@ function splitToList(value, separator) {
139
138
  * @TODO Implement [Temporal.Duration](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Temporal/Duration) once it lands.
140
139
  */
141
140
  function parseMinutes(value) {
142
- const duration = parse(value);
143
- const totalSeconds = toSeconds(duration);
141
+ const totalSeconds = toSeconds(parse(value));
144
142
  return Math.round(totalSeconds / 60);
145
143
  }
146
144
 
@@ -181,8 +179,7 @@ function scoreSentenceSimilarity(first, second) {
181
179
  const bigrams = (s) => new Set(Array.from({ length: s.length - 1 }, (_, i) => s.slice(i, i + 2)));
182
180
  const firstBigrams = bigrams(first);
183
181
  const secondBigrams = bigrams(second);
184
- const intersectionSize = [...firstBigrams].filter((b) => secondBigrams.has(b)).length;
185
- return 2 * intersectionSize / (firstBigrams.size + secondBigrams.size);
182
+ return 2 * [...firstBigrams].filter((b) => secondBigrams.has(b)).length / (firstBigrams.size + secondBigrams.size);
186
183
  }
187
184
  function bestMatch(testString, targetStrings) {
188
185
  if (targetStrings.length === 0) throw new Error("targetStrings cannot be empty");
@@ -227,8 +224,7 @@ function groupIngredients($, ingredientsList, headingSelector, itemSelector) {
227
224
  for (const el of elements) {
228
225
  const $el = $(el);
229
226
  if ($el.is(groupNameSelector)) {
230
- const headingText = normalizeString($el.text());
231
- currentHeading = headingText || DEFAULT_INGREDIENTS_GROUP_NAME;
227
+ currentHeading = normalizeString($el.text()) || DEFAULT_INGREDIENTS_GROUP_NAME;
232
228
  if (!groupings.has(currentHeading)) groupings.set(currentHeading, /* @__PURE__ */ new Set());
233
229
  } else if ($el.is(ingredientSelector)) {
234
230
  const text = normalizeString($el.text());
@@ -383,8 +379,7 @@ const extractValueFromElement = (element) => {
383
379
  return element.text().trim();
384
380
  };
385
381
  const extractSchemaType = (itemType) => {
386
- const typeMatch = itemType.match(/schema\.org\/(\w+)/);
387
- return typeMatch?.[1];
382
+ return itemType.match(/schema\.org\/(\w+)/)?.[1];
388
383
  };
389
384
  /**
390
385
  * Extracts microdata from HTML elements using itemtype and itemprop attributes
@@ -395,8 +390,7 @@ const extractSchemaType = (itemType) => {
395
390
  */
396
391
  function extractMicrodata($, selector) {
397
392
  const results = [];
398
- const elements = $(selector);
399
- elements.each((_, el) => {
393
+ $(selector).each((_, el) => {
400
394
  const $element = $(el);
401
395
  const itemType = $element.attr("itemtype");
402
396
  const rootObject = {};
@@ -406,13 +400,11 @@ function extractMicrodata($, selector) {
406
400
  }
407
401
  const allProps = $element.find("[itemprop]").addBack("[itemprop]");
408
402
  const nestedItemTypes = $element.find("[itemtype]");
409
- const rootLevelProps = allProps.filter((_$1, propEl) => {
403
+ allProps.filter((_$1, propEl) => {
410
404
  const $prop = $(propEl);
411
405
  if ($prop.attr("itemtype")) return true;
412
- const isInsideNestedType = nestedItemTypes.toArray().some((nestedEl) => $(nestedEl).find($prop).length > 0);
413
- return !isInsideNestedType;
414
- });
415
- rootLevelProps.each((_$1, propEl) => {
406
+ return !nestedItemTypes.toArray().some((nestedEl) => $(nestedEl).find($prop).length > 0);
407
+ }).each((_$1, propEl) => {
416
408
  const $prop = $(propEl);
417
409
  const propName = $prop.attr("itemprop");
418
410
  if (!propName) return;
@@ -497,8 +489,7 @@ function parseYields(element) {
497
489
  const splitMatch = serveText.match(SERVE_REGEX_TO);
498
490
  if (splitMatch && splitMatch.index !== void 0) serveText = serveText.slice(splitMatch.index + splitMatch[0].length).trim();
499
491
  }
500
- const match = serveText.match(SERVE_REGEX_NUMBER);
501
- const matched = match?.groups?.items || "0";
492
+ const matched = serveText.match(SERVE_REGEX_NUMBER)?.groups?.items || "0";
502
493
  const serveTextLower = serveText.toLowerCase();
503
494
  let bestMatch$1 = null;
504
495
  let bestMatchLength = 0;
@@ -535,8 +526,7 @@ function isSchemaOrgData(obj) {
535
526
  }
536
527
  function isThingType(obj, type) {
537
528
  if (!isBaseType(obj)) return false;
538
- const thingType = Array.isArray(obj["@type"]) ? obj["@type"][0] : obj["@type"];
539
- return thingType === type;
529
+ return (Array.isArray(obj["@type"]) ? obj["@type"][0] : obj["@type"]) === type;
540
530
  }
541
531
  function isAggregateRating(obj) {
542
532
  return isThingType(obj, "AggregateRating");
@@ -645,7 +635,6 @@ var SchemaOrgPlugin = class SchemaOrgPlugin extends ExtractorPlugin {
645
635
  pickFromObject(obj, props) {
646
636
  if (!isPlainObject(obj)) return void 0;
647
637
  for (const prop of props) if (isString(obj[prop])) return obj[prop];
648
- return void 0;
649
638
  }
650
639
  getSchemaTextValue(value, props = [
651
640
  "textValue",
@@ -714,10 +703,7 @@ var SchemaOrgPlugin = class SchemaOrgPlugin extends ExtractorPlugin {
714
703
  return value;
715
704
  }
716
705
  if (isString(value)) return parseMinutes(value);
717
- if (isBaseType(value) && "maxValue" in value) {
718
- const maxValue = this.getSchemaTextValue(value.maxValue);
719
- return parseMinutes(maxValue);
720
- }
706
+ if (isBaseType(value) && "maxValue" in value) return parseMinutes(this.getSchemaTextValue(value.maxValue));
721
707
  return null;
722
708
  }
723
709
  parseInstructions(value) {
@@ -921,8 +907,7 @@ var RecipeExtractor = class RecipeExtractor {
921
907
  this.logger.debug(`Extracting field: ${field}`);
922
908
  for (const plugin of this.plugins) {
923
909
  const pluginLogger = new Logger(this.getContext(plugin.name), this.options.logLevel);
924
- const isSupported = plugin.supports(field);
925
- if (isSupported && !isDefined(result)) try {
910
+ if (plugin.supports(field) && !isDefined(result)) try {
926
911
  result = await plugin.extract(field);
927
912
  } catch (err) {
928
913
  if (err instanceof ExtractionFailedException) pluginLogger.verbose(err.message);
@@ -964,9 +949,7 @@ var AbstractScraper = class {
964
949
  const { extraExtractors = [], extraPostProcessors = [], logLevel = LogLevel.WARN } = options;
965
950
  this.logger = new Logger(this.constructor.name, logLevel);
966
951
  this.$ = cheerio.load(html);
967
- const baseExtractors = [new OpenGraphPlugin(this.$), new SchemaOrgPlugin(this.$, logLevel)];
968
- const basePostProcessors = [new HtmlStripperPlugin()];
969
- this.pluginManager = new PluginManager(baseExtractors, basePostProcessors, extraExtractors, extraPostProcessors);
952
+ this.pluginManager = new PluginManager([new OpenGraphPlugin(this.$), new SchemaOrgPlugin(this.$, logLevel)], [new HtmlStripperPlugin()], extraExtractors, extraPostProcessors);
970
953
  this.recipeExtractor = new RecipeExtractor(this.pluginManager.getExtractors(), this.constructor.name, { logLevel });
971
954
  }
972
955
  /**
@@ -1152,23 +1135,18 @@ var AmericasTestKitchen = class extends AbstractScraper {
1152
1135
  parseHtmlIngredients(prevValue) {
1153
1136
  const headingSelector = "[class*=\"RecipeIngredientGroups_group\"] > span";
1154
1137
  const ingredientSelector = "[class*=\"RecipeIngredient\"] label";
1155
- if (isList(prevValue) && prevValue.size > 0) {
1156
- const result = groupIngredients(this.$, prevValue, headingSelector, ingredientSelector);
1157
- return result;
1158
- }
1138
+ if (isList(prevValue) && prevValue.size > 0) return groupIngredients(this.$, prevValue, headingSelector, ingredientSelector);
1159
1139
  return null;
1160
1140
  }
1161
1141
  getRecipeData() {
1162
1142
  if (this.data === null) {
1163
- const jsonElement = this.$("script[type=\"application/json\"]");
1164
- const jsonString = jsonElement.html();
1143
+ const jsonString = this.$("script[type=\"application/json\"]").html();
1165
1144
  if (!jsonString) {
1166
1145
  this.logger.warn("Could not find JSON data script tag");
1167
1146
  return null;
1168
1147
  }
1169
1148
  try {
1170
- const parsed = pagePropsDataSchema.parse(JSON.parse(jsonString));
1171
- this.data = parsed.props.pageProps.data;
1149
+ this.data = pagePropsDataSchema.parse(JSON.parse(jsonString)).props.pageProps.data;
1172
1150
  } catch (error) {
1173
1151
  this.logger.error("Failed to parse JSON data:", error);
1174
1152
  return null;
@@ -1218,10 +1196,7 @@ var BBCGoodFood = class extends AbstractScraper {
1218
1196
  ingredients(prevValue) {
1219
1197
  const headingSelector = ".recipe__ingredients h3";
1220
1198
  const ingredientSelector = ".recipe__ingredients li";
1221
- if (isList(prevValue) && prevValue.size > 0) {
1222
- const result = groupIngredients(this.$, prevValue, headingSelector, ingredientSelector);
1223
- return result;
1224
- }
1199
+ if (isList(prevValue) && prevValue.size > 0) return groupIngredients(this.$, prevValue, headingSelector, ingredientSelector);
1225
1200
  throw new Error("No ingredients found to group");
1226
1201
  }
1227
1202
  };
@@ -1243,8 +1218,7 @@ var Epicurious = class extends AbstractScraper {
1243
1218
  }
1244
1219
  extractors = { author: this.author.bind(this) };
1245
1220
  author() {
1246
- const author = this.$("a[itemprop=\"author\"]").text().trim();
1247
- return author;
1221
+ return this.$("a[itemprop=\"author\"]").text().trim();
1248
1222
  }
1249
1223
  };
1250
1224
 
@@ -1258,10 +1232,7 @@ var NYTimes = class extends AbstractScraper {
1258
1232
  ingredients(prevValue) {
1259
1233
  const headingSelector = "h3[class*=\"ingredientgroup_name\"]";
1260
1234
  const ingredientSelector = "li[class*=\"ingredient\"]";
1261
- if (isList(prevValue) && prevValue.size > 0) {
1262
- const result = groupIngredients(this.$, prevValue, headingSelector, ingredientSelector);
1263
- return result;
1264
- }
1235
+ if (isList(prevValue) && prevValue.size > 0) return groupIngredients(this.$, prevValue, headingSelector, ingredientSelector);
1265
1236
  throw new Error("No ingredients found to group");
1266
1237
  }
1267
1238
  };
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "recipe-scrapers-js",
3
- "version": "0.1.0-alpha.7",
3
+ "version": "0.1.0",
4
4
  "license": "MIT",
5
5
  "description": "A recipe scrapers library",
6
6
  "author": {
@@ -39,18 +39,18 @@
39
39
  "prepublishOnly": "bun run lint && bun run build"
40
40
  },
41
41
  "peerDependencies": {
42
- "cheerio": "^1.1.0",
43
- "zod": "^3.25.76"
42
+ "cheerio": "^1.1.2",
43
+ "zod": "^4.1.12"
44
44
  },
45
45
  "dependencies": {
46
- "iso8601-duration": "^2.1.2",
46
+ "iso8601-duration": "^2.1.3",
47
47
  "schema-dts": "^1.1.5"
48
48
  },
49
49
  "devDependencies": {
50
- "@biomejs/biome": "^2.1.1",
51
- "@types/bun": "^1.2.18",
52
- "cheerio": "^1.1.0",
53
- "tsdown": "^0.12.9",
54
- "typescript": "^5.8.3"
50
+ "@biomejs/biome": "^2.2.6",
51
+ "@types/bun": "^1.3.0",
52
+ "cheerio": "^1.1.2",
53
+ "tsdown": "^0.15.7",
54
+ "typescript": "^5.9.3"
55
55
  }
56
56
  }