recipe-scrapers-js 0.1.0 → 1.0.0-rc.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md ADDED
@@ -0,0 +1,37 @@
1
+ <!-- markdownlint-disable MD024 -->
2
+ # Changelog
3
+
4
+ All notable changes to this project will be documented in this file.
5
+
6
+ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
7
+ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
8
+
9
+ ## [1.0.0-rc.1] - 2025-12-20
10
+
11
+ ### Added
12
+
13
+ - chore: tsdown configuration file
14
+
15
+ ### Fixed
16
+
17
+ - fix: main/module/type entries in package.json; add exports field
18
+
19
+ ## [1.0.0-rc.0] - 2025-12-20
20
+
21
+ ### Added
22
+
23
+ - Optional ingredient parsing via [parse-ingredient](https://github.com/jakeboone02/parse-ingredient)
24
+ - `parse()` and `safeParse()` methods for Zod schema validated recipe extraction
25
+
26
+ ### Changed
27
+
28
+ - **BREAKING**: Renamed `toObject()` method to `toRecipeObject()` for clarity
29
+ - **BREAKING**: Ingredients and instructions now require grouped structures (each group has `name` and `items`) instead of flat arrays
30
+
31
+ ---
32
+
33
+ ## Pre-Release History
34
+
35
+ Prior to version 1.0.0-rc.0, this project was in alpha development. No formal changelog was maintained during the alpha phase.
36
+
37
+ [1.0.0-rc.0]: https://github.com/nerdstep/recipe-scrapers-js/releases/tag/v1.0.0-rc.0
package/README.md CHANGED
@@ -4,18 +4,15 @@
4
4
  [![build](https://img.shields.io/github/actions/workflow/status/nerdstep/recipe-scrapers-js/ci.yml?branch=main&style=flat-square)](https://github.com/nerdstep/recipe-scrapers-js/actions)
5
5
  [![license](https://img.shields.io/npm/l/recipe-scrapers-js.svg?style=flat-square)](LICENSE)
6
6
 
7
- > **⚠️ Alpha Version**
8
- > This library is currently in **alpha**, APIs and behavior may change without notice. Use at your own risk.
9
-
10
7
  A TypeScript/JavaScript library for scraping recipe data from various cooking websites. This is a JavaScript port inspired by the Python [recipe-scrapers](https://github.com/hhursev/recipe-scrapers) library.
11
8
 
12
9
  ## Features
13
10
 
14
- - 🍳 Extract structured recipe data from cooking websites
15
- - 🔍 Support for multiple popular recipe sites
16
- - 🚀 Built with TypeScript for better developer experience
17
- - Fast and lightweight using Bun runtime for development and testing
18
- - 🧪 Comprehensive test coverage
11
+ - Extract structured recipe data from cooking websites
12
+ - Support for multiple popular recipe sites
13
+ - Built with TypeScript for better developer experience
14
+ - Fast and lightweight using the Bun runtime for development and testing
15
+ - Comprehensive test coverage
19
16
 
20
17
  ## Installation
21
18
 
@@ -45,9 +42,12 @@ const url = 'https://allrecipes.com/recipe/example'
45
42
  // This function will throw if a scraper does not exist.
46
43
  const MyScraper = getScraper(url)
47
44
  const scraper = new MyScraper(html, url, /* { ...options } */)
48
- const recipe = await scraper.toObject()
49
45
 
50
- console.log(recipe)
46
+ // Get the recipe data
47
+ const rawRecipe = await scraper.toRecipeObject()
48
+
49
+ // Get the schema validated recipe data
50
+ const validatedRecipe = await scraper.parse()
51
51
  ```
52
52
 
53
53
  ### Options
@@ -79,9 +79,18 @@ interface ScraperOptions {
79
79
  /**
80
80
  * Logging level for the scraper.
81
81
  * This controls the verbosity of logs produced by the scraper.
82
- * @default LogLevel.Warn
82
+ * @default LogLevel.WARN
83
83
  */
84
84
  logLevel?: LogLevel
85
+ /**
86
+ * Enable ingredient parsing using the parse-ingredient library.
87
+ * When enabled, each ingredient item will include a `parsed` field
88
+ * containing structured data (quantity, unit, description, etc.).
89
+ * Can be `true` for defaults or an options object.
90
+ * @see https://github.com/jakeboone02/parse-ingredient
91
+ * @default false
92
+ */
93
+ parseIngredients?: boolean | ParseIngredientOptions
85
94
  }
86
95
  ```
87
96
 
@@ -100,7 +109,7 @@ This library supports recipe extraction from various popular cooking websites. T
100
109
  ```bash
101
110
  # Clone the repository
102
111
  git clone https://github.com/nerdstep/recipe-scrapers-js.git
103
- cd recipe-scrapers
112
+ cd recipe-scrapers-js
104
113
 
105
114
  # Install dependencies
106
115
  bun install
@@ -116,7 +125,7 @@ bun run build
116
125
 
117
126
  - `bun run build` - Build the library for distribution
118
127
  - `bun test` - Run the test suite
119
- - `bun test:coverage` - Run tests with coverage report
128
+ - `bun test:coverage` - Run tests with a coverage report
120
129
  - `bun fetch-test-data` - Fetch test data from the original Python repository
121
130
  - `bun lint` - Run linting and type checking
122
131
  - `bun lint:fix` - Fix linting issues automatically
@@ -155,11 +164,16 @@ export class NewSiteScraper extends AbstractScraper {
155
164
  }
156
165
 
157
166
  protected extractIngredients(): RecipeFields['ingredients'] {
158
- const items = this.$('.ingredient').map((_, el) =>
159
- this.$(el).text().trim()
160
- ).get()
161
-
162
- return new Set(items)
167
+ const items = this.$('.ingredient')
168
+ .map((_, el) => this.$(el).text().trim())
169
+ .get()
170
+
171
+ return [
172
+ {
173
+ name: null,
174
+ items: items.map((value) => ({ value })),
175
+ },
176
+ ]
163
177
  }
164
178
 
165
179
  // ... implement other extraction methods
@@ -198,6 +212,7 @@ This project is licensed under the MIT License - see the [LICENSE](LICENSE) file
198
212
  - [Schema.org Recipe specification](https://schema.org/Recipe)
199
213
  - [Cheerio](https://cheerio.js.org/) for HTML parsing
200
214
  - [Zod](https://zod.dev/) for schema validation
215
+ - [parse-ingredient](https://github.com/jakeboone02/parse-ingredient) for ingredient parsing
201
216
 
202
217
  ## Copyright and Usage
203
218