recipe-scrapers-js 0.1.0 → 1.0.0-rc.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
package/README.md CHANGED
@@ -4,18 +4,15 @@
4
4
  [![build](https://img.shields.io/github/actions/workflow/status/nerdstep/recipe-scrapers-js/ci.yml?branch=main&style=flat-square)](https://github.com/nerdstep/recipe-scrapers-js/actions)
5
5
  [![license](https://img.shields.io/npm/l/recipe-scrapers-js.svg?style=flat-square)](LICENSE)
6
6
 
7
- > **⚠️ Alpha Version**
8
- > This library is currently in **alpha**, APIs and behavior may change without notice. Use at your own risk.
9
-
10
7
  A TypeScript/JavaScript library for scraping recipe data from various cooking websites. This is a JavaScript port inspired by the Python [recipe-scrapers](https://github.com/hhursev/recipe-scrapers) library.
11
8
 
12
9
  ## Features
13
10
 
14
- - 🍳 Extract structured recipe data from cooking websites
15
- - 🔍 Support for multiple popular recipe sites
16
- - 🚀 Built with TypeScript for better developer experience
17
- - Fast and lightweight using Bun runtime for development and testing
18
- - 🧪 Comprehensive test coverage
11
+ - Extract structured recipe data from cooking websites
12
+ - Support for multiple popular recipe sites
13
+ - Built with TypeScript for better developer experience
14
+ - Fast and lightweight using the Bun runtime for development and testing
15
+ - Comprehensive test coverage
19
16
 
20
17
  ## Installation
21
18
 
@@ -45,9 +42,12 @@ const url = 'https://allrecipes.com/recipe/example'
45
42
  // This function will throw if a scraper does not exist.
46
43
  const MyScraper = getScraper(url)
47
44
  const scraper = new MyScraper(html, url, /* { ...options } */)
48
- const recipe = await scraper.toObject()
49
45
 
50
- console.log(recipe)
46
+ // Get the recipe data
47
+ const rawRecipe = await scraper.toRecipeObject()
48
+
49
+ // Get the schema validated recipe data
50
+ const validatedRecipe = await scraper.parse()
51
51
  ```
52
52
 
53
53
  ### Options
@@ -79,9 +79,18 @@ interface ScraperOptions {
79
79
  /**
80
80
  * Logging level for the scraper.
81
81
  * This controls the verbosity of logs produced by the scraper.
82
- * @default LogLevel.Warn
82
+ * @default LogLevel.WARN
83
83
  */
84
84
  logLevel?: LogLevel
85
+ /**
86
+ * Enable ingredient parsing using the parse-ingredient library.
87
+ * When enabled, each ingredient item will include a `parsed` field
88
+ * containing structured data (quantity, unit, description, etc.).
89
+ * Can be `true` for defaults or an options object.
90
+ * @see https://github.com/jakeboone02/parse-ingredient
91
+ * @default false
92
+ */
93
+ parseIngredients?: boolean | ParseIngredientOptions
85
94
  }
86
95
  ```
87
96
 
@@ -100,7 +109,7 @@ This library supports recipe extraction from various popular cooking websites. T
100
109
  ```bash
101
110
  # Clone the repository
102
111
  git clone https://github.com/nerdstep/recipe-scrapers-js.git
103
- cd recipe-scrapers
112
+ cd recipe-scrapers-js
104
113
 
105
114
  # Install dependencies
106
115
  bun install
@@ -116,7 +125,7 @@ bun run build
116
125
 
117
126
  - `bun run build` - Build the library for distribution
118
127
  - `bun test` - Run the test suite
119
- - `bun test:coverage` - Run tests with coverage report
128
+ - `bun test:coverage` - Run tests with a coverage report
120
129
  - `bun fetch-test-data` - Fetch test data from the original Python repository
121
130
  - `bun lint` - Run linting and type checking
122
131
  - `bun lint:fix` - Fix linting issues automatically
@@ -155,11 +164,16 @@ export class NewSiteScraper extends AbstractScraper {
155
164
  }
156
165
 
157
166
  protected extractIngredients(): RecipeFields['ingredients'] {
158
- const items = this.$('.ingredient').map((_, el) =>
159
- this.$(el).text().trim()
160
- ).get()
161
-
162
- return new Set(items)
167
+ const items = this.$('.ingredient')
168
+ .map((_, el) => this.$(el).text().trim())
169
+ .get()
170
+
171
+ return [
172
+ {
173
+ name: null,
174
+ items: items.map((value) => ({ value })),
175
+ },
176
+ ]
163
177
  }
164
178
 
165
179
  // ... implement other extraction methods
@@ -198,6 +212,7 @@ This project is licensed under the MIT License - see the [LICENSE](LICENSE) file
198
212
  - [Schema.org Recipe specification](https://schema.org/Recipe)
199
213
  - [Cheerio](https://cheerio.js.org/) for HTML parsing
200
214
  - [Zod](https://zod.dev/) for schema validation
215
+ - [parse-ingredient](https://github.com/jakeboone02/parse-ingredient) for ingredient parsing
201
216
 
202
217
  ## Copyright and Usage
203
218