recipe-scrapers-js 0.1.0 → 1.0.0-rc.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +37 -0
- package/README.md +33 -18
- package/dist/index.d.mts +964 -0
- package/dist/{index.js → index.mjs} +455 -152
- package/docs/architecture.md +578 -0
- package/docs/ingredients-architecture.md +363 -0
- package/package.json +22 -11
- package/dist/index.d.ts +0 -387
package/CHANGELOG.md
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
<!-- markdownlint-disable MD024 -->
|
|
2
|
+
# Changelog
|
|
3
|
+
|
|
4
|
+
All notable changes to this project will be documented in this file.
|
|
5
|
+
|
|
6
|
+
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
|
7
|
+
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|
8
|
+
|
|
9
|
+
## [1.0.0-rc.1] - 2025-12-20
|
|
10
|
+
|
|
11
|
+
### Added
|
|
12
|
+
|
|
13
|
+
- chore: tsdown configuration file
|
|
14
|
+
|
|
15
|
+
### Fixed
|
|
16
|
+
|
|
17
|
+
- fix: main/module/type entries in package.json; add exports field
|
|
18
|
+
|
|
19
|
+
## [1.0.0-rc.0] - 2025-12-20
|
|
20
|
+
|
|
21
|
+
### Added
|
|
22
|
+
|
|
23
|
+
- Optional ingredient parsing via [parse-ingredient](https://github.com/jakeboone02/parse-ingredient)
|
|
24
|
+
- `parse()` and `safeParse()` methods for Zod schema validated recipe extraction
|
|
25
|
+
|
|
26
|
+
### Changed
|
|
27
|
+
|
|
28
|
+
- **BREAKING**: Renamed `toObject()` method to `toRecipeObject()` for clarity
|
|
29
|
+
- **BREAKING**: Ingredients and instructions now require grouped structures (each group has `name` and `items`) instead of flat arrays
|
|
30
|
+
|
|
31
|
+
---
|
|
32
|
+
|
|
33
|
+
## Pre-Release History
|
|
34
|
+
|
|
35
|
+
Prior to version 1.0.0-rc.0, this project was in alpha development. No formal changelog was maintained during the alpha phase.
|
|
36
|
+
|
|
37
|
+
[1.0.0-rc.0]: https://github.com/nerdstep/recipe-scrapers-js/releases/tag/v1.0.0-rc.0
|
package/README.md
CHANGED
|
@@ -4,18 +4,15 @@
|
|
|
4
4
|
[](https://github.com/nerdstep/recipe-scrapers-js/actions)
|
|
5
5
|
[](LICENSE)
|
|
6
6
|
|
|
7
|
-
> **⚠️ Alpha Version**
|
|
8
|
-
> This library is currently in **alpha**; APIs and behavior may change without notice. Use at your own risk.
|
|
9
|
-
|
|
10
7
|
A TypeScript/JavaScript library for scraping recipe data from various cooking websites. This is a JavaScript port inspired by the Python [recipe-scrapers](https://github.com/hhursev/recipe-scrapers) library.
|
|
11
8
|
|
|
12
9
|
## Features
|
|
13
10
|
|
|
14
|
-
-
|
|
15
|
-
-
|
|
16
|
-
-
|
|
17
|
-
-
|
|
18
|
-
-
|
|
11
|
+
- Extract structured recipe data from cooking websites
|
|
12
|
+
- Support for multiple popular recipe sites
|
|
13
|
+
- Built with TypeScript for better developer experience
|
|
14
|
+
- Fast and lightweight using the Bun runtime for development and testing
|
|
15
|
+
- Comprehensive test coverage
|
|
19
16
|
|
|
20
17
|
## Installation
|
|
21
18
|
|
|
@@ -45,9 +42,12 @@ const url = 'https://allrecipes.com/recipe/example'
|
|
|
45
42
|
// This function will throw if a scraper does not exist.
|
|
46
43
|
const MyScraper = getScraper(url)
|
|
47
44
|
const scraper = new MyScraper(html, url, /* { ...options } */)
|
|
48
|
-
const recipe = await scraper.toObject()
|
|
49
45
|
|
|
50
|
-
|
|
46
|
+
// Get the recipe data
|
|
47
|
+
const rawRecipe = await scraper.toRecipeObject()
|
|
48
|
+
|
|
49
|
+
// Get the schema validated recipe data
|
|
50
|
+
const validatedRecipe = await scraper.parse()
|
|
51
51
|
```
|
|
52
52
|
|
|
53
53
|
### Options
|
|
@@ -79,9 +79,18 @@ interface ScraperOptions {
|
|
|
79
79
|
/**
|
|
80
80
|
* Logging level for the scraper.
|
|
81
81
|
* This controls the verbosity of logs produced by the scraper.
|
|
82
|
-
* @default LogLevel.
|
|
82
|
+
* @default LogLevel.WARN
|
|
83
83
|
*/
|
|
84
84
|
logLevel?: LogLevel
|
|
85
|
+
/**
|
|
86
|
+
* Enable ingredient parsing using the parse-ingredient library.
|
|
87
|
+
* When enabled, each ingredient item will include a `parsed` field
|
|
88
|
+
* containing structured data (quantity, unit, description, etc.).
|
|
89
|
+
* Can be `true` for defaults or an options object.
|
|
90
|
+
* @see https://github.com/jakeboone02/parse-ingredient
|
|
91
|
+
* @default false
|
|
92
|
+
*/
|
|
93
|
+
parseIngredients?: boolean | ParseIngredientOptions
|
|
85
94
|
}
|
|
86
95
|
```
|
|
87
96
|
|
|
@@ -100,7 +109,7 @@ This library supports recipe extraction from various popular cooking websites. T
|
|
|
100
109
|
```bash
|
|
101
110
|
# Clone the repository
|
|
102
111
|
git clone https://github.com/nerdstep/recipe-scrapers-js.git
|
|
103
|
-
cd recipe-scrapers
|
|
112
|
+
cd recipe-scrapers-js
|
|
104
113
|
|
|
105
114
|
# Install dependencies
|
|
106
115
|
bun install
|
|
@@ -116,7 +125,7 @@ bun run build
|
|
|
116
125
|
|
|
117
126
|
- `bun run build` - Build the library for distribution
|
|
118
127
|
- `bun test` - Run the test suite
|
|
119
|
-
- `bun test:coverage` - Run tests with coverage report
|
|
128
|
+
- `bun test:coverage` - Run tests with a coverage report
|
|
120
129
|
- `bun fetch-test-data` - Fetch test data from the original Python repository
|
|
121
130
|
- `bun lint` - Run linting and type checking
|
|
122
131
|
- `bun lint:fix` - Fix linting issues automatically
|
|
@@ -155,11 +164,16 @@ export class NewSiteScraper extends AbstractScraper {
|
|
|
155
164
|
}
|
|
156
165
|
|
|
157
166
|
protected extractIngredients(): RecipeFields['ingredients'] {
|
|
158
|
-
const items = this.$('.ingredient')
|
|
159
|
-
this.$(el).text().trim()
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
return
|
|
167
|
+
const items = this.$('.ingredient')
|
|
168
|
+
.map((_, el) => this.$(el).text().trim())
|
|
169
|
+
.get()
|
|
170
|
+
|
|
171
|
+
return [
|
|
172
|
+
{
|
|
173
|
+
name: null,
|
|
174
|
+
items: items.map((value) => ({ value })),
|
|
175
|
+
},
|
|
176
|
+
]
|
|
163
177
|
}
|
|
164
178
|
|
|
165
179
|
// ... implement other extraction methods
|
|
@@ -198,6 +212,7 @@ This project is licensed under the MIT License - see the [LICENSE](LICENSE) file
|
|
|
198
212
|
- [Schema.org Recipe specification](https://schema.org/Recipe)
|
|
199
213
|
- [Cheerio](https://cheerio.js.org/) for HTML parsing
|
|
200
214
|
- [Zod](https://zod.dev/) for schema validation
|
|
215
|
+
- [parse-ingredient](https://github.com/jakeboone02/parse-ingredient) for ingredient parsing
|
|
201
216
|
|
|
202
217
|
## Copyright and Usage
|
|
203
218
|
|