recipe-scrapers-js 0.1.0-alpha.4 → 0.1.0-alpha.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. package/dist/index.js +1 -1
  2. package/package.json +19 -9
  3. package/src/__tests__/abstract-extractor-plugin.test.ts +0 -234
  4. package/src/__tests__/abstract-scraper.test.ts +0 -201
  5. package/src/__tests__/logger.test.ts +0 -318
  6. package/src/__tests__/plugin-manager.test.ts +0 -64
  7. package/src/__tests__/recipe-extractor.test.ts +0 -103
  8. package/src/__tests__/scraper-diagnostics.test.ts +0 -102
  9. package/src/__tests__/setup.ts +0 -1
  10. package/src/abstract-extractor-plugin.ts +0 -16
  11. package/src/abstract-plugin.ts +0 -11
  12. package/src/abstract-postprocessor-plugin.ts +0 -13
  13. package/src/abstract-scraper.ts +0 -222
  14. package/src/constants.ts +0 -19
  15. package/src/exceptions/__tests__/index.test.ts +0 -44
  16. package/src/exceptions/index.ts +0 -33
  17. package/src/index.ts +0 -24
  18. package/src/logger.ts +0 -45
  19. package/src/plugin-manager.ts +0 -33
  20. package/src/plugins/__tests__/html-stripper.processor.test.ts +0 -63
  21. package/src/plugins/__tests__/opengraph.extractor.test.ts +0 -106
  22. package/src/plugins/html-stripper.processor.ts +0 -80
  23. package/src/plugins/opengraph.extractor.ts +0 -61
  24. package/src/plugins/schema-org.extractor/__tests__/index.test.ts +0 -136
  25. package/src/plugins/schema-org.extractor/__tests__/type-predicates.test.ts +0 -116
  26. package/src/plugins/schema-org.extractor/index.ts +0 -622
  27. package/src/plugins/schema-org.extractor/schema-org.interface.ts +0 -25
  28. package/src/plugins/schema-org.extractor/type-predicates.ts +0 -79
  29. package/src/recipe-extractor.ts +0 -93
  30. package/src/scraper-diagnostics.ts +0 -87
  31. package/src/scrapers/__tests__/scrapers.test.ts +0 -94
  32. package/src/scrapers/_index.ts +0 -19
  33. package/src/scrapers/allrecipes.ts +0 -9
  34. package/src/scrapers/bbcgoodfood.ts +0 -43
  35. package/src/scrapers/epicurious.ts +0 -17
  36. package/src/scrapers/nytimes.ts +0 -43
  37. package/src/scrapers/seriouseats.ts +0 -9
  38. package/src/scrapers/simplyrecipes.ts +0 -37
  39. package/src/types/recipe.interface.ts +0 -247
  40. package/src/types/scraper.interface.ts +0 -34
  41. package/src/utils/__tests__/index.test.ts +0 -128
  42. package/src/utils/__tests__/ingredients.test.ts +0 -439
  43. package/src/utils/__tests__/instructions.test.ts +0 -44
  44. package/src/utils/__tests__/microdata.test.ts +0 -93
  45. package/src/utils/__tests__/parse-yields.test.ts +0 -30
  46. package/src/utils/__tests__/parsing.test.ts +0 -69
  47. package/src/utils/fractions.ts +0 -60
  48. package/src/utils/index.ts +0 -40
  49. package/src/utils/ingredients.ts +0 -212
  50. package/src/utils/instructions.ts +0 -45
  51. package/src/utils/microdata.ts +0 -162
  52. package/src/utils/parse-yields.ts +0 -103
  53. package/src/utils/parsing.ts +0 -43
package/dist/index.js CHANGED
@@ -652,7 +652,7 @@ var SchemaOrgPlugin = class SchemaOrgPlugin extends ExtractorPlugin {
652
652
  "title",
653
653
  "@id"
654
654
  ]) {
655
- let text = void 0;
655
+ let text;
656
656
  if (isString(value)) text = value;
657
657
  else if (isNumber(value)) text = value.toString();
658
658
  else if (Array.isArray(value)) text = this.getSchemaTextValue(value[0], props);
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "recipe-scrapers-js",
3
- "version": "0.1.0-alpha.4",
3
+ "version": "0.1.0-alpha.5",
4
4
  "license": "MIT",
5
5
  "description": "A recipe scrapers library",
6
6
  "author": {
@@ -12,11 +12,21 @@
12
12
  "url": "git+https://github.com/nerdstep/recipe-scrapers-js.git"
13
13
  },
14
14
  "type": "module",
15
- "module": "src/index.ts",
15
+ "module": "dist/index.js",
16
16
  "main": "dist/index.js",
17
17
  "types": "dist/index.d.ts",
18
- "files": ["dist", "src", "README.md", "LICENSE"],
19
- "keywords": ["recipe", "scraper", "parser", "food", "cooking"],
18
+ "files": [
19
+ "dist",
20
+ "README.md",
21
+ "LICENSE"
22
+ ],
23
+ "keywords": [
24
+ "recipe",
25
+ "scraper",
26
+ "parser",
27
+ "food",
28
+ "cooking"
29
+ ],
20
30
  "scripts": {
21
31
  "build": "tsdown src/index.ts --outdir dist",
22
32
  "test": "bun test",
@@ -29,17 +39,17 @@
29
39
  "prepublishOnly": "bun run lint && bun run build"
30
40
  },
31
41
  "peerDependencies": {
32
- "cheerio": "^1.0.0"
42
+ "cheerio": "^1.1.0"
33
43
  },
34
44
  "dependencies": {
35
45
  "iso8601-duration": "^2.1.2",
36
46
  "schema-dts": "^1.1.5"
37
47
  },
38
48
  "devDependencies": {
39
- "@biomejs/biome": "^1.9.4",
40
- "@types/bun": "^1.2.15",
41
- "cheerio": "^1.0.0",
42
- "tsdown": "^0.12.7",
49
+ "@biomejs/biome": "^2.0.6",
50
+ "@types/bun": "^1.2.17",
51
+ "cheerio": "^1.1.0",
52
+ "tsdown": "^0.12.9",
43
53
  "typescript": "^5.8.3"
44
54
  }
45
55
  }
@@ -1,234 +0,0 @@
1
- import { beforeEach, describe, expect, it } from 'bun:test'
2
- import {
3
- NotImplementedException,
4
- UnsupportedFieldException,
5
- } from '@/exceptions'
6
- import { load } from 'cheerio'
7
- import { ExtractorPlugin } from '../abstract-extractor-plugin'
8
- import type { RecipeFields } from '../types/recipe.interface'
9
-
10
- class MockExtractorPlugin extends ExtractorPlugin {
11
- name = 'MockExtractorPlugin'
12
- priority = 100
13
-
14
- private supportedFields: Set<keyof RecipeFields>
15
-
16
- constructor(supportedFields: (keyof RecipeFields)[] = []) {
17
- const $ = load('<html><body></body></html>')
18
- super($)
19
- this.supportedFields = new Set(supportedFields)
20
- }
21
-
22
- supports(field: keyof RecipeFields): boolean {
23
- return this.supportedFields.has(field)
24
- }
25
-
26
- extract<Key extends keyof RecipeFields>(field: Key): RecipeFields[Key] {
27
- if (!this.supports(field)) {
28
- throw new UnsupportedFieldException(field)
29
- }
30
-
31
- // Mock extraction logic
32
- switch (field) {
33
- case 'title':
34
- return 'Mock Recipe Title' as RecipeFields[Key]
35
- case 'description':
36
- return 'Mock Recipe Description' as RecipeFields[Key]
37
- case 'ingredients':
38
- return new Set(['ingredient 1', 'ingredient 2']) as RecipeFields[Key]
39
- case 'instructions':
40
- return new Set(['step 1', 'step 2']) as RecipeFields[Key]
41
- case 'prepTime':
42
- return 15 as RecipeFields[Key]
43
- case 'cookTime':
44
- return 30 as RecipeFields[Key]
45
- case 'totalTime':
46
- return 45 as RecipeFields[Key]
47
- case 'yields':
48
- return '4 servings' as RecipeFields[Key]
49
- default:
50
- throw new NotImplementedException(field)
51
- }
52
- }
53
- }
54
-
55
- class AsyncMockExtractorPlugin extends ExtractorPlugin {
56
- name = 'AsyncMockExtractorPlugin'
57
- priority = 100
58
-
59
- constructor() {
60
- const $ = load('<html><body></body></html>')
61
- super($)
62
- }
63
-
64
- supports(field: keyof RecipeFields): boolean {
65
- return ['title', 'description'].includes(field)
66
- }
67
-
68
- async extract<Key extends keyof RecipeFields>(
69
- field: Key,
70
- ): Promise<RecipeFields[Key]> {
71
- await new Promise((resolve) => setTimeout(resolve, 10))
72
-
73
- if (!this.supports(field)) {
74
- throw new UnsupportedFieldException(field)
75
- }
76
-
77
- switch (field) {
78
- case 'title':
79
- return 'Async Recipe Title' as RecipeFields[Key]
80
- case 'description':
81
- return 'Async Recipe Description' as RecipeFields[Key]
82
- default:
83
- throw new NotImplementedException(field)
84
- }
85
- }
86
- }
87
-
88
- class ThrowingExtractorPlugin extends ExtractorPlugin {
89
- name = 'ThrowingExtractorPlugin'
90
- priority = 100
91
-
92
- constructor() {
93
- const $ = load('<html><body></body></html>')
94
- super($)
95
- }
96
-
97
- supports(field: keyof RecipeFields): boolean {
98
- return true
99
- }
100
-
101
- extract<Key extends keyof RecipeFields>(field: Key): RecipeFields[Key] {
102
- throw new Error(`Extraction failed for field: ${String(field)}`)
103
- }
104
- }
105
-
106
- describe('ExtractorPlugin', () => {
107
- let plugin: MockExtractorPlugin
108
-
109
- beforeEach(() => {
110
- plugin = new MockExtractorPlugin([
111
- 'title',
112
- 'description',
113
- 'ingredients',
114
- 'prepTime',
115
- ])
116
- })
117
-
118
- describe('inheritance', () => {
119
- it('should extend AbstractPlugin', () => {
120
- expect(plugin).toBeInstanceOf(ExtractorPlugin)
121
- })
122
-
123
- it('should have access to cheerio instance from parent', () => {
124
- expect(plugin.$).toBeDefined()
125
- expect(typeof plugin.$).toBe('function')
126
- })
127
- })
128
-
129
- describe('supports method', () => {
130
- it('should return true for supported fields', () => {
131
- expect(plugin.supports('title')).toBe(true)
132
- expect(plugin.supports('description')).toBe(true)
133
- expect(plugin.supports('ingredients')).toBe(true)
134
- expect(plugin.supports('prepTime')).toBe(true)
135
- })
136
-
137
- it('should return false for unsupported fields', () => {
138
- expect(plugin.supports('cookTime')).toBe(false)
139
- expect(plugin.supports('totalTime')).toBe(false)
140
- expect(plugin.supports('yields')).toBe(false)
141
- expect(plugin.supports('author')).toBe(false)
142
- })
143
-
144
- it('should handle empty supported fields', () => {
145
- const emptyPlugin = new MockExtractorPlugin([])
146
- expect(emptyPlugin.supports('title')).toBe(false)
147
- expect(emptyPlugin.supports('description')).toBe(false)
148
- })
149
-
150
- it('should handle all fields as supported', () => {
151
- const allFieldsPlugin = new MockExtractorPlugin([
152
- 'title',
153
- 'description',
154
- 'ingredients',
155
- 'instructions',
156
- 'prepTime',
157
- 'cookTime',
158
- 'totalTime',
159
- 'yields',
160
- ])
161
-
162
- expect(allFieldsPlugin.supports('title')).toBe(true)
163
- expect(allFieldsPlugin.supports('cookTime')).toBe(true)
164
- expect(allFieldsPlugin.supports('yields')).toBe(true)
165
- })
166
- })
167
-
168
- describe('extract method', () => {
169
- it('should extract supported fields', () => {
170
- expect(plugin.extract('title')).toBe('Mock Recipe Title')
171
- expect(plugin.extract('description')).toBe('Mock Recipe Description')
172
- expect(plugin.extract('prepTime')).toBe(15)
173
- expect(plugin.extract('ingredients')).toEqual(
174
- new Set(['ingredient 1', 'ingredient 2']),
175
- )
176
- })
177
-
178
- it('should throw error for unsupported fields', () => {
179
- expect(() => plugin.extract('cookTime')).toThrow(
180
- 'Extraction not supported for field: cookTime',
181
- )
182
- expect(() => plugin.extract('totalTime')).toThrow(
183
- 'Extraction not supported for field: totalTime',
184
- )
185
- })
186
- })
187
-
188
- describe('async extraction', () => {
189
- let asyncPlugin: AsyncMockExtractorPlugin
190
-
191
- beforeEach(() => {
192
- asyncPlugin = new AsyncMockExtractorPlugin()
193
- })
194
-
195
- it('should handle async extraction', async () => {
196
- const title = await asyncPlugin.extract('title')
197
- expect(title).toBe('Async Recipe Title')
198
- const description = await asyncPlugin.extract('description')
199
- expect(description).toBe('Async Recipe Description')
200
- })
201
-
202
- it('should throw error for unsupported fields in async mode', async () => {
203
- await expect(asyncPlugin.extract('cookTime')).rejects.toThrow(
204
- 'Extraction not supported for field: cookTime',
205
- )
206
- })
207
- })
208
-
209
- describe('error handling', () => {
210
- let throwingPlugin: ThrowingExtractorPlugin
211
-
212
- beforeEach(() => {
213
- throwingPlugin = new ThrowingExtractorPlugin()
214
- })
215
-
216
- it('should propagate extraction errors', () => {
217
- expect(() => throwingPlugin.extract('title')).toThrow(
218
- 'Extraction failed for field: title',
219
- )
220
- expect(() => throwingPlugin.extract('description')).toThrow(
221
- 'Extraction failed for field: description',
222
- )
223
- })
224
- })
225
-
226
- describe('edge cases', () => {
227
- it('should throw on undefined extractor', () => {
228
- const plugin = new MockExtractorPlugin(['author'])
229
- expect(() => plugin.extract('author')).toThrow(
230
- 'Method should be implemented: author',
231
- )
232
- })
233
- })
234
- })
@@ -1,201 +0,0 @@
1
- import { afterEach, beforeEach, describe, expect, it, spyOn } from 'bun:test'
2
- import { AbstractScraper } from '@/abstract-scraper'
3
- import { NotImplementedException } from '@/exceptions'
4
- import { Logger } from '@/logger'
5
- import type { RecipeFields, RecipeObject } from '@/types/recipe.interface'
6
-
7
- class DummyScraper extends AbstractScraper {
8
- // implement required static host
9
- static host(): string {
10
- return 'dummy.com'
11
- }
12
- // no site-specific extractors
13
- extractors = {}
14
- }
15
-
16
- describe('AbstractScraper utility methods', () => {
17
- let scraper: DummyScraper
18
-
19
- describe('static host()', () => {
20
- it('throws by default on base class', () => {
21
- expect(() => AbstractScraper.host()).toThrow(NotImplementedException)
22
- })
23
-
24
- it('returns host for subclass', () => {
25
- expect(DummyScraper.host()).toBe('dummy.com')
26
- })
27
- })
28
-
29
- describe('canonicalUrl()', () => {
30
- it('returns absolute canonical URL when provided', () => {
31
- const html = '<link rel="canonical" href="/foo/bar"/>'
32
- scraper = new DummyScraper(html, 'http://example.com/page', {})
33
- expect(scraper.canonicalUrl()).toBe('http://example.com/foo/bar')
34
- })
35
-
36
- it('returns base URL when no canonical link', () => {
37
- const html = '<html></html>'
38
- scraper = new DummyScraper(html, 'https://site.org/path?x=1', {})
39
- expect(scraper.canonicalUrl()).toBe('https://site.org/path?x=1')
40
- })
41
-
42
- it('prefixes URL with https when missing protocol', () => {
43
- const html = ''
44
- scraper = new DummyScraper(html, 'site.org/abc', {})
45
- expect(scraper.canonicalUrl()).toBe('https://site.org/abc')
46
- })
47
- })
48
-
49
- describe('language()', () => {
50
- let warnSpy: ReturnType<typeof spyOn>
51
-
52
- beforeEach(() => {
53
- warnSpy = spyOn(Logger.prototype, 'warn').mockImplementation(() => {})
54
- })
55
- afterEach(() => {
56
- warnSpy.mockRestore()
57
- })
58
-
59
- it('reads html lang attribute', () => {
60
- const html = '<html lang="fr"><body></body></html>'
61
- scraper = new DummyScraper(html, 'url', {})
62
- expect(scraper.language()).toBe('fr')
63
- expect(warnSpy).not.toHaveBeenCalled()
64
- })
65
-
66
- it('falls back to meta http-equiv content-language', () => {
67
- const html =
68
- '<html><head>' +
69
- '<meta http-equiv="content-language" content="de, en"/>' +
70
- '</head></html>'
71
- scraper = new DummyScraper(html, 'url', {})
72
- expect(scraper.language()).toBe('de')
73
- expect(warnSpy).not.toHaveBeenCalled()
74
- })
75
-
76
- it('defaults to "en" and logs warning when none found', () => {
77
- scraper = new DummyScraper('<html></html>', 'url', {})
78
- expect(scraper.language()).toBe('en')
79
- expect(warnSpy).toHaveBeenCalledWith('Could not determine language')
80
- })
81
- })
82
-
83
- describe('links()', () => {
84
- const html = `
85
- <a href="http://foo.com/page">Foo</a>
86
- <a href="/local">Local</a>
87
- <a>No href</a>
88
- `
89
- it('returns empty list when linksEnabled is false', () => {
90
- scraper = new DummyScraper(html, 'url', { linksEnabled: false })
91
- expect(scraper.links()).toEqual([])
92
- })
93
-
94
- it('returns only absolute links when linksEnabled is true', () => {
95
- scraper = new DummyScraper(html, 'url', { linksEnabled: true })
96
- const links = scraper.links()
97
- expect(links).toEqual([{ href: 'http://foo.com/page', text: 'Foo' }])
98
- })
99
- })
100
- })
101
-
102
- // Test subclass overriding extract, canonicalUrl, language, links, and host
103
- class TestScraper extends AbstractScraper {
104
- static host(): string {
105
- return 'hostVal'
106
- }
107
-
108
- // Provide no real HTML parsing
109
- extractors = {}
110
- private data: Partial<Record<keyof RecipeFields, unknown>>
111
- constructor(data: Partial<Record<keyof RecipeFields, unknown>>) {
112
- // html, url and options are unused because we override methods
113
- super('', '', { linksEnabled: true })
114
- this.data = data
115
- }
116
-
117
- // Return mocked values for every field
118
- async extract<Key extends keyof RecipeFields>(
119
- field: Key,
120
- ): Promise<RecipeFields[Key]> {
121
- return this.data[field] as RecipeFields[Key]
122
- }
123
-
124
- override canonicalUrl(): string {
125
- return this.data.canonicalUrl as string
126
- }
127
- override language(): string {
128
- return this.data.language as string
129
- }
130
- override links(): RecipeFields['links'] {
131
- return this.data.links as RecipeFields['links']
132
- }
133
- }
134
-
135
- describe('AbstractScraper.toObject', () => {
136
- it('returns a fully serialized RecipeObject', async () => {
137
- // Prepare mock values
138
- const mockValues: Partial<Record<keyof RecipeFields, unknown>> = {
139
- siteName: 'site',
140
- author: 'auth',
141
- title: 'ttl',
142
- image: 'img',
143
- description: 'desc',
144
- yields: '4 servings',
145
- totalTime: 30,
146
- cookTime: 10,
147
- prepTime: 20,
148
- cookingMethod: 'bake',
149
- ratings: 4.2,
150
- ratingsCount: 100,
151
- category: new Set(['cat1', 'cat2']),
152
- cuisine: new Set(['cui']),
153
- dietaryRestrictions: new Set(['veg']),
154
- equipment: new Set(['pan']),
155
- ingredients: new Set(['ing1', 'ing2']),
156
- instructions: new Set(['step1', 'step2']),
157
- keywords: new Set(['kw1']),
158
- nutrients: new Map([['cal', '200kcal']]),
159
- reviews: new Map([['rev1', 'Good']]),
160
- canonicalUrl: 'http://can.url',
161
- language: 'en-US',
162
- links: [{ href: 'http://link', text: 'LinkText' }],
163
- }
164
-
165
- const scraper = new TestScraper(mockValues)
166
- const result = await scraper.toObject()
167
-
168
- // Basic scalar fields
169
- const expectedRest = {
170
- host: 'hostVal',
171
- siteName: 'site',
172
- author: 'auth',
173
- title: 'ttl',
174
- image: 'img',
175
- canonicalUrl: 'http://can.url',
176
- language: 'en-US',
177
- links: [{ href: 'http://link', text: 'LinkText' }],
178
- description: 'desc',
179
- yields: '4 servings',
180
- totalTime: 30,
181
- cookTime: 10,
182
- prepTime: 20,
183
- cookingMethod: 'bake',
184
- ratings: 4.2,
185
- ratingsCount: 100,
186
- }
187
-
188
- expect(result).toEqual({
189
- ...expectedRest,
190
- category: ['cat1', 'cat2'],
191
- cuisine: ['cui'],
192
- dietaryRestrictions: ['veg'],
193
- equipment: ['pan'],
194
- ingredients: ['ing1', 'ing2'],
195
- instructions: ['step1', 'step2'],
196
- keywords: ['kw1'],
197
- nutrients: { cal: '200kcal' },
198
- reviews: { rev1: 'Good' },
199
- } as RecipeObject)
200
- })
201
- })