recipe-scrapers-js 0.1.0-alpha.4 → 0.1.0-alpha.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +1 -1
- package/package.json +19 -9
- package/src/__tests__/abstract-extractor-plugin.test.ts +0 -234
- package/src/__tests__/abstract-scraper.test.ts +0 -201
- package/src/__tests__/logger.test.ts +0 -318
- package/src/__tests__/plugin-manager.test.ts +0 -64
- package/src/__tests__/recipe-extractor.test.ts +0 -103
- package/src/__tests__/scraper-diagnostics.test.ts +0 -102
- package/src/__tests__/setup.ts +0 -1
- package/src/abstract-extractor-plugin.ts +0 -16
- package/src/abstract-plugin.ts +0 -11
- package/src/abstract-postprocessor-plugin.ts +0 -13
- package/src/abstract-scraper.ts +0 -222
- package/src/constants.ts +0 -19
- package/src/exceptions/__tests__/index.test.ts +0 -44
- package/src/exceptions/index.ts +0 -33
- package/src/index.ts +0 -24
- package/src/logger.ts +0 -45
- package/src/plugin-manager.ts +0 -33
- package/src/plugins/__tests__/html-stripper.processor.test.ts +0 -63
- package/src/plugins/__tests__/opengraph.extractor.test.ts +0 -106
- package/src/plugins/html-stripper.processor.ts +0 -80
- package/src/plugins/opengraph.extractor.ts +0 -61
- package/src/plugins/schema-org.extractor/__tests__/index.test.ts +0 -136
- package/src/plugins/schema-org.extractor/__tests__/type-predicates.test.ts +0 -116
- package/src/plugins/schema-org.extractor/index.ts +0 -622
- package/src/plugins/schema-org.extractor/schema-org.interface.ts +0 -25
- package/src/plugins/schema-org.extractor/type-predicates.ts +0 -79
- package/src/recipe-extractor.ts +0 -93
- package/src/scraper-diagnostics.ts +0 -87
- package/src/scrapers/__tests__/scrapers.test.ts +0 -94
- package/src/scrapers/_index.ts +0 -19
- package/src/scrapers/allrecipes.ts +0 -9
- package/src/scrapers/bbcgoodfood.ts +0 -43
- package/src/scrapers/epicurious.ts +0 -17
- package/src/scrapers/nytimes.ts +0 -43
- package/src/scrapers/seriouseats.ts +0 -9
- package/src/scrapers/simplyrecipes.ts +0 -37
- package/src/types/recipe.interface.ts +0 -247
- package/src/types/scraper.interface.ts +0 -34
- package/src/utils/__tests__/index.test.ts +0 -128
- package/src/utils/__tests__/ingredients.test.ts +0 -439
- package/src/utils/__tests__/instructions.test.ts +0 -44
- package/src/utils/__tests__/microdata.test.ts +0 -93
- package/src/utils/__tests__/parse-yields.test.ts +0 -30
- package/src/utils/__tests__/parsing.test.ts +0 -69
- package/src/utils/fractions.ts +0 -60
- package/src/utils/index.ts +0 -40
- package/src/utils/ingredients.ts +0 -212
- package/src/utils/instructions.ts +0 -45
- package/src/utils/microdata.ts +0 -162
- package/src/utils/parse-yields.ts +0 -103
- package/src/utils/parsing.ts +0 -43
package/dist/index.js
CHANGED
|
@@ -652,7 +652,7 @@ var SchemaOrgPlugin = class SchemaOrgPlugin extends ExtractorPlugin {
|
|
|
652
652
|
"title",
|
|
653
653
|
"@id"
|
|
654
654
|
]) {
|
|
655
|
-
let text
|
|
655
|
+
let text;
|
|
656
656
|
if (isString(value)) text = value;
|
|
657
657
|
else if (isNumber(value)) text = value.toString();
|
|
658
658
|
else if (Array.isArray(value)) text = this.getSchemaTextValue(value[0], props);
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "recipe-scrapers-js",
|
|
3
|
-
"version": "0.1.0-alpha.4",
|
|
3
|
+
"version": "0.1.0-alpha.5",
|
|
4
4
|
"license": "MIT",
|
|
5
5
|
"description": "A recipe scrapers library",
|
|
6
6
|
"author": {
|
|
@@ -12,11 +12,21 @@
|
|
|
12
12
|
"url": "git+https://github.com/nerdstep/recipe-scrapers-js.git"
|
|
13
13
|
},
|
|
14
14
|
"type": "module",
|
|
15
|
-
"module": "
|
|
15
|
+
"module": "dist/index.js",
|
|
16
16
|
"main": "dist/index.js",
|
|
17
17
|
"types": "dist/index.d.ts",
|
|
18
|
-
"files": [
|
|
19
|
-
|
|
18
|
+
"files": [
|
|
19
|
+
"dist",
|
|
20
|
+
"README.md",
|
|
21
|
+
"LICENSE"
|
|
22
|
+
],
|
|
23
|
+
"keywords": [
|
|
24
|
+
"recipe",
|
|
25
|
+
"scraper",
|
|
26
|
+
"parser",
|
|
27
|
+
"food",
|
|
28
|
+
"cooking"
|
|
29
|
+
],
|
|
20
30
|
"scripts": {
|
|
21
31
|
"build": "tsdown src/index.ts --outdir dist",
|
|
22
32
|
"test": "bun test",
|
|
@@ -29,17 +39,17 @@
|
|
|
29
39
|
"prepublishOnly": "bun run lint && bun run build"
|
|
30
40
|
},
|
|
31
41
|
"peerDependencies": {
|
|
32
|
-
"cheerio": "^1.
|
|
42
|
+
"cheerio": "^1.1.0"
|
|
33
43
|
},
|
|
34
44
|
"dependencies": {
|
|
35
45
|
"iso8601-duration": "^2.1.2",
|
|
36
46
|
"schema-dts": "^1.1.5"
|
|
37
47
|
},
|
|
38
48
|
"devDependencies": {
|
|
39
|
-
"@biomejs/biome": "^
|
|
40
|
-
"@types/bun": "^1.2.
|
|
41
|
-
"cheerio": "^1.
|
|
42
|
-
"tsdown": "^0.12.
|
|
49
|
+
"@biomejs/biome": "^2.0.6",
|
|
50
|
+
"@types/bun": "^1.2.17",
|
|
51
|
+
"cheerio": "^1.1.0",
|
|
52
|
+
"tsdown": "^0.12.9",
|
|
43
53
|
"typescript": "^5.8.3"
|
|
44
54
|
}
|
|
45
55
|
}
|
|
@@ -1,234 +0,0 @@
|
|
|
1
|
-
import { beforeEach, describe, expect, it } from 'bun:test'
|
|
2
|
-
import {
|
|
3
|
-
NotImplementedException,
|
|
4
|
-
UnsupportedFieldException,
|
|
5
|
-
} from '@/exceptions'
|
|
6
|
-
import { load } from 'cheerio'
|
|
7
|
-
import { ExtractorPlugin } from '../abstract-extractor-plugin'
|
|
8
|
-
import type { RecipeFields } from '../types/recipe.interface'
|
|
9
|
-
|
|
10
|
-
class MockExtractorPlugin extends ExtractorPlugin {
|
|
11
|
-
name = 'MockExtractorPlugin'
|
|
12
|
-
priority = 100
|
|
13
|
-
|
|
14
|
-
private supportedFields: Set<keyof RecipeFields>
|
|
15
|
-
|
|
16
|
-
constructor(supportedFields: (keyof RecipeFields)[] = []) {
|
|
17
|
-
const $ = load('<html><body></body></html>')
|
|
18
|
-
super($)
|
|
19
|
-
this.supportedFields = new Set(supportedFields)
|
|
20
|
-
}
|
|
21
|
-
|
|
22
|
-
supports(field: keyof RecipeFields): boolean {
|
|
23
|
-
return this.supportedFields.has(field)
|
|
24
|
-
}
|
|
25
|
-
|
|
26
|
-
extract<Key extends keyof RecipeFields>(field: Key): RecipeFields[Key] {
|
|
27
|
-
if (!this.supports(field)) {
|
|
28
|
-
throw new UnsupportedFieldException(field)
|
|
29
|
-
}
|
|
30
|
-
|
|
31
|
-
// Mock extraction logic
|
|
32
|
-
switch (field) {
|
|
33
|
-
case 'title':
|
|
34
|
-
return 'Mock Recipe Title' as RecipeFields[Key]
|
|
35
|
-
case 'description':
|
|
36
|
-
return 'Mock Recipe Description' as RecipeFields[Key]
|
|
37
|
-
case 'ingredients':
|
|
38
|
-
return new Set(['ingredient 1', 'ingredient 2']) as RecipeFields[Key]
|
|
39
|
-
case 'instructions':
|
|
40
|
-
return new Set(['step 1', 'step 2']) as RecipeFields[Key]
|
|
41
|
-
case 'prepTime':
|
|
42
|
-
return 15 as RecipeFields[Key]
|
|
43
|
-
case 'cookTime':
|
|
44
|
-
return 30 as RecipeFields[Key]
|
|
45
|
-
case 'totalTime':
|
|
46
|
-
return 45 as RecipeFields[Key]
|
|
47
|
-
case 'yields':
|
|
48
|
-
return '4 servings' as RecipeFields[Key]
|
|
49
|
-
default:
|
|
50
|
-
throw new NotImplementedException(field)
|
|
51
|
-
}
|
|
52
|
-
}
|
|
53
|
-
}
|
|
54
|
-
|
|
55
|
-
class AsyncMockExtractorPlugin extends ExtractorPlugin {
|
|
56
|
-
name = 'AsyncMockExtractorPlugin'
|
|
57
|
-
priority = 100
|
|
58
|
-
|
|
59
|
-
constructor() {
|
|
60
|
-
const $ = load('<html><body></body></html>')
|
|
61
|
-
super($)
|
|
62
|
-
}
|
|
63
|
-
|
|
64
|
-
supports(field: keyof RecipeFields): boolean {
|
|
65
|
-
return ['title', 'description'].includes(field)
|
|
66
|
-
}
|
|
67
|
-
|
|
68
|
-
async extract<Key extends keyof RecipeFields>(
|
|
69
|
-
field: Key,
|
|
70
|
-
): Promise<RecipeFields[Key]> {
|
|
71
|
-
await new Promise((resolve) => setTimeout(resolve, 10))
|
|
72
|
-
|
|
73
|
-
if (!this.supports(field)) {
|
|
74
|
-
throw new UnsupportedFieldException(field)
|
|
75
|
-
}
|
|
76
|
-
|
|
77
|
-
switch (field) {
|
|
78
|
-
case 'title':
|
|
79
|
-
return 'Async Recipe Title' as RecipeFields[Key]
|
|
80
|
-
case 'description':
|
|
81
|
-
return 'Async Recipe Description' as RecipeFields[Key]
|
|
82
|
-
default:
|
|
83
|
-
throw new NotImplementedException(field)
|
|
84
|
-
}
|
|
85
|
-
}
|
|
86
|
-
}
|
|
87
|
-
|
|
88
|
-
class ThrowingExtractorPlugin extends ExtractorPlugin {
|
|
89
|
-
name = 'ThrowingExtractorPlugin'
|
|
90
|
-
priority = 100
|
|
91
|
-
|
|
92
|
-
constructor() {
|
|
93
|
-
const $ = load('<html><body></body></html>')
|
|
94
|
-
super($)
|
|
95
|
-
}
|
|
96
|
-
|
|
97
|
-
supports(field: keyof RecipeFields): boolean {
|
|
98
|
-
return true
|
|
99
|
-
}
|
|
100
|
-
|
|
101
|
-
extract<Key extends keyof RecipeFields>(field: Key): RecipeFields[Key] {
|
|
102
|
-
throw new Error(`Extraction failed for field: ${String(field)}`)
|
|
103
|
-
}
|
|
104
|
-
}
|
|
105
|
-
|
|
106
|
-
describe('ExtractorPlugin', () => {
|
|
107
|
-
let plugin: MockExtractorPlugin
|
|
108
|
-
|
|
109
|
-
beforeEach(() => {
|
|
110
|
-
plugin = new MockExtractorPlugin([
|
|
111
|
-
'title',
|
|
112
|
-
'description',
|
|
113
|
-
'ingredients',
|
|
114
|
-
'prepTime',
|
|
115
|
-
])
|
|
116
|
-
})
|
|
117
|
-
|
|
118
|
-
describe('inheritance', () => {
|
|
119
|
-
it('should extend AbstractPlugin', () => {
|
|
120
|
-
expect(plugin).toBeInstanceOf(ExtractorPlugin)
|
|
121
|
-
})
|
|
122
|
-
|
|
123
|
-
it('should have access to cheerio instance from parent', () => {
|
|
124
|
-
expect(plugin.$).toBeDefined()
|
|
125
|
-
expect(typeof plugin.$).toBe('function')
|
|
126
|
-
})
|
|
127
|
-
})
|
|
128
|
-
|
|
129
|
-
describe('supports method', () => {
|
|
130
|
-
it('should return true for supported fields', () => {
|
|
131
|
-
expect(plugin.supports('title')).toBe(true)
|
|
132
|
-
expect(plugin.supports('description')).toBe(true)
|
|
133
|
-
expect(plugin.supports('ingredients')).toBe(true)
|
|
134
|
-
expect(plugin.supports('prepTime')).toBe(true)
|
|
135
|
-
})
|
|
136
|
-
|
|
137
|
-
it('should return false for unsupported fields', () => {
|
|
138
|
-
expect(plugin.supports('cookTime')).toBe(false)
|
|
139
|
-
expect(plugin.supports('totalTime')).toBe(false)
|
|
140
|
-
expect(plugin.supports('yields')).toBe(false)
|
|
141
|
-
expect(plugin.supports('author')).toBe(false)
|
|
142
|
-
})
|
|
143
|
-
|
|
144
|
-
it('should handle empty supported fields', () => {
|
|
145
|
-
const emptyPlugin = new MockExtractorPlugin([])
|
|
146
|
-
expect(emptyPlugin.supports('title')).toBe(false)
|
|
147
|
-
expect(emptyPlugin.supports('description')).toBe(false)
|
|
148
|
-
})
|
|
149
|
-
|
|
150
|
-
it('should handle all fields as supported', () => {
|
|
151
|
-
const allFieldsPlugin = new MockExtractorPlugin([
|
|
152
|
-
'title',
|
|
153
|
-
'description',
|
|
154
|
-
'ingredients',
|
|
155
|
-
'instructions',
|
|
156
|
-
'prepTime',
|
|
157
|
-
'cookTime',
|
|
158
|
-
'totalTime',
|
|
159
|
-
'yields',
|
|
160
|
-
])
|
|
161
|
-
|
|
162
|
-
expect(allFieldsPlugin.supports('title')).toBe(true)
|
|
163
|
-
expect(allFieldsPlugin.supports('cookTime')).toBe(true)
|
|
164
|
-
expect(allFieldsPlugin.supports('yields')).toBe(true)
|
|
165
|
-
})
|
|
166
|
-
})
|
|
167
|
-
|
|
168
|
-
describe('extract method', () => {
|
|
169
|
-
it('should extract supported fields', () => {
|
|
170
|
-
expect(plugin.extract('title')).toBe('Mock Recipe Title')
|
|
171
|
-
expect(plugin.extract('description')).toBe('Mock Recipe Description')
|
|
172
|
-
expect(plugin.extract('prepTime')).toBe(15)
|
|
173
|
-
expect(plugin.extract('ingredients')).toEqual(
|
|
174
|
-
new Set(['ingredient 1', 'ingredient 2']),
|
|
175
|
-
)
|
|
176
|
-
})
|
|
177
|
-
|
|
178
|
-
it('should throw error for unsupported fields', () => {
|
|
179
|
-
expect(() => plugin.extract('cookTime')).toThrow(
|
|
180
|
-
'Extraction not supported for field: cookTime',
|
|
181
|
-
)
|
|
182
|
-
expect(() => plugin.extract('totalTime')).toThrow(
|
|
183
|
-
'Extraction not supported for field: totalTime',
|
|
184
|
-
)
|
|
185
|
-
})
|
|
186
|
-
})
|
|
187
|
-
|
|
188
|
-
describe('async extraction', () => {
|
|
189
|
-
let asyncPlugin: AsyncMockExtractorPlugin
|
|
190
|
-
|
|
191
|
-
beforeEach(() => {
|
|
192
|
-
asyncPlugin = new AsyncMockExtractorPlugin()
|
|
193
|
-
})
|
|
194
|
-
|
|
195
|
-
it('should handle async extraction', async () => {
|
|
196
|
-
const title = await asyncPlugin.extract('title')
|
|
197
|
-
expect(title).toBe('Async Recipe Title')
|
|
198
|
-
const description = await asyncPlugin.extract('description')
|
|
199
|
-
expect(description).toBe('Async Recipe Description')
|
|
200
|
-
})
|
|
201
|
-
|
|
202
|
-
it('should throw error for unsupported fields in async mode', async () => {
|
|
203
|
-
await expect(asyncPlugin.extract('cookTime')).rejects.toThrow(
|
|
204
|
-
'Extraction not supported for field: cookTime',
|
|
205
|
-
)
|
|
206
|
-
})
|
|
207
|
-
})
|
|
208
|
-
|
|
209
|
-
describe('error handling', () => {
|
|
210
|
-
let throwingPlugin: ThrowingExtractorPlugin
|
|
211
|
-
|
|
212
|
-
beforeEach(() => {
|
|
213
|
-
throwingPlugin = new ThrowingExtractorPlugin()
|
|
214
|
-
})
|
|
215
|
-
|
|
216
|
-
it('should propagate extraction errors', () => {
|
|
217
|
-
expect(() => throwingPlugin.extract('title')).toThrow(
|
|
218
|
-
'Extraction failed for field: title',
|
|
219
|
-
)
|
|
220
|
-
expect(() => throwingPlugin.extract('description')).toThrow(
|
|
221
|
-
'Extraction failed for field: description',
|
|
222
|
-
)
|
|
223
|
-
})
|
|
224
|
-
})
|
|
225
|
-
|
|
226
|
-
describe('edge cases', () => {
|
|
227
|
-
it('should throw on undefined extractor', () => {
|
|
228
|
-
const plugin = new MockExtractorPlugin(['author'])
|
|
229
|
-
expect(() => plugin.extract('author')).toThrow(
|
|
230
|
-
'Method should be implemented: author',
|
|
231
|
-
)
|
|
232
|
-
})
|
|
233
|
-
})
|
|
234
|
-
})
|
|
@@ -1,201 +0,0 @@
|
|
|
1
|
-
import { afterEach, beforeEach, describe, expect, it, spyOn } from 'bun:test'
|
|
2
|
-
import { AbstractScraper } from '@/abstract-scraper'
|
|
3
|
-
import { NotImplementedException } from '@/exceptions'
|
|
4
|
-
import { Logger } from '@/logger'
|
|
5
|
-
import type { RecipeFields, RecipeObject } from '@/types/recipe.interface'
|
|
6
|
-
|
|
7
|
-
class DummyScraper extends AbstractScraper {
|
|
8
|
-
// implement required static host
|
|
9
|
-
static host(): string {
|
|
10
|
-
return 'dummy.com'
|
|
11
|
-
}
|
|
12
|
-
// no site-specific extractors
|
|
13
|
-
extractors = {}
|
|
14
|
-
}
|
|
15
|
-
|
|
16
|
-
describe('AbstractScraper utility methods', () => {
|
|
17
|
-
let scraper: DummyScraper
|
|
18
|
-
|
|
19
|
-
describe('static host()', () => {
|
|
20
|
-
it('throws by default on base class', () => {
|
|
21
|
-
expect(() => AbstractScraper.host()).toThrow(NotImplementedException)
|
|
22
|
-
})
|
|
23
|
-
|
|
24
|
-
it('returns host for subclass', () => {
|
|
25
|
-
expect(DummyScraper.host()).toBe('dummy.com')
|
|
26
|
-
})
|
|
27
|
-
})
|
|
28
|
-
|
|
29
|
-
describe('canonicalUrl()', () => {
|
|
30
|
-
it('returns absolute canonical URL when provided', () => {
|
|
31
|
-
const html = '<link rel="canonical" href="/foo/bar"/>'
|
|
32
|
-
scraper = new DummyScraper(html, 'http://example.com/page', {})
|
|
33
|
-
expect(scraper.canonicalUrl()).toBe('http://example.com/foo/bar')
|
|
34
|
-
})
|
|
35
|
-
|
|
36
|
-
it('returns base URL when no canonical link', () => {
|
|
37
|
-
const html = '<html></html>'
|
|
38
|
-
scraper = new DummyScraper(html, 'https://site.org/path?x=1', {})
|
|
39
|
-
expect(scraper.canonicalUrl()).toBe('https://site.org/path?x=1')
|
|
40
|
-
})
|
|
41
|
-
|
|
42
|
-
it('prefixes URL with https when missing protocol', () => {
|
|
43
|
-
const html = ''
|
|
44
|
-
scraper = new DummyScraper(html, 'site.org/abc', {})
|
|
45
|
-
expect(scraper.canonicalUrl()).toBe('https://site.org/abc')
|
|
46
|
-
})
|
|
47
|
-
})
|
|
48
|
-
|
|
49
|
-
describe('language()', () => {
|
|
50
|
-
let warnSpy: ReturnType<typeof spyOn>
|
|
51
|
-
|
|
52
|
-
beforeEach(() => {
|
|
53
|
-
warnSpy = spyOn(Logger.prototype, 'warn').mockImplementation(() => {})
|
|
54
|
-
})
|
|
55
|
-
afterEach(() => {
|
|
56
|
-
warnSpy.mockRestore()
|
|
57
|
-
})
|
|
58
|
-
|
|
59
|
-
it('reads html lang attribute', () => {
|
|
60
|
-
const html = '<html lang="fr"><body></body></html>'
|
|
61
|
-
scraper = new DummyScraper(html, 'url', {})
|
|
62
|
-
expect(scraper.language()).toBe('fr')
|
|
63
|
-
expect(warnSpy).not.toHaveBeenCalled()
|
|
64
|
-
})
|
|
65
|
-
|
|
66
|
-
it('falls back to meta http-equiv content-language', () => {
|
|
67
|
-
const html =
|
|
68
|
-
'<html><head>' +
|
|
69
|
-
'<meta http-equiv="content-language" content="de, en"/>' +
|
|
70
|
-
'</head></html>'
|
|
71
|
-
scraper = new DummyScraper(html, 'url', {})
|
|
72
|
-
expect(scraper.language()).toBe('de')
|
|
73
|
-
expect(warnSpy).not.toHaveBeenCalled()
|
|
74
|
-
})
|
|
75
|
-
|
|
76
|
-
it('defaults to "en" and logs warning when none found', () => {
|
|
77
|
-
scraper = new DummyScraper('<html></html>', 'url', {})
|
|
78
|
-
expect(scraper.language()).toBe('en')
|
|
79
|
-
expect(warnSpy).toHaveBeenCalledWith('Could not determine language')
|
|
80
|
-
})
|
|
81
|
-
})
|
|
82
|
-
|
|
83
|
-
describe('links()', () => {
|
|
84
|
-
const html = `
|
|
85
|
-
<a href="http://foo.com/page">Foo</a>
|
|
86
|
-
<a href="/local">Local</a>
|
|
87
|
-
<a>No href</a>
|
|
88
|
-
`
|
|
89
|
-
it('returns empty list when linksEnabled is false', () => {
|
|
90
|
-
scraper = new DummyScraper(html, 'url', { linksEnabled: false })
|
|
91
|
-
expect(scraper.links()).toEqual([])
|
|
92
|
-
})
|
|
93
|
-
|
|
94
|
-
it('returns only absolute links when linksEnabled is true', () => {
|
|
95
|
-
scraper = new DummyScraper(html, 'url', { linksEnabled: true })
|
|
96
|
-
const links = scraper.links()
|
|
97
|
-
expect(links).toEqual([{ href: 'http://foo.com/page', text: 'Foo' }])
|
|
98
|
-
})
|
|
99
|
-
})
|
|
100
|
-
})
|
|
101
|
-
|
|
102
|
-
// Test subclass overriding extract, canonicalUrl, language, links, and host
|
|
103
|
-
class TestScraper extends AbstractScraper {
|
|
104
|
-
static host(): string {
|
|
105
|
-
return 'hostVal'
|
|
106
|
-
}
|
|
107
|
-
|
|
108
|
-
// Provide no real HTML parsing
|
|
109
|
-
extractors = {}
|
|
110
|
-
private data: Partial<Record<keyof RecipeFields, unknown>>
|
|
111
|
-
constructor(data: Partial<Record<keyof RecipeFields, unknown>>) {
|
|
112
|
-
// html, url and options are unused because we override methods
|
|
113
|
-
super('', '', { linksEnabled: true })
|
|
114
|
-
this.data = data
|
|
115
|
-
}
|
|
116
|
-
|
|
117
|
-
// Return mocked values for every field
|
|
118
|
-
async extract<Key extends keyof RecipeFields>(
|
|
119
|
-
field: Key,
|
|
120
|
-
): Promise<RecipeFields[Key]> {
|
|
121
|
-
return this.data[field] as RecipeFields[Key]
|
|
122
|
-
}
|
|
123
|
-
|
|
124
|
-
override canonicalUrl(): string {
|
|
125
|
-
return this.data.canonicalUrl as string
|
|
126
|
-
}
|
|
127
|
-
override language(): string {
|
|
128
|
-
return this.data.language as string
|
|
129
|
-
}
|
|
130
|
-
override links(): RecipeFields['links'] {
|
|
131
|
-
return this.data.links as RecipeFields['links']
|
|
132
|
-
}
|
|
133
|
-
}
|
|
134
|
-
|
|
135
|
-
describe('AbstractScraper.toObject', () => {
|
|
136
|
-
it('returns a fully serialized RecipeObject', async () => {
|
|
137
|
-
// Prepare mock values
|
|
138
|
-
const mockValues: Partial<Record<keyof RecipeFields, unknown>> = {
|
|
139
|
-
siteName: 'site',
|
|
140
|
-
author: 'auth',
|
|
141
|
-
title: 'ttl',
|
|
142
|
-
image: 'img',
|
|
143
|
-
description: 'desc',
|
|
144
|
-
yields: '4 servings',
|
|
145
|
-
totalTime: 30,
|
|
146
|
-
cookTime: 10,
|
|
147
|
-
prepTime: 20,
|
|
148
|
-
cookingMethod: 'bake',
|
|
149
|
-
ratings: 4.2,
|
|
150
|
-
ratingsCount: 100,
|
|
151
|
-
category: new Set(['cat1', 'cat2']),
|
|
152
|
-
cuisine: new Set(['cui']),
|
|
153
|
-
dietaryRestrictions: new Set(['veg']),
|
|
154
|
-
equipment: new Set(['pan']),
|
|
155
|
-
ingredients: new Set(['ing1', 'ing2']),
|
|
156
|
-
instructions: new Set(['step1', 'step2']),
|
|
157
|
-
keywords: new Set(['kw1']),
|
|
158
|
-
nutrients: new Map([['cal', '200kcal']]),
|
|
159
|
-
reviews: new Map([['rev1', 'Good']]),
|
|
160
|
-
canonicalUrl: 'http://can.url',
|
|
161
|
-
language: 'en-US',
|
|
162
|
-
links: [{ href: 'http://link', text: 'LinkText' }],
|
|
163
|
-
}
|
|
164
|
-
|
|
165
|
-
const scraper = new TestScraper(mockValues)
|
|
166
|
-
const result = await scraper.toObject()
|
|
167
|
-
|
|
168
|
-
// Basic scalar fields
|
|
169
|
-
const expectedRest = {
|
|
170
|
-
host: 'hostVal',
|
|
171
|
-
siteName: 'site',
|
|
172
|
-
author: 'auth',
|
|
173
|
-
title: 'ttl',
|
|
174
|
-
image: 'img',
|
|
175
|
-
canonicalUrl: 'http://can.url',
|
|
176
|
-
language: 'en-US',
|
|
177
|
-
links: [{ href: 'http://link', text: 'LinkText' }],
|
|
178
|
-
description: 'desc',
|
|
179
|
-
yields: '4 servings',
|
|
180
|
-
totalTime: 30,
|
|
181
|
-
cookTime: 10,
|
|
182
|
-
prepTime: 20,
|
|
183
|
-
cookingMethod: 'bake',
|
|
184
|
-
ratings: 4.2,
|
|
185
|
-
ratingsCount: 100,
|
|
186
|
-
}
|
|
187
|
-
|
|
188
|
-
expect(result).toEqual({
|
|
189
|
-
...expectedRest,
|
|
190
|
-
category: ['cat1', 'cat2'],
|
|
191
|
-
cuisine: ['cui'],
|
|
192
|
-
dietaryRestrictions: ['veg'],
|
|
193
|
-
equipment: ['pan'],
|
|
194
|
-
ingredients: ['ing1', 'ing2'],
|
|
195
|
-
instructions: ['step1', 'step2'],
|
|
196
|
-
keywords: ['kw1'],
|
|
197
|
-
nutrients: { cal: '200kcal' },
|
|
198
|
-
reviews: { rev1: 'Good' },
|
|
199
|
-
} as RecipeObject)
|
|
200
|
-
})
|
|
201
|
-
})
|