portapack 0.3.1 → 0.3.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.eslintrc.json +67 -8
- package/.releaserc.js +25 -27
- package/CHANGELOG.md +14 -22
- package/LICENSE.md +21 -0
- package/README.md +22 -53
- package/commitlint.config.js +30 -34
- package/dist/cli/cli-entry.cjs +183 -98
- package/dist/cli/cli-entry.cjs.map +1 -1
- package/dist/index.d.ts +0 -3
- package/dist/index.js +178 -97
- package/dist/index.js.map +1 -1
- package/docs/.vitepress/config.ts +38 -33
- package/docs/.vitepress/sidebar-generator.ts +89 -38
- package/docs/architecture.md +186 -0
- package/docs/cli.md +23 -23
- package/docs/code-of-conduct.md +7 -1
- package/docs/configuration.md +12 -11
- package/docs/contributing.md +6 -2
- package/docs/deployment.md +10 -5
- package/docs/development.md +8 -5
- package/docs/getting-started.md +13 -13
- package/docs/index.md +1 -1
- package/docs/public/android-chrome-192x192.png +0 -0
- package/docs/public/android-chrome-512x512.png +0 -0
- package/docs/public/apple-touch-icon.png +0 -0
- package/docs/public/favicon-16x16.png +0 -0
- package/docs/public/favicon-32x32.png +0 -0
- package/docs/public/favicon.ico +0 -0
- package/docs/roadmap.md +233 -0
- package/docs/site.webmanifest +1 -0
- package/docs/troubleshooting.md +12 -1
- package/examples/main.ts +5 -30
- package/examples/sample-project/script.js +1 -1
- package/jest.config.ts +8 -13
- package/nodemon.json +5 -10
- package/package.json +2 -5
- package/src/cli/cli-entry.ts +2 -2
- package/src/cli/cli.ts +21 -16
- package/src/cli/options.ts +127 -113
- package/src/core/bundler.ts +253 -222
- package/src/core/extractor.ts +632 -565
- package/src/core/minifier.ts +173 -162
- package/src/core/packer.ts +141 -137
- package/src/core/parser.ts +74 -73
- package/src/core/web-fetcher.ts +270 -258
- package/src/index.ts +18 -17
- package/src/types.ts +9 -11
- package/src/utils/font.ts +12 -6
- package/src/utils/logger.ts +110 -105
- package/src/utils/meta.ts +75 -76
- package/src/utils/mime.ts +50 -50
- package/src/utils/slugify.ts +33 -34
- package/tests/unit/cli/cli-entry.test.ts +72 -70
- package/tests/unit/cli/cli.test.ts +314 -278
- package/tests/unit/cli/options.test.ts +294 -301
- package/tests/unit/core/bundler.test.ts +426 -329
- package/tests/unit/core/extractor.test.ts +793 -549
- package/tests/unit/core/minifier.test.ts +374 -274
- package/tests/unit/core/packer.test.ts +298 -264
- package/tests/unit/core/parser.test.ts +538 -150
- package/tests/unit/core/web-fetcher.test.ts +389 -359
- package/tests/unit/index.test.ts +238 -197
- package/tests/unit/utils/font.test.ts +26 -21
- package/tests/unit/utils/logger.test.ts +267 -260
- package/tests/unit/utils/meta.test.ts +29 -28
- package/tests/unit/utils/mime.test.ts +73 -74
- package/tests/unit/utils/slugify.test.ts +14 -12
- package/tsconfig.build.json +9 -10
- package/tsconfig.jest.json +1 -1
- package/tsconfig.json +2 -2
- package/tsup.config.ts +8 -9
- package/typedoc.json +5 -9
- /package/docs/{portapack-transparent.png → public/portapack-transparent.png} +0 -0
- /package/docs/{portapack.jpg → public/portapack.jpg} +0 -0
@@ -2,170 +2,558 @@
|
|
2
2
|
* @file parser.test.ts
|
3
3
|
* @description Unit tests for parseHTML.
|
4
4
|
*/
|
5
|
-
import type { ParsedHTML, Asset } from '../../../src/types';
|
5
|
+
import type { ParsedHTML, Asset } from '../../../src/types'; // No .js needed
|
6
6
|
import { jest, describe, it, beforeEach, expect, afterEach } from '@jest/globals';
|
7
|
-
import { Logger } from '../../../src/utils/logger';
|
8
|
-
import { LogLevel } from '../../../src/types';
|
7
|
+
import { Logger } from '../../../src/utils/logger'; // No .js needed
|
8
|
+
import { LogLevel } from '../../../src/types'; // No .js needed
|
9
|
+
|
10
|
+
// Import types for mocking fs/promises
|
9
11
|
import type { PathLike } from 'fs';
|
10
12
|
import type { FileHandle } from 'fs/promises';
|
11
13
|
import type { OpenMode } from 'node:fs';
|
12
14
|
|
13
|
-
|
14
15
|
// --- Mock Setup ---
|
15
|
-
|
16
|
+
// Define mock function first
|
17
|
+
const mockReadFileFn = jest.fn<
|
18
|
+
(
|
16
19
|
path: PathLike | FileHandle,
|
17
|
-
options?:
|
18
|
-
|
20
|
+
options?:
|
21
|
+
| {
|
22
|
+
encoding: BufferEncoding | null;
|
23
|
+
flag?: OpenMode | undefined;
|
24
|
+
signal?: AbortSignal | undefined;
|
25
|
+
}
|
26
|
+
| BufferEncoding
|
27
|
+
| null
|
28
|
+
) => Promise<string | Buffer>
|
29
|
+
>(); // Match fs/promises signature
|
19
30
|
|
31
|
+
// Mock the 'fs/promises' module *before* importing parser.ts
|
20
32
|
jest.mock('fs/promises', () => ({
|
21
|
-
|
22
|
-
|
33
|
+
__esModule: true, // Keep for safety
|
34
|
+
readFile: mockReadFileFn,
|
35
|
+
// Add other fs/promises functions if parser.ts uses them
|
23
36
|
}));
|
24
|
-
// --- End Mock Setup ---
|
25
37
|
|
26
38
|
// --- Import Module Under Test ---
|
27
|
-
|
28
|
-
|
39
|
+
// Use standard import AFTER mocks
|
40
|
+
import { parseHTML } from '../../../src/core/parser'; // No .js needed
|
29
41
|
|
30
42
|
// --- Test Suite ---
|
31
43
|
describe('🧠 HTML Parser - parseHTML()', () => {
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
44
|
+
let logger: Logger;
|
45
|
+
let loggerDebugSpy: jest.SpiedFunction<typeof logger.debug>;
|
46
|
+
let loggerInfoSpy: jest.SpiedFunction<typeof logger.info>;
|
47
|
+
let loggerErrorSpy: jest.SpiedFunction<typeof logger.error>;
|
48
|
+
|
49
|
+
/** Helper function to check assets flexibly */
|
50
|
+
const expectAssetsToContain = (actualAssets: Asset[], expectedAssets: Partial<Asset>[]) => {
|
51
|
+
expect(actualAssets).toHaveLength(expectedAssets.length);
|
52
|
+
const actualUrls = new Set(actualAssets.map(a => a.url));
|
53
|
+
expectedAssets.forEach(expected => {
|
54
|
+
expect(actualUrls).toContain(expected.url);
|
55
|
+
expect(actualAssets).toContainEqual(expect.objectContaining(expected));
|
56
|
+
});
|
57
|
+
};
|
58
|
+
|
59
|
+
// Define mock paths used in tests
|
60
|
+
const mockHtmlPath = 'mock.html';
|
61
|
+
const emptyHtmlPath = 'empty.html';
|
62
|
+
const assetsHtmlPath = 'assets.html';
|
63
|
+
const brokenHtmlPath = 'broken.html';
|
64
|
+
const srcsetHtmlPath = 'srcset.html';
|
65
|
+
const styleInlineHtmlPath = 'style-inline.html';
|
66
|
+
const specialcharsHtmlPath = 'specialchars.html';
|
67
|
+
const dedupeHtmlPath = 'dedupe.html';
|
68
|
+
const typesHtmlPath = 'types.html';
|
69
|
+
const emptySrcHtmlPath = 'empty-src.html';
|
70
|
+
const trickySrcsetHtmlPath = 'tricky-srcset.html';
|
71
|
+
const dataUriHtmlPath = 'datauri.html';
|
72
|
+
const unreadablePath = 'unreadable.html';
|
73
|
+
|
74
|
+
beforeEach(() => {
|
75
|
+
jest.clearAllMocks(); // Clear mocks before each test
|
76
|
+
|
77
|
+
// Default mock implementation for readFile
|
78
|
+
mockReadFileFn.mockResolvedValue('');
|
79
|
+
|
80
|
+
logger = new Logger(LogLevel.DEBUG); // Use DEBUG to capture all logs
|
81
|
+
loggerDebugSpy = jest.spyOn(logger, 'debug');
|
82
|
+
loggerInfoSpy = jest.spyOn(logger, 'info');
|
83
|
+
loggerErrorSpy = jest.spyOn(logger, 'error');
|
84
|
+
});
|
85
|
+
|
86
|
+
// Removed afterEach with jest.restoreAllMocks() as clearAllMocks + setup in beforeEach is often sufficient
|
87
|
+
|
88
|
+
describe('📄 File Reading', () => {
|
89
|
+
it('✅ reads the specified file with utf-8 encoding', async () => {
|
90
|
+
const htmlContent = '<html><head></head><body>Test</body></html>';
|
91
|
+
// Configure mock for this test
|
92
|
+
mockReadFileFn.mockResolvedValueOnce(htmlContent);
|
93
|
+
|
94
|
+
const result = await parseHTML(mockHtmlPath, logger);
|
95
|
+
|
96
|
+
expect(mockReadFileFn).toHaveBeenCalledTimes(1);
|
97
|
+
// fs/promises.readFile often gets called with just path and options object or encoding string
|
98
|
+
expect(mockReadFileFn).toHaveBeenCalledWith(mockHtmlPath, 'utf-8');
|
99
|
+
expect(result.htmlContent).toBe(htmlContent);
|
100
|
+
expect(loggerDebugSpy).toHaveBeenCalledWith(`Parsing HTML file: ${mockHtmlPath}`);
|
101
|
+
expect(loggerDebugSpy).toHaveBeenCalledWith(
|
102
|
+
expect.stringContaining(
|
103
|
+
`Successfully read HTML file (${Buffer.byteLength(htmlContent)} bytes).`
|
104
|
+
)
|
105
|
+
);
|
106
|
+
});
|
107
|
+
|
108
|
+
it('✅ handles empty HTML files gracefully', async () => {
|
109
|
+
// Default mock is already empty string
|
110
|
+
const result = await parseHTML(emptyHtmlPath, logger);
|
111
|
+
|
112
|
+
expect(mockReadFileFn).toHaveBeenCalledWith(emptyHtmlPath, 'utf-8');
|
113
|
+
expect(result.htmlContent).toBe('');
|
114
|
+
expect(result.assets).toEqual([]);
|
115
|
+
expect(loggerInfoSpy).toHaveBeenCalledWith(
|
116
|
+
'HTML parsing complete. Discovered 0 unique asset links.'
|
117
|
+
);
|
118
|
+
expect(loggerDebugSpy).toHaveBeenCalledWith(
|
119
|
+
expect.stringContaining(`Successfully read HTML file (0 bytes)`)
|
120
|
+
);
|
121
|
+
});
|
122
|
+
|
123
|
+
it('❌ throws a wrapped error if reading the file fails', async () => {
|
124
|
+
const readError = new Error('Permission denied');
|
125
|
+
(readError as any).code = 'EACCES'; // Add code for realism
|
126
|
+
// Configure mock rejection for this test
|
127
|
+
mockReadFileFn.mockRejectedValueOnce(readError);
|
128
|
+
|
129
|
+
await expect(parseHTML(unreadablePath, logger)).rejects.toThrow(
|
130
|
+
expect.objectContaining({
|
131
|
+
message: `Could not read input HTML file: ${unreadablePath}`,
|
132
|
+
cause: readError,
|
133
|
+
})
|
134
|
+
);
|
135
|
+
expect(mockReadFileFn).toHaveBeenCalledWith(unreadablePath, 'utf-8');
|
136
|
+
expect(loggerErrorSpy).toHaveBeenCalledWith(
|
137
|
+
`Failed to read HTML file "${unreadablePath}": ${readError.message}`
|
138
|
+
);
|
139
|
+
expect(loggerDebugSpy).not.toHaveBeenCalledWith(
|
140
|
+
expect.stringContaining('Successfully read HTML file')
|
141
|
+
);
|
142
|
+
expect(loggerInfoSpy).not.toHaveBeenCalledWith(
|
143
|
+
expect.stringContaining('HTML parsing complete')
|
144
|
+
);
|
145
|
+
});
|
146
|
+
});
|
147
|
+
|
148
|
+
describe('📦 Asset Discovery', () => {
|
149
|
+
it('✅ extracts basic <link rel="stylesheet">', async () => {
|
150
|
+
const html = `<link rel="stylesheet" href="style.css">`;
|
151
|
+
mockReadFileFn.mockResolvedValueOnce(html);
|
152
|
+
const result = await parseHTML(mockHtmlPath, logger);
|
153
|
+
expectAssetsToContain(result.assets, [{ type: 'css', url: 'style.css' }]);
|
154
|
+
expect(loggerDebugSpy).toHaveBeenCalledWith("Discovered asset: Type='css', URL='style.css'");
|
155
|
+
});
|
156
|
+
|
157
|
+
it('✅ extracts basic <script src="">', async () => {
|
158
|
+
const html = `<script src="app.js"></script>`;
|
159
|
+
mockReadFileFn.mockResolvedValueOnce(html);
|
160
|
+
const result = await parseHTML(mockHtmlPath, logger);
|
161
|
+
expectAssetsToContain(result.assets, [{ type: 'js', url: 'app.js' }]);
|
162
|
+
expect(loggerDebugSpy).toHaveBeenCalledWith("Discovered asset: Type='js', URL='app.js'");
|
163
|
+
});
|
164
|
+
|
165
|
+
it('✅ extracts basic <img src="">', async () => {
|
166
|
+
const html = `<img src="logo.png">`;
|
167
|
+
mockReadFileFn.mockResolvedValueOnce(html);
|
168
|
+
const result = await parseHTML(mockHtmlPath, logger);
|
169
|
+
expectAssetsToContain(result.assets, [{ type: 'image', url: 'logo.png' }]);
|
170
|
+
expect(loggerDebugSpy).toHaveBeenCalledWith("Discovered asset: Type='image', URL='logo.png'");
|
171
|
+
});
|
172
|
+
|
173
|
+
it('✅ extracts basic <input type="image" src="">', async () => {
|
174
|
+
const html = `<input type="image" src="button.gif">`;
|
175
|
+
mockReadFileFn.mockResolvedValueOnce(html);
|
176
|
+
const result = await parseHTML(mockHtmlPath, logger);
|
177
|
+
expectAssetsToContain(result.assets, [{ type: 'image', url: 'button.gif' }]);
|
178
|
+
expect(loggerDebugSpy).toHaveBeenCalledWith(
|
179
|
+
"Discovered asset: Type='image', URL='button.gif'"
|
180
|
+
);
|
181
|
+
});
|
182
|
+
|
183
|
+
it('✅ extracts basic <video src="">', async () => {
|
184
|
+
const html = `<video src="movie.mp4"></video>`;
|
185
|
+
mockReadFileFn.mockResolvedValueOnce(html);
|
186
|
+
const result = await parseHTML(mockHtmlPath, logger);
|
187
|
+
expectAssetsToContain(result.assets, [{ type: 'video', url: 'movie.mp4' }]);
|
188
|
+
expect(loggerDebugSpy).toHaveBeenCalledWith(
|
189
|
+
"Discovered asset: Type='video', URL='movie.mp4'"
|
190
|
+
);
|
191
|
+
});
|
192
|
+
|
193
|
+
it('✅ extracts basic <video poster="">', async () => {
|
194
|
+
const html = `<video poster="preview.jpg"></video>`;
|
195
|
+
mockReadFileFn.mockResolvedValueOnce(html);
|
196
|
+
const result = await parseHTML(mockHtmlPath, logger);
|
197
|
+
expectAssetsToContain(result.assets, [{ type: 'image', url: 'preview.jpg' }]);
|
198
|
+
expect(loggerDebugSpy).toHaveBeenCalledWith(
|
199
|
+
"Discovered asset: Type='image', URL='preview.jpg'"
|
200
|
+
);
|
201
|
+
});
|
202
|
+
|
203
|
+
it('✅ extracts basic <audio src="">', async () => {
|
204
|
+
const html = `<audio src="track.mp3"></audio>`;
|
205
|
+
mockReadFileFn.mockResolvedValueOnce(html);
|
206
|
+
const result = await parseHTML(mockHtmlPath, logger);
|
207
|
+
expectAssetsToContain(result.assets, [{ type: 'audio', url: 'track.mp3' }]);
|
208
|
+
expect(loggerDebugSpy).toHaveBeenCalledWith(
|
209
|
+
"Discovered asset: Type='audio', URL='track.mp3'"
|
210
|
+
);
|
211
|
+
});
|
212
|
+
|
213
|
+
it('✅ extracts <source src=""> within <video>', async () => {
|
214
|
+
const html = `<video><source src="movie.webm" type="video/webm"><source src="movie.mp4" type="video/mp4"></video>`;
|
215
|
+
mockReadFileFn.mockResolvedValueOnce(html);
|
216
|
+
const result = await parseHTML(mockHtmlPath, logger);
|
217
|
+
expectAssetsToContain(result.assets, [
|
218
|
+
{ type: 'video', url: 'movie.webm' },
|
219
|
+
{ type: 'video', url: 'movie.mp4' },
|
220
|
+
]);
|
221
|
+
expect(loggerDebugSpy).toHaveBeenCalledWith(
|
222
|
+
"Discovered asset: Type='video', URL='movie.webm'"
|
223
|
+
);
|
224
|
+
expect(loggerDebugSpy).toHaveBeenCalledWith(
|
225
|
+
"Discovered asset: Type='video', URL='movie.mp4'"
|
226
|
+
);
|
227
|
+
});
|
228
|
+
|
229
|
+
it('✅ extracts <source src=""> within <audio>', async () => {
|
230
|
+
const html = `<audio><source src="sound.ogg" type="audio/ogg"><source src="sound.mp3" type="audio/mpeg"></audio>`;
|
231
|
+
mockReadFileFn.mockResolvedValueOnce(html);
|
232
|
+
const result = await parseHTML(mockHtmlPath, logger);
|
233
|
+
expectAssetsToContain(result.assets, [
|
234
|
+
{ type: 'audio', url: 'sound.ogg' },
|
235
|
+
{ type: 'audio', url: 'sound.mp3' },
|
236
|
+
]);
|
237
|
+
expect(loggerDebugSpy).toHaveBeenCalledWith(
|
238
|
+
"Discovered asset: Type='audio', URL='sound.ogg'"
|
239
|
+
);
|
240
|
+
expect(loggerDebugSpy).toHaveBeenCalledWith(
|
241
|
+
"Discovered asset: Type='audio', URL='sound.mp3'"
|
242
|
+
);
|
243
|
+
});
|
244
|
+
|
245
|
+
it('✅ extracts various icons <link rel="icon/shortcut icon/apple-touch-icon">', async () => {
|
246
|
+
const html = `
|
106
247
|
<link rel="icon" href="favicon.ico">
|
107
|
-
<link rel="
|
108
|
-
<link rel="
|
109
|
-
<link rel="alternate" href="feed.xml" type="application/rss+xml"> <img src="unknown_ext_img">
|
110
|
-
<video src="movie.mkv"></video>
|
111
|
-
<audio src="music.flac"></audio>
|
112
|
-
`;
|
113
|
-
mockReadFileFn.mockResolvedValueOnce(htmlContent);
|
114
|
-
const result = await parseHTML(typesHtmlPath, logger);
|
115
|
-
|
116
|
-
// FIX: Remove feed.xml from expected list as parser doesn't handle rel="alternate"
|
117
|
-
const expected: Partial<Asset>[] = [
|
118
|
-
{ type: 'image', url: 'favicon.ico' },
|
119
|
-
{ type: 'font', url: 'font.woff2' },
|
120
|
-
{ type: 'other', url: 'app.webmanifest' },
|
121
|
-
// { type: 'other', url: 'feed.xml'}, // REMOVED
|
122
|
-
{ type: 'image', url: 'unknown_ext_img' },
|
123
|
-
{ type: 'video', url: 'movie.mkv' },
|
124
|
-
{ type: 'audio', url: 'music.flac' },
|
125
|
-
];
|
126
|
-
// The length check inside expectAssetsToContain will now expect 6
|
127
|
-
expectAssetsToContain(result.assets, expected);
|
128
|
-
});
|
129
|
-
// ----- END FAILING TEST 1 FIX -----
|
130
|
-
|
131
|
-
});
|
132
|
-
|
133
|
-
describe('🧪 Edge Cases & Robustness', () => {
|
134
|
-
// ... (passing tests remain the same) ...
|
135
|
-
it('✅ ignores data URIs', async () => { /* ... */ });
|
136
|
-
it('✅ ignores empty or missing src/href/srcset attributes', async () => { /* ... */ });
|
137
|
-
it('✅ handles tricky srcset values with extra spaces/commas', async () => { /* ... */ });
|
138
|
-
it('✅ supports malformed or partial tags (best effort by cheerio)', async () => { /* ... adjusted expectation previously ... */ });
|
139
|
-
it('✅ handles inline <style> and <script> tags without extracting them as assets', async () => { /* ... */ });
|
140
|
-
|
141
|
-
|
142
|
-
// ----- FAILING TEST 2 (FIXED) -----
|
143
|
-
it('✅ handles URLs with spaces, queries, and special chars preserving encoding', async () => {
|
144
|
-
const specialUrlEncoded = 'image%20with%20spaces.png?query=1&special=%C3%A4%C3%B6%C3%BC#hash'; // äöü
|
145
|
-
const scriptUrl = '/path/to/script.js?v=1.2.3';
|
146
|
-
const cssUrl = 'style.css#id-selector';
|
147
|
-
const mockSpecialCharsHtml = `
|
148
|
-
<img src="${specialUrlEncoded}">
|
149
|
-
<script src="${scriptUrl}"></script>
|
150
|
-
<link rel="stylesheet" href="${cssUrl}">
|
248
|
+
<link rel="shortcut icon" type="image/png" href="/fav/icon-16.png">
|
249
|
+
<link rel="apple-touch-icon" sizes="180x180" href="apple-icon.png">
|
151
250
|
`;
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
251
|
+
mockReadFileFn.mockResolvedValueOnce(html);
|
252
|
+
const result = await parseHTML(mockHtmlPath, logger);
|
253
|
+
expectAssetsToContain(result.assets, [
|
254
|
+
{ type: 'image', url: 'favicon.ico' },
|
255
|
+
{ type: 'image', url: '/fav/icon-16.png' },
|
256
|
+
{ type: 'image', url: 'apple-icon.png' },
|
257
|
+
]);
|
258
|
+
});
|
259
|
+
|
260
|
+
it('✅ extracts <link rel="manifest">', async () => {
|
261
|
+
const html = `<link rel="manifest" href="manifest.json">`;
|
262
|
+
mockReadFileFn.mockResolvedValueOnce(html);
|
263
|
+
const result = await parseHTML(mockHtmlPath, logger);
|
264
|
+
expectAssetsToContain(result.assets, [{ type: 'other', url: 'manifest.json' }]); // Type depends on guessMimeType
|
265
|
+
expect(loggerDebugSpy).toHaveBeenCalledWith(
|
266
|
+
"Discovered asset: Type='other', URL='manifest.json'"
|
267
|
+
);
|
268
|
+
});
|
269
|
+
|
270
|
+
it('✅ extracts <link rel="preload" as="font">', async () => {
|
271
|
+
const html = `<link rel="preload" href="font.woff2" as="font" type="font/woff2" crossorigin>`;
|
272
|
+
mockReadFileFn.mockResolvedValueOnce(html);
|
273
|
+
const result = await parseHTML(mockHtmlPath, logger);
|
274
|
+
expectAssetsToContain(result.assets, [{ type: 'font', url: 'font.woff2' }]);
|
275
|
+
expect(loggerDebugSpy).toHaveBeenCalledWith(
|
276
|
+
"Discovered asset: Type='font', URL='font.woff2'"
|
277
|
+
);
|
278
|
+
});
|
279
|
+
|
280
|
+
it('✅ extracts assets from <img srcset="">', async () => {
|
281
|
+
const html = `<img src="fallback.jpg" srcset="img-320w.jpg 320w, img-640w.jpg 640w, img-1200w.jpg 1200w">`;
|
282
|
+
mockReadFileFn.mockResolvedValueOnce(html);
|
283
|
+
const result = await parseHTML(srcsetHtmlPath, logger);
|
284
|
+
expectAssetsToContain(result.assets, [
|
285
|
+
{ type: 'image', url: 'fallback.jpg' },
|
286
|
+
{ type: 'image', url: 'img-320w.jpg' },
|
287
|
+
{ type: 'image', url: 'img-640w.jpg' },
|
288
|
+
{ type: 'image', url: 'img-1200w.jpg' },
|
289
|
+
]);
|
290
|
+
});
|
291
|
+
|
292
|
+
it('✅ extracts assets from <source srcset=""> within <picture>', async () => {
|
293
|
+
const html = `
|
294
|
+
<picture>
|
295
|
+
<source srcset="logo-wide.png 600w, logo-extrawide.png 1000w" media="(min-width: 600px)">
|
296
|
+
<source srcset="logo-square.png">
|
297
|
+
<img src="logo-fallback.png" alt="Logo">
|
298
|
+
</picture>`;
|
299
|
+
mockReadFileFn.mockResolvedValueOnce(html);
|
300
|
+
const result = await parseHTML(srcsetHtmlPath, logger);
|
301
|
+
expectAssetsToContain(result.assets, [
|
302
|
+
{ type: 'image', url: 'logo-wide.png' },
|
303
|
+
{ type: 'image', url: 'logo-extrawide.png' },
|
304
|
+
{ type: 'image', url: 'logo-square.png' },
|
305
|
+
{ type: 'image', url: 'logo-fallback.png' },
|
306
|
+
]);
|
307
|
+
});
|
308
|
+
|
309
|
+
it('✅ extracts a mix of different asset types', async () => {
|
310
|
+
const mockAssetsHtml = `
|
311
|
+
<html><head>
|
312
|
+
<link rel="stylesheet" href="css/main.css">
|
313
|
+
<link rel="icon" href="favicon.ico">
|
314
|
+
<script src="js/vendor.js" defer></script>
|
315
|
+
</head><body>
|
316
|
+
<h1>Title</h1>
|
317
|
+
<img src="images/header.png">
|
318
|
+
<video poster="vid/preview.jpg">
|
319
|
+
<source src="vid/intro.mp4" type="video/mp4">
|
320
|
+
</video>
|
321
|
+
<audio controls src="audio/theme.mp3"></audio>
|
322
|
+
<input type="image" src="/img/submit.gif"/>
|
323
|
+
<script src="js/app.js"></script>
|
324
|
+
</body></html>`;
|
325
|
+
mockReadFileFn.mockResolvedValueOnce(mockAssetsHtml);
|
326
|
+
const result = await parseHTML(assetsHtmlPath, logger);
|
327
|
+
|
328
|
+
const expected: Partial<Asset>[] = [
|
329
|
+
{ type: 'css', url: 'css/main.css' },
|
330
|
+
{ type: 'image', url: 'favicon.ico' },
|
331
|
+
{ type: 'js', url: 'js/vendor.js' },
|
332
|
+
{ type: 'image', url: 'images/header.png' },
|
333
|
+
{ type: 'image', url: 'vid/preview.jpg' }, // video poster
|
334
|
+
{ type: 'video', url: 'vid/intro.mp4' }, // video source
|
335
|
+
{ type: 'audio', url: 'audio/theme.mp3' }, // audio src
|
336
|
+
{ type: 'image', url: '/img/submit.gif' }, // input image
|
337
|
+
{ type: 'js', url: 'js/app.js' },
|
338
|
+
];
|
339
|
+
expectAssetsToContain(result.assets, expected);
|
340
|
+
expect(loggerInfoSpy).toHaveBeenCalledWith(
|
341
|
+
'HTML parsing complete. Discovered 9 unique asset links.'
|
342
|
+
);
|
343
|
+
});
|
344
|
+
|
345
|
+
it('✅ deduplicates identical asset URLs', async () => {
|
346
|
+
const htmlContent = `
|
347
|
+
<link rel="stylesheet" href="style.css">
|
348
|
+
<link rel="stylesheet" href="style.css"> <script src="app.js"></script>
|
349
|
+
<script src="app.js"></script> <img src="logo.png">
|
350
|
+
<img src="logo.png"> `;
|
351
|
+
mockReadFileFn.mockResolvedValueOnce(htmlContent);
|
352
|
+
const result = await parseHTML(dedupeHtmlPath, logger);
|
353
|
+
|
354
|
+
const expected: Partial<Asset>[] = [
|
355
|
+
{ type: 'css', url: 'style.css' },
|
356
|
+
{ type: 'js', url: 'app.js' },
|
357
|
+
{ type: 'image', url: 'logo.png' },
|
358
|
+
];
|
359
|
+
expectAssetsToContain(result.assets, expected);
|
360
|
+
expect(loggerInfoSpy).toHaveBeenCalledWith(
|
361
|
+
'HTML parsing complete. Discovered 3 unique asset links.'
|
362
|
+
);
|
363
|
+
expect(loggerDebugSpy).toHaveBeenCalledWith('Skipping duplicate asset URL: style.css');
|
364
|
+
expect(loggerDebugSpy).toHaveBeenCalledWith('Skipping duplicate asset URL: app.js');
|
365
|
+
expect(loggerDebugSpy).toHaveBeenCalledWith('Skipping duplicate asset URL: logo.png');
|
366
|
+
});
|
367
|
+
|
368
|
+
it('✅ categorizes asset types correctly (incl. guessing via extension)', async () => {
|
369
|
+
const htmlContent = `
|
370
|
+
<link rel="icon" href="favicon.ico">
|
371
|
+
<link rel="preload" href="font.woff2" as="font">
|
372
|
+
<link rel="manifest" href="app.webmanifest">
|
373
|
+
<link rel="alternate" href="feed.xml" type="application/rss+xml"> <img src="unknown_ext_img">
|
374
|
+
<video src="movie.mkv"></video>
|
375
|
+
<audio src="music.flac"></audio>
|
376
|
+
`;
|
377
|
+
mockReadFileFn.mockResolvedValueOnce(htmlContent);
|
378
|
+
const result = await parseHTML(typesHtmlPath, logger);
|
379
|
+
|
380
|
+
// FIX: Remove feed.xml from expected list as parser doesn't handle rel="alternate"
|
381
|
+
const expected: Partial<Asset>[] = [
|
382
|
+
{ type: 'image', url: 'favicon.ico' },
|
383
|
+
{ type: 'font', url: 'font.woff2' },
|
384
|
+
{ type: 'other', url: 'app.webmanifest' },
|
385
|
+
// { type: 'other', url: 'feed.xml'}, // REMOVED
|
386
|
+
{ type: 'image', url: 'unknown_ext_img' },
|
387
|
+
{ type: 'video', url: 'movie.mkv' },
|
388
|
+
{ type: 'audio', url: 'music.flac' },
|
389
|
+
];
|
390
|
+
// The length check inside expectAssetsToContain will now expect 6
|
391
|
+
expectAssetsToContain(result.assets, expected);
|
392
|
+
});
|
393
|
+
});
|
394
|
+
|
395
|
+
describe('🧪 Edge Cases & Robustness', () => {
|
396
|
+
it('✅ ignores data URIs', async () => {
|
397
|
+
const html = `
|
398
|
+
<img src="data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAUA">
|
399
|
+
<link rel="stylesheet" href="data:text/css;base64,Ym9keSB7IGJg==">
|
400
|
+
<script src="data:application/javascript;base64,YWxlcnQoJ2hpJyk7"></script>
|
401
|
+
<img src="actual_image.jpg"> `;
|
402
|
+
mockReadFileFn.mockResolvedValueOnce(html);
|
403
|
+
const result = await parseHTML(dataUriHtmlPath, logger);
|
404
|
+
expectAssetsToContain(result.assets, [{ type: 'image', url: 'actual_image.jpg' }]);
|
405
|
+
expect(loggerInfoSpy).toHaveBeenCalledWith(
|
406
|
+
'HTML parsing complete. Discovered 1 unique asset links.'
|
407
|
+
);
|
408
|
+
});
|
409
|
+
|
410
|
+
it('✅ ignores empty or missing src/href/srcset attributes', async () => {
|
411
|
+
const html = `
|
412
|
+
<link rel="stylesheet" href="">
|
413
|
+
<link rel="stylesheet">
|
414
|
+
<script src></script>
|
415
|
+
<script src=" "></script> <img src="">
|
416
|
+
<img>
|
417
|
+
<video src="">
|
418
|
+
<video poster="">
|
419
|
+
<audio src="">
|
420
|
+
<input type="image" src="">
|
421
|
+
<source src="">
|
422
|
+
<img srcset=" ,, ">
|
423
|
+
<img srcset=" ">
|
424
|
+
<source srcset="">
|
425
|
+
<link rel="icon" href="">
|
426
|
+
<link rel="manifest" href=" ">
|
427
|
+
<link rel="preload" as="font" href="">
|
428
|
+
<script src="real.js"></script> `;
|
429
|
+
mockReadFileFn.mockResolvedValueOnce(html);
|
430
|
+
const result = await parseHTML(emptySrcHtmlPath, logger);
|
431
|
+
expectAssetsToContain(result.assets, [{ type: 'js', url: 'real.js' }]);
|
432
|
+
expect(loggerInfoSpy).toHaveBeenCalledWith(
|
433
|
+
'HTML parsing complete. Discovered 1 unique asset links.'
|
434
|
+
);
|
435
|
+
});
|
436
|
+
|
437
|
+
it('✅ handles tricky srcset values with extra spaces/commas', async () => {
|
438
|
+
const html = `<img srcset=" ,, img1.png 1x ,, img2.png 2x, ,, img3.png 3x ,, , img4.png 4x">`;
|
439
|
+
mockReadFileFn.mockResolvedValueOnce(html);
|
440
|
+
const result = await parseHTML(trickySrcsetHtmlPath, logger);
|
441
|
+
const expected: Partial<Asset>[] = [
|
442
|
+
{ type: 'image', url: 'img1.png' },
|
443
|
+
{ type: 'image', url: 'img2.png' },
|
444
|
+
{ type: 'image', url: 'img3.png' },
|
445
|
+
{ type: 'image', url: 'img4.png' },
|
446
|
+
];
|
447
|
+
expectAssetsToContain(result.assets, expected);
|
448
|
+
expect(loggerInfoSpy).toHaveBeenCalledWith(
|
449
|
+
'HTML parsing complete. Discovered 4 unique asset links.'
|
450
|
+
);
|
451
|
+
});
|
452
|
+
|
453
|
+
it('✅ supports malformed or partial tags (best effort by cheerio)', async () => {
|
454
|
+
const mockBrokenHtml = `
|
455
|
+
<html><head>
|
456
|
+
<link rel="stylesheet" href="style.css" <script src="app.js </script> <img src="logo.png" alt="Logo" <p>This shouldn't be here</p> </img>
|
457
|
+
</head>
|
458
|
+
<body> Content </body></html>
|
459
|
+
`;
|
460
|
+
mockReadFileFn.mockResolvedValueOnce(mockBrokenHtml);
|
461
|
+
const result = await parseHTML(brokenHtmlPath, logger);
|
462
|
+
|
463
|
+
// Cheerio likely recovers the link tag correctly. Script/img might be broken.
|
464
|
+
const expected: Partial<Asset>[] = [
|
465
|
+
{ type: 'css', url: 'style.css' },
|
466
|
+
// { type: 'js', url: 'app.js' }, // Might not parse due to invalid structure
|
467
|
+
// { type: 'image', url: 'logo.png' }, // Might not parse
|
468
|
+
];
|
469
|
+
// Adjust expectation based on Cheerio's likely recovery
|
470
|
+
expectAssetsToContain(result.assets, expected);
|
471
|
+
expect(loggerInfoSpy).toHaveBeenCalledWith(
|
472
|
+
'HTML parsing complete. Discovered 1 unique asset links.'
|
473
|
+
); // Expect 1
|
474
|
+
});
|
475
|
+
|
476
|
+
it('✅ handles inline <style> and <script> tags without extracting them as assets', async () => {
|
477
|
+
const mockInlineHtml = `
|
478
|
+
<html><head>
|
479
|
+
<style> body { color: red; } </style>
|
480
|
+
<link rel="stylesheet" href="external.css">
|
481
|
+
</head><body>
|
482
|
+
<script> console.log('inline'); </script>
|
483
|
+
<script src="external.js"></script>
|
484
|
+
</body></html>
|
485
|
+
`;
|
486
|
+
mockReadFileFn.mockResolvedValueOnce(mockInlineHtml);
|
487
|
+
const result = await parseHTML(styleInlineHtmlPath, logger);
|
488
|
+
const expected: Partial<Asset>[] = [
|
489
|
+
{ type: 'css', url: 'external.css' },
|
490
|
+
{ type: 'js', url: 'external.js' },
|
491
|
+
];
|
492
|
+
expectAssetsToContain(result.assets, expected);
|
493
|
+
expect(result.htmlContent).toContain('<style> body { color: red; } </style>');
|
494
|
+
expect(result.htmlContent).toContain("<script> console.log('inline'); </script>");
|
495
|
+
expect(loggerInfoSpy).toHaveBeenCalledWith(
|
496
|
+
'HTML parsing complete. Discovered 2 unique asset links.'
|
497
|
+
);
|
498
|
+
});
|
499
|
+
|
500
|
+
it('✅ handles URLs with spaces, queries, and special chars preserving encoding', async () => {
|
501
|
+
const specialUrlEncoded = 'image%20with%20spaces.png?query=1&special=%C3%A4%C3%B6%C3%BC#hash'; // äöü
|
502
|
+
const scriptUrl = '/path/to/script.js?v=1.2.3';
|
503
|
+
const cssUrl = 'style.css#id-selector';
|
504
|
+
const mockSpecialCharsHtml = `
|
505
|
+
<img src="${specialUrlEncoded}">
|
506
|
+
<script src="${scriptUrl}"></script>
|
507
|
+
<link rel="stylesheet" href="${cssUrl}">
|
508
|
+
`;
|
509
|
+
mockReadFileFn.mockResolvedValueOnce(mockSpecialCharsHtml);
|
510
|
+
const result = await parseHTML(specialcharsHtmlPath, logger);
|
511
|
+
|
512
|
+
// FIX: Expect the *encoded* URL as extracted from the attribute
|
513
|
+
const expected: Partial<Asset>[] = [
|
514
|
+
{ type: 'image', url: specialUrlEncoded }, // Use encoded version
|
515
|
+
{ type: 'js', url: scriptUrl },
|
516
|
+
{ type: 'css', url: cssUrl },
|
517
|
+
];
|
518
|
+
// expectAssetsToContain will now check for the encoded URL in the results
|
519
|
+
expectAssetsToContain(result.assets, expected);
|
520
|
+
expect(result.assets).toHaveLength(3); // Double check length
|
521
|
+
});
|
522
|
+
|
523
|
+
it('✅ handles relative URLs correctly', async () => {
|
524
|
+
const html = `
|
525
|
+
<link rel="stylesheet" href="css/style.css">
|
526
|
+
<script src="../js/app.js"></script>
|
527
|
+
<img src="/images/logo.png">
|
528
|
+
<img src="//example.com/protocol-relative.jpg">
|
529
|
+
<img src="sibling.png">
|
530
|
+
`;
|
531
|
+
mockReadFileFn.mockResolvedValueOnce(html);
|
532
|
+
const result = await parseHTML(mockHtmlPath, logger);
|
533
|
+
const expected: Partial<Asset>[] = [
|
534
|
+
{ type: 'css', url: 'css/style.css' },
|
535
|
+
{ type: 'js', url: '../js/app.js' },
|
536
|
+
{ type: 'image', url: '/images/logo.png' },
|
537
|
+
{ type: 'image', url: '//example.com/protocol-relative.jpg' },
|
538
|
+
{ type: 'image', url: 'sibling.png' },
|
539
|
+
];
|
540
|
+
expectAssetsToContain(result.assets, expected);
|
541
|
+
});
|
542
|
+
|
543
|
+
it('✅ handles absolute URLs correctly', async () => {
|
544
|
+
const html = `
|
545
|
+
<link rel="stylesheet" href="https://cdn.example.com/style.css">
|
546
|
+
<script src="http://anothersite.net/app.js"></script>
|
547
|
+
<img src="https://secure.images.com/logo.png">
|
548
|
+
`;
|
549
|
+
mockReadFileFn.mockResolvedValueOnce(html);
|
550
|
+
const result = await parseHTML(mockHtmlPath, logger);
|
551
|
+
const expected: Partial<Asset>[] = [
|
552
|
+
{ type: 'css', url: 'https://cdn.example.com/style.css' },
|
553
|
+
{ type: 'js', url: 'http://anothersite.net/app.js' },
|
554
|
+
{ type: 'image', url: 'https://secure.images.com/logo.png' },
|
555
|
+
];
|
556
|
+
expectAssetsToContain(result.assets, expected);
|
557
|
+
});
|
558
|
+
});
|
559
|
+
});
|