portapack 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.eslintrc.json +9 -0
- package/.github/workflows/ci.yml +73 -0
- package/.github/workflows/deploy-pages.yml +56 -0
- package/.prettierrc +9 -0
- package/.releaserc.js +29 -0
- package/CHANGELOG.md +21 -0
- package/README.md +288 -0
- package/commitlint.config.js +36 -0
- package/dist/cli/cli-entry.js +1694 -0
- package/dist/cli/cli-entry.js.map +1 -0
- package/dist/index.d.ts +275 -0
- package/dist/index.js +1405 -0
- package/dist/index.js.map +1 -0
- package/docs/.vitepress/config.ts +89 -0
- package/docs/.vitepress/sidebar-generator.ts +73 -0
- package/docs/cli.md +117 -0
- package/docs/code-of-conduct.md +65 -0
- package/docs/configuration.md +151 -0
- package/docs/contributing.md +107 -0
- package/docs/demo.md +46 -0
- package/docs/deployment.md +132 -0
- package/docs/development.md +168 -0
- package/docs/getting-started.md +106 -0
- package/docs/index.md +40 -0
- package/docs/portapack-transparent.png +0 -0
- package/docs/portapack.jpg +0 -0
- package/docs/troubleshooting.md +107 -0
- package/examples/main.ts +118 -0
- package/examples/sample-project/index.html +12 -0
- package/examples/sample-project/logo.png +1 -0
- package/examples/sample-project/script.js +1 -0
- package/examples/sample-project/styles.css +1 -0
- package/jest.config.ts +124 -0
- package/jest.setup.cjs +211 -0
- package/nodemon.json +11 -0
- package/output.html +1 -0
- package/package.json +161 -0
- package/site-packed.html +1 -0
- package/src/cli/cli-entry.ts +28 -0
- package/src/cli/cli.ts +139 -0
- package/src/cli/options.ts +151 -0
- package/src/core/bundler.ts +201 -0
- package/src/core/extractor.ts +618 -0
- package/src/core/minifier.ts +233 -0
- package/src/core/packer.ts +191 -0
- package/src/core/parser.ts +115 -0
- package/src/core/web-fetcher.ts +292 -0
- package/src/index.ts +262 -0
- package/src/types.ts +163 -0
- package/src/utils/font.ts +41 -0
- package/src/utils/logger.ts +139 -0
- package/src/utils/meta.ts +100 -0
- package/src/utils/mime.ts +90 -0
- package/src/utils/slugify.ts +70 -0
- package/test-output.html +0 -0
- package/tests/__fixtures__/sample-project/index.html +5 -0
- package/tests/unit/cli/cli-entry.test.ts +104 -0
- package/tests/unit/cli/cli.test.ts +230 -0
- package/tests/unit/cli/options.test.ts +316 -0
- package/tests/unit/core/bundler.test.ts +287 -0
- package/tests/unit/core/extractor.test.ts +1129 -0
- package/tests/unit/core/minifier.test.ts +414 -0
- package/tests/unit/core/packer.test.ts +193 -0
- package/tests/unit/core/parser.test.ts +540 -0
- package/tests/unit/core/web-fetcher.test.ts +374 -0
- package/tests/unit/index.test.ts +339 -0
- package/tests/unit/utils/font.test.ts +81 -0
- package/tests/unit/utils/logger.test.ts +275 -0
- package/tests/unit/utils/meta.test.ts +70 -0
- package/tests/unit/utils/mime.test.ts +96 -0
- package/tests/unit/utils/slugify.test.ts +71 -0
- package/tsconfig.build.json +11 -0
- package/tsconfig.jest.json +17 -0
- package/tsconfig.json +20 -0
- package/tsup.config.ts +71 -0
- package/typedoc.json +28 -0
@@ -0,0 +1,233 @@
|
|
1
|
+
/**
|
2
|
+
* @file src/core/minifier.ts
|
3
|
+
* @description
|
4
|
+
* Provides the core functionality for minifying HTML, CSS, and JavaScript content
|
5
|
+
* within the PortaPack bundling process. Uses `html-minifier-terser`, `clean-css`,
|
6
|
+
* and `terser` libraries. Handles errors gracefully by logging warnings and returning
|
7
|
+
* original content for the specific asset that failed minification.
|
8
|
+
* Includes workarounds for apparent issues in @types/clean-css definitions.
|
9
|
+
*/
|
10
|
+
|
11
|
+
// --- Imports ---
|
12
|
+
import { minify as htmlMinify } from 'html-minifier-terser';
|
13
|
+
import type { Options as HtmlMinifyOptions } from 'html-minifier-terser';
|
14
|
+
import CleanCSS from 'clean-css';
|
15
|
+
// Import specific types from clean-css. Note: Using these directly caused issues.
|
16
|
+
import type { Options as CleanCSSOptions } from 'clean-css';
|
17
|
+
import { minify as jsMinify } from 'terser';
|
18
|
+
import type { MinifyOptions, MinifyOutput } from 'terser';
|
19
|
+
// Import necessary types from project - ensure these paths are correct and use .js extension
|
20
|
+
import type { ParsedHTML, BundleOptions, Asset } from '../types.js';
|
21
|
+
import { Logger } from '../utils/logger.js';
|
22
|
+
|
23
|
+
// --- Helper Interface for Workaround ---
|
24
|
+
|
25
|
+
/**
 * Shape this module reads from the value returned by a synchronous
 * `CleanCSS#minify` call. The returned value is cast to this interface rather
 * than relying on the published clean-css typings. Every member is optional,
 * so consumers must check for presence before use.
 */
export interface CleanCSSSyncResult {
  /** Minified stylesheet text, when any was produced. */
  styles?: string;
  /** Error messages reported for the input. */
  errors?: string[];
  /** Warning messages reported for the input. */
  warnings?: string[];
  /** Size figures for the run (units not established here). */
  stats?: { originalSize: number; minifiedSize: number };
}
|
38
|
+
|
39
|
+
// --- Default Minification Options Constants ---
|
40
|
+
|
41
|
+
/**
 * Baseline settings handed to html-minifier-terser.
 * `minifyAssets` spreads this object and then supplies its own `minifyCSS` /
 * `minifyJS` values, so the two `false` entries below are only defaults.
 */
const HTML_MINIFY_OPTIONS: HtmlMinifyOptions = {
  // Whitespace and comment handling
  collapseWhitespace: true,
  conservativeCollapse: true,
  removeComments: true,

  // Attribute and doctype clean-up
  removeAttributeQuotes: false,
  removeRedundantAttributes: true,
  removeScriptTypeAttributes: true,
  removeStyleLinkTypeAttributes: true,
  useShortDoctype: true,

  // Embedded CSS/JS: off by default, overridden per call by minifyAssets
  minifyCSS: false,
  minifyJS: false,
};
|
56
|
+
|
57
|
+
/**
 * Settings passed to the clean-css constructor.
 * `returnPromise` is pinned to `false` because `minifyAssets` reads the result
 * of `minify()` directly, without awaiting it.
 */
const CSS_MINIFY_OPTIONS: CleanCSSOptions = {
  // Keep minify() synchronous; the caller does not await it.
  returnPromise: false,
  level: {
    // Level 1 switches
    1: {
      optimizeBackground: true,
      optimizeBorderRadius: true,
      optimizeFilter: true,
      optimizeFontWeight: true,
      optimizeOutline: true,
    },
    // Level 2 switches
    2: {
      mergeMedia: true,
      mergeNonAdjacentRules: true,
      removeDuplicateFontRules: true,
      removeDuplicateMediaBlocks: true,
      removeDuplicateRules: true,
      restructureRules: true,
    },
  },
  // NOTE: the original author reported type-checking trouble between these
  // options and @types/clean-css; see the @ts-ignore at the constructor call.
};
|
82
|
+
|
83
|
+
/**
 * Settings passed to terser for every JavaScript asset.
 * Class and function names are kept by both the compressor and the mangler,
 * `console` calls are kept, `debugger` statements are dropped, and comments
 * are omitted from the output.
 */
const JS_MINIFY_OPTIONS: MinifyOptions = {
  compress: {
    ecma: 2020,
    dead_code: true,
    drop_debugger: true,
    drop_console: false,
    keep_classnames: true,
    keep_fnames: true,
  },
  mangle: {
    keep_classnames: true,
    keep_fnames: true,
  },
  format: {
    comments: false,
  },
};
|
101
|
+
|
102
|
+
// --- Main Minification Function ---
|
103
|
+
|
104
|
+
/**
 * Runs the enabled minifiers over a parsed document and its assets.
 *
 * CSS assets go through clean-css (constructor call guarded by `@ts-ignore`,
 * result read through a `CleanCSSSyncResult` assertion), JS assets through
 * terser, and the HTML string through html-minifier-terser. A minifier is
 * enabled unless its option is exactly `false`. Any failure is logged as a
 * warning and the affected content is kept as it was.
 *
 * Each asset is shallow-copied before processing, and the result is a new
 * object holding the (possibly minified) HTML and the array of copies.
 *
 * @param {ParsedHTML} parsed - Document and assets to process.
 * @param {BundleOptions} [options={}] - Supplies `minifyHtml`, `minifyCss`, `minifyJs`.
 * @param {Logger} [logger] - Optional logger for debug and warning output.
 * @returns {Promise<ParsedHTML>} Resolves to the processed document and assets.
 */
export async function minifyAssets(
  parsed: ParsedHTML,
  options: BundleOptions = {},
  logger?: Logger
): Promise<ParsedHTML> {
  const { htmlContent, assets } = parsed;

  // Fall back to empty values when either field is null/undefined.
  const sourceHtml = htmlContent ?? '';
  const sourceAssets = assets ?? [];

  // Nothing at all to work on: hand back what we were given.
  if (!sourceHtml && sourceAssets.length === 0) {
    logger?.debug('Minification skipped: No content.');
    return { htmlContent: sourceHtml, assets: sourceAssets };
  }

  // Key order is part of the debug output below.
  const minifyFlags = {
    minifyHtml: options.minifyHtml !== false,
    minifyCss: options.minifyCss !== false,
    minifyJs: options.minifyJs !== false
  };
  logger?.debug(`Minification flags: ${JSON.stringify(minifyFlags)}`);

  /** Processes one asset and returns a shallow copy carrying the resulting content. */
  const minifyOne = async (asset: Asset): Promise<Asset> => {
    const copy = { ...asset };

    // Only non-empty string content can be minified.
    if (typeof copy.content !== 'string' || copy.content.length === 0) {
      return copy;
    }

    const original = copy.content;
    const label = copy.url || `inline ${copy.type}`;
    let output = original; // stays untouched unless a minifier succeeds

    try {
      if (minifyFlags.minifyCss && copy.type === 'css') {
        // --- CSS: synchronous clean-css call ---
        logger?.debug(`Minifying CSS: ${label}`);

        // @ts-ignore - Suppress error TS2769 due to likely faulty @types/clean-css constructor overload definitions for sync mode.
        const cleaner = new CleanCSS(CSS_MINIFY_OPTIONS);
        const { errors, warnings, styles } = cleaner.minify(original) as CleanCSSSyncResult;

        if (errors?.length) {
          logger?.warn(`⚠️ CleanCSS failed for ${label}: ${errors.join(', ')}`);
        } else {
          if (warnings?.length) {
            logger?.debug(`CleanCSS warnings for ${label}: ${warnings.join(', ')}`);
          }
          if (styles) {
            output = styles;
            logger?.debug(`CSS minified successfully: ${label}`);
          } else {
            logger?.warn(`⚠️ CleanCSS produced no styles but reported no errors for ${label}. Keeping original.`);
          }
        }
      } else if (minifyFlags.minifyJs && copy.type === 'js') {
        // --- JS: asynchronous terser call ---
        logger?.debug(`Minifying JS: ${label}`);
        const minified: MinifyOutput = await jsMinify(original, JS_MINIFY_OPTIONS);

        if (minified.code) {
          output = minified.code;
          logger?.debug(`JS minified successfully: ${label}`);
        } else {
          // Not part of MinifyOutput's type; read defensively.
          const reported = (minified as any).error;
          if (reported) {
            logger?.warn(`⚠️ Terser failed for ${label}: ${reported.message || reported}`);
          } else {
            logger?.warn(`⚠️ Terser produced no code but reported no errors for ${label}. Keeping original.`);
          }
        }
      }
    } catch (err: unknown) {
      // A throwing minifier must not fail the whole run; `output` still holds the original.
      const reason = err instanceof Error ? err.message : String(err);
      logger?.warn(`⚠️ Failed to minify asset ${label} (${copy.type}): ${reason}`);
    }

    copy.content = output;
    return copy;
  };

  const minifiedAssets: Asset[] = await Promise.all(sourceAssets.map(minifyOne));

  // --- HTML document itself ---
  let finalHtml = sourceHtml;
  if (finalHtml.length > 0) {
    if (minifyFlags.minifyHtml) {
      logger?.debug('Minifying HTML content...');
      try {
        finalHtml = await htmlMinify(finalHtml, {
          ...HTML_MINIFY_OPTIONS,
          minifyCSS: minifyFlags.minifyCss,
          minifyJS: minifyFlags.minifyJs
        });
        logger?.debug('HTML minified successfully.');
      } catch (err: unknown) {
        // finalHtml was not reassigned, so the original markup is kept.
        const reason = err instanceof Error ? err.message : String(err);
        logger?.warn(`⚠️ HTML minification failed: ${reason}`);
      }
    } else {
      logger?.debug('HTML minification skipped (disabled).');
    }
  }

  return {
    htmlContent: finalHtml,
    assets: minifiedAssets
  };
}
|
@@ -0,0 +1,191 @@
|
|
1
|
+
/**
|
2
|
+
* @file src/core/packer.ts
|
3
|
+
* @description Inlines CSS, JS, and images into an HTML document for full portability.
|
4
|
+
* Uses Cheerio for safe DOM manipulation.
|
5
|
+
*/
|
6
|
+
|
7
|
+
import * as cheerio from 'cheerio';
|
8
|
+
// Import CheerioAPI type
|
9
|
+
import type { CheerioAPI } from 'cheerio';
|
10
|
+
import type { ParsedHTML, Asset } from '../types'; // Assuming correct path
|
11
|
+
import { Logger } from '../utils/logger'; // Assuming correct path
|
12
|
+
import { guessMimeType } from '../utils/mime'; // Assuming correct path
|
13
|
+
|
14
|
+
/**
 * Rewrites every `</script` sequence (any letter case) in the given code as
 * `<\/script`, keeping the original casing of the tag name.
 *
 * @param {string} code - JavaScript source destined for an inline `<script>` element.
 * @returns {string} The source with each closing-script opener backslash-escaped.
 */
function escapeScriptContent(code: string): string {
  // Capture the tag name so its casing is carried into the replacement.
  const closingScriptOpener = /<\/(script)/gi;
  const escaped = code.replace(closingScriptOpener, '<\\/$1');
  return escaped;
}
|
20
|
+
|
21
|
+
/**
 * Makes sure the document's `<head>` contains a `<base>` element with an
 * `href`. When none is found, `<base href="./">` is inserted as the first
 * child of `<head>`. A missing `<head>` is created first, and when there is no
 * `<html>` either, the existing root markup is moved into a new
 * `<html><head></head><body>…</body></html>` skeleton.
 *
 * @param {CheerioAPI} $ - Cheerio instance holding the document; modified in place.
 * @param {Logger} [logger] - Optional logger for debug output.
 */
function ensureBaseTag($: CheerioAPI, logger?: Logger): void {
  let headSelection = $('head');

  if (headSelection.length === 0) {
    logger?.debug('No <head> tag found. Creating <head> and ensuring <html> exists.');
    const existingHtml = $('html');

    if (existingHtml.length > 0) {
      // <html> is present: the new <head> becomes its first child.
      headSelection = $('<head>').prependTo(existingHtml) as any;
    } else {
      // No <html> at all: rebuild the skeleton around the current root markup.
      logger?.debug('No <html> tag found. Wrapping content in <html><body>...');
      const root = $.root();
      const previousMarkup = root.html() || '';
      root.empty();
      const createdHtml = $('<html>').appendTo(root) as any;
      headSelection = $('<head>').appendTo(createdHtml) as any;
      $('<body>').html(previousMarkup).appendTo(createdHtml);
    }
  }

  // Defensive: only touch <head> if the selection is actually non-empty.
  const hasHead = headSelection && headSelection.length > 0;
  if (hasHead && headSelection.find('base[href]').length === 0) {
    logger?.debug('Prepending <base href="./"> to <head>.');
    headSelection.prepend('<base href="./">');
  }
}
|
61
|
+
|
62
|
+
|
63
|
+
/**
 * Rewrites the loaded document so that references to known assets carry their
 * content inline. Assets are looked up by the exact attribute value (`asset.url`).
 *
 * - `<link rel="stylesheet">` becomes a `<style>` element (an `@import` of the
 *   data URI when the asset content is itself a data URI).
 * - `<script src>` becomes an inline `<script>` that keeps every attribute except `src`.
 * - Image-like attributes, `srcset` candidates and media `src` attributes are
 *   replaced only when the asset content is a `data:` URI.
 *
 * @param {CheerioAPI} $ - Cheerio instance holding the document; modified in place.
 * @param {Asset[]} assets - Assets whose `content` may be inlined.
 * @param {Logger} [logger] - Optional logger for debug and warning output.
 */
function inlineAssets($: CheerioAPI, assets: Asset[], logger?: Logger): void {
  logger?.debug(`Inlining ${assets.filter(a => a.content).length} assets with content...`);

  // URL -> asset index; a later duplicate URL overwrites an earlier one.
  const assetsByUrl = new Map<string, Asset>();
  for (const entry of assets) {
    assetsByUrl.set(entry.url, entry);
  }

  /** Looks up an asset by reference; empty or missing references never match. */
  const findAsset = (ref: string | undefined): Asset | undefined =>
    ref ? assetsByUrl.get(ref) : undefined;

  /** Returns the asset's content when it is a string starting with `data:`. */
  const dataUriOf = (candidate: Asset | undefined): string | undefined => {
    const content = candidate?.content;
    return typeof content === 'string' && content.startsWith('data:') ? content : undefined;
  };

  // 1. Stylesheets
  $('link[rel="stylesheet"][href]').each((_, node) => {
    const linkEl = $(node);
    const href = linkEl.attr('href');
    const match = findAsset(href);

    if (match && typeof match.content === 'string' && match.content.length > 0) {
      const css = match.content;
      const isDataUri = css.startsWith('data:');
      if (isDataUri) {
        logger?.debug(`Replacing link with style tag using existing data URI: ${match.url}`);
      } else {
        logger?.debug(`Inlining CSS: ${match.url}`);
      }
      const styleText = isDataUri ? `@import url("${css}");` : css;
      linkEl.replaceWith($('<style>').text(styleText));
      return;
    }
    if (href) {
      logger?.warn(`Could not inline CSS: ${href}. Content missing or invalid.`);
    }
  });

  // 2. Scripts
  $('script[src]').each((_, node) => {
    const scriptEl = $(node);
    const src = scriptEl.attr('src');
    const match = findAsset(src);

    if (match && typeof match.content === 'string' && match.content.length > 0) {
      logger?.debug(`Inlining JS: ${match.url}`);
      const replacement = $('<script>');
      replacement.text(escapeScriptContent(match.content));
      // Carry over type/async/defer/etc., but not the external reference.
      for (const [attrName, attrValue] of Object.entries(scriptEl.attr() || {})) {
        if (attrName.toLowerCase() !== 'src') {
          replacement.attr(attrName, attrValue);
        }
      }
      scriptEl.replaceWith(replacement);
      return;
    }
    if (src) {
      logger?.warn(`Could not inline JS: ${src}. Content missing or not string.`);
    }
  });

  // 3. Images: <img src>, <video poster>, <input type="image" src>
  $('img[src], video[poster], input[type="image"][src]').each((_, node) => {
    const target = $(node);
    const attrName = target.is('video') ? 'poster' : 'src';
    const ref = target.attr(attrName);
    const match = findAsset(ref);
    const dataUri = dataUriOf(match);

    if (match && dataUri !== undefined) {
      logger?.debug(`Inlining image via ${attrName}: ${match.url}`);
      target.attr(attrName, dataUri);
    } else if (ref) {
      logger?.warn(`Could not inline image via ${attrName}: ${ref}. Content missing or not a data URI.`);
    }
  });

  // 4. srcset candidates on <img> and <source>
  $('img[srcset], source[srcset]').each((_, node) => {
    const target = $(node);
    const srcset = target.attr('srcset');
    if (!srcset) return;

    let replacedAny = false;
    const rewritten = srcset.split(',').map(rawCandidate => {
      const candidate = rawCandidate.trim();
      const [url, descriptor] = candidate.split(/\s+/, 2);
      const dataUri = dataUriOf(findAsset(url));
      if (dataUri === undefined) {
        return candidate; // unknown or non-inlinable: keep as written
      }
      replacedAny = true;
      return `${dataUri}${descriptor ? ' ' + descriptor : ''}`;
    });

    // Leave the attribute untouched unless at least one candidate was swapped.
    if (replacedAny) {
      target.attr('srcset', rewritten.join(', '));
    }
  });

  // 5. Media sources: <video>/<audio> and their direct <source> children
  $('video[src], audio[src], video > source[src], audio > source[src]').each((_, node) => {
    const target = $(node);
    const match = findAsset(target.attr('src'));
    const dataUri = dataUriOf(match);
    if (match && dataUri !== undefined) {
      logger?.debug(`Inlining media source: ${match.url}`);
      target.attr('src', dataUri);
    }
  });

  logger?.debug('Asset inlining process complete.');
}
|
158
|
+
|
159
|
+
|
160
|
+
/**
 * Produces one self-contained HTML string from a parsed document.
 * The markup is loaded into Cheerio, a `<base>` tag is ensured via
 * `ensureBaseTag`, assets are inlined via `inlineAssets`, and the document is
 * serialized back to a string.
 *
 * @export
 * @param {ParsedHTML} parsed - Document markup plus its assets (which may carry content).
 * @param {Logger} [logger] - Optional logger for debug and warning output.
 * @returns {string} The packed HTML. When `htmlContent` is empty or not a
 * string, a fixed minimal HTML shell is returned instead.
 */
export function packHTML(parsed: ParsedHTML, logger?: Logger): string {
  const { htmlContent, assets } = parsed;

  // Guard: nothing usable to pack.
  if (typeof htmlContent !== 'string' || !htmlContent) {
    logger?.warn('Packer received empty or invalid htmlContent. Returning minimal HTML shell.');
    return '<!DOCTYPE html><html><head><base href="./"></head><body></body></html>';
  }

  logger?.debug('Loading HTML content into Cheerio for packing...');
  const $ = cheerio.load(htmlContent);

  logger?.debug('Ensuring <base> tag exists...');
  ensureBaseTag($, logger);

  logger?.debug('Starting asset inlining...');
  inlineAssets($, assets, logger);

  logger?.debug('Generating final packed HTML string...');
  const packed = $.html();
  logger?.debug(`Packing complete. Final size: ${Buffer.byteLength(packed)} bytes.`);

  return packed;
}
|
@@ -0,0 +1,115 @@
|
|
1
|
+
/**
|
2
|
+
* @file src/core/parser.ts
|
3
|
+
* @description
|
4
|
+
* Parses an HTML file using Cheerio to extract the basic structure
|
5
|
+
* and identify top-level linked assets (CSS, JS, images, fonts, video, audio etc.).
|
6
|
+
* It relies on tag names, link relations, and file extensions to guess asset types.
|
7
|
+
* It does *not* fetch or analyze the content of linked assets. Inline styles/scripts
|
8
|
+
* and data URIs are ignored. Duplicate asset URLs are ignored.
|
9
|
+
*/
|
10
|
+
|
11
|
+
// FIX: Use only the named import for readFile
|
12
|
+
import { readFile } from 'fs/promises';
|
13
|
+
// NOTE: 'path' module was imported but not used, so removed. Add back if needed later.
|
14
|
+
// import path from 'path';
|
15
|
+
import * as cheerio from 'cheerio';
|
16
|
+
import type { CheerioAPI } from 'cheerio';
|
17
|
+
import type { Asset, ParsedHTML } from '../types.js';
|
18
|
+
import { Logger } from '../utils/logger.js';
|
19
|
+
import { guessMimeType } from '../utils/mime.js';
|
20
|
+
|
21
|
+
/**
 * Reads an HTML file and lists the external assets it references.
 *
 * The file is loaded with Cheerio and the following references are collected,
 * in this order: stylesheets, scripts, `<img src>`, `<input type="image" src>`,
 * `srcset` candidates (`<img>` and `<picture> <source>`), `<video src>`,
 * `<video poster>`, `<audio src>`, `<source src>` directly under
 * `<video>`/`<audio>`, icon/manifest links, and preloaded fonts.
 * Only the HTML is inspected; assets are not fetched and references inside CSS
 * are not followed. Empty URLs, `data:` URIs and URLs already seen are skipped,
 * so the first occurrence of a URL decides its type.
 *
 * @async
 * @function parseHTML
 * @param {string} entryFilePath - Path to the input HTML file, passed to `readFile` as given.
 * @param {Logger} [logger] - Optional logger instance.
 * @returns {Promise<ParsedHTML>} Resolves to the raw HTML text and the list of
 * discovered assets (`type` and `url` only).
 * @throws {Error} When the file cannot be read; the underlying error is attached as `cause`.
 */
export async function parseHTML(entryFilePath: string, logger?: Logger): Promise<ParsedHTML> {
  logger?.debug(`Parsing HTML file: ${entryFilePath}`);

  let htmlContent: string;
  try {
    htmlContent = await readFile(entryFilePath, 'utf-8');
    logger?.debug(`Successfully read HTML file (${Buffer.byteLength(htmlContent)} bytes).`);
  } catch (err: unknown) {
    // Narrow before reading `.message`: a thrown value is not guaranteed to be an Error.
    const reason = err instanceof Error ? err.message : String(err);
    logger?.error(`Failed to read HTML file "${entryFilePath}": ${reason}`);
    throw new Error(`Could not read input HTML file: ${entryFilePath}`, { cause: err });
  }

  const $: CheerioAPI = cheerio.load(htmlContent);
  const assets: Asset[] = [];
  const addedUrls = new Set<string>();

  /** Records a URL once; `forcedType` wins over the type guessed from the URL. */
  const addAsset = (url?: string, forcedType?: Asset['type']): void => {
    if (!url || url.trim() === '' || url.startsWith('data:')) {
      return;
    }
    if (addedUrls.has(url)) {
      logger?.debug(`Skipping duplicate asset URL: ${url}`);
      return;
    }
    addedUrls.add(url);
    const mimeInfo = guessMimeType(url);
    const type = forcedType ?? mimeInfo.assetType;
    assets.push({ type, url });
    logger?.debug(`Discovered asset: Type='${type}', URL='${url}'`);
  };

  /** Adds the given attribute of every element matching `selector` as an asset of `type`. */
  const collect = (selector: string, attribute: string, type: Asset['type']): void => {
    $(selector).each((_, el) => {
      addAsset($(el).attr(attribute), type);
    });
  };

  logger?.debug('Extracting assets from HTML tags...');

  // NOTE: the call order below determines both the order of `assets` and which
  // type a URL gets when it is referenced from more than one kind of tag.

  // Stylesheets and scripts
  collect('link[rel="stylesheet"][href]', 'href', 'css');
  collect('script[src]', 'src', 'js');

  // Images
  collect('img[src]', 'src', 'image');
  collect('input[type="image"][src]', 'src', 'image');

  // Image srcset: each comma-separated candidate contributes its URL part
  $('img[srcset], picture source[srcset]').each((_, el) => {
    const srcset = $(el).attr('srcset');
    srcset?.split(',').forEach(entry => {
      const [url] = entry.trim().split(/\s+/);
      addAsset(url, 'image');
    });
  });

  // Video, poster images, audio, and their <source> children
  collect('video[src]', 'src', 'video');
  collect('video[poster]', 'poster', 'image');
  collect('audio[src]', 'src', 'audio');
  collect('video > source[src]', 'src', 'video');
  collect('audio > source[src]', 'src', 'audio');

  // Icons and manifest: icons are forced to 'image', the manifest type is guessed from its URL
  const iconRels = ['icon', 'shortcut icon', 'apple-touch-icon'];
  $('link[href]').each((_, el) => {
    const rel = $(el).attr('rel')?.toLowerCase() ?? '';
    const isIcon = iconRels.includes(rel);
    if (!isIcon && rel !== 'manifest') {
      return;
    }
    addAsset($(el).attr('href'), isIcon ? 'image' : undefined);
  });

  // Preloaded fonts
  collect('link[rel="preload"][as="font"][href]', 'href', 'font');

  logger?.info(`HTML parsing complete. Discovered ${assets.length} unique asset links.`);
  return { htmlContent, assets };
}
|