html-minifier-next 6.1.1 → 6.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/cli.js +34 -18
- package/dist/htmlminifier.cjs +111 -59
- package/dist/types/htmlminifier.d.ts +1 -1
- package/dist/types/htmlminifier.d.ts.map +1 -1
- package/package.json +1 -1
- package/src/htmlminifier.js +109 -59
- package/src/htmlparser.js +1 -1
- package/src/lib/attributes.js +1 -1
package/cli.js
CHANGED
|
@@ -173,32 +173,48 @@ function readFile(file) {
|
|
|
173
173
|
}
|
|
174
174
|
|
|
175
175
|
/**
|
|
176
|
-
* Load config from a file path
|
|
176
|
+
* Load config from a file path—for unambiguous extensions (.json, .cjs, .mjs) only the
|
|
177
|
+
* matching format is attempted and its error shown on failure; for .js or unknown extensions
|
|
178
|
+
* all formats are tried and the most relevant error is reported
|
|
177
179
|
* @param {string} configPath - Path to config file
|
|
178
180
|
* @returns {Promise<object>} Loaded config object
|
|
179
181
|
*/
|
|
180
182
|
async function loadConfigFromPath(configPath) {
|
|
181
|
-
const
|
|
183
|
+
const abs = path.resolve(configPath);
|
|
184
|
+
const ext = path.extname(configPath).toLowerCase();
|
|
182
185
|
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
}
|
|
187
|
-
const abs = path.resolve(configPath);
|
|
186
|
+
if (ext === '.json') {
|
|
187
|
+
try { return JSON.parse(readFile(abs).replace(/^\uFEFF/, '')); }
|
|
188
|
+
catch (err) { fatal(`Cannot parse config file as JSON: ${err.message}`); }
|
|
189
|
+
}
|
|
188
190
|
|
|
189
|
-
|
|
191
|
+
if (ext === '.cjs') {
|
|
190
192
|
try {
|
|
191
193
|
const result = require(abs);
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
194
|
+
return (result && typeof result === 'object' && result.__esModule === true) ? result.default : result;
|
|
195
|
+
} catch (err) { fatal(`Cannot load config file: ${err.message}`); }
|
|
196
|
+
}
|
|
197
|
+
|
|
198
|
+
if (ext === '.mjs') {
|
|
199
|
+
try { const mod = await import(pathToFileURL(abs).href); return 'default' in mod ? mod.default : mod; }
|
|
200
|
+
catch (err) { fatal(`Cannot load config file: ${err.message}`); }
|
|
201
|
+
}
|
|
202
|
+
|
|
203
|
+
// For .js or extension-less files, try JSON first, then CJS, then ESM
|
|
204
|
+
let jsonErr;
|
|
205
|
+
try { return JSON.parse(readFile(abs).replace(/^\uFEFF/, '')); }
|
|
206
|
+
catch (err) { jsonErr = err; }
|
|
207
|
+
|
|
208
|
+
try {
|
|
209
|
+
const result = require(abs);
|
|
210
|
+
// Handle ESM interop: If `require()` loads an ESM file, it may return `{__esModule: true, default: …}`
|
|
211
|
+
return (result && typeof result === 'object' && result.__esModule === true) ? result.default : result;
|
|
212
|
+
} catch (cjsErr) {
|
|
213
|
+
try { const mod = await import(pathToFileURL(abs).href); return 'default' in mod ? mod.default : mod; }
|
|
214
|
+
catch (esmErr) {
|
|
215
|
+
fatal(ext === '.js'
|
|
216
|
+
? `Cannot load config file: ${cjsErr.message}\nAs module: ${esmErr.message}`
|
|
217
|
+
: `Cannot read the specified config file.\nAs JSON: ${jsonErr.message}\nAs CJS: ${cjsErr.message}\nAs module: ${esmErr.message}`);
|
|
202
218
|
}
|
|
203
219
|
}
|
|
204
220
|
}
|
package/dist/htmlminifier.cjs
CHANGED
|
@@ -607,7 +607,7 @@ class HTMLParser {
|
|
|
607
607
|
// Note: Unquoted attribute values are intentionally not handled here.
|
|
608
608
|
// Per HTML spec, unquoted values cannot contain spaces or special chars,
|
|
609
609
|
// making a 20 KB+ unquoted value practically impossible. If encountered,
|
|
610
|
-
// it
|
|
610
|
+
// it’s malformed HTML and using the truncated regex match is acceptable.
|
|
611
611
|
}
|
|
612
612
|
}
|
|
613
613
|
}
|
|
@@ -2632,7 +2632,7 @@ function buildAttr(normalized, hasUnarySlash, options, isLast, uidAttr) {
|
|
|
2632
2632
|
attrValue = attrValue.replace(/'/g, '&#39;');
|
|
2633
2633
|
}
|
|
2634
2634
|
} else {
|
|
2635
|
-
// `preventAttributesEscaping` mode: Choose safe quotes but don
|
|
2635
|
+
// `preventAttributesEscaping` mode: Choose safe quotes but don’t escape
|
|
2636
2636
|
// except when both quote types are present—then escape to prevent invalid HTML
|
|
2637
2637
|
const hasDoubleQuote = attrValue.indexOf('"') !== -1;
|
|
2638
2638
|
const hasSingleQuote = attrValue.indexOf("'") !== -1;
|
|
@@ -3002,6 +3002,8 @@ let svgMinifyCache = null;
|
|
|
3002
3002
|
|
|
3003
3003
|
// Pre-compiled patterns for script merging (avoid repeated allocation in hot path)
|
|
3004
3004
|
const RE_SCRIPT_ATTRS = /([^\s=]+)(?:=(?:"([^"]*)"|'([^']*)'|([^\s>]+)))?/g;
|
|
3005
|
+
const RE_SCRIPT_OPEN = /<script(?=[\s>])/gi; // Finds tag start; use `findTagEnd()` for the actual closing `>`
|
|
3006
|
+
const RE_SCRIPT_CLOSE = /<\/script\s*>/gi;
|
|
3005
3007
|
const SCRIPT_BOOL_ATTRS = new Set(['async', 'defer', 'nomodule']);
|
|
3006
3008
|
const DEFAULT_JS_TYPES = new Set(['', 'text/javascript', 'application/javascript']);
|
|
3007
3009
|
|
|
@@ -3014,6 +3016,28 @@ const RE_HTML_ENCODING = /^(text\/html|application\/xhtml\+xml)$/i;
|
|
|
3014
3016
|
|
|
3015
3017
|
// Script merging
|
|
3016
3018
|
|
|
3019
|
+
/**
|
|
3020
|
+
* Find the index of the `>` that closes an opening tag, correctly skipping
|
|
3021
|
+
* over quoted attribute values (which may contain `>`).
|
|
3022
|
+
* @param {string} html
|
|
3023
|
+
* @param {number} pos - Start position (just after the tag name)
|
|
3024
|
+
* @returns {number} Index of the closing `>`, or -1 if not found
|
|
3025
|
+
*/
|
|
3026
|
+
function findTagEnd(html, pos) {
|
|
3027
|
+
let i = pos;
|
|
3028
|
+
while (i < html.length) {
|
|
3029
|
+
const ch = html[i];
|
|
3030
|
+
if (ch === '>') return i;
|
|
3031
|
+
if (ch === '"' || ch === "'") {
|
|
3032
|
+
const q = ch;
|
|
3033
|
+
i++;
|
|
3034
|
+
while (i < html.length && html[i] !== q) i++;
|
|
3035
|
+
}
|
|
3036
|
+
i++;
|
|
3037
|
+
}
|
|
3038
|
+
return -1;
|
|
3039
|
+
}
|
|
3040
|
+
|
|
3017
3041
|
/**
|
|
3018
3042
|
* Merge consecutive inline script tags into one (`mergeConsecutiveScripts`).
|
|
3019
3043
|
* Only merges scripts that are compatible:
|
|
@@ -3021,79 +3045,104 @@ const RE_HTML_ENCODING = /^(text\/html|application\/xhtml\+xml)$/i;
|
|
|
3021
3045
|
* - Same `type` (or both default JavaScript)
|
|
3022
3046
|
* - No conflicting attributes (`async`, `defer`, `nomodule`, different `nonce`)
|
|
3023
3047
|
*
|
|
3024
|
-
*
|
|
3025
|
-
*
|
|
3026
|
-
*
|
|
3027
|
-
* HTML, such strings should be escaped as `<\/script>` or split like
|
|
3028
|
-
* `'</scr' + 'ipt>'`, so this limitation rarely affects real-world code. The
|
|
3029
|
-
* earlier `minifyJS` step (if enabled) typically handles this escaping already.
|
|
3048
|
+
* Uses a scanner rather than a regex to locate script boundaries, so literal
|
|
3049
|
+
* `</script>` strings inside script content are handled correctly per the HTML
|
|
3050
|
+
* spec (raw text ends at the first `</script>`).
|
|
3030
3051
|
*
|
|
3031
3052
|
* @param {string} html - The HTML string to process
|
|
3032
3053
|
* @returns {string} HTML with consecutive scripts merged
|
|
3033
3054
|
*/
|
|
3034
3055
|
function mergeConsecutiveScripts(html) {
|
|
3035
|
-
//
|
|
3036
|
-
|
|
3037
|
-
|
|
3038
|
-
|
|
3039
|
-
|
|
3040
|
-
|
|
3041
|
-
|
|
3042
|
-
|
|
3043
|
-
|
|
3044
|
-
|
|
3045
|
-
|
|
3056
|
+
// Parse an attribute string into a name→value map
|
|
3057
|
+
const parseAttrs = (attrStr) => {
|
|
3058
|
+
const attrs = {};
|
|
3059
|
+
RE_SCRIPT_ATTRS.lastIndex = 0;
|
|
3060
|
+
let m;
|
|
3061
|
+
while ((m = RE_SCRIPT_ATTRS.exec(attrStr)) !== null) {
|
|
3062
|
+
const name = m[1].toLowerCase();
|
|
3063
|
+
const value = m[2] ?? m[3] ?? m[4] ?? '';
|
|
3064
|
+
attrs[name] = value;
|
|
3065
|
+
}
|
|
3066
|
+
return attrs;
|
|
3067
|
+
};
|
|
3068
|
+
|
|
3046
3069
|
let changed = true;
|
|
3047
3070
|
|
|
3048
3071
|
// Keep merging until no more changes (handles chains of 3+ scripts)
|
|
3049
3072
|
while (changed) {
|
|
3050
3073
|
changed = false;
|
|
3051
|
-
|
|
3052
|
-
|
|
3053
|
-
|
|
3054
|
-
|
|
3055
|
-
|
|
3056
|
-
|
|
3057
|
-
|
|
3058
|
-
|
|
3059
|
-
|
|
3060
|
-
|
|
3061
|
-
|
|
3062
|
-
|
|
3063
|
-
|
|
3074
|
+
RE_SCRIPT_OPEN.lastIndex = 0;
|
|
3075
|
+
let m1;
|
|
3076
|
+
|
|
3077
|
+
while ((m1 = RE_SCRIPT_OPEN.exec(html)) !== null) {
|
|
3078
|
+
// Use findTagEnd() to get the real closing '>', skipping quoted attribute values
|
|
3079
|
+
const tagEnd1 = findTagEnd(html, m1.index + 7);
|
|
3080
|
+
if (tagEnd1 === -1) break;
|
|
3081
|
+
|
|
3082
|
+
const attrs1Str = html.slice(m1.index + 7, tagEnd1);
|
|
3083
|
+
const contentStart1 = tagEnd1 + 1;
|
|
3084
|
+
|
|
3085
|
+
// Find end of this script’s content (first `</script>`—per HTML spec, raw text ends here)
|
|
3086
|
+
RE_SCRIPT_CLOSE.lastIndex = contentStart1;
|
|
3087
|
+
const close1 = RE_SCRIPT_CLOSE.exec(html);
|
|
3088
|
+
if (!close1) break;
|
|
3089
|
+
|
|
3090
|
+
const content1 = html.slice(contentStart1, close1.index);
|
|
3091
|
+
const afterClose1 = close1.index + close1[0].length;
|
|
3092
|
+
|
|
3093
|
+
// Skip optional whitespace and check for a consecutive <script> tag
|
|
3094
|
+
let i = afterClose1;
|
|
3095
|
+
while (i < html.length && (html[i] === ' ' || html[i] === '\t' || html[i] === '\n' || html[i] === '\r' || html[i] === '\f')) i++;
|
|
3096
|
+
if (html.slice(i, i + 7).toLowerCase() !== '<script' || (html[i + 7] !== '>' && !/\s/.test(html[i + 7]))) {
|
|
3097
|
+
RE_SCRIPT_OPEN.lastIndex = afterClose1;
|
|
3098
|
+
continue;
|
|
3099
|
+
}
|
|
3100
|
+
|
|
3101
|
+
const tagStart2 = i;
|
|
3102
|
+
const tagEnd2 = findTagEnd(html, tagStart2 + 7);
|
|
3103
|
+
if (tagEnd2 === -1) break;
|
|
3104
|
+
|
|
3105
|
+
const attrs2Str = html.slice(tagStart2 + 7, tagEnd2);
|
|
3106
|
+
const contentStart2 = tagEnd2 + 1;
|
|
3064
3107
|
|
|
3065
|
-
|
|
3066
|
-
|
|
3108
|
+
// Find end of second script’s content
|
|
3109
|
+
RE_SCRIPT_CLOSE.lastIndex = contentStart2;
|
|
3110
|
+
const close2 = RE_SCRIPT_CLOSE.exec(html);
|
|
3111
|
+
if (!close2) break;
|
|
3112
|
+
|
|
3113
|
+
const content2 = html.slice(contentStart2, close2.index);
|
|
3114
|
+
const afterClose2 = close2.index + close2[0].length;
|
|
3115
|
+
|
|
3116
|
+
const a1 = parseAttrs(attrs1Str);
|
|
3117
|
+
const a2 = parseAttrs(attrs2Str);
|
|
3067
3118
|
|
|
3068
3119
|
// Check for `src`—cannot merge external scripts
|
|
3069
3120
|
if ('src' in a1 || 'src' in a2) {
|
|
3070
|
-
|
|
3121
|
+
RE_SCRIPT_OPEN.lastIndex = afterClose1;
|
|
3122
|
+
continue;
|
|
3071
3123
|
}
|
|
3072
3124
|
|
|
3073
3125
|
// Check `type` compatibility (both must be default JS)
|
|
3126
|
+
// Non-JS types (modules, JSON, etc.) must not be merged:
|
|
3127
|
+
// Module scripts have per-script lexical scope, and non-JS content (e.g., JSON)
|
|
3128
|
+
// is not concatenable; even identical non-JS types are incompatible
|
|
3074
3129
|
const type1 = (a1.type || '').toLowerCase();
|
|
3075
3130
|
const type2 = (a2.type || '').toLowerCase();
|
|
3076
|
-
|
|
3077
|
-
|
|
3078
|
-
|
|
3079
|
-
// Module scripts have per-script lexical scope, and non-JS content (e.g., JSON)
|
|
3080
|
-
// is not concatenable. Even identical non-JS types are incompatible.
|
|
3081
|
-
return match;
|
|
3131
|
+
if (!DEFAULT_JS_TYPES.has(type1) || !DEFAULT_JS_TYPES.has(type2)) {
|
|
3132
|
+
RE_SCRIPT_OPEN.lastIndex = afterClose1;
|
|
3133
|
+
continue;
|
|
3082
3134
|
}
|
|
3083
3135
|
|
|
3084
|
-
// Check for conflicting boolean attributes
|
|
3136
|
+
// Check for conflicting boolean attributes
|
|
3137
|
+
let boolConflict = false;
|
|
3085
3138
|
for (const attr of SCRIPT_BOOL_ATTRS) {
|
|
3086
|
-
|
|
3087
|
-
const has2 = attr in a2;
|
|
3088
|
-
if (has1 !== has2) {
|
|
3089
|
-
// One has it, one doesn't - incompatible
|
|
3090
|
-
return match;
|
|
3091
|
-
}
|
|
3139
|
+
if ((attr in a1) !== (attr in a2)) { boolConflict = true; break; }
|
|
3092
3140
|
}
|
|
3093
3141
|
|
|
3094
3142
|
// Check `nonce`—must be same or both absent
|
|
3095
|
-
if (a1.nonce !== a2.nonce) {
|
|
3096
|
-
|
|
3143
|
+
if (boolConflict || a1.nonce !== a2.nonce) {
|
|
3144
|
+
RE_SCRIPT_OPEN.lastIndex = afterClose1;
|
|
3145
|
+
continue;
|
|
3097
3146
|
}
|
|
3098
3147
|
|
|
3099
3148
|
// Scripts are compatible—merge them
|
|
@@ -3114,11 +3163,12 @@ function mergeConsecutiveScripts(html) {
|
|
|
3114
3163
|
}
|
|
3115
3164
|
|
|
3116
3165
|
// Use first script’s attributes (they should be compatible)
|
|
3117
|
-
|
|
3118
|
-
|
|
3166
|
+
html = html.slice(0, m1.index) + `<script${attrs1Str}>${mergedContent}</script>` + html.slice(afterClose2);
|
|
3167
|
+
break; // Restart scanning (outer while loop)
|
|
3168
|
+
}
|
|
3119
3169
|
}
|
|
3120
3170
|
|
|
3121
|
-
return
|
|
3171
|
+
return html;
|
|
3122
3172
|
}
|
|
3123
3173
|
|
|
3124
3174
|
// Type definitions
|
|
@@ -3344,7 +3394,7 @@ function mergeConsecutiveScripts(html) {
|
|
|
3344
3394
|
* event handler attributes. If an object is provided, it can include:
|
|
3345
3395
|
* - `engine`: The minifier to use (`terser` or `swc`). Default: `terser`.
|
|
3346
3396
|
* Note: Inline event handlers (e.g., `onclick="…"`) always use Terser
|
|
3347
|
-
* regardless of engine setting, as
|
|
3397
|
+
* regardless of engine setting, as SWC doesn’t support bare return statements.
|
|
3348
3398
|
* - Engine-specific options (e.g., Terser options if `engine: 'terser'`,
|
|
3349
3399
|
* SWC options if `engine: 'swc'`).
|
|
3350
3400
|
* If a function is provided, it will be used to perform
|
|
@@ -4564,7 +4614,7 @@ function joinResultSegments(results, options, restoreCustom, restoreIgnore) {
|
|
|
4564
4614
|
* - Cache sizes are locked after first initialization—subsequent calls use the same caches
|
|
4565
4615
|
* even if different `cacheCSS`/`cacheJS`/`cacheSVG` options are provided
|
|
4566
4616
|
* - The first call’s options determine the cache sizes for subsequent calls
|
|
4567
|
-
* -
|
|
4617
|
+
* - Invalid values (NaN, Infinity) fall back to the default size (500); values below `1` are clamped to `1`
|
|
4568
4618
|
*/
|
|
4569
4619
|
function initCaches(options) {
|
|
4570
4620
|
// Only create caches once (on first call)—sizes are locked after this
|
|
@@ -4581,6 +4631,9 @@ function initCaches(options) {
|
|
|
4581
4631
|
return parsed;
|
|
4582
4632
|
};
|
|
4583
4633
|
|
|
4634
|
+
// Sanitize a cache size: Non-finite/NaN falls back to `defaultSize`; otherwise clamped to min 1 and floored
|
|
4635
|
+
const sanitizeSize = (size) => Number.isFinite(size) ? Math.max(1, Math.floor(size)) : defaultSize;
|
|
4636
|
+
|
|
4584
4637
|
// Get cache sizes with precedence: Options > env > default
|
|
4585
4638
|
const cssSize = options.cacheCSS !== undefined ? options.cacheCSS
|
|
4586
4639
|
: (parseEnvCacheSize(process.env.HMN_CACHE_CSS) ?? defaultSize);
|
|
@@ -4589,10 +4642,9 @@ function initCaches(options) {
|
|
|
4589
4642
|
const svgSize = options.cacheSVG !== undefined ? options.cacheSVG
|
|
4590
4643
|
: (parseEnvCacheSize(process.env.HMN_CACHE_SVG) ?? defaultSize);
|
|
4591
4644
|
|
|
4592
|
-
|
|
4593
|
-
const
|
|
4594
|
-
const
|
|
4595
|
-
const svgFinalSize = svgSize === 0 ? 1 : svgSize;
|
|
4645
|
+
const cssFinalSize = sanitizeSize(cssSize);
|
|
4646
|
+
const jsFinalSize = sanitizeSize(jsSize);
|
|
4647
|
+
const svgFinalSize = sanitizeSize(svgSize);
|
|
4596
4648
|
|
|
4597
4649
|
cssMinifyCache = new LRU(cssFinalSize);
|
|
4598
4650
|
jsMinifyCache = new LRU(jsFinalSize);
|
|
@@ -254,7 +254,7 @@ export type MinifierOptions = {
|
|
|
254
254
|
* event handler attributes. If an object is provided, it can include:
|
|
255
255
|
* - `engine`: The minifier to use (`terser` or `swc`). Default: `terser`.
|
|
256
256
|
* Note: Inline event handlers (e.g., `onclick="…"`) always use Terser
|
|
257
|
-
* regardless of engine setting, as
|
|
257
|
+
* regardless of engine setting, as SWC doesn’t support bare return statements.
|
|
258
258
|
* - Engine-specific options (e.g., Terser options if `engine: 'terser'`,
|
|
259
259
|
* SWC options if `engine: 'swc'`).
|
|
260
260
|
* If a function is provided, it will be used to perform
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"htmlminifier.d.ts","sourceRoot":"","sources":["../../src/htmlminifier.js"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"htmlminifier.d.ts","sourceRoot":"","sources":["../../src/htmlminifier.js"],"names":[],"mappings":"AA2uDO,8BAJI,MAAM,YACN,eAAe,GACb,OAAO,CAAC,MAAM,CAAC,CAwB3B;;;;;;;;;;;;UAh+CS,MAAM;;;;;;;;;;;;;;;;;;mCAaA,MAAM,SAAS,aAAa,EAAE,yBAAyB,CAAC,GAAG,EAAE,MAAM,KAAK,OAAO,KAAK,OAAO;;;;;;;+BAM3F,MAAM,GAAG,IAAI,SAAS,aAAa,EAAE,GAAG,SAAS,qBAAqB,CAAC,GAAG,EAAE,MAAM,KAAK,OAAO,KAAK,OAAO;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;qBA6JtG,OAAO,KAAK,IAAI;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;2HA2BiF,MAAM,SAAS,MAAM,KAAK,OAAO,CAAC,MAAM,CAAC,GAAG,MAAM;;;;;;;;;;;;;;;;iBASxG,QAAQ,GAAG,KAAK;gBAAgC,MAAM,WAAW,OAAO,KAAK,OAAO,CAAC,MAAM,CAAC,GAAG,MAAM;;;;;;;;;;;eAa/H,MAAM;gBAAY,MAAM,KAAK,OAAO,CAAC,MAAM,CAAC,GAAG,MAAM;;;;;;;;;;;;;;;;;mBAiBzE,MAAM,KAAK,MAAM;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;kDA+DF,MAAM,OAAO,MAAM,KAAK,OAAO;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;sCA2EpC,MAAM,SAAS,aAAa,EAAE,KAAK,IAAI;;;;;;;;;wCAQrC,MAAM,KAAK,MAAM;;;;;;;;;;;;;;;;;wBAjqBK,cAAc;0BAAd,cAAc;+BAAd,cAAc"}
|
package/package.json
CHANGED
package/src/htmlminifier.js
CHANGED
|
@@ -112,6 +112,8 @@ let svgMinifyCache = null;
|
|
|
112
112
|
|
|
113
113
|
// Pre-compiled patterns for script merging (avoid repeated allocation in hot path)
|
|
114
114
|
const RE_SCRIPT_ATTRS = /([^\s=]+)(?:=(?:"([^"]*)"|'([^']*)'|([^\s>]+)))?/g;
|
|
115
|
+
const RE_SCRIPT_OPEN = /<script(?=[\s>])/gi; // Finds tag start; use `findTagEnd()` for the actual closing `>`
|
|
116
|
+
const RE_SCRIPT_CLOSE = /<\/script\s*>/gi;
|
|
115
117
|
const SCRIPT_BOOL_ATTRS = new Set(['async', 'defer', 'nomodule']);
|
|
116
118
|
const DEFAULT_JS_TYPES = new Set(['', 'text/javascript', 'application/javascript']);
|
|
117
119
|
|
|
@@ -124,6 +126,28 @@ const RE_HTML_ENCODING = /^(text\/html|application\/xhtml\+xml)$/i;
|
|
|
124
126
|
|
|
125
127
|
// Script merging
|
|
126
128
|
|
|
129
|
+
/**
|
|
130
|
+
* Find the index of the `>` that closes an opening tag, correctly skipping
|
|
131
|
+
* over quoted attribute values (which may contain `>`).
|
|
132
|
+
* @param {string} html
|
|
133
|
+
* @param {number} pos - Start position (just after the tag name)
|
|
134
|
+
* @returns {number} Index of the closing `>`, or -1 if not found
|
|
135
|
+
*/
|
|
136
|
+
function findTagEnd(html, pos) {
|
|
137
|
+
let i = pos;
|
|
138
|
+
while (i < html.length) {
|
|
139
|
+
const ch = html[i];
|
|
140
|
+
if (ch === '>') return i;
|
|
141
|
+
if (ch === '"' || ch === "'") {
|
|
142
|
+
const q = ch;
|
|
143
|
+
i++;
|
|
144
|
+
while (i < html.length && html[i] !== q) i++;
|
|
145
|
+
}
|
|
146
|
+
i++;
|
|
147
|
+
}
|
|
148
|
+
return -1;
|
|
149
|
+
}
|
|
150
|
+
|
|
127
151
|
/**
|
|
128
152
|
* Merge consecutive inline script tags into one (`mergeConsecutiveScripts`).
|
|
129
153
|
* Only merges scripts that are compatible:
|
|
@@ -131,81 +155,104 @@ const RE_HTML_ENCODING = /^(text\/html|application\/xhtml\+xml)$/i;
|
|
|
131
155
|
* - Same `type` (or both default JavaScript)
|
|
132
156
|
* - No conflicting attributes (`async`, `defer`, `nomodule`, different `nonce`)
|
|
133
157
|
*
|
|
134
|
-
*
|
|
135
|
-
*
|
|
136
|
-
*
|
|
137
|
-
* HTML, such strings should be escaped as `<\/script>` or split like
|
|
138
|
-
* `'</scr' + 'ipt>'`, so this limitation rarely affects real-world code. The
|
|
139
|
-
* earlier `minifyJS` step (if enabled) typically handles this escaping already.
|
|
158
|
+
* Uses a scanner rather than a regex to locate script boundaries, so literal
|
|
159
|
+
* `</script>` strings inside script content are handled correctly per the HTML
|
|
160
|
+
* spec (raw text ends at the first `</script>`).
|
|
140
161
|
*
|
|
141
162
|
* @param {string} html - The HTML string to process
|
|
142
163
|
* @returns {string} HTML with consecutive scripts merged
|
|
143
164
|
*/
|
|
144
165
|
function mergeConsecutiveScripts(html) {
|
|
145
|
-
//
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
166
|
+
// Parse an attribute string into a name→value map
|
|
167
|
+
const parseAttrs = (attrStr) => {
|
|
168
|
+
const attrs = {};
|
|
169
|
+
RE_SCRIPT_ATTRS.lastIndex = 0;
|
|
170
|
+
let m;
|
|
171
|
+
while ((m = RE_SCRIPT_ATTRS.exec(attrStr)) !== null) {
|
|
172
|
+
const name = m[1].toLowerCase();
|
|
173
|
+
const value = m[2] ?? m[3] ?? m[4] ?? '';
|
|
174
|
+
attrs[name] = value;
|
|
175
|
+
}
|
|
176
|
+
return attrs;
|
|
177
|
+
};
|
|
178
|
+
|
|
156
179
|
let changed = true;
|
|
157
180
|
|
|
158
181
|
// Keep merging until no more changes (handles chains of 3+ scripts)
|
|
159
182
|
while (changed) {
|
|
160
183
|
changed = false;
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
184
|
+
RE_SCRIPT_OPEN.lastIndex = 0;
|
|
185
|
+
let m1;
|
|
186
|
+
|
|
187
|
+
while ((m1 = RE_SCRIPT_OPEN.exec(html)) !== null) {
|
|
188
|
+
// Use findTagEnd() to get the real closing '>', skipping quoted attribute values
|
|
189
|
+
const tagEnd1 = findTagEnd(html, m1.index + 7);
|
|
190
|
+
if (tagEnd1 === -1) break;
|
|
191
|
+
|
|
192
|
+
const attrs1Str = html.slice(m1.index + 7, tagEnd1);
|
|
193
|
+
const contentStart1 = tagEnd1 + 1;
|
|
194
|
+
|
|
195
|
+
// Find end of this script’s content (first `</script>`—per HTML spec, raw text ends here)
|
|
196
|
+
RE_SCRIPT_CLOSE.lastIndex = contentStart1;
|
|
197
|
+
const close1 = RE_SCRIPT_CLOSE.exec(html);
|
|
198
|
+
if (!close1) break;
|
|
199
|
+
|
|
200
|
+
const content1 = html.slice(contentStart1, close1.index);
|
|
201
|
+
const afterClose1 = close1.index + close1[0].length;
|
|
202
|
+
|
|
203
|
+
// Skip optional whitespace and check for a consecutive <script> tag
|
|
204
|
+
let i = afterClose1;
|
|
205
|
+
while (i < html.length && (html[i] === ' ' || html[i] === '\t' || html[i] === '\n' || html[i] === '\r' || html[i] === '\f')) i++;
|
|
206
|
+
if (html.slice(i, i + 7).toLowerCase() !== '<script' || (html[i + 7] !== '>' && !/\s/.test(html[i + 7]))) {
|
|
207
|
+
RE_SCRIPT_OPEN.lastIndex = afterClose1;
|
|
208
|
+
continue;
|
|
209
|
+
}
|
|
210
|
+
|
|
211
|
+
const tagStart2 = i;
|
|
212
|
+
const tagEnd2 = findTagEnd(html, tagStart2 + 7);
|
|
213
|
+
if (tagEnd2 === -1) break;
|
|
174
214
|
|
|
175
|
-
const
|
|
176
|
-
const
|
|
215
|
+
const attrs2Str = html.slice(tagStart2 + 7, tagEnd2);
|
|
216
|
+
const contentStart2 = tagEnd2 + 1;
|
|
217
|
+
|
|
218
|
+
// Find end of second script’s content
|
|
219
|
+
RE_SCRIPT_CLOSE.lastIndex = contentStart2;
|
|
220
|
+
const close2 = RE_SCRIPT_CLOSE.exec(html);
|
|
221
|
+
if (!close2) break;
|
|
222
|
+
|
|
223
|
+
const content2 = html.slice(contentStart2, close2.index);
|
|
224
|
+
const afterClose2 = close2.index + close2[0].length;
|
|
225
|
+
|
|
226
|
+
const a1 = parseAttrs(attrs1Str);
|
|
227
|
+
const a2 = parseAttrs(attrs2Str);
|
|
177
228
|
|
|
178
229
|
// Check for `src`—cannot merge external scripts
|
|
179
230
|
if ('src' in a1 || 'src' in a2) {
|
|
180
|
-
|
|
231
|
+
RE_SCRIPT_OPEN.lastIndex = afterClose1;
|
|
232
|
+
continue;
|
|
181
233
|
}
|
|
182
234
|
|
|
183
235
|
// Check `type` compatibility (both must be default JS)
|
|
236
|
+
// Non-JS types (modules, JSON, etc.) must not be merged:
|
|
237
|
+
// Module scripts have per-script lexical scope, and non-JS content (e.g., JSON)
|
|
238
|
+
// is not concatenable; even identical non-JS types are incompatible
|
|
184
239
|
const type1 = (a1.type || '').toLowerCase();
|
|
185
240
|
const type2 = (a2.type || '').toLowerCase();
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
} else {
|
|
190
|
-
// Non-JS types (modules, JSON, etc.) must not be merged:
|
|
191
|
-
// Module scripts have per-script lexical scope, and non-JS content (e.g., JSON)
|
|
192
|
-
// is not concatenable. Even identical non-JS types are incompatible.
|
|
193
|
-
return match;
|
|
241
|
+
if (!DEFAULT_JS_TYPES.has(type1) || !DEFAULT_JS_TYPES.has(type2)) {
|
|
242
|
+
RE_SCRIPT_OPEN.lastIndex = afterClose1;
|
|
243
|
+
continue;
|
|
194
244
|
}
|
|
195
245
|
|
|
196
|
-
// Check for conflicting boolean attributes
|
|
246
|
+
// Check for conflicting boolean attributes
|
|
247
|
+
let boolConflict = false;
|
|
197
248
|
for (const attr of SCRIPT_BOOL_ATTRS) {
|
|
198
|
-
|
|
199
|
-
const has2 = attr in a2;
|
|
200
|
-
if (has1 !== has2) {
|
|
201
|
-
// One has it, one doesn't - incompatible
|
|
202
|
-
return match;
|
|
203
|
-
}
|
|
249
|
+
if ((attr in a1) !== (attr in a2)) { boolConflict = true; break; }
|
|
204
250
|
}
|
|
205
251
|
|
|
206
252
|
// Check `nonce`—must be same or both absent
|
|
207
|
-
if (a1.nonce !== a2.nonce) {
|
|
208
|
-
|
|
253
|
+
if (boolConflict || a1.nonce !== a2.nonce) {
|
|
254
|
+
RE_SCRIPT_OPEN.lastIndex = afterClose1;
|
|
255
|
+
continue;
|
|
209
256
|
}
|
|
210
257
|
|
|
211
258
|
// Scripts are compatible—merge them
|
|
@@ -226,11 +273,12 @@ function mergeConsecutiveScripts(html) {
|
|
|
226
273
|
}
|
|
227
274
|
|
|
228
275
|
// Use first script’s attributes (they should be compatible)
|
|
229
|
-
|
|
230
|
-
|
|
276
|
+
html = html.slice(0, m1.index) + `<script${attrs1Str}>${mergedContent}</script>` + html.slice(afterClose2);
|
|
277
|
+
break; // Restart scanning (outer while loop)
|
|
278
|
+
}
|
|
231
279
|
}
|
|
232
280
|
|
|
233
|
-
return
|
|
281
|
+
return html;
|
|
234
282
|
}
|
|
235
283
|
|
|
236
284
|
// Type definitions
|
|
@@ -456,7 +504,7 @@ function mergeConsecutiveScripts(html) {
|
|
|
456
504
|
* event handler attributes. If an object is provided, it can include:
|
|
457
505
|
* - `engine`: The minifier to use (`terser` or `swc`). Default: `terser`.
|
|
458
506
|
* Note: Inline event handlers (e.g., `onclick="…"`) always use Terser
|
|
459
|
-
* regardless of engine setting, as
|
|
507
|
+
* regardless of engine setting, as SWC doesn’t support bare return statements.
|
|
460
508
|
* - Engine-specific options (e.g., Terser options if `engine: 'terser'`,
|
|
461
509
|
* SWC options if `engine: 'swc'`).
|
|
462
510
|
* If a function is provided, it will be used to perform
|
|
@@ -1676,7 +1724,7 @@ function joinResultSegments(results, options, restoreCustom, restoreIgnore) {
|
|
|
1676
1724
|
* - Cache sizes are locked after first initialization—subsequent calls use the same caches
|
|
1677
1725
|
* even if different `cacheCSS`/`cacheJS`/`cacheSVG` options are provided
|
|
1678
1726
|
* - The first call’s options determine the cache sizes for subsequent calls
|
|
1679
|
-
* -
|
|
1727
|
+
* - Invalid values (NaN, Infinity) fall back to the default size (500); values below `1` are clamped to `1`
|
|
1680
1728
|
*/
|
|
1681
1729
|
function initCaches(options) {
|
|
1682
1730
|
// Only create caches once (on first call)—sizes are locked after this
|
|
@@ -1693,6 +1741,9 @@ function initCaches(options) {
|
|
|
1693
1741
|
return parsed;
|
|
1694
1742
|
};
|
|
1695
1743
|
|
|
1744
|
+
// Sanitize a cache size: Non-finite/NaN falls back to `defaultSize`; otherwise clamped to min 1 and floored
|
|
1745
|
+
const sanitizeSize = (size) => Number.isFinite(size) ? Math.max(1, Math.floor(size)) : defaultSize;
|
|
1746
|
+
|
|
1696
1747
|
// Get cache sizes with precedence: Options > env > default
|
|
1697
1748
|
const cssSize = options.cacheCSS !== undefined ? options.cacheCSS
|
|
1698
1749
|
: (parseEnvCacheSize(process.env.HMN_CACHE_CSS) ?? defaultSize);
|
|
@@ -1701,10 +1752,9 @@ function initCaches(options) {
|
|
|
1701
1752
|
const svgSize = options.cacheSVG !== undefined ? options.cacheSVG
|
|
1702
1753
|
: (parseEnvCacheSize(process.env.HMN_CACHE_SVG) ?? defaultSize);
|
|
1703
1754
|
|
|
1704
|
-
|
|
1705
|
-
const
|
|
1706
|
-
const
|
|
1707
|
-
const svgFinalSize = svgSize === 0 ? 1 : svgSize;
|
|
1755
|
+
const cssFinalSize = sanitizeSize(cssSize);
|
|
1756
|
+
const jsFinalSize = sanitizeSize(jsSize);
|
|
1757
|
+
const svgFinalSize = sanitizeSize(svgSize);
|
|
1708
1758
|
|
|
1709
1759
|
cssMinifyCache = new LRU(cssFinalSize);
|
|
1710
1760
|
jsMinifyCache = new LRU(jsFinalSize);
|
package/src/htmlparser.js
CHANGED
|
@@ -506,7 +506,7 @@ export class HTMLParser {
|
|
|
506
506
|
// Note: Unquoted attribute values are intentionally not handled here.
|
|
507
507
|
// Per HTML spec, unquoted values cannot contain spaces or special chars,
|
|
508
508
|
// making a 20 KB+ unquoted value practically impossible. If encountered,
|
|
509
|
-
// it
|
|
509
|
+
// it’s malformed HTML and using the truncated regex match is acceptable.
|
|
510
510
|
}
|
|
511
511
|
}
|
|
512
512
|
}
|
package/src/lib/attributes.js
CHANGED
|
@@ -580,7 +580,7 @@ function buildAttr(normalized, hasUnarySlash, options, isLast, uidAttr) {
|
|
|
580
580
|
attrValue = attrValue.replace(/'/g, '&#39;');
|
|
581
581
|
}
|
|
582
582
|
} else {
|
|
583
|
-
// `preventAttributesEscaping` mode: Choose safe quotes but don
|
|
583
|
+
// `preventAttributesEscaping` mode: Choose safe quotes but don’t escape
|
|
584
584
|
// except when both quote types are present—then escape to prevent invalid HTML
|
|
585
585
|
const hasDoubleQuote = attrValue.indexOf('"') !== -1;
|
|
586
586
|
const hasSingleQuote = attrValue.indexOf("'") !== -1;
|