html-minifier-next 6.1.1 → 6.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/cli.js CHANGED
@@ -173,32 +173,48 @@ function readFile(file) {
173
173
  }
174
174
 
175
175
  /**
176
- * Load config from a file path, trying JSON, CJS, then ESM
176
+ * Load config from a file path—for unambiguous extensions (.json, .cjs, .mjs) only the
177
+ * matching format is attempted and its error shown on failure; for .js or unknown extensions
178
+ * all formats are tried and the most relevant error is reported
177
179
  * @param {string} configPath - Path to config file
178
180
  * @returns {Promise<object>} Loaded config object
179
181
  */
180
182
  async function loadConfigFromPath(configPath) {
181
- const data = readFile(configPath);
183
+ const abs = path.resolve(configPath);
184
+ const ext = path.extname(configPath).toLowerCase();
182
185
 
183
- // Try JSON first
184
- try {
185
- return JSON.parse(data);
186
- } catch (jsonErr) {
187
- const abs = path.resolve(configPath);
186
+ if (ext === '.json') {
187
+ try { return JSON.parse(readFile(abs).replace(/^\uFEFF/, '')); }
188
+ catch (err) { fatal(`Cannot parse config file as JSON: ${err.message}`); }
189
+ }
188
190
 
189
- // Try CJS require
191
+ if (ext === '.cjs') {
190
192
  try {
191
193
  const result = require(abs);
192
- // Handle ESM interop: If `require()` loads an ESM file, it may return `{__esModule: true, default: …}`
193
- return (result && result.__esModule && result.default) ? result.default : result;
194
- } catch (cjsErr) {
195
- // Try ESM import
196
- try {
197
- const mod = await import(pathToFileURL(abs).href);
198
- return mod.default || mod;
199
- } catch (esmErr) {
200
- fatal('Cannot read the specified config file.\nAs JSON: ' + jsonErr.message + '\nAs CJS: ' + cjsErr.message + '\nAs ESM: ' + esmErr.message);
201
- }
194
+ return (result && typeof result === 'object' && result.__esModule === true) ? result.default : result;
195
+ } catch (err) { fatal(`Cannot load config file: ${err.message}`); }
196
+ }
197
+
198
+ if (ext === '.mjs') {
199
+ try { const mod = await import(pathToFileURL(abs).href); return 'default' in mod ? mod.default : mod; }
200
+ catch (err) { fatal(`Cannot load config file: ${err.message}`); }
201
+ }
202
+
203
+ // For .js or extension-less files, try JSON first, then CJS, then ESM
204
+ let jsonErr;
205
+ try { return JSON.parse(readFile(abs).replace(/^\uFEFF/, '')); }
206
+ catch (err) { jsonErr = err; }
207
+
208
+ try {
209
+ const result = require(abs);
210
+ // Handle ESM interop: If `require()` loads an ESM file, it may return `{__esModule: true, default: …}`
211
+ return (result && typeof result === 'object' && result.__esModule === true) ? result.default : result;
212
+ } catch (cjsErr) {
213
+ try { const mod = await import(pathToFileURL(abs).href); return 'default' in mod ? mod.default : mod; }
214
+ catch (esmErr) {
215
+ fatal(ext === '.js'
216
+ ? `Cannot load config file: ${cjsErr.message}\nAs module: ${esmErr.message}`
217
+ : `Cannot read the specified config file.\nAs JSON: ${jsonErr.message}\nAs CJS: ${cjsErr.message}\nAs module: ${esmErr.message}`);
202
218
  }
203
219
  }
204
220
  }
@@ -607,7 +607,7 @@ class HTMLParser {
607
607
  // Note: Unquoted attribute values are intentionally not handled here.
608
608
  // Per HTML spec, unquoted values cannot contain spaces or special chars,
609
609
  // making a 20 KB+ unquoted value practically impossible. If encountered,
610
- // it's malformed HTML and using the truncated regex match is acceptable.
610
+ // it’s malformed HTML and using the truncated regex match is acceptable.
611
611
  }
612
612
  }
613
613
  }
@@ -2632,7 +2632,7 @@ function buildAttr(normalized, hasUnarySlash, options, isLast, uidAttr) {
2632
2632
  attrValue = attrValue.replace(/'/g, '&#39;');
2633
2633
  }
2634
2634
  } else {
2635
- // `preventAttributesEscaping` mode: Choose safe quotes but don't escape
2635
+ // `preventAttributesEscaping` mode: Choose safe quotes but don’t escape
2636
2636
  // except when both quote types are present—then escape to prevent invalid HTML
2637
2637
  const hasDoubleQuote = attrValue.indexOf('"') !== -1;
2638
2638
  const hasSingleQuote = attrValue.indexOf("'") !== -1;
@@ -3002,6 +3002,8 @@ let svgMinifyCache = null;
3002
3002
 
3003
3003
  // Pre-compiled patterns for script merging (avoid repeated allocation in hot path)
3004
3004
  const RE_SCRIPT_ATTRS = /([^\s=]+)(?:=(?:"([^"]*)"|'([^']*)'|([^\s>]+)))?/g;
3005
+ const RE_SCRIPT_OPEN = /<script(?=[\s>])/gi; // Finds tag start; use `findTagEnd()` for the actual closing `>`
3006
+ const RE_SCRIPT_CLOSE = /<\/script\s*>/gi;
3005
3007
  const SCRIPT_BOOL_ATTRS = new Set(['async', 'defer', 'nomodule']);
3006
3008
  const DEFAULT_JS_TYPES = new Set(['', 'text/javascript', 'application/javascript']);
3007
3009
 
@@ -3014,6 +3016,28 @@ const RE_HTML_ENCODING = /^(text\/html|application\/xhtml\+xml)$/i;
3014
3016
 
3015
3017
  // Script merging
3016
3018
 
3019
+ /**
3020
+ * Find the index of the `>` that closes an opening tag, correctly skipping
3021
+ * over quoted attribute values (which may contain `>`).
3022
+ * @param {string} html
3023
+ * @param {number} pos - Start position (just after the tag name)
3024
+ * @returns {number} Index of the closing `>`, or -1 if not found
3025
+ */
3026
+ function findTagEnd(html, pos) {
3027
+ let i = pos;
3028
+ while (i < html.length) {
3029
+ const ch = html[i];
3030
+ if (ch === '>') return i;
3031
+ if (ch === '"' || ch === "'") {
3032
+ const q = ch;
3033
+ i++;
3034
+ while (i < html.length && html[i] !== q) i++;
3035
+ }
3036
+ i++;
3037
+ }
3038
+ return -1;
3039
+ }
3040
+
3017
3041
  /**
3018
3042
  * Merge consecutive inline script tags into one (`mergeConsecutiveScripts`).
3019
3043
  * Only merges scripts that are compatible:
@@ -3021,79 +3045,104 @@ const RE_HTML_ENCODING = /^(text\/html|application\/xhtml\+xml)$/i;
3021
3045
  * - Same `type` (or both default JavaScript)
3022
3046
  * - No conflicting attributes (`async`, `defer`, `nomodule`, different `nonce`)
3023
3047
  *
3024
- * Limitation: This function uses regex-based matching (`pattern` variable below),
3025
- * which can produce incorrect results if a script’s content contains a literal
3026
- * `</script>` string (e.g., `document.write('<script>…</script>')`). In valid
3027
- * HTML, such strings should be escaped as `<\/script>` or split like
3028
- * `'</scr' + 'ipt>'`, so this limitation rarely affects real-world code. The
3029
- * earlier `minifyJS` step (if enabled) typically handles this escaping already.
3048
+ * Uses a scanner rather than a regex to locate script boundaries, so literal
3049
+ * `</script>` strings inside script content are handled correctly per the HTML
3050
+ * spec (raw text ends at the first `</script>`).
3030
3051
  *
3031
3052
  * @param {string} html - The HTML string to process
3032
3053
  * @returns {string} HTML with consecutive scripts merged
3033
3054
  */
3034
3055
  function mergeConsecutiveScripts(html) {
3035
- // `pattern`: Regex to match consecutive `</script>` followed by `<script…>`.
3036
- // See function JSDoc above for known limitations with literal `</script>` in content.
3037
- // Captures:
3038
- // 1. first script attrs
3039
- // 2. first script content
3040
- // 3. whitespace between
3041
- // 4. second script attrs
3042
- // 5. second script content
3043
- const pattern = /<script([^>]*)>([\s\S]*?)<\/script>([\s]*)<script([^>]*)>([\s\S]*?)<\/script>/gi;
3044
-
3045
- let result = html;
3056
+ // Parse an attribute string into a name→value map
3057
+ const parseAttrs = (attrStr) => {
3058
+ const attrs = {};
3059
+ RE_SCRIPT_ATTRS.lastIndex = 0;
3060
+ let m;
3061
+ while ((m = RE_SCRIPT_ATTRS.exec(attrStr)) !== null) {
3062
+ const name = m[1].toLowerCase();
3063
+ const value = m[2] ?? m[3] ?? m[4] ?? '';
3064
+ attrs[name] = value;
3065
+ }
3066
+ return attrs;
3067
+ };
3068
+
3046
3069
  let changed = true;
3047
3070
 
3048
3071
  // Keep merging until no more changes (handles chains of 3+ scripts)
3049
3072
  while (changed) {
3050
3073
  changed = false;
3051
- result = result.replace(pattern, (match, attrs1, content1, whitespace, attrs2, content2) => {
3052
- // Parse attributes from both script tags (uses pre-compiled RE_SCRIPT_ATTRS)
3053
- const parseAttrs = (attrStr) => {
3054
- const attrs = {};
3055
- RE_SCRIPT_ATTRS.lastIndex = 0; // Reset for reuse
3056
- let m;
3057
- while ((m = RE_SCRIPT_ATTRS.exec(attrStr)) !== null) {
3058
- const name = m[1].toLowerCase();
3059
- const value = m[2] ?? m[3] ?? m[4] ?? '';
3060
- attrs[name] = value;
3061
- }
3062
- return attrs;
3063
- };
3074
+ RE_SCRIPT_OPEN.lastIndex = 0;
3075
+ let m1;
3076
+
3077
+ while ((m1 = RE_SCRIPT_OPEN.exec(html)) !== null) {
3078
+ // Use findTagEnd() to get the real closing '>', skipping quoted attribute values
3079
+ const tagEnd1 = findTagEnd(html, m1.index + 7);
3080
+ if (tagEnd1 === -1) break;
3081
+
3082
+ const attrs1Str = html.slice(m1.index + 7, tagEnd1);
3083
+ const contentStart1 = tagEnd1 + 1;
3084
+
3085
+ // Find end of this script’s content (first `</script>`—per HTML spec, raw text ends here)
3086
+ RE_SCRIPT_CLOSE.lastIndex = contentStart1;
3087
+ const close1 = RE_SCRIPT_CLOSE.exec(html);
3088
+ if (!close1) break;
3089
+
3090
+ const content1 = html.slice(contentStart1, close1.index);
3091
+ const afterClose1 = close1.index + close1[0].length;
3092
+
3093
+ // Skip optional whitespace and check for a consecutive <script> tag
3094
+ let i = afterClose1;
3095
+ while (i < html.length && (html[i] === ' ' || html[i] === '\t' || html[i] === '\n' || html[i] === '\r' || html[i] === '\f')) i++;
3096
+ if (html.slice(i, i + 7).toLowerCase() !== '<script' || (html[i + 7] !== '>' && !/\s/.test(html[i + 7]))) {
3097
+ RE_SCRIPT_OPEN.lastIndex = afterClose1;
3098
+ continue;
3099
+ }
3100
+
3101
+ const tagStart2 = i;
3102
+ const tagEnd2 = findTagEnd(html, tagStart2 + 7);
3103
+ if (tagEnd2 === -1) break;
3104
+
3105
+ const attrs2Str = html.slice(tagStart2 + 7, tagEnd2);
3106
+ const contentStart2 = tagEnd2 + 1;
3064
3107
 
3065
- const a1 = parseAttrs(attrs1);
3066
- const a2 = parseAttrs(attrs2);
3108
+ // Find end of second script’s content
3109
+ RE_SCRIPT_CLOSE.lastIndex = contentStart2;
3110
+ const close2 = RE_SCRIPT_CLOSE.exec(html);
3111
+ if (!close2) break;
3112
+
3113
+ const content2 = html.slice(contentStart2, close2.index);
3114
+ const afterClose2 = close2.index + close2[0].length;
3115
+
3116
+ const a1 = parseAttrs(attrs1Str);
3117
+ const a2 = parseAttrs(attrs2Str);
3067
3118
 
3068
3119
  // Check for `src`—cannot merge external scripts
3069
3120
  if ('src' in a1 || 'src' in a2) {
3070
- return match;
3121
+ RE_SCRIPT_OPEN.lastIndex = afterClose1;
3122
+ continue;
3071
3123
  }
3072
3124
 
3073
3125
  // Check `type` compatibility (both must be default JS)
3126
+ // Non-JS types (modules, JSON, etc.) must not be merged:
3127
+ // Module scripts have per-script lexical scope, and non-JS content (e.g., JSON)
3128
+ // is not concatenable; even identical non-JS types are incompatible
3074
3129
  const type1 = (a1.type || '').toLowerCase();
3075
3130
  const type2 = (a2.type || '').toLowerCase();
3076
-
3077
- if (DEFAULT_JS_TYPES.has(type1) && DEFAULT_JS_TYPES.has(type2)) ; else {
3078
- // Non-JS types (modules, JSON, etc.) must not be merged:
3079
- // Module scripts have per-script lexical scope, and non-JS content (e.g., JSON)
3080
- // is not concatenable. Even identical non-JS types are incompatible.
3081
- return match;
3131
+ if (!DEFAULT_JS_TYPES.has(type1) || !DEFAULT_JS_TYPES.has(type2)) {
3132
+ RE_SCRIPT_OPEN.lastIndex = afterClose1;
3133
+ continue;
3082
3134
  }
3083
3135
 
3084
- // Check for conflicting boolean attributes (uses pre-compiled SCRIPT_BOOL_ATTRS)
3136
+ // Check for conflicting boolean attributes
3137
+ let boolConflict = false;
3085
3138
  for (const attr of SCRIPT_BOOL_ATTRS) {
3086
- const has1 = attr in a1;
3087
- const has2 = attr in a2;
3088
- if (has1 !== has2) {
3089
- // One has it, one doesn't - incompatible
3090
- return match;
3091
- }
3139
+ if ((attr in a1) !== (attr in a2)) { boolConflict = true; break; }
3092
3140
  }
3093
3141
 
3094
3142
  // Check `nonce`—must be same or both absent
3095
- if (a1.nonce !== a2.nonce) {
3096
- return match;
3143
+ if (boolConflict || a1.nonce !== a2.nonce) {
3144
+ RE_SCRIPT_OPEN.lastIndex = afterClose1;
3145
+ continue;
3097
3146
  }
3098
3147
 
3099
3148
  // Scripts are compatible—merge them
@@ -3114,11 +3163,12 @@ function mergeConsecutiveScripts(html) {
3114
3163
  }
3115
3164
 
3116
3165
  // Use first script’s attributes (they should be compatible)
3117
- return `<script${attrs1}>${mergedContent}</script>`;
3118
- });
3166
+ html = html.slice(0, m1.index) + `<script${attrs1Str}>${mergedContent}</script>` + html.slice(afterClose2);
3167
+ break; // Restart scanning (outer while loop)
3168
+ }
3119
3169
  }
3120
3170
 
3121
- return result;
3171
+ return html;
3122
3172
  }
3123
3173
 
3124
3174
  // Type definitions
@@ -3344,7 +3394,7 @@ function mergeConsecutiveScripts(html) {
3344
3394
  * event handler attributes. If an object is provided, it can include:
3345
3395
  * - `engine`: The minifier to use (`terser` or `swc`). Default: `terser`.
3346
3396
  * Note: Inline event handlers (e.g., `onclick="…"`) always use Terser
3347
- * regardless of engine setting, as swc doesn’t support bare return statements.
3397
+ * regardless of engine setting, as SWC doesn’t support bare return statements.
3348
3398
  * - Engine-specific options (e.g., Terser options if `engine: 'terser'`,
3349
3399
  * SWC options if `engine: 'swc'`).
3350
3400
  * If a function is provided, it will be used to perform
@@ -4564,7 +4614,7 @@ function joinResultSegments(results, options, restoreCustom, restoreIgnore) {
4564
4614
  * - Cache sizes are locked after first initialization—subsequent calls use the same caches
4565
4615
  * even if different `cacheCSS`/`cacheJS`/`cacheSVG` options are provided
4566
4616
  * - The first call’s options determine the cache sizes for subsequent calls
4567
- * - Explicit `0` values are coerced to `1` (minimum functional cache size)
4617
+ * - Invalid values (NaN, Infinity) fall back to the default size (500); values below `1` are clamped to `1`
4568
4618
  */
4569
4619
  function initCaches(options) {
4570
4620
  // Only create caches once (on first call)—sizes are locked after this
@@ -4581,6 +4631,9 @@ function initCaches(options) {
4581
4631
  return parsed;
4582
4632
  };
4583
4633
 
4634
+ // Sanitize a cache size: Non-finite/NaN falls back to `defaultSize`; otherwise clamped to min 1 and floored
4635
+ const sanitizeSize = (size) => Number.isFinite(size) ? Math.max(1, Math.floor(size)) : defaultSize;
4636
+
4584
4637
  // Get cache sizes with precedence: Options > env > default
4585
4638
  const cssSize = options.cacheCSS !== undefined ? options.cacheCSS
4586
4639
  : (parseEnvCacheSize(process.env.HMN_CACHE_CSS) ?? defaultSize);
@@ -4589,10 +4642,9 @@ function initCaches(options) {
4589
4642
  const svgSize = options.cacheSVG !== undefined ? options.cacheSVG
4590
4643
  : (parseEnvCacheSize(process.env.HMN_CACHE_SVG) ?? defaultSize);
4591
4644
 
4592
- // Coerce `0` to `1` (minimum functional cache size) to avoid immediate eviction
4593
- const cssFinalSize = cssSize === 0 ? 1 : cssSize;
4594
- const jsFinalSize = jsSize === 0 ? 1 : jsSize;
4595
- const svgFinalSize = svgSize === 0 ? 1 : svgSize;
4645
+ const cssFinalSize = sanitizeSize(cssSize);
4646
+ const jsFinalSize = sanitizeSize(jsSize);
4647
+ const svgFinalSize = sanitizeSize(svgSize);
4596
4648
 
4597
4649
  cssMinifyCache = new LRU(cssFinalSize);
4598
4650
  jsMinifyCache = new LRU(jsFinalSize);
@@ -254,7 +254,7 @@ export type MinifierOptions = {
254
254
  * event handler attributes. If an object is provided, it can include:
255
255
  * - `engine`: The minifier to use (`terser` or `swc`). Default: `terser`.
256
256
  * Note: Inline event handlers (e.g., `onclick="…"`) always use Terser
257
- * regardless of engine setting, as swc doesn’t support bare return statements.
257
+ * regardless of engine setting, as SWC doesn’t support bare return statements.
258
258
  * - Engine-specific options (e.g., Terser options if `engine: 'terser'`,
259
259
  * SWC options if `engine: 'swc'`).
260
260
  * If a function is provided, it will be used to perform
@@ -1 +1 @@
1
- {"version":3,"file":"htmlminifier.d.ts","sourceRoot":"","sources":["../../src/htmlminifier.js"],"names":[],"mappings":"AAyrDO,8BAJI,MAAM,YACN,eAAe,GACb,OAAO,CAAC,MAAM,CAAC,CAwB3B;;;;;;;;;;;;UA99CS,MAAM;;;;;;;;;;;;;;;;;;mCAaA,MAAM,SAAS,aAAa,EAAE,yBAAyB,CAAC,GAAG,EAAE,MAAM,KAAK,OAAO,KAAK,OAAO;;;;;;;+BAM3F,MAAM,GAAG,IAAI,SAAS,aAAa,EAAE,GAAG,SAAS,qBAAqB,CAAC,GAAG,EAAE,MAAM,KAAK,OAAO,KAAK,OAAO;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;qBA6JtG,OAAO,KAAK,IAAI;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;2HA2BiF,MAAM,SAAS,MAAM,KAAK,OAAO,CAAC,MAAM,CAAC,GAAG,MAAM;;;;;;;;;;;;;;;;iBASxG,QAAQ,GAAG,KAAK;gBAAgC,MAAM,WAAW,OAAO,KAAK,OAAO,CAAC,MAAM,CAAC,GAAG,MAAM;;;;;;;;;;;eAa/H,MAAM;gBAAY,MAAM,KAAK,OAAO,CAAC,MAAM,CAAC,GAAG,MAAM;;;;;;;;;;;;;;;;;mBAiBzE,MAAM,KAAK,MAAM;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;kDA+DF,MAAM,OAAO,MAAM,KAAK,OAAO;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;sCA2EpC,MAAM,SAAS,aAAa,EAAE,KAAK,IAAI;;;;;;;;;wCAQrC,MAAM,KAAK,MAAM;;;;;;;;;;;;;;;;;wBAjnBK,cAAc;0BAAd,cAAc;+BAAd,cAAc"}
1
+ {"version":3,"file":"htmlminifier.d.ts","sourceRoot":"","sources":["../../src/htmlminifier.js"],"names":[],"mappings":"AA2uDO,8BAJI,MAAM,YACN,eAAe,GACb,OAAO,CAAC,MAAM,CAAC,CAwB3B;;;;;;;;;;;;UAh+CS,MAAM;;;;;;;;;;;;;;;;;;mCAaA,MAAM,SAAS,aAAa,EAAE,yBAAyB,CAAC,GAAG,EAAE,MAAM,KAAK,OAAO,KAAK,OAAO;;;;;;;+BAM3F,MAAM,GAAG,IAAI,SAAS,aAAa,EAAE,GAAG,SAAS,qBAAqB,CAAC,GAAG,EAAE,MAAM,KAAK,OAAO,KAAK,OAAO;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;qBA6JtG,OAAO,KAAK,IAAI;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;2HA2BiF,MAAM,SAAS,MAAM,KAAK,OAAO,CAAC,MAAM,CAAC,GAAG,MAAM;;;;;;;;;;;;;;;;iBASxG,QAAQ,GAAG,KAAK;gBAAgC,MAAM,WAAW,OAAO,KAAK,OAAO,CAAC,MAAM,CAAC,GAAG,MAAM;;;;;;;;;;;eAa/H,MAAM;gBAAY,MAAM,KAAK,OAAO,CAAC,MAAM,CAAC,GAAG,MAAM;;;;;;;;;;;;;;;;;mBAiBzE,MAAM,KAAK,MAAM;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;kDA+DF,MAAM,OAAO,MAAM,KAAK,OAAO;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;sCA2EpC,MAAM,SAAS,aAAa,EAAE,KAAK,IAAI;;;;;;;;;wCAQrC,MAAM,KAAK,MAAM;;;;;;;;;;;;;;;;;wBAjqBK,cAAc;0BAAd,cAAc;+BAAd,cAAc"}
package/package.json CHANGED
@@ -96,5 +96,5 @@
96
96
  },
97
97
  "type": "module",
98
98
  "types": "./dist/types/htmlminifier.d.ts",
99
- "version": "6.1.1"
99
+ "version": "6.1.2"
100
100
  }
@@ -112,6 +112,8 @@ let svgMinifyCache = null;
112
112
 
113
113
  // Pre-compiled patterns for script merging (avoid repeated allocation in hot path)
114
114
  const RE_SCRIPT_ATTRS = /([^\s=]+)(?:=(?:"([^"]*)"|'([^']*)'|([^\s>]+)))?/g;
115
+ const RE_SCRIPT_OPEN = /<script(?=[\s>])/gi; // Finds tag start; use `findTagEnd()` for the actual closing `>`
116
+ const RE_SCRIPT_CLOSE = /<\/script\s*>/gi;
115
117
  const SCRIPT_BOOL_ATTRS = new Set(['async', 'defer', 'nomodule']);
116
118
  const DEFAULT_JS_TYPES = new Set(['', 'text/javascript', 'application/javascript']);
117
119
 
@@ -124,6 +126,28 @@ const RE_HTML_ENCODING = /^(text\/html|application\/xhtml\+xml)$/i;
124
126
 
125
127
  // Script merging
126
128
 
129
+ /**
130
+ * Find the index of the `>` that closes an opening tag, correctly skipping
131
+ * over quoted attribute values (which may contain `>`).
132
+ * @param {string} html
133
+ * @param {number} pos - Start position (just after the tag name)
134
+ * @returns {number} Index of the closing `>`, or -1 if not found
135
+ */
136
+ function findTagEnd(html, pos) {
137
+ let i = pos;
138
+ while (i < html.length) {
139
+ const ch = html[i];
140
+ if (ch === '>') return i;
141
+ if (ch === '"' || ch === "'") {
142
+ const q = ch;
143
+ i++;
144
+ while (i < html.length && html[i] !== q) i++;
145
+ }
146
+ i++;
147
+ }
148
+ return -1;
149
+ }
150
+
127
151
  /**
128
152
  * Merge consecutive inline script tags into one (`mergeConsecutiveScripts`).
129
153
  * Only merges scripts that are compatible:
@@ -131,81 +155,104 @@ const RE_HTML_ENCODING = /^(text\/html|application\/xhtml\+xml)$/i;
131
155
  * - Same `type` (or both default JavaScript)
132
156
  * - No conflicting attributes (`async`, `defer`, `nomodule`, different `nonce`)
133
157
  *
134
- * Limitation: This function uses regex-based matching (`pattern` variable below),
135
- * which can produce incorrect results if a script’s content contains a literal
136
- * `</script>` string (e.g., `document.write('<script>…</script>')`). In valid
137
- * HTML, such strings should be escaped as `<\/script>` or split like
138
- * `'</scr' + 'ipt>'`, so this limitation rarely affects real-world code. The
139
- * earlier `minifyJS` step (if enabled) typically handles this escaping already.
158
+ * Uses a scanner rather than a regex to locate script boundaries, so literal
159
+ * `</script>` strings inside script content are handled correctly per the HTML
160
+ * spec (raw text ends at the first `</script>`).
140
161
  *
141
162
  * @param {string} html - The HTML string to process
142
163
  * @returns {string} HTML with consecutive scripts merged
143
164
  */
144
165
  function mergeConsecutiveScripts(html) {
145
- // `pattern`: Regex to match consecutive `</script>` followed by `<script…>`.
146
- // See function JSDoc above for known limitations with literal `</script>` in content.
147
- // Captures:
148
- // 1. first script attrs
149
- // 2. first script content
150
- // 3. whitespace between
151
- // 4. second script attrs
152
- // 5. second script content
153
- const pattern = /<script([^>]*)>([\s\S]*?)<\/script>([\s]*)<script([^>]*)>([\s\S]*?)<\/script>/gi;
154
-
155
- let result = html;
166
+ // Parse an attribute string into a name→value map
167
+ const parseAttrs = (attrStr) => {
168
+ const attrs = {};
169
+ RE_SCRIPT_ATTRS.lastIndex = 0;
170
+ let m;
171
+ while ((m = RE_SCRIPT_ATTRS.exec(attrStr)) !== null) {
172
+ const name = m[1].toLowerCase();
173
+ const value = m[2] ?? m[3] ?? m[4] ?? '';
174
+ attrs[name] = value;
175
+ }
176
+ return attrs;
177
+ };
178
+
156
179
  let changed = true;
157
180
 
158
181
  // Keep merging until no more changes (handles chains of 3+ scripts)
159
182
  while (changed) {
160
183
  changed = false;
161
- result = result.replace(pattern, (match, attrs1, content1, whitespace, attrs2, content2) => {
162
- // Parse attributes from both script tags (uses pre-compiled RE_SCRIPT_ATTRS)
163
- const parseAttrs = (attrStr) => {
164
- const attrs = {};
165
- RE_SCRIPT_ATTRS.lastIndex = 0; // Reset for reuse
166
- let m;
167
- while ((m = RE_SCRIPT_ATTRS.exec(attrStr)) !== null) {
168
- const name = m[1].toLowerCase();
169
- const value = m[2] ?? m[3] ?? m[4] ?? '';
170
- attrs[name] = value;
171
- }
172
- return attrs;
173
- };
184
+ RE_SCRIPT_OPEN.lastIndex = 0;
185
+ let m1;
186
+
187
+ while ((m1 = RE_SCRIPT_OPEN.exec(html)) !== null) {
188
+ // Use findTagEnd() to get the real closing '>', skipping quoted attribute values
189
+ const tagEnd1 = findTagEnd(html, m1.index + 7);
190
+ if (tagEnd1 === -1) break;
191
+
192
+ const attrs1Str = html.slice(m1.index + 7, tagEnd1);
193
+ const contentStart1 = tagEnd1 + 1;
194
+
195
+ // Find end of this script’s content (first `</script>`—per HTML spec, raw text ends here)
196
+ RE_SCRIPT_CLOSE.lastIndex = contentStart1;
197
+ const close1 = RE_SCRIPT_CLOSE.exec(html);
198
+ if (!close1) break;
199
+
200
+ const content1 = html.slice(contentStart1, close1.index);
201
+ const afterClose1 = close1.index + close1[0].length;
202
+
203
+ // Skip optional whitespace and check for a consecutive <script> tag
204
+ let i = afterClose1;
205
+ while (i < html.length && (html[i] === ' ' || html[i] === '\t' || html[i] === '\n' || html[i] === '\r' || html[i] === '\f')) i++;
206
+ if (html.slice(i, i + 7).toLowerCase() !== '<script' || (html[i + 7] !== '>' && !/\s/.test(html[i + 7]))) {
207
+ RE_SCRIPT_OPEN.lastIndex = afterClose1;
208
+ continue;
209
+ }
210
+
211
+ const tagStart2 = i;
212
+ const tagEnd2 = findTagEnd(html, tagStart2 + 7);
213
+ if (tagEnd2 === -1) break;
174
214
 
175
- const a1 = parseAttrs(attrs1);
176
- const a2 = parseAttrs(attrs2);
215
+ const attrs2Str = html.slice(tagStart2 + 7, tagEnd2);
216
+ const contentStart2 = tagEnd2 + 1;
217
+
218
+ // Find end of second script’s content
219
+ RE_SCRIPT_CLOSE.lastIndex = contentStart2;
220
+ const close2 = RE_SCRIPT_CLOSE.exec(html);
221
+ if (!close2) break;
222
+
223
+ const content2 = html.slice(contentStart2, close2.index);
224
+ const afterClose2 = close2.index + close2[0].length;
225
+
226
+ const a1 = parseAttrs(attrs1Str);
227
+ const a2 = parseAttrs(attrs2Str);
177
228
 
178
229
  // Check for `src`—cannot merge external scripts
179
230
  if ('src' in a1 || 'src' in a2) {
180
- return match;
231
+ RE_SCRIPT_OPEN.lastIndex = afterClose1;
232
+ continue;
181
233
  }
182
234
 
183
235
  // Check `type` compatibility (both must be default JS)
236
+ // Non-JS types (modules, JSON, etc.) must not be merged:
237
+ // Module scripts have per-script lexical scope, and non-JS content (e.g., JSON)
238
+ // is not concatenable; even identical non-JS types are incompatible
184
239
  const type1 = (a1.type || '').toLowerCase();
185
240
  const type2 = (a2.type || '').toLowerCase();
186
-
187
- if (DEFAULT_JS_TYPES.has(type1) && DEFAULT_JS_TYPES.has(type2)) {
188
- // Both are default JavaScript—compatible
189
- } else {
190
- // Non-JS types (modules, JSON, etc.) must not be merged:
191
- // Module scripts have per-script lexical scope, and non-JS content (e.g., JSON)
192
- // is not concatenable. Even identical non-JS types are incompatible.
193
- return match;
241
+ if (!DEFAULT_JS_TYPES.has(type1) || !DEFAULT_JS_TYPES.has(type2)) {
242
+ RE_SCRIPT_OPEN.lastIndex = afterClose1;
243
+ continue;
194
244
  }
195
245
 
196
- // Check for conflicting boolean attributes (uses pre-compiled SCRIPT_BOOL_ATTRS)
246
+ // Check for conflicting boolean attributes
247
+ let boolConflict = false;
197
248
  for (const attr of SCRIPT_BOOL_ATTRS) {
198
- const has1 = attr in a1;
199
- const has2 = attr in a2;
200
- if (has1 !== has2) {
201
- // One has it, one doesn't - incompatible
202
- return match;
203
- }
249
+ if ((attr in a1) !== (attr in a2)) { boolConflict = true; break; }
204
250
  }
205
251
 
206
252
  // Check `nonce`—must be same or both absent
207
- if (a1.nonce !== a2.nonce) {
208
- return match;
253
+ if (boolConflict || a1.nonce !== a2.nonce) {
254
+ RE_SCRIPT_OPEN.lastIndex = afterClose1;
255
+ continue;
209
256
  }
210
257
 
211
258
  // Scripts are compatible—merge them
@@ -226,11 +273,12 @@ function mergeConsecutiveScripts(html) {
226
273
  }
227
274
 
228
275
  // Use first script’s attributes (they should be compatible)
229
- return `<script${attrs1}>${mergedContent}</script>`;
230
- });
276
+ html = html.slice(0, m1.index) + `<script${attrs1Str}>${mergedContent}</script>` + html.slice(afterClose2);
277
+ break; // Restart scanning (outer while loop)
278
+ }
231
279
  }
232
280
 
233
- return result;
281
+ return html;
234
282
  }
235
283
 
236
284
  // Type definitions
@@ -456,7 +504,7 @@ function mergeConsecutiveScripts(html) {
456
504
  * event handler attributes. If an object is provided, it can include:
457
505
  * - `engine`: The minifier to use (`terser` or `swc`). Default: `terser`.
458
506
  * Note: Inline event handlers (e.g., `onclick="…"`) always use Terser
459
- * regardless of engine setting, as swc doesn’t support bare return statements.
507
+ * regardless of engine setting, as SWC doesn’t support bare return statements.
460
508
  * - Engine-specific options (e.g., Terser options if `engine: 'terser'`,
461
509
  * SWC options if `engine: 'swc'`).
462
510
  * If a function is provided, it will be used to perform
@@ -1676,7 +1724,7 @@ function joinResultSegments(results, options, restoreCustom, restoreIgnore) {
1676
1724
  * - Cache sizes are locked after first initialization—subsequent calls use the same caches
1677
1725
  * even if different `cacheCSS`/`cacheJS`/`cacheSVG` options are provided
1678
1726
  * - The first call’s options determine the cache sizes for subsequent calls
1679
- * - Explicit `0` values are coerced to `1` (minimum functional cache size)
1727
+ * - Invalid values (NaN, Infinity) fall back to the default size (500); values below `1` are clamped to `1`
1680
1728
  */
1681
1729
  function initCaches(options) {
1682
1730
  // Only create caches once (on first call)—sizes are locked after this
@@ -1693,6 +1741,9 @@ function initCaches(options) {
1693
1741
  return parsed;
1694
1742
  };
1695
1743
 
1744
+ // Sanitize a cache size: Non-finite/NaN falls back to `defaultSize`; otherwise clamped to min 1 and floored
1745
+ const sanitizeSize = (size) => Number.isFinite(size) ? Math.max(1, Math.floor(size)) : defaultSize;
1746
+
1696
1747
  // Get cache sizes with precedence: Options > env > default
1697
1748
  const cssSize = options.cacheCSS !== undefined ? options.cacheCSS
1698
1749
  : (parseEnvCacheSize(process.env.HMN_CACHE_CSS) ?? defaultSize);
@@ -1701,10 +1752,9 @@ function initCaches(options) {
1701
1752
  const svgSize = options.cacheSVG !== undefined ? options.cacheSVG
1702
1753
  : (parseEnvCacheSize(process.env.HMN_CACHE_SVG) ?? defaultSize);
1703
1754
 
1704
- // Coerce `0` to `1` (minimum functional cache size) to avoid immediate eviction
1705
- const cssFinalSize = cssSize === 0 ? 1 : cssSize;
1706
- const jsFinalSize = jsSize === 0 ? 1 : jsSize;
1707
- const svgFinalSize = svgSize === 0 ? 1 : svgSize;
1755
+ const cssFinalSize = sanitizeSize(cssSize);
1756
+ const jsFinalSize = sanitizeSize(jsSize);
1757
+ const svgFinalSize = sanitizeSize(svgSize);
1708
1758
 
1709
1759
  cssMinifyCache = new LRU(cssFinalSize);
1710
1760
  jsMinifyCache = new LRU(jsFinalSize);
package/src/htmlparser.js CHANGED
@@ -506,7 +506,7 @@ export class HTMLParser {
506
506
  // Note: Unquoted attribute values are intentionally not handled here.
507
507
  // Per HTML spec, unquoted values cannot contain spaces or special chars,
508
508
  // making a 20 KB+ unquoted value practically impossible. If encountered,
509
- // it's malformed HTML and using the truncated regex match is acceptable.
509
+ // it’s malformed HTML and using the truncated regex match is acceptable.
510
510
  }
511
511
  }
512
512
  }
@@ -580,7 +580,7 @@ function buildAttr(normalized, hasUnarySlash, options, isLast, uidAttr) {
580
580
  attrValue = attrValue.replace(/'/g, '&#39;');
581
581
  }
582
582
  } else {
583
- // `preventAttributesEscaping` mode: Choose safe quotes but don't escape
583
+ // `preventAttributesEscaping` mode: Choose safe quotes but don’t escape
584
584
  // except when both quote types are present—then escape to prevent invalid HTML
585
585
  const hasDoubleQuote = attrValue.indexOf('"') !== -1;
586
586
  const hasSingleQuote = attrValue.indexOf("'") !== -1;