html-minifier-next 6.1.1 → 6.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -1,6 +1,6 @@
1
1
  # HTML Minifier Next
2
2
 
3
- [![npm version](https://img.shields.io/npm/v/html-minifier-next.svg)](https://www.npmjs.com/package/html-minifier-next) [![Build status](https://github.com/j9t/html-minifier-next/workflows/Tests/badge.svg)](https://github.com/j9t/html-minifier-next/actions) [![Socket](https://badge.socket.dev/npm/package/html-minifier-next)](https://socket.dev/npm/package/html-minifier-next)
3
+ [![npm version](https://img.shields.io/npm/v/html-minifier-next.svg)](https://www.npmjs.com/package/html-minifier-next) [![Build status](https://github.com/j9t/html-minifier-next/workflows/Tests/badge.svg)](https://github.com/j9t/html-minifier-next/actions) [![Socket](https://badge.socket.dev/npm/package/html-minifier-next)](https://socket.dev/npm/package/html-minifier-next) [![GitHub Sponsors](https://badgen.net/static/Support/Open%20Source/cyan)](https://github.com/j9t/html-minifier-next?sponsor=1)
4
4
 
5
5
  Your web page optimization precision tool: HTML Minifier Next (HMN) is a **super-configurable, well-tested, JavaScript-based HTML minifier** able to also handle in-document CSS, JavaScript, and SVG minification.
6
6
 
@@ -603,4 +603,12 @@ Parameters:
603
603
 
604
604
  ## Acknowledgements
605
605
 
606
- With many thanks to all the previous authors of HTML Minifier, especially [Juriy “kangax” Zaytsev](https://github.com/kangax), and to everyone who helped make this new edition better, particularly [Daniel Ruf](https://github.com/DanielRuf) and [Jonas Geiler](https://github.com/jonasgeiler).
606
+ With many thanks to all the previous authors of HTML Minifier, especially [Juriy “kangax” Zaytsev](https://github.com/kangax), and to everyone who helped make this new edition better, particularly [Daniel Ruf](https://github.com/DanielRuf) and [Jonas Geiler](https://github.com/jonasgeiler).
607
+
608
+ ***
609
+
610
+ You might like some of my other work:
611
+
612
+ * Optimization tools: HTML Minifier Next · [ObsoHTML](https://github.com/j9t/obsohtml) · [Image Guard](https://github.com/j9t/image-guard) · [Compressor.js Next](https://github.com/j9t/compressorjs-next) · [.htaccess Punk](https://github.com/j9t/htaccess-punk)
613
+ * Defense tools: [IA Defensa](https://iadefensa.com/solutions/)
614
+ * Resources for quality web development: [Articles](https://meiert.com/topics/development/) · [Books](https://meiert.com/topics/books/) (including [_On Web Development_](https://meiert.com/blog/on-web-development-2/)) · [News](https://frontenddogma.com/) · [Terminology](https://webglossary.info/)
package/cli.js CHANGED
@@ -173,32 +173,48 @@ function readFile(file) {
173
173
  }
174
174
 
175
175
  /**
176
- * Load config from a file path, trying JSON, CJS, then ESM
176
+ * Load config from a file path—for unambiguous extensions (.json, .cjs, .mjs) only the
177
+ * matching format is attempted and its error shown on failure; for .js or unknown extensions
178
+ * all formats are tried and the most relevant error is reported
177
179
  * @param {string} configPath - Path to config file
178
180
  * @returns {Promise<object>} Loaded config object
179
181
  */
180
182
  async function loadConfigFromPath(configPath) {
181
- const data = readFile(configPath);
183
+ const abs = path.resolve(configPath);
184
+ const ext = path.extname(configPath).toLowerCase();
182
185
 
183
- // Try JSON first
184
- try {
185
- return JSON.parse(data);
186
- } catch (jsonErr) {
187
- const abs = path.resolve(configPath);
186
+ if (ext === '.json') {
187
+ try { return JSON.parse(readFile(abs).replace(/^\uFEFF/, '')); }
188
+ catch (err) { fatal(`Cannot parse config file as JSON: ${err.message}`); }
189
+ }
188
190
 
189
- // Try CJS require
191
+ if (ext === '.cjs') {
190
192
  try {
191
193
  const result = require(abs);
192
- // Handle ESM interop: If `require()` loads an ESM file, it may return `{__esModule: true, default: …}`
193
- return (result && result.__esModule && result.default) ? result.default : result;
194
- } catch (cjsErr) {
195
- // Try ESM import
196
- try {
197
- const mod = await import(pathToFileURL(abs).href);
198
- return mod.default || mod;
199
- } catch (esmErr) {
200
- fatal('Cannot read the specified config file.\nAs JSON: ' + jsonErr.message + '\nAs CJS: ' + cjsErr.message + '\nAs ESM: ' + esmErr.message);
201
- }
194
+ return (result && typeof result === 'object' && result.__esModule === true) ? result.default : result;
195
+ } catch (err) { fatal(`Cannot load config file: ${err.message}`); }
196
+ }
197
+
198
+ if (ext === '.mjs') {
199
+ try { const mod = await import(pathToFileURL(abs).href); return 'default' in mod ? mod.default : mod; }
200
+ catch (err) { fatal(`Cannot load config file: ${err.message}`); }
201
+ }
202
+
203
+ // For .js or extension-less files, try JSON first, then CJS, then ESM
204
+ let jsonErr;
205
+ try { return JSON.parse(readFile(abs).replace(/^\uFEFF/, '')); }
206
+ catch (err) { jsonErr = err; }
207
+
208
+ try {
209
+ const result = require(abs);
210
+ // Handle ESM interop: If `require()` loads an ESM file, it may return `{__esModule: true, default: …}`
211
+ return (result && typeof result === 'object' && result.__esModule === true) ? result.default : result;
212
+ } catch (cjsErr) {
213
+ try { const mod = await import(pathToFileURL(abs).href); return 'default' in mod ? mod.default : mod; }
214
+ catch (esmErr) {
215
+ fatal(ext === '.js'
216
+ ? `Cannot load config file: ${cjsErr.message}\nAs module: ${esmErr.message}`
217
+ : `Cannot read the specified config file.\nAs JSON: ${jsonErr.message}\nAs CJS: ${cjsErr.message}\nAs module: ${esmErr.message}`);
202
218
  }
203
219
  }
204
220
  }
@@ -135,9 +135,14 @@ const singleAttrValues = [
135
135
  /"([^"]*)"+/.source,
136
136
  // Attr value, single quotes
137
137
  /'([^']*)'+/.source,
138
- // Attr value, no quotes
138
+ // Attr value, no quotes (strict: excludes `=` per HTML spec)
139
139
  /([^ \t\n\f\r"'`=<>]+)/.source
140
140
  ];
141
+ // Lenient unquoted value pattern for `continueOnParseError`:
142
+ // allows `=` and `` ` `` per spec error-recovery rules
143
+ // (both are parse errors in unquoted-attribute-value state but appended to the value)
144
+ // `"` and `'` remain excluded—permitting them requires broader test coverage
145
+ const singleAttrValueLenientUnquoted = /([^ \t\n\f\r"'<>]+)/.source;
141
146
  // https://www.w3.org/TR/1999/REC-xml-names-19990114/#NT-QName
142
147
  const qnameCapture = (function () {
143
148
  // https://www.npmjs.com/package/ncname
@@ -203,9 +208,11 @@ function stripDelimited(str, open, close) {
203
208
  }
204
209
 
205
210
  function buildAttrRegex(handler) {
211
+ const unquotedValue = handler.continueOnParseError ? singleAttrValueLenientUnquoted : singleAttrValues[2];
212
+ const attrValues = [singleAttrValues[0], singleAttrValues[1], unquotedValue];
206
213
  let pattern = singleAttrIdentifier.source +
207
214
  '(?:\\s*(' + joinSingleAttrAssigns(handler) + ')' +
208
- '[ \\t\\n\\f\\r]*(?:' + singleAttrValues.join('|') + '))?';
215
+ '[ \\t\\n\\f\\r]*(?:' + attrValues.join('|') + '))?';
209
216
  if (handler.customAttrSurround) {
210
217
  const attrClauses = [];
211
218
  for (let i = handler.customAttrSurround.length - 1; i >= 0; i--) {
@@ -607,7 +614,7 @@ class HTMLParser {
607
614
  // Note: Unquoted attribute values are intentionally not handled here.
608
615
  // Per HTML spec, unquoted values cannot contain spaces or special chars,
609
616
  // making a 20 KB+ unquoted value practically impossible. If encountered,
610
- // it's malformed HTML and using the truncated regex match is acceptable.
617
+ // it's malformed HTML and using the truncated regex match is acceptable.
611
618
  }
612
619
  }
613
620
  }
@@ -2632,7 +2639,7 @@ function buildAttr(normalized, hasUnarySlash, options, isLast, uidAttr) {
2632
2639
  attrValue = attrValue.replace(/'/g, '&#39;');
2633
2640
  }
2634
2641
  } else {
2635
- // `preventAttributesEscaping` mode: Choose safe quotes but don't escape
2642
+ // `preventAttributesEscaping` mode: Choose safe quotes but don't escape
2636
2643
  // except when both quote types are present—then escape to prevent invalid HTML
2637
2644
  const hasDoubleQuote = attrValue.indexOf('"') !== -1;
2638
2645
  const hasSingleQuote = attrValue.indexOf("'") !== -1;
@@ -3002,6 +3009,8 @@ let svgMinifyCache = null;
3002
3009
 
3003
3010
  // Pre-compiled patterns for script merging (avoid repeated allocation in hot path)
3004
3011
  const RE_SCRIPT_ATTRS = /([^\s=]+)(?:=(?:"([^"]*)"|'([^']*)'|([^\s>]+)))?/g;
3012
+ const RE_SCRIPT_OPEN = /<script(?=[\s>])/gi; // Finds tag start; use `findTagEnd()` for the actual closing `>`
3013
+ const RE_SCRIPT_CLOSE = /<\/script\s*>/gi;
3005
3014
  const SCRIPT_BOOL_ATTRS = new Set(['async', 'defer', 'nomodule']);
3006
3015
  const DEFAULT_JS_TYPES = new Set(['', 'text/javascript', 'application/javascript']);
3007
3016
 
@@ -3014,6 +3023,28 @@ const RE_HTML_ENCODING = /^(text\/html|application\/xhtml\+xml)$/i;
3014
3023
 
3015
3024
  // Script merging
3016
3025
 
3026
+ /**
3027
+ * Find the index of the `>` that closes an opening tag, correctly skipping
3028
+ * over quoted attribute values (which may contain `>`).
3029
+ * @param {string} html
3030
+ * @param {number} pos - Start position (just after the tag name)
3031
+ * @returns {number} Index of the closing `>`, or -1 if not found
3032
+ */
3033
+ function findTagEnd(html, pos) {
3034
+ let i = pos;
3035
+ while (i < html.length) {
3036
+ const ch = html[i];
3037
+ if (ch === '>') return i;
3038
+ if (ch === '"' || ch === "'") {
3039
+ const q = ch;
3040
+ i++;
3041
+ while (i < html.length && html[i] !== q) i++;
3042
+ }
3043
+ i++;
3044
+ }
3045
+ return -1;
3046
+ }
3047
+
3017
3048
  /**
3018
3049
  * Merge consecutive inline script tags into one (`mergeConsecutiveScripts`).
3019
3050
  * Only merges scripts that are compatible:
@@ -3021,79 +3052,104 @@ const RE_HTML_ENCODING = /^(text\/html|application\/xhtml\+xml)$/i;
3021
3052
  * - Same `type` (or both default JavaScript)
3022
3053
  * - No conflicting attributes (`async`, `defer`, `nomodule`, different `nonce`)
3023
3054
  *
3024
- * Limitation: This function uses regex-based matching (`pattern` variable below),
3025
- * which can produce incorrect results if a script’s content contains a literal
3026
- * `</script>` string (e.g., `document.write('<script>…</script>')`). In valid
3027
- * HTML, such strings should be escaped as `<\/script>` or split like
3028
- * `'</scr' + 'ipt>'`, so this limitation rarely affects real-world code. The
3029
- * earlier `minifyJS` step (if enabled) typically handles this escaping already.
3055
+ * Uses a scanner rather than a regex to locate script boundaries, so literal
3056
+ * `</script>` strings inside script content are handled correctly per the HTML
3057
+ * spec (raw text ends at the first `</script>`).
3030
3058
  *
3031
3059
  * @param {string} html - The HTML string to process
3032
3060
  * @returns {string} HTML with consecutive scripts merged
3033
3061
  */
3034
3062
  function mergeConsecutiveScripts(html) {
3035
- // `pattern`: Regex to match consecutive `</script>` followed by `<script…>`.
3036
- // See function JSDoc above for known limitations with literal `</script>` in content.
3037
- // Captures:
3038
- // 1. first script attrs
3039
- // 2. first script content
3040
- // 3. whitespace between
3041
- // 4. second script attrs
3042
- // 5. second script content
3043
- const pattern = /<script([^>]*)>([\s\S]*?)<\/script>([\s]*)<script([^>]*)>([\s\S]*?)<\/script>/gi;
3044
-
3045
- let result = html;
3063
+ // Parse an attribute string into a name→value map
3064
+ const parseAttrs = (attrStr) => {
3065
+ const attrs = {};
3066
+ RE_SCRIPT_ATTRS.lastIndex = 0;
3067
+ let m;
3068
+ while ((m = RE_SCRIPT_ATTRS.exec(attrStr)) !== null) {
3069
+ const name = m[1].toLowerCase();
3070
+ const value = m[2] ?? m[3] ?? m[4] ?? '';
3071
+ attrs[name] = value;
3072
+ }
3073
+ return attrs;
3074
+ };
3075
+
3046
3076
  let changed = true;
3047
3077
 
3048
3078
  // Keep merging until no more changes (handles chains of 3+ scripts)
3049
3079
  while (changed) {
3050
3080
  changed = false;
3051
- result = result.replace(pattern, (match, attrs1, content1, whitespace, attrs2, content2) => {
3052
- // Parse attributes from both script tags (uses pre-compiled RE_SCRIPT_ATTRS)
3053
- const parseAttrs = (attrStr) => {
3054
- const attrs = {};
3055
- RE_SCRIPT_ATTRS.lastIndex = 0; // Reset for reuse
3056
- let m;
3057
- while ((m = RE_SCRIPT_ATTRS.exec(attrStr)) !== null) {
3058
- const name = m[1].toLowerCase();
3059
- const value = m[2] ?? m[3] ?? m[4] ?? '';
3060
- attrs[name] = value;
3061
- }
3062
- return attrs;
3063
- };
3081
+ RE_SCRIPT_OPEN.lastIndex = 0;
3082
+ let m1;
3083
+
3084
+ while ((m1 = RE_SCRIPT_OPEN.exec(html)) !== null) {
3085
+ // Use findTagEnd() to get the real closing '>', skipping quoted attribute values
3086
+ const tagEnd1 = findTagEnd(html, m1.index + 7);
3087
+ if (tagEnd1 === -1) break;
3088
+
3089
+ const attrs1Str = html.slice(m1.index + 7, tagEnd1);
3090
+ const contentStart1 = tagEnd1 + 1;
3091
+
3092
+ // Find end of this script’s content (first `</script>`—per HTML spec, raw text ends here)
3093
+ RE_SCRIPT_CLOSE.lastIndex = contentStart1;
3094
+ const close1 = RE_SCRIPT_CLOSE.exec(html);
3095
+ if (!close1) break;
3096
+
3097
+ const content1 = html.slice(contentStart1, close1.index);
3098
+ const afterClose1 = close1.index + close1[0].length;
3099
+
3100
+ // Skip optional whitespace and check for a consecutive <script> tag
3101
+ let i = afterClose1;
3102
+ while (i < html.length && (html[i] === ' ' || html[i] === '\t' || html[i] === '\n' || html[i] === '\r' || html[i] === '\f')) i++;
3103
+ if (html.slice(i, i + 7).toLowerCase() !== '<script' || (html[i + 7] !== '>' && !/\s/.test(html[i + 7]))) {
3104
+ RE_SCRIPT_OPEN.lastIndex = afterClose1;
3105
+ continue;
3106
+ }
3107
+
3108
+ const tagStart2 = i;
3109
+ const tagEnd2 = findTagEnd(html, tagStart2 + 7);
3110
+ if (tagEnd2 === -1) break;
3111
+
3112
+ const attrs2Str = html.slice(tagStart2 + 7, tagEnd2);
3113
+ const contentStart2 = tagEnd2 + 1;
3064
3114
 
3065
- const a1 = parseAttrs(attrs1);
3066
- const a2 = parseAttrs(attrs2);
3115
+ // Find end of second script’s content
3116
+ RE_SCRIPT_CLOSE.lastIndex = contentStart2;
3117
+ const close2 = RE_SCRIPT_CLOSE.exec(html);
3118
+ if (!close2) break;
3119
+
3120
+ const content2 = html.slice(contentStart2, close2.index);
3121
+ const afterClose2 = close2.index + close2[0].length;
3122
+
3123
+ const a1 = parseAttrs(attrs1Str);
3124
+ const a2 = parseAttrs(attrs2Str);
3067
3125
 
3068
3126
  // Check for `src`—cannot merge external scripts
3069
3127
  if ('src' in a1 || 'src' in a2) {
3070
- return match;
3128
+ RE_SCRIPT_OPEN.lastIndex = afterClose1;
3129
+ continue;
3071
3130
  }
3072
3131
 
3073
3132
  // Check `type` compatibility (both must be default JS)
3133
+ // Non-JS types (modules, JSON, etc.) must not be merged:
3134
+ // Module scripts have per-script lexical scope, and non-JS content (e.g., JSON)
3135
+ // is not concatenable; even identical non-JS types are incompatible
3074
3136
  const type1 = (a1.type || '').toLowerCase();
3075
3137
  const type2 = (a2.type || '').toLowerCase();
3076
-
3077
- if (DEFAULT_JS_TYPES.has(type1) && DEFAULT_JS_TYPES.has(type2)) ; else {
3078
- // Non-JS types (modules, JSON, etc.) must not be merged:
3079
- // Module scripts have per-script lexical scope, and non-JS content (e.g., JSON)
3080
- // is not concatenable. Even identical non-JS types are incompatible.
3081
- return match;
3138
+ if (!DEFAULT_JS_TYPES.has(type1) || !DEFAULT_JS_TYPES.has(type2)) {
3139
+ RE_SCRIPT_OPEN.lastIndex = afterClose1;
3140
+ continue;
3082
3141
  }
3083
3142
 
3084
- // Check for conflicting boolean attributes (uses pre-compiled SCRIPT_BOOL_ATTRS)
3143
+ // Check for conflicting boolean attributes
3144
+ let boolConflict = false;
3085
3145
  for (const attr of SCRIPT_BOOL_ATTRS) {
3086
- const has1 = attr in a1;
3087
- const has2 = attr in a2;
3088
- if (has1 !== has2) {
3089
- // One has it, one doesn't - incompatible
3090
- return match;
3091
- }
3146
+ if ((attr in a1) !== (attr in a2)) { boolConflict = true; break; }
3092
3147
  }
3093
3148
 
3094
3149
  // Check `nonce`—must be same or both absent
3095
- if (a1.nonce !== a2.nonce) {
3096
- return match;
3150
+ if (boolConflict || a1.nonce !== a2.nonce) {
3151
+ RE_SCRIPT_OPEN.lastIndex = afterClose1;
3152
+ continue;
3097
3153
  }
3098
3154
 
3099
3155
  // Scripts are compatible—merge them
@@ -3114,11 +3170,12 @@ function mergeConsecutiveScripts(html) {
3114
3170
  }
3115
3171
 
3116
3172
  // Use first script’s attributes (they should be compatible)
3117
- return `<script${attrs1}>${mergedContent}</script>`;
3118
- });
3173
+ html = html.slice(0, m1.index) + `<script${attrs1Str}>${mergedContent}</script>` + html.slice(afterClose2);
3174
+ break; // Restart scanning (outer while loop)
3175
+ }
3119
3176
  }
3120
3177
 
3121
- return result;
3178
+ return html;
3122
3179
  }
3123
3180
 
3124
3181
  // Type definitions
@@ -3344,7 +3401,7 @@ function mergeConsecutiveScripts(html) {
3344
3401
  * event handler attributes. If an object is provided, it can include:
3345
3402
  * - `engine`: The minifier to use (`terser` or `swc`). Default: `terser`.
3346
3403
  * Note: Inline event handlers (e.g., `onclick="…"`) always use Terser
3347
- * regardless of engine setting, as swc doesn’t support bare return statements.
3404
+ * regardless of engine setting, as SWC doesn’t support bare return statements.
3348
3405
  * - Engine-specific options (e.g., Terser options if `engine: 'terser'`,
3349
3406
  * SWC options if `engine: 'swc'`).
3350
3407
  * If a function is provided, it will be used to perform
@@ -4564,7 +4621,7 @@ function joinResultSegments(results, options, restoreCustom, restoreIgnore) {
4564
4621
  * - Cache sizes are locked after first initialization—subsequent calls use the same caches
4565
4622
  * even if different `cacheCSS`/`cacheJS`/`cacheSVG` options are provided
4566
4623
  * - The first call’s options determine the cache sizes for subsequent calls
4567
- * - Explicit `0` values are coerced to `1` (minimum functional cache size)
4624
+ * - Invalid values (NaN, Infinity) fall back to the default size (500); values below `1` are clamped to `1`
4568
4625
  */
4569
4626
  function initCaches(options) {
4570
4627
  // Only create caches once (on first call)—sizes are locked after this
@@ -4581,6 +4638,9 @@ function initCaches(options) {
4581
4638
  return parsed;
4582
4639
  };
4583
4640
 
4641
+ // Sanitize a cache size: Non-finite/NaN falls back to `defaultSize`; otherwise clamped to min 1 and floored
4642
+ const sanitizeSize = (size) => Number.isFinite(size) ? Math.max(1, Math.floor(size)) : defaultSize;
4643
+
4584
4644
  // Get cache sizes with precedence: Options > env > default
4585
4645
  const cssSize = options.cacheCSS !== undefined ? options.cacheCSS
4586
4646
  : (parseEnvCacheSize(process.env.HMN_CACHE_CSS) ?? defaultSize);
@@ -4589,10 +4649,9 @@ function initCaches(options) {
4589
4649
  const svgSize = options.cacheSVG !== undefined ? options.cacheSVG
4590
4650
  : (parseEnvCacheSize(process.env.HMN_CACHE_SVG) ?? defaultSize);
4591
4651
 
4592
- // Coerce `0` to `1` (minimum functional cache size) to avoid immediate eviction
4593
- const cssFinalSize = cssSize === 0 ? 1 : cssSize;
4594
- const jsFinalSize = jsSize === 0 ? 1 : jsSize;
4595
- const svgFinalSize = svgSize === 0 ? 1 : svgSize;
4652
+ const cssFinalSize = sanitizeSize(cssSize);
4653
+ const jsFinalSize = sanitizeSize(jsSize);
4654
+ const svgFinalSize = sanitizeSize(svgSize);
4596
4655
 
4597
4656
  cssMinifyCache = new LRU(cssFinalSize);
4598
4657
  jsMinifyCache = new LRU(jsFinalSize);
@@ -254,7 +254,7 @@ export type MinifierOptions = {
254
254
  * event handler attributes. If an object is provided, it can include:
255
255
  * - `engine`: The minifier to use (`terser` or `swc`). Default: `terser`.
256
256
  * Note: Inline event handlers (e.g., `onclick="…"`) always use Terser
257
- * regardless of engine setting, as swc doesn’t support bare return statements.
257
+ * regardless of engine setting, as SWC doesn’t support bare return statements.
258
258
  * - Engine-specific options (e.g., Terser options if `engine: 'terser'`,
259
259
  * SWC options if `engine: 'swc'`).
260
260
  * If a function is provided, it will be used to perform
@@ -1 +1 @@
1
- {"version":3,"file":"htmlminifier.d.ts","sourceRoot":"","sources":["../../src/htmlminifier.js"],"names":[],"mappings":"AAyrDO,8BAJI,MAAM,YACN,eAAe,GACb,OAAO,CAAC,MAAM,CAAC,CAwB3B;;;;;;;;;;;;UA99CS,MAAM;;;;;;;;;;;;;;;;;;mCAaA,MAAM,SAAS,aAAa,EAAE,yBAAyB,CAAC,GAAG,EAAE,MAAM,KAAK,OAAO,KAAK,OAAO;;;;;;;+BAM3F,MAAM,GAAG,IAAI,SAAS,aAAa,EAAE,GAAG,SAAS,qBAAqB,CAAC,GAAG,EAAE,MAAM,KAAK,OAAO,KAAK,OAAO;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;qBA6JtG,OAAO,KAAK,IAAI;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;2HA2BiF,MAAM,SAAS,MAAM,KAAK,OAAO,CAAC,MAAM,CAAC,GAAG,MAAM;;;;;;;;;;;;;;;;iBASxG,QAAQ,GAAG,KAAK;gBAAgC,MAAM,WAAW,OAAO,KAAK,OAAO,CAAC,MAAM,CAAC,GAAG,MAAM;;;;;;;;;;;eAa/H,MAAM;gBAAY,MAAM,KAAK,OAAO,CAAC,MAAM,CAAC,GAAG,MAAM;;;;;;;;;;;;;;;;;mBAiBzE,MAAM,KAAK,MAAM;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;kDA+DF,MAAM,OAAO,MAAM,KAAK,OAAO;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;sCA2EpC,MAAM,SAAS,aAAa,EAAE,KAAK,IAAI;;;;;;;;;wCAQrC,MAAM,KAAK,MAAM;;;;;;;;;;;;;;;;;wBAjnBK,cAAc;0BAAd,cAAc;+BAAd,cAAc"}
1
+ {"version":3,"file":"htmlminifier.d.ts","sourceRoot":"","sources":["../../src/htmlminifier.js"],"names":[],"mappings":"AA2uDO,8BAJI,MAAM,YACN,eAAe,GACb,OAAO,CAAC,MAAM,CAAC,CAwB3B;;;;;;;;;;;;UAh+CS,MAAM;;;;;;;;;;;;;;;;;;mCAaA,MAAM,SAAS,aAAa,EAAE,yBAAyB,CAAC,GAAG,EAAE,MAAM,KAAK,OAAO,KAAK,OAAO;;;;;;;+BAM3F,MAAM,GAAG,IAAI,SAAS,aAAa,EAAE,GAAG,SAAS,qBAAqB,CAAC,GAAG,EAAE,MAAM,KAAK,OAAO,KAAK,OAAO;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;qBA6JtG,OAAO,KAAK,IAAI;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;2HA2BiF,MAAM,SAAS,MAAM,KAAK,OAAO,CAAC,MAAM,CAAC,GAAG,MAAM;;;;;;;;;;;;;;;;iBASxG,QAAQ,GAAG,KAAK;gBAAgC,MAAM,WAAW,OAAO,KAAK,OAAO,CAAC,MAAM,CAAC,GAAG,MAAM;;;;;;;;;;;eAa/H,MAAM;gBAAY,MAAM,KAAK,OAAO,CAAC,MAAM,CAAC,GAAG,MAAM;;;;;;;;;;;;;;;;;mBAiBzE,MAAM,KAAK,MAAM;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;kDA+DF,MAAM,OAAO,MAAM,KAAK,OAAO;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;sCA2EpC,MAAM,SAAS,aAAa,EAAE,KAAK,IAAI;;;;;;;;;wCAQrC,MAAM,KAAK,MAAM;;;;;;;;;;;;;;;;;wBAjqBK,cAAc;0BAAd,cAAc;+BAAd,cAAc"}
@@ -1 +1 @@
1
- {"version":3,"file":"htmlparser.d.ts","sourceRoot":"","sources":["../../src/htmlparser.js"],"names":[],"mappings":"AAgDA,4BAAkE;AAwGlE;IACE,qCAGC;IAFC,UAAgB;IAChB,aAAsB;IAGxB,uBAmmBC;CACF"}
1
+ {"version":3,"file":"htmlparser.d.ts","sourceRoot":"","sources":["../../src/htmlparser.js"],"names":[],"mappings":"AAqDA,4BAAkE;AA0GlE;IACE,qCAGC;IAFC,UAAgB;IAChB,aAAsB;IAGxB,uBAmmBC;CACF"}
package/package.json CHANGED
@@ -19,11 +19,11 @@
19
19
  "@rollup/plugin-json": "^6.1.0",
20
20
  "@rollup/plugin-node-resolve": "^16.0.3",
21
21
  "@swc/core": "^1.15.21",
22
- "eslint": "^10.1.0",
22
+ "eslint": "^10.2.0",
23
23
  "rollup": "^4.60.0",
24
24
  "rollup-plugin-polyfill-node": "^0.13.0",
25
25
  "typescript": "^6.0.2",
26
- "vite": "^8.0.5"
26
+ "vite": "^8.0.8"
27
27
  },
28
28
  "exports": {
29
29
  ".": {
@@ -96,5 +96,5 @@
96
96
  },
97
97
  "type": "module",
98
98
  "types": "./dist/types/htmlminifier.d.ts",
99
- "version": "6.1.1"
99
+ "version": "6.1.3"
100
100
  }
@@ -112,6 +112,8 @@ let svgMinifyCache = null;
112
112
 
113
113
  // Pre-compiled patterns for script merging (avoid repeated allocation in hot path)
114
114
  const RE_SCRIPT_ATTRS = /([^\s=]+)(?:=(?:"([^"]*)"|'([^']*)'|([^\s>]+)))?/g;
115
+ const RE_SCRIPT_OPEN = /<script(?=[\s>])/gi; // Finds tag start; use `findTagEnd()` for the actual closing `>`
116
+ const RE_SCRIPT_CLOSE = /<\/script\s*>/gi;
115
117
  const SCRIPT_BOOL_ATTRS = new Set(['async', 'defer', 'nomodule']);
116
118
  const DEFAULT_JS_TYPES = new Set(['', 'text/javascript', 'application/javascript']);
117
119
 
@@ -124,6 +126,28 @@ const RE_HTML_ENCODING = /^(text\/html|application\/xhtml\+xml)$/i;
124
126
 
125
127
  // Script merging
126
128
 
129
+ /**
130
+ * Find the index of the `>` that closes an opening tag, correctly skipping
131
+ * over quoted attribute values (which may contain `>`).
132
+ * @param {string} html
133
+ * @param {number} pos - Start position (just after the tag name)
134
+ * @returns {number} Index of the closing `>`, or -1 if not found
135
+ */
136
+ function findTagEnd(html, pos) {
137
+ let i = pos;
138
+ while (i < html.length) {
139
+ const ch = html[i];
140
+ if (ch === '>') return i;
141
+ if (ch === '"' || ch === "'") {
142
+ const q = ch;
143
+ i++;
144
+ while (i < html.length && html[i] !== q) i++;
145
+ }
146
+ i++;
147
+ }
148
+ return -1;
149
+ }
150
+
127
151
  /**
128
152
  * Merge consecutive inline script tags into one (`mergeConsecutiveScripts`).
129
153
  * Only merges scripts that are compatible:
@@ -131,81 +155,104 @@ const RE_HTML_ENCODING = /^(text\/html|application\/xhtml\+xml)$/i;
131
155
  * - Same `type` (or both default JavaScript)
132
156
  * - No conflicting attributes (`async`, `defer`, `nomodule`, different `nonce`)
133
157
  *
134
- * Limitation: This function uses regex-based matching (`pattern` variable below),
135
- * which can produce incorrect results if a script’s content contains a literal
136
- * `</script>` string (e.g., `document.write('<script>…</script>')`). In valid
137
- * HTML, such strings should be escaped as `<\/script>` or split like
138
- * `'</scr' + 'ipt>'`, so this limitation rarely affects real-world code. The
139
- * earlier `minifyJS` step (if enabled) typically handles this escaping already.
158
+ * Uses a scanner rather than a regex to locate script boundaries, so literal
159
+ * `</script>` strings inside script content are handled correctly per the HTML
160
+ * spec (raw text ends at the first `</script>`).
140
161
  *
141
162
  * @param {string} html - The HTML string to process
142
163
  * @returns {string} HTML with consecutive scripts merged
143
164
  */
144
165
  function mergeConsecutiveScripts(html) {
145
- // `pattern`: Regex to match consecutive `</script>` followed by `<script…>`.
146
- // See function JSDoc above for known limitations with literal `</script>` in content.
147
- // Captures:
148
- // 1. first script attrs
149
- // 2. first script content
150
- // 3. whitespace between
151
- // 4. second script attrs
152
- // 5. second script content
153
- const pattern = /<script([^>]*)>([\s\S]*?)<\/script>([\s]*)<script([^>]*)>([\s\S]*?)<\/script>/gi;
154
-
155
- let result = html;
166
+ // Parse an attribute string into a name→value map
167
+ const parseAttrs = (attrStr) => {
168
+ const attrs = {};
169
+ RE_SCRIPT_ATTRS.lastIndex = 0;
170
+ let m;
171
+ while ((m = RE_SCRIPT_ATTRS.exec(attrStr)) !== null) {
172
+ const name = m[1].toLowerCase();
173
+ const value = m[2] ?? m[3] ?? m[4] ?? '';
174
+ attrs[name] = value;
175
+ }
176
+ return attrs;
177
+ };
178
+
156
179
  let changed = true;
157
180
 
158
181
  // Keep merging until no more changes (handles chains of 3+ scripts)
159
182
  while (changed) {
160
183
  changed = false;
161
- result = result.replace(pattern, (match, attrs1, content1, whitespace, attrs2, content2) => {
162
- // Parse attributes from both script tags (uses pre-compiled RE_SCRIPT_ATTRS)
163
- const parseAttrs = (attrStr) => {
164
- const attrs = {};
165
- RE_SCRIPT_ATTRS.lastIndex = 0; // Reset for reuse
166
- let m;
167
- while ((m = RE_SCRIPT_ATTRS.exec(attrStr)) !== null) {
168
- const name = m[1].toLowerCase();
169
- const value = m[2] ?? m[3] ?? m[4] ?? '';
170
- attrs[name] = value;
171
- }
172
- return attrs;
173
- };
184
+ RE_SCRIPT_OPEN.lastIndex = 0;
185
+ let m1;
186
+
187
+ while ((m1 = RE_SCRIPT_OPEN.exec(html)) !== null) {
188
+ // Use findTagEnd() to get the real closing '>', skipping quoted attribute values
189
+ const tagEnd1 = findTagEnd(html, m1.index + 7);
190
+ if (tagEnd1 === -1) break;
191
+
192
+ const attrs1Str = html.slice(m1.index + 7, tagEnd1);
193
+ const contentStart1 = tagEnd1 + 1;
194
+
195
+ // Find end of this script’s content (first `</script>`—per HTML spec, raw text ends here)
196
+ RE_SCRIPT_CLOSE.lastIndex = contentStart1;
197
+ const close1 = RE_SCRIPT_CLOSE.exec(html);
198
+ if (!close1) break;
199
+
200
+ const content1 = html.slice(contentStart1, close1.index);
201
+ const afterClose1 = close1.index + close1[0].length;
202
+
203
+ // Skip optional whitespace and check for a consecutive <script> tag
204
+ let i = afterClose1;
205
+ while (i < html.length && (html[i] === ' ' || html[i] === '\t' || html[i] === '\n' || html[i] === '\r' || html[i] === '\f')) i++;
206
+ if (html.slice(i, i + 7).toLowerCase() !== '<script' || (html[i + 7] !== '>' && !/\s/.test(html[i + 7]))) {
207
+ RE_SCRIPT_OPEN.lastIndex = afterClose1;
208
+ continue;
209
+ }
210
+
211
+ const tagStart2 = i;
212
+ const tagEnd2 = findTagEnd(html, tagStart2 + 7);
213
+ if (tagEnd2 === -1) break;
174
214
 
175
- const a1 = parseAttrs(attrs1);
176
- const a2 = parseAttrs(attrs2);
215
+ const attrs2Str = html.slice(tagStart2 + 7, tagEnd2);
216
+ const contentStart2 = tagEnd2 + 1;
217
+
218
+ // Find end of second script’s content
219
+ RE_SCRIPT_CLOSE.lastIndex = contentStart2;
220
+ const close2 = RE_SCRIPT_CLOSE.exec(html);
221
+ if (!close2) break;
222
+
223
+ const content2 = html.slice(contentStart2, close2.index);
224
+ const afterClose2 = close2.index + close2[0].length;
225
+
226
+ const a1 = parseAttrs(attrs1Str);
227
+ const a2 = parseAttrs(attrs2Str);
177
228
 
178
229
  // Check for `src`—cannot merge external scripts
179
230
  if ('src' in a1 || 'src' in a2) {
180
- return match;
231
+ RE_SCRIPT_OPEN.lastIndex = afterClose1;
232
+ continue;
181
233
  }
182
234
 
183
235
  // Check `type` compatibility (both must be default JS)
236
+ // Non-JS types (modules, JSON, etc.) must not be merged:
237
+ // Module scripts have per-script lexical scope, and non-JS content (e.g., JSON)
238
+ // is not concatenable; even identical non-JS types are incompatible
184
239
  const type1 = (a1.type || '').toLowerCase();
185
240
  const type2 = (a2.type || '').toLowerCase();
186
-
187
- if (DEFAULT_JS_TYPES.has(type1) && DEFAULT_JS_TYPES.has(type2)) {
188
- // Both are default JavaScript—compatible
189
- } else {
190
- // Non-JS types (modules, JSON, etc.) must not be merged:
191
- // Module scripts have per-script lexical scope, and non-JS content (e.g., JSON)
192
- // is not concatenable. Even identical non-JS types are incompatible.
193
- return match;
241
+ if (!DEFAULT_JS_TYPES.has(type1) || !DEFAULT_JS_TYPES.has(type2)) {
242
+ RE_SCRIPT_OPEN.lastIndex = afterClose1;
243
+ continue;
194
244
  }
195
245
 
196
- // Check for conflicting boolean attributes (uses pre-compiled SCRIPT_BOOL_ATTRS)
246
+ // Check for conflicting boolean attributes
247
+ let boolConflict = false;
197
248
  for (const attr of SCRIPT_BOOL_ATTRS) {
198
- const has1 = attr in a1;
199
- const has2 = attr in a2;
200
- if (has1 !== has2) {
201
- // One has it, one doesn't - incompatible
202
- return match;
203
- }
249
+ if ((attr in a1) !== (attr in a2)) { boolConflict = true; break; }
204
250
  }
205
251
 
206
252
  // Check `nonce`—must be same or both absent
207
- if (a1.nonce !== a2.nonce) {
208
- return match;
253
+ if (boolConflict || a1.nonce !== a2.nonce) {
254
+ RE_SCRIPT_OPEN.lastIndex = afterClose1;
255
+ continue;
209
256
  }
210
257
 
211
258
  // Scripts are compatible—merge them
@@ -226,11 +273,12 @@ function mergeConsecutiveScripts(html) {
226
273
  }
227
274
 
228
275
  // Use first script’s attributes (they should be compatible)
229
- return `<script${attrs1}>${mergedContent}</script>`;
230
- });
276
+ html = html.slice(0, m1.index) + `<script${attrs1Str}>${mergedContent}</script>` + html.slice(afterClose2);
277
+ break; // Restart scanning (outer while loop)
278
+ }
231
279
  }
232
280
 
233
- return result;
281
+ return html;
234
282
  }
235
283
 
236
284
  // Type definitions
@@ -456,7 +504,7 @@ function mergeConsecutiveScripts(html) {
456
504
  * event handler attributes. If an object is provided, it can include:
457
505
  * - `engine`: The minifier to use (`terser` or `swc`). Default: `terser`.
458
506
  * Note: Inline event handlers (e.g., `onclick="…"`) always use Terser
459
- * regardless of engine setting, as swc doesn’t support bare return statements.
507
+ * regardless of engine setting, as SWC doesn’t support bare return statements.
460
508
  * - Engine-specific options (e.g., Terser options if `engine: 'terser'`,
461
509
  * SWC options if `engine: 'swc'`).
462
510
  * If a function is provided, it will be used to perform
@@ -1676,7 +1724,7 @@ function joinResultSegments(results, options, restoreCustom, restoreIgnore) {
1676
1724
  * - Cache sizes are locked after first initialization—subsequent calls use the same caches
1677
1725
  * even if different `cacheCSS`/`cacheJS`/`cacheSVG` options are provided
1678
1726
  * - The first call’s options determine the cache sizes for subsequent calls
1679
- * - Explicit `0` values are coerced to `1` (minimum functional cache size)
1727
+ * - Invalid values (NaN, Infinity) fall back to the default size (500); values below `1` are clamped to `1`
1680
1728
  */
1681
1729
  function initCaches(options) {
1682
1730
  // Only create caches once (on first call)—sizes are locked after this
@@ -1693,6 +1741,9 @@ function initCaches(options) {
1693
1741
  return parsed;
1694
1742
  };
1695
1743
 
1744
+ // Sanitize a cache size: Non-finite/NaN falls back to `defaultSize`; otherwise clamped to min 1 and floored
1745
+ const sanitizeSize = (size) => Number.isFinite(size) ? Math.max(1, Math.floor(size)) : defaultSize;
1746
+
1696
1747
  // Get cache sizes with precedence: Options > env > default
1697
1748
  const cssSize = options.cacheCSS !== undefined ? options.cacheCSS
1698
1749
  : (parseEnvCacheSize(process.env.HMN_CACHE_CSS) ?? defaultSize);
@@ -1701,10 +1752,9 @@ function initCaches(options) {
1701
1752
  const svgSize = options.cacheSVG !== undefined ? options.cacheSVG
1702
1753
  : (parseEnvCacheSize(process.env.HMN_CACHE_SVG) ?? defaultSize);
1703
1754
 
1704
- // Coerce `0` to `1` (minimum functional cache size) to avoid immediate eviction
1705
- const cssFinalSize = cssSize === 0 ? 1 : cssSize;
1706
- const jsFinalSize = jsSize === 0 ? 1 : jsSize;
1707
- const svgFinalSize = svgSize === 0 ? 1 : svgSize;
1755
+ const cssFinalSize = sanitizeSize(cssSize);
1756
+ const jsFinalSize = sanitizeSize(jsSize);
1757
+ const svgFinalSize = sanitizeSize(svgSize);
1708
1758
 
1709
1759
  cssMinifyCache = new LRU(cssFinalSize);
1710
1760
  jsMinifyCache = new LRU(jsFinalSize);
package/src/htmlparser.js CHANGED
@@ -32,9 +32,14 @@ const singleAttrValues = [
32
32
  /"([^"]*)"+/.source,
33
33
  // Attr value, single quotes
34
34
  /'([^']*)'+/.source,
35
- // Attr value, no quotes
35
+ // Attr value, no quotes (strict: excludes `=` per HTML spec)
36
36
  /([^ \t\n\f\r"'`=<>]+)/.source
37
37
  ];
38
+ // Lenient unquoted value pattern for `continueOnParseError`:
39
+ // allows `=` and `` ` `` per spec error-recovery rules
40
+ // (both are parse errors in unquoted-attribute-value state but appended to the value)
41
+ // `"` and `'` remain excluded—permitting them requires broader test coverage
42
+ const singleAttrValueLenientUnquoted = /([^ \t\n\f\r"'<>]+)/.source;
38
43
  // https://www.w3.org/TR/1999/REC-xml-names-19990114/#NT-QName
39
44
  const qnameCapture = (function () {
40
45
  // https://www.npmjs.com/package/ncname
@@ -100,9 +105,11 @@ function stripDelimited(str, open, close) {
100
105
  }
101
106
 
102
107
  function buildAttrRegex(handler) {
108
+ const unquotedValue = handler.continueOnParseError ? singleAttrValueLenientUnquoted : singleAttrValues[2];
109
+ const attrValues = [singleAttrValues[0], singleAttrValues[1], unquotedValue];
103
110
  let pattern = singleAttrIdentifier.source +
104
111
  '(?:\\s*(' + joinSingleAttrAssigns(handler) + ')' +
105
- '[ \\t\\n\\f\\r]*(?:' + singleAttrValues.join('|') + '))?';
112
+ '[ \\t\\n\\f\\r]*(?:' + attrValues.join('|') + '))?';
106
113
  if (handler.customAttrSurround) {
107
114
  const attrClauses = [];
108
115
  for (let i = handler.customAttrSurround.length - 1; i >= 0; i--) {
@@ -506,7 +513,7 @@ export class HTMLParser {
506
513
  // Note: Unquoted attribute values are intentionally not handled here.
507
514
  // Per HTML spec, unquoted values cannot contain spaces or special chars,
508
515
  // making a 20 KB+ unquoted value practically impossible. If encountered,
509
- // it's malformed HTML and using the truncated regex match is acceptable.
516
+ // it’s malformed HTML and using the truncated regex match is acceptable.
510
517
  }
511
518
  }
512
519
  }
@@ -580,7 +580,7 @@ function buildAttr(normalized, hasUnarySlash, options, isLast, uidAttr) {
580
580
  attrValue = attrValue.replace(/'/g, '&#39;');
581
581
  }
582
582
  } else {
583
- // `preventAttributesEscaping` mode: Choose safe quotes but don't escape
583
+ // `preventAttributesEscaping` mode: Choose safe quotes but don’t escape
584
584
  // except when both quote types are present—then escape to prevent invalid HTML
585
585
  const hasDoubleQuote = attrValue.indexOf('"') !== -1;
586
586
  const hasSingleQuote = attrValue.indexOf("'") !== -1;