npm - html-minifier-next - Versions diffs - 4.6.0 → 4.7.0 - Mend

html-minifier-next 4.6.0 → 4.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10) hide show

package/README.md +34 -31
package/cli.js +1 -1
package/dist/htmlminifier.cjs +253 -30
package/dist/htmlminifier.esm.bundle.js +253 -30
package/dist/types/htmlminifier.d.ts.map +1 -1
package/dist/types/htmlparser.d.ts.map +1 -1
package/package.json +2 -2
package/src/htmlminifier.js +152 -25
package/src/htmlparser.js +101 -5
package/src/utils.js +1 -1

package/src/htmlminifier.js CHANGED Viewed

@@ -7,18 +7,88 @@ import TokenChain from './tokenchain.js';
 import { replaceAsync } from './utils.js';
 import { presets, getPreset, getPresetNames } from './presets.js';
-const trimWhitespace = str => str && str.replace(/^[ \n\r\t\f]+/, '').replace(/[ \n\r\t\f]+$/, '');
+// Hoisted, reusable RegExp patterns and tiny helpers to avoid repeated allocations in hot paths
+const RE_WS_START = /^[ \n\r\t\f]+/;
+const RE_WS_END = /[ \n\r\t\f]+$/;
+const RE_ALL_WS_NBSP = /[ \n\r\t\f\xA0]+/g;
+const RE_NBSP_LEADING_GROUP = /(^|\xA0+)[^\xA0]+/g;
+const RE_NBSP_LEAD_GROUP = /(\xA0+)[^\xA0]+/g;
+const RE_NBSP_TRAILING_GROUP = /[^\xA0]+(\xA0+)/g;
+const RE_NBSP_TRAILING_STRIP = /[^\xA0]+$/;
+const RE_CONDITIONAL_COMMENT = /^\[if\s[^\]]+]|\[endif]$/;
+const RE_EVENT_ATTR_DEFAULT = /^on[a-z]{3,}$/;
+const RE_CAN_REMOVE_ATTR_QUOTES = /^[^ \t\n\f\r"'`=<>]+$/;
+const RE_TRAILING_SEMICOLON = /;$/;
+const RE_AMP_ENTITY = /&(#?[0-9a-zA-Z]+;)/g;
+// Tiny stable stringify for options signatures: object keys are sorted at every level and nested objects/arrays are serialized recursively, so property order does not change the result
+function stableStringify(obj) {
+  if (obj == null || typeof obj !== 'object') return JSON.stringify(obj); // null, undefined, and non-object values are delegated to JSON.stringify
+  if (Array.isArray(obj)) return '[' + obj.map(stableStringify).join(',') + ']'; // arrays keep their element order
+  const keys = Object.keys(obj).sort(); // sorting makes the output independent of property insertion order
+  let out = '{';
+  for (let i = 0; i < keys.length; i++) {
+    const k = keys[i];
+    out += JSON.stringify(k) + ':' + stableStringify(obj[k]) + (i < keys.length - 1 ? ',' : ''); // comma between entries, none after the last
+  }
+  return out + '}';
+}
+// Minimal LRU cache for strings and promises; the insertion order of the backing Map doubles as recency order (oldest entry first)
+class LRU {
+  constructor(limit = 200) { // limit: maximum number of entries kept before eviction
+    this.limit = limit;
+    this.map = new Map();
+  }
+  get(key) { // returns the stored value, or undefined on a miss
+    const v = this.map.get(key);
+    if (v !== undefined) {
+      this.map.delete(key); // hit: delete and re-insert so the key moves to the most recent position
+      this.map.set(key, v);
+    }
+    return v;
+  }
+  set(key, value) {
+    if (this.map.has(key)) this.map.delete(key); // drop the old slot so the key is re-inserted as most recent
+    this.map.set(key, value);
+    if (this.map.size > this.limit) {
+      const first = this.map.keys().next().value; // first key in iteration order is the least recently used
+      this.map.delete(first);
+    }
+  }
+  delete(key) { this.map.delete(key); }
+}
+// Per-process caches
+const jsMinifyCache = new LRU(200);
+const cssMinifyCache = new LRU(200);
+const trimWhitespace = str => { // strips leading and trailing space, \n, \r, \t, and \f characters
+  if (!str) return str; // falsy input is returned unchanged
+  // Fast path: if the first and last characters are not whitespace, return the input without running any replace
+  if (!/^[ \n\r\t\f]/.test(str) && !/[ \n\r\t\f]$/.test(str)) {
+    return str;
+  }
+  return str.replace(RE_WS_START, '').replace(RE_WS_END, ''); // RE_WS_START and RE_WS_END are the hoisted patterns declared earlier in this hunk
+};
 function collapseWhitespaceAll(str) {
+  if (!str) return str;
+  // Fast path: if there are no common whitespace characters, return early
+  if (!/[ \n\r\t\f\xA0]/.test(str)) {
+    return str;
+  }
   // Non-breaking space is specifically handled inside the replacer function here:
-  return str && str.replace(/[ \n\r\t\f\xA0]+/g, function (spaces) {
-    return spaces === '\t' ? '\t' : spaces.replace(/(^|\xA0+)[^\xA0]+/g, '$1 ');
+  return str.replace(RE_ALL_WS_NBSP, function (spaces) {
+    return spaces === '\t' ? '\t' : spaces.replace(RE_NBSP_LEADING_GROUP, '$1 ');
   });
 }
 function collapseWhitespace(str, options, trimLeft, trimRight, collapseAll) {
   let lineBreakBefore = ''; let lineBreakAfter = '';
+  if (!str) return str;
   if (options.preserveLineBreaks) {
     str = str.replace(/^[ \n\r\t\f]*?[\n\r][ \n\r\t\f]*/, function () {
       lineBreakBefore = '\n';
@@ -36,7 +106,7 @@ function collapseWhitespace(str, options, trimLeft, trimRight, collapseAll) {
       if (conservative && spaces === '\t') {
         return '\t';
       }
-      return spaces.replace(/^[^\xA0]+/, '').replace(/(\xA0+)[^\xA0]+/g, '$1 ') || (conservative ? ' ' : '');
+      return spaces.replace(/^[^\xA0]+/, '').replace(RE_NBSP_LEAD_GROUP, '$1 ') || (conservative ? ' ' : '');
     });
   }
@@ -47,7 +117,7 @@ function collapseWhitespace(str, options, trimLeft, trimRight, collapseAll) {
       if (conservative && spaces === '\t') {
         return '\t';
       }
-      return spaces.replace(/[^\xA0]+(\xA0+)/g, ' $1').replace(/[^\xA0]+$/, '') || (conservative ? ' ' : '');
+      return spaces.replace(RE_NBSP_TRAILING_GROUP, ' $1').replace(RE_NBSP_TRAILING_STRIP, '') || (conservative ? ' ' : '');
     });
   }
@@ -79,7 +149,7 @@ function collapseWhitespaceSmart(str, prevTag, nextTag, options, inlineElements,
 }
 function isConditionalComment(text) {
-  return /^\[if\s[^\]]+]|\[endif]$/.test(text);
+  return RE_CONDITIONAL_COMMENT.test(text);
 }
 function isIgnoredComment(text, options) {
@@ -101,12 +171,12 @@ function isEventAttribute(attrName, options) {
     }
     return false;
   }
-  return /^on[a-z]{3,}$/.test(attrName);
+  return RE_EVENT_ATTR_DEFAULT.test(attrName);
 }
 function canRemoveAttributeQuotes(value) {
   // https://mathiasbynens.be/notes/unquoted-attribute-values
-  return /^[^ \t\n\f\r"'`=<>]+$/.test(value);
+  return RE_CAN_REMOVE_ATTR_QUOTES.test(value);
 }
 function attributesInclude(attributes, attribute) {
@@ -317,7 +387,7 @@ async function cleanAttributeValue(tag, attrName, attrValue, options, attrs, min
   } else if (attrName === 'style') {
     attrValue = trimWhitespace(attrValue);
     if (attrValue) {
-      if (/;$/.test(attrValue) && !/&#?[0-9a-zA-Z]+;$/.test(attrValue)) {
+      if (attrValue.endsWith(';') && !/&#?[0-9a-zA-Z]+;$/.test(attrValue)) {
         attrValue = attrValue.replace(/\s*;$/, ';');
       }
       attrValue = await options.minifyCSS(attrValue, 'inline');
@@ -636,7 +706,10 @@ async function normalizeAttr(attr, attrs, tag, options) {
   let attrValue = attr.value;
   if (options.decodeEntities && attrValue) {
-    attrValue = decodeHTMLStrict(attrValue);
+    // Fast path: only decode when entities are present
+    if (attrValue.indexOf('&') !== -1) {
+      attrValue = decodeHTMLStrict(attrValue);
+    }
   }
   if ((options.removeRedundantAttributes &&
@@ -657,8 +730,8 @@ async function normalizeAttr(attr, attrs, tag, options) {
     return;
   }
-  if (options.decodeEntities && attrValue) {
-    attrValue = attrValue.replace(/&(#?[0-9a-zA-Z]+;)/g, '&amp;$1');
+  if (options.decodeEntities && attrValue && attrValue.indexOf('&') !== -1) {
+    attrValue = attrValue.replace(RE_AMP_ENTITY, '&amp;$1');
   }
   return {
@@ -778,6 +851,10 @@ const processOptions = (inputOptions) => {
       const lightningCssOptions = typeof option === 'object' ? option : {};
       options.minifyCSS = async function (text, type) {
+        // Fast path: nothing to minify
+        if (!text || !text.trim()) {
+          return text;
+        }
         text = await replaceAsync(
           text,
           /(url\s*\(\s*)(?:"([^"]*)"|'([^']*)'|([^\s)]+))(\s*\))/ig,
@@ -796,10 +873,20 @@ const processOptions = (inputOptions) => {
             }
           }
         );
+        // Cache key: wrapped content, type, options signature
         const inputCSS = wrapCSS(text, type);
+        const cssSig = stableStringify({ type, opts: lightningCssOptions, cont: !!options.continueOnMinifyError });
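+        // For large inputs (over 2048 chars), the key uses only the length plus the first/last 50 chars to keep keys short. NOTE: this reduces but does not prevent collisions: two inputs with the same length, head, and tail share a cache entry.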
+        const cssKey = inputCSS.length > 2048
+          ? (inputCSS.length + '|' + inputCSS.slice(0, 50) + inputCSS.slice(-50) + '|' + type + '|' + cssSig)
+          : (inputCSS + '|' + type + '|' + cssSig);
         try {
+          const cached = cssMinifyCache.get(cssKey);
+          if (cached) {
+            return cached;
+          }
           const result = transformCSS({
             filename: 'input.css',
             code: Buffer.from(inputCSS),
@@ -822,12 +909,12 @@ const processOptions = (inputOptions) => {
           // Preserve if output is empty and input had template syntax or UIDs
           // This catches cases where Lightning CSS removed content that should be preserved
-          if (text.trim() && !outputCSS.trim() && (looksLikeTemplate || hasUID)) {
-            return text;
-          }
+          const finalOutput = (text.trim() && !outputCSS.trim() && (looksLikeTemplate || hasUID)) ? text : outputCSS;
-          return outputCSS;
+          cssMinifyCache.set(cssKey, finalOutput);
+          return finalOutput;
         } catch (err) {
+          cssMinifyCache.delete(cssKey);
           if (!options.continueOnMinifyError) {
             throw err;
           }
@@ -853,10 +940,39 @@ const processOptions = (inputOptions) => {
         terserOptions.parse.bare_returns = inline;
+        let jsKey;
         try {
-          const result = await terser(code, terserOptions);
-          return result.code.replace(/;$/, '');
+          // Fast path: avoid invoking Terser for empty/whitespace-only content
+          if (!code || !code.trim()) {
+            return '';
+          }
+          // Cache key: content, inline, options signature (subset)
+          const terserSig = stableStringify({
+            compress: terserOptions.compress,
+            mangle: terserOptions.mangle,
+            ecma: terserOptions.ecma,
+            toplevel: terserOptions.toplevel,
+            module: terserOptions.module,
+            keep_fnames: terserOptions.keep_fnames,
+            format: terserOptions.format,
+            cont: !!options.continueOnMinifyError,
+          });
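+          // For large inputs (over 2048 chars), the key uses only the length plus the first/last 50 chars to keep keys short. NOTE: this reduces but does not prevent collisions: two inputs with the same length, head, and tail share a cache entry.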
+          jsKey = (code.length > 2048 ? (code.length + '|' + code.slice(0, 50) + code.slice(-50) + '|') : (code + '|')) + (inline ? '1' : '0') + '|' + terserSig;
+          const cached = jsMinifyCache.get(jsKey);
+          if (cached) {
+            return await cached;
+          }
+          const inFlight = (async () => {
+            const result = await terser(code, terserOptions);
+            return result.code.replace(RE_TRAILING_SEMICOLON, '');
+          })();
+          jsMinifyCache.set(jsKey, inFlight);
+          const resolved = await inFlight;
+          jsMinifyCache.set(jsKey, resolved);
+          return resolved;
         } catch (err) {
+          if (jsKey) jsMinifyCache.delete(jsKey);
           if (!options.continueOnMinifyError) {
             throw err;
           }
@@ -947,8 +1063,11 @@ async function createSortFns(value, options, uidIgnore, uidAttr) {
         currentTag = '';
       },
       chars: async function (text) {
+        // Only recursively scan HTML content, not JSON-LD or other non-HTML script types
+        // `scan()` is for analyzing HTML attribute order, not for parsing JSON
         if (options.processScripts && specialContentTags.has(currentTag) &&
-          options.processScripts.indexOf(currentType) > -1) {
+          options.processScripts.indexOf(currentType) > -1 &&
+          currentType === 'text/html') {
           await scan(text);
         }
       }
@@ -961,7 +1080,8 @@ async function createSortFns(value, options, uidIgnore, uidAttr) {
   options.log = identity;
   options.sortAttributes = false;
   options.sortClassName = false;
-  await scan(await minifyHTML(value, options));
+  const firstPassOutput = await minifyHTML(value, options);
+  await scan(firstPassOutput);
   options.log = log;
   if (attrChains) {
     const attrSorters = Object.create(null);
@@ -1314,7 +1434,9 @@ async function minifyHTML(value, options, partialMarkup) {
       prevTag = prevTag === '' ? 'comment' : prevTag;
       nextTag = nextTag === '' ? 'comment' : nextTag;
       if (options.decodeEntities && text && !specialContentTags.has(currentTag)) {
-        text = decodeHTML(text);
+        if (text.indexOf('&') !== -1) {
+          text = decodeHTML(text);
+        }
       }
       if (options.collapseWhitespace) {
         if (!stackNoTrimWhitespace.length) {
@@ -1388,11 +1510,16 @@ async function minifyHTML(value, options, partialMarkup) {
       charsPrevTag = /^\s*$/.test(text) ? prevTag : 'comment';
       if (options.decodeEntities && text && !specialContentTags.has(currentTag)) {
         // Escape any `&` symbols that start either:
-        // 1) a legacy named character reference (i.e. one that doesn't end with `;`)
-        // 2) or any other character reference (i.e. one that does end with `;`)
+        // 1) a legacy named character reference (i.e., one that doesn’t end with `;`)
+        // 2) or any other character reference (i.e., one that does end with `;`)
         // Note that `&` can be escaped as `&amp`, without the semi-colon.
         // https://mathiasbynens.be/notes/ambiguous-ampersands
-        text = text.replace(/&((?:Iacute|aacute|uacute|plusmn|Otilde|otilde|agrave|Agrave|Yacute|yacute|Oslash|oslash|atilde|Atilde|brvbar|ccedil|Ccedil|Ograve|curren|divide|eacute|Eacute|ograve|Oacute|egrave|Egrave|Ugrave|frac12|frac14|frac34|ugrave|oacute|iacute|Ntilde|ntilde|Uacute|middot|igrave|Igrave|iquest|Aacute|cedil|laquo|micro|iexcl|Icirc|icirc|acirc|Ucirc|Ecirc|ocirc|Ocirc|ecirc|ucirc|Aring|aring|AElig|aelig|acute|pound|raquo|Acirc|times|THORN|szlig|thorn|COPY|auml|ordf|ordm|Uuml|macr|uuml|Auml|ouml|Ouml|para|nbsp|euml|quot|QUOT|Euml|yuml|cent|sect|copy|sup1|sup2|sup3|iuml|Iuml|ETH|shy|reg|not|yen|amp|AMP|REG|uml|eth|deg|gt|GT|LT|lt)(?!;)|(?:#?[0-9a-zA-Z]+;))/g, '&amp$1').replace(/</g, '&lt;');
+        if (text.indexOf('&') !== -1) {
+          text = text.replace(/&((?:Iacute|aacute|uacute|plusmn|Otilde|otilde|agrave|Agrave|Yacute|yacute|Oslash|oslash|atilde|Atilde|brvbar|ccedil|Ccedil|Ograve|curren|divide|eacute|Eacute|ograve|Oacute|egrave|Egrave|Ugrave|frac12|frac14|frac34|ugrave|oacute|iacute|Ntilde|ntilde|Uacute|middot|igrave|Igrave|iquest|Aacute|cedil|laquo|micro|iexcl|Icirc|icirc|acirc|Ucirc|Ecirc|ocirc|Ocirc|ecirc|ucirc|Aring|aring|AElig|aelig|acute|pound|raquo|Acirc|times|THORN|szlig|thorn|COPY|auml|ordf|ordm|Uuml|macr|uuml|Auml|ouml|Ouml|para|nbsp|euml|quot|QUOT|Euml|yuml|cent|sect|copy|sup1|sup2|sup3|iuml|Iuml|ETH|shy|reg|not|yen|amp|AMP|REG|uml|eth|deg|gt|GT|LT|lt)(?!;)|(?:#?[0-9a-zA-Z]+;))/g, '&amp$1');
+        }
+        if (text.indexOf('<') !== -1) {
+          text = text.replace(/</g, '&lt;');
+        }
       }
       if (uidPattern && options.collapseWhitespace && stackNoTrimWhitespace.length) {
         text = text.replace(uidPattern, function (match, prefix, index) {

package/src/htmlparser.js CHANGED Viewed

@@ -103,6 +103,9 @@ function joinSingleAttrAssigns(handler) {
   }).join('|');
 }
+// Number of captured parts per `customAttrSurround` pattern
+const NCP = 7;
 export class HTMLParser {
   constructor(html, handler) {
     this.html = html;
@@ -115,7 +118,15 @@ export class HTMLParser {
     const stack = []; let lastTag;
     const attribute = attrForHandler(handler);
-    let last, prevTag, nextTag;
+    let last, prevTag = undefined, nextTag = undefined;
+    // Track position for better error messages
+    let position = 0;
+    const getLineColumn = (pos) => {
+      const lines = this.html.slice(0, pos).split('\n');
+      return { line: lines.length, column: lines[lines.length - 1].length + 1 };
+    };
     while (html) {
       last = html;
       // Make sure we’re not in a `script` or `style` element
@@ -233,8 +244,27 @@ export class HTMLParser {
       }
       if (html === last) {
-        throw new Error('Parse Error: ' + html);
+        if (handler.continueOnParseError) {
+          // Skip the problematic character and continue
+          if (handler.chars) {
+            await handler.chars(html[0], prevTag, '');
+          }
+          html = html.substring(1);
+          position++;
+          prevTag = '';
+          continue;
+        }
+        const loc = getLineColumn(position);
+        // Include some context before the error position so the snippet contains
+        // the offending markup plus preceding characters (e.g. "invalid<tag").
+        const CONTEXT_BEFORE = 50;
+        const startPos = Math.max(0, position - CONTEXT_BEFORE);
+        const snippet = this.html.slice(startPos, startPos + 200).replace(/\n/g, ' ');
+        throw new Error(
+          `Parse error at line ${loc.line}, column ${loc.column}:\n${snippet}${this.html.length > startPos + 200 ? '…' : ''}`
+        );
       }
+      position = this.html.length - html.length;
     }
     if (!handler.partialMarkup) {
@@ -251,10 +281,77 @@ export class HTMLParser {
         };
         input = input.slice(start[0].length);
         let end, attr;
-        while (!(end = input.match(startTagClose)) && (attr = input.match(attribute))) {
+        // Safety limit: max length of input to check for attributes
+        // Protects against catastrophic backtracking on massive attribute values
+        const MAX_ATTR_PARSE_LENGTH = 20000; // 20 KB should be enough for any reasonable tag
+        while (true) {
+          // Check for closing tag first
+          end = input.match(startTagClose);
+          if (end) {
+            break;
+          }
+          // Limit the input length we pass to the regex to prevent catastrophic backtracking
+          const isLimited = input.length > MAX_ATTR_PARSE_LENGTH;
+          const searchInput = isLimited ? input.slice(0, MAX_ATTR_PARSE_LENGTH) : input;
+          attr = searchInput.match(attribute);
+          // If we limited the input and got a match, check if the value might be truncated
+          if (attr && isLimited) {
+            // Check if the attribute value extends beyond our search window
+            const attrEnd = attr[0].length;
+            // If the match ends near the limit, the value might be truncated
+            if (attrEnd > MAX_ATTR_PARSE_LENGTH - 100) {
+              // Manually extract this attribute to handle potentially huge value
+              const manualMatch = input.match(/^\s*([^\s"'<>/=]+)\s*=\s*/);
+              if (manualMatch) {
+                const quoteChar = input[manualMatch[0].length];
+                if (quoteChar === '"' || quoteChar === "'") {
+                  const closeQuote = input.indexOf(quoteChar, manualMatch[0].length + 1);
+                  if (closeQuote !== -1) {
+                    const fullAttr = input.slice(0, closeQuote + 1);
+                    const numCustomParts = handler.customAttrSurround
+                      ? handler.customAttrSurround.length * NCP
+                      : 0;
+                    const baseIndex = 1 + numCustomParts;
+                    attr = [];
+                    attr[0] = fullAttr;
+                    attr[baseIndex] = manualMatch[1]; // Attribute name
+                    attr[baseIndex + 1] = '='; // customAssign (falls back to “=” for huge attributes)
+                    const value = input.slice(manualMatch[0].length + 1, closeQuote);
+                    // Place value at correct index based on quote type
+                    if (quoteChar === '"') {
+                      attr[baseIndex + 2] = value; // Double-quoted value
+                    } else {
+                      attr[baseIndex + 3] = value; // Single-quoted value
+                    }
+                    input = input.slice(fullAttr.length);
+                    match.attrs.push(attr);
+                    continue;
+                  }
+                }
+                // Note: Unquoted attribute values are intentionally not handled here.
+                // Per HTML spec, unquoted values cannot contain spaces or special chars,
+                // making a 20 KB+ unquoted value practically impossible. If encountered,
+                // it’s malformed HTML and using the truncated regex match is acceptable.
+              }
+            }
+          }
+          if (!attr) {
+            break;
+          }
           input = input.slice(attr[0].length);
           match.attrs.push(attr);
         }
+        // Check for closing tag
+        end = input.match(startTagClose);
         if (end) {
           match.unarySlash = end[1];
           match.rest = input.slice(end[0].length);
@@ -347,7 +444,6 @@ export class HTMLParser {
       const attrs = match.attrs.map(function (args) {
         let name, value, customOpen, customClose, customAssign, quote;
-        const ncp = 7; // Number of captured parts, scalar
         // Hackish workaround for FF bug https://bugzilla.mozilla.org/show_bug.cgi?id=369778
         if (IS_REGEX_CAPTURING_BROKEN && args[0].indexOf('""') === -1) {
@@ -375,7 +471,7 @@ export class HTMLParser {
         let j = 1;
         if (handler.customAttrSurround) {
-          for (let i = 0, l = handler.customAttrSurround.length; i < l; i++, j += ncp) {
+          for (let i = 0, l = handler.customAttrSurround.length; i < l; i++, j += NCP) {
             name = args[j + 1];
             if (name) {
               quote = populate(j + 2);

package/src/utils.js CHANGED Viewed

@@ -8,4 +8,4 @@ export async function replaceAsync(str, regex, asyncFn) {
   const data = await Promise.all(promises);
   return str.replace(regex, () => data.shift());
-}
+}