npm - html-minifier-next - Versions diffs - 4.6.0 → 4.6.1 - Mend

html-minifier-next 4.6.0 → 4.6.1

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (9)

package/README.md +27 -23
package/dist/htmlminifier.cjs +107 -7
package/dist/htmlminifier.esm.bundle.js +107 -7
package/dist/types/htmlminifier.d.ts.map +1 -1
package/dist/types/htmlparser.d.ts.map +1 -1
package/package.json +1 -1
package/src/htmlminifier.js +6 -2
package/src/htmlparser.js +101 -5
package/src/utils.js +1 -1

package/README.md CHANGED Viewed

@@ -223,29 +223,33 @@ const result = await minify(html, {
 ## Minification comparison
-How does HTML Minifier Next compare to other solutions, like [minimize](https://github.com/Swaagie/minimize), [htmlcompressor.com](http://htmlcompressor.com/), [htmlnano](https://github.com/posthtml/htmlnano), and [minify-html](https://github.com/wilsonzlin/minify-html)? (All with the most aggressive settings, though without [hyper-optimization](https://meiert.com/blog/the-ways-of-writing-html/#toc-hyper-optimized).)
-| Site | Original Size (KB) | HTML Minifier Next | minimize | htmlcompressor.com | htmlnano | minify-html |
-| --- | --- | --- | --- | --- | --- | --- |
-| [A List Apart](https://alistapart.com/) | 62 | **52** | 58 | 56 | 54 | 55 |
-| [Amazon](https://www.amazon.com/) | 822 | **735** | 806 | n/a | n/a | n/a |
-| [Apple](https://www.apple.com/) | 210 | **166** | 195 | 192 | 186 | 191 |
-| [BBC](https://www.bbc.co.uk/) | 698 | **632** | 692 | n/a | 655 | 656 |
-| [CSS-Tricks](https://css-tricks.com/) | 163 | **124** | 149 | 146 | 127 | 145 |
-| [ECMAScript](https://tc39.es/ecma262/) | 7238 | **6342** | 6615 | n/a | 6561 | 6567 |
-| [EFF](https://www.eff.org/) | 54 | **46** | 49 | 49 | 49 | 47 |
-| [FAZ](https://www.faz.net/aktuell/) | 1860 | **1737** | 1775 | n/a | n/a | 1779 |
-| [Frontend Dogma](https://frontenddogma.com/) | 218 | **209** | 235 | 216 | 230 | 217 |
-| [Google](https://www.google.com/) | 18 | **17** | 18 | 18 | **17** | n/a |
-| [Ground News](https://ground.news/) | 1827 | **1585** | 1814 | n/a | 1679 | n/a |
-| [HTML](https://html.spec.whatwg.org/multipage/) | 149 | **147** | 155 | 148 | 153 | 149 |
-| [Leanpub](https://leanpub.com/) | 1161 | **974** | 1155 | n/a | 981 | n/a |
-| [Mastodon](https://mastodon.social/explore) | 35 | **26** | 34 | 34 | 30 | 33 |
-| [MDN](https://developer.mozilla.org/en-US/) | 107 | **62** | 67 | 68 | 64 | n/a |
-| [Middle East Eye](https://www.middleeasteye.net/) | 223 | **196** | 203 | 203 | 203 | 200 |
-| [SitePoint](https://www.sitepoint.com/) | 494 | **353** | 491 | n/a | 429 | 474 |
-| [United Nations](https://www.un.org/en/) | 152 | **113** | 131 | 124 | 122 | 126 |
-| [W3C](https://www.w3.org/) | 50 | **36** | 41 | 39 | 39 | 39 |
+How does HTML Minifier Next compare to other minifiers, like [htmlnano](https://github.com/posthtml/htmlnano), [@swc/html](https://github.com/swc-project/swc), [minify-html](https://github.com/wilsonzlin/minify-html), [minimize](https://github.com/Swaagie/minimize), and [htmlcompressor.com](https://htmlcompressor.com/)? (All with the most aggressive settings, though without [hyper-optimization](https://meiert.com/blog/the-ways-of-writing-html/#toc-hyper-optimized).)
+<!-- Auto-generated benchmarks, don’t edit -->
+| Site | Original Size (KB) | HTML Minifier Next | htmlnano | @swc/html | minify-html | minimize | htmlcompressor.com |
+| --- | --- | --- | --- | --- | --- | --- | --- |
+| [A List Apart](https://alistapart.com/) | 62 | **52** | 54 | 55 | 55 | 58 | 56 |
+| [Apple](https://www.apple.com/) | 190 | **146** | 166 | 169 | 172 | 175 | 172 |
+| [BBC](https://www.bbc.co.uk/) | 673 | **613** | 633 | 633 | 634 | 668 | n/a |
+| [Codeberg](https://codeberg.org/) | 33 | 29 | **27** | 30 | 30 | 30 | 30 |
+| [CSS-Tricks](https://css-tricks.com/) | 165 | **125** | 129 | 146 | 146 | 151 | 148 |
+| [ECMAScript](https://tc39.es/ecma262/) | 7238 | **6341** | 6561 | 6444 | 6567 | 6615 | n/a |
+| [EFF](https://www.eff.org/) | 54 | **46** | 49 | 47 | 47 | 49 | 49 |
+| [FAZ](https://www.faz.net/aktuell/) | 1609 | 1500 | **1431** | 1532 | 1544 | 1555 | n/a |
+| [Frontend Dogma](https://frontenddogma.com/) | 220 | **211** | 232 | 217 | 219 | 237 | 218 |
+| [Google](https://www.google.com/) | 18 | **17** | **17** | **17** | n/a | 18 | 18 |
+| [Ground News](https://ground.news/) | 2358 | **2067** | 2169 | 2199 | n/a | 2345 | n/a |
+| [HTML Living Standard](https://html.spec.whatwg.org/multipage/) | 149 | **147** | 153 | **147** | 149 | 155 | 148 |
+| [Leanpub](https://leanpub.com/) | 1348 | **1142** | 1149 | 1148 | n/a | 1343 | n/a |
+| [Mastodon](https://mastodon.social/explore) | 35 | **26** | 30 | 33 | 33 | 34 | 34 |
+| [MDN](https://developer.mozilla.org/en-US/) | 107 | **62** | 64 | 64 | n/a | 67 | 67 |
+| [Middle East Eye](https://www.middleeasteye.net/) | 224 | **197** | 204 | 202 | 202 | 204 | 205 |
+| [SitePoint](https://www.sitepoint.com/) | 492 | **350** | 426 | 465 | 472 | 488 | n/a |
+| [United Nations](https://www.un.org/en/) | 151 | **113** | 121 | 125 | 125 | 130 | 123 |
+| [W3C](https://www.w3.org/) | 50 | **36** | 38 | 38 | 38 | 40 | 38 |
+(Last updated: Dec 1, 2025)
+<!-- End auto-generated -->
 ## Examples

package/dist/htmlminifier.cjs CHANGED Viewed

@@ -113,6 +113,9 @@ function joinSingleAttrAssigns(handler) {
   }).join('|');
 }
+// Number of captured parts per `customAttrSurround` pattern
+const NCP = 7;
 class HTMLParser {
   constructor(html, handler) {
     this.html = html;
@@ -125,7 +128,15 @@ class HTMLParser {
     const stack = []; let lastTag;
     const attribute = attrForHandler(handler);
-    let last, prevTag, nextTag;
+    let last, prevTag = undefined, nextTag = undefined;
+    // Track position for better error messages
+    let position = 0;
+    const getLineColumn = (pos) => {
+      const lines = this.html.slice(0, pos).split('\n');
+      return { line: lines.length, column: lines[lines.length - 1].length + 1 };
+    };
     while (html) {
       last = html;
       // Make sure we’re not in a `script` or `style` element
@@ -243,8 +254,27 @@ class HTMLParser {
       }
       if (html === last) {
-        throw new Error('Parse Error: ' + html);
+        if (handler.continueOnParseError) {
+          // Skip the problematic character and continue
+          if (handler.chars) {
+            await handler.chars(html[0], prevTag, '');
+          }
+          html = html.substring(1);
+          position++;
+          prevTag = '';
+          continue;
+        }
+        const loc = getLineColumn(position);
+        // Include some context before the error position so the snippet contains
+        // the offending markup plus preceding characters (e.g. "invalid<tag").
+        const CONTEXT_BEFORE = 50;
+        const startPos = Math.max(0, position - CONTEXT_BEFORE);
+        const snippet = this.html.slice(startPos, startPos + 200).replace(/\n/g, ' ');
+        throw new Error(
+          `Parse error at line ${loc.line}, column ${loc.column}:\n${snippet}${this.html.length > startPos + 200 ? '…' : ''}`
+        );
       }
+      position = this.html.length - html.length;
     }
     if (!handler.partialMarkup) {
@@ -261,10 +291,77 @@ class HTMLParser {
         };
         input = input.slice(start[0].length);
         let end, attr;
-        while (!(end = input.match(startTagClose)) && (attr = input.match(attribute))) {
+        // Safety limit: max length of input to check for attributes
+        // Protects against catastrophic backtracking on massive attribute values
+        const MAX_ATTR_PARSE_LENGTH = 20000; // 20 KB should be enough for any reasonable tag
+        while (true) {
+          // Check for closing tag first
+          end = input.match(startTagClose);
+          if (end) {
+            break;
+          }
+          // Limit the input length we pass to the regex to prevent catastrophic backtracking
+          const isLimited = input.length > MAX_ATTR_PARSE_LENGTH;
+          const searchInput = isLimited ? input.slice(0, MAX_ATTR_PARSE_LENGTH) : input;
+          attr = searchInput.match(attribute);
+          // If we limited the input and got a match, check if the value might be truncated
+          if (attr && isLimited) {
+            // Check if the attribute value extends beyond our search window
+            const attrEnd = attr[0].length;
+            // If the match ends near the limit, the value might be truncated
+            if (attrEnd > MAX_ATTR_PARSE_LENGTH - 100) {
+              // Manually extract this attribute to handle potentially huge value
+              const manualMatch = input.match(/^\s*([^\s"'<>/=]+)\s*=\s*/);
+              if (manualMatch) {
+                const quoteChar = input[manualMatch[0].length];
+                if (quoteChar === '"' || quoteChar === "'") {
+                  const closeQuote = input.indexOf(quoteChar, manualMatch[0].length + 1);
+                  if (closeQuote !== -1) {
+                    const fullAttr = input.slice(0, closeQuote + 1);
+                    const numCustomParts = handler.customAttrSurround
+                      ? handler.customAttrSurround.length * NCP
+                      : 0;
+                    const baseIndex = 1 + numCustomParts;
+                    attr = [];
+                    attr[0] = fullAttr;
+                    attr[baseIndex] = manualMatch[1]; // Attribute name
+                    attr[baseIndex + 1] = '='; // customAssign (falls back to “=” for huge attributes)
+                    const value = input.slice(manualMatch[0].length + 1, closeQuote);
+                    // Place value at correct index based on quote type
+                    if (quoteChar === '"') {
+                      attr[baseIndex + 2] = value; // Double-quoted value
+                    } else {
+                      attr[baseIndex + 3] = value; // Single-quoted value
+                    }
+                    input = input.slice(fullAttr.length);
+                    match.attrs.push(attr);
+                    continue;
+                  }
+                }
+                // Note: Unquoted attribute values are intentionally not handled here.
+                // Per HTML spec, unquoted values cannot contain spaces or special chars,
+                // making a 20 KB+ unquoted value practically impossible. If encountered,
+                // it’s malformed HTML and using the truncated regex match is acceptable.
+              }
+            }
+          }
+          if (!attr) {
+            break;
+          }
           input = input.slice(attr[0].length);
           match.attrs.push(attr);
         }
+        // Check for closing tag
+        end = input.match(startTagClose);
         if (end) {
           match.unarySlash = end[1];
           match.rest = input.slice(end[0].length);
@@ -357,7 +454,6 @@ class HTMLParser {
       const attrs = match.attrs.map(function (args) {
         let name, value, customOpen, customClose, customAssign, quote;
-        const ncp = 7; // Number of captured parts, scalar
         // Hackish workaround for FF bug https://bugzilla.mozilla.org/show_bug.cgi?id=369778
         if (IS_REGEX_CAPTURING_BROKEN && args[0].indexOf('""') === -1) {
@@ -385,7 +481,7 @@ class HTMLParser {
         let j = 1;
         if (handler.customAttrSurround) {
-          for (let i = 0, l = handler.customAttrSurround.length; i < l; i++, j += ncp) {
+          for (let i = 0, l = handler.customAttrSurround.length; i < l; i++, j += NCP) {
             name = args[j + 1];
             if (name) {
               quote = populate(j + 2);
@@ -1548,8 +1644,11 @@ async function createSortFns(value, options, uidIgnore, uidAttr) {
         currentTag = '';
       },
       chars: async function (text) {
+        // Only recursively scan HTML content, not JSON-LD or other non-HTML script types
+        // `scan()` is for analyzing HTML attribute order, not for parsing JSON
         if (options.processScripts && specialContentTags.has(currentTag) &&
-          options.processScripts.indexOf(currentType) > -1) {
+          options.processScripts.indexOf(currentType) > -1 &&
+          currentType === 'text/html') {
           await scan(text);
         }
       }
@@ -1562,7 +1661,8 @@ async function createSortFns(value, options, uidIgnore, uidAttr) {
   options.log = identity;
   options.sortAttributes = false;
   options.sortClassName = false;
-  await scan(await minifyHTML(value, options));
+  const firstPassOutput = await minifyHTML(value, options);
+  await scan(firstPassOutput);
   options.log = log;
   if (attrChains) {
     const attrSorters = Object.create(null);

package/dist/htmlminifier.esm.bundle.js CHANGED Viewed

@@ -39166,6 +39166,9 @@ function joinSingleAttrAssigns(handler) {
   }).join('|');
 }
+// Number of captured parts per `customAttrSurround` pattern
+const NCP = 7;
 class HTMLParser {
   constructor(html, handler) {
     this.html = html;
@@ -39178,7 +39181,15 @@ class HTMLParser {
     const stack = []; let lastTag;
     const attribute = attrForHandler(handler);
-    let last, prevTag, nextTag;
+    let last, prevTag = undefined, nextTag = undefined;
+    // Track position for better error messages
+    let position = 0;
+    const getLineColumn = (pos) => {
+      const lines = this.html.slice(0, pos).split('\n');
+      return { line: lines.length, column: lines[lines.length - 1].length + 1 };
+    };
     while (html) {
       last = html;
       // Make sure we’re not in a `script` or `style` element
@@ -39296,8 +39307,27 @@ class HTMLParser {
       }
       if (html === last) {
-        throw new Error('Parse Error: ' + html);
+        if (handler.continueOnParseError) {
+          // Skip the problematic character and continue
+          if (handler.chars) {
+            await handler.chars(html[0], prevTag, '');
+          }
+          html = html.substring(1);
+          position++;
+          prevTag = '';
+          continue;
+        }
+        const loc = getLineColumn(position);
+        // Include some context before the error position so the snippet contains
+        // the offending markup plus preceding characters (e.g. "invalid<tag").
+        const CONTEXT_BEFORE = 50;
+        const startPos = Math.max(0, position - CONTEXT_BEFORE);
+        const snippet = this.html.slice(startPos, startPos + 200).replace(/\n/g, ' ');
+        throw new Error(
+          `Parse error at line ${loc.line}, column ${loc.column}:\n${snippet}${this.html.length > startPos + 200 ? '…' : ''}`
+        );
       }
+      position = this.html.length - html.length;
     }
     if (!handler.partialMarkup) {
@@ -39314,10 +39344,77 @@ class HTMLParser {
         };
         input = input.slice(start[0].length);
         let end, attr;
-        while (!(end = input.match(startTagClose)) && (attr = input.match(attribute))) {
+        // Safety limit: max length of input to check for attributes
+        // Protects against catastrophic backtracking on massive attribute values
+        const MAX_ATTR_PARSE_LENGTH = 20000; // 20 KB should be enough for any reasonable tag
+        while (true) {
+          // Check for closing tag first
+          end = input.match(startTagClose);
+          if (end) {
+            break;
+          }
+          // Limit the input length we pass to the regex to prevent catastrophic backtracking
+          const isLimited = input.length > MAX_ATTR_PARSE_LENGTH;
+          const searchInput = isLimited ? input.slice(0, MAX_ATTR_PARSE_LENGTH) : input;
+          attr = searchInput.match(attribute);
+          // If we limited the input and got a match, check if the value might be truncated
+          if (attr && isLimited) {
+            // Check if the attribute value extends beyond our search window
+            const attrEnd = attr[0].length;
+            // If the match ends near the limit, the value might be truncated
+            if (attrEnd > MAX_ATTR_PARSE_LENGTH - 100) {
+              // Manually extract this attribute to handle potentially huge value
+              const manualMatch = input.match(/^\s*([^\s"'<>/=]+)\s*=\s*/);
+              if (manualMatch) {
+                const quoteChar = input[manualMatch[0].length];
+                if (quoteChar === '"' || quoteChar === "'") {
+                  const closeQuote = input.indexOf(quoteChar, manualMatch[0].length + 1);
+                  if (closeQuote !== -1) {
+                    const fullAttr = input.slice(0, closeQuote + 1);
+                    const numCustomParts = handler.customAttrSurround
+                      ? handler.customAttrSurround.length * NCP
+                      : 0;
+                    const baseIndex = 1 + numCustomParts;
+                    attr = [];
+                    attr[0] = fullAttr;
+                    attr[baseIndex] = manualMatch[1]; // Attribute name
+                    attr[baseIndex + 1] = '='; // customAssign (falls back to “=” for huge attributes)
+                    const value = input.slice(manualMatch[0].length + 1, closeQuote);
+                    // Place value at correct index based on quote type
+                    if (quoteChar === '"') {
+                      attr[baseIndex + 2] = value; // Double-quoted value
+                    } else {
+                      attr[baseIndex + 3] = value; // Single-quoted value
+                    }
+                    input = input.slice(fullAttr.length);
+                    match.attrs.push(attr);
+                    continue;
+                  }
+                }
+                // Note: Unquoted attribute values are intentionally not handled here.
+                // Per HTML spec, unquoted values cannot contain spaces or special chars,
+                // making a 20 KB+ unquoted value practically impossible. If encountered,
+                // it’s malformed HTML and using the truncated regex match is acceptable.
+              }
+            }
+          }
+          if (!attr) {
+            break;
+          }
           input = input.slice(attr[0].length);
           match.attrs.push(attr);
         }
+        // Check for closing tag
+        end = input.match(startTagClose);
         if (end) {
           match.unarySlash = end[1];
           match.rest = input.slice(end[0].length);
@@ -39410,7 +39507,6 @@ class HTMLParser {
       const attrs = match.attrs.map(function (args) {
         let name, value, customOpen, customClose, customAssign, quote;
-        const ncp = 7; // Number of captured parts, scalar
         // Hackish workaround for FF bug https://bugzilla.mozilla.org/show_bug.cgi?id=369778
         if (IS_REGEX_CAPTURING_BROKEN && args[0].indexOf('""') === -1) {
@@ -39438,7 +39534,7 @@ class HTMLParser {
         let j = 1;
         if (handler.customAttrSurround) {
-          for (let i = 0, l = handler.customAttrSurround.length; i < l; i++, j += ncp) {
+          for (let i = 0, l = handler.customAttrSurround.length; i < l; i++, j += NCP) {
             name = args[j + 1];
             if (name) {
               quote = populate(j + 2);
@@ -40601,8 +40697,11 @@ async function createSortFns(value, options, uidIgnore, uidAttr) {
         currentTag = '';
       },
       chars: async function (text) {
+        // Only recursively scan HTML content, not JSON-LD or other non-HTML script types
+        // `scan()` is for analyzing HTML attribute order, not for parsing JSON
         if (options.processScripts && specialContentTags.has(currentTag) &&
-          options.processScripts.indexOf(currentType) > -1) {
+          options.processScripts.indexOf(currentType) > -1 &&
+          currentType === 'text/html') {
           await scan(text);
         }
       }
@@ -40615,7 +40714,8 @@ async function createSortFns(value, options, uidIgnore, uidAttr) {
   options.log = identity;
   options.sortAttributes = false;
   options.sortClassName = false;
-  await scan(await minifyHTML(value, options));
+  const firstPassOutput = await minifyHTML(value, options);
+  await scan(firstPassOutput);
   options.log = log;
   if (attrChains) {
     const attrSorters = Object.create(null);

package/dist/types/htmlminifier.d.ts.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"htmlminifier.d.ts","sourceRoot":"","sources":["../../src/htmlminifier.js"],"names":[],"mappings":"~~AAm~~/CO,8BAJI,MAAM,YACN,eAAe,GACb,OAAO,CAAC,MAAM,CAAC,CAQ3B;;;;;;;;;;;;UAUS,MAAM;YACN,MAAM;YACN,MAAM;mBACN,MAAM;iBACN,MAAM;kBACN,MAAM;;;;;;;;;;;;;4BAQN,CAAC,GAAG,EAAE,MAAM,EAAE,KAAK,EAAE,aAAa,EAAE,EAAE,qBAAqB,EAAE,CAAC,GAAG,EAAE,MAAM,KAAK,OAAO,KAAK,OAAO;;;;;;;wBAMjG,CAAC,GAAG,EAAE,MAAM,GAAG,IAAI,EAAE,KAAK,EAAE,aAAa,EAAE,GAAG,SAAS,EAAE,iBAAiB,EAAE,CAAC,GAAG,EAAE,MAAM,KAAK,OAAO,KAAK,OAAO;;;;;;;;oBAMhH,OAAO;;;;;;;;gCAOP,OAAO;;;;;;;;kCAOP,OAAO;;;;;;;;yBAOP,OAAO;;;;;;;;2BAOP,OAAO;;;;;;;;4BAOP,OAAO;;;;;;;2BAOP,OAAO;;;;;;;;uBAMP,MAAM,EAAE;;;;;;yBAOR,MAAM;;;;;;yBAKN,CAAC,MAAM,EAAE,MAAM,CAAC,EAAE;;;;;;;4BAKlB,MAAM,EAAE;;;;;;;oCAMR,MAAM;;;;;;;qBAMN,OAAO;;;;;;;YAMP,OAAO;;;;;;;;2BAMP,MAAM,EAAE;;;;;;;;;4BAOR,MAAM,EAAE;;;;;;;+BAQR,OAAO;;;;;;;2BAMP,SAAS,CAAC,MAAM,CAAC;;;;;;uBAMjB,OAAO;;;;;;;;UAKP,CAAC,OAAO,EAAE,OAAO,KAAK,IAAI;;;;;;;;qBAO1B,MAAM;;;;;;;oBAON,MAAM;;;;;;;;;;gBAMN,OAAO,GAAG,OAAO,CAAC,OAAO,cAAc,EAAE,gBAAgB,CAAC,OAAO,cAAc,EAAE,aAAa,CAAC,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,MAAM,EAAE,IAAI,CAAC,EAAE,MAAM,KAAK,OAAO,CAAC,MAAM,CAAC,GAAG,MAAM,CAAC;;;;;;;;;;eAS9J,OAAO,GAAG,OAAO,QAAQ,EAAE,aAAa,GAAG,CAAC,CAAC,IAAI,EAAE,MAAM,EAAE,MAAM,CAAC,EAAE,OAAO,KAAK,OAAO,CAAC,MAAM,CAAC,GAAG,MAAM,CAAC;;;;;;;;;;iBASzG,OAAO,GAAG,MAAM,GAAG,OAAO,WAAW,EAAE,OAAO,GAAG,CAAC,CAAC,IAAI,EAAE,MAAM,KAAK,OAAO,CAAC,MAAM,CAAC,GAAG,MAAM,CAAC;;;;;;;;WAS7F,CAAC,IAAI,EAAE,MAAM,KAAK,MAAM;;;;;;;+BAOxB,OAAO;;;;;;;;;;oBAMP,OAAO;;;;;;;;yBASP,OAAO;;;;;;;gCAOP,OAAO;;;;;;;;iCAMP,OAAO;;;;;;;;;;qBAOP,MAAM,EAAE;;;;;;;qBASR,IAAI,GAAG,GAAG;;;;;;;4BAMV,OAAO;;;;;;;;qBAMP,OAAO;;;;;;;;;4BAOP,OAAO,GAAG,CAAC,CAAC,QAAQ,EAAE,MAAM,EAAE,GAAG,EAAE,MAAM,KAAK,OAAO,CAAC;;;;;;;;0BAQtD,OAAO;;;;;;;;yBAOP,OAAO;;;;;;;;gCAOP,OAAO;;;;;;;iCAOP,OAAO;;;;;;;oCAMP,OAAO;;;;;;;;;;0BAMP,OAAO;;;;;;;;;qBASP,OAAO,GAAG,CAAC,CAAC,GAAG,EAAE,MAAM,EAAE,KAAK,EAAE,aAAa,EAAE,KAAK,IAAI,CAAC;;;;;;;;;oBAQzD,OAAO,GAAG,CAAC,CAAC,KAAK,EAAE,MAAM,KAAK,MAAM
,CAAC;;;;;;;;0BAQrC,OAAO;;;;;;;sBAOP,OAAO;;~~wBA50DkC~~,cAAc;0BAAd,cAAc;+BAAd,cAAc"}
1	+ {"version":3,"file":"htmlminifier.d.ts","sourceRoot":"","sources":["../../src/htmlminifier.js"],"names":[],"mappings":"AAu/CO,8BAJI,MAAM,YACN,eAAe,GACb,OAAO,CAAC,MAAM,CAAC,CAQ3B;;;;;;;;;;;;UAUS,MAAM;YACN,MAAM;YACN,MAAM;mBACN,MAAM;iBACN,MAAM;kBACN,MAAM;;;;;;;;;;;;;4BAQN,CAAC,GAAG,EAAE,MAAM,EAAE,KAAK,EAAE,aAAa,EAAE,EAAE,qBAAqB,EAAE,CAAC,GAAG,EAAE,MAAM,KAAK,OAAO,KAAK,OAAO;;;;;;;wBAMjG,CAAC,GAAG,EAAE,MAAM,GAAG,IAAI,EAAE,KAAK,EAAE,aAAa,EAAE,GAAG,SAAS,EAAE,iBAAiB,EAAE,CAAC,GAAG,EAAE,MAAM,KAAK,OAAO,KAAK,OAAO;;;;;;;;oBAMhH,OAAO;;;;;;;;gCAOP,OAAO;;;;;;;;kCAOP,OAAO;;;;;;;;yBAOP,OAAO;;;;;;;;2BAOP,OAAO;;;;;;;;4BAOP,OAAO;;;;;;;2BAOP,OAAO;;;;;;;;uBAMP,MAAM,EAAE;;;;;;yBAOR,MAAM;;;;;;yBAKN,CAAC,MAAM,EAAE,MAAM,CAAC,EAAE;;;;;;;4BAKlB,MAAM,EAAE;;;;;;;oCAMR,MAAM;;;;;;;qBAMN,OAAO;;;;;;;YAMP,OAAO;;;;;;;;2BAMP,MAAM,EAAE;;;;;;;;;4BAOR,MAAM,EAAE;;;;;;;+BAQR,OAAO;;;;;;;2BAMP,SAAS,CAAC,MAAM,CAAC;;;;;;uBAMjB,OAAO;;;;;;;;UAKP,CAAC,OAAO,EAAE,OAAO,KAAK,IAAI;;;;;;;;qBAO1B,MAAM;;;;;;;oBAON,MAAM;;;;;;;;;;gBAMN,OAAO,GAAG,OAAO,CAAC,OAAO,cAAc,EAAE,gBAAgB,CAAC,OAAO,cAAc,EAAE,aAAa,CAAC,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,MAAM,EAAE,IAAI,CAAC,EAAE,MAAM,KAAK,OAAO,CAAC,MAAM,CAAC,GAAG,MAAM,CAAC;;;;;;;;;;eAS9J,OAAO,GAAG,OAAO,QAAQ,EAAE,aAAa,GAAG,CAAC,CAAC,IAAI,EAAE,MAAM,EAAE,MAAM,CAAC,EAAE,OAAO,KAAK,OAAO,CAAC,MAAM,CAAC,GAAG,MAAM,CAAC;;;;;;;;;;iBASzG,OAAO,GAAG,MAAM,GAAG,OAAO,WAAW,EAAE,OAAO,GAAG,CAAC,CAAC,IAAI,EAAE,MAAM,KAAK,OAAO,CAAC,MAAM,CAAC,GAAG,MAAM,CAAC;;;;;;;;WAS7F,CAAC,IAAI,EAAE,MAAM,KAAK,MAAM;;;;;;;+BAOxB,OAAO;;;;;;;;;;oBAMP,OAAO;;;;;;;;yBASP,OAAO;;;;;;;gCAOP,OAAO;;;;;;;;iCAMP,OAAO;;;;;;;;;;qBAOP,MAAM,EAAE;;;;;;;qBASR,IAAI,GAAG,GAAG;;;;;;;4BAMV,OAAO;;;;;;;;qBAMP,OAAO;;;;;;;;;4BAOP,OAAO,GAAG,CAAC,CAAC,QAAQ,EAAE,MAAM,EAAE,GAAG,EAAE,MAAM,KAAK,OAAO,CAAC;;;;;;;;0BAQtD,OAAO;;;;;;;;yBAOP,OAAO;;;;;;;;gCAOP,OAAO;;;;;;;iCAOP,OAAO;;;;;;;oCAMP,OAAO;;;;;;;;;;0BAMP,OAAO;;;;;;;;;qBASP,OAAO,GAAG,CAAC,CAAC,GAAG,EAAE,MAAM,EAAE,KAAK,EAAE,aAAa,EAAE,KAAK,IAAI,CAAC;;;;;;;;;oBAQzD,OAAO,GAAG,CAAC,CAAC,KAAK,EAAE,MAAM,KAAK,MAAM,CAA
C;;;;;;;;0BAQrC,OAAO;;;;;;;sBAOP,OAAO;;wBAh1DkC,cAAc;0BAAd,cAAc;+BAAd,cAAc"}

package/dist/types/htmlparser.d.ts.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"htmlparser.d.ts","sourceRoot":"","sources":["../../src/htmlparser.js"],"names":[],"mappings":"AAgDA,4BAAoE;~~AAyDpE~~;IACE,qCAGC;IAFC,UAAgB;IAChB,aAAsB;IAGxB,~~uBAgWC~~;CACF"}
1	+ {"version":3,"file":"htmlparser.d.ts","sourceRoot":"","sources":["../../src/htmlparser.js"],"names":[],"mappings":"AAgDA,4BAAoE;AA4DpE;IACE,qCAGC;IAFC,UAAgB;IAChB,aAAsB;IAGxB,uBA6bC;CACF"}

package/package.json CHANGED Viewed

@@ -84,5 +84,5 @@
     "test:watch": "node --test --watch tests/*.spec.js"
   },
   "type": "module",
-  "version": "4.6.0"
+  "version": "4.6.1"
 }

package/src/htmlminifier.js CHANGED Viewed

@@ -947,8 +947,11 @@ async function createSortFns(value, options, uidIgnore, uidAttr) {
         currentTag = '';
       },
       chars: async function (text) {
+        // Only recursively scan HTML content, not JSON-LD or other non-HTML script types
+        // `scan()` is for analyzing HTML attribute order, not for parsing JSON
         if (options.processScripts && specialContentTags.has(currentTag) &&
-          options.processScripts.indexOf(currentType) > -1) {
+          options.processScripts.indexOf(currentType) > -1 &&
+          currentType === 'text/html') {
           await scan(text);
         }
       }
@@ -961,7 +964,8 @@ async function createSortFns(value, options, uidIgnore, uidAttr) {
   options.log = identity;
   options.sortAttributes = false;
   options.sortClassName = false;
-  await scan(await minifyHTML(value, options));
+  const firstPassOutput = await minifyHTML(value, options);
+  await scan(firstPassOutput);
   options.log = log;
   if (attrChains) {
     const attrSorters = Object.create(null);

package/src/htmlparser.js CHANGED Viewed

@@ -103,6 +103,9 @@ function joinSingleAttrAssigns(handler) {
   }).join('|');
 }
+// Number of captured parts per `customAttrSurround` pattern
+const NCP = 7;
 export class HTMLParser {
   constructor(html, handler) {
     this.html = html;
@@ -115,7 +118,15 @@ export class HTMLParser {
     const stack = []; let lastTag;
     const attribute = attrForHandler(handler);
-    let last, prevTag, nextTag;
+    let last, prevTag = undefined, nextTag = undefined;
+    // Track position for better error messages
+    let position = 0;
+    const getLineColumn = (pos) => {
+      const lines = this.html.slice(0, pos).split('\n');
+      return { line: lines.length, column: lines[lines.length - 1].length + 1 };
+    };
     while (html) {
       last = html;
       // Make sure we’re not in a `script` or `style` element
@@ -233,8 +244,27 @@ export class HTMLParser {
       }
       if (html === last) {
-        throw new Error('Parse Error: ' + html);
+        if (handler.continueOnParseError) {
+          // Skip the problematic character and continue
+          if (handler.chars) {
+            await handler.chars(html[0], prevTag, '');
+          }
+          html = html.substring(1);
+          position++;
+          prevTag = '';
+          continue;
+        }
+        const loc = getLineColumn(position);
+        // Include some context before the error position so the snippet contains
+        // the offending markup plus preceding characters (e.g. "invalid<tag").
+        const CONTEXT_BEFORE = 50;
+        const startPos = Math.max(0, position - CONTEXT_BEFORE);
+        const snippet = this.html.slice(startPos, startPos + 200).replace(/\n/g, ' ');
+        throw new Error(
+          `Parse error at line ${loc.line}, column ${loc.column}:\n${snippet}${this.html.length > startPos + 200 ? '…' : ''}`
+        );
       }
+      position = this.html.length - html.length;
     }
     if (!handler.partialMarkup) {
@@ -251,10 +281,77 @@ export class HTMLParser {
         };
         input = input.slice(start[0].length);
         let end, attr;
-        while (!(end = input.match(startTagClose)) && (attr = input.match(attribute))) {
+        // Safety limit: max length of input to check for attributes
+        // Protects against catastrophic backtracking on massive attribute values
+        const MAX_ATTR_PARSE_LENGTH = 20000; // 20 KB should be enough for any reasonable tag
+        while (true) {
+          // Check for closing tag first
+          end = input.match(startTagClose);
+          if (end) {
+            break;
+          }
+          // Limit the input length we pass to the regex to prevent catastrophic backtracking
+          const isLimited = input.length > MAX_ATTR_PARSE_LENGTH;
+          const searchInput = isLimited ? input.slice(0, MAX_ATTR_PARSE_LENGTH) : input;
+          attr = searchInput.match(attribute);
+          // If we limited the input and got a match, check if the value might be truncated
+          if (attr && isLimited) {
+            // Check if the attribute value extends beyond our search window
+            const attrEnd = attr[0].length;
+            // If the match ends near the limit, the value might be truncated
+            if (attrEnd > MAX_ATTR_PARSE_LENGTH - 100) {
+              // Manually extract this attribute to handle potentially huge value
+              const manualMatch = input.match(/^\s*([^\s"'<>/=]+)\s*=\s*/);
+              if (manualMatch) {
+                const quoteChar = input[manualMatch[0].length];
+                if (quoteChar === '"' || quoteChar === "'") {
+                  const closeQuote = input.indexOf(quoteChar, manualMatch[0].length + 1);
+                  if (closeQuote !== -1) {
+                    const fullAttr = input.slice(0, closeQuote + 1);
+                    const numCustomParts = handler.customAttrSurround
+                      ? handler.customAttrSurround.length * NCP
+                      : 0;
+                    const baseIndex = 1 + numCustomParts;
+                    attr = [];
+                    attr[0] = fullAttr;
+                    attr[baseIndex] = manualMatch[1]; // Attribute name
+                    attr[baseIndex + 1] = '='; // customAssign (falls back to “=” for huge attributes)
+                    const value = input.slice(manualMatch[0].length + 1, closeQuote);
+                    // Place value at correct index based on quote type
+                    if (quoteChar === '"') {
+                      attr[baseIndex + 2] = value; // Double-quoted value
+                    } else {
+                      attr[baseIndex + 3] = value; // Single-quoted value
+                    }
+                    input = input.slice(fullAttr.length);
+                    match.attrs.push(attr);
+                    continue;
+                  }
+                }
+                // Note: Unquoted attribute values are intentionally not handled here.
+                // Per HTML spec, unquoted values cannot contain spaces or special chars,
+                // making a 20 KB+ unquoted value practically impossible. If encountered,
+                // it’s malformed HTML and using the truncated regex match is acceptable.
+              }
+            }
+          }
+          if (!attr) {
+            break;
+          }
           input = input.slice(attr[0].length);
           match.attrs.push(attr);
         }
+        // Check for closing tag
+        end = input.match(startTagClose);
         if (end) {
           match.unarySlash = end[1];
           match.rest = input.slice(end[0].length);
@@ -347,7 +444,6 @@ export class HTMLParser {
       const attrs = match.attrs.map(function (args) {
         let name, value, customOpen, customClose, customAssign, quote;
-        const ncp = 7; // Number of captured parts, scalar
         // Hackish workaround for FF bug https://bugzilla.mozilla.org/show_bug.cgi?id=369778
         if (IS_REGEX_CAPTURING_BROKEN && args[0].indexOf('""') === -1) {
@@ -375,7 +471,7 @@ export class HTMLParser {
         let j = 1;
         if (handler.customAttrSurround) {
-          for (let i = 0, l = handler.customAttrSurround.length; i < l; i++, j += ncp) {
+          for (let i = 0, l = handler.customAttrSurround.length; i < l; i++, j += NCP) {
             name = args[j + 1];
             if (name) {
               quote = populate(j + 2);

package/src/utils.js CHANGED Viewed

@@ -8,4 +8,4 @@ export async function replaceAsync(str, regex, asyncFn) {
   const data = await Promise.all(promises);
   return str.replace(regex, () => data.shift());
-}
+}