html-minifier-next 4.6.0 → 4.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -223,29 +223,33 @@ const result = await minify(html, {
223
223
 
224
224
  ## Minification comparison
225
225
 
226
- How does HTML Minifier Next compare to other solutions, like [minimize](https://github.com/Swaagie/minimize), [htmlcompressor.com](http://htmlcompressor.com/), [htmlnano](https://github.com/posthtml/htmlnano), and [minify-html](https://github.com/wilsonzlin/minify-html)? (All with the most aggressive settings, though without [hyper-optimization](https://meiert.com/blog/the-ways-of-writing-html/#toc-hyper-optimized).)
227
-
228
- | Site | Original Size (KB) | HTML Minifier Next | minimize | html­compressor.com | htmlnano | minify-html |
229
- | --- | --- | --- | --- | --- | --- | --- |
230
- | [A List Apart](https://alistapart.com/) | 62 | **52** | 58 | 56 | 54 | 55 |
231
- | [Amazon](https://www.amazon.com/) | 822 | **735** | 806 | n/a | n/a | n/a |
232
- | [Apple](https://www.apple.com/) | 210 | **166** | 195 | 192 | 186 | 191 |
233
- | [BBC](https://www.bbc.co.uk/) | 698 | **632** | 692 | n/a | 655 | 656 |
234
- | [CSS-Tricks](https://css-tricks.com/) | 163 | **124** | 149 | 146 | 127 | 145 |
235
- | [ECMAScript](https://tc39.es/ecma262/) | 7238 | **6342** | 6615 | n/a | 6561 | 6567 |
236
- | [EFF](https://www.eff.org/) | 54 | **46** | 49 | 49 | 49 | 47 |
237
- | [FAZ](https://www.faz.net/aktuell/) | 1860 | **1737** | 1775 | n/a | n/a | 1779 |
238
- | [Frontend Dogma](https://frontenddogma.com/) | 218 | **209** | 235 | 216 | 230 | 217 |
239
- | [Google](https://www.google.com/) | 18 | **17** | 18 | 18 | **17** | n/a |
240
- | [Ground News](https://ground.news/) | 1827 | **1585** | 1814 | n/a | 1679 | n/a |
241
- | [HTML](https://html.spec.whatwg.org/multipage/) | 149 | **147** | 155 | 148 | 153 | 149 |
242
- | [Leanpub](https://leanpub.com/) | 1161 | **974** | 1155 | n/a | 981 | n/a |
243
- | [Mastodon](https://mastodon.social/explore) | 35 | **26** | 34 | 34 | 30 | 33 |
244
- | [MDN](https://developer.mozilla.org/en-US/) | 107 | **62** | 67 | 68 | 64 | n/a |
245
- | [Middle East Eye](https://www.middleeasteye.net/) | 223 | **196** | 203 | 203 | 203 | 200 |
246
- | [SitePoint](https://www.sitepoint.com/) | 494 | **353** | 491 | n/a | 429 | 474 |
247
- | [United Nations](https://www.un.org/en/) | 152 | **113** | 131 | 124 | 122 | 126 |
248
- | [W3C](https://www.w3.org/) | 50 | **36** | 41 | 39 | 39 | 39 |
226
+ How does HTML Minifier Next compare to other minifiers, like [htmlnano](https://github.com/posthtml/htmlnano), [@swc/html](https://github.com/swc-project/swc), [minify-html](https://github.com/wilsonzlin/minify-html), [minimize](https://github.com/Swaagie/minimize), and [htmlcompressor.com](https://htmlcompressor.com/)? (All with the most aggressive settings, though without [hyper-optimization](https://meiert.com/blog/the-ways-of-writing-html/#toc-hyper-optimized).)
227
+
228
+ <!-- Auto-generated benchmarks, don’t edit -->
229
+ | Site | Original Size (KB) | HTML Minifier Next | htmlnano | @swc/html | minify-html | minimize | html­com­pressor.­com |
230
+ | --- | --- | --- | --- | --- | --- | --- | --- |
231
+ | [A List Apart](https://alistapart.com/) | 62 | **52** | 54 | 55 | 55 | 58 | 56 |
232
+ | [Apple](https://www.apple.com/) | 190 | **146** | 166 | 169 | 172 | 175 | 172 |
233
+ | [BBC](https://www.bbc.co.uk/) | 673 | **613** | 633 | 633 | 634 | 668 | n/a |
234
+ | [Codeberg](https://codeberg.org/) | 33 | 29 | **27** | 30 | 30 | 30 | 30 |
235
+ | [CSS-Tricks](https://css-tricks.com/) | 165 | **125** | 129 | 146 | 146 | 151 | 148 |
236
+ | [ECMAScript](https://tc39.es/ecma262/) | 7238 | **6341** | 6561 | 6444 | 6567 | 6615 | n/a |
237
+ | [EFF](https://www.eff.org/) | 54 | **46** | 49 | 47 | 47 | 49 | 49 |
238
+ | [FAZ](https://www.faz.net/aktuell/) | 1609 | 1500 | **1431** | 1532 | 1544 | 1555 | n/a |
239
+ | [Frontend Dogma](https://frontenddogma.com/) | 220 | **211** | 232 | 217 | 219 | 237 | 218 |
240
+ | [Google](https://www.google.com/) | 18 | **17** | **17** | **17** | n/a | 18 | 18 |
241
+ | [Ground News](https://ground.news/) | 2358 | **2067** | 2169 | 2199 | n/a | 2345 | n/a |
242
+ | [HTML Living Standard](https://html.spec.whatwg.org/multipage/) | 149 | **147** | 153 | **147** | 149 | 155 | 148 |
243
+ | [Leanpub](https://leanpub.com/) | 1348 | **1142** | 1149 | 1148 | n/a | 1343 | n/a |
244
+ | [Mastodon](https://mastodon.social/explore) | 35 | **26** | 30 | 33 | 33 | 34 | 34 |
245
+ | [MDN](https://developer.mozilla.org/en-US/) | 107 | **62** | 64 | 64 | n/a | 67 | 67 |
246
+ | [Middle East Eye](https://www.middleeasteye.net/) | 224 | **197** | 204 | 202 | 202 | 204 | 205 |
247
+ | [SitePoint](https://www.sitepoint.com/) | 492 | **350** | 426 | 465 | 472 | 488 | n/a |
248
+ | [United Nations](https://www.un.org/en/) | 151 | **113** | 121 | 125 | 125 | 130 | 123 |
249
+ | [W3C](https://www.w3.org/) | 50 | **36** | 38 | 38 | 38 | 40 | 38 |
250
+
251
+ (Last updated: Dec 1, 2025)
252
+ <!-- End auto-generated -->
249
253
 
250
254
  ## Examples
251
255
 
@@ -113,6 +113,9 @@ function joinSingleAttrAssigns(handler) {
113
113
  }).join('|');
114
114
  }
115
115
 
116
+ // Number of captured parts per `customAttrSurround` pattern
117
+ const NCP = 7;
118
+
116
119
  class HTMLParser {
117
120
  constructor(html, handler) {
118
121
  this.html = html;
@@ -125,7 +128,15 @@ class HTMLParser {
125
128
 
126
129
  const stack = []; let lastTag;
127
130
  const attribute = attrForHandler(handler);
128
- let last, prevTag, nextTag;
131
+ let last, prevTag = undefined, nextTag = undefined;
132
+
133
+ // Track position for better error messages
134
+ let position = 0;
135
+ const getLineColumn = (pos) => {
136
+ const lines = this.html.slice(0, pos).split('\n');
137
+ return { line: lines.length, column: lines[lines.length - 1].length + 1 };
138
+ };
139
+
129
140
  while (html) {
130
141
  last = html;
131
142
  // Make sure we’re not in a `script` or `style` element
@@ -243,8 +254,27 @@ class HTMLParser {
243
254
  }
244
255
 
245
256
  if (html === last) {
246
- throw new Error('Parse Error: ' + html);
257
+ if (handler.continueOnParseError) {
258
+ // Skip the problematic character and continue
259
+ if (handler.chars) {
260
+ await handler.chars(html[0], prevTag, '');
261
+ }
262
+ html = html.substring(1);
263
+ position++;
264
+ prevTag = '';
265
+ continue;
266
+ }
267
+ const loc = getLineColumn(position);
268
+ // Include some context before the error position so the snippet contains
269
+ // the offending markup plus preceding characters (e.g. "invalid<tag").
270
+ const CONTEXT_BEFORE = 50;
271
+ const startPos = Math.max(0, position - CONTEXT_BEFORE);
272
+ const snippet = this.html.slice(startPos, startPos + 200).replace(/\n/g, ' ');
273
+ throw new Error(
274
+ `Parse error at line ${loc.line}, column ${loc.column}:\n${snippet}${this.html.length > startPos + 200 ? '…' : ''}`
275
+ );
247
276
  }
277
+ position = this.html.length - html.length;
248
278
  }
249
279
 
250
280
  if (!handler.partialMarkup) {
@@ -261,10 +291,77 @@ class HTMLParser {
261
291
  };
262
292
  input = input.slice(start[0].length);
263
293
  let end, attr;
264
- while (!(end = input.match(startTagClose)) && (attr = input.match(attribute))) {
294
+
295
+ // Safety limit: max length of input to check for attributes
296
+ // Protects against catastrophic backtracking on massive attribute values
297
+ const MAX_ATTR_PARSE_LENGTH = 20000; // 20 KB should be enough for any reasonable tag
298
+
299
+ while (true) {
300
+ // Check for closing tag first
301
+ end = input.match(startTagClose);
302
+ if (end) {
303
+ break;
304
+ }
305
+
306
+ // Limit the input length we pass to the regex to prevent catastrophic backtracking
307
+ const isLimited = input.length > MAX_ATTR_PARSE_LENGTH;
308
+ const searchInput = isLimited ? input.slice(0, MAX_ATTR_PARSE_LENGTH) : input;
309
+
310
+ attr = searchInput.match(attribute);
311
+
312
+ // If we limited the input and got a match, check if the value might be truncated
313
+ if (attr && isLimited) {
314
+ // Check if the attribute value extends beyond our search window
315
+ const attrEnd = attr[0].length;
316
+ // If the match ends near the limit, the value might be truncated
317
+ if (attrEnd > MAX_ATTR_PARSE_LENGTH - 100) {
318
+ // Manually extract this attribute to handle potentially huge value
319
+ const manualMatch = input.match(/^\s*([^\s"'<>/=]+)\s*=\s*/);
320
+ if (manualMatch) {
321
+ const quoteChar = input[manualMatch[0].length];
322
+ if (quoteChar === '"' || quoteChar === "'") {
323
+ const closeQuote = input.indexOf(quoteChar, manualMatch[0].length + 1);
324
+ if (closeQuote !== -1) {
325
+ const fullAttr = input.slice(0, closeQuote + 1);
326
+ const numCustomParts = handler.customAttrSurround
327
+ ? handler.customAttrSurround.length * NCP
328
+ : 0;
329
+ const baseIndex = 1 + numCustomParts;
330
+
331
+ attr = [];
332
+ attr[0] = fullAttr;
333
+ attr[baseIndex] = manualMatch[1]; // Attribute name
334
+ attr[baseIndex + 1] = '='; // customAssign (falls back to “=” for huge attributes)
335
+ const value = input.slice(manualMatch[0].length + 1, closeQuote);
336
+ // Place value at correct index based on quote type
337
+ if (quoteChar === '"') {
338
+ attr[baseIndex + 2] = value; // Double-quoted value
339
+ } else {
340
+ attr[baseIndex + 3] = value; // Single-quoted value
341
+ }
342
+ input = input.slice(fullAttr.length);
343
+ match.attrs.push(attr);
344
+ continue;
345
+ }
346
+ }
347
+ // Note: Unquoted attribute values are intentionally not handled here.
348
+ // Per HTML spec, unquoted values cannot contain spaces or special chars,
349
+ // making a 20 KB+ unquoted value practically impossible. If encountered,
350
+ // it’s malformed HTML and using the truncated regex match is acceptable.
351
+ }
352
+ }
353
+ }
354
+
355
+ if (!attr) {
356
+ break;
357
+ }
358
+
265
359
  input = input.slice(attr[0].length);
266
360
  match.attrs.push(attr);
267
361
  }
362
+
363
+ // Check for closing tag
364
+ end = input.match(startTagClose);
268
365
  if (end) {
269
366
  match.unarySlash = end[1];
270
367
  match.rest = input.slice(end[0].length);
@@ -357,7 +454,6 @@ class HTMLParser {
357
454
 
358
455
  const attrs = match.attrs.map(function (args) {
359
456
  let name, value, customOpen, customClose, customAssign, quote;
360
- const ncp = 7; // Number of captured parts, scalar
361
457
 
362
458
  // Hackish workaround for FF bug https://bugzilla.mozilla.org/show_bug.cgi?id=369778
363
459
  if (IS_REGEX_CAPTURING_BROKEN && args[0].indexOf('""') === -1) {
@@ -385,7 +481,7 @@ class HTMLParser {
385
481
 
386
482
  let j = 1;
387
483
  if (handler.customAttrSurround) {
388
- for (let i = 0, l = handler.customAttrSurround.length; i < l; i++, j += ncp) {
484
+ for (let i = 0, l = handler.customAttrSurround.length; i < l; i++, j += NCP) {
389
485
  name = args[j + 1];
390
486
  if (name) {
391
487
  quote = populate(j + 2);
@@ -1548,8 +1644,11 @@ async function createSortFns(value, options, uidIgnore, uidAttr) {
1548
1644
  currentTag = '';
1549
1645
  },
1550
1646
  chars: async function (text) {
1647
+ // Only recursively scan HTML content, not JSON-LD or other non-HTML script types
1648
+ // `scan()` is for analyzing HTML attribute order, not for parsing JSON
1551
1649
  if (options.processScripts && specialContentTags.has(currentTag) &&
1552
- options.processScripts.indexOf(currentType) > -1) {
1650
+ options.processScripts.indexOf(currentType) > -1 &&
1651
+ currentType === 'text/html') {
1553
1652
  await scan(text);
1554
1653
  }
1555
1654
  }
@@ -1562,7 +1661,8 @@ async function createSortFns(value, options, uidIgnore, uidAttr) {
1562
1661
  options.log = identity;
1563
1662
  options.sortAttributes = false;
1564
1663
  options.sortClassName = false;
1565
- await scan(await minifyHTML(value, options));
1664
+ const firstPassOutput = await minifyHTML(value, options);
1665
+ await scan(firstPassOutput);
1566
1666
  options.log = log;
1567
1667
  if (attrChains) {
1568
1668
  const attrSorters = Object.create(null);
@@ -39166,6 +39166,9 @@ function joinSingleAttrAssigns(handler) {
39166
39166
  }).join('|');
39167
39167
  }
39168
39168
 
39169
+ // Number of captured parts per `customAttrSurround` pattern
39170
+ const NCP = 7;
39171
+
39169
39172
  class HTMLParser {
39170
39173
  constructor(html, handler) {
39171
39174
  this.html = html;
@@ -39178,7 +39181,15 @@ class HTMLParser {
39178
39181
 
39179
39182
  const stack = []; let lastTag;
39180
39183
  const attribute = attrForHandler(handler);
39181
- let last, prevTag, nextTag;
39184
+ let last, prevTag = undefined, nextTag = undefined;
39185
+
39186
+ // Track position for better error messages
39187
+ let position = 0;
39188
+ const getLineColumn = (pos) => {
39189
+ const lines = this.html.slice(0, pos).split('\n');
39190
+ return { line: lines.length, column: lines[lines.length - 1].length + 1 };
39191
+ };
39192
+
39182
39193
  while (html) {
39183
39194
  last = html;
39184
39195
  // Make sure we’re not in a `script` or `style` element
@@ -39296,8 +39307,27 @@ class HTMLParser {
39296
39307
  }
39297
39308
 
39298
39309
  if (html === last) {
39299
- throw new Error('Parse Error: ' + html);
39310
+ if (handler.continueOnParseError) {
39311
+ // Skip the problematic character and continue
39312
+ if (handler.chars) {
39313
+ await handler.chars(html[0], prevTag, '');
39314
+ }
39315
+ html = html.substring(1);
39316
+ position++;
39317
+ prevTag = '';
39318
+ continue;
39319
+ }
39320
+ const loc = getLineColumn(position);
39321
+ // Include some context before the error position so the snippet contains
39322
+ // the offending markup plus preceding characters (e.g. "invalid<tag").
39323
+ const CONTEXT_BEFORE = 50;
39324
+ const startPos = Math.max(0, position - CONTEXT_BEFORE);
39325
+ const snippet = this.html.slice(startPos, startPos + 200).replace(/\n/g, ' ');
39326
+ throw new Error(
39327
+ `Parse error at line ${loc.line}, column ${loc.column}:\n${snippet}${this.html.length > startPos + 200 ? '…' : ''}`
39328
+ );
39300
39329
  }
39330
+ position = this.html.length - html.length;
39301
39331
  }
39302
39332
 
39303
39333
  if (!handler.partialMarkup) {
@@ -39314,10 +39344,77 @@ class HTMLParser {
39314
39344
  };
39315
39345
  input = input.slice(start[0].length);
39316
39346
  let end, attr;
39317
- while (!(end = input.match(startTagClose)) && (attr = input.match(attribute))) {
39347
+
39348
+ // Safety limit: max length of input to check for attributes
39349
+ // Protects against catastrophic backtracking on massive attribute values
39350
+ const MAX_ATTR_PARSE_LENGTH = 20000; // 20 KB should be enough for any reasonable tag
39351
+
39352
+ while (true) {
39353
+ // Check for closing tag first
39354
+ end = input.match(startTagClose);
39355
+ if (end) {
39356
+ break;
39357
+ }
39358
+
39359
+ // Limit the input length we pass to the regex to prevent catastrophic backtracking
39360
+ const isLimited = input.length > MAX_ATTR_PARSE_LENGTH;
39361
+ const searchInput = isLimited ? input.slice(0, MAX_ATTR_PARSE_LENGTH) : input;
39362
+
39363
+ attr = searchInput.match(attribute);
39364
+
39365
+ // If we limited the input and got a match, check if the value might be truncated
39366
+ if (attr && isLimited) {
39367
+ // Check if the attribute value extends beyond our search window
39368
+ const attrEnd = attr[0].length;
39369
+ // If the match ends near the limit, the value might be truncated
39370
+ if (attrEnd > MAX_ATTR_PARSE_LENGTH - 100) {
39371
+ // Manually extract this attribute to handle potentially huge value
39372
+ const manualMatch = input.match(/^\s*([^\s"'<>/=]+)\s*=\s*/);
39373
+ if (manualMatch) {
39374
+ const quoteChar = input[manualMatch[0].length];
39375
+ if (quoteChar === '"' || quoteChar === "'") {
39376
+ const closeQuote = input.indexOf(quoteChar, manualMatch[0].length + 1);
39377
+ if (closeQuote !== -1) {
39378
+ const fullAttr = input.slice(0, closeQuote + 1);
39379
+ const numCustomParts = handler.customAttrSurround
39380
+ ? handler.customAttrSurround.length * NCP
39381
+ : 0;
39382
+ const baseIndex = 1 + numCustomParts;
39383
+
39384
+ attr = [];
39385
+ attr[0] = fullAttr;
39386
+ attr[baseIndex] = manualMatch[1]; // Attribute name
39387
+ attr[baseIndex + 1] = '='; // customAssign (falls back to “=” for huge attributes)
39388
+ const value = input.slice(manualMatch[0].length + 1, closeQuote);
39389
+ // Place value at correct index based on quote type
39390
+ if (quoteChar === '"') {
39391
+ attr[baseIndex + 2] = value; // Double-quoted value
39392
+ } else {
39393
+ attr[baseIndex + 3] = value; // Single-quoted value
39394
+ }
39395
+ input = input.slice(fullAttr.length);
39396
+ match.attrs.push(attr);
39397
+ continue;
39398
+ }
39399
+ }
39400
+ // Note: Unquoted attribute values are intentionally not handled here.
39401
+ // Per HTML spec, unquoted values cannot contain spaces or special chars,
39402
+ // making a 20 KB+ unquoted value practically impossible. If encountered,
39403
+ // it’s malformed HTML and using the truncated regex match is acceptable.
39404
+ }
39405
+ }
39406
+ }
39407
+
39408
+ if (!attr) {
39409
+ break;
39410
+ }
39411
+
39318
39412
  input = input.slice(attr[0].length);
39319
39413
  match.attrs.push(attr);
39320
39414
  }
39415
+
39416
+ // Check for closing tag
39417
+ end = input.match(startTagClose);
39321
39418
  if (end) {
39322
39419
  match.unarySlash = end[1];
39323
39420
  match.rest = input.slice(end[0].length);
@@ -39410,7 +39507,6 @@ class HTMLParser {
39410
39507
 
39411
39508
  const attrs = match.attrs.map(function (args) {
39412
39509
  let name, value, customOpen, customClose, customAssign, quote;
39413
- const ncp = 7; // Number of captured parts, scalar
39414
39510
 
39415
39511
  // Hackish workaround for FF bug https://bugzilla.mozilla.org/show_bug.cgi?id=369778
39416
39512
  if (IS_REGEX_CAPTURING_BROKEN && args[0].indexOf('""') === -1) {
@@ -39438,7 +39534,7 @@ class HTMLParser {
39438
39534
 
39439
39535
  let j = 1;
39440
39536
  if (handler.customAttrSurround) {
39441
- for (let i = 0, l = handler.customAttrSurround.length; i < l; i++, j += ncp) {
39537
+ for (let i = 0, l = handler.customAttrSurround.length; i < l; i++, j += NCP) {
39442
39538
  name = args[j + 1];
39443
39539
  if (name) {
39444
39540
  quote = populate(j + 2);
@@ -40601,8 +40697,11 @@ async function createSortFns(value, options, uidIgnore, uidAttr) {
40601
40697
  currentTag = '';
40602
40698
  },
40603
40699
  chars: async function (text) {
40700
+ // Only recursively scan HTML content, not JSON-LD or other non-HTML script types
40701
+ // `scan()` is for analyzing HTML attribute order, not for parsing JSON
40604
40702
  if (options.processScripts && specialContentTags.has(currentTag) &&
40605
- options.processScripts.indexOf(currentType) > -1) {
40703
+ options.processScripts.indexOf(currentType) > -1 &&
40704
+ currentType === 'text/html') {
40606
40705
  await scan(text);
40607
40706
  }
40608
40707
  }
@@ -40615,7 +40714,8 @@ async function createSortFns(value, options, uidIgnore, uidAttr) {
40615
40714
  options.log = identity;
40616
40715
  options.sortAttributes = false;
40617
40716
  options.sortClassName = false;
40618
- await scan(await minifyHTML(value, options));
40717
+ const firstPassOutput = await minifyHTML(value, options);
40718
+ await scan(firstPassOutput);
40619
40719
  options.log = log;
40620
40720
  if (attrChains) {
40621
40721
  const attrSorters = Object.create(null);
@@ -1 +1 @@
1
- {"version":3,"file":"htmlminifier.d.ts","sourceRoot":"","sources":["../../src/htmlminifier.js"],"names":[],"mappings":"AAm/CO,8BAJI,MAAM,YACN,eAAe,GACb,OAAO,CAAC,MAAM,CAAC,CAQ3B;;;;;;;;;;;;UAUS,MAAM;YACN,MAAM;YACN,MAAM;mBACN,MAAM;iBACN,MAAM;kBACN,MAAM;;;;;;;;;;;;;4BAQN,CAAC,GAAG,EAAE,MAAM,EAAE,KAAK,EAAE,aAAa,EAAE,EAAE,qBAAqB,EAAE,CAAC,GAAG,EAAE,MAAM,KAAK,OAAO,KAAK,OAAO;;;;;;;wBAMjG,CAAC,GAAG,EAAE,MAAM,GAAG,IAAI,EAAE,KAAK,EAAE,aAAa,EAAE,GAAG,SAAS,EAAE,iBAAiB,EAAE,CAAC,GAAG,EAAE,MAAM,KAAK,OAAO,KAAK,OAAO;;;;;;;;oBAMhH,OAAO;;;;;;;;gCAOP,OAAO;;;;;;;;kCAOP,OAAO;;;;;;;;yBAOP,OAAO;;;;;;;;2BAOP,OAAO;;;;;;;;4BAOP,OAAO;;;;;;;2BAOP,OAAO;;;;;;;;uBAMP,MAAM,EAAE;;;;;;yBAOR,MAAM;;;;;;yBAKN,CAAC,MAAM,EAAE,MAAM,CAAC,EAAE;;;;;;;4BAKlB,MAAM,EAAE;;;;;;;oCAMR,MAAM;;;;;;;qBAMN,OAAO;;;;;;;YAMP,OAAO;;;;;;;;2BAMP,MAAM,EAAE;;;;;;;;;4BAOR,MAAM,EAAE;;;;;;;+BAQR,OAAO;;;;;;;2BAMP,SAAS,CAAC,MAAM,CAAC;;;;;;uBAMjB,OAAO;;;;;;;;UAKP,CAAC,OAAO,EAAE,OAAO,KAAK,IAAI;;;;;;;;qBAO1B,MAAM;;;;;;;oBAON,MAAM;;;;;;;;;;gBAMN,OAAO,GAAG,OAAO,CAAC,OAAO,cAAc,EAAE,gBAAgB,CAAC,OAAO,cAAc,EAAE,aAAa,CAAC,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,MAAM,EAAE,IAAI,CAAC,EAAE,MAAM,KAAK,OAAO,CAAC,MAAM,CAAC,GAAG,MAAM,CAAC;;;;;;;;;;eAS9J,OAAO,GAAG,OAAO,QAAQ,EAAE,aAAa,GAAG,CAAC,CAAC,IAAI,EAAE,MAAM,EAAE,MAAM,CAAC,EAAE,OAAO,KAAK,OAAO,CAAC,MAAM,CAAC,GAAG,MAAM,CAAC;;;;;;;;;;iBASzG,OAAO,GAAG,MAAM,GAAG,OAAO,WAAW,EAAE,OAAO,GAAG,CAAC,CAAC,IAAI,EAAE,MAAM,KAAK,OAAO,CAAC,MAAM,CAAC,GAAG,MAAM,CAAC;;;;;;;;WAS7F,CAAC,IAAI,EAAE,MAAM,KAAK,MAAM;;;;;;;+BAOxB,OAAO;;;;;;;;;;oBAMP,OAAO;;;;;;;;yBASP,OAAO;;;;;;;gCAOP,OAAO;;;;;;;;iCAMP,OAAO;;;;;;;;;;qBAOP,MAAM,EAAE;;;;;;;qBASR,IAAI,GAAG,GAAG;;;;;;;4BAMV,OAAO;;;;;;;;qBAMP,OAAO;;;;;;;;;4BAOP,OAAO,GAAG,CAAC,CAAC,QAAQ,EAAE,MAAM,EAAE,GAAG,EAAE,MAAM,KAAK,OAAO,CAAC;;;;;;;;0BAQtD,OAAO;;;;;;;;yBAOP,OAAO;;;;;;;;gCAOP,OAAO;;;;;;;iCAOP,OAAO;;;;;;;oCAMP,OAAO;;;;;;;;;;0BAMP,OAAO;;;;;;;;;qBASP,OAAO,GAAG,CAAC,CAAC,GAAG,EAAE,MAAM,EAAE,KAAK,EAAE,aAAa,EAAE,KAAK,IAAI,CAAC;;;;;;;;;oBAQzD,OAAO,GAAG,CAAC,CAAC,KAAK,EAAE,MAAM,KAAK,MAAM,CAAC;;;;;;;;0BAQrC,OAAO;;;;;;;sBAOP,OAAO;;wBA50DkC,cAAc;0BAAd,cAAc;+BAAd,cAAc"}
1
+ {"version":3,"file":"htmlminifier.d.ts","sourceRoot":"","sources":["../../src/htmlminifier.js"],"names":[],"mappings":"AAu/CO,8BAJI,MAAM,YACN,eAAe,GACb,OAAO,CAAC,MAAM,CAAC,CAQ3B;;;;;;;;;;;;UAUS,MAAM;YACN,MAAM;YACN,MAAM;mBACN,MAAM;iBACN,MAAM;kBACN,MAAM;;;;;;;;;;;;;4BAQN,CAAC,GAAG,EAAE,MAAM,EAAE,KAAK,EAAE,aAAa,EAAE,EAAE,qBAAqB,EAAE,CAAC,GAAG,EAAE,MAAM,KAAK,OAAO,KAAK,OAAO;;;;;;;wBAMjG,CAAC,GAAG,EAAE,MAAM,GAAG,IAAI,EAAE,KAAK,EAAE,aAAa,EAAE,GAAG,SAAS,EAAE,iBAAiB,EAAE,CAAC,GAAG,EAAE,MAAM,KAAK,OAAO,KAAK,OAAO;;;;;;;;oBAMhH,OAAO;;;;;;;;gCAOP,OAAO;;;;;;;;kCAOP,OAAO;;;;;;;;yBAOP,OAAO;;;;;;;;2BAOP,OAAO;;;;;;;;4BAOP,OAAO;;;;;;;2BAOP,OAAO;;;;;;;;uBAMP,MAAM,EAAE;;;;;;yBAOR,MAAM;;;;;;yBAKN,CAAC,MAAM,EAAE,MAAM,CAAC,EAAE;;;;;;;4BAKlB,MAAM,EAAE;;;;;;;oCAMR,MAAM;;;;;;;qBAMN,OAAO;;;;;;;YAMP,OAAO;;;;;;;;2BAMP,MAAM,EAAE;;;;;;;;;4BAOR,MAAM,EAAE;;;;;;;+BAQR,OAAO;;;;;;;2BAMP,SAAS,CAAC,MAAM,CAAC;;;;;;uBAMjB,OAAO;;;;;;;;UAKP,CAAC,OAAO,EAAE,OAAO,KAAK,IAAI;;;;;;;;qBAO1B,MAAM;;;;;;;oBAON,MAAM;;;;;;;;;;gBAMN,OAAO,GAAG,OAAO,CAAC,OAAO,cAAc,EAAE,gBAAgB,CAAC,OAAO,cAAc,EAAE,aAAa,CAAC,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,MAAM,EAAE,IAAI,CAAC,EAAE,MAAM,KAAK,OAAO,CAAC,MAAM,CAAC,GAAG,MAAM,CAAC;;;;;;;;;;eAS9J,OAAO,GAAG,OAAO,QAAQ,EAAE,aAAa,GAAG,CAAC,CAAC,IAAI,EAAE,MAAM,EAAE,MAAM,CAAC,EAAE,OAAO,KAAK,OAAO,CAAC,MAAM,CAAC,GAAG,MAAM,CAAC;;;;;;;;;;iBASzG,OAAO,GAAG,MAAM,GAAG,OAAO,WAAW,EAAE,OAAO,GAAG,CAAC,CAAC,IAAI,EAAE,MAAM,KAAK,OAAO,CAAC,MAAM,CAAC,GAAG,MAAM,CAAC;;;;;;;;WAS7F,CAAC,IAAI,EAAE,MAAM,KAAK,MAAM;;;;;;;+BAOxB,OAAO;;;;;;;;;;oBAMP,OAAO;;;;;;;;yBASP,OAAO;;;;;;;gCAOP,OAAO;;;;;;;;iCAMP,OAAO;;;;;;;;;;qBAOP,MAAM,EAAE;;;;;;;qBASR,IAAI,GAAG,GAAG;;;;;;;4BAMV,OAAO;;;;;;;;qBAMP,OAAO;;;;;;;;;4BAOP,OAAO,GAAG,CAAC,CAAC,QAAQ,EAAE,MAAM,EAAE,GAAG,EAAE,MAAM,KAAK,OAAO,CAAC;;;;;;;;0BAQtD,OAAO;;;;;;;;yBAOP,OAAO;;;;;;;;gCAOP,OAAO;;;;;;;iCAOP,OAAO;;;;;;;oCAMP,OAAO;;;;;;;;;;0BAMP,OAAO;;;;;;;;;qBASP,OAAO,GAAG,CAAC,CAAC,GAAG,EAAE,MAAM,EAAE,KAAK,EAAE,aAAa,EAAE,KAAK,IAAI,CAAC;;;;;;;;;oBAQzD,OAAO,GAAG,CAAC,CAAC,KAAK,EAAE,MAAM,KAAK,MAAM,CAAC;;;;;;;;0BAQrC,OAAO;;;;;;;sBAOP,OAAO;;wBAh1DkC,cAAc;0BAAd,cAAc;+BAAd,cAAc"}
@@ -1 +1 @@
1
- {"version":3,"file":"htmlparser.d.ts","sourceRoot":"","sources":["../../src/htmlparser.js"],"names":[],"mappings":"AAgDA,4BAAoE;AAyDpE;IACE,qCAGC;IAFC,UAAgB;IAChB,aAAsB;IAGxB,uBAgWC;CACF"}
1
+ {"version":3,"file":"htmlparser.d.ts","sourceRoot":"","sources":["../../src/htmlparser.js"],"names":[],"mappings":"AAgDA,4BAAoE;AA4DpE;IACE,qCAGC;IAFC,UAAgB;IAChB,aAAsB;IAGxB,uBA6bC;CACF"}
package/package.json CHANGED
@@ -84,5 +84,5 @@
84
84
  "test:watch": "node --test --watch tests/*.spec.js"
85
85
  },
86
86
  "type": "module",
87
- "version": "4.6.0"
87
+ "version": "4.6.1"
88
88
  }
@@ -947,8 +947,11 @@ async function createSortFns(value, options, uidIgnore, uidAttr) {
947
947
  currentTag = '';
948
948
  },
949
949
  chars: async function (text) {
950
+ // Only recursively scan HTML content, not JSON-LD or other non-HTML script types
951
+ // `scan()` is for analyzing HTML attribute order, not for parsing JSON
950
952
  if (options.processScripts && specialContentTags.has(currentTag) &&
951
- options.processScripts.indexOf(currentType) > -1) {
953
+ options.processScripts.indexOf(currentType) > -1 &&
954
+ currentType === 'text/html') {
952
955
  await scan(text);
953
956
  }
954
957
  }
@@ -961,7 +964,8 @@ async function createSortFns(value, options, uidIgnore, uidAttr) {
961
964
  options.log = identity;
962
965
  options.sortAttributes = false;
963
966
  options.sortClassName = false;
964
- await scan(await minifyHTML(value, options));
967
+ const firstPassOutput = await minifyHTML(value, options);
968
+ await scan(firstPassOutput);
965
969
  options.log = log;
966
970
  if (attrChains) {
967
971
  const attrSorters = Object.create(null);
package/src/htmlparser.js CHANGED
@@ -103,6 +103,9 @@ function joinSingleAttrAssigns(handler) {
103
103
  }).join('|');
104
104
  }
105
105
 
106
+ // Number of captured parts per `customAttrSurround` pattern
107
+ const NCP = 7;
108
+
106
109
  export class HTMLParser {
107
110
  constructor(html, handler) {
108
111
  this.html = html;
@@ -115,7 +118,15 @@ export class HTMLParser {
115
118
 
116
119
  const stack = []; let lastTag;
117
120
  const attribute = attrForHandler(handler);
118
- let last, prevTag, nextTag;
121
+ let last, prevTag = undefined, nextTag = undefined;
122
+
123
+ // Track position for better error messages
124
+ let position = 0;
125
+ const getLineColumn = (pos) => {
126
+ const lines = this.html.slice(0, pos).split('\n');
127
+ return { line: lines.length, column: lines[lines.length - 1].length + 1 };
128
+ };
129
+
119
130
  while (html) {
120
131
  last = html;
121
132
  // Make sure we’re not in a `script` or `style` element
@@ -233,8 +244,27 @@ export class HTMLParser {
233
244
  }
234
245
 
235
246
  if (html === last) {
236
- throw new Error('Parse Error: ' + html);
247
+ if (handler.continueOnParseError) {
248
+ // Skip the problematic character and continue
249
+ if (handler.chars) {
250
+ await handler.chars(html[0], prevTag, '');
251
+ }
252
+ html = html.substring(1);
253
+ position++;
254
+ prevTag = '';
255
+ continue;
256
+ }
257
+ const loc = getLineColumn(position);
258
+ // Include some context before the error position so the snippet contains
259
+ // the offending markup plus preceding characters (e.g. "invalid<tag").
260
+ const CONTEXT_BEFORE = 50;
261
+ const startPos = Math.max(0, position - CONTEXT_BEFORE);
262
+ const snippet = this.html.slice(startPos, startPos + 200).replace(/\n/g, ' ');
263
+ throw new Error(
264
+ `Parse error at line ${loc.line}, column ${loc.column}:\n${snippet}${this.html.length > startPos + 200 ? '…' : ''}`
265
+ );
237
266
  }
267
+ position = this.html.length - html.length;
238
268
  }
239
269
 
240
270
  if (!handler.partialMarkup) {
@@ -251,10 +281,77 @@ export class HTMLParser {
251
281
  };
252
282
  input = input.slice(start[0].length);
253
283
  let end, attr;
254
- while (!(end = input.match(startTagClose)) && (attr = input.match(attribute))) {
284
+
285
+ // Safety limit: max length of input to check for attributes
286
+ // Protects against catastrophic backtracking on massive attribute values
287
+ const MAX_ATTR_PARSE_LENGTH = 20000; // 20 KB should be enough for any reasonable tag
288
+
289
+ while (true) {
290
+ // Check for closing tag first
291
+ end = input.match(startTagClose);
292
+ if (end) {
293
+ break;
294
+ }
295
+
296
+ // Limit the input length we pass to the regex to prevent catastrophic backtracking
297
+ const isLimited = input.length > MAX_ATTR_PARSE_LENGTH;
298
+ const searchInput = isLimited ? input.slice(0, MAX_ATTR_PARSE_LENGTH) : input;
299
+
300
+ attr = searchInput.match(attribute);
301
+
302
+ // If we limited the input and got a match, check if the value might be truncated
303
+ if (attr && isLimited) {
304
+ // Check if the attribute value extends beyond our search window
305
+ const attrEnd = attr[0].length;
306
+ // If the match ends near the limit, the value might be truncated
307
+ if (attrEnd > MAX_ATTR_PARSE_LENGTH - 100) {
308
+ // Manually extract this attribute to handle potentially huge value
309
+ const manualMatch = input.match(/^\s*([^\s"'<>/=]+)\s*=\s*/);
310
+ if (manualMatch) {
311
+ const quoteChar = input[manualMatch[0].length];
312
+ if (quoteChar === '"' || quoteChar === "'") {
313
+ const closeQuote = input.indexOf(quoteChar, manualMatch[0].length + 1);
314
+ if (closeQuote !== -1) {
315
+ const fullAttr = input.slice(0, closeQuote + 1);
316
+ const numCustomParts = handler.customAttrSurround
317
+ ? handler.customAttrSurround.length * NCP
318
+ : 0;
319
+ const baseIndex = 1 + numCustomParts;
320
+
321
+ attr = [];
322
+ attr[0] = fullAttr;
323
+ attr[baseIndex] = manualMatch[1]; // Attribute name
324
+ attr[baseIndex + 1] = '='; // customAssign (falls back to “=” for huge attributes)
325
+ const value = input.slice(manualMatch[0].length + 1, closeQuote);
326
+ // Place value at correct index based on quote type
327
+ if (quoteChar === '"') {
328
+ attr[baseIndex + 2] = value; // Double-quoted value
329
+ } else {
330
+ attr[baseIndex + 3] = value; // Single-quoted value
331
+ }
332
+ input = input.slice(fullAttr.length);
333
+ match.attrs.push(attr);
334
+ continue;
335
+ }
336
+ }
337
+ // Note: Unquoted attribute values are intentionally not handled here.
338
+ // Per HTML spec, unquoted values cannot contain spaces or special chars,
339
+ // making a 20 KB+ unquoted value practically impossible. If encountered,
340
+ // it’s malformed HTML and using the truncated regex match is acceptable.
341
+ }
342
+ }
343
+ }
344
+
345
+ if (!attr) {
346
+ break;
347
+ }
348
+
255
349
  input = input.slice(attr[0].length);
256
350
  match.attrs.push(attr);
257
351
  }
352
+
353
+ // Check for closing tag
354
+ end = input.match(startTagClose);
258
355
  if (end) {
259
356
  match.unarySlash = end[1];
260
357
  match.rest = input.slice(end[0].length);
@@ -347,7 +444,6 @@ export class HTMLParser {
347
444
 
348
445
  const attrs = match.attrs.map(function (args) {
349
446
  let name, value, customOpen, customClose, customAssign, quote;
350
- const ncp = 7; // Number of captured parts, scalar
351
447
 
352
448
  // Hackish workaround for FF bug https://bugzilla.mozilla.org/show_bug.cgi?id=369778
353
449
  if (IS_REGEX_CAPTURING_BROKEN && args[0].indexOf('""') === -1) {
@@ -375,7 +471,7 @@ export class HTMLParser {
375
471
 
376
472
  let j = 1;
377
473
  if (handler.customAttrSurround) {
378
- for (let i = 0, l = handler.customAttrSurround.length; i < l; i++, j += ncp) {
474
+ for (let i = 0, l = handler.customAttrSurround.length; i < l; i++, j += NCP) {
379
475
  name = args[j + 1];
380
476
  if (name) {
381
477
  quote = populate(j + 2);
package/src/utils.js CHANGED
@@ -8,4 +8,4 @@ export async function replaceAsync(str, regex, asyncFn) {
8
8
 
9
9
  const data = await Promise.all(promises);
10
10
  return str.replace(regex, () => data.shift());
11
- }
11
+ }