html-minifier-next 4.5.1 → 4.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -107,7 +107,7 @@ For lint-like capabilities, take a look at [HTMLLint](https://github.com/kangax/
107
107
  HTML Minifier Next provides presets for common use cases. Presets are pre-configured option sets that can be used as a starting point:
108
108
 
109
109
  * `conservative`: Safe minification suitable for most projects. Includes whitespace collapsing, comment removal, and doctype normalization.
110
- * `comprehensive`: Aggressive minification for maximum file size reduction. Includes all conservative options plus attribute quote removal, optional tag removal, and more.
110
+ * `comprehensive`: Aggressive minification for maximum file size reduction. Includes relevant conservative options plus attribute quote removal, optional tag removal, and more.
111
111
 
112
112
  **Using presets:**
113
113
 
@@ -223,29 +223,33 @@ const result = await minify(html, {
223
223
 
224
224
  ## Minification comparison
225
225
 
226
- How does HTML Minifier Next compare to other solutions, like [minimize](https://github.com/Swaagie/minimize), [htmlcompressor.com](http://htmlcompressor.com/), [htmlnano](https://github.com/posthtml/htmlnano), and [minify-html](https://github.com/wilsonzlin/minify-html)? (All with the most aggressive settings, though without [hyper-optimization](https://meiert.com/blog/the-ways-of-writing-html/#toc-hyper-optimized).)
227
-
228
- | Site | Original Size (KB) | HTML Minifier Next | minimize | html­compressor.com | htmlnano | minify-html |
229
- | --- | --- | --- | --- | --- | --- | --- |
230
- | [A List Apart](https://alistapart.com/) | 62 | **52** | 58 | 56 | 54 | 55 |
231
- | [Amazon](https://www.amazon.com/) | 822 | **735** | 806 | n/a | n/a | n/a |
232
- | [Apple](https://www.apple.com/) | 210 | **166** | 195 | 192 | 186 | 191 |
233
- | [BBC](https://www.bbc.co.uk/) | 698 | **632** | 692 | n/a | 655 | 656 |
234
- | [CSS-Tricks](https://css-tricks.com/) | 163 | **124** | 149 | 146 | 127 | 145 |
235
- | [ECMAScript](https://tc39.es/ecma262/) | 7238 | **6342** | 6615 | n/a | 6561 | 6567 |
236
- | [EFF](https://www.eff.org/) | 54 | **46** | 49 | 49 | 49 | 47 |
237
- | [FAZ](https://www.faz.net/aktuell/) | 1860 | **1737** | 1775 | n/a | n/a | 1779 |
238
- | [Frontend Dogma](https://frontenddogma.com/) | 218 | **209** | 235 | 216 | 230 | 217 |
239
- | [Google](https://www.google.com/) | 18 | **17** | 18 | 18 | **17** | n/a |
240
- | [Ground News](https://ground.news/) | 1827 | **1585** | 1814 | n/a | 1679 | n/a |
241
- | [HTML](https://html.spec.whatwg.org/multipage/) | 149 | **147** | 155 | 148 | 153 | 149 |
242
- | [Leanpub](https://leanpub.com/) | 1161 | **974** | 1155 | n/a | 981 | n/a |
243
- | [Mastodon](https://mastodon.social/explore) | 35 | **26** | 34 | 34 | 30 | 33 |
244
- | [MDN](https://developer.mozilla.org/en-US/) | 107 | **62** | 67 | 68 | 64 | n/a |
245
- | [Middle East Eye](https://www.middleeasteye.net/) | 223 | **196** | 203 | 203 | 203 | 200 |
246
- | [SitePoint](https://www.sitepoint.com/) | 494 | **353** | 491 | n/a | 429 | 474 |
247
- | [United Nations](https://www.un.org/en/) | 152 | **113** | 131 | 124 | 122 | 126 |
248
- | [W3C](https://www.w3.org/) | 50 | **36** | 41 | 39 | 39 | 39 |
226
+ How does HTML Minifier Next compare to other minifiers, like [htmlnano](https://github.com/posthtml/htmlnano), [@swc/html](https://github.com/swc-project/swc), [minify-html](https://github.com/wilsonzlin/minify-html), [minimize](https://github.com/Swaagie/minimize), and [htmlcompressor.com](https://htmlcompressor.com/)? (All with the most aggressive settings, though without [hyper-optimization](https://meiert.com/blog/the-ways-of-writing-html/#toc-hyper-optimized).)
227
+
228
+ <!-- Auto-generated benchmarks, don’t edit -->
229
+ | Site | Original Size (KB) | HTML Minifier Next | htmlnano | @swc/html | minify-html | minimize | html­com­pressor.­com |
230
+ | --- | --- | --- | --- | --- | --- | --- | --- |
231
+ | [A List Apart](https://alistapart.com/) | 62 | **52** | 54 | 55 | 55 | 58 | 56 |
232
+ | [Apple](https://www.apple.com/) | 190 | **146** | 166 | 169 | 172 | 175 | 172 |
233
+ | [BBC](https://www.bbc.co.uk/) | 673 | **613** | 633 | 633 | 634 | 668 | n/a |
234
+ | [Codeberg](https://codeberg.org/) | 33 | 29 | **27** | 30 | 30 | 30 | 30 |
235
+ | [CSS-Tricks](https://css-tricks.com/) | 165 | **125** | 129 | 146 | 146 | 151 | 148 |
236
+ | [ECMAScript](https://tc39.es/ecma262/) | 7238 | **6341** | 6561 | 6444 | 6567 | 6615 | n/a |
237
+ | [EFF](https://www.eff.org/) | 54 | **46** | 49 | 47 | 47 | 49 | 49 |
238
+ | [FAZ](https://www.faz.net/aktuell/) | 1609 | 1500 | **1431** | 1532 | 1544 | 1555 | n/a |
239
+ | [Frontend Dogma](https://frontenddogma.com/) | 220 | **211** | 232 | 217 | 219 | 237 | 218 |
240
+ | [Google](https://www.google.com/) | 18 | **17** | **17** | **17** | n/a | 18 | 18 |
241
+ | [Ground News](https://ground.news/) | 2358 | **2067** | 2169 | 2199 | n/a | 2345 | n/a |
242
+ | [HTML Living Standard](https://html.spec.whatwg.org/multipage/) | 149 | **147** | 153 | **147** | 149 | 155 | 148 |
243
+ | [Leanpub](https://leanpub.com/) | 1348 | **1142** | 1149 | 1148 | n/a | 1343 | n/a |
244
+ | [Mastodon](https://mastodon.social/explore) | 35 | **26** | 30 | 33 | 33 | 34 | 34 |
245
+ | [MDN](https://developer.mozilla.org/en-US/) | 107 | **62** | 64 | 64 | n/a | 67 | 67 |
246
+ | [Middle East Eye](https://www.middleeasteye.net/) | 224 | **197** | 204 | 202 | 202 | 204 | 205 |
247
+ | [SitePoint](https://www.sitepoint.com/) | 492 | **350** | 426 | 465 | 472 | 488 | n/a |
248
+ | [United Nations](https://www.un.org/en/) | 151 | **113** | 121 | 125 | 125 | 130 | 123 |
249
+ | [W3C](https://www.w3.org/) | 50 | **36** | 38 | 38 | 38 | 40 | 38 |
250
+
251
+ (Last updated: Dec 1, 2025)
252
+ <!-- End auto-generated -->
249
253
 
250
254
  ## Examples
251
255
 
@@ -113,6 +113,9 @@ function joinSingleAttrAssigns(handler) {
113
113
  }).join('|');
114
114
  }
115
115
 
116
+ // Number of captured parts per `customAttrSurround` pattern
117
+ const NCP = 7;
118
+
116
119
  class HTMLParser {
117
120
  constructor(html, handler) {
118
121
  this.html = html;
@@ -125,7 +128,15 @@ class HTMLParser {
125
128
 
126
129
  const stack = []; let lastTag;
127
130
  const attribute = attrForHandler(handler);
128
- let last, prevTag, nextTag;
131
+ let last, prevTag = undefined, nextTag = undefined;
132
+
133
+ // Track position for better error messages
134
+ let position = 0;
135
+ const getLineColumn = (pos) => {
136
+ const lines = this.html.slice(0, pos).split('\n');
137
+ return { line: lines.length, column: lines[lines.length - 1].length + 1 };
138
+ };
139
+
129
140
  while (html) {
130
141
  last = html;
131
142
  // Make sure we’re not in a `script` or `style` element
@@ -243,8 +254,27 @@ class HTMLParser {
243
254
  }
244
255
 
245
256
  if (html === last) {
246
- throw new Error('Parse Error: ' + html);
257
+ if (handler.continueOnParseError) {
258
+ // Skip the problematic character and continue
259
+ if (handler.chars) {
260
+ await handler.chars(html[0], prevTag, '');
261
+ }
262
+ html = html.substring(1);
263
+ position++;
264
+ prevTag = '';
265
+ continue;
266
+ }
267
+ const loc = getLineColumn(position);
268
+ // Include some context before the error position so the snippet contains
269
+ // the offending markup plus preceding characters (e.g. "invalid<tag").
270
+ const CONTEXT_BEFORE = 50;
271
+ const startPos = Math.max(0, position - CONTEXT_BEFORE);
272
+ const snippet = this.html.slice(startPos, startPos + 200).replace(/\n/g, ' ');
273
+ throw new Error(
274
+ `Parse error at line ${loc.line}, column ${loc.column}:\n${snippet}${this.html.length > startPos + 200 ? '…' : ''}`
275
+ );
247
276
  }
277
+ position = this.html.length - html.length;
248
278
  }
249
279
 
250
280
  if (!handler.partialMarkup) {
@@ -261,10 +291,77 @@ class HTMLParser {
261
291
  };
262
292
  input = input.slice(start[0].length);
263
293
  let end, attr;
264
- while (!(end = input.match(startTagClose)) && (attr = input.match(attribute))) {
294
+
295
+ // Safety limit: max length of input to check for attributes
296
+ // Protects against catastrophic backtracking on massive attribute values
297
+ const MAX_ATTR_PARSE_LENGTH = 20000; // 20 KB should be enough for any reasonable tag
298
+
299
+ while (true) {
300
+ // Check for closing tag first
301
+ end = input.match(startTagClose);
302
+ if (end) {
303
+ break;
304
+ }
305
+
306
+ // Limit the input length we pass to the regex to prevent catastrophic backtracking
307
+ const isLimited = input.length > MAX_ATTR_PARSE_LENGTH;
308
+ const searchInput = isLimited ? input.slice(0, MAX_ATTR_PARSE_LENGTH) : input;
309
+
310
+ attr = searchInput.match(attribute);
311
+
312
+ // If we limited the input and got a match, check if the value might be truncated
313
+ if (attr && isLimited) {
314
+ // Check if the attribute value extends beyond our search window
315
+ const attrEnd = attr[0].length;
316
+ // If the match ends near the limit, the value might be truncated
317
+ if (attrEnd > MAX_ATTR_PARSE_LENGTH - 100) {
318
+ // Manually extract this attribute to handle potentially huge value
319
+ const manualMatch = input.match(/^\s*([^\s"'<>/=]+)\s*=\s*/);
320
+ if (manualMatch) {
321
+ const quoteChar = input[manualMatch[0].length];
322
+ if (quoteChar === '"' || quoteChar === "'") {
323
+ const closeQuote = input.indexOf(quoteChar, manualMatch[0].length + 1);
324
+ if (closeQuote !== -1) {
325
+ const fullAttr = input.slice(0, closeQuote + 1);
326
+ const numCustomParts = handler.customAttrSurround
327
+ ? handler.customAttrSurround.length * NCP
328
+ : 0;
329
+ const baseIndex = 1 + numCustomParts;
330
+
331
+ attr = [];
332
+ attr[0] = fullAttr;
333
+ attr[baseIndex] = manualMatch[1]; // Attribute name
334
+ attr[baseIndex + 1] = '='; // customAssign (falls back to “=” for huge attributes)
335
+ const value = input.slice(manualMatch[0].length + 1, closeQuote);
336
+ // Place value at correct index based on quote type
337
+ if (quoteChar === '"') {
338
+ attr[baseIndex + 2] = value; // Double-quoted value
339
+ } else {
340
+ attr[baseIndex + 3] = value; // Single-quoted value
341
+ }
342
+ input = input.slice(fullAttr.length);
343
+ match.attrs.push(attr);
344
+ continue;
345
+ }
346
+ }
347
+ // Note: Unquoted attribute values are intentionally not handled here.
348
+ // Per HTML spec, unquoted values cannot contain spaces or special chars,
349
+ // making a 20 KB+ unquoted value practically impossible. If encountered,
350
+ // it’s malformed HTML and using the truncated regex match is acceptable.
351
+ }
352
+ }
353
+ }
354
+
355
+ if (!attr) {
356
+ break;
357
+ }
358
+
265
359
  input = input.slice(attr[0].length);
266
360
  match.attrs.push(attr);
267
361
  }
362
+
363
+ // Check for closing tag
364
+ end = input.match(startTagClose);
268
365
  if (end) {
269
366
  match.unarySlash = end[1];
270
367
  match.rest = input.slice(end[0].length);
@@ -357,7 +454,6 @@ class HTMLParser {
357
454
 
358
455
  const attrs = match.attrs.map(function (args) {
359
456
  let name, value, customOpen, customClose, customAssign, quote;
360
- const ncp = 7; // Number of captured parts, scalar
361
457
 
362
458
  // Hackish workaround for FF bug https://bugzilla.mozilla.org/show_bug.cgi?id=369778
363
459
  if (IS_REGEX_CAPTURING_BROKEN && args[0].indexOf('""') === -1) {
@@ -385,7 +481,7 @@ class HTMLParser {
385
481
 
386
482
  let j = 1;
387
483
  if (handler.customAttrSurround) {
388
- for (let i = 0, l = handler.customAttrSurround.length; i < l; i++, j += ncp) {
484
+ for (let i = 0, l = handler.customAttrSurround.length; i < l; i++, j += NCP) {
389
485
  name = args[j + 1];
390
486
  if (name) {
391
487
  quote = populate(j + 2);
@@ -1032,11 +1128,55 @@ async function cleanConditionalComment(comment, options) {
1032
1128
  : comment;
1033
1129
  }
1034
1130
 
1131
+ const jsonScriptTypes = new Set([
1132
+ 'application/json',
1133
+ 'application/ld+json',
1134
+ 'application/manifest+json',
1135
+ 'application/vnd.geo+json',
1136
+ 'importmap',
1137
+ 'speculationrules',
1138
+ ]);
1139
+
1140
+ function minifyJson(text, options) {
1141
+ try {
1142
+ return JSON.stringify(JSON.parse(text));
1143
+ }
1144
+ catch (err) {
1145
+ if (!options.continueOnMinifyError) {
1146
+ throw err;
1147
+ }
1148
+ options.log && options.log(err);
1149
+ return text;
1150
+ }
1151
+ }
1152
+
1153
+ function hasJsonScriptType(attrs) {
1154
+ for (let i = 0, len = attrs.length; i < len; i++) {
1155
+ const attrName = attrs[i].name.toLowerCase();
1156
+ if (attrName === 'type') {
1157
+ const attrValue = trimWhitespace((attrs[i].value || '').split(/;/, 2)[0]).toLowerCase();
1158
+ if (jsonScriptTypes.has(attrValue)) {
1159
+ return true;
1160
+ }
1161
+ }
1162
+ }
1163
+ return false;
1164
+ }
1165
+
1035
1166
  async function processScript(text, options, currentAttrs) {
1036
1167
  for (let i = 0, len = currentAttrs.length; i < len; i++) {
1037
- if (currentAttrs[i].name.toLowerCase() === 'type' &&
1038
- options.processScripts.indexOf(currentAttrs[i].value) > -1) {
1039
- return await minifyHTML(text, options);
1168
+ const attrName = currentAttrs[i].name.toLowerCase();
1169
+ if (attrName === 'type') {
1170
+ const rawValue = currentAttrs[i].value;
1171
+ const normalizedValue = trimWhitespace((rawValue || '').split(/;/, 2)[0]).toLowerCase();
1172
+ // Minify JSON script types automatically
1173
+ if (jsonScriptTypes.has(normalizedValue)) {
1174
+ return minifyJson(text, options);
1175
+ }
1176
+ // Process custom script types if specified
1177
+ if (options.processScripts && options.processScripts.indexOf(rawValue) > -1) {
1178
+ return await minifyHTML(text, options);
1179
+ }
1040
1180
  }
1041
1181
  }
1042
1182
  return text;
@@ -1504,8 +1644,11 @@ async function createSortFns(value, options, uidIgnore, uidAttr) {
1504
1644
  currentTag = '';
1505
1645
  },
1506
1646
  chars: async function (text) {
1647
+ // Only recursively scan HTML content, not JSON-LD or other non-HTML script types
1648
+ // `scan()` is for analyzing HTML attribute order, not for parsing JSON
1507
1649
  if (options.processScripts && specialContentTags.has(currentTag) &&
1508
- options.processScripts.indexOf(currentType) > -1) {
1650
+ options.processScripts.indexOf(currentType) > -1 &&
1651
+ currentType === 'text/html') {
1509
1652
  await scan(text);
1510
1653
  }
1511
1654
  }
@@ -1518,7 +1661,8 @@ async function createSortFns(value, options, uidIgnore, uidAttr) {
1518
1661
  options.log = identity;
1519
1662
  options.sortAttributes = false;
1520
1663
  options.sortClassName = false;
1521
- await scan(await minifyHTML(value, options));
1664
+ const firstPassOutput = await minifyHTML(value, options);
1665
+ await scan(firstPassOutput);
1522
1666
  options.log = log;
1523
1667
  if (attrChains) {
1524
1668
  const attrSorters = Object.create(null);
@@ -1916,7 +2060,7 @@ async function minifyHTML(value, options, partialMarkup) {
1916
2060
  text = collapseWhitespace(text, options, false, false, true);
1917
2061
  }
1918
2062
  }
1919
- if (options.processScripts && specialContentTags.has(currentTag)) {
2063
+ if (specialContentTags.has(currentTag) && (options.processScripts || hasJsonScriptType(currentAttrs))) {
1920
2064
  text = await processScript(text, options, currentAttrs);
1921
2065
  }
1922
2066
  if (isExecutableScript(currentTag, currentAttrs)) {
@@ -39166,6 +39166,9 @@ function joinSingleAttrAssigns(handler) {
39166
39166
  }).join('|');
39167
39167
  }
39168
39168
 
39169
+ // Number of captured parts per `customAttrSurround` pattern
39170
+ const NCP = 7;
39171
+
39169
39172
  class HTMLParser {
39170
39173
  constructor(html, handler) {
39171
39174
  this.html = html;
@@ -39178,7 +39181,15 @@ class HTMLParser {
39178
39181
 
39179
39182
  const stack = []; let lastTag;
39180
39183
  const attribute = attrForHandler(handler);
39181
- let last, prevTag, nextTag;
39184
+ let last, prevTag = undefined, nextTag = undefined;
39185
+
39186
+ // Track position for better error messages
39187
+ let position = 0;
39188
+ const getLineColumn = (pos) => {
39189
+ const lines = this.html.slice(0, pos).split('\n');
39190
+ return { line: lines.length, column: lines[lines.length - 1].length + 1 };
39191
+ };
39192
+
39182
39193
  while (html) {
39183
39194
  last = html;
39184
39195
  // Make sure we’re not in a `script` or `style` element
@@ -39296,8 +39307,27 @@ class HTMLParser {
39296
39307
  }
39297
39308
 
39298
39309
  if (html === last) {
39299
- throw new Error('Parse Error: ' + html);
39310
+ if (handler.continueOnParseError) {
39311
+ // Skip the problematic character and continue
39312
+ if (handler.chars) {
39313
+ await handler.chars(html[0], prevTag, '');
39314
+ }
39315
+ html = html.substring(1);
39316
+ position++;
39317
+ prevTag = '';
39318
+ continue;
39319
+ }
39320
+ const loc = getLineColumn(position);
39321
+ // Include some context before the error position so the snippet contains
39322
+ // the offending markup plus preceding characters (e.g. "invalid<tag").
39323
+ const CONTEXT_BEFORE = 50;
39324
+ const startPos = Math.max(0, position - CONTEXT_BEFORE);
39325
+ const snippet = this.html.slice(startPos, startPos + 200).replace(/\n/g, ' ');
39326
+ throw new Error(
39327
+ `Parse error at line ${loc.line}, column ${loc.column}:\n${snippet}${this.html.length > startPos + 200 ? '…' : ''}`
39328
+ );
39300
39329
  }
39330
+ position = this.html.length - html.length;
39301
39331
  }
39302
39332
 
39303
39333
  if (!handler.partialMarkup) {
@@ -39314,10 +39344,77 @@ class HTMLParser {
39314
39344
  };
39315
39345
  input = input.slice(start[0].length);
39316
39346
  let end, attr;
39317
- while (!(end = input.match(startTagClose)) && (attr = input.match(attribute))) {
39347
+
39348
+ // Safety limit: max length of input to check for attributes
39349
+ // Protects against catastrophic backtracking on massive attribute values
39350
+ const MAX_ATTR_PARSE_LENGTH = 20000; // 20 KB should be enough for any reasonable tag
39351
+
39352
+ while (true) {
39353
+ // Check for closing tag first
39354
+ end = input.match(startTagClose);
39355
+ if (end) {
39356
+ break;
39357
+ }
39358
+
39359
+ // Limit the input length we pass to the regex to prevent catastrophic backtracking
39360
+ const isLimited = input.length > MAX_ATTR_PARSE_LENGTH;
39361
+ const searchInput = isLimited ? input.slice(0, MAX_ATTR_PARSE_LENGTH) : input;
39362
+
39363
+ attr = searchInput.match(attribute);
39364
+
39365
+ // If we limited the input and got a match, check if the value might be truncated
39366
+ if (attr && isLimited) {
39367
+ // Check if the attribute value extends beyond our search window
39368
+ const attrEnd = attr[0].length;
39369
+ // If the match ends near the limit, the value might be truncated
39370
+ if (attrEnd > MAX_ATTR_PARSE_LENGTH - 100) {
39371
+ // Manually extract this attribute to handle potentially huge value
39372
+ const manualMatch = input.match(/^\s*([^\s"'<>/=]+)\s*=\s*/);
39373
+ if (manualMatch) {
39374
+ const quoteChar = input[manualMatch[0].length];
39375
+ if (quoteChar === '"' || quoteChar === "'") {
39376
+ const closeQuote = input.indexOf(quoteChar, manualMatch[0].length + 1);
39377
+ if (closeQuote !== -1) {
39378
+ const fullAttr = input.slice(0, closeQuote + 1);
39379
+ const numCustomParts = handler.customAttrSurround
39380
+ ? handler.customAttrSurround.length * NCP
39381
+ : 0;
39382
+ const baseIndex = 1 + numCustomParts;
39383
+
39384
+ attr = [];
39385
+ attr[0] = fullAttr;
39386
+ attr[baseIndex] = manualMatch[1]; // Attribute name
39387
+ attr[baseIndex + 1] = '='; // customAssign (falls back to “=” for huge attributes)
39388
+ const value = input.slice(manualMatch[0].length + 1, closeQuote);
39389
+ // Place value at correct index based on quote type
39390
+ if (quoteChar === '"') {
39391
+ attr[baseIndex + 2] = value; // Double-quoted value
39392
+ } else {
39393
+ attr[baseIndex + 3] = value; // Single-quoted value
39394
+ }
39395
+ input = input.slice(fullAttr.length);
39396
+ match.attrs.push(attr);
39397
+ continue;
39398
+ }
39399
+ }
39400
+ // Note: Unquoted attribute values are intentionally not handled here.
39401
+ // Per HTML spec, unquoted values cannot contain spaces or special chars,
39402
+ // making a 20 KB+ unquoted value practically impossible. If encountered,
39403
+ // it’s malformed HTML and using the truncated regex match is acceptable.
39404
+ }
39405
+ }
39406
+ }
39407
+
39408
+ if (!attr) {
39409
+ break;
39410
+ }
39411
+
39318
39412
  input = input.slice(attr[0].length);
39319
39413
  match.attrs.push(attr);
39320
39414
  }
39415
+
39416
+ // Check for closing tag
39417
+ end = input.match(startTagClose);
39321
39418
  if (end) {
39322
39419
  match.unarySlash = end[1];
39323
39420
  match.rest = input.slice(end[0].length);
@@ -39410,7 +39507,6 @@ class HTMLParser {
39410
39507
 
39411
39508
  const attrs = match.attrs.map(function (args) {
39412
39509
  let name, value, customOpen, customClose, customAssign, quote;
39413
- const ncp = 7; // Number of captured parts, scalar
39414
39510
 
39415
39511
  // Hackish workaround for FF bug https://bugzilla.mozilla.org/show_bug.cgi?id=369778
39416
39512
  if (IS_REGEX_CAPTURING_BROKEN && args[0].indexOf('""') === -1) {
@@ -39438,7 +39534,7 @@ class HTMLParser {
39438
39534
 
39439
39535
  let j = 1;
39440
39536
  if (handler.customAttrSurround) {
39441
- for (let i = 0, l = handler.customAttrSurround.length; i < l; i++, j += ncp) {
39537
+ for (let i = 0, l = handler.customAttrSurround.length; i < l; i++, j += NCP) {
39442
39538
  name = args[j + 1];
39443
39539
  if (name) {
39444
39540
  quote = populate(j + 2);
@@ -40085,11 +40181,55 @@ async function cleanConditionalComment(comment, options) {
40085
40181
  : comment;
40086
40182
  }
40087
40183
 
40184
+ const jsonScriptTypes = new Set([
40185
+ 'application/json',
40186
+ 'application/ld+json',
40187
+ 'application/manifest+json',
40188
+ 'application/vnd.geo+json',
40189
+ 'importmap',
40190
+ 'speculationrules',
40191
+ ]);
40192
+
40193
+ function minifyJson(text, options) {
40194
+ try {
40195
+ return JSON.stringify(JSON.parse(text));
40196
+ }
40197
+ catch (err) {
40198
+ if (!options.continueOnMinifyError) {
40199
+ throw err;
40200
+ }
40201
+ options.log && options.log(err);
40202
+ return text;
40203
+ }
40204
+ }
40205
+
40206
+ function hasJsonScriptType(attrs) {
40207
+ for (let i = 0, len = attrs.length; i < len; i++) {
40208
+ const attrName = attrs[i].name.toLowerCase();
40209
+ if (attrName === 'type') {
40210
+ const attrValue = trimWhitespace((attrs[i].value || '').split(/;/, 2)[0]).toLowerCase();
40211
+ if (jsonScriptTypes.has(attrValue)) {
40212
+ return true;
40213
+ }
40214
+ }
40215
+ }
40216
+ return false;
40217
+ }
40218
+
40088
40219
  async function processScript(text, options, currentAttrs) {
40089
40220
  for (let i = 0, len = currentAttrs.length; i < len; i++) {
40090
- if (currentAttrs[i].name.toLowerCase() === 'type' &&
40091
- options.processScripts.indexOf(currentAttrs[i].value) > -1) {
40092
- return await minifyHTML(text, options);
40221
+ const attrName = currentAttrs[i].name.toLowerCase();
40222
+ if (attrName === 'type') {
40223
+ const rawValue = currentAttrs[i].value;
40224
+ const normalizedValue = trimWhitespace((rawValue || '').split(/;/, 2)[0]).toLowerCase();
40225
+ // Minify JSON script types automatically
40226
+ if (jsonScriptTypes.has(normalizedValue)) {
40227
+ return minifyJson(text, options);
40228
+ }
40229
+ // Process custom script types if specified
40230
+ if (options.processScripts && options.processScripts.indexOf(rawValue) > -1) {
40231
+ return await minifyHTML(text, options);
40232
+ }
40093
40233
  }
40094
40234
  }
40095
40235
  return text;
@@ -40557,8 +40697,11 @@ async function createSortFns(value, options, uidIgnore, uidAttr) {
40557
40697
  currentTag = '';
40558
40698
  },
40559
40699
  chars: async function (text) {
40700
+ // Only recursively scan HTML content, not JSON-LD or other non-HTML script types
40701
+ // `scan()` is for analyzing HTML attribute order, not for parsing JSON
40560
40702
  if (options.processScripts && specialContentTags.has(currentTag) &&
40561
- options.processScripts.indexOf(currentType) > -1) {
40703
+ options.processScripts.indexOf(currentType) > -1 &&
40704
+ currentType === 'text/html') {
40562
40705
  await scan(text);
40563
40706
  }
40564
40707
  }
@@ -40571,7 +40714,8 @@ async function createSortFns(value, options, uidIgnore, uidAttr) {
40571
40714
  options.log = identity;
40572
40715
  options.sortAttributes = false;
40573
40716
  options.sortClassName = false;
40574
- await scan(await minifyHTML(value, options));
40717
+ const firstPassOutput = await minifyHTML(value, options);
40718
+ await scan(firstPassOutput);
40575
40719
  options.log = log;
40576
40720
  if (attrChains) {
40577
40721
  const attrSorters = Object.create(null);
@@ -40969,7 +41113,7 @@ async function minifyHTML(value, options, partialMarkup) {
40969
41113
  text = collapseWhitespace(text, options, false, false, true);
40970
41114
  }
40971
41115
  }
40972
- if (options.processScripts && specialContentTags.has(currentTag)) {
41116
+ if (specialContentTags.has(currentTag) && (options.processScripts || hasJsonScriptType(currentAttrs))) {
40973
41117
  text = await processScript(text, options, currentAttrs);
40974
41118
  }
40975
41119
  if (isExecutableScript(currentTag, currentAttrs)) {
@@ -1 +1 @@
1
- {"version":3,"file":"htmlminifier.d.ts","sourceRoot":"","sources":["../../src/htmlminifier.js"],"names":[],"mappings":"AAu8CO,8BAJI,MAAM,YACN,eAAe,GACb,OAAO,CAAC,MAAM,CAAC,CAQ3B;;;;;;;;;;;;UAUS,MAAM;YACN,MAAM;YACN,MAAM;mBACN,MAAM;iBACN,MAAM;kBACN,MAAM;;;;;;;;;;;;;4BAQN,CAAC,GAAG,EAAE,MAAM,EAAE,KAAK,EAAE,aAAa,EAAE,EAAE,qBAAqB,EAAE,CAAC,GAAG,EAAE,MAAM,KAAK,OAAO,KAAK,OAAO;;;;;;;wBAMjG,CAAC,GAAG,EAAE,MAAM,GAAG,IAAI,EAAE,KAAK,EAAE,aAAa,EAAE,GAAG,SAAS,EAAE,iBAAiB,EAAE,CAAC,GAAG,EAAE,MAAM,KAAK,OAAO,KAAK,OAAO;;;;;;;;oBAMhH,OAAO;;;;;;;;gCAOP,OAAO;;;;;;;;kCAOP,OAAO;;;;;;;;yBAOP,OAAO;;;;;;;;2BAOP,OAAO;;;;;;;;4BAOP,OAAO;;;;;;;2BAOP,OAAO;;;;;;;;uBAMP,MAAM,EAAE;;;;;;yBAOR,MAAM;;;;;;yBAKN,CAAC,MAAM,EAAE,MAAM,CAAC,EAAE;;;;;;;4BAKlB,MAAM,EAAE;;;;;;;oCAMR,MAAM;;;;;;;qBAMN,OAAO;;;;;;;YAMP,OAAO;;;;;;;;2BAMP,MAAM,EAAE;;;;;;;;;4BAOR,MAAM,EAAE;;;;;;;+BAQR,OAAO;;;;;;;2BAMP,SAAS,CAAC,MAAM,CAAC;;;;;;uBAMjB,OAAO;;;;;;;;UAKP,CAAC,OAAO,EAAE,OAAO,KAAK,IAAI;;;;;;;;qBAO1B,MAAM;;;;;;;oBAON,MAAM;;;;;;;;;;gBAMN,OAAO,GAAG,OAAO,CAAC,OAAO,cAAc,EAAE,gBAAgB,CAAC,OAAO,cAAc,EAAE,aAAa,CAAC,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,MAAM,EAAE,IAAI,CAAC,EAAE,MAAM,KAAK,OAAO,CAAC,MAAM,CAAC,GAAG,MAAM,CAAC;;;;;;;;;;eAS9J,OAAO,GAAG,OAAO,QAAQ,EAAE,aAAa,GAAG,CAAC,CAAC,IAAI,EAAE,MAAM,EAAE,MAAM,CAAC,EAAE,OAAO,KAAK,OAAO,CAAC,MAAM,CAAC,GAAG,MAAM,CAAC;;;;;;;;;;iBASzG,OAAO,GAAG,MAAM,GAAG,OAAO,WAAW,EAAE,OAAO,GAAG,CAAC,CAAC,IAAI,EAAE,MAAM,KAAK,OAAO,CAAC,MAAM,CAAC,GAAG,MAAM,CAAC;;;;;;;;WAS7F,CAAC,IAAI,EAAE,MAAM,KAAK,MAAM;;;;;;;+BAOxB,OAAO;;;;;;;;;;oBAMP,OAAO;;;;;;;;yBASP,OAAO;;;;;;;gCAOP,OAAO;;;;;;;;iCAMP,OAAO;;;;;;;;;;qBAOP,MAAM,EAAE;;;;;;;qBASR,IAAI,GAAG,GAAG;;;;;;;4BAMV,OAAO;;;;;;;;qBAMP,OAAO;;;;;;;;;4BAOP,OAAO,GAAG,CAAC,CAAC,QAAQ,EAAE,MAAM,EAAE,GAAG,EAAE,MAAM,KAAK,OAAO,CAAC;;;;;;;;0BAQtD,OAAO;;;;;;;;yBAOP,OAAO;;;;;;;;gCAOP,OAAO;;;;;;;iCAOP,OAAO;;;;;;;oCAMP,OAAO;;;;;;;;;;0BAMP,OAAO;;;;;;;;;qBASP,OAAO,GAAG,CAAC,CAAC,GAAG,EAAE,MAAM,EAAE,KAAK,EAAE,aAAa,EAAE,KAAK,IAAI,CAAC;;;;;;;;;oBAQzD,OAAO,GAAG,CAAC,CAAC,KAAK,EAAE,MAAM,KAAK,MAAM,CAAC;;;;;;;;0BAQrC,OAAO;;;;;;;sBAOP,OAAO;;wBAhyDkC,cAAc;0BAAd,cAAc;+BAAd,cAAc"}
1
+ {"version":3,"file":"htmlminifier.d.ts","sourceRoot":"","sources":["../../src/htmlminifier.js"],"names":[],"mappings":"AAu/CO,8BAJI,MAAM,YACN,eAAe,GACb,OAAO,CAAC,MAAM,CAAC,CAQ3B;;;;;;;;;;;;UAUS,MAAM;YACN,MAAM;YACN,MAAM;mBACN,MAAM;iBACN,MAAM;kBACN,MAAM;;;;;;;;;;;;;4BAQN,CAAC,GAAG,EAAE,MAAM,EAAE,KAAK,EAAE,aAAa,EAAE,EAAE,qBAAqB,EAAE,CAAC,GAAG,EAAE,MAAM,KAAK,OAAO,KAAK,OAAO;;;;;;;wBAMjG,CAAC,GAAG,EAAE,MAAM,GAAG,IAAI,EAAE,KAAK,EAAE,aAAa,EAAE,GAAG,SAAS,EAAE,iBAAiB,EAAE,CAAC,GAAG,EAAE,MAAM,KAAK,OAAO,KAAK,OAAO;;;;;;;;oBAMhH,OAAO;;;;;;;;gCAOP,OAAO;;;;;;;;kCAOP,OAAO;;;;;;;;yBAOP,OAAO;;;;;;;;2BAOP,OAAO;;;;;;;;4BAOP,OAAO;;;;;;;2BAOP,OAAO;;;;;;;;uBAMP,MAAM,EAAE;;;;;;yBAOR,MAAM;;;;;;yBAKN,CAAC,MAAM,EAAE,MAAM,CAAC,EAAE;;;;;;;4BAKlB,MAAM,EAAE;;;;;;;oCAMR,MAAM;;;;;;;qBAMN,OAAO;;;;;;;YAMP,OAAO;;;;;;;;2BAMP,MAAM,EAAE;;;;;;;;;4BAOR,MAAM,EAAE;;;;;;;+BAQR,OAAO;;;;;;;2BAMP,SAAS,CAAC,MAAM,CAAC;;;;;;uBAMjB,OAAO;;;;;;;;UAKP,CAAC,OAAO,EAAE,OAAO,KAAK,IAAI;;;;;;;;qBAO1B,MAAM;;;;;;;oBAON,MAAM;;;;;;;;;;gBAMN,OAAO,GAAG,OAAO,CAAC,OAAO,cAAc,EAAE,gBAAgB,CAAC,OAAO,cAAc,EAAE,aAAa,CAAC,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,MAAM,EAAE,IAAI,CAAC,EAAE,MAAM,KAAK,OAAO,CAAC,MAAM,CAAC,GAAG,MAAM,CAAC;;;;;;;;;;eAS9J,OAAO,GAAG,OAAO,QAAQ,EAAE,aAAa,GAAG,CAAC,CAAC,IAAI,EAAE,MAAM,EAAE,MAAM,CAAC,EAAE,OAAO,KAAK,OAAO,CAAC,MAAM,CAAC,GAAG,MAAM,CAAC;;;;;;;;;;iBASzG,OAAO,GAAG,MAAM,GAAG,OAAO,WAAW,EAAE,OAAO,GAAG,CAAC,CAAC,IAAI,EAAE,MAAM,KAAK,OAAO,CAAC,MAAM,CAAC,GAAG,MAAM,CAAC;;;;;;;;WAS7F,CAAC,IAAI,EAAE,MAAM,KAAK,MAAM;;;;;;;+BAOxB,OAAO;;;;;;;;;;oBAMP,OAAO;;;;;;;;yBASP,OAAO;;;;;;;gCAOP,OAAO;;;;;;;;iCAMP,OAAO;;;;;;;;;;qBAOP,MAAM,EAAE;;;;;;;qBASR,IAAI,GAAG,GAAG;;;;;;;4BAMV,OAAO;;;;;;;;qBAMP,OAAO;;;;;;;;;4BAOP,OAAO,GAAG,CAAC,CAAC,QAAQ,EAAE,MAAM,EAAE,GAAG,EAAE,MAAM,KAAK,OAAO,CAAC;;;;;;;;0BAQtD,OAAO;;;;;;;;yBAOP,OAAO;;;;;;;;gCAOP,OAAO;;;;;;;iCAOP,OAAO;;;;;;;oCAMP,OAAO;;;;;;;;;;0BAMP,OAAO;;;;;;;;;qBASP,OAAO,GAAG,CAAC,CAAC,GAAG,EAAE,MAAM,EAAE,KAAK,EAAE,aAAa,EAAE,KAAK,IAAI,CAAC;;;;;;;;;oBAQzD,OAAO,GAAG,CAAC,CAAC,KAAK,EAAE,MAAM,KAAK,MAAM,CAAC;;;;;;;;0BAQrC,OAAO;;;;;;;sBAOP,OAAO;;wBAh1DkC,cAAc;0BAAd,cAAc;+BAAd,cAAc"}
@@ -1 +1 @@
1
- {"version":3,"file":"htmlparser.d.ts","sourceRoot":"","sources":["../../src/htmlparser.js"],"names":[],"mappings":"AAgDA,4BAAoE;AAyDpE;IACE,qCAGC;IAFC,UAAgB;IAChB,aAAsB;IAGxB,uBAgWC;CACF"}
1
+ {"version":3,"file":"htmlparser.d.ts","sourceRoot":"","sources":["../../src/htmlparser.js"],"names":[],"mappings":"AAgDA,4BAAoE;AA4DpE;IACE,qCAGC;IAFC,UAAgB;IAChB,aAAsB;IAGxB,uBA6bC;CACF"}
package/package.json CHANGED
@@ -84,5 +84,5 @@
84
84
  "test:watch": "node --test --watch tests/*.spec.js"
85
85
  },
86
86
  "type": "module",
87
- "version": "4.5.1"
87
+ "version": "4.6.1"
88
88
  }
@@ -431,11 +431,55 @@ async function cleanConditionalComment(comment, options) {
431
431
  : comment;
432
432
  }
433
433
 
434
+ const jsonScriptTypes = new Set([
435
+ 'application/json',
436
+ 'application/ld+json',
437
+ 'application/manifest+json',
438
+ 'application/vnd.geo+json',
439
+ 'importmap',
440
+ 'speculationrules',
441
+ ]);
442
+
443
+ function minifyJson(text, options) {
444
+ try {
445
+ return JSON.stringify(JSON.parse(text));
446
+ }
447
+ catch (err) {
448
+ if (!options.continueOnMinifyError) {
449
+ throw err;
450
+ }
451
+ options.log && options.log(err);
452
+ return text;
453
+ }
454
+ }
455
+
456
+ function hasJsonScriptType(attrs) {
457
+ for (let i = 0, len = attrs.length; i < len; i++) {
458
+ const attrName = attrs[i].name.toLowerCase();
459
+ if (attrName === 'type') {
460
+ const attrValue = trimWhitespace((attrs[i].value || '').split(/;/, 2)[0]).toLowerCase();
461
+ if (jsonScriptTypes.has(attrValue)) {
462
+ return true;
463
+ }
464
+ }
465
+ }
466
+ return false;
467
+ }
468
+
434
469
  async function processScript(text, options, currentAttrs) {
435
470
  for (let i = 0, len = currentAttrs.length; i < len; i++) {
436
- if (currentAttrs[i].name.toLowerCase() === 'type' &&
437
- options.processScripts.indexOf(currentAttrs[i].value) > -1) {
438
- return await minifyHTML(text, options);
471
+ const attrName = currentAttrs[i].name.toLowerCase();
472
+ if (attrName === 'type') {
473
+ const rawValue = currentAttrs[i].value;
474
+ const normalizedValue = trimWhitespace((rawValue || '').split(/;/, 2)[0]).toLowerCase();
475
+ // Minify JSON script types automatically
476
+ if (jsonScriptTypes.has(normalizedValue)) {
477
+ return minifyJson(text, options);
478
+ }
479
+ // Process custom script types if specified
480
+ if (options.processScripts && options.processScripts.indexOf(rawValue) > -1) {
481
+ return await minifyHTML(text, options);
482
+ }
439
483
  }
440
484
  }
441
485
  return text;
@@ -903,8 +947,11 @@ async function createSortFns(value, options, uidIgnore, uidAttr) {
903
947
  currentTag = '';
904
948
  },
905
949
  chars: async function (text) {
950
+ // Only recursively scan HTML content, not JSON-LD or other non-HTML script types
951
+ // `scan()` is for analyzing HTML attribute order, not for parsing JSON
906
952
  if (options.processScripts && specialContentTags.has(currentTag) &&
907
- options.processScripts.indexOf(currentType) > -1) {
953
+ options.processScripts.indexOf(currentType) > -1 &&
954
+ currentType === 'text/html') {
908
955
  await scan(text);
909
956
  }
910
957
  }
@@ -917,7 +964,8 @@ async function createSortFns(value, options, uidIgnore, uidAttr) {
917
964
  options.log = identity;
918
965
  options.sortAttributes = false;
919
966
  options.sortClassName = false;
920
- await scan(await minifyHTML(value, options));
967
+ const firstPassOutput = await minifyHTML(value, options);
968
+ await scan(firstPassOutput);
921
969
  options.log = log;
922
970
  if (attrChains) {
923
971
  const attrSorters = Object.create(null);
@@ -1315,7 +1363,7 @@ async function minifyHTML(value, options, partialMarkup) {
1315
1363
  text = collapseWhitespace(text, options, false, false, true);
1316
1364
  }
1317
1365
  }
1318
- if (options.processScripts && specialContentTags.has(currentTag)) {
1366
+ if (specialContentTags.has(currentTag) && (options.processScripts || hasJsonScriptType(currentAttrs))) {
1319
1367
  text = await processScript(text, options, currentAttrs);
1320
1368
  }
1321
1369
  if (isExecutableScript(currentTag, currentAttrs)) {
package/src/htmlparser.js CHANGED
@@ -103,6 +103,9 @@ function joinSingleAttrAssigns(handler) {
103
103
  }).join('|');
104
104
  }
105
105
 
106
+ // Number of captured parts per `customAttrSurround` pattern
107
+ const NCP = 7;
108
+
106
109
  export class HTMLParser {
107
110
  constructor(html, handler) {
108
111
  this.html = html;
@@ -115,7 +118,15 @@ export class HTMLParser {
115
118
 
116
119
  const stack = []; let lastTag;
117
120
  const attribute = attrForHandler(handler);
118
- let last, prevTag, nextTag;
121
+ let last, prevTag = undefined, nextTag = undefined;
122
+
123
+ // Track position for better error messages
124
+ let position = 0;
125
+ const getLineColumn = (pos) => {
126
+ const lines = this.html.slice(0, pos).split('\n');
127
+ return { line: lines.length, column: lines[lines.length - 1].length + 1 };
128
+ };
129
+
119
130
  while (html) {
120
131
  last = html;
121
132
  // Make sure we’re not in a `script` or `style` element
@@ -233,8 +244,27 @@ export class HTMLParser {
233
244
  }
234
245
 
235
246
  if (html === last) {
236
- throw new Error('Parse Error: ' + html);
247
+ if (handler.continueOnParseError) {
248
+ // Skip the problematic character and continue
249
+ if (handler.chars) {
250
+ await handler.chars(html[0], prevTag, '');
251
+ }
252
+ html = html.substring(1);
253
+ position++;
254
+ prevTag = '';
255
+ continue;
256
+ }
257
+ const loc = getLineColumn(position);
258
+ // Include some context before the error position so the snippet contains
259
+ // the offending markup plus preceding characters (e.g. "invalid<tag").
260
+ const CONTEXT_BEFORE = 50;
261
+ const startPos = Math.max(0, position - CONTEXT_BEFORE);
262
+ const snippet = this.html.slice(startPos, startPos + 200).replace(/\n/g, ' ');
263
+ throw new Error(
264
+ `Parse error at line ${loc.line}, column ${loc.column}:\n${snippet}${this.html.length > startPos + 200 ? '…' : ''}`
265
+ );
237
266
  }
267
+ position = this.html.length - html.length;
238
268
  }
239
269
 
240
270
  if (!handler.partialMarkup) {
@@ -251,10 +281,77 @@ export class HTMLParser {
251
281
  };
252
282
  input = input.slice(start[0].length);
253
283
  let end, attr;
254
- while (!(end = input.match(startTagClose)) && (attr = input.match(attribute))) {
284
+
285
+ // Safety limit: max length of input to check for attributes
286
+ // Protects against catastrophic backtracking on massive attribute values
287
+ const MAX_ATTR_PARSE_LENGTH = 20000; // 20 KB should be enough for any reasonable tag
288
+
289
+ while (true) {
290
+ // Check for closing tag first
291
+ end = input.match(startTagClose);
292
+ if (end) {
293
+ break;
294
+ }
295
+
296
+ // Limit the input length we pass to the regex to prevent catastrophic backtracking
297
+ const isLimited = input.length > MAX_ATTR_PARSE_LENGTH;
298
+ const searchInput = isLimited ? input.slice(0, MAX_ATTR_PARSE_LENGTH) : input;
299
+
300
+ attr = searchInput.match(attribute);
301
+
302
+ // If we limited the input and got a match, check if the value might be truncated
303
+ if (attr && isLimited) {
304
+ // Check if the attribute value extends beyond our search window
305
+ const attrEnd = attr[0].length;
306
+ // If the match ends near the limit, the value might be truncated
307
+ if (attrEnd > MAX_ATTR_PARSE_LENGTH - 100) {
308
+ // Manually extract this attribute to handle potentially huge value
309
+ const manualMatch = input.match(/^\s*([^\s"'<>/=]+)\s*=\s*/);
310
+ if (manualMatch) {
311
+ const quoteChar = input[manualMatch[0].length];
312
+ if (quoteChar === '"' || quoteChar === "'") {
313
+ const closeQuote = input.indexOf(quoteChar, manualMatch[0].length + 1);
314
+ if (closeQuote !== -1) {
315
+ const fullAttr = input.slice(0, closeQuote + 1);
316
+ const numCustomParts = handler.customAttrSurround
317
+ ? handler.customAttrSurround.length * NCP
318
+ : 0;
319
+ const baseIndex = 1 + numCustomParts;
320
+
321
+ attr = [];
322
+ attr[0] = fullAttr;
323
+ attr[baseIndex] = manualMatch[1]; // Attribute name
324
+ attr[baseIndex + 1] = '='; // customAssign (falls back to “=” for huge attributes)
325
+ const value = input.slice(manualMatch[0].length + 1, closeQuote);
326
+ // Place value at correct index based on quote type
327
+ if (quoteChar === '"') {
328
+ attr[baseIndex + 2] = value; // Double-quoted value
329
+ } else {
330
+ attr[baseIndex + 3] = value; // Single-quoted value
331
+ }
332
+ input = input.slice(fullAttr.length);
333
+ match.attrs.push(attr);
334
+ continue;
335
+ }
336
+ }
337
+ // Note: Unquoted attribute values are intentionally not handled here.
338
+ // Per HTML spec, unquoted values cannot contain spaces or special chars,
339
+ // making a 20 KB+ unquoted value practically impossible. If encountered,
340
+ // it’s malformed HTML and using the truncated regex match is acceptable.
341
+ }
342
+ }
343
+ }
344
+
345
+ if (!attr) {
346
+ break;
347
+ }
348
+
255
349
  input = input.slice(attr[0].length);
256
350
  match.attrs.push(attr);
257
351
  }
352
+
353
+ // Check for closing tag
354
+ end = input.match(startTagClose);
258
355
  if (end) {
259
356
  match.unarySlash = end[1];
260
357
  match.rest = input.slice(end[0].length);
@@ -347,7 +444,6 @@ export class HTMLParser {
347
444
 
348
445
  const attrs = match.attrs.map(function (args) {
349
446
  let name, value, customOpen, customClose, customAssign, quote;
350
- const ncp = 7; // Number of captured parts, scalar
351
447
 
352
448
  // Hackish workaround for FF bug https://bugzilla.mozilla.org/show_bug.cgi?id=369778
353
449
  if (IS_REGEX_CAPTURING_BROKEN && args[0].indexOf('""') === -1) {
@@ -375,7 +471,7 @@ export class HTMLParser {
375
471
 
376
472
  let j = 1;
377
473
  if (handler.customAttrSurround) {
378
- for (let i = 0, l = handler.customAttrSurround.length; i < l; i++, j += ncp) {
474
+ for (let i = 0, l = handler.customAttrSurround.length; i < l; i++, j += NCP) {
379
475
  name = args[j + 1];
380
476
  if (name) {
381
477
  quote = populate(j + 2);
package/src/utils.js CHANGED
@@ -8,4 +8,4 @@ export async function replaceAsync(str, regex, asyncFn) {
8
8
 
9
9
  const data = await Promise.all(promises);
10
10
  return str.replace(regex, () => data.shift());
11
- }
11
+ }