html-minifier-next 4.6.0 → 4.6.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +27 -23
- package/dist/htmlminifier.cjs +107 -7
- package/dist/htmlminifier.esm.bundle.js +107 -7
- package/dist/types/htmlminifier.d.ts.map +1 -1
- package/dist/types/htmlparser.d.ts.map +1 -1
- package/package.json +1 -1
- package/src/htmlminifier.js +6 -2
- package/src/htmlparser.js +101 -5
- package/src/utils.js +1 -1
package/README.md
CHANGED
|
@@ -223,29 +223,33 @@ const result = await minify(html, {
|
|
|
223
223
|
|
|
224
224
|
## Minification comparison
|
|
225
225
|
|
|
226
|
-
How does HTML Minifier Next compare to other
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
|
230
|
-
|
|
|
231
|
-
| [
|
|
232
|
-
| [Apple](https://www.apple.com/) |
|
|
233
|
-
| [BBC](https://www.bbc.co.uk/) |
|
|
234
|
-
| [
|
|
235
|
-
| [
|
|
236
|
-
| [
|
|
237
|
-
| [
|
|
238
|
-
| [
|
|
239
|
-
| [
|
|
240
|
-
| [
|
|
241
|
-
| [
|
|
242
|
-
| [
|
|
243
|
-
| [
|
|
244
|
-
| [
|
|
245
|
-
| [
|
|
246
|
-
| [
|
|
247
|
-
| [
|
|
248
|
-
| [
|
|
226
|
+
How does HTML Minifier Next compare to other minifiers, like [htmlnano](https://github.com/posthtml/htmlnano), [@swc/html](https://github.com/swc-project/swc), [minify-html](https://github.com/wilsonzlin/minify-html), [minimize](https://github.com/Swaagie/minimize), and [htmlcompressor.com](https://htmlcompressor.com/)? (All with the most aggressive settings, though without [hyper-optimization](https://meiert.com/blog/the-ways-of-writing-html/#toc-hyper-optimized).)
|
|
227
|
+
|
|
228
|
+
<!-- Auto-generated benchmarks, don’t edit -->
|
|
229
|
+
| Site | Original Size (KB) | HTML Minifier Next | htmlnano | @swc/html | minify-html | minimize | htmlcompressor.com |
|
|
230
|
+
| --- | --- | --- | --- | --- | --- | --- | --- |
|
|
231
|
+
| [A List Apart](https://alistapart.com/) | 62 | **52** | 54 | 55 | 55 | 58 | 56 |
|
|
232
|
+
| [Apple](https://www.apple.com/) | 190 | **146** | 166 | 169 | 172 | 175 | 172 |
|
|
233
|
+
| [BBC](https://www.bbc.co.uk/) | 673 | **613** | 633 | 633 | 634 | 668 | n/a |
|
|
234
|
+
| [Codeberg](https://codeberg.org/) | 33 | 29 | **27** | 30 | 30 | 30 | 30 |
|
|
235
|
+
| [CSS-Tricks](https://css-tricks.com/) | 165 | **125** | 129 | 146 | 146 | 151 | 148 |
|
|
236
|
+
| [ECMAScript](https://tc39.es/ecma262/) | 7238 | **6341** | 6561 | 6444 | 6567 | 6615 | n/a |
|
|
237
|
+
| [EFF](https://www.eff.org/) | 54 | **46** | 49 | 47 | 47 | 49 | 49 |
|
|
238
|
+
| [FAZ](https://www.faz.net/aktuell/) | 1609 | 1500 | **1431** | 1532 | 1544 | 1555 | n/a |
|
|
239
|
+
| [Frontend Dogma](https://frontenddogma.com/) | 220 | **211** | 232 | 217 | 219 | 237 | 218 |
|
|
240
|
+
| [Google](https://www.google.com/) | 18 | **17** | **17** | **17** | n/a | 18 | 18 |
|
|
241
|
+
| [Ground News](https://ground.news/) | 2358 | **2067** | 2169 | 2199 | n/a | 2345 | n/a |
|
|
242
|
+
| [HTML Living Standard](https://html.spec.whatwg.org/multipage/) | 149 | **147** | 153 | **147** | 149 | 155 | 148 |
|
|
243
|
+
| [Leanpub](https://leanpub.com/) | 1348 | **1142** | 1149 | 1148 | n/a | 1343 | n/a |
|
|
244
|
+
| [Mastodon](https://mastodon.social/explore) | 35 | **26** | 30 | 33 | 33 | 34 | 34 |
|
|
245
|
+
| [MDN](https://developer.mozilla.org/en-US/) | 107 | **62** | 64 | 64 | n/a | 67 | 67 |
|
|
246
|
+
| [Middle East Eye](https://www.middleeasteye.net/) | 224 | **197** | 204 | 202 | 202 | 204 | 205 |
|
|
247
|
+
| [SitePoint](https://www.sitepoint.com/) | 492 | **350** | 426 | 465 | 472 | 488 | n/a |
|
|
248
|
+
| [United Nations](https://www.un.org/en/) | 151 | **113** | 121 | 125 | 125 | 130 | 123 |
|
|
249
|
+
| [W3C](https://www.w3.org/) | 50 | **36** | 38 | 38 | 38 | 40 | 38 |
|
|
250
|
+
|
|
251
|
+
(Last updated: Dec 1, 2025)
|
|
252
|
+
<!-- End auto-generated -->
|
|
249
253
|
|
|
250
254
|
## Examples
|
|
251
255
|
|
package/dist/htmlminifier.cjs
CHANGED
|
@@ -113,6 +113,9 @@ function joinSingleAttrAssigns(handler) {
|
|
|
113
113
|
}).join('|');
|
|
114
114
|
}
|
|
115
115
|
|
|
116
|
+
// Number of captured parts per `customAttrSurround` pattern
|
|
117
|
+
const NCP = 7;
|
|
118
|
+
|
|
116
119
|
class HTMLParser {
|
|
117
120
|
constructor(html, handler) {
|
|
118
121
|
this.html = html;
|
|
@@ -125,7 +128,15 @@ class HTMLParser {
|
|
|
125
128
|
|
|
126
129
|
const stack = []; let lastTag;
|
|
127
130
|
const attribute = attrForHandler(handler);
|
|
128
|
-
let last, prevTag, nextTag;
|
|
131
|
+
let last, prevTag = undefined, nextTag = undefined;
|
|
132
|
+
|
|
133
|
+
// Track position for better error messages
|
|
134
|
+
let position = 0;
|
|
135
|
+
const getLineColumn = (pos) => {
|
|
136
|
+
const lines = this.html.slice(0, pos).split('\n');
|
|
137
|
+
return { line: lines.length, column: lines[lines.length - 1].length + 1 };
|
|
138
|
+
};
|
|
139
|
+
|
|
129
140
|
while (html) {
|
|
130
141
|
last = html;
|
|
131
142
|
// Make sure we’re not in a `script` or `style` element
|
|
@@ -243,8 +254,27 @@ class HTMLParser {
|
|
|
243
254
|
}
|
|
244
255
|
|
|
245
256
|
if (html === last) {
|
|
246
|
-
|
|
257
|
+
if (handler.continueOnParseError) {
|
|
258
|
+
// Skip the problematic character and continue
|
|
259
|
+
if (handler.chars) {
|
|
260
|
+
await handler.chars(html[0], prevTag, '');
|
|
261
|
+
}
|
|
262
|
+
html = html.substring(1);
|
|
263
|
+
position++;
|
|
264
|
+
prevTag = '';
|
|
265
|
+
continue;
|
|
266
|
+
}
|
|
267
|
+
const loc = getLineColumn(position);
|
|
268
|
+
// Include some context before the error position so the snippet contains
|
|
269
|
+
// the offending markup plus preceding characters (e.g. "invalid<tag").
|
|
270
|
+
const CONTEXT_BEFORE = 50;
|
|
271
|
+
const startPos = Math.max(0, position - CONTEXT_BEFORE);
|
|
272
|
+
const snippet = this.html.slice(startPos, startPos + 200).replace(/\n/g, ' ');
|
|
273
|
+
throw new Error(
|
|
274
|
+
`Parse error at line ${loc.line}, column ${loc.column}:\n${snippet}${this.html.length > startPos + 200 ? '…' : ''}`
|
|
275
|
+
);
|
|
247
276
|
}
|
|
277
|
+
position = this.html.length - html.length;
|
|
248
278
|
}
|
|
249
279
|
|
|
250
280
|
if (!handler.partialMarkup) {
|
|
@@ -261,10 +291,77 @@ class HTMLParser {
|
|
|
261
291
|
};
|
|
262
292
|
input = input.slice(start[0].length);
|
|
263
293
|
let end, attr;
|
|
264
|
-
|
|
294
|
+
|
|
295
|
+
// Safety limit: max length of input to check for attributes
|
|
296
|
+
// Protects against catastrophic backtracking on massive attribute values
|
|
297
|
+
const MAX_ATTR_PARSE_LENGTH = 20000; // 20 KB should be enough for any reasonable tag
|
|
298
|
+
|
|
299
|
+
while (true) {
|
|
300
|
+
// Check for closing tag first
|
|
301
|
+
end = input.match(startTagClose);
|
|
302
|
+
if (end) {
|
|
303
|
+
break;
|
|
304
|
+
}
|
|
305
|
+
|
|
306
|
+
// Limit the input length we pass to the regex to prevent catastrophic backtracking
|
|
307
|
+
const isLimited = input.length > MAX_ATTR_PARSE_LENGTH;
|
|
308
|
+
const searchInput = isLimited ? input.slice(0, MAX_ATTR_PARSE_LENGTH) : input;
|
|
309
|
+
|
|
310
|
+
attr = searchInput.match(attribute);
|
|
311
|
+
|
|
312
|
+
// If we limited the input and got a match, check if the value might be truncated
|
|
313
|
+
if (attr && isLimited) {
|
|
314
|
+
// Check if the attribute value extends beyond our search window
|
|
315
|
+
const attrEnd = attr[0].length;
|
|
316
|
+
// If the match ends near the limit, the value might be truncated
|
|
317
|
+
if (attrEnd > MAX_ATTR_PARSE_LENGTH - 100) {
|
|
318
|
+
// Manually extract this attribute to handle potentially huge value
|
|
319
|
+
const manualMatch = input.match(/^\s*([^\s"'<>/=]+)\s*=\s*/);
|
|
320
|
+
if (manualMatch) {
|
|
321
|
+
const quoteChar = input[manualMatch[0].length];
|
|
322
|
+
if (quoteChar === '"' || quoteChar === "'") {
|
|
323
|
+
const closeQuote = input.indexOf(quoteChar, manualMatch[0].length + 1);
|
|
324
|
+
if (closeQuote !== -1) {
|
|
325
|
+
const fullAttr = input.slice(0, closeQuote + 1);
|
|
326
|
+
const numCustomParts = handler.customAttrSurround
|
|
327
|
+
? handler.customAttrSurround.length * NCP
|
|
328
|
+
: 0;
|
|
329
|
+
const baseIndex = 1 + numCustomParts;
|
|
330
|
+
|
|
331
|
+
attr = [];
|
|
332
|
+
attr[0] = fullAttr;
|
|
333
|
+
attr[baseIndex] = manualMatch[1]; // Attribute name
|
|
334
|
+
attr[baseIndex + 1] = '='; // customAssign (falls back to “=” for huge attributes)
|
|
335
|
+
const value = input.slice(manualMatch[0].length + 1, closeQuote);
|
|
336
|
+
// Place value at correct index based on quote type
|
|
337
|
+
if (quoteChar === '"') {
|
|
338
|
+
attr[baseIndex + 2] = value; // Double-quoted value
|
|
339
|
+
} else {
|
|
340
|
+
attr[baseIndex + 3] = value; // Single-quoted value
|
|
341
|
+
}
|
|
342
|
+
input = input.slice(fullAttr.length);
|
|
343
|
+
match.attrs.push(attr);
|
|
344
|
+
continue;
|
|
345
|
+
}
|
|
346
|
+
}
|
|
347
|
+
// Note: Unquoted attribute values are intentionally not handled here.
|
|
348
|
+
// Per HTML spec, unquoted values cannot contain spaces or special chars,
|
|
349
|
+
// making a 20 KB+ unquoted value practically impossible. If encountered,
|
|
350
|
+
// it’s malformed HTML and using the truncated regex match is acceptable.
|
|
351
|
+
}
|
|
352
|
+
}
|
|
353
|
+
}
|
|
354
|
+
|
|
355
|
+
if (!attr) {
|
|
356
|
+
break;
|
|
357
|
+
}
|
|
358
|
+
|
|
265
359
|
input = input.slice(attr[0].length);
|
|
266
360
|
match.attrs.push(attr);
|
|
267
361
|
}
|
|
362
|
+
|
|
363
|
+
// Check for closing tag
|
|
364
|
+
end = input.match(startTagClose);
|
|
268
365
|
if (end) {
|
|
269
366
|
match.unarySlash = end[1];
|
|
270
367
|
match.rest = input.slice(end[0].length);
|
|
@@ -357,7 +454,6 @@ class HTMLParser {
|
|
|
357
454
|
|
|
358
455
|
const attrs = match.attrs.map(function (args) {
|
|
359
456
|
let name, value, customOpen, customClose, customAssign, quote;
|
|
360
|
-
const ncp = 7; // Number of captured parts, scalar
|
|
361
457
|
|
|
362
458
|
// Hackish workaround for FF bug https://bugzilla.mozilla.org/show_bug.cgi?id=369778
|
|
363
459
|
if (IS_REGEX_CAPTURING_BROKEN && args[0].indexOf('""') === -1) {
|
|
@@ -385,7 +481,7 @@ class HTMLParser {
|
|
|
385
481
|
|
|
386
482
|
let j = 1;
|
|
387
483
|
if (handler.customAttrSurround) {
|
|
388
|
-
for (let i = 0, l = handler.customAttrSurround.length; i < l; i++, j += ncp) {
|
|
484
|
+
for (let i = 0, l = handler.customAttrSurround.length; i < l; i++, j += NCP) {
|
|
389
485
|
name = args[j + 1];
|
|
390
486
|
if (name) {
|
|
391
487
|
quote = populate(j + 2);
|
|
@@ -1548,8 +1644,11 @@ async function createSortFns(value, options, uidIgnore, uidAttr) {
|
|
|
1548
1644
|
currentTag = '';
|
|
1549
1645
|
},
|
|
1550
1646
|
chars: async function (text) {
|
|
1647
|
+
// Only recursively scan HTML content, not JSON-LD or other non-HTML script types
|
|
1648
|
+
// `scan()` is for analyzing HTML attribute order, not for parsing JSON
|
|
1551
1649
|
if (options.processScripts && specialContentTags.has(currentTag) &&
|
|
1552
|
-
options.processScripts.indexOf(currentType) > -1) {
|
|
1650
|
+
options.processScripts.indexOf(currentType) > -1 &&
|
|
1651
|
+
currentType === 'text/html') {
|
|
1553
1652
|
await scan(text);
|
|
1554
1653
|
}
|
|
1555
1654
|
}
|
|
@@ -1562,7 +1661,8 @@ async function createSortFns(value, options, uidIgnore, uidAttr) {
|
|
|
1562
1661
|
options.log = identity;
|
|
1563
1662
|
options.sortAttributes = false;
|
|
1564
1663
|
options.sortClassName = false;
|
|
1565
|
-
|
|
1664
|
+
const firstPassOutput = await minifyHTML(value, options);
|
|
1665
|
+
await scan(firstPassOutput);
|
|
1566
1666
|
options.log = log;
|
|
1567
1667
|
if (attrChains) {
|
|
1568
1668
|
const attrSorters = Object.create(null);
|
|
@@ -39166,6 +39166,9 @@ function joinSingleAttrAssigns(handler) {
|
|
|
39166
39166
|
}).join('|');
|
|
39167
39167
|
}
|
|
39168
39168
|
|
|
39169
|
+
// Number of captured parts per `customAttrSurround` pattern
|
|
39170
|
+
const NCP = 7;
|
|
39171
|
+
|
|
39169
39172
|
class HTMLParser {
|
|
39170
39173
|
constructor(html, handler) {
|
|
39171
39174
|
this.html = html;
|
|
@@ -39178,7 +39181,15 @@ class HTMLParser {
|
|
|
39178
39181
|
|
|
39179
39182
|
const stack = []; let lastTag;
|
|
39180
39183
|
const attribute = attrForHandler(handler);
|
|
39181
|
-
let last, prevTag, nextTag;
|
|
39184
|
+
let last, prevTag = undefined, nextTag = undefined;
|
|
39185
|
+
|
|
39186
|
+
// Track position for better error messages
|
|
39187
|
+
let position = 0;
|
|
39188
|
+
const getLineColumn = (pos) => {
|
|
39189
|
+
const lines = this.html.slice(0, pos).split('\n');
|
|
39190
|
+
return { line: lines.length, column: lines[lines.length - 1].length + 1 };
|
|
39191
|
+
};
|
|
39192
|
+
|
|
39182
39193
|
while (html) {
|
|
39183
39194
|
last = html;
|
|
39184
39195
|
// Make sure we’re not in a `script` or `style` element
|
|
@@ -39296,8 +39307,27 @@ class HTMLParser {
|
|
|
39296
39307
|
}
|
|
39297
39308
|
|
|
39298
39309
|
if (html === last) {
|
|
39299
|
-
|
|
39310
|
+
if (handler.continueOnParseError) {
|
|
39311
|
+
// Skip the problematic character and continue
|
|
39312
|
+
if (handler.chars) {
|
|
39313
|
+
await handler.chars(html[0], prevTag, '');
|
|
39314
|
+
}
|
|
39315
|
+
html = html.substring(1);
|
|
39316
|
+
position++;
|
|
39317
|
+
prevTag = '';
|
|
39318
|
+
continue;
|
|
39319
|
+
}
|
|
39320
|
+
const loc = getLineColumn(position);
|
|
39321
|
+
// Include some context before the error position so the snippet contains
|
|
39322
|
+
// the offending markup plus preceding characters (e.g. "invalid<tag").
|
|
39323
|
+
const CONTEXT_BEFORE = 50;
|
|
39324
|
+
const startPos = Math.max(0, position - CONTEXT_BEFORE);
|
|
39325
|
+
const snippet = this.html.slice(startPos, startPos + 200).replace(/\n/g, ' ');
|
|
39326
|
+
throw new Error(
|
|
39327
|
+
`Parse error at line ${loc.line}, column ${loc.column}:\n${snippet}${this.html.length > startPos + 200 ? '…' : ''}`
|
|
39328
|
+
);
|
|
39300
39329
|
}
|
|
39330
|
+
position = this.html.length - html.length;
|
|
39301
39331
|
}
|
|
39302
39332
|
|
|
39303
39333
|
if (!handler.partialMarkup) {
|
|
@@ -39314,10 +39344,77 @@ class HTMLParser {
|
|
|
39314
39344
|
};
|
|
39315
39345
|
input = input.slice(start[0].length);
|
|
39316
39346
|
let end, attr;
|
|
39317
|
-
|
|
39347
|
+
|
|
39348
|
+
// Safety limit: max length of input to check for attributes
|
|
39349
|
+
// Protects against catastrophic backtracking on massive attribute values
|
|
39350
|
+
const MAX_ATTR_PARSE_LENGTH = 20000; // 20 KB should be enough for any reasonable tag
|
|
39351
|
+
|
|
39352
|
+
while (true) {
|
|
39353
|
+
// Check for closing tag first
|
|
39354
|
+
end = input.match(startTagClose);
|
|
39355
|
+
if (end) {
|
|
39356
|
+
break;
|
|
39357
|
+
}
|
|
39358
|
+
|
|
39359
|
+
// Limit the input length we pass to the regex to prevent catastrophic backtracking
|
|
39360
|
+
const isLimited = input.length > MAX_ATTR_PARSE_LENGTH;
|
|
39361
|
+
const searchInput = isLimited ? input.slice(0, MAX_ATTR_PARSE_LENGTH) : input;
|
|
39362
|
+
|
|
39363
|
+
attr = searchInput.match(attribute);
|
|
39364
|
+
|
|
39365
|
+
// If we limited the input and got a match, check if the value might be truncated
|
|
39366
|
+
if (attr && isLimited) {
|
|
39367
|
+
// Check if the attribute value extends beyond our search window
|
|
39368
|
+
const attrEnd = attr[0].length;
|
|
39369
|
+
// If the match ends near the limit, the value might be truncated
|
|
39370
|
+
if (attrEnd > MAX_ATTR_PARSE_LENGTH - 100) {
|
|
39371
|
+
// Manually extract this attribute to handle potentially huge value
|
|
39372
|
+
const manualMatch = input.match(/^\s*([^\s"'<>/=]+)\s*=\s*/);
|
|
39373
|
+
if (manualMatch) {
|
|
39374
|
+
const quoteChar = input[manualMatch[0].length];
|
|
39375
|
+
if (quoteChar === '"' || quoteChar === "'") {
|
|
39376
|
+
const closeQuote = input.indexOf(quoteChar, manualMatch[0].length + 1);
|
|
39377
|
+
if (closeQuote !== -1) {
|
|
39378
|
+
const fullAttr = input.slice(0, closeQuote + 1);
|
|
39379
|
+
const numCustomParts = handler.customAttrSurround
|
|
39380
|
+
? handler.customAttrSurround.length * NCP
|
|
39381
|
+
: 0;
|
|
39382
|
+
const baseIndex = 1 + numCustomParts;
|
|
39383
|
+
|
|
39384
|
+
attr = [];
|
|
39385
|
+
attr[0] = fullAttr;
|
|
39386
|
+
attr[baseIndex] = manualMatch[1]; // Attribute name
|
|
39387
|
+
attr[baseIndex + 1] = '='; // customAssign (falls back to “=” for huge attributes)
|
|
39388
|
+
const value = input.slice(manualMatch[0].length + 1, closeQuote);
|
|
39389
|
+
// Place value at correct index based on quote type
|
|
39390
|
+
if (quoteChar === '"') {
|
|
39391
|
+
attr[baseIndex + 2] = value; // Double-quoted value
|
|
39392
|
+
} else {
|
|
39393
|
+
attr[baseIndex + 3] = value; // Single-quoted value
|
|
39394
|
+
}
|
|
39395
|
+
input = input.slice(fullAttr.length);
|
|
39396
|
+
match.attrs.push(attr);
|
|
39397
|
+
continue;
|
|
39398
|
+
}
|
|
39399
|
+
}
|
|
39400
|
+
// Note: Unquoted attribute values are intentionally not handled here.
|
|
39401
|
+
// Per HTML spec, unquoted values cannot contain spaces or special chars,
|
|
39402
|
+
// making a 20 KB+ unquoted value practically impossible. If encountered,
|
|
39403
|
+
// it’s malformed HTML and using the truncated regex match is acceptable.
|
|
39404
|
+
}
|
|
39405
|
+
}
|
|
39406
|
+
}
|
|
39407
|
+
|
|
39408
|
+
if (!attr) {
|
|
39409
|
+
break;
|
|
39410
|
+
}
|
|
39411
|
+
|
|
39318
39412
|
input = input.slice(attr[0].length);
|
|
39319
39413
|
match.attrs.push(attr);
|
|
39320
39414
|
}
|
|
39415
|
+
|
|
39416
|
+
// Check for closing tag
|
|
39417
|
+
end = input.match(startTagClose);
|
|
39321
39418
|
if (end) {
|
|
39322
39419
|
match.unarySlash = end[1];
|
|
39323
39420
|
match.rest = input.slice(end[0].length);
|
|
@@ -39410,7 +39507,6 @@ class HTMLParser {
|
|
|
39410
39507
|
|
|
39411
39508
|
const attrs = match.attrs.map(function (args) {
|
|
39412
39509
|
let name, value, customOpen, customClose, customAssign, quote;
|
|
39413
|
-
const ncp = 7; // Number of captured parts, scalar
|
|
39414
39510
|
|
|
39415
39511
|
// Hackish workaround for FF bug https://bugzilla.mozilla.org/show_bug.cgi?id=369778
|
|
39416
39512
|
if (IS_REGEX_CAPTURING_BROKEN && args[0].indexOf('""') === -1) {
|
|
@@ -39438,7 +39534,7 @@ class HTMLParser {
|
|
|
39438
39534
|
|
|
39439
39535
|
let j = 1;
|
|
39440
39536
|
if (handler.customAttrSurround) {
|
|
39441
|
-
for (let i = 0, l = handler.customAttrSurround.length; i < l; i++, j += ncp) {
|
|
39537
|
+
for (let i = 0, l = handler.customAttrSurround.length; i < l; i++, j += NCP) {
|
|
39442
39538
|
name = args[j + 1];
|
|
39443
39539
|
if (name) {
|
|
39444
39540
|
quote = populate(j + 2);
|
|
@@ -40601,8 +40697,11 @@ async function createSortFns(value, options, uidIgnore, uidAttr) {
|
|
|
40601
40697
|
currentTag = '';
|
|
40602
40698
|
},
|
|
40603
40699
|
chars: async function (text) {
|
|
40700
|
+
// Only recursively scan HTML content, not JSON-LD or other non-HTML script types
|
|
40701
|
+
// `scan()` is for analyzing HTML attribute order, not for parsing JSON
|
|
40604
40702
|
if (options.processScripts && specialContentTags.has(currentTag) &&
|
|
40605
|
-
options.processScripts.indexOf(currentType) > -1) {
|
|
40703
|
+
options.processScripts.indexOf(currentType) > -1 &&
|
|
40704
|
+
currentType === 'text/html') {
|
|
40606
40705
|
await scan(text);
|
|
40607
40706
|
}
|
|
40608
40707
|
}
|
|
@@ -40615,7 +40714,8 @@ async function createSortFns(value, options, uidIgnore, uidAttr) {
|
|
|
40615
40714
|
options.log = identity;
|
|
40616
40715
|
options.sortAttributes = false;
|
|
40617
40716
|
options.sortClassName = false;
|
|
40618
|
-
|
|
40717
|
+
const firstPassOutput = await minifyHTML(value, options);
|
|
40718
|
+
await scan(firstPassOutput);
|
|
40619
40719
|
options.log = log;
|
|
40620
40720
|
if (attrChains) {
|
|
40621
40721
|
const attrSorters = Object.create(null);
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"htmlminifier.d.ts","sourceRoot":"","sources":["../../src/htmlminifier.js"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"htmlminifier.d.ts","sourceRoot":"","sources":["../../src/htmlminifier.js"],"names":[],"mappings":"AAu/CO,8BAJI,MAAM,YACN,eAAe,GACb,OAAO,CAAC,MAAM,CAAC,CAQ3B;;;;;;;;;;;;UAUS,MAAM;YACN,MAAM;YACN,MAAM;mBACN,MAAM;iBACN,MAAM;kBACN,MAAM;;;;;;;;;;;;;4BAQN,CAAC,GAAG,EAAE,MAAM,EAAE,KAAK,EAAE,aAAa,EAAE,EAAE,qBAAqB,EAAE,CAAC,GAAG,EAAE,MAAM,KAAK,OAAO,KAAK,OAAO;;;;;;;wBAMjG,CAAC,GAAG,EAAE,MAAM,GAAG,IAAI,EAAE,KAAK,EAAE,aAAa,EAAE,GAAG,SAAS,EAAE,iBAAiB,EAAE,CAAC,GAAG,EAAE,MAAM,KAAK,OAAO,KAAK,OAAO;;;;;;;;oBAMhH,OAAO;;;;;;;;gCAOP,OAAO;;;;;;;;kCAOP,OAAO;;;;;;;;yBAOP,OAAO;;;;;;;;2BAOP,OAAO;;;;;;;;4BAOP,OAAO;;;;;;;2BAOP,OAAO;;;;;;;;uBAMP,MAAM,EAAE;;;;;;yBAOR,MAAM;;;;;;yBAKN,CAAC,MAAM,EAAE,MAAM,CAAC,EAAE;;;;;;;4BAKlB,MAAM,EAAE;;;;;;;oCAMR,MAAM;;;;;;;qBAMN,OAAO;;;;;;;YAMP,OAAO;;;;;;;;2BAMP,MAAM,EAAE;;;;;;;;;4BAOR,MAAM,EAAE;;;;;;;+BAQR,OAAO;;;;;;;2BAMP,SAAS,CAAC,MAAM,CAAC;;;;;;uBAMjB,OAAO;;;;;;;;UAKP,CAAC,OAAO,EAAE,OAAO,KAAK,IAAI;;;;;;;;qBAO1B,MAAM;;;;;;;oBAON,MAAM;;;;;;;;;;gBAMN,OAAO,GAAG,OAAO,CAAC,OAAO,cAAc,EAAE,gBAAgB,CAAC,OAAO,cAAc,EAAE,aAAa,CAAC,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,MAAM,EAAE,IAAI,CAAC,EAAE,MAAM,KAAK,OAAO,CAAC,MAAM,CAAC,GAAG,MAAM,CAAC;;;;;;;;;;eAS9J,OAAO,GAAG,OAAO,QAAQ,EAAE,aAAa,GAAG,CAAC,CAAC,IAAI,EAAE,MAAM,EAAE,MAAM,CAAC,EAAE,OAAO,KAAK,OAAO,CAAC,MAAM,CAAC,GAAG,MAAM,CAAC;;;;;;;;;;iBASzG,OAAO,GAAG,MAAM,GAAG,OAAO,WAAW,EAAE,OAAO,GAAG,CAAC,CAAC,IAAI,EAAE,MAAM,KAAK,OAAO,CAAC,MAAM,CAAC,GAAG,MAAM,CAAC;;;;;;;;WAS7F,CAAC,IAAI,EAAE,MAAM,KAAK,MAAM;;;;;;;+BAOxB,OAAO;;;;;;;;;;oBAMP,OAAO;;;;;;;;yBASP,OAAO;;;;;;;gCAOP,OAAO;;;;;;;;iCAMP,OAAO;;;;;;;;;;qBAOP,MAAM,EAAE;;;;;;;qBASR,IAAI,GAAG,GAAG;;;;;;;4BAMV,OAAO;;;;;;;;qBAMP,OAAO;;;;;;;;;4BAOP,OAAO,GAAG,CAAC,CAAC,QAAQ,EAAE,MAAM,EAAE,GAAG,EAAE,MAAM,KAAK,OAAO,CAAC;;;;;;;;0BAQtD,OAAO;;;;;;;;yBAOP,OAAO;;;;;;;;gCAOP,OAAO;;;;;;;iCAOP,OAAO;;;;;;;oCAMP,OAAO;;;;;;;;;;0BAMP,OAAO;;;;;;;;;qBASP,OAAO,GAAG,CAAC,CAAC,GAAG,EAAE,MAAM,EAAE,KAAK,EAAE,aAAa,EAAE,KAAK,IAAI,CAAC;;;;;;;;;oBAQzD,OAAO,GAAG,CAAC,CAAC,KAAK,EAAE,MAAM,KAAK,MAAM,CAAC;;;
;;;;;0BAQrC,OAAO;;;;;;;sBAOP,OAAO;;wBAh1DkC,cAAc;0BAAd,cAAc;+BAAd,cAAc"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"htmlparser.d.ts","sourceRoot":"","sources":["../../src/htmlparser.js"],"names":[],"mappings":"AAgDA,4BAAoE;
|
|
1
|
+
{"version":3,"file":"htmlparser.d.ts","sourceRoot":"","sources":["../../src/htmlparser.js"],"names":[],"mappings":"AAgDA,4BAAoE;AA4DpE;IACE,qCAGC;IAFC,UAAgB;IAChB,aAAsB;IAGxB,uBA6bC;CACF"}
|
package/package.json
CHANGED
package/src/htmlminifier.js
CHANGED
|
@@ -947,8 +947,11 @@ async function createSortFns(value, options, uidIgnore, uidAttr) {
|
|
|
947
947
|
currentTag = '';
|
|
948
948
|
},
|
|
949
949
|
chars: async function (text) {
|
|
950
|
+
// Only recursively scan HTML content, not JSON-LD or other non-HTML script types
|
|
951
|
+
// `scan()` is for analyzing HTML attribute order, not for parsing JSON
|
|
950
952
|
if (options.processScripts && specialContentTags.has(currentTag) &&
|
|
951
|
-
options.processScripts.indexOf(currentType) > -1) {
|
|
953
|
+
options.processScripts.indexOf(currentType) > -1 &&
|
|
954
|
+
currentType === 'text/html') {
|
|
952
955
|
await scan(text);
|
|
953
956
|
}
|
|
954
957
|
}
|
|
@@ -961,7 +964,8 @@ async function createSortFns(value, options, uidIgnore, uidAttr) {
|
|
|
961
964
|
options.log = identity;
|
|
962
965
|
options.sortAttributes = false;
|
|
963
966
|
options.sortClassName = false;
|
|
964
|
-
|
|
967
|
+
const firstPassOutput = await minifyHTML(value, options);
|
|
968
|
+
await scan(firstPassOutput);
|
|
965
969
|
options.log = log;
|
|
966
970
|
if (attrChains) {
|
|
967
971
|
const attrSorters = Object.create(null);
|
package/src/htmlparser.js
CHANGED
|
@@ -103,6 +103,9 @@ function joinSingleAttrAssigns(handler) {
|
|
|
103
103
|
}).join('|');
|
|
104
104
|
}
|
|
105
105
|
|
|
106
|
+
// Number of captured parts per `customAttrSurround` pattern
|
|
107
|
+
const NCP = 7;
|
|
108
|
+
|
|
106
109
|
export class HTMLParser {
|
|
107
110
|
constructor(html, handler) {
|
|
108
111
|
this.html = html;
|
|
@@ -115,7 +118,15 @@ export class HTMLParser {
|
|
|
115
118
|
|
|
116
119
|
const stack = []; let lastTag;
|
|
117
120
|
const attribute = attrForHandler(handler);
|
|
118
|
-
let last, prevTag, nextTag;
|
|
121
|
+
let last, prevTag = undefined, nextTag = undefined;
|
|
122
|
+
|
|
123
|
+
// Track position for better error messages
|
|
124
|
+
let position = 0;
|
|
125
|
+
const getLineColumn = (pos) => {
|
|
126
|
+
const lines = this.html.slice(0, pos).split('\n');
|
|
127
|
+
return { line: lines.length, column: lines[lines.length - 1].length + 1 };
|
|
128
|
+
};
|
|
129
|
+
|
|
119
130
|
while (html) {
|
|
120
131
|
last = html;
|
|
121
132
|
// Make sure we’re not in a `script` or `style` element
|
|
@@ -233,8 +244,27 @@ export class HTMLParser {
|
|
|
233
244
|
}
|
|
234
245
|
|
|
235
246
|
if (html === last) {
|
|
236
|
-
|
|
247
|
+
if (handler.continueOnParseError) {
|
|
248
|
+
// Skip the problematic character and continue
|
|
249
|
+
if (handler.chars) {
|
|
250
|
+
await handler.chars(html[0], prevTag, '');
|
|
251
|
+
}
|
|
252
|
+
html = html.substring(1);
|
|
253
|
+
position++;
|
|
254
|
+
prevTag = '';
|
|
255
|
+
continue;
|
|
256
|
+
}
|
|
257
|
+
const loc = getLineColumn(position);
|
|
258
|
+
// Include some context before the error position so the snippet contains
|
|
259
|
+
// the offending markup plus preceding characters (e.g. "invalid<tag").
|
|
260
|
+
const CONTEXT_BEFORE = 50;
|
|
261
|
+
const startPos = Math.max(0, position - CONTEXT_BEFORE);
|
|
262
|
+
const snippet = this.html.slice(startPos, startPos + 200).replace(/\n/g, ' ');
|
|
263
|
+
throw new Error(
|
|
264
|
+
`Parse error at line ${loc.line}, column ${loc.column}:\n${snippet}${this.html.length > startPos + 200 ? '…' : ''}`
|
|
265
|
+
);
|
|
237
266
|
}
|
|
267
|
+
position = this.html.length - html.length;
|
|
238
268
|
}
|
|
239
269
|
|
|
240
270
|
if (!handler.partialMarkup) {
|
|
@@ -251,10 +281,77 @@ export class HTMLParser {
|
|
|
251
281
|
};
|
|
252
282
|
input = input.slice(start[0].length);
|
|
253
283
|
let end, attr;
|
|
254
|
-
|
|
284
|
+
|
|
285
|
+
// Safety limit: max length of input to check for attributes
|
|
286
|
+
// Protects against catastrophic backtracking on massive attribute values
|
|
287
|
+
const MAX_ATTR_PARSE_LENGTH = 20000; // 20 KB should be enough for any reasonable tag
|
|
288
|
+
|
|
289
|
+
while (true) {
|
|
290
|
+
// Check for closing tag first
|
|
291
|
+
end = input.match(startTagClose);
|
|
292
|
+
if (end) {
|
|
293
|
+
break;
|
|
294
|
+
}
|
|
295
|
+
|
|
296
|
+
// Limit the input length we pass to the regex to prevent catastrophic backtracking
|
|
297
|
+
const isLimited = input.length > MAX_ATTR_PARSE_LENGTH;
|
|
298
|
+
const searchInput = isLimited ? input.slice(0, MAX_ATTR_PARSE_LENGTH) : input;
|
|
299
|
+
|
|
300
|
+
attr = searchInput.match(attribute);
|
|
301
|
+
|
|
302
|
+
// If we limited the input and got a match, check if the value might be truncated
|
|
303
|
+
if (attr && isLimited) {
|
|
304
|
+
// Check if the attribute value extends beyond our search window
|
|
305
|
+
const attrEnd = attr[0].length;
|
|
306
|
+
// If the match ends near the limit, the value might be truncated
|
|
307
|
+
if (attrEnd > MAX_ATTR_PARSE_LENGTH - 100) {
|
|
308
|
+
// Manually extract this attribute to handle potentially huge value
|
|
309
|
+
const manualMatch = input.match(/^\s*([^\s"'<>/=]+)\s*=\s*/);
|
|
310
|
+
if (manualMatch) {
|
|
311
|
+
const quoteChar = input[manualMatch[0].length];
|
|
312
|
+
if (quoteChar === '"' || quoteChar === "'") {
|
|
313
|
+
const closeQuote = input.indexOf(quoteChar, manualMatch[0].length + 1);
|
|
314
|
+
if (closeQuote !== -1) {
|
|
315
|
+
const fullAttr = input.slice(0, closeQuote + 1);
|
|
316
|
+
const numCustomParts = handler.customAttrSurround
|
|
317
|
+
? handler.customAttrSurround.length * NCP
|
|
318
|
+
: 0;
|
|
319
|
+
const baseIndex = 1 + numCustomParts;
|
|
320
|
+
|
|
321
|
+
attr = [];
|
|
322
|
+
attr[0] = fullAttr;
|
|
323
|
+
attr[baseIndex] = manualMatch[1]; // Attribute name
|
|
324
|
+
attr[baseIndex + 1] = '='; // customAssign (falls back to “=” for huge attributes)
|
|
325
|
+
const value = input.slice(manualMatch[0].length + 1, closeQuote);
|
|
326
|
+
// Place value at correct index based on quote type
|
|
327
|
+
if (quoteChar === '"') {
|
|
328
|
+
attr[baseIndex + 2] = value; // Double-quoted value
|
|
329
|
+
} else {
|
|
330
|
+
attr[baseIndex + 3] = value; // Single-quoted value
|
|
331
|
+
}
|
|
332
|
+
input = input.slice(fullAttr.length);
|
|
333
|
+
match.attrs.push(attr);
|
|
334
|
+
continue;
|
|
335
|
+
}
|
|
336
|
+
}
|
|
337
|
+
// Note: Unquoted attribute values are intentionally not handled here.
|
|
338
|
+
// Per HTML spec, unquoted values cannot contain spaces or special chars,
|
|
339
|
+
// making a 20 KB+ unquoted value practically impossible. If encountered,
|
|
340
|
+
// it’s malformed HTML and using the truncated regex match is acceptable.
|
|
341
|
+
}
|
|
342
|
+
}
|
|
343
|
+
}
|
|
344
|
+
|
|
345
|
+
if (!attr) {
|
|
346
|
+
break;
|
|
347
|
+
}
|
|
348
|
+
|
|
255
349
|
input = input.slice(attr[0].length);
|
|
256
350
|
match.attrs.push(attr);
|
|
257
351
|
}
|
|
352
|
+
|
|
353
|
+
// Check for closing tag
|
|
354
|
+
end = input.match(startTagClose);
|
|
258
355
|
if (end) {
|
|
259
356
|
match.unarySlash = end[1];
|
|
260
357
|
match.rest = input.slice(end[0].length);
|
|
@@ -347,7 +444,6 @@ export class HTMLParser {
|
|
|
347
444
|
|
|
348
445
|
const attrs = match.attrs.map(function (args) {
|
|
349
446
|
let name, value, customOpen, customClose, customAssign, quote;
|
|
350
|
-
const ncp = 7; // Number of captured parts, scalar
|
|
351
447
|
|
|
352
448
|
// Hackish workaround for FF bug https://bugzilla.mozilla.org/show_bug.cgi?id=369778
|
|
353
449
|
if (IS_REGEX_CAPTURING_BROKEN && args[0].indexOf('""') === -1) {
|
|
@@ -375,7 +471,7 @@ export class HTMLParser {
|
|
|
375
471
|
|
|
376
472
|
let j = 1;
|
|
377
473
|
if (handler.customAttrSurround) {
|
|
378
|
-
for (let i = 0, l = handler.customAttrSurround.length; i < l; i++, j += ncp) {
|
|
474
|
+
for (let i = 0, l = handler.customAttrSurround.length; i < l; i++, j += NCP) {
|
|
379
475
|
name = args[j + 1];
|
|
380
476
|
if (name) {
|
|
381
477
|
quote = populate(j + 2);
|
package/src/utils.js
CHANGED