html-minifier-next 4.5.1 → 4.6.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +28 -24
- package/dist/htmlminifier.cjs +155 -11
- package/dist/htmlminifier.esm.bundle.js +155 -11
- package/dist/types/htmlminifier.d.ts.map +1 -1
- package/dist/types/htmlparser.d.ts.map +1 -1
- package/package.json +1 -1
- package/src/htmlminifier.js +54 -6
- package/src/htmlparser.js +101 -5
- package/src/utils.js +1 -1
package/README.md
CHANGED
|
@@ -107,7 +107,7 @@ For lint-like capabilities, take a look at [HTMLLint](https://github.com/kangax/
|
|
|
107
107
|
HTML Minifier Next provides presets for common use cases. Presets are pre-configured option sets that can be used as a starting point:
|
|
108
108
|
|
|
109
109
|
* `conservative`: Safe minification suitable for most projects. Includes whitespace collapsing, comment removal, and doctype normalization.
|
|
110
|
-
* `comprehensive`: Aggressive minification for maximum file size reduction. Includes
|
|
110
|
+
* `comprehensive`: Aggressive minification for maximum file size reduction. Includes relevant conservative options plus attribute quote removal, optional tag removal, and more.
|
|
111
111
|
|
|
112
112
|
**Using presets:**
|
|
113
113
|
|
|
@@ -223,29 +223,33 @@ const result = await minify(html, {
|
|
|
223
223
|
|
|
224
224
|
## Minification comparison
|
|
225
225
|
|
|
226
|
-
How does HTML Minifier Next compare to other
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
|
230
|
-
|
|
|
231
|
-
| [
|
|
232
|
-
| [Apple](https://www.apple.com/) |
|
|
233
|
-
| [BBC](https://www.bbc.co.uk/) |
|
|
234
|
-
| [
|
|
235
|
-
| [
|
|
236
|
-
| [
|
|
237
|
-
| [
|
|
238
|
-
| [
|
|
239
|
-
| [
|
|
240
|
-
| [
|
|
241
|
-
| [
|
|
242
|
-
| [
|
|
243
|
-
| [
|
|
244
|
-
| [
|
|
245
|
-
| [
|
|
246
|
-
| [
|
|
247
|
-
| [
|
|
248
|
-
| [
|
|
226
|
+
How does HTML Minifier Next compare to other minifiers, like [htmlnano](https://github.com/posthtml/htmlnano), [@swc/html](https://github.com/swc-project/swc), [minify-html](https://github.com/wilsonzlin/minify-html), [minimize](https://github.com/Swaagie/minimize), and [htmlcompressor.com](https://htmlcompressor.com/)? (All with the most aggressive settings, though without [hyper-optimization](https://meiert.com/blog/the-ways-of-writing-html/#toc-hyper-optimized).)
|
|
227
|
+
|
|
228
|
+
<!-- Auto-generated benchmarks, don’t edit -->
|
|
229
|
+
| Site | Original Size (KB) | HTML Minifier Next | htmlnano | @swc/html | minify-html | minimize | htmlcompressor.com |
|
|
230
|
+
| --- | --- | --- | --- | --- | --- | --- | --- |
|
|
231
|
+
| [A List Apart](https://alistapart.com/) | 62 | **52** | 54 | 55 | 55 | 58 | 56 |
|
|
232
|
+
| [Apple](https://www.apple.com/) | 190 | **146** | 166 | 169 | 172 | 175 | 172 |
|
|
233
|
+
| [BBC](https://www.bbc.co.uk/) | 673 | **613** | 633 | 633 | 634 | 668 | n/a |
|
|
234
|
+
| [Codeberg](https://codeberg.org/) | 33 | 29 | **27** | 30 | 30 | 30 | 30 |
|
|
235
|
+
| [CSS-Tricks](https://css-tricks.com/) | 165 | **125** | 129 | 146 | 146 | 151 | 148 |
|
|
236
|
+
| [ECMAScript](https://tc39.es/ecma262/) | 7238 | **6341** | 6561 | 6444 | 6567 | 6615 | n/a |
|
|
237
|
+
| [EFF](https://www.eff.org/) | 54 | **46** | 49 | 47 | 47 | 49 | 49 |
|
|
238
|
+
| [FAZ](https://www.faz.net/aktuell/) | 1609 | 1500 | **1431** | 1532 | 1544 | 1555 | n/a |
|
|
239
|
+
| [Frontend Dogma](https://frontenddogma.com/) | 220 | **211** | 232 | 217 | 219 | 237 | 218 |
|
|
240
|
+
| [Google](https://www.google.com/) | 18 | **17** | **17** | **17** | n/a | 18 | 18 |
|
|
241
|
+
| [Ground News](https://ground.news/) | 2358 | **2067** | 2169 | 2199 | n/a | 2345 | n/a |
|
|
242
|
+
| [HTML Living Standard](https://html.spec.whatwg.org/multipage/) | 149 | **147** | 153 | **147** | 149 | 155 | 148 |
|
|
243
|
+
| [Leanpub](https://leanpub.com/) | 1348 | **1142** | 1149 | 1148 | n/a | 1343 | n/a |
|
|
244
|
+
| [Mastodon](https://mastodon.social/explore) | 35 | **26** | 30 | 33 | 33 | 34 | 34 |
|
|
245
|
+
| [MDN](https://developer.mozilla.org/en-US/) | 107 | **62** | 64 | 64 | n/a | 67 | 67 |
|
|
246
|
+
| [Middle East Eye](https://www.middleeasteye.net/) | 224 | **197** | 204 | 202 | 202 | 204 | 205 |
|
|
247
|
+
| [SitePoint](https://www.sitepoint.com/) | 492 | **350** | 426 | 465 | 472 | 488 | n/a |
|
|
248
|
+
| [United Nations](https://www.un.org/en/) | 151 | **113** | 121 | 125 | 125 | 130 | 123 |
|
|
249
|
+
| [W3C](https://www.w3.org/) | 50 | **36** | 38 | 38 | 38 | 40 | 38 |
|
|
250
|
+
|
|
251
|
+
(Last updated: Dec 1, 2025)
|
|
252
|
+
<!-- End auto-generated -->
|
|
249
253
|
|
|
250
254
|
## Examples
|
|
251
255
|
|
package/dist/htmlminifier.cjs
CHANGED
|
@@ -113,6 +113,9 @@ function joinSingleAttrAssigns(handler) {
|
|
|
113
113
|
}).join('|');
|
|
114
114
|
}
|
|
115
115
|
|
|
116
|
+
// Number of captured parts per `customAttrSurround` pattern
|
|
117
|
+
const NCP = 7;
|
|
118
|
+
|
|
116
119
|
class HTMLParser {
|
|
117
120
|
constructor(html, handler) {
|
|
118
121
|
this.html = html;
|
|
@@ -125,7 +128,15 @@ class HTMLParser {
|
|
|
125
128
|
|
|
126
129
|
const stack = []; let lastTag;
|
|
127
130
|
const attribute = attrForHandler(handler);
|
|
128
|
-
let last, prevTag, nextTag;
|
|
131
|
+
let last, prevTag = undefined, nextTag = undefined;
|
|
132
|
+
|
|
133
|
+
// Track position for better error messages
|
|
134
|
+
let position = 0;
|
|
135
|
+
const getLineColumn = (pos) => {
|
|
136
|
+
const lines = this.html.slice(0, pos).split('\n');
|
|
137
|
+
return { line: lines.length, column: lines[lines.length - 1].length + 1 };
|
|
138
|
+
};
|
|
139
|
+
|
|
129
140
|
while (html) {
|
|
130
141
|
last = html;
|
|
131
142
|
// Make sure we’re not in a `script` or `style` element
|
|
@@ -243,8 +254,27 @@ class HTMLParser {
|
|
|
243
254
|
}
|
|
244
255
|
|
|
245
256
|
if (html === last) {
|
|
246
|
-
|
|
257
|
+
if (handler.continueOnParseError) {
|
|
258
|
+
// Skip the problematic character and continue
|
|
259
|
+
if (handler.chars) {
|
|
260
|
+
await handler.chars(html[0], prevTag, '');
|
|
261
|
+
}
|
|
262
|
+
html = html.substring(1);
|
|
263
|
+
position++;
|
|
264
|
+
prevTag = '';
|
|
265
|
+
continue;
|
|
266
|
+
}
|
|
267
|
+
const loc = getLineColumn(position);
|
|
268
|
+
// Include some context before the error position so the snippet contains
|
|
269
|
+
// the offending markup plus preceding characters (e.g. "invalid<tag").
|
|
270
|
+
const CONTEXT_BEFORE = 50;
|
|
271
|
+
const startPos = Math.max(0, position - CONTEXT_BEFORE);
|
|
272
|
+
const snippet = this.html.slice(startPos, startPos + 200).replace(/\n/g, ' ');
|
|
273
|
+
throw new Error(
|
|
274
|
+
`Parse error at line ${loc.line}, column ${loc.column}:\n${snippet}${this.html.length > startPos + 200 ? '…' : ''}`
|
|
275
|
+
);
|
|
247
276
|
}
|
|
277
|
+
position = this.html.length - html.length;
|
|
248
278
|
}
|
|
249
279
|
|
|
250
280
|
if (!handler.partialMarkup) {
|
|
@@ -261,10 +291,77 @@ class HTMLParser {
|
|
|
261
291
|
};
|
|
262
292
|
input = input.slice(start[0].length);
|
|
263
293
|
let end, attr;
|
|
264
|
-
|
|
294
|
+
|
|
295
|
+
// Safety limit: max length of input to check for attributes
|
|
296
|
+
// Protects against catastrophic backtracking on massive attribute values
|
|
297
|
+
const MAX_ATTR_PARSE_LENGTH = 20000; // 20 KB should be enough for any reasonable tag
|
|
298
|
+
|
|
299
|
+
while (true) {
|
|
300
|
+
// Check for closing tag first
|
|
301
|
+
end = input.match(startTagClose);
|
|
302
|
+
if (end) {
|
|
303
|
+
break;
|
|
304
|
+
}
|
|
305
|
+
|
|
306
|
+
// Limit the input length we pass to the regex to prevent catastrophic backtracking
|
|
307
|
+
const isLimited = input.length > MAX_ATTR_PARSE_LENGTH;
|
|
308
|
+
const searchInput = isLimited ? input.slice(0, MAX_ATTR_PARSE_LENGTH) : input;
|
|
309
|
+
|
|
310
|
+
attr = searchInput.match(attribute);
|
|
311
|
+
|
|
312
|
+
// If we limited the input and got a match, check if the value might be truncated
|
|
313
|
+
if (attr && isLimited) {
|
|
314
|
+
// Check if the attribute value extends beyond our search window
|
|
315
|
+
const attrEnd = attr[0].length;
|
|
316
|
+
// If the match ends near the limit, the value might be truncated
|
|
317
|
+
if (attrEnd > MAX_ATTR_PARSE_LENGTH - 100) {
|
|
318
|
+
// Manually extract this attribute to handle potentially huge value
|
|
319
|
+
const manualMatch = input.match(/^\s*([^\s"'<>/=]+)\s*=\s*/);
|
|
320
|
+
if (manualMatch) {
|
|
321
|
+
const quoteChar = input[manualMatch[0].length];
|
|
322
|
+
if (quoteChar === '"' || quoteChar === "'") {
|
|
323
|
+
const closeQuote = input.indexOf(quoteChar, manualMatch[0].length + 1);
|
|
324
|
+
if (closeQuote !== -1) {
|
|
325
|
+
const fullAttr = input.slice(0, closeQuote + 1);
|
|
326
|
+
const numCustomParts = handler.customAttrSurround
|
|
327
|
+
? handler.customAttrSurround.length * NCP
|
|
328
|
+
: 0;
|
|
329
|
+
const baseIndex = 1 + numCustomParts;
|
|
330
|
+
|
|
331
|
+
attr = [];
|
|
332
|
+
attr[0] = fullAttr;
|
|
333
|
+
attr[baseIndex] = manualMatch[1]; // Attribute name
|
|
334
|
+
attr[baseIndex + 1] = '='; // customAssign (falls back to “=” for huge attributes)
|
|
335
|
+
const value = input.slice(manualMatch[0].length + 1, closeQuote);
|
|
336
|
+
// Place value at correct index based on quote type
|
|
337
|
+
if (quoteChar === '"') {
|
|
338
|
+
attr[baseIndex + 2] = value; // Double-quoted value
|
|
339
|
+
} else {
|
|
340
|
+
attr[baseIndex + 3] = value; // Single-quoted value
|
|
341
|
+
}
|
|
342
|
+
input = input.slice(fullAttr.length);
|
|
343
|
+
match.attrs.push(attr);
|
|
344
|
+
continue;
|
|
345
|
+
}
|
|
346
|
+
}
|
|
347
|
+
// Note: Unquoted attribute values are intentionally not handled here.
|
|
348
|
+
// Per HTML spec, unquoted values cannot contain spaces or special chars,
|
|
349
|
+
// making a 20 KB+ unquoted value practically impossible. If encountered,
|
|
350
|
+
// it’s malformed HTML and using the truncated regex match is acceptable.
|
|
351
|
+
}
|
|
352
|
+
}
|
|
353
|
+
}
|
|
354
|
+
|
|
355
|
+
if (!attr) {
|
|
356
|
+
break;
|
|
357
|
+
}
|
|
358
|
+
|
|
265
359
|
input = input.slice(attr[0].length);
|
|
266
360
|
match.attrs.push(attr);
|
|
267
361
|
}
|
|
362
|
+
|
|
363
|
+
// Check for closing tag
|
|
364
|
+
end = input.match(startTagClose);
|
|
268
365
|
if (end) {
|
|
269
366
|
match.unarySlash = end[1];
|
|
270
367
|
match.rest = input.slice(end[0].length);
|
|
@@ -357,7 +454,6 @@ class HTMLParser {
|
|
|
357
454
|
|
|
358
455
|
const attrs = match.attrs.map(function (args) {
|
|
359
456
|
let name, value, customOpen, customClose, customAssign, quote;
|
|
360
|
-
const ncp = 7; // Number of captured parts, scalar
|
|
361
457
|
|
|
362
458
|
// Hackish workaround for FF bug https://bugzilla.mozilla.org/show_bug.cgi?id=369778
|
|
363
459
|
if (IS_REGEX_CAPTURING_BROKEN && args[0].indexOf('""') === -1) {
|
|
@@ -385,7 +481,7 @@ class HTMLParser {
|
|
|
385
481
|
|
|
386
482
|
let j = 1;
|
|
387
483
|
if (handler.customAttrSurround) {
|
|
388
|
-
for (let i = 0, l = handler.customAttrSurround.length; i < l; i++, j +=
|
|
484
|
+
for (let i = 0, l = handler.customAttrSurround.length; i < l; i++, j += NCP) {
|
|
389
485
|
name = args[j + 1];
|
|
390
486
|
if (name) {
|
|
391
487
|
quote = populate(j + 2);
|
|
@@ -1032,11 +1128,55 @@ async function cleanConditionalComment(comment, options) {
|
|
|
1032
1128
|
: comment;
|
|
1033
1129
|
}
|
|
1034
1130
|
|
|
1131
|
+
const jsonScriptTypes = new Set([
|
|
1132
|
+
'application/json',
|
|
1133
|
+
'application/ld+json',
|
|
1134
|
+
'application/manifest+json',
|
|
1135
|
+
'application/vnd.geo+json',
|
|
1136
|
+
'importmap',
|
|
1137
|
+
'speculationrules',
|
|
1138
|
+
]);
|
|
1139
|
+
|
|
1140
|
+
function minifyJson(text, options) {
|
|
1141
|
+
try {
|
|
1142
|
+
return JSON.stringify(JSON.parse(text));
|
|
1143
|
+
}
|
|
1144
|
+
catch (err) {
|
|
1145
|
+
if (!options.continueOnMinifyError) {
|
|
1146
|
+
throw err;
|
|
1147
|
+
}
|
|
1148
|
+
options.log && options.log(err);
|
|
1149
|
+
return text;
|
|
1150
|
+
}
|
|
1151
|
+
}
|
|
1152
|
+
|
|
1153
|
+
function hasJsonScriptType(attrs) {
|
|
1154
|
+
for (let i = 0, len = attrs.length; i < len; i++) {
|
|
1155
|
+
const attrName = attrs[i].name.toLowerCase();
|
|
1156
|
+
if (attrName === 'type') {
|
|
1157
|
+
const attrValue = trimWhitespace((attrs[i].value || '').split(/;/, 2)[0]).toLowerCase();
|
|
1158
|
+
if (jsonScriptTypes.has(attrValue)) {
|
|
1159
|
+
return true;
|
|
1160
|
+
}
|
|
1161
|
+
}
|
|
1162
|
+
}
|
|
1163
|
+
return false;
|
|
1164
|
+
}
|
|
1165
|
+
|
|
1035
1166
|
async function processScript(text, options, currentAttrs) {
|
|
1036
1167
|
for (let i = 0, len = currentAttrs.length; i < len; i++) {
|
|
1037
|
-
|
|
1038
|
-
|
|
1039
|
-
|
|
1168
|
+
const attrName = currentAttrs[i].name.toLowerCase();
|
|
1169
|
+
if (attrName === 'type') {
|
|
1170
|
+
const rawValue = currentAttrs[i].value;
|
|
1171
|
+
const normalizedValue = trimWhitespace((rawValue || '').split(/;/, 2)[0]).toLowerCase();
|
|
1172
|
+
// Minify JSON script types automatically
|
|
1173
|
+
if (jsonScriptTypes.has(normalizedValue)) {
|
|
1174
|
+
return minifyJson(text, options);
|
|
1175
|
+
}
|
|
1176
|
+
// Process custom script types if specified
|
|
1177
|
+
if (options.processScripts && options.processScripts.indexOf(rawValue) > -1) {
|
|
1178
|
+
return await minifyHTML(text, options);
|
|
1179
|
+
}
|
|
1040
1180
|
}
|
|
1041
1181
|
}
|
|
1042
1182
|
return text;
|
|
@@ -1504,8 +1644,11 @@ async function createSortFns(value, options, uidIgnore, uidAttr) {
|
|
|
1504
1644
|
currentTag = '';
|
|
1505
1645
|
},
|
|
1506
1646
|
chars: async function (text) {
|
|
1647
|
+
// Only recursively scan HTML content, not JSON-LD or other non-HTML script types
|
|
1648
|
+
// `scan()` is for analyzing HTML attribute order, not for parsing JSON
|
|
1507
1649
|
if (options.processScripts && specialContentTags.has(currentTag) &&
|
|
1508
|
-
options.processScripts.indexOf(currentType) > -1
|
|
1650
|
+
options.processScripts.indexOf(currentType) > -1 &&
|
|
1651
|
+
currentType === 'text/html') {
|
|
1509
1652
|
await scan(text);
|
|
1510
1653
|
}
|
|
1511
1654
|
}
|
|
@@ -1518,7 +1661,8 @@ async function createSortFns(value, options, uidIgnore, uidAttr) {
|
|
|
1518
1661
|
options.log = identity;
|
|
1519
1662
|
options.sortAttributes = false;
|
|
1520
1663
|
options.sortClassName = false;
|
|
1521
|
-
|
|
1664
|
+
const firstPassOutput = await minifyHTML(value, options);
|
|
1665
|
+
await scan(firstPassOutput);
|
|
1522
1666
|
options.log = log;
|
|
1523
1667
|
if (attrChains) {
|
|
1524
1668
|
const attrSorters = Object.create(null);
|
|
@@ -1916,7 +2060,7 @@ async function minifyHTML(value, options, partialMarkup) {
|
|
|
1916
2060
|
text = collapseWhitespace(text, options, false, false, true);
|
|
1917
2061
|
}
|
|
1918
2062
|
}
|
|
1919
|
-
if (options.processScripts
|
|
2063
|
+
if (specialContentTags.has(currentTag) && (options.processScripts || hasJsonScriptType(currentAttrs))) {
|
|
1920
2064
|
text = await processScript(text, options, currentAttrs);
|
|
1921
2065
|
}
|
|
1922
2066
|
if (isExecutableScript(currentTag, currentAttrs)) {
|
|
@@ -39166,6 +39166,9 @@ function joinSingleAttrAssigns(handler) {
|
|
|
39166
39166
|
}).join('|');
|
|
39167
39167
|
}
|
|
39168
39168
|
|
|
39169
|
+
// Number of captured parts per `customAttrSurround` pattern
|
|
39170
|
+
const NCP = 7;
|
|
39171
|
+
|
|
39169
39172
|
class HTMLParser {
|
|
39170
39173
|
constructor(html, handler) {
|
|
39171
39174
|
this.html = html;
|
|
@@ -39178,7 +39181,15 @@ class HTMLParser {
|
|
|
39178
39181
|
|
|
39179
39182
|
const stack = []; let lastTag;
|
|
39180
39183
|
const attribute = attrForHandler(handler);
|
|
39181
|
-
let last, prevTag, nextTag;
|
|
39184
|
+
let last, prevTag = undefined, nextTag = undefined;
|
|
39185
|
+
|
|
39186
|
+
// Track position for better error messages
|
|
39187
|
+
let position = 0;
|
|
39188
|
+
const getLineColumn = (pos) => {
|
|
39189
|
+
const lines = this.html.slice(0, pos).split('\n');
|
|
39190
|
+
return { line: lines.length, column: lines[lines.length - 1].length + 1 };
|
|
39191
|
+
};
|
|
39192
|
+
|
|
39182
39193
|
while (html) {
|
|
39183
39194
|
last = html;
|
|
39184
39195
|
// Make sure we’re not in a `script` or `style` element
|
|
@@ -39296,8 +39307,27 @@ class HTMLParser {
|
|
|
39296
39307
|
}
|
|
39297
39308
|
|
|
39298
39309
|
if (html === last) {
|
|
39299
|
-
|
|
39310
|
+
if (handler.continueOnParseError) {
|
|
39311
|
+
// Skip the problematic character and continue
|
|
39312
|
+
if (handler.chars) {
|
|
39313
|
+
await handler.chars(html[0], prevTag, '');
|
|
39314
|
+
}
|
|
39315
|
+
html = html.substring(1);
|
|
39316
|
+
position++;
|
|
39317
|
+
prevTag = '';
|
|
39318
|
+
continue;
|
|
39319
|
+
}
|
|
39320
|
+
const loc = getLineColumn(position);
|
|
39321
|
+
// Include some context before the error position so the snippet contains
|
|
39322
|
+
// the offending markup plus preceding characters (e.g. "invalid<tag").
|
|
39323
|
+
const CONTEXT_BEFORE = 50;
|
|
39324
|
+
const startPos = Math.max(0, position - CONTEXT_BEFORE);
|
|
39325
|
+
const snippet = this.html.slice(startPos, startPos + 200).replace(/\n/g, ' ');
|
|
39326
|
+
throw new Error(
|
|
39327
|
+
`Parse error at line ${loc.line}, column ${loc.column}:\n${snippet}${this.html.length > startPos + 200 ? '…' : ''}`
|
|
39328
|
+
);
|
|
39300
39329
|
}
|
|
39330
|
+
position = this.html.length - html.length;
|
|
39301
39331
|
}
|
|
39302
39332
|
|
|
39303
39333
|
if (!handler.partialMarkup) {
|
|
@@ -39314,10 +39344,77 @@ class HTMLParser {
|
|
|
39314
39344
|
};
|
|
39315
39345
|
input = input.slice(start[0].length);
|
|
39316
39346
|
let end, attr;
|
|
39317
|
-
|
|
39347
|
+
|
|
39348
|
+
// Safety limit: max length of input to check for attributes
|
|
39349
|
+
// Protects against catastrophic backtracking on massive attribute values
|
|
39350
|
+
const MAX_ATTR_PARSE_LENGTH = 20000; // 20 KB should be enough for any reasonable tag
|
|
39351
|
+
|
|
39352
|
+
while (true) {
|
|
39353
|
+
// Check for closing tag first
|
|
39354
|
+
end = input.match(startTagClose);
|
|
39355
|
+
if (end) {
|
|
39356
|
+
break;
|
|
39357
|
+
}
|
|
39358
|
+
|
|
39359
|
+
// Limit the input length we pass to the regex to prevent catastrophic backtracking
|
|
39360
|
+
const isLimited = input.length > MAX_ATTR_PARSE_LENGTH;
|
|
39361
|
+
const searchInput = isLimited ? input.slice(0, MAX_ATTR_PARSE_LENGTH) : input;
|
|
39362
|
+
|
|
39363
|
+
attr = searchInput.match(attribute);
|
|
39364
|
+
|
|
39365
|
+
// If we limited the input and got a match, check if the value might be truncated
|
|
39366
|
+
if (attr && isLimited) {
|
|
39367
|
+
// Check if the attribute value extends beyond our search window
|
|
39368
|
+
const attrEnd = attr[0].length;
|
|
39369
|
+
// If the match ends near the limit, the value might be truncated
|
|
39370
|
+
if (attrEnd > MAX_ATTR_PARSE_LENGTH - 100) {
|
|
39371
|
+
// Manually extract this attribute to handle potentially huge value
|
|
39372
|
+
const manualMatch = input.match(/^\s*([^\s"'<>/=]+)\s*=\s*/);
|
|
39373
|
+
if (manualMatch) {
|
|
39374
|
+
const quoteChar = input[manualMatch[0].length];
|
|
39375
|
+
if (quoteChar === '"' || quoteChar === "'") {
|
|
39376
|
+
const closeQuote = input.indexOf(quoteChar, manualMatch[0].length + 1);
|
|
39377
|
+
if (closeQuote !== -1) {
|
|
39378
|
+
const fullAttr = input.slice(0, closeQuote + 1);
|
|
39379
|
+
const numCustomParts = handler.customAttrSurround
|
|
39380
|
+
? handler.customAttrSurround.length * NCP
|
|
39381
|
+
: 0;
|
|
39382
|
+
const baseIndex = 1 + numCustomParts;
|
|
39383
|
+
|
|
39384
|
+
attr = [];
|
|
39385
|
+
attr[0] = fullAttr;
|
|
39386
|
+
attr[baseIndex] = manualMatch[1]; // Attribute name
|
|
39387
|
+
attr[baseIndex + 1] = '='; // customAssign (falls back to “=” for huge attributes)
|
|
39388
|
+
const value = input.slice(manualMatch[0].length + 1, closeQuote);
|
|
39389
|
+
// Place value at correct index based on quote type
|
|
39390
|
+
if (quoteChar === '"') {
|
|
39391
|
+
attr[baseIndex + 2] = value; // Double-quoted value
|
|
39392
|
+
} else {
|
|
39393
|
+
attr[baseIndex + 3] = value; // Single-quoted value
|
|
39394
|
+
}
|
|
39395
|
+
input = input.slice(fullAttr.length);
|
|
39396
|
+
match.attrs.push(attr);
|
|
39397
|
+
continue;
|
|
39398
|
+
}
|
|
39399
|
+
}
|
|
39400
|
+
// Note: Unquoted attribute values are intentionally not handled here.
|
|
39401
|
+
// Per HTML spec, unquoted values cannot contain spaces or special chars,
|
|
39402
|
+
// making a 20 KB+ unquoted value practically impossible. If encountered,
|
|
39403
|
+
// it’s malformed HTML and using the truncated regex match is acceptable.
|
|
39404
|
+
}
|
|
39405
|
+
}
|
|
39406
|
+
}
|
|
39407
|
+
|
|
39408
|
+
if (!attr) {
|
|
39409
|
+
break;
|
|
39410
|
+
}
|
|
39411
|
+
|
|
39318
39412
|
input = input.slice(attr[0].length);
|
|
39319
39413
|
match.attrs.push(attr);
|
|
39320
39414
|
}
|
|
39415
|
+
|
|
39416
|
+
// Check for closing tag
|
|
39417
|
+
end = input.match(startTagClose);
|
|
39321
39418
|
if (end) {
|
|
39322
39419
|
match.unarySlash = end[1];
|
|
39323
39420
|
match.rest = input.slice(end[0].length);
|
|
@@ -39410,7 +39507,6 @@ class HTMLParser {
|
|
|
39410
39507
|
|
|
39411
39508
|
const attrs = match.attrs.map(function (args) {
|
|
39412
39509
|
let name, value, customOpen, customClose, customAssign, quote;
|
|
39413
|
-
const ncp = 7; // Number of captured parts, scalar
|
|
39414
39510
|
|
|
39415
39511
|
// Hackish workaround for FF bug https://bugzilla.mozilla.org/show_bug.cgi?id=369778
|
|
39416
39512
|
if (IS_REGEX_CAPTURING_BROKEN && args[0].indexOf('""') === -1) {
|
|
@@ -39438,7 +39534,7 @@ class HTMLParser {
|
|
|
39438
39534
|
|
|
39439
39535
|
let j = 1;
|
|
39440
39536
|
if (handler.customAttrSurround) {
|
|
39441
|
-
for (let i = 0, l = handler.customAttrSurround.length; i < l; i++, j +=
|
|
39537
|
+
for (let i = 0, l = handler.customAttrSurround.length; i < l; i++, j += NCP) {
|
|
39442
39538
|
name = args[j + 1];
|
|
39443
39539
|
if (name) {
|
|
39444
39540
|
quote = populate(j + 2);
|
|
@@ -40085,11 +40181,55 @@ async function cleanConditionalComment(comment, options) {
|
|
|
40085
40181
|
: comment;
|
|
40086
40182
|
}
|
|
40087
40183
|
|
|
40184
|
+
const jsonScriptTypes = new Set([
|
|
40185
|
+
'application/json',
|
|
40186
|
+
'application/ld+json',
|
|
40187
|
+
'application/manifest+json',
|
|
40188
|
+
'application/vnd.geo+json',
|
|
40189
|
+
'importmap',
|
|
40190
|
+
'speculationrules',
|
|
40191
|
+
]);
|
|
40192
|
+
|
|
40193
|
+
function minifyJson(text, options) {
|
|
40194
|
+
try {
|
|
40195
|
+
return JSON.stringify(JSON.parse(text));
|
|
40196
|
+
}
|
|
40197
|
+
catch (err) {
|
|
40198
|
+
if (!options.continueOnMinifyError) {
|
|
40199
|
+
throw err;
|
|
40200
|
+
}
|
|
40201
|
+
options.log && options.log(err);
|
|
40202
|
+
return text;
|
|
40203
|
+
}
|
|
40204
|
+
}
|
|
40205
|
+
|
|
40206
|
+
function hasJsonScriptType(attrs) {
|
|
40207
|
+
for (let i = 0, len = attrs.length; i < len; i++) {
|
|
40208
|
+
const attrName = attrs[i].name.toLowerCase();
|
|
40209
|
+
if (attrName === 'type') {
|
|
40210
|
+
const attrValue = trimWhitespace((attrs[i].value || '').split(/;/, 2)[0]).toLowerCase();
|
|
40211
|
+
if (jsonScriptTypes.has(attrValue)) {
|
|
40212
|
+
return true;
|
|
40213
|
+
}
|
|
40214
|
+
}
|
|
40215
|
+
}
|
|
40216
|
+
return false;
|
|
40217
|
+
}
|
|
40218
|
+
|
|
40088
40219
|
async function processScript(text, options, currentAttrs) {
|
|
40089
40220
|
for (let i = 0, len = currentAttrs.length; i < len; i++) {
|
|
40090
|
-
|
|
40091
|
-
|
|
40092
|
-
|
|
40221
|
+
const attrName = currentAttrs[i].name.toLowerCase();
|
|
40222
|
+
if (attrName === 'type') {
|
|
40223
|
+
const rawValue = currentAttrs[i].value;
|
|
40224
|
+
const normalizedValue = trimWhitespace((rawValue || '').split(/;/, 2)[0]).toLowerCase();
|
|
40225
|
+
// Minify JSON script types automatically
|
|
40226
|
+
if (jsonScriptTypes.has(normalizedValue)) {
|
|
40227
|
+
return minifyJson(text, options);
|
|
40228
|
+
}
|
|
40229
|
+
// Process custom script types if specified
|
|
40230
|
+
if (options.processScripts && options.processScripts.indexOf(rawValue) > -1) {
|
|
40231
|
+
return await minifyHTML(text, options);
|
|
40232
|
+
}
|
|
40093
40233
|
}
|
|
40094
40234
|
}
|
|
40095
40235
|
return text;
|
|
@@ -40557,8 +40697,11 @@ async function createSortFns(value, options, uidIgnore, uidAttr) {
|
|
|
40557
40697
|
currentTag = '';
|
|
40558
40698
|
},
|
|
40559
40699
|
chars: async function (text) {
|
|
40700
|
+
// Only recursively scan HTML content, not JSON-LD or other non-HTML script types
|
|
40701
|
+
// `scan()` is for analyzing HTML attribute order, not for parsing JSON
|
|
40560
40702
|
if (options.processScripts && specialContentTags.has(currentTag) &&
|
|
40561
|
-
options.processScripts.indexOf(currentType) > -1
|
|
40703
|
+
options.processScripts.indexOf(currentType) > -1 &&
|
|
40704
|
+
currentType === 'text/html') {
|
|
40562
40705
|
await scan(text);
|
|
40563
40706
|
}
|
|
40564
40707
|
}
|
|
@@ -40571,7 +40714,8 @@ async function createSortFns(value, options, uidIgnore, uidAttr) {
|
|
|
40571
40714
|
options.log = identity;
|
|
40572
40715
|
options.sortAttributes = false;
|
|
40573
40716
|
options.sortClassName = false;
|
|
40574
|
-
|
|
40717
|
+
const firstPassOutput = await minifyHTML(value, options);
|
|
40718
|
+
await scan(firstPassOutput);
|
|
40575
40719
|
options.log = log;
|
|
40576
40720
|
if (attrChains) {
|
|
40577
40721
|
const attrSorters = Object.create(null);
|
|
@@ -40969,7 +41113,7 @@ async function minifyHTML(value, options, partialMarkup) {
|
|
|
40969
41113
|
text = collapseWhitespace(text, options, false, false, true);
|
|
40970
41114
|
}
|
|
40971
41115
|
}
|
|
40972
|
-
if (options.processScripts
|
|
41116
|
+
if (specialContentTags.has(currentTag) && (options.processScripts || hasJsonScriptType(currentAttrs))) {
|
|
40973
41117
|
text = await processScript(text, options, currentAttrs);
|
|
40974
41118
|
}
|
|
40975
41119
|
if (isExecutableScript(currentTag, currentAttrs)) {
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"htmlminifier.d.ts","sourceRoot":"","sources":["../../src/htmlminifier.js"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"htmlminifier.d.ts","sourceRoot":"","sources":["../../src/htmlminifier.js"],"names":[],"mappings":"AAu/CO,8BAJI,MAAM,YACN,eAAe,GACb,OAAO,CAAC,MAAM,CAAC,CAQ3B;;;;;;;;;;;;UAUS,MAAM;YACN,MAAM;YACN,MAAM;mBACN,MAAM;iBACN,MAAM;kBACN,MAAM;;;;;;;;;;;;;4BAQN,CAAC,GAAG,EAAE,MAAM,EAAE,KAAK,EAAE,aAAa,EAAE,EAAE,qBAAqB,EAAE,CAAC,GAAG,EAAE,MAAM,KAAK,OAAO,KAAK,OAAO;;;;;;;wBAMjG,CAAC,GAAG,EAAE,MAAM,GAAG,IAAI,EAAE,KAAK,EAAE,aAAa,EAAE,GAAG,SAAS,EAAE,iBAAiB,EAAE,CAAC,GAAG,EAAE,MAAM,KAAK,OAAO,KAAK,OAAO;;;;;;;;oBAMhH,OAAO;;;;;;;;gCAOP,OAAO;;;;;;;;kCAOP,OAAO;;;;;;;;yBAOP,OAAO;;;;;;;;2BAOP,OAAO;;;;;;;;4BAOP,OAAO;;;;;;;2BAOP,OAAO;;;;;;;;uBAMP,MAAM,EAAE;;;;;;yBAOR,MAAM;;;;;;yBAKN,CAAC,MAAM,EAAE,MAAM,CAAC,EAAE;;;;;;;4BAKlB,MAAM,EAAE;;;;;;;oCAMR,MAAM;;;;;;;qBAMN,OAAO;;;;;;;YAMP,OAAO;;;;;;;;2BAMP,MAAM,EAAE;;;;;;;;;4BAOR,MAAM,EAAE;;;;;;;+BAQR,OAAO;;;;;;;2BAMP,SAAS,CAAC,MAAM,CAAC;;;;;;uBAMjB,OAAO;;;;;;;;UAKP,CAAC,OAAO,EAAE,OAAO,KAAK,IAAI;;;;;;;;qBAO1B,MAAM;;;;;;;oBAON,MAAM;;;;;;;;;;gBAMN,OAAO,GAAG,OAAO,CAAC,OAAO,cAAc,EAAE,gBAAgB,CAAC,OAAO,cAAc,EAAE,aAAa,CAAC,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,MAAM,EAAE,IAAI,CAAC,EAAE,MAAM,KAAK,OAAO,CAAC,MAAM,CAAC,GAAG,MAAM,CAAC;;;;;;;;;;eAS9J,OAAO,GAAG,OAAO,QAAQ,EAAE,aAAa,GAAG,CAAC,CAAC,IAAI,EAAE,MAAM,EAAE,MAAM,CAAC,EAAE,OAAO,KAAK,OAAO,CAAC,MAAM,CAAC,GAAG,MAAM,CAAC;;;;;;;;;;iBASzG,OAAO,GAAG,MAAM,GAAG,OAAO,WAAW,EAAE,OAAO,GAAG,CAAC,CAAC,IAAI,EAAE,MAAM,KAAK,OAAO,CAAC,MAAM,CAAC,GAAG,MAAM,CAAC;;;;;;;;WAS7F,CAAC,IAAI,EAAE,MAAM,KAAK,MAAM;;;;;;;+BAOxB,OAAO;;;;;;;;;;oBAMP,OAAO;;;;;;;;yBASP,OAAO;;;;;;;gCAOP,OAAO;;;;;;;;iCAMP,OAAO;;;;;;;;;;qBAOP,MAAM,EAAE;;;;;;;qBASR,IAAI,GAAG,GAAG;;;;;;;4BAMV,OAAO;;;;;;;;qBAMP,OAAO;;;;;;;;;4BAOP,OAAO,GAAG,CAAC,CAAC,QAAQ,EAAE,MAAM,EAAE,GAAG,EAAE,MAAM,KAAK,OAAO,CAAC;;;;;;;;0BAQtD,OAAO;;;;;;;;yBAOP,OAAO;;;;;;;;gCAOP,OAAO;;;;;;;iCAOP,OAAO;;;;;;;oCAMP,OAAO;;;;;;;;;;0BAMP,OAAO;;;;;;;;;qBASP,OAAO,GAAG,CAAC,CAAC,GAAG,EAAE,MAAM,EAAE,KAAK,EAAE,aAAa,EAAE,KAAK,IAAI,CAAC;;;;;;;;;oBAQzD,OAAO,GAAG,CAAC,CAAC,KAAK,EAAE,MAAM,KAAK,MAAM,CAAC;;;;;;;;0BAQrC,OAAO;;;;;;;sBAOP,OAAO;;wBAh1DkC,cAAc;0BAAd,cAAc;+BAAd,cAAc"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"htmlparser.d.ts","sourceRoot":"","sources":["../../src/htmlparser.js"],"names":[],"mappings":"AAgDA,4BAAoE;
|
|
1
|
+
{"version":3,"file":"htmlparser.d.ts","sourceRoot":"","sources":["../../src/htmlparser.js"],"names":[],"mappings":"AAgDA,4BAAoE;AA4DpE;IACE,qCAGC;IAFC,UAAgB;IAChB,aAAsB;IAGxB,uBA6bC;CACF"}
|
package/package.json
CHANGED
package/src/htmlminifier.js
CHANGED
|
@@ -431,11 +431,55 @@ async function cleanConditionalComment(comment, options) {
|
|
|
431
431
|
: comment;
|
|
432
432
|
}
|
|
433
433
|
|
|
434
|
+
const jsonScriptTypes = new Set([
|
|
435
|
+
'application/json',
|
|
436
|
+
'application/ld+json',
|
|
437
|
+
'application/manifest+json',
|
|
438
|
+
'application/vnd.geo+json',
|
|
439
|
+
'importmap',
|
|
440
|
+
'speculationrules',
|
|
441
|
+
]);
|
|
442
|
+
|
|
443
|
+
function minifyJson(text, options) {
|
|
444
|
+
try {
|
|
445
|
+
return JSON.stringify(JSON.parse(text));
|
|
446
|
+
}
|
|
447
|
+
catch (err) {
|
|
448
|
+
if (!options.continueOnMinifyError) {
|
|
449
|
+
throw err;
|
|
450
|
+
}
|
|
451
|
+
options.log && options.log(err);
|
|
452
|
+
return text;
|
|
453
|
+
}
|
|
454
|
+
}
|
|
455
|
+
|
|
456
|
+
function hasJsonScriptType(attrs) {
|
|
457
|
+
for (let i = 0, len = attrs.length; i < len; i++) {
|
|
458
|
+
const attrName = attrs[i].name.toLowerCase();
|
|
459
|
+
if (attrName === 'type') {
|
|
460
|
+
const attrValue = trimWhitespace((attrs[i].value || '').split(/;/, 2)[0]).toLowerCase();
|
|
461
|
+
if (jsonScriptTypes.has(attrValue)) {
|
|
462
|
+
return true;
|
|
463
|
+
}
|
|
464
|
+
}
|
|
465
|
+
}
|
|
466
|
+
return false;
|
|
467
|
+
}
|
|
468
|
+
|
|
434
469
|
async function processScript(text, options, currentAttrs) {
|
|
435
470
|
for (let i = 0, len = currentAttrs.length; i < len; i++) {
|
|
436
|
-
|
|
437
|
-
|
|
438
|
-
|
|
471
|
+
const attrName = currentAttrs[i].name.toLowerCase();
|
|
472
|
+
if (attrName === 'type') {
|
|
473
|
+
const rawValue = currentAttrs[i].value;
|
|
474
|
+
const normalizedValue = trimWhitespace((rawValue || '').split(/;/, 2)[0]).toLowerCase();
|
|
475
|
+
// Minify JSON script types automatically
|
|
476
|
+
if (jsonScriptTypes.has(normalizedValue)) {
|
|
477
|
+
return minifyJson(text, options);
|
|
478
|
+
}
|
|
479
|
+
// Process custom script types if specified
|
|
480
|
+
if (options.processScripts && options.processScripts.indexOf(rawValue) > -1) {
|
|
481
|
+
return await minifyHTML(text, options);
|
|
482
|
+
}
|
|
439
483
|
}
|
|
440
484
|
}
|
|
441
485
|
return text;
|
|
@@ -903,8 +947,11 @@ async function createSortFns(value, options, uidIgnore, uidAttr) {
|
|
|
903
947
|
currentTag = '';
|
|
904
948
|
},
|
|
905
949
|
chars: async function (text) {
|
|
950
|
+
// Only recursively scan HTML content, not JSON-LD or other non-HTML script types
|
|
951
|
+
// `scan()` is for analyzing HTML attribute order, not for parsing JSON
|
|
906
952
|
if (options.processScripts && specialContentTags.has(currentTag) &&
|
|
907
|
-
options.processScripts.indexOf(currentType) > -1
|
|
953
|
+
options.processScripts.indexOf(currentType) > -1 &&
|
|
954
|
+
currentType === 'text/html') {
|
|
908
955
|
await scan(text);
|
|
909
956
|
}
|
|
910
957
|
}
|
|
@@ -917,7 +964,8 @@ async function createSortFns(value, options, uidIgnore, uidAttr) {
|
|
|
917
964
|
options.log = identity;
|
|
918
965
|
options.sortAttributes = false;
|
|
919
966
|
options.sortClassName = false;
|
|
920
|
-
|
|
967
|
+
const firstPassOutput = await minifyHTML(value, options);
|
|
968
|
+
await scan(firstPassOutput);
|
|
921
969
|
options.log = log;
|
|
922
970
|
if (attrChains) {
|
|
923
971
|
const attrSorters = Object.create(null);
|
|
@@ -1315,7 +1363,7 @@ async function minifyHTML(value, options, partialMarkup) {
|
|
|
1315
1363
|
text = collapseWhitespace(text, options, false, false, true);
|
|
1316
1364
|
}
|
|
1317
1365
|
}
|
|
1318
|
-
if (options.processScripts
|
|
1366
|
+
if (specialContentTags.has(currentTag) && (options.processScripts || hasJsonScriptType(currentAttrs))) {
|
|
1319
1367
|
text = await processScript(text, options, currentAttrs);
|
|
1320
1368
|
}
|
|
1321
1369
|
if (isExecutableScript(currentTag, currentAttrs)) {
|
package/src/htmlparser.js
CHANGED
|
@@ -103,6 +103,9 @@ function joinSingleAttrAssigns(handler) {
|
|
|
103
103
|
}).join('|');
|
|
104
104
|
}
|
|
105
105
|
|
|
106
|
+
// Number of captured parts per `customAttrSurround` pattern
|
|
107
|
+
const NCP = 7;
|
|
108
|
+
|
|
106
109
|
export class HTMLParser {
|
|
107
110
|
constructor(html, handler) {
|
|
108
111
|
this.html = html;
|
|
@@ -115,7 +118,15 @@ export class HTMLParser {
|
|
|
115
118
|
|
|
116
119
|
const stack = []; let lastTag;
|
|
117
120
|
const attribute = attrForHandler(handler);
|
|
118
|
-
let last, prevTag, nextTag;
|
|
121
|
+
let last, prevTag = undefined, nextTag = undefined;
|
|
122
|
+
|
|
123
|
+
// Track position for better error messages
|
|
124
|
+
let position = 0;
|
|
125
|
+
const getLineColumn = (pos) => {
|
|
126
|
+
const lines = this.html.slice(0, pos).split('\n');
|
|
127
|
+
return { line: lines.length, column: lines[lines.length - 1].length + 1 };
|
|
128
|
+
};
|
|
129
|
+
|
|
119
130
|
while (html) {
|
|
120
131
|
last = html;
|
|
121
132
|
// Make sure we’re not in a `script` or `style` element
|
|
@@ -233,8 +244,27 @@ export class HTMLParser {
|
|
|
233
244
|
}
|
|
234
245
|
|
|
235
246
|
if (html === last) {
|
|
236
|
-
|
|
247
|
+
if (handler.continueOnParseError) {
|
|
248
|
+
// Skip the problematic character and continue
|
|
249
|
+
if (handler.chars) {
|
|
250
|
+
await handler.chars(html[0], prevTag, '');
|
|
251
|
+
}
|
|
252
|
+
html = html.substring(1);
|
|
253
|
+
position++;
|
|
254
|
+
prevTag = '';
|
|
255
|
+
continue;
|
|
256
|
+
}
|
|
257
|
+
const loc = getLineColumn(position);
|
|
258
|
+
// Include some context before the error position so the snippet contains
|
|
259
|
+
// the offending markup plus preceding characters (e.g. "invalid<tag").
|
|
260
|
+
const CONTEXT_BEFORE = 50;
|
|
261
|
+
const startPos = Math.max(0, position - CONTEXT_BEFORE);
|
|
262
|
+
const snippet = this.html.slice(startPos, startPos + 200).replace(/\n/g, ' ');
|
|
263
|
+
throw new Error(
|
|
264
|
+
`Parse error at line ${loc.line}, column ${loc.column}:\n${snippet}${this.html.length > startPos + 200 ? '…' : ''}`
|
|
265
|
+
);
|
|
237
266
|
}
|
|
267
|
+
position = this.html.length - html.length;
|
|
238
268
|
}
|
|
239
269
|
|
|
240
270
|
if (!handler.partialMarkup) {
|
|
@@ -251,10 +281,77 @@ export class HTMLParser {
|
|
|
251
281
|
};
|
|
252
282
|
input = input.slice(start[0].length);
|
|
253
283
|
let end, attr;
|
|
254
|
-
|
|
284
|
+
|
|
285
|
+
// Safety limit: max length of input to check for attributes
|
|
286
|
+
// Protects against catastrophic backtracking on massive attribute values
|
|
287
|
+
const MAX_ATTR_PARSE_LENGTH = 20000; // 20 KB should be enough for any reasonable tag
|
|
288
|
+
|
|
289
|
+
while (true) {
|
|
290
|
+
// Check for closing tag first
|
|
291
|
+
end = input.match(startTagClose);
|
|
292
|
+
if (end) {
|
|
293
|
+
break;
|
|
294
|
+
}
|
|
295
|
+
|
|
296
|
+
// Limit the input length we pass to the regex to prevent catastrophic backtracking
|
|
297
|
+
const isLimited = input.length > MAX_ATTR_PARSE_LENGTH;
|
|
298
|
+
const searchInput = isLimited ? input.slice(0, MAX_ATTR_PARSE_LENGTH) : input;
|
|
299
|
+
|
|
300
|
+
attr = searchInput.match(attribute);
|
|
301
|
+
|
|
302
|
+
// If we limited the input and got a match, check if the value might be truncated
|
|
303
|
+
if (attr && isLimited) {
|
|
304
|
+
// Check if the attribute value extends beyond our search window
|
|
305
|
+
const attrEnd = attr[0].length;
|
|
306
|
+
// If the match ends near the limit, the value might be truncated
|
|
307
|
+
if (attrEnd > MAX_ATTR_PARSE_LENGTH - 100) {
|
|
308
|
+
// Manually extract this attribute to handle potentially huge value
|
|
309
|
+
const manualMatch = input.match(/^\s*([^\s"'<>/=]+)\s*=\s*/);
|
|
310
|
+
if (manualMatch) {
|
|
311
|
+
const quoteChar = input[manualMatch[0].length];
|
|
312
|
+
if (quoteChar === '"' || quoteChar === "'") {
|
|
313
|
+
const closeQuote = input.indexOf(quoteChar, manualMatch[0].length + 1);
|
|
314
|
+
if (closeQuote !== -1) {
|
|
315
|
+
const fullAttr = input.slice(0, closeQuote + 1);
|
|
316
|
+
const numCustomParts = handler.customAttrSurround
|
|
317
|
+
? handler.customAttrSurround.length * NCP
|
|
318
|
+
: 0;
|
|
319
|
+
const baseIndex = 1 + numCustomParts;
|
|
320
|
+
|
|
321
|
+
attr = [];
|
|
322
|
+
attr[0] = fullAttr;
|
|
323
|
+
attr[baseIndex] = manualMatch[1]; // Attribute name
|
|
324
|
+
attr[baseIndex + 1] = '='; // customAssign (falls back to “=” for huge attributes)
|
|
325
|
+
const value = input.slice(manualMatch[0].length + 1, closeQuote);
|
|
326
|
+
// Place value at correct index based on quote type
|
|
327
|
+
if (quoteChar === '"') {
|
|
328
|
+
attr[baseIndex + 2] = value; // Double-quoted value
|
|
329
|
+
} else {
|
|
330
|
+
attr[baseIndex + 3] = value; // Single-quoted value
|
|
331
|
+
}
|
|
332
|
+
input = input.slice(fullAttr.length);
|
|
333
|
+
match.attrs.push(attr);
|
|
334
|
+
continue;
|
|
335
|
+
}
|
|
336
|
+
}
|
|
337
|
+
// Note: Unquoted attribute values are intentionally not handled here.
|
|
338
|
+
// Per HTML spec, unquoted values cannot contain spaces or special chars,
|
|
339
|
+
// making a 20 KB+ unquoted value practically impossible. If encountered,
|
|
340
|
+
// it’s malformed HTML and using the truncated regex match is acceptable.
|
|
341
|
+
}
|
|
342
|
+
}
|
|
343
|
+
}
|
|
344
|
+
|
|
345
|
+
if (!attr) {
|
|
346
|
+
break;
|
|
347
|
+
}
|
|
348
|
+
|
|
255
349
|
input = input.slice(attr[0].length);
|
|
256
350
|
match.attrs.push(attr);
|
|
257
351
|
}
|
|
352
|
+
|
|
353
|
+
// Check for closing tag
|
|
354
|
+
end = input.match(startTagClose);
|
|
258
355
|
if (end) {
|
|
259
356
|
match.unarySlash = end[1];
|
|
260
357
|
match.rest = input.slice(end[0].length);
|
|
@@ -347,7 +444,6 @@ export class HTMLParser {
|
|
|
347
444
|
|
|
348
445
|
const attrs = match.attrs.map(function (args) {
|
|
349
446
|
let name, value, customOpen, customClose, customAssign, quote;
|
|
350
|
-
const ncp = 7; // Number of captured parts, scalar
|
|
351
447
|
|
|
352
448
|
// Hackish workaround for FF bug https://bugzilla.mozilla.org/show_bug.cgi?id=369778
|
|
353
449
|
if (IS_REGEX_CAPTURING_BROKEN && args[0].indexOf('""') === -1) {
|
|
@@ -375,7 +471,7 @@ export class HTMLParser {
|
|
|
375
471
|
|
|
376
472
|
let j = 1;
|
|
377
473
|
if (handler.customAttrSurround) {
|
|
378
|
-
for (let i = 0, l = handler.customAttrSurround.length; i < l; i++, j +=
|
|
474
|
+
for (let i = 0, l = handler.customAttrSurround.length; i < l; i++, j += NCP) {
|
|
379
475
|
name = args[j + 1];
|
|
380
476
|
if (name) {
|
|
381
477
|
quote = populate(j + 2);
|
package/src/utils.js
CHANGED