html-minifier-next 4.6.0 → 4.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +34 -31
- package/cli.js +1 -1
- package/dist/htmlminifier.cjs +253 -30
- package/dist/htmlminifier.esm.bundle.js +253 -30
- package/dist/types/htmlminifier.d.ts.map +1 -1
- package/dist/types/htmlparser.d.ts.map +1 -1
- package/package.json +2 -2
- package/src/htmlminifier.js +152 -25
- package/src/htmlparser.js +101 -5
- package/src/utils.js +1 -1
package/src/htmlminifier.js
CHANGED
|
@@ -7,18 +7,88 @@ import TokenChain from './tokenchain.js';
|
|
|
7
7
|
import { replaceAsync } from './utils.js';
|
|
8
8
|
import { presets, getPreset, getPresetNames } from './presets.js';
|
|
9
9
|
|
|
10
|
-
|
|
10
|
+
// Hoisted, reusable RegExp patterns and tiny helpers to avoid repeated allocations in hot paths
// "Whitespace" below means the five characters space, LF, CR, tab, and form feed;
// the non-breaking space (U+00A0) is only included where a pattern lists `\xA0` explicitly
|
|
11
|
+
// One or more whitespace characters at the very start of the string
const RE_WS_START = /^[ \n\r\t\f]+/;
|
|
12
|
+
// One or more whitespace characters at the very end of the string
const RE_WS_END = /[ \n\r\t\f]+$/;
|
|
13
|
+
// Every run of whitespace and/or non-breaking spaces (global; use with `replace`, not `test`)
const RE_ALL_WS_NBSP = /[ \n\r\t\f\xA0]+/g;
|
|
14
|
+
// A stretch of non-NBSP characters preceded by either the start of the string or a captured NBSP run (`$1`)
const RE_NBSP_LEADING_GROUP = /(^|\xA0+)[^\xA0]+/g;
|
|
15
|
+
// Like `RE_NBSP_LEADING_GROUP` but without the start-of-string alternative: the captured NBSP run is required
const RE_NBSP_LEAD_GROUP = /(\xA0+)[^\xA0]+/g;
|
|
16
|
+
// A stretch of non-NBSP characters followed by a captured NBSP run (`$1`)
const RE_NBSP_TRAILING_GROUP = /[^\xA0]+(\xA0+)/g;
|
|
17
|
+
// A stretch of non-NBSP characters running up to the end of the string
const RE_NBSP_TRAILING_STRIP = /[^\xA0]+$/;
|
|
18
|
+
// Comment text that starts with `[if …]` or ends with `[endif]`
const RE_CONDITIONAL_COMMENT = /^\[if\s[^\]]+]|\[endif]$/;
|
|
19
|
+
// Attribute names made of `on` followed by at least three lowercase ASCII letters
const RE_EVENT_ATTR_DEFAULT = /^on[a-z]{3,}$/;
|
|
20
|
+
// Non-empty values containing no whitespace, quotes, backticks, `=`, `<`, or `>`
const RE_CAN_REMOVE_ATTR_QUOTES = /^[^ \t\n\f\r"'`=<>]+$/;
|
|
21
|
+
// A single `;` at the very end of the string
const RE_TRAILING_SEMICOLON = /;$/;
|
|
22
|
+
// `&` followed by an optional `#`, alphanumerics, and `;`; the capture (`$1`) excludes the `&` (global)
const RE_AMP_ENTITY = /&(#?[0-9a-zA-Z]+;)/g;
|
|
23
|
+
|
|
24
|
+
// Tiny stable stringify for options signatures (keys sorted at every level; nested objects and arrays are serialized recursively)
|
|
25
|
+
/**
 * Serialize a value into a deterministic string suitable for cache-key signatures.
 *
 * Object keys are sorted at every nesting level, so two objects with the same
 * content always produce the same string regardless of key insertion order.
 *
 * Values that `JSON.stringify` cannot tell apart are encoded explicitly so that
 * different option values never share a signature:
 * - RegExp instances are encoded from their source and flags (they would otherwise all become `{}`)
 * - Functions are encoded from their source text (they would otherwise all become `undefined`)
 * - `undefined` array items are written out (so `[undefined]` differs from `[]`)
 * - Reference cycles are cut with a `"[Circular]"` marker instead of overflowing the stack
 *
 * @param {*} obj - Value to serialize
 * @returns {string|undefined} Deterministic signature; `undefined` only when `obj` itself
 *   is `undefined` or a symbol (mirrors `JSON.stringify`)
 */
function stableStringify(obj) {
  // Objects on the current recursion path; an object seen again on its own path is a cycle
  const path = new Set();

  const encode = (value) => {
    if (value instanceof RegExp) return 'RegExp:' + JSON.stringify(String(value));
    if (typeof value === 'function') return 'Function:' + JSON.stringify(String(value));
    if (value == null || typeof value !== 'object') return JSON.stringify(value);
    if (path.has(value)) return '"[Circular]"';

    path.add(value);
    let out;
    if (Array.isArray(value)) {
      // `String()` keeps `undefined` items visible; `join` alone would render them as empty
      out = '[' + value.map((item) => String(encode(item))).join(',') + ']';
    } else {
      out = '{' + Object.keys(value).sort()
        .map((key) => JSON.stringify(key) + ':' + encode(value[key]))
        .join(',') + '}';
    }
    // Remove on the way out so that shared (non-cyclic) references are serialized in full
    path.delete(value);
    return out;
  };

  return encode(obj);
}
|
|
36
|
+
|
|
37
|
+
// Minimal LRU cache for strings and promises
|
|
38
|
+
/**
 * Minimal least-recently-used cache backed by a `Map`.
 *
 * A `Map` iterates in insertion order, so the first key is always the least
 * recently used one; reads and writes re-insert the key to move it to the end.
 */
class LRU {
  /**
   * @param {number} [limit=200] - Maximum number of entries kept before the oldest is evicted
   */
  constructor(limit = 200) {
    this.limit = limit;
    this.map = new Map();
  }

  /**
   * Look up a key and mark it as most recently used.
   *
   * @param {*} key
   * @returns {*} The stored value, or `undefined` when the key is absent
   */
  get(key) {
    // Test presence with `has` rather than comparing the value against `undefined`,
    // so that an entry whose stored value is `undefined` is still promoted
    if (!this.map.has(key)) return undefined;
    const value = this.map.get(key);
    this.map.delete(key);
    this.map.set(key, value);
    return value;
  }

  /**
   * Store a value as the most recently used entry, evicting the oldest entry
   * when the cache grows past `limit`.
   *
   * @param {*} key
   * @param {*} value
   */
  set(key, value) {
    // Delete first so that an existing key moves to the end instead of keeping its old position
    this.map.delete(key);
    this.map.set(key, value);
    if (this.map.size > this.limit) {
      const oldest = this.map.keys().next().value;
      this.map.delete(oldest);
    }
  }

  /**
   * Remove a key if present.
   *
   * @param {*} key
   */
  delete(key) {
    this.map.delete(key);
  }
}
|
|
61
|
+
|
|
62
|
+
// Per-process caches
|
|
63
|
+
const jsMinifyCache = new LRU(200);
|
|
64
|
+
const cssMinifyCache = new LRU(200);
|
|
65
|
+
|
|
66
|
+
/**
 * Remove leading and trailing whitespace (space, LF, CR, tab, form feed) from a string.
 * Non-breaking spaces are not treated as whitespace and are left in place.
 *
 * @param {string} str - Text to trim; falsy input is returned unchanged
 * @returns {string} The trimmed text
 */
const trimWhitespace = str => {
  if (!str) {
    return str;
  }
  // Single-character probes: cheap checks that let untouched strings skip both replacements
  const startsWithSpace = /^[ \n\r\t\f]/.test(str);
  const endsWithSpace = /[ \n\r\t\f]$/.test(str);
  if (startsWithSpace || endsWithSpace) {
    return str.replace(RE_WS_START, '').replace(RE_WS_END, '');
  }
  return str;
};
|
|
11
74
|
|
|
12
75
|
function collapseWhitespaceAll(str) {
|
|
76
|
+
if (!str) return str;
|
|
77
|
+
// Fast path: if there are no common whitespace characters, return early
|
|
78
|
+
if (!/[ \n\r\t\f\xA0]/.test(str)) {
|
|
79
|
+
return str;
|
|
80
|
+
}
|
|
13
81
|
// Non-breaking space is specifically handled inside the replacer function here:
|
|
14
|
-
return str
|
|
15
|
-
return spaces === '\t' ? '\t' : spaces.replace(
|
|
82
|
+
return str.replace(RE_ALL_WS_NBSP, function (spaces) {
|
|
83
|
+
return spaces === '\t' ? '\t' : spaces.replace(RE_NBSP_LEADING_GROUP, '$1 ');
|
|
16
84
|
});
|
|
17
85
|
}
|
|
18
86
|
|
|
19
87
|
function collapseWhitespace(str, options, trimLeft, trimRight, collapseAll) {
|
|
20
88
|
let lineBreakBefore = ''; let lineBreakAfter = '';
|
|
21
89
|
|
|
90
|
+
if (!str) return str;
|
|
91
|
+
|
|
22
92
|
if (options.preserveLineBreaks) {
|
|
23
93
|
str = str.replace(/^[ \n\r\t\f]*?[\n\r][ \n\r\t\f]*/, function () {
|
|
24
94
|
lineBreakBefore = '\n';
|
|
@@ -36,7 +106,7 @@ function collapseWhitespace(str, options, trimLeft, trimRight, collapseAll) {
|
|
|
36
106
|
if (conservative && spaces === '\t') {
|
|
37
107
|
return '\t';
|
|
38
108
|
}
|
|
39
|
-
return spaces.replace(/^[^\xA0]+/, '').replace(
|
|
109
|
+
return spaces.replace(/^[^\xA0]+/, '').replace(RE_NBSP_LEAD_GROUP, '$1 ') || (conservative ? ' ' : '');
|
|
40
110
|
});
|
|
41
111
|
}
|
|
42
112
|
|
|
@@ -47,7 +117,7 @@ function collapseWhitespace(str, options, trimLeft, trimRight, collapseAll) {
|
|
|
47
117
|
if (conservative && spaces === '\t') {
|
|
48
118
|
return '\t';
|
|
49
119
|
}
|
|
50
|
-
return spaces.replace(
|
|
120
|
+
return spaces.replace(RE_NBSP_TRAILING_GROUP, ' $1').replace(RE_NBSP_TRAILING_STRIP, '') || (conservative ? ' ' : '');
|
|
51
121
|
});
|
|
52
122
|
}
|
|
53
123
|
|
|
@@ -79,7 +149,7 @@ function collapseWhitespaceSmart(str, prevTag, nextTag, options, inlineElements,
|
|
|
79
149
|
}
|
|
80
150
|
|
|
81
151
|
function isConditionalComment(text) {
|
|
82
|
-
return
|
|
152
|
+
return RE_CONDITIONAL_COMMENT.test(text);
|
|
83
153
|
}
|
|
84
154
|
|
|
85
155
|
function isIgnoredComment(text, options) {
|
|
@@ -101,12 +171,12 @@ function isEventAttribute(attrName, options) {
|
|
|
101
171
|
}
|
|
102
172
|
return false;
|
|
103
173
|
}
|
|
104
|
-
return
|
|
174
|
+
return RE_EVENT_ATTR_DEFAULT.test(attrName);
|
|
105
175
|
}
|
|
106
176
|
|
|
107
177
|
function canRemoveAttributeQuotes(value) {
|
|
108
178
|
// https://mathiasbynens.be/notes/unquoted-attribute-values
|
|
109
|
-
return
|
|
179
|
+
return RE_CAN_REMOVE_ATTR_QUOTES.test(value);
|
|
110
180
|
}
|
|
111
181
|
|
|
112
182
|
function attributesInclude(attributes, attribute) {
|
|
@@ -317,7 +387,7 @@ async function cleanAttributeValue(tag, attrName, attrValue, options, attrs, min
|
|
|
317
387
|
} else if (attrName === 'style') {
|
|
318
388
|
attrValue = trimWhitespace(attrValue);
|
|
319
389
|
if (attrValue) {
|
|
320
|
-
if (
|
|
390
|
+
if (attrValue.endsWith(';') && !/&#?[0-9a-zA-Z]+;$/.test(attrValue)) {
|
|
321
391
|
attrValue = attrValue.replace(/\s*;$/, ';');
|
|
322
392
|
}
|
|
323
393
|
attrValue = await options.minifyCSS(attrValue, 'inline');
|
|
@@ -636,7 +706,10 @@ async function normalizeAttr(attr, attrs, tag, options) {
|
|
|
636
706
|
let attrValue = attr.value;
|
|
637
707
|
|
|
638
708
|
if (options.decodeEntities && attrValue) {
|
|
639
|
-
|
|
709
|
+
// Fast path: only decode when entities are present
|
|
710
|
+
if (attrValue.indexOf('&') !== -1) {
|
|
711
|
+
attrValue = decodeHTMLStrict(attrValue);
|
|
712
|
+
}
|
|
640
713
|
}
|
|
641
714
|
|
|
642
715
|
if ((options.removeRedundantAttributes &&
|
|
@@ -657,8 +730,8 @@ async function normalizeAttr(attr, attrs, tag, options) {
|
|
|
657
730
|
return;
|
|
658
731
|
}
|
|
659
732
|
|
|
660
|
-
if (options.decodeEntities && attrValue) {
|
|
661
|
-
attrValue = attrValue.replace(
|
|
733
|
+
if (options.decodeEntities && attrValue && attrValue.indexOf('&') !== -1) {
|
|
734
|
+
attrValue = attrValue.replace(RE_AMP_ENTITY, '&$1');
|
|
662
735
|
}
|
|
663
736
|
|
|
664
737
|
return {
|
|
@@ -778,6 +851,10 @@ const processOptions = (inputOptions) => {
|
|
|
778
851
|
const lightningCssOptions = typeof option === 'object' ? option : {};
|
|
779
852
|
|
|
780
853
|
options.minifyCSS = async function (text, type) {
|
|
854
|
+
// Fast path: nothing to minify
|
|
855
|
+
if (!text || !text.trim()) {
|
|
856
|
+
return text;
|
|
857
|
+
}
|
|
781
858
|
text = await replaceAsync(
|
|
782
859
|
text,
|
|
783
860
|
/(url\s*\(\s*)(?:"([^"]*)"|'([^']*)'|([^\s)]+))(\s*\))/ig,
|
|
@@ -796,10 +873,20 @@ const processOptions = (inputOptions) => {
|
|
|
796
873
|
}
|
|
797
874
|
}
|
|
798
875
|
);
|
|
799
|
-
|
|
876
|
+
// Cache key: wrapped content, type, options signature
|
|
800
877
|
const inputCSS = wrapCSS(text, type);
|
|
878
|
+
const cssSig = stableStringify({ type, opts: lightningCssOptions, cont: !!options.continueOnMinifyError });
|
|
879
|
+
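// For large inputs, key on length plus a content fingerprint (first/last 50 chars) to keep keys small; NOTE: this only reduces collisions—inputs of equal length that share both 50-char ends map to the same key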
|
|
880
|
+
const cssKey = inputCSS.length > 2048
|
|
881
|
+
? (inputCSS.length + '|' + inputCSS.slice(0, 50) + inputCSS.slice(-50) + '|' + type + '|' + cssSig)
|
|
882
|
+
: (inputCSS + '|' + type + '|' + cssSig);
|
|
801
883
|
|
|
802
884
|
try {
|
|
885
|
+
const cached = cssMinifyCache.get(cssKey);
|
|
886
|
+
if (cached) {
|
|
887
|
+
return cached;
|
|
888
|
+
}
|
|
889
|
+
|
|
803
890
|
const result = transformCSS({
|
|
804
891
|
filename: 'input.css',
|
|
805
892
|
code: Buffer.from(inputCSS),
|
|
@@ -822,12 +909,12 @@ const processOptions = (inputOptions) => {
|
|
|
822
909
|
|
|
823
910
|
// Preserve if output is empty and input had template syntax or UIDs
|
|
824
911
|
// This catches cases where Lightning CSS removed content that should be preserved
|
|
825
|
-
|
|
826
|
-
return text;
|
|
827
|
-
}
|
|
912
|
+
const finalOutput = (text.trim() && !outputCSS.trim() && (looksLikeTemplate || hasUID)) ? text : outputCSS;
|
|
828
913
|
|
|
829
|
-
|
|
914
|
+
cssMinifyCache.set(cssKey, finalOutput);
|
|
915
|
+
return finalOutput;
|
|
830
916
|
} catch (err) {
|
|
917
|
+
cssMinifyCache.delete(cssKey);
|
|
831
918
|
if (!options.continueOnMinifyError) {
|
|
832
919
|
throw err;
|
|
833
920
|
}
|
|
@@ -853,10 +940,39 @@ const processOptions = (inputOptions) => {
|
|
|
853
940
|
|
|
854
941
|
terserOptions.parse.bare_returns = inline;
|
|
855
942
|
|
|
943
|
+
let jsKey;
|
|
856
944
|
try {
|
|
857
|
-
|
|
858
|
-
|
|
945
|
+
// Fast path: avoid invoking Terser for empty/whitespace-only content
|
|
946
|
+
if (!code || !code.trim()) {
|
|
947
|
+
return '';
|
|
948
|
+
}
|
|
949
|
+
// Cache key: content, inline, options signature (subset)
|
|
950
|
+
const terserSig = stableStringify({
|
|
951
|
+
compress: terserOptions.compress,
|
|
952
|
+
mangle: terserOptions.mangle,
|
|
953
|
+
ecma: terserOptions.ecma,
|
|
954
|
+
toplevel: terserOptions.toplevel,
|
|
955
|
+
module: terserOptions.module,
|
|
956
|
+
keep_fnames: terserOptions.keep_fnames,
|
|
957
|
+
format: terserOptions.format,
|
|
958
|
+
cont: !!options.continueOnMinifyError,
|
|
959
|
+
});
|
|
960
|
+
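// For large inputs, key on length plus a content fingerprint (first/last 50 chars) to keep keys small; NOTE: this only reduces collisions—inputs of equal length that share both 50-char ends map to the same key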
|
|
961
|
+
jsKey = (code.length > 2048 ? (code.length + '|' + code.slice(0, 50) + code.slice(-50) + '|') : (code + '|')) + (inline ? '1' : '0') + '|' + terserSig;
|
|
962
|
+
const cached = jsMinifyCache.get(jsKey);
|
|
963
|
+
if (cached) {
|
|
964
|
+
return await cached;
|
|
965
|
+
}
|
|
966
|
+
const inFlight = (async () => {
|
|
967
|
+
const result = await terser(code, terserOptions);
|
|
968
|
+
return result.code.replace(RE_TRAILING_SEMICOLON, '');
|
|
969
|
+
})();
|
|
970
|
+
jsMinifyCache.set(jsKey, inFlight);
|
|
971
|
+
const resolved = await inFlight;
|
|
972
|
+
jsMinifyCache.set(jsKey, resolved);
|
|
973
|
+
return resolved;
|
|
859
974
|
} catch (err) {
|
|
975
|
+
if (jsKey) jsMinifyCache.delete(jsKey);
|
|
860
976
|
if (!options.continueOnMinifyError) {
|
|
861
977
|
throw err;
|
|
862
978
|
}
|
|
@@ -947,8 +1063,11 @@ async function createSortFns(value, options, uidIgnore, uidAttr) {
|
|
|
947
1063
|
currentTag = '';
|
|
948
1064
|
},
|
|
949
1065
|
chars: async function (text) {
|
|
1066
|
+
// Only recursively scan HTML content, not JSON-LD or other non-HTML script types
|
|
1067
|
+
// `scan()` is for analyzing HTML attribute order, not for parsing JSON
|
|
950
1068
|
if (options.processScripts && specialContentTags.has(currentTag) &&
|
|
951
|
-
options.processScripts.indexOf(currentType) > -1
|
|
1069
|
+
options.processScripts.indexOf(currentType) > -1 &&
|
|
1070
|
+
currentType === 'text/html') {
|
|
952
1071
|
await scan(text);
|
|
953
1072
|
}
|
|
954
1073
|
}
|
|
@@ -961,7 +1080,8 @@ async function createSortFns(value, options, uidIgnore, uidAttr) {
|
|
|
961
1080
|
options.log = identity;
|
|
962
1081
|
options.sortAttributes = false;
|
|
963
1082
|
options.sortClassName = false;
|
|
964
|
-
|
|
1083
|
+
const firstPassOutput = await minifyHTML(value, options);
|
|
1084
|
+
await scan(firstPassOutput);
|
|
965
1085
|
options.log = log;
|
|
966
1086
|
if (attrChains) {
|
|
967
1087
|
const attrSorters = Object.create(null);
|
|
@@ -1314,7 +1434,9 @@ async function minifyHTML(value, options, partialMarkup) {
|
|
|
1314
1434
|
prevTag = prevTag === '' ? 'comment' : prevTag;
|
|
1315
1435
|
nextTag = nextTag === '' ? 'comment' : nextTag;
|
|
1316
1436
|
if (options.decodeEntities && text && !specialContentTags.has(currentTag)) {
|
|
1317
|
-
|
|
1437
|
+
if (text.indexOf('&') !== -1) {
|
|
1438
|
+
text = decodeHTML(text);
|
|
1439
|
+
}
|
|
1318
1440
|
}
|
|
1319
1441
|
if (options.collapseWhitespace) {
|
|
1320
1442
|
if (!stackNoTrimWhitespace.length) {
|
|
@@ -1388,11 +1510,16 @@ async function minifyHTML(value, options, partialMarkup) {
|
|
|
1388
1510
|
charsPrevTag = /^\s*$/.test(text) ? prevTag : 'comment';
|
|
1389
1511
|
if (options.decodeEntities && text && !specialContentTags.has(currentTag)) {
|
|
1390
1512
|
// Escape any `&` symbols that start either:
|
|
1391
|
-
// 1) a legacy named character reference (i.e
|
|
1392
|
-
// 2) or any other character reference (i.e
|
|
1513
|
+
// 1) a legacy named character reference (i.e., one that doesn’t end with `;`)
|
|
1514
|
+
// 2) or any other character reference (i.e., one that does end with `;`)
|
|
1393
1515
|
// Note that `&` can be escaped as `&amp`, without the semi-colon.
|
|
1394
1516
|
// https://mathiasbynens.be/notes/ambiguous-ampersands
|
|
1395
|
-
|
|
1517
|
+
if (text.indexOf('&') !== -1) {
|
|
1518
|
+
text = text.replace(/&((?:Iacute|aacute|uacute|plusmn|Otilde|otilde|agrave|Agrave|Yacute|yacute|Oslash|oslash|atilde|Atilde|brvbar|ccedil|Ccedil|Ograve|curren|divide|eacute|Eacute|ograve|Oacute|egrave|Egrave|Ugrave|frac12|frac14|frac34|ugrave|oacute|iacute|Ntilde|ntilde|Uacute|middot|igrave|Igrave|iquest|Aacute|cedil|laquo|micro|iexcl|Icirc|icirc|acirc|Ucirc|Ecirc|ocirc|Ocirc|ecirc|ucirc|Aring|aring|AElig|aelig|acute|pound|raquo|Acirc|times|THORN|szlig|thorn|COPY|auml|ordf|ordm|Uuml|macr|uuml|Auml|ouml|Ouml|para|nbsp|euml|quot|QUOT|Euml|yuml|cent|sect|copy|sup1|sup2|sup3|iuml|Iuml|ETH|shy|reg|not|yen|amp|AMP|REG|uml|eth|deg|gt|GT|LT|lt)(?!;)|(?:#?[0-9a-zA-Z]+;))/g, '&$1');
|
|
1519
|
+
}
|
|
1520
|
+
if (text.indexOf('<') !== -1) {
|
|
1521
|
+
text = text.replace(/</g, '<');
|
|
1522
|
+
}
|
|
1396
1523
|
}
|
|
1397
1524
|
if (uidPattern && options.collapseWhitespace && stackNoTrimWhitespace.length) {
|
|
1398
1525
|
text = text.replace(uidPattern, function (match, prefix, index) {
|
package/src/htmlparser.js
CHANGED
|
@@ -103,6 +103,9 @@ function joinSingleAttrAssigns(handler) {
|
|
|
103
103
|
}).join('|');
|
|
104
104
|
}
|
|
105
105
|
|
|
106
|
+
// Number of captured parts per `customAttrSurround` pattern
|
|
107
|
+
const NCP = 7;
|
|
108
|
+
|
|
106
109
|
export class HTMLParser {
|
|
107
110
|
constructor(html, handler) {
|
|
108
111
|
this.html = html;
|
|
@@ -115,7 +118,15 @@ export class HTMLParser {
|
|
|
115
118
|
|
|
116
119
|
const stack = []; let lastTag;
|
|
117
120
|
const attribute = attrForHandler(handler);
|
|
118
|
-
let last, prevTag, nextTag;
|
|
121
|
+
let last, prevTag = undefined, nextTag = undefined;
|
|
122
|
+
|
|
123
|
+
// Track position for better error messages
|
|
124
|
+
let position = 0;
|
|
125
|
+
// Translate a character offset into the source HTML into a 1-based line/column pair
// by counting the `\n` characters that precede the offset
const getLineColumn = (pos) => {
  const before = this.html.slice(0, pos);
  let line = 1;
  let lineStart = 0;
  for (let idx = before.indexOf('\n'); idx !== -1; idx = before.indexOf('\n', idx + 1)) {
    line++;
    // The current line begins right after the most recent newline
    lineStart = idx + 1;
  }
  return { line, column: before.length - lineStart + 1 };
};
|
|
129
|
+
|
|
119
130
|
while (html) {
|
|
120
131
|
last = html;
|
|
121
132
|
// Make sure we’re not in a `script` or `style` element
|
|
@@ -233,8 +244,27 @@ export class HTMLParser {
|
|
|
233
244
|
}
|
|
234
245
|
|
|
235
246
|
if (html === last) {
|
|
236
|
-
|
|
247
|
+
if (handler.continueOnParseError) {
|
|
248
|
+
// Skip the problematic character and continue
|
|
249
|
+
if (handler.chars) {
|
|
250
|
+
await handler.chars(html[0], prevTag, '');
|
|
251
|
+
}
|
|
252
|
+
html = html.substring(1);
|
|
253
|
+
position++;
|
|
254
|
+
prevTag = '';
|
|
255
|
+
continue;
|
|
256
|
+
}
|
|
257
|
+
const loc = getLineColumn(position);
|
|
258
|
+
// Include some context before the error position so the snippet contains
|
|
259
|
+
// the offending markup plus preceding characters (e.g. "invalid<tag").
|
|
260
|
+
const CONTEXT_BEFORE = 50;
|
|
261
|
+
const startPos = Math.max(0, position - CONTEXT_BEFORE);
|
|
262
|
+
const snippet = this.html.slice(startPos, startPos + 200).replace(/\n/g, ' ');
|
|
263
|
+
throw new Error(
|
|
264
|
+
`Parse error at line ${loc.line}, column ${loc.column}:\n${snippet}${this.html.length > startPos + 200 ? '…' : ''}`
|
|
265
|
+
);
|
|
237
266
|
}
|
|
267
|
+
position = this.html.length - html.length;
|
|
238
268
|
}
|
|
239
269
|
|
|
240
270
|
if (!handler.partialMarkup) {
|
|
@@ -251,10 +281,77 @@ export class HTMLParser {
|
|
|
251
281
|
};
|
|
252
282
|
input = input.slice(start[0].length);
|
|
253
283
|
let end, attr;
|
|
254
|
-
|
|
284
|
+
|
|
285
|
+
// Safety limit: max length of input to check for attributes
|
|
286
|
+
// Protects against catastrophic backtracking on massive attribute values
|
|
287
|
+
const MAX_ATTR_PARSE_LENGTH = 20000; // 20 KB should be enough for any reasonable tag
|
|
288
|
+
|
|
289
|
+
while (true) {
|
|
290
|
+
// Check for closing tag first
|
|
291
|
+
end = input.match(startTagClose);
|
|
292
|
+
if (end) {
|
|
293
|
+
break;
|
|
294
|
+
}
|
|
295
|
+
|
|
296
|
+
// Limit the input length we pass to the regex to prevent catastrophic backtracking
|
|
297
|
+
const isLimited = input.length > MAX_ATTR_PARSE_LENGTH;
|
|
298
|
+
const searchInput = isLimited ? input.slice(0, MAX_ATTR_PARSE_LENGTH) : input;
|
|
299
|
+
|
|
300
|
+
attr = searchInput.match(attribute);
|
|
301
|
+
|
|
302
|
+
// If we limited the input and got a match, check if the value might be truncated
|
|
303
|
+
if (attr && isLimited) {
|
|
304
|
+
// Check if the attribute value extends beyond our search window
|
|
305
|
+
const attrEnd = attr[0].length;
|
|
306
|
+
// If the match ends near the limit, the value might be truncated
|
|
307
|
+
if (attrEnd > MAX_ATTR_PARSE_LENGTH - 100) {
|
|
308
|
+
// Manually extract this attribute to handle potentially huge value
|
|
309
|
+
const manualMatch = input.match(/^\s*([^\s"'<>/=]+)\s*=\s*/);
|
|
310
|
+
if (manualMatch) {
|
|
311
|
+
const quoteChar = input[manualMatch[0].length];
|
|
312
|
+
if (quoteChar === '"' || quoteChar === "'") {
|
|
313
|
+
const closeQuote = input.indexOf(quoteChar, manualMatch[0].length + 1);
|
|
314
|
+
if (closeQuote !== -1) {
|
|
315
|
+
const fullAttr = input.slice(0, closeQuote + 1);
|
|
316
|
+
const numCustomParts = handler.customAttrSurround
|
|
317
|
+
? handler.customAttrSurround.length * NCP
|
|
318
|
+
: 0;
|
|
319
|
+
const baseIndex = 1 + numCustomParts;
|
|
320
|
+
|
|
321
|
+
attr = [];
|
|
322
|
+
attr[0] = fullAttr;
|
|
323
|
+
attr[baseIndex] = manualMatch[1]; // Attribute name
|
|
324
|
+
attr[baseIndex + 1] = '='; // customAssign (falls back to “=” for huge attributes)
|
|
325
|
+
const value = input.slice(manualMatch[0].length + 1, closeQuote);
|
|
326
|
+
// Place value at correct index based on quote type
|
|
327
|
+
if (quoteChar === '"') {
|
|
328
|
+
attr[baseIndex + 2] = value; // Double-quoted value
|
|
329
|
+
} else {
|
|
330
|
+
attr[baseIndex + 3] = value; // Single-quoted value
|
|
331
|
+
}
|
|
332
|
+
input = input.slice(fullAttr.length);
|
|
333
|
+
match.attrs.push(attr);
|
|
334
|
+
continue;
|
|
335
|
+
}
|
|
336
|
+
}
|
|
337
|
+
// Note: Unquoted attribute values are intentionally not handled here.
|
|
338
|
+
// Per HTML spec, unquoted values cannot contain spaces or special chars,
|
|
339
|
+
// making a 20 KB+ unquoted value practically impossible. If encountered,
|
|
340
|
+
// it’s malformed HTML and using the truncated regex match is acceptable.
|
|
341
|
+
}
|
|
342
|
+
}
|
|
343
|
+
}
|
|
344
|
+
|
|
345
|
+
if (!attr) {
|
|
346
|
+
break;
|
|
347
|
+
}
|
|
348
|
+
|
|
255
349
|
input = input.slice(attr[0].length);
|
|
256
350
|
match.attrs.push(attr);
|
|
257
351
|
}
|
|
352
|
+
|
|
353
|
+
// Check for closing tag
|
|
354
|
+
end = input.match(startTagClose);
|
|
258
355
|
if (end) {
|
|
259
356
|
match.unarySlash = end[1];
|
|
260
357
|
match.rest = input.slice(end[0].length);
|
|
@@ -347,7 +444,6 @@ export class HTMLParser {
|
|
|
347
444
|
|
|
348
445
|
const attrs = match.attrs.map(function (args) {
|
|
349
446
|
let name, value, customOpen, customClose, customAssign, quote;
|
|
350
|
-
const ncp = 7; // Number of captured parts, scalar
|
|
351
447
|
|
|
352
448
|
// Hackish workaround for FF bug https://bugzilla.mozilla.org/show_bug.cgi?id=369778
|
|
353
449
|
if (IS_REGEX_CAPTURING_BROKEN && args[0].indexOf('""') === -1) {
|
|
@@ -375,7 +471,7 @@ export class HTMLParser {
|
|
|
375
471
|
|
|
376
472
|
let j = 1;
|
|
377
473
|
if (handler.customAttrSurround) {
|
|
378
|
-
for (let i = 0, l = handler.customAttrSurround.length; i < l; i++, j +=
|
|
474
|
+
for (let i = 0, l = handler.customAttrSurround.length; i < l; i++, j += NCP) {
|
|
379
475
|
name = args[j + 1];
|
|
380
476
|
if (name) {
|
|
381
477
|
quote = populate(j + 2);
|
package/src/utils.js
CHANGED