html-minifier-next 4.9.2 → 4.11.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +24 -19
- package/cli.js +3 -2
- package/dist/htmlminifier.cjs +166 -75
- package/dist/htmlminifier.esm.bundle.js +166 -75
- package/dist/types/htmlminifier.d.ts +9 -0
- package/dist/types/htmlminifier.d.ts.map +1 -1
- package/dist/types/htmlparser.d.ts.map +1 -1
- package/dist/types/presets.d.ts.map +1 -1
- package/dist/types/tokenchain.d.ts +1 -0
- package/dist/types/tokenchain.d.ts.map +1 -1
- package/package.json +4 -4
- package/src/htmlminifier.js +14 -0
- package/src/htmlparser.js +75 -42
- package/src/presets.js +1 -0
- package/src/tokenchain.js +77 -34
package/src/htmlparser.js
CHANGED
|
@@ -82,6 +82,9 @@ const preCompiledStackedTags = {
|
|
|
82
82
|
'noscript': /([\s\S]*?)<\/noscript[^>]*>/i
|
|
83
83
|
};
|
|
84
84
|
|
|
85
|
+
// Cache for compiled attribute regexes per handler configuration
|
|
86
|
+
const attrRegexCache = new WeakMap();
|
|
87
|
+
|
|
85
88
|
function attrForHandler(handler) {
|
|
86
89
|
let pattern = singleAttrIdentifier.source +
|
|
87
90
|
'(?:\\s*(' + joinSingleAttrAssigns(handler) + ')' +
|
|
@@ -119,22 +122,47 @@ export class HTMLParser {
|
|
|
119
122
|
}
|
|
120
123
|
|
|
121
124
|
async parse() {
|
|
122
|
-
let html = this.html;
|
|
123
125
|
const handler = this.handler;
|
|
126
|
+
const fullHtml = this.html;
|
|
127
|
+
const fullLength = fullHtml.length;
|
|
124
128
|
|
|
125
129
|
const stack = []; let lastTag;
|
|
126
|
-
|
|
127
|
-
let
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
130
|
+
// Use cached attribute regex if available
|
|
131
|
+
let attribute = attrRegexCache.get(handler);
|
|
132
|
+
if (!attribute) {
|
|
133
|
+
attribute = attrForHandler(handler);
|
|
134
|
+
attrRegexCache.set(handler, attribute);
|
|
135
|
+
}
|
|
136
|
+
let prevTag = undefined, nextTag = undefined;
|
|
137
|
+
|
|
138
|
+
// Index-based parsing
|
|
139
|
+
let pos = 0;
|
|
140
|
+
let lastPos;
|
|
141
|
+
|
|
142
|
+
// Helper to get remaining HTML from current position
|
|
143
|
+
const remaining = () => fullHtml.slice(pos);
|
|
144
|
+
|
|
145
|
+
// Helper to advance position
|
|
146
|
+
const advance = (n) => { pos += n; };
|
|
147
|
+
|
|
148
|
+
// Lazy line/column calculation—only compute on actual errors
|
|
149
|
+
const getLineColumn = (position) => {
|
|
150
|
+
let line = 1;
|
|
151
|
+
let column = 1;
|
|
152
|
+
for (let i = 0; i < position; i++) {
|
|
153
|
+
if (fullHtml[i] === '\n') {
|
|
154
|
+
line++;
|
|
155
|
+
column = 1;
|
|
156
|
+
} else {
|
|
157
|
+
column++;
|
|
158
|
+
}
|
|
159
|
+
}
|
|
160
|
+
return { line, column };
|
|
134
161
|
};
|
|
135
162
|
|
|
136
|
-
while (
|
|
137
|
-
|
|
163
|
+
while (pos < fullLength) {
|
|
164
|
+
lastPos = pos;
|
|
165
|
+
const html = remaining();
|
|
138
166
|
// Make sure we’re not in a `script` or `style` element
|
|
139
167
|
if (!lastTag || !special.has(lastTag)) {
|
|
140
168
|
let textEnd = html.indexOf('<');
|
|
@@ -147,7 +175,7 @@ export class HTMLParser {
|
|
|
147
175
|
if (handler.comment) {
|
|
148
176
|
await handler.comment(html.substring(4, commentEnd));
|
|
149
177
|
}
|
|
150
|
-
|
|
178
|
+
advance(commentEnd + 3);
|
|
151
179
|
prevTag = '';
|
|
152
180
|
continue;
|
|
153
181
|
}
|
|
@@ -161,7 +189,7 @@ export class HTMLParser {
|
|
|
161
189
|
if (handler.comment) {
|
|
162
190
|
await handler.comment(html.substring(2, conditionalEnd + 1), true /* non-standard */);
|
|
163
191
|
}
|
|
164
|
-
|
|
192
|
+
advance(conditionalEnd + 2);
|
|
165
193
|
prevTag = '';
|
|
166
194
|
continue;
|
|
167
195
|
}
|
|
@@ -173,7 +201,7 @@ export class HTMLParser {
|
|
|
173
201
|
if (handler.doctype) {
|
|
174
202
|
handler.doctype(doctypeMatch[0]);
|
|
175
203
|
}
|
|
176
|
-
|
|
204
|
+
advance(doctypeMatch[0].length);
|
|
177
205
|
prevTag = '';
|
|
178
206
|
continue;
|
|
179
207
|
}
|
|
@@ -181,7 +209,7 @@ export class HTMLParser {
|
|
|
181
209
|
// End tag
|
|
182
210
|
const endTagMatch = html.match(endTag);
|
|
183
211
|
if (endTagMatch) {
|
|
184
|
-
|
|
212
|
+
advance(endTagMatch[0].length);
|
|
185
213
|
await parseEndTag(endTagMatch[0], endTagMatch[1]);
|
|
186
214
|
prevTag = '/' + endTagMatch[1].toLowerCase();
|
|
187
215
|
continue;
|
|
@@ -190,7 +218,7 @@ export class HTMLParser {
|
|
|
190
218
|
// Start tag
|
|
191
219
|
const startTagMatch = parseStartTag(html);
|
|
192
220
|
if (startTagMatch) {
|
|
193
|
-
|
|
221
|
+
advance(startTagMatch.advance);
|
|
194
222
|
await handleStartTag(startTagMatch);
|
|
195
223
|
prevTag = startTagMatch.tagName.toLowerCase();
|
|
196
224
|
continue;
|
|
@@ -205,18 +233,19 @@ export class HTMLParser {
|
|
|
205
233
|
let text;
|
|
206
234
|
if (textEnd >= 0) {
|
|
207
235
|
text = html.substring(0, textEnd);
|
|
208
|
-
|
|
236
|
+
advance(textEnd);
|
|
209
237
|
} else {
|
|
210
238
|
text = html;
|
|
211
|
-
html
|
|
239
|
+
advance(html.length);
|
|
212
240
|
}
|
|
213
241
|
|
|
214
242
|
// Next tag
|
|
215
|
-
|
|
243
|
+
const nextHtml = remaining();
|
|
244
|
+
let nextTagMatch = parseStartTag(nextHtml);
|
|
216
245
|
if (nextTagMatch) {
|
|
217
246
|
nextTag = nextTagMatch.tagName;
|
|
218
247
|
} else {
|
|
219
|
-
nextTagMatch =
|
|
248
|
+
nextTagMatch = nextHtml.match(endTag);
|
|
220
249
|
if (nextTagMatch) {
|
|
221
250
|
nextTag = '/' + nextTagMatch[1];
|
|
222
251
|
} else {
|
|
@@ -245,41 +274,38 @@ export class HTMLParser {
|
|
|
245
274
|
await handler.chars(text);
|
|
246
275
|
}
|
|
247
276
|
// Advance HTML past the matched special tag content and its closing tag
|
|
248
|
-
|
|
277
|
+
advance(m.index + m[0].length);
|
|
249
278
|
await parseEndTag('</' + stackedTag + '>', stackedTag);
|
|
250
279
|
} else {
|
|
251
280
|
// No closing tag found; to avoid infinite loop, break similarly to previous behavior
|
|
252
281
|
if (handler.continueOnParseError && handler.chars && html) {
|
|
253
282
|
await handler.chars(html[0], prevTag, '');
|
|
254
|
-
|
|
283
|
+
advance(1);
|
|
255
284
|
} else {
|
|
256
285
|
break;
|
|
257
286
|
}
|
|
258
287
|
}
|
|
259
288
|
}
|
|
260
289
|
|
|
261
|
-
if (
|
|
290
|
+
if (pos === lastPos) {
|
|
262
291
|
if (handler.continueOnParseError) {
|
|
263
292
|
// Skip the problematic character and continue
|
|
264
293
|
if (handler.chars) {
|
|
265
|
-
await handler.chars(
|
|
294
|
+
await handler.chars(fullHtml[pos], prevTag, '');
|
|
266
295
|
}
|
|
267
|
-
|
|
268
|
-
position++;
|
|
296
|
+
advance(1);
|
|
269
297
|
prevTag = '';
|
|
270
298
|
continue;
|
|
271
299
|
}
|
|
272
|
-
const loc = getLineColumn(
|
|
273
|
-
// Include some context before the error position so the snippet contains
|
|
274
|
-
// the offending markup plus preceding characters (e.g. "invalid<tag").
|
|
300
|
+
const loc = getLineColumn(pos);
|
|
301
|
+
// Include some context before the error position so the snippet contains the offending markup plus preceding characters (e.g., “invalid<tag”)
|
|
275
302
|
const CONTEXT_BEFORE = 50;
|
|
276
|
-
const startPos = Math.max(0,
|
|
277
|
-
const snippet =
|
|
303
|
+
const startPos = Math.max(0, pos - CONTEXT_BEFORE);
|
|
304
|
+
const snippet = fullHtml.slice(startPos, startPos + 200).replace(/\n/g, ' ');
|
|
278
305
|
throw new Error(
|
|
279
|
-
`Parse error at line ${loc.line}, column ${loc.column}:\n${snippet}${
|
|
306
|
+
`Parse error at line ${loc.line}, column ${loc.column}:\n${snippet}${fullHtml.length > startPos + 200 ? '…' : ''}`
|
|
280
307
|
);
|
|
281
308
|
}
|
|
282
|
-
position = this.html.length - html.length;
|
|
283
309
|
}
|
|
284
310
|
|
|
285
311
|
if (!handler.partialMarkup) {
|
|
@@ -292,9 +318,11 @@ export class HTMLParser {
|
|
|
292
318
|
if (start) {
|
|
293
319
|
const match = {
|
|
294
320
|
tagName: start[1],
|
|
295
|
-
attrs: []
|
|
321
|
+
attrs: [],
|
|
322
|
+
advance: 0
|
|
296
323
|
};
|
|
297
|
-
|
|
324
|
+
let consumed = start[0].length;
|
|
325
|
+
input = input.slice(consumed);
|
|
298
326
|
let end, attr;
|
|
299
327
|
|
|
300
328
|
// Safety limit: max length of input to check for attributes
|
|
@@ -344,7 +372,9 @@ export class HTMLParser {
|
|
|
344
372
|
} else {
|
|
345
373
|
attr[baseIndex + 3] = value; // Single-quoted value
|
|
346
374
|
}
|
|
347
|
-
|
|
375
|
+
const attrLen = fullAttr.length;
|
|
376
|
+
input = input.slice(attrLen);
|
|
377
|
+
consumed += attrLen;
|
|
348
378
|
match.attrs.push(attr);
|
|
349
379
|
continue;
|
|
350
380
|
}
|
|
@@ -361,7 +391,9 @@ export class HTMLParser {
|
|
|
361
391
|
break;
|
|
362
392
|
}
|
|
363
393
|
|
|
364
|
-
|
|
394
|
+
const attrLen = attr[0].length;
|
|
395
|
+
input = input.slice(attrLen);
|
|
396
|
+
consumed += attrLen;
|
|
365
397
|
match.attrs.push(attr);
|
|
366
398
|
}
|
|
367
399
|
|
|
@@ -369,7 +401,8 @@ export class HTMLParser {
|
|
|
369
401
|
end = input.match(startTagClose);
|
|
370
402
|
if (end) {
|
|
371
403
|
match.unarySlash = end[1];
|
|
372
|
-
|
|
404
|
+
consumed += end[0].length;
|
|
405
|
+
match.advance = consumed;
|
|
373
406
|
return match;
|
|
374
407
|
}
|
|
375
408
|
}
|
|
@@ -379,7 +412,7 @@ export class HTMLParser {
|
|
|
379
412
|
let pos;
|
|
380
413
|
const needle = tagName.toLowerCase();
|
|
381
414
|
for (pos = stack.length - 1; pos >= 0; pos--) {
|
|
382
|
-
const currentTag = stack[pos].
|
|
415
|
+
const currentTag = stack[pos].lowerTag;
|
|
383
416
|
if (currentTag === needle) {
|
|
384
417
|
return pos;
|
|
385
418
|
}
|
|
@@ -433,7 +466,7 @@ export class HTMLParser {
|
|
|
433
466
|
}
|
|
434
467
|
if (tagName === 'col' && findTag('colgroup') < 0) {
|
|
435
468
|
lastTag = 'colgroup';
|
|
436
|
-
stack.push({ tag: lastTag, attrs: [] });
|
|
469
|
+
stack.push({ tag: lastTag, lowerTag: 'colgroup', attrs: [] });
|
|
437
470
|
if (handler.start) {
|
|
438
471
|
await handler.start(lastTag, [], false, '');
|
|
439
472
|
}
|
|
@@ -512,7 +545,7 @@ export class HTMLParser {
|
|
|
512
545
|
});
|
|
513
546
|
|
|
514
547
|
if (!unary) {
|
|
515
|
-
stack.push({ tag: tagName, attrs });
|
|
548
|
+
stack.push({ tag: tagName, lowerTag: tagName.toLowerCase(), attrs });
|
|
516
549
|
lastTag = tagName;
|
|
517
550
|
unarySlash = '';
|
|
518
551
|
}
|
|
@@ -526,7 +559,7 @@ export class HTMLParser {
|
|
|
526
559
|
let pos;
|
|
527
560
|
const needle = tagName.toLowerCase();
|
|
528
561
|
for (pos = stack.length - 1; pos >= 0; pos--) {
|
|
529
|
-
if (stack[pos].
|
|
562
|
+
if (stack[pos].lowerTag === needle) {
|
|
530
563
|
break;
|
|
531
564
|
}
|
|
532
565
|
}
|
package/src/presets.js
CHANGED
package/src/tokenchain.js
CHANGED
|
@@ -1,21 +1,40 @@
|
|
|
1
1
|
class Sorter {
|
|
2
2
|
sort(tokens, fromIndex = 0) {
|
|
3
3
|
for (let i = 0, len = this.keys.length; i < len; i++) {
|
|
4
|
-
const
|
|
5
|
-
const token = key.slice(1);
|
|
4
|
+
const token = this.keys[i];
|
|
6
5
|
|
|
7
|
-
|
|
6
|
+
// Build position map for this token to avoid repeated `indexOf`
|
|
7
|
+
const positions = [];
|
|
8
|
+
for (let j = fromIndex; j < tokens.length; j++) {
|
|
9
|
+
if (tokens[j] === token) {
|
|
10
|
+
positions.push(j);
|
|
11
|
+
}
|
|
12
|
+
}
|
|
13
|
+
|
|
14
|
+
if (positions.length > 0) {
|
|
15
|
+
// Build new array with tokens in sorted order instead of splicing
|
|
16
|
+
const result = [];
|
|
8
17
|
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
18
|
+
// Add all instances of the current token first
|
|
19
|
+
for (let j = 0; j < positions.length; j++) {
|
|
20
|
+
result.push(token);
|
|
21
|
+
}
|
|
22
|
+
|
|
23
|
+
// Add other tokens, skipping positions where current token was
|
|
24
|
+
const posSet = new Set(positions);
|
|
25
|
+
for (let j = fromIndex; j < tokens.length; j++) {
|
|
26
|
+
if (!posSet.has(j)) {
|
|
27
|
+
result.push(tokens[j]);
|
|
14
28
|
}
|
|
15
|
-
|
|
16
|
-
|
|
29
|
+
}
|
|
30
|
+
|
|
31
|
+
// Copy sorted portion back to tokens array
|
|
32
|
+
for (let j = 0; j < result.length; j++) {
|
|
33
|
+
tokens[fromIndex + j] = result[j];
|
|
34
|
+
}
|
|
17
35
|
|
|
18
|
-
|
|
36
|
+
const newFromIndex = fromIndex + positions.length;
|
|
37
|
+
return this.sorterMap.get(token).sort(tokens, newFromIndex);
|
|
19
38
|
}
|
|
20
39
|
}
|
|
21
40
|
return tokens;
|
|
@@ -23,46 +42,70 @@ class Sorter {
|
|
|
23
42
|
}
|
|
24
43
|
|
|
25
44
|
class TokenChain {
|
|
45
|
+
constructor() {
|
|
46
|
+
// Use Map instead of object properties for better performance
|
|
47
|
+
this.map = new Map();
|
|
48
|
+
}
|
|
49
|
+
|
|
26
50
|
add(tokens) {
|
|
27
51
|
tokens.forEach((token) => {
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
this[key] = [];
|
|
31
|
-
this[key].processed = 0;
|
|
52
|
+
if (!this.map.has(token)) {
|
|
53
|
+
this.map.set(token, { arrays: [], processed: 0 });
|
|
32
54
|
}
|
|
33
|
-
this
|
|
55
|
+
this.map.get(token).arrays.push(tokens);
|
|
34
56
|
});
|
|
35
57
|
}
|
|
36
58
|
|
|
37
59
|
createSorter() {
|
|
38
60
|
const sorter = new Sorter();
|
|
61
|
+
sorter.sorterMap = new Map();
|
|
62
|
+
|
|
63
|
+
// Convert Map entries to array and sort
|
|
64
|
+
const entries = Array.from(this.map.entries()).sort((a, b) => {
|
|
65
|
+
const m = a[1].arrays.length;
|
|
66
|
+
const n = b[1].arrays.length;
|
|
67
|
+
// Sort by length descending (larger first)
|
|
68
|
+
const lengthDiff = n - m;
|
|
69
|
+
if (lengthDiff !== 0) return lengthDiff;
|
|
70
|
+
// If lengths equal, sort by key ascending
|
|
71
|
+
return a[0].localeCompare(b[0]);
|
|
72
|
+
});
|
|
73
|
+
|
|
74
|
+
sorter.keys = [];
|
|
39
75
|
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
const n = this[k].length;
|
|
43
|
-
return m < n ? 1 : m > n ? -1 : j < k ? -1 : j > k ? 1 : 0;
|
|
44
|
-
}).filter((key) => {
|
|
45
|
-
if (this[key].processed < this[key].length) {
|
|
46
|
-
const token = key.slice(1);
|
|
76
|
+
entries.forEach(([token, data]) => {
|
|
77
|
+
if (data.processed < data.arrays.length) {
|
|
47
78
|
const chain = new TokenChain();
|
|
48
79
|
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
80
|
+
data.arrays.forEach((tokens) => {
|
|
81
|
+
// Build new array without the current token instead of splicing
|
|
82
|
+
const filtered = [];
|
|
83
|
+
for (let i = 0; i < tokens.length; i++) {
|
|
84
|
+
if (tokens[i] !== token) {
|
|
85
|
+
filtered.push(tokens[i]);
|
|
86
|
+
}
|
|
53
87
|
}
|
|
54
|
-
|
|
55
|
-
|
|
88
|
+
|
|
89
|
+
// Mark remaining tokens as processed
|
|
90
|
+
filtered.forEach((t) => {
|
|
91
|
+
const tData = this.map.get(t);
|
|
92
|
+
if (tData) {
|
|
93
|
+
tData.processed++;
|
|
94
|
+
}
|
|
56
95
|
});
|
|
57
|
-
|
|
96
|
+
|
|
97
|
+
if (filtered.length > 0) {
|
|
98
|
+
chain.add(filtered);
|
|
99
|
+
}
|
|
58
100
|
});
|
|
59
|
-
|
|
60
|
-
|
|
101
|
+
|
|
102
|
+
sorter.keys.push(token);
|
|
103
|
+
sorter.sorterMap.set(token, chain.createSorter());
|
|
61
104
|
}
|
|
62
|
-
return false;
|
|
63
105
|
});
|
|
106
|
+
|
|
64
107
|
return sorter;
|
|
65
108
|
}
|
|
66
109
|
}
|
|
67
110
|
|
|
68
|
-
export default TokenChain;
|
|
111
|
+
export default TokenChain;
|