html-minifier-next 4.9.1 → 4.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +25 -19
- package/dist/htmlminifier.cjs +191 -94
- package/dist/htmlminifier.esm.bundle.js +191 -94
- package/dist/types/htmlminifier.d.ts.map +1 -1
- package/dist/types/htmlparser.d.ts.map +1 -1
- package/dist/types/tokenchain.d.ts +1 -0
- package/dist/types/tokenchain.d.ts.map +1 -1
- package/package.json +4 -4
- package/src/htmlminifier.js +5 -1
- package/src/htmlparser.js +89 -50
- package/src/tokenchain.js +77 -34
package/src/htmlparser.js
CHANGED
|
@@ -15,8 +15,6 @@
|
|
|
15
15
|
* });
|
|
16
16
|
*/
|
|
17
17
|
|
|
18
|
-
import { replaceAsync } from './utils.js';
|
|
19
|
-
|
|
20
18
|
class CaseInsensitiveSet extends Set {
|
|
21
19
|
has(str) {
|
|
22
20
|
return super.has(str.toLowerCase());
|
|
@@ -84,6 +82,9 @@ const preCompiledStackedTags = {
|
|
|
84
82
|
'noscript': /([\s\S]*?)<\/noscript[^>]*>/i
|
|
85
83
|
};
|
|
86
84
|
|
|
85
|
+
// Cache for compiled attribute regexes per handler configuration
|
|
86
|
+
const attrRegexCache = new WeakMap();
|
|
87
|
+
|
|
87
88
|
function attrForHandler(handler) {
|
|
88
89
|
let pattern = singleAttrIdentifier.source +
|
|
89
90
|
'(?:\\s*(' + joinSingleAttrAssigns(handler) + ')' +
|
|
@@ -121,22 +122,47 @@ export class HTMLParser {
|
|
|
121
122
|
}
|
|
122
123
|
|
|
123
124
|
async parse() {
|
|
124
|
-
let html = this.html;
|
|
125
125
|
const handler = this.handler;
|
|
126
|
+
const fullHtml = this.html;
|
|
127
|
+
const fullLength = fullHtml.length;
|
|
126
128
|
|
|
127
129
|
const stack = []; let lastTag;
|
|
128
|
-
|
|
129
|
-
let
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
130
|
+
// Use cached attribute regex if available
|
|
131
|
+
let attribute = attrRegexCache.get(handler);
|
|
132
|
+
if (!attribute) {
|
|
133
|
+
attribute = attrForHandler(handler);
|
|
134
|
+
attrRegexCache.set(handler, attribute);
|
|
135
|
+
}
|
|
136
|
+
let prevTag = undefined, nextTag = undefined;
|
|
137
|
+
|
|
138
|
+
// Index-based parsing
|
|
139
|
+
let pos = 0;
|
|
140
|
+
let lastPos;
|
|
141
|
+
|
|
142
|
+
// Helper to get remaining HTML from current position
|
|
143
|
+
const remaining = () => fullHtml.slice(pos);
|
|
144
|
+
|
|
145
|
+
// Helper to advance position
|
|
146
|
+
const advance = (n) => { pos += n; };
|
|
147
|
+
|
|
148
|
+
// Lazy line/column calculation—only compute on actual errors
|
|
149
|
+
const getLineColumn = (position) => {
|
|
150
|
+
let line = 1;
|
|
151
|
+
let column = 1;
|
|
152
|
+
for (let i = 0; i < position; i++) {
|
|
153
|
+
if (fullHtml[i] === '\n') {
|
|
154
|
+
line++;
|
|
155
|
+
column = 1;
|
|
156
|
+
} else {
|
|
157
|
+
column++;
|
|
158
|
+
}
|
|
159
|
+
}
|
|
160
|
+
return { line, column };
|
|
136
161
|
};
|
|
137
162
|
|
|
138
|
-
while (
|
|
139
|
-
|
|
163
|
+
while (pos < fullLength) {
|
|
164
|
+
lastPos = pos;
|
|
165
|
+
const html = remaining();
|
|
140
166
|
// Make sure we’re not in a `script` or `style` element
|
|
141
167
|
if (!lastTag || !special.has(lastTag)) {
|
|
142
168
|
let textEnd = html.indexOf('<');
|
|
@@ -149,7 +175,7 @@ export class HTMLParser {
|
|
|
149
175
|
if (handler.comment) {
|
|
150
176
|
await handler.comment(html.substring(4, commentEnd));
|
|
151
177
|
}
|
|
152
|
-
|
|
178
|
+
advance(commentEnd + 3);
|
|
153
179
|
prevTag = '';
|
|
154
180
|
continue;
|
|
155
181
|
}
|
|
@@ -163,7 +189,7 @@ export class HTMLParser {
|
|
|
163
189
|
if (handler.comment) {
|
|
164
190
|
await handler.comment(html.substring(2, conditionalEnd + 1), true /* non-standard */);
|
|
165
191
|
}
|
|
166
|
-
|
|
192
|
+
advance(conditionalEnd + 2);
|
|
167
193
|
prevTag = '';
|
|
168
194
|
continue;
|
|
169
195
|
}
|
|
@@ -175,7 +201,7 @@ export class HTMLParser {
|
|
|
175
201
|
if (handler.doctype) {
|
|
176
202
|
handler.doctype(doctypeMatch[0]);
|
|
177
203
|
}
|
|
178
|
-
|
|
204
|
+
advance(doctypeMatch[0].length);
|
|
179
205
|
prevTag = '';
|
|
180
206
|
continue;
|
|
181
207
|
}
|
|
@@ -183,8 +209,8 @@ export class HTMLParser {
|
|
|
183
209
|
// End tag
|
|
184
210
|
const endTagMatch = html.match(endTag);
|
|
185
211
|
if (endTagMatch) {
|
|
186
|
-
|
|
187
|
-
await
|
|
212
|
+
advance(endTagMatch[0].length);
|
|
213
|
+
await parseEndTag(endTagMatch[0], endTagMatch[1]);
|
|
188
214
|
prevTag = '/' + endTagMatch[1].toLowerCase();
|
|
189
215
|
continue;
|
|
190
216
|
}
|
|
@@ -192,7 +218,7 @@ export class HTMLParser {
|
|
|
192
218
|
// Start tag
|
|
193
219
|
const startTagMatch = parseStartTag(html);
|
|
194
220
|
if (startTagMatch) {
|
|
195
|
-
|
|
221
|
+
advance(startTagMatch.advance);
|
|
196
222
|
await handleStartTag(startTagMatch);
|
|
197
223
|
prevTag = startTagMatch.tagName.toLowerCase();
|
|
198
224
|
continue;
|
|
@@ -207,18 +233,19 @@ export class HTMLParser {
|
|
|
207
233
|
let text;
|
|
208
234
|
if (textEnd >= 0) {
|
|
209
235
|
text = html.substring(0, textEnd);
|
|
210
|
-
|
|
236
|
+
advance(textEnd);
|
|
211
237
|
} else {
|
|
212
238
|
text = html;
|
|
213
|
-
html
|
|
239
|
+
advance(html.length);
|
|
214
240
|
}
|
|
215
241
|
|
|
216
242
|
// Next tag
|
|
217
|
-
|
|
243
|
+
const nextHtml = remaining();
|
|
244
|
+
let nextTagMatch = parseStartTag(nextHtml);
|
|
218
245
|
if (nextTagMatch) {
|
|
219
246
|
nextTag = nextTagMatch.tagName;
|
|
220
247
|
} else {
|
|
221
|
-
nextTagMatch =
|
|
248
|
+
nextTagMatch = nextHtml.match(endTag);
|
|
222
249
|
if (nextTagMatch) {
|
|
223
250
|
nextTag = '/' + nextTagMatch[1];
|
|
224
251
|
} else {
|
|
@@ -235,45 +262,50 @@ export class HTMLParser {
|
|
|
235
262
|
// Use pre-compiled regex for common tags (`script`, `style`, `noscript`) to avoid regex creation overhead
|
|
236
263
|
const reStackedTag = preCompiledStackedTags[stackedTag] || reCache[stackedTag] || (reCache[stackedTag] = new RegExp('([\\s\\S]*?)</' + stackedTag + '[^>]*>', 'i'));
|
|
237
264
|
|
|
238
|
-
|
|
265
|
+
const m = reStackedTag.exec(html);
|
|
266
|
+
if (m) {
|
|
267
|
+
let text = m[1];
|
|
239
268
|
if (stackedTag !== 'script' && stackedTag !== 'style' && stackedTag !== 'noscript') {
|
|
240
269
|
text = text
|
|
241
270
|
.replace(/<!--([\s\S]*?)-->/g, '$1')
|
|
242
271
|
.replace(/<!\[CDATA\[([\s\S]*?)]]>/g, '$1');
|
|
243
272
|
}
|
|
244
|
-
|
|
245
273
|
if (handler.chars) {
|
|
246
274
|
await handler.chars(text);
|
|
247
275
|
}
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
276
|
+
// Advance HTML past the matched special tag content and its closing tag
|
|
277
|
+
advance(m.index + m[0].length);
|
|
278
|
+
await parseEndTag('</' + stackedTag + '>', stackedTag);
|
|
279
|
+
} else {
|
|
280
|
+
// No closing tag found; to avoid infinite loop, break similarly to previous behavior
|
|
281
|
+
if (handler.continueOnParseError && handler.chars && html) {
|
|
282
|
+
await handler.chars(html[0], prevTag, '');
|
|
283
|
+
advance(1);
|
|
284
|
+
} else {
|
|
285
|
+
break;
|
|
286
|
+
}
|
|
287
|
+
}
|
|
253
288
|
}
|
|
254
289
|
|
|
255
|
-
if (
|
|
290
|
+
if (pos === lastPos) {
|
|
256
291
|
if (handler.continueOnParseError) {
|
|
257
292
|
// Skip the problematic character and continue
|
|
258
293
|
if (handler.chars) {
|
|
259
|
-
await handler.chars(
|
|
294
|
+
await handler.chars(fullHtml[pos], prevTag, '');
|
|
260
295
|
}
|
|
261
|
-
|
|
262
|
-
position++;
|
|
296
|
+
advance(1);
|
|
263
297
|
prevTag = '';
|
|
264
298
|
continue;
|
|
265
299
|
}
|
|
266
|
-
const loc = getLineColumn(
|
|
267
|
-
// Include some context before the error position so the snippet contains
|
|
268
|
-
// the offending markup plus preceding characters (e.g. "invalid<tag").
|
|
300
|
+
const loc = getLineColumn(pos);
|
|
301
|
+
// Include some context before the error position so the snippet contains the offending markup plus preceding characters (e.g., “invalid<tag”)
|
|
269
302
|
const CONTEXT_BEFORE = 50;
|
|
270
|
-
const startPos = Math.max(0,
|
|
271
|
-
const snippet =
|
|
303
|
+
const startPos = Math.max(0, pos - CONTEXT_BEFORE);
|
|
304
|
+
const snippet = fullHtml.slice(startPos, startPos + 200).replace(/\n/g, ' ');
|
|
272
305
|
throw new Error(
|
|
273
|
-
`Parse error at line ${loc.line}, column ${loc.column}:\n${snippet}${
|
|
306
|
+
`Parse error at line ${loc.line}, column ${loc.column}:\n${snippet}${fullHtml.length > startPos + 200 ? '…' : ''}`
|
|
274
307
|
);
|
|
275
308
|
}
|
|
276
|
-
position = this.html.length - html.length;
|
|
277
309
|
}
|
|
278
310
|
|
|
279
311
|
if (!handler.partialMarkup) {
|
|
@@ -286,9 +318,11 @@ export class HTMLParser {
|
|
|
286
318
|
if (start) {
|
|
287
319
|
const match = {
|
|
288
320
|
tagName: start[1],
|
|
289
|
-
attrs: []
|
|
321
|
+
attrs: [],
|
|
322
|
+
advance: 0
|
|
290
323
|
};
|
|
291
|
-
|
|
324
|
+
let consumed = start[0].length;
|
|
325
|
+
input = input.slice(consumed);
|
|
292
326
|
let end, attr;
|
|
293
327
|
|
|
294
328
|
// Safety limit: max length of input to check for attributes
|
|
@@ -338,7 +372,9 @@ export class HTMLParser {
|
|
|
338
372
|
} else {
|
|
339
373
|
attr[baseIndex + 3] = value; // Single-quoted value
|
|
340
374
|
}
|
|
341
|
-
|
|
375
|
+
const attrLen = fullAttr.length;
|
|
376
|
+
input = input.slice(attrLen);
|
|
377
|
+
consumed += attrLen;
|
|
342
378
|
match.attrs.push(attr);
|
|
343
379
|
continue;
|
|
344
380
|
}
|
|
@@ -355,7 +391,9 @@ export class HTMLParser {
|
|
|
355
391
|
break;
|
|
356
392
|
}
|
|
357
393
|
|
|
358
|
-
|
|
394
|
+
const attrLen = attr[0].length;
|
|
395
|
+
input = input.slice(attrLen);
|
|
396
|
+
consumed += attrLen;
|
|
359
397
|
match.attrs.push(attr);
|
|
360
398
|
}
|
|
361
399
|
|
|
@@ -363,7 +401,8 @@ export class HTMLParser {
|
|
|
363
401
|
end = input.match(startTagClose);
|
|
364
402
|
if (end) {
|
|
365
403
|
match.unarySlash = end[1];
|
|
366
|
-
|
|
404
|
+
consumed += end[0].length;
|
|
405
|
+
match.advance = consumed;
|
|
367
406
|
return match;
|
|
368
407
|
}
|
|
369
408
|
}
|
|
@@ -373,7 +412,7 @@ export class HTMLParser {
|
|
|
373
412
|
let pos;
|
|
374
413
|
const needle = tagName.toLowerCase();
|
|
375
414
|
for (pos = stack.length - 1; pos >= 0; pos--) {
|
|
376
|
-
const currentTag = stack[pos].
|
|
415
|
+
const currentTag = stack[pos].lowerTag;
|
|
377
416
|
if (currentTag === needle) {
|
|
378
417
|
return pos;
|
|
379
418
|
}
|
|
@@ -427,7 +466,7 @@ export class HTMLParser {
|
|
|
427
466
|
}
|
|
428
467
|
if (tagName === 'col' && findTag('colgroup') < 0) {
|
|
429
468
|
lastTag = 'colgroup';
|
|
430
|
-
stack.push({ tag: lastTag, attrs: [] });
|
|
469
|
+
stack.push({ tag: lastTag, lowerTag: 'colgroup', attrs: [] });
|
|
431
470
|
if (handler.start) {
|
|
432
471
|
await handler.start(lastTag, [], false, '');
|
|
433
472
|
}
|
|
@@ -506,7 +545,7 @@ export class HTMLParser {
|
|
|
506
545
|
});
|
|
507
546
|
|
|
508
547
|
if (!unary) {
|
|
509
|
-
stack.push({ tag: tagName, attrs });
|
|
548
|
+
stack.push({ tag: tagName, lowerTag: tagName.toLowerCase(), attrs });
|
|
510
549
|
lastTag = tagName;
|
|
511
550
|
unarySlash = '';
|
|
512
551
|
}
|
|
@@ -520,7 +559,7 @@ export class HTMLParser {
|
|
|
520
559
|
let pos;
|
|
521
560
|
const needle = tagName.toLowerCase();
|
|
522
561
|
for (pos = stack.length - 1; pos >= 0; pos--) {
|
|
523
|
-
if (stack[pos].
|
|
562
|
+
if (stack[pos].lowerTag === needle) {
|
|
524
563
|
break;
|
|
525
564
|
}
|
|
526
565
|
}
|
package/src/tokenchain.js
CHANGED
|
@@ -1,21 +1,40 @@
|
|
|
1
1
|
class Sorter {
|
|
2
2
|
sort(tokens, fromIndex = 0) {
|
|
3
3
|
for (let i = 0, len = this.keys.length; i < len; i++) {
|
|
4
|
-
const
|
|
5
|
-
const token = key.slice(1);
|
|
4
|
+
const token = this.keys[i];
|
|
6
5
|
|
|
7
|
-
|
|
6
|
+
// Build position map for this token to avoid repeated `indexOf`
|
|
7
|
+
const positions = [];
|
|
8
|
+
for (let j = fromIndex; j < tokens.length; j++) {
|
|
9
|
+
if (tokens[j] === token) {
|
|
10
|
+
positions.push(j);
|
|
11
|
+
}
|
|
12
|
+
}
|
|
13
|
+
|
|
14
|
+
if (positions.length > 0) {
|
|
15
|
+
// Build new array with tokens in sorted order instead of splicing
|
|
16
|
+
const result = [];
|
|
8
17
|
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
18
|
+
// Add all instances of the current token first
|
|
19
|
+
for (let j = 0; j < positions.length; j++) {
|
|
20
|
+
result.push(token);
|
|
21
|
+
}
|
|
22
|
+
|
|
23
|
+
// Add other tokens, skipping positions where current token was
|
|
24
|
+
const posSet = new Set(positions);
|
|
25
|
+
for (let j = fromIndex; j < tokens.length; j++) {
|
|
26
|
+
if (!posSet.has(j)) {
|
|
27
|
+
result.push(tokens[j]);
|
|
14
28
|
}
|
|
15
|
-
|
|
16
|
-
|
|
29
|
+
}
|
|
30
|
+
|
|
31
|
+
// Copy sorted portion back to tokens array
|
|
32
|
+
for (let j = 0; j < result.length; j++) {
|
|
33
|
+
tokens[fromIndex + j] = result[j];
|
|
34
|
+
}
|
|
17
35
|
|
|
18
|
-
|
|
36
|
+
const newFromIndex = fromIndex + positions.length;
|
|
37
|
+
return this.sorterMap.get(token).sort(tokens, newFromIndex);
|
|
19
38
|
}
|
|
20
39
|
}
|
|
21
40
|
return tokens;
|
|
@@ -23,46 +42,70 @@ class Sorter {
|
|
|
23
42
|
}
|
|
24
43
|
|
|
25
44
|
class TokenChain {
|
|
45
|
+
constructor() {
|
|
46
|
+
// Use Map instead of object properties for better performance
|
|
47
|
+
this.map = new Map();
|
|
48
|
+
}
|
|
49
|
+
|
|
26
50
|
add(tokens) {
|
|
27
51
|
tokens.forEach((token) => {
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
this[key] = [];
|
|
31
|
-
this[key].processed = 0;
|
|
52
|
+
if (!this.map.has(token)) {
|
|
53
|
+
this.map.set(token, { arrays: [], processed: 0 });
|
|
32
54
|
}
|
|
33
|
-
this
|
|
55
|
+
this.map.get(token).arrays.push(tokens);
|
|
34
56
|
});
|
|
35
57
|
}
|
|
36
58
|
|
|
37
59
|
createSorter() {
|
|
38
60
|
const sorter = new Sorter();
|
|
61
|
+
sorter.sorterMap = new Map();
|
|
62
|
+
|
|
63
|
+
// Convert Map entries to array and sort
|
|
64
|
+
const entries = Array.from(this.map.entries()).sort((a, b) => {
|
|
65
|
+
const m = a[1].arrays.length;
|
|
66
|
+
const n = b[1].arrays.length;
|
|
67
|
+
// Sort by length descending (larger first)
|
|
68
|
+
const lengthDiff = n - m;
|
|
69
|
+
if (lengthDiff !== 0) return lengthDiff;
|
|
70
|
+
// If lengths equal, sort by key ascending
|
|
71
|
+
return a[0].localeCompare(b[0]);
|
|
72
|
+
});
|
|
73
|
+
|
|
74
|
+
sorter.keys = [];
|
|
39
75
|
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
const n = this[k].length;
|
|
43
|
-
return m < n ? 1 : m > n ? -1 : j < k ? -1 : j > k ? 1 : 0;
|
|
44
|
-
}).filter((key) => {
|
|
45
|
-
if (this[key].processed < this[key].length) {
|
|
46
|
-
const token = key.slice(1);
|
|
76
|
+
entries.forEach(([token, data]) => {
|
|
77
|
+
if (data.processed < data.arrays.length) {
|
|
47
78
|
const chain = new TokenChain();
|
|
48
79
|
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
80
|
+
data.arrays.forEach((tokens) => {
|
|
81
|
+
// Build new array without the current token instead of splicing
|
|
82
|
+
const filtered = [];
|
|
83
|
+
for (let i = 0; i < tokens.length; i++) {
|
|
84
|
+
if (tokens[i] !== token) {
|
|
85
|
+
filtered.push(tokens[i]);
|
|
86
|
+
}
|
|
53
87
|
}
|
|
54
|
-
|
|
55
|
-
|
|
88
|
+
|
|
89
|
+
// Mark remaining tokens as processed
|
|
90
|
+
filtered.forEach((t) => {
|
|
91
|
+
const tData = this.map.get(t);
|
|
92
|
+
if (tData) {
|
|
93
|
+
tData.processed++;
|
|
94
|
+
}
|
|
56
95
|
});
|
|
57
|
-
|
|
96
|
+
|
|
97
|
+
if (filtered.length > 0) {
|
|
98
|
+
chain.add(filtered);
|
|
99
|
+
}
|
|
58
100
|
});
|
|
59
|
-
|
|
60
|
-
|
|
101
|
+
|
|
102
|
+
sorter.keys.push(token);
|
|
103
|
+
sorter.sorterMap.set(token, chain.createSorter());
|
|
61
104
|
}
|
|
62
|
-
return false;
|
|
63
105
|
});
|
|
106
|
+
|
|
64
107
|
return sorter;
|
|
65
108
|
}
|
|
66
109
|
}
|
|
67
110
|
|
|
68
|
-
export default TokenChain;
|
|
111
|
+
export default TokenChain;
|