html-minifier-next 4.9.2 → 4.11.0

This diff compares publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
package/src/htmlparser.js CHANGED
@@ -82,6 +82,9 @@ const preCompiledStackedTags = {
82
82
  'noscript': /([\s\S]*?)<\/noscript[^>]*>/i
83
83
  };
84
84
 
85
+ // Cache for compiled attribute regexes per handler configuration
86
+ const attrRegexCache = new WeakMap();
87
+
85
88
  function attrForHandler(handler) {
86
89
  let pattern = singleAttrIdentifier.source +
87
90
  '(?:\\s*(' + joinSingleAttrAssigns(handler) + ')' +
@@ -119,22 +122,47 @@ export class HTMLParser {
119
122
  }
120
123
 
121
124
  async parse() {
122
- let html = this.html;
123
125
  const handler = this.handler;
126
+ const fullHtml = this.html;
127
+ const fullLength = fullHtml.length;
124
128
 
125
129
  const stack = []; let lastTag;
126
- const attribute = attrForHandler(handler);
127
- let last, prevTag = undefined, nextTag = undefined;
128
-
129
- // Track position for better error messages
130
- let position = 0;
131
- const getLineColumn = (pos) => {
132
- const lines = this.html.slice(0, pos).split('\n');
133
- return { line: lines.length, column: lines[lines.length - 1].length + 1 };
130
+ // Use cached attribute regex if available
131
+ let attribute = attrRegexCache.get(handler);
132
+ if (!attribute) {
133
+ attribute = attrForHandler(handler);
134
+ attrRegexCache.set(handler, attribute);
135
+ }
136
+ let prevTag = undefined, nextTag = undefined;
137
+
138
+ // Index-based parsing
139
+ let pos = 0;
140
+ let lastPos;
141
+
142
+ // Helper to get remaining HTML from current position
143
+ const remaining = () => fullHtml.slice(pos);
144
+
145
+ // Helper to advance position
146
+ const advance = (n) => { pos += n; };
147
+
148
+ // Lazy line/column calculation—only compute on actual errors
149
+ const getLineColumn = (position) => {
150
+ let line = 1;
151
+ let column = 1;
152
+ for (let i = 0; i < position; i++) {
153
+ if (fullHtml[i] === '\n') {
154
+ line++;
155
+ column = 1;
156
+ } else {
157
+ column++;
158
+ }
159
+ }
160
+ return { line, column };
134
161
  };
135
162
 
136
- while (html) {
137
- last = html;
163
+ while (pos < fullLength) {
164
+ lastPos = pos;
165
+ const html = remaining();
138
166
  // Make sure we’re not in a `script` or `style` element
139
167
  if (!lastTag || !special.has(lastTag)) {
140
168
  let textEnd = html.indexOf('<');
@@ -147,7 +175,7 @@ export class HTMLParser {
147
175
  if (handler.comment) {
148
176
  await handler.comment(html.substring(4, commentEnd));
149
177
  }
150
- html = html.substring(commentEnd + 3);
178
+ advance(commentEnd + 3);
151
179
  prevTag = '';
152
180
  continue;
153
181
  }
@@ -161,7 +189,7 @@ export class HTMLParser {
161
189
  if (handler.comment) {
162
190
  await handler.comment(html.substring(2, conditionalEnd + 1), true /* non-standard */);
163
191
  }
164
- html = html.substring(conditionalEnd + 2);
192
+ advance(conditionalEnd + 2);
165
193
  prevTag = '';
166
194
  continue;
167
195
  }
@@ -173,7 +201,7 @@ export class HTMLParser {
173
201
  if (handler.doctype) {
174
202
  handler.doctype(doctypeMatch[0]);
175
203
  }
176
- html = html.substring(doctypeMatch[0].length);
204
+ advance(doctypeMatch[0].length);
177
205
  prevTag = '';
178
206
  continue;
179
207
  }
@@ -181,7 +209,7 @@ export class HTMLParser {
181
209
  // End tag
182
210
  const endTagMatch = html.match(endTag);
183
211
  if (endTagMatch) {
184
- html = html.substring(endTagMatch[0].length);
212
+ advance(endTagMatch[0].length);
185
213
  await parseEndTag(endTagMatch[0], endTagMatch[1]);
186
214
  prevTag = '/' + endTagMatch[1].toLowerCase();
187
215
  continue;
@@ -190,7 +218,7 @@ export class HTMLParser {
190
218
  // Start tag
191
219
  const startTagMatch = parseStartTag(html);
192
220
  if (startTagMatch) {
193
- html = startTagMatch.rest;
221
+ advance(startTagMatch.advance);
194
222
  await handleStartTag(startTagMatch);
195
223
  prevTag = startTagMatch.tagName.toLowerCase();
196
224
  continue;
@@ -205,18 +233,19 @@ export class HTMLParser {
205
233
  let text;
206
234
  if (textEnd >= 0) {
207
235
  text = html.substring(0, textEnd);
208
- html = html.substring(textEnd);
236
+ advance(textEnd);
209
237
  } else {
210
238
  text = html;
211
- html = '';
239
+ advance(html.length);
212
240
  }
213
241
 
214
242
  // Next tag
215
- let nextTagMatch = parseStartTag(html);
243
+ const nextHtml = remaining();
244
+ let nextTagMatch = parseStartTag(nextHtml);
216
245
  if (nextTagMatch) {
217
246
  nextTag = nextTagMatch.tagName;
218
247
  } else {
219
- nextTagMatch = html.match(endTag);
248
+ nextTagMatch = nextHtml.match(endTag);
220
249
  if (nextTagMatch) {
221
250
  nextTag = '/' + nextTagMatch[1];
222
251
  } else {
@@ -245,41 +274,38 @@ export class HTMLParser {
245
274
  await handler.chars(text);
246
275
  }
247
276
  // Advance HTML past the matched special tag content and its closing tag
248
- html = html.slice(m.index + m[0].length);
277
+ advance(m.index + m[0].length);
249
278
  await parseEndTag('</' + stackedTag + '>', stackedTag);
250
279
  } else {
251
280
  // No closing tag found; to avoid infinite loop, break similarly to previous behavior
252
281
  if (handler.continueOnParseError && handler.chars && html) {
253
282
  await handler.chars(html[0], prevTag, '');
254
- html = html.substring(1);
283
+ advance(1);
255
284
  } else {
256
285
  break;
257
286
  }
258
287
  }
259
288
  }
260
289
 
261
- if (html === last) {
290
+ if (pos === lastPos) {
262
291
  if (handler.continueOnParseError) {
263
292
  // Skip the problematic character and continue
264
293
  if (handler.chars) {
265
- await handler.chars(html[0], prevTag, '');
294
+ await handler.chars(fullHtml[pos], prevTag, '');
266
295
  }
267
- html = html.substring(1);
268
- position++;
296
+ advance(1);
269
297
  prevTag = '';
270
298
  continue;
271
299
  }
272
- const loc = getLineColumn(position);
273
- // Include some context before the error position so the snippet contains
274
- // the offending markup plus preceding characters (e.g. "invalid<tag").
300
+ const loc = getLineColumn(pos);
301
+ // Include some context before the error position so the snippet contains the offending markup plus preceding characters (e.g., “invalid<tag”)
275
302
  const CONTEXT_BEFORE = 50;
276
- const startPos = Math.max(0, position - CONTEXT_BEFORE);
277
- const snippet = this.html.slice(startPos, startPos + 200).replace(/\n/g, ' ');
303
+ const startPos = Math.max(0, pos - CONTEXT_BEFORE);
304
+ const snippet = fullHtml.slice(startPos, startPos + 200).replace(/\n/g, ' ');
278
305
  throw new Error(
279
- `Parse error at line ${loc.line}, column ${loc.column}:\n${snippet}${this.html.length > startPos + 200 ? '…' : ''}`
306
+ `Parse error at line ${loc.line}, column ${loc.column}:\n${snippet}${fullHtml.length > startPos + 200 ? '…' : ''}`
280
307
  );
281
308
  }
282
- position = this.html.length - html.length;
283
309
  }
284
310
 
285
311
  if (!handler.partialMarkup) {
@@ -292,9 +318,11 @@ export class HTMLParser {
292
318
  if (start) {
293
319
  const match = {
294
320
  tagName: start[1],
295
- attrs: []
321
+ attrs: [],
322
+ advance: 0
296
323
  };
297
- input = input.slice(start[0].length);
324
+ let consumed = start[0].length;
325
+ input = input.slice(consumed);
298
326
  let end, attr;
299
327
 
300
328
  // Safety limit: max length of input to check for attributes
@@ -344,7 +372,9 @@ export class HTMLParser {
344
372
  } else {
345
373
  attr[baseIndex + 3] = value; // Single-quoted value
346
374
  }
347
- input = input.slice(fullAttr.length);
375
+ const attrLen = fullAttr.length;
376
+ input = input.slice(attrLen);
377
+ consumed += attrLen;
348
378
  match.attrs.push(attr);
349
379
  continue;
350
380
  }
@@ -361,7 +391,9 @@ export class HTMLParser {
361
391
  break;
362
392
  }
363
393
 
364
- input = input.slice(attr[0].length);
394
+ const attrLen = attr[0].length;
395
+ input = input.slice(attrLen);
396
+ consumed += attrLen;
365
397
  match.attrs.push(attr);
366
398
  }
367
399
 
@@ -369,7 +401,8 @@ export class HTMLParser {
369
401
  end = input.match(startTagClose);
370
402
  if (end) {
371
403
  match.unarySlash = end[1];
372
- match.rest = input.slice(end[0].length);
404
+ consumed += end[0].length;
405
+ match.advance = consumed;
373
406
  return match;
374
407
  }
375
408
  }
@@ -379,7 +412,7 @@ export class HTMLParser {
379
412
  let pos;
380
413
  const needle = tagName.toLowerCase();
381
414
  for (pos = stack.length - 1; pos >= 0; pos--) {
382
- const currentTag = stack[pos].tag.toLowerCase();
415
+ const currentTag = stack[pos].lowerTag;
383
416
  if (currentTag === needle) {
384
417
  return pos;
385
418
  }
@@ -433,7 +466,7 @@ export class HTMLParser {
433
466
  }
434
467
  if (tagName === 'col' && findTag('colgroup') < 0) {
435
468
  lastTag = 'colgroup';
436
- stack.push({ tag: lastTag, attrs: [] });
469
+ stack.push({ tag: lastTag, lowerTag: 'colgroup', attrs: [] });
437
470
  if (handler.start) {
438
471
  await handler.start(lastTag, [], false, '');
439
472
  }
@@ -512,7 +545,7 @@ export class HTMLParser {
512
545
  });
513
546
 
514
547
  if (!unary) {
515
- stack.push({ tag: tagName, attrs });
548
+ stack.push({ tag: tagName, lowerTag: tagName.toLowerCase(), attrs });
516
549
  lastTag = tagName;
517
550
  unarySlash = '';
518
551
  }
@@ -526,7 +559,7 @@ export class HTMLParser {
526
559
  let pos;
527
560
  const needle = tagName.toLowerCase();
528
561
  for (pos = stack.length - 1; pos >= 0; pos--) {
529
- if (stack[pos].tag.toLowerCase() === needle) {
562
+ if (stack[pos].lowerTag === needle) {
530
563
  break;
531
564
  }
532
565
  }
package/src/presets.js CHANGED
@@ -22,6 +22,7 @@ export const presets = {
22
22
  useShortDoctype: true
23
23
  },
24
24
  comprehensive: {
25
+ // @@ Add `collapseAttributeWhitespace: true` (also add to preset in demo)
25
26
  caseSensitive: true,
26
27
  collapseBooleanAttributes: true,
27
28
  collapseInlineTagWhitespace: true,
package/src/tokenchain.js CHANGED
@@ -1,21 +1,40 @@
1
1
  class Sorter {
2
2
  sort(tokens, fromIndex = 0) {
3
3
  for (let i = 0, len = this.keys.length; i < len; i++) {
4
- const key = this.keys[i];
5
- const token = key.slice(1);
4
+ const token = this.keys[i];
6
5
 
7
- let index = tokens.indexOf(token, fromIndex);
6
+ // Build position map for this token to avoid repeated `indexOf`
7
+ const positions = [];
8
+ for (let j = fromIndex; j < tokens.length; j++) {
9
+ if (tokens[j] === token) {
10
+ positions.push(j);
11
+ }
12
+ }
13
+
14
+ if (positions.length > 0) {
15
+ // Build new array with tokens in sorted order instead of splicing
16
+ const result = [];
8
17
 
9
- if (index !== -1) {
10
- do {
11
- if (index !== fromIndex) {
12
- tokens.splice(index, 1);
13
- tokens.splice(fromIndex, 0, token);
18
+ // Add all instances of the current token first
19
+ for (let j = 0; j < positions.length; j++) {
20
+ result.push(token);
21
+ }
22
+
23
+ // Add other tokens, skipping positions where current token was
24
+ const posSet = new Set(positions);
25
+ for (let j = fromIndex; j < tokens.length; j++) {
26
+ if (!posSet.has(j)) {
27
+ result.push(tokens[j]);
14
28
  }
15
- fromIndex++;
16
- } while ((index = tokens.indexOf(token, fromIndex)) !== -1);
29
+ }
30
+
31
+ // Copy sorted portion back to tokens array
32
+ for (let j = 0; j < result.length; j++) {
33
+ tokens[fromIndex + j] = result[j];
34
+ }
17
35
 
18
- return this[key].sort(tokens, fromIndex);
36
+ const newFromIndex = fromIndex + positions.length;
37
+ return this.sorterMap.get(token).sort(tokens, newFromIndex);
19
38
  }
20
39
  }
21
40
  return tokens;
@@ -23,46 +42,70 @@ class Sorter {
23
42
  }
24
43
 
25
44
  class TokenChain {
45
+ constructor() {
46
+ // Use Map instead of object properties for better performance
47
+ this.map = new Map();
48
+ }
49
+
26
50
  add(tokens) {
27
51
  tokens.forEach((token) => {
28
- const key = '$' + token;
29
- if (!this[key]) {
30
- this[key] = [];
31
- this[key].processed = 0;
52
+ if (!this.map.has(token)) {
53
+ this.map.set(token, { arrays: [], processed: 0 });
32
54
  }
33
- this[key].push(tokens);
55
+ this.map.get(token).arrays.push(tokens);
34
56
  });
35
57
  }
36
58
 
37
59
  createSorter() {
38
60
  const sorter = new Sorter();
61
+ sorter.sorterMap = new Map();
62
+
63
+ // Convert Map entries to array and sort
64
+ const entries = Array.from(this.map.entries()).sort((a, b) => {
65
+ const m = a[1].arrays.length;
66
+ const n = b[1].arrays.length;
67
+ // Sort by length descending (larger first)
68
+ const lengthDiff = n - m;
69
+ if (lengthDiff !== 0) return lengthDiff;
70
+ // If lengths equal, sort by key ascending
71
+ return a[0].localeCompare(b[0]);
72
+ });
73
+
74
+ sorter.keys = [];
39
75
 
40
- sorter.keys = Object.keys(this).sort((j, k) => {
41
- const m = this[j].length;
42
- const n = this[k].length;
43
- return m < n ? 1 : m > n ? -1 : j < k ? -1 : j > k ? 1 : 0;
44
- }).filter((key) => {
45
- if (this[key].processed < this[key].length) {
46
- const token = key.slice(1);
76
+ entries.forEach(([token, data]) => {
77
+ if (data.processed < data.arrays.length) {
47
78
  const chain = new TokenChain();
48
79
 
49
- this[key].forEach((tokens) => {
50
- let index;
51
- while ((index = tokens.indexOf(token)) !== -1) {
52
- tokens.splice(index, 1);
80
+ data.arrays.forEach((tokens) => {
81
+ // Build new array without the current token instead of splicing
82
+ const filtered = [];
83
+ for (let i = 0; i < tokens.length; i++) {
84
+ if (tokens[i] !== token) {
85
+ filtered.push(tokens[i]);
86
+ }
53
87
  }
54
- tokens.forEach((token) => {
55
- this['$' + token].processed++;
88
+
89
+ // Mark remaining tokens as processed
90
+ filtered.forEach((t) => {
91
+ const tData = this.map.get(t);
92
+ if (tData) {
93
+ tData.processed++;
94
+ }
56
95
  });
57
- chain.add(tokens.slice(0));
96
+
97
+ if (filtered.length > 0) {
98
+ chain.add(filtered);
99
+ }
58
100
  });
59
- sorter[key] = chain.createSorter();
60
- return true;
101
+
102
+ sorter.keys.push(token);
103
+ sorter.sorterMap.set(token, chain.createSorter());
61
104
  }
62
- return false;
63
105
  });
106
+
64
107
  return sorter;
65
108
  }
66
109
  }
67
110
 
68
- export default TokenChain;
111
+ export default TokenChain;