html-minifier-next 4.9.1 → 4.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/htmlparser.js CHANGED
@@ -15,8 +15,6 @@
15
15
  * });
16
16
  */
17
17
 
18
- import { replaceAsync } from './utils.js';
19
-
20
18
  class CaseInsensitiveSet extends Set {
21
19
  has(str) {
22
20
  return super.has(str.toLowerCase());
@@ -84,6 +82,9 @@ const preCompiledStackedTags = {
84
82
  'noscript': /([\s\S]*?)<\/noscript[^>]*>/i
85
83
  };
86
84
 
85
+ // Cache for compiled attribute regexes per handler configuration
86
+ const attrRegexCache = new WeakMap();
87
+
87
88
  function attrForHandler(handler) {
88
89
  let pattern = singleAttrIdentifier.source +
89
90
  '(?:\\s*(' + joinSingleAttrAssigns(handler) + ')' +
@@ -121,22 +122,47 @@ export class HTMLParser {
121
122
  }
122
123
 
123
124
  async parse() {
124
- let html = this.html;
125
125
  const handler = this.handler;
126
+ const fullHtml = this.html;
127
+ const fullLength = fullHtml.length;
126
128
 
127
129
  const stack = []; let lastTag;
128
- const attribute = attrForHandler(handler);
129
- let last, prevTag = undefined, nextTag = undefined;
130
-
131
- // Track position for better error messages
132
- let position = 0;
133
- const getLineColumn = (pos) => {
134
- const lines = this.html.slice(0, pos).split('\n');
135
- return { line: lines.length, column: lines[lines.length - 1].length + 1 };
130
+ // Use cached attribute regex if available
131
+ let attribute = attrRegexCache.get(handler);
132
+ if (!attribute) {
133
+ attribute = attrForHandler(handler);
134
+ attrRegexCache.set(handler, attribute);
135
+ }
136
+ let prevTag = undefined, nextTag = undefined;
137
+
138
+ // Index-based parsing
139
+ let pos = 0;
140
+ let lastPos;
141
+
142
+ // Helper to get remaining HTML from current position
143
+ const remaining = () => fullHtml.slice(pos);
144
+
145
+ // Helper to advance position
146
+ const advance = (n) => { pos += n; };
147
+
148
+ // Lazy line/column calculation—only compute on actual errors
149
+ const getLineColumn = (position) => {
150
+ let line = 1;
151
+ let column = 1;
152
+ for (let i = 0; i < position; i++) {
153
+ if (fullHtml[i] === '\n') {
154
+ line++;
155
+ column = 1;
156
+ } else {
157
+ column++;
158
+ }
159
+ }
160
+ return { line, column };
136
161
  };
137
162
 
138
- while (html) {
139
- last = html;
163
+ while (pos < fullLength) {
164
+ lastPos = pos;
165
+ const html = remaining();
140
166
  // Make sure we’re not in a `script` or `style` element
141
167
  if (!lastTag || !special.has(lastTag)) {
142
168
  let textEnd = html.indexOf('<');
@@ -149,7 +175,7 @@ export class HTMLParser {
149
175
  if (handler.comment) {
150
176
  await handler.comment(html.substring(4, commentEnd));
151
177
  }
152
- html = html.substring(commentEnd + 3);
178
+ advance(commentEnd + 3);
153
179
  prevTag = '';
154
180
  continue;
155
181
  }
@@ -163,7 +189,7 @@ export class HTMLParser {
163
189
  if (handler.comment) {
164
190
  await handler.comment(html.substring(2, conditionalEnd + 1), true /* non-standard */);
165
191
  }
166
- html = html.substring(conditionalEnd + 2);
192
+ advance(conditionalEnd + 2);
167
193
  prevTag = '';
168
194
  continue;
169
195
  }
@@ -175,7 +201,7 @@ export class HTMLParser {
175
201
  if (handler.doctype) {
176
202
  handler.doctype(doctypeMatch[0]);
177
203
  }
178
- html = html.substring(doctypeMatch[0].length);
204
+ advance(doctypeMatch[0].length);
179
205
  prevTag = '';
180
206
  continue;
181
207
  }
@@ -183,8 +209,8 @@ export class HTMLParser {
183
209
  // End tag
184
210
  const endTagMatch = html.match(endTag);
185
211
  if (endTagMatch) {
186
- html = html.substring(endTagMatch[0].length);
187
- await replaceAsync(endTagMatch[0], endTag, parseEndTag);
212
+ advance(endTagMatch[0].length);
213
+ await parseEndTag(endTagMatch[0], endTagMatch[1]);
188
214
  prevTag = '/' + endTagMatch[1].toLowerCase();
189
215
  continue;
190
216
  }
@@ -192,7 +218,7 @@ export class HTMLParser {
192
218
  // Start tag
193
219
  const startTagMatch = parseStartTag(html);
194
220
  if (startTagMatch) {
195
- html = startTagMatch.rest;
221
+ advance(startTagMatch.advance);
196
222
  await handleStartTag(startTagMatch);
197
223
  prevTag = startTagMatch.tagName.toLowerCase();
198
224
  continue;
@@ -207,18 +233,19 @@ export class HTMLParser {
207
233
  let text;
208
234
  if (textEnd >= 0) {
209
235
  text = html.substring(0, textEnd);
210
- html = html.substring(textEnd);
236
+ advance(textEnd);
211
237
  } else {
212
238
  text = html;
213
- html = '';
239
+ advance(html.length);
214
240
  }
215
241
 
216
242
  // Next tag
217
- let nextTagMatch = parseStartTag(html);
243
+ const nextHtml = remaining();
244
+ let nextTagMatch = parseStartTag(nextHtml);
218
245
  if (nextTagMatch) {
219
246
  nextTag = nextTagMatch.tagName;
220
247
  } else {
221
- nextTagMatch = html.match(endTag);
248
+ nextTagMatch = nextHtml.match(endTag);
222
249
  if (nextTagMatch) {
223
250
  nextTag = '/' + nextTagMatch[1];
224
251
  } else {
@@ -235,45 +262,50 @@ export class HTMLParser {
235
262
  // Use pre-compiled regex for common tags (`script`, `style`, `noscript`) to avoid regex creation overhead
236
263
  const reStackedTag = preCompiledStackedTags[stackedTag] || reCache[stackedTag] || (reCache[stackedTag] = new RegExp('([\\s\\S]*?)</' + stackedTag + '[^>]*>', 'i'));
237
264
 
238
- html = await replaceAsync(html, reStackedTag, async (_, text) => {
265
+ const m = reStackedTag.exec(html);
266
+ if (m) {
267
+ let text = m[1];
239
268
  if (stackedTag !== 'script' && stackedTag !== 'style' && stackedTag !== 'noscript') {
240
269
  text = text
241
270
  .replace(/<!--([\s\S]*?)-->/g, '$1')
242
271
  .replace(/<!\[CDATA\[([\s\S]*?)]]>/g, '$1');
243
272
  }
244
-
245
273
  if (handler.chars) {
246
274
  await handler.chars(text);
247
275
  }
248
-
249
- return '';
250
- });
251
-
252
- await parseEndTag('</' + stackedTag + '>', stackedTag);
276
+ // Advance HTML past the matched special tag content and its closing tag
277
+ advance(m.index + m[0].length);
278
+ await parseEndTag('</' + stackedTag + '>', stackedTag);
279
+ } else {
280
+ // No closing tag found; to avoid infinite loop, break similarly to previous behavior
281
+ if (handler.continueOnParseError && handler.chars && html) {
282
+ await handler.chars(html[0], prevTag, '');
283
+ advance(1);
284
+ } else {
285
+ break;
286
+ }
287
+ }
253
288
  }
254
289
 
255
- if (html === last) {
290
+ if (pos === lastPos) {
256
291
  if (handler.continueOnParseError) {
257
292
  // Skip the problematic character and continue
258
293
  if (handler.chars) {
259
- await handler.chars(html[0], prevTag, '');
294
+ await handler.chars(fullHtml[pos], prevTag, '');
260
295
  }
261
- html = html.substring(1);
262
- position++;
296
+ advance(1);
263
297
  prevTag = '';
264
298
  continue;
265
299
  }
266
- const loc = getLineColumn(position);
267
- // Include some context before the error position so the snippet contains
268
- // the offending markup plus preceding characters (e.g. "invalid<tag").
300
+ const loc = getLineColumn(pos);
301
+ // Include some context before the error position so the snippet contains the offending markup plus preceding characters (e.g., “invalid<tag”)
269
302
  const CONTEXT_BEFORE = 50;
270
- const startPos = Math.max(0, position - CONTEXT_BEFORE);
271
- const snippet = this.html.slice(startPos, startPos + 200).replace(/\n/g, ' ');
303
+ const startPos = Math.max(0, pos - CONTEXT_BEFORE);
304
+ const snippet = fullHtml.slice(startPos, startPos + 200).replace(/\n/g, ' ');
272
305
  throw new Error(
273
- `Parse error at line ${loc.line}, column ${loc.column}:\n${snippet}${this.html.length > startPos + 200 ? '…' : ''}`
306
+ `Parse error at line ${loc.line}, column ${loc.column}:\n${snippet}${fullHtml.length > startPos + 200 ? '…' : ''}`
274
307
  );
275
308
  }
276
- position = this.html.length - html.length;
277
309
  }
278
310
 
279
311
  if (!handler.partialMarkup) {
@@ -286,9 +318,11 @@ export class HTMLParser {
286
318
  if (start) {
287
319
  const match = {
288
320
  tagName: start[1],
289
- attrs: []
321
+ attrs: [],
322
+ advance: 0
290
323
  };
291
- input = input.slice(start[0].length);
324
+ let consumed = start[0].length;
325
+ input = input.slice(consumed);
292
326
  let end, attr;
293
327
 
294
328
  // Safety limit: max length of input to check for attributes
@@ -338,7 +372,9 @@ export class HTMLParser {
338
372
  } else {
339
373
  attr[baseIndex + 3] = value; // Single-quoted value
340
374
  }
341
- input = input.slice(fullAttr.length);
375
+ const attrLen = fullAttr.length;
376
+ input = input.slice(attrLen);
377
+ consumed += attrLen;
342
378
  match.attrs.push(attr);
343
379
  continue;
344
380
  }
@@ -355,7 +391,9 @@ export class HTMLParser {
355
391
  break;
356
392
  }
357
393
 
358
- input = input.slice(attr[0].length);
394
+ const attrLen = attr[0].length;
395
+ input = input.slice(attrLen);
396
+ consumed += attrLen;
359
397
  match.attrs.push(attr);
360
398
  }
361
399
 
@@ -363,7 +401,8 @@ export class HTMLParser {
363
401
  end = input.match(startTagClose);
364
402
  if (end) {
365
403
  match.unarySlash = end[1];
366
- match.rest = input.slice(end[0].length);
404
+ consumed += end[0].length;
405
+ match.advance = consumed;
367
406
  return match;
368
407
  }
369
408
  }
@@ -373,7 +412,7 @@ export class HTMLParser {
373
412
  let pos;
374
413
  const needle = tagName.toLowerCase();
375
414
  for (pos = stack.length - 1; pos >= 0; pos--) {
376
- const currentTag = stack[pos].tag.toLowerCase();
415
+ const currentTag = stack[pos].lowerTag;
377
416
  if (currentTag === needle) {
378
417
  return pos;
379
418
  }
@@ -427,7 +466,7 @@ export class HTMLParser {
427
466
  }
428
467
  if (tagName === 'col' && findTag('colgroup') < 0) {
429
468
  lastTag = 'colgroup';
430
- stack.push({ tag: lastTag, attrs: [] });
469
+ stack.push({ tag: lastTag, lowerTag: 'colgroup', attrs: [] });
431
470
  if (handler.start) {
432
471
  await handler.start(lastTag, [], false, '');
433
472
  }
@@ -506,7 +545,7 @@ export class HTMLParser {
506
545
  });
507
546
 
508
547
  if (!unary) {
509
- stack.push({ tag: tagName, attrs });
548
+ stack.push({ tag: tagName, lowerTag: tagName.toLowerCase(), attrs });
510
549
  lastTag = tagName;
511
550
  unarySlash = '';
512
551
  }
@@ -520,7 +559,7 @@ export class HTMLParser {
520
559
  let pos;
521
560
  const needle = tagName.toLowerCase();
522
561
  for (pos = stack.length - 1; pos >= 0; pos--) {
523
- if (stack[pos].tag.toLowerCase() === needle) {
562
+ if (stack[pos].lowerTag === needle) {
524
563
  break;
525
564
  }
526
565
  }
package/src/tokenchain.js CHANGED
@@ -1,21 +1,40 @@
1
1
  class Sorter {
2
2
  sort(tokens, fromIndex = 0) {
3
3
  for (let i = 0, len = this.keys.length; i < len; i++) {
4
- const key = this.keys[i];
5
- const token = key.slice(1);
4
+ const token = this.keys[i];
6
5
 
7
- let index = tokens.indexOf(token, fromIndex);
6
+ // Build position map for this token to avoid repeated `indexOf`
7
+ const positions = [];
8
+ for (let j = fromIndex; j < tokens.length; j++) {
9
+ if (tokens[j] === token) {
10
+ positions.push(j);
11
+ }
12
+ }
13
+
14
+ if (positions.length > 0) {
15
+ // Build new array with tokens in sorted order instead of splicing
16
+ const result = [];
8
17
 
9
- if (index !== -1) {
10
- do {
11
- if (index !== fromIndex) {
12
- tokens.splice(index, 1);
13
- tokens.splice(fromIndex, 0, token);
18
+ // Add all instances of the current token first
19
+ for (let j = 0; j < positions.length; j++) {
20
+ result.push(token);
21
+ }
22
+
23
+ // Add other tokens, skipping positions where current token was
24
+ const posSet = new Set(positions);
25
+ for (let j = fromIndex; j < tokens.length; j++) {
26
+ if (!posSet.has(j)) {
27
+ result.push(tokens[j]);
14
28
  }
15
- fromIndex++;
16
- } while ((index = tokens.indexOf(token, fromIndex)) !== -1);
29
+ }
30
+
31
+ // Copy sorted portion back to tokens array
32
+ for (let j = 0; j < result.length; j++) {
33
+ tokens[fromIndex + j] = result[j];
34
+ }
17
35
 
18
- return this[key].sort(tokens, fromIndex);
36
+ const newFromIndex = fromIndex + positions.length;
37
+ return this.sorterMap.get(token).sort(tokens, newFromIndex);
19
38
  }
20
39
  }
21
40
  return tokens;
@@ -23,46 +42,70 @@ class Sorter {
23
42
  }
24
43
 
25
44
  class TokenChain {
45
+ constructor() {
46
+ // Use Map instead of object properties for better performance
47
+ this.map = new Map();
48
+ }
49
+
26
50
  add(tokens) {
27
51
  tokens.forEach((token) => {
28
- const key = '$' + token;
29
- if (!this[key]) {
30
- this[key] = [];
31
- this[key].processed = 0;
52
+ if (!this.map.has(token)) {
53
+ this.map.set(token, { arrays: [], processed: 0 });
32
54
  }
33
- this[key].push(tokens);
55
+ this.map.get(token).arrays.push(tokens);
34
56
  });
35
57
  }
36
58
 
37
59
  createSorter() {
38
60
  const sorter = new Sorter();
61
+ sorter.sorterMap = new Map();
62
+
63
+ // Convert Map entries to array and sort
64
+ const entries = Array.from(this.map.entries()).sort((a, b) => {
65
+ const m = a[1].arrays.length;
66
+ const n = b[1].arrays.length;
67
+ // Sort by length descending (larger first)
68
+ const lengthDiff = n - m;
69
+ if (lengthDiff !== 0) return lengthDiff;
70
+ // If lengths equal, sort by key ascending
71
+ return a[0].localeCompare(b[0]);
72
+ });
73
+
74
+ sorter.keys = [];
39
75
 
40
- sorter.keys = Object.keys(this).sort((j, k) => {
41
- const m = this[j].length;
42
- const n = this[k].length;
43
- return m < n ? 1 : m > n ? -1 : j < k ? -1 : j > k ? 1 : 0;
44
- }).filter((key) => {
45
- if (this[key].processed < this[key].length) {
46
- const token = key.slice(1);
76
+ entries.forEach(([token, data]) => {
77
+ if (data.processed < data.arrays.length) {
47
78
  const chain = new TokenChain();
48
79
 
49
- this[key].forEach((tokens) => {
50
- let index;
51
- while ((index = tokens.indexOf(token)) !== -1) {
52
- tokens.splice(index, 1);
80
+ data.arrays.forEach((tokens) => {
81
+ // Build new array without the current token instead of splicing
82
+ const filtered = [];
83
+ for (let i = 0; i < tokens.length; i++) {
84
+ if (tokens[i] !== token) {
85
+ filtered.push(tokens[i]);
86
+ }
53
87
  }
54
- tokens.forEach((token) => {
55
- this['$' + token].processed++;
88
+
89
+ // Mark remaining tokens as processed
90
+ filtered.forEach((t) => {
91
+ const tData = this.map.get(t);
92
+ if (tData) {
93
+ tData.processed++;
94
+ }
56
95
  });
57
- chain.add(tokens.slice(0));
96
+
97
+ if (filtered.length > 0) {
98
+ chain.add(filtered);
99
+ }
58
100
  });
59
- sorter[key] = chain.createSorter();
60
- return true;
101
+
102
+ sorter.keys.push(token);
103
+ sorter.sorterMap.set(token, chain.createSorter());
61
104
  }
62
- return false;
63
105
  });
106
+
64
107
  return sorter;
65
108
  }
66
109
  }
67
110
 
68
- export default TokenChain;
111
+ export default TokenChain;