html-minifier-next 5.1.2 → 5.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/htmlparser.js CHANGED
@@ -5,6 +5,8 @@
5
5
  * http://erik.eae.net/simplehtmlparser/simplehtmlparser.js
6
6
  */
7
7
 
8
+ import { isThenable } from './lib/utils.js';
9
+
8
10
  /*
9
11
  * Use like so:
10
12
  *
@@ -80,6 +82,23 @@ const preCompiledStackedTags = {
80
82
  // Cache for compiled attribute regexes per handler configuration
81
83
  const attrRegexCache = new WeakMap();
82
84
 
85
+ // O(n) helper: Strip all occurrences of `open…close` delimiters, keeping inner content
86
+ // Used instead of a regex replace to avoid O(n²) behavior on adversarial inputs
87
+ function stripDelimited(str, open, close) {
88
+ let result = '';
89
+ let i = 0;
90
+ while (i < str.length) {
91
+ const start = str.indexOf(open, i);
92
+ if (start === -1) { result += str.slice(i); break; }
93
+ result += str.slice(i, start);
94
+ const end = str.indexOf(close, start + open.length);
95
+ if (end === -1) { result += str.slice(start); break; }
96
+ result += str.slice(start + open.length, end);
97
+ i = end + close.length;
98
+ }
99
+ return result;
100
+ }
101
+
83
102
  function buildAttrRegex(handler) {
84
103
  let pattern = singleAttrIdentifier.source +
85
104
  '(?:\\s*(' + joinSingleAttrAssigns(handler) + ')' +
@@ -151,9 +170,10 @@ export class HTMLParser {
151
170
 
152
171
  // Sticky regex versions for position-based matching (avoids string slicing)
153
172
  const startTagOpenY = new RegExp(startTagOpen.source.slice(1), 'y');
173
+ // `\s*` with sticky flag is O(n) at worst—no retry from different positions possible
154
174
  const startTagCloseY = /\s*(\/?)>/y;
155
175
  const endTagY = new RegExp(endTag.source.slice(1), 'y');
156
- const doctypeY = /<!DOCTYPE\s?[^>]+>/iy;
176
+ const doctypeY = /<!DOCTYPE[^<>]+>/iy;
157
177
  const commentTestY = /<!--/y;
158
178
  const conditionalTestY = /<!\[/y;
159
179
 
@@ -228,7 +248,8 @@ export class HTMLParser {
228
248
 
229
249
  if (commentEnd >= 0) {
230
250
  if (handler.comment) {
231
- await handler.comment(fullHtml.substring(pos + 4, commentEnd));
251
+ const result = handler.comment(fullHtml.substring(pos + 4, commentEnd));
252
+ if (isThenable(result)) await result;
232
253
  }
233
254
  advance(commentEnd + 3 - pos);
234
255
  prevTag = '';
@@ -244,7 +265,8 @@ export class HTMLParser {
244
265
 
245
266
  if (conditionalEnd >= 0) {
246
267
  if (handler.comment) {
247
- await handler.comment(fullHtml.substring(pos + 2, conditionalEnd + 1), true /* Non-standard */);
268
+ const result = handler.comment(fullHtml.substring(pos + 2, conditionalEnd + 1), true /* Non-standard */);
269
+ if (isThenable(result)) await result;
248
270
  }
249
271
  advance(conditionalEnd + 2 - pos);
250
272
  prevTag = '';
@@ -324,7 +346,8 @@ export class HTMLParser {
324
346
  }
325
347
 
326
348
  if (handler.chars) {
327
- await handler.chars(text, prevTag, nextTag, prevAttrs, nextAttrs);
349
+ const result = handler.chars(text, prevTag, nextTag, prevAttrs, nextAttrs);
350
+ if (isThenable(result)) await result;
328
351
  }
329
352
  prevTag = '';
330
353
  prevAttrs = [];
@@ -338,12 +361,11 @@ export class HTMLParser {
338
361
  if (m && m.index === 0) {
339
362
  let text = m[1];
340
363
  if (stackedTag !== 'script' && stackedTag !== 'style' && stackedTag !== 'noscript') {
341
- text = text
342
- .replace(/<!--([\s\S]*?)-->/g, '$1')
343
- .replace(/<!\[CDATA\[([\s\S]*?)]]>/g, '$1');
364
+ text = stripDelimited(stripDelimited(text, '<!--', '-->'), '<![CDATA[', ']]>');
344
365
  }
345
366
  if (handler.chars) {
346
- await handler.chars(text);
367
+ const result = handler.chars(text);
368
+ if (isThenable(result)) await result;
347
369
  }
348
370
  // Advance HTML past the matched special tag content and its closing tag
349
371
  advance(m[0].length);
@@ -351,7 +373,8 @@ export class HTMLParser {
351
373
  } else {
352
374
  // No closing tag found; to avoid infinite loop, break similarly to previous behavior
353
375
  if (handler.continueOnParseError && handler.chars && pos < fullLength) {
354
- await handler.chars(fullHtml[pos], prevTag, '', prevAttrs, []);
376
+ const result = handler.chars(fullHtml[pos], prevTag, '', prevAttrs, []);
377
+ if (isThenable(result)) await result;
355
378
  advance(1);
356
379
  } else {
357
380
  break;
@@ -363,7 +386,8 @@ export class HTMLParser {
363
386
  if (handler.continueOnParseError) {
364
387
  // Skip the problematic character and continue
365
388
  if (handler.chars) {
366
- await handler.chars(fullHtml[pos], prevTag, '', prevAttrs, []);
389
+ const result = handler.chars(fullHtml[pos], prevTag, '', prevAttrs, []);
390
+ if (isThenable(result)) await result;
367
391
  }
368
392
  advance(1);
369
393
  prevTag = '';
@@ -20,6 +20,7 @@ import {
20
20
  } from './constants.js';
21
21
  import { trimWhitespace, collapseWhitespaceAll } from './whitespace.js';
22
22
  import { shouldMinifyInnerHTML } from './options.js';
23
+ import { isThenable } from './utils.js';
23
24
 
24
25
  // Lazy-load entities only when `decodeEntities` is enabled
25
26
 
@@ -298,7 +299,9 @@ function hasAttrName(name, attrs) {
298
299
 
299
300
  // Cleaners
300
301
 
301
- async function cleanAttributeValue(tag, attrName, attrValue, options, attrs, minifyHTMLSelf) {
302
+ // Returns the cleaned attribute value directly (sync) or as a Promise (async);
303
+ // callers must handle both cases—use `isThenable()` to distinguish
304
+ function cleanAttributeValue(tag, attrName, attrValue, options, attrs, minifyHTMLSelf) {
302
305
  // Apply early whitespace normalization if enabled
303
306
  // Preserves special spaces (no-break space, hair space, etc.) for consistency with `collapseWhitespace`
304
307
  if (options.collapseAttributeWhitespace) {
@@ -313,16 +316,18 @@ async function cleanAttributeValue(tag, attrName, attrValue, options, attrs, min
313
316
 
314
317
  if (isEventAttribute(attrName, options)) {
315
318
  attrValue = trimWhitespace(attrValue).replace(/^javascript:\s*/i, '');
316
- try {
317
- return await options.minifyJS(attrValue, true);
318
- } catch (err) {
319
- if (!options.continueOnMinifyError) {
320
- throw err;
321
- }
322
- options.log && options.log(err);
323
- return attrValue;
319
+ const result = options.minifyJS(attrValue, true);
320
+ if (isThenable(result)) {
321
+ return result.catch(err => {
322
+ if (!options.continueOnMinifyError) throw err;
323
+ options.log && options.log(err);
324
+ return attrValue;
325
+ });
324
326
  }
325
- } else if (attrName === 'class') {
327
+ return result;
328
+ }
329
+
330
+ if (attrName === 'class') {
326
331
  attrValue = trimWhitespace(attrValue);
327
332
  if (options.sortClassNames) {
328
333
  attrValue = options.sortClassNames(attrValue);
@@ -330,47 +335,63 @@ async function cleanAttributeValue(tag, attrName, attrValue, options, attrs, min
330
335
  attrValue = collapseWhitespaceAll(attrValue);
331
336
  }
332
337
  return attrValue;
333
- } else if (isUriTypeAttribute(attrName, tag)) {
338
+ }
339
+
340
+ if (isUriTypeAttribute(attrName, tag)) {
334
341
  attrValue = trimWhitespace(attrValue);
335
342
  if (isLinkType(tag, attrs, 'canonical')) {
336
343
  return attrValue;
337
344
  }
338
- try {
339
- const out = await options.minifyURLs(attrValue);
340
- return typeof out === 'string' ? out : attrValue;
341
- } catch (err) {
342
- if (!options.continueOnMinifyError) {
343
- throw err;
344
- }
345
- options.log && options.log(err);
346
- return attrValue;
345
+ const result = options.minifyURLs(attrValue);
346
+ if (isThenable(result)) {
347
+ return result
348
+ .then(out => typeof out === 'string' ? out : attrValue)
349
+ .catch(err => {
350
+ if (!options.continueOnMinifyError) throw err;
351
+ options.log && options.log(err);
352
+ return attrValue;
353
+ });
347
354
  }
348
- } else if (isNumberTypeAttribute(attrName, tag)) {
355
+ return typeof result === 'string' ? result : attrValue;
356
+ }
357
+
358
+ if (isNumberTypeAttribute(attrName, tag)) {
349
359
  return trimWhitespace(attrValue);
350
- } else if (attrName === 'style') {
360
+ }
361
+
362
+ if (attrName === 'style') {
351
363
  attrValue = trimWhitespace(attrValue);
352
364
  if (attrValue) {
353
365
  if (attrValue.endsWith(';') && !/&#?[0-9a-zA-Z]+;$/.test(attrValue)) {
354
366
  attrValue = attrValue.replace(/\s*;$/, ';');
355
367
  }
356
- try {
357
- attrValue = await options.minifyCSS(attrValue, 'inline');
358
- // After minification, check if CSS consists entirely of invalid properties (no values)
359
- // I.e., `color:` or `margin:;padding:` should be treated as empty
360
- if (attrValue && /^(?:[a-z-]+:[;\s]*)+$/i.test(attrValue)) {
361
- attrValue = '';
362
- }
363
- } catch (err) {
364
- if (!options.continueOnMinifyError) {
365
- throw err;
366
- }
367
- options.log && options.log(err);
368
+ const originalAttrValue = attrValue;
369
+ const cssResult = options.minifyCSS(attrValue, 'inline');
370
+ if (isThenable(cssResult)) {
371
+ return cssResult
372
+ .then(minified => {
373
+ // After minification, check if CSS consists entirely of invalid properties (no values)
374
+ // I.e., `color:` or `margin:;padding:` should be treated as empty
375
+ if (minified && /^(?:[a-z-]+:[;\s]*)+$/i.test(minified)) return '';
376
+ return minified;
377
+ })
378
+ .catch(err => {
379
+ if (!options.continueOnMinifyError) throw err;
380
+ options.log && options.log(err);
381
+ return originalAttrValue;
382
+ });
368
383
  }
384
+ // Sync path (`minifyCSS` disabled—identity function)
385
+ if (cssResult && /^(?:[a-z-]+:[;\s]*)+$/i.test(cssResult)) return '';
386
+ return cssResult != null ? cssResult : attrValue;
369
387
  }
370
388
  return attrValue;
371
- } else if (isSrcset(attrName, tag)) {
389
+ }
390
+
391
+ if (isSrcset(attrName, tag)) {
372
392
  // https://html.spec.whatwg.org/multipage/embedded-content.html#attr-img-srcset
373
- attrValue = (await Promise.all(trimWhitespace(attrValue).split(/\s*,\s*/).map(async function (candidate) {
393
+ const candidates = trimWhitespace(attrValue).split(/\s*,\s*/);
394
+ const processed = candidates.map(candidate => {
374
395
  let url = candidate;
375
396
  let descriptor = '';
376
397
  const match = candidate.match(/\s+([1-9][0-9]*w|[0-9]+(?:\.[0-9]+)?x)$/);
@@ -382,47 +403,65 @@ async function cleanAttributeValue(tag, attrName, attrValue, options, attrs, min
382
403
  descriptor = ' ' + num + suffix;
383
404
  }
384
405
  }
385
- try {
386
- const out = await options.minifyURLs(url);
387
- return (typeof out === 'string' ? out : url) + descriptor;
388
- } catch (err) {
389
- if (!options.continueOnMinifyError) {
390
- throw err;
391
- }
392
- options.log && options.log(err);
393
- return url + descriptor;
406
+ const out = options.minifyURLs(url);
407
+ if (isThenable(out)) {
408
+ return out
409
+ .then(result => (typeof result === 'string' ? result : url) + descriptor)
410
+ .catch(err => {
411
+ if (!options.continueOnMinifyError) throw err;
412
+ options.log && options.log(err);
413
+ return url + descriptor;
414
+ });
394
415
  }
395
- }))).join(', ');
396
- } else if (isMetaViewport(tag, attrs) && attrName === 'content') {
397
- attrValue = attrValue.replace(/\s+/g, '').replace(/[0-9]+\.[0-9]+/g, function (numString) {
416
+ return (typeof out === 'string' ? out : url) + descriptor;
417
+ });
418
+ if (processed.some(isThenable)) {
419
+ return Promise.all(processed).then(results => results.join(', '));
420
+ }
421
+ return processed.join(', ');
422
+ }
423
+
424
+ if (isMetaViewport(tag, attrs) && attrName === 'content') {
425
+ return attrValue.replace(/\s+/g, '').replace(/[0-9]+\.[0-9]+/g, function (numString) {
398
426
  // 0.90000 → 0.9
399
427
  // 1.0 → 1
400
428
  // 1.0001 → 1.0001 (unchanged)
401
429
  return (+numString).toString();
402
430
  });
403
- } else if (isContentSecurityPolicy(tag, attrs) && attrName.toLowerCase() === 'content') {
431
+ }
432
+
433
+ if (isContentSecurityPolicy(tag, attrs) && attrName.toLowerCase() === 'content') {
404
434
  return collapseWhitespaceAll(attrValue);
405
- } else if (options.customAttrCollapse && options.customAttrCollapse.test(attrName)) {
406
- attrValue = trimWhitespace(attrValue.replace(/ ?[\n\r]+ ?/g, '').replace(/\s{2,}/g, options.conservativeCollapse ? ' ' : ''));
407
- } else if (tag === 'script' && attrName === 'type') {
408
- attrValue = trimWhitespace(attrValue.replace(/\s*;\s*/g, ';'));
409
- } else if (isMediaQuery(tag, attrs, attrName)) {
435
+ }
436
+
437
+ if (options.customAttrCollapse && options.customAttrCollapse.test(attrName)) {
438
+ return trimWhitespace(attrValue.replace(/ ?[\n\r]+ ?/g, '').replace(/\s{2,}/g, options.conservativeCollapse ? ' ' : ''));
439
+ }
440
+
441
+ if (tag === 'script' && attrName === 'type') {
442
+ return trimWhitespace(attrValue.replace(/\s*;\s*/g, ';'));
443
+ }
444
+
445
+ if (isMediaQuery(tag, attrs, attrName)) {
410
446
  attrValue = trimWhitespace(attrValue);
411
447
  // Only minify actual media queries (those with features in parentheses)
412
448
  // Skip simple media types like `all`, `screen`, `print` which are already minimal
413
449
  if (!/[()]/.test(attrValue)) {
414
450
  return attrValue;
415
451
  }
416
- try {
417
- return await options.minifyCSS(attrValue, 'media');
418
- } catch (err) {
419
- if (!options.continueOnMinifyError) {
420
- throw err;
421
- }
422
- options.log && options.log(err);
423
- return attrValue;
452
+ const originalAttrValue = attrValue;
453
+ const cssResult = options.minifyCSS(attrValue, 'media');
454
+ if (isThenable(cssResult)) {
455
+ return cssResult.catch(err => {
456
+ if (!options.continueOnMinifyError) throw err;
457
+ options.log && options.log(err);
458
+ return originalAttrValue;
459
+ });
424
460
  }
425
- } else if (tag === 'iframe' && attrName === 'srcdoc') {
461
+ return cssResult != null ? cssResult : attrValue;
462
+ }
463
+
464
+ if (tag === 'iframe' && attrName === 'srcdoc') {
426
465
  // Recursively minify HTML content within `srcdoc` attribute
427
466
  // Fast-path: Skip if nothing would change
428
467
  if (!shouldMinifyInnerHTML(options)) {
@@ -430,6 +469,7 @@ async function cleanAttributeValue(tag, attrName, attrValue, options, attrs, min
430
469
  }
431
470
  return minifyHTMLSelf(attrValue, options, true);
432
471
  }
472
+
433
473
  return attrValue;
434
474
  }
435
475
 
@@ -453,17 +493,24 @@ function chooseAttributeQuote(attrValue, options) {
453
493
  return apos < quot ? '\'' : '"';
454
494
  }
455
495
 
456
- async function normalizeAttr(attr, attrs, tag, options, minifyHTML) {
496
+ // Returns the normalized attribute object directly (sync) or as a Promise (async);
497
+ // callers must handle both cases—use `isThenable()` to distinguish
498
+ function normalizeAttr(attr, attrs, tag, options, minifyHTML) {
457
499
  const attrName = options.name(attr.name);
458
500
  let attrValue = attr.value;
459
501
 
460
- if (options.decodeEntities && attrValue) {
461
- // Fast path: Only decode when entities are present
462
- if (attrValue.indexOf('&') !== -1) {
463
- attrValue = (await getDecodeHTMLStrict())(attrValue);
464
- }
502
+ // Entity decoding requires a lazy import—async only when `&` is present
503
+ if (options.decodeEntities && attrValue && attrValue.indexOf('&') !== -1) {
504
+ return getDecodeHTMLStrict().then(decode => {
505
+ return normalizeAttrContinue(attrName, decode(attrValue), attr, attrs, tag, options, minifyHTML);
506
+ });
465
507
  }
466
508
 
509
+ return normalizeAttrContinue(attrName, attrValue, attr, attrs, tag, options, minifyHTML);
510
+ }
511
+
512
+ // Internal: Handles attribute normalization after entity decoding (if any)
513
+ function normalizeAttrContinue(attrName, attrValue, attr, attrs, tag, options, minifyHTML) {
467
514
  if ((options.removeRedundantAttributes &&
468
515
  isAttributeRedundant(tag, attrName, attrValue, attrs)) ||
469
516
  (options.removeScriptTypeAttributes && tag === 'script' &&
@@ -474,9 +521,18 @@ async function normalizeAttr(attr, attrs, tag, options, minifyHTML) {
474
521
  }
475
522
 
476
523
  if (attrValue) {
477
- attrValue = await cleanAttributeValue(tag, attrName, attrValue, options, attrs, minifyHTML);
524
+ const cleaned = cleanAttributeValue(tag, attrName, attrValue, options, attrs, minifyHTML);
525
+ if (isThenable(cleaned)) {
526
+ return cleaned.then(v => normalizeAttrFinish(attrName, v, attr, tag, options));
527
+ }
528
+ return normalizeAttrFinish(attrName, cleaned, attr, tag, options);
478
529
  }
479
530
 
531
+ return normalizeAttrFinish(attrName, attrValue, attr, tag, options);
532
+ }
533
+
534
+ // Internal: Final checks and result assembly after value cleaning
535
+ function normalizeAttrFinish(attrName, attrValue, attr, tag, options) {
480
536
  if (options.removeEmptyAttributes &&
481
537
  canDeleteEmptyAttribute(tag, attrName, attrValue, options)) {
482
538
  return;
@@ -1,7 +1,7 @@
1
1
  // Imports
2
2
 
3
3
  import { createUrlMinifier } from './urls.js';
4
- import { LRU, stableStringify, identity, lowercase, identityAsync, replaceAsync, parseRegExp } from './utils.js';
4
+ import { LRU, stableStringify, identity, lowercase, replaceAsync, parseRegExp } from './utils.js';
5
5
  import { RE_TRAILING_SEMICOLON } from './constants.js';
6
6
  import { canCollapseWhitespace, canTrimWhitespace } from './whitespace.js';
7
7
  import { wrapCSS, unwrapCSS } from './content.js';
@@ -16,7 +16,7 @@ function shouldMinifyInnerHTML(options) {
16
16
  options.removeComments ||
17
17
  options.removeOptionalTags ||
18
18
  options.minifyJS !== identity ||
19
- options.minifyCSS !== identityAsync ||
19
+ options.minifyCSS !== identity ||
20
20
  options.minifyURLs !== identity ||
21
21
  options.minifySVG
22
22
  );
@@ -43,7 +43,7 @@ const processOptions = (inputOptions, { getLightningCSS, getTerser, getSwc, getS
43
43
  canTrimWhitespace,
44
44
  ...optionDefaults,
45
45
  log: identity,
46
- minifyCSS: identityAsync,
46
+ minifyCSS: identity,
47
47
  minifyJS: identity,
48
48
  minifyURLs: identity,
49
49
  minifySVG: null
package/src/lib/utils.js CHANGED
@@ -44,7 +44,7 @@ class LRU {
44
44
  function uniqueId(value) {
45
45
  let id;
46
46
  do {
47
- id = Math.random().toString(36).replace(/^0\.[0-9]*/, '');
47
+ id = 'u' + crypto.randomUUID().replace(/-/g, '');
48
48
  } while (~value.indexOf(id));
49
49
  return id;
50
50
  }
@@ -55,8 +55,8 @@ function identity(value) {
55
55
  return value;
56
56
  }
57
57
 
58
- function identityAsync(value) {
59
- return Promise.resolve(value);
58
+ function isThenable(value) {
59
+ return value != null && typeof value === 'object' && typeof value.then === 'function';
60
60
  }
61
61
 
62
62
  function lowercase(value) {
@@ -104,7 +104,7 @@ export { stableStringify };
104
104
  export { LRU };
105
105
  export { uniqueId };
106
106
  export { identity };
107
- export { identityAsync };
107
+ export { isThenable };
108
108
  export { lowercase };
109
109
  export { replaceAsync };
110
110
  export { parseRegExp };