html-minifier-next 4.11.0 → 4.12.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -463,7 +463,7 @@ const cssMinifyCache = new LRU(200);
463
463
 
464
464
  const trimWhitespace = str => {
465
465
  if (!str) return str;
466
- // Fast path: if no whitespace at start or end, return early
466
+ // Fast path: If no whitespace at start or end, return early
467
467
  if (!/^[ \n\r\t\f]/.test(str) && !/[ \n\r\t\f]$/.test(str)) {
468
468
  return str;
469
469
  }
@@ -472,7 +472,7 @@ const trimWhitespace = str => {
472
472
 
473
473
  function collapseWhitespaceAll(str) {
474
474
  if (!str) return str;
475
- // Fast path: if there are no common whitespace characters, return early
475
+ // Fast path: If there are no common whitespace characters, return early
476
476
  if (!/[ \n\r\t\f\xA0]/.test(str)) {
477
477
  return str;
478
478
  }
@@ -840,7 +840,7 @@ async function cleanAttributeValue(tag, attrName, attrValue, options, attrs, min
840
840
  return options.minifyCSS(attrValue, 'media');
841
841
  } else if (tag === 'iframe' && attrName === 'srcdoc') {
842
842
  // Recursively minify HTML content within srcdoc attribute
843
- // Fast-path: skip if nothing would change
843
+ // Fast-path: Skip if nothing would change
844
844
  if (!shouldMinifyInnerHTML(options)) {
845
845
  return attrValue;
846
846
  }
@@ -1227,7 +1227,7 @@ async function normalizeAttr(attr, attrs, tag, options) {
1227
1227
  let attrValue = attr.value;
1228
1228
 
1229
1229
  if (options.decodeEntities && attrValue) {
1230
- // Fast path: only decode when entities are present
1230
+ // Fast path: Only decode when entities are present
1231
1231
  if (attrValue.indexOf('&') !== -1) {
1232
1232
  attrValue = decodeHTMLStrict(attrValue);
1233
1233
  }
@@ -1272,7 +1272,9 @@ function buildAttr(normalized, hasUnarySlash, options, isLast, uidAttr) {
1272
1272
 
1273
1273
  if (typeof attrValue !== 'undefined' && (!options.removeAttributeQuotes ||
1274
1274
  ~attrValue.indexOf(uidAttr) || !canRemoveAttributeQuotes(attrValue))) {
1275
+ // Determine the appropriate quote character
1275
1276
  if (!options.preventAttributesEscaping) {
1277
+ // Normal mode: choose quotes and escape
1276
1278
  if (typeof options.quoteCharacter === 'undefined') {
1277
1279
  // Count quotes in a single pass instead of two regex operations
1278
1280
  let apos = 0, quot = 0;
@@ -1289,6 +1291,50 @@ function buildAttr(normalized, hasUnarySlash, options, isLast, uidAttr) {
1289
1291
  } else {
1290
1292
  attrValue = attrValue.replace(/'/g, '&#39;');
1291
1293
  }
1294
+ } else {
1295
+ // `preventAttributesEscaping` mode: choose safe quotes but don’t escape
1296
+ // EXCEPT when both quote types are present—then escape to prevent invalid HTML
1297
+ const hasDoubleQuote = attrValue.indexOf('"') !== -1;
1298
+ const hasSingleQuote = attrValue.indexOf("'") !== -1;
1299
+
1300
+ if (hasDoubleQuote && hasSingleQuote) {
1301
+ // Both quote types present: `preventAttributesEscaping` is ignored to ensure valid HTML
1302
+ // Choose the quote type with fewer occurrences and escape the other
1303
+ if (typeof options.quoteCharacter === 'undefined') {
1304
+ let apos = 0, quot = 0;
1305
+ for (let i = 0; i < attrValue.length; i++) {
1306
+ if (attrValue[i] === "'") apos++;
1307
+ else if (attrValue[i] === '"') quot++;
1308
+ }
1309
+ attrQuote = apos < quot ? '\'' : '"';
1310
+ } else {
1311
+ attrQuote = options.quoteCharacter === '\'' ? '\'' : '"';
1312
+ }
1313
+ if (attrQuote === '"') {
1314
+ attrValue = attrValue.replace(/"/g, '&#34;');
1315
+ } else {
1316
+ attrValue = attrValue.replace(/'/g, '&#39;');
1317
+ }
1318
+ } else if (typeof options.quoteCharacter === 'undefined') {
1319
+ // Single or no quote type: Choose safe quote delimiter
1320
+ if (attrQuote === '"' && hasDoubleQuote && !hasSingleQuote) {
1321
+ attrQuote = "'";
1322
+ } else if (attrQuote === "'" && hasSingleQuote && !hasDoubleQuote) {
1323
+ attrQuote = '"';
1324
+ } else if (attrQuote !== '"' && attrQuote !== "'" && attrQuote !== '') {
1325
+ // `attrQuote` is invalid/undefined (not `"`, `'`, or empty string)
1326
+ // Set a safe default based on the value’s content
1327
+ if (hasSingleQuote && !hasDoubleQuote) {
1328
+ attrQuote = '"'; // Value has single quotes, use double quotes as delimiter
1329
+ } else if (hasDoubleQuote && !hasSingleQuote) {
1330
+ attrQuote = "'"; // Value has double quotes, use single quotes as delimiter
1331
+ } else {
1332
+ attrQuote = '"'; // No quotes in value, default to double quotes
1333
+ }
1334
+ }
1335
+ } else {
1336
+ attrQuote = options.quoteCharacter === '\'' ? '\'' : '"';
1337
+ }
1292
1338
  }
1293
1339
  emittedAttrValue = attrQuote + attrValue + attrQuote;
1294
1340
  if (!isLast && !options.removeTagWhitespace) {
@@ -1381,7 +1427,7 @@ const processOptions = (inputOptions) => {
1381
1427
  const lightningCssOptions = typeof option === 'object' ? option : {};
1382
1428
 
1383
1429
  options.minifyCSS = async function (text, type) {
1384
- // Fast path: nothing to minify
1430
+ // Fast path: Nothing to minify
1385
1431
  if (!text || !text.trim()) {
1386
1432
  return text;
1387
1433
  }
@@ -1473,7 +1519,7 @@ const processOptions = (inputOptions) => {
1473
1519
 
1474
1520
  let jsKey;
1475
1521
  try {
1476
- // Fast path: avoid invoking Terser for empty/whitespace-only content
1522
+ // Fast path: Avoid invoking Terser for empty/whitespace-only content
1477
1523
  if (!code || !code.trim()) {
1478
1524
  return '';
1479
1525
  }
@@ -1525,9 +1571,18 @@ const processOptions = (inputOptions) => {
1525
1571
  relateUrlOptions = {};
1526
1572
  }
1527
1573
 
1574
+ // Cache RelateURL instance for reuse (expensive to create)
1575
+ const relateUrlInstance = new RelateURL(relateUrlOptions.site || '', relateUrlOptions);
1576
+
1528
1577
  options.minifyURLs = function (text) {
1578
+ // Fast-path: Skip if text doesn’t look like a URL that needs processing
1579
+ // Only process if contains URL-like characters (`/`, `:`, `#`, `?`) or spaces that need encoding
1580
+ if (!/[/:?#\s]/.test(text)) {
1581
+ return text;
1582
+ }
1583
+
1529
1584
  try {
1530
- return RelateURL.relate(text, relateUrlOptions);
1585
+ return relateUrlInstance.relate(text);
1531
1586
  } catch (err) {
1532
1587
  if (!options.continueOnMinifyError) {
1533
1588
  throw err;
@@ -1553,7 +1608,7 @@ function uniqueId(value) {
1553
1608
 
1554
1609
  const specialContentTags = new Set(['script', 'style']);
1555
1610
 
1556
- async function createSortFns(value, options, uidIgnore, uidAttr) {
1611
+ async function createSortFns(value, options, uidIgnore, uidAttr, ignoredMarkupChunks) {
1557
1612
  const attrChains = options.sortAttributes && Object.create(null);
1558
1613
  const classChain = options.sortClassName && new TokenChain();
1559
1614
 
@@ -1567,10 +1622,20 @@ async function createSortFns(value, options, uidIgnore, uidAttr) {
1567
1622
  return !uid || token.indexOf(uid) === -1;
1568
1623
  }
1569
1624
 
1570
- function shouldSkipUIDs(token) {
1625
+ function shouldKeepToken(token) {
1626
+ // Filter out any HTML comment tokens (UID placeholders)
1627
+ // These are temporary markers created by `htmlmin:ignore` and `ignoreCustomFragments`
1628
+ if (token.startsWith('<!--') && token.endsWith('-->')) {
1629
+ return false;
1630
+ }
1571
1631
  return shouldSkipUID(token, uidIgnore) && shouldSkipUID(token, uidAttr);
1572
1632
  }
1573
1633
 
1634
+ // Pre-compile regex patterns for reuse (performance optimization)
1635
+ // These must be declared before scan() since scan uses them
1636
+ const whitespaceSplitPatternScan = /[ \t\n\f\r]+/;
1637
+ const whitespaceSplitPatternSort = /[ \n\f\r]+/;
1638
+
1574
1639
  async function scan(input) {
1575
1640
  let currentTag, currentType;
1576
1641
  const parser = new HTMLParser(input, {
@@ -1579,12 +1644,14 @@ async function createSortFns(value, options, uidIgnore, uidAttr) {
1579
1644
  if (!attrChains[tag]) {
1580
1645
  attrChains[tag] = new TokenChain();
1581
1646
  }
1582
- attrChains[tag].add(attrNames(attrs).filter(shouldSkipUIDs));
1647
+ const attrNamesList = attrNames(attrs).filter(shouldKeepToken);
1648
+ attrChains[tag].add(attrNamesList);
1583
1649
  }
1584
1650
  for (let i = 0, len = attrs.length; i < len; i++) {
1585
1651
  const attr = attrs[i];
1586
1652
  if (classChain && attr.value && options.name(attr.name) === 'class') {
1587
- classChain.add(trimWhitespace(attr.value).split(/[ \t\n\f\r]+/).filter(shouldSkipUIDs));
1653
+ const classes = trimWhitespace(attr.value).split(whitespaceSplitPatternScan).filter(shouldKeepToken);
1654
+ classChain.add(classes);
1588
1655
  } else if (options.processScripts && attr.name.toLowerCase() === 'type') {
1589
1656
  currentTag = tag;
1590
1657
  currentType = attr.value;
@@ -1604,19 +1671,84 @@ async function createSortFns(value, options, uidIgnore, uidAttr) {
1604
1671
  }
1605
1672
  },
1606
1673
  // We never need `nextTag` information in this scan
1607
- wantsNextTag: false
1674
+ wantsNextTag: false,
1675
+ // Continue on parse errors during analysis pass
1676
+ continueOnParseError: options.continueOnParseError
1608
1677
  });
1609
1678
 
1610
- await parser.parse();
1679
+ try {
1680
+ await parser.parse();
1681
+ } catch (err) {
1682
+ // If parsing fails during analysis pass, just skip it—we’ll still have
1683
+ // partial frequency data from what we could parse
1684
+ if (!options.continueOnParseError) {
1685
+ throw err;
1686
+ }
1687
+ }
1611
1688
  }
1612
1689
 
1613
- const log = options.log;
1614
- options.log = identity;
1615
- options.sortAttributes = false;
1616
- options.sortClassName = false;
1617
- const firstPassOutput = await minifyHTML(value, options);
1618
- await scan(firstPassOutput);
1619
- options.log = log;
1690
+ // For the first pass, create a copy of options and disable aggressive minification.
1691
+ // Keep attribute transformations (like `removeStyleLinkTypeAttributes`) for accurate analysis.
1692
+ // This is safe because `createSortFns` is called before custom fragment UID markers (uidAttr) are added.
1693
+ // Note: `htmlmin:ignore` UID markers (uidIgnore) already exist and are expanded for analysis.
1694
+ const firstPassOptions = Object.assign({}, options, {
1695
+ // Disable sorting for the analysis pass
1696
+ sortAttributes: false,
1697
+ sortClassName: false,
1698
+ // Disable aggressive minification that doesn’t affect attribute analysis
1699
+ collapseWhitespace: false,
1700
+ removeAttributeQuotes: false,
1701
+ removeTagWhitespace: false,
1702
+ decodeEntities: false,
1703
+ processScripts: false,
1704
+ // Keep `ignoreCustomFragments` to handle template syntax correctly
1705
+ // This is safe because `createSortFns` is now called before UID markers are added
1706
+ // Continue on parse errors during analysis (e.g., template syntax)
1707
+ continueOnParseError: true,
1708
+ log: identity
1709
+ });
1710
+
1711
+ // Temporarily enable `continueOnParseError` for the `scan()` function call below.
1712
+ // Note: `firstPassOptions` already has `continueOnParseError: true` for the minifyHTML call.
1713
+ const originalContinueOnParseError = options.continueOnParseError;
1714
+ options.continueOnParseError = true;
1715
+
1716
+ // Pre-compile regex patterns for UID replacement and custom fragments
1717
+ const uidReplacePattern = uidIgnore && ignoredMarkupChunks
1718
+ ? new RegExp('<!--' + uidIgnore + '(\\d+)-->', 'g')
1719
+ : null;
1720
+ const customFragmentPattern = options.ignoreCustomFragments && options.ignoreCustomFragments.length > 0
1721
+ ? new RegExp('(' + options.ignoreCustomFragments.map(re => re.source).join('|') + ')', 'g')
1722
+ : null;
1723
+
1724
+ try {
1725
+ // Expand UID tokens back to original content for frequency analysis
1726
+ let expandedValue = value;
1727
+ if (uidReplacePattern) {
1728
+ expandedValue = value.replace(uidReplacePattern, function (match, index) {
1729
+ return ignoredMarkupChunks[+index] || '';
1730
+ });
1731
+ // Reset `lastIndex` for pattern reuse
1732
+ uidReplacePattern.lastIndex = 0;
1733
+ }
1734
+
1735
+ // First pass minification applies attribute transformations
1736
+ // like removeStyleLinkTypeAttributes for accurate frequency analysis
1737
+ const firstPassOutput = await minifyHTML(expandedValue, firstPassOptions);
1738
+
1739
+ // For frequency analysis, we need to remove custom fragments temporarily
1740
+ // because HTML comments in opening tags prevent proper attribute parsing.
1741
+ // We remove them with a space to preserve attribute boundaries.
1742
+ let scanValue = firstPassOutput;
1743
+ if (customFragmentPattern) {
1744
+ scanValue = firstPassOutput.replace(customFragmentPattern, ' ');
1745
+ }
1746
+
1747
+ await scan(scanValue);
1748
+ } finally {
1749
+ // Restore original option
1750
+ options.continueOnParseError = originalContinueOnParseError;
1751
+ }
1620
1752
  if (attrChains) {
1621
1753
  const attrSorters = Object.create(null);
1622
1754
  for (const tag in attrChains) {
@@ -1630,7 +1762,8 @@ async function createSortFns(value, options, uidIgnore, uidAttr) {
1630
1762
  names.forEach(function (name, index) {
1631
1763
  (attrMap[name] || (attrMap[name] = [])).push(attrs[index]);
1632
1764
  });
1633
- sorter.sort(names).forEach(function (name, index) {
1765
+ const sorted = sorter.sort(names);
1766
+ sorted.forEach(function (name, index) {
1634
1767
  attrs[index] = attrMap[name].shift();
1635
1768
  });
1636
1769
  }
@@ -1638,8 +1771,40 @@ async function createSortFns(value, options, uidIgnore, uidAttr) {
1638
1771
  }
1639
1772
  if (classChain) {
1640
1773
  const sorter = classChain.createSorter();
1774
+ // Memoize `sortClassName` results—class lists often repeat in templates
1775
+ const classNameCache = new LRU(200);
1776
+
1641
1777
  options.sortClassName = function (value) {
1642
- return sorter.sort(value.split(/[ \n\f\r]+/)).join(' ');
1778
+ // Fast path: Single class (no spaces) needs no sorting
1779
+ if (value.indexOf(' ') === -1) {
1780
+ return value;
1781
+ }
1782
+
1783
+ // Check cache first
1784
+ const cached = classNameCache.get(value);
1785
+ if (cached !== undefined) {
1786
+ return cached;
1787
+ }
1788
+
1789
+ // Expand UID tokens back to original content before sorting
1790
+ // Fast path: Skip if no HTML comments (UID markers) present
1791
+ let expandedValue = value;
1792
+ if (uidReplacePattern && value.indexOf('<!--') !== -1) {
1793
+ expandedValue = value.replace(uidReplacePattern, function (match, index) {
1794
+ return ignoredMarkupChunks[+index] || '';
1795
+ });
1796
+ // Reset `lastIndex` for pattern reuse
1797
+ uidReplacePattern.lastIndex = 0;
1798
+ }
1799
+ const classes = expandedValue.split(whitespaceSplitPatternSort).filter(function(cls) {
1800
+ return cls !== '';
1801
+ });
1802
+ const sorted = sorter.sort(classes);
1803
+ const result = sorted.join(' ');
1804
+
1805
+ // Cache the result
1806
+ classNameCache.set(value, result);
1807
+ return result;
1643
1808
  };
1644
1809
  }
1645
1810
  }
@@ -1680,7 +1845,7 @@ async function minifyHTML(value, options, partialMarkup) {
1680
1845
  const customElementsInput = options.inlineCustomElements ?? [];
1681
1846
  const customElementsArr = Array.isArray(customElementsInput) ? customElementsInput : Array.from(customElementsInput);
1682
1847
  const normalizedCustomElements = customElementsArr.map(name => options.name(name));
1683
- // Fast path: reuse base Sets if no custom elements
1848
+ // Fast path: Reuse base Sets if no custom elements
1684
1849
  const inlineTextSet = normalizedCustomElements.length
1685
1850
  ? new Set([...inlineElementsToKeepWhitespaceWithin, ...normalizedCustomElements])
1686
1851
  : inlineElementsToKeepWhitespaceWithin;
@@ -1720,6 +1885,13 @@ async function minifyHTML(value, options, partialMarkup) {
1720
1885
  return token;
1721
1886
  });
1722
1887
 
1888
+ // Create sort functions after `htmlmin:ignore` processing but before custom fragment UID markers
1889
+ // This allows proper frequency analysis with access to ignored content via UID tokens
1890
+ if ((options.sortAttributes && typeof options.sortAttributes !== 'function') ||
1891
+ (options.sortClassName && typeof options.sortClassName !== 'function')) {
1892
+ await createSortFns(value, options, uidIgnore, null, ignoredMarkupChunks);
1893
+ }
1894
+
1723
1895
  const customFragments = options.ignoreCustomFragments.map(function (re) {
1724
1896
  return re.source;
1725
1897
  });
@@ -1778,11 +1950,6 @@ async function minifyHTML(value, options, partialMarkup) {
1778
1950
  });
1779
1951
  }
1780
1952
 
1781
- if ((options.sortAttributes && typeof options.sortAttributes !== 'function') ||
1782
- (options.sortClassName && typeof options.sortClassName !== 'function')) {
1783
- await createSortFns(value, options, uidIgnore, uidAttr);
1784
- }
1785
-
1786
1953
  function _canCollapseWhitespace(tag, attrs) {
1787
1954
  return options.canCollapseWhitespace(tag, attrs, canCollapseWhitespace);
1788
1955
  }
package/src/htmlparser.js CHANGED
@@ -85,7 +85,7 @@ const preCompiledStackedTags = {
85
85
  // Cache for compiled attribute regexes per handler configuration
86
86
  const attrRegexCache = new WeakMap();
87
87
 
88
- function attrForHandler(handler) {
88
+ function buildAttrRegex(handler) {
89
89
  let pattern = singleAttrIdentifier.source +
90
90
  '(?:\\s*(' + joinSingleAttrAssigns(handler) + ')' +
91
91
  '[ \\t\\n\\f\\r]*(?:' + singleAttrValues.join('|') + '))?';
@@ -104,6 +104,14 @@ function attrForHandler(handler) {
104
104
  return new RegExp('^\\s*' + pattern);
105
105
  }
106
106
 
107
+ function getAttrRegexForHandler(handler) {
108
+ let cached = attrRegexCache.get(handler);
109
+ if (cached) return cached;
110
+ const compiled = buildAttrRegex(handler);
111
+ attrRegexCache.set(handler, compiled);
112
+ return compiled;
113
+ }
114
+
107
115
  function joinSingleAttrAssigns(handler) {
108
116
  return singleAttrAssigns.concat(
109
117
  handler.customAttrAssign || []
@@ -127,12 +135,8 @@ export class HTMLParser {
127
135
  const fullLength = fullHtml.length;
128
136
 
129
137
  const stack = []; let lastTag;
130
- // Use cached attribute regex if available
131
- let attribute = attrRegexCache.get(handler);
132
- if (!attribute) {
133
- attribute = attrForHandler(handler);
134
- attrRegexCache.set(handler, attribute);
135
- }
138
+ // Use cached attribute regex for this handler configuration
139
+ const attribute = getAttrRegexForHandler(handler);
136
140
  let prevTag = undefined, nextTag = undefined;
137
141
 
138
142
  // Index-based parsing