html-minifier-next 4.11.0 → 4.12.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -92,7 +92,7 @@ const preCompiledStackedTags = {
92
92
  // Cache for compiled attribute regexes per handler configuration
93
93
  const attrRegexCache = new WeakMap();
94
94
 
95
- function attrForHandler(handler) {
95
+ function buildAttrRegex(handler) {
96
96
  let pattern = singleAttrIdentifier.source +
97
97
  '(?:\\s*(' + joinSingleAttrAssigns(handler) + ')' +
98
98
  '[ \\t\\n\\f\\r]*(?:' + singleAttrValues.join('|') + '))?';
@@ -111,6 +111,14 @@ function attrForHandler(handler) {
111
111
  return new RegExp('^\\s*' + pattern);
112
112
  }
113
113
 
114
/**
 * Return the attribute-matching regex for a handler configuration,
 * compiling it on first use.
 *
 * Results are memoized in `attrRegexCache` (a WeakMap keyed by the handler
 * object itself), so repeated lookups with the same handler object reuse a
 * single compiled RegExp instead of rebuilding the pattern.
 *
 * @param {object} handler - Handler configuration forwarded to `buildAttrRegex`.
 * @returns {RegExp} The cached, or newly compiled and cached, attribute regex.
 */
function getAttrRegexForHandler(handler) {
  const cached = attrRegexCache.get(handler);
  if (cached) {
    return cached;
  }

  // Cache miss: compile once and remember the result for this handler object
  const compiled = buildAttrRegex(handler);
  attrRegexCache.set(handler, compiled);
  return compiled;
}
121
+
114
122
  function joinSingleAttrAssigns(handler) {
115
123
  return singleAttrAssigns.concat(
116
124
  handler.customAttrAssign || []
@@ -134,12 +142,8 @@ class HTMLParser {
134
142
  const fullLength = fullHtml.length;
135
143
 
136
144
  const stack = []; let lastTag;
137
- // Use cached attribute regex if available
138
- let attribute = attrRegexCache.get(handler);
139
- if (!attribute) {
140
- attribute = attrForHandler(handler);
141
- attrRegexCache.set(handler, attribute);
142
- }
145
+ // Use cached attribute regex for this handler configuration
146
+ const attribute = getAttrRegexForHandler(handler);
143
147
  let prevTag = undefined, nextTag = undefined;
144
148
 
145
149
  // Index-based parsing
@@ -1263,7 +1267,7 @@ const cssMinifyCache = new LRU(200);
1263
1267
 
1264
1268
  const trimWhitespace = str => {
1265
1269
  if (!str) return str;
1266
- // Fast path: if no whitespace at start or end, return early
1270
+ // Fast path: If no whitespace at start or end, return early
1267
1271
  if (!/^[ \n\r\t\f]/.test(str) && !/[ \n\r\t\f]$/.test(str)) {
1268
1272
  return str;
1269
1273
  }
@@ -1272,7 +1276,7 @@ const trimWhitespace = str => {
1272
1276
 
1273
1277
  function collapseWhitespaceAll(str) {
1274
1278
  if (!str) return str;
1275
- // Fast path: if there are no common whitespace characters, return early
1279
+ // Fast path: If there are no common whitespace characters, return early
1276
1280
  if (!/[ \n\r\t\f\xA0]/.test(str)) {
1277
1281
  return str;
1278
1282
  }
@@ -1640,7 +1644,7 @@ async function cleanAttributeValue(tag, attrName, attrValue, options, attrs, min
1640
1644
  return options.minifyCSS(attrValue, 'media');
1641
1645
  } else if (tag === 'iframe' && attrName === 'srcdoc') {
1642
1646
  // Recursively minify HTML content within srcdoc attribute
1643
- // Fast-path: skip if nothing would change
1647
+ // Fast-path: Skip if nothing would change
1644
1648
  if (!shouldMinifyInnerHTML(options)) {
1645
1649
  return attrValue;
1646
1650
  }
@@ -2027,7 +2031,7 @@ async function normalizeAttr(attr, attrs, tag, options) {
2027
2031
  let attrValue = attr.value;
2028
2032
 
2029
2033
  if (options.decodeEntities && attrValue) {
2030
- // Fast path: only decode when entities are present
2034
+ // Fast path: Only decode when entities are present
2031
2035
  if (attrValue.indexOf('&') !== -1) {
2032
2036
  attrValue = entities.decodeHTMLStrict(attrValue);
2033
2037
  }
@@ -2072,7 +2076,9 @@ function buildAttr(normalized, hasUnarySlash, options, isLast, uidAttr) {
2072
2076
 
2073
2077
  if (typeof attrValue !== 'undefined' && (!options.removeAttributeQuotes ||
2074
2078
  ~attrValue.indexOf(uidAttr) || !canRemoveAttributeQuotes(attrValue))) {
2079
+ // Determine the appropriate quote character
2075
2080
  if (!options.preventAttributesEscaping) {
2081
+ // Normal mode: choose quotes and escape
2076
2082
  if (typeof options.quoteCharacter === 'undefined') {
2077
2083
  // Count quotes in a single pass instead of two regex operations
2078
2084
  let apos = 0, quot = 0;
@@ -2089,6 +2095,50 @@ function buildAttr(normalized, hasUnarySlash, options, isLast, uidAttr) {
2089
2095
  } else {
2090
2096
  attrValue = attrValue.replace(/'/g, '&#39;');
2091
2097
  }
2098
+ } else {
2099
+ // `preventAttributesEscaping` mode: choose safe quotes but don’t escape
2100
+ // EXCEPT when both quote types are present—then escape to prevent invalid HTML
2101
+ const hasDoubleQuote = attrValue.indexOf('"') !== -1;
2102
+ const hasSingleQuote = attrValue.indexOf("'") !== -1;
2103
+
2104
+ if (hasDoubleQuote && hasSingleQuote) {
2105
+ // Both quote types present: `preventAttributesEscaping` is ignored to ensure valid HTML
2106
+ // Choose the quote type with fewer occurrences and escape the other
2107
+ if (typeof options.quoteCharacter === 'undefined') {
2108
+ let apos = 0, quot = 0;
2109
+ for (let i = 0; i < attrValue.length; i++) {
2110
+ if (attrValue[i] === "'") apos++;
2111
+ else if (attrValue[i] === '"') quot++;
2112
+ }
2113
+ attrQuote = apos < quot ? '\'' : '"';
2114
+ } else {
2115
+ attrQuote = options.quoteCharacter === '\'' ? '\'' : '"';
2116
+ }
2117
+ if (attrQuote === '"') {
2118
+ attrValue = attrValue.replace(/"/g, '&#34;');
2119
+ } else {
2120
+ attrValue = attrValue.replace(/'/g, '&#39;');
2121
+ }
2122
+ } else if (typeof options.quoteCharacter === 'undefined') {
2123
+ // Single or no quote type: Choose safe quote delimiter
2124
+ if (attrQuote === '"' && hasDoubleQuote && !hasSingleQuote) {
2125
+ attrQuote = "'";
2126
+ } else if (attrQuote === "'" && hasSingleQuote && !hasDoubleQuote) {
2127
+ attrQuote = '"';
2128
+ } else if (attrQuote !== '"' && attrQuote !== "'" && attrQuote !== '') {
2129
+ // `attrQuote` is invalid/undefined (not `"`, `'`, or empty string)
2130
+ // Set a safe default based on the value’s content
2131
+ if (hasSingleQuote && !hasDoubleQuote) {
2132
+ attrQuote = '"'; // Value has single quotes, use double quotes as delimiter
2133
+ } else if (hasDoubleQuote && !hasSingleQuote) {
2134
+ attrQuote = "'"; // Value has double quotes, use single quotes as delimiter
2135
+ } else {
2136
+ attrQuote = '"'; // No quotes in value, default to double quotes
2137
+ }
2138
+ }
2139
+ } else {
2140
+ attrQuote = options.quoteCharacter === '\'' ? '\'' : '"';
2141
+ }
2092
2142
  }
2093
2143
  emittedAttrValue = attrQuote + attrValue + attrQuote;
2094
2144
  if (!isLast && !options.removeTagWhitespace) {
@@ -2181,7 +2231,7 @@ const processOptions = (inputOptions) => {
2181
2231
  const lightningCssOptions = typeof option === 'object' ? option : {};
2182
2232
 
2183
2233
  options.minifyCSS = async function (text, type) {
2184
- // Fast path: nothing to minify
2234
+ // Fast path: Nothing to minify
2185
2235
  if (!text || !text.trim()) {
2186
2236
  return text;
2187
2237
  }
@@ -2273,7 +2323,7 @@ const processOptions = (inputOptions) => {
2273
2323
 
2274
2324
  let jsKey;
2275
2325
  try {
2276
- // Fast path: avoid invoking Terser for empty/whitespace-only content
2326
+ // Fast path: Avoid invoking Terser for empty/whitespace-only content
2277
2327
  if (!code || !code.trim()) {
2278
2328
  return '';
2279
2329
  }
@@ -2325,9 +2375,18 @@ const processOptions = (inputOptions) => {
2325
2375
  relateUrlOptions = {};
2326
2376
  }
2327
2377
 
2378
+ // Cache RelateURL instance for reuse (expensive to create)
2379
+ const relateUrlInstance = new RelateURL(relateUrlOptions.site || '', relateUrlOptions);
2380
+
2328
2381
  options.minifyURLs = function (text) {
2382
+ // Fast-path: Skip if text doesn’t look like a URL that needs processing
2383
+ // Only process if contains URL-like characters (`/`, `:`, `#`, `?`) or spaces that need encoding
2384
+ if (!/[/:?#\s]/.test(text)) {
2385
+ return text;
2386
+ }
2387
+
2329
2388
  try {
2330
- return RelateURL.relate(text, relateUrlOptions);
2389
+ return relateUrlInstance.relate(text);
2331
2390
  } catch (err) {
2332
2391
  if (!options.continueOnMinifyError) {
2333
2392
  throw err;
@@ -2353,7 +2412,7 @@ function uniqueId(value) {
2353
2412
 
2354
2413
  const specialContentTags = new Set(['script', 'style']);
2355
2414
 
2356
- async function createSortFns(value, options, uidIgnore, uidAttr) {
2415
+ async function createSortFns(value, options, uidIgnore, uidAttr, ignoredMarkupChunks) {
2357
2416
  const attrChains = options.sortAttributes && Object.create(null);
2358
2417
  const classChain = options.sortClassName && new TokenChain();
2359
2418
 
@@ -2367,10 +2426,20 @@ async function createSortFns(value, options, uidIgnore, uidAttr) {
2367
2426
  return !uid || token.indexOf(uid) === -1;
2368
2427
  }
2369
2428
 
2370
- function shouldSkipUIDs(token) {
2429
/**
 * Filter predicate used while collecting attribute names and class names
 * for frequency analysis.
 *
 * A token is rejected when it is wrapped as an HTML comment (`<!--…-->`),
 * or when `shouldSkipUID` reports that it contains either the `uidIgnore`
 * or the `uidAttr` marker.
 *
 * @param {string} token - Attribute name or class name candidate.
 * @returns {boolean} `true` when the token should be kept.
 */
function shouldKeepToken(token) {
  // Comment-shaped tokens are UID placeholders, never real names
  const isCommentToken = token.startsWith('<!--') && token.endsWith('-->');
  if (isCommentToken) {
    return false;
  }

  if (!shouldSkipUID(token, uidIgnore)) {
    return false;
  }
  return shouldSkipUID(token, uidAttr);
}
2373
2437
 
2438
+ // Pre-compile regex patterns for reuse (performance optimization)
2439
+ // These must be declared before scan() since scan uses them
2440
+ const whitespaceSplitPatternScan = /[ \t\n\f\r]+/;
2441
+ const whitespaceSplitPatternSort = /[ \n\f\r]+/;
2442
+
2374
2443
  async function scan(input) {
2375
2444
  let currentTag, currentType;
2376
2445
  const parser = new HTMLParser(input, {
@@ -2379,12 +2448,14 @@ async function createSortFns(value, options, uidIgnore, uidAttr) {
2379
2448
  if (!attrChains[tag]) {
2380
2449
  attrChains[tag] = new TokenChain();
2381
2450
  }
2382
- attrChains[tag].add(attrNames(attrs).filter(shouldSkipUIDs));
2451
+ const attrNamesList = attrNames(attrs).filter(shouldKeepToken);
2452
+ attrChains[tag].add(attrNamesList);
2383
2453
  }
2384
2454
  for (let i = 0, len = attrs.length; i < len; i++) {
2385
2455
  const attr = attrs[i];
2386
2456
  if (classChain && attr.value && options.name(attr.name) === 'class') {
2387
- classChain.add(trimWhitespace(attr.value).split(/[ \t\n\f\r]+/).filter(shouldSkipUIDs));
2457
+ const classes = trimWhitespace(attr.value).split(whitespaceSplitPatternScan).filter(shouldKeepToken);
2458
+ classChain.add(classes);
2388
2459
  } else if (options.processScripts && attr.name.toLowerCase() === 'type') {
2389
2460
  currentTag = tag;
2390
2461
  currentType = attr.value;
@@ -2404,19 +2475,84 @@ async function createSortFns(value, options, uidIgnore, uidAttr) {
2404
2475
  }
2405
2476
  },
2406
2477
  // We never need `nextTag` information in this scan
2407
- wantsNextTag: false
2478
+ wantsNextTag: false,
2479
+ // Continue on parse errors during analysis pass
2480
+ continueOnParseError: options.continueOnParseError
2408
2481
  });
2409
2482
 
2410
- await parser.parse();
2483
+ try {
2484
+ await parser.parse();
2485
+ } catch (err) {
2486
+ // If parsing fails during analysis pass, just skip it—we’ll still have
2487
+ // partial frequency data from what we could parse
2488
+ if (!options.continueOnParseError) {
2489
+ throw err;
2490
+ }
2491
+ }
2411
2492
  }
2412
2493
 
2413
- const log = options.log;
2414
- options.log = identity;
2415
- options.sortAttributes = false;
2416
- options.sortClassName = false;
2417
- const firstPassOutput = await minifyHTML(value, options);
2418
- await scan(firstPassOutput);
2419
- options.log = log;
2494
+ // For the first pass, create a copy of options and disable aggressive minification.
2495
+ // Keep attribute transformations (like `removeStyleLinkTypeAttributes`) for accurate analysis.
2496
+ // This is safe because `createSortFns` is called before custom fragment UID markers (uidAttr) are added.
2497
+ // Note: `htmlmin:ignore` UID markers (uidIgnore) already exist and are expanded for analysis.
2498
+ const firstPassOptions = Object.assign({}, options, {
2499
+ // Disable sorting for the analysis pass
2500
+ sortAttributes: false,
2501
+ sortClassName: false,
2502
+ // Disable aggressive minification that doesn’t affect attribute analysis
2503
+ collapseWhitespace: false,
2504
+ removeAttributeQuotes: false,
2505
+ removeTagWhitespace: false,
2506
+ decodeEntities: false,
2507
+ processScripts: false,
2508
+ // Keep `ignoreCustomFragments` to handle template syntax correctly
2509
+ // This is safe because `createSortFns` is now called before UID markers are added
2510
+ // Continue on parse errors during analysis (e.g., template syntax)
2511
+ continueOnParseError: true,
2512
+ log: identity
2513
+ });
2514
+
2515
+ // Temporarily enable `continueOnParseError` for the `scan()` function call below.
2516
+ // Note: `firstPassOptions` already has `continueOnParseError: true` for the minifyHTML call.
2517
+ const originalContinueOnParseError = options.continueOnParseError;
2518
+ options.continueOnParseError = true;
2519
+
2520
+ // Pre-compile regex patterns for UID replacement and custom fragments
2521
+ const uidReplacePattern = uidIgnore && ignoredMarkupChunks
2522
+ ? new RegExp('<!--' + uidIgnore + '(\\d+)-->', 'g')
2523
+ : null;
2524
+ const customFragmentPattern = options.ignoreCustomFragments && options.ignoreCustomFragments.length > 0
2525
+ ? new RegExp('(' + options.ignoreCustomFragments.map(re => re.source).join('|') + ')', 'g')
2526
+ : null;
2527
+
2528
+ try {
2529
+ // Expand UID tokens back to original content for frequency analysis
2530
+ let expandedValue = value;
2531
+ if (uidReplacePattern) {
2532
+ expandedValue = value.replace(uidReplacePattern, function (match, index) {
2533
+ return ignoredMarkupChunks[+index] || '';
2534
+ });
2535
+ // Reset `lastIndex` for pattern reuse
2536
+ uidReplacePattern.lastIndex = 0;
2537
+ }
2538
+
2539
+ // First pass minification applies attribute transformations
2540
+ // like removeStyleLinkTypeAttributes for accurate frequency analysis
2541
+ const firstPassOutput = await minifyHTML(expandedValue, firstPassOptions);
2542
+
2543
+ // For frequency analysis, we need to remove custom fragments temporarily
2544
+ // because HTML comments in opening tags prevent proper attribute parsing.
2545
+ // We remove them with a space to preserve attribute boundaries.
2546
+ let scanValue = firstPassOutput;
2547
+ if (customFragmentPattern) {
2548
+ scanValue = firstPassOutput.replace(customFragmentPattern, ' ');
2549
+ }
2550
+
2551
+ await scan(scanValue);
2552
+ } finally {
2553
+ // Restore original option
2554
+ options.continueOnParseError = originalContinueOnParseError;
2555
+ }
2420
2556
  if (attrChains) {
2421
2557
  const attrSorters = Object.create(null);
2422
2558
  for (const tag in attrChains) {
@@ -2430,7 +2566,8 @@ async function createSortFns(value, options, uidIgnore, uidAttr) {
2430
2566
  names.forEach(function (name, index) {
2431
2567
  (attrMap[name] || (attrMap[name] = [])).push(attrs[index]);
2432
2568
  });
2433
- sorter.sort(names).forEach(function (name, index) {
2569
+ const sorted = sorter.sort(names);
2570
+ sorted.forEach(function (name, index) {
2434
2571
  attrs[index] = attrMap[name].shift();
2435
2572
  });
2436
2573
  }
@@ -2438,8 +2575,40 @@ async function createSortFns(value, options, uidIgnore, uidAttr) {
2438
2575
  }
2439
2576
  if (classChain) {
2440
2577
  const sorter = classChain.createSorter();
2578
+ // Memoize `sortClassName` results—class lists often repeat in templates
2579
+ const classNameCache = new LRU(200);
2580
+
2441
2581
  options.sortClassName = function (value) {
2442
- return sorter.sort(value.split(/[ \n\f\r]+/)).join(' ');
2582
+ // Fast path: Single class (no spaces) needs no sorting
2583
+ if (value.indexOf(' ') === -1) {
2584
+ return value;
2585
+ }
2586
+
2587
+ // Check cache first
2588
+ const cached = classNameCache.get(value);
2589
+ if (cached !== undefined) {
2590
+ return cached;
2591
+ }
2592
+
2593
+ // Expand UID tokens back to original content before sorting
2594
+ // Fast path: Skip if no HTML comments (UID markers) present
2595
+ let expandedValue = value;
2596
+ if (uidReplacePattern && value.indexOf('<!--') !== -1) {
2597
+ expandedValue = value.replace(uidReplacePattern, function (match, index) {
2598
+ return ignoredMarkupChunks[+index] || '';
2599
+ });
2600
+ // Reset `lastIndex` for pattern reuse
2601
+ uidReplacePattern.lastIndex = 0;
2602
+ }
2603
+ const classes = expandedValue.split(whitespaceSplitPatternSort).filter(function(cls) {
2604
+ return cls !== '';
2605
+ });
2606
+ const sorted = sorter.sort(classes);
2607
+ const result = sorted.join(' ');
2608
+
2609
+ // Cache the result
2610
+ classNameCache.set(value, result);
2611
+ return result;
2443
2612
  };
2444
2613
  }
2445
2614
  }
@@ -2480,7 +2649,7 @@ async function minifyHTML(value, options, partialMarkup) {
2480
2649
  const customElementsInput = options.inlineCustomElements ?? [];
2481
2650
  const customElementsArr = Array.isArray(customElementsInput) ? customElementsInput : Array.from(customElementsInput);
2482
2651
  const normalizedCustomElements = customElementsArr.map(name => options.name(name));
2483
- // Fast path: reuse base Sets if no custom elements
2652
+ // Fast path: Reuse base Sets if no custom elements
2484
2653
  const inlineTextSet = normalizedCustomElements.length
2485
2654
  ? new Set([...inlineElementsToKeepWhitespaceWithin, ...normalizedCustomElements])
2486
2655
  : inlineElementsToKeepWhitespaceWithin;
@@ -2520,6 +2689,13 @@ async function minifyHTML(value, options, partialMarkup) {
2520
2689
  return token;
2521
2690
  });
2522
2691
 
2692
+ // Create sort functions after `htmlmin:ignore` processing but before custom fragment UID markers
2693
+ // This allows proper frequency analysis with access to ignored content via UID tokens
2694
+ if ((options.sortAttributes && typeof options.sortAttributes !== 'function') ||
2695
+ (options.sortClassName && typeof options.sortClassName !== 'function')) {
2696
+ await createSortFns(value, options, uidIgnore, null, ignoredMarkupChunks);
2697
+ }
2698
+
2523
2699
  const customFragments = options.ignoreCustomFragments.map(function (re) {
2524
2700
  return re.source;
2525
2701
  });
@@ -2578,11 +2754,6 @@ async function minifyHTML(value, options, partialMarkup) {
2578
2754
  });
2579
2755
  }
2580
2756
 
2581
- if ((options.sortAttributes && typeof options.sortAttributes !== 'function') ||
2582
- (options.sortClassName && typeof options.sortClassName !== 'function')) {
2583
- await createSortFns(value, options, uidIgnore, uidAttr);
2584
- }
2585
-
2586
2757
  function _canCollapseWhitespace(tag, attrs) {
2587
2758
  return options.canCollapseWhitespace(tag, attrs, canCollapseWhitespace);
2588
2759
  }