html-minifier-next 4.18.0 → 4.19.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -91,10 +91,9 @@ async function getSwc() {
91
91
  return swcPromise;
92
92
  }
93
93
 
94
- // Minification caches
95
- const cssMinifyCache = new LRU(500);
96
- const jsMinifyCache = new LRU(500);
97
- const urlMinifyCache = new LRU(500);
94
+ // Minification caches (initialized on first use with configurable sizes)
95
+ let cssMinifyCache = null;
96
+ let jsMinifyCache = null;
98
97
 
99
98
  // Pre-compiled patterns for script merging (avoid repeated allocation in hot path)
100
99
  const RE_SCRIPT_ATTRS = /([^\s=]+)(?:=(?:"([^"]*)"|'([^']*)'|([^\s>]+)))?/g;
@@ -247,6 +246,24 @@ function mergeConsecutiveScripts(html) {
247
246
  *
248
247
  * Default: Built-in `canTrimWhitespace` function
249
248
  *
249
+ * @prop {number} [cacheCSS]
250
+ * The maximum number of entries for the CSS minification cache. Higher values
251
+ * improve performance for inputs with repeated CSS (e.g., batch processing).
252
+ * - Cache is created on first `minify()` call and persists for the process lifetime
253
+ * - Cache size is locked after first call—subsequent calls reuse the same cache
254
+ * - Explicit `0` values are coerced to `1` (minimum functional cache size)
255
+ *
256
+ * Default: `500` (or `1000` when `CI=true` environment variable is set)
257
+ *
258
+ * @prop {number} [cacheJS]
259
+ * The maximum number of entries for the JavaScript minification cache. Higher
260
+ * values improve performance for inputs with repeated JavaScript.
261
+ * - Cache is created on first `minify()` call and persists for the process lifetime
262
+ * - Cache size is locked after first call—subsequent calls reuse the same cache
263
+ * - Explicit `0` values are coerced to `1` (minimum functional cache size)
264
+ *
265
+ * Default: `500` (or `1000` when `CI=true` environment variable is set)
266
+ *
250
267
  * @prop {boolean} [caseSensitive]
251
268
  * When true, tag and attribute names are treated as case-sensitive.
252
269
  * Useful for custom HTML tags.
@@ -1505,6 +1522,49 @@ function joinResultSegments(results, options, restoreCustom, restoreIgnore) {
1505
1522
  return options.collapseWhitespace ? collapseWhitespace(str, options, true, true) : str;
1506
1523
  }
1507
1524
 
1525
/**
 * Initialize the shared CSS and JS minification caches with configurable sizes.
 *
 * Size precedence for each cache:
 * `options.cacheCSS`/`options.cacheJS` > `HMN_CACHE_CSS`/`HMN_CACHE_JS`
 * environment variable > default (`1000` when `CI=true`, otherwise `500`).
 *
 * Important behavior notes:
 * - Each cache is created the first time this function runs while the
 *   corresponding module-level binding is still empty, and is reused afterwards
 * - Cache sizes are locked after creation—later calls return the same caches
 *   even if different `cacheCSS`/`cacheJS` options are provided
 * - Unusable size values (non-numeric, blank, `NaN`, infinite, or negative) are
 *   ignored and the next source in the precedence chain is used instead
 * - Sizes are rounded down to a whole number, and anything below `1`
 *   (including an explicit `0`) is raised to `1`, the minimum functional size
 *
 * @param {{cacheCSS?: number, cacheJS?: number}} [options] - Minifier options;
 *   only `cacheCSS` and `cacheJS` are read here
 * @returns {{cssMinifyCache: LRU, jsMinifyCache: LRU}} The shared cache instances
 */
function initCaches(options = {}) {
  // Determine default size based on environment
  const defaultSize = process.env.CI === 'true' ? 1000 : 500;

  // Normalize one candidate size; returns a whole number >= 1, or `undefined`
  // when the candidate is absent or unusable so the caller can fall through
  const parseCacheSize = (raw) => {
    let candidate;
    if (typeof raw === 'number') {
      candidate = raw;
    } else if (typeof raw === 'string' && raw.trim() !== '') {
      // Blank strings are rejected above because `Number('')` is `0`, which
      // would otherwise turn an empty env var into a one-entry cache
      candidate = Number(raw);
    } else {
      return undefined;
    }

    // `Number.isFinite` also rejects `NaN`
    if (!Number.isFinite(candidate) || candidate < 0) {
      return undefined;
    }

    // Raise `0` (and fractions below 1) to `1` to avoid immediate eviction
    return Math.max(1, Math.floor(candidate));
  };

  // Precedence: Options > env > default
  const resolveSize = (optionValue, envValue) =>
    parseCacheSize(optionValue) ?? parseCacheSize(envValue) ?? defaultSize;

  // Guard each cache on its own binding so neither is ever returned as `null`
  if (!cssMinifyCache) {
    cssMinifyCache = new LRU(resolveSize(options?.cacheCSS, process.env.HMN_CACHE_CSS));
  }
  if (!jsMinifyCache) {
    jsMinifyCache = new LRU(resolveSize(options?.cacheJS, process.env.HMN_CACHE_JS));
  }

  return { cssMinifyCache, jsMinifyCache };
}
1567
+
1508
1568
  /**
1509
1569
  * @param {string} value
1510
1570
  * @param {MinifierOptions} [options]
@@ -1512,13 +1572,15 @@ function joinResultSegments(results, options, restoreCustom, restoreIgnore) {
1512
1572
  */
1513
1573
  export const minify = async function (value, options) {
1514
1574
  const start = Date.now();
1575
+
1576
+ // Initialize caches on first use with configurable sizes
1577
+ const caches = initCaches(options || {});
1578
+
1515
1579
  options = processOptions(options || {}, {
1516
1580
  getLightningCSS,
1517
1581
  getTerser,
1518
1582
  getSwc,
1519
- cssMinifyCache,
1520
- jsMinifyCache,
1521
- urlMinifyCache
1583
+ ...caches
1522
1584
  });
1523
1585
  let result = await minifyHTML(value, options);
1524
1586
 
package/src/htmlparser.js CHANGED
@@ -36,16 +36,16 @@ const singleAttrValues = [
36
36
  // https://www.w3.org/TR/1999/REC-xml-names-19990114/#NT-QName
37
37
  const qnameCapture = (function () {
38
38
  // https://www.npmjs.com/package/ncname
39
- const combiningChar = '\\u0300-\\u0345\\u0360\\u0361\\u0483-\\u0486\\u0591-\\u05A1\\u05A3-\\u05B9\\u05BB-\\u05BD\\u05BF\\u05C1\\u05C2\\u05C4\\u064B-\\u0652\\u0670\\u06D6-\\u06E4\\u06E7\\u06E8\\u06EA-\\u06ED\\u0901-\\u0903\\u093C\\u093E-\\u094D\\u0951-\\u0954\\u0962\\u0963\\u0981-\\u0983\\u09BC\\u09BE-\\u09C4\\u09C7\\u09C8\\u09CB-\\u09CD\\u09D7\\u09E2\\u09E3\\u0A02\\u0A3C\\u0A3E-\\u0A42\\u0A47\\u0A48\\u0A4B-\\u0A4D\\u0A70\\u0A71\\u0A81-\\u0A83\\u0ABC\\u0ABE-\\u0AC5\\u0AC7-\\u0AC9\\u0ACB-\\u0ACD\\u0B01-\\u0B03\\u0B3C\\u0B3E-\\u0B43\\u0B47\\u0B48\\u0B4B-\\u0B4D\\u0B56\\u0B57\\u0B82\\u0B83\\u0BBE-\\u0BC2\\u0BC6-\\u0BC8\\u0BCA-\\u0BCD\\u0BD7\\u0C01-\\u0C03\\u0C3E-\\u0C44\\u0C46-\\u0C48\\u0C4A-\\u0C4D\\u0C55\\u0C56\\u0C82\\u0C83\\u0CBE-\\u0CC4\\u0CC6-\\u0CC8\\u0CCA-\\u0CCD\\u0CD5\\u0CD6\\u0D02\\u0D03\\u0D3E-\\u0D43\\u0D46-\\u0D48\\u0D4A-\\u0D4D\\u0D57\\u0E31\\u0E34-\\u0E3A\\u0E47-\\u0E4E\\u0EB1\\u0EB4-\\u0EB9\\u0EBB\\u0EBC\\u0EC8-\\u0ECD\\u0F18\\u0F19\\u0F35\\u0F37\\u0F39\\u0F3E\\u0F3F\\u0F71-\\u0F84\\u0F86-\\u0F8B\\u0F90-\\u0F95\\u0F97\\u0F99-\\u0FAD\\u0FB1-\\u0FB7\\u0FB9\\u20D0-\\u20DC\\u20E1\\u302A-\\u302F\\u3099\\u309A';
40
- const digit = '0-9\\u0660-\\u0669\\u06F0-\\u06F9\\u0966-\\u096F\\u09E6-\\u09EF\\u0A66-\\u0A6F\\u0AE6-\\u0AEF\\u0B66-\\u0B6F\\u0BE7-\\u0BEF\\u0C66-\\u0C6F\\u0CE6-\\u0CEF\\u0D66-\\u0D6F\\u0E50-\\u0E59\\u0ED0-\\u0ED9\\u0F20-\\u0F29';
41
- const extender = '\\xB7\\u02D0\\u02D1\\u0387\\u0640\\u0E46\\u0EC6\\u3005\\u3031-\\u3035\\u309D\\u309E\\u30FC-\\u30FE';
42
- const letter = 'A-Za-z\\xC0-\\xD6\\xD8-\\xF6\\xF8-\\u0131\\u0134-\\u013E\\u0141-\\u0148\\u014A-\\u017E\\u0180-\\u01C3\\u01CD-\\u01F0\\u01F4\\u01F5\\u01FA-\\u0217\\u0250-\\u02A8\\u02BB-\\u02C1\\u0386\\u0388-\\u038A\\u038C\\u038E-\\u03A1\\u03A3-\\u03CE\\u03D0-\\u03D6\\u03DA\\u03DC\\u03DE\\u03E0\\u03E2-\\u03F3\\u0401-\\u040C\\u040E-\\u044F\\u0451-\\u045C\\u045E-\\u0481\\u0490-\\u04C4\\u04C7\\u04C8\\u04CB\\u04CC\\u04D0-\\u04EB\\u04EE-\\u04F5\\u04F8\\u04F9\\u0531-\\u0556\\u0559\\u0561-\\u0586\\u05D0-\\u05EA\\u05F0-\\u05F2\\u0621-\\u063A\\u0641-\\u064A\\u0671-\\u06B7\\u06BA-\\u06BE\\u06C0-\\u06CE\\u06D0-\\u06D3\\u06D5\\u06E5\\u06E6\\u0905-\\u0939\\u093D\\u0958-\\u0961\\u0985-\\u098C\\u098F\\u0990\\u0993-\\u09A8\\u09AA-\\u09B0\\u09B2\\u09B6-\\u09B9\\u09DC\\u09DD\\u09DF-\\u09E1\\u09F0\\u09F1\\u0A05-\\u0A0A\\u0A0F\\u0A10\\u0A13-\\u0A28\\u0A2A-\\u0A30\\u0A32\\u0A33\\u0A35\\u0A36\\u0A38\\u0A39\\u0A59-\\u0A5C\\u0A5E\\u0A72-\\u0A74\\u0A85-\\u0A8B\\u0A8D\\u0A8F-\\u0A91\\u0A93-\\u0AA8\\u0AAA-\\u0AB0\\u0AB2\\u0AB3\\u0AB5-\\u0AB9\\u0ABD\\u0AE0\\u0B05-\\u0B0C\\u0B0F\\u0B10\\u0B13-\\u0B28\\u0B2A-\\u0B30\\u0B32\\u0B33\\u0B36-\\u0B39\\u0B3D\\u0B5C\\u0B5D\\u0B5F-\\u0B61\\u0B85-\\u0B8A\\u0B8E-\\u0B90\\u0B92-\\u0B95\\u0B99\\u0B9A\\u0B9C\\u0B9E\\u0B9F\\u0BA3\\u0BA4\\u0BA8-\\u0BAA\\u0BAE-\\u0BB5\\u0BB7-\\u0BB9\\u0C05-\\u0C0C\\u0C0E-\\u0C10\\u0C12-\\u0C28\\u0C2A-\\u0C33\\u0C35-\\u0C39\\u0C60\\u0C61\\u0C85-\\u0C8C\\u0C8E-\\u0C90\\u0C92-\\u0CA8\\u0CAA-\\u0CB3\\u0CB5-\\u0CB9\\u0CDE\\u0CE0\\u0CE1\\u0D05-\\u0D0C\\u0D0E-\\u0D10\\u0D12-\\u0D28\\u0D2A-\\u0D39\\u0D60\\u0D61\\u0E01-\\u0E2E\\u0E30\\u0E32\\u0E33\\u0E40-\\u0E45\\u0E81\\u0E82\\u0E84\\u0E87\\u0E88\\u0E8A\\u0E8D\\u0E94-\\u0E97\\u0E99-\\u0E9F\\u0EA1-\\u0EA3\\u0EA5\\u0EA7\\u0EAA\\u0EAB\\u0EAD\\u0EAE\\u0EB0\\u0EB2\\u0EB3\\u0EBD\\u0EC0-\\u0EC4\\u0F40-\\u0F47\\u0F49-\\u0F69\\u10A0-\\u10C5\\u10D0-\\u10F6\\u1100\\u1102\\u1103\\u1105-\\u1107\\u1109\\u110B\\u110C\\u110E-\\u1112\\u113C\\u113E\\u1140\\u114C\\u114E\\u1150\\u1154\\u1155\\u1159\\u115F-\
\u1161\\u1163\\u1165\\u1167\\u1169\\u116D\\u116E\\u1172\\u1173\\u1175\\u119E\\u11A8\\u11AB\\u11AE\\u11AF\\u11B7\\u11B8\\u11BA\\u11BC-\\u11C2\\u11EB\\u11F0\\u11F9\\u1E00-\\u1E9B\\u1EA0-\\u1EF9\\u1F00-\\u1F15\\u1F18-\\u1F1D\\u1F20-\\u1F45\\u1F48-\\u1F4D\\u1F50-\\u1F57\\u1F59\\u1F5B\\u1F5D\\u1F5F-\\u1F7D\\u1F80-\\u1FB4\\u1FB6-\\u1FBC\\u1FBE\\u1FC2-\\u1FC4\\u1FC6-\\u1FCC\\u1FD0-\\u1FD3\\u1FD6-\\u1FDB\\u1FE0-\\u1FEC\\u1FF2-\\u1FF4\\u1FF6-\\u1FFC\\u2126\\u212A\\u212B\\u212E\\u2180-\\u2182\\u3007\\u3021-\\u3029\\u3041-\\u3094\\u30A1-\\u30FA\\u3105-\\u312C\\u4E00-\\u9FA5\\uAC00-\\uD7A3';
39
+ const combiningChar = '\u0300-\u0345\u0360\u0361\u0483-\u0486\u0591-\u05A1\u05A3-\u05B9\u05BB-\u05BD\u05BF\u05C1\u05C2\u05C4\u064B-\u0652\u0670\u06D6-\u06E4\u06E7\u06E8\u06EA-\u06ED\u0901-\u0903\u093C\u093E-\u094D\u0951-\u0954\u0962\u0963\u0981-\u0983\u09BC\u09BE-\u09C4\u09C7\u09C8\u09CB-\u09CD\u09D7\u09E2\u09E3\u0A02\u0A3C\u0A3E-\u0A42\u0A47\u0A48\u0A4B-\u0A4D\u0A70\u0A71\u0A81-\u0A83\u0ABC\u0ABE-\u0AC5\u0AC7-\u0AC9\u0ACB-\u0ACD\u0B01-\u0B03\u0B3C\u0B3E-\u0B43\u0B47\u0B48\u0B4B-\u0B4D\u0B56\u0B57\u0B82\u0B83\u0BBE-\u0BC2\u0BC6-\u0BC8\u0BCA-\u0BCD\u0BD7\u0C01-\u0C03\u0C3E-\u0C44\u0C46-\u0C48\u0C4A-\u0C4D\u0C55\u0C56\u0C82\u0C83\u0CBE-\u0CC4\u0CC6-\u0CC8\u0CCA-\u0CCD\u0CD5\u0CD6\u0D02\u0D03\u0D3E-\u0D43\u0D46-\u0D48\u0D4A-\u0D4D\u0D57\u0E31\u0E34-\u0E3A\u0E47-\u0E4E\u0EB1\u0EB4-\u0EB9\u0EBB\u0EBC\u0EC8-\u0ECD\u0F18\u0F19\u0F35\u0F37\u0F39\u0F3E\u0F3F\u0F71-\u0F84\u0F86-\u0F8B\u0F90-\u0F95\u0F97\u0F99-\u0FAD\u0FB1-\u0FB7\u0FB9\u20D0-\u20DC\u20E1\u302A-\u302F\u3099\u309A';
40
+ const digit = '0-9\u0660-\u0669\u06F0-\u06F9\u0966-\u096F\u09E6-\u09EF\u0A66-\u0A6F\u0AE6-\u0AEF\u0B66-\u0B6F\u0BE7-\u0BEF\u0C66-\u0C6F\u0CE6-\u0CEF\u0D66-\u0D6F\u0E50-\u0E59\u0ED0-\u0ED9\u0F20-\u0F29';
41
+ const extender = '\xB7\u02D0\u02D1\u0387\u0640\u0E46\u0EC6\u3005\u3031-\u3035\u309D\u309E\u30FC-\u30FE';
42
+ const letter = 'A-Za-z\xC0-\xD6\xD8-\xF6\xF8-\u0131\u0134-\u013E\u0141-\u0148\u014A-\u017E\u0180-\u01C3\u01CD-\u01F0\u01F4\u01F5\u01FA-\u0217\u0250-\u02A8\u02BB-\u02C1\u0386\u0388-\u038A\u038C\u038E-\u03A1\u03A3-\u03CE\u03D0-\u03D6\u03DA\u03DC\u03DE\u03E0\u03E2-\u03F3\u0401-\u040C\u040E-\u044F\u0451-\u045C\u045E-\u0481\u0490-\u04C4\u04C7\u04C8\u04CB\u04CC\u04D0-\u04EB\u04EE-\u04F5\u04F8\u04F9\u0531-\u0556\u0559\u0561-\u0586\u05D0-\u05EA\u05F0-\u05F2\u0621-\u063A\u0641-\u064A\u0671-\u06B7\u06BA-\u06BE\u06C0-\u06CE\u06D0-\u06D3\u06D5\u06E5\u06E6\u0905-\u0939\u093D\u0958-\u0961\u0985-\u098C\u098F\u0990\u0993-\u09A8\u09AA-\u09B0\u09B2\u09B6-\u09B9\u09DC\u09DD\u09DF-\u09E1\u09F0\u09F1\u0A05-\u0A0A\u0A0F\u0A10\u0A13-\u0A28\u0A2A-\u0A30\u0A32\u0A33\u0A35\u0A36\u0A38\u0A39\u0A59-\u0A5C\u0A5E\u0A72-\u0A74\u0A85-\u0A8B\u0A8D\u0A8F-\u0A91\u0A93-\u0AA8\u0AAA-\u0AB0\u0AB2\u0AB3\u0AB5-\u0AB9\u0ABD\u0AE0\u0B05-\u0B0C\u0B0F\u0B10\u0B13-\u0B28\u0B2A-\u0B30\u0B32\u0B33\u0B36-\u0B39\u0B3D\u0B5C\u0B5D\u0B5F-\u0B61\u0B85-\u0B8A\u0B8E-\u0B90\u0B92-\u0B95\u0B99\u0B9A\u0B9C\u0B9E\u0B9F\u0BA3\u0BA4\u0BA8-\u0BAA\u0BAE-\u0BB5\u0BB7-\u0BB9\u0C05-\u0C0C\u0C0E-\u0C10\u0C12-\u0C28\u0C2A-\u0C33\u0C35-\u0C39\u0C60\u0C61\u0C85-\u0C8C\u0C8E-\u0C90\u0C92-\u0CA8\u0CAA-\u0CB3\u0CB5-\u0CB9\u0CDE\u0CE0\u0CE1\u0D05-\u0D0C\u0D0E-\u0D10\u0D12-\u0D28\u0D2A-\u0D39\u0D60\u0D61\u0E01-\u0E2E\u0E30\u0E32\u0E33\u0E40-\u0E45\u0E81\u0E82\u0E84\u0E87\u0E88\u0E8A\u0E8D\u0E94-\u0E97\u0E99-\u0E9F\u0EA1-\u0EA3\u0EA5\u0EA7\u0EAA\u0EAB\u0EAD\u0EAE\u0EB0\u0EB2\u0EB3\u0EBD\u0EC0-\u0EC4\u0F40-\u0F47\u0F49-\u0F69\u10A0-\u10C5\u10D0-\u10F6\u1100\u1102\u1103\u1105-\u1107\u1109\u110B\u110C\u110E-\u1112\u113C\u113E\u1140\u114C\u114E\u1150\u1154\u1155\u1159\u115F-\u1161\u1163\u1165\u1167\u1169\u116D\u116E\u1172\u1173\u1175\u119E\u11A8\u11AB\u11AE\u11AF\u11B7\u11B8\u11BA\u11BC-\u11C2\u11EB\u11F0\u11F9\u1E00-\u1E9B\u1EA0-\u1EF9\u1F00-\u1F15\u1F18-\u1F1D\u1F20-\u1F45\u1F48-\u1F4D\u1F50-\u1F57\u1F59\u1F5B\u1F5D\u1F5F-\u1F7D\u1F80-\u1F
B4\u1FB6-\u1FBC\u1FBE\u1FC2-\u1FC4\u1FC6-\u1FCC\u1FD0-\u1FD3\u1FD6-\u1FDB\u1FE0-\u1FEC\u1FF2-\u1FF4\u1FF6-\u1FFC\u2126\u212A\u212B\u212E\u2180-\u2182\u3007\u3021-\u3029\u3041-\u3094\u30A1-\u30FA\u3105-\u312C\u4E00-\u9FA5\uAC00-\uD7A3';
43
43
  const ncname = '[' + letter + '_][' + letter + digit + '\\.\\-_' + combiningChar + extender + ']*';
44
44
  return '((?:' + ncname + '\\:)?' + ncname + ')';
45
45
  })();
46
46
  const startTagOpen = new RegExp('^<' + qnameCapture);
47
47
  const startTagClose = /^\s*(\/?)>/;
48
- export const endTag = new RegExp('^<\\/' + qnameCapture + '[^>]*>');
48
+ export const endTag = new RegExp('^</' + qnameCapture + '[^>]*>');
49
49
  const doctype = /^<!DOCTYPE\s?[^>]+>/i;
50
50
 
51
51
  let IS_REGEX_CAPTURING_BROKEN = false;
@@ -144,9 +144,6 @@ export class HTMLParser {
144
144
  let pos = 0;
145
145
  let lastPos;
146
146
 
147
- // Helper to get remaining HTML from current position
148
- const remaining = () => fullHtml.slice(pos);
149
-
150
147
  // Helper to advance position
151
148
  const advance = (n) => { pos += n; };
152
149
 
@@ -165,22 +162,32 @@ export class HTMLParser {
165
162
  return { line, column };
166
163
  };
167
164
 
165
+ // Helper to safely extract substring when needed for regex operations
166
+ const sliceFromPos = (startPos, len) => {
167
+ const endPos = len !== undefined ? startPos + len : fullLength;
168
+ return fullHtml.slice(startPos, endPos);
169
+ };
170
+
168
171
  while (pos < fullLength) {
169
172
  lastPos = pos;
170
- const html = remaining();
173
+
171
174
  // Make sure we’re not in a `script` or `style` element
172
175
  if (!lastTag || !special.has(lastTag)) {
173
- let textEnd = html.indexOf('<');
174
- if (textEnd === 0) {
176
+ const textEnd = fullHtml.indexOf('<', pos);
177
+
178
+ if (textEnd === pos) {
179
+ // We found a tag at current position
180
+ const remaining = sliceFromPos(pos);
181
+
175
182
  // Comment
176
- if (/^<!--/.test(html)) {
177
- const commentEnd = html.indexOf('-->');
183
+ if (/^<!--/.test(remaining)) {
184
+ const commentEnd = fullHtml.indexOf('-->', pos + 4);
178
185
 
179
186
  if (commentEnd >= 0) {
180
187
  if (handler.comment) {
181
- await handler.comment(html.substring(4, commentEnd));
188
+ await handler.comment(fullHtml.substring(pos + 4, commentEnd));
182
189
  }
183
- advance(commentEnd + 3);
190
+ advance(commentEnd + 3 - pos);
184
191
  prevTag = '';
185
192
  prevAttrs = [];
186
193
  continue;
@@ -188,14 +195,14 @@ export class HTMLParser {
188
195
  }
189
196
 
190
197
  // https://web.archive.org/web/20241201212701/https://en.wikipedia.org/wiki/Conditional_comment#Downlevel-revealed_conditional_comment
191
- if (/^<!\[/.test(html)) {
192
- const conditionalEnd = html.indexOf(']>');
198
+ if (/^<!\[/.test(remaining)) {
199
+ const conditionalEnd = fullHtml.indexOf(']>', pos + 3);
193
200
 
194
201
  if (conditionalEnd >= 0) {
195
202
  if (handler.comment) {
196
- await handler.comment(html.substring(2, conditionalEnd + 1), true /* Non-standard */);
203
+ await handler.comment(fullHtml.substring(pos + 2, conditionalEnd + 1), true /* Non-standard */);
197
204
  }
198
- advance(conditionalEnd + 2);
205
+ advance(conditionalEnd + 2 - pos);
199
206
  prevTag = '';
200
207
  prevAttrs = [];
201
208
  continue;
@@ -203,8 +210,8 @@ export class HTMLParser {
203
210
  }
204
211
 
205
212
  // Doctype
206
- const doctypeMatch = html.match(doctype);
207
- if (doctypeMatch) {
213
+ if (doctype.test(remaining)) {
214
+ const doctypeMatch = remaining.match(doctype);
208
215
  if (handler.doctype) {
209
216
  handler.doctype(doctypeMatch[0]);
210
217
  }
@@ -215,8 +222,8 @@ export class HTMLParser {
215
222
  }
216
223
 
217
224
  // End tag
218
- const endTagMatch = html.match(endTag);
219
- if (endTagMatch) {
225
+ if (endTag.test(remaining)) {
226
+ const endTagMatch = remaining.match(endTag);
220
227
  advance(endTagMatch[0].length);
221
228
  await parseEndTag(endTagMatch[0], endTagMatch[1]);
222
229
  prevTag = '/' + endTagMatch[1].toLowerCase();
@@ -225,7 +232,7 @@ export class HTMLParser {
225
232
  }
226
233
 
227
234
  // Start tag
228
- const startTagMatch = parseStartTag(html);
235
+ const startTagMatch = parseStartTag(remaining, pos);
229
236
  if (startTagMatch) {
230
237
  advance(startTagMatch.advance);
231
238
  await handleStartTag(startTagMatch);
@@ -235,30 +242,30 @@ export class HTMLParser {
235
242
 
236
243
  // Treat `<` as text
237
244
  if (handler.continueOnParseError) {
238
- textEnd = html.indexOf('<', 1);
245
+ // Continue looking for next tag
239
246
  }
240
247
  }
241
248
 
242
249
  let text;
243
250
  if (textEnd >= 0) {
244
- text = html.substring(0, textEnd);
245
- advance(textEnd);
251
+ text = fullHtml.substring(pos, textEnd);
252
+ advance(textEnd - pos);
246
253
  } else {
247
- text = html;
248
- advance(html.length);
254
+ text = fullHtml.substring(pos);
255
+ advance(fullLength - pos);
249
256
  }
250
257
 
251
- // Next tag
252
- const nextHtml = remaining();
253
- let nextTagMatch = parseStartTag(nextHtml);
258
+ // Next tag for whitespace processing context
259
+ const remainingAfterText = sliceFromPos(pos);
260
+ let nextTagMatch = parseStartTag(remainingAfterText, pos);
254
261
  if (nextTagMatch) {
255
262
  nextTag = nextTagMatch.tagName;
256
263
  // Extract minimal attribute info for whitespace logic (just name/value pairs)
257
264
  nextAttrs = extractAttrInfo(nextTagMatch.attrs);
258
265
  } else {
259
- nextTagMatch = nextHtml.match(endTag);
260
- if (nextTagMatch) {
261
- nextTag = '/' + nextTagMatch[1];
266
+ const endTagMatch = remainingAfterText.match(endTag);
267
+ if (endTagMatch) {
268
+ nextTag = '/' + endTagMatch[1];
262
269
  nextAttrs = [];
263
270
  } else {
264
271
  nextTag = '';
@@ -274,10 +281,11 @@ export class HTMLParser {
274
281
  } else {
275
282
  const stackedTag = lastTag.toLowerCase();
276
283
  // Use pre-compiled regex for common tags (`script`, `style`, `noscript`) to avoid regex creation overhead
277
- const reStackedTag = preCompiledStackedTags[stackedTag] || reCache[stackedTag] || (reCache[stackedTag] = new RegExp('([\\s\\S]*?)</' + stackedTag + '[^>]*>', 'i'));
284
+ const reStackedTag = preCompiledStackedTags[stackedTag] || reCache[stackedTag] || (reCache[stackedTag] = new RegExp('([\\s\\S]*?)\\x3c/' + stackedTag + '[^>]*>', 'i'));
278
285
 
279
- const m = reStackedTag.exec(html);
280
- if (m) {
286
+ const remaining = sliceFromPos(pos);
287
+ const m = reStackedTag.exec(remaining);
288
+ if (m && m.index === 0) {
281
289
  let text = m[1];
282
290
  if (stackedTag !== 'script' && stackedTag !== 'style' && stackedTag !== 'noscript') {
283
291
  text = text
@@ -288,12 +296,12 @@ export class HTMLParser {
288
296
  await handler.chars(text);
289
297
  }
290
298
  // Advance HTML past the matched special tag content and its closing tag
291
- advance(m.index + m[0].length);
299
+ advance(m[0].length);
292
300
  await parseEndTag('</' + stackedTag + '>', stackedTag);
293
301
  } else {
294
302
  // No closing tag found; to avoid infinite loop, break similarly to previous behavior
295
- if (handler.continueOnParseError && handler.chars && html) {
296
- await handler.chars(html[0], prevTag, '', prevAttrs, []);
303
+ if (handler.continueOnParseError && handler.chars && pos < fullLength) {
304
+ await handler.chars(fullHtml[pos], prevTag, '', prevAttrs, []);
297
305
  advance(1);
298
306
  } else {
299
307
  break;
@@ -313,7 +321,7 @@ export class HTMLParser {
313
321
  continue;
314
322
  }
315
323
  const loc = getLineColumn(pos);
316
- // Include some context before the error position so the snippet contains the offending markup plus preceding characters (e.g., invalid<tag)
324
+ // Include some context before the error position so the snippet contains the offending markup plus preceding characters (e.g., `invalid<tag`)
317
325
  const CONTEXT_BEFORE = 50;
318
326
  const startPos = Math.max(0, pos - CONTEXT_BEFORE);
319
327
  const snippet = fullHtml.slice(startPos, startPos + 200).replace(/\n/g, ' ');
@@ -345,8 +353,8 @@ export class HTMLParser {
345
353
  }).filter(attr => attr.name); // Filter out invalid entries
346
354
  }
347
355
 
348
- function parseStartTag(input) {
349
- const start = input.match(startTagOpen);
356
+ function parseStartTag(remaining, startPos) {
357
+ const start = remaining.match(startTagOpen);
350
358
  if (start) {
351
359
  const match = {
352
360
  tagName: start[1],
@@ -354,7 +362,7 @@ export class HTMLParser {
354
362
  advance: 0
355
363
  };
356
364
  let consumed = start[0].length;
357
- input = input.slice(consumed);
365
+ let currentPos = startPos + consumed;
358
366
  let end, attr;
359
367
 
360
368
  // Safety limit: Max length of input to check for attributes
@@ -363,16 +371,20 @@ export class HTMLParser {
363
371
 
364
372
  while (true) {
365
373
  // Check for closing tag first
366
- end = input.match(startTagClose);
374
+ const remainingForEnd = sliceFromPos(currentPos);
375
+ end = remainingForEnd.match(startTagClose);
367
376
  if (end) {
368
377
  break;
369
378
  }
370
379
 
371
380
  // Limit the input length we pass to the regex to prevent catastrophic backtracking
372
- const isLimited = input.length > MAX_ATTR_PARSE_LENGTH;
373
- const searchInput = isLimited ? input.slice(0, MAX_ATTR_PARSE_LENGTH) : input;
381
+ const remainingLen = fullLength - currentPos;
382
+ const isLimited = remainingLen > MAX_ATTR_PARSE_LENGTH;
383
+ const extractEndPos = isLimited ? currentPos + MAX_ATTR_PARSE_LENGTH : fullLength;
374
384
 
375
- attr = searchInput.match(attribute);
385
+ // Create a temporary substring only for attribute parsing (this is limited and necessary for regex)
386
+ const searchStr = fullHtml.substring(currentPos, extractEndPos);
387
+ attr = searchStr.match(attribute);
376
388
 
377
389
  // If we limited the input and got a match, check if the value might be truncated
378
390
  if (attr && isLimited) {
@@ -381,32 +393,31 @@ export class HTMLParser {
381
393
  // If the match ends near the limit, the value might be truncated
382
394
  if (attrEnd > MAX_ATTR_PARSE_LENGTH - 100) {
383
395
  // Manually extract this attribute to handle potentially huge value
384
- const manualMatch = input.match(/^\s*([^\s"'<>/=]+)\s*=\s*/);
396
+ const manualMatch = searchStr.match(/^\s*([^\s"'<>/=]+)\s*=\s*/);
385
397
  if (manualMatch) {
386
- const quoteChar = input[manualMatch[0].length];
398
+ const quoteChar = searchStr[manualMatch[0].length];
387
399
  if (quoteChar === '"' || quoteChar === "'") {
388
- const closeQuote = input.indexOf(quoteChar, manualMatch[0].length + 1);
400
+ const closeQuote = searchStr.indexOf(quoteChar, manualMatch[0].length + 1);
389
401
  if (closeQuote !== -1) {
390
- const fullAttr = input.slice(0, closeQuote + 1);
402
+ const fullAttrLen = closeQuote + 1;
391
403
  const numCustomParts = handler.customAttrSurround
392
404
  ? handler.customAttrSurround.length * NCP
393
405
  : 0;
394
406
  const baseIndex = 1 + numCustomParts;
395
407
 
396
408
  attr = [];
397
- attr[0] = fullAttr;
409
+ attr[0] = searchStr.substring(0, fullAttrLen);
398
410
  attr[baseIndex] = manualMatch[1]; // Attribute name
399
- attr[baseIndex + 1] = '='; // `customAssign` (falls back to “=” for huge attributes)
400
- const value = input.slice(manualMatch[0].length + 1, closeQuote);
411
+ attr[baseIndex + 1] = '='; // `customAssign` (falls back to "=" for huge attributes)
412
+ const value = searchStr.substring(manualMatch[0].length + 1, closeQuote);
401
413
  // Place value at correct index based on quote type
402
414
  if (quoteChar === '"') {
403
415
  attr[baseIndex + 2] = value; // Double-quoted value
404
416
  } else {
405
417
  attr[baseIndex + 3] = value; // Single-quoted value
406
418
  }
407
- const attrLen = fullAttr.length;
408
- input = input.slice(attrLen);
409
- consumed += attrLen;
419
+ currentPos += fullAttrLen;
420
+ consumed += fullAttrLen;
410
421
  match.attrs.push(attr);
411
422
  continue;
412
423
  }
@@ -419,18 +430,55 @@ export class HTMLParser {
419
430
  }
420
431
  }
421
432
 
433
+ if (!attr && isLimited) {
434
+ // If we limited the input and got no match, try manual extraction
435
+ // This handles cases where quoted attributes exceed `MAX_ATTR_PARSE_LENGTH`
436
+ const manualMatch = searchStr.match(/^\s*([^\s"'<>/=]+)\s*=\s*/);
437
+ if (manualMatch) {
438
+ const quoteChar = searchStr[manualMatch[0].length];
439
+ if (quoteChar === '"' || quoteChar === "'") {
440
+ // Search in the full HTML (not limited substring) for closing quote
441
+ const closeQuote = fullHtml.indexOf(quoteChar, currentPos + manualMatch[0].length + 1);
442
+ if (closeQuote !== -1) {
443
+ const fullAttrLen = closeQuote - currentPos + 1;
444
+ const numCustomParts = handler.customAttrSurround
445
+ ? handler.customAttrSurround.length * NCP
446
+ : 0;
447
+ const baseIndex = 1 + numCustomParts;
448
+
449
+ attr = [];
450
+ attr[0] = fullHtml.substring(currentPos, closeQuote + 1);
451
+ attr[baseIndex] = manualMatch[1]; // Attribute name
452
+ attr[baseIndex + 1] = '='; // customAssign
453
+ const value = fullHtml.substring(currentPos + manualMatch[0].length + 1, closeQuote);
454
+ // Place value at correct index based on quote type
455
+ if (quoteChar === '"') {
456
+ attr[baseIndex + 2] = value; // Double-quoted value
457
+ } else {
458
+ attr[baseIndex + 3] = value; // Single-quoted value
459
+ }
460
+ currentPos += fullAttrLen;
461
+ consumed += fullAttrLen;
462
+ match.attrs.push(attr);
463
+ continue;
464
+ }
465
+ }
466
+ }
467
+ }
468
+
422
469
  if (!attr) {
423
470
  break;
424
471
  }
425
472
 
426
473
  const attrLen = attr[0].length;
427
- input = input.slice(attrLen);
474
+ currentPos += attrLen;
428
475
  consumed += attrLen;
429
476
  match.attrs.push(attr);
430
477
  }
431
478
 
432
479
  // Check for closing tag
433
- end = input.match(startTagClose);
480
+ const remainingForClose = sliceFromPos(currentPos);
481
+ end = remainingForClose.match(startTagClose);
434
482
  if (end) {
435
483
  match.unarySlash = end[1];
436
484
  consumed += end[0].length;
@@ -627,11 +675,11 @@ export class HTMLParser {
627
675
  if (handler.end) {
628
676
  handler.end(tagName, [], false);
629
677
  }
630
- } else if (tagName.toLowerCase() === 'br') {
678
+ } else if (tagName && tagName.toLowerCase() === 'br') {
631
679
  if (handler.start) {
632
680
  await handler.start(tagName, [], true, '');
633
681
  }
634
- } else if (tagName.toLowerCase() === 'p') {
682
+ } else if (tagName && tagName.toLowerCase() === 'p') {
635
683
  if (handler.start) {
636
684
  await handler.start(tagName, [], false, '', true);
637
685
  }
@@ -1,7 +1,7 @@
1
1
  // Imports
2
2
 
3
3
  import RelateURL from 'relateurl';
4
- import { stableStringify, identity, identityAsync, replaceAsync } from './utils.js';
4
+ import { LRU, stableStringify, identity, identityAsync, replaceAsync } from './utils.js';
5
5
  import { RE_TRAILING_SEMICOLON } from './constants.js';
6
6
  import { canCollapseWhitespace, canTrimWhitespace } from './whitespace.js';
7
7
  import { wrapCSS, unwrapCSS } from './content.js';
@@ -32,10 +32,9 @@ function shouldMinifyInnerHTML(options) {
32
32
  * @param {Function} deps.getSwc - Function to lazily load @swc/core
33
33
  * @param {LRU} deps.cssMinifyCache - CSS minification cache
34
34
  * @param {LRU} deps.jsMinifyCache - JS minification cache
35
- * @param {LRU} deps.urlMinifyCache - URL minification cache
36
35
  * @returns {MinifierOptions} Normalized options with defaults applied
37
36
  */
38
- const processOptions = (inputOptions, { getLightningCSS, getTerser, getSwc, cssMinifyCache, jsMinifyCache, urlMinifyCache } = {}) => {
37
+ const processOptions = (inputOptions, { getLightningCSS, getTerser, getSwc, cssMinifyCache, jsMinifyCache } = {}) => {
39
38
  const options = {
40
39
  name: function (name) {
41
40
  return name.toLowerCase();
@@ -329,7 +328,7 @@ const processOptions = (inputOptions, { getLightningCSS, getTerser, getSwc, cssM
329
328
  const relateUrlInstance = new RelateURL(relateUrlOptions.site || '', relateUrlOptions);
330
329
 
331
330
  // Create instance-specific cache (results depend on site configuration)
332
- const instanceCache = urlMinifyCache ? new (urlMinifyCache.constructor)(500) : null;
331
+ const instanceCache = new LRU(500);
333
332
 
334
333
  options.minifyURLs = function (text) {
335
334
  // Fast-path: Skip if text doesn’t look like a URL that needs processing
@@ -338,20 +337,15 @@ const processOptions = (inputOptions, { getLightningCSS, getTerser, getSwc, cssM
338
337
  return text;
339
338
  }
340
339
 
341
- // Check instance-specific cache
342
- if (instanceCache) {
343
- const cached = instanceCache.get(text);
344
- if (cached !== undefined) {
345
- return cached;
346
- }
340
+ // Check cache
341
+ const cached = instanceCache.get(text);
342
+ if (cached !== undefined) {
343
+ return cached;
347
344
  }
348
345
 
349
346
  try {
350
347
  const result = relateUrlInstance.relate(text);
351
- // Cache successful results
352
- if (instanceCache) {
353
- instanceCache.set(text, result);
354
- }
348
+ instanceCache.set(text, result);
355
349
  return result;
356
350
  } catch (err) {
357
351
  // Don’t cache errors