npm - html-minifier-next - Version diff - 4.9.1 → 4.10.0 - Mend

html-minifier-next 4.9.1 → 4.10.0

This diff shows the changes between two publicly available versions of the package, both of which have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Files changed (11)

package/README.md +25 -19
package/dist/htmlminifier.cjs +191 -94
package/dist/htmlminifier.esm.bundle.js +191 -94
package/dist/types/htmlminifier.d.ts.map +1 -1
package/dist/types/htmlparser.d.ts.map +1 -1
package/dist/types/tokenchain.d.ts +1 -0
package/dist/types/tokenchain.d.ts.map +1 -1
package/package.json +4 -4
package/src/htmlminifier.js +5 -1
package/src/htmlparser.js +89 -50
package/src/tokenchain.js +77 -34

package/src/htmlparser.js CHANGED

@@ -15,8 +15,6 @@
  * });
  */
-import { replaceAsync } from './utils.js';
 class CaseInsensitiveSet extends Set {
   has(str) {
     return super.has(str.toLowerCase());
@@ -84,6 +82,9 @@ const preCompiledStackedTags = {
   'noscript': /([\s\S]*?)<\/noscript[^>]*>/i
 };
+// Cache for compiled attribute regexes per handler configuration
+const attrRegexCache = new WeakMap();
 function attrForHandler(handler) {
   let pattern = singleAttrIdentifier.source +
     '(?:\\s*(' + joinSingleAttrAssigns(handler) + ')' +
@@ -121,22 +122,47 @@ export class HTMLParser {
   }
   async parse() {
-    let html = this.html;
     const handler = this.handler;
+    const fullHtml = this.html;
+    const fullLength = fullHtml.length;
     const stack = []; let lastTag;
-    const attribute = attrForHandler(handler);
-    let last, prevTag = undefined, nextTag = undefined;
-    // Track position for better error messages
-    let position = 0;
-    const getLineColumn = (pos) => {
-      const lines = this.html.slice(0, pos).split('\n');
-      return { line: lines.length, column: lines[lines.length - 1].length + 1 };
+    // Use cached attribute regex if available
+    let attribute = attrRegexCache.get(handler);
+    if (!attribute) {
+      attribute = attrForHandler(handler);
+      attrRegexCache.set(handler, attribute);
+    }
+    let prevTag = undefined, nextTag = undefined;
+    // Index-based parsing
+    let pos = 0;
+    let lastPos;
+    // Helper to get remaining HTML from current position
+    const remaining = () => fullHtml.slice(pos);
+    // Helper to advance position
+    const advance = (n) => { pos += n; };
+    // Lazy line/column calculation—only compute on actual errors
+    const getLineColumn = (position) => {
+      let line = 1;
+      let column = 1;
+      for (let i = 0; i < position; i++) {
+        if (fullHtml[i] === '\n') {
+          line++;
+          column = 1;
+        } else {
+          column++;
+        }
+      }
+      return { line, column };
     };
-    while (html) {
-      last = html;
+    while (pos < fullLength) {
+      lastPos = pos;
+      const html = remaining();
       // Make sure we’re not in a `script` or `style` element
       if (!lastTag || !special.has(lastTag)) {
         let textEnd = html.indexOf('<');
@@ -149,7 +175,7 @@ export class HTMLParser {
               if (handler.comment) {
                 await handler.comment(html.substring(4, commentEnd));
               }
-              html = html.substring(commentEnd + 3);
+              advance(commentEnd + 3);
               prevTag = '';
               continue;
             }
@@ -163,7 +189,7 @@ export class HTMLParser {
               if (handler.comment) {
                 await handler.comment(html.substring(2, conditionalEnd + 1), true /* non-standard */);
               }
-              html = html.substring(conditionalEnd + 2);
+              advance(conditionalEnd + 2);
               prevTag = '';
               continue;
             }
@@ -175,7 +201,7 @@ export class HTMLParser {
             if (handler.doctype) {
               handler.doctype(doctypeMatch[0]);
             }
-            html = html.substring(doctypeMatch[0].length);
+            advance(doctypeMatch[0].length);
             prevTag = '';
             continue;
           }
@@ -183,8 +209,8 @@ export class HTMLParser {
           // End tag
           const endTagMatch = html.match(endTag);
           if (endTagMatch) {
-            html = html.substring(endTagMatch[0].length);
-            await replaceAsync(endTagMatch[0], endTag, parseEndTag);
+            advance(endTagMatch[0].length);
+            await parseEndTag(endTagMatch[0], endTagMatch[1]);
             prevTag = '/' + endTagMatch[1].toLowerCase();
             continue;
           }
@@ -192,7 +218,7 @@ export class HTMLParser {
           // Start tag
           const startTagMatch = parseStartTag(html);
           if (startTagMatch) {
-            html = startTagMatch.rest;
+            advance(startTagMatch.advance);
             await handleStartTag(startTagMatch);
             prevTag = startTagMatch.tagName.toLowerCase();
             continue;
@@ -207,18 +233,19 @@ export class HTMLParser {
         let text;
         if (textEnd >= 0) {
           text = html.substring(0, textEnd);
-          html = html.substring(textEnd);
+          advance(textEnd);
         } else {
           text = html;
-          html = '';
+          advance(html.length);
         }
         // Next tag
-        let nextTagMatch = parseStartTag(html);
+        const nextHtml = remaining();
+        let nextTagMatch = parseStartTag(nextHtml);
         if (nextTagMatch) {
           nextTag = nextTagMatch.tagName;
         } else {
-          nextTagMatch = html.match(endTag);
+          nextTagMatch = nextHtml.match(endTag);
           if (nextTagMatch) {
             nextTag = '/' + nextTagMatch[1];
           } else {
@@ -235,45 +262,50 @@ export class HTMLParser {
         // Use pre-compiled regex for common tags (`script`, `style`, `noscript`) to avoid regex creation overhead
         const reStackedTag = preCompiledStackedTags[stackedTag] || reCache[stackedTag] || (reCache[stackedTag] = new RegExp('([\\s\\S]*?)</' + stackedTag + '[^>]*>', 'i'));
-        html = await replaceAsync(html, reStackedTag, async (_, text) => {
+        const m = reStackedTag.exec(html);
+        if (m) {
+          let text = m[1];
           if (stackedTag !== 'script' && stackedTag !== 'style' && stackedTag !== 'noscript') {
             text = text
               .replace(/<!--([\s\S]*?)-->/g, '$1')
               .replace(/<!\[CDATA\[([\s\S]*?)]]>/g, '$1');
           }
           if (handler.chars) {
             await handler.chars(text);
           }
-          return '';
-        });
-        await parseEndTag('</' + stackedTag + '>', stackedTag);
+          // Advance HTML past the matched special tag content and its closing tag
+          advance(m.index + m[0].length);
+          await parseEndTag('</' + stackedTag + '>', stackedTag);
+        } else {
+          // No closing tag found; to avoid infinite loop, break similarly to previous behavior
+          if (handler.continueOnParseError && handler.chars && html) {
+            await handler.chars(html[0], prevTag, '');
+            advance(1);
+          } else {
+            break;
+          }
+        }
       }
-      if (html === last) {
+      if (pos === lastPos) {
         if (handler.continueOnParseError) {
           // Skip the problematic character and continue
           if (handler.chars) {
-            await handler.chars(html[0], prevTag, '');
+            await handler.chars(fullHtml[pos], prevTag, '');
           }
-          html = html.substring(1);
-          position++;
+          advance(1);
           prevTag = '';
           continue;
         }
-        const loc = getLineColumn(position);
-        // Include some context before the error position so the snippet contains
-        // the offending markup plus preceding characters (e.g. "invalid<tag").
+        const loc = getLineColumn(pos);
+        // Include some context before the error position so the snippet contains the offending markup plus preceding characters (e.g., “invalid<tag”)
         const CONTEXT_BEFORE = 50;
-        const startPos = Math.max(0, position - CONTEXT_BEFORE);
-        const snippet = this.html.slice(startPos, startPos + 200).replace(/\n/g, ' ');
+        const startPos = Math.max(0, pos - CONTEXT_BEFORE);
+        const snippet = fullHtml.slice(startPos, startPos + 200).replace(/\n/g, ' ');
         throw new Error(
-          `Parse error at line ${loc.line}, column ${loc.column}:\n${snippet}${this.html.length > startPos + 200 ? '…' : ''}`
+          `Parse error at line ${loc.line}, column ${loc.column}:\n${snippet}${fullHtml.length > startPos + 200 ? '…' : ''}`
         );
       }
-      position = this.html.length - html.length;
     }
     if (!handler.partialMarkup) {
@@ -286,9 +318,11 @@ export class HTMLParser {
       if (start) {
         const match = {
           tagName: start[1],
-          attrs: []
+          attrs: [],
+          advance: 0
         };
-        input = input.slice(start[0].length);
+        let consumed = start[0].length;
+        input = input.slice(consumed);
         let end, attr;
         // Safety limit: max length of input to check for attributes
@@ -338,7 +372,9 @@ export class HTMLParser {
                     } else {
                       attr[baseIndex + 3] = value; // Single-quoted value
                     }
-                    input = input.slice(fullAttr.length);
+                    const attrLen = fullAttr.length;
+                    input = input.slice(attrLen);
+                    consumed += attrLen;
                     match.attrs.push(attr);
                     continue;
                   }
@@ -355,7 +391,9 @@ export class HTMLParser {
             break;
           }
-          input = input.slice(attr[0].length);
+          const attrLen = attr[0].length;
+          input = input.slice(attrLen);
+          consumed += attrLen;
           match.attrs.push(attr);
         }
@@ -363,7 +401,8 @@ export class HTMLParser {
         end = input.match(startTagClose);
         if (end) {
           match.unarySlash = end[1];
-          match.rest = input.slice(end[0].length);
+          consumed += end[0].length;
+          match.advance = consumed;
           return match;
         }
       }
@@ -373,7 +412,7 @@ export class HTMLParser {
       let pos;
       const needle = tagName.toLowerCase();
       for (pos = stack.length - 1; pos >= 0; pos--) {
-        const currentTag = stack[pos].tag.toLowerCase();
+        const currentTag = stack[pos].lowerTag;
         if (currentTag === needle) {
           return pos;
         }
@@ -427,7 +466,7 @@ export class HTMLParser {
         }
         if (tagName === 'col' && findTag('colgroup') < 0) {
           lastTag = 'colgroup';
-          stack.push({ tag: lastTag, attrs: [] });
+          stack.push({ tag: lastTag, lowerTag: 'colgroup', attrs: [] });
           if (handler.start) {
             await handler.start(lastTag, [], false, '');
           }
@@ -506,7 +545,7 @@ export class HTMLParser {
       });
       if (!unary) {
-        stack.push({ tag: tagName, attrs });
+        stack.push({ tag: tagName, lowerTag: tagName.toLowerCase(), attrs });
         lastTag = tagName;
         unarySlash = '';
       }
@@ -520,7 +559,7 @@ export class HTMLParser {
       let pos;
       const needle = tagName.toLowerCase();
       for (pos = stack.length - 1; pos >= 0; pos--) {
-        if (stack[pos].tag.toLowerCase() === needle) {
+        if (stack[pos].lowerTag === needle) {
           break;
         }
       }

package/src/tokenchain.js CHANGED

@@ -1,21 +1,40 @@
 class Sorter {
   sort(tokens, fromIndex = 0) {
     for (let i = 0, len = this.keys.length; i < len; i++) {
-      const key = this.keys[i];
-      const token = key.slice(1);
+      const token = this.keys[i];
-      let index = tokens.indexOf(token, fromIndex);
+      // Build position map for this token to avoid repeated `indexOf`
+      const positions = [];
+      for (let j = fromIndex; j < tokens.length; j++) {
+        if (tokens[j] === token) {
+          positions.push(j);
+        }
+      }
+      if (positions.length > 0) {
+        // Build new array with tokens in sorted order instead of splicing
+        const result = [];
-      if (index !== -1) {
-        do {
-          if (index !== fromIndex) {
-            tokens.splice(index, 1);
-            tokens.splice(fromIndex, 0, token);
+        // Add all instances of the current token first
+        for (let j = 0; j < positions.length; j++) {
+          result.push(token);
+        }
+        // Add other tokens, skipping positions where current token was
+        const posSet = new Set(positions);
+        for (let j = fromIndex; j < tokens.length; j++) {
+          if (!posSet.has(j)) {
+            result.push(tokens[j]);
           }
-          fromIndex++;
-        } while ((index = tokens.indexOf(token, fromIndex)) !== -1);
+        }
+        // Copy sorted portion back to tokens array
+        for (let j = 0; j < result.length; j++) {
+          tokens[fromIndex + j] = result[j];
+        }
-        return this[key].sort(tokens, fromIndex);
+        const newFromIndex = fromIndex + positions.length;
+        return this.sorterMap.get(token).sort(tokens, newFromIndex);
       }
     }
     return tokens;
@@ -23,46 +42,70 @@ class Sorter {
 }
 class TokenChain {
+  constructor() {
+    // Use Map instead of object properties for better performance
+    this.map = new Map();
+  }
   add(tokens) {
     tokens.forEach((token) => {
-      const key = '$' + token;
-      if (!this[key]) {
-        this[key] = [];
-        this[key].processed = 0;
+      if (!this.map.has(token)) {
+        this.map.set(token, { arrays: [], processed: 0 });
       }
-      this[key].push(tokens);
+      this.map.get(token).arrays.push(tokens);
     });
   }
   createSorter() {
     const sorter = new Sorter();
+    sorter.sorterMap = new Map();
+    // Convert Map entries to array and sort
+    const entries = Array.from(this.map.entries()).sort((a, b) => {
+      const m = a[1].arrays.length;
+      const n = b[1].arrays.length;
+      // Sort by length descending (larger first)
+      const lengthDiff = n - m;
+      if (lengthDiff !== 0) return lengthDiff;
+      // If lengths equal, sort by key ascending
+      return a[0].localeCompare(b[0]);
+    });
+    sorter.keys = [];
-    sorter.keys = Object.keys(this).sort((j, k) => {
-      const m = this[j].length;
-      const n = this[k].length;
-      return m < n ? 1 : m > n ? -1 : j < k ? -1 : j > k ? 1 : 0;
-    }).filter((key) => {
-      if (this[key].processed < this[key].length) {
-        const token = key.slice(1);
+    entries.forEach(([token, data]) => {
+      if (data.processed < data.arrays.length) {
         const chain = new TokenChain();
-        this[key].forEach((tokens) => {
-          let index;
-          while ((index = tokens.indexOf(token)) !== -1) {
-            tokens.splice(index, 1);
+        data.arrays.forEach((tokens) => {
+          // Build new array without the current token instead of splicing
+          const filtered = [];
+          for (let i = 0; i < tokens.length; i++) {
+            if (tokens[i] !== token) {
+              filtered.push(tokens[i]);
+            }
           }
-          tokens.forEach((token) => {
-            this['$' + token].processed++;
+          // Mark remaining tokens as processed
+          filtered.forEach((t) => {
+            const tData = this.map.get(t);
+            if (tData) {
+              tData.processed++;
+            }
           });
-          chain.add(tokens.slice(0));
+          if (filtered.length > 0) {
+            chain.add(filtered);
+          }
         });
-        sorter[key] = chain.createSorter();
-        return true;
+        sorter.keys.push(token);
+        sorter.sorterMap.set(token, chain.createSorter());
       }
-      return false;
     });
     return sorter;
   }
 }
-export default TokenChain;
+export default TokenChain;