npm - entities - Versions diffs - 6.0.1 → 8.0.0 - Mend

entities 6.0.1 → 8.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (125)

package/dist/decode-codepoint.d.ts +8 -0
package/dist/decode-codepoint.d.ts.map +1 -0
package/dist/decode-codepoint.js +46 -0
package/dist/decode-codepoint.js.map +1 -0
package/dist/{esm/decode.d.ts → decode.d.ts} +11 -26
package/dist/decode.d.ts.map +1 -0
package/dist/{esm/decode.js → decode.js} +130 -83
package/dist/decode.js.map +1 -0
package/dist/{commonjs/encode.d.ts → encode.d.ts} +2 -0
package/dist/encode.d.ts.map +1 -0
package/dist/encode.js +90 -0
package/dist/encode.js.map +1 -0
package/dist/{esm/escape.d.ts → escape.d.ts} +13 -8
package/dist/escape.d.ts.map +1 -0
package/dist/{esm/escape.js → escape.js} +49 -34
package/dist/escape.js.map +1 -0
package/dist/generated/decode-data-html.d.ts +3 -0
package/dist/generated/decode-data-html.d.ts.map +1 -0
package/dist/generated/decode-data-html.js +5 -0
package/dist/generated/decode-data-html.js.map +1 -0
package/dist/generated/decode-data-xml.d.ts +3 -0
package/dist/generated/decode-data-xml.d.ts.map +1 -0
package/dist/generated/decode-data-xml.js +5 -0
package/dist/generated/decode-data-xml.js.map +1 -0
package/dist/generated/encode-html.d.ts +5 -0
package/dist/generated/encode-html.d.ts.map +1 -0
package/dist/generated/encode-html.js +12 -0
package/dist/generated/encode-html.js.map +1 -0
package/dist/{commonjs/index.d.ts → index.d.ts} +10 -17
package/dist/index.d.ts.map +1 -0
package/dist/{esm/index.js → index.js} +9 -25
package/dist/index.js.map +1 -0
package/dist/internal/bin-trie-flags.d.ts +17 -0
package/dist/internal/bin-trie-flags.d.ts.map +1 -0
package/dist/internal/bin-trie-flags.js +18 -0
package/dist/internal/bin-trie-flags.js.map +1 -0
package/dist/internal/decode-shared.d.ts +7 -0
package/dist/internal/decode-shared.d.ts.map +1 -0
package/dist/internal/decode-shared.js +17 -0
package/dist/internal/decode-shared.js.map +1 -0
package/dist/internal/encode-shared.d.ts +33 -0
package/dist/internal/encode-shared.d.ts.map +1 -0
package/dist/internal/encode-shared.js +93 -0
package/dist/internal/encode-shared.js.map +1 -0
package/package.json +38 -73
package/readme.md +36 -27
package/src/decode-codepoint.ts +1 -32
package/src/decode.ts +127 -76
package/src/encode.ts +49 -31
package/src/escape.ts +50 -38
package/src/generated/decode-data-html.ts +4 -5
package/src/generated/decode-data-xml.ts +4 -5
package/src/generated/encode-html.ts +15 -14
package/src/index.ts +23 -49
package/src/internal/bin-trie-flags.ts +16 -0
package/src/internal/decode-shared.ts +18 -0
package/src/internal/encode-shared.ts +123 -0
package/decode.d.ts +0 -1
package/decode.js +0 -3
package/dist/commonjs/decode-codepoint.d.ts +0 -19
package/dist/commonjs/decode-codepoint.d.ts.map +0 -1
package/dist/commonjs/decode-codepoint.js +0 -77
package/dist/commonjs/decode-codepoint.js.map +0 -1
package/dist/commonjs/decode.d.ts +0 -209
package/dist/commonjs/decode.d.ts.map +0 -1
package/dist/commonjs/decode.js +0 -511
package/dist/commonjs/decode.js.map +0 -1
package/dist/commonjs/encode.d.ts.map +0 -1
package/dist/commonjs/encode.js +0 -73
package/dist/commonjs/encode.js.map +0 -1
package/dist/commonjs/escape.d.ts +0 -43
package/dist/commonjs/escape.d.ts.map +0 -1
package/dist/commonjs/escape.js +0 -121
package/dist/commonjs/escape.js.map +0 -1
package/dist/commonjs/generated/decode-data-html.d.ts +0 -2
package/dist/commonjs/generated/decode-data-html.d.ts.map +0 -1
package/dist/commonjs/generated/decode-data-html.js +0 -10
package/dist/commonjs/generated/decode-data-html.js.map +0 -1
package/dist/commonjs/generated/decode-data-xml.d.ts +0 -2
package/dist/commonjs/generated/decode-data-xml.d.ts.map +0 -1
package/dist/commonjs/generated/decode-data-xml.js +0 -10
package/dist/commonjs/generated/decode-data-xml.js.map +0 -1
package/dist/commonjs/generated/encode-html.d.ts +0 -8
package/dist/commonjs/generated/encode-html.d.ts.map +0 -1
package/dist/commonjs/generated/encode-html.js +0 -13
package/dist/commonjs/generated/encode-html.js.map +0 -1
package/dist/commonjs/index.d.ts.map +0 -1
package/dist/commonjs/index.js +0 -131
package/dist/commonjs/index.js.map +0 -1
package/dist/commonjs/package.json +0 -3
package/dist/esm/decode-codepoint.d.ts +0 -19
package/dist/esm/decode-codepoint.d.ts.map +0 -1
package/dist/esm/decode-codepoint.js +0 -72
package/dist/esm/decode-codepoint.js.map +0 -1
package/dist/esm/decode.d.ts.map +0 -1
package/dist/esm/decode.js.map +0 -1
package/dist/esm/encode.d.ts +0 -22
package/dist/esm/encode.d.ts.map +0 -1
package/dist/esm/encode.js +0 -69
package/dist/esm/encode.js.map +0 -1
package/dist/esm/escape.d.ts.map +0 -1
package/dist/esm/escape.js.map +0 -1
package/dist/esm/generated/decode-data-html.d.ts +0 -2
package/dist/esm/generated/decode-data-html.d.ts.map +0 -1
package/dist/esm/generated/decode-data-html.js +0 -7
package/dist/esm/generated/decode-data-html.js.map +0 -1
package/dist/esm/generated/decode-data-xml.d.ts +0 -2
package/dist/esm/generated/decode-data-xml.d.ts.map +0 -1
package/dist/esm/generated/decode-data-xml.js +0 -7
package/dist/esm/generated/decode-data-xml.js.map +0 -1
package/dist/esm/generated/encode-html.d.ts +0 -8
package/dist/esm/generated/encode-html.d.ts.map +0 -1
package/dist/esm/generated/encode-html.js +0 -10
package/dist/esm/generated/encode-html.js.map +0 -1
package/dist/esm/index.d.ts +0 -96
package/dist/esm/index.d.ts.map +0 -1
package/dist/esm/index.js.map +0 -1
package/dist/esm/package.json +0 -3
package/escape.d.ts +0 -1
package/escape.js +0 -3
package/src/decode.spec.ts +0 -320
package/src/encode.spec.ts +0 -78
package/src/escape.spec.ts +0 -14
package/src/generated/.eslintrc.json +0 -10
package/src/index.spec.ts +0 -125

package/src/decode.ts CHANGED

@@ -1,6 +1,7 @@
+import { replaceCodePoint } from "./decode-codepoint.js";
 import { htmlDecodeTree } from "./generated/decode-data-html.js";
 import { xmlDecodeTree } from "./generated/decode-data-xml.js";
-import { replaceCodePoint, fromCodePoint } from "./decode-codepoint.js";
+import { BinTrieFlags } from "./internal/bin-trie-flags.js";
 const enum CharCodes {
     NUM = 35, // "#"
@@ -20,12 +21,6 @@ const enum CharCodes {
 /** Bit that needs to be set to convert an upper case ASCII character to lower case */
 const TO_LOWER_BIT = 0b10_0000;
-export enum BinTrieFlags {
-    VALUE_LENGTH = 0b1100_0000_0000_0000,
-    BRANCH_LENGTH = 0b0011_1111_1000_0000,
-    JUMP_TABLE = 0b0000_0000_0111_1111,
-}
 function isNumber(code: number): boolean {
     return code >= CharCodes.ZERO && code <= CharCodes.NINE;
 }
@@ -50,6 +45,7 @@ function isAsciiAlphaNumeric(code: number): boolean {
  *
  * Attribute values that aren't terminated properly aren't parsed, and shouldn't lead to a parser error.
  * See the example in https://html.spec.whatwg.org/multipage/parsing.html#named-character-reference-state
+ * @param code Code point to decode.
  */
 function isEntityInAttributeInvalidEnd(code: number): boolean {
     return code === CharCodes.EQUALS || isAsciiAlphaNumeric(code);
@@ -63,6 +59,9 @@ const enum EntityDecoderState {
     NamedEntity,
 }
+/**
+ * Decoding mode for named entities.
+ */
 export enum DecodingMode {
     /** Entities in text nodes that can end with any character. */
     Legacy = 0,
@@ -89,13 +88,13 @@ export interface EntityErrorProducer {
 export class EntityDecoder {
     constructor(
         /** The tree used to decode entities. */
+        // biome-ignore lint/correctness/noUnusedPrivateClassMembers: False positive
         private readonly decodeTree: Uint16Array,
         /**
          * The function that is called when a codepoint is decoded.
          *
          * For multi-byte named entities, this will be called multiple times,
          * with the second codepoint, and the same `consumed` value.
-         *
          * @param codepoint The decoded codepoint.
          * @param consumed The number of bytes consumed by the decoder.
          */
@@ -122,8 +121,13 @@ export class EntityDecoder {
     private excess = 1;
     /** The mode in which the decoder is operating. */
     private decodeMode = DecodingMode.Strict;
+    /** The number of characters that have been consumed in the current run. */
+    private runConsumed = 0;
-    /** Resets the instance to make it reusable. */
+    /**
+     * Resets the instance to make it reusable.
+     * @param decodeMode Entity decoding mode to use.
+     */
     startEntity(decodeMode: DecodingMode): void {
         this.decodeMode = decodeMode;
         this.state = EntityDecoderState.EntityStart;
@@ -131,6 +135,7 @@ export class EntityDecoder {
         this.treeIndex = 0;
         this.excess = 1;
         this.consumed = 1;
+        this.runConsumed = 0;
     }
     /**
@@ -139,7 +144,6 @@ export class EntityDecoder {
      *
      * Mirrors the implementation of `getDecoder`, but with the ability to stop decoding if the
      * entity is incomplete, and resume when the next string is written.
-     *
      * @param input The string containing the entity (or a continuation of the entity).
      * @param offset The offset at which the entity begins. Should be 0 if this is not the first call.
      * @returns The number of characters that were consumed, or -1 if the entity is incomplete.
@@ -178,7 +182,6 @@ export class EntityDecoder {
      * Switches between the numeric decimal and hexadecimal states.
      *
      * Equivalent to the `Numeric character reference state` in the HTML spec.
-     *
      * @param input The string containing the entity (or a continuation of the entity).
      * @param offset The current offset.
      * @returns The number of characters that were consumed, or -1 if the entity is incomplete.
@@ -198,73 +201,53 @@ export class EntityDecoder {
         return this.stateNumericDecimal(input, offset);
     }
-    private addToNumericResult(
-        input: string,
-        start: number,
-        end: number,
-        base: number,
-    ): void {
-        if (start !== end) {
-            const digitCount = end - start;
-            this.result =
-                this.result * Math.pow(base, digitCount) +
-                Number.parseInt(input.substr(start, digitCount), base);
-            this.consumed += digitCount;
-        }
-    }
     /**
      * Parses a hexadecimal numeric entity.
      *
      * Equivalent to the `Hexadecimal character reference state` in the HTML spec.
-     *
      * @param input The string containing the entity (or a continuation of the entity).
      * @param offset The current offset.
      * @returns The number of characters that were consumed, or -1 if the entity is incomplete.
      */
     private stateNumericHex(input: string, offset: number): number {
-        const startIndex = offset;
         while (offset < input.length) {
             const char = input.charCodeAt(offset);
             if (isNumber(char) || isHexadecimalCharacter(char)) {
-                offset += 1;
+                // Convert hex digit to value (0-15); 'a'/'A' -> 10.
+                const digit =
+                    char <= CharCodes.NINE
+                        ? char - CharCodes.ZERO
+                        : (char | TO_LOWER_BIT) - CharCodes.LOWER_A + 10;
+                this.result = this.result * 16 + digit;
+                this.consumed++;
+                offset++;
             } else {
-                this.addToNumericResult(input, startIndex, offset, 16);
                 return this.emitNumericEntity(char, 3);
             }
         }
-        this.addToNumericResult(input, startIndex, offset, 16);
-        return -1;
+        return -1; // Incomplete entity
     }
     /**
      * Parses a decimal numeric entity.
      *
      * Equivalent to the `Decimal character reference state` in the HTML spec.
-     *
      * @param input The string containing the entity (or a continuation of the entity).
      * @param offset The current offset.
      * @returns The number of characters that were consumed, or -1 if the entity is incomplete.
      */
     private stateNumericDecimal(input: string, offset: number): number {
-        const startIndex = offset;
         while (offset < input.length) {
             const char = input.charCodeAt(offset);
             if (isNumber(char)) {
-                offset += 1;
+                this.result = this.result * 10 + (char - CharCodes.ZERO);
+                this.consumed++;
+                offset++;
             } else {
-                this.addToNumericResult(input, startIndex, offset, 10);
                 return this.emitNumericEntity(char, 2);
             }
         }
-        this.addToNumericResult(input, startIndex, offset, 10);
-        return -1;
+        return -1; // Incomplete entity
     }
     /**
@@ -272,7 +255,6 @@ export class EntityDecoder {
      *
      * Implements the logic from the `Hexadecimal character reference start
      * state` and `Numeric character reference end state` in the HTML spec.
-     *
      * @param lastCp The last code point of the entity. Used to see if the
      *               entity was terminated with a semicolon.
      * @param expectedLength The minimum number of characters that should be
@@ -313,7 +295,6 @@ export class EntityDecoder {
      * Parses a named entity.
      *
      * Equivalent to the `Named character reference state` in the HTML spec.
-     *
      * @param input The string containing the entity (or a continuation of the entity).
      * @param offset The current offset.
      * @returns The number of characters that were consumed, or -1 if the entity is incomplete.
@@ -321,12 +302,84 @@ export class EntityDecoder {
     private stateNamedEntity(input: string, offset: number): number {
         const { decodeTree } = this;
         let current = decodeTree[this.treeIndex];
-        // The mask is the number of bytes of the value, including the current byte.
+        // The length is the number of bytes of the value, including the current byte.
         let valueLength = (current & BinTrieFlags.VALUE_LENGTH) >> 14;
-        for (; offset < input.length; offset++, this.excess++) {
+        while (offset < input.length) {
+            // Handle compact runs (possibly inline): valueLength == 0 and SEMI_REQUIRED bit set.
+            if (valueLength === 0 && (current & BinTrieFlags.FLAG13) !== 0) {
+                const runLength =
+                    (current & BinTrieFlags.BRANCH_LENGTH) >> 7; /* 2..63 */
+                // If we are starting a run, check the first char.
+                if (this.runConsumed === 0) {
+                    const firstChar = current & BinTrieFlags.JUMP_TABLE;
+                    if (input.charCodeAt(offset) !== firstChar) {
+                        return this.result === 0
+                            ? 0
+                            : this.emitNotTerminatedNamedEntity();
+                    }
+                    offset++;
+                    this.excess++;
+                    this.runConsumed++;
+                }
+                // Check remaining characters in the run.
+                while (this.runConsumed < runLength) {
+                    if (offset >= input.length) {
+                        return -1;
+                    }
+                    const charIndexInPacked = this.runConsumed - 1;
+                    const packedWord =
+                        decodeTree[
+                            this.treeIndex + 1 + (charIndexInPacked >> 1)
+                        ];
+                    const expectedChar =
+                        charIndexInPacked % 2 === 0
+                            ? packedWord & 0xff
+                            : (packedWord >> 8) & 0xff;
+                    if (input.charCodeAt(offset) !== expectedChar) {
+                        this.runConsumed = 0;
+                        return this.result === 0
+                            ? 0
+                            : this.emitNotTerminatedNamedEntity();
+                    }
+                    offset++;
+                    this.excess++;
+                    this.runConsumed++;
+                }
+                this.runConsumed = 0;
+                this.treeIndex += 1 + (runLength >> 1);
+                current = decodeTree[this.treeIndex];
+                valueLength = (current & BinTrieFlags.VALUE_LENGTH) >> 14;
+            }
+            if (offset >= input.length) break;
             const char = input.charCodeAt(offset);
+            /*
+             * Implicit semicolon handling for nodes that require a semicolon but
+             * don't have an explicit ';' branch stored in the trie. If we have
+             * a value on the current node, it requires a semicolon, and the
+             * current input character is a semicolon, emit the entity using the
+             * current node (without descending further).
+             */
+            if (
+                char === CharCodes.SEMI &&
+                valueLength !== 0 &&
+                (current & BinTrieFlags.FLAG13) !== 0
+            ) {
+                return this.emitNamedEntityData(
+                    this.treeIndex,
+                    valueLength,
+                    this.consumed + this.excess,
+                );
+            }
             this.treeIndex = determineBranch(
                 decodeTree,
                 current,
@@ -361,12 +414,18 @@ export class EntityDecoder {
                 }
                 // If we encounter a non-terminated (legacy) entity while parsing strictly, then ignore it.
-                if (this.decodeMode !== DecodingMode.Strict) {
+                if (
+                    this.decodeMode !== DecodingMode.Strict &&
+                    (current & BinTrieFlags.FLAG13) === 0
+                ) {
                     this.result = this.treeIndex;
                     this.consumed += this.excess;
                     this.excess = 0;
                 }
             }
+            // Increment offset & excess for next iteration
+            offset++;
+            this.excess++;
         }
         return -1;
@@ -374,7 +433,6 @@ export class EntityDecoder {
     /**
      * Emit a named entity that was not terminated with a semicolon.
-     *
      * @returns The number of characters consumed.
      */
     private emitNotTerminatedNamedEntity(): number {
@@ -391,11 +449,9 @@ export class EntityDecoder {
     /**
      * Emit a named entity.
-     *
      * @param result The index of the entity in the decode tree.
      * @param valueLength The number of bytes in the entity.
      * @param consumed The number of characters consumed.
-     *
      * @returns The number of characters consumed.
      */
     private emitNamedEntityData(
@@ -407,7 +463,8 @@ export class EntityDecoder {
         this.emitCodePoint(
             valueLength === 1
-                ? decodeTree[result] & ~BinTrieFlags.VALUE_LENGTH
+                ? decodeTree[result] &
+                      ~(BinTrieFlags.VALUE_LENGTH | BinTrieFlags.FLAG13)
                 : decodeTree[result + 1],
             consumed,
         );
@@ -423,7 +480,6 @@ export class EntityDecoder {
      * Signal to the parser that the end of the input was reached.
      *
      * Remaining data will be emitted and relevant errors will be produced.
-     *
      * @returns The number of characters consumed.
      */
     end(): number {
@@ -459,7 +515,6 @@ export class EntityDecoder {
 /**
  * Creates a function that decodes entities in a string.
- *
  * @param decodeTree The decode tree.
  * @returns A function that decodes entities in a string.
  */
@@ -467,7 +522,7 @@ function getDecoder(decodeTree: Uint16Array) {
     let returnValue = "";
     const decoder = new EntityDecoder(
         decodeTree,
-        (data) => (returnValue += fromCodePoint(data)),
+        (data) => (returnValue += String.fromCodePoint(data)),
     );
     return function decodeWithTrie(
@@ -510,10 +565,9 @@ function getDecoder(decodeTree: Uint16Array) {
 /**
  * Determines the branch of the current node that is taken given the current
  * character. This function is used to traverse the trie.
- *
  * @param decodeTree The trie.
  * @param current The current node.
- * @param nodeIdx The index right after the current node and its value.
+ * @param nodeIndex Index immediately after the current node header.
  * @param char The current character.
  * @returns The index of the next node, or -1 if no branch is taken.
  */
@@ -540,22 +594,28 @@ export function determineBranch(
             : decodeTree[nodeIndex + value] - 1;
     }
-    // Case 3: Multiple branches encoded in dictionary
+    // Case 3: Multiple branches encoded in packed dictionary (two keys per uint16)
+    const packedKeySlots = (branchCount + 1) >> 1;
-    // Binary search for the character.
-    let lo = nodeIndex;
-    let hi = lo + branchCount - 1;
+    /*
+     * Treat packed keys as a virtual sorted array of length `branchCount`.
+     * Key(i) = low byte for even i, high byte for odd i in slot i>>1.
+     */
+    let lo = 0;
+    let hi = branchCount - 1;
     while (lo <= hi) {
         const mid = (lo + hi) >>> 1;
-        const midValue = decodeTree[mid];
+        const slot = mid >> 1;
+        const packed = decodeTree[nodeIndex + slot];
+        const midKey = (packed >> ((mid & 1) * 8)) & 0xff;
-        if (midValue < char) {
+        if (midKey < char) {
             lo = mid + 1;
-        } else if (midValue > char) {
+        } else if (midKey > char) {
             hi = mid - 1;
         } else {
-            return decodeTree[mid + branchCount];
+            return decodeTree[nodeIndex + packedKeySlots + mid];
         }
     }
@@ -567,7 +627,6 @@ const xmlDecoder = /* #__PURE__ */ getDecoder(xmlDecodeTree);
 /**
  * Decodes an HTML string.
- *
  * @param htmlString The string to decode.
  * @param mode The decoding mode.
  * @returns The decoded string.
@@ -581,7 +640,6 @@ export function decodeHTML(
 /**
  * Decodes an HTML string in an attribute.
- *
  * @param htmlAttribute The string to decode.
  * @returns The decoded string.
  */
@@ -591,7 +649,6 @@ export function decodeHTMLAttribute(htmlAttribute: string): string {
 /**
  * Decodes an HTML string, requiring all entities to be terminated by a semicolon.
- *
  * @param htmlString The string to decode.
  * @returns The decoded string.
  */
@@ -601,7 +658,6 @@ export function decodeHTMLStrict(htmlString: string): string {
 /**
  * Decodes an XML string, requiring all entities to be terminated by a semicolon.
- *
  * @param xmlString The string to decode.
  * @returns The decoded string.
  */
@@ -609,12 +665,7 @@ export function decodeXML(xmlString: string): string {
     return xmlDecoder(xmlString, DecodingMode.Strict);
 }
+export { replaceCodePoint } from "./decode-codepoint.js";
 // Re-export for use by eg. htmlparser2
 export { htmlDecodeTree } from "./generated/decode-data-html.js";
 export { xmlDecodeTree } from "./generated/decode-data-xml.js";
-export {
-    decodeCodePoint,
-    replaceCodePoint,
-    fromCodePoint,
-} from "./decode-codepoint.js";

package/src/encode.ts CHANGED

@@ -1,7 +1,17 @@
+import { getCodePoint, XML_BITSET_VALUE } from "./escape.js";
 import { htmlTrie } from "./generated/encode-html.js";
-import { xmlReplacer, getCodePoint } from "./escape.js";
-const htmlReplacer = /[\t\n\f!-,./:-@[-`{-}\u0080-\uFFFF]/g;
+/**
+ * We store the characters to consider as a compact bitset for fast lookups.
+ */
+const HTML_BITSET = /* #__PURE__ */ new Uint32Array([
+    0x16_00, // Bits for 09,0A,0C
+    0xfc_00_ff_fe, // 32..63 -> 21-2D (minus space), 2E,2F,3A-3F
+    0xf8_00_00_01, // 64..95 -> 40, 5B-5F
+    0x38_00_00_01, // 96..127-> 60, 7B-7D
+]);
+const XML_BITSET = /* #__PURE__ */ new Uint32Array([0, XML_BITSET_VALUE, 0, 0]);
 /**
  * Encodes all characters in the input using HTML entities. This includes
@@ -13,9 +23,10 @@ const htmlReplacer = /[\t\n\f!-,./:-@[-`{-}\u0080-\uFFFF]/g;
  *
  * If a character has no equivalent entity, a numeric hexadecimal reference
  * (eg. `&#xfc;`) will be used.
+ * @param input Input string to encode or decode.
  */
 export function encodeHTML(input: string): string {
-    return encodeHTMLTrieRe(htmlReplacer, input);
+    return encodeHTMLTrieRe(HTML_BITSET, input);
 }
 /**
  * Encodes all non-ASCII characters, as well as characters not valid in HTML
@@ -24,54 +35,61 @@ export function encodeHTML(input: string): string {
  *
  * If a character has no equivalent entity, a numeric hexadecimal reference
  * (eg. `&#xfc;`) will be used.
+ * @param input Input string to encode or decode.
  */
 export function encodeNonAsciiHTML(input: string): string {
-    return encodeHTMLTrieRe(xmlReplacer, input);
+    return encodeHTMLTrieRe(XML_BITSET, input);
 }
-function encodeHTMLTrieRe(regExp: RegExp, input: string): string {
-    let returnValue = "";
-    let lastIndex = 0;
-    let match;
+function encodeHTMLTrieRe(bitset: Uint32Array, input: string): string {
+    let out: string | undefined;
+    let last = 0; // Start of the next untouched slice.
+    const { length } = input;
-    while ((match = regExp.exec(input)) !== null) {
-        const { index } = match;
-        returnValue += input.substring(lastIndex, index);
+    for (let index = 0; index < length; index++) {
         const char = input.charCodeAt(index);
-        let next = htmlTrie.get(char);
+        // Skip ASCII characters that don't need encoding
+        if (char < 0x80 && !((bitset[char >>> 5] >>> char) & 1)) {
+            continue;
+        }
-        if (typeof next === "object") {
-            // We are in a branch. Try to match the next char.
-            if (index + 1 < input.length) {
+        if (out === undefined) out = input.substring(0, index);
+        else if (last !== index) out += input.substring(last, index);
+        let node = htmlTrie.get(char);
+        if (typeof node === "object") {
+            if (index + 1 < length) {
                 const nextChar = input.charCodeAt(index + 1);
                 const value =
-                    typeof next.n === "number"
-                        ? next.n === nextChar
-                            ? next.o
+                    typeof node.next === "number"
+                        ? node.next === nextChar
+                            ? node.nextValue
                             : undefined
-                        : next.n.get(nextChar);
+                        : node.next.get(nextChar);
                 if (value !== undefined) {
-                    returnValue += value;
-                    lastIndex = regExp.lastIndex += 1;
+                    out += value;
+                    index++;
+                    last = index + 1;
                     continue;
                 }
             }
-            next = next.v;
+            node = node.value;
         }
-        // We might have a tree node without a value; skip and use a numeric entity.
-        if (next === undefined) {
+        if (node === undefined) {
             const cp = getCodePoint(input, index);
-            returnValue += `&#x${cp.toString(16)};`;
-            // Increase by 1 if we have a surrogate pair
-            lastIndex = regExp.lastIndex += Number(cp !== char);
+            out += `&#x${cp.toString(16)};`;
+            if (cp !== char) index++;
+            last = index + 1;
         } else {
-            returnValue += next;
-            lastIndex = index + 1;
+            out += node;
+            last = index + 1;
         }
     }
-    return returnValue + input.substr(lastIndex);
+    if (out === undefined) return input;
+    if (last < length) out += input.substr(last);
+    return out;
 }