entities 6.0.0 → 7.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (98)
  1. package/decode.d.ts +3 -0
  2. package/dist/commonjs/decode-codepoint.d.ts.map +1 -1
  3. package/dist/commonjs/decode-codepoint.js +2 -2
  4. package/dist/commonjs/decode-codepoint.js.map +1 -1
  5. package/dist/commonjs/decode.d.ts +1 -7
  6. package/dist/commonjs/decode.d.ts.map +1 -1
  7. package/dist/commonjs/decode.js +105 -48
  8. package/dist/commonjs/decode.js.map +1 -1
  9. package/dist/commonjs/encode.d.ts.map +1 -1
  10. package/dist/commonjs/encode.js +49 -30
  11. package/dist/commonjs/encode.js.map +1 -1
  12. package/dist/commonjs/escape.d.ts +7 -4
  13. package/dist/commonjs/escape.d.ts.map +1 -1
  14. package/dist/commonjs/escape.js +36 -19
  15. package/dist/commonjs/escape.js.map +1 -1
  16. package/dist/commonjs/generated/decode-data-html.d.ts.map +1 -1
  17. package/dist/commonjs/generated/decode-data-html.js +2 -5
  18. package/dist/commonjs/generated/decode-data-html.js.map +1 -1
  19. package/dist/commonjs/generated/decode-data-xml.d.ts.map +1 -1
  20. package/dist/commonjs/generated/decode-data-xml.js +2 -5
  21. package/dist/commonjs/generated/decode-data-xml.js.map +1 -1
  22. package/dist/commonjs/generated/encode-html.d.ts +1 -6
  23. package/dist/commonjs/generated/encode-html.d.ts.map +1 -1
  24. package/dist/commonjs/generated/encode-html.js +9 -8
  25. package/dist/commonjs/generated/encode-html.js.map +1 -1
  26. package/dist/commonjs/index.d.ts +3 -3
  27. package/dist/commonjs/index.d.ts.map +1 -1
  28. package/dist/commonjs/index.js +19 -19
  29. package/dist/commonjs/index.js.map +1 -1
  30. package/dist/commonjs/internal/bin-trie-flags.d.ts +17 -0
  31. package/dist/commonjs/internal/bin-trie-flags.d.ts.map +1 -0
  32. package/dist/commonjs/internal/bin-trie-flags.js +21 -0
  33. package/dist/commonjs/internal/bin-trie-flags.js.map +1 -0
  34. package/dist/commonjs/internal/decode-shared.d.ts +2 -0
  35. package/dist/commonjs/internal/decode-shared.d.ts.map +1 -0
  36. package/dist/commonjs/internal/decode-shared.js +31 -0
  37. package/dist/commonjs/internal/decode-shared.js.map +1 -0
  38. package/dist/commonjs/internal/encode-shared.d.ts +32 -0
  39. package/dist/commonjs/internal/encode-shared.d.ts.map +1 -0
  40. package/dist/commonjs/internal/encode-shared.js +94 -0
  41. package/dist/commonjs/internal/encode-shared.js.map +1 -0
  42. package/dist/esm/decode-codepoint.d.ts.map +1 -1
  43. package/dist/esm/decode-codepoint.js +2 -2
  44. package/dist/esm/decode-codepoint.js.map +1 -1
  45. package/dist/esm/decode.d.ts +1 -7
  46. package/dist/esm/decode.d.ts.map +1 -1
  47. package/dist/esm/decode.js +96 -39
  48. package/dist/esm/decode.js.map +1 -1
  49. package/dist/esm/encode.d.ts.map +1 -1
  50. package/dist/esm/encode.js +49 -30
  51. package/dist/esm/encode.js.map +1 -1
  52. package/dist/esm/escape.d.ts +7 -4
  53. package/dist/esm/escape.d.ts.map +1 -1
  54. package/dist/esm/escape.js +35 -18
  55. package/dist/esm/escape.js.map +1 -1
  56. package/dist/esm/generated/decode-data-html.d.ts.map +1 -1
  57. package/dist/esm/generated/decode-data-html.js +2 -5
  58. package/dist/esm/generated/decode-data-html.js.map +1 -1
  59. package/dist/esm/generated/decode-data-xml.d.ts.map +1 -1
  60. package/dist/esm/generated/decode-data-xml.js +2 -5
  61. package/dist/esm/generated/decode-data-xml.js.map +1 -1
  62. package/dist/esm/generated/encode-html.d.ts +1 -6
  63. package/dist/esm/generated/encode-html.d.ts.map +1 -1
  64. package/dist/esm/generated/encode-html.js +9 -8
  65. package/dist/esm/generated/encode-html.js.map +1 -1
  66. package/dist/esm/index.d.ts +3 -3
  67. package/dist/esm/index.d.ts.map +1 -1
  68. package/dist/esm/index.js +9 -9
  69. package/dist/esm/index.js.map +1 -1
  70. package/dist/esm/internal/bin-trie-flags.d.ts +17 -0
  71. package/dist/esm/internal/bin-trie-flags.d.ts.map +1 -0
  72. package/dist/esm/internal/bin-trie-flags.js +18 -0
  73. package/dist/esm/internal/bin-trie-flags.js.map +1 -0
  74. package/dist/esm/internal/decode-shared.d.ts +2 -0
  75. package/dist/esm/internal/decode-shared.d.ts.map +1 -0
  76. package/dist/esm/internal/decode-shared.js +28 -0
  77. package/dist/esm/internal/decode-shared.js.map +1 -0
  78. package/dist/esm/internal/encode-shared.d.ts +32 -0
  79. package/dist/esm/internal/encode-shared.d.ts.map +1 -0
  80. package/dist/esm/internal/encode-shared.js +91 -0
  81. package/dist/esm/internal/encode-shared.js.map +1 -0
  82. package/escape.d.ts +3 -0
  83. package/package.json +19 -22
  84. package/src/decode-codepoint.ts +2 -2
  85. package/src/decode.spec.ts +44 -1
  86. package/src/decode.ts +111 -55
  87. package/src/encode.spec.ts +1 -1
  88. package/src/encode.ts +47 -31
  89. package/src/escape.spec.ts +1 -1
  90. package/src/escape.ts +39 -26
  91. package/src/generated/decode-data-html.ts +3 -5
  92. package/src/generated/decode-data-xml.ts +3 -5
  93. package/src/generated/encode-html.ts +14 -14
  94. package/src/index.spec.ts +2 -2
  95. package/src/index.ts +23 -24
  96. package/src/internal/bin-trie-flags.ts +16 -0
  97. package/src/internal/decode-shared.ts +30 -0
  98. package/src/internal/encode-shared.ts +121 -0
package/src/decode.ts CHANGED
@@ -1,6 +1,7 @@
1
+ import { fromCodePoint, replaceCodePoint } from "./decode-codepoint.js";
1
2
  import { htmlDecodeTree } from "./generated/decode-data-html.js";
2
3
  import { xmlDecodeTree } from "./generated/decode-data-xml.js";
3
- import { replaceCodePoint, fromCodePoint } from "./decode-codepoint.js";
4
+ import { BinTrieFlags } from "./internal/bin-trie-flags.js";
4
5
 
5
6
  const enum CharCodes {
6
7
  NUM = 35, // "#"
@@ -20,12 +21,6 @@ const enum CharCodes {
20
21
  /** Bit that needs to be set to convert an upper case ASCII character to lower case */
21
22
  const TO_LOWER_BIT = 0b10_0000;
22
23
 
23
- export enum BinTrieFlags {
24
- VALUE_LENGTH = 0b1100_0000_0000_0000,
25
- BRANCH_LENGTH = 0b0011_1111_1000_0000,
26
- JUMP_TABLE = 0b0000_0000_0111_1111,
27
- }
28
-
29
24
  function isNumber(code: number): boolean {
30
25
  return code >= CharCodes.ZERO && code <= CharCodes.NINE;
31
26
  }
@@ -89,6 +84,7 @@ export interface EntityErrorProducer {
89
84
  export class EntityDecoder {
90
85
  constructor(
91
86
  /** The tree used to decode entities. */
87
+ // biome-ignore lint/correctness/noUnusedPrivateClassMembers: False positive
92
88
  private readonly decodeTree: Uint16Array,
93
89
  /**
94
90
  * The function that is called when a codepoint is decoded.
@@ -198,21 +194,6 @@ export class EntityDecoder {
198
194
  return this.stateNumericDecimal(input, offset);
199
195
  }
200
196
 
201
- private addToNumericResult(
202
- input: string,
203
- start: number,
204
- end: number,
205
- base: number,
206
- ): void {
207
- if (start !== end) {
208
- const digitCount = end - start;
209
- this.result =
210
- this.result * Math.pow(base, digitCount) +
211
- Number.parseInt(input.substr(start, digitCount), base);
212
- this.consumed += digitCount;
213
- }
214
- }
215
-
216
197
  /**
217
198
  * Parses a hexadecimal numeric entity.
218
199
  *
@@ -223,21 +204,22 @@ export class EntityDecoder {
223
204
  * @returns The number of characters that were consumed, or -1 if the entity is incomplete.
224
205
  */
225
206
  private stateNumericHex(input: string, offset: number): number {
226
- const startIndex = offset;
227
-
228
207
  while (offset < input.length) {
229
208
  const char = input.charCodeAt(offset);
230
209
  if (isNumber(char) || isHexadecimalCharacter(char)) {
231
- offset += 1;
210
+ // Convert hex digit to value (0-15); 'a'/'A' -> 10.
211
+ const digit =
212
+ char <= CharCodes.NINE
213
+ ? char - CharCodes.ZERO
214
+ : (char | TO_LOWER_BIT) - CharCodes.LOWER_A + 10;
215
+ this.result = this.result * 16 + digit;
216
+ this.consumed++;
217
+ offset++;
232
218
  } else {
233
- this.addToNumericResult(input, startIndex, offset, 16);
234
219
  return this.emitNumericEntity(char, 3);
235
220
  }
236
221
  }
237
-
238
- this.addToNumericResult(input, startIndex, offset, 16);
239
-
240
- return -1;
222
+ return -1; // Incomplete entity
241
223
  }
242
224
 
243
225
  /**
@@ -250,21 +232,17 @@ export class EntityDecoder {
250
232
  * @returns The number of characters that were consumed, or -1 if the entity is incomplete.
251
233
  */
252
234
  private stateNumericDecimal(input: string, offset: number): number {
253
- const startIndex = offset;
254
-
255
235
  while (offset < input.length) {
256
236
  const char = input.charCodeAt(offset);
257
237
  if (isNumber(char)) {
258
- offset += 1;
238
+ this.result = this.result * 10 + (char - CharCodes.ZERO);
239
+ this.consumed++;
240
+ offset++;
259
241
  } else {
260
- this.addToNumericResult(input, startIndex, offset, 10);
261
242
  return this.emitNumericEntity(char, 2);
262
243
  }
263
244
  }
264
-
265
- this.addToNumericResult(input, startIndex, offset, 10);
266
-
267
- return -1;
245
+ return -1; // Incomplete entity
268
246
  }
269
247
 
270
248
  /**
@@ -321,12 +299,78 @@ export class EntityDecoder {
321
299
  private stateNamedEntity(input: string, offset: number): number {
322
300
  const { decodeTree } = this;
323
301
  let current = decodeTree[this.treeIndex];
324
- // The mask is the number of bytes of the value, including the current byte.
302
+ // The length is the number of bytes of the value, including the current byte.
325
303
  let valueLength = (current & BinTrieFlags.VALUE_LENGTH) >> 14;
326
304
 
327
- for (; offset < input.length; offset++, this.excess++) {
305
+ while (offset < input.length) {
306
+ // Handle compact runs (possibly inline): valueLength == 0 and SEMI_REQUIRED bit set.
307
+ if (valueLength === 0 && (current & BinTrieFlags.FLAG13) !== 0) {
308
+ const runLength =
309
+ (current & BinTrieFlags.BRANCH_LENGTH) >> 7; /* 2..63 */
310
+ const firstChar = current & BinTrieFlags.JUMP_TABLE;
311
+ // Fast-fail if we don't have enough remaining input for the full run (incomplete entity)
312
+ if (offset + runLength > input.length) return -1;
313
+ // Verify first char
314
+ if (input.charCodeAt(offset) !== firstChar) {
315
+ return this.result === 0
316
+ ? 0
317
+ : this.emitNotTerminatedNamedEntity();
318
+ }
319
+ offset++;
320
+ this.excess++;
321
+ // Remaining characters after the first
322
+ const remaining = runLength - 1;
323
+ // Iterate over packed 2-char words
324
+ for (let runPos = 1; runPos < runLength; runPos += 2) {
325
+ const packedWord =
326
+ decodeTree[this.treeIndex + 1 + ((runPos - 1) >> 1)];
327
+ const low = packedWord & 0xff;
328
+ if (input.charCodeAt(offset) !== low) {
329
+ return this.result === 0
330
+ ? 0
331
+ : this.emitNotTerminatedNamedEntity();
332
+ }
333
+ offset++;
334
+ this.excess++;
335
+ const high = (packedWord >> 8) & 0xff;
336
+ if (runPos + 1 < runLength) {
337
+ if (input.charCodeAt(offset) !== high) {
338
+ return this.result === 0
339
+ ? 0
340
+ : this.emitNotTerminatedNamedEntity();
341
+ }
342
+ offset++;
343
+ this.excess++;
344
+ }
345
+ }
346
+ this.treeIndex += 1 + ((remaining + 1) >> 1);
347
+ current = decodeTree[this.treeIndex];
348
+ valueLength = (current & BinTrieFlags.VALUE_LENGTH) >> 14;
349
+ }
350
+
351
+ if (offset >= input.length) break;
352
+
328
353
  const char = input.charCodeAt(offset);
329
354
 
355
+ /*
356
+ * Implicit semicolon handling for nodes that require a semicolon but
357
+ * don't have an explicit ';' branch stored in the trie. If we have
358
+ * a value on the current node, it requires a semicolon, and the
359
+ * current input character is a semicolon, emit the entity using the
360
+ * current node (without descending further).
361
+ */
362
+ if (
363
+ char === CharCodes.SEMI &&
364
+ valueLength !== 0 &&
365
+ (current & BinTrieFlags.FLAG13) !== 0
366
+ ) {
367
+ return this.emitNamedEntityData(
368
+ this.treeIndex,
369
+ valueLength,
370
+ this.consumed + this.excess,
371
+ );
372
+ }
373
+
330
374
  this.treeIndex = determineBranch(
331
375
  decodeTree,
332
376
  current,
@@ -361,12 +405,18 @@ export class EntityDecoder {
361
405
  }
362
406
 
363
407
  // If we encounter a non-terminated (legacy) entity while parsing strictly, then ignore it.
364
- if (this.decodeMode !== DecodingMode.Strict) {
408
+ if (
409
+ this.decodeMode !== DecodingMode.Strict &&
410
+ (current & BinTrieFlags.FLAG13) === 0
411
+ ) {
365
412
  this.result = this.treeIndex;
366
413
  this.consumed += this.excess;
367
414
  this.excess = 0;
368
415
  }
369
416
  }
417
+ // Increment offset & excess for next iteration
418
+ offset++;
419
+ this.excess++;
370
420
  }
371
421
 
372
422
  return -1;
@@ -407,7 +457,8 @@ export class EntityDecoder {
407
457
 
408
458
  this.emitCodePoint(
409
459
  valueLength === 1
410
- ? decodeTree[result] & ~BinTrieFlags.VALUE_LENGTH
460
+ ? decodeTree[result] &
461
+ ~(BinTrieFlags.VALUE_LENGTH | BinTrieFlags.FLAG13)
411
462
  : decodeTree[result + 1],
412
463
  consumed,
413
464
  );
@@ -540,22 +591,28 @@ export function determineBranch(
540
591
  : decodeTree[nodeIndex + value] - 1;
541
592
  }
542
593
 
543
- // Case 3: Multiple branches encoded in dictionary
594
+ // Case 3: Multiple branches encoded in packed dictionary (two keys per uint16)
595
+ const packedKeySlots = (branchCount + 1) >> 1;
544
596
 
545
- // Binary search for the character.
546
- let lo = nodeIndex;
547
- let hi = lo + branchCount - 1;
597
+ /*
598
+ * Treat packed keys as a virtual sorted array of length `branchCount`.
599
+ * Key(i) = low byte for even i, high byte for odd i in slot i>>1.
600
+ */
601
+ let lo = 0;
602
+ let hi = branchCount - 1;
548
603
 
549
604
  while (lo <= hi) {
550
605
  const mid = (lo + hi) >>> 1;
551
- const midValue = decodeTree[mid];
606
+ const slot = mid >> 1;
607
+ const packed = decodeTree[nodeIndex + slot];
608
+ const midKey = (packed >> ((mid & 1) * 8)) & 0xff;
552
609
 
553
- if (midValue < char) {
610
+ if (midKey < char) {
554
611
  lo = mid + 1;
555
- } else if (midValue > char) {
612
+ } else if (midKey > char) {
556
613
  hi = mid - 1;
557
614
  } else {
558
- return decodeTree[mid + branchCount];
615
+ return decodeTree[nodeIndex + packedKeySlots + mid];
559
616
  }
560
617
  }
561
618
 
@@ -609,12 +666,11 @@ export function decodeXML(xmlString: string): string {
609
666
  return xmlDecoder(xmlString, DecodingMode.Strict);
610
667
  }
611
668
 
612
- // Re-export for use by eg. htmlparser2
613
- export { htmlDecodeTree } from "./generated/decode-data-html.js";
614
- export { xmlDecodeTree } from "./generated/decode-data-xml.js";
615
-
616
669
  export {
617
670
  decodeCodePoint,
618
- replaceCodePoint,
619
671
  fromCodePoint,
672
+ replaceCodePoint,
620
673
  } from "./decode-codepoint.js";
674
+ // Re-export for use by eg. htmlparser2
675
+ export { htmlDecodeTree } from "./generated/decode-data-html.js";
676
+ export { xmlDecodeTree } from "./generated/decode-data-xml.js";
package/src/encode.spec.ts CHANGED
@@ -1,4 +1,4 @@
1
- import { describe, it, expect } from "vitest";
1
+ import { describe, expect, it } from "vitest";
2
2
  import * as entities from "./index.js";
3
3
 
4
4
  describe("Encode->decode test", () => {
package/src/encode.ts CHANGED
@@ -1,7 +1,17 @@
1
+ import { getCodePoint, XML_BITSET_VALUE } from "./escape.js";
1
2
  import { htmlTrie } from "./generated/encode-html.js";
2
- import { xmlReplacer, getCodePoint } from "./escape.js";
3
3
 
4
- const htmlReplacer = /[\t\n\f!-,./:-@[-`{-}\u0080-\uFFFF]/g;
4
+ /**
5
+ * We store the characters to consider as a compact bitset for fast lookups.
6
+ */
7
+ const HTML_BITSET = /* #__PURE__ */ new Uint32Array([
8
+ 0x16_00, // Bits for 09,0A,0C
9
+ 0xfc_00_ff_fe, // 32..63 -> 21-2D (minus space), 2E,2F,3A-3F
10
+ 0xf8_00_00_01, // 64..95 -> 40, 5B-5F
11
+ 0x38_00_00_01, // 96..127-> 60, 7B-7D
12
+ ]);
13
+
14
+ const XML_BITSET = /* #__PURE__ */ new Uint32Array([0, XML_BITSET_VALUE, 0, 0]);
5
15
 
6
16
  /**
7
17
  * Encodes all characters in the input using HTML entities. This includes
@@ -15,7 +25,7 @@ const htmlReplacer = /[\t\n\f!-,./:-@[-`{-}\u0080-\uFFFF]/g;
15
25
  * (eg. `&#xfc;`) will be used.
16
26
  */
17
27
  export function encodeHTML(input: string): string {
18
- return encodeHTMLTrieRe(htmlReplacer, input);
28
+ return encodeHTMLTrieRe(HTML_BITSET, input);
19
29
  }
20
30
  /**
21
31
  * Encodes all non-ASCII characters, as well as characters not valid in HTML
@@ -26,52 +36,58 @@ export function encodeHTML(input: string): string {
26
36
  * (eg. `&#xfc;`) will be used.
27
37
  */
28
38
  export function encodeNonAsciiHTML(input: string): string {
29
- return encodeHTMLTrieRe(xmlReplacer, input);
39
+ return encodeHTMLTrieRe(XML_BITSET, input);
30
40
  }
31
41
 
32
- function encodeHTMLTrieRe(regExp: RegExp, input: string): string {
33
- let returnValue = "";
34
- let lastIndex = 0;
35
- let match;
42
+ function encodeHTMLTrieRe(bitset: Uint32Array, input: string): string {
43
+ let out: string | undefined;
44
+ let last = 0; // Start of the next untouched slice.
45
+ const { length } = input;
36
46
 
37
- while ((match = regExp.exec(input)) !== null) {
38
- const { index } = match;
39
- returnValue += input.substring(lastIndex, index);
47
+ for (let index = 0; index < length; index++) {
40
48
  const char = input.charCodeAt(index);
41
- let next = htmlTrie.get(char);
49
+ // Skip ASCII characters that don't need encoding
50
+ if (char < 0x80 && !((bitset[char >>> 5] >>> char) & 1)) {
51
+ continue;
52
+ }
42
53
 
43
- if (typeof next === "object") {
44
- // We are in a branch. Try to match the next char.
45
- if (index + 1 < input.length) {
54
+ if (out === undefined) out = input.substring(0, index);
55
+ else if (last !== index) out += input.substring(last, index);
56
+
57
+ let node = htmlTrie.get(char);
58
+
59
+ if (typeof node === "object") {
60
+ if (index + 1 < length) {
46
61
  const nextChar = input.charCodeAt(index + 1);
47
62
  const value =
48
- typeof next.n === "number"
49
- ? next.n === nextChar
50
- ? next.o
63
+ typeof node.next === "number"
64
+ ? node.next === nextChar
65
+ ? node.nextValue
51
66
  : undefined
52
- : next.n.get(nextChar);
67
+ : node.next.get(nextChar);
53
68
 
54
69
  if (value !== undefined) {
55
- returnValue += value;
56
- lastIndex = regExp.lastIndex += 1;
70
+ out += value;
71
+ index++;
72
+ last = index + 1;
57
73
  continue;
58
74
  }
59
75
  }
60
-
61
- next = next.v;
76
+ node = node.value;
62
77
  }
63
78
 
64
- // We might have a tree node without a value; skip and use a numeric entity.
65
- if (next === undefined) {
79
+ if (node === undefined) {
66
80
  const cp = getCodePoint(input, index);
67
- returnValue += `&#x${cp.toString(16)};`;
68
- // Increase by 1 if we have a surrogate pair
69
- lastIndex = regExp.lastIndex += Number(cp !== char);
81
+ out += `&#x${cp.toString(16)};`;
82
+ if (cp !== char) index++;
83
+ last = index + 1;
70
84
  } else {
71
- returnValue += next;
72
- lastIndex = index + 1;
85
+ out += node;
86
+ last = index + 1;
73
87
  }
74
88
  }
75
89
 
76
- return returnValue + input.substr(lastIndex);
90
+ if (out === undefined) return input;
91
+ if (last < length) out += input.substr(last);
92
+ return out;
77
93
  }
package/src/escape.spec.ts CHANGED
@@ -1,4 +1,4 @@
1
- import { describe, it, expect } from "vitest";
1
+ import { describe, expect, it } from "vitest";
2
2
  import * as entities from "./index.js";
3
3
 
4
4
  describe("escape HTML", () => {
package/src/escape.ts CHANGED
@@ -1,5 +1,3 @@
1
- export const xmlReplacer: RegExp = /["$&'<>\u0080-\uFFFF]/g;
2
-
3
1
  const xmlCodeMap = new Map([
4
2
  [34, "&quot;"],
5
3
  [38, "&amp;"],
@@ -22,39 +20,54 @@ export const getCodePoint: (c: string, index: number) => number =
22
20
  : // http://mathiasbynens.be/notes/javascript-encoding#surrogate-formulae
23
21
  (input: string, index: number): number => input.codePointAt(index)!;
24
22
 
23
+ /**
24
+ * Bitset for ASCII characters that need to be escaped in XML.
25
+ */
26
+ export const XML_BITSET_VALUE = 0x50_00_00_c4; // 32..63 -> 34 ("),38 (&),39 ('),60 (<),62 (>)
27
+
25
28
  /**
26
29
  * Encodes all non-ASCII characters, as well as characters not valid in XML
27
- * documents using XML entities.
30
+ * documents using XML entities. Uses a fast bitset scan instead of RegExp.
28
31
  *
29
- * If a character has no equivalent entity, a
30
- * numeric hexadecimal reference (eg. `&#xfc;`) will be used.
32
+ * If a character has no equivalent entity, a numeric hexadecimal reference
33
+ * (eg. `&#xfc;`) will be used.
31
34
  */
32
35
  export function encodeXML(input: string): string {
33
- let returnValue = "";
34
- let lastIndex = 0;
35
- let match;
36
+ let out: string | undefined;
37
+ let last = 0;
38
+ const { length } = input;
36
39
 
37
- while ((match = xmlReplacer.exec(input)) !== null) {
38
- const { index } = match;
40
+ for (let index = 0; index < length; index++) {
39
41
  const char = input.charCodeAt(index);
40
- const next = xmlCodeMap.get(char);
41
-
42
- if (next === undefined) {
43
- returnValue += `${input.substring(lastIndex, index)}&#x${getCodePoint(
44
- input,
45
- index,
46
- ).toString(16)};`;
47
- // Increase by 1 if we have a surrogate pair
48
- lastIndex = xmlReplacer.lastIndex += Number(
49
- (char & 0xfc_00) === 0xd8_00,
50
- );
51
- } else {
52
- returnValue += input.substring(lastIndex, index) + next;
53
- lastIndex = index + 1;
42
+
43
+ // Check for ASCII chars that don't need escaping
44
+ if (
45
+ char < 0x80 &&
46
+ (((XML_BITSET_VALUE >>> char) & 1) === 0 || char >= 64 || char < 32)
47
+ ) {
48
+ continue;
54
49
  }
50
+
51
+ if (out === undefined) out = input.substring(0, index);
52
+ else if (last !== index) out += input.substring(last, index);
53
+
54
+ if (char < 64) {
55
+ // Known replacement
56
+ out += xmlCodeMap.get(char)!;
57
+ last = index + 1;
58
+ continue;
59
+ }
60
+
61
+ // Non-ASCII: encode as numeric entity (handle surrogate pair)
62
+ const cp = getCodePoint(input, index);
63
+ out += `&#x${cp.toString(16)};`;
64
+ if (cp !== char) index++; // Skip trailing surrogate
65
+ last = index + 1;
55
66
  }
56
67
 
57
- return returnValue + input.substr(lastIndex);
68
+ if (out === undefined) return input;
69
+ if (last < length) out += input.substr(last);
70
+ return out;
58
71
  }
59
72
 
60
73
  /**
@@ -83,7 +96,7 @@ function getEscaper(
83
96
  map: Map<number, string>,
84
97
  ): (data: string) => string {
85
98
  return function escape(data: string): string {
86
- let match;
99
+ let match: RegExpExecArray | null;
87
100
  let lastIndex = 0;
88
101
  let result = "";
89
102