entities 6.0.1 → 7.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (99)
  1. package/decode.d.ts +2 -0
  2. package/dist/commonjs/decode-codepoint.d.ts.map +1 -1
  3. package/dist/commonjs/decode-codepoint.js +2 -2
  4. package/dist/commonjs/decode-codepoint.js.map +1 -1
  5. package/dist/commonjs/decode.d.ts +3 -7
  6. package/dist/commonjs/decode.d.ts.map +1 -1
  7. package/dist/commonjs/decode.js +105 -48
  8. package/dist/commonjs/decode.js.map +1 -1
  9. package/dist/commonjs/encode.d.ts.map +1 -1
  10. package/dist/commonjs/encode.js +49 -30
  11. package/dist/commonjs/encode.js.map +1 -1
  12. package/dist/commonjs/escape.d.ts +7 -4
  13. package/dist/commonjs/escape.d.ts.map +1 -1
  14. package/dist/commonjs/escape.js +36 -19
  15. package/dist/commonjs/escape.js.map +1 -1
  16. package/dist/commonjs/generated/decode-data-html.d.ts.map +1 -1
  17. package/dist/commonjs/generated/decode-data-html.js +2 -5
  18. package/dist/commonjs/generated/decode-data-html.js.map +1 -1
  19. package/dist/commonjs/generated/decode-data-xml.d.ts.map +1 -1
  20. package/dist/commonjs/generated/decode-data-xml.js +2 -5
  21. package/dist/commonjs/generated/decode-data-xml.js.map +1 -1
  22. package/dist/commonjs/generated/encode-html.d.ts +1 -6
  23. package/dist/commonjs/generated/encode-html.d.ts.map +1 -1
  24. package/dist/commonjs/generated/encode-html.js +9 -8
  25. package/dist/commonjs/generated/encode-html.js.map +1 -1
  26. package/dist/commonjs/index.d.ts +3 -3
  27. package/dist/commonjs/index.d.ts.map +1 -1
  28. package/dist/commonjs/index.js +19 -19
  29. package/dist/commonjs/index.js.map +1 -1
  30. package/dist/commonjs/internal/bin-trie-flags.d.ts +17 -0
  31. package/dist/commonjs/internal/bin-trie-flags.d.ts.map +1 -0
  32. package/dist/commonjs/internal/bin-trie-flags.js +21 -0
  33. package/dist/commonjs/internal/bin-trie-flags.js.map +1 -0
  34. package/dist/commonjs/internal/decode-shared.d.ts +2 -0
  35. package/dist/commonjs/internal/decode-shared.d.ts.map +1 -0
  36. package/dist/commonjs/internal/decode-shared.js +31 -0
  37. package/dist/commonjs/internal/decode-shared.js.map +1 -0
  38. package/dist/commonjs/internal/encode-shared.d.ts +32 -0
  39. package/dist/commonjs/internal/encode-shared.d.ts.map +1 -0
  40. package/dist/commonjs/internal/encode-shared.js +94 -0
  41. package/dist/commonjs/internal/encode-shared.js.map +1 -0
  42. package/dist/esm/decode-codepoint.d.ts.map +1 -1
  43. package/dist/esm/decode-codepoint.js +2 -2
  44. package/dist/esm/decode-codepoint.js.map +1 -1
  45. package/dist/esm/decode.d.ts +3 -7
  46. package/dist/esm/decode.d.ts.map +1 -1
  47. package/dist/esm/decode.js +96 -39
  48. package/dist/esm/decode.js.map +1 -1
  49. package/dist/esm/encode.d.ts.map +1 -1
  50. package/dist/esm/encode.js +49 -30
  51. package/dist/esm/encode.js.map +1 -1
  52. package/dist/esm/escape.d.ts +7 -4
  53. package/dist/esm/escape.d.ts.map +1 -1
  54. package/dist/esm/escape.js +35 -18
  55. package/dist/esm/escape.js.map +1 -1
  56. package/dist/esm/generated/decode-data-html.d.ts.map +1 -1
  57. package/dist/esm/generated/decode-data-html.js +2 -5
  58. package/dist/esm/generated/decode-data-html.js.map +1 -1
  59. package/dist/esm/generated/decode-data-xml.d.ts.map +1 -1
  60. package/dist/esm/generated/decode-data-xml.js +2 -5
  61. package/dist/esm/generated/decode-data-xml.js.map +1 -1
  62. package/dist/esm/generated/encode-html.d.ts +1 -6
  63. package/dist/esm/generated/encode-html.d.ts.map +1 -1
  64. package/dist/esm/generated/encode-html.js +9 -8
  65. package/dist/esm/generated/encode-html.js.map +1 -1
  66. package/dist/esm/index.d.ts +3 -3
  67. package/dist/esm/index.d.ts.map +1 -1
  68. package/dist/esm/index.js +9 -9
  69. package/dist/esm/index.js.map +1 -1
  70. package/dist/esm/internal/bin-trie-flags.d.ts +17 -0
  71. package/dist/esm/internal/bin-trie-flags.d.ts.map +1 -0
  72. package/dist/esm/internal/bin-trie-flags.js +18 -0
  73. package/dist/esm/internal/bin-trie-flags.js.map +1 -0
  74. package/dist/esm/internal/decode-shared.d.ts +2 -0
  75. package/dist/esm/internal/decode-shared.d.ts.map +1 -0
  76. package/dist/esm/internal/decode-shared.js +28 -0
  77. package/dist/esm/internal/decode-shared.js.map +1 -0
  78. package/dist/esm/internal/encode-shared.d.ts +32 -0
  79. package/dist/esm/internal/encode-shared.d.ts.map +1 -0
  80. package/dist/esm/internal/encode-shared.js +91 -0
  81. package/dist/esm/internal/encode-shared.js.map +1 -0
  82. package/escape.d.ts +2 -0
  83. package/package.json +26 -24
  84. package/readme.md +32 -11
  85. package/src/decode-codepoint.ts +2 -2
  86. package/src/decode.ts +120 -55
  87. package/src/encode.ts +47 -31
  88. package/src/escape.ts +39 -26
  89. package/src/generated/decode-data-html.ts +3 -5
  90. package/src/generated/decode-data-xml.ts +3 -5
  91. package/src/generated/encode-html.ts +14 -14
  92. package/src/index.ts +23 -24
  93. package/src/internal/bin-trie-flags.ts +16 -0
  94. package/src/internal/decode-shared.ts +30 -0
  95. package/src/internal/encode-shared.ts +121 -0
  96. package/src/decode.spec.ts +0 -320
  97. package/src/encode.spec.ts +0 -78
  98. package/src/escape.spec.ts +0 -14
  99. package/src/index.spec.ts +0 -125
package/src/decode.ts CHANGED
@@ -1,6 +1,7 @@
1
+ import { fromCodePoint, replaceCodePoint } from "./decode-codepoint.js";
1
2
  import { htmlDecodeTree } from "./generated/decode-data-html.js";
2
3
  import { xmlDecodeTree } from "./generated/decode-data-xml.js";
3
- import { replaceCodePoint, fromCodePoint } from "./decode-codepoint.js";
4
+ import { BinTrieFlags } from "./internal/bin-trie-flags.js";
4
5
 
5
6
  const enum CharCodes {
6
7
  NUM = 35, // "#"
@@ -20,12 +21,6 @@ const enum CharCodes {
20
21
  /** Bit that needs to be set to convert an upper case ASCII character to lower case */
21
22
  const TO_LOWER_BIT = 0b10_0000;
22
23
 
23
- export enum BinTrieFlags {
24
- VALUE_LENGTH = 0b1100_0000_0000_0000,
25
- BRANCH_LENGTH = 0b0011_1111_1000_0000,
26
- JUMP_TABLE = 0b0000_0000_0111_1111,
27
- }
28
-
29
24
  function isNumber(code: number): boolean {
30
25
  return code >= CharCodes.ZERO && code <= CharCodes.NINE;
31
26
  }
@@ -89,6 +84,7 @@ export interface EntityErrorProducer {
89
84
  export class EntityDecoder {
90
85
  constructor(
91
86
  /** The tree used to decode entities. */
87
+ // biome-ignore lint/correctness/noUnusedPrivateClassMembers: False positive
92
88
  private readonly decodeTree: Uint16Array,
93
89
  /**
94
90
  * The function that is called when a codepoint is decoded.
@@ -122,6 +118,8 @@ export class EntityDecoder {
122
118
  private excess = 1;
123
119
  /** The mode in which the decoder is operating. */
124
120
  private decodeMode = DecodingMode.Strict;
121
+ /** The number of characters that have been consumed in the current run. */
122
+ private runConsumed = 0;
125
123
 
126
124
  /** Resets the instance to make it reusable. */
127
125
  startEntity(decodeMode: DecodingMode): void {
@@ -131,6 +129,7 @@ export class EntityDecoder {
131
129
  this.treeIndex = 0;
132
130
  this.excess = 1;
133
131
  this.consumed = 1;
132
+ this.runConsumed = 0;
134
133
  }
135
134
 
136
135
  /**
@@ -198,21 +197,6 @@ export class EntityDecoder {
198
197
  return this.stateNumericDecimal(input, offset);
199
198
  }
200
199
 
201
- private addToNumericResult(
202
- input: string,
203
- start: number,
204
- end: number,
205
- base: number,
206
- ): void {
207
- if (start !== end) {
208
- const digitCount = end - start;
209
- this.result =
210
- this.result * Math.pow(base, digitCount) +
211
- Number.parseInt(input.substr(start, digitCount), base);
212
- this.consumed += digitCount;
213
- }
214
- }
215
-
216
200
  /**
217
201
  * Parses a hexadecimal numeric entity.
218
202
  *
@@ -223,21 +207,22 @@ export class EntityDecoder {
223
207
  * @returns The number of characters that were consumed, or -1 if the entity is incomplete.
224
208
  */
225
209
  private stateNumericHex(input: string, offset: number): number {
226
- const startIndex = offset;
227
-
228
210
  while (offset < input.length) {
229
211
  const char = input.charCodeAt(offset);
230
212
  if (isNumber(char) || isHexadecimalCharacter(char)) {
231
- offset += 1;
213
+ // Convert hex digit to value (0-15); 'a'/'A' -> 10.
214
+ const digit =
215
+ char <= CharCodes.NINE
216
+ ? char - CharCodes.ZERO
217
+ : (char | TO_LOWER_BIT) - CharCodes.LOWER_A + 10;
218
+ this.result = this.result * 16 + digit;
219
+ this.consumed++;
220
+ offset++;
232
221
  } else {
233
- this.addToNumericResult(input, startIndex, offset, 16);
234
222
  return this.emitNumericEntity(char, 3);
235
223
  }
236
224
  }
237
-
238
- this.addToNumericResult(input, startIndex, offset, 16);
239
-
240
- return -1;
225
+ return -1; // Incomplete entity
241
226
  }
242
227
 
243
228
  /**
@@ -250,21 +235,17 @@ export class EntityDecoder {
250
235
  * @returns The number of characters that were consumed, or -1 if the entity is incomplete.
251
236
  */
252
237
  private stateNumericDecimal(input: string, offset: number): number {
253
- const startIndex = offset;
254
-
255
238
  while (offset < input.length) {
256
239
  const char = input.charCodeAt(offset);
257
240
  if (isNumber(char)) {
258
- offset += 1;
241
+ this.result = this.result * 10 + (char - CharCodes.ZERO);
242
+ this.consumed++;
243
+ offset++;
259
244
  } else {
260
- this.addToNumericResult(input, startIndex, offset, 10);
261
245
  return this.emitNumericEntity(char, 2);
262
246
  }
263
247
  }
264
-
265
- this.addToNumericResult(input, startIndex, offset, 10);
266
-
267
- return -1;
248
+ return -1; // Incomplete entity
268
249
  }
269
250
 
270
251
  /**
@@ -321,12 +302,84 @@ export class EntityDecoder {
321
302
  private stateNamedEntity(input: string, offset: number): number {
322
303
  const { decodeTree } = this;
323
304
  let current = decodeTree[this.treeIndex];
324
- // The mask is the number of bytes of the value, including the current byte.
305
+ // The length is the number of bytes of the value, including the current byte.
325
306
  let valueLength = (current & BinTrieFlags.VALUE_LENGTH) >> 14;
326
307
 
327
- for (; offset < input.length; offset++, this.excess++) {
308
+ while (offset < input.length) {
309
+ // Handle compact runs (possibly inline): valueLength == 0 and SEMI_REQUIRED bit set.
310
+ if (valueLength === 0 && (current & BinTrieFlags.FLAG13) !== 0) {
311
+ const runLength =
312
+ (current & BinTrieFlags.BRANCH_LENGTH) >> 7; /* 2..63 */
313
+
314
+ // If we are starting a run, check the first char.
315
+ if (this.runConsumed === 0) {
316
+ const firstChar = current & BinTrieFlags.JUMP_TABLE;
317
+ if (input.charCodeAt(offset) !== firstChar) {
318
+ return this.result === 0
319
+ ? 0
320
+ : this.emitNotTerminatedNamedEntity();
321
+ }
322
+ offset++;
323
+ this.excess++;
324
+ this.runConsumed++;
325
+ }
326
+
327
+ // Check remaining characters in the run.
328
+ while (this.runConsumed < runLength) {
329
+ if (offset >= input.length) {
330
+ return -1;
331
+ }
332
+
333
+ const charIndexInPacked = this.runConsumed - 1;
334
+ const packedWord =
335
+ decodeTree[
336
+ this.treeIndex + 1 + (charIndexInPacked >> 1)
337
+ ];
338
+ const expectedChar =
339
+ charIndexInPacked % 2 === 0
340
+ ? packedWord & 0xff
341
+ : (packedWord >> 8) & 0xff;
342
+
343
+ if (input.charCodeAt(offset) !== expectedChar) {
344
+ this.runConsumed = 0;
345
+ return this.result === 0
346
+ ? 0
347
+ : this.emitNotTerminatedNamedEntity();
348
+ }
349
+ offset++;
350
+ this.excess++;
351
+ this.runConsumed++;
352
+ }
353
+
354
+ this.runConsumed = 0;
355
+ this.treeIndex += 1 + (runLength >> 1);
356
+ current = decodeTree[this.treeIndex];
357
+ valueLength = (current & BinTrieFlags.VALUE_LENGTH) >> 14;
358
+ }
359
+
360
+ if (offset >= input.length) break;
361
+
328
362
  const char = input.charCodeAt(offset);
329
363
 
364
+ /*
365
+ * Implicit semicolon handling for nodes that require a semicolon but
366
+ * don't have an explicit ';' branch stored in the trie. If we have
367
+ * a value on the current node, it requires a semicolon, and the
368
+ * current input character is a semicolon, emit the entity using the
369
+ * current node (without descending further).
370
+ */
371
+ if (
372
+ char === CharCodes.SEMI &&
373
+ valueLength !== 0 &&
374
+ (current & BinTrieFlags.FLAG13) !== 0
375
+ ) {
376
+ return this.emitNamedEntityData(
377
+ this.treeIndex,
378
+ valueLength,
379
+ this.consumed + this.excess,
380
+ );
381
+ }
382
+
330
383
  this.treeIndex = determineBranch(
331
384
  decodeTree,
332
385
  current,
@@ -361,12 +414,18 @@ export class EntityDecoder {
361
414
  }
362
415
 
363
416
  // If we encounter a non-terminated (legacy) entity while parsing strictly, then ignore it.
364
- if (this.decodeMode !== DecodingMode.Strict) {
417
+ if (
418
+ this.decodeMode !== DecodingMode.Strict &&
419
+ (current & BinTrieFlags.FLAG13) === 0
420
+ ) {
365
421
  this.result = this.treeIndex;
366
422
  this.consumed += this.excess;
367
423
  this.excess = 0;
368
424
  }
369
425
  }
426
+ // Increment offset & excess for next iteration
427
+ offset++;
428
+ this.excess++;
370
429
  }
371
430
 
372
431
  return -1;
@@ -407,7 +466,8 @@ export class EntityDecoder {
407
466
 
408
467
  this.emitCodePoint(
409
468
  valueLength === 1
410
- ? decodeTree[result] & ~BinTrieFlags.VALUE_LENGTH
469
+ ? decodeTree[result] &
470
+ ~(BinTrieFlags.VALUE_LENGTH | BinTrieFlags.FLAG13)
411
471
  : decodeTree[result + 1],
412
472
  consumed,
413
473
  );
@@ -540,22 +600,28 @@ export function determineBranch(
540
600
  : decodeTree[nodeIndex + value] - 1;
541
601
  }
542
602
 
543
- // Case 3: Multiple branches encoded in dictionary
603
+ // Case 3: Multiple branches encoded in packed dictionary (two keys per uint16)
604
+ const packedKeySlots = (branchCount + 1) >> 1;
544
605
 
545
- // Binary search for the character.
546
- let lo = nodeIndex;
547
- let hi = lo + branchCount - 1;
606
+ /*
607
+ * Treat packed keys as a virtual sorted array of length `branchCount`.
608
+ * Key(i) = low byte for even i, high byte for odd i in slot i>>1.
609
+ */
610
+ let lo = 0;
611
+ let hi = branchCount - 1;
548
612
 
549
613
  while (lo <= hi) {
550
614
  const mid = (lo + hi) >>> 1;
551
- const midValue = decodeTree[mid];
615
+ const slot = mid >> 1;
616
+ const packed = decodeTree[nodeIndex + slot];
617
+ const midKey = (packed >> ((mid & 1) * 8)) & 0xff;
552
618
 
553
- if (midValue < char) {
619
+ if (midKey < char) {
554
620
  lo = mid + 1;
555
- } else if (midValue > char) {
621
+ } else if (midKey > char) {
556
622
  hi = mid - 1;
557
623
  } else {
558
- return decodeTree[mid + branchCount];
624
+ return decodeTree[nodeIndex + packedKeySlots + mid];
559
625
  }
560
626
  }
561
627
 
@@ -609,12 +675,11 @@ export function decodeXML(xmlString: string): string {
609
675
  return xmlDecoder(xmlString, DecodingMode.Strict);
610
676
  }
611
677
 
612
- // Re-export for use by eg. htmlparser2
613
- export { htmlDecodeTree } from "./generated/decode-data-html.js";
614
- export { xmlDecodeTree } from "./generated/decode-data-xml.js";
615
-
616
678
  export {
617
679
  decodeCodePoint,
618
- replaceCodePoint,
619
680
  fromCodePoint,
681
+ replaceCodePoint,
620
682
  } from "./decode-codepoint.js";
683
+ // Re-export for use by eg. htmlparser2
684
+ export { htmlDecodeTree } from "./generated/decode-data-html.js";
685
+ export { xmlDecodeTree } from "./generated/decode-data-xml.js";
package/src/encode.ts CHANGED
@@ -1,7 +1,17 @@
1
+ import { getCodePoint, XML_BITSET_VALUE } from "./escape.js";
1
2
  import { htmlTrie } from "./generated/encode-html.js";
2
- import { xmlReplacer, getCodePoint } from "./escape.js";
3
3
 
4
- const htmlReplacer = /[\t\n\f!-,./:-@[-`{-}\u0080-\uFFFF]/g;
4
+ /**
5
+ * We store the characters to consider as a compact bitset for fast lookups.
6
+ */
7
+ const HTML_BITSET = /* #__PURE__ */ new Uint32Array([
8
+ 0x16_00, // Bits for 09,0A,0C
9
+ 0xfc_00_ff_fe, // 32..63 -> 21-2D (minus space), 2E,2F,3A-3F
10
+ 0xf8_00_00_01, // 64..95 -> 40, 5B-5F
11
+ 0x38_00_00_01, // 96..127-> 60, 7B-7D
12
+ ]);
13
+
14
+ const XML_BITSET = /* #__PURE__ */ new Uint32Array([0, XML_BITSET_VALUE, 0, 0]);
5
15
 
6
16
  /**
7
17
  * Encodes all characters in the input using HTML entities. This includes
@@ -15,7 +25,7 @@ const htmlReplacer = /[\t\n\f!-,./:-@[-`{-}\u0080-\uFFFF]/g;
15
25
  * (eg. `&#xfc;`) will be used.
16
26
  */
17
27
  export function encodeHTML(input: string): string {
18
- return encodeHTMLTrieRe(htmlReplacer, input);
28
+ return encodeHTMLTrieRe(HTML_BITSET, input);
19
29
  }
20
30
  /**
21
31
  * Encodes all non-ASCII characters, as well as characters not valid in HTML
@@ -26,52 +36,58 @@ export function encodeHTML(input: string): string {
26
36
  * (eg. `&#xfc;`) will be used.
27
37
  */
28
38
  export function encodeNonAsciiHTML(input: string): string {
29
- return encodeHTMLTrieRe(xmlReplacer, input);
39
+ return encodeHTMLTrieRe(XML_BITSET, input);
30
40
  }
31
41
 
32
- function encodeHTMLTrieRe(regExp: RegExp, input: string): string {
33
- let returnValue = "";
34
- let lastIndex = 0;
35
- let match;
42
+ function encodeHTMLTrieRe(bitset: Uint32Array, input: string): string {
43
+ let out: string | undefined;
44
+ let last = 0; // Start of the next untouched slice.
45
+ const { length } = input;
36
46
 
37
- while ((match = regExp.exec(input)) !== null) {
38
- const { index } = match;
39
- returnValue += input.substring(lastIndex, index);
47
+ for (let index = 0; index < length; index++) {
40
48
  const char = input.charCodeAt(index);
41
- let next = htmlTrie.get(char);
49
+ // Skip ASCII characters that don't need encoding
50
+ if (char < 0x80 && !((bitset[char >>> 5] >>> char) & 1)) {
51
+ continue;
52
+ }
42
53
 
43
- if (typeof next === "object") {
44
- // We are in a branch. Try to match the next char.
45
- if (index + 1 < input.length) {
54
+ if (out === undefined) out = input.substring(0, index);
55
+ else if (last !== index) out += input.substring(last, index);
56
+
57
+ let node = htmlTrie.get(char);
58
+
59
+ if (typeof node === "object") {
60
+ if (index + 1 < length) {
46
61
  const nextChar = input.charCodeAt(index + 1);
47
62
  const value =
48
- typeof next.n === "number"
49
- ? next.n === nextChar
50
- ? next.o
63
+ typeof node.next === "number"
64
+ ? node.next === nextChar
65
+ ? node.nextValue
51
66
  : undefined
52
- : next.n.get(nextChar);
67
+ : node.next.get(nextChar);
53
68
 
54
69
  if (value !== undefined) {
55
- returnValue += value;
56
- lastIndex = regExp.lastIndex += 1;
70
+ out += value;
71
+ index++;
72
+ last = index + 1;
57
73
  continue;
58
74
  }
59
75
  }
60
-
61
- next = next.v;
76
+ node = node.value;
62
77
  }
63
78
 
64
- // We might have a tree node without a value; skip and use a numeric entity.
65
- if (next === undefined) {
79
+ if (node === undefined) {
66
80
  const cp = getCodePoint(input, index);
67
- returnValue += `&#x${cp.toString(16)};`;
68
- // Increase by 1 if we have a surrogate pair
69
- lastIndex = regExp.lastIndex += Number(cp !== char);
81
+ out += `&#x${cp.toString(16)};`;
82
+ if (cp !== char) index++;
83
+ last = index + 1;
70
84
  } else {
71
- returnValue += next;
72
- lastIndex = index + 1;
85
+ out += node;
86
+ last = index + 1;
73
87
  }
74
88
  }
75
89
 
76
- return returnValue + input.substr(lastIndex);
90
+ if (out === undefined) return input;
91
+ if (last < length) out += input.substr(last);
92
+ return out;
77
93
  }
package/src/escape.ts CHANGED
@@ -1,5 +1,3 @@
1
- export const xmlReplacer: RegExp = /["$&'<>\u0080-\uFFFF]/g;
2
-
3
1
  const xmlCodeMap = new Map([
4
2
  [34, "&quot;"],
5
3
  [38, "&amp;"],
@@ -22,39 +20,54 @@ export const getCodePoint: (c: string, index: number) => number =
22
20
  : // http://mathiasbynens.be/notes/javascript-encoding#surrogate-formulae
23
21
  (input: string, index: number): number => input.codePointAt(index)!;
24
22
 
23
+ /**
24
+ * Bitset for ASCII characters that need to be escaped in XML.
25
+ */
26
+ export const XML_BITSET_VALUE = 0x50_00_00_c4; // 32..63 -> 34 ("),38 (&),39 ('),60 (<),62 (>)
27
+
25
28
  /**
26
29
  * Encodes all non-ASCII characters, as well as characters not valid in XML
27
- * documents using XML entities.
30
+ * documents using XML entities. Uses a fast bitset scan instead of RegExp.
28
31
  *
29
- * If a character has no equivalent entity, a
30
- * numeric hexadecimal reference (eg. `&#xfc;`) will be used.
32
+ * If a character has no equivalent entity, a numeric hexadecimal reference
33
+ * (eg. `&#xfc;`) will be used.
31
34
  */
32
35
  export function encodeXML(input: string): string {
33
- let returnValue = "";
34
- let lastIndex = 0;
35
- let match;
36
+ let out: string | undefined;
37
+ let last = 0;
38
+ const { length } = input;
36
39
 
37
- while ((match = xmlReplacer.exec(input)) !== null) {
38
- const { index } = match;
40
+ for (let index = 0; index < length; index++) {
39
41
  const char = input.charCodeAt(index);
40
- const next = xmlCodeMap.get(char);
41
-
42
- if (next === undefined) {
43
- returnValue += `${input.substring(lastIndex, index)}&#x${getCodePoint(
44
- input,
45
- index,
46
- ).toString(16)};`;
47
- // Increase by 1 if we have a surrogate pair
48
- lastIndex = xmlReplacer.lastIndex += Number(
49
- (char & 0xfc_00) === 0xd8_00,
50
- );
51
- } else {
52
- returnValue += input.substring(lastIndex, index) + next;
53
- lastIndex = index + 1;
42
+
43
+ // Check for ASCII chars that don't need escaping
44
+ if (
45
+ char < 0x80 &&
46
+ (((XML_BITSET_VALUE >>> char) & 1) === 0 || char >= 64 || char < 32)
47
+ ) {
48
+ continue;
54
49
  }
50
+
51
+ if (out === undefined) out = input.substring(0, index);
52
+ else if (last !== index) out += input.substring(last, index);
53
+
54
+ if (char < 64) {
55
+ // Known replacement
56
+ out += xmlCodeMap.get(char)!;
57
+ last = index + 1;
58
+ continue;
59
+ }
60
+
61
+ // Non-ASCII: encode as numeric entity (handle surrogate pair)
62
+ const cp = getCodePoint(input, index);
63
+ out += `&#x${cp.toString(16)};`;
64
+ if (cp !== char) index++; // Skip trailing surrogate
65
+ last = index + 1;
55
66
  }
56
67
 
57
- return returnValue + input.substr(lastIndex);
68
+ if (out === undefined) return input;
69
+ if (last < length) out += input.substr(last);
70
+ return out;
58
71
  }
59
72
 
60
73
  /**
@@ -83,7 +96,7 @@ function getEscaper(
83
96
  map: Map<number, string>,
84
97
  ): (data: string) => string {
85
98
  return function escape(data: string): string {
86
- let match;
99
+ let match: RegExpExecArray | null;
87
100
  let lastIndex = 0;
88
101
  let result = "";
89
102