entities 6.0.1 → 8.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (125) hide show
  1. package/dist/decode-codepoint.d.ts +8 -0
  2. package/dist/decode-codepoint.d.ts.map +1 -0
  3. package/dist/decode-codepoint.js +46 -0
  4. package/dist/decode-codepoint.js.map +1 -0
  5. package/dist/{esm/decode.d.ts → decode.d.ts} +11 -26
  6. package/dist/decode.d.ts.map +1 -0
  7. package/dist/{esm/decode.js → decode.js} +130 -83
  8. package/dist/decode.js.map +1 -0
  9. package/dist/{commonjs/encode.d.ts → encode.d.ts} +2 -0
  10. package/dist/encode.d.ts.map +1 -0
  11. package/dist/encode.js +90 -0
  12. package/dist/encode.js.map +1 -0
  13. package/dist/{esm/escape.d.ts → escape.d.ts} +13 -8
  14. package/dist/escape.d.ts.map +1 -0
  15. package/dist/{esm/escape.js → escape.js} +49 -34
  16. package/dist/escape.js.map +1 -0
  17. package/dist/generated/decode-data-html.d.ts +3 -0
  18. package/dist/generated/decode-data-html.d.ts.map +1 -0
  19. package/dist/generated/decode-data-html.js +5 -0
  20. package/dist/generated/decode-data-html.js.map +1 -0
  21. package/dist/generated/decode-data-xml.d.ts +3 -0
  22. package/dist/generated/decode-data-xml.d.ts.map +1 -0
  23. package/dist/generated/decode-data-xml.js +5 -0
  24. package/dist/generated/decode-data-xml.js.map +1 -0
  25. package/dist/generated/encode-html.d.ts +5 -0
  26. package/dist/generated/encode-html.d.ts.map +1 -0
  27. package/dist/generated/encode-html.js +12 -0
  28. package/dist/generated/encode-html.js.map +1 -0
  29. package/dist/{commonjs/index.d.ts → index.d.ts} +10 -17
  30. package/dist/index.d.ts.map +1 -0
  31. package/dist/{esm/index.js → index.js} +9 -25
  32. package/dist/index.js.map +1 -0
  33. package/dist/internal/bin-trie-flags.d.ts +17 -0
  34. package/dist/internal/bin-trie-flags.d.ts.map +1 -0
  35. package/dist/internal/bin-trie-flags.js +18 -0
  36. package/dist/internal/bin-trie-flags.js.map +1 -0
  37. package/dist/internal/decode-shared.d.ts +7 -0
  38. package/dist/internal/decode-shared.d.ts.map +1 -0
  39. package/dist/internal/decode-shared.js +17 -0
  40. package/dist/internal/decode-shared.js.map +1 -0
  41. package/dist/internal/encode-shared.d.ts +33 -0
  42. package/dist/internal/encode-shared.d.ts.map +1 -0
  43. package/dist/internal/encode-shared.js +93 -0
  44. package/dist/internal/encode-shared.js.map +1 -0
  45. package/package.json +38 -73
  46. package/readme.md +36 -27
  47. package/src/decode-codepoint.ts +1 -32
  48. package/src/decode.ts +127 -76
  49. package/src/encode.ts +49 -31
  50. package/src/escape.ts +50 -38
  51. package/src/generated/decode-data-html.ts +4 -5
  52. package/src/generated/decode-data-xml.ts +4 -5
  53. package/src/generated/encode-html.ts +15 -14
  54. package/src/index.ts +23 -49
  55. package/src/internal/bin-trie-flags.ts +16 -0
  56. package/src/internal/decode-shared.ts +18 -0
  57. package/src/internal/encode-shared.ts +123 -0
  58. package/decode.d.ts +0 -1
  59. package/decode.js +0 -3
  60. package/dist/commonjs/decode-codepoint.d.ts +0 -19
  61. package/dist/commonjs/decode-codepoint.d.ts.map +0 -1
  62. package/dist/commonjs/decode-codepoint.js +0 -77
  63. package/dist/commonjs/decode-codepoint.js.map +0 -1
  64. package/dist/commonjs/decode.d.ts +0 -209
  65. package/dist/commonjs/decode.d.ts.map +0 -1
  66. package/dist/commonjs/decode.js +0 -511
  67. package/dist/commonjs/decode.js.map +0 -1
  68. package/dist/commonjs/encode.d.ts.map +0 -1
  69. package/dist/commonjs/encode.js +0 -73
  70. package/dist/commonjs/encode.js.map +0 -1
  71. package/dist/commonjs/escape.d.ts +0 -43
  72. package/dist/commonjs/escape.d.ts.map +0 -1
  73. package/dist/commonjs/escape.js +0 -121
  74. package/dist/commonjs/escape.js.map +0 -1
  75. package/dist/commonjs/generated/decode-data-html.d.ts +0 -2
  76. package/dist/commonjs/generated/decode-data-html.d.ts.map +0 -1
  77. package/dist/commonjs/generated/decode-data-html.js +0 -10
  78. package/dist/commonjs/generated/decode-data-html.js.map +0 -1
  79. package/dist/commonjs/generated/decode-data-xml.d.ts +0 -2
  80. package/dist/commonjs/generated/decode-data-xml.d.ts.map +0 -1
  81. package/dist/commonjs/generated/decode-data-xml.js +0 -10
  82. package/dist/commonjs/generated/decode-data-xml.js.map +0 -1
  83. package/dist/commonjs/generated/encode-html.d.ts +0 -8
  84. package/dist/commonjs/generated/encode-html.d.ts.map +0 -1
  85. package/dist/commonjs/generated/encode-html.js +0 -13
  86. package/dist/commonjs/generated/encode-html.js.map +0 -1
  87. package/dist/commonjs/index.d.ts.map +0 -1
  88. package/dist/commonjs/index.js +0 -131
  89. package/dist/commonjs/index.js.map +0 -1
  90. package/dist/commonjs/package.json +0 -3
  91. package/dist/esm/decode-codepoint.d.ts +0 -19
  92. package/dist/esm/decode-codepoint.d.ts.map +0 -1
  93. package/dist/esm/decode-codepoint.js +0 -72
  94. package/dist/esm/decode-codepoint.js.map +0 -1
  95. package/dist/esm/decode.d.ts.map +0 -1
  96. package/dist/esm/decode.js.map +0 -1
  97. package/dist/esm/encode.d.ts +0 -22
  98. package/dist/esm/encode.d.ts.map +0 -1
  99. package/dist/esm/encode.js +0 -69
  100. package/dist/esm/encode.js.map +0 -1
  101. package/dist/esm/escape.d.ts.map +0 -1
  102. package/dist/esm/escape.js.map +0 -1
  103. package/dist/esm/generated/decode-data-html.d.ts +0 -2
  104. package/dist/esm/generated/decode-data-html.d.ts.map +0 -1
  105. package/dist/esm/generated/decode-data-html.js +0 -7
  106. package/dist/esm/generated/decode-data-html.js.map +0 -1
  107. package/dist/esm/generated/decode-data-xml.d.ts +0 -2
  108. package/dist/esm/generated/decode-data-xml.d.ts.map +0 -1
  109. package/dist/esm/generated/decode-data-xml.js +0 -7
  110. package/dist/esm/generated/decode-data-xml.js.map +0 -1
  111. package/dist/esm/generated/encode-html.d.ts +0 -8
  112. package/dist/esm/generated/encode-html.d.ts.map +0 -1
  113. package/dist/esm/generated/encode-html.js +0 -10
  114. package/dist/esm/generated/encode-html.js.map +0 -1
  115. package/dist/esm/index.d.ts +0 -96
  116. package/dist/esm/index.d.ts.map +0 -1
  117. package/dist/esm/index.js.map +0 -1
  118. package/dist/esm/package.json +0 -3
  119. package/escape.d.ts +0 -1
  120. package/escape.js +0 -3
  121. package/src/decode.spec.ts +0 -320
  122. package/src/encode.spec.ts +0 -78
  123. package/src/escape.spec.ts +0 -14
  124. package/src/generated/.eslintrc.json +0 -10
  125. package/src/index.spec.ts +0 -125
@@ -1,6 +1,7 @@
1
+ import { replaceCodePoint } from "./decode-codepoint.js";
1
2
  import { htmlDecodeTree } from "./generated/decode-data-html.js";
2
3
  import { xmlDecodeTree } from "./generated/decode-data-xml.js";
3
- import { replaceCodePoint, fromCodePoint } from "./decode-codepoint.js";
4
+ import { BinTrieFlags } from "./internal/bin-trie-flags.js";
4
5
  var CharCodes;
5
6
  (function (CharCodes) {
6
7
  CharCodes[CharCodes["NUM"] = 35] = "NUM";
@@ -17,13 +18,7 @@ var CharCodes;
17
18
  CharCodes[CharCodes["UPPER_Z"] = 90] = "UPPER_Z";
18
19
  })(CharCodes || (CharCodes = {}));
19
20
  /** Bit that needs to be set to convert an upper case ASCII character to lower case */
20
- const TO_LOWER_BIT = 32;
21
- export var BinTrieFlags;
22
- (function (BinTrieFlags) {
23
- BinTrieFlags[BinTrieFlags["VALUE_LENGTH"] = 49152] = "VALUE_LENGTH";
24
- BinTrieFlags[BinTrieFlags["BRANCH_LENGTH"] = 16256] = "BRANCH_LENGTH";
25
- BinTrieFlags[BinTrieFlags["JUMP_TABLE"] = 127] = "JUMP_TABLE";
26
- })(BinTrieFlags || (BinTrieFlags = {}));
21
+ const TO_LOWER_BIT = 0b10_0000;
27
22
  function isNumber(code) {
28
23
  return code >= CharCodes.ZERO && code <= CharCodes.NINE;
29
24
  }
@@ -41,6 +36,7 @@ function isAsciiAlphaNumeric(code) {
41
36
  *
42
37
  * Attribute values that aren't terminated properly aren't parsed, and shouldn't lead to a parser error.
43
38
  * See the example in https://html.spec.whatwg.org/multipage/parsing.html#named-character-reference-state
39
+ * @param code Code point to check.
44
40
  */
45
41
  function isEntityInAttributeInvalidEnd(code) {
46
42
  return code === CharCodes.EQUALS || isAsciiAlphaNumeric(code);
@@ -53,6 +49,9 @@ var EntityDecoderState;
53
49
  EntityDecoderState[EntityDecoderState["NumericHex"] = 3] = "NumericHex";
54
50
  EntityDecoderState[EntityDecoderState["NamedEntity"] = 4] = "NamedEntity";
55
51
  })(EntityDecoderState || (EntityDecoderState = {}));
52
+ /**
53
+ * Decoding mode for named entities.
54
+ */
56
55
  export var DecodingMode;
57
56
  (function (DecodingMode) {
58
57
  /** Entities in text nodes that can end with any character. */
@@ -66,15 +65,18 @@ export var DecodingMode;
66
65
  * Token decoder with support of writing partial entities.
67
66
  */
68
67
  export class EntityDecoder {
68
+ decodeTree;
69
+ emitCodePoint;
70
+ errors;
69
71
  constructor(
70
72
  /** The tree used to decode entities. */
73
+ // biome-ignore lint/correctness/noUnusedPrivateClassMembers: False positive
71
74
  decodeTree,
72
75
  /**
73
76
  * The function that is called when a codepoint is decoded.
74
77
  *
75
78
  * For multi-byte named entities, this will be called multiple times,
76
79
  * with the second codepoint, and the same `consumed` value.
77
- *
78
80
  * @param codepoint The decoded codepoint.
79
81
  * @param consumed The number of bytes consumed by the decoder.
80
82
  */
@@ -84,25 +86,30 @@ export class EntityDecoder {
84
86
  this.decodeTree = decodeTree;
85
87
  this.emitCodePoint = emitCodePoint;
86
88
  this.errors = errors;
87
- /** The current state of the decoder. */
88
- this.state = EntityDecoderState.EntityStart;
89
- /** Characters that were consumed while parsing an entity. */
90
- this.consumed = 1;
91
- /**
92
- * The result of the entity.
93
- *
94
- * Either the result index of a numeric entity, or the codepoint of a
95
- * numeric entity.
96
- */
97
- this.result = 0;
98
- /** The current index in the decode tree. */
99
- this.treeIndex = 0;
100
- /** The number of characters that were consumed in excess. */
101
- this.excess = 1;
102
- /** The mode in which the decoder is operating. */
103
- this.decodeMode = DecodingMode.Strict;
104
89
  }
105
- /** Resets the instance to make it reusable. */
90
+ /** The current state of the decoder. */
91
+ state = EntityDecoderState.EntityStart;
92
+ /** Characters that were consumed while parsing an entity. */
93
+ consumed = 1;
94
+ /**
95
+ * The result of the entity.
96
+ *
97
+ * Either the result index of a named entity, or the codepoint of a
98
+ * numeric entity.
99
+ */
100
+ result = 0;
101
+ /** The current index in the decode tree. */
102
+ treeIndex = 0;
103
+ /** The number of characters that were consumed in excess. */
104
+ excess = 1;
105
+ /** The mode in which the decoder is operating. */
106
+ decodeMode = DecodingMode.Strict;
107
+ /** The number of characters that have been consumed in the current run. */
108
+ runConsumed = 0;
109
+ /**
110
+ * Resets the instance to make it reusable.
111
+ * @param decodeMode Entity decoding mode to use.
112
+ */
106
113
  startEntity(decodeMode) {
107
114
  this.decodeMode = decodeMode;
108
115
  this.state = EntityDecoderState.EntityStart;
@@ -110,6 +117,7 @@ export class EntityDecoder {
110
117
  this.treeIndex = 0;
111
118
  this.excess = 1;
112
119
  this.consumed = 1;
120
+ this.runConsumed = 0;
113
121
  }
114
122
  /**
115
123
  * Write an entity to the decoder. This can be called multiple times with partial entities.
@@ -117,7 +125,6 @@ export class EntityDecoder {
117
125
  *
118
126
  * Mirrors the implementation of `getDecoder`, but with the ability to stop decoding if the
119
127
  * entity is incomplete, and resume when the next string is written.
120
- *
121
128
  * @param input The string containing the entity (or a continuation of the entity).
122
129
  * @param offset The offset at which the entity begins. Should be 0 if this is not the first call.
123
130
  * @returns The number of characters that were consumed, or -1 if the entity is incomplete.
@@ -151,7 +158,6 @@ export class EntityDecoder {
151
158
  * Switches between the numeric decimal and hexadecimal states.
152
159
  *
153
160
  * Equivalent to the `Numeric character reference state` in the HTML spec.
154
- *
155
161
  * @param input The string containing the entity (or a continuation of the entity).
156
162
  * @param offset The current offset.
157
163
  * @returns The number of characters that were consumed, or -1 if the entity is incomplete.
@@ -168,69 +174,59 @@ export class EntityDecoder {
168
174
  this.state = EntityDecoderState.NumericDecimal;
169
175
  return this.stateNumericDecimal(input, offset);
170
176
  }
171
- addToNumericResult(input, start, end, base) {
172
- if (start !== end) {
173
- const digitCount = end - start;
174
- this.result =
175
- this.result * Math.pow(base, digitCount) +
176
- Number.parseInt(input.substr(start, digitCount), base);
177
- this.consumed += digitCount;
178
- }
179
- }
180
177
  /**
181
178
  * Parses a hexadecimal numeric entity.
182
179
  *
183
180
  * Equivalent to the `Hexadecimal character reference state` in the HTML spec.
184
- *
185
181
  * @param input The string containing the entity (or a continuation of the entity).
186
182
  * @param offset The current offset.
187
183
  * @returns The number of characters that were consumed, or -1 if the entity is incomplete.
188
184
  */
189
185
  stateNumericHex(input, offset) {
190
- const startIndex = offset;
191
186
  while (offset < input.length) {
192
187
  const char = input.charCodeAt(offset);
193
188
  if (isNumber(char) || isHexadecimalCharacter(char)) {
194
- offset += 1;
189
+ // Convert hex digit to value (0-15); 'a'/'A' -> 10.
190
+ const digit = char <= CharCodes.NINE
191
+ ? char - CharCodes.ZERO
192
+ : (char | TO_LOWER_BIT) - CharCodes.LOWER_A + 10;
193
+ this.result = this.result * 16 + digit;
194
+ this.consumed++;
195
+ offset++;
195
196
  }
196
197
  else {
197
- this.addToNumericResult(input, startIndex, offset, 16);
198
198
  return this.emitNumericEntity(char, 3);
199
199
  }
200
200
  }
201
- this.addToNumericResult(input, startIndex, offset, 16);
202
- return -1;
201
+ return -1; // Incomplete entity
203
202
  }
204
203
  /**
205
204
  * Parses a decimal numeric entity.
206
205
  *
207
206
  * Equivalent to the `Decimal character reference state` in the HTML spec.
208
- *
209
207
  * @param input The string containing the entity (or a continuation of the entity).
210
208
  * @param offset The current offset.
211
209
  * @returns The number of characters that were consumed, or -1 if the entity is incomplete.
212
210
  */
213
211
  stateNumericDecimal(input, offset) {
214
- const startIndex = offset;
215
212
  while (offset < input.length) {
216
213
  const char = input.charCodeAt(offset);
217
214
  if (isNumber(char)) {
218
- offset += 1;
215
+ this.result = this.result * 10 + (char - CharCodes.ZERO);
216
+ this.consumed++;
217
+ offset++;
219
218
  }
220
219
  else {
221
- this.addToNumericResult(input, startIndex, offset, 10);
222
220
  return this.emitNumericEntity(char, 2);
223
221
  }
224
222
  }
225
- this.addToNumericResult(input, startIndex, offset, 10);
226
- return -1;
223
+ return -1; // Incomplete entity
227
224
  }
228
225
  /**
229
226
  * Validate and emit a numeric entity.
230
227
  *
231
228
  * Implements the logic from the `Hexadecimal character reference start
232
229
  * state` and `Numeric character reference end state` in the HTML spec.
233
- *
234
230
  * @param lastCp The last code point of the entity. Used to see if the
235
231
  * entity was terminated with a semicolon.
236
232
  * @param expectedLength The minimum number of characters that should be
@@ -239,10 +235,9 @@ export class EntityDecoder {
239
235
  * @returns The number of characters that were consumed.
240
236
  */
241
237
  emitNumericEntity(lastCp, expectedLength) {
242
- var _a;
243
238
  // Ensure we consumed at least one digit.
244
239
  if (this.consumed <= expectedLength) {
245
- (_a = this.errors) === null || _a === void 0 ? void 0 : _a.absenceOfDigitsInNumericCharacterReference(this.consumed);
240
+ this.errors?.absenceOfDigitsInNumericCharacterReference(this.consumed);
246
241
  return 0;
247
242
  }
248
243
  // Figure out if this is a legit end of the entity
@@ -265,7 +260,6 @@ export class EntityDecoder {
265
260
  * Parses a named entity.
266
261
  *
267
262
  * Equivalent to the `Named character reference state` in the HTML spec.
268
- *
269
263
  * @param input The string containing the entity (or a continuation of the entity).
270
264
  * @param offset The current offset.
271
265
  * @returns The number of characters that were consumed, or -1 if the entity is incomplete.
@@ -273,10 +267,64 @@ export class EntityDecoder {
273
267
  stateNamedEntity(input, offset) {
274
268
  const { decodeTree } = this;
275
269
  let current = decodeTree[this.treeIndex];
276
- // The mask is the number of bytes of the value, including the current byte.
270
+ // The length is the number of bytes of the value, including the current byte.
277
271
  let valueLength = (current & BinTrieFlags.VALUE_LENGTH) >> 14;
278
- for (; offset < input.length; offset++, this.excess++) {
272
+ while (offset < input.length) {
273
+ // Handle compact runs (possibly inline): valueLength === 0 and the FLAG13 bit set.
274
+ if (valueLength === 0 && (current & BinTrieFlags.FLAG13) !== 0) {
275
+ const runLength = (current & BinTrieFlags.BRANCH_LENGTH) >> 7; /* 2..63 */
276
+ // If we are starting a run, check the first char.
277
+ if (this.runConsumed === 0) {
278
+ const firstChar = current & BinTrieFlags.JUMP_TABLE;
279
+ if (input.charCodeAt(offset) !== firstChar) {
280
+ return this.result === 0
281
+ ? 0
282
+ : this.emitNotTerminatedNamedEntity();
283
+ }
284
+ offset++;
285
+ this.excess++;
286
+ this.runConsumed++;
287
+ }
288
+ // Check remaining characters in the run.
289
+ while (this.runConsumed < runLength) {
290
+ if (offset >= input.length) {
291
+ return -1;
292
+ }
293
+ const charIndexInPacked = this.runConsumed - 1;
294
+ const packedWord = decodeTree[this.treeIndex + 1 + (charIndexInPacked >> 1)];
295
+ const expectedChar = charIndexInPacked % 2 === 0
296
+ ? packedWord & 0xff
297
+ : (packedWord >> 8) & 0xff;
298
+ if (input.charCodeAt(offset) !== expectedChar) {
299
+ this.runConsumed = 0;
300
+ return this.result === 0
301
+ ? 0
302
+ : this.emitNotTerminatedNamedEntity();
303
+ }
304
+ offset++;
305
+ this.excess++;
306
+ this.runConsumed++;
307
+ }
308
+ this.runConsumed = 0;
309
+ this.treeIndex += 1 + (runLength >> 1);
310
+ current = decodeTree[this.treeIndex];
311
+ valueLength = (current & BinTrieFlags.VALUE_LENGTH) >> 14;
312
+ }
313
+ if (offset >= input.length)
314
+ break;
279
315
  const char = input.charCodeAt(offset);
316
+ /*
317
+ * Implicit semicolon handling for nodes that require a semicolon but
318
+ * don't have an explicit ';' branch stored in the trie. If we have
319
+ * a value on the current node, it requires a semicolon, and the
320
+ * current input character is a semicolon, emit the entity using the
321
+ * current node (without descending further).
322
+ */
323
+ if (char === CharCodes.SEMI &&
324
+ valueLength !== 0 &&
325
+ (current & BinTrieFlags.FLAG13) !== 0) {
326
+ return this.emitNamedEntityData(this.treeIndex, valueLength, this.consumed + this.excess);
327
+ }
280
328
  this.treeIndex = determineBranch(decodeTree, current, this.treeIndex + Math.max(1, valueLength), char);
281
329
  if (this.treeIndex < 0) {
282
330
  return this.result === 0 ||
@@ -298,41 +346,42 @@ export class EntityDecoder {
298
346
  return this.emitNamedEntityData(this.treeIndex, valueLength, this.consumed + this.excess);
299
347
  }
300
348
  // If we encounter a non-terminated (legacy) entity while parsing strictly, then ignore it.
301
- if (this.decodeMode !== DecodingMode.Strict) {
349
+ if (this.decodeMode !== DecodingMode.Strict &&
350
+ (current & BinTrieFlags.FLAG13) === 0) {
302
351
  this.result = this.treeIndex;
303
352
  this.consumed += this.excess;
304
353
  this.excess = 0;
305
354
  }
306
355
  }
356
+ // Increment offset & excess for next iteration
357
+ offset++;
358
+ this.excess++;
307
359
  }
308
360
  return -1;
309
361
  }
310
362
  /**
311
363
  * Emit a named entity that was not terminated with a semicolon.
312
- *
313
364
  * @returns The number of characters consumed.
314
365
  */
315
366
  emitNotTerminatedNamedEntity() {
316
- var _a;
317
367
  const { result, decodeTree } = this;
318
368
  const valueLength = (decodeTree[result] & BinTrieFlags.VALUE_LENGTH) >> 14;
319
369
  this.emitNamedEntityData(result, valueLength, this.consumed);
320
- (_a = this.errors) === null || _a === void 0 ? void 0 : _a.missingSemicolonAfterCharacterReference();
370
+ this.errors?.missingSemicolonAfterCharacterReference();
321
371
  return this.consumed;
322
372
  }
323
373
  /**
324
374
  * Emit a named entity.
325
- *
326
375
  * @param result The index of the entity in the decode tree.
327
376
  * @param valueLength The number of bytes in the entity.
328
377
  * @param consumed The number of characters consumed.
329
- *
330
378
  * @returns The number of characters consumed.
331
379
  */
332
380
  emitNamedEntityData(result, valueLength, consumed) {
333
381
  const { decodeTree } = this;
334
382
  this.emitCodePoint(valueLength === 1
335
- ? decodeTree[result] & ~BinTrieFlags.VALUE_LENGTH
383
+ ? decodeTree[result] &
384
+ ~(BinTrieFlags.VALUE_LENGTH | BinTrieFlags.FLAG13)
336
385
  : decodeTree[result + 1], consumed);
337
386
  if (valueLength === 3) {
338
387
  // For multi-byte values, we need to emit the second byte.
@@ -344,11 +393,9 @@ export class EntityDecoder {
344
393
  * Signal to the parser that the end of the input was reached.
345
394
  *
346
395
  * Remaining data will be emitted and relevant errors will be produced.
347
- *
348
396
  * @returns The number of characters consumed.
349
397
  */
350
398
  end() {
351
- var _a;
352
399
  switch (this.state) {
353
400
  case EntityDecoderState.NamedEntity: {
354
401
  // Emit a named entity if we have one.
@@ -366,7 +413,7 @@ export class EntityDecoder {
366
413
  return this.emitNumericEntity(0, 3);
367
414
  }
368
415
  case EntityDecoderState.NumericStart: {
369
- (_a = this.errors) === null || _a === void 0 ? void 0 : _a.absenceOfDigitsInNumericCharacterReference(this.consumed);
416
+ this.errors?.absenceOfDigitsInNumericCharacterReference(this.consumed);
370
417
  return 0;
371
418
  }
372
419
  case EntityDecoderState.EntityStart: {
@@ -378,13 +425,12 @@ export class EntityDecoder {
378
425
  }
379
426
  /**
380
427
  * Creates a function that decodes entities in a string.
381
- *
382
428
  * @param decodeTree The decode tree.
383
429
  * @returns A function that decodes entities in a string.
384
430
  */
385
431
  function getDecoder(decodeTree) {
386
432
  let returnValue = "";
387
- const decoder = new EntityDecoder(decodeTree, (data) => (returnValue += fromCodePoint(data)));
433
+ const decoder = new EntityDecoder(decodeTree, (data) => (returnValue += String.fromCodePoint(data)));
388
434
  return function decodeWithTrie(input, decodeMode) {
389
435
  let lastIndex = 0;
390
436
  let offset = 0;
@@ -411,10 +457,9 @@ function getDecoder(decodeTree) {
411
457
  /**
412
458
  * Determines the branch of the current node that is taken given the current
413
459
  * character. This function is used to traverse the trie.
414
- *
415
460
  * @param decodeTree The trie.
416
461
  * @param current The current node.
417
- * @param nodeIdx The index right after the current node and its value.
462
+ * @param nodeIndex Index immediately after the current node header.
418
463
  * @param char The current character.
419
464
  * @returns The index of the next node, or -1 if no branch is taken.
420
465
  */
@@ -432,21 +477,27 @@ export function determineBranch(decodeTree, current, nodeIndex, char) {
432
477
  ? -1
433
478
  : decodeTree[nodeIndex + value] - 1;
434
479
  }
435
- // Case 3: Multiple branches encoded in dictionary
436
- // Binary search for the character.
437
- let lo = nodeIndex;
438
- let hi = lo + branchCount - 1;
480
+ // Case 3: Multiple branches encoded in packed dictionary (two keys per uint16)
481
+ const packedKeySlots = (branchCount + 1) >> 1;
482
+ /*
483
+ * Treat packed keys as a virtual sorted array of length `branchCount`.
484
+ * Key(i) = low byte for even i, high byte for odd i in slot i>>1.
485
+ */
486
+ let lo = 0;
487
+ let hi = branchCount - 1;
439
488
  while (lo <= hi) {
440
489
  const mid = (lo + hi) >>> 1;
441
- const midValue = decodeTree[mid];
442
- if (midValue < char) {
490
+ const slot = mid >> 1;
491
+ const packed = decodeTree[nodeIndex + slot];
492
+ const midKey = (packed >> ((mid & 1) * 8)) & 0xff;
493
+ if (midKey < char) {
443
494
  lo = mid + 1;
444
495
  }
445
- else if (midValue > char) {
496
+ else if (midKey > char) {
446
497
  hi = mid - 1;
447
498
  }
448
499
  else {
449
- return decodeTree[mid + branchCount];
500
+ return decodeTree[nodeIndex + packedKeySlots + mid];
450
501
  }
451
502
  }
452
503
  return -1;
@@ -455,7 +506,6 @@ const htmlDecoder = /* #__PURE__ */ getDecoder(htmlDecodeTree);
455
506
  const xmlDecoder = /* #__PURE__ */ getDecoder(xmlDecodeTree);
456
507
  /**
457
508
  * Decodes an HTML string.
458
- *
459
509
  * @param htmlString The string to decode.
460
510
  * @param mode The decoding mode.
461
511
  * @returns The decoded string.
@@ -465,7 +515,6 @@ export function decodeHTML(htmlString, mode = DecodingMode.Legacy) {
465
515
  }
466
516
  /**
467
517
  * Decodes an HTML string in an attribute.
468
- *
469
518
  * @param htmlAttribute The string to decode.
470
519
  * @returns The decoded string.
471
520
  */
@@ -474,7 +523,6 @@ export function decodeHTMLAttribute(htmlAttribute) {
474
523
  }
475
524
  /**
476
525
  * Decodes an HTML string, requiring all entities to be terminated by a semicolon.
477
- *
478
526
  * @param htmlString The string to decode.
479
527
  * @returns The decoded string.
480
528
  */
@@ -483,15 +531,14 @@ export function decodeHTMLStrict(htmlString) {
483
531
  }
484
532
  /**
485
533
  * Decodes an XML string, requiring all entities to be terminated by a semicolon.
486
- *
487
534
  * @param xmlString The string to decode.
488
535
  * @returns The decoded string.
489
536
  */
490
537
  export function decodeXML(xmlString) {
491
538
  return xmlDecoder(xmlString, DecodingMode.Strict);
492
539
  }
540
+ export { replaceCodePoint } from "./decode-codepoint.js";
493
541
  // Re-export for use by eg. htmlparser2
494
542
  export { htmlDecodeTree } from "./generated/decode-data-html.js";
495
543
  export { xmlDecodeTree } from "./generated/decode-data-xml.js";
496
- export { decodeCodePoint, replaceCodePoint, fromCodePoint, } from "./decode-codepoint.js";
497
544
  //# sourceMappingURL=decode.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"decode.js","sourceRoot":"","sources":["../src/decode.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,gBAAgB,EAAE,MAAM,uBAAuB,CAAC;AACzD,OAAO,EAAE,cAAc,EAAE,MAAM,iCAAiC,CAAC;AACjE,OAAO,EAAE,aAAa,EAAE,MAAM,gCAAgC,CAAC;AAC/D,OAAO,EAAE,YAAY,EAAE,MAAM,8BAA8B,CAAC;AAE5D,IAAW,SAaV;AAbD,WAAW,SAAS;IAChB,wCAAQ,CAAA;IACR,0CAAS,CAAA;IACT,8CAAW,CAAA;IACX,0CAAS,CAAA;IACT,0CAAS,CAAA;IACT,gDAAY,CAAA;IACZ,iDAAa,CAAA;IACb,iDAAa,CAAA;IACb,iDAAa,CAAA;IACb,gDAAY,CAAA;IACZ,gDAAY,CAAA;IACZ,gDAAY,CAAA;AAChB,CAAC,EAbU,SAAS,KAAT,SAAS,QAanB;AAED,sFAAsF;AACtF,MAAM,YAAY,GAAG,SAAS,CAAC;AAE/B,SAAS,QAAQ,CAAC,IAAY;IAC1B,OAAO,IAAI,IAAI,SAAS,CAAC,IAAI,IAAI,IAAI,IAAI,SAAS,CAAC,IAAI,CAAC;AAC5D,CAAC;AAED,SAAS,sBAAsB,CAAC,IAAY;IACxC,OAAO,CACH,CAAC,IAAI,IAAI,SAAS,CAAC,OAAO,IAAI,IAAI,IAAI,SAAS,CAAC,OAAO,CAAC;QACxD,CAAC,IAAI,IAAI,SAAS,CAAC,OAAO,IAAI,IAAI,IAAI,SAAS,CAAC,OAAO,CAAC,CAC3D,CAAC;AACN,CAAC;AAED,SAAS,mBAAmB,CAAC,IAAY;IACrC,OAAO,CACH,CAAC,IAAI,IAAI,SAAS,CAAC,OAAO,IAAI,IAAI,IAAI,SAAS,CAAC,OAAO,CAAC;QACxD,CAAC,IAAI,IAAI,SAAS,CAAC,OAAO,IAAI,IAAI,IAAI,SAAS,CAAC,OAAO,CAAC;QACxD,QAAQ,CAAC,IAAI,CAAC,CACjB,CAAC;AACN,CAAC;AAED;;;;;;GAMG;AACH,SAAS,6BAA6B,CAAC,IAAY;IAC/C,OAAO,IAAI,KAAK,SAAS,CAAC,MAAM,IAAI,mBAAmB,CAAC,IAAI,CAAC,CAAC;AAClE,CAAC;AAED,IAAW,kBAMV;AAND,WAAW,kBAAkB;IACzB,yEAAW,CAAA;IACX,2EAAY,CAAA;IACZ,+EAAc,CAAA;IACd,uEAAU,CAAA;IACV,yEAAW,CAAA;AACf,CAAC,EANU,kBAAkB,KAAlB,kBAAkB,QAM5B;AAED;;GAEG;AACH,MAAM,CAAN,IAAY,YAOX;AAPD,WAAY,YAAY;IACpB,8DAA8D;IAC9D,mDAAU,CAAA;IACV,uDAAuD;IACvD,mDAAU,CAAA;IACV,oEAAoE;IACpE,yDAAa,CAAA;AACjB,CAAC,EAPW,YAAY,KAAZ,YAAY,QAOvB;AAaD;;GAEG;AACH,MAAM,OAAO,aAAa;IAID;IASA;IAEA;IAdrB;IACI,wCAAwC;IACxC,4EAA4E;IAC3D,UAAuB;IACxC;;;;;;;OAOG;IACc,aAAqD;IACtE,gDAAgD;IAC/B,MAAwC;QAXxC,eAAU,GAAV,UAAU,CAAa;QASvB,kBAAa,GAAb,aAAa,CAAwC;QAErD,WAAM,GAAN,MAAM,CAAkC;IAC1D,CAAC;IAEJ,wCAAwC;IAChC,KAAK,GAAG,kBAAkB,CAAC,WAAW,CAAC;IAC/C,6DAA6D;IACrD,QAAQ,GAAG,CAAC,CAAC;IACrB;;;;;OAKG;IACK,MAAM,GAAG,CAAC,CAAC;IAEnB,4CAA4C;IACpC,SAAS,GAAG,CAAC,CAAC;IACtB,6DAA6D;IACrD,MAAM,GAAG,CA
AC,CAAC;IACnB,kDAAkD;IAC1C,UAAU,GAAG,YAAY,CAAC,MAAM,CAAC;IACzC,2EAA2E;IACnE,WAAW,GAAG,CAAC,CAAC;IAExB;;;OAGG;IACH,WAAW,CAAC,UAAwB;QAChC,IAAI,CAAC,UAAU,GAAG,UAAU,CAAC;QAC7B,IAAI,CAAC,KAAK,GAAG,kBAAkB,CAAC,WAAW,CAAC;QAC5C,IAAI,CAAC,MAAM,GAAG,CAAC,CAAC;QAChB,IAAI,CAAC,SAAS,GAAG,CAAC,CAAC;QACnB,IAAI,CAAC,MAAM,GAAG,CAAC,CAAC;QAChB,IAAI,CAAC,QAAQ,GAAG,CAAC,CAAC;QAClB,IAAI,CAAC,WAAW,GAAG,CAAC,CAAC;IACzB,CAAC;IAED;;;;;;;;;OASG;IACH,KAAK,CAAC,KAAa,EAAE,MAAc;QAC/B,QAAQ,IAAI,CAAC,KAAK,EAAE,CAAC;YACjB,KAAK,kBAAkB,CAAC,WAAW,CAAC,CAAC,CAAC;gBAClC,IAAI,KAAK,CAAC,UAAU,CAAC,MAAM,CAAC,KAAK,SAAS,CAAC,GAAG,EAAE,CAAC;oBAC7C,IAAI,CAAC,KAAK,GAAG,kBAAkB,CAAC,YAAY,CAAC;oBAC7C,IAAI,CAAC,QAAQ,IAAI,CAAC,CAAC;oBACnB,OAAO,IAAI,CAAC,iBAAiB,CAAC,KAAK,EAAE,MAAM,GAAG,CAAC,CAAC,CAAC;gBACrD,CAAC;gBACD,IAAI,CAAC,KAAK,GAAG,kBAAkB,CAAC,WAAW,CAAC;gBAC5C,OAAO,IAAI,CAAC,gBAAgB,CAAC,KAAK,EAAE,MAAM,CAAC,CAAC;YAChD,CAAC;YAED,KAAK,kBAAkB,CAAC,YAAY,CAAC,CAAC,CAAC;gBACnC,OAAO,IAAI,CAAC,iBAAiB,CAAC,KAAK,EAAE,MAAM,CAAC,CAAC;YACjD,CAAC;YAED,KAAK,kBAAkB,CAAC,cAAc,CAAC,CAAC,CAAC;gBACrC,OAAO,IAAI,CAAC,mBAAmB,CAAC,KAAK,EAAE,MAAM,CAAC,CAAC;YACnD,CAAC;YAED,KAAK,kBAAkB,CAAC,UAAU,CAAC,CAAC,CAAC;gBACjC,OAAO,IAAI,CAAC,eAAe,CAAC,KAAK,EAAE,MAAM,CAAC,CAAC;YAC/C,CAAC;YAED,KAAK,kBAAkB,CAAC,WAAW,CAAC,CAAC,CAAC;gBAClC,OAAO,IAAI,CAAC,gBAAgB,CAAC,KAAK,EAAE,MAAM,CAAC,CAAC;YAChD,CAAC;QACL,CAAC;IACL,CAAC;IAED;;;;;;;OAOG;IACK,iBAAiB,CAAC,KAAa,EAAE,MAAc;QACnD,IAAI,MAAM,IAAI,KAAK,CAAC,MAAM,EAAE,CAAC;YACzB,OAAO,CAAC,CAAC,CAAC;QACd,CAAC;QAED,IAAI,CAAC,KAAK,CAAC,UAAU,CAAC,MAAM,CAAC,GAAG,YAAY,CAAC,KAAK,SAAS,CAAC,OAAO,EAAE,CAAC;YAClE,IAAI,CAAC,KAAK,GAAG,kBAAkB,CAAC,UAAU,CAAC;YAC3C,IAAI,CAAC,QAAQ,IAAI,CAAC,CAAC;YACnB,OAAO,IAAI,CAAC,eAAe,CAAC,KAAK,EAAE,MAAM,GAAG,CAAC,CAAC,CAAC;QACnD,CAAC;QAED,IAAI,CAAC,KAAK,GAAG,kBAAkB,CAAC,cAAc,CAAC;QAC/C,OAAO,IAAI,CAAC,mBAAmB,CAAC,KAAK,EAAE,MAAM,CAAC,CAAC;IACnD,CAAC;IAED;;;;;;;OAOG;IACK,eAAe,CAAC,KAAa,EAAE,MAAc;QACjD,OAAO,MAAM,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC;YAC3B,MAAM,IAAI,GAAG,KAAK,CAAC,UAAU,CAAC,MAAM,CAAC,CAAC;
YACtC,IAAI,QAAQ,CAAC,IAAI,CAAC,IAAI,sBAAsB,CAAC,IAAI,CAAC,EAAE,CAAC;gBACjD,oDAAoD;gBACpD,MAAM,KAAK,GACP,IAAI,IAAI,SAAS,CAAC,IAAI;oBAClB,CAAC,CAAC,IAAI,GAAG,SAAS,CAAC,IAAI;oBACvB,CAAC,CAAC,CAAC,IAAI,GAAG,YAAY,CAAC,GAAG,SAAS,CAAC,OAAO,GAAG,EAAE,CAAC;gBACzD,IAAI,CAAC,MAAM,GAAG,IAAI,CAAC,MAAM,GAAG,EAAE,GAAG,KAAK,CAAC;gBACvC,IAAI,CAAC,QAAQ,EAAE,CAAC;gBAChB,MAAM,EAAE,CAAC;YACb,CAAC;iBAAM,CAAC;gBACJ,OAAO,IAAI,CAAC,iBAAiB,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC;YAC3C,CAAC;QACL,CAAC;QACD,OAAO,CAAC,CAAC,CAAC,CAAC,oBAAoB;IACnC,CAAC;IAED;;;;;;;OAOG;IACK,mBAAmB,CAAC,KAAa,EAAE,MAAc;QACrD,OAAO,MAAM,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC;YAC3B,MAAM,IAAI,GAAG,KAAK,CAAC,UAAU,CAAC,MAAM,CAAC,CAAC;YACtC,IAAI,QAAQ,CAAC,IAAI,CAAC,EAAE,CAAC;gBACjB,IAAI,CAAC,MAAM,GAAG,IAAI,CAAC,MAAM,GAAG,EAAE,GAAG,CAAC,IAAI,GAAG,SAAS,CAAC,IAAI,CAAC,CAAC;gBACzD,IAAI,CAAC,QAAQ,EAAE,CAAC;gBAChB,MAAM,EAAE,CAAC;YACb,CAAC;iBAAM,CAAC;gBACJ,OAAO,IAAI,CAAC,iBAAiB,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC;YAC3C,CAAC;QACL,CAAC;QACD,OAAO,CAAC,CAAC,CAAC,CAAC,oBAAoB;IACnC,CAAC;IAED;;;;;;;;;;;OAWG;IACK,iBAAiB,CAAC,MAAc,EAAE,cAAsB;QAC5D,yCAAyC;QACzC,IAAI,IAAI,CAAC,QAAQ,IAAI,cAAc,EAAE,CAAC;YAClC,IAAI,CAAC,MAAM,EAAE,0CAA0C,CACnD,IAAI,CAAC,QAAQ,CAChB,CAAC;YACF,OAAO,CAAC,CAAC;QACb,CAAC;QAED,kDAAkD;QAClD,IAAI,MAAM,KAAK,SAAS,CAAC,IAAI,EAAE,CAAC;YAC5B,IAAI,CAAC,QAAQ,IAAI,CAAC,CAAC;QACvB,CAAC;aAAM,IAAI,IAAI,CAAC,UAAU,KAAK,YAAY,CAAC,MAAM,EAAE,CAAC;YACjD,OAAO,CAAC,CAAC;QACb,CAAC;QAED,IAAI,CAAC,aAAa,CAAC,gBAAgB,CAAC,IAAI,CAAC,MAAM,CAAC,EAAE,IAAI,CAAC,QAAQ,CAAC,CAAC;QAEjE,IAAI,IAAI,CAAC,MAAM,EAAE,CAAC;YACd,IAAI,MAAM,KAAK,SAAS,CAAC,IAAI,EAAE,CAAC;gBAC5B,IAAI,CAAC,MAAM,CAAC,uCAAuC,EAAE,CAAC;YAC1D,CAAC;YAED,IAAI,CAAC,MAAM,CAAC,iCAAiC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QAC/D,CAAC;QAED,OAAO,IAAI,CAAC,QAAQ,CAAC;IACzB,CAAC;IAED;;;;;;;OAOG;IACK,gBAAgB,CAAC,KAAa,EAAE,MAAc;QAClD,MAAM,EAAE,UAAU,EAAE,GAAG,IAAI,CAAC;QAC5B,IAAI,OAAO,GAAG,UAAU,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;QACzC,8EAA8E;QAC9E,IAAI,WAAW,GAAG,CAAC,OAAO,GAAG,YAAY,CAAC,YAAY,CAAC,IAAI,EAAE,CAAC;QAE9D,OAAO,MAAM,GAAG,KAAK,CAA
C,MAAM,EAAE,CAAC;YAC3B,qFAAqF;YACrF,IAAI,WAAW,KAAK,CAAC,IAAI,CAAC,OAAO,GAAG,YAAY,CAAC,MAAM,CAAC,KAAK,CAAC,EAAE,CAAC;gBAC7D,MAAM,SAAS,GACX,CAAC,OAAO,GAAG,YAAY,CAAC,aAAa,CAAC,IAAI,CAAC,CAAC,CAAC,WAAW;gBAE5D,kDAAkD;gBAClD,IAAI,IAAI,CAAC,WAAW,KAAK,CAAC,EAAE,CAAC;oBACzB,MAAM,SAAS,GAAG,OAAO,GAAG,YAAY,CAAC,UAAU,CAAC;oBACpD,IAAI,KAAK,CAAC,UAAU,CAAC,MAAM,CAAC,KAAK,SAAS,EAAE,CAAC;wBACzC,OAAO,IAAI,CAAC,MAAM,KAAK,CAAC;4BACpB,CAAC,CAAC,CAAC;4BACH,CAAC,CAAC,IAAI,CAAC,4BAA4B,EAAE,CAAC;oBAC9C,CAAC;oBACD,MAAM,EAAE,CAAC;oBACT,IAAI,CAAC,MAAM,EAAE,CAAC;oBACd,IAAI,CAAC,WAAW,EAAE,CAAC;gBACvB,CAAC;gBAED,yCAAyC;gBACzC,OAAO,IAAI,CAAC,WAAW,GAAG,SAAS,EAAE,CAAC;oBAClC,IAAI,MAAM,IAAI,KAAK,CAAC,MAAM,EAAE,CAAC;wBACzB,OAAO,CAAC,CAAC,CAAC;oBACd,CAAC;oBAED,MAAM,iBAAiB,GAAG,IAAI,CAAC,WAAW,GAAG,CAAC,CAAC;oBAC/C,MAAM,UAAU,GACZ,UAAU,CACN,IAAI,CAAC,SAAS,GAAG,CAAC,GAAG,CAAC,iBAAiB,IAAI,CAAC,CAAC,CAChD,CAAC;oBACN,MAAM,YAAY,GACd,iBAAiB,GAAG,CAAC,KAAK,CAAC;wBACvB,CAAC,CAAC,UAAU,GAAG,IAAI;wBACnB,CAAC,CAAC,CAAC,UAAU,IAAI,CAAC,CAAC,GAAG,IAAI,CAAC;oBAEnC,IAAI,KAAK,CAAC,UAAU,CAAC,MAAM,CAAC,KAAK,YAAY,EAAE,CAAC;wBAC5C,IAAI,CAAC,WAAW,GAAG,CAAC,CAAC;wBACrB,OAAO,IAAI,CAAC,MAAM,KAAK,CAAC;4BACpB,CAAC,CAAC,CAAC;4BACH,CAAC,CAAC,IAAI,CAAC,4BAA4B,EAAE,CAAC;oBAC9C,CAAC;oBACD,MAAM,EAAE,CAAC;oBACT,IAAI,CAAC,MAAM,EAAE,CAAC;oBACd,IAAI,CAAC,WAAW,EAAE,CAAC;gBACvB,CAAC;gBAED,IAAI,CAAC,WAAW,GAAG,CAAC,CAAC;gBACrB,IAAI,CAAC,SAAS,IAAI,CAAC,GAAG,CAAC,SAAS,IAAI,CAAC,CAAC,CAAC;gBACvC,OAAO,GAAG,UAAU,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;gBACrC,WAAW,GAAG,CAAC,OAAO,GAAG,YAAY,CAAC,YAAY,CAAC,IAAI,EAAE,CAAC;YAC9D,CAAC;YAED,IAAI,MAAM,IAAI,KAAK,CAAC,MAAM;gBAAE,MAAM;YAElC,MAAM,IAAI,GAAG,KAAK,CAAC,UAAU,CAAC,MAAM,CAAC,CAAC;YAEtC;;;;;;eAMG;YACH,IACI,IAAI,KAAK,SAAS,CAAC,IAAI;gBACvB,WAAW,KAAK,CAAC;gBACjB,CAAC,OAAO,GAAG,YAAY,CAAC,MAAM,CAAC,KAAK,CAAC,EACvC,CAAC;gBACC,OAAO,IAAI,CAAC,mBAAmB,CAC3B,IAAI,CAAC,SAAS,EACd,WAAW,EACX,IAAI,CAAC,QAAQ,GAAG,IAAI,CAAC,MAAM,CAC9B,CAAC;YACN,CAAC;YAED,IAAI,CAAC,SAAS,GAAG,eAAe,CAC5B,UAAU,EACV,OAAO,EACP,IAAI,CAAC,SAAS,GAAG,IAAI,CAA
C,GAAG,CAAC,CAAC,EAAE,WAAW,CAAC,EACzC,IAAI,CACP,CAAC;YAEF,IAAI,IAAI,CAAC,SAAS,GAAG,CAAC,EAAE,CAAC;gBACrB,OAAO,IAAI,CAAC,MAAM,KAAK,CAAC;oBACpB,iCAAiC;oBACjC,CAAC,IAAI,CAAC,UAAU,KAAK,YAAY,CAAC,SAAS;wBACvC,8DAA8D;wBAC9D,CAAC,WAAW,KAAK,CAAC;4BACd,6CAA6C;4BAC7C,6BAA6B,CAAC,IAAI,CAAC,CAAC,CAAC;oBAC7C,CAAC,CAAC,CAAC;oBACH,CAAC,CAAC,IAAI,CAAC,4BAA4B,EAAE,CAAC;YAC9C,CAAC;YAED,OAAO,GAAG,UAAU,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;YACrC,WAAW,GAAG,CAAC,OAAO,GAAG,YAAY,CAAC,YAAY,CAAC,IAAI,EAAE,CAAC;YAE1D,kDAAkD;YAClD,IAAI,WAAW,KAAK,CAAC,EAAE,CAAC;gBACpB,2DAA2D;gBAC3D,IAAI,IAAI,KAAK,SAAS,CAAC,IAAI,EAAE,CAAC;oBAC1B,OAAO,IAAI,CAAC,mBAAmB,CAC3B,IAAI,CAAC,SAAS,EACd,WAAW,EACX,IAAI,CAAC,QAAQ,GAAG,IAAI,CAAC,MAAM,CAC9B,CAAC;gBACN,CAAC;gBAED,2FAA2F;gBAC3F,IACI,IAAI,CAAC,UAAU,KAAK,YAAY,CAAC,MAAM;oBACvC,CAAC,OAAO,GAAG,YAAY,CAAC,MAAM,CAAC,KAAK,CAAC,EACvC,CAAC;oBACC,IAAI,CAAC,MAAM,GAAG,IAAI,CAAC,SAAS,CAAC;oBAC7B,IAAI,CAAC,QAAQ,IAAI,IAAI,CAAC,MAAM,CAAC;oBAC7B,IAAI,CAAC,MAAM,GAAG,CAAC,CAAC;gBACpB,CAAC;YACL,CAAC;YACD,+CAA+C;YAC/C,MAAM,EAAE,CAAC;YACT,IAAI,CAAC,MAAM,EAAE,CAAC;QAClB,CAAC;QAED,OAAO,CAAC,CAAC,CAAC;IACd,CAAC;IAED;;;OAGG;IACK,4BAA4B;QAChC,MAAM,EAAE,MAAM,EAAE,UAAU,EAAE,GAAG,IAAI,CAAC;QAEpC,MAAM,WAAW,GACb,CAAC,UAAU,CAAC,MAAM,CAAC,GAAG,YAAY,CAAC,YAAY,CAAC,IAAI,EAAE,CAAC;QAE3D,IAAI,CAAC,mBAAmB,CAAC,MAAM,EAAE,WAAW,EAAE,IAAI,CAAC,QAAQ,CAAC,CAAC;QAC7D,IAAI,CAAC,MAAM,EAAE,uCAAuC,EAAE,CAAC;QAEvD,OAAO,IAAI,CAAC,QAAQ,CAAC;IACzB,CAAC;IAED;;;;;;OAMG;IACK,mBAAmB,CACvB,MAAc,EACd,WAAmB,EACnB,QAAgB;QAEhB,MAAM,EAAE,UAAU,EAAE,GAAG,IAAI,CAAC;QAE5B,IAAI,CAAC,aAAa,CACd,WAAW,KAAK,CAAC;YACb,CAAC,CAAC,UAAU,CAAC,MAAM,CAAC;gBACd,CAAC,CAAC,YAAY,CAAC,YAAY,GAAG,YAAY,CAAC,MAAM,CAAC;YACxD,CAAC,CAAC,UAAU,CAAC,MAAM,GAAG,CAAC,CAAC,EAC5B,QAAQ,CACX,CAAC;QACF,IAAI,WAAW,KAAK,CAAC,EAAE,CAAC;YACpB,0DAA0D;YAC1D,IAAI,CAAC,aAAa,CAAC,UAAU,CAAC,MAAM,GAAG,CAAC,CAAC,EAAE,QAAQ,CAAC,CAAC;QACzD,CAAC;QAED,OAAO,QAAQ,CAAC;IACpB,CAAC;IAED;;;;;OAKG;IACH,GAAG;QACC,QAAQ,IAAI,CAAC,KAAK,EAAE,CAAC;YACjB,KAAK,kBAAkB,CAAC,WAAW,CAAC,CAAC,CAAC;gBAClC,sCAAsC;g
BACtC,OAAO,IAAI,CAAC,MAAM,KAAK,CAAC;oBACpB,CAAC,IAAI,CAAC,UAAU,KAAK,YAAY,CAAC,SAAS;wBACvC,IAAI,CAAC,MAAM,KAAK,IAAI,CAAC,SAAS,CAAC;oBACnC,CAAC,CAAC,IAAI,CAAC,4BAA4B,EAAE;oBACrC,CAAC,CAAC,CAAC,CAAC;YACZ,CAAC;YACD,mDAAmD;YACnD,KAAK,kBAAkB,CAAC,cAAc,CAAC,CAAC,CAAC;gBACrC,OAAO,IAAI,CAAC,iBAAiB,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC;YACxC,CAAC;YACD,KAAK,kBAAkB,CAAC,UAAU,CAAC,CAAC,CAAC;gBACjC,OAAO,IAAI,CAAC,iBAAiB,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC;YACxC,CAAC;YACD,KAAK,kBAAkB,CAAC,YAAY,CAAC,CAAC,CAAC;gBACnC,IAAI,CAAC,MAAM,EAAE,0CAA0C,CACnD,IAAI,CAAC,QAAQ,CAChB,CAAC;gBACF,OAAO,CAAC,CAAC;YACb,CAAC;YACD,KAAK,kBAAkB,CAAC,WAAW,CAAC,CAAC,CAAC;gBAClC,iCAAiC;gBACjC,OAAO,CAAC,CAAC;YACb,CAAC;QACL,CAAC;IACL,CAAC;CACJ;AAED;;;;GAIG;AACH,SAAS,UAAU,CAAC,UAAuB;IACvC,IAAI,WAAW,GAAG,EAAE,CAAC;IACrB,MAAM,OAAO,GAAG,IAAI,aAAa,CAC7B,UAAU,EACV,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC,WAAW,IAAI,MAAM,CAAC,aAAa,CAAC,IAAI,CAAC,CAAC,CACxD,CAAC;IAEF,OAAO,SAAS,cAAc,CAC1B,KAAa,EACb,UAAwB;QAExB,IAAI,SAAS,GAAG,CAAC,CAAC;QAClB,IAAI,MAAM,GAAG,CAAC,CAAC;QAEf,OAAO,CAAC,MAAM,GAAG,KAAK,CAAC,OAAO,CAAC,GAAG,EAAE,MAAM,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC;YAChD,WAAW,IAAI,KAAK,CAAC,KAAK,CAAC,SAAS,EAAE,MAAM,CAAC,CAAC;YAE9C,OAAO,CAAC,WAAW,CAAC,UAAU,CAAC,CAAC;YAEhC,MAAM,MAAM,GAAG,OAAO,CAAC,KAAK,CACxB,KAAK;YACL,eAAe;YACf,MAAM,GAAG,CAAC,CACb,CAAC;YAEF,IAAI,MAAM,GAAG,CAAC,EAAE,CAAC;gBACb,SAAS,GAAG,MAAM,GAAG,OAAO,CAAC,GAAG,EAAE,CAAC;gBACnC,MAAM;YACV,CAAC;YAED,SAAS,GAAG,MAAM,GAAG,MAAM,CAAC;YAC5B,uDAAuD;YACvD,MAAM,GAAG,MAAM,KAAK,CAAC,CAAC,CAAC,CAAC,SAAS,GAAG,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC;QACtD,CAAC;QAED,MAAM,MAAM,GAAG,WAAW,GAAG,KAAK,CAAC,KAAK,CAAC,SAAS,CAAC,CAAC;QAEpD,2DAA2D;QAC3D,WAAW,GAAG,EAAE,CAAC;QAEjB,OAAO,MAAM,CAAC;IAClB,CAAC,CAAC;AACN,CAAC;AAED;;;;;;;;GAQG;AACH,MAAM,UAAU,eAAe,CAC3B,UAAuB,EACvB,OAAe,EACf,SAAiB,EACjB,IAAY;IAEZ,MAAM,WAAW,GAAG,CAAC,OAAO,GAAG,YAAY,CAAC,aAAa,CAAC,IAAI,CAAC,CAAC;IAChE,MAAM,UAAU,GAAG,OAAO,GAAG,YAAY,CAAC,UAAU,CAAC;IAErD,+CAA+C;IAC/C,IAAI,WAAW,KAAK,CAAC,EAAE,CAAC;QACpB,OAAO,UAAU,KAAK,CAAC,IAAI,IAAI,KAAK,UAAU,CAAC,CAAC,CAAC,SA
AS,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;IACpE,CAAC;IAED,kDAAkD;IAClD,IAAI,UAAU,EAAE,CAAC;QACb,MAAM,KAAK,GAAG,IAAI,GAAG,UAAU,CAAC;QAEhC,OAAO,KAAK,GAAG,CAAC,IAAI,KAAK,IAAI,WAAW;YACpC,CAAC,CAAC,CAAC,CAAC;YACJ,CAAC,CAAC,UAAU,CAAC,SAAS,GAAG,KAAK,CAAC,GAAG,CAAC,CAAC;IAC5C,CAAC;IAED,+EAA+E;IAC/E,MAAM,cAAc,GAAG,CAAC,WAAW,GAAG,CAAC,CAAC,IAAI,CAAC,CAAC;IAE9C;;;OAGG;IACH,IAAI,EAAE,GAAG,CAAC,CAAC;IACX,IAAI,EAAE,GAAG,WAAW,GAAG,CAAC,CAAC;IAEzB,OAAO,EAAE,IAAI,EAAE,EAAE,CAAC;QACd,MAAM,GAAG,GAAG,CAAC,EAAE,GAAG,EAAE,CAAC,KAAK,CAAC,CAAC;QAC5B,MAAM,IAAI,GAAG,GAAG,IAAI,CAAC,CAAC;QACtB,MAAM,MAAM,GAAG,UAAU,CAAC,SAAS,GAAG,IAAI,CAAC,CAAC;QAC5C,MAAM,MAAM,GAAG,CAAC,MAAM,IAAI,CAAC,CAAC,GAAG,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,IAAI,CAAC;QAElD,IAAI,MAAM,GAAG,IAAI,EAAE,CAAC;YAChB,EAAE,GAAG,GAAG,GAAG,CAAC,CAAC;QACjB,CAAC;aAAM,IAAI,MAAM,GAAG,IAAI,EAAE,CAAC;YACvB,EAAE,GAAG,GAAG,GAAG,CAAC,CAAC;QACjB,CAAC;aAAM,CAAC;YACJ,OAAO,UAAU,CAAC,SAAS,GAAG,cAAc,GAAG,GAAG,CAAC,CAAC;QACxD,CAAC;IACL,CAAC;IAED,OAAO,CAAC,CAAC,CAAC;AACd,CAAC;AAED,MAAM,WAAW,GAAG,eAAe,CAAC,UAAU,CAAC,cAAc,CAAC,CAAC;AAC/D,MAAM,UAAU,GAAG,eAAe,CAAC,UAAU,CAAC,aAAa,CAAC,CAAC;AAE7D;;;;;GAKG;AACH,MAAM,UAAU,UAAU,CACtB,UAAkB,EAClB,OAAqB,YAAY,CAAC,MAAM;IAExC,OAAO,WAAW,CAAC,UAAU,EAAE,IAAI,CAAC,CAAC;AACzC,CAAC;AAED;;;;GAIG;AACH,MAAM,UAAU,mBAAmB,CAAC,aAAqB;IACrD,OAAO,WAAW,CAAC,aAAa,EAAE,YAAY,CAAC,SAAS,CAAC,CAAC;AAC9D,CAAC;AAED;;;;GAIG;AACH,MAAM,UAAU,gBAAgB,CAAC,UAAkB;IAC/C,OAAO,WAAW,CAAC,UAAU,EAAE,YAAY,CAAC,MAAM,CAAC,CAAC;AACxD,CAAC;AAED;;;;GAIG;AACH,MAAM,UAAU,SAAS,CAAC,SAAiB;IACvC,OAAO,UAAU,CAAC,SAAS,EAAE,YAAY,CAAC,MAAM,CAAC,CAAC;AACtD,CAAC;AAED,OAAO,EAAE,gBAAgB,EAAE,MAAM,uBAAuB,CAAC;AACzD,uCAAuC;AACvC,OAAO,EAAE,cAAc,EAAE,MAAM,iCAAiC,CAAC;AACjE,OAAO,EAAE,aAAa,EAAE,MAAM,gCAAgC,CAAC"}
@@ -8,6 +8,7 @@
8
8
  *
9
9
  * If a character has no equivalent entity, a numeric hexadecimal reference
10
10
  * (eg. `&#xfc;`) will be used.
11
+ * @param input Input string to encode or decode.
11
12
  */
12
13
  export declare function encodeHTML(input: string): string;
13
14
  /**
@@ -17,6 +18,7 @@ export declare function encodeHTML(input: string): string;
17
18
  *
18
19
  * If a character has no equivalent entity, a numeric hexadecimal reference
19
20
  * (eg. `&#xfc;`) will be used.
21
+ * @param input Input string to encode or decode.
20
22
  */
21
23
  export declare function encodeNonAsciiHTML(input: string): string;
22
24
  //# sourceMappingURL=encode.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"encode.d.ts","sourceRoot":"","sources":["../src/encode.ts"],"names":[],"mappings":"AAeA;;;;;;;;;;;GAWG;AACH,wBAAgB,UAAU,CAAC,KAAK,EAAE,MAAM,GAAG,MAAM,CAEhD;AACD;;;;;;;;GAQG;AACH,wBAAgB,kBAAkB,CAAC,KAAK,EAAE,MAAM,GAAG,MAAM,CAExD"}
package/dist/encode.js ADDED
@@ -0,0 +1,90 @@
1
+ import { getCodePoint, XML_BITSET_VALUE } from "./escape.js";
2
+ import { htmlTrie } from "./generated/encode-html.js";
3
+ /**
4
+ * We store the characters to consider as a compact bitset for fast lookups.
5
+ */
6
+ const HTML_BITSET = /* #__PURE__ */ new Uint32Array([
7
+ 0x16_00, // Bits for 09,0A,0C
8
+ 0xfc_00_ff_fe, // 32..63 -> 21-2D (minus space), 2E,2F,3A-3F
9
+ 0xf8_00_00_01, // 64..95 -> 40, 5B-5F
10
+ 0x38_00_00_01, // 96..127-> 60, 7B-7D
11
+ ]);
12
+ const XML_BITSET = /* #__PURE__ */ new Uint32Array([0, XML_BITSET_VALUE, 0, 0]);
13
+ /**
14
+ * Encodes all characters in the input using HTML entities. This includes
15
+ * characters that are valid ASCII characters in HTML documents, such as `#`.
16
+ *
17
+ * To get a more compact output, consider using the `encodeNonAsciiHTML`
18
+ * function, which will only encode characters that are not valid in HTML
19
+ * documents, as well as non-ASCII characters.
20
+ *
21
+ * If a character has no equivalent entity, a numeric hexadecimal reference
22
+ * (eg. `&#xfc;`) will be used.
23
+ * @param input Input string to encode or decode.
24
+ */
25
+ export function encodeHTML(input) {
26
+ return encodeHTMLTrieRe(HTML_BITSET, input);
27
+ }
28
+ /**
29
+ * Encodes all non-ASCII characters, as well as characters not valid in HTML
30
+ * documents using HTML entities. This function will not encode characters that
31
+ * are valid in HTML documents, such as `#`.
32
+ *
33
+ * If a character has no equivalent entity, a numeric hexadecimal reference
34
+ * (eg. `&#xfc;`) will be used.
35
+ * @param input Input string to encode or decode.
36
+ */
37
+ export function encodeNonAsciiHTML(input) {
38
+ return encodeHTMLTrieRe(XML_BITSET, input);
39
+ }
40
+ function encodeHTMLTrieRe(bitset, input) {
41
+ let out;
42
+ let last = 0; // Start of the next untouched slice.
43
+ const { length } = input;
44
+ for (let index = 0; index < length; index++) {
45
+ const char = input.charCodeAt(index);
46
+ // Skip ASCII characters that don't need encoding
47
+ if (char < 0x80 && !((bitset[char >>> 5] >>> char) & 1)) {
48
+ continue;
49
+ }
50
+ if (out === undefined)
51
+ out = input.substring(0, index);
52
+ else if (last !== index)
53
+ out += input.substring(last, index);
54
+ let node = htmlTrie.get(char);
55
+ if (typeof node === "object") {
56
+ if (index + 1 < length) {
57
+ const nextChar = input.charCodeAt(index + 1);
58
+ const value = typeof node.next === "number"
59
+ ? node.next === nextChar
60
+ ? node.nextValue
61
+ : undefined
62
+ : node.next.get(nextChar);
63
+ if (value !== undefined) {
64
+ out += value;
65
+ index++;
66
+ last = index + 1;
67
+ continue;
68
+ }
69
+ }
70
+ node = node.value;
71
+ }
72
+ if (node === undefined) {
73
+ const cp = getCodePoint(input, index);
74
+ out += `&#x${cp.toString(16)};`;
75
+ if (cp !== char)
76
+ index++;
77
+ last = index + 1;
78
+ }
79
+ else {
80
+ out += node;
81
+ last = index + 1;
82
+ }
83
+ }
84
+ if (out === undefined)
85
+ return input;
86
+ if (last < length)
87
+ out += input.substr(last);
88
+ return out;
89
+ }
90
+ //# sourceMappingURL=encode.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"encode.js","sourceRoot":"","sources":["../src/encode.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,YAAY,EAAE,gBAAgB,EAAE,MAAM,aAAa,CAAC;AAC7D,OAAO,EAAE,QAAQ,EAAE,MAAM,4BAA4B,CAAC;AAEtD;;GAEG;AACH,MAAM,WAAW,GAAG,eAAe,CAAC,IAAI,WAAW,CAAC;IAChD,OAAO,EAAE,oBAAoB;IAC7B,aAAa,EAAE,6CAA6C;IAC5D,aAAa,EAAE,sBAAsB;IACrC,aAAa,EAAE,sBAAsB;CACxC,CAAC,CAAC;AAEH,MAAM,UAAU,GAAG,eAAe,CAAC,IAAI,WAAW,CAAC,CAAC,CAAC,EAAE,gBAAgB,EAAE,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC;AAEhF;;;;;;;;;;;GAWG;AACH,MAAM,UAAU,UAAU,CAAC,KAAa;IACpC,OAAO,gBAAgB,CAAC,WAAW,EAAE,KAAK,CAAC,CAAC;AAChD,CAAC;AACD;;;;;;;;GAQG;AACH,MAAM,UAAU,kBAAkB,CAAC,KAAa;IAC5C,OAAO,gBAAgB,CAAC,UAAU,EAAE,KAAK,CAAC,CAAC;AAC/C,CAAC;AAED,SAAS,gBAAgB,CAAC,MAAmB,EAAE,KAAa;IACxD,IAAI,GAAuB,CAAC;IAC5B,IAAI,IAAI,GAAG,CAAC,CAAC,CAAC,qCAAqC;IACnD,MAAM,EAAE,MAAM,EAAE,GAAG,KAAK,CAAC;IAEzB,KAAK,IAAI,KAAK,GAAG,CAAC,EAAE,KAAK,GAAG,MAAM,EAAE,KAAK,EAAE,EAAE,CAAC;QAC1C,MAAM,IAAI,GAAG,KAAK,CAAC,UAAU,CAAC,KAAK,CAAC,CAAC;QACrC,iDAAiD;QACjD,IAAI,IAAI,GAAG,IAAI,IAAI,CAAC,CAAC,CAAC,MAAM,CAAC,IAAI,KAAK,CAAC,CAAC,KAAK,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,CAAC;YACtD,SAAS;QACb,CAAC;QAED,IAAI,GAAG,KAAK,SAAS;YAAE,GAAG,GAAG,KAAK,CAAC,SAAS,CAAC,CAAC,EAAE,KAAK,CAAC,CAAC;aAClD,IAAI,IAAI,KAAK,KAAK;YAAE,GAAG,IAAI,KAAK,CAAC,SAAS,CAAC,IAAI,EAAE,KAAK,CAAC,CAAC;QAE7D,IAAI,IAAI,GAAG,QAAQ,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;QAE9B,IAAI,OAAO,IAAI,KAAK,QAAQ,EAAE,CAAC;YAC3B,IAAI,KAAK,GAAG,CAAC,GAAG,MAAM,EAAE,CAAC;gBACrB,MAAM,QAAQ,GAAG,KAAK,CAAC,UAAU,CAAC,KAAK,GAAG,CAAC,CAAC,CAAC;gBAC7C,MAAM,KAAK,GACP,OAAO,IAAI,CAAC,IAAI,KAAK,QAAQ;oBACzB,CAAC,CAAC,IAAI,CAAC,IAAI,KAAK,QAAQ;wBACpB,CAAC,CAAC,IAAI,CAAC,SAAS;wBAChB,CAAC,CAAC,SAAS;oBACf,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC;gBAElC,IAAI,KAAK,KAAK,SAAS,EAAE,CAAC;oBACtB,GAAG,IAAI,KAAK,CAAC;oBACb,KAAK,EAAE,CAAC;oBACR,IAAI,GAAG,KAAK,GAAG,CAAC,CAAC;oBACjB,SAAS;gBACb,CAAC;YACL,CAAC;YACD,IAAI,GAAG,IAAI,CAAC,KAAK,CAAC;QACtB,CAAC;QAED,IAAI,IAAI,KAAK,SAAS,EAAE,CAAC;YACrB,MAAM,EAAE,GAAG,YAAY,CAAC,KAAK,EAAE,KAAK,CAAC,CAAC;YACtC,GAA
G,IAAI,MAAM,EAAE,CAAC,QAAQ,CAAC,EAAE,CAAC,GAAG,CAAC;YAChC,IAAI,EAAE,KAAK,IAAI;gBAAE,KAAK,EAAE,CAAC;YACzB,IAAI,GAAG,KAAK,GAAG,CAAC,CAAC;QACrB,CAAC;aAAM,CAAC;YACJ,GAAG,IAAI,IAAI,CAAC;YACZ,IAAI,GAAG,KAAK,GAAG,CAAC,CAAC;QACrB,CAAC;IACL,CAAC;IAED,IAAI,GAAG,KAAK,SAAS;QAAE,OAAO,KAAK,CAAC;IACpC,IAAI,IAAI,GAAG,MAAM;QAAE,GAAG,IAAI,KAAK,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC;IAC7C,OAAO,GAAG,CAAC;AACf,CAAC"}