entities 6.0.0 → 7.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (98) hide show
  1. package/decode.d.ts +3 -0
  2. package/dist/commonjs/decode-codepoint.d.ts.map +1 -1
  3. package/dist/commonjs/decode-codepoint.js +2 -2
  4. package/dist/commonjs/decode-codepoint.js.map +1 -1
  5. package/dist/commonjs/decode.d.ts +1 -7
  6. package/dist/commonjs/decode.d.ts.map +1 -1
  7. package/dist/commonjs/decode.js +105 -48
  8. package/dist/commonjs/decode.js.map +1 -1
  9. package/dist/commonjs/encode.d.ts.map +1 -1
  10. package/dist/commonjs/encode.js +49 -30
  11. package/dist/commonjs/encode.js.map +1 -1
  12. package/dist/commonjs/escape.d.ts +7 -4
  13. package/dist/commonjs/escape.d.ts.map +1 -1
  14. package/dist/commonjs/escape.js +36 -19
  15. package/dist/commonjs/escape.js.map +1 -1
  16. package/dist/commonjs/generated/decode-data-html.d.ts.map +1 -1
  17. package/dist/commonjs/generated/decode-data-html.js +2 -5
  18. package/dist/commonjs/generated/decode-data-html.js.map +1 -1
  19. package/dist/commonjs/generated/decode-data-xml.d.ts.map +1 -1
  20. package/dist/commonjs/generated/decode-data-xml.js +2 -5
  21. package/dist/commonjs/generated/decode-data-xml.js.map +1 -1
  22. package/dist/commonjs/generated/encode-html.d.ts +1 -6
  23. package/dist/commonjs/generated/encode-html.d.ts.map +1 -1
  24. package/dist/commonjs/generated/encode-html.js +9 -8
  25. package/dist/commonjs/generated/encode-html.js.map +1 -1
  26. package/dist/commonjs/index.d.ts +3 -3
  27. package/dist/commonjs/index.d.ts.map +1 -1
  28. package/dist/commonjs/index.js +19 -19
  29. package/dist/commonjs/index.js.map +1 -1
  30. package/dist/commonjs/internal/bin-trie-flags.d.ts +17 -0
  31. package/dist/commonjs/internal/bin-trie-flags.d.ts.map +1 -0
  32. package/dist/commonjs/internal/bin-trie-flags.js +21 -0
  33. package/dist/commonjs/internal/bin-trie-flags.js.map +1 -0
  34. package/dist/commonjs/internal/decode-shared.d.ts +2 -0
  35. package/dist/commonjs/internal/decode-shared.d.ts.map +1 -0
  36. package/dist/commonjs/internal/decode-shared.js +31 -0
  37. package/dist/commonjs/internal/decode-shared.js.map +1 -0
  38. package/dist/commonjs/internal/encode-shared.d.ts +32 -0
  39. package/dist/commonjs/internal/encode-shared.d.ts.map +1 -0
  40. package/dist/commonjs/internal/encode-shared.js +94 -0
  41. package/dist/commonjs/internal/encode-shared.js.map +1 -0
  42. package/dist/esm/decode-codepoint.d.ts.map +1 -1
  43. package/dist/esm/decode-codepoint.js +2 -2
  44. package/dist/esm/decode-codepoint.js.map +1 -1
  45. package/dist/esm/decode.d.ts +1 -7
  46. package/dist/esm/decode.d.ts.map +1 -1
  47. package/dist/esm/decode.js +96 -39
  48. package/dist/esm/decode.js.map +1 -1
  49. package/dist/esm/encode.d.ts.map +1 -1
  50. package/dist/esm/encode.js +49 -30
  51. package/dist/esm/encode.js.map +1 -1
  52. package/dist/esm/escape.d.ts +7 -4
  53. package/dist/esm/escape.d.ts.map +1 -1
  54. package/dist/esm/escape.js +35 -18
  55. package/dist/esm/escape.js.map +1 -1
  56. package/dist/esm/generated/decode-data-html.d.ts.map +1 -1
  57. package/dist/esm/generated/decode-data-html.js +2 -5
  58. package/dist/esm/generated/decode-data-html.js.map +1 -1
  59. package/dist/esm/generated/decode-data-xml.d.ts.map +1 -1
  60. package/dist/esm/generated/decode-data-xml.js +2 -5
  61. package/dist/esm/generated/decode-data-xml.js.map +1 -1
  62. package/dist/esm/generated/encode-html.d.ts +1 -6
  63. package/dist/esm/generated/encode-html.d.ts.map +1 -1
  64. package/dist/esm/generated/encode-html.js +9 -8
  65. package/dist/esm/generated/encode-html.js.map +1 -1
  66. package/dist/esm/index.d.ts +3 -3
  67. package/dist/esm/index.d.ts.map +1 -1
  68. package/dist/esm/index.js +9 -9
  69. package/dist/esm/index.js.map +1 -1
  70. package/dist/esm/internal/bin-trie-flags.d.ts +17 -0
  71. package/dist/esm/internal/bin-trie-flags.d.ts.map +1 -0
  72. package/dist/esm/internal/bin-trie-flags.js +18 -0
  73. package/dist/esm/internal/bin-trie-flags.js.map +1 -0
  74. package/dist/esm/internal/decode-shared.d.ts +2 -0
  75. package/dist/esm/internal/decode-shared.d.ts.map +1 -0
  76. package/dist/esm/internal/decode-shared.js +28 -0
  77. package/dist/esm/internal/decode-shared.js.map +1 -0
  78. package/dist/esm/internal/encode-shared.d.ts +32 -0
  79. package/dist/esm/internal/encode-shared.d.ts.map +1 -0
  80. package/dist/esm/internal/encode-shared.js +91 -0
  81. package/dist/esm/internal/encode-shared.js.map +1 -0
  82. package/escape.d.ts +3 -0
  83. package/package.json +19 -22
  84. package/src/decode-codepoint.ts +2 -2
  85. package/src/decode.spec.ts +44 -1
  86. package/src/decode.ts +111 -55
  87. package/src/encode.spec.ts +1 -1
  88. package/src/encode.ts +47 -31
  89. package/src/escape.spec.ts +1 -1
  90. package/src/escape.ts +39 -26
  91. package/src/generated/decode-data-html.ts +3 -5
  92. package/src/generated/decode-data-xml.ts +3 -5
  93. package/src/generated/encode-html.ts +14 -14
  94. package/src/index.spec.ts +2 -2
  95. package/src/index.ts +23 -24
  96. package/src/internal/bin-trie-flags.ts +16 -0
  97. package/src/internal/decode-shared.ts +30 -0
  98. package/src/internal/encode-shared.ts +121 -0
@@ -0,0 +1,121 @@
/**
 * One node of the encoding trie consumed by `encode.ts`.
 *
 * A node takes one of two physical shapes, chosen to keep allocations and
 * lookups cheap:
 *
 * 1. Leaf node (string)
 *    - The entity itself, already wrapped as `"&name;"`.
 *    - A terminal match: nothing can follow it.
 *
 * 2. Branch / value node (object)
 *    - `value` is the entity for the path ending at this node, or
 *      `undefined` when that path is not an entity on its own.
 *    - `next` describes the continuation: either the code unit of the single
 *      possible next character (its entity is then stored in `nextValue`),
 *      or a `Map` from next code unit to child node.
 */
export type EncodeTrieNode =
  | string
  | {
      /**
       * Wrapped entity (`&...;`) for the code point sequence leading to this
       * node; `undefined` when the sequence only matches with a continuation.
       */
      value: string | undefined;
      /**
       * Continuation of this node: a number is the code unit of the only
       * possible next character, a `Map` holds several possible children.
       */
      next: number | Map<number, EncodeTrieNode>;
      /** Entity for the single continuation; set when `next` is a number. */
      nextValue?: string;
    };
25
+
/**
 * Decode the compact string form of the encode trie into the nested `Map`
 * structure used while encoding.
 *
 * Entries are listed in ascending key order and keys are delta encoded:
 *
 *   <diffBase36>[&name;][{<children>}]
 *
 * - `diffBase36` is `currentKey - previousKey - 1` written in base 36
 *   (`0-9a-z`). It is left out when it is 0, and the first entry of a level
 *   stores its key as-is.
 * - `&name;` is the entity value, already wrapped.
 * - `{...}` holds the children of the entry. Children use the same
 *   `<diffBase36>&name;` layout, must each carry a value, and cannot have
 *   children of their own.
 *
 * @param serialized Compact trie string in the format described above.
 * @returns Map from top-level key to its trie node. An entry without children
 *   is stored as a plain string; an entry with exactly one child stores that
 *   child inline (`next` is the child key, `nextValue` its entity); an entry
 *   with several children stores them in a `Map` under `next`.
 * @throws {Error} If an entity is missing or has no closing `;`, if a child
 *   block is nested or left unclosed, or if a top-level entry has neither a
 *   value nor children.
 */
export function parseEncodeTrie(
  serialized: string,
): Map<number, EncodeTrieNode> {
  const length = serialized.length;
  const result = new Map<number, EncodeTrieNode>();
  let pos = 0;

  /** Turn a stored delta into a key; -1 marks "no previous key on this level". */
  const resolveKey = (previousKey: number, delta: number): number =>
    previousKey === -1 ? delta : previousKey + delta + 1;

  /** Consume a run of base-36 digits (`0-9`, `a-z`); an empty run means 0. */
  const readDelta = (): number => {
    const from = pos;
    for (; pos < length; pos++) {
      const code = serialized.charCodeAt(pos);
      const isDecimalDigit = code >= 48 && code <= 57; // "0".."9"
      const isLowercaseLetter = code >= 97 && code <= 122; // "a".."z"
      if (!isDecimalDigit && !isLowercaseLetter) break;
    }
    return pos === from ? 0 : Number.parseInt(serialized.slice(from, pos), 36);
  };

  /** Consume `&...;` starting at the current position and return it whole. */
  const readEntity = (): string => {
    if (serialized[pos] !== "&") {
      throw new Error(`Child entry missing value near index ${pos}`);
    }

    const from = pos;
    const semicolon = serialized.indexOf(";", from + 1);
    if (semicolon === -1) {
      throw new Error(`Unterminated entity starting at index ${from}`);
    }

    pos = semicolon + 1; // Step over the ';'
    return serialized.slice(from, pos);
  };

  /** Consume one child entry; children may not open a block of their own. */
  const readChild = (previousKey: number): [key: number, entity: string] => {
    const key = resolveKey(previousKey, readDelta());
    const entity = readEntity();
    if (serialized[pos] === "{") {
      throw new Error("Unexpected nested '{' beyond depth 2");
    }
    return [key, entity];
  };

  /** Consume a `{...}` block and build the branch node that owns it. */
  const readBranch = (value: string | undefined): EncodeTrieNode => {
    pos++; // Step over the '{'
    const [firstKey, firstEntity] = readChild(-1);

    // A block that closes right after its first child is stored inline.
    if (serialized[pos] === "}") {
      pos++; // Step over the '}'
      return { value, next: firstKey, nextValue: firstEntity };
    }

    const children = new Map<number, EncodeTrieNode>([[firstKey, firstEntity]]);
    let previousKey = firstKey;
    while (pos < length && serialized[pos] !== "}") {
      const [childKey, childEntity] = readChild(previousKey);
      children.set(childKey, childEntity);
      previousKey = childKey;
    }
    if (serialized[pos] !== "}") {
      throw new Error("Unterminated child block");
    }
    pos++; // Step over the '}'
    return { value, next: children };
  };

  let previousTopKey = -1;
  while (pos < length) {
    const key = resolveKey(previousTopKey, readDelta());
    const value = serialized[pos] === "&" ? readEntity() : undefined;

    if (serialized[pos] === "{") {
      result.set(key, readBranch(value));
    } else if (value === undefined) {
      // Neither a value nor children: nothing was consumed for this entry.
      throw new Error(`Malformed encode trie: missing value at index ${pos}`);
    } else {
      result.set(key, value);
    }

    previousTopKey = key;
  }

  return result;
}