entities 7.0.0 → 8.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (140) hide show
  1. package/dist/decode-codepoint.d.ts +8 -0
  2. package/dist/decode-codepoint.d.ts.map +1 -0
  3. package/dist/decode-codepoint.js +46 -0
  4. package/dist/decode-codepoint.js.map +1 -0
  5. package/dist/{commonjs/decode.d.ts → decode.d.ts} +11 -20
  6. package/dist/decode.d.ts.map +1 -0
  7. package/dist/{esm/decode.js → decode.js} +65 -75
  8. package/dist/decode.js.map +1 -0
  9. package/dist/{commonjs/encode.d.ts → encode.d.ts} +2 -0
  10. package/dist/encode.d.ts.map +1 -0
  11. package/dist/{esm/encode.js → encode.js} +6 -4
  12. package/dist/encode.js.map +1 -0
  13. package/dist/{esm/escape.d.ts → escape.d.ts} +6 -4
  14. package/dist/escape.d.ts.map +1 -0
  15. package/dist/{esm/escape.js → escape.js} +15 -17
  16. package/dist/escape.js.map +1 -0
  17. package/dist/generated/decode-data-html.d.ts +3 -0
  18. package/dist/generated/decode-data-html.d.ts.map +1 -0
  19. package/dist/{esm/generated → generated}/decode-data-html.js +1 -0
  20. package/dist/generated/decode-data-html.js.map +1 -0
  21. package/dist/generated/decode-data-xml.d.ts +3 -0
  22. package/dist/generated/decode-data-xml.d.ts.map +1 -0
  23. package/dist/{esm/generated → generated}/decode-data-xml.js +1 -0
  24. package/dist/generated/decode-data-xml.js.map +1 -0
  25. package/dist/generated/encode-html.d.ts +5 -0
  26. package/dist/generated/encode-html.d.ts.map +1 -0
  27. package/dist/{esm/generated → generated}/encode-html.js +2 -1
  28. package/dist/generated/encode-html.js.map +1 -0
  29. package/dist/{commonjs/index.d.ts → index.d.ts} +9 -16
  30. package/dist/index.d.ts.map +1 -0
  31. package/dist/{esm/index.js → index.js} +7 -23
  32. package/dist/index.js.map +1 -0
  33. package/dist/internal/bin-trie-flags.d.ts.map +1 -0
  34. package/dist/internal/bin-trie-flags.js.map +1 -0
  35. package/dist/internal/decode-shared.d.ts +7 -0
  36. package/dist/internal/decode-shared.d.ts.map +1 -0
  37. package/dist/internal/decode-shared.js +17 -0
  38. package/dist/internal/decode-shared.js.map +1 -0
  39. package/dist/{commonjs/internal → internal}/encode-shared.d.ts +1 -0
  40. package/dist/internal/encode-shared.d.ts.map +1 -0
  41. package/dist/{esm/internal → internal}/encode-shared.js +4 -2
  42. package/dist/internal/encode-shared.js.map +1 -0
  43. package/package.json +33 -63
  44. package/readme.md +36 -27
  45. package/src/decode-codepoint.ts +1 -32
  46. package/src/decode.ts +49 -54
  47. package/src/encode.ts +2 -0
  48. package/src/escape.ts +11 -12
  49. package/src/generated/decode-data-html.ts +1 -0
  50. package/src/generated/decode-data-xml.ts +1 -0
  51. package/src/generated/encode-html.ts +2 -1
  52. package/src/index.ts +8 -33
  53. package/src/internal/decode-shared.ts +3 -15
  54. package/src/internal/encode-shared.ts +4 -2
  55. package/decode.d.ts +0 -3
  56. package/decode.js +0 -3
  57. package/dist/commonjs/decode-codepoint.d.ts +0 -19
  58. package/dist/commonjs/decode-codepoint.d.ts.map +0 -1
  59. package/dist/commonjs/decode-codepoint.js +0 -77
  60. package/dist/commonjs/decode-codepoint.js.map +0 -1
  61. package/dist/commonjs/decode.d.ts.map +0 -1
  62. package/dist/commonjs/decode.js +0 -568
  63. package/dist/commonjs/decode.js.map +0 -1
  64. package/dist/commonjs/encode.d.ts.map +0 -1
  65. package/dist/commonjs/encode.js +0 -92
  66. package/dist/commonjs/encode.js.map +0 -1
  67. package/dist/commonjs/escape.d.ts +0 -46
  68. package/dist/commonjs/escape.d.ts.map +0 -1
  69. package/dist/commonjs/escape.js +0 -138
  70. package/dist/commonjs/escape.js.map +0 -1
  71. package/dist/commonjs/generated/decode-data-html.d.ts +0 -2
  72. package/dist/commonjs/generated/decode-data-html.d.ts.map +0 -1
  73. package/dist/commonjs/generated/decode-data-html.js +0 -7
  74. package/dist/commonjs/generated/decode-data-html.js.map +0 -1
  75. package/dist/commonjs/generated/decode-data-xml.d.ts +0 -2
  76. package/dist/commonjs/generated/decode-data-xml.d.ts.map +0 -1
  77. package/dist/commonjs/generated/decode-data-xml.js +0 -7
  78. package/dist/commonjs/generated/decode-data-xml.js.map +0 -1
  79. package/dist/commonjs/generated/encode-html.d.ts +0 -3
  80. package/dist/commonjs/generated/encode-html.d.ts.map +0 -1
  81. package/dist/commonjs/generated/encode-html.js +0 -14
  82. package/dist/commonjs/generated/encode-html.js.map +0 -1
  83. package/dist/commonjs/index.d.ts.map +0 -1
  84. package/dist/commonjs/index.js +0 -131
  85. package/dist/commonjs/index.js.map +0 -1
  86. package/dist/commonjs/internal/bin-trie-flags.d.ts.map +0 -1
  87. package/dist/commonjs/internal/bin-trie-flags.js +0 -21
  88. package/dist/commonjs/internal/bin-trie-flags.js.map +0 -1
  89. package/dist/commonjs/internal/decode-shared.d.ts +0 -2
  90. package/dist/commonjs/internal/decode-shared.d.ts.map +0 -1
  91. package/dist/commonjs/internal/decode-shared.js +0 -31
  92. package/dist/commonjs/internal/decode-shared.js.map +0 -1
  93. package/dist/commonjs/internal/encode-shared.d.ts.map +0 -1
  94. package/dist/commonjs/internal/encode-shared.js +0 -94
  95. package/dist/commonjs/internal/encode-shared.js.map +0 -1
  96. package/dist/commonjs/package.json +0 -3
  97. package/dist/esm/decode-codepoint.d.ts +0 -19
  98. package/dist/esm/decode-codepoint.d.ts.map +0 -1
  99. package/dist/esm/decode-codepoint.js +0 -72
  100. package/dist/esm/decode-codepoint.js.map +0 -1
  101. package/dist/esm/decode.d.ts +0 -203
  102. package/dist/esm/decode.d.ts.map +0 -1
  103. package/dist/esm/decode.js.map +0 -1
  104. package/dist/esm/encode.d.ts +0 -22
  105. package/dist/esm/encode.d.ts.map +0 -1
  106. package/dist/esm/encode.js.map +0 -1
  107. package/dist/esm/escape.d.ts.map +0 -1
  108. package/dist/esm/escape.js.map +0 -1
  109. package/dist/esm/generated/decode-data-html.d.ts +0 -2
  110. package/dist/esm/generated/decode-data-html.d.ts.map +0 -1
  111. package/dist/esm/generated/decode-data-html.js.map +0 -1
  112. package/dist/esm/generated/decode-data-xml.d.ts +0 -2
  113. package/dist/esm/generated/decode-data-xml.d.ts.map +0 -1
  114. package/dist/esm/generated/decode-data-xml.js.map +0 -1
  115. package/dist/esm/generated/encode-html.d.ts +0 -3
  116. package/dist/esm/generated/encode-html.d.ts.map +0 -1
  117. package/dist/esm/generated/encode-html.js.map +0 -1
  118. package/dist/esm/index.d.ts +0 -96
  119. package/dist/esm/index.d.ts.map +0 -1
  120. package/dist/esm/index.js.map +0 -1
  121. package/dist/esm/internal/bin-trie-flags.d.ts +0 -17
  122. package/dist/esm/internal/bin-trie-flags.d.ts.map +0 -1
  123. package/dist/esm/internal/bin-trie-flags.js.map +0 -1
  124. package/dist/esm/internal/decode-shared.d.ts +0 -2
  125. package/dist/esm/internal/decode-shared.d.ts.map +0 -1
  126. package/dist/esm/internal/decode-shared.js +0 -28
  127. package/dist/esm/internal/decode-shared.js.map +0 -1
  128. package/dist/esm/internal/encode-shared.d.ts +0 -32
  129. package/dist/esm/internal/encode-shared.d.ts.map +0 -1
  130. package/dist/esm/internal/encode-shared.js.map +0 -1
  131. package/dist/esm/package.json +0 -3
  132. package/escape.d.ts +0 -3
  133. package/escape.js +0 -3
  134. package/src/decode.spec.ts +0 -363
  135. package/src/encode.spec.ts +0 -78
  136. package/src/escape.spec.ts +0 -14
  137. package/src/generated/.eslintrc.json +0 -10
  138. package/src/index.spec.ts +0 -125
  139. /package/dist/{commonjs/internal → internal}/bin-trie-flags.d.ts +0 -0
  140. /package/dist/{esm/internal → internal}/bin-trie-flags.js +0 -0
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "entities",
3
- "version": "7.0.0",
3
+ "version": "8.0.0",
4
4
  "description": "Encode & decode XML and HTML entities with ease & speed",
5
5
  "keywords": [
6
6
  "html entities",
@@ -22,92 +22,62 @@
22
22
  "type": "module",
23
23
  "exports": {
24
24
  ".": {
25
- "import": {
26
- "types": "./dist/esm/index.d.ts",
27
- "default": "./dist/esm/index.js"
28
- },
29
- "require": {
30
- "types": "./dist/commonjs/index.d.ts",
31
- "default": "./dist/commonjs/index.js"
32
- }
25
+ "types": "./dist/index.d.ts",
26
+ "default": "./dist/index.js"
33
27
  },
34
28
  "./decode": {
35
- "import": {
36
- "types": "./dist/esm/decode.d.ts",
37
- "default": "./dist/esm/decode.js"
38
- },
39
- "require": {
40
- "types": "./dist/commonjs/decode.d.ts",
41
- "default": "./dist/commonjs/decode.js"
42
- }
29
+ "types": "./dist/decode.d.ts",
30
+ "default": "./dist/decode.js"
43
31
  },
44
32
  "./escape": {
45
- "import": {
46
- "types": "./dist/esm/escape.d.ts",
47
- "default": "./dist/esm/escape.js"
48
- },
49
- "require": {
50
- "types": "./dist/commonjs/escape.d.ts",
51
- "default": "./dist/commonjs/escape.js"
52
- }
33
+ "types": "./dist/escape.d.ts",
34
+ "default": "./dist/escape.js"
53
35
  }
54
36
  },
55
- "main": "./dist/commonjs/index.js",
56
- "module": "./dist/esm/index.js",
57
- "types": "./dist/commonjs/index.d.ts",
37
+ "main": "./dist/index.js",
38
+ "types": "./dist/index.d.ts",
58
39
  "files": [
59
- "decode.js",
60
- "decode.d.ts",
61
- "escape.js",
62
- "escape.d.ts",
63
40
  "dist",
64
- "src"
41
+ "src",
42
+ "!**/*.spec.ts"
65
43
  ],
66
44
  "scripts": {
45
+ "benchmark": "node --import=tsx scripts/benchmark.ts",
46
+ "build": "tsc",
67
47
  "build:docs": "typedoc --hideGenerator src/index.ts",
68
48
  "build:encode-trie": "node --import=tsx scripts/write-encode-map.ts",
69
49
  "build:trie": "node --import=tsx scripts/write-decode-map.ts",
70
50
  "format": "npm run format:es && npm run format:biome",
71
- "format:es": "npm run lint:es -- --fix",
72
51
  "format:biome": "biome check --fix .",
52
+ "format:es": "npm run lint:es -- --fix",
73
53
  "lint": "npm run lint:es && npm run lint:ts && npm run lint:biome",
74
- "lint:es": "eslint . --ignore-path .gitignore",
75
54
  "lint:biome": "biome check .",
55
+ "lint:es": "eslint .",
76
56
  "lint:ts": "tsc --noEmit",
77
- "prepublishOnly": "tshy",
57
+ "prepublishOnly": "npm run build",
78
58
  "test": "npm run test:vi && npm run lint",
79
59
  "test:vi": "vitest run"
80
60
  },
81
61
  "devDependencies": {
82
- "@biomejs/biome": "^2.2.3",
83
- "@types/node": "^24.3.1",
84
- "@typescript-eslint/eslint-plugin": "^8.42.0",
85
- "@typescript-eslint/parser": "^8.33.1",
86
- "@vitest/coverage-v8": "^3.2.4",
87
- "eslint": "^8.57.1",
62
+ "@biomejs/biome": "^2.4.7",
63
+ "@eslint/compat": "^2.0.3",
64
+ "@feedic/eslint-config": "^0.3.1",
65
+ "@types/he": "^1.2.3",
66
+ "@types/node": "^25.5.0",
67
+ "eslint": "^10.0.3",
88
68
  "eslint-config-biome": "^2.1.3",
89
- "eslint-plugin-n": "^17.21.3",
90
- "eslint-plugin-unicorn": "^56.0.1",
91
- "tshy": "^3.0.2",
92
- "tsx": "^4.20.5",
93
- "typedoc": "^0.28.12",
94
- "typescript": "^5.9.2",
95
- "vitest": "^3.2.4"
69
+ "globals": "^17.4.0",
70
+ "he": "^1.2.0",
71
+ "html-entities": "^2.6.0",
72
+ "parse-entities": "^4.0.2",
73
+ "tinybench": "^6.0.0",
74
+ "tsx": "^4.21.0",
75
+ "typedoc": "^0.28.17",
76
+ "typescript": "^5.9.3",
77
+ "typescript-eslint": "^8.57.1",
78
+ "vitest": "^4.0.17"
96
79
  },
97
80
  "engines": {
98
- "node": ">=0.12"
99
- },
100
- "tshy": {
101
- "exclude": [
102
- "**/*.spec.ts",
103
- "**/__fixtures__/*",
104
- "**/__tests__/*",
105
- "**/__snapshots__/*"
106
- ],
107
- "exports": {
108
- ".": "./src/index.ts",
109
- "./decode": "./src/decode.ts",
110
- "./escape": "./src/escape.ts"
111
- }
81
+ "node": ">=20.19.0"
112
82
  }
113
83
  }
package/readme.md CHANGED
@@ -10,7 +10,7 @@ Encode & decode HTML & XML entities with ease & speed.
10
10
  [`commonmark`](https://github.com/commonmark/commonmark.js) use it to process
11
11
  HTML entities.
12
12
  - ⚡️ Fast: `entities` is the fastest library for decoding HTML entities (as of
13
- April 2022); see [performance](#performance).
13
+ September 2025); see [performance](#performance).
14
14
  - 🎛 Configurable: Get an output tailored for your needs. You are fine with
15
15
  UTF8? That'll save you some bytes. Prefer to only have ASCII characters? We
16
16
  can do that as well!
@@ -24,7 +24,7 @@ Encode & decode HTML & XML entities with ease & speed.
24
24
  ### …use `entities`
25
25
 
26
26
  ```javascript
27
- const entities = require("entities");
27
+ import * as entities from "entities";
28
28
 
29
29
  // Encoding
30
30
  entities.escapeUTF8("&#38; ü"); // "&amp;#38; ü"
@@ -38,15 +38,36 @@ entities.decodeHTML("asdf &amp; &yuml; &uuml; &apos;"); // "asdf & ÿ ü '"
38
38
 
39
39
  ## Performance
40
40
 
41
- This is how `entities` compares to other libraries on a very basic benchmark
42
- (see `scripts/benchmark.ts`, for 10,000,000 iterations; **lower is better**):
41
+ Benchmarked in September 2025 with Node v24.6.0 on Apple M2 using `tinybench`.
42
+ Higher ops/s is better; `avg (μs)` is the mean time per operation (note: the listed values are consistent with nanoseconds, e.g. 1 s / 5,838,416 ops ≈ 171 ns).
43
+ See `scripts/benchmark.ts` to reproduce.
43
44
 
44
- | Library | Version | `decode` perf | `encode` perf | `escape` perf |
45
- | -------------- | ------- | ------------- | ------------- | ------------- |
46
- | entities | `3.0.1` | 1.418s | 6.786s | 2.196s |
47
- | html-entities | `2.3.2` | 2.530s | 6.829s | 2.415s |
48
- | he | `1.2.0` | 5.800s | 24.237s | 3.624s |
49
- | parse-entities | `3.0.0` | 9.660s | N/A | N/A |
45
+ ### Decoding
46
+
47
+ | Library | Version | ops/s | avg (μs) | ±% | slower |
48
+ | -------------- | ------- | --------- | -------- | ---- | ------ |
49
+ | entities | 7.0.0 | 5,838,416 | 175.57 | 0.06 | — |
50
+ | html-entities | 2.6.0 | 2,919,637 | 347.77 | 0.33 | 50.0% |
51
+ | he | 1.2.0 | 2,318,438 | 446.48 | 0.70 | 60.3% |
52
+ | parse-entities | 4.0.2 | 852,855 | 1,199.51 | 0.36 | 85.4% |
53
+
54
+ ### Encoding
55
+
56
+ | Library | Version | ops/s | avg (μs) | ±% | slower |
57
+ | -------------- | ------- | --------- | -------- | ---- | ------ |
58
+ | entities | 7.0.0 | 2,770,115 | 368.09 | 0.11 | — |
59
+ | html-entities | 2.6.0 | 1,491,963 | 679.96 | 0.58 | 46.2% |
60
+ | he | 1.2.0 | 481,278 | 2,118.25 | 0.61 | 82.6% |
61
+
62
+ ### Escaping
63
+
64
+ | Library | Version | ops/s | avg (μs) | ±% | slower |
65
+ | -------------- | ------- | --------- | -------- | ---- | ------ |
66
+ | entities | 7.0.0 | 4,616,468 | 223.84 | 0.17 | — |
67
+ | he | 1.2.0 | 3,659,301 | 280.76 | 0.58 | 20.7% |
68
+ | html-entities | 2.6.0 | 3,555,301 | 296.63 | 0.84 | 23.0% |
69
+
70
+ Note: Micro-benchmarks may vary across machines and Node versions.
50
71
 
51
72
  ---
52
73
 
@@ -68,8 +89,8 @@ This is helpful for decoding entities in legacy environments.
68
89
 
69
90
  > Why should I use `entities` instead of alternative modules?
70
91
 
71
- As of April 2022, `entities` is a bit faster than other modules. Still, this is
72
- not a very differentiated space and other modules can catch up.
92
+ As of September 2025, `entities` is faster than other modules. Still, this is
93
+ not a differentiated space and other modules can catch up.
73
94
 
74
95
  **More importantly**, you might already have `entities` in your dependency graph
75
96
  (as a dependency of eg. `cheerio`, or `htmlparser2`), and including it directly
@@ -78,10 +99,9 @@ libraries, so have a look through your `node_modules` directory!
78
99
 
79
100
  > Does `entities` support tree shaking?
80
101
 
81
- Yes! `entities` ships as both a CommonJS and a ES module. Note that for best
82
- results, you should not use the `encode` and `decode` functions, as they wrap
83
- around a number of other functions, all of which will remain in the bundle.
84
- Instead, use the functions that you need directly.
102
+ Yes! Note that for best results, you should not use the `encode` and `decode`
103
+ functions, as they wrap around a number of other functions, all of which will
104
+ remain in the bundle. Instead, use the functions that you need directly.
85
105
 
86
106
  ---
87
107
 
@@ -109,14 +129,3 @@ License: BSD-2-Clause
109
129
  To report a security vulnerability, please use the
110
130
  [Tidelift security contact](https://tidelift.com/security). Tidelift will
111
131
  coordinate the fix and disclosure.
112
-
113
- ## `entities` for enterprise
114
-
115
- Available as part of the Tidelift Subscription
116
-
117
- The maintainers of `entities` and thousands of other packages are working with
118
- Tidelift to deliver commercial support and maintenance for the open source
119
- dependencies you use to build your applications. Save time, reduce risk, and
120
- improve code health, while paying the maintainers of the exact dependencies you
121
- use.
122
- [Learn more.](https://tidelift.com/subscription/pkg/npm-entities?utm_source=npm-entities&utm_medium=referral&utm_campaign=enterprise&utm_term=repo)
@@ -32,31 +32,11 @@ const decodeMap = new Map([
32
32
  [159, 376],
33
33
  ]);
34
34
 
35
- /**
36
- * Polyfill for `String.fromCodePoint`. It is used to create a string from a Unicode code point.
37
- */
38
- export const fromCodePoint: (...codePoints: number[]) => string =
39
- // eslint-disable-next-line @typescript-eslint/no-unnecessary-condition, n/no-unsupported-features/es-builtins
40
- String.fromCodePoint ??
41
- ((codePoint: number): string => {
42
- let output = "";
43
-
44
- if (codePoint > 0xff_ff) {
45
- codePoint -= 0x1_00_00;
46
- output += String.fromCharCode(
47
- ((codePoint >>> 10) & 0x3_ff) | 0xd8_00,
48
- );
49
- codePoint = 0xdc_00 | (codePoint & 0x3_ff);
50
- }
51
-
52
- output += String.fromCharCode(codePoint);
53
- return output;
54
- });
55
-
56
35
  /**
57
36
  * Replace the given code point with a replacement character if it is a
58
37
  * surrogate or is outside the valid range. Otherwise return the code
59
38
  * point unchanged.
39
+ * @param codePoint Unicode code point to convert.
60
40
  */
61
41
  export function replaceCodePoint(codePoint: number): number {
62
42
  if (
@@ -68,14 +48,3 @@ export function replaceCodePoint(codePoint: number): number {
68
48
 
69
49
  return decodeMap.get(codePoint) ?? codePoint;
70
50
  }
71
-
72
- /**
73
- * Replace the code point if relevant, then convert it to a string.
74
- *
75
- * @deprecated Use `fromCodePoint(replaceCodePoint(codePoint))` instead.
76
- * @param codePoint The code point to decode.
77
- * @returns The decoded code point.
78
- */
79
- export function decodeCodePoint(codePoint: number): string {
80
- return fromCodePoint(replaceCodePoint(codePoint));
81
- }
package/src/decode.ts CHANGED
@@ -1,4 +1,4 @@
1
- import { fromCodePoint, replaceCodePoint } from "./decode-codepoint.js";
1
+ import { replaceCodePoint } from "./decode-codepoint.js";
2
2
  import { htmlDecodeTree } from "./generated/decode-data-html.js";
3
3
  import { xmlDecodeTree } from "./generated/decode-data-xml.js";
4
4
  import { BinTrieFlags } from "./internal/bin-trie-flags.js";
@@ -45,6 +45,7 @@ function isAsciiAlphaNumeric(code: number): boolean {
45
45
  *
46
46
  * Attribute values that aren't terminated properly aren't parsed, and shouldn't lead to a parser error.
47
47
  * See the example in https://html.spec.whatwg.org/multipage/parsing.html#named-character-reference-state
48
+ * @param code Character code to test; `=` or an ASCII alphanumeric counts as an invalid end.
48
49
  */
49
50
  function isEntityInAttributeInvalidEnd(code: number): boolean {
50
51
  return code === CharCodes.EQUALS || isAsciiAlphaNumeric(code);
@@ -58,6 +59,9 @@ const enum EntityDecoderState {
58
59
  NamedEntity,
59
60
  }
60
61
 
62
+ /**
63
+ * Decoding mode for named entities.
64
+ */
61
65
  export enum DecodingMode {
62
66
  /** Entities in text nodes that can end with any character. */
63
67
  Legacy = 0,
@@ -91,7 +95,6 @@ export class EntityDecoder {
91
95
  *
92
96
  * For multi-byte named entities, this will be called multiple times,
93
97
  * with the second codepoint, and the same `consumed` value.
94
- *
95
98
  * @param codepoint The decoded codepoint.
96
99
  * @param consumed The number of bytes consumed by the decoder.
97
100
  */
@@ -118,8 +121,13 @@ export class EntityDecoder {
118
121
  private excess = 1;
119
122
  /** The mode in which the decoder is operating. */
120
123
  private decodeMode = DecodingMode.Strict;
124
+ /** The number of characters that have been consumed in the current run. */
125
+ private runConsumed = 0;
121
126
 
122
- /** Resets the instance to make it reusable. */
127
+ /**
128
+ * Resets the instance to make it reusable.
129
+ * @param decodeMode Entity decoding mode to use.
130
+ */
123
131
  startEntity(decodeMode: DecodingMode): void {
124
132
  this.decodeMode = decodeMode;
125
133
  this.state = EntityDecoderState.EntityStart;
@@ -127,6 +135,7 @@ export class EntityDecoder {
127
135
  this.treeIndex = 0;
128
136
  this.excess = 1;
129
137
  this.consumed = 1;
138
+ this.runConsumed = 0;
130
139
  }
131
140
 
132
141
  /**
@@ -135,7 +144,6 @@ export class EntityDecoder {
135
144
  *
136
145
  * Mirrors the implementation of `getDecoder`, but with the ability to stop decoding if the
137
146
  * entity is incomplete, and resume when the next string is written.
138
- *
139
147
  * @param input The string containing the entity (or a continuation of the entity).
140
148
  * @param offset The offset at which the entity begins. Should be 0 if this is not the first call.
141
149
  * @returns The number of characters that were consumed, or -1 if the entity is incomplete.
@@ -174,7 +182,6 @@ export class EntityDecoder {
174
182
  * Switches between the numeric decimal and hexadecimal states.
175
183
  *
176
184
  * Equivalent to the `Numeric character reference state` in the HTML spec.
177
- *
178
185
  * @param input The string containing the entity (or a continuation of the entity).
179
186
  * @param offset The current offset.
180
187
  * @returns The number of characters that were consumed, or -1 if the entity is incomplete.
@@ -198,7 +205,6 @@ export class EntityDecoder {
198
205
  * Parses a hexadecimal numeric entity.
199
206
  *
200
207
  * Equivalent to the `Hexadecimal character reference state` in the HTML spec.
201
- *
202
208
  * @param input The string containing the entity (or a continuation of the entity).
203
209
  * @param offset The current offset.
204
210
  * @returns The number of characters that were consumed, or -1 if the entity is incomplete.
@@ -226,7 +232,6 @@ export class EntityDecoder {
226
232
  * Parses a decimal numeric entity.
227
233
  *
228
234
  * Equivalent to the `Decimal character reference state` in the HTML spec.
229
- *
230
235
  * @param input The string containing the entity (or a continuation of the entity).
231
236
  * @param offset The current offset.
232
237
  * @returns The number of characters that were consumed, or -1 if the entity is incomplete.
@@ -250,7 +255,6 @@ export class EntityDecoder {
250
255
  *
251
256
  * Implements the logic from the `Hexadecimal character reference start
252
257
  * state` and `Numeric character reference end state` in the HTML spec.
253
- *
254
258
  * @param lastCp The last code point of the entity. Used to see if the
255
259
  * entity was terminated with a semicolon.
256
260
  * @param expectedLength The minimum number of characters that should be
@@ -291,7 +295,6 @@ export class EntityDecoder {
291
295
  * Parses a named entity.
292
296
  *
293
297
  * Equivalent to the `Named character reference state` in the HTML spec.
294
- *
295
298
  * @param input The string containing the entity (or a continuation of the entity).
296
299
  * @param offset The current offset.
297
300
  * @returns The number of characters that were consumed, or -1 if the entity is incomplete.
@@ -307,43 +310,49 @@ export class EntityDecoder {
307
310
  if (valueLength === 0 && (current & BinTrieFlags.FLAG13) !== 0) {
308
311
  const runLength =
309
312
  (current & BinTrieFlags.BRANCH_LENGTH) >> 7; /* 2..63 */
310
- const firstChar = current & BinTrieFlags.JUMP_TABLE;
311
- // Fast-fail if we don't have enough remaining input for the full run (incomplete entity)
312
- if (offset + runLength > input.length) return -1;
313
- // Verify first char
314
- if (input.charCodeAt(offset) !== firstChar) {
315
- return this.result === 0
316
- ? 0
317
- : this.emitNotTerminatedNamedEntity();
313
+
314
+ // If we are starting a run, check the first char.
315
+ if (this.runConsumed === 0) {
316
+ const firstChar = current & BinTrieFlags.JUMP_TABLE;
317
+ if (input.charCodeAt(offset) !== firstChar) {
318
+ return this.result === 0
319
+ ? 0
320
+ : this.emitNotTerminatedNamedEntity();
321
+ }
322
+ offset++;
323
+ this.excess++;
324
+ this.runConsumed++;
318
325
  }
319
- offset++;
320
- this.excess++;
321
- // Remaining characters after the first
322
- const remaining = runLength - 1;
323
- // Iterate over packed 2-char words
324
- for (let runPos = 1; runPos < runLength; runPos += 2) {
326
+
327
+ // Check remaining characters in the run.
328
+ while (this.runConsumed < runLength) {
329
+ if (offset >= input.length) {
330
+ return -1;
331
+ }
332
+
333
+ const charIndexInPacked = this.runConsumed - 1;
325
334
  const packedWord =
326
- decodeTree[this.treeIndex + 1 + ((runPos - 1) >> 1)];
327
- const low = packedWord & 0xff;
328
- if (input.charCodeAt(offset) !== low) {
335
+ decodeTree[
336
+ this.treeIndex + 1 + (charIndexInPacked >> 1)
337
+ ];
338
+ const expectedChar =
339
+ charIndexInPacked % 2 === 0
340
+ ? packedWord & 0xff
341
+ : (packedWord >> 8) & 0xff;
342
+
343
+ if (input.charCodeAt(offset) !== expectedChar) {
344
+ this.runConsumed = 0;
329
345
  return this.result === 0
330
346
  ? 0
331
347
  : this.emitNotTerminatedNamedEntity();
332
348
  }
333
349
  offset++;
334
350
  this.excess++;
335
- const high = (packedWord >> 8) & 0xff;
336
- if (runPos + 1 < runLength) {
337
- if (input.charCodeAt(offset) !== high) {
338
- return this.result === 0
339
- ? 0
340
- : this.emitNotTerminatedNamedEntity();
341
- }
342
- offset++;
343
- this.excess++;
344
- }
351
+ this.runConsumed++;
345
352
  }
346
- this.treeIndex += 1 + ((remaining + 1) >> 1);
353
+
354
+ this.runConsumed = 0;
355
+ this.treeIndex += 1 + (runLength >> 1);
347
356
  current = decodeTree[this.treeIndex];
348
357
  valueLength = (current & BinTrieFlags.VALUE_LENGTH) >> 14;
349
358
  }
@@ -424,7 +433,6 @@ export class EntityDecoder {
424
433
 
425
434
  /**
426
435
  * Emit a named entity that was not terminated with a semicolon.
427
- *
428
436
  * @returns The number of characters consumed.
429
437
  */
430
438
  private emitNotTerminatedNamedEntity(): number {
@@ -441,11 +449,9 @@ export class EntityDecoder {
441
449
 
442
450
  /**
443
451
  * Emit a named entity.
444
- *
445
452
  * @param result The index of the entity in the decode tree.
446
453
  * @param valueLength The number of bytes in the entity.
447
454
  * @param consumed The number of characters consumed.
448
- *
449
455
  * @returns The number of characters consumed.
450
456
  */
451
457
  private emitNamedEntityData(
@@ -474,7 +480,6 @@ export class EntityDecoder {
474
480
  * Signal to the parser that the end of the input was reached.
475
481
  *
476
482
  * Remaining data will be emitted and relevant errors will be produced.
477
- *
478
483
  * @returns The number of characters consumed.
479
484
  */
480
485
  end(): number {
@@ -510,7 +515,6 @@ export class EntityDecoder {
510
515
 
511
516
  /**
512
517
  * Creates a function that decodes entities in a string.
513
- *
514
518
  * @param decodeTree The decode tree.
515
519
  * @returns A function that decodes entities in a string.
516
520
  */
@@ -518,7 +522,7 @@ function getDecoder(decodeTree: Uint16Array) {
518
522
  let returnValue = "";
519
523
  const decoder = new EntityDecoder(
520
524
  decodeTree,
521
- (data) => (returnValue += fromCodePoint(data)),
525
+ (data) => (returnValue += String.fromCodePoint(data)),
522
526
  );
523
527
 
524
528
  return function decodeWithTrie(
@@ -561,10 +565,9 @@ function getDecoder(decodeTree: Uint16Array) {
561
565
  /**
562
566
  * Determines the branch of the current node that is taken given the current
563
567
  * character. This function is used to traverse the trie.
564
- *
565
568
  * @param decodeTree The trie.
566
569
  * @param current The current node.
567
- * @param nodeIdx The index right after the current node and its value.
570
+ * @param nodeIndex Index immediately after the current node header.
568
571
  * @param char The current character.
569
572
  * @returns The index of the next node, or -1 if no branch is taken.
570
573
  */
@@ -624,7 +627,6 @@ const xmlDecoder = /* #__PURE__ */ getDecoder(xmlDecodeTree);
624
627
 
625
628
  /**
626
629
  * Decodes an HTML string.
627
- *
628
630
  * @param htmlString The string to decode.
629
631
  * @param mode The decoding mode.
630
632
  * @returns The decoded string.
@@ -638,7 +640,6 @@ export function decodeHTML(
638
640
 
639
641
  /**
640
642
  * Decodes an HTML string in an attribute.
641
- *
642
643
  * @param htmlAttribute The string to decode.
643
644
  * @returns The decoded string.
644
645
  */
@@ -648,7 +649,6 @@ export function decodeHTMLAttribute(htmlAttribute: string): string {
648
649
 
649
650
  /**
650
651
  * Decodes an HTML string, requiring all entities to be terminated by a semicolon.
651
- *
652
652
  * @param htmlString The string to decode.
653
653
  * @returns The decoded string.
654
654
  */
@@ -658,7 +658,6 @@ export function decodeHTMLStrict(htmlString: string): string {
658
658
 
659
659
  /**
660
660
  * Decodes an XML string, requiring all entities to be terminated by a semicolon.
661
- *
662
661
  * @param xmlString The string to decode.
663
662
  * @returns The decoded string.
664
663
  */
@@ -666,11 +665,7 @@ export function decodeXML(xmlString: string): string {
666
665
  return xmlDecoder(xmlString, DecodingMode.Strict);
667
666
  }
668
667
 
669
- export {
670
- decodeCodePoint,
671
- fromCodePoint,
672
- replaceCodePoint,
673
- } from "./decode-codepoint.js";
668
+ export { replaceCodePoint } from "./decode-codepoint.js";
674
669
  // Re-export for use by eg. htmlparser2
675
670
  export { htmlDecodeTree } from "./generated/decode-data-html.js";
676
671
  export { xmlDecodeTree } from "./generated/decode-data-xml.js";
package/src/encode.ts CHANGED
@@ -23,6 +23,7 @@ const XML_BITSET = /* #__PURE__ */ new Uint32Array([0, XML_BITSET_VALUE, 0, 0]);
23
23
  *
24
24
  * If a character has no equivalent entity, a numeric hexadecimal reference
25
25
  * (eg. `&#xfc;`) will be used.
26
+ * @param input Input string to encode.
26
27
  */
27
28
  export function encodeHTML(input: string): string {
28
29
  return encodeHTMLTrieRe(HTML_BITSET, input);
@@ -34,6 +35,7 @@ export function encodeHTML(input: string): string {
34
35
  *
35
36
  * If a character has no equivalent entity, a numeric hexadecimal reference
36
37
  * (eg. `&#xfc;`) will be used.
38
+ * @param input Input string to encode.
37
39
  */
38
40
  export function encodeNonAsciiHTML(input: string): string {
39
41
  return encodeHTMLTrieRe(XML_BITSET, input);
package/src/escape.ts CHANGED
@@ -7,18 +7,22 @@ const xmlCodeMap = new Map([
7
7
  ]);
8
8
 
9
9
  // For compatibility with node < 4, we wrap `codePointAt`
10
+ /**
11
+ * Read a code point at a given index.
12
+ * @param input String to read the code point from.
13
+ * @param index Current read position in the input string.
14
+ */
10
15
  export const getCodePoint: (c: string, index: number) => number =
11
- // eslint-disable-next-line @typescript-eslint/no-unnecessary-condition
12
- String.prototype.codePointAt == null
13
- ? (c: string, index: number): number =>
16
+ typeof String.prototype.codePointAt === "function"
17
+ ? (input: string, index: number): number => input.codePointAt(index)!
18
+ : // http://mathiasbynens.be/notes/javascript-encoding#surrogate-formulae
19
+ (c: string, index: number): number =>
14
20
  (c.charCodeAt(index) & 0xfc_00) === 0xd8_00
15
21
  ? (c.charCodeAt(index) - 0xd8_00) * 0x4_00 +
16
22
  c.charCodeAt(index + 1) -
17
23
  0xdc_00 +
18
24
  0x1_00_00
19
- : c.charCodeAt(index)
20
- : // http://mathiasbynens.be/notes/javascript-encoding#surrogate-formulae
21
- (input: string, index: number): number => input.codePointAt(index)!;
25
+ : c.charCodeAt(index);
22
26
 
23
27
  /**
24
28
  * Bitset for ASCII characters that need to be escaped in XML.
@@ -31,6 +35,7 @@ export const XML_BITSET_VALUE = 0x50_00_00_c4; // 32..63 -> 34 ("),38 (&),39 (')
31
35
  *
32
36
  * If a character has no equivalent entity, a numeric hexadecimal reference
33
37
  * (eg. `&#xfc;`) will be used.
38
+ * @param input Input string to encode.
34
39
  */
35
40
  export function encodeXML(input: string): string {
36
41
  let out: string | undefined;
@@ -76,7 +81,6 @@ export function encodeXML(input: string): string {
76
81
  *
77
82
  * Have a look at `escapeUTF8` if you want a more concise output at the expense
78
83
  * of reduced transportability.
79
- *
80
84
  * @param data String to escape.
81
85
  */
82
86
  export const escape: typeof encodeXML = encodeXML;
@@ -84,10 +88,8 @@ export const escape: typeof encodeXML = encodeXML;
84
88
  /**
85
89
  * Creates a function that escapes all characters matched by the given regular
86
90
  * expression using the given map of characters to escape to their entities.
87
- *
88
91
  * @param regex Regular expression to match characters to escape.
89
92
  * @param map Map of characters to escape to their entities.
90
- *
91
93
  * @returns Function that escapes all characters matched by the given regular
92
94
  * expression using the given map of characters to escape to their entities.
93
95
  */
@@ -120,7 +122,6 @@ function getEscaper(
120
122
  * Encodes all characters not valid in XML documents using XML entities.
121
123
  *
122
124
  * Note that the output will be character-set dependent.
123
- *
124
125
  * @param data String to escape.
125
126
  */
126
127
  export const escapeUTF8: (data: string) => string = /* #__PURE__ */ getEscaper(
@@ -131,7 +132,6 @@ export const escapeUTF8: (data: string) => string = /* #__PURE__ */ getEscaper(
131
132
  /**
132
133
  * Encodes all characters that have to be escaped in HTML attributes,
133
134
  * following {@link https://html.spec.whatwg.org/multipage/parsing.html#escapingString}.
134
- *
135
135
  * @param data String to escape.
136
136
  */
137
137
  export const escapeAttribute: (data: string) => string =
@@ -147,7 +147,6 @@ export const escapeAttribute: (data: string) => string =
147
147
  /**
148
148
  * Encodes all characters that have to be escaped in HTML text,
149
149
  * following {@link https://html.spec.whatwg.org/multipage/parsing.html#escapingString}.
150
- *
151
150
  * @param data String to escape.
152
151
  */
153
152
  export const escapeText: (data: string) => string = /* #__PURE__ */ getEscaper(