entities 7.0.0 → 8.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/decode-codepoint.d.ts +8 -0
- package/dist/decode-codepoint.d.ts.map +1 -0
- package/dist/decode-codepoint.js +46 -0
- package/dist/decode-codepoint.js.map +1 -0
- package/dist/{commonjs/decode.d.ts → decode.d.ts} +11 -20
- package/dist/decode.d.ts.map +1 -0
- package/dist/{esm/decode.js → decode.js} +65 -75
- package/dist/decode.js.map +1 -0
- package/dist/{commonjs/encode.d.ts → encode.d.ts} +2 -0
- package/dist/encode.d.ts.map +1 -0
- package/dist/{esm/encode.js → encode.js} +6 -4
- package/dist/encode.js.map +1 -0
- package/dist/{esm/escape.d.ts → escape.d.ts} +6 -4
- package/dist/escape.d.ts.map +1 -0
- package/dist/{esm/escape.js → escape.js} +15 -17
- package/dist/escape.js.map +1 -0
- package/dist/generated/decode-data-html.d.ts +3 -0
- package/dist/generated/decode-data-html.d.ts.map +1 -0
- package/dist/{esm/generated → generated}/decode-data-html.js +1 -0
- package/dist/generated/decode-data-html.js.map +1 -0
- package/dist/generated/decode-data-xml.d.ts +3 -0
- package/dist/generated/decode-data-xml.d.ts.map +1 -0
- package/dist/{esm/generated → generated}/decode-data-xml.js +1 -0
- package/dist/generated/decode-data-xml.js.map +1 -0
- package/dist/generated/encode-html.d.ts +5 -0
- package/dist/generated/encode-html.d.ts.map +1 -0
- package/dist/{esm/generated → generated}/encode-html.js +2 -1
- package/dist/generated/encode-html.js.map +1 -0
- package/dist/{commonjs/index.d.ts → index.d.ts} +9 -16
- package/dist/index.d.ts.map +1 -0
- package/dist/{esm/index.js → index.js} +7 -23
- package/dist/index.js.map +1 -0
- package/dist/internal/bin-trie-flags.d.ts.map +1 -0
- package/dist/internal/bin-trie-flags.js.map +1 -0
- package/dist/internal/decode-shared.d.ts +7 -0
- package/dist/internal/decode-shared.d.ts.map +1 -0
- package/dist/internal/decode-shared.js +17 -0
- package/dist/internal/decode-shared.js.map +1 -0
- package/dist/{commonjs/internal → internal}/encode-shared.d.ts +1 -0
- package/dist/internal/encode-shared.d.ts.map +1 -0
- package/dist/{esm/internal → internal}/encode-shared.js +4 -2
- package/dist/internal/encode-shared.js.map +1 -0
- package/package.json +33 -63
- package/readme.md +36 -27
- package/src/decode-codepoint.ts +1 -32
- package/src/decode.ts +49 -54
- package/src/encode.ts +2 -0
- package/src/escape.ts +11 -12
- package/src/generated/decode-data-html.ts +1 -0
- package/src/generated/decode-data-xml.ts +1 -0
- package/src/generated/encode-html.ts +2 -1
- package/src/index.ts +8 -33
- package/src/internal/decode-shared.ts +3 -15
- package/src/internal/encode-shared.ts +4 -2
- package/decode.d.ts +0 -3
- package/decode.js +0 -3
- package/dist/commonjs/decode-codepoint.d.ts +0 -19
- package/dist/commonjs/decode-codepoint.d.ts.map +0 -1
- package/dist/commonjs/decode-codepoint.js +0 -77
- package/dist/commonjs/decode-codepoint.js.map +0 -1
- package/dist/commonjs/decode.d.ts.map +0 -1
- package/dist/commonjs/decode.js +0 -568
- package/dist/commonjs/decode.js.map +0 -1
- package/dist/commonjs/encode.d.ts.map +0 -1
- package/dist/commonjs/encode.js +0 -92
- package/dist/commonjs/encode.js.map +0 -1
- package/dist/commonjs/escape.d.ts +0 -46
- package/dist/commonjs/escape.d.ts.map +0 -1
- package/dist/commonjs/escape.js +0 -138
- package/dist/commonjs/escape.js.map +0 -1
- package/dist/commonjs/generated/decode-data-html.d.ts +0 -2
- package/dist/commonjs/generated/decode-data-html.d.ts.map +0 -1
- package/dist/commonjs/generated/decode-data-html.js +0 -7
- package/dist/commonjs/generated/decode-data-html.js.map +0 -1
- package/dist/commonjs/generated/decode-data-xml.d.ts +0 -2
- package/dist/commonjs/generated/decode-data-xml.d.ts.map +0 -1
- package/dist/commonjs/generated/decode-data-xml.js +0 -7
- package/dist/commonjs/generated/decode-data-xml.js.map +0 -1
- package/dist/commonjs/generated/encode-html.d.ts +0 -3
- package/dist/commonjs/generated/encode-html.d.ts.map +0 -1
- package/dist/commonjs/generated/encode-html.js +0 -14
- package/dist/commonjs/generated/encode-html.js.map +0 -1
- package/dist/commonjs/index.d.ts.map +0 -1
- package/dist/commonjs/index.js +0 -131
- package/dist/commonjs/index.js.map +0 -1
- package/dist/commonjs/internal/bin-trie-flags.d.ts.map +0 -1
- package/dist/commonjs/internal/bin-trie-flags.js +0 -21
- package/dist/commonjs/internal/bin-trie-flags.js.map +0 -1
- package/dist/commonjs/internal/decode-shared.d.ts +0 -2
- package/dist/commonjs/internal/decode-shared.d.ts.map +0 -1
- package/dist/commonjs/internal/decode-shared.js +0 -31
- package/dist/commonjs/internal/decode-shared.js.map +0 -1
- package/dist/commonjs/internal/encode-shared.d.ts.map +0 -1
- package/dist/commonjs/internal/encode-shared.js +0 -94
- package/dist/commonjs/internal/encode-shared.js.map +0 -1
- package/dist/commonjs/package.json +0 -3
- package/dist/esm/decode-codepoint.d.ts +0 -19
- package/dist/esm/decode-codepoint.d.ts.map +0 -1
- package/dist/esm/decode-codepoint.js +0 -72
- package/dist/esm/decode-codepoint.js.map +0 -1
- package/dist/esm/decode.d.ts +0 -203
- package/dist/esm/decode.d.ts.map +0 -1
- package/dist/esm/decode.js.map +0 -1
- package/dist/esm/encode.d.ts +0 -22
- package/dist/esm/encode.d.ts.map +0 -1
- package/dist/esm/encode.js.map +0 -1
- package/dist/esm/escape.d.ts.map +0 -1
- package/dist/esm/escape.js.map +0 -1
- package/dist/esm/generated/decode-data-html.d.ts +0 -2
- package/dist/esm/generated/decode-data-html.d.ts.map +0 -1
- package/dist/esm/generated/decode-data-html.js.map +0 -1
- package/dist/esm/generated/decode-data-xml.d.ts +0 -2
- package/dist/esm/generated/decode-data-xml.d.ts.map +0 -1
- package/dist/esm/generated/decode-data-xml.js.map +0 -1
- package/dist/esm/generated/encode-html.d.ts +0 -3
- package/dist/esm/generated/encode-html.d.ts.map +0 -1
- package/dist/esm/generated/encode-html.js.map +0 -1
- package/dist/esm/index.d.ts +0 -96
- package/dist/esm/index.d.ts.map +0 -1
- package/dist/esm/index.js.map +0 -1
- package/dist/esm/internal/bin-trie-flags.d.ts +0 -17
- package/dist/esm/internal/bin-trie-flags.d.ts.map +0 -1
- package/dist/esm/internal/bin-trie-flags.js.map +0 -1
- package/dist/esm/internal/decode-shared.d.ts +0 -2
- package/dist/esm/internal/decode-shared.d.ts.map +0 -1
- package/dist/esm/internal/decode-shared.js +0 -28
- package/dist/esm/internal/decode-shared.js.map +0 -1
- package/dist/esm/internal/encode-shared.d.ts +0 -32
- package/dist/esm/internal/encode-shared.d.ts.map +0 -1
- package/dist/esm/internal/encode-shared.js.map +0 -1
- package/dist/esm/package.json +0 -3
- package/escape.d.ts +0 -3
- package/escape.js +0 -3
- package/src/decode.spec.ts +0 -363
- package/src/encode.spec.ts +0 -78
- package/src/escape.spec.ts +0 -14
- package/src/generated/.eslintrc.json +0 -10
- package/src/index.spec.ts +0 -125
- /package/dist/{commonjs/internal → internal}/bin-trie-flags.d.ts +0 -0
- /package/dist/{esm/internal → internal}/bin-trie-flags.js +0 -0
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "entities",
|
|
3
|
-
"version": "
|
|
3
|
+
"version": "8.0.0",
|
|
4
4
|
"description": "Encode & decode XML and HTML entities with ease & speed",
|
|
5
5
|
"keywords": [
|
|
6
6
|
"html entities",
|
|
@@ -22,92 +22,62 @@
|
|
|
22
22
|
"type": "module",
|
|
23
23
|
"exports": {
|
|
24
24
|
".": {
|
|
25
|
-
"
|
|
26
|
-
|
|
27
|
-
"default": "./dist/esm/index.js"
|
|
28
|
-
},
|
|
29
|
-
"require": {
|
|
30
|
-
"types": "./dist/commonjs/index.d.ts",
|
|
31
|
-
"default": "./dist/commonjs/index.js"
|
|
32
|
-
}
|
|
25
|
+
"types": "./dist/index.d.ts",
|
|
26
|
+
"default": "./dist/index.js"
|
|
33
27
|
},
|
|
34
28
|
"./decode": {
|
|
35
|
-
"
|
|
36
|
-
|
|
37
|
-
"default": "./dist/esm/decode.js"
|
|
38
|
-
},
|
|
39
|
-
"require": {
|
|
40
|
-
"types": "./dist/commonjs/decode.d.ts",
|
|
41
|
-
"default": "./dist/commonjs/decode.js"
|
|
42
|
-
}
|
|
29
|
+
"types": "./dist/decode.d.ts",
|
|
30
|
+
"default": "./dist/decode.js"
|
|
43
31
|
},
|
|
44
32
|
"./escape": {
|
|
45
|
-
"
|
|
46
|
-
|
|
47
|
-
"default": "./dist/esm/escape.js"
|
|
48
|
-
},
|
|
49
|
-
"require": {
|
|
50
|
-
"types": "./dist/commonjs/escape.d.ts",
|
|
51
|
-
"default": "./dist/commonjs/escape.js"
|
|
52
|
-
}
|
|
33
|
+
"types": "./dist/escape.d.ts",
|
|
34
|
+
"default": "./dist/escape.js"
|
|
53
35
|
}
|
|
54
36
|
},
|
|
55
|
-
"main": "./dist/
|
|
56
|
-
"
|
|
57
|
-
"types": "./dist/commonjs/index.d.ts",
|
|
37
|
+
"main": "./dist/index.js",
|
|
38
|
+
"types": "./dist/index.d.ts",
|
|
58
39
|
"files": [
|
|
59
|
-
"decode.js",
|
|
60
|
-
"decode.d.ts",
|
|
61
|
-
"escape.js",
|
|
62
|
-
"escape.d.ts",
|
|
63
40
|
"dist",
|
|
64
|
-
"src"
|
|
41
|
+
"src",
|
|
42
|
+
"!**/*.spec.ts"
|
|
65
43
|
],
|
|
66
44
|
"scripts": {
|
|
45
|
+
"benchmark": "node --import=tsx scripts/benchmark.ts",
|
|
46
|
+
"build": "tsc",
|
|
67
47
|
"build:docs": "typedoc --hideGenerator src/index.ts",
|
|
68
48
|
"build:encode-trie": "node --import=tsx scripts/write-encode-map.ts",
|
|
69
49
|
"build:trie": "node --import=tsx scripts/write-decode-map.ts",
|
|
70
50
|
"format": "npm run format:es && npm run format:biome",
|
|
71
|
-
"format:es": "npm run lint:es -- --fix",
|
|
72
51
|
"format:biome": "biome check --fix .",
|
|
52
|
+
"format:es": "npm run lint:es -- --fix",
|
|
73
53
|
"lint": "npm run lint:es && npm run lint:ts && npm run lint:biome",
|
|
74
|
-
"lint:es": "eslint . --ignore-path .gitignore",
|
|
75
54
|
"lint:biome": "biome check .",
|
|
55
|
+
"lint:es": "eslint .",
|
|
76
56
|
"lint:ts": "tsc --noEmit",
|
|
77
|
-
"prepublishOnly": "
|
|
57
|
+
"prepublishOnly": "npm run build",
|
|
78
58
|
"test": "npm run test:vi && npm run lint",
|
|
79
59
|
"test:vi": "vitest run"
|
|
80
60
|
},
|
|
81
61
|
"devDependencies": {
|
|
82
|
-
"@biomejs/biome": "^2.
|
|
83
|
-
"@
|
|
84
|
-
"@
|
|
85
|
-
"@
|
|
86
|
-
"@
|
|
87
|
-
"eslint": "^
|
|
62
|
+
"@biomejs/biome": "^2.4.7",
|
|
63
|
+
"@eslint/compat": "^2.0.3",
|
|
64
|
+
"@feedic/eslint-config": "^0.3.1",
|
|
65
|
+
"@types/he": "^1.2.3",
|
|
66
|
+
"@types/node": "^25.5.0",
|
|
67
|
+
"eslint": "^10.0.3",
|
|
88
68
|
"eslint-config-biome": "^2.1.3",
|
|
89
|
-
"
|
|
90
|
-
"
|
|
91
|
-
"
|
|
92
|
-
"
|
|
93
|
-
"
|
|
94
|
-
"
|
|
95
|
-
"
|
|
69
|
+
"globals": "^17.4.0",
|
|
70
|
+
"he": "^1.2.0",
|
|
71
|
+
"html-entities": "^2.6.0",
|
|
72
|
+
"parse-entities": "^4.0.2",
|
|
73
|
+
"tinybench": "^6.0.0",
|
|
74
|
+
"tsx": "^4.21.0",
|
|
75
|
+
"typedoc": "^0.28.17",
|
|
76
|
+
"typescript": "^5.9.3",
|
|
77
|
+
"typescript-eslint": "^8.57.1",
|
|
78
|
+
"vitest": "^4.0.17"
|
|
96
79
|
},
|
|
97
80
|
"engines": {
|
|
98
|
-
"node": ">=0
|
|
99
|
-
},
|
|
100
|
-
"tshy": {
|
|
101
|
-
"exclude": [
|
|
102
|
-
"**/*.spec.ts",
|
|
103
|
-
"**/__fixtures__/*",
|
|
104
|
-
"**/__tests__/*",
|
|
105
|
-
"**/__snapshots__/*"
|
|
106
|
-
],
|
|
107
|
-
"exports": {
|
|
108
|
-
".": "./src/index.ts",
|
|
109
|
-
"./decode": "./src/decode.ts",
|
|
110
|
-
"./escape": "./src/escape.ts"
|
|
111
|
-
}
|
|
81
|
+
"node": ">=20.19.0"
|
|
112
82
|
}
|
|
113
83
|
}
|
package/readme.md
CHANGED
|
@@ -10,7 +10,7 @@ Encode & decode HTML & XML entities with ease & speed.
|
|
|
10
10
|
[`commonmark`](https://github.com/commonmark/commonmark.js) use it to process
|
|
11
11
|
HTML entities.
|
|
12
12
|
- ⚡️ Fast: `entities` is the fastest library for decoding HTML entities (as of
|
|
13
|
-
|
|
13
|
+
September 2025); see [performance](#performance).
|
|
14
14
|
- 🎛 Configurable: Get an output tailored for your needs. You are fine with
|
|
15
15
|
UTF8? That'll save you some bytes. Prefer to only have ASCII characters? We
|
|
16
16
|
can do that as well!
|
|
@@ -24,7 +24,7 @@ Encode & decode HTML & XML entities with ease & speed.
|
|
|
24
24
|
### …use `entities`
|
|
25
25
|
|
|
26
26
|
```javascript
|
|
27
|
-
|
|
27
|
+
import * as entities from "entities";
|
|
28
28
|
|
|
29
29
|
// Encoding
|
|
30
30
|
entities.escapeUTF8("& ü"); // "& ü"
|
|
@@ -38,15 +38,36 @@ entities.decodeHTML("asdf & ÿ ü '"); // "asdf & ÿ ü '"
|
|
|
38
38
|
|
|
39
39
|
## Performance
|
|
40
40
|
|
|
41
|
-
|
|
42
|
-
|
|
41
|
+
Benchmarked in September 2025 with Node v24.6.0 on Apple M2 using `tinybench`.
|
|
42
|
+
Higher ops/s is better; `avg (μs)` is the mean time per operation.
|
|
43
|
+
See `scripts/benchmark.ts` to reproduce.
|
|
43
44
|
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
|
47
|
-
|
|
|
48
|
-
|
|
|
49
|
-
|
|
|
45
|
+
### Decoding
|
|
46
|
+
|
|
47
|
+
| Library | Version | ops/s | avg (μs) | ±% | slower |
|
|
48
|
+
| -------------- | ------- | --------- | -------- | ---- | ------ |
|
|
49
|
+
| entities | 7.0.0 | 5,838,416 | 175.57 | 0.06 | — |
|
|
50
|
+
| html-entities | 2.6.0 | 2,919,637 | 347.77 | 0.33 | 50.0% |
|
|
51
|
+
| he | 1.2.0 | 2,318,438 | 446.48 | 0.70 | 60.3% |
|
|
52
|
+
| parse-entities | 4.0.2 | 852,855 | 1,199.51 | 0.36 | 85.4% |
|
|
53
|
+
|
|
54
|
+
### Encoding
|
|
55
|
+
|
|
56
|
+
| Library | Version | ops/s | avg (μs) | ±% | slower |
|
|
57
|
+
| -------------- | ------- | --------- | -------- | ---- | ------ |
|
|
58
|
+
| entities | 7.0.0 | 2,770,115 | 368.09 | 0.11 | — |
|
|
59
|
+
| html-entities | 2.6.0 | 1,491,963 | 679.96 | 0.58 | 46.2% |
|
|
60
|
+
| he | 1.2.0 | 481,278 | 2,118.25 | 0.61 | 82.6% |
|
|
61
|
+
|
|
62
|
+
### Escaping
|
|
63
|
+
|
|
64
|
+
| Library | Version | ops/s | avg (μs) | ±% | slower |
|
|
65
|
+
| -------------- | ------- | --------- | -------- | ---- | ------ |
|
|
66
|
+
| entities | 7.0.0 | 4,616,468 | 223.84 | 0.17 | — |
|
|
67
|
+
| he | 1.2.0 | 3,659,301 | 280.76 | 0.58 | 20.7% |
|
|
68
|
+
| html-entities | 2.6.0 | 3,555,301 | 296.63 | 0.84 | 23.0% |
|
|
69
|
+
|
|
70
|
+
Note: Micro-benchmarks may vary across machines and Node versions.
|
|
50
71
|
|
|
51
72
|
---
|
|
52
73
|
|
|
@@ -68,8 +89,8 @@ This is helpful for decoding entities in legacy environments.
|
|
|
68
89
|
|
|
69
90
|
> Why should I use `entities` instead of alternative modules?
|
|
70
91
|
|
|
71
|
-
As of
|
|
72
|
-
not a
|
|
92
|
+
As of September 2025, `entities` is faster than other modules. Still, this is
|
|
93
|
+
not a differentiated space and other modules can catch up.
|
|
73
94
|
|
|
74
95
|
**More importantly**, you might already have `entities` in your dependency graph
|
|
75
96
|
(as a dependency of eg. `cheerio`, or `htmlparser2`), and including it directly
|
|
@@ -78,10 +99,9 @@ libraries, so have a look through your `node_modules` directory!
|
|
|
78
99
|
|
|
79
100
|
> Does `entities` support tree shaking?
|
|
80
101
|
|
|
81
|
-
Yes!
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
Instead, use the functions that you need directly.
|
|
102
|
+
Yes! Note that for best results, you should not use the `encode` and `decode`
|
|
103
|
+
functions, as they wrap around a number of other functions, all of which will
|
|
104
|
+
remain in the bundle. Instead, use the functions that you need directly.
|
|
85
105
|
|
|
86
106
|
---
|
|
87
107
|
|
|
@@ -109,14 +129,3 @@ License: BSD-2-Clause
|
|
|
109
129
|
To report a security vulnerability, please use the
|
|
110
130
|
[Tidelift security contact](https://tidelift.com/security). Tidelift will
|
|
111
131
|
coordinate the fix and disclosure.
|
|
112
|
-
|
|
113
|
-
## `entities` for enterprise
|
|
114
|
-
|
|
115
|
-
Available as part of the Tidelift Subscription
|
|
116
|
-
|
|
117
|
-
The maintainers of `entities` and thousands of other packages are working with
|
|
118
|
-
Tidelift to deliver commercial support and maintenance for the open source
|
|
119
|
-
dependencies you use to build your applications. Save time, reduce risk, and
|
|
120
|
-
improve code health, while paying the maintainers of the exact dependencies you
|
|
121
|
-
use.
|
|
122
|
-
[Learn more.](https://tidelift.com/subscription/pkg/npm-entities?utm_source=npm-entities&utm_medium=referral&utm_campaign=enterprise&utm_term=repo)
|
package/src/decode-codepoint.ts
CHANGED
|
@@ -32,31 +32,11 @@ const decodeMap = new Map([
|
|
|
32
32
|
[159, 376],
|
|
33
33
|
]);
|
|
34
34
|
|
|
35
|
-
/**
|
|
36
|
-
* Polyfill for `String.fromCodePoint`. It is used to create a string from a Unicode code point.
|
|
37
|
-
*/
|
|
38
|
-
export const fromCodePoint: (...codePoints: number[]) => string =
|
|
39
|
-
// eslint-disable-next-line @typescript-eslint/no-unnecessary-condition, n/no-unsupported-features/es-builtins
|
|
40
|
-
String.fromCodePoint ??
|
|
41
|
-
((codePoint: number): string => {
|
|
42
|
-
let output = "";
|
|
43
|
-
|
|
44
|
-
if (codePoint > 0xff_ff) {
|
|
45
|
-
codePoint -= 0x1_00_00;
|
|
46
|
-
output += String.fromCharCode(
|
|
47
|
-
((codePoint >>> 10) & 0x3_ff) | 0xd8_00,
|
|
48
|
-
);
|
|
49
|
-
codePoint = 0xdc_00 | (codePoint & 0x3_ff);
|
|
50
|
-
}
|
|
51
|
-
|
|
52
|
-
output += String.fromCharCode(codePoint);
|
|
53
|
-
return output;
|
|
54
|
-
});
|
|
55
|
-
|
|
56
35
|
/**
|
|
57
36
|
* Replace the given code point with a replacement character if it is a
|
|
58
37
|
* surrogate or is outside the valid range. Otherwise return the code
|
|
59
38
|
* point unchanged.
|
|
39
|
+
* @param codePoint Unicode code point to convert.
|
|
60
40
|
*/
|
|
61
41
|
export function replaceCodePoint(codePoint: number): number {
|
|
62
42
|
if (
|
|
@@ -68,14 +48,3 @@ export function replaceCodePoint(codePoint: number): number {
|
|
|
68
48
|
|
|
69
49
|
return decodeMap.get(codePoint) ?? codePoint;
|
|
70
50
|
}
|
|
71
|
-
|
|
72
|
-
/**
|
|
73
|
-
* Replace the code point if relevant, then convert it to a string.
|
|
74
|
-
*
|
|
75
|
-
* @deprecated Use `fromCodePoint(replaceCodePoint(codePoint))` instead.
|
|
76
|
-
* @param codePoint The code point to decode.
|
|
77
|
-
* @returns The decoded code point.
|
|
78
|
-
*/
|
|
79
|
-
export function decodeCodePoint(codePoint: number): string {
|
|
80
|
-
return fromCodePoint(replaceCodePoint(codePoint));
|
|
81
|
-
}
|
package/src/decode.ts
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { replaceCodePoint } from "./decode-codepoint.js";
|
|
2
2
|
import { htmlDecodeTree } from "./generated/decode-data-html.js";
|
|
3
3
|
import { xmlDecodeTree } from "./generated/decode-data-xml.js";
|
|
4
4
|
import { BinTrieFlags } from "./internal/bin-trie-flags.js";
|
|
@@ -45,6 +45,7 @@ function isAsciiAlphaNumeric(code: number): boolean {
|
|
|
45
45
|
*
|
|
46
46
|
* Attribute values that aren't terminated properly aren't parsed, and shouldn't lead to a parser error.
|
|
47
47
|
* See the example in https://html.spec.whatwg.org/multipage/parsing.html#named-character-reference-state
|
|
48
|
+
* @param code Character code to check.
|
|
48
49
|
*/
|
|
49
50
|
function isEntityInAttributeInvalidEnd(code: number): boolean {
|
|
50
51
|
return code === CharCodes.EQUALS || isAsciiAlphaNumeric(code);
|
|
@@ -58,6 +59,9 @@ const enum EntityDecoderState {
|
|
|
58
59
|
NamedEntity,
|
|
59
60
|
}
|
|
60
61
|
|
|
62
|
+
/**
|
|
63
|
+
* Decoding mode for named entities.
|
|
64
|
+
*/
|
|
61
65
|
export enum DecodingMode {
|
|
62
66
|
/** Entities in text nodes that can end with any character. */
|
|
63
67
|
Legacy = 0,
|
|
@@ -91,7 +95,6 @@ export class EntityDecoder {
|
|
|
91
95
|
*
|
|
92
96
|
* For multi-byte named entities, this will be called multiple times,
|
|
93
97
|
* with the second codepoint, and the same `consumed` value.
|
|
94
|
-
*
|
|
95
98
|
* @param codepoint The decoded codepoint.
|
|
96
99
|
* @param consumed The number of bytes consumed by the decoder.
|
|
97
100
|
*/
|
|
@@ -118,8 +121,13 @@ export class EntityDecoder {
|
|
|
118
121
|
private excess = 1;
|
|
119
122
|
/** The mode in which the decoder is operating. */
|
|
120
123
|
private decodeMode = DecodingMode.Strict;
|
|
124
|
+
/** The number of characters that have been consumed in the current run. */
|
|
125
|
+
private runConsumed = 0;
|
|
121
126
|
|
|
122
|
-
/**
|
|
127
|
+
/**
|
|
128
|
+
* Resets the instance to make it reusable.
|
|
129
|
+
* @param decodeMode Entity decoding mode to use.
|
|
130
|
+
*/
|
|
123
131
|
startEntity(decodeMode: DecodingMode): void {
|
|
124
132
|
this.decodeMode = decodeMode;
|
|
125
133
|
this.state = EntityDecoderState.EntityStart;
|
|
@@ -127,6 +135,7 @@ export class EntityDecoder {
|
|
|
127
135
|
this.treeIndex = 0;
|
|
128
136
|
this.excess = 1;
|
|
129
137
|
this.consumed = 1;
|
|
138
|
+
this.runConsumed = 0;
|
|
130
139
|
}
|
|
131
140
|
|
|
132
141
|
/**
|
|
@@ -135,7 +144,6 @@ export class EntityDecoder {
|
|
|
135
144
|
*
|
|
136
145
|
* Mirrors the implementation of `getDecoder`, but with the ability to stop decoding if the
|
|
137
146
|
* entity is incomplete, and resume when the next string is written.
|
|
138
|
-
*
|
|
139
147
|
* @param input The string containing the entity (or a continuation of the entity).
|
|
140
148
|
* @param offset The offset at which the entity begins. Should be 0 if this is not the first call.
|
|
141
149
|
* @returns The number of characters that were consumed, or -1 if the entity is incomplete.
|
|
@@ -174,7 +182,6 @@ export class EntityDecoder {
|
|
|
174
182
|
* Switches between the numeric decimal and hexadecimal states.
|
|
175
183
|
*
|
|
176
184
|
* Equivalent to the `Numeric character reference state` in the HTML spec.
|
|
177
|
-
*
|
|
178
185
|
* @param input The string containing the entity (or a continuation of the entity).
|
|
179
186
|
* @param offset The current offset.
|
|
180
187
|
* @returns The number of characters that were consumed, or -1 if the entity is incomplete.
|
|
@@ -198,7 +205,6 @@ export class EntityDecoder {
|
|
|
198
205
|
* Parses a hexadecimal numeric entity.
|
|
199
206
|
*
|
|
200
207
|
* Equivalent to the `Hexadecimal character reference state` in the HTML spec.
|
|
201
|
-
*
|
|
202
208
|
* @param input The string containing the entity (or a continuation of the entity).
|
|
203
209
|
* @param offset The current offset.
|
|
204
210
|
* @returns The number of characters that were consumed, or -1 if the entity is incomplete.
|
|
@@ -226,7 +232,6 @@ export class EntityDecoder {
|
|
|
226
232
|
* Parses a decimal numeric entity.
|
|
227
233
|
*
|
|
228
234
|
* Equivalent to the `Decimal character reference state` in the HTML spec.
|
|
229
|
-
*
|
|
230
235
|
* @param input The string containing the entity (or a continuation of the entity).
|
|
231
236
|
* @param offset The current offset.
|
|
232
237
|
* @returns The number of characters that were consumed, or -1 if the entity is incomplete.
|
|
@@ -250,7 +255,6 @@ export class EntityDecoder {
|
|
|
250
255
|
*
|
|
251
256
|
* Implements the logic from the `Hexadecimal character reference start
|
|
252
257
|
* state` and `Numeric character reference end state` in the HTML spec.
|
|
253
|
-
*
|
|
254
258
|
* @param lastCp The last code point of the entity. Used to see if the
|
|
255
259
|
* entity was terminated with a semicolon.
|
|
256
260
|
* @param expectedLength The minimum number of characters that should be
|
|
@@ -291,7 +295,6 @@ export class EntityDecoder {
|
|
|
291
295
|
* Parses a named entity.
|
|
292
296
|
*
|
|
293
297
|
* Equivalent to the `Named character reference state` in the HTML spec.
|
|
294
|
-
*
|
|
295
298
|
* @param input The string containing the entity (or a continuation of the entity).
|
|
296
299
|
* @param offset The current offset.
|
|
297
300
|
* @returns The number of characters that were consumed, or -1 if the entity is incomplete.
|
|
@@ -307,43 +310,49 @@ export class EntityDecoder {
|
|
|
307
310
|
if (valueLength === 0 && (current & BinTrieFlags.FLAG13) !== 0) {
|
|
308
311
|
const runLength =
|
|
309
312
|
(current & BinTrieFlags.BRANCH_LENGTH) >> 7; /* 2..63 */
|
|
310
|
-
|
|
311
|
-
//
|
|
312
|
-
if (
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
313
|
+
|
|
314
|
+
// If we are starting a run, check the first char.
|
|
315
|
+
if (this.runConsumed === 0) {
|
|
316
|
+
const firstChar = current & BinTrieFlags.JUMP_TABLE;
|
|
317
|
+
if (input.charCodeAt(offset) !== firstChar) {
|
|
318
|
+
return this.result === 0
|
|
319
|
+
? 0
|
|
320
|
+
: this.emitNotTerminatedNamedEntity();
|
|
321
|
+
}
|
|
322
|
+
offset++;
|
|
323
|
+
this.excess++;
|
|
324
|
+
this.runConsumed++;
|
|
318
325
|
}
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
326
|
+
|
|
327
|
+
// Check remaining characters in the run.
|
|
328
|
+
while (this.runConsumed < runLength) {
|
|
329
|
+
if (offset >= input.length) {
|
|
330
|
+
return -1;
|
|
331
|
+
}
|
|
332
|
+
|
|
333
|
+
const charIndexInPacked = this.runConsumed - 1;
|
|
325
334
|
const packedWord =
|
|
326
|
-
decodeTree[
|
|
327
|
-
|
|
328
|
-
|
|
335
|
+
decodeTree[
|
|
336
|
+
this.treeIndex + 1 + (charIndexInPacked >> 1)
|
|
337
|
+
];
|
|
338
|
+
const expectedChar =
|
|
339
|
+
charIndexInPacked % 2 === 0
|
|
340
|
+
? packedWord & 0xff
|
|
341
|
+
: (packedWord >> 8) & 0xff;
|
|
342
|
+
|
|
343
|
+
if (input.charCodeAt(offset) !== expectedChar) {
|
|
344
|
+
this.runConsumed = 0;
|
|
329
345
|
return this.result === 0
|
|
330
346
|
? 0
|
|
331
347
|
: this.emitNotTerminatedNamedEntity();
|
|
332
348
|
}
|
|
333
349
|
offset++;
|
|
334
350
|
this.excess++;
|
|
335
|
-
|
|
336
|
-
if (runPos + 1 < runLength) {
|
|
337
|
-
if (input.charCodeAt(offset) !== high) {
|
|
338
|
-
return this.result === 0
|
|
339
|
-
? 0
|
|
340
|
-
: this.emitNotTerminatedNamedEntity();
|
|
341
|
-
}
|
|
342
|
-
offset++;
|
|
343
|
-
this.excess++;
|
|
344
|
-
}
|
|
351
|
+
this.runConsumed++;
|
|
345
352
|
}
|
|
346
|
-
|
|
353
|
+
|
|
354
|
+
this.runConsumed = 0;
|
|
355
|
+
this.treeIndex += 1 + (runLength >> 1);
|
|
347
356
|
current = decodeTree[this.treeIndex];
|
|
348
357
|
valueLength = (current & BinTrieFlags.VALUE_LENGTH) >> 14;
|
|
349
358
|
}
|
|
@@ -424,7 +433,6 @@ export class EntityDecoder {
|
|
|
424
433
|
|
|
425
434
|
/**
|
|
426
435
|
* Emit a named entity that was not terminated with a semicolon.
|
|
427
|
-
*
|
|
428
436
|
* @returns The number of characters consumed.
|
|
429
437
|
*/
|
|
430
438
|
private emitNotTerminatedNamedEntity(): number {
|
|
@@ -441,11 +449,9 @@ export class EntityDecoder {
|
|
|
441
449
|
|
|
442
450
|
/**
|
|
443
451
|
* Emit a named entity.
|
|
444
|
-
*
|
|
445
452
|
* @param result The index of the entity in the decode tree.
|
|
446
453
|
* @param valueLength The number of bytes in the entity.
|
|
447
454
|
* @param consumed The number of characters consumed.
|
|
448
|
-
*
|
|
449
455
|
* @returns The number of characters consumed.
|
|
450
456
|
*/
|
|
451
457
|
private emitNamedEntityData(
|
|
@@ -474,7 +480,6 @@ export class EntityDecoder {
|
|
|
474
480
|
* Signal to the parser that the end of the input was reached.
|
|
475
481
|
*
|
|
476
482
|
* Remaining data will be emitted and relevant errors will be produced.
|
|
477
|
-
*
|
|
478
483
|
* @returns The number of characters consumed.
|
|
479
484
|
*/
|
|
480
485
|
end(): number {
|
|
@@ -510,7 +515,6 @@ export class EntityDecoder {
|
|
|
510
515
|
|
|
511
516
|
/**
|
|
512
517
|
* Creates a function that decodes entities in a string.
|
|
513
|
-
*
|
|
514
518
|
* @param decodeTree The decode tree.
|
|
515
519
|
* @returns A function that decodes entities in a string.
|
|
516
520
|
*/
|
|
@@ -518,7 +522,7 @@ function getDecoder(decodeTree: Uint16Array) {
|
|
|
518
522
|
let returnValue = "";
|
|
519
523
|
const decoder = new EntityDecoder(
|
|
520
524
|
decodeTree,
|
|
521
|
-
(data) => (returnValue += fromCodePoint(data)),
|
|
525
|
+
(data) => (returnValue += String.fromCodePoint(data)),
|
|
522
526
|
);
|
|
523
527
|
|
|
524
528
|
return function decodeWithTrie(
|
|
@@ -561,10 +565,9 @@ function getDecoder(decodeTree: Uint16Array) {
|
|
|
561
565
|
/**
|
|
562
566
|
* Determines the branch of the current node that is taken given the current
|
|
563
567
|
* character. This function is used to traverse the trie.
|
|
564
|
-
*
|
|
565
568
|
* @param decodeTree The trie.
|
|
566
569
|
* @param current The current node.
|
|
567
|
-
* @param
|
|
570
|
+
* @param nodeIndex Index immediately after the current node header.
|
|
568
571
|
* @param char The current character.
|
|
569
572
|
* @returns The index of the next node, or -1 if no branch is taken.
|
|
570
573
|
*/
|
|
@@ -624,7 +627,6 @@ const xmlDecoder = /* #__PURE__ */ getDecoder(xmlDecodeTree);
|
|
|
624
627
|
|
|
625
628
|
/**
|
|
626
629
|
* Decodes an HTML string.
|
|
627
|
-
*
|
|
628
630
|
* @param htmlString The string to decode.
|
|
629
631
|
* @param mode The decoding mode.
|
|
630
632
|
* @returns The decoded string.
|
|
@@ -638,7 +640,6 @@ export function decodeHTML(
|
|
|
638
640
|
|
|
639
641
|
/**
|
|
640
642
|
* Decodes an HTML string in an attribute.
|
|
641
|
-
*
|
|
642
643
|
* @param htmlAttribute The string to decode.
|
|
643
644
|
* @returns The decoded string.
|
|
644
645
|
*/
|
|
@@ -648,7 +649,6 @@ export function decodeHTMLAttribute(htmlAttribute: string): string {
|
|
|
648
649
|
|
|
649
650
|
/**
|
|
650
651
|
* Decodes an HTML string, requiring all entities to be terminated by a semicolon.
|
|
651
|
-
*
|
|
652
652
|
* @param htmlString The string to decode.
|
|
653
653
|
* @returns The decoded string.
|
|
654
654
|
*/
|
|
@@ -658,7 +658,6 @@ export function decodeHTMLStrict(htmlString: string): string {
|
|
|
658
658
|
|
|
659
659
|
/**
|
|
660
660
|
* Decodes an XML string, requiring all entities to be terminated by a semicolon.
|
|
661
|
-
*
|
|
662
661
|
* @param xmlString The string to decode.
|
|
663
662
|
* @returns The decoded string.
|
|
664
663
|
*/
|
|
@@ -666,11 +665,7 @@ export function decodeXML(xmlString: string): string {
|
|
|
666
665
|
return xmlDecoder(xmlString, DecodingMode.Strict);
|
|
667
666
|
}
|
|
668
667
|
|
|
669
|
-
export {
|
|
670
|
-
decodeCodePoint,
|
|
671
|
-
fromCodePoint,
|
|
672
|
-
replaceCodePoint,
|
|
673
|
-
} from "./decode-codepoint.js";
|
|
668
|
+
export { replaceCodePoint } from "./decode-codepoint.js";
|
|
674
669
|
// Re-export for use by eg. htmlparser2
|
|
675
670
|
export { htmlDecodeTree } from "./generated/decode-data-html.js";
|
|
676
671
|
export { xmlDecodeTree } from "./generated/decode-data-xml.js";
|
package/src/encode.ts
CHANGED
|
@@ -23,6 +23,7 @@ const XML_BITSET = /* #__PURE__ */ new Uint32Array([0, XML_BITSET_VALUE, 0, 0]);
|
|
|
23
23
|
*
|
|
24
24
|
* If a character has no equivalent entity, a numeric hexadecimal reference
|
|
25
25
|
* (eg. `ü`) will be used.
|
|
26
|
+
* @param input Input string to encode.
|
|
26
27
|
*/
|
|
27
28
|
export function encodeHTML(input: string): string {
|
|
28
29
|
return encodeHTMLTrieRe(HTML_BITSET, input);
|
|
@@ -34,6 +35,7 @@ export function encodeHTML(input: string): string {
|
|
|
34
35
|
*
|
|
35
36
|
* If a character has no equivalent entity, a numeric hexadecimal reference
|
|
36
37
|
* (eg. `ü`) will be used.
|
|
38
|
+
* @param input Input string to encode.
|
|
37
39
|
*/
|
|
38
40
|
export function encodeNonAsciiHTML(input: string): string {
|
|
39
41
|
return encodeHTMLTrieRe(XML_BITSET, input);
|
package/src/escape.ts
CHANGED
|
@@ -7,18 +7,22 @@ const xmlCodeMap = new Map([
|
|
|
7
7
|
]);
|
|
8
8
|
|
|
9
9
|
// For compatibility with node < 4, we wrap `codePointAt`
|
|
10
|
+
/**
|
|
11
|
+
* Read a code point at a given index.
|
|
12
|
+
* @param input Input string to read the code point from.
|
|
13
|
+
* @param index Current read position in the input string.
|
|
14
|
+
*/
|
|
10
15
|
export const getCodePoint: (c: string, index: number) => number =
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
16
|
+
typeof String.prototype.codePointAt === "function"
|
|
17
|
+
? (input: string, index: number): number => input.codePointAt(index)!
|
|
18
|
+
: // http://mathiasbynens.be/notes/javascript-encoding#surrogate-formulae
|
|
19
|
+
(c: string, index: number): number =>
|
|
14
20
|
(c.charCodeAt(index) & 0xfc_00) === 0xd8_00
|
|
15
21
|
? (c.charCodeAt(index) - 0xd8_00) * 0x4_00 +
|
|
16
22
|
c.charCodeAt(index + 1) -
|
|
17
23
|
0xdc_00 +
|
|
18
24
|
0x1_00_00
|
|
19
|
-
: c.charCodeAt(index)
|
|
20
|
-
: // http://mathiasbynens.be/notes/javascript-encoding#surrogate-formulae
|
|
21
|
-
(input: string, index: number): number => input.codePointAt(index)!;
|
|
25
|
+
: c.charCodeAt(index);
|
|
22
26
|
|
|
23
27
|
/**
|
|
24
28
|
* Bitset for ASCII characters that need to be escaped in XML.
|
|
@@ -31,6 +35,7 @@ export const XML_BITSET_VALUE = 0x50_00_00_c4; // 32..63 -> 34 ("),38 (&),39 (')
|
|
|
31
35
|
*
|
|
32
36
|
* If a character has no equivalent entity, a numeric hexadecimal reference
|
|
33
37
|
* (eg. `ü`) will be used.
|
|
38
|
+
* @param input Input string to encode.
|
|
34
39
|
*/
|
|
35
40
|
export function encodeXML(input: string): string {
|
|
36
41
|
let out: string | undefined;
|
|
@@ -76,7 +81,6 @@ export function encodeXML(input: string): string {
|
|
|
76
81
|
*
|
|
77
82
|
* Have a look at `escapeUTF8` if you want a more concise output at the expense
|
|
78
83
|
* of reduced transportability.
|
|
79
|
-
*
|
|
80
84
|
* @param data String to escape.
|
|
81
85
|
*/
|
|
82
86
|
export const escape: typeof encodeXML = encodeXML;
|
|
@@ -84,10 +88,8 @@ export const escape: typeof encodeXML = encodeXML;
|
|
|
84
88
|
/**
|
|
85
89
|
* Creates a function that escapes all characters matched by the given regular
|
|
86
90
|
* expression using the given map of characters to escape to their entities.
|
|
87
|
-
*
|
|
88
91
|
* @param regex Regular expression to match characters to escape.
|
|
89
92
|
* @param map Map of characters to escape to their entities.
|
|
90
|
-
*
|
|
91
93
|
* @returns Function that escapes all characters matched by the given regular
|
|
92
94
|
* expression using the given map of characters to escape to their entities.
|
|
93
95
|
*/
|
|
@@ -120,7 +122,6 @@ function getEscaper(
|
|
|
120
122
|
* Encodes all characters not valid in XML documents using XML entities.
|
|
121
123
|
*
|
|
122
124
|
* Note that the output will be character-set dependent.
|
|
123
|
-
*
|
|
124
125
|
* @param data String to escape.
|
|
125
126
|
*/
|
|
126
127
|
export const escapeUTF8: (data: string) => string = /* #__PURE__ */ getEscaper(
|
|
@@ -131,7 +132,6 @@ export const escapeUTF8: (data: string) => string = /* #__PURE__ */ getEscaper(
|
|
|
131
132
|
/**
|
|
132
133
|
* Encodes all characters that have to be escaped in HTML attributes,
|
|
133
134
|
* following {@link https://html.spec.whatwg.org/multipage/parsing.html#escapingString}.
|
|
134
|
-
*
|
|
135
135
|
* @param data String to escape.
|
|
136
136
|
*/
|
|
137
137
|
export const escapeAttribute: (data: string) => string =
|
|
@@ -147,7 +147,6 @@ export const escapeAttribute: (data: string) => string =
|
|
|
147
147
|
/**
|
|
148
148
|
* Encodes all characters that have to be escaped in HTML text,
|
|
149
149
|
* following {@link https://html.spec.whatwg.org/multipage/parsing.html#escapingString}.
|
|
150
|
-
*
|
|
151
150
|
* @param data String to escape.
|
|
152
151
|
*/
|
|
153
152
|
export const escapeText: (data: string) => string = /* #__PURE__ */ getEscaper(
|