entities 4.5.0 → 6.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/decode.js +3 -0
- package/{lib/decode_codepoint.d.ts → dist/commonjs/decode-codepoint.d.ts} +2 -2
- package/dist/commonjs/decode-codepoint.d.ts.map +1 -0
- package/{lib/decode_codepoint.js → dist/commonjs/decode-codepoint.js} +14 -13
- package/dist/commonjs/decode-codepoint.js.map +1 -0
- package/{lib → dist/commonjs}/decode.d.ts +18 -20
- package/dist/commonjs/decode.d.ts.map +1 -0
- package/{lib → dist/commonjs}/decode.js +122 -147
- package/dist/commonjs/decode.js.map +1 -0
- package/{lib/esm → dist/commonjs}/encode.d.ts +2 -2
- package/dist/commonjs/encode.d.ts.map +1 -0
- package/{lib → dist/commonjs}/encode.js +31 -35
- package/dist/commonjs/encode.js.map +1 -0
- package/{lib/esm → dist/commonjs}/escape.d.ts +2 -2
- package/dist/commonjs/escape.d.ts.map +1 -0
- package/{lib → dist/commonjs}/escape.js +37 -38
- package/dist/commonjs/escape.js.map +1 -0
- package/dist/commonjs/generated/decode-data-html.d.ts +2 -0
- package/dist/commonjs/generated/decode-data-html.d.ts.map +1 -0
- package/dist/commonjs/generated/decode-data-html.js +10 -0
- package/dist/commonjs/generated/decode-data-html.js.map +1 -0
- package/dist/commonjs/generated/decode-data-xml.d.ts +2 -0
- package/dist/commonjs/generated/decode-data-xml.d.ts.map +1 -0
- package/dist/commonjs/generated/decode-data-xml.js +10 -0
- package/dist/commonjs/generated/decode-data-xml.js.map +1 -0
- package/dist/commonjs/generated/encode-html.d.ts +8 -0
- package/dist/commonjs/generated/encode-html.d.ts.map +1 -0
- package/dist/commonjs/generated/encode-html.js +13 -0
- package/dist/commonjs/generated/encode-html.js.map +1 -0
- package/{lib → dist/commonjs}/index.d.ts +6 -6
- package/dist/commonjs/index.d.ts.map +1 -0
- package/{lib → dist/commonjs}/index.js +44 -39
- package/dist/commonjs/index.js.map +1 -0
- package/dist/commonjs/package.json +3 -0
- package/{lib/esm/decode_codepoint.d.ts → dist/esm/decode-codepoint.d.ts} +2 -2
- package/dist/esm/decode-codepoint.d.ts.map +1 -0
- package/{lib/esm/decode_codepoint.js → dist/esm/decode-codepoint.js} +10 -9
- package/dist/esm/decode-codepoint.js.map +1 -0
- package/{lib → dist}/esm/decode.d.ts +18 -20
- package/dist/esm/decode.d.ts.map +1 -0
- package/{lib → dist}/esm/decode.js +76 -75
- package/dist/esm/decode.js.map +1 -0
- package/{lib → dist/esm}/encode.d.ts +2 -2
- package/dist/esm/encode.d.ts.map +1 -0
- package/{lib → dist}/esm/encode.js +25 -25
- package/dist/esm/encode.js.map +1 -0
- package/{lib → dist/esm}/escape.d.ts +2 -2
- package/dist/esm/escape.d.ts.map +1 -0
- package/{lib → dist}/esm/escape.js +31 -30
- package/dist/esm/escape.js.map +1 -0
- package/dist/esm/generated/decode-data-html.d.ts +2 -0
- package/dist/esm/generated/decode-data-html.d.ts.map +1 -0
- package/dist/esm/generated/decode-data-html.js +7 -0
- package/dist/esm/generated/decode-data-html.js.map +1 -0
- package/dist/esm/generated/decode-data-xml.d.ts +2 -0
- package/dist/esm/generated/decode-data-xml.d.ts.map +1 -0
- package/dist/esm/generated/decode-data-xml.js +7 -0
- package/dist/esm/generated/decode-data-xml.js.map +1 -0
- package/dist/esm/generated/encode-html.d.ts +8 -0
- package/dist/esm/generated/encode-html.d.ts.map +1 -0
- package/dist/esm/generated/encode-html.js +10 -0
- package/dist/esm/generated/encode-html.js.map +1 -0
- package/{lib → dist}/esm/index.d.ts +6 -6
- package/dist/esm/index.d.ts.map +1 -0
- package/{lib → dist}/esm/index.js +33 -25
- package/dist/esm/index.js.map +1 -0
- package/dist/esm/package.json +3 -0
- package/escape.js +3 -0
- package/package.json +92 -66
- package/readme.md +19 -19
- package/src/decode-codepoint.ts +81 -0
- package/src/decode.spec.ts +320 -0
- package/src/decode.ts +620 -0
- package/src/encode.spec.ts +78 -0
- package/src/encode.ts +77 -0
- package/src/escape.spec.ts +14 -0
- package/src/escape.ts +148 -0
- package/src/generated/.eslintrc.json +10 -0
- package/src/generated/decode-data-html.ts +8 -0
- package/src/generated/decode-data-xml.ts +8 -0
- package/src/generated/encode-html.ts +17 -0
- package/src/index.spec.ts +125 -0
- package/src/index.ts +188 -0
- package/lib/decode.d.ts.map +0 -1
- package/lib/decode.js.map +0 -1
- package/lib/decode_codepoint.d.ts.map +0 -1
- package/lib/decode_codepoint.js.map +0 -1
- package/lib/encode.d.ts.map +0 -1
- package/lib/encode.js.map +0 -1
- package/lib/escape.d.ts.map +0 -1
- package/lib/escape.js.map +0 -1
- package/lib/esm/decode.d.ts.map +0 -1
- package/lib/esm/decode.js.map +0 -1
- package/lib/esm/decode_codepoint.d.ts.map +0 -1
- package/lib/esm/decode_codepoint.js.map +0 -1
- package/lib/esm/encode.d.ts.map +0 -1
- package/lib/esm/encode.js.map +0 -1
- package/lib/esm/escape.d.ts.map +0 -1
- package/lib/esm/escape.js.map +0 -1
- package/lib/esm/generated/decode-data-html.d.ts +0 -3
- package/lib/esm/generated/decode-data-html.d.ts.map +0 -1
- package/lib/esm/generated/decode-data-html.js +0 -7
- package/lib/esm/generated/decode-data-html.js.map +0 -1
- package/lib/esm/generated/decode-data-xml.d.ts +0 -3
- package/lib/esm/generated/decode-data-xml.d.ts.map +0 -1
- package/lib/esm/generated/decode-data-xml.js +0 -7
- package/lib/esm/generated/decode-data-xml.js.map +0 -1
- package/lib/esm/generated/encode-html.d.ts +0 -8
- package/lib/esm/generated/encode-html.d.ts.map +0 -1
- package/lib/esm/generated/encode-html.js +0 -10
- package/lib/esm/generated/encode-html.js.map +0 -1
- package/lib/esm/index.d.ts.map +0 -1
- package/lib/esm/index.js.map +0 -1
- package/lib/esm/package.json +0 -1
- package/lib/generated/decode-data-html.d.ts +0 -3
- package/lib/generated/decode-data-html.d.ts.map +0 -1
- package/lib/generated/decode-data-html.js +0 -9
- package/lib/generated/decode-data-html.js.map +0 -1
- package/lib/generated/decode-data-xml.d.ts +0 -3
- package/lib/generated/decode-data-xml.d.ts.map +0 -1
- package/lib/generated/decode-data-xml.js +0 -9
- package/lib/generated/decode-data-xml.js.map +0 -1
- package/lib/generated/encode-html.d.ts +0 -8
- package/lib/generated/encode-html.d.ts.map +0 -1
- package/lib/generated/encode-html.js +0 -12
- package/lib/generated/encode-html.js.map +0 -1
- package/lib/index.d.ts.map +0 -1
- package/lib/index.js.map +0 -1
package/package.json
CHANGED
|
@@ -1,90 +1,116 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "entities",
|
|
3
|
-
"version": "
|
|
3
|
+
"version": "6.0.0",
|
|
4
4
|
"description": "Encode & decode XML and HTML entities with ease & speed",
|
|
5
|
-
"author": "Felix Boehm <me@feedic.com>",
|
|
6
|
-
"funding": "https://github.com/fb55/entities?sponsor=1",
|
|
7
|
-
"sideEffects": false,
|
|
8
5
|
"keywords": [
|
|
9
|
-
"
|
|
10
|
-
"
|
|
11
|
-
"encoding",
|
|
12
|
-
"html",
|
|
13
|
-
"
|
|
14
|
-
"
|
|
6
|
+
"html entities",
|
|
7
|
+
"entity decoder",
|
|
8
|
+
"entity encoding",
|
|
9
|
+
"html decoding",
|
|
10
|
+
"html encoding",
|
|
11
|
+
"xml decoding",
|
|
12
|
+
"xml encoding"
|
|
15
13
|
],
|
|
16
|
-
"
|
|
17
|
-
"
|
|
14
|
+
"repository": {
|
|
15
|
+
"type": "git",
|
|
16
|
+
"url": "git://github.com/fb55/entities.git"
|
|
18
17
|
},
|
|
19
|
-
"
|
|
20
|
-
"
|
|
21
|
-
"
|
|
18
|
+
"funding": "https://github.com/fb55/entities?sponsor=1",
|
|
19
|
+
"license": "BSD-2-Clause",
|
|
20
|
+
"author": "Felix Boehm <me@feedic.com>",
|
|
21
|
+
"sideEffects": false,
|
|
22
|
+
"type": "module",
|
|
22
23
|
"exports": {
|
|
23
24
|
".": {
|
|
24
|
-
"
|
|
25
|
-
|
|
25
|
+
"import": {
|
|
26
|
+
"types": "./dist/esm/index.d.ts",
|
|
27
|
+
"default": "./dist/esm/index.js"
|
|
28
|
+
},
|
|
29
|
+
"require": {
|
|
30
|
+
"types": "./dist/commonjs/index.d.ts",
|
|
31
|
+
"default": "./dist/commonjs/index.js"
|
|
32
|
+
}
|
|
26
33
|
},
|
|
27
|
-
"./
|
|
28
|
-
"
|
|
29
|
-
|
|
34
|
+
"./decode": {
|
|
35
|
+
"import": {
|
|
36
|
+
"types": "./dist/esm/decode.d.ts",
|
|
37
|
+
"default": "./dist/esm/decode.js"
|
|
38
|
+
},
|
|
39
|
+
"require": {
|
|
40
|
+
"types": "./dist/commonjs/decode.d.ts",
|
|
41
|
+
"default": "./dist/commonjs/decode.js"
|
|
42
|
+
}
|
|
30
43
|
},
|
|
31
|
-
"./
|
|
32
|
-
"
|
|
33
|
-
|
|
44
|
+
"./escape": {
|
|
45
|
+
"import": {
|
|
46
|
+
"types": "./dist/esm/escape.d.ts",
|
|
47
|
+
"default": "./dist/esm/escape.js"
|
|
48
|
+
},
|
|
49
|
+
"require": {
|
|
50
|
+
"types": "./dist/commonjs/escape.d.ts",
|
|
51
|
+
"default": "./dist/commonjs/escape.js"
|
|
52
|
+
}
|
|
34
53
|
}
|
|
35
54
|
},
|
|
55
|
+
"main": "./dist/commonjs/index.js",
|
|
56
|
+
"module": "./dist/esm/index.js",
|
|
57
|
+
"types": "./dist/commonjs/index.d.ts",
|
|
36
58
|
"files": [
|
|
37
|
-
"
|
|
59
|
+
"decode.js",
|
|
60
|
+
"escape.js",
|
|
61
|
+
"dist",
|
|
62
|
+
"src"
|
|
38
63
|
],
|
|
39
|
-
"engines": {
|
|
40
|
-
"node": ">=0.12"
|
|
41
|
-
},
|
|
42
|
-
"devDependencies": {
|
|
43
|
-
"@types/jest": "^28.1.8",
|
|
44
|
-
"@types/node": "^18.15.11",
|
|
45
|
-
"@typescript-eslint/eslint-plugin": "^5.58.0",
|
|
46
|
-
"@typescript-eslint/parser": "^5.58.0",
|
|
47
|
-
"eslint": "^8.38.0",
|
|
48
|
-
"eslint-config-prettier": "^8.8.0",
|
|
49
|
-
"eslint-plugin-node": "^11.1.0",
|
|
50
|
-
"jest": "^28.1.3",
|
|
51
|
-
"prettier": "^2.8.7",
|
|
52
|
-
"ts-jest": "^28.0.8",
|
|
53
|
-
"typedoc": "^0.24.1",
|
|
54
|
-
"typescript": "^5.0.4"
|
|
55
|
-
},
|
|
56
64
|
"scripts": {
|
|
57
|
-
"
|
|
58
|
-
"
|
|
59
|
-
"
|
|
60
|
-
"lint:es": "eslint .",
|
|
61
|
-
"lint:prettier": "npm run prettier -- --check",
|
|
65
|
+
"build:docs": "typedoc --hideGenerator src/index.ts",
|
|
66
|
+
"build:encode-trie": "node --import=tsx scripts/write-encode-map.ts",
|
|
67
|
+
"build:trie": "node --import=tsx scripts/write-decode-map.ts",
|
|
62
68
|
"format": "npm run format:es && npm run format:prettier",
|
|
63
69
|
"format:es": "npm run lint:es -- --fix",
|
|
64
70
|
"format:prettier": "npm run prettier -- --write",
|
|
71
|
+
"lint": "npm run lint:es && npm run lint:ts && npm run lint:prettier",
|
|
72
|
+
"lint:es": "eslint . --ignore-path .gitignore",
|
|
73
|
+
"lint:prettier": "npm run prettier -- --check",
|
|
74
|
+
"lint:ts": "tsc --noEmit",
|
|
75
|
+
"prepublishOnly": "tshy",
|
|
65
76
|
"prettier": "prettier '**/*.{ts,md,json,yml}'",
|
|
66
|
-
"
|
|
67
|
-
"
|
|
68
|
-
"build:esm": "npm run build:cjs -- --module esnext --target es2019 --outDir lib/esm && echo '{\"type\":\"module\"}' > lib/esm/package.json",
|
|
69
|
-
"build:docs": "typedoc --hideGenerator src/index.ts",
|
|
70
|
-
"build:trie": "ts-node scripts/write-decode-map.ts",
|
|
71
|
-
"build:encode-trie": "ts-node scripts/write-encode-map.ts",
|
|
72
|
-
"prepare": "npm run build"
|
|
77
|
+
"test": "npm run test:vi && npm run lint",
|
|
78
|
+
"test:vi": "vitest run"
|
|
73
79
|
},
|
|
74
|
-
"
|
|
75
|
-
"
|
|
76
|
-
"
|
|
80
|
+
"prettier": {
|
|
81
|
+
"proseWrap": "always",
|
|
82
|
+
"tabWidth": 4
|
|
77
83
|
},
|
|
78
|
-
"
|
|
79
|
-
|
|
80
|
-
"
|
|
81
|
-
"
|
|
82
|
-
"
|
|
83
|
-
|
|
84
|
-
|
|
84
|
+
"devDependencies": {
|
|
85
|
+
"@types/node": "^22.10.2",
|
|
86
|
+
"@typescript-eslint/eslint-plugin": "^8.18.1",
|
|
87
|
+
"@typescript-eslint/parser": "^8.18.0",
|
|
88
|
+
"@vitest/coverage-v8": "^2.1.8",
|
|
89
|
+
"eslint": "^8.57.1",
|
|
90
|
+
"eslint-config-prettier": "^9.1.0",
|
|
91
|
+
"eslint-plugin-n": "^17.15.1",
|
|
92
|
+
"eslint-plugin-unicorn": "^56.0.1",
|
|
93
|
+
"prettier": "^3.4.2",
|
|
94
|
+
"tshy": "^3.0.2",
|
|
95
|
+
"tsx": "^4.19.2",
|
|
96
|
+
"typedoc": "^0.27.5",
|
|
97
|
+
"typescript": "^5.7.2",
|
|
98
|
+
"vitest": "^2.0.2"
|
|
85
99
|
},
|
|
86
|
-
"
|
|
87
|
-
"
|
|
88
|
-
|
|
100
|
+
"engines": {
|
|
101
|
+
"node": ">=0.12"
|
|
102
|
+
},
|
|
103
|
+
"tshy": {
|
|
104
|
+
"exclude": [
|
|
105
|
+
"**/*.spec.ts",
|
|
106
|
+
"**/__fixtures__/*",
|
|
107
|
+
"**/__tests__/*",
|
|
108
|
+
"**/__snapshots__/*"
|
|
109
|
+
],
|
|
110
|
+
"exports": {
|
|
111
|
+
".": "./src/index.ts",
|
|
112
|
+
"./decode": "./src/decode.ts",
|
|
113
|
+
"./escape": "./src/escape.ts"
|
|
114
|
+
}
|
|
89
115
|
}
|
|
90
116
|
}
|
package/readme.md
CHANGED
|
@@ -4,16 +4,16 @@ Encode & decode HTML & XML entities with ease & speed.
|
|
|
4
4
|
|
|
5
5
|
## Features
|
|
6
6
|
|
|
7
|
-
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
-
|
|
13
|
-
|
|
14
|
-
-
|
|
15
|
-
|
|
16
|
-
|
|
7
|
+
- 😇 Tried and true: `entities` is used by many popular libraries; eg.
|
|
8
|
+
[`htmlparser2`](https://github.com/fb55/htmlparser2), the official
|
|
9
|
+
[AWS SDK](https://github.com/aws/aws-sdk-js-v3) and
|
|
10
|
+
[`commonmark`](https://github.com/commonmark/commonmark.js) use it to process
|
|
11
|
+
HTML entities.
|
|
12
|
+
- ⚡️ Fast: `entities` is the fastest library for decoding HTML entities (as of
|
|
13
|
+
April 2022); see [performance](#performance).
|
|
14
|
+
- 🎛 Configurable: Get an output tailored for your needs. You are fine with
|
|
15
|
+
UTF8? That'll save you some bytes. Prefer to only have ASCII characters? We
|
|
16
|
+
can do that as well!
|
|
17
17
|
|
|
18
18
|
## How to…
|
|
19
19
|
|
|
@@ -90,15 +90,15 @@ Instead, use the functions that you need directly.
|
|
|
90
90
|
This library wouldn't be possible without the work of these individuals. Thanks
|
|
91
91
|
to
|
|
92
92
|
|
|
93
|
-
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
-
|
|
97
|
-
|
|
98
|
-
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
93
|
+
- [@mathiasbynens](https://github.com/mathiasbynens) for his explanations about
|
|
94
|
+
character encodings, and his library `he`, which was one of the inspirations
|
|
95
|
+
for `entities`
|
|
96
|
+
- [@inikulin](https://github.com/inikulin) for his work on optimized tries for
|
|
97
|
+
decoding HTML entities for the `parse5` project
|
|
98
|
+
- [@mdevils](https://github.com/mdevils) for taking on the challenge of
|
|
99
|
+
producing a quick entity library with his `html-entities` library. `entities`
|
|
100
|
+
would be quite a bit slower if there wasn't any competition. Right now
|
|
101
|
+
`entities` is on top, but we'll see how long that lasts!
|
|
102
102
|
|
|
103
103
|
---
|
|
104
104
|
|
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
// Adapted from https://github.com/mathiasbynens/he/blob/36afe179392226cf1b6ccdb16ebbb7a5a844d93a/src/he.js#L106-L134
|
|
2
|
+
|
|
3
|
+
const decodeMap = new Map([
|
|
4
|
+
[0, 65_533],
|
|
5
|
+
// C1 Unicode control character reference replacements
|
|
6
|
+
[128, 8364],
|
|
7
|
+
[130, 8218],
|
|
8
|
+
[131, 402],
|
|
9
|
+
[132, 8222],
|
|
10
|
+
[133, 8230],
|
|
11
|
+
[134, 8224],
|
|
12
|
+
[135, 8225],
|
|
13
|
+
[136, 710],
|
|
14
|
+
[137, 8240],
|
|
15
|
+
[138, 352],
|
|
16
|
+
[139, 8249],
|
|
17
|
+
[140, 338],
|
|
18
|
+
[142, 381],
|
|
19
|
+
[145, 8216],
|
|
20
|
+
[146, 8217],
|
|
21
|
+
[147, 8220],
|
|
22
|
+
[148, 8221],
|
|
23
|
+
[149, 8226],
|
|
24
|
+
[150, 8211],
|
|
25
|
+
[151, 8212],
|
|
26
|
+
[152, 732],
|
|
27
|
+
[153, 8482],
|
|
28
|
+
[154, 353],
|
|
29
|
+
[155, 8250],
|
|
30
|
+
[156, 339],
|
|
31
|
+
[158, 382],
|
|
32
|
+
[159, 376],
|
|
33
|
+
]);
|
|
34
|
+
|
|
35
|
+
/**
|
|
36
|
+
* Polyfill for `String.fromCodePoint`. It is used to create a string from a Unicode code point.
|
|
37
|
+
*/
|
|
38
|
+
export const fromCodePoint: (...codePoints: number[]) => string =
|
|
39
|
+
// eslint-disable-next-line @typescript-eslint/no-unnecessary-condition, n/no-unsupported-features/es-builtins
|
|
40
|
+
String.fromCodePoint ??
|
|
41
|
+
function (codePoint: number): string {
|
|
42
|
+
let output = "";
|
|
43
|
+
|
|
44
|
+
if (codePoint > 0xff_ff) {
|
|
45
|
+
codePoint -= 0x1_00_00;
|
|
46
|
+
output += String.fromCharCode(
|
|
47
|
+
((codePoint >>> 10) & 0x3_ff) | 0xd8_00,
|
|
48
|
+
);
|
|
49
|
+
codePoint = 0xdc_00 | (codePoint & 0x3_ff);
|
|
50
|
+
}
|
|
51
|
+
|
|
52
|
+
output += String.fromCharCode(codePoint);
|
|
53
|
+
return output;
|
|
54
|
+
};
|
|
55
|
+
|
|
56
|
+
/**
|
|
57
|
+
* Replace the given code point with a replacement character if it is a
|
|
58
|
+
* surrogate or is outside the valid range. Otherwise return the code
|
|
59
|
+
* point unchanged.
|
|
60
|
+
*/
|
|
61
|
+
export function replaceCodePoint(codePoint: number): number {
|
|
62
|
+
if (
|
|
63
|
+
(codePoint >= 0xd8_00 && codePoint <= 0xdf_ff) ||
|
|
64
|
+
codePoint > 0x10_ff_ff
|
|
65
|
+
) {
|
|
66
|
+
return 0xff_fd;
|
|
67
|
+
}
|
|
68
|
+
|
|
69
|
+
return decodeMap.get(codePoint) ?? codePoint;
|
|
70
|
+
}
|
|
71
|
+
|
|
72
|
+
/**
|
|
73
|
+
* Replace the code point if relevant, then convert it to a string.
|
|
74
|
+
*
|
|
75
|
+
* @deprecated Use `fromCodePoint(replaceCodePoint(codePoint))` instead.
|
|
76
|
+
* @param codePoint The code point to decode.
|
|
77
|
+
* @returns The decoded code point.
|
|
78
|
+
*/
|
|
79
|
+
export function decodeCodePoint(codePoint: number): string {
|
|
80
|
+
return fromCodePoint(replaceCodePoint(codePoint));
|
|
81
|
+
}
|
|
@@ -0,0 +1,320 @@
|
|
|
1
|
+
import { describe, it, expect, vitest } from "vitest";
|
|
2
|
+
import * as entities from "./decode.js";
|
|
3
|
+
|
|
4
|
+
describe("Decode test", () => {
|
|
5
|
+
const testcases = [
|
|
6
|
+
{ input: "&amp;amp;", output: "&amp;" },
|
|
7
|
+
{ input: "&amp;#38;", output: "&#38;" },
|
|
8
|
+
{ input: "&amp;#x26;", output: "&#x26;" },
|
|
9
|
+
{ input: "&amp;#X26;", output: "&#X26;" },
|
|
10
|
+
{ input: "&#38;#38;", output: "&#38;" },
|
|
11
|
+
{ input: "&#x26;#38;", output: "&#38;" },
|
|
12
|
+
{ input: "&#X26;#38;", output: "&#38;" },
|
|
13
|
+
{ input: "&#x3a;", output: ":" },
|
|
14
|
+
{ input: "&#x3A;", output: ":" },
|
|
15
|
+
{ input: "&#X3a;", output: ":" },
|
|
16
|
+
{ input: "&#X3A;", output: ":" },
|
|
17
|
+
{ input: "&#", output: "&#" },
|
|
18
|
+
{ input: "&>", output: "&>" },
|
|
19
|
+
{ input: "id=770&#anchor", output: "id=770&#anchor" },
|
|
20
|
+
];
|
|
21
|
+
|
|
22
|
+
for (const { input, output } of testcases) {
|
|
23
|
+
it(`should XML decode ${input}`, () =>
|
|
24
|
+
expect(entities.decodeXML(input)).toBe(output));
|
|
25
|
+
it(`should HTML decode ${input}`, () =>
|
|
26
|
+
expect(entities.decodeHTML(input)).toBe(output));
|
|
27
|
+
}
|
|
28
|
+
|
|
29
|
+
it("should HTML decode partial legacy entity", () => {
|
|
30
|
+
expect(entities.decodeHTMLStrict("&timesbar")).toBe("&timesbar");
|
|
31
|
+
expect(entities.decodeHTML("&timesbar")).toBe("×bar");
|
|
32
|
+
});
|
|
33
|
+
|
|
34
|
+
it("should HTML decode legacy entities according to spec", () =>
|
|
35
|
+
expect(entities.decodeHTML("?&image_uri=1&ℑ=2&image=3")).toBe(
|
|
36
|
+
"?&image_uri=1&ℑ=2&image=3",
|
|
37
|
+
));
|
|
38
|
+
|
|
39
|
+
it("should back out of legacy entities", () =>
|
|
40
|
+
expect(entities.decodeHTML("&ampa")).toBe("&a"));
|
|
41
|
+
|
|
42
|
+
it("should not parse numeric entities in strict mode", () =>
|
|
43
|
+
expect(entities.decodeHTMLStrict("&#55")).toBe("&#55"));
|
|
44
|
+
|
|
45
|
+
it("should parse &nbsp followed by < (#852)", () =>
|
|
46
|
+
expect(entities.decodeHTML("&nbsp<")).toBe("\u00A0<"));
|
|
47
|
+
|
|
48
|
+
it("should decode trailing legacy entities", () => {
|
|
49
|
+
expect(entities.decodeHTML("&timesbar;&timesbar")).toBe("⨱×bar");
|
|
50
|
+
});
|
|
51
|
+
|
|
52
|
+
it("should decode multi-byte entities", () => {
|
|
53
|
+
expect(entities.decodeHTML("&NotGreaterFullEqual;")).toBe("≧̸");
|
|
54
|
+
});
|
|
55
|
+
|
|
56
|
+
it("should not decode legacy entities followed by text in attribute mode", () => {
|
|
57
|
+
expect(
|
|
58
|
+
entities.decodeHTML("&not", entities.DecodingMode.Attribute),
|
|
59
|
+
).toBe("¬");
|
|
60
|
+
|
|
61
|
+
expect(
|
|
62
|
+
entities.decodeHTML("&noti", entities.DecodingMode.Attribute),
|
|
63
|
+
).toBe("&noti");
|
|
64
|
+
|
|
65
|
+
expect(
|
|
66
|
+
entities.decodeHTML("&not=", entities.DecodingMode.Attribute),
|
|
67
|
+
).toBe("&not=");
|
|
68
|
+
|
|
69
|
+
expect(entities.decodeHTMLAttribute("&notp")).toBe("&notp");
|
|
70
|
+
expect(entities.decodeHTMLAttribute("&notP")).toBe("&notP");
|
|
71
|
+
expect(entities.decodeHTMLAttribute("&not3")).toBe("&not3");
|
|
72
|
+
});
|
|
73
|
+
});
|
|
74
|
+
|
|
75
|
+
describe("EntityDecoder", () => {
|
|
76
|
+
it("should decode decimal entities", () => {
|
|
77
|
+
const callback = vitest.fn();
|
|
78
|
+
const decoder = new entities.EntityDecoder(
|
|
79
|
+
entities.htmlDecodeTree,
|
|
80
|
+
callback,
|
|
81
|
+
);
|
|
82
|
+
|
|
83
|
+
expect(decoder.write("&#5", 1)).toBe(-1);
|
|
84
|
+
expect(decoder.write("8;", 0)).toBe(5);
|
|
85
|
+
|
|
86
|
+
expect(callback).toHaveBeenCalledTimes(1);
|
|
87
|
+
expect(callback).toHaveBeenCalledWith(":".charCodeAt(0), 5);
|
|
88
|
+
});
|
|
89
|
+
|
|
90
|
+
it("should decode hex entities", () => {
|
|
91
|
+
const callback = vitest.fn();
|
|
92
|
+
const decoder = new entities.EntityDecoder(
|
|
93
|
+
entities.htmlDecodeTree,
|
|
94
|
+
callback,
|
|
95
|
+
);
|
|
96
|
+
|
|
97
|
+
expect(decoder.write("&#x3a;", 1)).toBe(6);
|
|
98
|
+
|
|
99
|
+
expect(callback).toHaveBeenCalledTimes(1);
|
|
100
|
+
expect(callback).toHaveBeenCalledWith(":".charCodeAt(0), 6);
|
|
101
|
+
});
|
|
102
|
+
|
|
103
|
+
it("should decode named entities", () => {
|
|
104
|
+
const callback = vitest.fn();
|
|
105
|
+
const decoder = new entities.EntityDecoder(
|
|
106
|
+
entities.htmlDecodeTree,
|
|
107
|
+
callback,
|
|
108
|
+
);
|
|
109
|
+
|
|
110
|
+
expect(decoder.write("&amp;", 1)).toBe(5);
|
|
111
|
+
|
|
112
|
+
expect(callback).toHaveBeenCalledTimes(1);
|
|
113
|
+
expect(callback).toHaveBeenCalledWith("&".charCodeAt(0), 5);
|
|
114
|
+
});
|
|
115
|
+
|
|
116
|
+
it("should decode legacy entities", () => {
|
|
117
|
+
const callback = vitest.fn();
|
|
118
|
+
const decoder = new entities.EntityDecoder(
|
|
119
|
+
entities.htmlDecodeTree,
|
|
120
|
+
callback,
|
|
121
|
+
);
|
|
122
|
+
decoder.startEntity(entities.DecodingMode.Legacy);
|
|
123
|
+
|
|
124
|
+
expect(decoder.write("&amp", 1)).toBe(-1);
|
|
125
|
+
|
|
126
|
+
expect(callback).toHaveBeenCalledTimes(0);
|
|
127
|
+
|
|
128
|
+
expect(decoder.end()).toBe(4);
|
|
129
|
+
|
|
130
|
+
expect(callback).toHaveBeenCalledTimes(1);
|
|
131
|
+
expect(callback).toHaveBeenCalledWith("&".charCodeAt(0), 4);
|
|
132
|
+
});
|
|
133
|
+
|
|
134
|
+
it("should decode named entity written character by character", () => {
|
|
135
|
+
const callback = vitest.fn();
|
|
136
|
+
const decoder = new entities.EntityDecoder(
|
|
137
|
+
entities.htmlDecodeTree,
|
|
138
|
+
callback,
|
|
139
|
+
);
|
|
140
|
+
|
|
141
|
+
for (const c of "amp") {
|
|
142
|
+
expect(decoder.write(c, 0)).toBe(-1);
|
|
143
|
+
}
|
|
144
|
+
expect(decoder.write(";", 0)).toBe(5);
|
|
145
|
+
|
|
146
|
+
expect(callback).toHaveBeenCalledTimes(1);
|
|
147
|
+
expect(callback).toHaveBeenCalledWith("&".charCodeAt(0), 5);
|
|
148
|
+
});
|
|
149
|
+
|
|
150
|
+
it("should decode numeric entity written character by character", () => {
|
|
151
|
+
const callback = vitest.fn();
|
|
152
|
+
const decoder = new entities.EntityDecoder(
|
|
153
|
+
entities.htmlDecodeTree,
|
|
154
|
+
callback,
|
|
155
|
+
);
|
|
156
|
+
|
|
157
|
+
for (const c of "#x3a") {
|
|
158
|
+
expect(decoder.write(c, 0)).toBe(-1);
|
|
159
|
+
}
|
|
160
|
+
expect(decoder.write(";", 0)).toBe(6);
|
|
161
|
+
|
|
162
|
+
expect(callback).toHaveBeenCalledTimes(1);
|
|
163
|
+
expect(callback).toHaveBeenCalledWith(":".charCodeAt(0), 6);
|
|
164
|
+
});
|
|
165
|
+
|
|
166
|
+
it("should decode hex entities across several chunks", () => {
|
|
167
|
+
const callback = vitest.fn();
|
|
168
|
+
const decoder = new entities.EntityDecoder(
|
|
169
|
+
entities.htmlDecodeTree,
|
|
170
|
+
callback,
|
|
171
|
+
);
|
|
172
|
+
|
|
173
|
+
for (const chunk of ["#x", "cf", "ff", "d"]) {
|
|
174
|
+
expect(decoder.write(chunk, 0)).toBe(-1);
|
|
175
|
+
}
|
|
176
|
+
|
|
177
|
+
expect(decoder.write(";", 0)).toBe(9);
|
|
178
|
+
expect(callback).toHaveBeenCalledTimes(1);
|
|
179
|
+
expect(callback).toHaveBeenCalledWith(0xc_ff_fd, 9);
|
|
180
|
+
});
|
|
181
|
+
|
|
182
|
+
it("should not fail if nothing is written", () => {
|
|
183
|
+
const callback = vitest.fn();
|
|
184
|
+
const decoder = new entities.EntityDecoder(
|
|
185
|
+
entities.htmlDecodeTree,
|
|
186
|
+
callback,
|
|
187
|
+
);
|
|
188
|
+
|
|
189
|
+
expect(decoder.end()).toBe(0);
|
|
190
|
+
expect(callback).toHaveBeenCalledTimes(0);
|
|
191
|
+
});
|
|
192
|
+
|
|
193
|
+
describe("errors", () => {
|
|
194
|
+
it("should produce an error for a named entity without a semicolon", () => {
|
|
195
|
+
const errorHandlers = {
|
|
196
|
+
missingSemicolonAfterCharacterReference: vitest.fn(),
|
|
197
|
+
absenceOfDigitsInNumericCharacterReference: vitest.fn(),
|
|
198
|
+
validateNumericCharacterReference: vitest.fn(),
|
|
199
|
+
};
|
|
200
|
+
const callback = vitest.fn();
|
|
201
|
+
const decoder = new entities.EntityDecoder(
|
|
202
|
+
entities.htmlDecodeTree,
|
|
203
|
+
callback,
|
|
204
|
+
errorHandlers,
|
|
205
|
+
);
|
|
206
|
+
|
|
207
|
+
decoder.startEntity(entities.DecodingMode.Legacy);
|
|
208
|
+
expect(decoder.write("&amp;", 1)).toBe(5);
|
|
209
|
+
expect(callback).toHaveBeenCalledTimes(1);
|
|
210
|
+
expect(callback).toHaveBeenCalledWith("&".charCodeAt(0), 5);
|
|
211
|
+
expect(
|
|
212
|
+
errorHandlers.missingSemicolonAfterCharacterReference,
|
|
213
|
+
).toHaveBeenCalledTimes(0);
|
|
214
|
+
|
|
215
|
+
decoder.startEntity(entities.DecodingMode.Legacy);
|
|
216
|
+
expect(decoder.write("&amp", 1)).toBe(-1);
|
|
217
|
+
expect(decoder.end()).toBe(4);
|
|
218
|
+
|
|
219
|
+
expect(callback).toHaveBeenCalledTimes(2);
|
|
220
|
+
expect(callback).toHaveBeenLastCalledWith("&".charCodeAt(0), 4);
|
|
221
|
+
expect(
|
|
222
|
+
errorHandlers.missingSemicolonAfterCharacterReference,
|
|
223
|
+
).toHaveBeenCalledTimes(1);
|
|
224
|
+
});
|
|
225
|
+
|
|
226
|
+
it("should produce an error for a numeric entity without a semicolon", () => {
|
|
227
|
+
const errorHandlers = {
|
|
228
|
+
missingSemicolonAfterCharacterReference: vitest.fn(),
|
|
229
|
+
absenceOfDigitsInNumericCharacterReference: vitest.fn(),
|
|
230
|
+
validateNumericCharacterReference: vitest.fn(),
|
|
231
|
+
};
|
|
232
|
+
const callback = vitest.fn();
|
|
233
|
+
const decoder = new entities.EntityDecoder(
|
|
234
|
+
entities.htmlDecodeTree,
|
|
235
|
+
callback,
|
|
236
|
+
errorHandlers,
|
|
237
|
+
);
|
|
238
|
+
|
|
239
|
+
decoder.startEntity(entities.DecodingMode.Legacy);
|
|
240
|
+
expect(decoder.write("&#x3a", 1)).toBe(-1);
|
|
241
|
+
expect(decoder.end()).toBe(5);
|
|
242
|
+
|
|
243
|
+
expect(callback).toHaveBeenCalledTimes(1);
|
|
244
|
+
expect(callback).toHaveBeenCalledWith(0x3a, 5);
|
|
245
|
+
expect(
|
|
246
|
+
errorHandlers.missingSemicolonAfterCharacterReference,
|
|
247
|
+
).toHaveBeenCalledTimes(1);
|
|
248
|
+
expect(
|
|
249
|
+
errorHandlers.absenceOfDigitsInNumericCharacterReference,
|
|
250
|
+
).toHaveBeenCalledTimes(0);
|
|
251
|
+
expect(
|
|
252
|
+
errorHandlers.validateNumericCharacterReference,
|
|
253
|
+
).toHaveBeenCalledTimes(1);
|
|
254
|
+
expect(
|
|
255
|
+
errorHandlers.validateNumericCharacterReference,
|
|
256
|
+
).toHaveBeenCalledWith(0x3a);
|
|
257
|
+
});
|
|
258
|
+
|
|
259
|
+
it("should produce an error for numeric entities without digits", () => {
|
|
260
|
+
const errorHandlers = {
|
|
261
|
+
missingSemicolonAfterCharacterReference: vitest.fn(),
|
|
262
|
+
absenceOfDigitsInNumericCharacterReference: vitest.fn(),
|
|
263
|
+
validateNumericCharacterReference: vitest.fn(),
|
|
264
|
+
};
|
|
265
|
+
const callback = vitest.fn();
|
|
266
|
+
const decoder = new entities.EntityDecoder(
|
|
267
|
+
entities.htmlDecodeTree,
|
|
268
|
+
callback,
|
|
269
|
+
errorHandlers,
|
|
270
|
+
);
|
|
271
|
+
|
|
272
|
+
decoder.startEntity(entities.DecodingMode.Legacy);
|
|
273
|
+
expect(decoder.write("&#", 1)).toBe(-1);
|
|
274
|
+
expect(decoder.end()).toBe(0);
|
|
275
|
+
|
|
276
|
+
expect(callback).toHaveBeenCalledTimes(0);
|
|
277
|
+
expect(
|
|
278
|
+
errorHandlers.missingSemicolonAfterCharacterReference,
|
|
279
|
+
).toHaveBeenCalledTimes(0);
|
|
280
|
+
expect(
|
|
281
|
+
errorHandlers.absenceOfDigitsInNumericCharacterReference,
|
|
282
|
+
).toHaveBeenCalledTimes(1);
|
|
283
|
+
expect(
|
|
284
|
+
errorHandlers.absenceOfDigitsInNumericCharacterReference,
|
|
285
|
+
).toHaveBeenCalledWith(2);
|
|
286
|
+
expect(
|
|
287
|
+
errorHandlers.validateNumericCharacterReference,
|
|
288
|
+
).toHaveBeenCalledTimes(0);
|
|
289
|
+
});
|
|
290
|
+
|
|
291
|
+
it("should produce an error for hex entities without digits", () => {
|
|
292
|
+
const errorHandlers = {
|
|
293
|
+
missingSemicolonAfterCharacterReference: vitest.fn(),
|
|
294
|
+
absenceOfDigitsInNumericCharacterReference: vitest.fn(),
|
|
295
|
+
validateNumericCharacterReference: vitest.fn(),
|
|
296
|
+
};
|
|
297
|
+
const callback = vitest.fn();
|
|
298
|
+
const decoder = new entities.EntityDecoder(
|
|
299
|
+
entities.htmlDecodeTree,
|
|
300
|
+
callback,
|
|
301
|
+
errorHandlers,
|
|
302
|
+
);
|
|
303
|
+
|
|
304
|
+
decoder.startEntity(entities.DecodingMode.Legacy);
|
|
305
|
+
expect(decoder.write("&#x", 1)).toBe(-1);
|
|
306
|
+
expect(decoder.end()).toBe(0);
|
|
307
|
+
|
|
308
|
+
expect(callback).toHaveBeenCalledTimes(0);
|
|
309
|
+
expect(
|
|
310
|
+
errorHandlers.missingSemicolonAfterCharacterReference,
|
|
311
|
+
).toHaveBeenCalledTimes(0);
|
|
312
|
+
expect(
|
|
313
|
+
errorHandlers.absenceOfDigitsInNumericCharacterReference,
|
|
314
|
+
).toHaveBeenCalledTimes(1);
|
|
315
|
+
expect(
|
|
316
|
+
errorHandlers.validateNumericCharacterReference,
|
|
317
|
+
).toHaveBeenCalledTimes(0);
|
|
318
|
+
});
|
|
319
|
+
});
|
|
320
|
+
});
|