entities 4.5.0 → 6.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (128)
  1. package/decode.js +3 -0
  2. package/{lib/decode_codepoint.d.ts → dist/commonjs/decode-codepoint.d.ts} +2 -2
  3. package/dist/commonjs/decode-codepoint.d.ts.map +1 -0
  4. package/{lib/decode_codepoint.js → dist/commonjs/decode-codepoint.js} +14 -13
  5. package/dist/commonjs/decode-codepoint.js.map +1 -0
  6. package/{lib → dist/commonjs}/decode.d.ts +18 -20
  7. package/dist/commonjs/decode.d.ts.map +1 -0
  8. package/{lib → dist/commonjs}/decode.js +122 -147
  9. package/dist/commonjs/decode.js.map +1 -0
  10. package/{lib/esm → dist/commonjs}/encode.d.ts +2 -2
  11. package/dist/commonjs/encode.d.ts.map +1 -0
  12. package/{lib → dist/commonjs}/encode.js +31 -35
  13. package/dist/commonjs/encode.js.map +1 -0
  14. package/{lib/esm → dist/commonjs}/escape.d.ts +2 -2
  15. package/dist/commonjs/escape.d.ts.map +1 -0
  16. package/{lib → dist/commonjs}/escape.js +37 -38
  17. package/dist/commonjs/escape.js.map +1 -0
  18. package/dist/commonjs/generated/decode-data-html.d.ts +2 -0
  19. package/dist/commonjs/generated/decode-data-html.d.ts.map +1 -0
  20. package/dist/commonjs/generated/decode-data-html.js +10 -0
  21. package/dist/commonjs/generated/decode-data-html.js.map +1 -0
  22. package/dist/commonjs/generated/decode-data-xml.d.ts +2 -0
  23. package/dist/commonjs/generated/decode-data-xml.d.ts.map +1 -0
  24. package/dist/commonjs/generated/decode-data-xml.js +10 -0
  25. package/dist/commonjs/generated/decode-data-xml.js.map +1 -0
  26. package/dist/commonjs/generated/encode-html.d.ts +8 -0
  27. package/dist/commonjs/generated/encode-html.d.ts.map +1 -0
  28. package/dist/commonjs/generated/encode-html.js +13 -0
  29. package/dist/commonjs/generated/encode-html.js.map +1 -0
  30. package/{lib → dist/commonjs}/index.d.ts +6 -6
  31. package/dist/commonjs/index.d.ts.map +1 -0
  32. package/{lib → dist/commonjs}/index.js +44 -39
  33. package/dist/commonjs/index.js.map +1 -0
  34. package/dist/commonjs/package.json +3 -0
  35. package/{lib/esm/decode_codepoint.d.ts → dist/esm/decode-codepoint.d.ts} +2 -2
  36. package/dist/esm/decode-codepoint.d.ts.map +1 -0
  37. package/{lib/esm/decode_codepoint.js → dist/esm/decode-codepoint.js} +10 -9
  38. package/dist/esm/decode-codepoint.js.map +1 -0
  39. package/{lib → dist}/esm/decode.d.ts +18 -20
  40. package/dist/esm/decode.d.ts.map +1 -0
  41. package/{lib → dist}/esm/decode.js +76 -75
  42. package/dist/esm/decode.js.map +1 -0
  43. package/{lib → dist/esm}/encode.d.ts +2 -2
  44. package/dist/esm/encode.d.ts.map +1 -0
  45. package/{lib → dist}/esm/encode.js +25 -25
  46. package/dist/esm/encode.js.map +1 -0
  47. package/{lib → dist/esm}/escape.d.ts +2 -2
  48. package/dist/esm/escape.d.ts.map +1 -0
  49. package/{lib → dist}/esm/escape.js +31 -30
  50. package/dist/esm/escape.js.map +1 -0
  51. package/dist/esm/generated/decode-data-html.d.ts +2 -0
  52. package/dist/esm/generated/decode-data-html.d.ts.map +1 -0
  53. package/dist/esm/generated/decode-data-html.js +7 -0
  54. package/dist/esm/generated/decode-data-html.js.map +1 -0
  55. package/dist/esm/generated/decode-data-xml.d.ts +2 -0
  56. package/dist/esm/generated/decode-data-xml.d.ts.map +1 -0
  57. package/dist/esm/generated/decode-data-xml.js +7 -0
  58. package/dist/esm/generated/decode-data-xml.js.map +1 -0
  59. package/dist/esm/generated/encode-html.d.ts +8 -0
  60. package/dist/esm/generated/encode-html.d.ts.map +1 -0
  61. package/dist/esm/generated/encode-html.js +10 -0
  62. package/dist/esm/generated/encode-html.js.map +1 -0
  63. package/{lib → dist}/esm/index.d.ts +6 -6
  64. package/dist/esm/index.d.ts.map +1 -0
  65. package/{lib → dist}/esm/index.js +33 -25
  66. package/dist/esm/index.js.map +1 -0
  67. package/dist/esm/package.json +3 -0
  68. package/escape.js +3 -0
  69. package/package.json +92 -66
  70. package/readme.md +19 -19
  71. package/src/decode-codepoint.ts +81 -0
  72. package/src/decode.spec.ts +320 -0
  73. package/src/decode.ts +620 -0
  74. package/src/encode.spec.ts +78 -0
  75. package/src/encode.ts +77 -0
  76. package/src/escape.spec.ts +14 -0
  77. package/src/escape.ts +148 -0
  78. package/src/generated/.eslintrc.json +10 -0
  79. package/src/generated/decode-data-html.ts +8 -0
  80. package/src/generated/decode-data-xml.ts +8 -0
  81. package/src/generated/encode-html.ts +17 -0
  82. package/src/index.spec.ts +125 -0
  83. package/src/index.ts +188 -0
  84. package/lib/decode.d.ts.map +0 -1
  85. package/lib/decode.js.map +0 -1
  86. package/lib/decode_codepoint.d.ts.map +0 -1
  87. package/lib/decode_codepoint.js.map +0 -1
  88. package/lib/encode.d.ts.map +0 -1
  89. package/lib/encode.js.map +0 -1
  90. package/lib/escape.d.ts.map +0 -1
  91. package/lib/escape.js.map +0 -1
  92. package/lib/esm/decode.d.ts.map +0 -1
  93. package/lib/esm/decode.js.map +0 -1
  94. package/lib/esm/decode_codepoint.d.ts.map +0 -1
  95. package/lib/esm/decode_codepoint.js.map +0 -1
  96. package/lib/esm/encode.d.ts.map +0 -1
  97. package/lib/esm/encode.js.map +0 -1
  98. package/lib/esm/escape.d.ts.map +0 -1
  99. package/lib/esm/escape.js.map +0 -1
  100. package/lib/esm/generated/decode-data-html.d.ts +0 -3
  101. package/lib/esm/generated/decode-data-html.d.ts.map +0 -1
  102. package/lib/esm/generated/decode-data-html.js +0 -7
  103. package/lib/esm/generated/decode-data-html.js.map +0 -1
  104. package/lib/esm/generated/decode-data-xml.d.ts +0 -3
  105. package/lib/esm/generated/decode-data-xml.d.ts.map +0 -1
  106. package/lib/esm/generated/decode-data-xml.js +0 -7
  107. package/lib/esm/generated/decode-data-xml.js.map +0 -1
  108. package/lib/esm/generated/encode-html.d.ts +0 -8
  109. package/lib/esm/generated/encode-html.d.ts.map +0 -1
  110. package/lib/esm/generated/encode-html.js +0 -10
  111. package/lib/esm/generated/encode-html.js.map +0 -1
  112. package/lib/esm/index.d.ts.map +0 -1
  113. package/lib/esm/index.js.map +0 -1
  114. package/lib/esm/package.json +0 -1
  115. package/lib/generated/decode-data-html.d.ts +0 -3
  116. package/lib/generated/decode-data-html.d.ts.map +0 -1
  117. package/lib/generated/decode-data-html.js +0 -9
  118. package/lib/generated/decode-data-html.js.map +0 -1
  119. package/lib/generated/decode-data-xml.d.ts +0 -3
  120. package/lib/generated/decode-data-xml.d.ts.map +0 -1
  121. package/lib/generated/decode-data-xml.js +0 -9
  122. package/lib/generated/decode-data-xml.js.map +0 -1
  123. package/lib/generated/encode-html.d.ts +0 -8
  124. package/lib/generated/encode-html.d.ts.map +0 -1
  125. package/lib/generated/encode-html.js +0 -12
  126. package/lib/generated/encode-html.js.map +0 -1
  127. package/lib/index.d.ts.map +0 -1
  128. package/lib/index.js.map +0 -1
package/package.json CHANGED
@@ -1,90 +1,116 @@
1
1
  {
2
2
  "name": "entities",
3
- "version": "4.5.0",
3
+ "version": "6.0.0",
4
4
  "description": "Encode & decode XML and HTML entities with ease & speed",
5
- "author": "Felix Boehm <me@feedic.com>",
6
- "funding": "https://github.com/fb55/entities?sponsor=1",
7
- "sideEffects": false,
8
5
  "keywords": [
9
- "entity",
10
- "decoding",
11
- "encoding",
12
- "html",
13
- "xml",
14
- "html entities"
6
+ "html entities",
7
+ "entity decoder",
8
+ "entity encoding",
9
+ "html decoding",
10
+ "html encoding",
11
+ "xml decoding",
12
+ "xml encoding"
15
13
  ],
16
- "directories": {
17
- "lib": "lib/"
14
+ "repository": {
15
+ "type": "git",
16
+ "url": "git://github.com/fb55/entities.git"
18
17
  },
19
- "main": "lib/index.js",
20
- "types": "lib/index.d.ts",
21
- "module": "lib/esm/index.js",
18
+ "funding": "https://github.com/fb55/entities?sponsor=1",
19
+ "license": "BSD-2-Clause",
20
+ "author": "Felix Boehm <me@feedic.com>",
21
+ "sideEffects": false,
22
+ "type": "module",
22
23
  "exports": {
23
24
  ".": {
24
- "require": "./lib/index.js",
25
- "import": "./lib/esm/index.js"
25
+ "import": {
26
+ "types": "./dist/esm/index.d.ts",
27
+ "default": "./dist/esm/index.js"
28
+ },
29
+ "require": {
30
+ "types": "./dist/commonjs/index.d.ts",
31
+ "default": "./dist/commonjs/index.js"
32
+ }
26
33
  },
27
- "./lib/decode.js": {
28
- "require": "./lib/decode.js",
29
- "import": "./lib/esm/decode.js"
34
+ "./decode": {
35
+ "import": {
36
+ "types": "./dist/esm/decode.d.ts",
37
+ "default": "./dist/esm/decode.js"
38
+ },
39
+ "require": {
40
+ "types": "./dist/commonjs/decode.d.ts",
41
+ "default": "./dist/commonjs/decode.js"
42
+ }
30
43
  },
31
- "./lib/escape.js": {
32
- "require": "./lib/escape.js",
33
- "import": "./lib/esm/escape.js"
44
+ "./escape": {
45
+ "import": {
46
+ "types": "./dist/esm/escape.d.ts",
47
+ "default": "./dist/esm/escape.js"
48
+ },
49
+ "require": {
50
+ "types": "./dist/commonjs/escape.d.ts",
51
+ "default": "./dist/commonjs/escape.js"
52
+ }
34
53
  }
35
54
  },
55
+ "main": "./dist/commonjs/index.js",
56
+ "module": "./dist/esm/index.js",
57
+ "types": "./dist/commonjs/index.d.ts",
36
58
  "files": [
37
- "lib/**/*"
59
+ "decode.js",
60
+ "escape.js",
61
+ "dist",
62
+ "src"
38
63
  ],
39
- "engines": {
40
- "node": ">=0.12"
41
- },
42
- "devDependencies": {
43
- "@types/jest": "^28.1.8",
44
- "@types/node": "^18.15.11",
45
- "@typescript-eslint/eslint-plugin": "^5.58.0",
46
- "@typescript-eslint/parser": "^5.58.0",
47
- "eslint": "^8.38.0",
48
- "eslint-config-prettier": "^8.8.0",
49
- "eslint-plugin-node": "^11.1.0",
50
- "jest": "^28.1.3",
51
- "prettier": "^2.8.7",
52
- "ts-jest": "^28.0.8",
53
- "typedoc": "^0.24.1",
54
- "typescript": "^5.0.4"
55
- },
56
64
  "scripts": {
57
- "test": "npm run test:jest && npm run lint",
58
- "test:jest": "jest",
59
- "lint": "npm run lint:es && npm run lint:prettier",
60
- "lint:es": "eslint .",
61
- "lint:prettier": "npm run prettier -- --check",
65
+ "build:docs": "typedoc --hideGenerator src/index.ts",
66
+ "build:encode-trie": "node --import=tsx scripts/write-encode-map.ts",
67
+ "build:trie": "node --import=tsx scripts/write-decode-map.ts",
62
68
  "format": "npm run format:es && npm run format:prettier",
63
69
  "format:es": "npm run lint:es -- --fix",
64
70
  "format:prettier": "npm run prettier -- --write",
71
+ "lint": "npm run lint:es && npm run lint:ts && npm run lint:prettier",
72
+ "lint:es": "eslint . --ignore-path .gitignore",
73
+ "lint:prettier": "npm run prettier -- --check",
74
+ "lint:ts": "tsc --noEmit",
75
+ "prepublishOnly": "tshy",
65
76
  "prettier": "prettier '**/*.{ts,md,json,yml}'",
66
- "build": "npm run build:cjs && npm run build:esm",
67
- "build:cjs": "tsc --sourceRoot https://raw.githubusercontent.com/fb55/entities/$(git rev-parse HEAD)/src/",
68
- "build:esm": "npm run build:cjs -- --module esnext --target es2019 --outDir lib/esm && echo '{\"type\":\"module\"}' > lib/esm/package.json",
69
- "build:docs": "typedoc --hideGenerator src/index.ts",
70
- "build:trie": "ts-node scripts/write-decode-map.ts",
71
- "build:encode-trie": "ts-node scripts/write-encode-map.ts",
72
- "prepare": "npm run build"
77
+ "test": "npm run test:vi && npm run lint",
78
+ "test:vi": "vitest run"
73
79
  },
74
- "repository": {
75
- "type": "git",
76
- "url": "git://github.com/fb55/entities.git"
80
+ "prettier": {
81
+ "proseWrap": "always",
82
+ "tabWidth": 4
77
83
  },
78
- "license": "BSD-2-Clause",
79
- "jest": {
80
- "preset": "ts-jest",
81
- "coverageProvider": "v8",
82
- "moduleNameMapper": {
83
- "^(.*)\\.js$": "$1"
84
- }
84
+ "devDependencies": {
85
+ "@types/node": "^22.10.2",
86
+ "@typescript-eslint/eslint-plugin": "^8.18.1",
87
+ "@typescript-eslint/parser": "^8.18.0",
88
+ "@vitest/coverage-v8": "^2.1.8",
89
+ "eslint": "^8.57.1",
90
+ "eslint-config-prettier": "^9.1.0",
91
+ "eslint-plugin-n": "^17.15.1",
92
+ "eslint-plugin-unicorn": "^56.0.1",
93
+ "prettier": "^3.4.2",
94
+ "tshy": "^3.0.2",
95
+ "tsx": "^4.19.2",
96
+ "typedoc": "^0.27.5",
97
+ "typescript": "^5.7.2",
98
+ "vitest": "^2.0.2"
85
99
  },
86
- "prettier": {
87
- "tabWidth": 4,
88
- "proseWrap": "always"
100
+ "engines": {
101
+ "node": ">=0.12"
102
+ },
103
+ "tshy": {
104
+ "exclude": [
105
+ "**/*.spec.ts",
106
+ "**/__fixtures__/*",
107
+ "**/__tests__/*",
108
+ "**/__snapshots__/*"
109
+ ],
110
+ "exports": {
111
+ ".": "./src/index.ts",
112
+ "./decode": "./src/decode.ts",
113
+ "./escape": "./src/escape.ts"
114
+ }
89
115
  }
90
116
  }
package/readme.md CHANGED
@@ -4,16 +4,16 @@ Encode & decode HTML & XML entities with ease & speed.
4
4
 
5
5
  ## Features
6
6
 
7
- - 😇 Tried and true: `entities` is used by many popular libraries; eg.
8
- [`htmlparser2`](https://github.com/fb55/htmlparser2), the official
9
- [AWS SDK](https://github.com/aws/aws-sdk-js-v3) and
10
- [`commonmark`](https://github.com/commonmark/commonmark.js) use it to
11
- process HTML entities.
12
- - ⚡️ Fast: `entities` is the fastest library for decoding HTML entities (as
13
- of April 2022); see [performance](#performance).
14
- - 🎛 Configurable: Get an output tailored for your needs. You are fine with
15
- UTF8? That'll save you some bytes. Prefer to only have ASCII characters? We
16
- can do that as well!
7
+ - 😇 Tried and true: `entities` is used by many popular libraries; eg.
8
+ [`htmlparser2`](https://github.com/fb55/htmlparser2), the official
9
+ [AWS SDK](https://github.com/aws/aws-sdk-js-v3) and
10
+ [`commonmark`](https://github.com/commonmark/commonmark.js) use it to process
11
+ HTML entities.
12
+ - ⚡️ Fast: `entities` is the fastest library for decoding HTML entities (as of
13
+ April 2022); see [performance](#performance).
14
+ - 🎛 Configurable: Get an output tailored for your needs. You are fine with
15
+ UTF8? That'll save you some bytes. Prefer to only have ASCII characters? We
16
+ can do that as well!
17
17
 
18
18
  ## How to…
19
19
 
@@ -90,15 +90,15 @@ Instead, use the functions that you need directly.
90
90
  This library wouldn't be possible without the work of these individuals. Thanks
91
91
  to
92
92
 
93
- - [@mathiasbynens](https://github.com/mathiasbynens) for his explanations
94
- about character encodings, and his library `he`, which was one of the
95
- inspirations for `entities`
96
- - [@inikulin](https://github.com/inikulin) for his work on optimized tries for
97
- decoding HTML entities for the `parse5` project
98
- - [@mdevils](https://github.com/mdevils) for taking on the challenge of
99
- producing a quick entity library with his `html-entities` library.
100
- `entities` would be quite a bit slower if there wasn't any competition.
101
- Right now `entities` is on top, but we'll see how long that lasts!
93
+ - [@mathiasbynens](https://github.com/mathiasbynens) for his explanations about
94
+ character encodings, and his library `he`, which was one of the inspirations
95
+ for `entities`
96
+ - [@inikulin](https://github.com/inikulin) for his work on optimized tries for
97
+ decoding HTML entities for the `parse5` project
98
+ - [@mdevils](https://github.com/mdevils) for taking on the challenge of
99
+ producing a quick entity library with his `html-entities` library. `entities`
100
+ would be quite a bit slower if there wasn't any competition. Right now
101
+ `entities` is on top, but we'll see how long that lasts!
102
102
 
103
103
  ---
104
104
 
package/src/decode-codepoint.ts ADDED
@@ -0,0 +1,81 @@
1
+ // Adapted from https://github.com/mathiasbynens/he/blob/36afe179392226cf1b6ccdb16ebbb7a5a844d93a/src/he.js#L106-L134
2
+
3
+ const decodeMap = new Map([
4
+ [0, 65_533],
5
+ // C1 Unicode control character reference replacements
6
+ [128, 8364],
7
+ [130, 8218],
8
+ [131, 402],
9
+ [132, 8222],
10
+ [133, 8230],
11
+ [134, 8224],
12
+ [135, 8225],
13
+ [136, 710],
14
+ [137, 8240],
15
+ [138, 352],
16
+ [139, 8249],
17
+ [140, 338],
18
+ [142, 381],
19
+ [145, 8216],
20
+ [146, 8217],
21
+ [147, 8220],
22
+ [148, 8221],
23
+ [149, 8226],
24
+ [150, 8211],
25
+ [151, 8212],
26
+ [152, 732],
27
+ [153, 8482],
28
+ [154, 353],
29
+ [155, 8250],
30
+ [156, 339],
31
+ [158, 382],
32
+ [159, 376],
33
+ ]);
34
+
35
+ /**
36
+ * Polyfill for `String.fromCodePoint`. It is used to create a string from a Unicode code point.
37
+ */
38
+ export const fromCodePoint: (...codePoints: number[]) => string =
39
+ // eslint-disable-next-line @typescript-eslint/no-unnecessary-condition, n/no-unsupported-features/es-builtins
40
+ String.fromCodePoint ??
41
+ function (codePoint: number): string {
42
+ let output = "";
43
+
44
+ if (codePoint > 0xff_ff) {
45
+ codePoint -= 0x1_00_00;
46
+ output += String.fromCharCode(
47
+ ((codePoint >>> 10) & 0x3_ff) | 0xd8_00,
48
+ );
49
+ codePoint = 0xdc_00 | (codePoint & 0x3_ff);
50
+ }
51
+
52
+ output += String.fromCharCode(codePoint);
53
+ return output;
54
+ };
55
+
56
+ /**
57
+ * Replace the given code point with a replacement character if it is a
58
+ * surrogate or is outside the valid range. Otherwise return the code
59
+ * point unchanged.
60
+ */
61
+ export function replaceCodePoint(codePoint: number): number {
62
+ if (
63
+ (codePoint >= 0xd8_00 && codePoint <= 0xdf_ff) ||
64
+ codePoint > 0x10_ff_ff
65
+ ) {
66
+ return 0xff_fd;
67
+ }
68
+
69
+ return decodeMap.get(codePoint) ?? codePoint;
70
+ }
71
+
72
+ /**
73
+ * Replace the code point if relevant, then convert it to a string.
74
+ *
75
+ * @deprecated Use `fromCodePoint(replaceCodePoint(codePoint))` instead.
76
+ * @param codePoint The code point to decode.
77
+ * @returns The decoded code point.
78
+ */
79
+ export function decodeCodePoint(codePoint: number): string {
80
+ return fromCodePoint(replaceCodePoint(codePoint));
81
+ }
package/src/decode.spec.ts ADDED
@@ -0,0 +1,320 @@
1
+ import { describe, it, expect, vitest } from "vitest";
2
+ import * as entities from "./decode.js";
3
+
4
+ describe("Decode test", () => {
5
+ const testcases = [
6
+ { input: "&amp;amp;", output: "&amp;" },
7
+ { input: "&amp;#38;", output: "&#38;" },
8
+ { input: "&amp;#x26;", output: "&#x26;" },
9
+ { input: "&amp;#X26;", output: "&#X26;" },
10
+ { input: "&#38;#38;", output: "&#38;" },
11
+ { input: "&#x26;#38;", output: "&#38;" },
12
+ { input: "&#X26;#38;", output: "&#38;" },
13
+ { input: "&#x3a;", output: ":" },
14
+ { input: "&#x3A;", output: ":" },
15
+ { input: "&#X3a;", output: ":" },
16
+ { input: "&#X3A;", output: ":" },
17
+ { input: "&#", output: "&#" },
18
+ { input: "&>", output: "&>" },
19
+ { input: "id=770&#anchor", output: "id=770&#anchor" },
20
+ ];
21
+
22
+ for (const { input, output } of testcases) {
23
+ it(`should XML decode ${input}`, () =>
24
+ expect(entities.decodeXML(input)).toBe(output));
25
+ it(`should HTML decode ${input}`, () =>
26
+ expect(entities.decodeHTML(input)).toBe(output));
27
+ }
28
+
29
+ it("should HTML decode partial legacy entity", () => {
30
+ expect(entities.decodeHTMLStrict("&timesbar")).toBe("&timesbar");
31
+ expect(entities.decodeHTML("&timesbar")).toBe("×bar");
32
+ });
33
+
34
+ it("should HTML decode legacy entities according to spec", () =>
35
+ expect(entities.decodeHTML("?&image_uri=1&ℑ=2&image=3")).toBe(
36
+ "?&image_uri=1&ℑ=2&image=3",
37
+ ));
38
+
39
+ it("should back out of legacy entities", () =>
40
+ expect(entities.decodeHTML("&ampa")).toBe("&a"));
41
+
42
+ it("should not parse numeric entities in strict mode", () =>
43
+ expect(entities.decodeHTMLStrict("&#55")).toBe("&#55"));
44
+
45
+ it("should parse &nbsp followed by < (#852)", () =>
46
+ expect(entities.decodeHTML("&nbsp<")).toBe("\u00A0<"));
47
+
48
+ it("should decode trailing legacy entities", () => {
49
+ expect(entities.decodeHTML("&timesbar;&timesbar")).toBe("⨱×bar");
50
+ });
51
+
52
+ it("should decode multi-byte entities", () => {
53
+ expect(entities.decodeHTML("&NotGreaterFullEqual;")).toBe("≧̸");
54
+ });
55
+
56
+ it("should not decode legacy entities followed by text in attribute mode", () => {
57
+ expect(
58
+ entities.decodeHTML("&not", entities.DecodingMode.Attribute),
59
+ ).toBe("¬");
60
+
61
+ expect(
62
+ entities.decodeHTML("&noti", entities.DecodingMode.Attribute),
63
+ ).toBe("&noti");
64
+
65
+ expect(
66
+ entities.decodeHTML("&not=", entities.DecodingMode.Attribute),
67
+ ).toBe("&not=");
68
+
69
+ expect(entities.decodeHTMLAttribute("&notp")).toBe("&notp");
70
+ expect(entities.decodeHTMLAttribute("&notP")).toBe("&notP");
71
+ expect(entities.decodeHTMLAttribute("&not3")).toBe("&not3");
72
+ });
73
+ });
74
+
75
+ describe("EntityDecoder", () => {
76
+ it("should decode decimal entities", () => {
77
+ const callback = vitest.fn();
78
+ const decoder = new entities.EntityDecoder(
79
+ entities.htmlDecodeTree,
80
+ callback,
81
+ );
82
+
83
+ expect(decoder.write("&#5", 1)).toBe(-1);
84
+ expect(decoder.write("8;", 0)).toBe(5);
85
+
86
+ expect(callback).toHaveBeenCalledTimes(1);
87
+ expect(callback).toHaveBeenCalledWith(":".charCodeAt(0), 5);
88
+ });
89
+
90
+ it("should decode hex entities", () => {
91
+ const callback = vitest.fn();
92
+ const decoder = new entities.EntityDecoder(
93
+ entities.htmlDecodeTree,
94
+ callback,
95
+ );
96
+
97
+ expect(decoder.write("&#x3a;", 1)).toBe(6);
98
+
99
+ expect(callback).toHaveBeenCalledTimes(1);
100
+ expect(callback).toHaveBeenCalledWith(":".charCodeAt(0), 6);
101
+ });
102
+
103
+ it("should decode named entities", () => {
104
+ const callback = vitest.fn();
105
+ const decoder = new entities.EntityDecoder(
106
+ entities.htmlDecodeTree,
107
+ callback,
108
+ );
109
+
110
+ expect(decoder.write("&amp;", 1)).toBe(5);
111
+
112
+ expect(callback).toHaveBeenCalledTimes(1);
113
+ expect(callback).toHaveBeenCalledWith("&".charCodeAt(0), 5);
114
+ });
115
+
116
+ it("should decode legacy entities", () => {
117
+ const callback = vitest.fn();
118
+ const decoder = new entities.EntityDecoder(
119
+ entities.htmlDecodeTree,
120
+ callback,
121
+ );
122
+ decoder.startEntity(entities.DecodingMode.Legacy);
123
+
124
+ expect(decoder.write("&amp", 1)).toBe(-1);
125
+
126
+ expect(callback).toHaveBeenCalledTimes(0);
127
+
128
+ expect(decoder.end()).toBe(4);
129
+
130
+ expect(callback).toHaveBeenCalledTimes(1);
131
+ expect(callback).toHaveBeenCalledWith("&".charCodeAt(0), 4);
132
+ });
133
+
134
+ it("should decode named entity written character by character", () => {
135
+ const callback = vitest.fn();
136
+ const decoder = new entities.EntityDecoder(
137
+ entities.htmlDecodeTree,
138
+ callback,
139
+ );
140
+
141
+ for (const c of "amp") {
142
+ expect(decoder.write(c, 0)).toBe(-1);
143
+ }
144
+ expect(decoder.write(";", 0)).toBe(5);
145
+
146
+ expect(callback).toHaveBeenCalledTimes(1);
147
+ expect(callback).toHaveBeenCalledWith("&".charCodeAt(0), 5);
148
+ });
149
+
150
+ it("should decode numeric entity written character by character", () => {
151
+ const callback = vitest.fn();
152
+ const decoder = new entities.EntityDecoder(
153
+ entities.htmlDecodeTree,
154
+ callback,
155
+ );
156
+
157
+ for (const c of "#x3a") {
158
+ expect(decoder.write(c, 0)).toBe(-1);
159
+ }
160
+ expect(decoder.write(";", 0)).toBe(6);
161
+
162
+ expect(callback).toHaveBeenCalledTimes(1);
163
+ expect(callback).toHaveBeenCalledWith(":".charCodeAt(0), 6);
164
+ });
165
+
166
+ it("should decode hex entities across several chunks", () => {
167
+ const callback = vitest.fn();
168
+ const decoder = new entities.EntityDecoder(
169
+ entities.htmlDecodeTree,
170
+ callback,
171
+ );
172
+
173
+ for (const chunk of ["#x", "cf", "ff", "d"]) {
174
+ expect(decoder.write(chunk, 0)).toBe(-1);
175
+ }
176
+
177
+ expect(decoder.write(";", 0)).toBe(9);
178
+ expect(callback).toHaveBeenCalledTimes(1);
179
+ expect(callback).toHaveBeenCalledWith(0xc_ff_fd, 9);
180
+ });
181
+
182
+ it("should not fail if nothing is written", () => {
183
+ const callback = vitest.fn();
184
+ const decoder = new entities.EntityDecoder(
185
+ entities.htmlDecodeTree,
186
+ callback,
187
+ );
188
+
189
+ expect(decoder.end()).toBe(0);
190
+ expect(callback).toHaveBeenCalledTimes(0);
191
+ });
192
+
193
+ describe("errors", () => {
194
+ it("should produce an error for a named entity without a semicolon", () => {
195
+ const errorHandlers = {
196
+ missingSemicolonAfterCharacterReference: vitest.fn(),
197
+ absenceOfDigitsInNumericCharacterReference: vitest.fn(),
198
+ validateNumericCharacterReference: vitest.fn(),
199
+ };
200
+ const callback = vitest.fn();
201
+ const decoder = new entities.EntityDecoder(
202
+ entities.htmlDecodeTree,
203
+ callback,
204
+ errorHandlers,
205
+ );
206
+
207
+ decoder.startEntity(entities.DecodingMode.Legacy);
208
+ expect(decoder.write("&amp;", 1)).toBe(5);
209
+ expect(callback).toHaveBeenCalledTimes(1);
210
+ expect(callback).toHaveBeenCalledWith("&".charCodeAt(0), 5);
211
+ expect(
212
+ errorHandlers.missingSemicolonAfterCharacterReference,
213
+ ).toHaveBeenCalledTimes(0);
214
+
215
+ decoder.startEntity(entities.DecodingMode.Legacy);
216
+ expect(decoder.write("&amp", 1)).toBe(-1);
217
+ expect(decoder.end()).toBe(4);
218
+
219
+ expect(callback).toHaveBeenCalledTimes(2);
220
+ expect(callback).toHaveBeenLastCalledWith("&".charCodeAt(0), 4);
221
+ expect(
222
+ errorHandlers.missingSemicolonAfterCharacterReference,
223
+ ).toHaveBeenCalledTimes(1);
224
+ });
225
+
226
+ it("should produce an error for a numeric entity without a semicolon", () => {
227
+ const errorHandlers = {
228
+ missingSemicolonAfterCharacterReference: vitest.fn(),
229
+ absenceOfDigitsInNumericCharacterReference: vitest.fn(),
230
+ validateNumericCharacterReference: vitest.fn(),
231
+ };
232
+ const callback = vitest.fn();
233
+ const decoder = new entities.EntityDecoder(
234
+ entities.htmlDecodeTree,
235
+ callback,
236
+ errorHandlers,
237
+ );
238
+
239
+ decoder.startEntity(entities.DecodingMode.Legacy);
240
+ expect(decoder.write("&#x3a", 1)).toBe(-1);
241
+ expect(decoder.end()).toBe(5);
242
+
243
+ expect(callback).toHaveBeenCalledTimes(1);
244
+ expect(callback).toHaveBeenCalledWith(0x3a, 5);
245
+ expect(
246
+ errorHandlers.missingSemicolonAfterCharacterReference,
247
+ ).toHaveBeenCalledTimes(1);
248
+ expect(
249
+ errorHandlers.absenceOfDigitsInNumericCharacterReference,
250
+ ).toHaveBeenCalledTimes(0);
251
+ expect(
252
+ errorHandlers.validateNumericCharacterReference,
253
+ ).toHaveBeenCalledTimes(1);
254
+ expect(
255
+ errorHandlers.validateNumericCharacterReference,
256
+ ).toHaveBeenCalledWith(0x3a);
257
+ });
258
+
259
+ it("should produce an error for numeric entities without digits", () => {
260
+ const errorHandlers = {
261
+ missingSemicolonAfterCharacterReference: vitest.fn(),
262
+ absenceOfDigitsInNumericCharacterReference: vitest.fn(),
263
+ validateNumericCharacterReference: vitest.fn(),
264
+ };
265
+ const callback = vitest.fn();
266
+ const decoder = new entities.EntityDecoder(
267
+ entities.htmlDecodeTree,
268
+ callback,
269
+ errorHandlers,
270
+ );
271
+
272
+ decoder.startEntity(entities.DecodingMode.Legacy);
273
+ expect(decoder.write("&#", 1)).toBe(-1);
274
+ expect(decoder.end()).toBe(0);
275
+
276
+ expect(callback).toHaveBeenCalledTimes(0);
277
+ expect(
278
+ errorHandlers.missingSemicolonAfterCharacterReference,
279
+ ).toHaveBeenCalledTimes(0);
280
+ expect(
281
+ errorHandlers.absenceOfDigitsInNumericCharacterReference,
282
+ ).toHaveBeenCalledTimes(1);
283
+ expect(
284
+ errorHandlers.absenceOfDigitsInNumericCharacterReference,
285
+ ).toHaveBeenCalledWith(2);
286
+ expect(
287
+ errorHandlers.validateNumericCharacterReference,
288
+ ).toHaveBeenCalledTimes(0);
289
+ });
290
+
291
+ it("should produce an error for hex entities without digits", () => {
292
+ const errorHandlers = {
293
+ missingSemicolonAfterCharacterReference: vitest.fn(),
294
+ absenceOfDigitsInNumericCharacterReference: vitest.fn(),
295
+ validateNumericCharacterReference: vitest.fn(),
296
+ };
297
+ const callback = vitest.fn();
298
+ const decoder = new entities.EntityDecoder(
299
+ entities.htmlDecodeTree,
300
+ callback,
301
+ errorHandlers,
302
+ );
303
+
304
+ decoder.startEntity(entities.DecodingMode.Legacy);
305
+ expect(decoder.write("&#x", 1)).toBe(-1);
306
+ expect(decoder.end()).toBe(0);
307
+
308
+ expect(callback).toHaveBeenCalledTimes(0);
309
+ expect(
310
+ errorHandlers.missingSemicolonAfterCharacterReference,
311
+ ).toHaveBeenCalledTimes(0);
312
+ expect(
313
+ errorHandlers.absenceOfDigitsInNumericCharacterReference,
314
+ ).toHaveBeenCalledTimes(1);
315
+ expect(
316
+ errorHandlers.validateNumericCharacterReference,
317
+ ).toHaveBeenCalledTimes(0);
318
+ });
319
+ });
320
+ });