entities 4.4.0 → 5.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (120)
  1. package/dist/commonjs/decode-codepoint.d.ts +19 -0
  2. package/dist/commonjs/decode-codepoint.d.ts.map +1 -0
  3. package/{lib/decode_codepoint.js → dist/commonjs/decode-codepoint.js} +30 -13
  4. package/dist/commonjs/decode-codepoint.js.map +1 -0
  5. package/dist/commonjs/decode.d.ts +209 -0
  6. package/dist/commonjs/decode.d.ts.map +1 -0
  7. package/dist/commonjs/decode.js +514 -0
  8. package/dist/commonjs/decode.js.map +1 -0
  9. package/{lib/esm → dist/commonjs}/encode.d.ts +2 -2
  10. package/dist/commonjs/encode.d.ts.map +1 -0
  11. package/{lib → dist/commonjs}/encode.js +32 -33
  12. package/dist/commonjs/encode.js.map +1 -0
  13. package/{lib/esm → dist/commonjs}/escape.d.ts +2 -2
  14. package/dist/commonjs/escape.d.ts.map +1 -0
  15. package/{lib → dist/commonjs}/escape.js +46 -38
  16. package/dist/commonjs/escape.js.map +1 -0
  17. package/dist/commonjs/generated/decode-data-html.d.ts.map +1 -0
  18. package/{lib → dist/commonjs}/generated/decode-data-html.js +1 -1
  19. package/dist/commonjs/generated/decode-data-html.js.map +1 -0
  20. package/dist/commonjs/generated/decode-data-xml.d.ts.map +1 -0
  21. package/{lib → dist/commonjs}/generated/decode-data-xml.js +1 -1
  22. package/dist/commonjs/generated/decode-data-xml.js.map +1 -0
  23. package/{lib/esm → dist/commonjs}/generated/encode-html.d.ts +1 -1
  24. package/dist/commonjs/generated/encode-html.d.ts.map +1 -0
  25. package/{lib → dist/commonjs}/generated/encode-html.js +4 -4
  26. package/dist/commonjs/generated/encode-html.js.map +1 -0
  27. package/{lib/esm → dist/commonjs}/index.d.ts +9 -15
  28. package/dist/commonjs/index.d.ts.map +1 -0
  29. package/{lib → dist/commonjs}/index.js +46 -55
  30. package/dist/commonjs/index.js.map +1 -0
  31. package/dist/commonjs/package.json +3 -0
  32. package/dist/esm/decode-codepoint.d.ts +19 -0
  33. package/dist/esm/decode-codepoint.d.ts.map +1 -0
  34. package/{lib/esm/decode_codepoint.js → dist/esm/decode-codepoint.js} +25 -8
  35. package/dist/esm/decode-codepoint.js.map +1 -0
  36. package/dist/esm/decode.d.ts +209 -0
  37. package/dist/esm/decode.d.ts.map +1 -0
  38. package/dist/esm/decode.js +497 -0
  39. package/dist/esm/decode.js.map +1 -0
  40. package/{lib → dist/esm}/encode.d.ts +2 -2
  41. package/dist/esm/encode.d.ts.map +1 -0
  42. package/{lib → dist}/esm/encode.js +25 -25
  43. package/dist/esm/encode.js.map +1 -0
  44. package/{lib → dist/esm}/escape.d.ts +2 -2
  45. package/dist/esm/escape.d.ts.map +1 -0
  46. package/{lib → dist}/esm/escape.js +39 -29
  47. package/dist/esm/escape.js.map +1 -0
  48. package/dist/esm/generated/decode-data-html.d.ts.map +1 -0
  49. package/dist/esm/generated/decode-data-html.js.map +1 -0
  50. package/dist/esm/generated/decode-data-xml.d.ts.map +1 -0
  51. package/dist/esm/generated/decode-data-xml.js.map +1 -0
  52. package/{lib → dist/esm}/generated/encode-html.d.ts +1 -1
  53. package/dist/esm/generated/encode-html.d.ts.map +1 -0
  54. package/{lib → dist}/esm/generated/encode-html.js +4 -4
  55. package/dist/esm/generated/encode-html.js.map +1 -0
  56. package/{lib → dist/esm}/index.d.ts +9 -15
  57. package/dist/esm/index.d.ts.map +1 -0
  58. package/{lib → dist}/esm/index.js +36 -45
  59. package/dist/esm/index.js.map +1 -0
  60. package/dist/esm/package.json +3 -0
  61. package/package.json +96 -66
  62. package/readme.md +5 -5
  63. package/src/decode-codepoint.ts +81 -0
  64. package/src/decode.spec.ts +320 -0
  65. package/src/decode.ts +620 -0
  66. package/src/encode.spec.ts +78 -0
  67. package/src/encode.ts +77 -0
  68. package/src/escape.spec.ts +14 -0
  69. package/src/escape.ts +144 -0
  70. package/src/generated/.eslintrc.json +10 -0
  71. package/src/generated/decode-data-html.ts +8 -0
  72. package/src/generated/decode-data-xml.ts +8 -0
  73. package/src/generated/encode-html.ts +17 -0
  74. package/src/index.spec.ts +125 -0
  75. package/src/index.ts +185 -0
  76. package/lib/decode.d.ts +0 -33
  77. package/lib/decode.d.ts.map +0 -1
  78. package/lib/decode.js +0 -179
  79. package/lib/decode.js.map +0 -1
  80. package/lib/decode_codepoint.d.ts +0 -4
  81. package/lib/decode_codepoint.d.ts.map +0 -1
  82. package/lib/decode_codepoint.js.map +0 -1
  83. package/lib/encode.d.ts.map +0 -1
  84. package/lib/encode.js.map +0 -1
  85. package/lib/escape.d.ts.map +0 -1
  86. package/lib/escape.js.map +0 -1
  87. package/lib/esm/decode.d.ts +0 -33
  88. package/lib/esm/decode.d.ts.map +0 -1
  89. package/lib/esm/decode.js +0 -166
  90. package/lib/esm/decode.js.map +0 -1
  91. package/lib/esm/decode_codepoint.d.ts +0 -4
  92. package/lib/esm/decode_codepoint.d.ts.map +0 -1
  93. package/lib/esm/decode_codepoint.js.map +0 -1
  94. package/lib/esm/encode.d.ts.map +0 -1
  95. package/lib/esm/encode.js.map +0 -1
  96. package/lib/esm/escape.d.ts.map +0 -1
  97. package/lib/esm/escape.js.map +0 -1
  98. package/lib/esm/generated/decode-data-html.d.ts.map +0 -1
  99. package/lib/esm/generated/decode-data-html.js.map +0 -1
  100. package/lib/esm/generated/decode-data-xml.d.ts.map +0 -1
  101. package/lib/esm/generated/decode-data-xml.js.map +0 -1
  102. package/lib/esm/generated/encode-html.d.ts.map +0 -1
  103. package/lib/esm/generated/encode-html.js.map +0 -1
  104. package/lib/esm/index.d.ts.map +0 -1
  105. package/lib/esm/index.js.map +0 -1
  106. package/lib/esm/package.json +0 -1
  107. package/lib/generated/decode-data-html.d.ts.map +0 -1
  108. package/lib/generated/decode-data-html.js.map +0 -1
  109. package/lib/generated/decode-data-xml.d.ts.map +0 -1
  110. package/lib/generated/decode-data-xml.js.map +0 -1
  111. package/lib/generated/encode-html.d.ts.map +0 -1
  112. package/lib/generated/encode-html.js.map +0 -1
  113. package/lib/index.d.ts.map +0 -1
  114. package/lib/index.js.map +0 -1
  115. /package/{lib/esm → dist/commonjs}/generated/decode-data-html.d.ts +0 -0
  116. /package/{lib/esm → dist/commonjs}/generated/decode-data-xml.d.ts +0 -0
  117. /package/{lib → dist/esm}/generated/decode-data-html.d.ts +0 -0
  118. /package/{lib → dist}/esm/generated/decode-data-html.js +0 -0
  119. /package/{lib → dist/esm}/generated/decode-data-xml.d.ts +0 -0
  120. /package/{lib → dist}/esm/generated/decode-data-xml.js +0 -0
package/package.json CHANGED
@@ -1,90 +1,120 @@
1
1
  {
2
2
  "name": "entities",
3
- "version": "4.4.0",
3
+ "version": "5.0.0",
4
4
  "description": "Encode & decode XML and HTML entities with ease & speed",
5
- "author": "Felix Boehm <me@feedic.com>",
6
- "funding": "https://github.com/fb55/entities?sponsor=1",
7
- "sideEffects": false,
8
5
  "keywords": [
9
- "entity",
10
- "decoding",
11
- "encoding",
12
- "html",
13
- "xml",
14
- "html entities"
6
+ "html entities",
7
+ "entity decoder",
8
+ "entity encoding",
9
+ "html decoding",
10
+ "html encoding",
11
+ "xml decoding",
12
+ "xml encoding"
15
13
  ],
16
- "directories": {
17
- "lib": "lib/"
14
+ "repository": {
15
+ "type": "git",
16
+ "url": "git://github.com/fb55/entities.git"
18
17
  },
19
- "main": "lib/index.js",
20
- "types": "lib/index.d.ts",
21
- "module": "lib/esm/index.js",
18
+ "funding": "https://github.com/fb55/entities?sponsor=1",
19
+ "license": "BSD-2-Clause",
20
+ "author": "Felix Boehm <me@feedic.com>",
21
+ "sideEffects": false,
22
+ "type": "module",
22
23
  "exports": {
23
24
  ".": {
24
- "require": "./lib/index.js",
25
- "import": "./lib/esm/index.js"
25
+ "import": {
26
+ "source": "./src/index.ts",
27
+ "types": "./dist/esm/index.d.ts",
28
+ "default": "./dist/esm/index.js"
29
+ },
30
+ "require": {
31
+ "source": "./src/index.ts",
32
+ "types": "./dist/commonjs/index.d.ts",
33
+ "default": "./dist/commonjs/index.js"
34
+ }
26
35
  },
27
- "./lib/decode.js": {
28
- "require": "./lib/decode.js",
29
- "import": "./lib/esm/decode.js"
36
+ "./dist/decode.js": {
37
+ "import": {
38
+ "source": "./src/decode.ts",
39
+ "types": "./dist/esm/decode.d.ts",
40
+ "default": "./dist/esm/decode.js"
41
+ },
42
+ "require": {
43
+ "source": "./src/decode.ts",
44
+ "types": "./dist/commonjs/decode.d.ts",
45
+ "default": "./dist/commonjs/decode.js"
46
+ }
30
47
  },
31
- "./lib/escape.js": {
32
- "require": "./lib/escape.js",
33
- "import": "./lib/esm/escape.js"
48
+ "./dist/escape.js": {
49
+ "import": {
50
+ "source": "./src/escape.ts",
51
+ "types": "./dist/esm/escape.d.ts",
52
+ "default": "./dist/esm/escape.js"
53
+ },
54
+ "require": {
55
+ "source": "./src/escape.ts",
56
+ "types": "./dist/commonjs/escape.d.ts",
57
+ "default": "./dist/commonjs/escape.js"
58
+ }
34
59
  }
35
60
  },
61
+ "main": "./dist/commonjs/index.js",
62
+ "types": "./dist/commonjs/index.d.ts",
36
63
  "files": [
37
- "lib/**/*"
64
+ "dist",
65
+ "src"
38
66
  ],
39
- "engines": {
40
- "node": ">=0.12"
41
- },
42
- "devDependencies": {
43
- "@types/jest": "^28.1.8",
44
- "@types/node": "^18.7.14",
45
- "@typescript-eslint/eslint-plugin": "^5.36.1",
46
- "@typescript-eslint/parser": "^5.36.1",
47
- "eslint": "^8.23.0",
48
- "eslint-config-prettier": "^8.5.0",
49
- "eslint-plugin-node": "^11.1.0",
50
- "jest": "^28.1.3",
51
- "prettier": "^2.7.1",
52
- "ts-jest": "^28.0.8",
53
- "typedoc": "^0.23.12",
54
- "typescript": "^4.8.2"
55
- },
56
67
  "scripts": {
57
- "test": "npm run test:jest && npm run lint",
58
- "test:jest": "jest",
59
- "lint": "npm run lint:es && npm run lint:prettier",
60
- "lint:es": "eslint .",
61
- "lint:prettier": "npm run prettier -- --check",
68
+ "build": "tshy",
69
+ "build:docs": "typedoc --hideGenerator src/index.ts",
70
+ "build:encode-trie": "node --import=tsx scripts/write-encode-map.ts",
71
+ "build:trie": "node --import=tsx scripts/write-decode-map.ts",
62
72
  "format": "npm run format:es && npm run format:prettier",
63
73
  "format:es": "npm run lint:es -- --fix",
64
74
  "format:prettier": "npm run prettier -- --write",
75
+ "lint": "npm run lint:es && npm run lint:ts && npm run lint:prettier",
76
+ "lint:es": "eslint . --ignore-path .gitignore",
77
+ "lint:prettier": "npm run prettier -- --check",
78
+ "lint:ts": "tsc --noEmit",
79
+ "prepare": "npm run build",
65
80
  "prettier": "prettier '**/*.{ts,md,json,yml}'",
66
- "build": "npm run build:cjs && npm run build:esm",
67
- "build:cjs": "tsc --sourceRoot https://raw.githubusercontent.com/fb55/entities/$(git rev-parse HEAD)/src/",
68
- "build:esm": "npm run build:cjs -- --module esnext --target es2019 --outDir lib/esm && echo '{\"type\":\"module\"}' > lib/esm/package.json",
69
- "build:docs": "typedoc --hideGenerator src/index.ts",
70
- "build:trie": "ts-node scripts/write-decode-map.ts",
71
- "build:encode-trie": "ts-node scripts/write-encode-map.ts",
72
- "prepare": "npm run build"
81
+ "test": "npm run test:vi && npm run lint",
82
+ "test:vi": "vitest run"
73
83
  },
74
- "repository": {
75
- "type": "git",
76
- "url": "git://github.com/fb55/entities.git"
84
+ "prettier": {
85
+ "proseWrap": "always",
86
+ "tabWidth": 4
77
87
  },
78
- "license": "BSD-2-Clause",
79
- "jest": {
80
- "preset": "ts-jest",
81
- "coverageProvider": "v8",
82
- "moduleNameMapper": {
83
- "^(.*)\\.js$": "$1"
84
- }
88
+ "devDependencies": {
89
+ "@types/node": "^20.14.8",
90
+ "@typescript-eslint/eslint-plugin": "^7.14.1",
91
+ "@typescript-eslint/parser": "^7.14.1",
92
+ "@vitest/coverage-v8": "^1.6.0",
93
+ "eslint": "^8.57.0",
94
+ "eslint-config-prettier": "^9.1.0",
95
+ "eslint-plugin-n": "^17.9.0",
96
+ "eslint-plugin-unicorn": "^54.0.0",
97
+ "prettier": "^3.3.2",
98
+ "tshy": "^1.16.1",
99
+ "tsx": "^4.15.7",
100
+ "typedoc": "^0.26.2",
101
+ "typescript": "^5.5.2",
102
+ "vitest": "^1.6.0"
85
103
  },
86
- "prettier": {
87
- "tabWidth": 4,
88
- "proseWrap": "always"
104
+ "engines": {
105
+ "node": ">=0.12"
106
+ },
107
+ "tshy": {
108
+ "exclude": [
109
+ "**/*.spec.ts",
110
+ "**/__fixtures__/*",
111
+ "**/__tests__/*",
112
+ "**/__snapshots__/*"
113
+ ],
114
+ "exports": {
115
+ ".": "./src/index.ts",
116
+ "./dist/decode.js": "./src/decode.ts",
117
+ "./dist/escape.js": "./src/escape.ts"
118
+ }
89
119
  }
90
120
  }
package/readme.md CHANGED
@@ -1,4 +1,4 @@
1
- # entities [![NPM version](http://img.shields.io/npm/v/entities.svg)](https://npmjs.org/package/entities) [![Downloads](https://img.shields.io/npm/dm/entities.svg)](https://npmjs.org/package/entities) [![Build Status](http://img.shields.io/travis/fb55/entities.svg)](http://travis-ci.org/fb55/entities) [![Coverage](http://img.shields.io/coveralls/fb55/entities.svg)](https://coveralls.io/r/fb55/entities)
1
+ # entities [![NPM version](https://img.shields.io/npm/v/entities.svg)](https://npmjs.org/package/entities) [![Downloads](https://img.shields.io/npm/dm/entities.svg)](https://npmjs.org/package/entities) [![Node.js CI](https://github.com/fb55/entities/actions/workflows/nodejs-test.yml/badge.svg)](https://github.com/fb55/entities/actions/workflows/nodejs-test.yml)
2
2
 
3
3
  Encode & decode HTML & XML entities with ease & speed.
4
4
 
@@ -9,10 +9,10 @@ Encode & decode HTML & XML entities with ease & speed.
9
9
  [AWS SDK](https://github.com/aws/aws-sdk-js-v3) and
10
10
  [`commonmark`](https://github.com/commonmark/commonmark.js) use it to
11
11
  process HTML entities.
12
- - ⚡️ Fast: `entities` is the fastes library for decoding HTML entities (as of
13
- April 2022); see [performance](#performance).
12
+ - ⚡️ Fast: `entities` is the fastest library for decoding HTML entities (as
13
+ of April 2022); see [performance](#performance).
14
14
  - 🎛 Configurable: Get an output tailored for your needs. You are fine with
15
- UTF8? That'll safe you some bytes. Prefer to only have ASCII characters? We
15
+ UTF8? That'll save you some bytes. Prefer to only have ASCII characters? We
16
16
  can do that as well!
17
17
 
18
18
  ## How to…
@@ -87,7 +87,7 @@ Instead, use the functions that you need directly.
87
87
 
88
88
  ## Acknowledgements
89
89
 
90
- This libary wouldn't be possible without the work of these individuals. Thanks
90
+ This library wouldn't be possible without the work of these individuals. Thanks
91
91
  to
92
92
 
93
93
  - [@mathiasbynens](https://github.com/mathiasbynens) for his explanations
package/src/decode-codepoint.ts ADDED
@@ -0,0 +1,81 @@
1
+ // Adapted from https://github.com/mathiasbynens/he/blob/36afe179392226cf1b6ccdb16ebbb7a5a844d93a/src/he.js#L106-L134
2
+
3
+ const decodeMap = new Map([
4
+ [0, 65_533],
5
+ // C1 Unicode control character reference replacements
6
+ [128, 8364],
7
+ [130, 8218],
8
+ [131, 402],
9
+ [132, 8222],
10
+ [133, 8230],
11
+ [134, 8224],
12
+ [135, 8225],
13
+ [136, 710],
14
+ [137, 8240],
15
+ [138, 352],
16
+ [139, 8249],
17
+ [140, 338],
18
+ [142, 381],
19
+ [145, 8216],
20
+ [146, 8217],
21
+ [147, 8220],
22
+ [148, 8221],
23
+ [149, 8226],
24
+ [150, 8211],
25
+ [151, 8212],
26
+ [152, 732],
27
+ [153, 8482],
28
+ [154, 353],
29
+ [155, 8250],
30
+ [156, 339],
31
+ [158, 382],
32
+ [159, 376],
33
+ ]);
34
+
35
+ /**
36
+ * Polyfill for `String.fromCodePoint`. It is used to create a string from a Unicode code point.
37
+ */
38
+ export const fromCodePoint =
39
+ // eslint-disable-next-line @typescript-eslint/no-unnecessary-condition, n/no-unsupported-features/es-builtins
40
+ String.fromCodePoint ??
41
+ function (codePoint: number): string {
42
+ let output = "";
43
+
44
+ if (codePoint > 0xff_ff) {
45
+ codePoint -= 0x1_00_00;
46
+ output += String.fromCharCode(
47
+ ((codePoint >>> 10) & 0x3_ff) | 0xd8_00,
48
+ );
49
+ codePoint = 0xdc_00 | (codePoint & 0x3_ff);
50
+ }
51
+
52
+ output += String.fromCharCode(codePoint);
53
+ return output;
54
+ };
55
+
56
+ /**
57
+ * Replace the given code point with U+FFFD if it is a surrogate or is
58
+ * outside the valid range. Otherwise apply the `decodeMap` replacements
59
+ * (NUL and C1 controls) and return any other code point unchanged.
60
+ */
61
+ export function replaceCodePoint(codePoint: number) {
62
+ if (
63
+ (codePoint >= 0xd8_00 && codePoint <= 0xdf_ff) ||
64
+ codePoint > 0x10_ff_ff
65
+ ) {
66
+ return 0xff_fd;
67
+ }
68
+
69
+ return decodeMap.get(codePoint) ?? codePoint;
70
+ }
71
+
72
+ /**
73
+ * Replace the code point if relevant, then convert it to a string.
74
+ *
75
+ * @deprecated Use `fromCodePoint(replaceCodePoint(codePoint))` instead.
76
+ * @param codePoint The code point to decode.
77
+ * @returns The decoded code point.
78
+ */
79
+ export default function decodeCodePoint(codePoint: number): string {
80
+ return fromCodePoint(replaceCodePoint(codePoint));
81
+ }
package/src/decode.spec.ts ADDED
@@ -0,0 +1,320 @@
1
+ import { describe, it, expect, vitest } from "vitest";
2
+ import * as entities from "./decode.js";
3
+
4
+ describe("Decode test", () => {
5
+ const testcases = [
6
+ { input: "&amp;amp;", output: "&amp;" },
7
+ { input: "&amp;#38;", output: "&#38;" },
8
+ { input: "&amp;#x26;", output: "&#x26;" },
9
+ { input: "&amp;#X26;", output: "&#X26;" },
10
+ { input: "&#38;#38;", output: "&#38;" },
11
+ { input: "&#x26;#38;", output: "&#38;" },
12
+ { input: "&#X26;#38;", output: "&#38;" },
13
+ { input: "&#x3a;", output: ":" },
14
+ { input: "&#x3A;", output: ":" },
15
+ { input: "&#X3a;", output: ":" },
16
+ { input: "&#X3A;", output: ":" },
17
+ { input: "&#", output: "&#" },
18
+ { input: "&>", output: "&>" },
19
+ { input: "id=770&#anchor", output: "id=770&#anchor" },
20
+ ];
21
+
22
+ for (const { input, output } of testcases) {
23
+ it(`should XML decode ${input}`, () =>
24
+ expect(entities.decodeXML(input)).toBe(output));
25
+ it(`should HTML decode ${input}`, () =>
26
+ expect(entities.decodeHTML(input)).toBe(output));
27
+ }
28
+
29
+ it("should HTML decode partial legacy entity", () => {
30
+ expect(entities.decodeHTMLStrict("&timesbar")).toBe("&timesbar");
31
+ expect(entities.decodeHTML("&timesbar")).toBe("×bar");
32
+ });
33
+
34
+ it("should HTML decode legacy entities according to spec", () =>
35
+ expect(entities.decodeHTML("?&image_uri=1&ℑ=2&image=3")).toBe(
36
+ "?&image_uri=1&ℑ=2&image=3",
37
+ ));
38
+
39
+ it("should back out of legacy entities", () =>
40
+ expect(entities.decodeHTML("&ampa")).toBe("&a"));
41
+
42
+ it("should not parse numeric entities in strict mode", () =>
43
+ expect(entities.decodeHTMLStrict("&#55")).toBe("&#55"));
44
+
45
+ it("should parse &nbsp followed by < (#852)", () =>
46
+ expect(entities.decodeHTML("&nbsp<")).toBe("\u00A0<"));
47
+
48
+ it("should decode trailing legacy entities", () => {
49
+ expect(entities.decodeHTML("&timesbar;&timesbar")).toBe("⨱×bar");
50
+ });
51
+
52
+ it("should decode multi-byte entities", () => {
53
+ expect(entities.decodeHTML("&NotGreaterFullEqual;")).toBe("≧̸");
54
+ });
55
+
56
+ it("should not decode legacy entities followed by text in attribute mode", () => {
57
+ expect(
58
+ entities.decodeHTML("&not", entities.DecodingMode.Attribute),
59
+ ).toBe("¬");
60
+
61
+ expect(
62
+ entities.decodeHTML("&noti", entities.DecodingMode.Attribute),
63
+ ).toBe("&noti");
64
+
65
+ expect(
66
+ entities.decodeHTML("&not=", entities.DecodingMode.Attribute),
67
+ ).toBe("&not=");
68
+
69
+ expect(entities.decodeHTMLAttribute("&notp")).toBe("&notp");
70
+ expect(entities.decodeHTMLAttribute("&notP")).toBe("&notP");
71
+ expect(entities.decodeHTMLAttribute("&not3")).toBe("&not3");
72
+ });
73
+ });
74
+
75
+ describe("EntityDecoder", () => {
76
+ it("should decode decimal entities", () => {
77
+ const callback = vitest.fn();
78
+ const decoder = new entities.EntityDecoder(
79
+ entities.htmlDecodeTree,
80
+ callback,
81
+ );
82
+
83
+ expect(decoder.write("&#5", 1)).toBe(-1);
84
+ expect(decoder.write("8;", 0)).toBe(5);
85
+
86
+ expect(callback).toHaveBeenCalledTimes(1);
87
+ expect(callback).toHaveBeenCalledWith(":".charCodeAt(0), 5);
88
+ });
89
+
90
+ it("should decode hex entities", () => {
91
+ const callback = vitest.fn();
92
+ const decoder = new entities.EntityDecoder(
93
+ entities.htmlDecodeTree,
94
+ callback,
95
+ );
96
+
97
+ expect(decoder.write("&#x3a;", 1)).toBe(6);
98
+
99
+ expect(callback).toHaveBeenCalledTimes(1);
100
+ expect(callback).toHaveBeenCalledWith(":".charCodeAt(0), 6);
101
+ });
102
+
103
+ it("should decode named entities", () => {
104
+ const callback = vitest.fn();
105
+ const decoder = new entities.EntityDecoder(
106
+ entities.htmlDecodeTree,
107
+ callback,
108
+ );
109
+
110
+ expect(decoder.write("&amp;", 1)).toBe(5);
111
+
112
+ expect(callback).toHaveBeenCalledTimes(1);
113
+ expect(callback).toHaveBeenCalledWith("&".charCodeAt(0), 5);
114
+ });
115
+
116
+ it("should decode legacy entities", () => {
117
+ const callback = vitest.fn();
118
+ const decoder = new entities.EntityDecoder(
119
+ entities.htmlDecodeTree,
120
+ callback,
121
+ );
122
+ decoder.startEntity(entities.DecodingMode.Legacy);
123
+
124
+ expect(decoder.write("&amp", 1)).toBe(-1);
125
+
126
+ expect(callback).toHaveBeenCalledTimes(0);
127
+
128
+ expect(decoder.end()).toBe(4);
129
+
130
+ expect(callback).toHaveBeenCalledTimes(1);
131
+ expect(callback).toHaveBeenCalledWith("&".charCodeAt(0), 4);
132
+ });
133
+
134
+ it("should decode named entity written character by character", () => {
135
+ const callback = vitest.fn();
136
+ const decoder = new entities.EntityDecoder(
137
+ entities.htmlDecodeTree,
138
+ callback,
139
+ );
140
+
141
+ for (const c of "amp") {
142
+ expect(decoder.write(c, 0)).toBe(-1);
143
+ }
144
+ expect(decoder.write(";", 0)).toBe(5);
145
+
146
+ expect(callback).toHaveBeenCalledTimes(1);
147
+ expect(callback).toHaveBeenCalledWith("&".charCodeAt(0), 5);
148
+ });
149
+
150
+ it("should decode numeric entity written character by character", () => {
151
+ const callback = vitest.fn();
152
+ const decoder = new entities.EntityDecoder(
153
+ entities.htmlDecodeTree,
154
+ callback,
155
+ );
156
+
157
+ for (const c of "#x3a") {
158
+ expect(decoder.write(c, 0)).toBe(-1);
159
+ }
160
+ expect(decoder.write(";", 0)).toBe(6);
161
+
162
+ expect(callback).toHaveBeenCalledTimes(1);
163
+ expect(callback).toHaveBeenCalledWith(":".charCodeAt(0), 6);
164
+ });
165
+
166
+ it("should decode hex entities across several chunks", () => {
167
+ const callback = vitest.fn();
168
+ const decoder = new entities.EntityDecoder(
169
+ entities.htmlDecodeTree,
170
+ callback,
171
+ );
172
+
173
+ for (const chunk of ["#x", "cf", "ff", "d"]) {
174
+ expect(decoder.write(chunk, 0)).toBe(-1);
175
+ }
176
+
177
+ expect(decoder.write(";", 0)).toBe(9);
178
+ expect(callback).toHaveBeenCalledTimes(1);
179
+ expect(callback).toHaveBeenCalledWith(0xc_ff_fd, 9);
180
+ });
181
+
182
+ it("should not fail if nothing is written", () => {
183
+ const callback = vitest.fn();
184
+ const decoder = new entities.EntityDecoder(
185
+ entities.htmlDecodeTree,
186
+ callback,
187
+ );
188
+
189
+ expect(decoder.end()).toBe(0);
190
+ expect(callback).toHaveBeenCalledTimes(0);
191
+ });
192
+
193
+ describe("errors", () => {
194
+ it("should produce an error for a named entity without a semicolon", () => {
195
+ const errorHandlers = {
196
+ missingSemicolonAfterCharacterReference: vitest.fn(),
197
+ absenceOfDigitsInNumericCharacterReference: vitest.fn(),
198
+ validateNumericCharacterReference: vitest.fn(),
199
+ };
200
+ const callback = vitest.fn();
201
+ const decoder = new entities.EntityDecoder(
202
+ entities.htmlDecodeTree,
203
+ callback,
204
+ errorHandlers,
205
+ );
206
+
207
+ decoder.startEntity(entities.DecodingMode.Legacy);
208
+ expect(decoder.write("&amp;", 1)).toBe(5);
209
+ expect(callback).toHaveBeenCalledTimes(1);
210
+ expect(callback).toHaveBeenCalledWith("&".charCodeAt(0), 5);
211
+ expect(
212
+ errorHandlers.missingSemicolonAfterCharacterReference,
213
+ ).toHaveBeenCalledTimes(0);
214
+
215
+ decoder.startEntity(entities.DecodingMode.Legacy);
216
+ expect(decoder.write("&amp", 1)).toBe(-1);
217
+ expect(decoder.end()).toBe(4);
218
+
219
+ expect(callback).toHaveBeenCalledTimes(2);
220
+ expect(callback).toHaveBeenLastCalledWith("&".charCodeAt(0), 4);
221
+ expect(
222
+ errorHandlers.missingSemicolonAfterCharacterReference,
223
+ ).toHaveBeenCalledTimes(1);
224
+ });
225
+
226
+ it("should produce an error for a numeric entity without a semicolon", () => {
227
+ const errorHandlers = {
228
+ missingSemicolonAfterCharacterReference: vitest.fn(),
229
+ absenceOfDigitsInNumericCharacterReference: vitest.fn(),
230
+ validateNumericCharacterReference: vitest.fn(),
231
+ };
232
+ const callback = vitest.fn();
233
+ const decoder = new entities.EntityDecoder(
234
+ entities.htmlDecodeTree,
235
+ callback,
236
+ errorHandlers,
237
+ );
238
+
239
+ decoder.startEntity(entities.DecodingMode.Legacy);
240
+ expect(decoder.write("&#x3a", 1)).toBe(-1);
241
+ expect(decoder.end()).toBe(5);
242
+
243
+ expect(callback).toHaveBeenCalledTimes(1);
244
+ expect(callback).toHaveBeenCalledWith(0x3a, 5);
245
+ expect(
246
+ errorHandlers.missingSemicolonAfterCharacterReference,
247
+ ).toHaveBeenCalledTimes(1);
248
+ expect(
249
+ errorHandlers.absenceOfDigitsInNumericCharacterReference,
250
+ ).toHaveBeenCalledTimes(0);
251
+ expect(
252
+ errorHandlers.validateNumericCharacterReference,
253
+ ).toHaveBeenCalledTimes(1);
254
+ expect(
255
+ errorHandlers.validateNumericCharacterReference,
256
+ ).toHaveBeenCalledWith(0x3a);
257
+ });
258
+
259
+ it("should produce an error for numeric entities without digits", () => {
260
+ const errorHandlers = {
261
+ missingSemicolonAfterCharacterReference: vitest.fn(),
262
+ absenceOfDigitsInNumericCharacterReference: vitest.fn(),
263
+ validateNumericCharacterReference: vitest.fn(),
264
+ };
265
+ const callback = vitest.fn();
266
+ const decoder = new entities.EntityDecoder(
267
+ entities.htmlDecodeTree,
268
+ callback,
269
+ errorHandlers,
270
+ );
271
+
272
+ decoder.startEntity(entities.DecodingMode.Legacy);
273
+ expect(decoder.write("&#", 1)).toBe(-1);
274
+ expect(decoder.end()).toBe(0);
275
+
276
+ expect(callback).toHaveBeenCalledTimes(0);
277
+ expect(
278
+ errorHandlers.missingSemicolonAfterCharacterReference,
279
+ ).toHaveBeenCalledTimes(0);
280
+ expect(
281
+ errorHandlers.absenceOfDigitsInNumericCharacterReference,
282
+ ).toHaveBeenCalledTimes(1);
283
+ expect(
284
+ errorHandlers.absenceOfDigitsInNumericCharacterReference,
285
+ ).toHaveBeenCalledWith(2);
286
+ expect(
287
+ errorHandlers.validateNumericCharacterReference,
288
+ ).toHaveBeenCalledTimes(0);
289
+ });
290
+
291
+ it("should produce an error for hex entities without digits", () => {
292
+ const errorHandlers = {
293
+ missingSemicolonAfterCharacterReference: vitest.fn(),
294
+ absenceOfDigitsInNumericCharacterReference: vitest.fn(),
295
+ validateNumericCharacterReference: vitest.fn(),
296
+ };
297
+ const callback = vitest.fn();
298
+ const decoder = new entities.EntityDecoder(
299
+ entities.htmlDecodeTree,
300
+ callback,
301
+ errorHandlers,
302
+ );
303
+
304
+ decoder.startEntity(entities.DecodingMode.Legacy);
305
+ expect(decoder.write("&#x", 1)).toBe(-1);
306
+ expect(decoder.end()).toBe(0);
307
+
308
+ expect(callback).toHaveBeenCalledTimes(0);
309
+ expect(
310
+ errorHandlers.missingSemicolonAfterCharacterReference,
311
+ ).toHaveBeenCalledTimes(0);
312
+ expect(
313
+ errorHandlers.absenceOfDigitsInNumericCharacterReference,
314
+ ).toHaveBeenCalledTimes(1);
315
+ expect(
316
+ errorHandlers.validateNumericCharacterReference,
317
+ ).toHaveBeenCalledTimes(0);
318
+ });
319
+ });
320
+ });