entities 2.2.0 → 4.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65) hide show
  1. package/lib/decode.d.ts +13 -4
  2. package/lib/decode.d.ts.map +1 -1
  3. package/lib/decode.js +135 -42
  4. package/lib/decode.js.map +1 -0
  5. package/lib/decode_codepoint.d.ts +1 -0
  6. package/lib/decode_codepoint.d.ts.map +1 -1
  7. package/lib/decode_codepoint.js +42 -12
  8. package/lib/decode_codepoint.js.map +1 -0
  9. package/lib/encode-trie.d.ts +8 -0
  10. package/lib/encode-trie.d.ts.map +1 -0
  11. package/lib/encode-trie.js +78 -0
  12. package/lib/encode-trie.js.map +1 -0
  13. package/lib/encode.d.ts +20 -7
  14. package/lib/encode.d.ts.map +1 -1
  15. package/lib/encode.js +82 -93
  16. package/lib/encode.js.map +1 -0
  17. package/lib/esm/decode.d.ts +14 -0
  18. package/lib/esm/decode.d.ts.map +1 -0
  19. package/lib/esm/decode.js +135 -0
  20. package/lib/esm/decode.js.map +1 -0
  21. package/lib/esm/decode_codepoint.d.ts +3 -0
  22. package/lib/esm/decode_codepoint.d.ts.map +1 -0
  23. package/lib/esm/decode_codepoint.js +55 -0
  24. package/lib/esm/decode_codepoint.js.map +1 -0
  25. package/lib/esm/encode-trie.d.ts +8 -0
  26. package/lib/esm/encode-trie.d.ts.map +1 -0
  27. package/lib/esm/encode-trie.js +67 -0
  28. package/lib/esm/encode-trie.js.map +1 -0
  29. package/lib/esm/encode.d.ts +60 -0
  30. package/lib/esm/encode.d.ts.map +1 -0
  31. package/lib/esm/encode.js +119 -0
  32. package/lib/esm/encode.js.map +1 -0
  33. package/lib/esm/generated/decode-data-html.d.ts +3 -0
  34. package/lib/esm/generated/decode-data-html.d.ts.map +1 -0
  35. package/lib/esm/generated/decode-data-html.js +4 -0
  36. package/lib/esm/generated/decode-data-html.js.map +1 -0
  37. package/lib/esm/generated/decode-data-xml.d.ts +3 -0
  38. package/lib/esm/generated/decode-data-xml.d.ts.map +1 -0
  39. package/lib/esm/generated/decode-data-xml.js +4 -0
  40. package/lib/esm/generated/decode-data-xml.js.map +1 -0
  41. package/lib/esm/index.d.ts +101 -0
  42. package/lib/esm/index.d.ts.map +1 -0
  43. package/lib/esm/index.js +111 -0
  44. package/lib/esm/index.js.map +1 -0
  45. package/lib/esm/maps/entities-encode.json +1 -0
  46. package/lib/esm/package.json +1 -0
  47. package/lib/generated/decode-data-html.d.ts +3 -0
  48. package/lib/generated/decode-data-html.d.ts.map +1 -0
  49. package/lib/generated/decode-data-html.js +6 -0
  50. package/lib/generated/decode-data-html.js.map +1 -0
  51. package/lib/generated/decode-data-xml.d.ts +3 -0
  52. package/lib/generated/decode-data-xml.d.ts.map +1 -0
  53. package/lib/generated/decode-data-xml.js +6 -0
  54. package/lib/generated/decode-data-xml.js.map +1 -0
  55. package/lib/index.d.ts +85 -11
  56. package/lib/index.d.ts.map +1 -1
  57. package/lib/index.js +110 -32
  58. package/lib/index.js.map +1 -0
  59. package/lib/maps/entities-encode.json +1 -0
  60. package/package.json +41 -17
  61. package/readme.md +79 -13
  62. package/lib/maps/decode.json +0 -1
  63. package/lib/maps/entities.json +0 -1
  64. package/lib/maps/legacy.json +0 -1
  65. package/lib/maps/xml.json +0 -1
package/lib/encode.js CHANGED
@@ -1,12 +1,16 @@
1
1
  "use strict";
2
- var __importDefault = (this && this.__importDefault) || function (mod) {
3
- return (mod && mod.__esModule) ? mod : { "default": mod };
4
- };
5
2
  Object.defineProperty(exports, "__esModule", { value: true });
6
- exports.escapeUTF8 = exports.escape = exports.encodeNonAsciiHTML = exports.encodeHTML = exports.encodeXML = void 0;
7
- var xml_json_1 = __importDefault(require("./maps/xml.json"));
8
- var inverseXML = getInverseObj(xml_json_1.default);
9
- var xmlReplacer = getInverseReplacer(inverseXML);
3
+ exports.escapeText = exports.escapeAttribute = exports.escapeUTF8 = exports.escape = exports.encodeNonAsciiHTML = exports.encodeHTML = exports.encodeXML = void 0;
4
+ var encode_trie_js_1 = require("./encode-trie.js");
5
// Characters `encodeHTML` passes to the entity trie: tab, newline, form feed,
// most ASCII punctuation, and every UTF-16 code unit from 0x80 upwards.
var htmlReplacer = /[\t\n!-,./:-@[-`\f{-}$\x80-\uFFFF]/g;
// Characters `encodeXML` and `encodeNonAsciiHTML` inspect: the five XML
// special characters, `$`, and every UTF-16 code unit from 0x80 upwards.
var xmlReplacer = /["&'<>$\x80-\uFFFF]/g;
// UTF-16 code unit -> predefined XML entity.
var xmlCodeMap = new Map();
xmlCodeMap.set(34, "&quot;");
xmlCodeMap.set(38, "&amp;");
xmlCodeMap.set(39, "&apos;");
xmlCodeMap.set(60, "&lt;");
xmlCodeMap.set(62, "&gt;");
10
14
  /**
11
15
  * Encodes all non-ASCII characters, as well as characters not valid in XML
12
16
  * documents using XML entities.
@@ -14,10 +18,27 @@ var xmlReplacer = getInverseReplacer(inverseXML);
14
18
  * If a character has no equivalent entity, a
15
19
  * numeric hexadecimal reference (eg. `&#xfc;`) will be used.
16
20
  */
17
- exports.encodeXML = getASCIIEncoder(inverseXML);
18
- var entities_json_1 = __importDefault(require("./maps/entities.json"));
19
- var inverseHTML = getInverseObj(entities_json_1.default);
20
- var htmlReplacer = getInverseReplacer(inverseHTML);
21
/**
 * Replaces every character matched by `xmlReplacer` with the predefined XML
 * entity from `xmlCodeMap`, or with a hexadecimal numeric reference when the
 * character has no predefined entity.
 *
 * @param str String to encode.
 * @returns The encoded string.
 */
function encodeXML(str) {
    var ret = "";
    var lastIdx = 0;
    var match;
    while ((match = xmlReplacer.exec(str)) !== null) {
        var i = match.index;
        var char = str.charCodeAt(i);
        var named = xmlCodeMap.get(char);
        if (named !== undefined) {
            ret += str.substring(lastIdx, i) + named;
            lastIdx = i + 1;
        }
        else {
            var codePoint = (0, encode_trie_js_1.getCodePoint)(str, i);
            ret += str.substring(lastIdx, i) + "&#x" + codePoint.toString(16) + ";";
            // Skip the trailing surrogate only when a full pair was consumed.
            // The code point differs from the first code unit exactly in that
            // case, so every lead surrogate (0xD800-0xDBFF) is covered and a
            // lone lead surrogate no longer swallows the next character.
            lastIdx = xmlReplacer.lastIndex += Number(codePoint !== char);
        }
    }
    return ret + str.substring(lastIdx);
}
41
+ exports.encodeXML = encodeXML;
21
42
  /**
22
43
  * Encodes all entities and non-ASCII characters in the input.
23
44
  *
@@ -28,7 +49,10 @@ var htmlReplacer = getInverseReplacer(inverseHTML);
28
49
  * If a character has no equivalent entity, a
29
50
  * numeric hexadecimal reference (eg. `&#xfc;`) will be used.
30
51
  */
31
- exports.encodeHTML = getInverse(inverseHTML, htmlReplacer);
52
function encodeHTML(data) {
    // Run the trie-based encoder over every character `htmlReplacer` matches.
    var encodeWithTrie = encode_trie_js_1.encodeHTMLTrieRe;
    return encodeWithTrie(htmlReplacer, data);
}
55
+ exports.encodeHTML = encodeHTML;
32
56
  /**
33
57
  * Encodes all non-ASCII characters, as well as characters not valid in HTML
34
58
  * documents using HTML entities.
@@ -36,74 +60,10 @@ exports.encodeHTML = getInverse(inverseHTML, htmlReplacer);
36
60
  * If a character has no equivalent entity, a
37
61
  * numeric hexadecimal reference (eg. `&#xfc;`) will be used.
38
62
  */
39
- exports.encodeNonAsciiHTML = getASCIIEncoder(inverseHTML);
40
- function getInverseObj(obj) {
41
- return Object.keys(obj)
42
- .sort()
43
- .reduce(function (inverse, name) {
44
- inverse[obj[name]] = "&" + name + ";";
45
- return inverse;
46
- }, {});
63
function encodeNonAsciiHTML(data) {
    // Same trie-based encoder as `encodeHTML`, but limited to the characters
    // `xmlReplacer` matches.
    var encodeWithTrie = encode_trie_js_1.encodeHTMLTrieRe;
    return encodeWithTrie(xmlReplacer, data);
}
48
- function getInverseReplacer(inverse) {
49
- var single = [];
50
- var multiple = [];
51
- for (var _i = 0, _a = Object.keys(inverse); _i < _a.length; _i++) {
52
- var k = _a[_i];
53
- if (k.length === 1) {
54
- // Add value to single array
55
- single.push("\\" + k);
56
- }
57
- else {
58
- // Add value to multiple array
59
- multiple.push(k);
60
- }
61
- }
62
- // Add ranges to single characters.
63
- single.sort();
64
- for (var start = 0; start < single.length - 1; start++) {
65
- // Find the end of a run of characters
66
- var end = start;
67
- while (end < single.length - 1 &&
68
- single[end].charCodeAt(1) + 1 === single[end + 1].charCodeAt(1)) {
69
- end += 1;
70
- }
71
- var count = 1 + end - start;
72
- // We want to replace at least three characters
73
- if (count < 3)
74
- continue;
75
- single.splice(start, count, single[start] + "-" + single[end]);
76
- }
77
- multiple.unshift("[" + single.join("") + "]");
78
- return new RegExp(multiple.join("|"), "g");
79
- }
80
- // /[^\0-\x7F]/gu
81
- var reNonASCII = /(?:[\x80-\uD7FF\uE000-\uFFFF]|[\uD800-\uDBFF][\uDC00-\uDFFF]|[\uD800-\uDBFF](?![\uDC00-\uDFFF])|(?:[^\uD800-\uDBFF]|^)[\uDC00-\uDFFF])/g;
82
- var getCodePoint =
83
- // eslint-disable-next-line @typescript-eslint/no-unnecessary-condition
84
- String.prototype.codePointAt != null
85
- ? // eslint-disable-next-line @typescript-eslint/no-non-null-assertion
86
- function (str) { return str.codePointAt(0); }
87
- : // http://mathiasbynens.be/notes/javascript-encoding#surrogate-formulae
88
- function (c) {
89
- return (c.charCodeAt(0) - 0xd800) * 0x400 +
90
- c.charCodeAt(1) -
91
- 0xdc00 +
92
- 0x10000;
93
- };
94
- function singleCharReplacer(c) {
95
- return "&#x" + (c.length > 1 ? getCodePoint(c) : c.charCodeAt(0))
96
- .toString(16)
97
- .toUpperCase() + ";";
98
- }
99
- function getInverse(inverse, re) {
100
- return function (data) {
101
- return data
102
- .replace(re, function (name) { return inverse[name]; })
103
- .replace(reNonASCII, singleCharReplacer);
104
- };
105
- }
106
- var reEscapeChars = new RegExp(xmlReplacer.source + "|" + reNonASCII.source, "g");
66
+ exports.encodeNonAsciiHTML = encodeNonAsciiHTML;
107
67
  /**
108
68
  * Encodes all non-ASCII characters, as well as characters not valid in XML
109
69
  * documents using numeric hexadecimal reference (eg. `&#xfc;`).
@@ -113,24 +73,53 @@ var reEscapeChars = new RegExp(xmlReplacer.source + "|" + reNonASCII.source, "g"
113
73
  *
114
74
  * @param data String to escape.
115
75
  */
116
- function escape(data) {
117
- return data.replace(reEscapeChars, singleCharReplacer);
76
+ exports.escape = encodeXML;
77
// Builds an escaper: every match of `regex` (a global, single-character
// pattern) is replaced by the string `map` holds for that character's code.
function getEscaper(regex, map) {
    return function escape(data) {
        var output = "";
        var cursor = 0;
        for (var hit = regex.exec(data); hit; hit = regex.exec(data)) {
            var at = hit.index;
            // Copy the untouched text in front of the match.
            output += data.substring(cursor, at);
            // The matched character is expected to have an entry in the map.
            output += map.get(hit[0].charCodeAt(0));
            // Matches are one character long.
            cursor = at + 1;
        }
        return output + data.substring(cursor);
    };
}
119
- exports.escape = escape;
120
94
  /**
121
- * Encodes all characters not valid in XML documents using numeric hexadecimal
122
- * reference (eg. `&#xfc;`).
95
+ * Encodes all characters not valid in XML documents using XML entities.
123
96
  *
124
97
  * Note that the output will be character-set dependent.
125
98
  *
126
99
  * @param data String to escape.
127
100
  */
128
- function escapeUTF8(data) {
129
- return data.replace(xmlReplacer, singleCharReplacer);
130
- }
131
- exports.escapeUTF8 = escapeUTF8;
132
- function getASCIIEncoder(obj) {
133
- return function (data) {
134
- return data.replace(reEscapeChars, function (c) { return obj[c] || singleCharReplacer(c); });
135
- };
136
- }
101
+ exports.escapeUTF8 = getEscaper(/[&<>'"]/g, xmlCodeMap);
102
+ /**
103
+ * Encodes all characters that have to be escaped in HTML attributes,
104
+ * following {@link https://html.spec.whatwg.org/multipage/parsing.html#escapingString}.
105
+ *
106
+ * @param data String to escape.
107
+ */
108
+ exports.escapeAttribute = getEscaper(/["&\u00A0]/g, new Map([
109
+ [34, "&quot;"],
110
+ [38, "&amp;"],
111
+ [160, "&nbsp;"],
112
+ ]));
113
+ /**
114
+ * Encodes all characters that have to be escaped in HTML text,
115
+ * following {@link https://html.spec.whatwg.org/multipage/parsing.html#escapingString}.
116
+ *
117
+ * @param data String to escape.
118
+ */
119
+ exports.escapeText = getEscaper(/[&<>\u00A0]/g, new Map([
120
+ [38, "&amp;"],
121
+ [60, "&lt;"],
122
+ [62, "&gt;"],
123
+ [160, "&nbsp;"],
124
+ ]));
125
+ //# sourceMappingURL=encode.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"encode.js","sourceRoot":"","sources":["../src/encode.ts"],"names":[],"mappings":";;;AAAA,mDAAkE;AAElE,IAAM,YAAY,GAAG,qCAAqC,CAAC;AAC3D,IAAM,WAAW,GAAG,sBAAsB,CAAC;AAE3C,IAAM,UAAU,GAAG,IAAI,GAAG,CAAC;IACvB,CAAC,EAAE,EAAE,QAAQ,CAAC;IACd,CAAC,EAAE,EAAE,OAAO,CAAC;IACb,CAAC,EAAE,EAAE,QAAQ,CAAC;IACd,CAAC,EAAE,EAAE,MAAM,CAAC;IACZ,CAAC,EAAE,EAAE,MAAM,CAAC;CACf,CAAC,CAAC;AAEH;;;;;;GAMG;AACH,SAAgB,SAAS,CAAC,GAAW;IACjC,IAAI,GAAG,GAAG,EAAE,CAAC;IACb,IAAI,OAAO,GAAG,CAAC,CAAC;IAChB,IAAI,KAAK,CAAC;IAEV,OAAO,CAAC,KAAK,GAAG,WAAW,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,KAAK,IAAI,EAAE;QAC7C,IAAM,CAAC,GAAG,KAAK,CAAC,KAAK,CAAC;QACtB,IAAM,IAAI,GAAG,GAAG,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC;QAC/B,IAAM,IAAI,GAAG,UAAU,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;QAElC,IAAI,IAAI,EAAE;YACN,GAAG,IAAI,GAAG,CAAC,SAAS,CAAC,OAAO,EAAE,CAAC,CAAC,GAAG,IAAI,CAAC;YACxC,OAAO,GAAG,CAAC,GAAG,CAAC,CAAC;SACnB;aAAM;YACH,GAAG,IAAI,UAAG,GAAG,CAAC,SAAS,CAAC,OAAO,EAAE,CAAC,CAAC,gBAAM,IAAA,6BAAY,EACjD,GAAG,EACH,CAAC,CACJ,CAAC,QAAQ,CAAC,EAAE,CAAC,MAAG,CAAC;YAClB,4CAA4C;YAC5C,OAAO,GAAG,WAAW,CAAC,SAAS,IAAI,MAAM,CACrC,CAAC,IAAI,GAAG,KAAqB,CAAC,KAAK,MAAM,CAC5C,CAAC;SACL;KACJ;IAED,OAAO,GAAG,GAAG,GAAG,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;AACrC,CAAC;AA1BD,8BA0BC;AAED;;;;;;;;;GASG;AACH,SAAgB,UAAU,CAAC,IAAY;IACnC,OAAO,IAAA,iCAAgB,EAAC,YAAY,EAAE,IAAI,CAAC,CAAC;AAChD,CAAC;AAFD,gCAEC;AACD;;;;;;GAMG;AACH,SAAgB,kBAAkB,CAAC,IAAY;IAC3C,OAAO,IAAA,iCAAgB,EAAC,WAAW,EAAE,IAAI,CAAC,CAAC;AAC/C,CAAC;AAFD,gDAEC;AAED;;;;;;;;GAQG;AACU,QAAA,MAAM,GAAG,SAAS,CAAC;AAEhC,SAAS,UAAU,CACf,KAAa,EACb,GAAwB;IAExB,OAAO,SAAS,MAAM,CAAC,IAAY;QAC/B,IAAI,KAAK,CAAC;QACV,IAAI,OAAO,GAAG,CAAC,CAAC;QAChB,IAAI,MAAM,GAAG,EAAE,CAAC;QAEhB,OAAO,CAAC,KAAK,GAAG,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,EAAE;YAC/B,IAAI,OAAO,KAAK,KAAK,CAAC,KAAK,EAAE;gBACzB,MAAM,IAAI,IAAI,CAAC,SAAS,CAAC,OAAO,EAAE,KAAK,CAAC,KAAK,CAAC,CAAC;aAClD;YAED,mDAAmD;YACnD,MAAM,IAAI,GAAG,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,CAAE,CAAC;YAE3C,kCAAkC;YAClC,OAAO,GAAG,KAAK,CAAC,KAAK,GAAG,CAAC,CAAC;SAC7B;QAE
D,OAAO,MAAM,GAAG,IAAI,CAAC,SAAS,CAAC,OAAO,CAAC,CAAC;IAC5C,CAAC,CAAC;AACN,CAAC;AAED;;;;;;GAMG;AACU,QAAA,UAAU,GAAG,UAAU,CAAC,UAAU,EAAE,UAAU,CAAC,CAAC;AAE7D;;;;;GAKG;AACU,QAAA,eAAe,GAAG,UAAU,CACrC,aAAa,EACb,IAAI,GAAG,CAAC;IACJ,CAAC,EAAE,EAAE,QAAQ,CAAC;IACd,CAAC,EAAE,EAAE,OAAO,CAAC;IACb,CAAC,GAAG,EAAE,QAAQ,CAAC;CAClB,CAAC,CACL,CAAC;AAEF;;;;;GAKG;AACU,QAAA,UAAU,GAAG,UAAU,CAChC,cAAc,EACd,IAAI,GAAG,CAAC;IACJ,CAAC,EAAE,EAAE,OAAO,CAAC;IACb,CAAC,EAAE,EAAE,MAAM,CAAC;IACZ,CAAC,EAAE,EAAE,MAAM,CAAC;IACZ,CAAC,GAAG,EAAE,QAAQ,CAAC;CAClB,CAAC,CACL,CAAC"}
@@ -0,0 +1,14 @@
1
+ import htmlDecodeTree from "./generated/decode-data-html.js";
2
+ import xmlDecodeTree from "./generated/decode-data-xml.js";
3
+ import decodeCodePoint from "./decode_codepoint.js";
4
+ export { htmlDecodeTree, xmlDecodeTree, decodeCodePoint };
5
/** Bit masks that decode.js applies to a 16-bit decode-trie node; the three masks are disjoint. */
+ export declare enum BinTrieFlags {
6
// 0xC000 (bits 14-15). decode.js shifts the masked bits right by 14 to get the value length.
+ VALUE_LENGTH = 49152,
7
// 0x3F80 (bits 7-13). decode.js shifts the masked bits right by 7 to get the branch count.
+ BRANCH_LENGTH = 16256,
8
// 0x7F (bits 0-6). decode.js uses the masked bits as the jump offset of a node.
+ JUMP_TABLE = 127
9
+ }
10
/** Looks up the branch of a trie node for `char`; the implementation in decode.js returns -1 when no branch matches. */
+ export declare function determineBranch(decodeTree: Uint16Array, current: number, nodeIdx: number, char: number): number;
11
/** Decodes entities with the HTML decode tree; decode.js calls the decoder with `strict = false`. */
+ export declare function decodeHTML(str: string): string;
12
/** Decodes entities with the HTML decode tree; decode.js calls the decoder with `strict = true`. */
+ export declare function decodeHTMLStrict(str: string): string;
13
/** Decodes entities with the XML decode tree; decode.js calls the decoder with `strict = true`. */
+ export declare function decodeXML(str: string): string;
14
+ //# sourceMappingURL=decode.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"decode.d.ts","sourceRoot":"","sources":["../../src/decode.ts"],"names":[],"mappings":"AAAA,OAAO,cAAc,MAAM,iCAAiC,CAAC;AAC7D,OAAO,aAAa,MAAM,gCAAgC,CAAC;AAC3D,OAAO,eAAe,MAAM,uBAAuB,CAAC;AAGpD,OAAO,EAAE,cAAc,EAAE,aAAa,EAAE,eAAe,EAAE,CAAC;AAc1D,oBAAY,YAAY;IACpB,YAAY,QAAwB;IACpC,aAAa,QAAwB;IACrC,UAAU,MAAwB;CACrC;AA8GD,wBAAgB,eAAe,CAC3B,UAAU,EAAE,WAAW,EACvB,OAAO,EAAE,MAAM,EACf,OAAO,EAAE,MAAM,EACf,IAAI,EAAE,MAAM,GACb,MAAM,CAsCR;AAKD,wBAAgB,UAAU,CAAC,GAAG,EAAE,MAAM,GAAG,MAAM,CAE9C;AAED,wBAAgB,gBAAgB,CAAC,GAAG,EAAE,MAAM,GAAG,MAAM,CAEpD;AAED,wBAAgB,SAAS,CAAC,GAAG,EAAE,MAAM,GAAG,MAAM,CAE7C"}
@@ -0,0 +1,135 @@
1
+ import htmlDecodeTree from "./generated/decode-data-html.js";
2
+ import xmlDecodeTree from "./generated/decode-data-xml.js";
3
+ import decodeCodePoint from "./decode_codepoint.js";
4
+ // Re-export for use by eg. htmlparser2
5
+ export { htmlDecodeTree, xmlDecodeTree, decodeCodePoint };
6
/**
 * Bit masks for one decode-trie node, with the reverse (mask -> name)
 * entries a compiled numeric enum carries.
 */
export var BinTrieFlags = {
    VALUE_LENGTH: 49152,
    BRANCH_LENGTH: 16256,
    JUMP_TABLE: 127,
    49152: "VALUE_LENGTH",
    16256: "BRANCH_LENGTH",
    127: "JUMP_TABLE",
};
12
/**
 * Builds an entity decoder bound to one binary decode trie.
 *
 * @param decodeTree Trie data, read by numeric index; element 0 is the root node.
 * @returns `decodeHTMLBinary(str, strict)`, which returns `str` with entities
 * replaced. With `strict` set, numeric references and named entities are only
 * replaced when they end in `;`.
 */
function getDecoder(decodeTree) {
    // True for 0-9, and additionally a-f / A-F when the base is 16.
    const isDigitInBase = (code, base) => (code >= 48 && code <= 57) ||
        (base === 16 && (code | 32) >= 97 && (code | 32) <= 102);
    return function decodeHTMLBinary(str, strict) {
        let output = "";
        // Everything before this index has already been copied to `output`.
        let copiedUpTo = 0;
        let pos = 0;
        for (;;) {
            pos = str.indexOf("&", pos);
            if (pos < 0)
                break;
            output += str.slice(copiedUpTo, pos);
            copiedUpTo = pos;
            // Step over the "&".
            pos += 1;
            // Numeric references ("&#...") are parsed without the trie.
            if (str.charCodeAt(pos) === 35 /* "#" */) {
                let digitsStart = pos + 1;
                let base = 10;
                // "x" or "X" switches to hexadecimal.
                if ((str.charCodeAt(digitsStart) | 32) === 120) {
                    base = 16;
                    digitsStart += 1;
                }
                pos = digitsStart;
                while (isDigitInBase(str.charCodeAt(pos), base)) {
                    pos += 1;
                }
                if (pos !== digitsStart) {
                    const parsed = parseInt(str.substring(digitsStart, pos), base);
                    const terminated = str.charCodeAt(pos) === 59 /* ";" */;
                    if (terminated) {
                        pos += 1;
                    }
                    // In strict mode an unterminated reference is left as-is.
                    if (terminated || !strict) {
                        output += decodeCodePoint(parsed);
                        copiedUpTo = pos;
                    }
                }
                continue;
            }
            // Named entity: walk the trie one code unit at a time.
            let resultIdx = 0;
            // Code units consumed since the last accepted value.
            let excess = 1;
            let treeIdx = 0;
            let current = decodeTree[treeIdx];
            while (pos < str.length) {
                treeIdx = determineBranch(decodeTree, current, treeIdx + 1, str.charCodeAt(pos));
                if (treeIdx < 0)
                    break;
                current = decodeTree[treeIdx];
                const valueBits = current & BinTrieFlags.VALUE_LENGTH;
                if (valueBits !== 0) {
                    // In strict mode only values reached on ";" are accepted.
                    if (!strict || str.charCodeAt(pos) === 59 /* ";" */) {
                        resultIdx = treeIdx;
                        excess = 0;
                    }
                    // The length field counts the node word itself.
                    const extraWords = (valueBits >> 14) - 1;
                    if (extraWords === 0)
                        break;
                    treeIdx += extraWords;
                }
                pos += 1;
                excess += 1;
            }
            if (resultIdx !== 0) {
                const head = decodeTree[resultIdx];
                const valueLength = (head & BinTrieFlags.VALUE_LENGTH) >> 14;
                let decoded;
                if (valueLength === 1) {
                    // The value shares the node word with the length bits.
                    decoded = String.fromCharCode(head & ~BinTrieFlags.VALUE_LENGTH);
                }
                else if (valueLength === 2) {
                    decoded = String.fromCharCode(decodeTree[resultIdx + 1]);
                }
                else {
                    decoded = String.fromCharCode(decodeTree[resultIdx + 1], decodeTree[resultIdx + 2]);
                }
                output += decoded;
                copiedUpTo = pos - excess + 1;
            }
        }
        return output + str.slice(copiedUpTo);
    };
}
91
/**
 * Finds the branch a trie node takes for one character.
 *
 * @param decodeTree Trie data.
 * @param current Node word; holds the branch count (`BRANCH_LENGTH` bits) and the jump offset (`JUMP_TABLE` bits).
 * @param nodeIdx Index of the first word after the node.
 * @param char UTF-16 code unit to look up.
 * @returns Index of the node to continue with, or -1 when no branch matches.
 */
export function determineBranch(decodeTree, current, nodeIdx, char) {
    const branchCount = (current & BinTrieFlags.BRANCH_LENGTH) >> 7;
    const jumpOffset = current & BinTrieFlags.JUMP_TABLE;
    // Case 1: a single branch whose character is stored in the jump offset.
    if (branchCount === 0) {
        return jumpOffset !== 0 && char === jumpOffset ? nodeIdx : -1;
    }
    // Case 2: a jump table with `branchCount` slots, the first one belonging
    // to the character `jumpOffset`.
    if (jumpOffset) {
        const slot = char - jumpOffset;
        // Valid slots are 0 .. branchCount - 1; slot === branchCount is the
        // word after the table and must not be read as a branch.
        if (slot < 0 || slot >= branchCount) {
            return -1;
        }
        // Slots store the target index plus one, so an empty slot (0) yields -1.
        return decodeTree[nodeIdx + slot] - 1;
    }
    // Case 3: a dictionary of `branchCount` sorted characters followed by
    // `branchCount` targets; binary search for the character.
    let lo = nodeIdx;
    let hi = lo + branchCount - 1;
    while (lo <= hi) {
        const mid = (lo + hi) >>> 1;
        const midVal = decodeTree[mid];
        if (midVal < char) {
            lo = mid + 1;
        }
        else if (midVal > char) {
            hi = mid - 1;
        }
        else {
            return decodeTree[mid + branchCount];
        }
    }
    return -1;
}
124
// One decoder per decode tree; both are built once at module load.
const htmlDecoder = getDecoder(htmlDecodeTree);
const xmlDecoder = getDecoder(xmlDecodeTree);
/**
 * Decodes entities using the HTML tree in non-strict mode.
 *
 * @param str String to decode.
 */
export function decodeHTML(str) {
    const strict = false;
    return htmlDecoder(str, strict);
}
/**
 * Decodes entities using the HTML tree in strict mode.
 *
 * @param str String to decode.
 */
export function decodeHTMLStrict(str) {
    const strict = true;
    return htmlDecoder(str, strict);
}
/**
 * Decodes entities using the XML tree in strict mode.
 *
 * @param str String to decode.
 */
export function decodeXML(str) {
    const strict = true;
    return xmlDecoder(str, strict);
}
135
+ //# sourceMappingURL=decode.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"decode.js","sourceRoot":"","sources":["../../src/decode.ts"],"names":[],"mappings":"AAAA,OAAO,cAAc,MAAM,iCAAiC,CAAC;AAC7D,OAAO,aAAa,MAAM,gCAAgC,CAAC;AAC3D,OAAO,eAAe,MAAM,uBAAuB,CAAC;AAEpD,uCAAuC;AACvC,OAAO,EAAE,cAAc,EAAE,aAAa,EAAE,eAAe,EAAE,CAAC;AAc1D,MAAM,CAAN,IAAY,YAIX;AAJD,WAAY,YAAY;IACpB,mEAAoC,CAAA;IACpC,qEAAqC,CAAA;IACrC,6DAAkC,CAAA;AACtC,CAAC,EAJW,YAAY,KAAZ,YAAY,QAIvB;AAED,SAAS,UAAU,CAAC,UAAuB;IACvC,OAAO,SAAS,gBAAgB,CAAC,GAAW,EAAE,MAAe;QACzD,IAAI,GAAG,GAAG,EAAE,CAAC;QACb,IAAI,OAAO,GAAG,CAAC,CAAC;QAChB,IAAI,MAAM,GAAG,CAAC,CAAC;QAEf,OAAO,CAAC,MAAM,GAAG,GAAG,CAAC,OAAO,CAAC,GAAG,EAAE,MAAM,CAAC,CAAC,IAAI,CAAC,EAAE;YAC7C,GAAG,IAAI,GAAG,CAAC,KAAK,CAAC,OAAO,EAAE,MAAM,CAAC,CAAC;YAClC,OAAO,GAAG,MAAM,CAAC;YACjB,eAAe;YACf,MAAM,IAAI,CAAC,CAAC;YAEZ,uDAAuD;YACvD,IAAI,GAAG,CAAC,UAAU,CAAC,MAAM,CAAC,iBAAkB,EAAE;gBAC1C,sEAAsE;gBACtE,IAAI,KAAK,GAAG,MAAM,GAAG,CAAC,CAAC;gBACvB,IAAI,IAAI,GAAG,EAAE,CAAC;gBAEd,IAAI,EAAE,GAAG,GAAG,CAAC,UAAU,CAAC,KAAK,CAAC,CAAC;gBAC/B,IAAI,CAAC,EAAE,wBAAyB,CAAC,sBAAsB,EAAE;oBACrD,IAAI,GAAG,EAAE,CAAC;oBACV,MAAM,IAAI,CAAC,CAAC;oBACZ,KAAK,IAAI,CAAC,CAAC;iBACd;gBAED;oBAAG,EAAE,GAAG,GAAG,CAAC,UAAU,CAAC,EAAE,MAAM,CAAC,CAAC;uBAE7B,CAAC,EAAE,iBAAkB,IAAI,EAAE,iBAAkB,CAAC;oBAC9C,CAAC,IAAI,KAAK,EAAE;wBACR,CAAC,EAAE,wBAAyB,CAAC,oBAAqB;wBAClD,CAAC,EAAE,wBAAyB,CAAC,qBAAqB,CAAC,EACzD;gBAEF,IAAI,KAAK,KAAK,MAAM,EAAE;oBAClB,MAAM,MAAM,GAAG,GAAG,CAAC,SAAS,CAAC,KAAK,EAAE,MAAM,CAAC,CAAC;oBAC5C,MAAM,MAAM,GAAG,QAAQ,CAAC,MAAM,EAAE,IAAI,CAAC,CAAC;oBAEtC,IAAI,GAAG,CAAC,UAAU,CAAC,MAAM,CAAC,kBAAmB,EAAE;wBAC3C,MAAM,IAAI,CAAC,CAAC;qBACf;yBAAM,IAAI,MAAM,EAAE;wBACf,SAAS;qBACZ;oBAED,GAAG,IAAI,eAAe,CAAC,MAAM,CAAC,CAAC;oBAC/B,OAAO,GAAG,MAAM,CAAC;iBACpB;gBAED,SAAS;aACZ;YAED,IAAI,SAAS,GAAG,CAAC,CAAC;YAClB,IAAI,MAAM,GAAG,CAAC,CAAC;YACf,IAAI,OAAO,GAAG,CAAC,CAAC;YAChB,IAAI,OAAO,GAAG,UAAU,CAAC,OAAO,CAAC,CAAC;YAElC,OAAO,MAAM,GAAG,GAAG,CAAC,MAAM,EAAE,MAAM,EAAE,EAAE,MAAM,EAAE,EAAE;gBAC5C,OAAO,GAAG,eAAe,CACrB,UAAU,EACV,OAAO,EACP,OAAO,GAAG,CAAC,EACX,GAAG,CAAC,UAAU,CAAC,MAAM,C
AAC,CACzB,CAAC;gBAEF,IAAI,OAAO,GAAG,CAAC;oBAAE,MAAM;gBAEvB,OAAO,GAAG,UAAU,CAAC,OAAO,CAAC,CAAC;gBAE9B,MAAM,MAAM,GAAG,OAAO,GAAG,YAAY,CAAC,YAAY,CAAC;gBAEnD,kDAAkD;gBAClD,IAAI,MAAM,EAAE;oBACR,mFAAmF;oBACnF,IAAI,CAAC,MAAM,IAAI,GAAG,CAAC,UAAU,CAAC,MAAM,CAAC,kBAAmB,EAAE;wBACtD,SAAS,GAAG,OAAO,CAAC;wBACpB,MAAM,GAAG,CAAC,CAAC;qBACd;oBAED,4EAA4E;oBAC5E,MAAM,WAAW,GAAG,CAAC,MAAM,IAAI,EAAE,CAAC,GAAG,CAAC,CAAC;oBAEvC,IAAI,WAAW,KAAK,CAAC;wBAAE,MAAM;oBAE7B,OAAO,IAAI,WAAW,CAAC;iBAC1B;aACJ;YAED,IAAI,SAAS,KAAK,CAAC,EAAE;gBACjB,MAAM,WAAW,GACb,CAAC,UAAU,CAAC,SAAS,CAAC,GAAG,YAAY,CAAC,YAAY,CAAC,IAAI,EAAE,CAAC;gBAC9D,GAAG;oBACC,WAAW,KAAK,CAAC;wBACb,CAAC,CAAC,MAAM,CAAC,YAAY,CACf,UAAU,CAAC,SAAS,CAAC,GAAG,CAAC,YAAY,CAAC,YAAY,CACrD;wBACH,CAAC,CAAC,WAAW,KAAK,CAAC;4BACnB,CAAC,CAAC,MAAM,CAAC,YAAY,CAAC,UAAU,CAAC,SAAS,GAAG,CAAC,CAAC,CAAC;4BAChD,CAAC,CAAC,MAAM,CAAC,YAAY,CACf,UAAU,CAAC,SAAS,GAAG,CAAC,CAAC,EACzB,UAAU,CAAC,SAAS,GAAG,CAAC,CAAC,CAC5B,CAAC;gBACZ,OAAO,GAAG,MAAM,GAAG,MAAM,GAAG,CAAC,CAAC;aACjC;SACJ;QAED,OAAO,GAAG,GAAG,GAAG,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;IACpC,CAAC,CAAC;AACN,CAAC;AAED,MAAM,UAAU,eAAe,CAC3B,UAAuB,EACvB,OAAe,EACf,OAAe,EACf,IAAY;IAEZ,MAAM,WAAW,GAAG,CAAC,OAAO,GAAG,YAAY,CAAC,aAAa,CAAC,IAAI,CAAC,CAAC;IAChE,MAAM,UAAU,GAAG,OAAO,GAAG,YAAY,CAAC,UAAU,CAAC;IAErD,+CAA+C;IAC/C,IAAI,WAAW,KAAK,CAAC,EAAE;QACnB,OAAO,UAAU,KAAK,CAAC,IAAI,IAAI,KAAK,UAAU,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;KACjE;IAED,kDAAkD;IAClD,IAAI,UAAU,EAAE;QACZ,MAAM,KAAK,GAAG,IAAI,GAAG,UAAU,CAAC;QAEhC,OAAO,KAAK,GAAG,CAAC,IAAI,KAAK,GAAG,WAAW;YACnC,CAAC,CAAC,CAAC,CAAC;YACJ,CAAC,CAAC,UAAU,CAAC,OAAO,GAAG,KAAK,CAAC,GAAG,CAAC,CAAC;KACzC;IAED,kDAAkD;IAElD,mCAAmC;IACnC,IAAI,EAAE,GAAG,OAAO,CAAC;IACjB,IAAI,EAAE,GAAG,EAAE,GAAG,WAAW,GAAG,CAAC,CAAC;IAE9B,OAAO,EAAE,IAAI,EAAE,EAAE;QACb,MAAM,GAAG,GAAG,CAAC,EAAE,GAAG,EAAE,CAAC,KAAK,CAAC,CAAC;QAC5B,MAAM,MAAM,GAAG,UAAU,CAAC,GAAG,CAAC,CAAC;QAE/B,IAAI,MAAM,GAAG,IAAI,EAAE;YACf,EAAE,GAAG,GAAG,GAAG,CAAC,CAAC;SAChB;aAAM,IAAI,MAAM,GAAG,IAAI,EAAE;YACtB,EAAE,GAAG,GAAG,GAAG,CAAC,CAAC;SAChB;aAA
M;YACH,OAAO,UAAU,CAAC,GAAG,GAAG,WAAW,CAAC,CAAC;SACxC;KACJ;IAED,OAAO,CAAC,CAAC,CAAC;AACd,CAAC;AAED,MAAM,WAAW,GAAG,UAAU,CAAC,cAAc,CAAC,CAAC;AAC/C,MAAM,UAAU,GAAG,UAAU,CAAC,aAAa,CAAC,CAAC;AAE7C,MAAM,UAAU,UAAU,CAAC,GAAW;IAClC,OAAO,WAAW,CAAC,GAAG,EAAE,KAAK,CAAC,CAAC;AACnC,CAAC;AAED,MAAM,UAAU,gBAAgB,CAAC,GAAW;IACxC,OAAO,WAAW,CAAC,GAAG,EAAE,IAAI,CAAC,CAAC;AAClC,CAAC;AAED,MAAM,UAAU,SAAS,CAAC,GAAW;IACjC,OAAO,UAAU,CAAC,GAAG,EAAE,IAAI,CAAC,CAAC;AACjC,CAAC"}
@@ -0,0 +1,3 @@
1
/** Maps a parsed code point to the one to emit; decode_codepoint.js returns 0xFFFD for surrogates and for values above 0x10FFFF, and remaps the entries of its `decodeMap`. */
+ export declare function replaceCodePoint(codePoint: number): number;
2
/** Returns the string for `codePoint` after it has been passed through `replaceCodePoint`. */
+ export default function decodeCodePoint(codePoint: number): string;
3
+ //# sourceMappingURL=decode_codepoint.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"decode_codepoint.d.ts","sourceRoot":"","sources":["../../src/decode_codepoint.ts"],"names":[],"mappings":"AAmDA,wBAAgB,gBAAgB,CAAC,SAAS,EAAE,MAAM,UAMjD;AAED,MAAM,CAAC,OAAO,UAAU,eAAe,CAAC,SAAS,EAAE,MAAM,GAAG,MAAM,CAEjE"}
@@ -0,0 +1,55 @@
1
+ // Adapted from https://github.com/mathiasbynens/he/blob/36afe179392226cf1b6ccdb16ebbb7a5a844d93a/src/he.js#L106-L134
2
// Replacement table used by `replaceCodePoint`: numeric reference value ->
// code point that is emitted instead.
const decodeMap = new Map([
    [0x00, 0xfffd],
    [0x80, 0x20ac],
    [0x82, 0x201a],
    [0x83, 0x0192],
    [0x84, 0x201e],
    [0x85, 0x2026],
    [0x86, 0x2020],
    [0x87, 0x2021],
    [0x88, 0x02c6],
    [0x89, 0x2030],
    [0x8a, 0x0160],
    [0x8b, 0x2039],
    [0x8c, 0x0152],
    [0x8e, 0x017d],
    [0x91, 0x2018],
    [0x92, 0x2019],
    [0x93, 0x201c],
    [0x94, 0x201d],
    [0x95, 0x2022],
    [0x96, 0x2013],
    [0x97, 0x2014],
    [0x98, 0x02dc],
    [0x99, 0x2122],
    [0x9a, 0x0161],
    [0x9b, 0x203a],
    [0x9c, 0x0153],
    [0x9e, 0x017e],
    [0x9f, 0x0178],
]);
32
// Fallback for runtimes without `String.fromCodePoint`: code points above
// 0xFFFF are written as a surrogate pair.
function surrogatePairFromCodePoint(codePoint) {
    if (codePoint <= 0xffff) {
        return String.fromCharCode(codePoint);
    }
    const offset = codePoint - 0x10000;
    const high = ((offset >>> 10) & 0x3ff) | 0xd800;
    const low = 0xdc00 | (offset & 0x3ff);
    return String.fromCharCode(high) + String.fromCharCode(low);
}
const fromCodePoint =
// eslint-disable-next-line @typescript-eslint/no-unnecessary-condition, node/no-unsupported-features/es-builtins
String.fromCodePoint || surrogatePairFromCodePoint;
45
/**
 * Maps a parsed code point to the one that should be emitted.
 *
 * @param codePoint Code point to check.
 * @returns 0xFFFD for surrogates and values above 0x10FFFF, the `decodeMap`
 * entry when one exists, otherwise `codePoint` unchanged.
 */
export function replaceCodePoint(codePoint) {
    const isSurrogate = codePoint >= 0xd800 && codePoint <= 0xdfff;
    if (isSurrogate || codePoint > 0x10ffff) {
        return 0xfffd;
    }
    const remapped = decodeMap.get(codePoint);
    if (remapped === null || remapped === undefined) {
        return codePoint;
    }
    return remapped;
}
52
/**
 * Turns a parsed numeric reference into a string.
 *
 * @param codePoint Code point taken from the reference.
 * @returns The string for the code point after `replaceCodePoint` was applied.
 */
export default function decodeCodePoint(codePoint) {
    const emitted = replaceCodePoint(codePoint);
    return fromCodePoint(emitted);
}
55
+ //# sourceMappingURL=decode_codepoint.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"decode_codepoint.js","sourceRoot":"","sources":["../../src/decode_codepoint.ts"],"names":[],"mappings":"AAAA,qHAAqH;AAErH,MAAM,SAAS,GAAG,IAAI,GAAG,CAAC;IACtB,CAAC,CAAC,EAAE,KAAK,CAAC;IACV,CAAC,GAAG,EAAE,IAAI,CAAC;IACX,CAAC,GAAG,EAAE,IAAI,CAAC;IACX,CAAC,GAAG,EAAE,GAAG,CAAC;IACV,CAAC,GAAG,EAAE,IAAI,CAAC;IACX,CAAC,GAAG,EAAE,IAAI,CAAC;IACX,CAAC,GAAG,EAAE,IAAI,CAAC;IACX,CAAC,GAAG,EAAE,IAAI,CAAC;IACX,CAAC,GAAG,EAAE,GAAG,CAAC;IACV,CAAC,GAAG,EAAE,IAAI,CAAC;IACX,CAAC,GAAG,EAAE,GAAG,CAAC;IACV,CAAC,GAAG,EAAE,IAAI,CAAC;IACX,CAAC,GAAG,EAAE,GAAG,CAAC;IACV,CAAC,GAAG,EAAE,GAAG,CAAC;IACV,CAAC,GAAG,EAAE,IAAI,CAAC;IACX,CAAC,GAAG,EAAE,IAAI,CAAC;IACX,CAAC,GAAG,EAAE,IAAI,CAAC;IACX,CAAC,GAAG,EAAE,IAAI,CAAC;IACX,CAAC,GAAG,EAAE,IAAI,CAAC;IACX,CAAC,GAAG,EAAE,IAAI,CAAC;IACX,CAAC,GAAG,EAAE,IAAI,CAAC;IACX,CAAC,GAAG,EAAE,GAAG,CAAC;IACV,CAAC,GAAG,EAAE,IAAI,CAAC;IACX,CAAC,GAAG,EAAE,GAAG,CAAC;IACV,CAAC,GAAG,EAAE,IAAI,CAAC;IACX,CAAC,GAAG,EAAE,GAAG,CAAC;IACV,CAAC,GAAG,EAAE,GAAG,CAAC;IACV,CAAC,GAAG,EAAE,GAAG,CAAC;CACb,CAAC,CAAC;AAEH,MAAM,aAAa;AACf,iHAAiH;AACjH,MAAM,CAAC,aAAa;IACpB,UAAU,SAAiB;QACvB,IAAI,MAAM,GAAG,EAAE,CAAC;QAEhB,IAAI,SAAS,GAAG,MAAM,EAAE;YACpB,SAAS,IAAI,OAAO,CAAC;YACrB,MAAM,IAAI,MAAM,CAAC,YAAY,CACzB,CAAC,CAAC,SAAS,KAAK,EAAE,CAAC,GAAG,KAAK,CAAC,GAAG,MAAM,CACxC,CAAC;YACF,SAAS,GAAG,MAAM,GAAG,CAAC,SAAS,GAAG,KAAK,CAAC,CAAC;SAC5C;QAED,MAAM,IAAI,MAAM,CAAC,YAAY,CAAC,SAAS,CAAC,CAAC;QACzC,OAAO,MAAM,CAAC;IAClB,CAAC,CAAC;AAEN,MAAM,UAAU,gBAAgB,CAAC,SAAiB;;IAC9C,IAAI,CAAC,SAAS,IAAI,MAAM,IAAI,SAAS,IAAI,MAAM,CAAC,IAAI,SAAS,GAAG,QAAQ,EAAE;QACtE,OAAO,MAAM,CAAC;KACjB;IAED,OAAO,MAAA,SAAS,CAAC,GAAG,CAAC,SAAS,CAAC,mCAAI,SAAS,CAAC;AACjD,CAAC;AAED,MAAM,CAAC,OAAO,UAAU,eAAe,CAAC,SAAiB;IACrD,OAAO,aAAa,CAAC,gBAAgB,CAAC,SAAS,CAAC,CAAC,CAAC;AACtD,CAAC"}
@@ -0,0 +1,8 @@
1
/** Returns the code point of `str` at `index`; a surrogate pair starting at `index` is combined into one code point. */
+ export declare const getCodePoint: (str: string, index: number) => number;
2
/** Replaces every match of `regExp` in `str` with an entity from the HTML encode trie, or with a hexadecimal numeric reference when the trie has no entry. */
+ export declare function encodeHTMLTrieRe(regExp: RegExp, str: string): string;
3
/** One node of the encode trie built by `getTrie`. */
+ export interface TrieNode {
4
// Entity text for the characters leading to this node; `getTrie` stores it as `&name;`.
+ value?: string;
5
// Child nodes, keyed by the next UTF-16 code unit.
+ next?: Map<number, TrieNode>;
6
+ }
7
/** Builds the encode trie from a map of decoded string -> entity name; the result is keyed by the first UTF-16 code unit. */
+ export declare function getTrie(map: Record<string, string>): Map<number, TrieNode>;
8
+ //# sourceMappingURL=encode-trie.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"encode-trie.d.ts","sourceRoot":"","sources":["../../src/encode-trie.ts"],"names":[],"mappings":"AAYA,eAAO,MAAM,YAAY,QAGT,MAAM,SAAS,MAAM,KAAG,MAQD,CAAC;AAIxC,wBAAgB,gBAAgB,CAAC,MAAM,EAAE,MAAM,EAAE,GAAG,EAAE,MAAM,GAAG,MAAM,CAkCpE;AAED,MAAM,WAAW,QAAQ;IACrB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,IAAI,CAAC,EAAE,GAAG,CAAC,MAAM,EAAE,QAAQ,CAAC,CAAC;CAChC;AAED,wBAAgB,OAAO,CAAC,GAAG,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,GAAG,GAAG,CAAC,MAAM,EAAE,QAAQ,CAAC,CAmB1E"}
@@ -0,0 +1,67 @@
1
+ import htmlMap from "../maps/entities-encode.json" assert {type:"json"};
2
// True when the top six bits of `c` are those of a lead (high) surrogate.
function isHighSurrugate(c) {
    const SURROGATE_MASK = 0xfc00;
    const HIGH_SURROGATE_TAG = 0xd800;
    const topBits = c & SURROGATE_MASK;
    return topBits === HIGH_SURROGATE_TAG;
}
5
+ // For compatibility with node < 4, we wrap `codePointAt`
6
/**
 * Returns the code point of a string at `index`. Uses the native
 * `codePointAt` when available, otherwise combines a surrogate pair by hand.
 */
export const getCodePoint = (function () {
    // eslint-disable-next-line @typescript-eslint/no-unnecessary-condition
    if (String.prototype.codePointAt != null) {
        return function (str, index) {
            return str.codePointAt(index);
        };
    }
    // http://mathiasbynens.be/notes/javascript-encoding#surrogate-formulae
    return function (c, index) {
        if (!isHighSurrugate(c.charCodeAt(index))) {
            return c.charCodeAt(index);
        }
        return ((c.charCodeAt(index) - 55296 /* High */) * 0x400 +
            c.charCodeAt(index + 1) -
            0xdc00 +
            0x10000);
    };
})();
17
+ const htmlTrie = getTrie(htmlMap);
18
/**
 * Replaces every match of `regExp` in `str` with an entity from `htmlTrie`,
 * or with a hexadecimal numeric reference when the trie has no entity for it.
 *
 * @param regExp Global, single-code-unit pattern selecting what to encode.
 * @param str String to encode.
 * @returns The encoded string.
 */
export function encodeHTMLTrieRe(regExp, str) {
    let ret = "";
    let lastIdx = 0;
    let match;
    while ((match = regExp.exec(str)) !== null) {
        const i = match.index;
        const char = str.charCodeAt(i);
        const node = htmlTrie.get(char);
        let entity;
        if (node !== undefined) {
            // Prefer an entity that covers this code unit and the next one.
            if (node.next != null && i + 1 < str.length) {
                const following = node.next.get(str.charCodeAt(i + 1));
                if (following !== undefined && following.value != null) {
                    ret += str.substring(lastIdx, i) + following.value;
                    regExp.lastIndex += 1;
                    lastIdx = i + 2;
                    continue;
                }
            }
            entity = node.value;
        }
        // A node can exist without a value of its own (e.g. a lead surrogate
        // that only starts two-unit entities). Such characters take the
        // numeric path instead of appending the text "undefined".
        if (entity != null) {
            ret += str.substring(lastIdx, i) + entity;
            lastIdx = i + 1;
        }
        else {
            const codePoint = getCodePoint(str, i);
            ret += `${str.substring(lastIdx, i)}&#x${codePoint.toString(16)};`;
            // Skip the trailing surrogate only when a full pair was consumed,
            // so a lone lead surrogate does not swallow the next character.
            lastIdx = regExp.lastIndex += Number(codePoint !== char);
        }
    }
    return ret + str.substring(lastIdx);
}
48
/**
 * Builds the encode trie from a map of decoded string -> entity name.
 *
 * @param map Keys are the decoded characters, values the entity names without `&` and `;`.
 * @returns Trie keyed by UTF-16 code unit; the first entity stored for a key wins.
 */
export function getTrie(map) {
    const root = new Map();
    for (const decoded of Object.keys(map)) {
        const entity = map[decoded];
        const lastPos = decoded.length - 1;
        // Walk (and create) the nodes for every code unit except the last.
        let level = root;
        for (let pos = 0; pos < lastPos; pos++) {
            const code = decoded.charCodeAt(pos);
            let node = level.get(code);
            if (node === null || node === undefined) {
                node = {};
            }
            level.set(code, node);
            if (node.next === null || node.next === undefined) {
                node.next = new Map();
            }
            level = node.next;
        }
        // Attach the entity to the node of the last code unit.
        const finalCode = decoded.charCodeAt(lastPos);
        let leaf = level.get(finalCode);
        if (leaf === null || leaf === undefined) {
            leaf = {};
        }
        if (leaf.value === null || leaf.value === undefined) {
            leaf.value = `&${entity};`;
        }
        level.set(finalCode, leaf);
    }
    return root;
}
67
+ //# sourceMappingURL=encode-trie.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"encode-trie.js","sourceRoot":"","sources":["../../src/encode-trie.ts"],"names":[],"mappings":"AAAA,OAAO,OAAO,MAAM,6BAA6B,CAAC;AAOlD,SAAS,eAAe,CAAC,CAAS;IAC9B,OAAO,CAAC,CAAC,mBAAiB,CAAC,qBAAmB,CAAC;AACnD,CAAC;AAED,yDAAyD;AACzD,MAAM,CAAC,MAAM,YAAY;AACrB,uEAAuE;AACvE,MAAM,CAAC,SAAS,CAAC,WAAW,IAAI,IAAI;IAChC,CAAC,CAAC,CAAC,GAAW,EAAE,KAAa,EAAU,EAAE,CAAC,GAAG,CAAC,WAAW,CAAC,KAAK,CAAE;IACjE,CAAC,CAAC,uEAAuE;QACvE,CAAC,CAAS,EAAE,KAAa,EAAU,EAAE,CACjC,eAAe,CAAC,CAAC,CAAC,UAAU,CAAC,KAAK,CAAC,CAAC;YAChC,CAAC,CAAC,CAAC,CAAC,CAAC,UAAU,CAAC,KAAK,CAAC,mBAAiB,CAAC,GAAG,KAAK;gBAC9C,CAAC,CAAC,UAAU,CAAC,KAAK,GAAG,CAAC,CAAC;gBACvB,MAAM;gBACN,OAAO;YACT,CAAC,CAAC,CAAC,CAAC,UAAU,CAAC,KAAK,CAAC,CAAC;AAExC,MAAM,QAAQ,GAAG,OAAO,CAAC,OAAO,CAAC,CAAC;AAElC,MAAM,UAAU,gBAAgB,CAAC,MAAc,EAAE,GAAW;;IACxD,IAAI,GAAG,GAAG,EAAE,CAAC;IACb,IAAI,OAAO,GAAG,CAAC,CAAC;IAChB,IAAI,KAAK,CAAC;IAEV,OAAO,CAAC,KAAK,GAAG,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,KAAK,IAAI,EAAE;QACxC,MAAM,CAAC,GAAG,KAAK,CAAC,KAAK,CAAC;QACtB,MAAM,IAAI,GAAG,GAAG,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC;QAC/B,MAAM,IAAI,GAAG,QAAQ,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;QAEhC,IAAI,IAAI,EAAE;YACN,IAAI,IAAI,CAAC,IAAI,IAAI,IAAI,IAAI,CAAC,GAAG,CAAC,GAAG,GAAG,CAAC,MAAM,EAAE;gBACzC,MAAM,KAAK,GAAG,MAAA,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,UAAU,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,0CAAE,KAAK,CAAC;gBAC1D,IAAI,KAAK,IAAI,IAAI,EAAE;oBACf,GAAG,IAAI,GAAG,CAAC,SAAS,CAAC,OAAO,EAAE,CAAC,CAAC,GAAG,KAAK,CAAC;oBACzC,MAAM,CAAC,SAAS,IAAI,CAAC,CAAC;oBACtB,OAAO,GAAG,CAAC,GAAG,CAAC,CAAC;oBAChB,SAAS;iBACZ;aACJ;YAED,GAAG,IAAI,GAAG,CAAC,SAAS,CAAC,OAAO,EAAE,CAAC,CAAC,GAAG,IAAI,CAAC,KAAK,CAAC;YAC9C,OAAO,GAAG,CAAC,GAAG,CAAC,CAAC;SACnB;aAAM;YACH,GAAG,IAAI,GAAG,GAAG,CAAC,SAAS,CAAC,OAAO,EAAE,CAAC,CAAC,MAAM,YAAY,CACjD,GAAG,EACH,CAAC,CACJ,CAAC,QAAQ,CAAC,EAAE,CAAC,GAAG,CAAC;YAClB,4CAA4C;YAC5C,OAAO,GAAG,MAAM,CAAC,SAAS,IAAI,MAAM,CAAC,eAAe,CAAC,IAAI,CAAC,CAAC,CAAC;SAC/D;KACJ;IAED,OAAO,GAAG,GAAG,GAAG,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;AACrC,CAAC;AAOD,MAAM,UAAU,OAAO,CAAC,GAA2B;;IAC/C,MAAM,IAAI,GA
AG,IAAI,GAAG,EAAoB,CAAC;IAEzC,KAAK,MAAM,OAAO,IAAI,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,EAAE;QACpC,MAAM,MAAM,GAAG,GAAG,CAAC,OAAO,CAAC,CAAC;QAC5B,kBAAkB;QAClB,IAAI,OAAO,GAAG,IAAI,CAAC;QACnB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC,EAAE,EAAE;YACzC,MAAM,IAAI,GAAG,OAAO,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC;YACnC,MAAM,IAAI,GAAG,MAAA,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,mCAAI,EAAE,CAAC;YACrC,OAAO,CAAC,GAAG,CAAC,IAAI,EAAE,IAAI,CAAC,CAAC;YACxB,OAAO,SAAG,IAAI,CAAC,IAAI,oCAAT,IAAI,CAAC,IAAI,GAAK,IAAI,GAAG,EAAE,CAAA,CAAC;SACrC;QACD,MAAM,GAAG,GAAG,MAAA,OAAO,CAAC,GAAG,CAAC,OAAO,CAAC,UAAU,CAAC,OAAO,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,mCAAI,EAAE,CAAC;QACtE,MAAA,GAAG,CAAC,KAAK,oCAAT,GAAG,CAAC,KAAK,GAAK,IAAI,MAAM,GAAG,EAAC;QAC5B,OAAO,CAAC,GAAG,CAAC,OAAO,CAAC,UAAU,CAAC,OAAO,CAAC,MAAM,GAAG,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC;KAC5D;IAED,OAAO,IAAI,CAAC;AAChB,CAAC"}
@@ -0,0 +1,60 @@
1
+ /**
2
+ * Encodes all non-ASCII characters, as well as characters not valid in XML
3
+ * documents using XML entities.
4
+ *
5
+ * If a character has no equivalent entity, a
6
+ * numeric hexadecimal reference (e.g. `&#xfc;`) will be used.
7
+ */
8
+ export declare function encodeXML(str: string): string;
9
+ /**
10
+ * Encodes all entities and non-ASCII characters in the input.
11
+ *
12
+ * This includes characters that are valid ASCII characters in HTML documents.
13
+ * For example `#` will be encoded as `&num;`. To get a more compact output,
14
+ * consider using the `encodeNonAsciiHTML` function.
15
+ *
16
+ * If a character has no equivalent entity, a
17
+ * numeric hexadecimal reference (e.g. `&#xfc;`) will be used.
18
+ */
19
+ export declare function encodeHTML(data: string): string;
20
+ /**
21
+ * Encodes all non-ASCII characters, as well as characters not valid in HTML
22
+ * documents using HTML entities.
23
+ *
24
+ * If a character has no equivalent entity, a
25
+ * numeric hexadecimal reference (e.g. `&#xfc;`) will be used.
26
+ */
27
+ export declare function encodeNonAsciiHTML(data: string): string;
28
+ /**
29
+ * Encodes all non-ASCII characters, as well as characters not valid in XML
30
+ * documents using numeric hexadecimal references (e.g. `&#xfc;`).
31
+ *
32
+ * Have a look at `escapeUTF8` if you want a more concise output at the expense
33
+ * of reduced transportability.
34
+ *
35
+ * @param data String to escape.
36
+ */
37
+ export declare const escape: typeof encodeXML;
38
+ /**
39
+ * Encodes all characters not valid in XML documents using XML entities.
40
+ *
41
+ * Note that the output will be character-set dependent.
42
+ *
43
+ * @param data String to escape.
44
+ */
45
+ export declare const escapeUTF8: (data: string) => string;
46
+ /**
47
+ * Encodes all characters that have to be escaped in HTML attributes,
48
+ * following {@link https://html.spec.whatwg.org/multipage/parsing.html#escapingString}.
49
+ *
50
+ * @param data String to escape.
51
+ */
52
+ export declare const escapeAttribute: (data: string) => string;
53
+ /**
54
+ * Encodes all characters that have to be escaped in HTML text,
55
+ * following {@link https://html.spec.whatwg.org/multipage/parsing.html#escapingString}.
56
+ *
57
+ * @param data String to escape.
58
+ */
59
+ export declare const escapeText: (data: string) => string;
60
+ //# sourceMappingURL=encode.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"encode.d.ts","sourceRoot":"","sources":["../../src/encode.ts"],"names":[],"mappings":"AAaA;;;;;;GAMG;AACH,wBAAgB,SAAS,CAAC,GAAG,EAAE,MAAM,GAAG,MAAM,CA0B7C;AAED;;;;;;;;;GASG;AACH,wBAAgB,UAAU,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,CAE/C;AACD;;;;;;GAMG;AACH,wBAAgB,kBAAkB,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,CAEvD;AAED;;;;;;;;GAQG;AACH,eAAO,MAAM,MAAM,kBAAY,CAAC;AA2BhC;;;;;;GAMG;AACH,eAAO,MAAM,UAAU,SA7Bb,MAAM,KAAK,MA6BuC,CAAC;AAE7D;;;;;GAKG;AACH,eAAO,MAAM,eAAe,SArClB,MAAM,KAAK,MA4CpB,CAAC;AAEF;;;;;GAKG;AACH,eAAO,MAAM,UAAU,SApDb,MAAM,KAAK,MA4DpB,CAAC"}