entities 2.0.3 → 3.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/lib/decode.d.ts CHANGED
@@ -1,7 +1,15 @@
1
- export declare const decodeXML: (str: string) => string;
2
- export declare const decodeHTMLStrict: (str: string) => string;
3
- export interface MapType {
4
- [key: string]: string;
1
+ import htmlDecodeTree from "./generated/decode-data-html";
2
+ import xmlDecodeTree from "./generated/decode-data-xml";
3
+ export { htmlDecodeTree, xmlDecodeTree };
4
+ export declare enum BinTrieFlags {
5
+ HAS_VALUE = 32768,
6
+ BRANCH_LENGTH = 32512,
7
+ MULTI_BYTE = 128,
8
+ JUMP_TABLE = 127
5
9
  }
6
- export declare const decodeHTML: (str: string) => string;
10
+ export declare const JUMP_OFFSET_BASE: number;
11
+ export declare function determineBranch(decodeTree: Uint16Array, current: number, nodeIdx: number, char: number): number;
12
+ export declare function decodeHTML(str: string): string;
13
+ export declare function decodeHTMLStrict(str: string): string;
14
+ export declare function decodeXML(str: string): string;
7
15
  //# sourceMappingURL=decode.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"decode.d.ts","sourceRoot":"","sources":["../src/decode.ts"],"names":[],"mappings":"AAKA,eAAO,MAAM,SAAS,QAeL,MAAM,WAf0B,CAAC;AAClD,eAAO,MAAM,gBAAgB,QAcZ,MAAM,WAdoC,CAAC;AAE5D,MAAM,WAAW,OAAO;IACpB,CAAC,GAAG,EAAE,MAAM,GAAG,MAAM,CAAC;CACzB;AAeD,eAAO,MAAM,UAAU,QAyBN,MAAM,WACnB,CAAC"}
1
+ {"version":3,"file":"decode.d.ts","sourceRoot":"","sources":["../src/decode.ts"],"names":[],"mappings":"AAAA,OAAO,cAAc,MAAM,8BAA8B,CAAC;AAC1D,OAAO,aAAa,MAAM,6BAA6B,CAAC;AAIxD,OAAO,EAAE,cAAc,EAAE,aAAa,EAAE,CAAC;AAczC,oBAAY,YAAY;IACpB,SAAS,QAAwB;IACjC,aAAa,QAAwB;IACrC,UAAU,MAAwB;IAClC,UAAU,MAAwB;CACrC;AAED,eAAO,MAAM,gBAAgB,QAAqB,CAAC;AAmGnD,wBAAgB,eAAe,CAC3B,UAAU,EAAE,WAAW,EACvB,OAAO,EAAE,MAAM,EACf,OAAO,EAAE,MAAM,EACf,IAAI,EAAE,MAAM,GACb,MAAM,CA0CR;AAKD,wBAAgB,UAAU,CAAC,GAAG,EAAE,MAAM,GAAG,MAAM,CAE9C;AAED,wBAAgB,gBAAgB,CAAC,GAAG,EAAE,MAAM,GAAG,MAAM,CAEpD;AAED,wBAAgB,SAAS,CAAC,GAAG,EAAE,MAAM,GAAG,MAAM,CAE7C"}
package/lib/decode.js CHANGED
@@ -3,52 +3,143 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
3
3
  return (mod && mod.__esModule) ? mod : { "default": mod };
4
4
  };
5
5
  Object.defineProperty(exports, "__esModule", { value: true });
6
- exports.decodeHTML = exports.decodeHTMLStrict = exports.decodeXML = void 0;
7
- var entities_json_1 = __importDefault(require("./maps/entities.json"));
8
- var legacy_json_1 = __importDefault(require("./maps/legacy.json"));
9
- var xml_json_1 = __importDefault(require("./maps/xml.json"));
6
+ exports.decodeXML = exports.decodeHTMLStrict = exports.decodeHTML = exports.determineBranch = exports.JUMP_OFFSET_BASE = exports.BinTrieFlags = exports.xmlDecodeTree = exports.htmlDecodeTree = void 0;
7
+ var decode_data_html_1 = __importDefault(require("./generated/decode-data-html"));
8
+ exports.htmlDecodeTree = decode_data_html_1.default;
9
+ var decode_data_xml_1 = __importDefault(require("./generated/decode-data-xml"));
10
+ exports.xmlDecodeTree = decode_data_xml_1.default;
10
11
  var decode_codepoint_1 = __importDefault(require("./decode_codepoint"));
11
- exports.decodeXML = getStrictDecoder(xml_json_1.default);
12
- exports.decodeHTMLStrict = getStrictDecoder(entities_json_1.default);
13
- function getStrictDecoder(map) {
14
- var keys = Object.keys(map).join("|");
15
- var replace = getReplacer(map);
16
- keys += "|#[xX][\\da-fA-F]+|#\\d+";
17
- var re = new RegExp("&(?:" + keys + ");", "g");
18
- return function (str) { return String(str).replace(re, replace); };
12
+ var BinTrieFlags;
13
+ (function (BinTrieFlags) {
14
+ BinTrieFlags[BinTrieFlags["HAS_VALUE"] = 32768] = "HAS_VALUE";
15
+ BinTrieFlags[BinTrieFlags["BRANCH_LENGTH"] = 32512] = "BRANCH_LENGTH";
16
+ BinTrieFlags[BinTrieFlags["MULTI_BYTE"] = 128] = "MULTI_BYTE";
17
+ BinTrieFlags[BinTrieFlags["JUMP_TABLE"] = 127] = "JUMP_TABLE";
18
+ })(BinTrieFlags = exports.BinTrieFlags || (exports.BinTrieFlags = {}));
19
+ exports.JUMP_OFFSET_BASE = 48 /* ZERO */ - 1;
20
+ function getDecoder(decodeTree) {
21
+ return function decodeHTMLBinary(str, strict) {
22
+ var ret = "";
23
+ var lastIdx = 0;
24
+ var strIdx = 0;
25
+ while ((strIdx = str.indexOf("&", strIdx)) >= 0) {
26
+ ret += str.slice(lastIdx, strIdx);
27
+ lastIdx = strIdx;
28
+ // Skip the "&"
29
+ strIdx += 1;
30
+ // If we have a numeric entity, handle this separately.
31
+ if (str.charCodeAt(strIdx) === 35 /* NUM */) {
32
+ // Skip the leading "&#". For hex entities, also skip the leading "x".
33
+ var start = strIdx + 1;
34
+ var base = 10;
35
+ var cp = str.charCodeAt(start);
36
+ if ((cp | 32 /* To_LOWER_BIT */) === 120 /* LOWER_X */) {
37
+ base = 16;
38
+ strIdx += 1;
39
+ start += 1;
40
+ }
41
+ while (((cp = str.charCodeAt(++strIdx)) >= 48 /* ZERO */ &&
42
+ cp <= 57 /* NINE */) ||
43
+ (base === 16 &&
44
+ (cp | 32 /* To_LOWER_BIT */) >= 97 /* LOWER_A */ &&
45
+ (cp | 32 /* To_LOWER_BIT */) <= 102 /* LOWER_F */))
46
+ ;
47
+ if (start !== strIdx) {
48
+ var entity = str.substring(start, strIdx);
49
+ var parsed = parseInt(entity, base);
50
+ if (str.charCodeAt(strIdx) === 59 /* SEMI */) {
51
+ strIdx += 1;
52
+ }
53
+ else if (strict) {
54
+ continue;
55
+ }
56
+ ret += decode_codepoint_1.default(parsed);
57
+ lastIdx = strIdx;
58
+ }
59
+ continue;
60
+ }
61
+ var result = null;
62
+ var excess = 1;
63
+ var treeIdx = 0;
64
+ var current = decodeTree[treeIdx];
65
+ for (; strIdx < str.length; strIdx++, excess++) {
66
+ treeIdx = determineBranch(decodeTree, current, treeIdx + 1, str.charCodeAt(strIdx));
67
+ if (treeIdx < 0)
68
+ break;
69
+ current = decodeTree[treeIdx];
70
+ // If the branch is a value, store it and continue
71
+ if (current & BinTrieFlags.HAS_VALUE) {
72
+ // If we have a legacy entity while parsing strictly, just skip the number of bytes
73
+ if (strict && str.charCodeAt(strIdx) !== 59 /* SEMI */) {
74
+ // No need to consider multi-byte values, as the legacy entity is always a single byte
75
+ treeIdx += 1;
76
+ }
77
+ else {
78
+ // If this is a surrogate pair, combine the higher bits from the node with the next byte
79
+ result =
80
+ current & BinTrieFlags.MULTI_BYTE
81
+ ? String.fromCharCode(decodeTree[++treeIdx], decodeTree[++treeIdx])
82
+ : String.fromCharCode(decodeTree[++treeIdx]);
83
+ excess = 0;
84
+ }
85
+ }
86
+ }
87
+ if (result != null) {
88
+ ret += result;
89
+ lastIdx = strIdx - excess + 1;
90
+ }
91
+ }
92
+ return ret + str.slice(lastIdx);
93
+ };
19
94
  }
20
- var sorter = function (a, b) { return (a < b ? 1 : -1); };
21
- exports.decodeHTML = (function () {
22
- var legacy = Object.keys(legacy_json_1.default).sort(sorter);
23
- var keys = Object.keys(entities_json_1.default).sort(sorter);
24
- for (var i = 0, j = 0; i < keys.length; i++) {
25
- if (legacy[j] === keys[i]) {
26
- keys[i] += ";?";
27
- j++;
95
+ function determineBranch(decodeTree, current, nodeIdx, char) {
96
+ if (current <= 128) {
97
+ return char === current ? nodeIdx : -1;
98
+ }
99
+ var branchCount = (current & BinTrieFlags.BRANCH_LENGTH) >> 8;
100
+ if (branchCount === 0) {
101
+ return -1;
102
+ }
103
+ if (branchCount === 1) {
104
+ return char === decodeTree[nodeIdx] ? nodeIdx + 1 : -1;
105
+ }
106
+ var jumpOffset = current & BinTrieFlags.JUMP_TABLE;
107
+ if (jumpOffset) {
108
+ var value = char - exports.JUMP_OFFSET_BASE - jumpOffset;
109
+ return value < 0 || value > branchCount
110
+ ? -1
111
+ : decodeTree[nodeIdx + value] - 1;
112
+ }
113
+ // Binary search for the character.
114
+ var lo = nodeIdx;
115
+ var hi = lo + branchCount - 1;
116
+ while (lo <= hi) {
117
+ var mid = (lo + hi) >>> 1;
118
+ var midVal = decodeTree[mid];
119
+ if (midVal < char) {
120
+ lo = mid + 1;
121
+ }
122
+ else if (midVal > char) {
123
+ hi = mid - 1;
28
124
  }
29
125
  else {
30
- keys[i] += ";";
126
+ return decodeTree[mid + branchCount];
31
127
  }
32
128
  }
33
- var re = new RegExp("&(?:" + keys.join("|") + "|#[xX][\\da-fA-F]+;?|#\\d+;?)", "g");
34
- var replace = getReplacer(entities_json_1.default);
35
- function replacer(str) {
36
- if (str.substr(-1) !== ";")
37
- str += ";";
38
- return replace(str);
39
- }
40
- //TODO consider creating a merged map
41
- return function (str) { return String(str).replace(re, replacer); };
42
- })();
43
- function getReplacer(map) {
44
- return function replace(str) {
45
- if (str.charAt(1) === "#") {
46
- var secondChar = str.charAt(2);
47
- if (secondChar === "X" || secondChar === "x") {
48
- return decode_codepoint_1.default(parseInt(str.substr(3), 16));
49
- }
50
- return decode_codepoint_1.default(parseInt(str.substr(2), 10));
51
- }
52
- return map[str.slice(1, -1)];
53
- };
129
+ return -1;
130
+ }
131
+ exports.determineBranch = determineBranch;
132
+ var htmlDecoder = getDecoder(decode_data_html_1.default);
133
+ var xmlDecoder = getDecoder(decode_data_xml_1.default);
134
+ function decodeHTML(str) {
135
+ return htmlDecoder(str, false);
136
+ }
137
+ exports.decodeHTML = decodeHTML;
138
+ function decodeHTMLStrict(str) {
139
+ return htmlDecoder(str, true);
140
+ }
141
+ exports.decodeHTMLStrict = decodeHTMLStrict;
142
+ function decodeXML(str) {
143
+ return xmlDecoder(str, true);
54
144
  }
145
+ exports.decodeXML = decodeXML;
@@ -1 +1 @@
1
- {"version":3,"file":"decode_codepoint.d.ts","sourceRoot":"","sources":["../src/decode_codepoint.ts"],"names":[],"mappings":"AAGA,MAAM,CAAC,OAAO,UAAU,eAAe,CAAC,SAAS,EAAE,MAAM,UAmBxD"}
1
+ {"version":3,"file":"decode_codepoint.d.ts","sourceRoot":"","sources":["../src/decode_codepoint.ts"],"names":[],"mappings":"AAmDA,MAAM,CAAC,OAAO,UAAU,eAAe,CAAC,SAAS,EAAE,MAAM,GAAG,MAAM,CAMjE"}
@@ -1,24 +1,54 @@
1
1
  "use strict";
2
- var __importDefault = (this && this.__importDefault) || function (mod) {
3
- return (mod && mod.__esModule) ? mod : { "default": mod };
4
- };
2
+ // Adapted from https://github.com/mathiasbynens/he/blob/36afe179392226cf1b6ccdb16ebbb7a5a844d93a/src/he.js#L106-L134
5
3
  Object.defineProperty(exports, "__esModule", { value: true });
6
- var decode_json_1 = __importDefault(require("./maps/decode.json"));
7
- // modified version of https://github.com/mathiasbynens/he/blob/master/src/he.js#L94-L119
4
+ var decodeMap = new Map([
5
+ [0, 65533],
6
+ [128, 8364],
7
+ [130, 8218],
8
+ [131, 402],
9
+ [132, 8222],
10
+ [133, 8230],
11
+ [134, 8224],
12
+ [135, 8225],
13
+ [136, 710],
14
+ [137, 8240],
15
+ [138, 352],
16
+ [139, 8249],
17
+ [140, 338],
18
+ [142, 381],
19
+ [145, 8216],
20
+ [146, 8217],
21
+ [147, 8220],
22
+ [148, 8221],
23
+ [149, 8226],
24
+ [150, 8211],
25
+ [151, 8212],
26
+ [152, 732],
27
+ [153, 8482],
28
+ [154, 353],
29
+ [155, 8250],
30
+ [156, 339],
31
+ [158, 382],
32
+ [159, 376],
33
+ ]);
34
+ var fromCodePoint =
35
+ // eslint-disable-next-line @typescript-eslint/no-unnecessary-condition, node/no-unsupported-features/es-builtins
36
+ String.fromCodePoint ||
37
+ function (codePoint) {
38
+ var output = "";
39
+ if (codePoint > 0xffff) {
40
+ codePoint -= 0x10000;
41
+ output += String.fromCharCode(((codePoint >>> 10) & 0x3ff) | 0xd800);
42
+ codePoint = 0xdc00 | (codePoint & 0x3ff);
43
+ }
44
+ output += String.fromCharCode(codePoint);
45
+ return output;
46
+ };
8
47
  function decodeCodePoint(codePoint) {
48
+ var _a;
9
49
  if ((codePoint >= 0xd800 && codePoint <= 0xdfff) || codePoint > 0x10ffff) {
10
50
  return "\uFFFD";
11
51
  }
12
- if (codePoint in decode_json_1.default) {
13
- codePoint = decode_json_1.default[codePoint];
14
- }
15
- var output = "";
16
- if (codePoint > 0xffff) {
17
- codePoint -= 0x10000;
18
- output += String.fromCharCode(((codePoint >>> 10) & 0x3ff) | 0xd800);
19
- codePoint = 0xdc00 | (codePoint & 0x3ff);
20
- }
21
- output += String.fromCharCode(codePoint);
22
- return output;
52
+ return fromCodePoint((_a = decodeMap.get(codePoint)) !== null && _a !== void 0 ? _a : codePoint);
23
53
  }
24
54
  exports.default = decodeCodePoint;
@@ -0,0 +1,8 @@
1
+ export declare const getCodePoint: (str: string, index: number) => number;
2
+ export declare function encodeHTMLTrieRe(regExp: RegExp, str: string): string;
3
+ export interface TrieNode {
4
+ value?: string;
5
+ next?: Map<number, TrieNode>;
6
+ }
7
+ export declare function getTrie(map: Record<string, string>): Map<number, TrieNode>;
8
+ //# sourceMappingURL=encode-trie.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"encode-trie.d.ts","sourceRoot":"","sources":["../src/encode-trie.ts"],"names":[],"mappings":"AAYA,eAAO,MAAM,YAAY,QAGT,MAAM,SAAS,MAAM,KAAG,MAQD,CAAC;AAIxC,wBAAgB,gBAAgB,CAAC,MAAM,EAAE,MAAM,EAAE,GAAG,EAAE,MAAM,GAAG,MAAM,CAkCpE;AAED,MAAM,WAAW,QAAQ;IACrB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,IAAI,CAAC,EAAE,GAAG,CAAC,MAAM,EAAE,QAAQ,CAAC,CAAC;CAChC;AAED,wBAAgB,OAAO,CAAC,GAAG,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,GAAG,GAAG,CAAC,MAAM,EAAE,QAAQ,CAAC,CAmB1E"}
@@ -0,0 +1,77 @@
1
+ "use strict";
2
+ var __importDefault = (this && this.__importDefault) || function (mod) {
3
+ return (mod && mod.__esModule) ? mod : { "default": mod };
4
+ };
5
+ Object.defineProperty(exports, "__esModule", { value: true });
6
+ exports.getTrie = exports.encodeHTMLTrieRe = exports.getCodePoint = void 0;
7
+ var entities_json_1 = __importDefault(require("./maps/entities.json"));
8
+ function isHighSurrugate(c) {
9
+ return (c & 64512 /* Mask */) === 55296 /* High */;
10
+ }
11
+ // For compatibility with node < 4, we wrap `codePointAt`
12
+ exports.getCodePoint =
13
+ // eslint-disable-next-line @typescript-eslint/no-unnecessary-condition
14
+ String.prototype.codePointAt != null
15
+ ? function (str, index) { return str.codePointAt(index); }
16
+ : // http://mathiasbynens.be/notes/javascript-encoding#surrogate-formulae
17
+ function (c, index) {
18
+ return isHighSurrugate(c.charCodeAt(index))
19
+ ? (c.charCodeAt(index) - 55296 /* High */) * 0x400 +
20
+ c.charCodeAt(index + 1) -
21
+ 0xdc00 +
22
+ 0x10000
23
+ : c.charCodeAt(index);
24
+ };
25
+ var htmlTrie = getTrie(entities_json_1.default);
26
+ function encodeHTMLTrieRe(regExp, str) {
27
+ var _a;
28
+ var ret = "";
29
+ var lastIdx = 0;
30
+ var match;
31
+ while ((match = regExp.exec(str)) !== null) {
32
+ var i = match.index;
33
+ var char = str.charCodeAt(i);
34
+ var next = htmlTrie.get(char);
35
+ if (next) {
36
+ if (next.next != null && i + 1 < str.length) {
37
+ var value = (_a = next.next.get(str.charCodeAt(i + 1))) === null || _a === void 0 ? void 0 : _a.value;
38
+ if (value != null) {
39
+ ret += str.substring(lastIdx, i) + value;
40
+ regExp.lastIndex += 1;
41
+ lastIdx = i + 2;
42
+ continue;
43
+ }
44
+ }
45
+ ret += str.substring(lastIdx, i) + next.value;
46
+ lastIdx = i + 1;
47
+ }
48
+ else {
49
+ ret += str.substring(lastIdx, i) + "&#x" + exports.getCodePoint(str, i).toString(16) + ";";
50
+ // Increase by 1 if we have a surrogate pair
51
+ lastIdx = regExp.lastIndex += Number(isHighSurrugate(char));
52
+ }
53
+ }
54
+ return ret + str.substr(lastIdx);
55
+ }
56
+ exports.encodeHTMLTrieRe = encodeHTMLTrieRe;
57
+ function getTrie(map) {
58
+ var _a, _b, _c, _d;
59
+ var trie = new Map();
60
+ for (var _i = 0, _e = Object.keys(map); _i < _e.length; _i++) {
61
+ var value = _e[_i];
62
+ var key = map[value];
63
+ // Resolve the key
64
+ var lastMap = trie;
65
+ for (var i = 0; i < key.length - 1; i++) {
66
+ var char = key.charCodeAt(i);
67
+ var next = (_a = lastMap.get(char)) !== null && _a !== void 0 ? _a : {};
68
+ lastMap.set(char, next);
69
+ lastMap = (_b = next.next) !== null && _b !== void 0 ? _b : (next.next = new Map());
70
+ }
71
+ var val = (_c = lastMap.get(key.charCodeAt(key.length - 1))) !== null && _c !== void 0 ? _c : {};
72
+ (_d = val.value) !== null && _d !== void 0 ? _d : (val.value = "&" + value + ";");
73
+ lastMap.set(key.charCodeAt(key.length - 1), val);
74
+ }
75
+ return trie;
76
+ }
77
+ exports.getTrie = getTrie;
package/lib/encode.d.ts CHANGED
@@ -1,4 +1,46 @@
1
- export declare const encodeXML: (data: string) => string;
2
- export declare const encodeHTML: (data: string) => string;
3
- export declare function escape(data: string): string;
1
+ /**
2
+ * Encodes all non-ASCII characters, as well as characters not valid in XML
3
+ * documents using XML entities.
4
+ *
5
+ * If a character has no equivalent entity, a
6
+ * numeric hexadecimal reference (eg. `&#xfc;`) will be used.
7
+ */
8
+ export declare function encodeXML(str: string): string;
9
+ /**
10
+ * Encodes all entities and non-ASCII characters in the input.
11
+ *
12
+ * This includes characters that are valid ASCII characters in HTML documents.
13
+ * For example `#` will be encoded as `&num;`. To get a more compact output,
14
+ * consider using the `encodeNonAsciiHTML` function.
15
+ *
16
+ * If a character has no equivalent entity, a
17
+ * numeric hexadecimal reference (eg. `&#xfc;`) will be used.
18
+ */
19
+ export declare function encodeHTML(data: string): string;
20
+ /**
21
+ * Encodes all non-ASCII characters, as well as characters not valid in HTML
22
+ * documents using HTML entities.
23
+ *
24
+ * If a character has no equivalent entity, a
25
+ * numeric hexadecimal reference (eg. `&#xfc;`) will be used.
26
+ */
27
+ export declare function encodeNonAsciiHTML(data: string): string;
28
+ /**
29
+ * Encodes all non-ASCII characters, as well as characters not valid in XML
30
+ * documents using numeric hexadecimal reference (eg. `&#xfc;`).
31
+ *
32
+ * Have a look at `escapeUTF8` if you want a more concise output at the expense
33
+ * of reduced transportability.
34
+ *
35
+ * @param data String to escape.
36
+ */
37
+ export declare const escape: typeof encodeXML;
38
+ /**
39
+ * Encodes all characters not valid in XML documents using XML entities.
40
+ *
41
+ * Note that the output will be character-set dependent.
42
+ *
43
+ * @param data String to escape.
44
+ */
45
+ export declare function escapeUTF8(data: string): string;
4
46
  //# sourceMappingURL=encode.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"encode.d.ts","sourceRoot":"","sources":["../src/encode.ts"],"names":[],"mappings":"AAKA,eAAO,MAAM,SAAS,SAmEJ,MAAM,WAnEoC,CAAC;AAO7D,eAAO,MAAM,UAAU,SA4DL,MAAM,WA5DuC,CAAC;AAoEhE,wBAAgB,MAAM,CAAC,IAAI,EAAE,MAAM,UAIlC"}
1
+ {"version":3,"file":"encode.d.ts","sourceRoot":"","sources":["../src/encode.ts"],"names":[],"mappings":"AAgBA;;;;;;GAMG;AACH,wBAAgB,SAAS,CAAC,GAAG,EAAE,MAAM,GAAG,MAAM,CA0B7C;AAED;;;;;;;;;GASG;AACH,wBAAgB,UAAU,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,CAE/C;AACD;;;;;;GAMG;AACH,wBAAgB,kBAAkB,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,CAEvD;AAoCD;;;;;;;;GAQG;AACH,eAAO,MAAM,MAAM,kBAAY,CAAC;AAEhC;;;;;;GAMG;AACH,wBAAgB,UAAU,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,CAkB/C"}
package/lib/encode.js CHANGED
@@ -3,71 +3,124 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
3
3
  return (mod && mod.__esModule) ? mod : { "default": mod };
4
4
  };
5
5
  Object.defineProperty(exports, "__esModule", { value: true });
6
- exports.escape = exports.encodeHTML = exports.encodeXML = void 0;
6
+ exports.escapeUTF8 = exports.escape = exports.encodeNonAsciiHTML = exports.encodeHTML = exports.encodeXML = void 0;
7
7
  var xml_json_1 = __importDefault(require("./maps/xml.json"));
8
- var inverseXML = getInverseObj(xml_json_1.default);
9
- var xmlReplacer = getInverseReplacer(inverseXML);
10
- exports.encodeXML = getInverse(inverseXML, xmlReplacer);
8
+ var encode_trie_1 = require("./encode-trie");
11
9
  var entities_json_1 = __importDefault(require("./maps/entities.json"));
12
- var inverseHTML = getInverseObj(entities_json_1.default);
13
- var htmlReplacer = getInverseReplacer(inverseHTML);
14
- exports.encodeHTML = getInverse(inverseHTML, htmlReplacer);
15
- function getInverseObj(obj) {
16
- return Object.keys(obj)
17
- .sort()
18
- .reduce(function (inverse, name) {
19
- inverse[obj[name]] = "&" + name + ";";
20
- return inverse;
21
- }, {});
22
- }
23
- function getInverseReplacer(inverse) {
24
- var single = [];
25
- var multiple = [];
26
- for (var _i = 0, _a = Object.keys(inverse); _i < _a.length; _i++) {
27
- var k = _a[_i];
28
- if (k.length === 1) {
29
- // Add value to single array
30
- single.push("\\" + k);
10
+ var htmlReplacer = getCharRegExp(entities_json_1.default, true);
11
+ var xmlReplacer = getCharRegExp(xml_json_1.default, true);
12
+ var xmlInvalidChars = getCharRegExp(xml_json_1.default, false);
13
+ var xmlCodeMap = new Map(Object.keys(xml_json_1.default).map(function (k) { return [
14
+ xml_json_1.default[k].charCodeAt(0),
15
+ "&" + k + ";",
16
+ ]; }));
17
+ /**
18
+ * Encodes all non-ASCII characters, as well as characters not valid in XML
19
+ * documents using XML entities.
20
+ *
21
+ * If a character has no equivalent entity, a
22
+ * numeric hexadecimal reference (eg. `&#xfc;`) will be used.
23
+ */
24
+ function encodeXML(str) {
25
+ var ret = "";
26
+ var lastIdx = 0;
27
+ var match;
28
+ while ((match = xmlReplacer.exec(str)) !== null) {
29
+ var i = match.index;
30
+ var char = str.charCodeAt(i);
31
+ var next = xmlCodeMap.get(char);
32
+ if (next) {
33
+ ret += str.substring(lastIdx, i) + next;
34
+ lastIdx = i + 1;
31
35
  }
32
36
  else {
33
- // Add value to multiple array
34
- multiple.push(k);
37
+ ret += str.substring(lastIdx, i) + "&#x" + encode_trie_1.getCodePoint(str, i).toString(16) + ";";
38
+ // Increase by 1 if we have a surrogate pair
39
+ lastIdx = xmlReplacer.lastIndex += Number((char & 65408) === 0xd800);
35
40
  }
36
41
  }
42
+ return ret + str.substr(lastIdx);
43
+ }
44
+ exports.encodeXML = encodeXML;
45
+ /**
46
+ * Encodes all entities and non-ASCII characters in the input.
47
+ *
48
+ * This includes characters that are valid ASCII characters in HTML documents.
49
+ * For example `#` will be encoded as `&num;`. To get a more compact output,
50
+ * consider using the `encodeNonAsciiHTML` function.
51
+ *
52
+ * If a character has no equivalent entity, a
53
+ * numeric hexadecimal reference (eg. `&#xfc;`) will be used.
54
+ */
55
+ function encodeHTML(data) {
56
+ return encode_trie_1.encodeHTMLTrieRe(htmlReplacer, data);
57
+ }
58
+ exports.encodeHTML = encodeHTML;
59
+ /**
60
+ * Encodes all non-ASCII characters, as well as characters not valid in HTML
61
+ * documents using HTML entities.
62
+ *
63
+ * If a character has no equivalent entity, a
64
+ * numeric hexadecimal reference (eg. `&#xfc;`) will be used.
65
+ */
66
+ function encodeNonAsciiHTML(data) {
67
+ return encode_trie_1.encodeHTMLTrieRe(xmlReplacer, data);
68
+ }
69
+ exports.encodeNonAsciiHTML = encodeNonAsciiHTML;
70
+ function getCharRegExp(map, nonAscii) {
71
+ // Collect the start characters of all entities
72
+ var chars = Object.keys(map)
73
+ .map(function (k) { return "\\" + map[k].charAt(0); })
74
+ .filter(function (v) { return !nonAscii || v.charCodeAt(1) < 128; })
75
+ .sort(function (a, b) { return a.charCodeAt(1) - b.charCodeAt(1); })
76
+ // Remove duplicates
77
+ .filter(function (v, i, a) { return v !== a[i + 1]; });
37
78
  // Add ranges to single characters.
38
- single.sort();
39
- for (var start = 0; start < single.length - 1; start++) {
79
+ for (var start = 0; start < chars.length - 1; start++) {
40
80
  // Find the end of a run of characters
41
81
  var end = start;
42
- while (end < single.length - 1 &&
43
- single[end].charCodeAt(1) + 1 === single[end + 1].charCodeAt(1)) {
82
+ while (end < chars.length - 1 &&
83
+ chars[end].charCodeAt(1) + 1 === chars[end + 1].charCodeAt(1)) {
44
84
  end += 1;
45
85
  }
46
86
  var count = 1 + end - start;
47
87
  // We want to replace at least three characters
48
88
  if (count < 3)
49
89
  continue;
50
- single.splice(start, count, single[start] + "-" + single[end]);
90
+ chars.splice(start, count, chars[start] + "-" + chars[end]);
51
91
  }
52
- multiple.unshift("[" + single.join("") + "]");
53
- return new RegExp(multiple.join("|"), "g");
92
+ return new RegExp("[" + chars.join("") + (nonAscii ? "\\x80-\\uFFFF" : "") + "]", "g");
54
93
  }
55
- var reNonASCII = /(?:[\x80-\uD7FF\uE000-\uFFFF]|[\uD800-\uDBFF][\uDC00-\uDFFF]|[\uD800-\uDBFF](?![\uDC00-\uDFFF])|(?:[^\uD800-\uDBFF]|^)[\uDC00-\uDFFF])/g;
56
- function singleCharReplacer(c) {
57
- // eslint-disable-next-line @typescript-eslint/no-non-null-assertion
58
- return "&#x" + c.codePointAt(0).toString(16).toUpperCase() + ";";
59
- }
60
- function getInverse(inverse, re) {
61
- return function (data) {
62
- return data
63
- .replace(re, function (name) { return inverse[name]; })
64
- .replace(reNonASCII, singleCharReplacer);
65
- };
66
- }
67
- var reXmlChars = getInverseReplacer(inverseXML);
68
- function escape(data) {
69
- return data
70
- .replace(reXmlChars, singleCharReplacer)
71
- .replace(reNonASCII, singleCharReplacer);
94
+ /**
95
+ * Encodes all non-ASCII characters, as well as characters not valid in XML
96
+ * documents using numeric hexadecimal reference (eg. `&#xfc;`).
97
+ *
98
+ * Have a look at `escapeUTF8` if you want a more concise output at the expense
99
+ * of reduced transportability.
100
+ *
101
+ * @param data String to escape.
102
+ */
103
+ exports.escape = encodeXML;
104
+ /**
105
+ * Encodes all characters not valid in XML documents using XML entities.
106
+ *
107
+ * Note that the output will be character-set dependent.
108
+ *
109
+ * @param data String to escape.
110
+ */
111
+ function escapeUTF8(data) {
112
+ var match;
113
+ var lastIdx = 0;
114
+ var result = "";
115
+ while ((match = xmlInvalidChars.exec(data))) {
116
+ if (lastIdx !== match.index) {
117
+ result += data.substring(lastIdx, match.index);
118
+ }
119
+ // We know that this chararcter will be in `inverseXML`
120
+ result += xmlCodeMap.get(match[0].charCodeAt(0));
121
+ // Every match will be of length 1
122
+ lastIdx = match.index + 1;
123
+ }
124
+ return result + data.substring(lastIdx);
72
125
  }
73
- exports.escape = escape;
126
+ exports.escapeUTF8 = escapeUTF8;
@@ -0,0 +1,3 @@
1
+ declare const _default: Uint16Array;
2
+ export default _default;
3
+ //# sourceMappingURL=decode-data-html.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"decode-data-html.d.ts","sourceRoot":"","sources":["../../src/generated/decode-data-html.ts"],"names":[],"mappings":";AAEA,wBAAox9E"}