entities 2.0.3 → 3.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/decode.d.ts +13 -5
- package/lib/decode.d.ts.map +1 -1
- package/lib/decode.js +133 -42
- package/lib/decode_codepoint.d.ts.map +1 -1
- package/lib/decode_codepoint.js +46 -16
- package/lib/encode-trie.d.ts +8 -0
- package/lib/encode-trie.d.ts.map +1 -0
- package/lib/encode-trie.js +77 -0
- package/lib/encode.d.ts +45 -3
- package/lib/encode.d.ts.map +1 -1
- package/lib/encode.js +103 -50
- package/lib/generated/decode-data-html.d.ts +3 -0
- package/lib/generated/decode-data-html.d.ts.map +1 -0
- package/lib/generated/decode-data-html.js +5 -0
- package/lib/generated/decode-data-xml.d.ts +3 -0
- package/lib/generated/decode-data-xml.d.ts.map +1 -0
- package/lib/generated/decode-data-xml.js +5 -0
- package/lib/index.d.ts +74 -7
- package/lib/index.d.ts.map +1 -1
- package/lib/index.js +78 -12
- package/lib/maps/entities.json +1 -1
- package/lib/maps/legacy.json +1 -1
- package/lib/maps/xml.json +1 -1
- package/package.json +29 -18
- package/readme.md +36 -12
- package/lib/maps/decode.json +0 -30
package/lib/decode.d.ts
CHANGED
|
@@ -1,7 +1,15 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
export
|
|
4
|
-
|
|
1
|
+
import htmlDecodeTree from "./generated/decode-data-html";
|
|
2
|
+
import xmlDecodeTree from "./generated/decode-data-xml";
|
|
3
|
+
export { htmlDecodeTree, xmlDecodeTree };
|
|
4
|
+
export declare enum BinTrieFlags {
|
|
5
|
+
HAS_VALUE = 32768,
|
|
6
|
+
BRANCH_LENGTH = 32512,
|
|
7
|
+
MULTI_BYTE = 128,
|
|
8
|
+
JUMP_TABLE = 127
|
|
5
9
|
}
|
|
6
|
-
export declare const
|
|
10
|
+
export declare const JUMP_OFFSET_BASE: number;
|
|
11
|
+
export declare function determineBranch(decodeTree: Uint16Array, current: number, nodeIdx: number, char: number): number;
|
|
12
|
+
export declare function decodeHTML(str: string): string;
|
|
13
|
+
export declare function decodeHTMLStrict(str: string): string;
|
|
14
|
+
export declare function decodeXML(str: string): string;
|
|
7
15
|
//# sourceMappingURL=decode.d.ts.map
|
package/lib/decode.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"decode.d.ts","sourceRoot":"","sources":["../src/decode.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"decode.d.ts","sourceRoot":"","sources":["../src/decode.ts"],"names":[],"mappings":"AAAA,OAAO,cAAc,MAAM,8BAA8B,CAAC;AAC1D,OAAO,aAAa,MAAM,6BAA6B,CAAC;AAIxD,OAAO,EAAE,cAAc,EAAE,aAAa,EAAE,CAAC;AAczC,oBAAY,YAAY;IACpB,SAAS,QAAwB;IACjC,aAAa,QAAwB;IACrC,UAAU,MAAwB;IAClC,UAAU,MAAwB;CACrC;AAED,eAAO,MAAM,gBAAgB,QAAqB,CAAC;AAmGnD,wBAAgB,eAAe,CAC3B,UAAU,EAAE,WAAW,EACvB,OAAO,EAAE,MAAM,EACf,OAAO,EAAE,MAAM,EACf,IAAI,EAAE,MAAM,GACb,MAAM,CA0CR;AAKD,wBAAgB,UAAU,CAAC,GAAG,EAAE,MAAM,GAAG,MAAM,CAE9C;AAED,wBAAgB,gBAAgB,CAAC,GAAG,EAAE,MAAM,GAAG,MAAM,CAEpD;AAED,wBAAgB,SAAS,CAAC,GAAG,EAAE,MAAM,GAAG,MAAM,CAE7C"}
|
package/lib/decode.js
CHANGED
|
@@ -3,52 +3,143 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
|
3
3
|
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
4
4
|
};
|
|
5
5
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
|
-
exports.
|
|
7
|
-
var
|
|
8
|
-
|
|
9
|
-
var
|
|
6
|
+
exports.decodeXML = exports.decodeHTMLStrict = exports.decodeHTML = exports.determineBranch = exports.JUMP_OFFSET_BASE = exports.BinTrieFlags = exports.xmlDecodeTree = exports.htmlDecodeTree = void 0;
|
|
7
|
+
var decode_data_html_1 = __importDefault(require("./generated/decode-data-html"));
|
|
8
|
+
exports.htmlDecodeTree = decode_data_html_1.default;
|
|
9
|
+
var decode_data_xml_1 = __importDefault(require("./generated/decode-data-xml"));
|
|
10
|
+
exports.xmlDecodeTree = decode_data_xml_1.default;
|
|
10
11
|
var decode_codepoint_1 = __importDefault(require("./decode_codepoint"));
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
12
|
+
var BinTrieFlags;
|
|
13
|
+
(function (BinTrieFlags) {
|
|
14
|
+
BinTrieFlags[BinTrieFlags["HAS_VALUE"] = 32768] = "HAS_VALUE";
|
|
15
|
+
BinTrieFlags[BinTrieFlags["BRANCH_LENGTH"] = 32512] = "BRANCH_LENGTH";
|
|
16
|
+
BinTrieFlags[BinTrieFlags["MULTI_BYTE"] = 128] = "MULTI_BYTE";
|
|
17
|
+
BinTrieFlags[BinTrieFlags["JUMP_TABLE"] = 127] = "JUMP_TABLE";
|
|
18
|
+
})(BinTrieFlags = exports.BinTrieFlags || (exports.BinTrieFlags = {}));
|
|
19
|
+
exports.JUMP_OFFSET_BASE = 48 /* ZERO */ - 1;
|
|
20
|
+
function getDecoder(decodeTree) {
|
|
21
|
+
return function decodeHTMLBinary(str, strict) {
|
|
22
|
+
var ret = "";
|
|
23
|
+
var lastIdx = 0;
|
|
24
|
+
var strIdx = 0;
|
|
25
|
+
while ((strIdx = str.indexOf("&", strIdx)) >= 0) {
|
|
26
|
+
ret += str.slice(lastIdx, strIdx);
|
|
27
|
+
lastIdx = strIdx;
|
|
28
|
+
// Skip the "&"
|
|
29
|
+
strIdx += 1;
|
|
30
|
+
// If we have a numeric entity, handle this separately.
|
|
31
|
+
if (str.charCodeAt(strIdx) === 35 /* NUM */) {
|
|
32
|
+
// Skip the leading "&#". For hex entities, also skip the leading "x".
|
|
33
|
+
var start = strIdx + 1;
|
|
34
|
+
var base = 10;
|
|
35
|
+
var cp = str.charCodeAt(start);
|
|
36
|
+
if ((cp | 32 /* To_LOWER_BIT */) === 120 /* LOWER_X */) {
|
|
37
|
+
base = 16;
|
|
38
|
+
strIdx += 1;
|
|
39
|
+
start += 1;
|
|
40
|
+
}
|
|
41
|
+
while (((cp = str.charCodeAt(++strIdx)) >= 48 /* ZERO */ &&
|
|
42
|
+
cp <= 57 /* NINE */) ||
|
|
43
|
+
(base === 16 &&
|
|
44
|
+
(cp | 32 /* To_LOWER_BIT */) >= 97 /* LOWER_A */ &&
|
|
45
|
+
(cp | 32 /* To_LOWER_BIT */) <= 102 /* LOWER_F */))
|
|
46
|
+
;
|
|
47
|
+
if (start !== strIdx) {
|
|
48
|
+
var entity = str.substring(start, strIdx);
|
|
49
|
+
var parsed = parseInt(entity, base);
|
|
50
|
+
if (str.charCodeAt(strIdx) === 59 /* SEMI */) {
|
|
51
|
+
strIdx += 1;
|
|
52
|
+
}
|
|
53
|
+
else if (strict) {
|
|
54
|
+
continue;
|
|
55
|
+
}
|
|
56
|
+
ret += decode_codepoint_1.default(parsed);
|
|
57
|
+
lastIdx = strIdx;
|
|
58
|
+
}
|
|
59
|
+
continue;
|
|
60
|
+
}
|
|
61
|
+
var result = null;
|
|
62
|
+
var excess = 1;
|
|
63
|
+
var treeIdx = 0;
|
|
64
|
+
var current = decodeTree[treeIdx];
|
|
65
|
+
for (; strIdx < str.length; strIdx++, excess++) {
|
|
66
|
+
treeIdx = determineBranch(decodeTree, current, treeIdx + 1, str.charCodeAt(strIdx));
|
|
67
|
+
if (treeIdx < 0)
|
|
68
|
+
break;
|
|
69
|
+
current = decodeTree[treeIdx];
|
|
70
|
+
// If the branch is a value, store it and continue
|
|
71
|
+
if (current & BinTrieFlags.HAS_VALUE) {
|
|
72
|
+
// If we have a legacy entity while parsing strictly, just skip the number of bytes
|
|
73
|
+
if (strict && str.charCodeAt(strIdx) !== 59 /* SEMI */) {
|
|
74
|
+
// No need to consider multi-byte values, as the legacy entity is always a single byte
|
|
75
|
+
treeIdx += 1;
|
|
76
|
+
}
|
|
77
|
+
else {
|
|
78
|
+
// If this is a surrogate pair, combine the higher bits from the node with the next byte
|
|
79
|
+
result =
|
|
80
|
+
current & BinTrieFlags.MULTI_BYTE
|
|
81
|
+
? String.fromCharCode(decodeTree[++treeIdx], decodeTree[++treeIdx])
|
|
82
|
+
: String.fromCharCode(decodeTree[++treeIdx]);
|
|
83
|
+
excess = 0;
|
|
84
|
+
}
|
|
85
|
+
}
|
|
86
|
+
}
|
|
87
|
+
if (result != null) {
|
|
88
|
+
ret += result;
|
|
89
|
+
lastIdx = strIdx - excess + 1;
|
|
90
|
+
}
|
|
91
|
+
}
|
|
92
|
+
return ret + str.slice(lastIdx);
|
|
93
|
+
};
|
|
19
94
|
}
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
95
|
+
function determineBranch(decodeTree, current, nodeIdx, char) {
|
|
96
|
+
if (current <= 128) {
|
|
97
|
+
return char === current ? nodeIdx : -1;
|
|
98
|
+
}
|
|
99
|
+
var branchCount = (current & BinTrieFlags.BRANCH_LENGTH) >> 8;
|
|
100
|
+
if (branchCount === 0) {
|
|
101
|
+
return -1;
|
|
102
|
+
}
|
|
103
|
+
if (branchCount === 1) {
|
|
104
|
+
return char === decodeTree[nodeIdx] ? nodeIdx + 1 : -1;
|
|
105
|
+
}
|
|
106
|
+
var jumpOffset = current & BinTrieFlags.JUMP_TABLE;
|
|
107
|
+
if (jumpOffset) {
|
|
108
|
+
var value = char - exports.JUMP_OFFSET_BASE - jumpOffset;
|
|
109
|
+
return value < 0 || value > branchCount
|
|
110
|
+
? -1
|
|
111
|
+
: decodeTree[nodeIdx + value] - 1;
|
|
112
|
+
}
|
|
113
|
+
// Binary search for the character.
|
|
114
|
+
var lo = nodeIdx;
|
|
115
|
+
var hi = lo + branchCount - 1;
|
|
116
|
+
while (lo <= hi) {
|
|
117
|
+
var mid = (lo + hi) >>> 1;
|
|
118
|
+
var midVal = decodeTree[mid];
|
|
119
|
+
if (midVal < char) {
|
|
120
|
+
lo = mid + 1;
|
|
121
|
+
}
|
|
122
|
+
else if (midVal > char) {
|
|
123
|
+
hi = mid - 1;
|
|
28
124
|
}
|
|
29
125
|
else {
|
|
30
|
-
|
|
126
|
+
return decodeTree[mid + branchCount];
|
|
31
127
|
}
|
|
32
128
|
}
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
return decode_codepoint_1.default(parseInt(str.substr(3), 16));
|
|
49
|
-
}
|
|
50
|
-
return decode_codepoint_1.default(parseInt(str.substr(2), 10));
|
|
51
|
-
}
|
|
52
|
-
return map[str.slice(1, -1)];
|
|
53
|
-
};
|
|
129
|
+
return -1;
|
|
130
|
+
}
|
|
131
|
+
exports.determineBranch = determineBranch;
|
|
132
|
+
var htmlDecoder = getDecoder(decode_data_html_1.default);
|
|
133
|
+
var xmlDecoder = getDecoder(decode_data_xml_1.default);
|
|
134
|
+
function decodeHTML(str) {
|
|
135
|
+
return htmlDecoder(str, false);
|
|
136
|
+
}
|
|
137
|
+
exports.decodeHTML = decodeHTML;
|
|
138
|
+
function decodeHTMLStrict(str) {
|
|
139
|
+
return htmlDecoder(str, true);
|
|
140
|
+
}
|
|
141
|
+
exports.decodeHTMLStrict = decodeHTMLStrict;
|
|
142
|
+
function decodeXML(str) {
|
|
143
|
+
return xmlDecoder(str, true);
|
|
54
144
|
}
|
|
145
|
+
exports.decodeXML = decodeXML;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"decode_codepoint.d.ts","sourceRoot":"","sources":["../src/decode_codepoint.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"decode_codepoint.d.ts","sourceRoot":"","sources":["../src/decode_codepoint.ts"],"names":[],"mappings":"AAmDA,MAAM,CAAC,OAAO,UAAU,eAAe,CAAC,SAAS,EAAE,MAAM,GAAG,MAAM,CAMjE"}
|
package/lib/decode_codepoint.js
CHANGED
|
@@ -1,24 +1,54 @@
|
|
|
1
1
|
"use strict";
|
|
2
|
-
|
|
3
|
-
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
4
|
-
};
|
|
2
|
+
// Adapted from https://github.com/mathiasbynens/he/blob/36afe179392226cf1b6ccdb16ebbb7a5a844d93a/src/he.js#L106-L134
|
|
5
3
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
|
-
var
|
|
7
|
-
|
|
4
|
+
var decodeMap = new Map([
|
|
5
|
+
[0, 65533],
|
|
6
|
+
[128, 8364],
|
|
7
|
+
[130, 8218],
|
|
8
|
+
[131, 402],
|
|
9
|
+
[132, 8222],
|
|
10
|
+
[133, 8230],
|
|
11
|
+
[134, 8224],
|
|
12
|
+
[135, 8225],
|
|
13
|
+
[136, 710],
|
|
14
|
+
[137, 8240],
|
|
15
|
+
[138, 352],
|
|
16
|
+
[139, 8249],
|
|
17
|
+
[140, 338],
|
|
18
|
+
[142, 381],
|
|
19
|
+
[145, 8216],
|
|
20
|
+
[146, 8217],
|
|
21
|
+
[147, 8220],
|
|
22
|
+
[148, 8221],
|
|
23
|
+
[149, 8226],
|
|
24
|
+
[150, 8211],
|
|
25
|
+
[151, 8212],
|
|
26
|
+
[152, 732],
|
|
27
|
+
[153, 8482],
|
|
28
|
+
[154, 353],
|
|
29
|
+
[155, 8250],
|
|
30
|
+
[156, 339],
|
|
31
|
+
[158, 382],
|
|
32
|
+
[159, 376],
|
|
33
|
+
]);
|
|
34
|
+
var fromCodePoint =
|
|
35
|
+
// eslint-disable-next-line @typescript-eslint/no-unnecessary-condition, node/no-unsupported-features/es-builtins
|
|
36
|
+
String.fromCodePoint ||
|
|
37
|
+
function (codePoint) {
|
|
38
|
+
var output = "";
|
|
39
|
+
if (codePoint > 0xffff) {
|
|
40
|
+
codePoint -= 0x10000;
|
|
41
|
+
output += String.fromCharCode(((codePoint >>> 10) & 0x3ff) | 0xd800);
|
|
42
|
+
codePoint = 0xdc00 | (codePoint & 0x3ff);
|
|
43
|
+
}
|
|
44
|
+
output += String.fromCharCode(codePoint);
|
|
45
|
+
return output;
|
|
46
|
+
};
|
|
8
47
|
function decodeCodePoint(codePoint) {
|
|
48
|
+
var _a;
|
|
9
49
|
if ((codePoint >= 0xd800 && codePoint <= 0xdfff) || codePoint > 0x10ffff) {
|
|
10
50
|
return "\uFFFD";
|
|
11
51
|
}
|
|
12
|
-
|
|
13
|
-
codePoint = decode_json_1.default[codePoint];
|
|
14
|
-
}
|
|
15
|
-
var output = "";
|
|
16
|
-
if (codePoint > 0xffff) {
|
|
17
|
-
codePoint -= 0x10000;
|
|
18
|
-
output += String.fromCharCode(((codePoint >>> 10) & 0x3ff) | 0xd800);
|
|
19
|
-
codePoint = 0xdc00 | (codePoint & 0x3ff);
|
|
20
|
-
}
|
|
21
|
-
output += String.fromCharCode(codePoint);
|
|
22
|
-
return output;
|
|
52
|
+
return fromCodePoint((_a = decodeMap.get(codePoint)) !== null && _a !== void 0 ? _a : codePoint);
|
|
23
53
|
}
|
|
24
54
|
exports.default = decodeCodePoint;
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
export declare const getCodePoint: (str: string, index: number) => number;
|
|
2
|
+
export declare function encodeHTMLTrieRe(regExp: RegExp, str: string): string;
|
|
3
|
+
export interface TrieNode {
|
|
4
|
+
value?: string;
|
|
5
|
+
next?: Map<number, TrieNode>;
|
|
6
|
+
}
|
|
7
|
+
export declare function getTrie(map: Record<string, string>): Map<number, TrieNode>;
|
|
8
|
+
//# sourceMappingURL=encode-trie.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"encode-trie.d.ts","sourceRoot":"","sources":["../src/encode-trie.ts"],"names":[],"mappings":"AAYA,eAAO,MAAM,YAAY,QAGT,MAAM,SAAS,MAAM,KAAG,MAQD,CAAC;AAIxC,wBAAgB,gBAAgB,CAAC,MAAM,EAAE,MAAM,EAAE,GAAG,EAAE,MAAM,GAAG,MAAM,CAkCpE;AAED,MAAM,WAAW,QAAQ;IACrB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,IAAI,CAAC,EAAE,GAAG,CAAC,MAAM,EAAE,QAAQ,CAAC,CAAC;CAChC;AAED,wBAAgB,OAAO,CAAC,GAAG,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,GAAG,GAAG,CAAC,MAAM,EAAE,QAAQ,CAAC,CAmB1E"}
|
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
3
|
+
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
4
|
+
};
|
|
5
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
|
+
exports.getTrie = exports.encodeHTMLTrieRe = exports.getCodePoint = void 0;
|
|
7
|
+
var entities_json_1 = __importDefault(require("./maps/entities.json"));
|
|
8
|
+
function isHighSurrugate(c) {
|
|
9
|
+
return (c & 64512 /* Mask */) === 55296 /* High */;
|
|
10
|
+
}
|
|
11
|
+
// For compatibility with node < 4, we wrap `codePointAt`
|
|
12
|
+
exports.getCodePoint =
|
|
13
|
+
// eslint-disable-next-line @typescript-eslint/no-unnecessary-condition
|
|
14
|
+
String.prototype.codePointAt != null
|
|
15
|
+
? function (str, index) { return str.codePointAt(index); }
|
|
16
|
+
: // http://mathiasbynens.be/notes/javascript-encoding#surrogate-formulae
|
|
17
|
+
function (c, index) {
|
|
18
|
+
return isHighSurrugate(c.charCodeAt(index))
|
|
19
|
+
? (c.charCodeAt(index) - 55296 /* High */) * 0x400 +
|
|
20
|
+
c.charCodeAt(index + 1) -
|
|
21
|
+
0xdc00 +
|
|
22
|
+
0x10000
|
|
23
|
+
: c.charCodeAt(index);
|
|
24
|
+
};
|
|
25
|
+
var htmlTrie = getTrie(entities_json_1.default);
|
|
26
|
+
function encodeHTMLTrieRe(regExp, str) {
|
|
27
|
+
var _a;
|
|
28
|
+
var ret = "";
|
|
29
|
+
var lastIdx = 0;
|
|
30
|
+
var match;
|
|
31
|
+
while ((match = regExp.exec(str)) !== null) {
|
|
32
|
+
var i = match.index;
|
|
33
|
+
var char = str.charCodeAt(i);
|
|
34
|
+
var next = htmlTrie.get(char);
|
|
35
|
+
if (next) {
|
|
36
|
+
if (next.next != null && i + 1 < str.length) {
|
|
37
|
+
var value = (_a = next.next.get(str.charCodeAt(i + 1))) === null || _a === void 0 ? void 0 : _a.value;
|
|
38
|
+
if (value != null) {
|
|
39
|
+
ret += str.substring(lastIdx, i) + value;
|
|
40
|
+
regExp.lastIndex += 1;
|
|
41
|
+
lastIdx = i + 2;
|
|
42
|
+
continue;
|
|
43
|
+
}
|
|
44
|
+
}
|
|
45
|
+
ret += str.substring(lastIdx, i) + next.value;
|
|
46
|
+
lastIdx = i + 1;
|
|
47
|
+
}
|
|
48
|
+
else {
|
|
49
|
+
ret += str.substring(lastIdx, i) + "&#x" + exports.getCodePoint(str, i).toString(16) + ";";
|
|
50
|
+
// Increase by 1 if we have a surrogate pair
|
|
51
|
+
lastIdx = regExp.lastIndex += Number(isHighSurrugate(char));
|
|
52
|
+
}
|
|
53
|
+
}
|
|
54
|
+
return ret + str.substr(lastIdx);
|
|
55
|
+
}
|
|
56
|
+
exports.encodeHTMLTrieRe = encodeHTMLTrieRe;
|
|
57
|
+
function getTrie(map) {
|
|
58
|
+
var _a, _b, _c, _d;
|
|
59
|
+
var trie = new Map();
|
|
60
|
+
for (var _i = 0, _e = Object.keys(map); _i < _e.length; _i++) {
|
|
61
|
+
var value = _e[_i];
|
|
62
|
+
var key = map[value];
|
|
63
|
+
// Resolve the key
|
|
64
|
+
var lastMap = trie;
|
|
65
|
+
for (var i = 0; i < key.length - 1; i++) {
|
|
66
|
+
var char = key.charCodeAt(i);
|
|
67
|
+
var next = (_a = lastMap.get(char)) !== null && _a !== void 0 ? _a : {};
|
|
68
|
+
lastMap.set(char, next);
|
|
69
|
+
lastMap = (_b = next.next) !== null && _b !== void 0 ? _b : (next.next = new Map());
|
|
70
|
+
}
|
|
71
|
+
var val = (_c = lastMap.get(key.charCodeAt(key.length - 1))) !== null && _c !== void 0 ? _c : {};
|
|
72
|
+
(_d = val.value) !== null && _d !== void 0 ? _d : (val.value = "&" + value + ";");
|
|
73
|
+
lastMap.set(key.charCodeAt(key.length - 1), val);
|
|
74
|
+
}
|
|
75
|
+
return trie;
|
|
76
|
+
}
|
|
77
|
+
exports.getTrie = getTrie;
|
package/lib/encode.d.ts
CHANGED
|
@@ -1,4 +1,46 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
1
|
+
/**
|
|
2
|
+
* Encodes all non-ASCII characters, as well as characters not valid in XML
|
|
3
|
+
* documents using XML entities.
|
|
4
|
+
*
|
|
5
|
+
* If a character has no equivalent entity, a
|
|
6
|
+
* numeric hexadecimal reference (eg. `&#xfc;`) will be used.
|
|
7
|
+
*/
|
|
8
|
+
export declare function encodeXML(str: string): string;
|
|
9
|
+
/**
|
|
10
|
+
* Encodes all entities and non-ASCII characters in the input.
|
|
11
|
+
*
|
|
12
|
+
* This includes characters that are valid ASCII characters in HTML documents.
|
|
13
|
+
* For example `#` will be encoded as `&num;`. To get a more compact output,
|
|
14
|
+
* consider using the `encodeNonAsciiHTML` function.
|
|
15
|
+
*
|
|
16
|
+
* If a character has no equivalent entity, a
|
|
17
|
+
* numeric hexadecimal reference (eg. `&#xfc;`) will be used.
|
|
18
|
+
*/
|
|
19
|
+
export declare function encodeHTML(data: string): string;
|
|
20
|
+
/**
|
|
21
|
+
* Encodes all non-ASCII characters, as well as characters not valid in HTML
|
|
22
|
+
* documents using HTML entities.
|
|
23
|
+
*
|
|
24
|
+
* If a character has no equivalent entity, a
|
|
25
|
+
* numeric hexadecimal reference (eg. `&#xfc;`) will be used.
|
|
26
|
+
*/
|
|
27
|
+
export declare function encodeNonAsciiHTML(data: string): string;
|
|
28
|
+
/**
|
|
29
|
+
* Encodes all non-ASCII characters, as well as characters not valid in XML
|
|
30
|
+
* documents using numeric hexadecimal reference (eg. `&#xfc;`).
|
|
31
|
+
*
|
|
32
|
+
* Have a look at `escapeUTF8` if you want a more concise output at the expense
|
|
33
|
+
* of reduced transportability.
|
|
34
|
+
*
|
|
35
|
+
* @param data String to escape.
|
|
36
|
+
*/
|
|
37
|
+
export declare const escape: typeof encodeXML;
|
|
38
|
+
/**
|
|
39
|
+
* Encodes all characters not valid in XML documents using XML entities.
|
|
40
|
+
*
|
|
41
|
+
* Note that the output will be character-set dependent.
|
|
42
|
+
*
|
|
43
|
+
* @param data String to escape.
|
|
44
|
+
*/
|
|
45
|
+
export declare function escapeUTF8(data: string): string;
|
|
4
46
|
//# sourceMappingURL=encode.d.ts.map
|
package/lib/encode.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"encode.d.ts","sourceRoot":"","sources":["../src/encode.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"encode.d.ts","sourceRoot":"","sources":["../src/encode.ts"],"names":[],"mappings":"AAgBA;;;;;;GAMG;AACH,wBAAgB,SAAS,CAAC,GAAG,EAAE,MAAM,GAAG,MAAM,CA0B7C;AAED;;;;;;;;;GASG;AACH,wBAAgB,UAAU,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,CAE/C;AACD;;;;;;GAMG;AACH,wBAAgB,kBAAkB,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,CAEvD;AAoCD;;;;;;;;GAQG;AACH,eAAO,MAAM,MAAM,kBAAY,CAAC;AAEhC;;;;;;GAMG;AACH,wBAAgB,UAAU,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,CAkB/C"}
|
package/lib/encode.js
CHANGED
|
@@ -3,71 +3,124 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
|
3
3
|
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
4
4
|
};
|
|
5
5
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
|
-
exports.escape = exports.encodeHTML = exports.encodeXML = void 0;
|
|
6
|
+
exports.escapeUTF8 = exports.escape = exports.encodeNonAsciiHTML = exports.encodeHTML = exports.encodeXML = void 0;
|
|
7
7
|
var xml_json_1 = __importDefault(require("./maps/xml.json"));
|
|
8
|
-
var
|
|
9
|
-
var xmlReplacer = getInverseReplacer(inverseXML);
|
|
10
|
-
exports.encodeXML = getInverse(inverseXML, xmlReplacer);
|
|
8
|
+
var encode_trie_1 = require("./encode-trie");
|
|
11
9
|
var entities_json_1 = __importDefault(require("./maps/entities.json"));
|
|
12
|
-
var
|
|
13
|
-
var
|
|
14
|
-
|
|
15
|
-
function
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
10
|
+
var htmlReplacer = getCharRegExp(entities_json_1.default, true);
|
|
11
|
+
var xmlReplacer = getCharRegExp(xml_json_1.default, true);
|
|
12
|
+
var xmlInvalidChars = getCharRegExp(xml_json_1.default, false);
|
|
13
|
+
var xmlCodeMap = new Map(Object.keys(xml_json_1.default).map(function (k) { return [
|
|
14
|
+
xml_json_1.default[k].charCodeAt(0),
|
|
15
|
+
"&" + k + ";",
|
|
16
|
+
]; }));
|
|
17
|
+
/**
|
|
18
|
+
* Encodes all non-ASCII characters, as well as characters not valid in XML
|
|
19
|
+
* documents using XML entities.
|
|
20
|
+
*
|
|
21
|
+
* If a character has no equivalent entity, a
|
|
22
|
+
* numeric hexadecimal reference (eg. `&#xfc;`) will be used.
|
|
23
|
+
*/
|
|
24
|
+
function encodeXML(str) {
|
|
25
|
+
var ret = "";
|
|
26
|
+
var lastIdx = 0;
|
|
27
|
+
var match;
|
|
28
|
+
while ((match = xmlReplacer.exec(str)) !== null) {
|
|
29
|
+
var i = match.index;
|
|
30
|
+
var char = str.charCodeAt(i);
|
|
31
|
+
var next = xmlCodeMap.get(char);
|
|
32
|
+
if (next) {
|
|
33
|
+
ret += str.substring(lastIdx, i) + next;
|
|
34
|
+
lastIdx = i + 1;
|
|
31
35
|
}
|
|
32
36
|
else {
|
|
33
|
-
|
|
34
|
-
|
|
37
|
+
ret += str.substring(lastIdx, i) + "&#x" + encode_trie_1.getCodePoint(str, i).toString(16) + ";";
|
|
38
|
+
// Increase by 1 if we have a surrogate pair
|
|
39
|
+
lastIdx = xmlReplacer.lastIndex += Number((char & 65408) === 0xd800);
|
|
35
40
|
}
|
|
36
41
|
}
|
|
42
|
+
return ret + str.substr(lastIdx);
|
|
43
|
+
}
|
|
44
|
+
exports.encodeXML = encodeXML;
|
|
45
|
+
/**
|
|
46
|
+
* Encodes all entities and non-ASCII characters in the input.
|
|
47
|
+
*
|
|
48
|
+
* This includes characters that are valid ASCII characters in HTML documents.
|
|
49
|
+
* For example `#` will be encoded as `&num;`. To get a more compact output,
|
|
50
|
+
* consider using the `encodeNonAsciiHTML` function.
|
|
51
|
+
*
|
|
52
|
+
* If a character has no equivalent entity, a
|
|
53
|
+
* numeric hexadecimal reference (eg. `&#xfc;`) will be used.
|
|
54
|
+
*/
|
|
55
|
+
function encodeHTML(data) {
|
|
56
|
+
return encode_trie_1.encodeHTMLTrieRe(htmlReplacer, data);
|
|
57
|
+
}
|
|
58
|
+
exports.encodeHTML = encodeHTML;
|
|
59
|
+
/**
|
|
60
|
+
* Encodes all non-ASCII characters, as well as characters not valid in HTML
|
|
61
|
+
* documents using HTML entities.
|
|
62
|
+
*
|
|
63
|
+
* If a character has no equivalent entity, a
|
|
64
|
+
* numeric hexadecimal reference (eg. `&#xfc;`) will be used.
|
|
65
|
+
*/
|
|
66
|
+
function encodeNonAsciiHTML(data) {
|
|
67
|
+
return encode_trie_1.encodeHTMLTrieRe(xmlReplacer, data);
|
|
68
|
+
}
|
|
69
|
+
exports.encodeNonAsciiHTML = encodeNonAsciiHTML;
|
|
70
|
+
function getCharRegExp(map, nonAscii) {
|
|
71
|
+
// Collect the start characters of all entities
|
|
72
|
+
var chars = Object.keys(map)
|
|
73
|
+
.map(function (k) { return "\\" + map[k].charAt(0); })
|
|
74
|
+
.filter(function (v) { return !nonAscii || v.charCodeAt(1) < 128; })
|
|
75
|
+
.sort(function (a, b) { return a.charCodeAt(1) - b.charCodeAt(1); })
|
|
76
|
+
// Remove duplicates
|
|
77
|
+
.filter(function (v, i, a) { return v !== a[i + 1]; });
|
|
37
78
|
// Add ranges to single characters.
|
|
38
|
-
|
|
39
|
-
for (var start = 0; start < single.length - 1; start++) {
|
|
79
|
+
for (var start = 0; start < chars.length - 1; start++) {
|
|
40
80
|
// Find the end of a run of characters
|
|
41
81
|
var end = start;
|
|
42
|
-
while (end <
|
|
43
|
-
|
|
82
|
+
while (end < chars.length - 1 &&
|
|
83
|
+
chars[end].charCodeAt(1) + 1 === chars[end + 1].charCodeAt(1)) {
|
|
44
84
|
end += 1;
|
|
45
85
|
}
|
|
46
86
|
var count = 1 + end - start;
|
|
47
87
|
// We want to replace at least three characters
|
|
48
88
|
if (count < 3)
|
|
49
89
|
continue;
|
|
50
|
-
|
|
90
|
+
chars.splice(start, count, chars[start] + "-" + chars[end]);
|
|
51
91
|
}
|
|
52
|
-
|
|
53
|
-
return new RegExp(multiple.join("|"), "g");
|
|
92
|
+
return new RegExp("[" + chars.join("") + (nonAscii ? "\\x80-\\uFFFF" : "") + "]", "g");
|
|
54
93
|
}
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
94
|
+
/**
|
|
95
|
+
* Encodes all non-ASCII characters, as well as characters not valid in XML
|
|
96
|
+
* documents using numeric hexadecimal reference (eg. `&#xfc;`).
|
|
97
|
+
*
|
|
98
|
+
* Have a look at `escapeUTF8` if you want a more concise output at the expense
|
|
99
|
+
* of reduced transportability.
|
|
100
|
+
*
|
|
101
|
+
* @param data String to escape.
|
|
102
|
+
*/
|
|
103
|
+
exports.escape = encodeXML;
|
|
104
|
+
/**
|
|
105
|
+
* Encodes all characters not valid in XML documents using XML entities.
|
|
106
|
+
*
|
|
107
|
+
* Note that the output will be character-set dependent.
|
|
108
|
+
*
|
|
109
|
+
* @param data String to escape.
|
|
110
|
+
*/
|
|
111
|
+
function escapeUTF8(data) {
|
|
112
|
+
var match;
|
|
113
|
+
var lastIdx = 0;
|
|
114
|
+
var result = "";
|
|
115
|
+
while ((match = xmlInvalidChars.exec(data))) {
|
|
116
|
+
if (lastIdx !== match.index) {
|
|
117
|
+
result += data.substring(lastIdx, match.index);
|
|
118
|
+
}
|
|
119
|
+
// We know that this character will be in `inverseXML`
|
|
120
|
+
result += xmlCodeMap.get(match[0].charCodeAt(0));
|
|
121
|
+
// Every match will be of length 1
|
|
122
|
+
lastIdx = match.index + 1;
|
|
123
|
+
}
|
|
124
|
+
return result + data.substring(lastIdx);
|
|
72
125
|
}
|
|
73
|
-
exports.
|
|
126
|
+
exports.escapeUTF8 = escapeUTF8;
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"decode-data-html.d.ts","sourceRoot":"","sources":["../../src/generated/decode-data-html.ts"],"names":[],"mappings":";AAEA,wBAAox9E"}
|