entities 4.2.0 → 4.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/decode.d.ts +21 -2
- package/lib/decode.d.ts.map +1 -1
- package/lib/decode.js +44 -35
- package/lib/decode.js.map +1 -1
- package/lib/decode_codepoint.d.ts +1 -0
- package/lib/decode_codepoint.d.ts.map +1 -1
- package/lib/decode_codepoint.js +3 -3
- package/lib/decode_codepoint.js.map +1 -1
- package/lib/encode.d.ts +11 -49
- package/lib/encode.d.ts.map +1 -1
- package/lib/encode.js +56 -104
- package/lib/encode.js.map +1 -1
- package/lib/escape.d.ts +43 -0
- package/lib/escape.d.ts.map +1 -0
- package/lib/escape.js +112 -0
- package/lib/escape.js.map +1 -0
- package/lib/esm/decode.d.ts +21 -2
- package/lib/esm/decode.d.ts.map +1 -1
- package/lib/esm/decode.js +41 -10
- package/lib/esm/decode.js.map +1 -1
- package/lib/esm/decode_codepoint.d.ts +1 -0
- package/lib/esm/decode_codepoint.d.ts.map +1 -1
- package/lib/esm/decode_codepoint.js +1 -1
- package/lib/esm/decode_codepoint.js.map +1 -1
- package/lib/esm/encode.d.ts +11 -49
- package/lib/esm/encode.d.ts.map +1 -1
- package/lib/esm/encode.js +50 -100
- package/lib/esm/encode.js.map +1 -1
- package/lib/esm/escape.d.ts +43 -0
- package/lib/esm/escape.d.ts.map +1 -0
- package/lib/esm/escape.js +106 -0
- package/lib/esm/escape.js.map +1 -0
- package/lib/esm/generated/decode-data-html.d.ts.map +1 -1
- package/lib/esm/generated/decode-data-html.js +4 -1
- package/lib/esm/generated/decode-data-html.js.map +1 -1
- package/lib/esm/generated/decode-data-xml.d.ts.map +1 -1
- package/lib/esm/generated/decode-data-xml.js +4 -1
- package/lib/esm/generated/decode-data-xml.js.map +1 -1
- package/lib/esm/generated/encode-html.d.ts.map +1 -1
- package/lib/esm/generated/encode-html.js +7 -1
- package/lib/esm/generated/encode-html.js.map +1 -1
- package/lib/esm/index.d.ts +3 -2
- package/lib/esm/index.d.ts.map +1 -1
- package/lib/esm/index.js +5 -3
- package/lib/esm/index.js.map +1 -1
- package/lib/generated/decode-data-html.d.ts.map +1 -1
- package/lib/generated/decode-data-html.js +5 -2
- package/lib/generated/decode-data-html.js.map +1 -1
- package/lib/generated/decode-data-xml.d.ts.map +1 -1
- package/lib/generated/decode-data-xml.js +5 -2
- package/lib/generated/decode-data-xml.js.map +1 -1
- package/lib/generated/encode-html.d.ts.map +1 -1
- package/lib/generated/encode-html.js +7 -1
- package/lib/generated/encode-html.js.map +1 -1
- package/lib/index.d.ts +3 -2
- package/lib/index.d.ts.map +1 -1
- package/lib/index.js +13 -11
- package/lib/index.js.map +1 -1
- package/package.json +17 -14
- package/lib/encode-trie.d.ts +0 -3
- package/lib/encode-trie.d.ts.map +0 -1
- package/lib/encode-trie.js +0 -66
- package/lib/encode-trie.js.map +0 -1
- package/lib/esm/encode-trie.d.ts +0 -3
- package/lib/esm/encode-trie.d.ts.map +0 -1
- package/lib/esm/encode-trie.js +0 -57
- package/lib/esm/encode-trie.js.map +0 -1
package/lib/escape.js
ADDED
|
@@ -0,0 +1,112 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.escapeText = exports.escapeAttribute = exports.escapeUTF8 = exports.escape = exports.encodeXML = exports.getCodePoint = exports.xmlReplacer = void 0;
|
|
4
|
+
exports.xmlReplacer = /["&'<>$\x80-\uFFFF]/g;
|
|
5
|
+
var xmlCodeMap = new Map([
|
|
6
|
+
[34, "&quot;"],
|
|
7
|
+
[38, "&amp;"],
|
|
8
|
+
[39, "&apos;"],
|
|
9
|
+
[60, "&lt;"],
|
|
10
|
+
[62, "&gt;"],
|
|
11
|
+
]);
|
|
12
|
+
// For compatibility with node < 4, we wrap `codePointAt`
|
|
13
|
+
exports.getCodePoint =
|
|
14
|
+
// eslint-disable-next-line @typescript-eslint/no-unnecessary-condition
|
|
15
|
+
String.prototype.codePointAt != null
|
|
16
|
+
? function (str, index) { return str.codePointAt(index); }
|
|
17
|
+
: // http://mathiasbynens.be/notes/javascript-encoding#surrogate-formulae
|
|
18
|
+
function (c, index) {
|
|
19
|
+
return (c.charCodeAt(index) & 0xfc00) === 0xd800
|
|
20
|
+
? (c.charCodeAt(index) - 0xd800) * 0x400 +
|
|
21
|
+
c.charCodeAt(index + 1) -
|
|
22
|
+
0xdc00 +
|
|
23
|
+
0x10000
|
|
24
|
+
: c.charCodeAt(index);
|
|
25
|
+
};
|
|
26
|
+
/**
|
|
27
|
+
* Encodes all non-ASCII characters, as well as characters not valid in XML
|
|
28
|
+
* documents using XML entities.
|
|
29
|
+
*
|
|
30
|
+
* If a character has no equivalent entity, a
|
|
31
|
+
* numeric hexadecimal reference (eg. `&#xfc;`) will be used.
|
|
32
|
+
*/
|
|
33
|
+
function encodeXML(str) {
|
|
34
|
+
var ret = "";
|
|
35
|
+
var lastIdx = 0;
|
|
36
|
+
var match;
|
|
37
|
+
while ((match = exports.xmlReplacer.exec(str)) !== null) {
|
|
38
|
+
var i = match.index;
|
|
39
|
+
var char = str.charCodeAt(i);
|
|
40
|
+
var next = xmlCodeMap.get(char);
|
|
41
|
+
if (next !== undefined) {
|
|
42
|
+
ret += str.substring(lastIdx, i) + next;
|
|
43
|
+
lastIdx = i + 1;
|
|
44
|
+
}
|
|
45
|
+
else {
|
|
46
|
+
ret += "".concat(str.substring(lastIdx, i), "&#x").concat((0, exports.getCodePoint)(str, i).toString(16), ";");
|
|
47
|
+
// Increase by 1 if we have a surrogate pair
|
|
48
|
+
lastIdx = exports.xmlReplacer.lastIndex += Number((char & 0xfc00) === 0xd800);
|
|
49
|
+
}
|
|
50
|
+
}
|
|
51
|
+
return ret + str.substr(lastIdx);
|
|
52
|
+
}
|
|
53
|
+
exports.encodeXML = encodeXML;
|
|
54
|
+
/**
|
|
55
|
+
* Encodes all non-ASCII characters, as well as characters not valid in XML
|
|
56
|
+
* documents using numeric hexadecimal reference (eg. `&#xfc;`).
|
|
57
|
+
*
|
|
58
|
+
* Have a look at `escapeUTF8` if you want a more concise output at the expense
|
|
59
|
+
* of reduced transportability.
|
|
60
|
+
*
|
|
61
|
+
* @param data String to escape.
|
|
62
|
+
*/
|
|
63
|
+
exports.escape = encodeXML;
|
|
64
|
+
function getEscaper(regex, map) {
|
|
65
|
+
return function escape(data) {
|
|
66
|
+
var match;
|
|
67
|
+
var lastIdx = 0;
|
|
68
|
+
var result = "";
|
|
69
|
+
while ((match = regex.exec(data))) {
|
|
70
|
+
if (lastIdx !== match.index) {
|
|
71
|
+
result += data.substring(lastIdx, match.index);
|
|
72
|
+
}
|
|
73
|
+
// We know that this chararcter will be in the map.
|
|
74
|
+
result += map.get(match[0].charCodeAt(0));
|
|
75
|
+
// Every match will be of length 1
|
|
76
|
+
lastIdx = match.index + 1;
|
|
77
|
+
}
|
|
78
|
+
return result + data.substring(lastIdx);
|
|
79
|
+
};
|
|
80
|
+
}
|
|
81
|
+
/**
|
|
82
|
+
* Encodes all characters not valid in XML documents using XML entities.
|
|
83
|
+
*
|
|
84
|
+
* Note that the output will be character-set dependent.
|
|
85
|
+
*
|
|
86
|
+
* @param data String to escape.
|
|
87
|
+
*/
|
|
88
|
+
exports.escapeUTF8 = getEscaper(/[&<>'"]/g, xmlCodeMap);
|
|
89
|
+
/**
|
|
90
|
+
* Encodes all characters that have to be escaped in HTML attributes,
|
|
91
|
+
* following {@link https://html.spec.whatwg.org/multipage/parsing.html#escapingString}.
|
|
92
|
+
*
|
|
93
|
+
* @param data String to escape.
|
|
94
|
+
*/
|
|
95
|
+
exports.escapeAttribute = getEscaper(/["&\u00A0]/g, new Map([
|
|
96
|
+
[34, "&quot;"],
|
|
97
|
+
[38, "&amp;"],
|
|
98
|
+
[160, "&nbsp;"],
|
|
99
|
+
]));
|
|
100
|
+
/**
|
|
101
|
+
* Encodes all characters that have to be escaped in HTML text,
|
|
102
|
+
* following {@link https://html.spec.whatwg.org/multipage/parsing.html#escapingString}.
|
|
103
|
+
*
|
|
104
|
+
* @param data String to escape.
|
|
105
|
+
*/
|
|
106
|
+
exports.escapeText = getEscaper(/[&<>\u00A0]/g, new Map([
|
|
107
|
+
[38, "&amp;"],
|
|
108
|
+
[60, "&lt;"],
|
|
109
|
+
[62, "&gt;"],
|
|
110
|
+
[160, "&nbsp;"],
|
|
111
|
+
]));
|
|
112
|
+
//# sourceMappingURL=escape.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"escape.js","sourceRoot":"https://raw.githubusercontent.com/fb55/entities/867ac709ba482a56a98b7c35f49ca833c74dc193/src/","sources":["escape.ts"],"names":[],"mappings":";;;AAAa,QAAA,WAAW,GAAG,sBAAsB,CAAC;AAElD,IAAM,UAAU,GAAG,IAAI,GAAG,CAAC;IACvB,CAAC,EAAE,EAAE,QAAQ,CAAC;IACd,CAAC,EAAE,EAAE,OAAO,CAAC;IACb,CAAC,EAAE,EAAE,QAAQ,CAAC;IACd,CAAC,EAAE,EAAE,MAAM,CAAC;IACZ,CAAC,EAAE,EAAE,MAAM,CAAC;CACf,CAAC,CAAC;AAEH,yDAAyD;AAC5C,QAAA,YAAY;AACrB,uEAAuE;AACvE,MAAM,CAAC,SAAS,CAAC,WAAW,IAAI,IAAI;IAChC,CAAC,CAAC,UAAC,GAAW,EAAE,KAAa,IAAa,OAAA,GAAG,CAAC,WAAW,CAAC,KAAK,CAAE,EAAvB,CAAuB;IACjE,CAAC,CAAC,uEAAuE;QACvE,UAAC,CAAS,EAAE,KAAa;YACrB,OAAA,CAAC,CAAC,CAAC,UAAU,CAAC,KAAK,CAAC,GAAG,MAAM,CAAC,KAAK,MAAM;gBACrC,CAAC,CAAC,CAAC,CAAC,CAAC,UAAU,CAAC,KAAK,CAAC,GAAG,MAAM,CAAC,GAAG,KAAK;oBACtC,CAAC,CAAC,UAAU,CAAC,KAAK,GAAG,CAAC,CAAC;oBACvB,MAAM;oBACN,OAAO;gBACT,CAAC,CAAC,CAAC,CAAC,UAAU,CAAC,KAAK,CAAC;QALzB,CAKyB,CAAC;AAExC;;;;;;GAMG;AACH,SAAgB,SAAS,CAAC,GAAW;IACjC,IAAI,GAAG,GAAG,EAAE,CAAC;IACb,IAAI,OAAO,GAAG,CAAC,CAAC;IAChB,IAAI,KAAK,CAAC;IAEV,OAAO,CAAC,KAAK,GAAG,mBAAW,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,KAAK,IAAI,EAAE;QAC7C,IAAM,CAAC,GAAG,KAAK,CAAC,KAAK,CAAC;QACtB,IAAM,IAAI,GAAG,GAAG,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC;QAC/B,IAAM,IAAI,GAAG,UAAU,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;QAElC,IAAI,IAAI,KAAK,SAAS,EAAE;YACpB,GAAG,IAAI,GAAG,CAAC,SAAS,CAAC,OAAO,EAAE,CAAC,CAAC,GAAG,IAAI,CAAC;YACxC,OAAO,GAAG,CAAC,GAAG,CAAC,CAAC;SACnB;aAAM;YACH,GAAG,IAAI,UAAG,GAAG,CAAC,SAAS,CAAC,OAAO,EAAE,CAAC,CAAC,gBAAM,IAAA,oBAAY,EACjD,GAAG,EACH,CAAC,CACJ,CAAC,QAAQ,CAAC,EAAE,CAAC,MAAG,CAAC;YAClB,4CAA4C;YAC5C,OAAO,GAAG,mBAAW,CAAC,SAAS,IAAI,MAAM,CACrC,CAAC,IAAI,GAAG,MAAM,CAAC,KAAK,MAAM,CAC7B,CAAC;SACL;KACJ;IAED,OAAO,GAAG,GAAG,GAAG,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;AACrC,CAAC;AA1BD,8BA0BC;AAED;;;;;;;;GAQG;AACU,QAAA,MAAM,GAAG,SAAS,CAAC;AAEhC,SAAS,UAAU,CACf,KAAa,EACb,GAAwB;IAExB,OAAO,SAAS,MAAM,CAAC,IAAY;QAC/B,IAAI,KAAK,CAAC;QACV,IAAI,OAAO,GAAG,CAAC,CAAC;QAChB,IAAI,MAAM,GAAG,EAAE,CAAC;QAEhB,OAAO,CAAC,KAAK,GAAG,KAAK,CAAC,IAAI,C
AAC,IAAI,CAAC,CAAC,EAAE;YAC/B,IAAI,OAAO,KAAK,KAAK,CAAC,KAAK,EAAE;gBACzB,MAAM,IAAI,IAAI,CAAC,SAAS,CAAC,OAAO,EAAE,KAAK,CAAC,KAAK,CAAC,CAAC;aAClD;YAED,mDAAmD;YACnD,MAAM,IAAI,GAAG,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,CAAE,CAAC;YAE3C,kCAAkC;YAClC,OAAO,GAAG,KAAK,CAAC,KAAK,GAAG,CAAC,CAAC;SAC7B;QAED,OAAO,MAAM,GAAG,IAAI,CAAC,SAAS,CAAC,OAAO,CAAC,CAAC;IAC5C,CAAC,CAAC;AACN,CAAC;AAED;;;;;;GAMG;AACU,QAAA,UAAU,GAAG,UAAU,CAAC,UAAU,EAAE,UAAU,CAAC,CAAC;AAE7D;;;;;GAKG;AACU,QAAA,eAAe,GAAG,UAAU,CACrC,aAAa,EACb,IAAI,GAAG,CAAC;IACJ,CAAC,EAAE,EAAE,QAAQ,CAAC;IACd,CAAC,EAAE,EAAE,OAAO,CAAC;IACb,CAAC,GAAG,EAAE,QAAQ,CAAC;CAClB,CAAC,CACL,CAAC;AAEF;;;;;GAKG;AACU,QAAA,UAAU,GAAG,UAAU,CAChC,cAAc,EACd,IAAI,GAAG,CAAC;IACJ,CAAC,EAAE,EAAE,OAAO,CAAC;IACb,CAAC,EAAE,EAAE,MAAM,CAAC;IACZ,CAAC,EAAE,EAAE,MAAM,CAAC;IACZ,CAAC,GAAG,EAAE,QAAQ,CAAC;CAClB,CAAC,CACL,CAAC"}
|
package/lib/esm/decode.d.ts
CHANGED
|
@@ -1,14 +1,33 @@
|
|
|
1
1
|
import htmlDecodeTree from "./generated/decode-data-html.js";
|
|
2
2
|
import xmlDecodeTree from "./generated/decode-data-xml.js";
|
|
3
|
-
import
|
|
4
|
-
export { htmlDecodeTree, xmlDecodeTree, decodeCodePoint
|
|
3
|
+
import decodeCodePoint from "./decode_codepoint.js";
|
|
4
|
+
export { htmlDecodeTree, xmlDecodeTree, decodeCodePoint };
|
|
5
|
+
export { replaceCodePoint, fromCodePoint } from "./decode_codepoint.js";
|
|
5
6
|
export declare enum BinTrieFlags {
|
|
6
7
|
VALUE_LENGTH = 49152,
|
|
7
8
|
BRANCH_LENGTH = 16256,
|
|
8
9
|
JUMP_TABLE = 127
|
|
9
10
|
}
|
|
10
11
|
export declare function determineBranch(decodeTree: Uint16Array, current: number, nodeIdx: number, char: number): number;
|
|
12
|
+
/**
|
|
13
|
+
* Decodes an HTML string, allowing for entities not terminated by a semi-colon.
|
|
14
|
+
*
|
|
15
|
+
* @param str The string to decode.
|
|
16
|
+
* @returns The decoded string.
|
|
17
|
+
*/
|
|
11
18
|
export declare function decodeHTML(str: string): string;
|
|
19
|
+
/**
|
|
20
|
+
* Decodes an HTML string, requiring all entities to be terminated by a semi-colon.
|
|
21
|
+
*
|
|
22
|
+
* @param str The string to decode.
|
|
23
|
+
* @returns The decoded string.
|
|
24
|
+
*/
|
|
12
25
|
export declare function decodeHTMLStrict(str: string): string;
|
|
26
|
+
/**
|
|
27
|
+
* Decodes an XML string, requiring all entities to be terminated by a semi-colon.
|
|
28
|
+
*
|
|
29
|
+
* @param str The string to decode.
|
|
30
|
+
* @returns The decoded string.
|
|
31
|
+
*/
|
|
13
32
|
export declare function decodeXML(str: string): string;
|
|
14
33
|
//# sourceMappingURL=decode.d.ts.map
|
package/lib/esm/decode.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"decode.d.ts","sourceRoot":"","sources":["
|
|
1
|
+
{"version":3,"file":"decode.d.ts","sourceRoot":"https://raw.githubusercontent.com/fb55/entities/867ac709ba482a56a98b7c35f49ca833c74dc193/src/","sources":["decode.ts"],"names":[],"mappings":"AAAA,OAAO,cAAc,MAAM,iCAAiC,CAAC;AAC7D,OAAO,aAAa,MAAM,gCAAgC,CAAC;AAC3D,OAAO,eAAe,MAAM,uBAAuB,CAAC;AAGpD,OAAO,EAAE,cAAc,EAAE,aAAa,EAAE,eAAe,EAAE,CAAC;AAC1D,OAAO,EAAE,gBAAgB,EAAE,aAAa,EAAE,MAAM,uBAAuB,CAAC;AAcxE,oBAAY,YAAY;IACpB,YAAY,QAAwB;IACpC,aAAa,QAAwB;IACrC,UAAU,MAAwB;CACrC;AA8GD,wBAAgB,eAAe,CAC3B,UAAU,EAAE,WAAW,EACvB,OAAO,EAAE,MAAM,EACf,OAAO,EAAE,MAAM,EACf,IAAI,EAAE,MAAM,GACb,MAAM,CAsCR;AAKD;;;;;GAKG;AACH,wBAAgB,UAAU,CAAC,GAAG,EAAE,MAAM,GAAG,MAAM,CAE9C;AAED;;;;;GAKG;AACH,wBAAgB,gBAAgB,CAAC,GAAG,EAAE,MAAM,GAAG,MAAM,CAEpD;AAED;;;;;GAKG;AACH,wBAAgB,SAAS,CAAC,GAAG,EAAE,MAAM,GAAG,MAAM,CAE7C"}
|
package/lib/esm/decode.js
CHANGED
|
@@ -1,8 +1,21 @@
|
|
|
1
1
|
import htmlDecodeTree from "./generated/decode-data-html.js";
|
|
2
2
|
import xmlDecodeTree from "./generated/decode-data-xml.js";
|
|
3
|
-
import
|
|
3
|
+
import decodeCodePoint from "./decode_codepoint.js";
|
|
4
4
|
// Re-export for use by eg. htmlparser2
|
|
5
|
-
export { htmlDecodeTree, xmlDecodeTree, decodeCodePoint
|
|
5
|
+
export { htmlDecodeTree, xmlDecodeTree, decodeCodePoint };
|
|
6
|
+
export { replaceCodePoint, fromCodePoint } from "./decode_codepoint.js";
|
|
7
|
+
var CharCodes;
|
|
8
|
+
(function (CharCodes) {
|
|
9
|
+
CharCodes[CharCodes["NUM"] = 35] = "NUM";
|
|
10
|
+
CharCodes[CharCodes["SEMI"] = 59] = "SEMI";
|
|
11
|
+
CharCodes[CharCodes["ZERO"] = 48] = "ZERO";
|
|
12
|
+
CharCodes[CharCodes["NINE"] = 57] = "NINE";
|
|
13
|
+
CharCodes[CharCodes["LOWER_A"] = 97] = "LOWER_A";
|
|
14
|
+
CharCodes[CharCodes["LOWER_F"] = 102] = "LOWER_F";
|
|
15
|
+
CharCodes[CharCodes["LOWER_X"] = 120] = "LOWER_X";
|
|
16
|
+
/** Bit that needs to be set to convert an upper case ASCII character to lower case */
|
|
17
|
+
CharCodes[CharCodes["To_LOWER_BIT"] = 32] = "To_LOWER_BIT";
|
|
18
|
+
})(CharCodes || (CharCodes = {}));
|
|
6
19
|
export var BinTrieFlags;
|
|
7
20
|
(function (BinTrieFlags) {
|
|
8
21
|
BinTrieFlags[BinTrieFlags["VALUE_LENGTH"] = 49152] = "VALUE_LENGTH";
|
|
@@ -20,26 +33,26 @@ function getDecoder(decodeTree) {
|
|
|
20
33
|
// Skip the "&"
|
|
21
34
|
strIdx += 1;
|
|
22
35
|
// If we have a numeric entity, handle this separately.
|
|
23
|
-
if (str.charCodeAt(strIdx) ===
|
|
36
|
+
if (str.charCodeAt(strIdx) === CharCodes.NUM) {
|
|
24
37
|
// Skip the leading "&#". For hex entities, also skip the leading "x".
|
|
25
38
|
let start = strIdx + 1;
|
|
26
39
|
let base = 10;
|
|
27
40
|
let cp = str.charCodeAt(start);
|
|
28
|
-
if ((cp |
|
|
41
|
+
if ((cp | CharCodes.To_LOWER_BIT) === CharCodes.LOWER_X) {
|
|
29
42
|
base = 16;
|
|
30
43
|
strIdx += 1;
|
|
31
44
|
start += 1;
|
|
32
45
|
}
|
|
33
46
|
do
|
|
34
47
|
cp = str.charCodeAt(++strIdx);
|
|
35
|
-
while ((cp >=
|
|
48
|
+
while ((cp >= CharCodes.ZERO && cp <= CharCodes.NINE) ||
|
|
36
49
|
(base === 16 &&
|
|
37
|
-
(cp |
|
|
38
|
-
(cp |
|
|
50
|
+
(cp | CharCodes.To_LOWER_BIT) >= CharCodes.LOWER_A &&
|
|
51
|
+
(cp | CharCodes.To_LOWER_BIT) <= CharCodes.LOWER_F));
|
|
39
52
|
if (start !== strIdx) {
|
|
40
53
|
const entity = str.substring(start, strIdx);
|
|
41
54
|
const parsed = parseInt(entity, base);
|
|
42
|
-
if (str.charCodeAt(strIdx) ===
|
|
55
|
+
if (str.charCodeAt(strIdx) === CharCodes.SEMI) {
|
|
43
56
|
strIdx += 1;
|
|
44
57
|
}
|
|
45
58
|
else if (strict) {
|
|
@@ -63,7 +76,7 @@ function getDecoder(decodeTree) {
|
|
|
63
76
|
// If the branch is a value, store it and continue
|
|
64
77
|
if (masked) {
|
|
65
78
|
// If we have a legacy entity while parsing strictly, just skip the number of bytes
|
|
66
|
-
if (!strict || str.charCodeAt(strIdx) ===
|
|
79
|
+
if (!strict || str.charCodeAt(strIdx) === CharCodes.SEMI) {
|
|
67
80
|
resultIdx = treeIdx;
|
|
68
81
|
excess = 0;
|
|
69
82
|
}
|
|
@@ -98,7 +111,7 @@ export function determineBranch(decodeTree, current, nodeIdx, char) {
|
|
|
98
111
|
// Case 2: Multiple branches encoded in jump table
|
|
99
112
|
if (jumpOffset) {
|
|
100
113
|
const value = char - jumpOffset;
|
|
101
|
-
return value < 0 || value
|
|
114
|
+
return value < 0 || value >= branchCount
|
|
102
115
|
? -1
|
|
103
116
|
: decodeTree[nodeIdx + value] - 1;
|
|
104
117
|
}
|
|
@@ -123,12 +136,30 @@ export function determineBranch(decodeTree, current, nodeIdx, char) {
|
|
|
123
136
|
}
|
|
124
137
|
const htmlDecoder = getDecoder(htmlDecodeTree);
|
|
125
138
|
const xmlDecoder = getDecoder(xmlDecodeTree);
|
|
139
|
+
/**
|
|
140
|
+
* Decodes an HTML string, allowing for entities not terminated by a semi-colon.
|
|
141
|
+
*
|
|
142
|
+
* @param str The string to decode.
|
|
143
|
+
* @returns The decoded string.
|
|
144
|
+
*/
|
|
126
145
|
export function decodeHTML(str) {
|
|
127
146
|
return htmlDecoder(str, false);
|
|
128
147
|
}
|
|
148
|
+
/**
|
|
149
|
+
* Decodes an HTML string, requiring all entities to be terminated by a semi-colon.
|
|
150
|
+
*
|
|
151
|
+
* @param str The string to decode.
|
|
152
|
+
* @returns The decoded string.
|
|
153
|
+
*/
|
|
129
154
|
export function decodeHTMLStrict(str) {
|
|
130
155
|
return htmlDecoder(str, true);
|
|
131
156
|
}
|
|
157
|
+
/**
|
|
158
|
+
* Decodes an XML string, requiring all entities to be terminated by a semi-colon.
|
|
159
|
+
*
|
|
160
|
+
* @param str The string to decode.
|
|
161
|
+
* @returns The decoded string.
|
|
162
|
+
*/
|
|
132
163
|
export function decodeXML(str) {
|
|
133
164
|
return xmlDecoder(str, true);
|
|
134
165
|
}
|
package/lib/esm/decode.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"decode.js","sourceRoot":"","sources":["
|
|
1
|
+
{"version":3,"file":"decode.js","sourceRoot":"https://raw.githubusercontent.com/fb55/entities/867ac709ba482a56a98b7c35f49ca833c74dc193/src/","sources":["decode.ts"],"names":[],"mappings":"AAAA,OAAO,cAAc,MAAM,iCAAiC,CAAC;AAC7D,OAAO,aAAa,MAAM,gCAAgC,CAAC;AAC3D,OAAO,eAAe,MAAM,uBAAuB,CAAC;AAEpD,uCAAuC;AACvC,OAAO,EAAE,cAAc,EAAE,aAAa,EAAE,eAAe,EAAE,CAAC;AAC1D,OAAO,EAAE,gBAAgB,EAAE,aAAa,EAAE,MAAM,uBAAuB,CAAC;AAExE,IAAW,SAUV;AAVD,WAAW,SAAS;IAChB,wCAAQ,CAAA;IACR,0CAAS,CAAA;IACT,0CAAS,CAAA;IACT,0CAAS,CAAA;IACT,gDAAY,CAAA;IACZ,iDAAa,CAAA;IACb,iDAAa,CAAA;IACb,sFAAsF;IACtF,0DAAuB,CAAA;AAC3B,CAAC,EAVU,SAAS,KAAT,SAAS,QAUnB;AAED,MAAM,CAAN,IAAY,YAIX;AAJD,WAAY,YAAY;IACpB,mEAAoC,CAAA;IACpC,qEAAqC,CAAA;IACrC,6DAAkC,CAAA;AACtC,CAAC,EAJW,YAAY,KAAZ,YAAY,QAIvB;AAED,SAAS,UAAU,CAAC,UAAuB;IACvC,OAAO,SAAS,gBAAgB,CAAC,GAAW,EAAE,MAAe;QACzD,IAAI,GAAG,GAAG,EAAE,CAAC;QACb,IAAI,OAAO,GAAG,CAAC,CAAC;QAChB,IAAI,MAAM,GAAG,CAAC,CAAC;QAEf,OAAO,CAAC,MAAM,GAAG,GAAG,CAAC,OAAO,CAAC,GAAG,EAAE,MAAM,CAAC,CAAC,IAAI,CAAC,EAAE;YAC7C,GAAG,IAAI,GAAG,CAAC,KAAK,CAAC,OAAO,EAAE,MAAM,CAAC,CAAC;YAClC,OAAO,GAAG,MAAM,CAAC;YACjB,eAAe;YACf,MAAM,IAAI,CAAC,CAAC;YAEZ,uDAAuD;YACvD,IAAI,GAAG,CAAC,UAAU,CAAC,MAAM,CAAC,KAAK,SAAS,CAAC,GAAG,EAAE;gBAC1C,sEAAsE;gBACtE,IAAI,KAAK,GAAG,MAAM,GAAG,CAAC,CAAC;gBACvB,IAAI,IAAI,GAAG,EAAE,CAAC;gBAEd,IAAI,EAAE,GAAG,GAAG,CAAC,UAAU,CAAC,KAAK,CAAC,CAAC;gBAC/B,IAAI,CAAC,EAAE,GAAG,SAAS,CAAC,YAAY,CAAC,KAAK,SAAS,CAAC,OAAO,EAAE;oBACrD,IAAI,GAAG,EAAE,CAAC;oBACV,MAAM,IAAI,CAAC,CAAC;oBACZ,KAAK,IAAI,CAAC,CAAC;iBACd;gBAED;oBAAG,EAAE,GAAG,GAAG,CAAC,UAAU,CAAC,EAAE,MAAM,CAAC,CAAC;uBAE7B,CAAC,EAAE,IAAI,SAAS,CAAC,IAAI,IAAI,EAAE,IAAI,SAAS,CAAC,IAAI,CAAC;oBAC9C,CAAC,IAAI,KAAK,EAAE;wBACR,CAAC,EAAE,GAAG,SAAS,CAAC,YAAY,CAAC,IAAI,SAAS,CAAC,OAAO;wBAClD,CAAC,EAAE,GAAG,SAAS,CAAC,YAAY,CAAC,IAAI,SAAS,CAAC,OAAO,CAAC,EACzD;gBAEF,IAAI,KAAK,KAAK,MAAM,EAAE;oBAClB,MAAM,MAAM,GAAG,GAAG,CAAC,SAAS,CAAC,KAAK,EAAE,MAAM,CAAC,CAAC;oBAC5C,MAAM,MAAM,GAAG,QAAQ,CAAC,MAAM,EAAE,IAAI,CAAC,CAAC;oBAEtC,IAAI,GAAG,CAAC,UAAU,CAAC,MAAM,CAAC,KAAK,SAAS,CAA
C,IAAI,EAAE;wBAC3C,MAAM,IAAI,CAAC,CAAC;qBACf;yBAAM,IAAI,MAAM,EAAE;wBACf,SAAS;qBACZ;oBAED,GAAG,IAAI,eAAe,CAAC,MAAM,CAAC,CAAC;oBAC/B,OAAO,GAAG,MAAM,CAAC;iBACpB;gBAED,SAAS;aACZ;YAED,IAAI,SAAS,GAAG,CAAC,CAAC;YAClB,IAAI,MAAM,GAAG,CAAC,CAAC;YACf,IAAI,OAAO,GAAG,CAAC,CAAC;YAChB,IAAI,OAAO,GAAG,UAAU,CAAC,OAAO,CAAC,CAAC;YAElC,OAAO,MAAM,GAAG,GAAG,CAAC,MAAM,EAAE,MAAM,EAAE,EAAE,MAAM,EAAE,EAAE;gBAC5C,OAAO,GAAG,eAAe,CACrB,UAAU,EACV,OAAO,EACP,OAAO,GAAG,CAAC,EACX,GAAG,CAAC,UAAU,CAAC,MAAM,CAAC,CACzB,CAAC;gBAEF,IAAI,OAAO,GAAG,CAAC;oBAAE,MAAM;gBAEvB,OAAO,GAAG,UAAU,CAAC,OAAO,CAAC,CAAC;gBAE9B,MAAM,MAAM,GAAG,OAAO,GAAG,YAAY,CAAC,YAAY,CAAC;gBAEnD,kDAAkD;gBAClD,IAAI,MAAM,EAAE;oBACR,mFAAmF;oBACnF,IAAI,CAAC,MAAM,IAAI,GAAG,CAAC,UAAU,CAAC,MAAM,CAAC,KAAK,SAAS,CAAC,IAAI,EAAE;wBACtD,SAAS,GAAG,OAAO,CAAC;wBACpB,MAAM,GAAG,CAAC,CAAC;qBACd;oBAED,4EAA4E;oBAC5E,MAAM,WAAW,GAAG,CAAC,MAAM,IAAI,EAAE,CAAC,GAAG,CAAC,CAAC;oBAEvC,IAAI,WAAW,KAAK,CAAC;wBAAE,MAAM;oBAE7B,OAAO,IAAI,WAAW,CAAC;iBAC1B;aACJ;YAED,IAAI,SAAS,KAAK,CAAC,EAAE;gBACjB,MAAM,WAAW,GACb,CAAC,UAAU,CAAC,SAAS,CAAC,GAAG,YAAY,CAAC,YAAY,CAAC,IAAI,EAAE,CAAC;gBAC9D,GAAG;oBACC,WAAW,KAAK,CAAC;wBACb,CAAC,CAAC,MAAM,CAAC,YAAY,CACf,UAAU,CAAC,SAAS,CAAC,GAAG,CAAC,YAAY,CAAC,YAAY,CACrD;wBACH,CAAC,CAAC,WAAW,KAAK,CAAC;4BACnB,CAAC,CAAC,MAAM,CAAC,YAAY,CAAC,UAAU,CAAC,SAAS,GAAG,CAAC,CAAC,CAAC;4BAChD,CAAC,CAAC,MAAM,CAAC,YAAY,CACf,UAAU,CAAC,SAAS,GAAG,CAAC,CAAC,EACzB,UAAU,CAAC,SAAS,GAAG,CAAC,CAAC,CAC5B,CAAC;gBACZ,OAAO,GAAG,MAAM,GAAG,MAAM,GAAG,CAAC,CAAC;aACjC;SACJ;QAED,OAAO,GAAG,GAAG,GAAG,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;IACpC,CAAC,CAAC;AACN,CAAC;AAED,MAAM,UAAU,eAAe,CAC3B,UAAuB,EACvB,OAAe,EACf,OAAe,EACf,IAAY;IAEZ,MAAM,WAAW,GAAG,CAAC,OAAO,GAAG,YAAY,CAAC,aAAa,CAAC,IAAI,CAAC,CAAC;IAChE,MAAM,UAAU,GAAG,OAAO,GAAG,YAAY,CAAC,UAAU,CAAC;IAErD,+CAA+C;IAC/C,IAAI,WAAW,KAAK,CAAC,EAAE;QACnB,OAAO,UAAU,KAAK,CAAC,IAAI,IAAI,KAAK,UAAU,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;KACjE;IAED,kDAAkD;IAClD,IAAI,UAAU,EAAE;QACZ,MAAM,KAAK,GAAG,IAAI,GAAG,UAAU,CAAC;QAEhC,OAAO,KAAK,GAAG,CAAC,IAAI,KAAK,IAAI
,WAAW;YACpC,CAAC,CAAC,CAAC,CAAC;YACJ,CAAC,CAAC,UAAU,CAAC,OAAO,GAAG,KAAK,CAAC,GAAG,CAAC,CAAC;KACzC;IAED,kDAAkD;IAElD,mCAAmC;IACnC,IAAI,EAAE,GAAG,OAAO,CAAC;IACjB,IAAI,EAAE,GAAG,EAAE,GAAG,WAAW,GAAG,CAAC,CAAC;IAE9B,OAAO,EAAE,IAAI,EAAE,EAAE;QACb,MAAM,GAAG,GAAG,CAAC,EAAE,GAAG,EAAE,CAAC,KAAK,CAAC,CAAC;QAC5B,MAAM,MAAM,GAAG,UAAU,CAAC,GAAG,CAAC,CAAC;QAE/B,IAAI,MAAM,GAAG,IAAI,EAAE;YACf,EAAE,GAAG,GAAG,GAAG,CAAC,CAAC;SAChB;aAAM,IAAI,MAAM,GAAG,IAAI,EAAE;YACtB,EAAE,GAAG,GAAG,GAAG,CAAC,CAAC;SAChB;aAAM;YACH,OAAO,UAAU,CAAC,GAAG,GAAG,WAAW,CAAC,CAAC;SACxC;KACJ;IAED,OAAO,CAAC,CAAC,CAAC;AACd,CAAC;AAED,MAAM,WAAW,GAAG,UAAU,CAAC,cAAc,CAAC,CAAC;AAC/C,MAAM,UAAU,GAAG,UAAU,CAAC,aAAa,CAAC,CAAC;AAE7C;;;;;GAKG;AACH,MAAM,UAAU,UAAU,CAAC,GAAW;IAClC,OAAO,WAAW,CAAC,GAAG,EAAE,KAAK,CAAC,CAAC;AACnC,CAAC;AAED;;;;;GAKG;AACH,MAAM,UAAU,gBAAgB,CAAC,GAAW;IACxC,OAAO,WAAW,CAAC,GAAG,EAAE,IAAI,CAAC,CAAC;AAClC,CAAC;AAED;;;;;GAKG;AACH,MAAM,UAAU,SAAS,CAAC,GAAW;IACjC,OAAO,UAAU,CAAC,GAAG,EAAE,IAAI,CAAC,CAAC;AACjC,CAAC"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"decode_codepoint.d.ts","sourceRoot":"","sources":["
|
|
1
|
+
{"version":3,"file":"decode_codepoint.d.ts","sourceRoot":"https://raw.githubusercontent.com/fb55/entities/867ac709ba482a56a98b7c35f49ca833c74dc193/src/","sources":["decode_codepoint.ts"],"names":[],"mappings":"AAiCA,eAAO,MAAM,aAAa,qCAgBrB,CAAC;AAEN,wBAAgB,gBAAgB,CAAC,SAAS,EAAE,MAAM,UAMjD;AAED,MAAM,CAAC,OAAO,UAAU,eAAe,CAAC,SAAS,EAAE,MAAM,GAAG,MAAM,CAEjE"}
|
|
@@ -30,7 +30,7 @@ const decodeMap = new Map([
|
|
|
30
30
|
[158, 382],
|
|
31
31
|
[159, 376],
|
|
32
32
|
]);
|
|
33
|
-
const fromCodePoint =
|
|
33
|
+
export const fromCodePoint =
|
|
34
34
|
// eslint-disable-next-line @typescript-eslint/no-unnecessary-condition, node/no-unsupported-features/es-builtins
|
|
35
35
|
(_a = String.fromCodePoint) !== null && _a !== void 0 ? _a : function (codePoint) {
|
|
36
36
|
let output = "";
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"decode_codepoint.js","sourceRoot":"","sources":["
|
|
1
|
+
{"version":3,"file":"decode_codepoint.js","sourceRoot":"https://raw.githubusercontent.com/fb55/entities/867ac709ba482a56a98b7c35f49ca833c74dc193/src/","sources":["decode_codepoint.ts"],"names":[],"mappings":"AAAA,qHAAqH;;AAErH,MAAM,SAAS,GAAG,IAAI,GAAG,CAAC;IACtB,CAAC,CAAC,EAAE,KAAK,CAAC;IACV,CAAC,GAAG,EAAE,IAAI,CAAC;IACX,CAAC,GAAG,EAAE,IAAI,CAAC;IACX,CAAC,GAAG,EAAE,GAAG,CAAC;IACV,CAAC,GAAG,EAAE,IAAI,CAAC;IACX,CAAC,GAAG,EAAE,IAAI,CAAC;IACX,CAAC,GAAG,EAAE,IAAI,CAAC;IACX,CAAC,GAAG,EAAE,IAAI,CAAC;IACX,CAAC,GAAG,EAAE,GAAG,CAAC;IACV,CAAC,GAAG,EAAE,IAAI,CAAC;IACX,CAAC,GAAG,EAAE,GAAG,CAAC;IACV,CAAC,GAAG,EAAE,IAAI,CAAC;IACX,CAAC,GAAG,EAAE,GAAG,CAAC;IACV,CAAC,GAAG,EAAE,GAAG,CAAC;IACV,CAAC,GAAG,EAAE,IAAI,CAAC;IACX,CAAC,GAAG,EAAE,IAAI,CAAC;IACX,CAAC,GAAG,EAAE,IAAI,CAAC;IACX,CAAC,GAAG,EAAE,IAAI,CAAC;IACX,CAAC,GAAG,EAAE,IAAI,CAAC;IACX,CAAC,GAAG,EAAE,IAAI,CAAC;IACX,CAAC,GAAG,EAAE,IAAI,CAAC;IACX,CAAC,GAAG,EAAE,GAAG,CAAC;IACV,CAAC,GAAG,EAAE,IAAI,CAAC;IACX,CAAC,GAAG,EAAE,GAAG,CAAC;IACV,CAAC,GAAG,EAAE,IAAI,CAAC;IACX,CAAC,GAAG,EAAE,GAAG,CAAC;IACV,CAAC,GAAG,EAAE,GAAG,CAAC;IACV,CAAC,GAAG,EAAE,GAAG,CAAC;CACb,CAAC,CAAC;AAEH,MAAM,CAAC,MAAM,aAAa;AACtB,iHAAiH;AACjH,MAAA,MAAM,CAAC,aAAa,mCACpB,UAAU,SAAiB;IACvB,IAAI,MAAM,GAAG,EAAE,CAAC;IAEhB,IAAI,SAAS,GAAG,MAAM,EAAE;QACpB,SAAS,IAAI,OAAO,CAAC;QACrB,MAAM,IAAI,MAAM,CAAC,YAAY,CACzB,CAAC,CAAC,SAAS,KAAK,EAAE,CAAC,GAAG,KAAK,CAAC,GAAG,MAAM,CACxC,CAAC;QACF,SAAS,GAAG,MAAM,GAAG,CAAC,SAAS,GAAG,KAAK,CAAC,CAAC;KAC5C;IAED,MAAM,IAAI,MAAM,CAAC,YAAY,CAAC,SAAS,CAAC,CAAC;IACzC,OAAO,MAAM,CAAC;AAClB,CAAC,CAAC;AAEN,MAAM,UAAU,gBAAgB,CAAC,SAAiB;;IAC9C,IAAI,CAAC,SAAS,IAAI,MAAM,IAAI,SAAS,IAAI,MAAM,CAAC,IAAI,SAAS,GAAG,QAAQ,EAAE;QACtE,OAAO,MAAM,CAAC;KACjB;IAED,OAAO,MAAA,SAAS,CAAC,GAAG,CAAC,SAAS,CAAC,mCAAI,SAAS,CAAC;AACjD,CAAC;AAED,MAAM,CAAC,OAAO,UAAU,eAAe,CAAC,SAAiB;IACrD,OAAO,aAAa,CAAC,gBAAgB,CAAC,SAAS,CAAC,CAAC,CAAC;AACtD,CAAC"}
|
package/lib/esm/encode.d.ts
CHANGED
|
@@ -1,60 +1,22 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* Encodes all
|
|
3
|
-
* documents
|
|
2
|
+
* Encodes all characters in the input using HTML entities. This includes
|
|
3
|
+
* characters that are valid ASCII characters in HTML documents, such as `#`.
|
|
4
4
|
*
|
|
5
|
-
*
|
|
6
|
-
*
|
|
7
|
-
|
|
8
|
-
export declare function encodeXML(str: string): string;
|
|
9
|
-
/**
|
|
10
|
-
* Encodes all entities and non-ASCII characters in the input.
|
|
11
|
-
*
|
|
12
|
-
* This includes characters that are valid ASCII characters in HTML documents.
|
|
13
|
-
* For example `#` will be encoded as `&num;`. To get a more compact output,
|
|
14
|
-
* consider using the `encodeNonAsciiHTML` function.
|
|
5
|
+
* To get a more compact output, consider using the `encodeNonAsciiHTML`
|
|
6
|
+
* function, which will only encode characters that are not valid in HTML
|
|
7
|
+
* documents, as well as non-ASCII characters.
|
|
15
8
|
*
|
|
16
|
-
* If a character has no equivalent entity, a
|
|
17
|
-
*
|
|
9
|
+
* If a character has no equivalent entity, a numeric hexadecimal reference
|
|
10
|
+
* (eg. `&#xfc;`) will be used.
|
|
18
11
|
*/
|
|
19
12
|
export declare function encodeHTML(data: string): string;
|
|
20
13
|
/**
|
|
21
14
|
* Encodes all non-ASCII characters, as well as characters not valid in HTML
|
|
22
|
-
* documents using HTML entities.
|
|
15
|
+
* documents using HTML entities. This function will not encode characters that
|
|
16
|
+
* are valid in HTML documents, such as `#`.
|
|
23
17
|
*
|
|
24
|
-
* If a character has no equivalent entity, a
|
|
25
|
-
*
|
|
18
|
+
* If a character has no equivalent entity, a numeric hexadecimal reference
|
|
19
|
+
* (eg. `&#xfc;`) will be used.
|
|
26
20
|
*/
|
|
27
21
|
export declare function encodeNonAsciiHTML(data: string): string;
|
|
28
|
-
/**
|
|
29
|
-
* Encodes all non-ASCII characters, as well as characters not valid in XML
|
|
30
|
-
* documents using numeric hexadecimal reference (eg. `&#xfc;`).
|
|
31
|
-
*
|
|
32
|
-
* Have a look at `escapeUTF8` if you want a more concise output at the expense
|
|
33
|
-
* of reduced transportability.
|
|
34
|
-
*
|
|
35
|
-
* @param data String to escape.
|
|
36
|
-
*/
|
|
37
|
-
export declare const escape: typeof encodeXML;
|
|
38
|
-
/**
|
|
39
|
-
* Encodes all characters not valid in XML documents using XML entities.
|
|
40
|
-
*
|
|
41
|
-
* Note that the output will be character-set dependent.
|
|
42
|
-
*
|
|
43
|
-
* @param data String to escape.
|
|
44
|
-
*/
|
|
45
|
-
export declare const escapeUTF8: (data: string) => string;
|
|
46
|
-
/**
|
|
47
|
-
* Encodes all characters that have to be escaped in HTML attributes,
|
|
48
|
-
* following {@link https://html.spec.whatwg.org/multipage/parsing.html#escapingString}.
|
|
49
|
-
*
|
|
50
|
-
* @param data String to escape.
|
|
51
|
-
*/
|
|
52
|
-
export declare const escapeAttribute: (data: string) => string;
|
|
53
|
-
/**
|
|
54
|
-
* Encodes all characters that have to be escaped in HTML text,
|
|
55
|
-
* following {@link https://html.spec.whatwg.org/multipage/parsing.html#escapingString}.
|
|
56
|
-
*
|
|
57
|
-
* @param data String to escape.
|
|
58
|
-
*/
|
|
59
|
-
export declare const escapeText: (data: string) => string;
|
|
60
22
|
//# sourceMappingURL=encode.d.ts.map
|
package/lib/esm/encode.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"encode.d.ts","sourceRoot":"","sources":["
|
|
1
|
+
{"version":3,"file":"encode.d.ts","sourceRoot":"https://raw.githubusercontent.com/fb55/entities/867ac709ba482a56a98b7c35f49ca833c74dc193/src/","sources":["encode.ts"],"names":[],"mappings":"AAKA;;;;;;;;;;GAUG;AACH,wBAAgB,UAAU,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,CAE/C;AACD;;;;;;;GAOG;AACH,wBAAgB,kBAAkB,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,CAEvD"}
|
package/lib/esm/encode.js
CHANGED
|
@@ -1,119 +1,69 @@
|
|
|
1
|
-
import
|
|
1
|
+
import htmlTrie from "./generated/encode-html.js";
|
|
2
|
+
import { xmlReplacer, getCodePoint } from "./escape.js";
|
|
2
3
|
const htmlReplacer = /[\t\n!-,./:-@[-`\f{-}$\x80-\uFFFF]/g;
|
|
3
|
-
const xmlReplacer = /["&'<>$\x80-\uFFFF]/g;
|
|
4
|
-
const xmlCodeMap = new Map([
|
|
5
|
-
[34, "&quot;"],
|
|
6
|
-
[38, "&amp;"],
|
|
7
|
-
[39, "&apos;"],
|
|
8
|
-
[60, "&lt;"],
|
|
9
|
-
[62, "&gt;"],
|
|
10
|
-
]);
|
|
11
4
|
/**
|
|
12
|
-
* Encodes all
|
|
13
|
-
* documents
|
|
5
|
+
* Encodes all characters in the input using HTML entities. This includes
|
|
6
|
+
* characters that are valid ASCII characters in HTML documents, such as `#`.
|
|
14
7
|
*
|
|
15
|
-
*
|
|
16
|
-
*
|
|
17
|
-
|
|
18
|
-
export function encodeXML(str) {
|
|
19
|
-
let ret = "";
|
|
20
|
-
let lastIdx = 0;
|
|
21
|
-
let match;
|
|
22
|
-
while ((match = xmlReplacer.exec(str)) !== null) {
|
|
23
|
-
const i = match.index;
|
|
24
|
-
const char = str.charCodeAt(i);
|
|
25
|
-
const next = xmlCodeMap.get(char);
|
|
26
|
-
if (next !== undefined) {
|
|
27
|
-
ret += str.substring(lastIdx, i) + next;
|
|
28
|
-
lastIdx = i + 1;
|
|
29
|
-
}
|
|
30
|
-
else {
|
|
31
|
-
ret += `${str.substring(lastIdx, i)}&#x${getCodePoint(str, i).toString(16)};`;
|
|
32
|
-
// Increase by 1 if we have a surrogate pair
|
|
33
|
-
lastIdx = xmlReplacer.lastIndex += Number((char & 65408) === 0xd800);
|
|
34
|
-
}
|
|
35
|
-
}
|
|
36
|
-
return ret + str.substr(lastIdx);
|
|
37
|
-
}
|
|
38
|
-
/**
|
|
39
|
-
* Encodes all entities and non-ASCII characters in the input.
|
|
40
|
-
*
|
|
41
|
-
* This includes characters that are valid ASCII characters in HTML documents.
|
|
42
|
-
* For example `#` will be encoded as `&num;`. To get a more compact output,
|
|
43
|
-
* consider using the `encodeNonAsciiHTML` function.
|
|
8
|
+
* To get a more compact output, consider using the `encodeNonAsciiHTML`
|
|
9
|
+
* function, which will only encode characters that are not valid in HTML
|
|
10
|
+
* documents, as well as non-ASCII characters.
|
|
44
11
|
*
|
|
45
|
-
* If a character has no equivalent entity, a
|
|
46
|
-
*
|
|
12
|
+
* If a character has no equivalent entity, a numeric hexadecimal reference
|
|
13
|
+
* (eg. `&#xfc;`) will be used.
|
|
47
14
|
*/
|
|
48
15
|
export function encodeHTML(data) {
|
|
49
16
|
return encodeHTMLTrieRe(htmlReplacer, data);
|
|
50
17
|
}
|
|
51
18
|
/**
|
|
52
19
|
* Encodes all non-ASCII characters, as well as characters not valid in HTML
|
|
53
|
-
* documents using HTML entities.
|
|
20
|
+
* documents using HTML entities. This function will not encode characters that
|
|
21
|
+
* are valid in HTML documents, such as `#`.
|
|
54
22
|
*
|
|
55
|
-
* If a character has no equivalent entity, a
|
|
56
|
-
* numeric hexadecimal reference (eg. `&#xfc;`) will be used.
|
|
23
|
+
* If a character has no equivalent entity, a numeric hexadecimal reference
|
|
24
|
+
* (eg. `&#xfc;`) will be used.
|
|
57
25
|
*/
|
|
58
26
|
export function encodeNonAsciiHTML(data) {
|
|
59
27
|
return encodeHTMLTrieRe(xmlReplacer, data);
|
|
60
28
|
}
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
29
|
+
function encodeHTMLTrieRe(regExp, str) {
|
|
30
|
+
let ret = "";
|
|
31
|
+
let lastIdx = 0;
|
|
32
|
+
let match;
|
|
33
|
+
while ((match = regExp.exec(str)) !== null) {
|
|
34
|
+
const i = match.index;
|
|
35
|
+
ret += str.substring(lastIdx, i);
|
|
36
|
+
const char = str.charCodeAt(i);
|
|
37
|
+
let next = htmlTrie.get(char);
|
|
38
|
+
if (typeof next === "object") {
|
|
39
|
+
// We are in a branch. Try to match the next char.
|
|
40
|
+
if (i + 1 < str.length) {
|
|
41
|
+
const nextChar = str.charCodeAt(i + 1);
|
|
42
|
+
const value = typeof next.n === "number"
|
|
43
|
+
? next.n === nextChar
|
|
44
|
+
? next.o
|
|
45
|
+
: undefined
|
|
46
|
+
: next.n.get(nextChar);
|
|
47
|
+
if (value !== undefined) {
|
|
48
|
+
ret += value;
|
|
49
|
+
lastIdx = regExp.lastIndex += 1;
|
|
50
|
+
continue;
|
|
51
|
+
}
|
|
79
52
|
}
|
|
80
|
-
|
|
81
|
-
result += map.get(match[0].charCodeAt(0));
|
|
82
|
-
// Every match will be of length 1
|
|
83
|
-
lastIdx = match.index + 1;
|
|
53
|
+
next = next.v;
|
|
84
54
|
}
|
|
85
|
-
|
|
86
|
-
|
|
55
|
+
// We might have a tree node without a value; skip and use a numeric entitiy.
|
|
56
|
+
if (next !== undefined) {
|
|
57
|
+
ret += next;
|
|
58
|
+
lastIdx = i + 1;
|
|
59
|
+
}
|
|
60
|
+
else {
|
|
61
|
+
const cp = getCodePoint(str, i);
|
|
62
|
+
ret += `&#x${cp.toString(16)};`;
|
|
63
|
+
// Increase by 1 if we have a surrogate pair
|
|
64
|
+
lastIdx = regExp.lastIndex += Number(cp !== char);
|
|
65
|
+
}
|
|
66
|
+
}
|
|
67
|
+
return ret + str.substr(lastIdx);
|
|
87
68
|
}
|
|
88
|
-
/**
|
|
89
|
-
* Encodes all characters not valid in XML documents using XML entities.
|
|
90
|
-
*
|
|
91
|
-
* Note that the output will be character-set dependent.
|
|
92
|
-
*
|
|
93
|
-
* @param data String to escape.
|
|
94
|
-
*/
|
|
95
|
-
export const escapeUTF8 = getEscaper(/[&<>'"]/g, xmlCodeMap);
|
|
96
|
-
/**
|
|
97
|
-
* Encodes all characters that have to be escaped in HTML attributes,
|
|
98
|
-
* following {@link https://html.spec.whatwg.org/multipage/parsing.html#escapingString}.
|
|
99
|
-
*
|
|
100
|
-
* @param data String to escape.
|
|
101
|
-
*/
|
|
102
|
-
export const escapeAttribute = getEscaper(/["&\u00A0]/g, new Map([
|
|
103
|
-
[34, "&quot;"],
|
|
104
|
-
[38, "&amp;"],
|
|
105
|
-
[160, "&nbsp;"],
|
|
106
|
-
]));
|
|
107
|
-
/**
|
|
108
|
-
* Encodes all characters that have to be escaped in HTML text,
|
|
109
|
-
* following {@link https://html.spec.whatwg.org/multipage/parsing.html#escapingString}.
|
|
110
|
-
*
|
|
111
|
-
* @param data String to escape.
|
|
112
|
-
*/
|
|
113
|
-
export const escapeText = getEscaper(/[&<>\u00A0]/g, new Map([
|
|
114
|
-
[38, "&amp;"],
|
|
115
|
-
[60, "&lt;"],
|
|
116
|
-
[62, "&gt;"],
|
|
117
|
-
[160, "&nbsp;"],
|
|
118
|
-
]));
|
|
119
69
|
//# sourceMappingURL=encode.js.map
|
package/lib/esm/encode.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"encode.js","sourceRoot":"","sources":["
|
|
1
|
+
{"version":3,"file":"encode.js","sourceRoot":"https://raw.githubusercontent.com/fb55/entities/867ac709ba482a56a98b7c35f49ca833c74dc193/src/","sources":["encode.ts"],"names":[],"mappings":"AAAA,OAAO,QAAQ,MAAM,4BAA4B,CAAC;AAClD,OAAO,EAAE,WAAW,EAAE,YAAY,EAAE,MAAM,aAAa,CAAC;AAExD,MAAM,YAAY,GAAG,qCAAqC,CAAC;AAE3D;;;;;;;;;;GAUG;AACH,MAAM,UAAU,UAAU,CAAC,IAAY;IACnC,OAAO,gBAAgB,CAAC,YAAY,EAAE,IAAI,CAAC,CAAC;AAChD,CAAC;AACD;;;;;;;GAOG;AACH,MAAM,UAAU,kBAAkB,CAAC,IAAY;IAC3C,OAAO,gBAAgB,CAAC,WAAW,EAAE,IAAI,CAAC,CAAC;AAC/C,CAAC;AAED,SAAS,gBAAgB,CAAC,MAAc,EAAE,GAAW;IACjD,IAAI,GAAG,GAAG,EAAE,CAAC;IACb,IAAI,OAAO,GAAG,CAAC,CAAC;IAChB,IAAI,KAAK,CAAC;IAEV,OAAO,CAAC,KAAK,GAAG,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,KAAK,IAAI,EAAE;QACxC,MAAM,CAAC,GAAG,KAAK,CAAC,KAAK,CAAC;QACtB,GAAG,IAAI,GAAG,CAAC,SAAS,CAAC,OAAO,EAAE,CAAC,CAAC,CAAC;QACjC,MAAM,IAAI,GAAG,GAAG,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC;QAC/B,IAAI,IAAI,GAAG,QAAQ,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;QAE9B,IAAI,OAAO,IAAI,KAAK,QAAQ,EAAE;YAC1B,kDAAkD;YAClD,IAAI,CAAC,GAAG,CAAC,GAAG,GAAG,CAAC,MAAM,EAAE;gBACpB,MAAM,QAAQ,GAAG,GAAG,CAAC,UAAU,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;gBACvC,MAAM,KAAK,GACP,OAAO,IAAI,CAAC,CAAC,KAAK,QAAQ;oBACtB,CAAC,CAAC,IAAI,CAAC,CAAC,KAAK,QAAQ;wBACjB,CAAC,CAAC,IAAI,CAAC,CAAC;wBACR,CAAC,CAAC,SAAS;oBACf,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC;gBAE/B,IAAI,KAAK,KAAK,SAAS,EAAE;oBACrB,GAAG,IAAI,KAAK,CAAC;oBACb,OAAO,GAAG,MAAM,CAAC,SAAS,IAAI,CAAC,CAAC;oBAChC,SAAS;iBACZ;aACJ;YAED,IAAI,GAAG,IAAI,CAAC,CAAC,CAAC;SACjB;QAED,6EAA6E;QAC7E,IAAI,IAAI,KAAK,SAAS,EAAE;YACpB,GAAG,IAAI,IAAI,CAAC;YACZ,OAAO,GAAG,CAAC,GAAG,CAAC,CAAC;SACnB;aAAM;YACH,MAAM,EAAE,GAAG,YAAY,CAAC,GAAG,EAAE,CAAC,CAAC,CAAC;YAChC,GAAG,IAAI,MAAM,EAAE,CAAC,QAAQ,CAAC,EAAE,CAAC,GAAG,CAAC;YAChC,4CAA4C;YAC5C,OAAO,GAAG,MAAM,CAAC,SAAS,IAAI,MAAM,CAAC,EAAE,KAAK,IAAI,CAAC,CAAC;SACrD;KACJ;IAED,OAAO,GAAG,GAAG,GAAG,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;AACrC,CAAC"}
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
export declare const xmlReplacer: RegExp;
|
|
2
|
+
export declare const getCodePoint: (str: string, index: number) => number;
|
|
3
|
+
/**
|
|
4
|
+
* Encodes all non-ASCII characters, as well as characters not valid in XML
|
|
5
|
+
* documents using XML entities.
|
|
6
|
+
*
|
|
7
|
+
* If a character has no equivalent entity, a
|
|
8
|
+
* numeric hexadecimal reference (eg. `&#xfc;`) will be used.
|
|
9
|
+
*/
|
|
10
|
+
export declare function encodeXML(str: string): string;
|
|
11
|
+
/**
|
|
12
|
+
* Encodes all non-ASCII characters, as well as characters not valid in XML
|
|
13
|
+
* documents using numeric hexadecimal reference (eg. `&#xfc;`).
|
|
14
|
+
*
|
|
15
|
+
* Have a look at `escapeUTF8` if you want a more concise output at the expense
|
|
16
|
+
* of reduced transportability.
|
|
17
|
+
*
|
|
18
|
+
* @param data String to escape.
|
|
19
|
+
*/
|
|
20
|
+
export declare const escape: typeof encodeXML;
|
|
21
|
+
/**
|
|
22
|
+
* Encodes all characters not valid in XML documents using XML entities.
|
|
23
|
+
*
|
|
24
|
+
* Note that the output will be character-set dependent.
|
|
25
|
+
*
|
|
26
|
+
* @param data String to escape.
|
|
27
|
+
*/
|
|
28
|
+
export declare const escapeUTF8: (data: string) => string;
|
|
29
|
+
/**
|
|
30
|
+
* Encodes all characters that have to be escaped in HTML attributes,
|
|
31
|
+
* following {@link https://html.spec.whatwg.org/multipage/parsing.html#escapingString}.
|
|
32
|
+
*
|
|
33
|
+
* @param data String to escape.
|
|
34
|
+
*/
|
|
35
|
+
export declare const escapeAttribute: (data: string) => string;
|
|
36
|
+
/**
|
|
37
|
+
* Encodes all characters that have to be escaped in HTML text,
|
|
38
|
+
* following {@link https://html.spec.whatwg.org/multipage/parsing.html#escapingString}.
|
|
39
|
+
*
|
|
40
|
+
* @param data String to escape.
|
|
41
|
+
*/
|
|
42
|
+
export declare const escapeText: (data: string) => string;
|
|
43
|
+
//# sourceMappingURL=escape.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"escape.d.ts","sourceRoot":"https://raw.githubusercontent.com/fb55/entities/867ac709ba482a56a98b7c35f49ca833c74dc193/src/","sources":["escape.ts"],"names":[],"mappings":"AAAA,eAAO,MAAM,WAAW,QAAyB,CAAC;AAWlD,eAAO,MAAM,YAAY,QAGT,MAAM,SAAS,MAAM,KAAG,MAQD,CAAC;AAExC;;;;;;GAMG;AACH,wBAAgB,SAAS,CAAC,GAAG,EAAE,MAAM,GAAG,MAAM,CA0B7C;AAED;;;;;;;;GAQG;AACH,eAAO,MAAM,MAAM,kBAAY,CAAC;AA2BhC;;;;;;GAMG;AACH,eAAO,MAAM,UAAU,SA7Bb,MAAM,KAAK,MA6BuC,CAAC;AAE7D;;;;;GAKG;AACH,eAAO,MAAM,eAAe,SArClB,MAAM,KAAK,MA4CpB,CAAC;AAEF;;;;;GAKG;AACH,eAAO,MAAM,UAAU,SApDb,MAAM,KAAK,MA4DpB,CAAC"}
|