entities 6.0.0 → 7.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/decode.d.ts +3 -0
- package/dist/commonjs/decode-codepoint.d.ts.map +1 -1
- package/dist/commonjs/decode-codepoint.js +2 -2
- package/dist/commonjs/decode-codepoint.js.map +1 -1
- package/dist/commonjs/decode.d.ts +1 -7
- package/dist/commonjs/decode.d.ts.map +1 -1
- package/dist/commonjs/decode.js +105 -48
- package/dist/commonjs/decode.js.map +1 -1
- package/dist/commonjs/encode.d.ts.map +1 -1
- package/dist/commonjs/encode.js +49 -30
- package/dist/commonjs/encode.js.map +1 -1
- package/dist/commonjs/escape.d.ts +7 -4
- package/dist/commonjs/escape.d.ts.map +1 -1
- package/dist/commonjs/escape.js +36 -19
- package/dist/commonjs/escape.js.map +1 -1
- package/dist/commonjs/generated/decode-data-html.d.ts.map +1 -1
- package/dist/commonjs/generated/decode-data-html.js +2 -5
- package/dist/commonjs/generated/decode-data-html.js.map +1 -1
- package/dist/commonjs/generated/decode-data-xml.d.ts.map +1 -1
- package/dist/commonjs/generated/decode-data-xml.js +2 -5
- package/dist/commonjs/generated/decode-data-xml.js.map +1 -1
- package/dist/commonjs/generated/encode-html.d.ts +1 -6
- package/dist/commonjs/generated/encode-html.d.ts.map +1 -1
- package/dist/commonjs/generated/encode-html.js +9 -8
- package/dist/commonjs/generated/encode-html.js.map +1 -1
- package/dist/commonjs/index.d.ts +3 -3
- package/dist/commonjs/index.d.ts.map +1 -1
- package/dist/commonjs/index.js +19 -19
- package/dist/commonjs/index.js.map +1 -1
- package/dist/commonjs/internal/bin-trie-flags.d.ts +17 -0
- package/dist/commonjs/internal/bin-trie-flags.d.ts.map +1 -0
- package/dist/commonjs/internal/bin-trie-flags.js +21 -0
- package/dist/commonjs/internal/bin-trie-flags.js.map +1 -0
- package/dist/commonjs/internal/decode-shared.d.ts +2 -0
- package/dist/commonjs/internal/decode-shared.d.ts.map +1 -0
- package/dist/commonjs/internal/decode-shared.js +31 -0
- package/dist/commonjs/internal/decode-shared.js.map +1 -0
- package/dist/commonjs/internal/encode-shared.d.ts +32 -0
- package/dist/commonjs/internal/encode-shared.d.ts.map +1 -0
- package/dist/commonjs/internal/encode-shared.js +94 -0
- package/dist/commonjs/internal/encode-shared.js.map +1 -0
- package/dist/esm/decode-codepoint.d.ts.map +1 -1
- package/dist/esm/decode-codepoint.js +2 -2
- package/dist/esm/decode-codepoint.js.map +1 -1
- package/dist/esm/decode.d.ts +1 -7
- package/dist/esm/decode.d.ts.map +1 -1
- package/dist/esm/decode.js +96 -39
- package/dist/esm/decode.js.map +1 -1
- package/dist/esm/encode.d.ts.map +1 -1
- package/dist/esm/encode.js +49 -30
- package/dist/esm/encode.js.map +1 -1
- package/dist/esm/escape.d.ts +7 -4
- package/dist/esm/escape.d.ts.map +1 -1
- package/dist/esm/escape.js +35 -18
- package/dist/esm/escape.js.map +1 -1
- package/dist/esm/generated/decode-data-html.d.ts.map +1 -1
- package/dist/esm/generated/decode-data-html.js +2 -5
- package/dist/esm/generated/decode-data-html.js.map +1 -1
- package/dist/esm/generated/decode-data-xml.d.ts.map +1 -1
- package/dist/esm/generated/decode-data-xml.js +2 -5
- package/dist/esm/generated/decode-data-xml.js.map +1 -1
- package/dist/esm/generated/encode-html.d.ts +1 -6
- package/dist/esm/generated/encode-html.d.ts.map +1 -1
- package/dist/esm/generated/encode-html.js +9 -8
- package/dist/esm/generated/encode-html.js.map +1 -1
- package/dist/esm/index.d.ts +3 -3
- package/dist/esm/index.d.ts.map +1 -1
- package/dist/esm/index.js +9 -9
- package/dist/esm/index.js.map +1 -1
- package/dist/esm/internal/bin-trie-flags.d.ts +17 -0
- package/dist/esm/internal/bin-trie-flags.d.ts.map +1 -0
- package/dist/esm/internal/bin-trie-flags.js +18 -0
- package/dist/esm/internal/bin-trie-flags.js.map +1 -0
- package/dist/esm/internal/decode-shared.d.ts +2 -0
- package/dist/esm/internal/decode-shared.d.ts.map +1 -0
- package/dist/esm/internal/decode-shared.js +28 -0
- package/dist/esm/internal/decode-shared.js.map +1 -0
- package/dist/esm/internal/encode-shared.d.ts +32 -0
- package/dist/esm/internal/encode-shared.d.ts.map +1 -0
- package/dist/esm/internal/encode-shared.js +91 -0
- package/dist/esm/internal/encode-shared.js.map +1 -0
- package/escape.d.ts +3 -0
- package/package.json +19 -22
- package/src/decode-codepoint.ts +2 -2
- package/src/decode.spec.ts +44 -1
- package/src/decode.ts +111 -55
- package/src/encode.spec.ts +1 -1
- package/src/encode.ts +47 -31
- package/src/escape.spec.ts +1 -1
- package/src/escape.ts +39 -26
- package/src/generated/decode-data-html.ts +3 -5
- package/src/generated/decode-data-xml.ts +3 -5
- package/src/generated/encode-html.ts +14 -14
- package/src/index.spec.ts +2 -2
- package/src/index.ts +23 -24
- package/src/internal/bin-trie-flags.ts +16 -0
- package/src/internal/decode-shared.ts +30 -0
- package/src/internal/encode-shared.ts +121 -0
package/dist/esm/index.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,YAAY,EAAE,UAAU,EAAE,SAAS,EAAE,MAAM,aAAa,CAAC;AAClE,OAAO,EAAE,UAAU,EAAE,kBAAkB,EAAE,MAAM,aAAa,CAAC;AAC7D,OAAO,EACH,SAAS,EACT,eAAe,EACf,UAAU,EACV,UAAU,GACb,MAAM,aAAa,CAAC;AAErB,wCAAwC;AACxC,MAAM,CAAN,IAAY,WAKX;AALD,WAAY,WAAW;IACnB,iCAAiC;IACjC,2CAAO,CAAA;IACP,mEAAmE;IACnE,6CAAQ,CAAA;AACZ,CAAC,EALW,WAAW,KAAX,WAAW,QAKtB;AAED,MAAM,CAAN,IAAY,YA2BX;AA3BD,WAAY,YAAY;IACpB;;;OAGG;IACH,+CAAI,CAAA;IACJ;;;;OAIG;IACH,iDAAK,CAAA;IACL;;;OAGG;IACH,yDAAS,CAAA;IACT;;;OAGG;IACH,yDAAS,CAAA;IACT;;;OAGG;IACH,+CAAI,CAAA;AACR,CAAC,EA3BW,YAAY,KAAZ,YAAY,QA2BvB;AAsBD;;;;;GAKG;AACH,MAAM,UAAU,MAAM,CAClB,KAAa,EACb,UAAyC,WAAW,CAAC,GAAG;IAExD,MAAM,KAAK,GAAG,OAAO,OAAO,KAAK,QAAQ,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,OAAO,CAAC,KAAK,CAAC;IAEpE,IAAI,KAAK,KAAK,WAAW,CAAC,IAAI,EAAE,CAAC;QAC7B,MAAM,IAAI,GAAG,OAAO,OAAO,KAAK,QAAQ,CAAC,CAAC,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,SAAS,CAAC;QACpE,OAAO,UAAU,CAAC,KAAK,EAAE,IAAI,CAAC,CAAC;IACnC,CAAC;IAED,OAAO,SAAS,CAAC,KAAK,CAAC,CAAC;AAC5B,CAAC;AAED;;;;;;GAMG;AACH,MAAM,UAAU,YAAY,CACxB,KAAa,EACb,UAAyC,WAAW,CAAC,GAAG;;IAExD,MAAM,iBAAiB,GACnB,OAAO,OAAO,KAAK,QAAQ,CAAC,CAAC,CAAC,EAAE,KAAK,EAAE,OAAO,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC;IAC/D,MAAA,iBAAiB,CAAC,IAAI,oCAAtB,iBAAiB,CAAC,IAAI,GAAK,YAAY,CAAC,MAAM,EAAC;IAE/C,OAAO,MAAM,CAAC,KAAK,EAAE,iBAAiB,CAAC,CAAC;AAC5C,CAAC;AAkBD;;;;;GAKG;AACH,MAAM,UAAU,MAAM,CAClB,KAAa,EACb,UAAyC,WAAW,CAAC,GAAG;IAExD,MAAM,EAAE,IAAI,GAAG,YAAY,CAAC,SAAS,EAAE,KAAK,GAAG,WAAW,CAAC,GAAG,EAAE,GAC5D,OAAO,OAAO,KAAK,QAAQ,CAAC,CAAC,CAAC,EAAE,KAAK,EAAE,OAAO,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC;IAE/D,QAAQ,IAAI,EAAE,CAAC;QACX,KAAK,YAAY,CAAC,IAAI,CAAC,CAAC,CAAC;YACrB,OAAO,UAAU,CAAC,KAAK,CAAC,CAAC;QAC7B,CAAC;QACD,KAAK,YAAY,CAAC,SAAS,CAAC,CAAC,CAAC;YAC1B,OAAO,eAAe,CAAC,KAAK,CAAC,CAAC;QAClC,CAAC;QACD,KAAK,YAAY,CAAC,IAAI,CAAC,CAAC,CAAC;YACrB,OAAO,UAAU,CAAC,KAAK,CAAC,CAAC;QAC7B,CAAC;QACD,KAAK,YAAY,CAAC,KAAK,CAAC,CAAC,CAAC;YACtB,OAAO,KAAK,KAAK,WAAW,CAAC,IAAI;gBAC7B,CA
AC,CAAC,kBAAkB,CAAC,KAAK,CAAC;gBAC3B,CAAC,CAAC,SAAS,CAAC,KAAK,CAAC,CAAC;QAC3B,CAAC;QACD,wGAAwG;QACxG,KAAK,YAAY,CAAC,SAAS,CAAC,CAAC,qDAAqD;QAClF,OAAO,CAAC,CAAC,CAAC;YACN,OAAO,KAAK,KAAK,WAAW,CAAC,IAAI;gBAC7B,CAAC,CAAC,UAAU,CAAC,KAAK,CAAC;gBACnB,CAAC,CAAC,SAAS,CAAC,KAAK,CAAC,CAAC;QAC3B,CAAC;IACL,CAAC;AACL,CAAC;AAED,OAAO,EACH,YAAY,EACZ,UAAU;AACV,8BAA8B;AAC9B,UAAU,IAAI,WAAW,EACzB,UAAU,IAAI,WAAW,EACzB,mBAAmB,EACnB,gBAAgB,EAChB,gBAAgB,IAAI,iBAAiB,EACrC,gBAAgB,IAAI,iBAAiB,EACrC,SAAS,EACT,SAAS,IAAI,eAAe,EAC5B,aAAa,GAChB,MAAM,aAAa,CAAC;AAErB,OAAO,EACH,UAAU;AACV,8BAA8B;AAC9B,UAAU,IAAI,WAAW,EACzB,UAAU,IAAI,WAAW,EACzB,kBAAkB,GACrB,MAAM,aAAa,CAAC;AACrB,OAAO,EACH,SAAS,EACT,MAAM,EACN,eAAe,EACf,UAAU,EACV,UAAU,GACb,MAAM,aAAa,CAAC"}
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Bit flags & masks for the binary trie encoding used for entity decoding.
|
|
3
|
+
*
|
|
4
|
+
* Bit layout (16 bits total):
|
|
5
|
+
* 15..14 VALUE_LENGTH (+1 encoding; 0 => no value)
|
|
6
|
+
* 13 FLAG13. If valueLength>0: semicolon required flag (implicit ';').
|
|
7
|
+
* If valueLength==0: compact run flag.
|
|
8
|
+
* 12..7 BRANCH_LENGTH Branch length (0 => single branch in 6..0 if jumpOffset==char) OR run length (when compact run)
|
|
9
|
+
* 6..0 JUMP_TABLE Jump offset (jump table) OR single-branch char code OR first run char
|
|
10
|
+
*/
|
|
11
|
+
export declare enum BinTrieFlags {
|
|
12
|
+
VALUE_LENGTH = 49152,
|
|
13
|
+
FLAG13 = 8192,
|
|
14
|
+
BRANCH_LENGTH = 8064,
|
|
15
|
+
JUMP_TABLE = 127
|
|
16
|
+
}
|
|
17
|
+
//# sourceMappingURL=bin-trie-flags.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"bin-trie-flags.d.ts","sourceRoot":"","sources":["../../../src/internal/bin-trie-flags.ts"],"names":[],"mappings":"AAAA;;;;;;;;;GASG;AACH,oBAAY,YAAY;IACpB,YAAY,QAAwB;IACpC,MAAM,OAAwB;IAC9B,aAAa,OAAwB;IACrC,UAAU,MAAwB;CACrC"}
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Bit flags & masks for the binary trie encoding used for entity decoding.
|
|
3
|
+
*
|
|
4
|
+
* Bit layout (16 bits total):
|
|
5
|
+
* 15..14 VALUE_LENGTH (+1 encoding; 0 => no value)
|
|
6
|
+
* 13 FLAG13. If valueLength>0: semicolon required flag (implicit ';').
|
|
7
|
+
* If valueLength==0: compact run flag.
|
|
8
|
+
* 12..7 BRANCH_LENGTH Branch length (0 => single branch in 6..0 if jumpOffset==char) OR run length (when compact run)
|
|
9
|
+
* 6..0 JUMP_TABLE Jump offset (jump table) OR single-branch char code OR first run char
|
|
10
|
+
*/
|
|
11
|
+
export var BinTrieFlags;
|
|
12
|
+
(function (BinTrieFlags) {
|
|
13
|
+
BinTrieFlags[BinTrieFlags["VALUE_LENGTH"] = 49152] = "VALUE_LENGTH";
|
|
14
|
+
BinTrieFlags[BinTrieFlags["FLAG13"] = 8192] = "FLAG13";
|
|
15
|
+
BinTrieFlags[BinTrieFlags["BRANCH_LENGTH"] = 8064] = "BRANCH_LENGTH";
|
|
16
|
+
BinTrieFlags[BinTrieFlags["JUMP_TABLE"] = 127] = "JUMP_TABLE";
|
|
17
|
+
})(BinTrieFlags || (BinTrieFlags = {}));
|
|
18
|
+
//# sourceMappingURL=bin-trie-flags.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"bin-trie-flags.js","sourceRoot":"","sources":["../../../src/internal/bin-trie-flags.ts"],"names":[],"mappings":"AAAA;;;;;;;;;GASG;AACH,MAAM,CAAN,IAAY,YAKX;AALD,WAAY,YAAY;IACpB,mEAAoC,CAAA;IACpC,sDAA8B,CAAA;IAC9B,oEAAqC,CAAA;IACrC,6DAAkC,CAAA;AACtC,CAAC,EALW,YAAY,KAAZ,YAAY,QAKvB"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"decode-shared.d.ts","sourceRoot":"","sources":["../../../src/internal/decode-shared.ts"],"names":[],"mappings":"AAIA,wBAAgB,YAAY,CAAC,KAAK,EAAE,MAAM,GAAG,WAAW,CAyBvD"}
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Shared base64 decode helper for generated decode data.
|
|
3
|
+
* Uses the global atob when available; otherwise falls back to Node's Buffer.
|
|
4
|
+
*/
|
|
5
|
+
export function decodeBase64(input) {
|
|
6
|
+
const binary =
|
|
7
|
+
// eslint-disable-next-line n/no-unsupported-features/node-builtins
|
|
8
|
+
typeof atob === "function"
|
|
9
|
+
? // Browser (and Node >=16)
|
|
10
|
+
// eslint-disable-next-line n/no-unsupported-features/node-builtins
|
|
11
|
+
atob(input)
|
|
12
|
+
: // Older Node versions (<16)
|
|
13
|
+
// eslint-disable-next-line n/no-unsupported-features/node-builtins
|
|
14
|
+
typeof Buffer.from === "function"
|
|
15
|
+
? // eslint-disable-next-line n/no-unsupported-features/node-builtins
|
|
16
|
+
Buffer.from(input, "base64").toString("binary")
|
|
17
|
+
: // eslint-disable-next-line unicorn/no-new-buffer, n/no-deprecated-api
|
|
18
|
+
new Buffer(input, "base64").toString("binary");
|
|
19
|
+
const evenLength = binary.length & ~1; // Round down to even length
|
|
20
|
+
const out = new Uint16Array(evenLength / 2);
|
|
21
|
+
for (let index = 0, outIndex = 0; index < evenLength; index += 2) {
|
|
22
|
+
const lo = binary.charCodeAt(index);
|
|
23
|
+
const hi = binary.charCodeAt(index + 1);
|
|
24
|
+
out[outIndex++] = lo | (hi << 8);
|
|
25
|
+
}
|
|
26
|
+
return out;
|
|
27
|
+
}
|
|
28
|
+
//# sourceMappingURL=decode-shared.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"decode-shared.js","sourceRoot":"","sources":["../../../src/internal/decode-shared.ts"],"names":[],"mappings":"AAAA;;;GAGG;AACH,MAAM,UAAU,YAAY,CAAC,KAAa;IACtC,MAAM,MAAM;IACR,mEAAmE;IACnE,OAAO,IAAI,KAAK,UAAU;QACtB,CAAC,CAAC,0BAA0B;YAC1B,mEAAmE;YACnE,IAAI,CAAC,KAAK,CAAC;QACb,CAAC,CAAC,4BAA4B;YAC5B,mEAAmE;YACnE,OAAO,MAAM,CAAC,IAAI,KAAK,UAAU;gBACjC,CAAC,CAAC,mEAAmE;oBACnE,MAAM,CAAC,IAAI,CAAC,KAAK,EAAE,QAAQ,CAAC,CAAC,QAAQ,CAAC,QAAQ,CAAC;gBACjD,CAAC,CAAC,sEAAsE;oBACtE,IAAI,MAAM,CAAC,KAAK,EAAE,QAAQ,CAAC,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC;IAE3D,MAAM,UAAU,GAAG,MAAM,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,4BAA4B;IACnE,MAAM,GAAG,GAAG,IAAI,WAAW,CAAC,UAAU,GAAG,CAAC,CAAC,CAAC;IAE5C,KAAK,IAAI,KAAK,GAAG,CAAC,EAAE,QAAQ,GAAG,CAAC,EAAE,KAAK,GAAG,UAAU,EAAE,KAAK,IAAI,CAAC,EAAE,CAAC;QAC/D,MAAM,EAAE,GAAG,MAAM,CAAC,UAAU,CAAC,KAAK,CAAC,CAAC;QACpC,MAAM,EAAE,GAAG,MAAM,CAAC,UAAU,CAAC,KAAK,GAAG,CAAC,CAAC,CAAC;QACxC,GAAG,CAAC,QAAQ,EAAE,CAAC,GAAG,EAAE,GAAG,CAAC,EAAE,IAAI,CAAC,CAAC,CAAC;IACrC,CAAC;IAED,OAAO,GAAG,CAAC;AACf,CAAC"}
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* A node inside the encoding trie used by `encode.ts`.
|
|
3
|
+
*
|
|
4
|
+
* There are two physical shapes to minimize allocations and lookup cost:
|
|
5
|
+
*
|
|
6
|
+
* 1. Leaf node (string)
|
|
7
|
+
* - A plain string (already in the form `"&name;"`).
|
|
8
|
+
* - Represents a terminal match with no children.
|
|
9
|
+
*
|
|
10
|
+
* 2. Branch / value node (object)
|
|
11
|
+
*/
|
|
12
|
+
export type EncodeTrieNode = string | {
|
|
13
|
+
/**
|
|
14
|
+
* Entity value for the current code point sequence (wrapped: `&...;`).
|
|
15
|
+
* Present when the path to this node itself is a valid named entity.
|
|
16
|
+
*/
|
|
17
|
+
value: string | undefined;
|
|
18
|
+
/** If a number, the next code unit of the only next character. */
|
|
19
|
+
next: number | Map<number, EncodeTrieNode>;
|
|
20
|
+
/** If next is a number, `nextValue` contains the entity value. */
|
|
21
|
+
nextValue?: string;
|
|
22
|
+
};
|
|
23
|
+
/**
|
|
24
|
+
* Parse a compact encode trie string into a Map structure used for encoding.
|
|
25
|
+
*
|
|
26
|
+
* Format per entry (ascending code points using delta encoding):
|
|
27
|
+
* <diffBase36>[&name;][{<children>}] -- diff omitted when 0
|
|
28
|
+
* Where diff = currentKey - previousKey - 1 (first entry stores absolute key).
|
|
29
|
+
* `&name;` is the entity value (already wrapped); a following `{` denotes children.
|
|
30
|
+
*/
|
|
31
|
+
export declare function parseEncodeTrie(serialized: string): Map<number, EncodeTrieNode>;
|
|
32
|
+
//# sourceMappingURL=encode-shared.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"encode-shared.d.ts","sourceRoot":"","sources":["../../../src/internal/encode-shared.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;GAUG;AACH,MAAM,MAAM,cAAc,GACpB,MAAM,GACN;IACI;;;OAGG;IACH,KAAK,EAAE,MAAM,GAAG,SAAS,CAAC;IAC1B,kEAAkE;IAClE,IAAI,EAAE,MAAM,GAAG,GAAG,CAAC,MAAM,EAAE,cAAc,CAAC,CAAC;IAC3C,kEAAkE;IAClE,SAAS,CAAC,EAAE,MAAM,CAAC;CACtB,CAAC;AAER;;;;;;;GAOG;AACH,wBAAgB,eAAe,CAC3B,UAAU,EAAE,MAAM,GACnB,GAAG,CAAC,MAAM,EAAE,cAAc,CAAC,CAqF7B"}
|
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Parse a compact encode trie string into a Map structure used for encoding.
|
|
3
|
+
*
|
|
4
|
+
* Format per entry (ascending code points using delta encoding):
|
|
5
|
+
* <diffBase36>[&name;][{<children>}] -- diff omitted when 0
|
|
6
|
+
* Where diff = currentKey - previousKey - 1 (first entry stores absolute key).
|
|
7
|
+
* `&name;` is the entity value (already wrapped); a following `{` denotes children.
|
|
8
|
+
*/
|
|
9
|
+
export function parseEncodeTrie(serialized) {
|
|
10
|
+
const top = new Map();
|
|
11
|
+
const totalLength = serialized.length;
|
|
12
|
+
let cursor = 0;
|
|
13
|
+
let lastTopKey = -1;
|
|
14
|
+
function readDiff() {
|
|
15
|
+
const start = cursor;
|
|
16
|
+
while (cursor < totalLength) {
|
|
17
|
+
const char = serialized.charAt(cursor);
|
|
18
|
+
if ((char < "0" || char > "9") && (char < "a" || char > "z")) {
|
|
19
|
+
break;
|
|
20
|
+
}
|
|
21
|
+
cursor++;
|
|
22
|
+
}
|
|
23
|
+
if (cursor === start)
|
|
24
|
+
return 0;
|
|
25
|
+
return Number.parseInt(serialized.slice(start, cursor), 36);
|
|
26
|
+
}
|
|
27
|
+
function readEntity() {
|
|
28
|
+
if (serialized[cursor] !== "&") {
|
|
29
|
+
throw new Error(`Child entry missing value near index ${cursor}`);
|
|
30
|
+
}
|
|
31
|
+
// Cursor currently points at '&'
|
|
32
|
+
const start = cursor;
|
|
33
|
+
const end = serialized.indexOf(";", cursor + 1);
|
|
34
|
+
if (end === -1) {
|
|
35
|
+
throw new Error(`Unterminated entity starting at index ${start}`);
|
|
36
|
+
}
|
|
37
|
+
cursor = end + 1; // Move past ';'
|
|
38
|
+
return serialized.slice(start, cursor); // Includes & ... ;
|
|
39
|
+
}
|
|
40
|
+
while (cursor < totalLength) {
|
|
41
|
+
const keyDiff = readDiff();
|
|
42
|
+
const key = lastTopKey === -1 ? keyDiff : lastTopKey + keyDiff + 1;
|
|
43
|
+
let value;
|
|
44
|
+
if (serialized[cursor] === "&")
|
|
45
|
+
value = readEntity();
|
|
46
|
+
if (serialized[cursor] === "{") {
|
|
47
|
+
cursor++; // Skip '{'
|
|
48
|
+
// Parse first child
|
|
49
|
+
let diff = readDiff();
|
|
50
|
+
let childKey = diff; // First key (lastChildKey = -1)
|
|
51
|
+
const firstValue = readEntity();
|
|
52
|
+
if (serialized[cursor] === "{") {
|
|
53
|
+
throw new Error("Unexpected nested '{' beyond depth 2");
|
|
54
|
+
}
|
|
55
|
+
// If end of block -> single child optimization
|
|
56
|
+
if (serialized[cursor] === "}") {
|
|
57
|
+
top.set(key, { value, next: childKey, nextValue: firstValue });
|
|
58
|
+
cursor++; // Skip '}'
|
|
59
|
+
}
|
|
60
|
+
else {
|
|
61
|
+
const childMap = new Map();
|
|
62
|
+
childMap.set(childKey, firstValue);
|
|
63
|
+
let lastChildKey = childKey;
|
|
64
|
+
while (cursor < totalLength && serialized[cursor] !== "}") {
|
|
65
|
+
diff = readDiff();
|
|
66
|
+
childKey = lastChildKey + diff + 1;
|
|
67
|
+
const childValue = readEntity();
|
|
68
|
+
if (serialized[cursor] === "{") {
|
|
69
|
+
throw new Error("Unexpected nested '{' beyond depth 2");
|
|
70
|
+
}
|
|
71
|
+
childMap.set(childKey, childValue);
|
|
72
|
+
lastChildKey = childKey;
|
|
73
|
+
}
|
|
74
|
+
if (serialized[cursor] !== "}") {
|
|
75
|
+
throw new Error("Unterminated child block");
|
|
76
|
+
}
|
|
77
|
+
cursor++; // Skip '}'
|
|
78
|
+
top.set(key, { value, next: childMap });
|
|
79
|
+
}
|
|
80
|
+
}
|
|
81
|
+
else if (value === undefined) {
|
|
82
|
+
throw new Error(`Malformed encode trie: missing value at index ${cursor}`);
|
|
83
|
+
}
|
|
84
|
+
else {
|
|
85
|
+
top.set(key, value);
|
|
86
|
+
}
|
|
87
|
+
lastTopKey = key;
|
|
88
|
+
}
|
|
89
|
+
return top;
|
|
90
|
+
}
|
|
91
|
+
//# sourceMappingURL=encode-shared.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"encode-shared.js","sourceRoot":"","sources":["../../../src/internal/encode-shared.ts"],"names":[],"mappings":"AAyBA;;;;;;;GAOG;AACH,MAAM,UAAU,eAAe,CAC3B,UAAkB;IAElB,MAAM,GAAG,GAAG,IAAI,GAAG,EAA0B,CAAC;IAC9C,MAAM,WAAW,GAAG,UAAU,CAAC,MAAM,CAAC;IACtC,IAAI,MAAM,GAAG,CAAC,CAAC;IACf,IAAI,UAAU,GAAG,CAAC,CAAC,CAAC;IAEpB,SAAS,QAAQ;QACb,MAAM,KAAK,GAAG,MAAM,CAAC;QACrB,OAAO,MAAM,GAAG,WAAW,EAAE,CAAC;YAC1B,MAAM,IAAI,GAAG,UAAU,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC;YAEvC,IAAI,CAAC,IAAI,GAAG,GAAG,IAAI,IAAI,GAAG,GAAG,CAAC,IAAI,CAAC,IAAI,GAAG,GAAG,IAAI,IAAI,GAAG,GAAG,CAAC,EAAE,CAAC;gBAC3D,MAAM;YACV,CAAC;YACD,MAAM,EAAE,CAAC;QACb,CAAC;QACD,IAAI,MAAM,KAAK,KAAK;YAAE,OAAO,CAAC,CAAC;QAC/B,OAAO,MAAM,CAAC,QAAQ,CAAC,UAAU,CAAC,KAAK,CAAC,KAAK,EAAE,MAAM,CAAC,EAAE,EAAE,CAAC,CAAC;IAChE,CAAC;IAED,SAAS,UAAU;QACf,IAAI,UAAU,CAAC,MAAM,CAAC,KAAK,GAAG,EAAE,CAAC;YAC7B,MAAM,IAAI,KAAK,CAAC,wCAAwC,MAAM,EAAE,CAAC,CAAC;QACtE,CAAC;QAED,iCAAiC;QACjC,MAAM,KAAK,GAAG,MAAM,CAAC;QACrB,MAAM,GAAG,GAAG,UAAU,CAAC,OAAO,CAAC,GAAG,EAAE,MAAM,GAAG,CAAC,CAAC,CAAC;QAChD,IAAI,GAAG,KAAK,CAAC,CAAC,EAAE,CAAC;YACb,MAAM,IAAI,KAAK,CAAC,yCAAyC,KAAK,EAAE,CAAC,CAAC;QACtE,CAAC;QACD,MAAM,GAAG,GAAG,GAAG,CAAC,CAAC,CAAC,gBAAgB;QAClC,OAAO,UAAU,CAAC,KAAK,CAAC,KAAK,EAAE,MAAM,CAAC,CAAC,CAAC,mBAAmB;IAC/D,CAAC;IAED,OAAO,MAAM,GAAG,WAAW,EAAE,CAAC;QAC1B,MAAM,OAAO,GAAG,QAAQ,EAAE,CAAC;QAC3B,MAAM,GAAG,GAAG,UAAU,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,UAAU,GAAG,OAAO,GAAG,CAAC,CAAC;QAEnE,IAAI,KAAyB,CAAC;QAC9B,IAAI,UAAU,CAAC,MAAM,CAAC,KAAK,GAAG;YAAE,KAAK,GAAG,UAAU,EAAE,CAAC;QAErD,IAAI,UAAU,CAAC,MAAM,CAAC,KAAK,GAAG,EAAE,CAAC;YAC7B,MAAM,EAAE,CAAC,CAAC,WAAW;YACrB,oBAAoB;YACpB,IAAI,IAAI,GAAG,QAAQ,EAAE,CAAC;YACtB,IAAI,QAAQ,GAAG,IAAI,CAAC,CAAC,gCAAgC;YACrD,MAAM,UAAU,GAAG,UAAU,EAAE,CAAC;YAChC,IAAI,UAAU,CAAC,MAAM,CAAC,KAAK,GAAG,EAAE,CAAC;gBAC7B,MAAM,IAAI,KAAK,CAAC,sCAAsC,CAAC,CAAC;YAC5D,CAAC;YACD,+CAA+C;YAC/C,IAAI,UAAU,CAAC,MAAM,CAAC,KAAK,GAAG,EAAE,CAAC;gBAC7B,GAAG,CAAC,GAAG,CAAC,GAAG,EAAE,EAAE,KAAK,EAAE,IAAI,EAAE,QAAQ,EAAE,SAAS,EAAE,UAAU,EAAE
,CAAC,CAAC;gBAC/D,MAAM,EAAE,CAAC,CAAC,WAAW;YACzB,CAAC;iBAAM,CAAC;gBACJ,MAAM,QAAQ,GAAG,IAAI,GAAG,EAA0B,CAAC;gBACnD,QAAQ,CAAC,GAAG,CAAC,QAAQ,EAAE,UAAU,CAAC,CAAC;gBACnC,IAAI,YAAY,GAAG,QAAQ,CAAC;gBAC5B,OAAO,MAAM,GAAG,WAAW,IAAI,UAAU,CAAC,MAAM,CAAC,KAAK,GAAG,EAAE,CAAC;oBACxD,IAAI,GAAG,QAAQ,EAAE,CAAC;oBAClB,QAAQ,GAAG,YAAY,GAAG,IAAI,GAAG,CAAC,CAAC;oBACnC,MAAM,UAAU,GAAG,UAAU,EAAE,CAAC;oBAChC,IAAI,UAAU,CAAC,MAAM,CAAC,KAAK,GAAG,EAAE,CAAC;wBAC7B,MAAM,IAAI,KAAK,CAAC,sCAAsC,CAAC,CAAC;oBAC5D,CAAC;oBACD,QAAQ,CAAC,GAAG,CAAC,QAAQ,EAAE,UAAU,CAAC,CAAC;oBACnC,YAAY,GAAG,QAAQ,CAAC;gBAC5B,CAAC;gBACD,IAAI,UAAU,CAAC,MAAM,CAAC,KAAK,GAAG,EAAE,CAAC;oBAC7B,MAAM,IAAI,KAAK,CAAC,0BAA0B,CAAC,CAAC;gBAChD,CAAC;gBACD,MAAM,EAAE,CAAC,CAAC,WAAW;gBACrB,GAAG,CAAC,GAAG,CAAC,GAAG,EAAE,EAAE,KAAK,EAAE,IAAI,EAAE,QAAQ,EAAE,CAAC,CAAC;YAC5C,CAAC;QACL,CAAC;aAAM,IAAI,KAAK,KAAK,SAAS,EAAE,CAAC;YAC7B,MAAM,IAAI,KAAK,CACX,iDAAiD,MAAM,EAAE,CAC5D,CAAC;QACN,CAAC;aAAM,CAAC;YACJ,GAAG,CAAC,GAAG,CAAC,GAAG,EAAE,KAAK,CAAC,CAAC;QACxB,CAAC;QACD,UAAU,GAAG,GAAG,CAAC;IACrB,CAAC;IACD,OAAO,GAAG,CAAC;AACf,CAAC"}
|
package/escape.d.ts
ADDED
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "entities",
|
|
3
|
-
"version": "6.0.0",
|
|
3
|
+
"version": "7.0.0",
|
|
4
4
|
"description": "Encode & decode XML and HTML entities with ease & speed",
|
|
5
5
|
"keywords": [
|
|
6
6
|
"html entities",
|
|
@@ -13,7 +13,7 @@
|
|
|
13
13
|
],
|
|
14
14
|
"repository": {
|
|
15
15
|
"type": "git",
|
|
16
|
-
"url": "
|
|
16
|
+
"url": "https://github.com/fb55/entities.git"
|
|
17
17
|
},
|
|
18
18
|
"funding": "https://github.com/fb55/entities?sponsor=1",
|
|
19
19
|
"license": "BSD-2-Clause",
|
|
@@ -57,7 +57,9 @@
|
|
|
57
57
|
"types": "./dist/commonjs/index.d.ts",
|
|
58
58
|
"files": [
|
|
59
59
|
"decode.js",
|
|
60
|
+
"decode.d.ts",
|
|
60
61
|
"escape.js",
|
|
62
|
+
"escape.d.ts",
|
|
61
63
|
"dist",
|
|
62
64
|
"src"
|
|
63
65
|
],
|
|
@@ -65,37 +67,32 @@
|
|
|
65
67
|
"build:docs": "typedoc --hideGenerator src/index.ts",
|
|
66
68
|
"build:encode-trie": "node --import=tsx scripts/write-encode-map.ts",
|
|
67
69
|
"build:trie": "node --import=tsx scripts/write-decode-map.ts",
|
|
68
|
-
"format": "npm run format:es && npm run format:
|
|
70
|
+
"format": "npm run format:es && npm run format:biome",
|
|
69
71
|
"format:es": "npm run lint:es -- --fix",
|
|
70
|
-
"format:
|
|
71
|
-
"lint": "npm run lint:es && npm run lint:ts && npm run lint:
|
|
72
|
+
"format:biome": "biome check --fix .",
|
|
73
|
+
"lint": "npm run lint:es && npm run lint:ts && npm run lint:biome",
|
|
72
74
|
"lint:es": "eslint . --ignore-path .gitignore",
|
|
73
|
-
"lint:
|
|
75
|
+
"lint:biome": "biome check .",
|
|
74
76
|
"lint:ts": "tsc --noEmit",
|
|
75
77
|
"prepublishOnly": "tshy",
|
|
76
|
-
"prettier": "prettier '**/*.{ts,md,json,yml}'",
|
|
77
78
|
"test": "npm run test:vi && npm run lint",
|
|
78
79
|
"test:vi": "vitest run"
|
|
79
80
|
},
|
|
80
|
-
"prettier": {
|
|
81
|
-
"proseWrap": "always",
|
|
82
|
-
"tabWidth": 4
|
|
83
|
-
},
|
|
84
81
|
"devDependencies": {
|
|
85
|
-
"@
|
|
86
|
-
"@
|
|
87
|
-
"@typescript-eslint/
|
|
88
|
-
"@
|
|
82
|
+
"@biomejs/biome": "^2.2.3",
|
|
83
|
+
"@types/node": "^24.3.1",
|
|
84
|
+
"@typescript-eslint/eslint-plugin": "^8.42.0",
|
|
85
|
+
"@typescript-eslint/parser": "^8.33.1",
|
|
86
|
+
"@vitest/coverage-v8": "^3.2.4",
|
|
89
87
|
"eslint": "^8.57.1",
|
|
90
|
-
"eslint-config-
|
|
91
|
-
"eslint-plugin-n": "^17.
|
|
88
|
+
"eslint-config-biome": "^2.1.3",
|
|
89
|
+
"eslint-plugin-n": "^17.21.3",
|
|
92
90
|
"eslint-plugin-unicorn": "^56.0.1",
|
|
93
|
-
"prettier": "^3.4.2",
|
|
94
91
|
"tshy": "^3.0.2",
|
|
95
|
-
"tsx": "^4.
|
|
96
|
-
"typedoc": "^0.
|
|
97
|
-
"typescript": "^5.
|
|
98
|
-
"vitest": "^2.
|
|
92
|
+
"tsx": "^4.20.5",
|
|
93
|
+
"typedoc": "^0.28.12",
|
|
94
|
+
"typescript": "^5.9.2",
|
|
95
|
+
"vitest": "^3.2.4"
|
|
99
96
|
},
|
|
100
97
|
"engines": {
|
|
101
98
|
"node": ">=0.12"
|
package/src/decode-codepoint.ts
CHANGED
|
@@ -38,7 +38,7 @@ const decodeMap = new Map([
|
|
|
38
38
|
export const fromCodePoint: (...codePoints: number[]) => string =
|
|
39
39
|
// eslint-disable-next-line @typescript-eslint/no-unnecessary-condition, n/no-unsupported-features/es-builtins
|
|
40
40
|
String.fromCodePoint ??
|
|
41
|
-
|
|
41
|
+
((codePoint: number): string => {
|
|
42
42
|
let output = "";
|
|
43
43
|
|
|
44
44
|
if (codePoint > 0xff_ff) {
|
|
@@ -51,7 +51,7 @@ export const fromCodePoint: (...codePoints: number[]) => string =
|
|
|
51
51
|
|
|
52
52
|
output += String.fromCharCode(codePoint);
|
|
53
53
|
return output;
|
|
54
|
-
};
|
|
54
|
+
});
|
|
55
55
|
|
|
56
56
|
/**
|
|
57
57
|
* Replace the given code point with a replacement character if it is a
|
package/src/decode.spec.ts
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { describe,
|
|
1
|
+
import { describe, expect, it, vitest } from "vitest";
|
|
2
2
|
import * as entities from "./decode.js";
|
|
3
3
|
|
|
4
4
|
describe("Decode test", () => {
|
|
@@ -190,6 +190,49 @@ describe("EntityDecoder", () => {
|
|
|
190
190
|
expect(callback).toHaveBeenCalledTimes(0);
|
|
191
191
|
});
|
|
192
192
|
|
|
193
|
+
/*
|
|
194
|
+
* Focused tests exercising early exit paths inside a compact run in the real trie.
|
|
195
|
+
* Discovered prefix: "zi" followed by compact run "grarr"; mismatching inside this run should
|
|
196
|
+
* return 0 with no emission (result still 0).
|
|
197
|
+
*/
|
|
198
|
+
describe("compact run mismatches", () => {
|
|
199
|
+
it("first run character mismatch returns 0", () => {
|
|
200
|
+
const callback = vitest.fn();
|
|
201
|
+
const d = new entities.EntityDecoder(
|
|
202
|
+
entities.htmlDecodeTree,
|
|
203
|
+
callback,
|
|
204
|
+
);
|
|
205
|
+
d.startEntity(entities.DecodingMode.Strict);
|
|
206
|
+
// After '&': correct prefix 'zi', wrong first run char 'X' (expected 'g').
|
|
207
|
+
expect(d.write("ziXgrar", 0)).toBe(0);
|
|
208
|
+
expect(callback).not.toHaveBeenCalled();
|
|
209
|
+
});
|
|
210
|
+
|
|
211
|
+
it("mismatch after one correct run char returns 0", () => {
|
|
212
|
+
const callback = vitest.fn();
|
|
213
|
+
const d = new entities.EntityDecoder(
|
|
214
|
+
entities.htmlDecodeTree,
|
|
215
|
+
callback,
|
|
216
|
+
);
|
|
217
|
+
d.startEntity(entities.DecodingMode.Strict);
|
|
218
|
+
// 'zig' matches prefix + first run char; next char 'X' mismatches expected 'r'.
|
|
219
|
+
expect(d.write("zigXarr", 0)).toBe(0);
|
|
220
|
+
expect(callback).not.toHaveBeenCalled();
|
|
221
|
+
});
|
|
222
|
+
|
|
223
|
+
it("mismatch after two correct run chars returns 0", () => {
|
|
224
|
+
const callback = vitest.fn();
|
|
225
|
+
const d = new entities.EntityDecoder(
|
|
226
|
+
entities.htmlDecodeTree,
|
|
227
|
+
callback,
|
|
228
|
+
);
|
|
229
|
+
d.startEntity(entities.DecodingMode.Strict);
|
|
230
|
+
// 'zigr' matches prefix + first two run chars; next char 'X' mismatches expected 'a'.
|
|
231
|
+
expect(d.write("zigrXrr", 0)).toBe(0);
|
|
232
|
+
expect(callback).not.toHaveBeenCalled();
|
|
233
|
+
});
|
|
234
|
+
});
|
|
235
|
+
|
|
193
236
|
describe("errors", () => {
|
|
194
237
|
it("should produce an error for a named entity without a semicolon", () => {
|
|
195
238
|
const errorHandlers = {
|