@mtkruto/node 0.1.289 → 0.1.299
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +77 -1
- package/esm/client/4_client.js +1 -1
- package/esm/deps/deno.land/x/html_parser@v0.1.3/src/DomHandler.d.ts +83 -0
- package/esm/deps/deno.land/x/html_parser@v0.1.3/src/DomHandler.js +203 -0
- package/esm/deps/deno.land/x/html_parser@v0.1.3/src/DomSerializer.d.ts +50 -0
- package/esm/deps/deno.land/x/html_parser@v0.1.3/src/DomSerializer.js +274 -0
- package/esm/deps/deno.land/x/html_parser@v0.1.3/src/ElementType.d.ts +47 -0
- package/esm/deps/deno.land/x/html_parser@v0.1.3/src/ElementType.js +51 -0
- package/esm/deps/deno.land/x/html_parser@v0.1.3/src/FeedHandler.d.ts +66 -0
- package/esm/deps/deno.land/x/html_parser@v0.1.3/src/FeedHandler.js +191 -0
- package/esm/deps/deno.land/x/html_parser@v0.1.3/src/Node.d.ts +168 -0
- package/esm/deps/deno.land/x/html_parser@v0.1.3/src/Node.js +385 -0
- package/esm/deps/deno.land/x/html_parser@v0.1.3/src/Parser.d.ts +159 -0
- package/esm/deps/deno.land/x/html_parser@v0.1.3/src/Parser.js +431 -0
- package/esm/deps/deno.land/x/html_parser@v0.1.3/src/Tokenizer.d.ts +181 -0
- package/esm/deps/deno.land/x/html_parser@v0.1.3/src/Tokenizer.js +1046 -0
- package/esm/deps/deno.land/x/html_parser@v0.1.3/src/mod.d.ts +42 -0
- package/esm/deps/deno.land/x/html_parser@v0.1.3/src/mod.js +52 -0
- package/esm/deps/deno.land/x/html_parser@v0.1.3/src/utils/entities/decode.d.ts +11 -0
- package/esm/deps/deno.land/x/html_parser@v0.1.3/src/utils/entities/decode.js +122 -0
- package/esm/deps/deno.land/x/html_parser@v0.1.3/src/utils/entities/decode_codepoint.d.ts +1 -0
- package/esm/deps/deno.land/x/html_parser@v0.1.3/src/utils/entities/decode_codepoint.js +24 -0
- package/esm/deps/deno.land/x/html_parser@v0.1.3/src/utils/entities/encode.d.ts +46 -0
- package/esm/deps/deno.land/x/html_parser@v0.1.3/src/utils/entities/encode.js +121 -0
- package/esm/deps/deno.land/x/html_parser@v0.1.3/src/utils/entities/maps/decode.d.ts +31 -0
- package/esm/deps/deno.land/x/html_parser@v0.1.3/src/utils/entities/maps/decode.js +30 -0
- package/esm/deps/deno.land/x/html_parser@v0.1.3/src/utils/entities/maps/entities.d.ts +2128 -0
- package/esm/deps/deno.land/x/html_parser@v0.1.3/src/utils/entities/maps/entities.js +2127 -0
- package/esm/deps/deno.land/x/html_parser@v0.1.3/src/utils/entities/maps/legacy.d.ts +109 -0
- package/esm/deps/deno.land/x/html_parser@v0.1.3/src/utils/entities/maps/legacy.js +108 -0
- package/esm/deps/deno.land/x/html_parser@v0.1.3/src/utils/entities/maps/xml.d.ts +8 -0
- package/esm/deps/deno.land/x/html_parser@v0.1.3/src/utils/entities/maps/xml.js +1 -0
- package/esm/deps/deno.land/x/html_parser@v0.1.3/src/utils/entities/mod.d.ts +90 -0
- package/esm/deps/deno.land/x/html_parser@v0.1.3/src/utils/entities/mod.js +95 -0
- package/esm/deps/deno.land/x/html_parser@v0.1.3/src/utils/helpers.d.ts +50 -0
- package/esm/deps/deno.land/x/html_parser@v0.1.3/src/utils/helpers.js +128 -0
- package/esm/deps/deno.land/x/html_parser@v0.1.3/src/utils/legacy.d.ts +46 -0
- package/esm/deps/deno.land/x/html_parser@v0.1.3/src/utils/legacy.js +110 -0
- package/esm/deps/deno.land/x/html_parser@v0.1.3/src/utils/manipulation.d.ts +42 -0
- package/esm/deps/deno.land/x/html_parser@v0.1.3/src/utils/manipulation.js +120 -0
- package/esm/deps/deno.land/x/html_parser@v0.1.3/src/utils/mod.d.ts +6 -0
- package/esm/deps/deno.land/x/html_parser@v0.1.3/src/utils/mod.js +6 -0
- package/esm/deps/deno.land/x/html_parser@v0.1.3/src/utils/querying.d.ts +54 -0
- package/esm/deps/deno.land/x/html_parser@v0.1.3/src/utils/querying.js +110 -0
- package/esm/deps/deno.land/x/html_parser@v0.1.3/src/utils/stringify.d.ts +40 -0
- package/esm/deps/deno.land/x/html_parser@v0.1.3/src/utils/stringify.js +75 -0
- package/esm/deps/deno.land/x/html_parser@v0.1.3/src/utils/traversal.d.ts +58 -0
- package/esm/deps/deno.land/x/html_parser@v0.1.3/src/utils/traversal.js +101 -0
- package/esm/deps/raw.githubusercontent.com/MTKruto/compress/main/zlib/deflate.d.ts +2 -0
- package/esm/deps/raw.githubusercontent.com/MTKruto/compress/main/zlib/deflate.js +5 -1
- package/esm/deps/raw.githubusercontent.com/MTKruto/compress/main/zlib/inflate.d.ts +2 -2
- package/esm/deps/raw.githubusercontent.com/MTKruto/compress/main/zlib/inflate.js +5 -1
- package/esm/deps/raw.githubusercontent.com/MTKruto/compress/main/zlib/zlib/crc32.js +1 -1
- package/esm/deps/raw.githubusercontent.com/MTKruto/compress/main/zlib/zlib/deflate.js +8 -8
- package/esm/deps/raw.githubusercontent.com/MTKruto/compress/main/zlib/zlib/inffast.js +2 -2
- package/esm/deps/raw.githubusercontent.com/MTKruto/compress/main/zlib/zlib/inflate.js +19 -21
- package/esm/deps/raw.githubusercontent.com/MTKruto/compress/main/zlib/zlib/trees.js +3 -3
- package/package.json +1 -1
- package/script/client/4_client.js +1 -1
- package/script/deps/deno.land/x/html_parser@v0.1.3/src/DomHandler.d.ts +83 -0
- package/script/deps/deno.land/x/html_parser@v0.1.3/src/DomHandler.js +207 -0
- package/script/deps/deno.land/x/html_parser@v0.1.3/src/DomSerializer.d.ts +50 -0
- package/script/deps/deno.land/x/html_parser@v0.1.3/src/DomSerializer.js +301 -0
- package/script/deps/deno.land/x/html_parser@v0.1.3/src/ElementType.d.ts +47 -0
- package/script/deps/deno.land/x/html_parser@v0.1.3/src/ElementType.js +55 -0
- package/script/deps/deno.land/x/html_parser@v0.1.3/src/FeedHandler.d.ts +66 -0
- package/script/deps/deno.land/x/html_parser@v0.1.3/src/FeedHandler.js +222 -0
- package/script/deps/deno.land/x/html_parser@v0.1.3/src/Node.d.ts +168 -0
- package/script/deps/deno.land/x/html_parser@v0.1.3/src/Node.js +404 -0
- package/script/deps/deno.land/x/html_parser@v0.1.3/src/Parser.d.ts +159 -0
- package/script/deps/deno.land/x/html_parser@v0.1.3/src/Parser.js +438 -0
- package/script/deps/deno.land/x/html_parser@v0.1.3/src/Tokenizer.d.ts +181 -0
- package/script/deps/deno.land/x/html_parser@v0.1.3/src/Tokenizer.js +1052 -0
- package/script/deps/deno.land/x/html_parser@v0.1.3/src/mod.d.ts +42 -0
- package/script/deps/deno.land/x/html_parser@v0.1.3/src/mod.js +88 -0
- package/script/deps/deno.land/x/html_parser@v0.1.3/src/utils/entities/decode.d.ts +11 -0
- package/script/deps/deno.land/x/html_parser@v0.1.3/src/utils/entities/decode.js +128 -0
- package/script/deps/deno.land/x/html_parser@v0.1.3/src/utils/entities/decode_codepoint.d.ts +1 -0
- package/script/deps/deno.land/x/html_parser@v0.1.3/src/utils/entities/decode_codepoint.js +30 -0
- package/script/deps/deno.land/x/html_parser@v0.1.3/src/utils/entities/encode.d.ts +46 -0
- package/script/deps/deno.land/x/html_parser@v0.1.3/src/utils/entities/encode.js +129 -0
- package/script/deps/deno.land/x/html_parser@v0.1.3/src/utils/entities/maps/decode.d.ts +31 -0
- package/script/deps/deno.land/x/html_parser@v0.1.3/src/utils/entities/maps/decode.js +32 -0
- package/script/deps/deno.land/x/html_parser@v0.1.3/src/utils/entities/maps/entities.d.ts +2128 -0
- package/script/deps/deno.land/x/html_parser@v0.1.3/src/utils/entities/maps/entities.js +2129 -0
- package/script/deps/deno.land/x/html_parser@v0.1.3/src/utils/entities/maps/legacy.d.ts +109 -0
- package/script/deps/deno.land/x/html_parser@v0.1.3/src/utils/entities/maps/legacy.js +110 -0
- package/script/deps/deno.land/x/html_parser@v0.1.3/src/utils/entities/maps/xml.d.ts +8 -0
- package/script/deps/deno.land/x/html_parser@v0.1.3/src/utils/entities/maps/xml.js +3 -0
- package/script/deps/deno.land/x/html_parser@v0.1.3/src/utils/entities/mod.d.ts +90 -0
- package/script/deps/deno.land/x/html_parser@v0.1.3/src/utils/entities/mod.js +114 -0
- package/script/deps/deno.land/x/html_parser@v0.1.3/src/utils/helpers.d.ts +50 -0
- package/script/deps/deno.land/x/html_parser@v0.1.3/src/utils/helpers.js +134 -0
- package/script/deps/deno.land/x/html_parser@v0.1.3/src/utils/legacy.d.ts +46 -0
- package/script/deps/deno.land/x/html_parser@v0.1.3/src/utils/legacy.js +118 -0
- package/script/deps/deno.land/x/html_parser@v0.1.3/src/utils/manipulation.d.ts +42 -0
- package/script/deps/deno.land/x/html_parser@v0.1.3/src/utils/manipulation.js +129 -0
- package/script/deps/deno.land/x/html_parser@v0.1.3/src/utils/mod.d.ts +6 -0
- package/script/deps/deno.land/x/html_parser@v0.1.3/src/utils/mod.js +22 -0
- package/script/deps/deno.land/x/html_parser@v0.1.3/src/utils/querying.d.ts +54 -0
- package/script/deps/deno.land/x/html_parser@v0.1.3/src/utils/querying.js +119 -0
- package/script/deps/deno.land/x/html_parser@v0.1.3/src/utils/stringify.d.ts +40 -0
- package/script/deps/deno.land/x/html_parser@v0.1.3/src/utils/stringify.js +86 -0
- package/script/deps/deno.land/x/html_parser@v0.1.3/src/utils/traversal.d.ts +58 -0
- package/script/deps/deno.land/x/html_parser@v0.1.3/src/utils/traversal.js +112 -0
- package/script/deps/raw.githubusercontent.com/MTKruto/compress/main/zlib/deflate.d.ts +2 -0
- package/script/deps/raw.githubusercontent.com/MTKruto/compress/main/zlib/deflate.js +7 -1
- package/script/deps/raw.githubusercontent.com/MTKruto/compress/main/zlib/inflate.d.ts +2 -2
- package/script/deps/raw.githubusercontent.com/MTKruto/compress/main/zlib/inflate.js +7 -1
- package/script/deps/raw.githubusercontent.com/MTKruto/compress/main/zlib/zlib/crc32.js +1 -1
- package/script/deps/raw.githubusercontent.com/MTKruto/compress/main/zlib/zlib/deflate.js +8 -8
- package/script/deps/raw.githubusercontent.com/MTKruto/compress/main/zlib/zlib/inffast.js +2 -2
- package/script/deps/raw.githubusercontent.com/MTKruto/compress/main/zlib/zlib/inflate.js +19 -21
- package/script/deps/raw.githubusercontent.com/MTKruto/compress/main/zlib/zlib/trees.js +3 -3
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
import { Parser, ParserOptions } from './Parser.js';
|
|
2
|
+
export { Parser };
|
|
3
|
+
export type { ParserOptions };
|
|
4
|
+
import { Node, Element, Document } from './Node.js';
|
|
5
|
+
import { DomHandler, DomHandlerOptions } from './DomHandler.js';
|
|
6
|
+
export { DomHandler };
|
|
7
|
+
export type { DomHandlerOptions };
|
|
8
|
+
type Options = ParserOptions & DomHandlerOptions;
|
|
9
|
+
/**
|
|
10
|
+
* Parses the data, returns the resulting document.
|
|
11
|
+
*
|
|
12
|
+
* @param data The data that should be parsed.
|
|
13
|
+
* @param options Optional options for the parser and DOM builder.
|
|
14
|
+
*/
|
|
15
|
+
export declare function parseDocument(data: string, options?: Options): Document;
|
|
16
|
+
/**
|
|
17
|
+
* Parses data, returns an array of the root nodes.
|
|
18
|
+
*
|
|
19
|
+
* Note that the root nodes still have a `Document` node as their parent.
|
|
20
|
+
* Use `parseDocument` to get the `Document` node instead.
|
|
21
|
+
*
|
|
22
|
+
* @param data The data that should be parsed.
|
|
23
|
+
* @param options Optional options for the parser and DOM builder.
|
|
24
|
+
* @deprecated Use `parseDocument` instead.
|
|
25
|
+
*/
|
|
26
|
+
export declare function parseDOM(data: string, options?: Options): Node[];
|
|
27
|
+
/**
|
|
28
|
+
* Creates a parser instance, with an attached DOM handler.
|
|
29
|
+
*
|
|
30
|
+
* @param cb A callback that will be called once parsing has been completed.
|
|
31
|
+
* @param options Optional options for the parser and DOM builder.
|
|
32
|
+
* @param elementCb An optional callback that will be called every time a tag has been completed inside of the DOM.
|
|
33
|
+
*/
|
|
34
|
+
export declare function createDomStream(cb: (error: Error | null, dom: Node[]) => void, options?: Options, elementCb?: (element: Element) => void): Parser;
|
|
35
|
+
export { default as Tokenizer } from './Tokenizer.js';
|
|
36
|
+
export type { Callbacks as TokenizerCallbacks } from './Tokenizer.js';
|
|
37
|
+
import * as ElementType from './ElementType.js';
|
|
38
|
+
export { ElementType };
|
|
39
|
+
export * from './FeedHandler.js';
|
|
40
|
+
export * as DomUtils from './utils/mod.js';
|
|
41
|
+
export { DomHandler as DefaultHandler };
|
|
42
|
+
export { FeedHandler as RssHandler } from './FeedHandler.js';
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
import { Parser } from './Parser.js';
|
|
2
|
+
export { Parser };
|
|
3
|
+
import { DomHandler } from './DomHandler.js';
|
|
4
|
+
export { DomHandler };
|
|
5
|
+
// Helper methods
|
|
6
|
+
/**
 * Parse a complete chunk of markup and return the document that was built.
 *
 * A `DomHandler` is created without a completion callback, a `Parser` is
 * attached to it, and the whole input is pushed through `Parser#end`.
 *
 * @param data The markup to parse.
 * @param options Optional settings; the same object is handed to both the DOM handler and the parser.
 * @returns The handler's `root` once the parser has consumed `data`.
 */
export function parseDocument(data, options) {
    const domBuilder = new DomHandler(undefined, options);
    const parser = new Parser(domBuilder, options);
    parser.end(data);
    return domBuilder.root;
}
|
|
17
|
+
/**
 * Parse markup and return only the top-level nodes.
 *
 * The returned array is the `children` list of the document produced by
 * `parseDocument`, so those nodes still belong to that document.
 *
 * @param data The markup to parse.
 * @param options Optional settings forwarded unchanged to `parseDocument`.
 * @returns The `children` of the parsed document.
 * @deprecated Use `parseDocument` instead.
 */
export function parseDOM(data, options) {
    const { children } = parseDocument(data, options);
    return children;
}
|
|
30
|
+
/**
 * Build a parser that is already wired to a fresh DOM handler.
 *
 * Nothing is parsed here; the caller feeds the returned parser.
 *
 * @param cb Completion callback, passed to the `DomHandler` constructor.
 * @param options Optional settings, given to both the DOM handler and the parser.
 * @param elementCb Optional per-element callback, passed to the `DomHandler` constructor.
 * @returns The new `Parser` instance.
 */
export function createDomStream(cb, options, elementCb) {
    return new Parser(new DomHandler(cb, options, elementCb), options);
}
|
|
41
|
+
export { default as Tokenizer } from './Tokenizer.js';
|
|
42
|
+
import * as ElementType from './ElementType.js';
|
|
43
|
+
export { ElementType };
|
|
44
|
+
/*
|
|
45
|
+
* All of the following exports exist for backwards-compatibility.
|
|
46
|
+
* They should probably be removed eventually.
|
|
47
|
+
*/
|
|
48
|
+
export * from './FeedHandler.js';
|
|
49
|
+
export * as DomUtils from './utils/mod.js';
|
|
50
|
+
// Old names for Dom- & FeedHandler
|
|
51
|
+
export { DomHandler as DefaultHandler };
|
|
52
|
+
export { FeedHandler as RssHandler } from './FeedHandler.js';
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
export interface TrieNode {
|
|
2
|
+
value?: string;
|
|
3
|
+
legacy?: boolean;
|
|
4
|
+
base?: number;
|
|
5
|
+
next?: Map<string, TrieNode>;
|
|
6
|
+
}
|
|
7
|
+
export declare const xmlTrie: TrieNode;
|
|
8
|
+
export declare const decodeXML: (str: string) => string;
|
|
9
|
+
export declare const htmlTrie: TrieNode;
|
|
10
|
+
export declare const decodeHTMLStrict: (str: string) => string;
|
|
11
|
+
export declare const decodeHTML: (str: string) => string;
|
|
@@ -0,0 +1,122 @@
|
|
|
1
|
+
import entityMap from './maps/entities.js';
|
|
2
|
+
import legacyMap from './maps/legacy.js';
|
|
3
|
+
import xmlMap from './maps/xml.js';
|
|
4
|
+
import decodeCodePoint from './decode_codepoint.js';
|
|
5
|
+
/**
 * Trie fragment for numeric character references (everything after `&#`).
 *
 * - A decimal digit leads to a node with `base: 10` that loops back to itself
 *   on further decimal digits.
 * - `x` / `X` lead to a prefix node without a `base`; from there every hex
 *   digit (either case) leads to a self-looping node with `base: 16`.
 *
 * A node only carries `base` once at least one digit has been read.
 */
const numStart = (() => {
    const firstChars = new Map();

    // Decimal references: one shared node reachable from the start and from itself.
    const decimalTail = new Map();
    const decimalNode = { next: decimalTail, base: 10 };
    for (const digit of '0123456789') {
        firstChars.set(digit, decimalNode);
        decimalTail.set(digit, decimalNode);
    }

    // Hex references: lower- and upper-case digits share one self-looping node.
    const hexTail = new Map();
    const hexNode = { next: hexTail, base: 16 };
    for (const digit of '0123456789abcdef') {
        hexTail.set(digit, hexNode);
        hexTail.set(digit.toUpperCase(), hexNode);
    }

    // The "x" prefix has no base of its own, so "&#x;" is never decoded.
    const hexPrefix = { next: hexTail };
    firstChars.set('x', hexPrefix);
    firstChars.set('X', hexPrefix);

    return { next: firstChars };
})();
|
|
24
|
+
/**
 * Builds a decoder that replaces entity references by walking a trie.
 *
 * @param trieStart Root trie node; its `next` map is keyed by the character that follows `&`.
 * @param legacyEntities When true, references without a terminating `;` are also
 *   decoded: named ones up to the last node flagged `legacy`, numeric ones as soon
 *   as at least one digit was read.
 * @returns A function mapping a string to its decoded form. Text that does not
 *   form a recognised reference is copied through unchanged.
 */
function getTrieReplacer(trieStart, legacyEntities) {
    return (str) => {
        let ret = '';
        // Start of the input that has not been appended to `ret` yet.
        let lastIdx = 0;
        // Position of the last character of the most recent legacy match.
        let legacyTrieIndex = 0;
        let idx = 0;
        function decodeNumeric(base) {
            const entity = str.substring(
            // Skip the leading "&#". For hex entities, also skip the leading "x".
            lastIdx + 2 + (base >>> 4), idx);
            const parsed = parseInt(entity, base);
            return decodeCodePoint(parsed);
        }
        entityLoop: while ((idx = str.indexOf('&', idx)) >= 0) {
            // Flush the literal text that precedes this "&".
            ret += str.slice(lastIdx, idx);
            lastIdx = idx;
            let trieNode = trieStart;
            let legacyTrie;
            while (++idx < str.length) {
                const c = str.charAt(idx);
                if (c === ';') {
                    if (trieNode.value) {
                        ret += trieNode.value;
                    }
                    else if (trieNode.base) {
                        ret += decodeNumeric(trieNode.base);
                    }
                    else {
                        // ";" reached on a node that is not a complete entity.
                        break;
                    }
                    // Consume the ";" and continue with the next "&".
                    idx += 1;
                    lastIdx = idx;
                    continue entityLoop;
                }
                else {
                    const next = trieNode.next?.get(c);
                    if (next) {
                        trieNode = next;
                        if (legacyEntities && next.legacy) {
                            // Remember the longest legacy entity seen so far.
                            legacyTrie = next;
                            legacyTrieIndex = idx;
                        }
                    }
                    else {
                        break;
                    }
                }
            }
            // No terminating ";" was consumed; fall back to legacy decoding if allowed.
            if (legacyEntities) {
                if (legacyTrie) {
                    ret += legacyTrie.value;
                    lastIdx = legacyTrieIndex + 1;
                }
                else if (trieNode.base) {
                    ret += decodeNumeric(trieNode.base);
                    lastIdx = idx;
                }
            }
        }
        // `slice` replaces the deprecated `substr`; `lastIdx` is never negative here.
        return ret + str.slice(lastIdx);
    };
}
|
|
84
|
+
/** Trie built from the entity names in `./maps/xml.js` (numeric references included). */
export const xmlTrie = { next: getTrie(xmlMap) };

/** Decodes XML entities; only references terminated by `;` are replaced. */
export const decodeXML = getTrieReplacer(xmlTrie, false);

// Trie of every name in `./maps/entities.js`, with the names that also appear
// in `./maps/legacy.js` flagged as legacy.
const htmlEntityTrie = markLegacyEntries(getTrie(entityMap), legacyMap);

/** Trie of HTML entities, with legacy entries marked. */
export const htmlTrie = {
    next: htmlEntityTrie,
};

/** Decodes HTML entities; only references terminated by `;` are replaced. */
export const decodeHTMLStrict = getTrieReplacer(htmlTrie, false);

/** Decodes HTML entities, additionally accepting legacy references without `;`. */
export const decodeHTML = getTrieReplacer(htmlTrie, true);
|
|
91
|
+
/**
 * Converts an entity map (`name -> replacement`) into a character trie.
 *
 * Each level is a `Map` keyed by a single character. A node may hold a
 * `value` (the replacement for the name ending there) and a `next` map for
 * longer names. The shared numeric-reference fragment is attached under `#`.
 *
 * @param map Object whose keys are entity names and whose values are replacements.
 * @returns The root `Map` of the trie.
 */
function getTrie(map) {
    const root = new Map();
    for (const name of Object.keys(map)) {
        const prefix = name.slice(0, -1);
        const lastChar = name.slice(-1);

        // Walk (and create where missing) the nodes for all but the last character.
        let level = root;
        for (const char of prefix) {
            let node = level.get(char);
            if (node == null) {
                node = {};
                level.set(char, node);
            }
            if (node.next == null) {
                node.next = new Map();
            }
            level = node.next;
        }

        // The final character carries the replacement value.
        let leaf = level.get(lastChar);
        if (leaf == null) {
            leaf = {};
            level.set(lastChar, leaf);
        }
        leaf.value = map[name];
    }
    // Add numeric values
    root.set('#', numStart);
    return root;
}
|
|
109
|
+
/**
 * Flags the trie nodes that terminate a legacy entity name.
 *
 * @param trie Root `Map` of an entity trie; it is mutated in place.
 * @param legacy Object whose keys are the legacy entity names.
 * @returns The same `trie` that was passed in.
 * @throws {Error} If a legacy name cannot be followed through the trie.
 */
function markLegacyEntries(trie, legacy) {
    Object.keys(legacy).forEach((name) => {
        // Start from a synthetic parent so the walk is uniform for every character.
        let node = { next: trie };
        for (const char of name) {
            const child = node.next?.get(char);
            if (!child) {
                throw new Error(`Could not find ${name} at ${char}`);
            }
            node = child;
        }
        node.legacy = true;
    });
    return trie;
}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export default function decodeCodePoint(codePoint: number): string;
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
import decodeMap from './maps/decode.js';
|
|
2
|
+
// Adapted from https://github.com/mathiasbynens/he/blob/master/src/he.js#L94-L119
// Uses the native `String.fromCodePoint` when present; otherwise falls back to
// building the string from UTF-16 code units by hand.
const fromCodePoint =
// eslint-disable-next-line @typescript-eslint/no-unnecessary-condition, node/no-unsupported-features/es-builtins
String.fromCodePoint ||
    function (codePoint) {
        if (codePoint <= 0xffff) {
            return String.fromCharCode(codePoint);
        }
        // Split an astral code point into its high and low surrogate halves.
        const offset = codePoint - 0x10000;
        const high = ((offset >>> 10) & 0x3ff) | 0xd800;
        const low = 0xdc00 | (offset & 0x3ff);
        return String.fromCharCode(high) + String.fromCharCode(low);
    };
|
|
16
|
+
/**
 * Turns the number from a numeric character reference into a string.
 *
 * - Surrogate code points (0xD800-0xDFFF) and values above 0x10FFFF yield U+FFFD.
 * - Code points listed in `./maps/decode.js` are replaced by their mapped value first.
 *
 * @param codePoint The parsed numeric value of the reference.
 * @returns The decoded character(s).
 */
export default function decodeCodePoint(codePoint) {
    const isSurrogate = codePoint >= 0xd800 && codePoint <= 0xdfff;
    if (isSurrogate || codePoint > 0x10ffff) {
        return '\uFFFD';
    }
    const resolved = codePoint in decodeMap ? decodeMap[codePoint] : codePoint;
    return fromCodePoint(resolved);
}
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Encodes all non-ASCII characters, as well as characters not valid in XML
|
|
3
|
+
* documents using XML entities.
|
|
4
|
+
*
|
|
5
|
+
* If a character has no equivalent entity, a
|
|
6
|
+
* numeric hexadecimal reference (eg. `&#xFC;`) will be used.
|
|
7
|
+
*/
|
|
8
|
+
export declare const encodeXML: (data: string) => string;
|
|
9
|
+
/**
|
|
10
|
+
* Encodes all entities and non-ASCII characters in the input.
|
|
11
|
+
*
|
|
12
|
+
* This includes characters that are valid ASCII characters in HTML documents.
|
|
13
|
+
* For example `#` will be encoded as `&num;`. To get a more compact output,
|
|
14
|
+
* consider using the `encodeNonAsciiHTML` function.
|
|
15
|
+
*
|
|
16
|
+
* If a character has no equivalent entity, a
|
|
17
|
+
* numeric hexadecimal reference (eg. `&#xFC;`) will be used.
|
|
18
|
+
*/
|
|
19
|
+
export declare const encodeHTML: (data: string) => string;
|
|
20
|
+
/**
|
|
21
|
+
* Encodes all non-ASCII characters, as well as characters not valid in HTML
|
|
22
|
+
* documents using HTML entities.
|
|
23
|
+
*
|
|
24
|
+
* If a character has no equivalent entity, a
|
|
25
|
+
* numeric hexadecimal reference (eg. `&#xFC;`) will be used.
|
|
26
|
+
*/
|
|
27
|
+
export declare const encodeNonAsciiHTML: (data: string) => string;
|
|
28
|
+
/**
|
|
29
|
+
* Encodes all non-ASCII characters, as well as characters not valid in XML
|
|
30
|
+
* documents using numeric hexadecimal reference (eg. `&#xFC;`).
|
|
31
|
+
*
|
|
32
|
+
* Have a look at `escapeUTF8` if you want a more concise output at the expense
|
|
33
|
+
* of reduced transportability.
|
|
34
|
+
*
|
|
35
|
+
* @param data String to escape.
|
|
36
|
+
*/
|
|
37
|
+
export declare function escape(data: string): string;
|
|
38
|
+
/**
|
|
39
|
+
* Encodes all characters not valid in XML documents using numeric hexadecimal
|
|
40
|
+
* reference (eg. `&#xFC;`).
|
|
41
|
+
*
|
|
42
|
+
* Note that the output will be character-set dependent.
|
|
43
|
+
*
|
|
44
|
+
* @param data String to escape.
|
|
45
|
+
*/
|
|
46
|
+
export declare function escapeUTF8(data: string): string;
|
|
@@ -0,0 +1,121 @@
|
|
|
1
|
+
import xmlMap from './maps/xml.js';
|
|
2
|
+
const inverseXML = getInverseObj(xmlMap);
|
|
3
|
+
const xmlReplacer = getInverseReplacer(inverseXML);
|
|
4
|
+
/**
|
|
5
|
+
* Encodes all non-ASCII characters, as well as characters not valid in XML
|
|
6
|
+
* documents using XML entities.
|
|
7
|
+
*
|
|
8
|
+
* If a character has no equivalent entity, a
|
|
9
|
+
* numeric hexadecimal reference (eg. `&#xFC;`) will be used.
|
|
10
|
+
*/
|
|
11
|
+
export const encodeXML = getASCIIEncoder(inverseXML);
|
|
12
|
+
import htmlMap from './maps/entities.js';
|
|
13
|
+
const inverseHTML = getInverseObj(htmlMap);
|
|
14
|
+
const htmlReplacer = getInverseReplacer(inverseHTML);
|
|
15
|
+
/**
|
|
16
|
+
* Encodes all entities and non-ASCII characters in the input.
|
|
17
|
+
*
|
|
18
|
+
* This includes characters that are valid ASCII characters in HTML documents.
|
|
19
|
+
* For example `#` will be encoded as `&num;`. To get a more compact output,
|
|
20
|
+
* consider using the `encodeNonAsciiHTML` function.
|
|
21
|
+
*
|
|
22
|
+
* If a character has no equivalent entity, a
|
|
23
|
+
* numeric hexadecimal reference (eg. `&#xFC;`) will be used.
|
|
24
|
+
*/
|
|
25
|
+
export const encodeHTML = getInverse(inverseHTML, htmlReplacer);
|
|
26
|
+
/**
|
|
27
|
+
* Encodes all non-ASCII characters, as well as characters not valid in HTML
|
|
28
|
+
* documents using HTML entities.
|
|
29
|
+
*
|
|
30
|
+
* If a character has no equivalent entity, a
|
|
31
|
+
* numeric hexadecimal reference (eg. `&#xFC;`) will be used.
|
|
32
|
+
*/
|
|
33
|
+
export const encodeNonAsciiHTML = getASCIIEncoder(inverseHTML);
|
|
34
|
+
/**
 * Inverts an entity map: `{ name: text }` becomes `{ text: "&name;" }`.
 *
 * Names are processed in sorted order, so when several names share the same
 * text the name that sorts last is the one kept.
 *
 * @param obj Entity map keyed by entity name.
 * @returns A new object keyed by replacement text.
 */
function getInverseObj(obj) {
    const inverse = {};
    const names = Object.keys(obj).sort();
    for (const name of names) {
        inverse[obj[name]] = `&${name};`;
    }
    return inverse;
}
|
|
42
|
+
/**
 * Builds a global regular expression that matches every key of `inverse`.
 *
 * Single-code-unit keys are collected into one character class (runs of three
 * or more consecutive code units are collapsed into a range). Longer keys are
 * appended as literal alternatives after the class, so the class is tried first.
 *
 * Every single-unit key is written as a `\uXXXX` escape and every
 * multi-character key has its regex metacharacters escaped, so no key can be
 * reinterpreted as pattern syntax (for example a key `d` must not become `\d`).
 *
 * @param inverse Object whose keys are the strings to match.
 * @returns A `RegExp` with the `g` flag.
 */
function getInverseReplacer(inverse) {
    const codeUnits = [];
    const alternatives = [];
    for (const key of Object.keys(inverse)) {
        if (key.length === 1) {
            codeUnits.push(key.charCodeAt(0));
        }
        else {
            // Match multi-character keys literally.
            alternatives.push(key.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'));
        }
    }
    codeUnits.sort((a, b) => a - b);
    const escapeUnit = (unit) => `\\u${unit.toString(16).padStart(4, '0')}`;
    let charClass = '';
    let start = 0;
    while (start < codeUnits.length) {
        // Find the end of a run of consecutive code units.
        let end = start;
        while (end + 1 < codeUnits.length && codeUnits[end] + 1 === codeUnits[end + 1]) {
            end += 1;
        }
        if (end - start >= 2) {
            // Only runs of at least three units are worth writing as a range.
            charClass += `${escapeUnit(codeUnits[start])}-${escapeUnit(codeUnits[end])}`;
        }
        else {
            for (let i = start; i <= end; i++) {
                charClass += escapeUnit(codeUnits[i]);
            }
        }
        start = end + 1;
    }
    alternatives.unshift(`[${charClass}]`);
    return new RegExp(alternatives.join('|'), 'g');
}
|
|
73
|
+
// Same intent as /[^\0-\x7F]/gu, written without the `u` flag. Alternatives, in order:
//   1. a non-ASCII BMP code unit that is not a surrogate,
//   2. a valid surrogate pair,
//   3. a high surrogate that is not followed by a low surrogate,
//   4. a low surrogate on its own.
// Alternative 4 matches only the surrogate itself. A paired low surrogate is
// always consumed by alternative 2 first, so no look-behind is needed, and the
// character in front of a lone low surrogate is never swallowed into the match.
const reNonASCII = /(?:[\x80-\uD7FF\uE000-\uFFFF]|[\uD800-\uDBFF][\uDC00-\uDFFF]|[\uD800-\uDBFF](?![\uDC00-\uDFFF])|[\uDC00-\uDFFF])/g;
const getCodePoint =
// eslint-disable-next-line @typescript-eslint/no-unnecessary-condition
String.prototype.codePointAt != null
    ? // eslint-disable-next-line @typescript-eslint/no-non-null-assertion
        (str) => str.codePointAt(0)
    : // http://mathiasbynens.be/notes/javascript-encoding#surrogate-formulae
        (c) => (c.charCodeAt(0) - 0xd800) * 0x400 +
            c.charCodeAt(1) -
            0xdc00 +
            0x10000;
/**
 * Formats one regex match as an upper-case hexadecimal character reference.
 *
 * @param c The matched text: a single UTF-16 code unit, or a surrogate pair.
 * @returns A reference of the form `&#xHEX;`.
 */
function singleCharReplacer(c) {
    // Matches longer than one unit are surrogate pairs and need the full code point.
    const code = c.length > 1 ? getCodePoint(c) : c.charCodeAt(0);
    return `&#x${code.toString(16).toUpperCase()};`;
}
|
|
90
|
+
/**
 * Creates an encoder that works in two passes: first every match of `re` is
 * replaced by its entry in `inverse`, then any remaining non-ASCII text is
 * written as a numeric hexadecimal reference.
 *
 * @param inverse Lookup from matched text to its replacement.
 * @param re Regular expression matching the keys of `inverse`.
 * @returns A function that encodes a string.
 */
function getInverse(inverse, re) {
    const toNamedEntity = (match) => inverse[match];
    return (data) => {
        const named = data.replace(re, toNamedEntity);
        return named.replace(reNonASCII, singleCharReplacer);
    };
}
|
|
95
|
+
// Matches anything matched by `xmlReplacer` as well as anything matched by `reNonASCII`.
const reEscapeChars = new RegExp([xmlReplacer.source, reNonASCII.source].join('|'), 'g');
/**
 * Replaces every match of `reEscapeChars` (the XML entity characters and all
 * non-ASCII text) with a numeric hexadecimal reference (eg. `&#xFC;`).
 *
 * See `escapeUTF8` for a more concise output at the expense of reduced
 * transportability.
 *
 * @param data String to escape.
 * @returns The escaped string.
 */
export function escape(data) {
    const escaped = data.replace(reEscapeChars, singleCharReplacer);
    return escaped;
}
|
|
108
|
+
/**
 * Replaces only the characters matched by `xmlReplacer` with numeric
 * hexadecimal references (eg. `&#xFC;`); everything else is left as it is.
 *
 * Because non-ASCII text is passed through untouched, the output is
 * character-set dependent.
 *
 * @param data String to escape.
 * @returns The escaped string.
 */
export function escapeUTF8(data) {
    const escaped = data.replace(xmlReplacer, singleCharReplacer);
    return escaped;
}
|
|
119
|
+
/**
 * Creates an encoder for everything matched by `reEscapeChars`.
 *
 * A match is replaced by its entry in `obj` when that entry is truthy, and by
 * a numeric hexadecimal reference otherwise.
 *
 * @param obj Lookup from matched text to its named entity.
 * @returns A function that encodes a string.
 */
function getASCIIEncoder(obj) {
    const encodeMatch = (match) => {
        const named = obj[match];
        return named ? named : singleCharReplacer(match);
    };
    return (data) => data.replace(reEscapeChars, encodeMatch);
}
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
declare const _default: {
|
|
2
|
+
'0': number;
|
|
3
|
+
'128': number;
|
|
4
|
+
'130': number;
|
|
5
|
+
'131': number;
|
|
6
|
+
'132': number;
|
|
7
|
+
'133': number;
|
|
8
|
+
'134': number;
|
|
9
|
+
'135': number;
|
|
10
|
+
'136': number;
|
|
11
|
+
'137': number;
|
|
12
|
+
'138': number;
|
|
13
|
+
'139': number;
|
|
14
|
+
'140': number;
|
|
15
|
+
'142': number;
|
|
16
|
+
'145': number;
|
|
17
|
+
'146': number;
|
|
18
|
+
'147': number;
|
|
19
|
+
'148': number;
|
|
20
|
+
'149': number;
|
|
21
|
+
'150': number;
|
|
22
|
+
'151': number;
|
|
23
|
+
'152': number;
|
|
24
|
+
'153': number;
|
|
25
|
+
'154': number;
|
|
26
|
+
'155': number;
|
|
27
|
+
'156': number;
|
|
28
|
+
'158': number;
|
|
29
|
+
'159': number;
|
|
30
|
+
};
|
|
31
|
+
export default _default;
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
export default {
|
|
2
|
+
'0': 65533,
|
|
3
|
+
'128': 8364,
|
|
4
|
+
'130': 8218,
|
|
5
|
+
'131': 402,
|
|
6
|
+
'132': 8222,
|
|
7
|
+
'133': 8230,
|
|
8
|
+
'134': 8224,
|
|
9
|
+
'135': 8225,
|
|
10
|
+
'136': 710,
|
|
11
|
+
'137': 8240,
|
|
12
|
+
'138': 352,
|
|
13
|
+
'139': 8249,
|
|
14
|
+
'140': 338,
|
|
15
|
+
'142': 381,
|
|
16
|
+
'145': 8216,
|
|
17
|
+
'146': 8217,
|
|
18
|
+
'147': 8220,
|
|
19
|
+
'148': 8221,
|
|
20
|
+
'149': 8226,
|
|
21
|
+
'150': 8211,
|
|
22
|
+
'151': 8212,
|
|
23
|
+
'152': 732,
|
|
24
|
+
'153': 8482,
|
|
25
|
+
'154': 353,
|
|
26
|
+
'155': 8250,
|
|
27
|
+
'156': 339,
|
|
28
|
+
'158': 382,
|
|
29
|
+
'159': 376,
|
|
30
|
+
};
|