cspell-trie-lib 8.10.2 → 8.11.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/lib/Builder/BuilderCursor.d.ts +4 -1
- package/dist/lib/Builder/cursor-util.js +9 -8
- package/dist/lib/ITrie.d.ts +1 -1
- package/dist/lib/ITrieNode/FindTypes.d.ts +7 -0
- package/dist/lib/ITrieNode/FindTypes.js +2 -0
- package/dist/lib/ITrieNode/ITrieNode.d.ts +30 -0
- package/dist/lib/ITrieNode/find.d.ts +2 -20
- package/dist/lib/ITrieNode/find.js +13 -2
- package/dist/lib/TrieBlob/CharIndex.d.ts +32 -0
- package/dist/lib/TrieBlob/CharIndex.js +116 -0
- package/dist/lib/TrieBlob/FastTrieBlob.d.ts +38 -10
- package/dist/lib/TrieBlob/FastTrieBlob.js +105 -37
- package/dist/lib/TrieBlob/FastTrieBlobBuilder.d.ts +19 -4
- package/dist/lib/TrieBlob/FastTrieBlobBuilder.js +56 -81
- package/dist/lib/TrieBlob/FastTrieBlobIRoot.js +9 -12
- package/dist/lib/TrieBlob/FastTrieBlobInternals.d.ts +12 -3
- package/dist/lib/TrieBlob/FastTrieBlobInternals.js +49 -5
- package/dist/lib/TrieBlob/TrieBlob.d.ts +23 -30
- package/dist/lib/TrieBlob/TrieBlob.js +217 -89
- package/dist/lib/TrieBlob/TrieBlobIRoot.d.ts +9 -4
- package/dist/lib/TrieBlob/TrieBlobIRoot.js +17 -16
- package/dist/lib/TrieBlob/Utf8.d.ts +55 -0
- package/dist/lib/TrieBlob/Utf8.js +261 -0
- package/dist/lib/TrieBlob/createTrieBlob.d.ts +4 -4
- package/dist/lib/TrieBlob/createTrieBlob.js +4 -56
- package/dist/lib/TrieData.d.ts +1 -0
- package/dist/lib/TrieNode/TrieNodeBuilder.d.ts +17 -1
- package/dist/lib/TrieNode/TrieNodeBuilder.js +26 -0
- package/dist/lib/TrieNode/find.js +4 -4
- package/dist/lib/TrieNode/trie-util.d.ts +8 -1
- package/dist/lib/TrieNode/trie-util.js +22 -1
- package/dist/lib/buildITrie.js +2 -1
- package/dist/lib/io/importV3.js +2 -2
- package/dist/lib/utils/isValidChar.d.ts +4 -0
- package/dist/lib/utils/isValidChar.js +19 -0
- package/package.json +8 -5
- package/dist/lib/TrieBlob/NumberSequenceByteDecoderAccumulator.d.ts +0 -34
- package/dist/lib/TrieBlob/NumberSequenceByteDecoderAccumulator.js +0 -129
|
@@ -1,7 +1,10 @@
|
|
|
1
1
|
export interface BuilderCursor {
|
|
2
|
+
/** Insert a character in the current node. */
|
|
2
3
|
insertChar(char: string): void;
|
|
4
|
+
/** Mark the current node as End of Word */
|
|
3
5
|
markEOW(): void;
|
|
4
|
-
|
|
6
|
+
/** Refer to a previous node */
|
|
7
|
+
reference(refId: number): void;
|
|
5
8
|
backStep(num: number): void;
|
|
6
9
|
}
|
|
7
10
|
//# sourceMappingURL=BuilderCursor.d.ts.map
|
|
@@ -1,17 +1,18 @@
|
|
|
1
1
|
export function insertWordsAtCursor(cursor, words) {
|
|
2
|
-
let
|
|
3
|
-
for (const
|
|
4
|
-
const
|
|
5
|
-
const
|
|
2
|
+
let prevWordLetters = [];
|
|
3
|
+
for (const _word of words) {
|
|
4
|
+
const letters = [..._word];
|
|
5
|
+
const pLen = commonStrPrefix(prevWordLetters, letters);
|
|
6
|
+
const stepBack = prevWordLetters.length - pLen;
|
|
6
7
|
cursor.backStep(stepBack);
|
|
7
|
-
const wLen =
|
|
8
|
+
const wLen = letters.length;
|
|
8
9
|
for (let i = pLen; i < wLen; ++i) {
|
|
9
|
-
cursor.insertChar(
|
|
10
|
+
cursor.insertChar(letters[i]);
|
|
10
11
|
}
|
|
11
12
|
cursor.markEOW();
|
|
12
|
-
|
|
13
|
+
prevWordLetters = letters;
|
|
13
14
|
}
|
|
14
|
-
cursor.backStep(
|
|
15
|
+
cursor.backStep(prevWordLetters.length);
|
|
15
16
|
}
|
|
16
17
|
export function commonStringPrefixLen(a, b) {
|
|
17
18
|
let i = 0;
|
package/dist/lib/ITrie.d.ts
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import type { WeightMap } from './distance/index.js';
|
|
2
|
-
import type { FindFullResult } from './ITrieNode/find.js';
|
|
3
2
|
import type { ITrieNode } from './ITrieNode/index.js';
|
|
3
|
+
import { FindFullResult } from './ITrieNode/ITrieNode.js';
|
|
4
4
|
import type { PartialTrieInfo, TrieInfo } from './ITrieNode/TrieInfo.js';
|
|
5
5
|
import type { CompoundWordsMethod, WalkerIterator } from './ITrieNode/walker/walkerTypes.js';
|
|
6
6
|
import type { SuggestionCollector, SuggestionResult } from './suggestCollector.js';
|
|
@@ -1,4 +1,25 @@
|
|
|
1
1
|
import type { TrieInfo } from './TrieInfo.js';
|
|
2
|
+
export interface FindResult {
|
|
3
|
+
found: string | false;
|
|
4
|
+
compoundUsed: boolean;
|
|
5
|
+
caseMatched: boolean;
|
|
6
|
+
/**
|
|
7
|
+
* Is the word explicitly forbidden.
|
|
8
|
+
* - `true` - word is in the forbidden list.
|
|
9
|
+
* - `false` - word is not in the forbidden list.
|
|
10
|
+
* - `undefined` - unknown - was not checked.
|
|
11
|
+
*/
|
|
12
|
+
forbidden?: boolean | undefined;
|
|
13
|
+
}
|
|
14
|
+
export interface FindFullResult extends FindResult {
|
|
15
|
+
/**
|
|
16
|
+
* Is the word explicitly forbidden.
|
|
17
|
+
* - `true` - word is in the forbidden list.
|
|
18
|
+
* - `false` - word is not in the forbidden list.
|
|
19
|
+
* - `undefined` - unknown - was not checked.
|
|
20
|
+
*/
|
|
21
|
+
forbidden: boolean | undefined;
|
|
22
|
+
}
|
|
2
23
|
export type ITrieNodeId = object | number | string;
|
|
3
24
|
type Entry = readonly [string, ITrieNode];
|
|
4
25
|
export interface ITrieNode {
|
|
@@ -37,6 +58,15 @@ export interface ITrieNodeRoot extends ITrieNode {
|
|
|
37
58
|
* @param id an of a ITrieNode in this Trie
|
|
38
59
|
*/
|
|
39
60
|
resolveId(id: ITrieNodeId): ITrieNode;
|
|
61
|
+
findExact?: ((word: string) => boolean) | undefined;
|
|
62
|
+
/**
|
|
63
|
+
* Try to find a word.
|
|
64
|
+
* @param word - the normalized word to look up.
|
|
65
|
+
* @param strict - if `true` the case and accents must match.
|
|
66
|
+
* @returns undefined if it did not try to find the word, otherwise a FindResult.
|
|
67
|
+
*/
|
|
68
|
+
find?: ((word: string, strict: boolean) => FindResult | undefined) | undefined;
|
|
69
|
+
isForbidden?: ((word: string) => boolean) | undefined;
|
|
40
70
|
}
|
|
41
71
|
export {};
|
|
42
72
|
//# sourceMappingURL=ITrieNode.d.ts.map
|
|
@@ -1,25 +1,7 @@
|
|
|
1
1
|
import type { FindOptions, PartialFindOptions } from './FindOptions.js';
|
|
2
|
-
import type {
|
|
2
|
+
import type { FindFullNodeResult } from './FindTypes.js';
|
|
3
|
+
import type { FindFullResult, FindResult, ITrieNode, ITrieNodeRoot } from './ITrieNode.js';
|
|
3
4
|
type Root = ITrieNodeRoot;
|
|
4
|
-
export interface FindNodeResult {
|
|
5
|
-
node: ITrieNode | undefined;
|
|
6
|
-
}
|
|
7
|
-
export interface FindResult {
|
|
8
|
-
found: string | false;
|
|
9
|
-
compoundUsed: boolean;
|
|
10
|
-
caseMatched: boolean;
|
|
11
|
-
}
|
|
12
|
-
export interface FindFullResult extends FindResult {
|
|
13
|
-
/**
|
|
14
|
-
* Is the word explicitly forbidden.
|
|
15
|
-
* - `true` - word is in the forbidden list.
|
|
16
|
-
* - `false` - word is not in the forbidden list.
|
|
17
|
-
* - `undefined` - unknown - was not checked.
|
|
18
|
-
* */
|
|
19
|
-
forbidden: boolean | undefined;
|
|
20
|
-
}
|
|
21
|
-
export interface FindFullNodeResult extends FindNodeResult, FindFullResult {
|
|
22
|
-
}
|
|
23
5
|
/**
|
|
24
6
|
*
|
|
25
7
|
* @param root Trie root node. root.c contains the compound root and forbidden root.
|
|
@@ -37,6 +37,11 @@ export function findWordNode(root, word, options) {
|
|
|
37
37
|
* @param options
|
|
38
38
|
*/
|
|
39
39
|
function _findWord(root, word, options) {
|
|
40
|
+
if (root.find) {
|
|
41
|
+
const found = root.find(word, options.matchCase);
|
|
42
|
+
if (found)
|
|
43
|
+
return found;
|
|
44
|
+
}
|
|
40
45
|
const { node: _, ...result } = _findWordNode(root, word, options);
|
|
41
46
|
return result;
|
|
42
47
|
}
|
|
@@ -49,10 +54,10 @@ function _findWord(root, word, options) {
|
|
|
49
54
|
function _findWordNode(root, word, options) {
|
|
50
55
|
const trieInfo = root.info;
|
|
51
56
|
const compoundMode = knownCompoundModes.get(options.compoundMode) || _defaultFindOptions.compoundMode;
|
|
52
|
-
const compoundPrefix = options.compoundMode === 'compound' ? trieInfo.compoundCharacter ?? options.compoundFix : '';
|
|
57
|
+
const compoundPrefix = options.compoundMode === 'compound' ? (trieInfo.compoundCharacter ?? options.compoundFix) : '';
|
|
53
58
|
const ignoreCasePrefix = options.matchCase
|
|
54
59
|
? ''
|
|
55
|
-
: trieInfo.stripCaseAndAccentsPrefix ?? options.caseInsensitivePrefix;
|
|
60
|
+
: (trieInfo.stripCaseAndAccentsPrefix ?? options.caseInsensitivePrefix);
|
|
56
61
|
function __findCompound() {
|
|
57
62
|
const f = findCompoundWord(root, word, compoundPrefix, ignoreCasePrefix);
|
|
58
63
|
const result = { ...f };
|
|
@@ -180,6 +185,9 @@ function findCompoundWord(root, word, compoundCharacter, ignoreCasePrefix) {
|
|
|
180
185
|
return { found, compoundUsed, node, forbidden: undefined, caseMatched };
|
|
181
186
|
}
|
|
182
187
|
export function findWordExact(root, word) {
|
|
188
|
+
const r = root;
|
|
189
|
+
if (r?.findExact)
|
|
190
|
+
return r.findExact(word);
|
|
183
191
|
return isEndOfWordNode(walk(root, word));
|
|
184
192
|
}
|
|
185
193
|
export function isEndOfWordNode(n) {
|
|
@@ -277,6 +285,9 @@ function findLegacyCompoundWord(roots, word, minCompoundLength) {
|
|
|
277
285
|
return { found, compoundUsed, caseMatched };
|
|
278
286
|
}
|
|
279
287
|
export function isForbiddenWord(root, word, forbiddenPrefix) {
|
|
288
|
+
const r = root;
|
|
289
|
+
if (r?.isForbidden)
|
|
290
|
+
return r.isForbidden(word);
|
|
280
291
|
return findWordExact(root?.get(forbiddenPrefix), word);
|
|
281
292
|
}
|
|
282
293
|
export const createFindOptions = memorizeLastCall(_createFindOptions);
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
import { type Utf8BE32 } from './Utf8.js';
|
|
2
|
+
/**
 * A read-only sequence of numbers produced when a character or word is encoded.
 * NOTE(review): presumably one UTF-8 byte value per element — confirm against `Utf8.js`.
 */
export type Utf8Seq = readonly number[];
/** Lookup table from a character (string key) to its packed `Utf8BE32` value. */
export type CharIndexMap = Record<string, Utf8BE32>;
/** A `CharIndexMap` whose entries cannot be reassigned through this type. */
export type RO_CharIndexMap = Readonly<CharIndexMap>;
/** Lookup table from a character (string key) to its `Utf8Seq`. */
export type CharIndexSeqMap = Record<string, Utf8Seq>;
/** A `CharIndexSeqMap` whose entries cannot be reassigned through this type. */
export type RO_CharIndexSeqMap = Readonly<CharIndexSeqMap>;
|
|
7
|
+
/**
 * Converts characters and words into `Utf8Seq` values for a fixed list of
 * indexed characters.
 */
export declare class CharIndex {
    #private;
    /** The list of characters this index was constructed with. */
    readonly charIndex: ReadonlyArray<string>;
    /**
     * @param charIndex - the characters to index.
     */
    constructor(charIndex: ReadonlyArray<string>);
    /**
     * Get the sequence for a single character.
     * @param c - the character to look up.
     * @returns the sequence for `c`.
     */
    getCharUtf8Seq(c: string): Utf8Seq;
    /**
     * Get the sequence for a whole word.
     * @param word - the word to encode.
     * @returns the sequence for `word`.
     */
    wordToUtf8Seq(word: string): Utf8Seq;
    /** @returns `true` if any indexed character has a sequence longer than one element. */
    indexContainsMultiByteChars(): boolean;
    /** Number of entries in `charIndex`. */
    get size(): number;
    /** @returns a plain object holding `charIndex`, for JSON serialization. */
    toJSON(): {
        charIndex: ReadonlyArray<string>;
    };
}
|
|
19
|
+
/**
 * Collects characters as they are encountered and produces a `CharIndex`
 * from them via `build()`.
 */
export declare class CharIndexBuilder {
    #private;
    private readonly charIndex;
    /** Character (string key) to packed `Utf8BE32` value. */
    readonly charIndexMap: CharIndexMap;
    /** Character (string key) to `Utf8Seq`. */
    readonly charIndexSeqMap: CharIndexSeqMap;
    constructor();
    /**
     * Get the packed numeric value for a character, registering the character
     * if it has not been seen before.
     * @param c - the character.
     */
    getUtf8Value(c: string): number;
    /**
     * Split a packed value into its sequence of numbers.
     * @param idx - a value as returned by `getUtf8Value`.
     */
    utf8ValueToUtf8Seq(idx: number): Array<number>;
    /**
     * Convert a single character to its sequence.
     * @param c - the character.
     */
    charToUtf8Seq(c: string): Array<number>;
    /**
     * Convert every character of a word and concatenate the sequences.
     * @param word - the word to convert.
     */
    wordToUtf8Seq(word: string): Array<number>;
    /** Number of characters registered so far. */
    get size(): number;
    /** Create a `CharIndex` from the characters registered so far. */
    build(): CharIndex;
}
|
|
32
|
+
//# sourceMappingURL=CharIndex.d.ts.map
|
|
@@ -0,0 +1,116 @@
|
|
|
1
|
+
import { encodeTextToUtf8, encodeUtf8N_BE } from './Utf8.js';
|
|
2
|
+
// Frozen single-element sequence `[0]`.
// NOTE(review): not referenced anywhere else in the visible part of this module — confirm whether it is still needed.
const emptySeq = Object.freeze([0]);
|
|
4
|
+
/**
 * Converts characters and words into sequences using `encodeTextToUtf8`.
 *
 * Per-character results are cached in a lookup table that is pre-filled from
 * the `charIndex` list, and the most recently converted word is remembered so
 * that repeated lookups of the same word do not re-encode it.
 */
export class CharIndex {
    charIndex;
    #charToUtf8SeqMap;
    #lastWord = '';
    #lastWordSeq = [];
    #multiByteChars;
    /**
     * @param charIndex - the characters to pre-compute sequences for.
     */
    constructor(charIndex) {
        this.charIndex = charIndex;
        const seqByChar = buildCharIndexSequenceMap(charIndex);
        this.#charToUtf8SeqMap = seqByChar;
        // Evaluated once, here; characters cached later by `getCharUtf8Seq` do not update it.
        this.#multiByteChars = Object.values(seqByChar).some((seq) => seq.length > 1);
    }
    /**
     * Get the sequence for a single character, encoding and caching it on a miss.
     * @param c - the character to look up.
     * @returns the cached or newly encoded sequence.
     */
    getCharUtf8Seq(c) {
        const cache = this.#charToUtf8SeqMap;
        let seq = cache[c];
        if (!seq) {
            seq = encodeTextToUtf8(c);
            cache[c] = seq;
        }
        return seq;
    }
    /**
     * Get the sequence for a word.
     * The same array instance is returned when the word equals the previous one.
     * @param word - the word to encode.
     * @returns the sequence for `word`.
     */
    wordToUtf8Seq(word) {
        if (word !== this.#lastWord) {
            this.#lastWordSeq = encodeTextToUtf8(word);
            this.#lastWord = word;
        }
        return this.#lastWordSeq;
    }
    /** @returns `true` if any character in `charIndex` produced a sequence longer than one element. */
    indexContainsMultiByteChars() {
        return this.#multiByteChars;
    }
    /** Number of entries in `charIndex`. */
    get size() {
        return this.charIndex.length;
    }
    /** @returns a plain object holding `charIndex`, for JSON serialization. */
    toJSON() {
        const { charIndex } = this;
        return { charIndex };
    }
}
|
|
41
|
+
/**
 * Build a lookup table from each character in `charIndex` to the result of
 * `encodeTextToUtf8` for that character.
 *
 * The table has no prototype, so character keys cannot collide with
 * inherited `Object.prototype` members.
 * @param charIndex - iterable of characters to encode.
 * @returns the prototype-less lookup table.
 */
function buildCharIndexSequenceMap(charIndex) {
    const seqByChar = Object.create(null);
    for (const ch of charIndex) {
        seqByChar[ch] = encodeTextToUtf8(ch);
    }
    return seqByChar;
}
|
|
48
|
+
/**
 * Collects the characters seen while building a trie and converts them to
 * packed values (`encodeUtf8N_BE`) and to sequences (`splitUtf8`).
 */
export class CharIndexBuilder {
    /** NFC forms of the characters registered so far, in registration order. */
    charIndex = [];
    /** Prototype-less table: character -> packed value. */
    charIndexMap = Object.create(null);
    /**
     * Prototype-less table.
     * NOTE(review): never written by this class in the visible code — confirm whether it is still used.
     */
    charIndexSeqMap = Object.create(null);
    /** Memo of packed value -> sequence, filled by `utf8ValueToUtf8Seq`. */
    #mapIdxToSeq = new Map();
    constructor() {
        // Register the empty string first so that it occupies `charIndex[0]`.
        this.getUtf8Value('');
    }
    /**
     * Get the packed value for a character, registering it on first use.
     *
     * The value is stored under the character as given and under its NFC and
     * NFD forms, so any of those spellings resolves to the same value.
     * @param c - the character.
     * @returns the packed value for `c`.
     */
    getUtf8Value(c) {
        const found = this.charIndexMap[c];
        if (found !== undefined) {
            return found;
        }
        const nc = c.normalize('NFC');
        // NOTE(review): `nc` is appended even if it was already registered under another
        // spelling that missed the lookup above — confirm duplicates in `charIndex` are acceptable.
        this.charIndex.push(nc);
        // Only the first code point of the NFC form is encoded; the empty string encodes as 0.
        const utf8 = encodeUtf8N_BE(nc.codePointAt(0) || 0);
        this.charIndexMap[c] = utf8;
        this.charIndexMap[nc] = utf8;
        this.charIndexMap[c.normalize('NFD')] = utf8;
        return utf8;
    }
    /**
     * Split a packed value into its sequence, memoizing the result.
     * The same array instance is returned for repeated calls with the same value.
     * @param idx - a packed value as returned by `getUtf8Value`.
     * @returns the sequence for `idx`.
     */
    utf8ValueToUtf8Seq(idx) {
        const found = this.#mapIdxToSeq.get(idx);
        if (found !== undefined) {
            return found;
        }
        const seq = splitUtf8(idx);
        this.#mapIdxToSeq.set(idx, seq);
        return seq;
    }
    /**
     * Convert a single character to its sequence.
     * @param c - the character.
     * @returns the sequence for `c`.
     */
    charToUtf8Seq(c) {
        const idx = this.getUtf8Value(c);
        return this.utf8ValueToUtf8Seq(idx);
    }
    /**
     * Convert a word, code point by code point, and concatenate the sequences.
     * @param word - the word to convert.
     * @returns a new array holding the concatenated sequences.
     */
    wordToUtf8Seq(word) {
        const seq = [];
        // `for...of` iterates by code point, so surrogate pairs stay together.
        for (const c of word) {
            const idx = this.getUtf8Value(c);
            // `utf8ValueToUtf8Seq` always returns an array, so no scalar case is needed.
            for (const cIdx of this.utf8ValueToUtf8Seq(idx)) {
                seq.push(cIdx);
            }
        }
        return seq;
    }
    /** Number of characters registered so far (includes the initial empty string). */
    get size() {
        return this.charIndex.length;
    }
    /**
     * Create a `CharIndex` from the registered characters.
     * The `charIndex` array is passed as-is, not copied.
     */
    build() {
        return new CharIndex(this.charIndex);
    }
}
|
|
107
|
+
/**
 * Split a packed big-endian value (up to 4 bytes in one 32-bit number) into
 * its individual byte values, most significant byte first.
 *
 * @param utf8 - the packed value. A negative number is treated as the signed
 *   32-bit view of the same bit pattern, because JavaScript bitwise operators
 *   (`|`, `<<`) yield signed results for values with the top bit set.
 * @returns 1 to 4 byte values.
 */
function splitUtf8(utf8) {
    // Reinterpret as unsigned 32-bit so a signed 4-byte pattern is not mistaken for a value <= 0xff.
    const value = utf8 >>> 0;
    if (value <= 0xff)
        return [value];
    if (value <= 0xffff)
        return [(value >>> 8) & 0xff, value & 0xff];
    if (value <= 0xff_ffff)
        return [(value >>> 16) & 0xff, (value >>> 8) & 0xff, value & 0xff];
    // Zero bytes are dropped in the 4-byte case only; kept as-is to preserve existing behavior.
    return [(value >>> 24) & 0xff, (value >>> 16) & 0xff, (value >>> 8) & 0xff, value & 0xff].filter((v) => v);
}
|
|
116
|
+
//# sourceMappingURL=CharIndex.js.map
|
|
@@ -1,22 +1,25 @@
|
|
|
1
1
|
import type { ITrieNode, ITrieNodeRoot } from '../ITrieNode/ITrieNode.js';
|
|
2
2
|
import type { PartialTrieInfo, TrieInfo } from '../ITrieNode/TrieInfo.js';
|
|
3
3
|
import type { TrieData } from '../TrieData.js';
|
|
4
|
+
import { Utf8Seq } from './CharIndex.js';
|
|
4
5
|
import { type FastTrieBlobBitMaskInfo } from './FastTrieBlobBitMaskInfo.js';
|
|
5
6
|
import { FastTrieBlobInternals } from './FastTrieBlobInternals.js';
|
|
6
7
|
import { TrieBlob } from './TrieBlob.js';
|
|
8
|
+
import { Utf8Accumulator } from './Utf8.js';
|
|
9
|
+
type FastTrieBlobNode = number[];
|
|
7
10
|
export declare class FastTrieBlob implements TrieData {
|
|
11
|
+
#private;
|
|
8
12
|
private nodes;
|
|
9
13
|
private _charIndex;
|
|
10
14
|
readonly bitMasksInfo: FastTrieBlobBitMaskInfo;
|
|
11
|
-
|
|
15
|
+
readonly sorted: boolean;
|
|
12
16
|
private _readonly;
|
|
13
17
|
private _forbidIdx;
|
|
14
18
|
private _iTrieRoot;
|
|
15
19
|
wordToCharacters: (word: string) => readonly string[];
|
|
16
20
|
readonly info: Readonly<TrieInfo>;
|
|
17
21
|
private constructor();
|
|
18
|
-
|
|
19
|
-
private wordToNodeCharIndexSequence;
|
|
22
|
+
wordToNodeCharIndexSequence(word: string): Utf8Seq;
|
|
20
23
|
private letterToNodeCharIndexSequence;
|
|
21
24
|
has(word: string): boolean;
|
|
22
25
|
private _has;
|
|
@@ -26,8 +29,19 @@ export declare class FastTrieBlob implements TrieData {
|
|
|
26
29
|
freeze(): this;
|
|
27
30
|
toJSON(): {
|
|
28
31
|
info: Readonly<TrieInfo>;
|
|
29
|
-
nodes:
|
|
30
|
-
|
|
32
|
+
nodes: ({
|
|
33
|
+
i: number;
|
|
34
|
+
w: number;
|
|
35
|
+
c?: never;
|
|
36
|
+
} | {
|
|
37
|
+
i: number;
|
|
38
|
+
w: number;
|
|
39
|
+
c: {
|
|
40
|
+
i: number;
|
|
41
|
+
c: string | 0 | undefined;
|
|
42
|
+
s: string;
|
|
43
|
+
}[];
|
|
44
|
+
})[];
|
|
31
45
|
};
|
|
32
46
|
static create(data: FastTrieBlobInternals, options?: PartialTrieInfo): FastTrieBlob;
|
|
33
47
|
static toITrieNodeRoot(trie: FastTrieBlob): ITrieNodeRoot;
|
|
@@ -40,6 +54,7 @@ export declare class FastTrieBlob implements TrieData {
|
|
|
40
54
|
getNode(prefix: string): ITrieNode | undefined;
|
|
41
55
|
isForbiddenWord(word: string): boolean;
|
|
42
56
|
hasForbiddenWords(): boolean;
|
|
57
|
+
nodeInfo(nodeIndex: number, accumulator?: Utf8Accumulator): TrieBlobNodeInfo;
|
|
43
58
|
/** number of nodes */
|
|
44
59
|
get size(): number;
|
|
45
60
|
private _lookupCharIndexNode;
|
|
@@ -47,15 +62,28 @@ export declare class FastTrieBlob implements TrieData {
|
|
|
47
62
|
private _searchNodeForChar;
|
|
48
63
|
get charIndex(): readonly string[];
|
|
49
64
|
static fromTrieBlob(trie: TrieBlob): FastTrieBlob;
|
|
65
|
+
static isFastTrieBlob(obj: unknown): obj is FastTrieBlob;
|
|
50
66
|
}
|
|
51
|
-
interface
|
|
52
|
-
id: number;
|
|
67
|
+
interface TrieBlobNodeInfo {
|
|
53
68
|
eow: boolean;
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
c: number | string;
|
|
69
|
+
children: {
|
|
70
|
+
c: string;
|
|
57
71
|
i: number;
|
|
72
|
+
cIdx: number;
|
|
58
73
|
}[];
|
|
59
74
|
}
|
|
75
|
+
export declare function nodesToJSON(nodes: Readonly<FastTrieBlobNode[]>): ({
|
|
76
|
+
i: number;
|
|
77
|
+
w: number;
|
|
78
|
+
c?: never;
|
|
79
|
+
} | {
|
|
80
|
+
i: number;
|
|
81
|
+
w: number;
|
|
82
|
+
c: {
|
|
83
|
+
i: number;
|
|
84
|
+
c: string | 0 | undefined;
|
|
85
|
+
s: string;
|
|
86
|
+
}[];
|
|
87
|
+
})[];
|
|
60
88
|
export {};
|
|
61
89
|
//# sourceMappingURL=FastTrieBlob.d.ts.map
|
|
@@ -1,42 +1,47 @@
|
|
|
1
1
|
import { findNode } from '../ITrieNode/trie-util.js';
|
|
2
2
|
import { mergeOptionalWithDefaults } from '../utils/mergeOptionalWithDefaults.js';
|
|
3
3
|
import { extractInfo } from './FastTrieBlobBitMaskInfo.js';
|
|
4
|
-
import { FastTrieBlobInternals } from './FastTrieBlobInternals.js';
|
|
4
|
+
import { assertSorted, FastTrieBlobInternals, sortNodes } from './FastTrieBlobInternals.js';
|
|
5
5
|
import { FastTrieBlobIRoot } from './FastTrieBlobIRoot.js';
|
|
6
|
-
import { NumberSequenceByteDecoderAccumulator } from './NumberSequenceByteDecoderAccumulator.js';
|
|
7
6
|
import { TrieBlob } from './TrieBlob.js';
|
|
7
|
+
import { Utf8Accumulator } from './Utf8.js';
|
|
8
|
+
const useSorted = true;
|
|
9
|
+
const checkSorted = false;
|
|
8
10
|
export class FastTrieBlob {
|
|
9
11
|
nodes;
|
|
10
12
|
_charIndex;
|
|
11
13
|
bitMasksInfo;
|
|
12
|
-
|
|
14
|
+
sorted;
|
|
13
15
|
_readonly = false;
|
|
14
16
|
_forbidIdx;
|
|
15
17
|
_iTrieRoot;
|
|
16
18
|
wordToCharacters;
|
|
17
19
|
info;
|
|
18
|
-
constructor(nodes, _charIndex, bitMasksInfo, options) {
|
|
20
|
+
constructor(nodes, _charIndex, bitMasksInfo, sorted, options) {
|
|
19
21
|
this.nodes = nodes;
|
|
20
22
|
this._charIndex = _charIndex;
|
|
21
23
|
this.bitMasksInfo = bitMasksInfo;
|
|
24
|
+
this.sorted = sorted;
|
|
22
25
|
this.info = mergeOptionalWithDefaults(options);
|
|
23
26
|
this.wordToCharacters = (word) => [...word];
|
|
24
|
-
this._charToIndexMap = createCharToIndexMap(_charIndex);
|
|
25
27
|
this._forbidIdx = this._searchNodeForChar(0, this.info.forbiddenWordPrefix);
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
28
|
+
if (sorted && checkSorted) {
|
|
29
|
+
assertSorted(nodes, bitMasksInfo.NodeMaskChildCharIndex);
|
|
30
|
+
}
|
|
29
31
|
}
|
|
30
32
|
wordToNodeCharIndexSequence(word) {
|
|
31
|
-
return
|
|
33
|
+
return this._charIndex.wordToUtf8Seq(word);
|
|
32
34
|
}
|
|
33
35
|
letterToNodeCharIndexSequence(letter) {
|
|
34
|
-
return
|
|
36
|
+
return this._charIndex.getCharUtf8Seq(letter);
|
|
35
37
|
}
|
|
36
38
|
has(word) {
|
|
37
39
|
return this._has(0, word);
|
|
38
40
|
}
|
|
39
41
|
_has(nodeIdx, word) {
|
|
42
|
+
return this.sorted && useSorted ? this.#hasSorted(nodeIdx, word) : this.#has(nodeIdx, word);
|
|
43
|
+
}
|
|
44
|
+
#has(nodeIdx, word) {
|
|
40
45
|
const NodeMaskChildCharIndex = this.bitMasksInfo.NodeMaskChildCharIndex;
|
|
41
46
|
const NodeChildRefShift = this.bitMasksInfo.NodeChildRefShift;
|
|
42
47
|
const NodeMaskEOW = this.bitMasksInfo.NodeMaskEOW;
|
|
@@ -61,12 +66,47 @@ export class FastTrieBlob {
|
|
|
61
66
|
}
|
|
62
67
|
return !!(node[0] & NodeMaskEOW);
|
|
63
68
|
}
|
|
69
|
+
#hasSorted(nodeIdx, word) {
|
|
70
|
+
const NodeMaskChildCharIndex = this.bitMasksInfo.NodeMaskChildCharIndex;
|
|
71
|
+
const NodeChildRefShift = this.bitMasksInfo.NodeChildRefShift;
|
|
72
|
+
const NodeMaskEOW = this.bitMasksInfo.NodeMaskEOW;
|
|
73
|
+
const nodes = this.nodes;
|
|
74
|
+
const charIndexes = this.wordToNodeCharIndexSequence(word);
|
|
75
|
+
const len = charIndexes.length;
|
|
76
|
+
let node = nodes[nodeIdx];
|
|
77
|
+
for (let p = 0; p < len; ++p, node = nodes[nodeIdx]) {
|
|
78
|
+
const letterIdx = charIndexes[p];
|
|
79
|
+
const count = node.length;
|
|
80
|
+
// console.error('%o', { p, letterIdx, ...this.nodeInfo(nodeIdx) });
|
|
81
|
+
if (count < 2)
|
|
82
|
+
return false;
|
|
83
|
+
let i = 1;
|
|
84
|
+
let j = count - 1;
|
|
85
|
+
let c = -1;
|
|
86
|
+
while (i < j) {
|
|
87
|
+
const m = (i + j) >> 1;
|
|
88
|
+
c = node[m] & NodeMaskChildCharIndex;
|
|
89
|
+
if (c < letterIdx) {
|
|
90
|
+
i = m + 1;
|
|
91
|
+
}
|
|
92
|
+
else {
|
|
93
|
+
j = m;
|
|
94
|
+
}
|
|
95
|
+
}
|
|
96
|
+
if (i >= count || (node[i] & NodeMaskChildCharIndex) !== letterIdx)
|
|
97
|
+
return false;
|
|
98
|
+
nodeIdx = node[i] >>> NodeChildRefShift;
|
|
99
|
+
if (!nodeIdx)
|
|
100
|
+
return false;
|
|
101
|
+
}
|
|
102
|
+
return !!(node[0] & NodeMaskEOW);
|
|
103
|
+
}
|
|
64
104
|
*words() {
|
|
65
105
|
const NodeMaskChildCharIndex = this.bitMasksInfo.NodeMaskChildCharIndex;
|
|
66
106
|
const NodeChildRefShift = this.bitMasksInfo.NodeChildRefShift;
|
|
67
107
|
const NodeMaskEOW = this.bitMasksInfo.NodeMaskEOW;
|
|
68
108
|
const nodes = this.nodes;
|
|
69
|
-
const accumulator =
|
|
109
|
+
const accumulator = Utf8Accumulator.create();
|
|
70
110
|
const stack = [{ nodeIdx: 0, pos: 0, word: '', accumulator }];
|
|
71
111
|
let depth = 0;
|
|
72
112
|
while (depth >= 0) {
|
|
@@ -83,8 +123,8 @@ export class FastTrieBlob {
|
|
|
83
123
|
const entry = node[nextPos];
|
|
84
124
|
const charIdx = entry & NodeMaskChildCharIndex;
|
|
85
125
|
const acc = accumulator.clone();
|
|
86
|
-
const
|
|
87
|
-
const letter = (
|
|
126
|
+
const codePoint = acc.decode(charIdx);
|
|
127
|
+
const letter = (codePoint && String.fromCodePoint(codePoint)) || '';
|
|
88
128
|
++depth;
|
|
89
129
|
stack[depth] = {
|
|
90
130
|
nodeIdx: entry >>> NodeChildRefShift,
|
|
@@ -137,15 +177,15 @@ export class FastTrieBlob {
|
|
|
137
177
|
toJSON() {
|
|
138
178
|
return {
|
|
139
179
|
info: this.info,
|
|
140
|
-
nodes:
|
|
141
|
-
charIndex: this._charIndex,
|
|
180
|
+
nodes: nodesToJSON(this.nodes),
|
|
181
|
+
// charIndex: this._charIndex,
|
|
142
182
|
};
|
|
143
183
|
}
|
|
144
184
|
static create(data, options) {
|
|
145
|
-
return new FastTrieBlob(data.nodes, data.charIndex, extractInfo(data), options);
|
|
185
|
+
return new FastTrieBlob(data.nodes, data.charIndex, extractInfo(data), data.sorted, options);
|
|
146
186
|
}
|
|
147
187
|
static toITrieNodeRoot(trie) {
|
|
148
|
-
return new FastTrieBlobIRoot(new FastTrieBlobInternals(trie.nodes, trie._charIndex, trie.
|
|
188
|
+
return new FastTrieBlobIRoot(new FastTrieBlobInternals(trie.nodes, trie._charIndex, trie.bitMasksInfo), 0, trie.info);
|
|
149
189
|
}
|
|
150
190
|
static NodeMaskEOW = TrieBlob.NodeMaskEOW;
|
|
151
191
|
static NodeChildRefShift = TrieBlob.NodeChildRefShift;
|
|
@@ -170,6 +210,20 @@ export class FastTrieBlob {
|
|
|
170
210
|
hasForbiddenWords() {
|
|
171
211
|
return !!this._forbidIdx;
|
|
172
212
|
}
|
|
213
|
+
nodeInfo(nodeIndex, accumulator) {
|
|
214
|
+
const acc = accumulator ?? Utf8Accumulator.create();
|
|
215
|
+
const n = this.nodes[nodeIndex];
|
|
216
|
+
const eow = !!(n[0] & this.bitMasksInfo.NodeMaskEOW);
|
|
217
|
+
const children = n.slice(1).map((v) => {
|
|
218
|
+
const cIdx = v & this.bitMasksInfo.NodeMaskChildCharIndex;
|
|
219
|
+
const a = acc.clone();
|
|
220
|
+
const codePoint = a.decode(cIdx);
|
|
221
|
+
const c = codePoint !== undefined ? String.fromCodePoint(codePoint) : '∎';
|
|
222
|
+
const i = v >>> this.bitMasksInfo.NodeChildRefShift;
|
|
223
|
+
return { c, i, cIdx };
|
|
224
|
+
});
|
|
225
|
+
return { eow, children };
|
|
226
|
+
}
|
|
173
227
|
/** number of nodes */
|
|
174
228
|
get size() {
|
|
175
229
|
return this.nodes.length;
|
|
@@ -201,7 +255,7 @@ export class FastTrieBlob {
|
|
|
201
255
|
return idx;
|
|
202
256
|
}
|
|
203
257
|
get charIndex() {
|
|
204
|
-
return [...this._charIndex];
|
|
258
|
+
return [...this._charIndex.charIndex];
|
|
205
259
|
}
|
|
206
260
|
static fromTrieBlob(trie) {
|
|
207
261
|
const bitMasksInfo = {
|
|
@@ -236,28 +290,42 @@ export class FastTrieBlob {
|
|
|
236
290
|
node[j] = (idx << TrieBlob.NodeChildRefShift) | charIndex;
|
|
237
291
|
}
|
|
238
292
|
}
|
|
239
|
-
return new FastTrieBlob(nodes, trie.charIndex, bitMasksInfo, trie.info);
|
|
293
|
+
return new FastTrieBlob(sortNodes(nodes, TrieBlob.NodeMaskChildCharIndex), trie.charIndex, bitMasksInfo, true, trie.info);
|
|
240
294
|
}
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
const map = Object.create(null);
|
|
244
|
-
for (let i = 0; i < charIndex.length; ++i) {
|
|
245
|
-
const char = charIndex[i];
|
|
246
|
-
map[char.normalize('NFC')] = i;
|
|
247
|
-
map[char.normalize('NFD')] = i;
|
|
295
|
+
static isFastTrieBlob(obj) {
|
|
296
|
+
return obj instanceof FastTrieBlob;
|
|
248
297
|
}
|
|
249
|
-
return map;
|
|
250
298
|
}
|
|
251
|
-
function
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
299
|
+
export function nodesToJSON(nodes) {
|
|
300
|
+
const mapNodeToAcc = new Map();
|
|
301
|
+
function mapNode(node, i) {
|
|
302
|
+
if (node.length === 1) {
|
|
303
|
+
return {
|
|
304
|
+
i,
|
|
305
|
+
w: (!!(node[0] & TrieBlob.NodeMaskEOW) && 1) || 0,
|
|
306
|
+
};
|
|
307
|
+
}
|
|
308
|
+
const acc = mapNodeToAcc.get(node) || Utf8Accumulator.create();
|
|
309
|
+
function mapChild(n) {
|
|
310
|
+
const index = n >>> TrieBlob.NodeChildRefShift;
|
|
311
|
+
const seq = n & TrieBlob.NodeMaskChildCharIndex;
|
|
312
|
+
const cAcc = acc.clone();
|
|
313
|
+
const codePoint = cAcc.decode(seq);
|
|
314
|
+
if (codePoint === undefined) {
|
|
315
|
+
mapNodeToAcc.set(nodes[index], cAcc);
|
|
316
|
+
}
|
|
317
|
+
return {
|
|
318
|
+
i: index,
|
|
319
|
+
c: codePoint && String.fromCodePoint(codePoint),
|
|
320
|
+
s: seq.toString(16).padStart(2, '0'),
|
|
321
|
+
};
|
|
322
|
+
}
|
|
323
|
+
return {
|
|
324
|
+
i,
|
|
325
|
+
w: (!!(node[0] & TrieBlob.NodeMaskEOW) && 1) || 0,
|
|
326
|
+
c: node.slice(1).map(mapChild),
|
|
327
|
+
};
|
|
259
328
|
}
|
|
260
|
-
|
|
261
|
-
return elements;
|
|
329
|
+
return nodes.map((n, i) => mapNode(n, i));
|
|
262
330
|
}
|
|
263
331
|
//# sourceMappingURL=FastTrieBlob.js.map
|