cspell-trie-lib 8.3.2 → 8.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/lib/Builder/TrieBuilder.d.ts +10 -0
- package/dist/lib/Builder/cursor-util.d.ts +1 -0
- package/dist/lib/Builder/cursor-util.js +18 -3
- package/dist/lib/ITrieNode/find.js +2 -2
- package/dist/lib/TrieBlob/FastTrieBlob.d.ts +26 -4
- package/dist/lib/TrieBlob/FastTrieBlob.js +98 -17
- package/dist/lib/TrieBlob/FastTrieBlobBuilder.d.ts +4 -1
- package/dist/lib/TrieBlob/FastTrieBlobBuilder.js +82 -12
- package/dist/lib/TrieBlob/FastTrieBlobIRoot.d.ts +17 -7
- package/dist/lib/TrieBlob/FastTrieBlobIRoot.js +129 -39
- package/dist/lib/TrieBlob/FastTrieBlobInternals.d.ts +3 -2
- package/dist/lib/TrieBlob/FastTrieBlobInternals.js +3 -0
- package/dist/lib/TrieBlob/NumberSequenceByteDecoderAccumulator.d.ts +34 -0
- package/dist/lib/TrieBlob/NumberSequenceByteDecoderAccumulator.js +120 -0
- package/dist/lib/TrieBlob/TrieBlob.d.ts +57 -4
- package/dist/lib/TrieBlob/TrieBlob.js +119 -20
- package/dist/lib/TrieBlob/TrieBlobIRoot.d.ts +19 -7
- package/dist/lib/TrieBlob/TrieBlobIRoot.js +129 -33
- package/dist/lib/TrieBlob/createTrieBlob.d.ts +1 -1
- package/dist/lib/TrieData.d.ts +2 -0
- package/dist/lib/TrieNode/TrieNodeBuilder.d.ts +1 -0
- package/dist/lib/TrieNode/TrieNodeBuilder.js +1 -0
- package/dist/lib/TrieNode/TrieNodeTrie.d.ts +1 -0
- package/dist/lib/TrieNode/TrieNodeTrie.js +1 -0
- package/dist/lib/utils/text.d.ts +2 -0
- package/dist/lib/utils/text.js +27 -0
- package/package.json +5 -5
|
@@ -2,6 +2,16 @@ import type { PartialTrieOptions, TrieOptions } from '../trie.js';
|
|
|
2
2
|
import type { TrieData } from '../TrieData.js';
|
|
3
3
|
import type { BuilderCursor } from './BuilderCursor.js';
|
|
4
4
|
export interface TrieBuilder<T extends TrieData> {
|
|
5
|
+
/**
|
|
6
|
+
* Use this method to convert a word into an array of characters.
|
|
7
|
+
* Since `[...word]` is not equal to `word.split('')` or `word[i]` in some cases,
|
|
8
|
+
* this method is used to ensure that the characters are split correctly.
|
|
9
|
+
* @see [String.codePointAt](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/codePointAt)
|
|
10
|
+
* @see [String.charCodeAt](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/charCodeAt)
|
|
11
|
+
* @param word - The word to convert into an array of characters.
|
|
12
|
+
* @returns An array of characters, one for each character in the word.
|
|
13
|
+
*/
|
|
14
|
+
wordToCharacters(word: string): string[];
|
|
5
15
|
getCursor(): BuilderCursor;
|
|
6
16
|
build(): T;
|
|
7
17
|
setOptions(options: Readonly<PartialTrieOptions>): Readonly<TrieOptions>;
|
|
@@ -1,3 +1,4 @@
|
|
|
1
1
|
import type { BuilderCursor } from './BuilderCursor.js';
|
|
2
2
|
export declare function insertWordsAtCursor(cursor: BuilderCursor, words: Iterable<string>): void;
|
|
3
|
+
export declare function commonStringPrefixLen(a: string, b: string): number;
|
|
3
4
|
//# sourceMappingURL=cursor-util.d.ts.map
|
|
@@ -1,10 +1,11 @@
|
|
|
1
1
|
export function insertWordsAtCursor(cursor, words) {
|
|
2
2
|
let prevWord = '';
|
|
3
3
|
for (const word of words) {
|
|
4
|
-
const pLen =
|
|
4
|
+
const pLen = commonStrPrefix(prevWord, word);
|
|
5
5
|
const stepBack = prevWord.length - pLen;
|
|
6
6
|
cursor.backStep(stepBack);
|
|
7
|
-
|
|
7
|
+
const wLen = word.length;
|
|
8
|
+
for (let i = pLen; i < wLen; ++i) {
|
|
8
9
|
cursor.insertChar(word[i]);
|
|
9
10
|
}
|
|
10
11
|
cursor.markEOW();
|
|
@@ -12,7 +13,21 @@ export function insertWordsAtCursor(cursor, words) {
|
|
|
12
13
|
}
|
|
13
14
|
cursor.backStep(prevWord.length);
|
|
14
15
|
}
|
|
15
|
-
function
|
|
16
|
+
export function commonStringPrefixLen(a, b) {
|
|
17
|
+
let i = 0;
|
|
18
|
+
for (i = 0; i < a.length && a[i] === b[i]; ++i) {
|
|
19
|
+
/* empty */
|
|
20
|
+
}
|
|
21
|
+
if (i) {
|
|
22
|
+
// detect second half of a surrogate pair and back up.
|
|
23
|
+
const c = a.charCodeAt(i) & 0xffff;
|
|
24
|
+
if (c >= 0xdc00 && c <= 0xdfff) {
|
|
25
|
+
--i;
|
|
26
|
+
}
|
|
27
|
+
}
|
|
28
|
+
return i;
|
|
29
|
+
}
|
|
30
|
+
function commonStrPrefix(a, b) {
|
|
16
31
|
let i = 0;
|
|
17
32
|
for (i = 0; i < a.length && a[i] === b[i]; ++i) {
|
|
18
33
|
/* empty */
|
|
@@ -186,10 +186,10 @@ export function isEndOfWordNode(n) {
|
|
|
186
186
|
return !!n?.eow;
|
|
187
187
|
}
|
|
188
188
|
function walk(root, word) {
|
|
189
|
-
const w = word;
|
|
189
|
+
const w = [...word];
|
|
190
190
|
let n = root;
|
|
191
191
|
let i = 0;
|
|
192
|
-
while (n && i <
|
|
192
|
+
while (n && i < w.length) {
|
|
193
193
|
const h = w[i++];
|
|
194
194
|
n = n.get(h);
|
|
195
195
|
}
|
|
@@ -6,21 +6,29 @@ import { FastTrieBlobInternals } from './FastTrieBlobInternals.js';
|
|
|
6
6
|
import { TrieBlob } from './TrieBlob.js';
|
|
7
7
|
export declare class FastTrieBlob implements TrieData {
|
|
8
8
|
private nodes;
|
|
9
|
-
private
|
|
9
|
+
private _charIndex;
|
|
10
10
|
readonly bitMasksInfo: FastTrieBlobBitMaskInfo;
|
|
11
|
-
private
|
|
11
|
+
private _charToIndexMap;
|
|
12
12
|
private _readonly;
|
|
13
13
|
private _forbidIdx;
|
|
14
14
|
private _iTrieRoot;
|
|
15
|
+
wordToCharacters: (word: string) => readonly string[];
|
|
15
16
|
readonly info: Readonly<TrieInfo>;
|
|
16
17
|
private constructor();
|
|
17
|
-
private
|
|
18
|
+
private _lookUpCharIndex;
|
|
19
|
+
private wordToNodeCharIndexSequence;
|
|
20
|
+
private letterToNodeCharIndexSequence;
|
|
18
21
|
has(word: string): boolean;
|
|
19
22
|
private _has;
|
|
20
23
|
words(): Iterable<string>;
|
|
21
24
|
toTrieBlob(): TrieBlob;
|
|
22
25
|
isReadonly(): boolean;
|
|
23
26
|
freeze(): this;
|
|
27
|
+
toJSON(): {
|
|
28
|
+
info: Readonly<TrieInfo>;
|
|
29
|
+
nodes: NodeElement[];
|
|
30
|
+
charIndex: readonly string[];
|
|
31
|
+
};
|
|
24
32
|
static create(data: FastTrieBlobInternals, options?: PartialTrieInfo): FastTrieBlob;
|
|
25
33
|
static toITrieNodeRoot(trie: FastTrieBlob): ITrieNodeRoot;
|
|
26
34
|
static NodeMaskEOW: number;
|
|
@@ -34,6 +42,20 @@ export declare class FastTrieBlob implements TrieData {
|
|
|
34
42
|
hasForbiddenWords(): boolean;
|
|
35
43
|
/** number of nodes */
|
|
36
44
|
get size(): number;
|
|
37
|
-
private
|
|
45
|
+
private _lookupCharIndexNode;
|
|
46
|
+
/** Search from nodeIdx for the node index representing the character. */
|
|
47
|
+
private _searchNodeForChar;
|
|
48
|
+
get charIndex(): readonly string[];
|
|
49
|
+
static fromTrieBlob(trie: TrieBlob): FastTrieBlob;
|
|
38
50
|
}
|
|
51
|
+
interface NodeElement {
|
|
52
|
+
id: number;
|
|
53
|
+
eow: boolean;
|
|
54
|
+
n: number;
|
|
55
|
+
c: {
|
|
56
|
+
c: number | string;
|
|
57
|
+
i: number;
|
|
58
|
+
}[];
|
|
59
|
+
}
|
|
60
|
+
export {};
|
|
39
61
|
//# sourceMappingURL=FastTrieBlob.d.ts.map
|
|
@@ -3,26 +3,35 @@ import { mergeOptionalWithDefaults } from '../utils/mergeOptionalWithDefaults.js
|
|
|
3
3
|
import { extractInfo } from './FastTrieBlobBitMaskInfo.js';
|
|
4
4
|
import { FastTrieBlobInternals } from './FastTrieBlobInternals.js';
|
|
5
5
|
import { FastTrieBlobIRoot } from './FastTrieBlobIRoot.js';
|
|
6
|
+
import { NumberSequenceByteDecoderAccumulator } from './NumberSequenceByteDecoderAccumulator.js';
|
|
6
7
|
import { TrieBlob } from './TrieBlob.js';
|
|
7
8
|
export class FastTrieBlob {
|
|
8
9
|
nodes;
|
|
9
|
-
|
|
10
|
+
_charIndex;
|
|
10
11
|
bitMasksInfo;
|
|
11
|
-
|
|
12
|
+
_charToIndexMap;
|
|
12
13
|
_readonly = false;
|
|
13
14
|
_forbidIdx;
|
|
14
15
|
_iTrieRoot;
|
|
16
|
+
wordToCharacters;
|
|
15
17
|
info;
|
|
16
|
-
constructor(nodes,
|
|
18
|
+
constructor(nodes, _charIndex, bitMasksInfo, options) {
|
|
17
19
|
this.nodes = nodes;
|
|
18
|
-
this.
|
|
20
|
+
this._charIndex = _charIndex;
|
|
19
21
|
this.bitMasksInfo = bitMasksInfo;
|
|
20
22
|
this.info = mergeOptionalWithDefaults(options);
|
|
21
|
-
this.
|
|
22
|
-
this.
|
|
23
|
+
this.wordToCharacters = (word) => [...word];
|
|
24
|
+
this._charToIndexMap = createCharToIndexMap(_charIndex);
|
|
25
|
+
this._forbidIdx = this._searchNodeForChar(0, this.info.forbiddenWordPrefix);
|
|
23
26
|
}
|
|
24
|
-
|
|
25
|
-
return this.
|
|
27
|
+
_lookUpCharIndex(char) {
|
|
28
|
+
return this._charToIndexMap[char] ?? -1;
|
|
29
|
+
}
|
|
30
|
+
wordToNodeCharIndexSequence(word) {
|
|
31
|
+
return TrieBlob.charactersToCharIndexSequence(this.wordToCharacters(word), (c) => this._lookUpCharIndex(c));
|
|
32
|
+
}
|
|
33
|
+
letterToNodeCharIndexSequence(letter) {
|
|
34
|
+
return TrieBlob.toCharIndexSequence(this._lookUpCharIndex(letter));
|
|
26
35
|
}
|
|
27
36
|
has(word) {
|
|
28
37
|
return this._has(0, word);
|
|
@@ -32,10 +41,11 @@ export class FastTrieBlob {
|
|
|
32
41
|
const NodeChildRefShift = this.bitMasksInfo.NodeChildRefShift;
|
|
33
42
|
const NodeMaskEOW = this.bitMasksInfo.NodeMaskEOW;
|
|
34
43
|
const nodes = this.nodes;
|
|
35
|
-
const
|
|
44
|
+
const charIndexes = this.wordToNodeCharIndexSequence(word);
|
|
45
|
+
const len = charIndexes.length;
|
|
36
46
|
let node = nodes[nodeIdx];
|
|
37
47
|
for (let p = 0; p < len; ++p, node = nodes[nodeIdx]) {
|
|
38
|
-
const letterIdx =
|
|
48
|
+
const letterIdx = charIndexes[p];
|
|
39
49
|
const count = node.length;
|
|
40
50
|
let i = count - 1;
|
|
41
51
|
for (; i > 0; --i) {
|
|
@@ -56,10 +66,11 @@ export class FastTrieBlob {
|
|
|
56
66
|
const NodeChildRefShift = this.bitMasksInfo.NodeChildRefShift;
|
|
57
67
|
const NodeMaskEOW = this.bitMasksInfo.NodeMaskEOW;
|
|
58
68
|
const nodes = this.nodes;
|
|
59
|
-
const
|
|
69
|
+
const accumulator = NumberSequenceByteDecoderAccumulator.create();
|
|
70
|
+
const stack = [{ nodeIdx: 0, pos: 0, word: '', accumulator }];
|
|
60
71
|
let depth = 0;
|
|
61
72
|
while (depth >= 0) {
|
|
62
|
-
const { nodeIdx, pos, word } = stack[depth];
|
|
73
|
+
const { nodeIdx, pos, word, accumulator } = stack[depth];
|
|
63
74
|
const node = nodes[nodeIdx];
|
|
64
75
|
if (!pos && node[0] & NodeMaskEOW) {
|
|
65
76
|
yield word;
|
|
@@ -71,12 +82,15 @@ export class FastTrieBlob {
|
|
|
71
82
|
const nextPos = ++stack[depth].pos;
|
|
72
83
|
const entry = node[nextPos];
|
|
73
84
|
const charIdx = entry & NodeMaskChildCharIndex;
|
|
74
|
-
const
|
|
85
|
+
const acc = accumulator.clone();
|
|
86
|
+
const letterIdx = acc.decode(charIdx);
|
|
87
|
+
const letter = (letterIdx && this._charIndex[letterIdx]) || '';
|
|
75
88
|
++depth;
|
|
76
89
|
stack[depth] = {
|
|
77
90
|
nodeIdx: entry >>> NodeChildRefShift,
|
|
78
91
|
pos: 0,
|
|
79
92
|
word: word + letter,
|
|
93
|
+
accumulator: acc,
|
|
80
94
|
};
|
|
81
95
|
}
|
|
82
96
|
}
|
|
@@ -111,7 +125,7 @@ export class FastTrieBlob {
|
|
|
111
125
|
binNodes[offset++] = (nodeToIndex[nodeRef] << refShift) | charIndex;
|
|
112
126
|
}
|
|
113
127
|
}
|
|
114
|
-
return new TrieBlob(binNodes, this.
|
|
128
|
+
return new TrieBlob(binNodes, this._charIndex, this.info);
|
|
115
129
|
}
|
|
116
130
|
isReadonly() {
|
|
117
131
|
return this._readonly;
|
|
@@ -120,11 +134,18 @@ export class FastTrieBlob {
|
|
|
120
134
|
this._readonly = true;
|
|
121
135
|
return this;
|
|
122
136
|
}
|
|
137
|
+
toJSON() {
|
|
138
|
+
return {
|
|
139
|
+
info: this.info,
|
|
140
|
+
nodes: nodesToJson(this.nodes),
|
|
141
|
+
charIndex: this._charIndex,
|
|
142
|
+
};
|
|
143
|
+
}
|
|
123
144
|
static create(data, options) {
|
|
124
145
|
return new FastTrieBlob(data.nodes, data.charIndex, extractInfo(data), options);
|
|
125
146
|
}
|
|
126
147
|
static toITrieNodeRoot(trie) {
|
|
127
|
-
return new FastTrieBlobIRoot(new FastTrieBlobInternals(trie.nodes, trie.
|
|
148
|
+
return new FastTrieBlobIRoot(new FastTrieBlobInternals(trie.nodes, trie._charIndex, trie._charToIndexMap, trie.bitMasksInfo), 0, trie.info);
|
|
128
149
|
}
|
|
129
150
|
static NodeMaskEOW = TrieBlob.NodeMaskEOW;
|
|
130
151
|
static NodeChildRefShift = TrieBlob.NodeChildRefShift;
|
|
@@ -153,12 +174,12 @@ export class FastTrieBlob {
|
|
|
153
174
|
get size() {
|
|
154
175
|
return this.nodes.length;
|
|
155
176
|
}
|
|
156
|
-
|
|
177
|
+
_lookupCharIndexNode(nodeIdx, charIndex) {
|
|
157
178
|
const NodeMaskChildCharIndex = this.bitMasksInfo.NodeMaskChildCharIndex;
|
|
158
179
|
const NodeChildRefShift = this.bitMasksInfo.NodeChildRefShift;
|
|
159
180
|
const nodes = this.nodes;
|
|
160
181
|
const node = nodes[nodeIdx];
|
|
161
|
-
const letterIdx =
|
|
182
|
+
const letterIdx = charIndex;
|
|
162
183
|
const count = node.length;
|
|
163
184
|
let i = count - 1;
|
|
164
185
|
for (; i > 0; --i) {
|
|
@@ -168,6 +189,54 @@ export class FastTrieBlob {
|
|
|
168
189
|
}
|
|
169
190
|
return 0;
|
|
170
191
|
}
|
|
192
|
+
/** Search from nodeIdx for the node index representing the character. */
|
|
193
|
+
_searchNodeForChar(nodeIdx, char) {
|
|
194
|
+
const charIndexes = this.letterToNodeCharIndexSequence(char);
|
|
195
|
+
let idx = nodeIdx;
|
|
196
|
+
for (let i = 0; i < charIndexes.length; ++i) {
|
|
197
|
+
idx = this._lookupCharIndexNode(idx, charIndexes[i]);
|
|
198
|
+
if (!idx)
|
|
199
|
+
return 0;
|
|
200
|
+
}
|
|
201
|
+
return idx;
|
|
202
|
+
}
|
|
203
|
+
get charIndex() {
|
|
204
|
+
return [...this._charIndex];
|
|
205
|
+
}
|
|
206
|
+
static fromTrieBlob(trie) {
|
|
207
|
+
const bitMasksInfo = {
|
|
208
|
+
NodeMaskEOW: TrieBlob.NodeMaskEOW,
|
|
209
|
+
NodeMaskChildCharIndex: TrieBlob.NodeMaskChildCharIndex,
|
|
210
|
+
NodeChildRefShift: TrieBlob.NodeChildRefShift,
|
|
211
|
+
};
|
|
212
|
+
const trieNodesBin = TrieBlob.nodesView(trie);
|
|
213
|
+
const nodeOffsets = [];
|
|
214
|
+
for (let offset = 0; offset < trieNodesBin.length; offset += (trieNodesBin[offset] & TrieBlob.NodeMaskNumChildren) + 1) {
|
|
215
|
+
nodeOffsets.push(offset);
|
|
216
|
+
}
|
|
217
|
+
const offsetToNodeIndex = new Map(nodeOffsets.map((offset, i) => [offset, i]));
|
|
218
|
+
const nodes = new Array(nodeOffsets.length);
|
|
219
|
+
for (let i = 0; i < nodes.length; ++i) {
|
|
220
|
+
const offset = nodeOffsets[i];
|
|
221
|
+
const n = trieNodesBin[offset];
|
|
222
|
+
const eow = n & TrieBlob.NodeMaskEOW;
|
|
223
|
+
const count = n & TrieBlob.NodeMaskNumChildren;
|
|
224
|
+
const node = new Array(count + 1);
|
|
225
|
+
node[0] = eow;
|
|
226
|
+
nodes[i] = node;
|
|
227
|
+
for (let j = 1; j <= count; ++j) {
|
|
228
|
+
const n = trieNodesBin[offset + j];
|
|
229
|
+
const charIndex = n & TrieBlob.NodeMaskChildCharIndex;
|
|
230
|
+
const nodeIndex = n >>> TrieBlob.NodeChildRefShift;
|
|
231
|
+
const idx = offsetToNodeIndex.get(nodeIndex);
|
|
232
|
+
if (idx === undefined) {
|
|
233
|
+
throw new Error(`Invalid node index ${nodeIndex}`);
|
|
234
|
+
}
|
|
235
|
+
node[j] = (idx << TrieBlob.NodeChildRefShift) | charIndex;
|
|
236
|
+
}
|
|
237
|
+
}
|
|
238
|
+
return new FastTrieBlob(nodes, trie.charIndex, bitMasksInfo, trie.info);
|
|
239
|
+
}
|
|
171
240
|
}
|
|
172
241
|
function createCharToIndexMap(charIndex) {
|
|
173
242
|
const map = Object.create(null);
|
|
@@ -178,4 +247,16 @@ function createCharToIndexMap(charIndex) {
|
|
|
178
247
|
}
|
|
179
248
|
return map;
|
|
180
249
|
}
|
|
250
|
+
function nodesToJson(nodes) {
|
|
251
|
+
function nodeElement(node, index) {
|
|
252
|
+
const eow = !!(node[0] & TrieBlob.NodeMaskEOW);
|
|
253
|
+
const children = node.slice(1).map((n) => ({
|
|
254
|
+
c: ('00' + (n & TrieBlob.NodeMaskChildCharIndex).toString(16)).slice(-2),
|
|
255
|
+
i: n >>> TrieBlob.NodeChildRefShift,
|
|
256
|
+
}));
|
|
257
|
+
return { id: index, eow, n: node.length, c: children };
|
|
258
|
+
}
|
|
259
|
+
const elements = nodes.map((n, i) => nodeElement(n, i));
|
|
260
|
+
return elements;
|
|
261
|
+
}
|
|
181
262
|
//# sourceMappingURL=FastTrieBlob.js.map
|
|
@@ -11,11 +11,14 @@ export declare class FastTrieBlobBuilder implements TrieBuilder<FastTrieBlob> {
|
|
|
11
11
|
private IdxEOW;
|
|
12
12
|
private _cursor;
|
|
13
13
|
private _options;
|
|
14
|
+
wordToCharacters: (word: string) => string[];
|
|
14
15
|
readonly bitMasksInfo: FastTrieBlobBitMaskInfo;
|
|
15
16
|
constructor(options?: PartialTrieInfo, bitMasksInfo?: FastTrieBlobBitMaskInfo);
|
|
16
17
|
setOptions(options: PartialTrieInfo): Readonly<TrieInfo>;
|
|
17
18
|
get options(): Readonly<TrieInfo>;
|
|
18
19
|
private getCharIndex;
|
|
20
|
+
private wordToNodeCharIndexSequence;
|
|
21
|
+
private letterToNodeCharIndexSequence;
|
|
19
22
|
insert(word: string | Iterable<string> | string[]): this;
|
|
20
23
|
getCursor(): BuilderCursor;
|
|
21
24
|
private createCursor;
|
|
@@ -24,7 +27,7 @@ export declare class FastTrieBlobBuilder implements TrieBuilder<FastTrieBlob> {
|
|
|
24
27
|
isReadonly(): boolean;
|
|
25
28
|
freeze(): this;
|
|
26
29
|
build(): FastTrieBlob;
|
|
27
|
-
static fromWordList(words: string[] | Iterable<string>, options?: PartialTrieInfo): FastTrieBlob;
|
|
30
|
+
static fromWordList(words: readonly string[] | Iterable<string>, options?: PartialTrieInfo): FastTrieBlob;
|
|
28
31
|
static fromTrieRoot(root: TrieRoot): FastTrieBlob;
|
|
29
32
|
static NodeMaskEOW: number;
|
|
30
33
|
static NodeChildRefShift: number;
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import { assert } from '../utils/assert.js';
|
|
2
2
|
import { mergeOptionalWithDefaults } from '../utils/mergeOptionalWithDefaults.js';
|
|
3
|
+
import { assertValidUtf16Character } from '../utils/text.js';
|
|
3
4
|
import { FastTrieBlob } from './FastTrieBlob.js';
|
|
4
5
|
import { FastTrieBlobInternals } from './FastTrieBlobInternals.js';
|
|
5
6
|
import { resolveMap } from './resolveMap.js';
|
|
@@ -12,6 +13,7 @@ export class FastTrieBlobBuilder {
|
|
|
12
13
|
IdxEOW;
|
|
13
14
|
_cursor;
|
|
14
15
|
_options;
|
|
16
|
+
wordToCharacters = (word) => [...word];
|
|
15
17
|
bitMasksInfo;
|
|
16
18
|
constructor(options, bitMasksInfo = FastTrieBlobBuilder.DefaultBitMaskInfo) {
|
|
17
19
|
this._options = mergeOptionalWithDefaults(options);
|
|
@@ -37,6 +39,12 @@ export class FastTrieBlobBuilder {
|
|
|
37
39
|
this.charToIndexMap[charNFD] = idx;
|
|
38
40
|
return idx;
|
|
39
41
|
}
|
|
42
|
+
wordToNodeCharIndexSequence(word) {
|
|
43
|
+
return TrieBlob.charactersToCharIndexSequence(this.wordToCharacters(word), (c) => this.getCharIndex(c));
|
|
44
|
+
}
|
|
45
|
+
letterToNodeCharIndexSequence(letter) {
|
|
46
|
+
return TrieBlob.toCharIndexSequence(this.getCharIndex(letter));
|
|
47
|
+
}
|
|
40
48
|
insert(word) {
|
|
41
49
|
if (this.isReadonly()) {
|
|
42
50
|
throw new Error('FastTrieBlob is readonly');
|
|
@@ -76,10 +84,43 @@ export class FastTrieBlobBuilder {
|
|
|
76
84
|
const eow = 1;
|
|
77
85
|
const eowShifted = eow << NodeChildRefShift;
|
|
78
86
|
const nodes = this.nodes;
|
|
79
|
-
const stack = [{ nodeIdx: 0, pos: 0 }];
|
|
87
|
+
const stack = [{ nodeIdx: 0, pos: 0, dCount: 1, ps: '' }];
|
|
80
88
|
let nodeIdx = 0;
|
|
81
89
|
let depth = 0;
|
|
82
90
|
const insertChar = (char) => {
|
|
91
|
+
const cc = char.charCodeAt(0) & 0xdc00;
|
|
92
|
+
// Work with partial surrogate pairs.
|
|
93
|
+
if (cc === 0xd800 && char.length == 1) {
|
|
94
|
+
// We have a high surrogate
|
|
95
|
+
const s = stack[depth];
|
|
96
|
+
const ns = stack[++depth];
|
|
97
|
+
if (ns) {
|
|
98
|
+
ns.nodeIdx = s.nodeIdx;
|
|
99
|
+
ns.pos = s.pos;
|
|
100
|
+
ns.dCount = 1;
|
|
101
|
+
ns.ps = char;
|
|
102
|
+
}
|
|
103
|
+
else {
|
|
104
|
+
stack[depth] = { nodeIdx: s.nodeIdx, pos: s.pos, dCount: 1, ps: char };
|
|
105
|
+
}
|
|
106
|
+
return;
|
|
107
|
+
}
|
|
108
|
+
if (stack[depth].ps) {
|
|
109
|
+
char = stack[depth].ps + char;
|
|
110
|
+
assertValidUtf16Character(char);
|
|
111
|
+
}
|
|
112
|
+
const indexSeq = this.letterToNodeCharIndexSequence(char);
|
|
113
|
+
for (let i = 0; i < indexSeq.length; ++i) {
|
|
114
|
+
insertCharIndexes(indexSeq[i], i + 1);
|
|
115
|
+
}
|
|
116
|
+
};
|
|
117
|
+
/**
|
|
118
|
+
* A single character can result in multiple nodes being created
|
|
119
|
+
* because it takes multiple bytes to represent a character.
|
|
120
|
+
* @param charIndex - partial character index.
|
|
121
|
+
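* @param dCount - 1-based position of `charIndex` within the character's index sequence; stored on the stack entry so that backStep can subtract it from the depth.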
|
|
122
|
+
*/
|
|
123
|
+
const insertCharIndexes = (charIndex, dCount) => {
|
|
83
124
|
// console.warn('i %o at %o', char, nodeIdx);
|
|
84
125
|
if (nodes[nodeIdx] && Object.isFrozen(nodes[nodeIdx])) {
|
|
85
126
|
nodeIdx = nodes.push([...nodes[nodeIdx]]) - 1;
|
|
@@ -91,7 +132,7 @@ export class FastTrieBlobBuilder {
|
|
|
91
132
|
}
|
|
92
133
|
const node = nodes[nodeIdx] ?? [0];
|
|
93
134
|
nodes[nodeIdx] = node;
|
|
94
|
-
const letterIdx =
|
|
135
|
+
const letterIdx = charIndex;
|
|
95
136
|
const hasIdx = childPos(node, letterIdx);
|
|
96
137
|
const childIdx = hasIdx ? node[hasIdx] >>> NodeChildRefShift : nodes.length;
|
|
97
138
|
const pos = hasIdx || node.push((childIdx << NodeChildRefShift) | letterIdx) - 1;
|
|
@@ -100,9 +141,11 @@ export class FastTrieBlobBuilder {
|
|
|
100
141
|
if (s) {
|
|
101
142
|
s.nodeIdx = nodeIdx;
|
|
102
143
|
s.pos = pos;
|
|
144
|
+
s.dCount = dCount;
|
|
145
|
+
s.ps = '';
|
|
103
146
|
}
|
|
104
147
|
else {
|
|
105
|
-
stack[depth] = { nodeIdx, pos };
|
|
148
|
+
stack[depth] = { nodeIdx, pos, dCount, ps: '' };
|
|
106
149
|
}
|
|
107
150
|
nodeIdx = childIdx;
|
|
108
151
|
};
|
|
@@ -141,7 +184,9 @@ export class FastTrieBlobBuilder {
|
|
|
141
184
|
return;
|
|
142
185
|
// console.warn('<< %o', num);
|
|
143
186
|
assert(num <= depth && num > 0);
|
|
144
|
-
|
|
187
|
+
for (; num > 0; --num) {
|
|
188
|
+
depth -= stack[depth].dCount;
|
|
189
|
+
}
|
|
145
190
|
nodeIdx = stack[depth + 1].nodeIdx;
|
|
146
191
|
};
|
|
147
192
|
const c = {
|
|
@@ -161,10 +206,11 @@ export class FastTrieBlobBuilder {
|
|
|
161
206
|
const NodeMaskEOW = this.bitMasksInfo.NodeMaskEOW;
|
|
162
207
|
const IdxEOW = this.IdxEOW;
|
|
163
208
|
const nodes = this.nodes;
|
|
164
|
-
const
|
|
209
|
+
const charIndexes = this.wordToNodeCharIndexSequence(word);
|
|
210
|
+
const len = charIndexes.length;
|
|
165
211
|
let nodeIdx = 0;
|
|
166
212
|
for (let p = 0; p < len; ++p) {
|
|
167
|
-
const letterIdx =
|
|
213
|
+
const letterIdx = charIndexes[p];
|
|
168
214
|
const node = nodes[nodeIdx];
|
|
169
215
|
const count = node.length;
|
|
170
216
|
let i = count - 1;
|
|
@@ -197,11 +243,12 @@ export class FastTrieBlobBuilder {
|
|
|
197
243
|
const NodeChildRefShift = this.bitMasksInfo.NodeChildRefShift;
|
|
198
244
|
const NodeMaskEOW = this.bitMasksInfo.NodeMaskEOW;
|
|
199
245
|
const nodes = this.nodes;
|
|
200
|
-
const
|
|
246
|
+
const charIndexes = this.wordToNodeCharIndexSequence(word);
|
|
247
|
+
const len = charIndexes.length;
|
|
201
248
|
let nodeIdx = 0;
|
|
202
249
|
let node = nodes[nodeIdx];
|
|
203
250
|
for (let p = 0; p < len; ++p, node = nodes[nodeIdx]) {
|
|
204
|
-
const letterIdx =
|
|
251
|
+
const letterIdx = charIndexes[p];
|
|
205
252
|
const count = node.length;
|
|
206
253
|
let i = count - 1;
|
|
207
254
|
for (; i > 0; --i) {
|
|
@@ -234,6 +281,7 @@ export class FastTrieBlobBuilder {
|
|
|
234
281
|
static fromTrieRoot(root) {
|
|
235
282
|
const bitMasksInfo = FastTrieBlobBuilder.DefaultBitMaskInfo;
|
|
236
283
|
const NodeChildRefShift = bitMasksInfo.NodeChildRefShift;
|
|
284
|
+
const NodeCharIndexMask = bitMasksInfo.NodeMaskChildCharIndex;
|
|
237
285
|
const NodeMaskEOW = bitMasksInfo.NodeMaskEOW;
|
|
238
286
|
const tf = new FastTrieBlobBuilder(undefined, bitMasksInfo);
|
|
239
287
|
const IdxEOW = tf.IdxEOW;
|
|
@@ -253,15 +301,37 @@ export class FastTrieBlobBuilder {
|
|
|
253
301
|
if (!n.c)
|
|
254
302
|
return nodeIdx;
|
|
255
303
|
const children = Object.entries(n.c);
|
|
256
|
-
node.length = children.length + 1;
|
|
257
304
|
for (let p = 0; p < children.length; ++p) {
|
|
258
305
|
const [char, childNode] = children[p];
|
|
259
|
-
|
|
260
|
-
const childIdx = walk(childNode);
|
|
261
|
-
node[p + 1] = (childIdx << NodeChildRefShift) | letterIdx;
|
|
306
|
+
addCharToNode(node, char, childNode);
|
|
262
307
|
}
|
|
263
308
|
return nodeIdx;
|
|
264
309
|
}
|
|
310
|
+
function resolveChild(node, charIndex) {
|
|
311
|
+
let i = 1;
|
|
312
|
+
for (i = 1; i < node.length && (node[i] & NodeCharIndexMask) !== charIndex; ++i) {
|
|
313
|
+
// empty
|
|
314
|
+
}
|
|
315
|
+
return i;
|
|
316
|
+
}
|
|
317
|
+
function addCharToNode(node, char, n) {
|
|
318
|
+
const indexSeq = tf.letterToNodeCharIndexSequence(char);
|
|
319
|
+
for (const idx of indexSeq.slice(0, -1)) {
|
|
320
|
+
const pos = resolveChild(node, idx);
|
|
321
|
+
if (pos < node.length) {
|
|
322
|
+
node = tf.nodes[node[pos] >>> NodeChildRefShift];
|
|
323
|
+
}
|
|
324
|
+
else {
|
|
325
|
+
const next = [0];
|
|
326
|
+
const nodeIdx = tf.nodes.push(next) - 1;
|
|
327
|
+
node[pos] = (nodeIdx << NodeChildRefShift) | idx;
|
|
328
|
+
node = next;
|
|
329
|
+
}
|
|
330
|
+
}
|
|
331
|
+
const letterIdx = indexSeq[indexSeq.length - 1];
|
|
332
|
+
const i = node.push(letterIdx) - 1;
|
|
333
|
+
node[i] = (walk(n) << NodeChildRefShift) | letterIdx;
|
|
334
|
+
}
|
|
265
335
|
walk(root);
|
|
266
336
|
return tf.build();
|
|
267
337
|
}
|
|
@@ -1,19 +1,25 @@
|
|
|
1
1
|
import type { ITrieNode, ITrieNodeId, ITrieNodeRoot } from '../ITrieNode/ITrieNode.js';
|
|
2
2
|
import type { TrieInfo } from '../ITrieNode/TrieInfo.js';
|
|
3
3
|
import type { FastTrieBlobInternals } from './FastTrieBlobInternals.js';
|
|
4
|
+
type Node = readonly number[];
|
|
5
|
+
type NodeIndex = number;
|
|
4
6
|
declare class FastTrieBlobINode implements ITrieNode {
|
|
5
7
|
readonly trie: FastTrieBlobInternals;
|
|
6
|
-
readonly nodeIdx:
|
|
8
|
+
readonly nodeIdx: NodeIndex;
|
|
7
9
|
readonly id: number;
|
|
8
|
-
readonly
|
|
9
|
-
readonly node: number[];
|
|
10
|
+
readonly node: Node;
|
|
10
11
|
readonly eow: boolean;
|
|
11
|
-
charToIdx: Record<string, number> | undefined;
|
|
12
12
|
private _keys;
|
|
13
|
-
|
|
14
|
-
|
|
13
|
+
private _count;
|
|
14
|
+
private _size;
|
|
15
|
+
private _chained;
|
|
16
|
+
private _nodesEntries;
|
|
17
|
+
private _entries;
|
|
18
|
+
private _values;
|
|
19
|
+
protected charToIdx: Readonly<Record<string, NodeIndex>> | undefined;
|
|
20
|
+
constructor(trie: FastTrieBlobInternals, nodeIdx: NodeIndex);
|
|
15
21
|
/** get keys to children */
|
|
16
|
-
|
|
22
|
+
keys(): readonly string[];
|
|
17
23
|
values(): readonly ITrieNode[];
|
|
18
24
|
entries(): readonly (readonly [string, ITrieNode])[];
|
|
19
25
|
/** get child ITrieNode */
|
|
@@ -22,6 +28,10 @@ declare class FastTrieBlobINode implements ITrieNode {
|
|
|
22
28
|
hasChildren(): boolean;
|
|
23
29
|
child(keyIdx: number): ITrieNode;
|
|
24
30
|
getCharToIdxMap(): Record<string, number>;
|
|
31
|
+
private containsChainedIndexes;
|
|
32
|
+
private getNodesEntries;
|
|
33
|
+
private walkChainedIndexes;
|
|
34
|
+
get size(): number;
|
|
25
35
|
}
|
|
26
36
|
export declare class FastTrieBlobIRoot extends FastTrieBlobINode implements ITrieNodeRoot {
|
|
27
37
|
readonly info: Readonly<TrieInfo>;
|