cspell-trie-lib 8.3.2 → 8.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/lib/Builder/TrieBuilder.d.ts +10 -0
- package/dist/lib/Builder/cursor-util.d.ts +1 -0
- package/dist/lib/Builder/cursor-util.js +18 -3
- package/dist/lib/ITrieNode/find.js +2 -2
- package/dist/lib/TrieBlob/FastTrieBlob.d.ts +26 -4
- package/dist/lib/TrieBlob/FastTrieBlob.js +98 -17
- package/dist/lib/TrieBlob/FastTrieBlobBuilder.d.ts +4 -1
- package/dist/lib/TrieBlob/FastTrieBlobBuilder.js +82 -12
- package/dist/lib/TrieBlob/FastTrieBlobIRoot.d.ts +17 -7
- package/dist/lib/TrieBlob/FastTrieBlobIRoot.js +129 -39
- package/dist/lib/TrieBlob/FastTrieBlobInternals.d.ts +3 -2
- package/dist/lib/TrieBlob/FastTrieBlobInternals.js +3 -0
- package/dist/lib/TrieBlob/NumberSequenceByteDecoderAccumulator.d.ts +34 -0
- package/dist/lib/TrieBlob/NumberSequenceByteDecoderAccumulator.js +120 -0
- package/dist/lib/TrieBlob/TrieBlob.d.ts +57 -4
- package/dist/lib/TrieBlob/TrieBlob.js +119 -20
- package/dist/lib/TrieBlob/TrieBlobIRoot.d.ts +19 -7
- package/dist/lib/TrieBlob/TrieBlobIRoot.js +129 -33
- package/dist/lib/TrieBlob/createTrieBlob.d.ts +1 -1
- package/dist/lib/TrieData.d.ts +2 -0
- package/dist/lib/TrieNode/TrieNodeBuilder.d.ts +1 -0
- package/dist/lib/TrieNode/TrieNodeBuilder.js +1 -0
- package/dist/lib/TrieNode/TrieNodeTrie.d.ts +1 -0
- package/dist/lib/TrieNode/TrieNodeTrie.js +1 -0
- package/dist/lib/utils/text.d.ts +2 -0
- package/dist/lib/utils/text.js +27 -0
- package/package.json +5 -5
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import { defaultTrieInfo } from '../constants.js';
|
|
2
2
|
import { findNode } from '../ITrieNode/trie-util.js';
|
|
3
3
|
import { mergeOptionalWithDefaults } from '../utils/mergeOptionalWithDefaults.js';
|
|
4
|
+
import { NumberSequenceByteDecoderAccumulator, NumberSequenceByteEncoderDecoder, } from './NumberSequenceByteDecoderAccumulator.js';
|
|
4
5
|
import { TrieBlobInternals, TrieBlobIRoot } from './TrieBlobIRoot.js';
|
|
5
6
|
const NodeHeaderNumChildrenBits = 8;
|
|
6
7
|
const NodeHeaderNumChildrenShift = 0;
|
|
@@ -35,11 +36,14 @@ export class TrieBlob {
|
|
|
35
36
|
_forbidIdx;
|
|
36
37
|
_size;
|
|
37
38
|
_iTrieRoot;
|
|
39
|
+
wordToCharacters;
|
|
38
40
|
constructor(nodes, charIndex, info) {
|
|
39
41
|
this.nodes = nodes;
|
|
40
42
|
this.charIndex = charIndex;
|
|
41
43
|
this.info = mergeOptionalWithDefaults(info);
|
|
44
|
+
this.wordToCharacters = (word) => [...word];
|
|
42
45
|
this.charToIndexMap = Object.create(null);
|
|
46
|
+
Object.freeze(this.charIndex);
|
|
43
47
|
for (let i = 0; i < charIndex.length; ++i) {
|
|
44
48
|
const char = charIndex[i];
|
|
45
49
|
this.charToIndexMap[char.normalize('NFC')] = i;
|
|
@@ -47,6 +51,15 @@ export class TrieBlob {
|
|
|
47
51
|
}
|
|
48
52
|
this._forbidIdx = this._lookupNode(0, this.info.forbiddenWordPrefix);
|
|
49
53
|
}
|
|
54
|
+
/**
 * Translate a character into its numeric entry in `charToIndexMap`.
 * A character that has no entry (or whose entry is falsy) resolves to 0.
 */
_lookUpCharIndex = (char) => {
    const found = this.charToIndexMap[char];
    return found ? found : 0;
};
|
|
57
|
+
wordToNodeCharIndexSequence(word) {
|
|
58
|
+
return TrieBlob.charactersToCharIndexSequence(this.wordToCharacters(word), this._lookUpCharIndex);
|
|
59
|
+
}
|
|
60
|
+
letterToNodeCharIndexSequence(letter) {
|
|
61
|
+
return TrieBlob.toCharIndexSequence(this._lookUpCharIndex(letter));
|
|
62
|
+
}
|
|
50
63
|
has(word) {
|
|
51
64
|
return this._has(0, word);
|
|
52
65
|
}
|
|
@@ -76,11 +89,11 @@ export class TrieBlob {
|
|
|
76
89
|
const NodeMaskChildCharIndex = TrieBlob.NodeMaskChildCharIndex;
|
|
77
90
|
const NodeChildRefShift = TrieBlob.NodeChildRefShift;
|
|
78
91
|
const nodes = this.nodes;
|
|
79
|
-
const
|
|
80
|
-
const
|
|
92
|
+
const wordIndexes = this.wordToNodeCharIndexSequence(word);
|
|
93
|
+
const len = wordIndexes.length;
|
|
81
94
|
let node = nodes[nodeIdx];
|
|
82
95
|
for (let p = 0; p < len; ++p, node = nodes[nodeIdx]) {
|
|
83
|
-
const letterIdx =
|
|
96
|
+
const letterIdx = wordIndexes[p];
|
|
84
97
|
const count = node & NodeMaskNumChildren;
|
|
85
98
|
let i = count;
|
|
86
99
|
for (; i > 0; --i) {
|
|
@@ -94,14 +107,39 @@ export class TrieBlob {
|
|
|
94
107
|
}
|
|
95
108
|
return (node & TrieBlob.NodeMaskEOW) === TrieBlob.NodeMaskEOW;
|
|
96
109
|
}
|
|
110
|
+
/**
|
|
111
|
+
* Find the node index for the given character.
|
|
112
|
+
* @param nodeIdx - node index to start the search
|
|
113
|
+
* @param char - character to look for
|
|
114
|
+
* @returns
|
|
115
|
+
*/
|
|
97
116
|
_lookupNode(nodeIdx, char) {
|
|
117
|
+
const indexSeq = this.letterToNodeCharIndexSequence(char);
|
|
118
|
+
const len = indexSeq.length;
|
|
119
|
+
if (!len)
|
|
120
|
+
return undefined;
|
|
121
|
+
let currNodeIdx = nodeIdx;
|
|
122
|
+
for (let i = 0; i < len; ++i) {
|
|
123
|
+
currNodeIdx = this._lookupNodeByCharIndexSeq(currNodeIdx, indexSeq[i]);
|
|
124
|
+
if (currNodeIdx === undefined) {
|
|
125
|
+
return undefined;
|
|
126
|
+
}
|
|
127
|
+
}
|
|
128
|
+
return currNodeIdx;
|
|
129
|
+
}
|
|
130
|
+
/**
|
|
131
|
+
* Find the node index for the given character.
|
|
132
|
+
* @param nodeIdx - node index to start the search
|
|
133
|
+
* @param char - character to look for
|
|
134
|
+
* @returns
|
|
135
|
+
*/
|
|
136
|
+
_lookupNodeByCharIndexSeq(nodeIdx, index) {
|
|
98
137
|
const NodeMaskNumChildren = TrieBlob.NodeMaskNumChildren;
|
|
99
138
|
const NodeMaskChildCharIndex = TrieBlob.NodeMaskChildCharIndex;
|
|
100
139
|
const NodeChildRefShift = TrieBlob.NodeChildRefShift;
|
|
101
140
|
const nodes = this.nodes;
|
|
102
|
-
const charToIndexMap = this.charToIndexMap;
|
|
103
141
|
const node = nodes[nodeIdx];
|
|
104
|
-
const letterIdx =
|
|
142
|
+
const letterIdx = index;
|
|
105
143
|
const count = node & NodeMaskNumChildren;
|
|
106
144
|
let i = count;
|
|
107
145
|
for (; i > 0; --i) {
|
|
@@ -117,10 +155,12 @@ export class TrieBlob {
|
|
|
117
155
|
const NodeMaskChildCharIndex = TrieBlob.NodeMaskChildCharIndex;
|
|
118
156
|
const NodeChildRefShift = TrieBlob.NodeChildRefShift;
|
|
119
157
|
const nodes = this.nodes;
|
|
120
|
-
const stack = [
|
|
158
|
+
const stack = [
|
|
159
|
+
{ nodeIdx: 0, pos: 0, word: '', acc: NumberSequenceByteDecoderAccumulator.create() },
|
|
160
|
+
];
|
|
121
161
|
let depth = 0;
|
|
122
162
|
while (depth >= 0) {
|
|
123
|
-
const { nodeIdx, pos, word } = stack[depth];
|
|
163
|
+
const { nodeIdx, pos, word, acc } = stack[depth];
|
|
124
164
|
const node = nodes[nodeIdx];
|
|
125
165
|
// pos is 0 when first entering a node
|
|
126
166
|
if (!pos && node & NodeMaskEOW) {
|
|
@@ -133,13 +173,15 @@ export class TrieBlob {
|
|
|
133
173
|
}
|
|
134
174
|
const nextPos = ++stack[depth].pos;
|
|
135
175
|
const entry = nodes[nodeIdx + nextPos];
|
|
136
|
-
const
|
|
137
|
-
const
|
|
176
|
+
const nAcc = acc.clone();
|
|
177
|
+
const charIdx = nAcc.decode(entry & NodeMaskChildCharIndex);
|
|
178
|
+
const letter = (charIdx && this.charIndex[charIdx]) || '';
|
|
138
179
|
++depth;
|
|
139
180
|
stack[depth] = {
|
|
140
181
|
nodeIdx: entry >>> NodeChildRefShift,
|
|
141
182
|
pos: 0,
|
|
142
183
|
word: word + letter,
|
|
184
|
+
acc: nAcc,
|
|
143
185
|
};
|
|
144
186
|
}
|
|
145
187
|
}
|
|
@@ -159,14 +201,14 @@ export class TrieBlob {
|
|
|
159
201
|
}
|
|
160
202
|
toJSON() {
|
|
161
203
|
return {
|
|
162
|
-
charIndex: this.charIndex,
|
|
163
204
|
options: this.info,
|
|
164
|
-
nodes:
|
|
205
|
+
nodes: nodesToJson(this.nodes),
|
|
206
|
+
charIndex: this.charIndex,
|
|
165
207
|
};
|
|
166
208
|
}
|
|
167
209
|
encodeBin() {
|
|
168
210
|
const charIndex = Buffer.from(this.charIndex.join('\n'));
|
|
169
|
-
const charIndexLen = (charIndex.byteLength + 3) & ~3;
|
|
211
|
+
const charIndexLen = (charIndex.byteLength + 3) & ~3; // round up to the nearest 4 byte boundary.
|
|
170
212
|
const nodeOffset = HEADER_SIZE + charIndexLen;
|
|
171
213
|
const size = nodeOffset + this.nodes.length * 4;
|
|
172
214
|
const useLittle = isLittleEndian();
|
|
@@ -182,7 +224,8 @@ export class TrieBlob {
|
|
|
182
224
|
header.setUint32(HEADER.charIndexLen, charIndex.length, useLittle);
|
|
183
225
|
buffer.set(charIndex, HEADER_SIZE);
|
|
184
226
|
buffer.set(nodeData, nodeOffset);
|
|
185
|
-
//
|
|
227
|
+
// console.log('encodeBin: %o', this.toJSON());
|
|
228
|
+
// console.log('encodeBin: buf %o nodes %o', buffer, this.nodes);
|
|
186
229
|
return buffer;
|
|
187
230
|
}
|
|
188
231
|
static decodeBin(blob) {
|
|
@@ -205,14 +248,54 @@ export class TrieBlob {
|
|
|
205
248
|
const charIndex = Buffer.from(blob.subarray(offsetCharIndex, offsetCharIndex + lenCharIndex))
|
|
206
249
|
.toString('utf8')
|
|
207
250
|
.split('\n');
|
|
208
|
-
const nodes = new Uint32Array(blob.buffer
|
|
209
|
-
|
|
251
|
+
const nodes = new Uint32Array(blob.buffer, offsetNodes, lenNodes);
|
|
252
|
+
const trieBlob = new TrieBlob(nodes, charIndex, defaultTrieInfo);
|
|
253
|
+
// console.log('decodeBin: %o', trieBlob.toJSON());
|
|
254
|
+
return trieBlob;
|
|
210
255
|
}
|
|
211
256
|
static NodeMaskEOW = 0x00000100;
|
|
212
257
|
static NodeMaskNumChildren = (1 << NodeHeaderNumChildrenBits) - 1;
|
|
213
258
|
static NodeMaskNumChildrenShift = NodeHeaderNumChildrenShift;
|
|
214
259
|
static NodeChildRefShift = 8;
|
|
260
|
+
/**
|
|
261
|
+
* Only 8 bits are reserved for the character index.
|
|
262
|
+
* The max index is {@link TrieBlob.SpecialCharIndexMask} - 1.
|
|
263
|
+
* Node chaining is used to reference higher character indexes.
|
|
264
|
+
* - @see {@link TrieBlob.SpecialCharIndexMask}
|
|
265
|
+
* - @see {@link TrieBlob.MaxCharIndex}
|
|
266
|
+
*/
|
|
215
267
|
static NodeMaskChildCharIndex = 0x000000ff;
|
|
268
|
+
/** SpecialCharIndexMask is used to indicate a node chain */
|
|
269
|
+
static SpecialCharIndexMask = 0xf8;
|
|
270
|
+
static MaxCharIndex = this.SpecialCharIndexMask - 1;
|
|
271
|
+
/**
|
|
272
|
+
* SpecialCharIndex8bit is used to indicate a node chain. Where the final character index is 248 + the index found in the next node.
|
|
273
|
+
*/
|
|
274
|
+
static SpecialCharIndex8bit = this.SpecialCharIndexMask | 0x01;
|
|
275
|
+
static SpecialCharIndex16bit = this.SpecialCharIndexMask | 0x02;
|
|
276
|
+
static SpecialCharIndex24bit = this.SpecialCharIndexMask | 0x03;
|
|
277
|
+
/**
|
|
278
|
+
* Since it is only possible to store single byte indexes, a multi-byte index is stored as a sequence of indexes chained between nodes.
|
|
279
|
+
* @param charIndex - character index to convert to a sequence of indexes
|
|
280
|
+
* @returns encoded index values.
|
|
281
|
+
*/
|
|
282
|
+
static toCharIndexSequence(charIndex) {
|
|
283
|
+
return NumberSequenceByteEncoderDecoder.encode(charIndex);
|
|
284
|
+
}
|
|
285
|
+
static fromCharIndexSequence(charIndexes) {
|
|
286
|
+
return NumberSequenceByteEncoderDecoder.decodeSequence(charIndexes);
|
|
287
|
+
}
|
|
288
|
+
static charactersToCharIndexSequence(chars, charToIndexMap) {
|
|
289
|
+
const fn = typeof charToIndexMap === 'function' ? charToIndexMap : (c) => charToIndexMap[c];
|
|
290
|
+
return chars.map(fn).flatMap((c) => this.toCharIndexSequence(c));
|
|
291
|
+
}
|
|
292
|
+
static charIndexSequenceToCharacters(charIndexSequence, charIndex) {
|
|
293
|
+
const chars = [...this.fromCharIndexSequence(charIndexSequence)].map((c) => charIndex[c]);
|
|
294
|
+
return chars;
|
|
295
|
+
}
|
|
296
|
+
static nodesView(trie) {
|
|
297
|
+
return new Uint32Array(trie.nodes);
|
|
298
|
+
}
|
|
216
299
|
}
|
|
217
300
|
function isLittleEndian() {
|
|
218
301
|
const buf = new Uint8Array([1, 2, 3, 4]);
|
|
@@ -234,11 +317,27 @@ class ErrorDecodeTrieBlob extends Error {
|
|
|
234
317
|
super(message);
|
|
235
318
|
}
|
|
236
319
|
}
|
|
237
|
-
function
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
320
|
+
/**
 * Convert the packed node array into a list of plain objects, one per node.
 *
 * Each element has:
 * - `id`: offset of the node header in `nodes`
 * - `eow`: whether the end-of-word bit is set in the header
 * - `n`: offset of the next node (header + children)
 * - `c`: the child entries, each with `c` (char index as 2 hex digits) and `o` (child node offset)
 *
 * @param nodes - the packed node values
 * @returns the list of node descriptions, in storage order.
 */
function nodesToJson(nodes) {
    const { NodeMaskNumChildren, NodeMaskEOW, NodeMaskChildCharIndex, NodeChildRefShift } = TrieBlob;
    const describeNode = (offset) => {
        const header = nodes[offset];
        const childCount = header & NodeMaskNumChildren;
        // Child entries are stored directly after the header.
        const children = Array.from({ length: childCount }, (_, k) => {
            const entry = nodes[offset + k + 1];
            const hex = (entry & NodeMaskChildCharIndex).toString(16);
            return { c: `00${hex}`.slice(-2), o: entry >>> NodeChildRefShift };
        });
        return {
            id: offset,
            eow: (header & NodeMaskEOW) !== 0,
            n: offset + childCount + 1,
            c: children,
        };
    };
    const elements = [];
    for (let offset = 0; offset < nodes.length;) {
        const element = describeNode(offset);
        elements.push(element);
        // Jump to the header of the next node.
        offset = element.n;
    }
    return elements;
}
|
|
244
343
|
//# sourceMappingURL=TrieBlob.js.map
|
|
@@ -6,26 +6,34 @@ interface BitMaskInfo {
|
|
|
6
6
|
readonly NodeMaskChildCharIndex: number;
|
|
7
7
|
readonly NodeChildRefShift: number;
|
|
8
8
|
}
|
|
9
|
+
type Node = number;
|
|
10
|
+
type NodeIndex = number;
|
|
9
11
|
export declare class TrieBlobInternals implements BitMaskInfo {
|
|
10
12
|
readonly nodes: Uint32Array;
|
|
11
|
-
readonly charIndex: string[];
|
|
13
|
+
readonly charIndex: readonly string[];
|
|
12
14
|
readonly charToIndexMap: Readonly<Record<string, number>>;
|
|
13
15
|
readonly NodeMaskEOW: number;
|
|
14
16
|
readonly NodeMaskNumChildren: number;
|
|
15
17
|
readonly NodeMaskChildCharIndex: number;
|
|
16
18
|
readonly NodeChildRefShift: number;
|
|
17
|
-
|
|
19
|
+
readonly isIndexDecoderNeeded: boolean;
|
|
20
|
+
constructor(nodes: Uint32Array, charIndex: readonly string[], charToIndexMap: Readonly<Record<string, number>>, maskInfo: BitMaskInfo);
|
|
18
21
|
}
|
|
19
22
|
declare class TrieBlobINode implements ITrieNode {
|
|
20
23
|
readonly trie: TrieBlobInternals;
|
|
21
|
-
readonly nodeIdx:
|
|
24
|
+
readonly nodeIdx: NodeIndex;
|
|
22
25
|
readonly id: number;
|
|
23
|
-
readonly
|
|
24
|
-
readonly node: number;
|
|
26
|
+
readonly node: Node;
|
|
25
27
|
readonly eow: boolean;
|
|
26
28
|
private _keys;
|
|
27
|
-
|
|
28
|
-
|
|
29
|
+
private _count;
|
|
30
|
+
private _size;
|
|
31
|
+
private _chained;
|
|
32
|
+
private _nodesEntries;
|
|
33
|
+
private _entries;
|
|
34
|
+
private _values;
|
|
35
|
+
protected charToIdx: Readonly<Record<string, number>> | undefined;
|
|
36
|
+
constructor(trie: TrieBlobInternals, nodeIdx: NodeIndex);
|
|
29
37
|
/** get keys to children */
|
|
30
38
|
keys(): readonly string[];
|
|
31
39
|
values(): readonly ITrieNode[];
|
|
@@ -36,6 +44,10 @@ declare class TrieBlobINode implements ITrieNode {
|
|
|
36
44
|
hasChildren(): boolean;
|
|
37
45
|
child(keyIdx: number): ITrieNode;
|
|
38
46
|
getCharToIdxMap(): Record<string, number>;
|
|
47
|
+
private containsChainedIndexes;
|
|
48
|
+
private getNodesEntries;
|
|
49
|
+
private walkChainedIndexes;
|
|
50
|
+
get size(): number;
|
|
39
51
|
}
|
|
40
52
|
export declare class TrieBlobIRoot extends TrieBlobINode implements ITrieNodeRoot {
|
|
41
53
|
readonly info: Readonly<TrieInfo>;
|
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
import { NumberSequenceByteDecoderAccumulator, NumberSequenceByteEncoderDecoder, } from './NumberSequenceByteDecoderAccumulator.js';
|
|
2
|
+
const SpecialCharIndexMask = NumberSequenceByteEncoderDecoder.SpecialCharIndexMask;
|
|
1
3
|
export class TrieBlobInternals {
|
|
2
4
|
nodes;
|
|
3
5
|
charIndex;
|
|
@@ -6,6 +8,7 @@ export class TrieBlobInternals {
|
|
|
6
8
|
NodeMaskNumChildren;
|
|
7
9
|
NodeMaskChildCharIndex;
|
|
8
10
|
NodeChildRefShift;
|
|
11
|
+
isIndexDecoderNeeded;
|
|
9
12
|
constructor(nodes, charIndex, charToIndexMap, maskInfo) {
|
|
10
13
|
this.nodes = nodes;
|
|
11
14
|
this.charIndex = charIndex;
|
|
@@ -15,18 +18,25 @@ export class TrieBlobInternals {
|
|
|
15
18
|
this.NodeMaskNumChildren = NodeMaskNumChildren;
|
|
16
19
|
this.NodeMaskChildCharIndex = NodeMaskChildCharIndex;
|
|
17
20
|
this.NodeChildRefShift = NodeChildRefShift;
|
|
21
|
+
this.isIndexDecoderNeeded = charIndex.length > NumberSequenceByteEncoderDecoder.MaxCharIndex;
|
|
18
22
|
}
|
|
19
23
|
}
|
|
20
24
|
const EmptyKeys = Object.freeze([]);
|
|
21
25
|
const EmptyNodes = Object.freeze([]);
|
|
26
|
+
const EmptyEntries = Object.freeze([]);
|
|
22
27
|
class TrieBlobINode {
|
|
23
28
|
trie;
|
|
24
29
|
nodeIdx;
|
|
25
30
|
id;
|
|
26
|
-
size;
|
|
27
31
|
node;
|
|
28
32
|
eow;
|
|
29
33
|
_keys;
|
|
34
|
+
_count;
|
|
35
|
+
_size;
|
|
36
|
+
_chained;
|
|
37
|
+
_nodesEntries;
|
|
38
|
+
_entries;
|
|
39
|
+
_values;
|
|
30
40
|
charToIdx;
|
|
31
41
|
constructor(trie, nodeIdx) {
|
|
32
42
|
this.trie = trie;
|
|
@@ -34,46 +44,34 @@ class TrieBlobINode {
|
|
|
34
44
|
const node = trie.nodes[nodeIdx];
|
|
35
45
|
this.node = node;
|
|
36
46
|
this.eow = !!(node & trie.NodeMaskEOW);
|
|
37
|
-
this.
|
|
47
|
+
this._count = node & trie.NodeMaskNumChildren;
|
|
38
48
|
this.id = nodeIdx;
|
|
39
49
|
}
|
|
40
50
|
/** get keys to children */
|
|
41
51
|
keys() {
|
|
42
52
|
if (this._keys)
|
|
43
53
|
return this._keys;
|
|
44
|
-
if (!this.
|
|
54
|
+
if (!this._count)
|
|
45
55
|
return EmptyKeys;
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
const keys = Array(this.size);
|
|
49
|
-
const offset = this.nodeIdx + 1;
|
|
50
|
-
const len = this.size;
|
|
51
|
-
for (let i = 0; i < len; ++i) {
|
|
52
|
-
const entry = this.trie.nodes[i + offset];
|
|
53
|
-
const charIdx = entry & NodeMaskChildCharIndex;
|
|
54
|
-
keys[i] = charIndex[charIdx];
|
|
55
|
-
}
|
|
56
|
-
this._keys = keys;
|
|
57
|
-
return keys;
|
|
56
|
+
this._keys = this.getNodesEntries().map(([key]) => key);
|
|
57
|
+
return this._keys;
|
|
58
58
|
}
|
|
59
59
|
values() {
|
|
60
|
-
if (!this.
|
|
60
|
+
if (!this._count)
|
|
61
61
|
return EmptyNodes;
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
return nodes;
|
|
62
|
+
if (this._values)
|
|
63
|
+
return this._values;
|
|
64
|
+
this._values = this.entries().map(([, value]) => value);
|
|
65
|
+
return this._values;
|
|
67
66
|
}
|
|
68
67
|
entries() {
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
return entries;
|
|
68
|
+
if (this._entries)
|
|
69
|
+
return this._entries;
|
|
70
|
+
if (!this._count)
|
|
71
|
+
return EmptyEntries;
|
|
72
|
+
const entries = this.getNodesEntries();
|
|
73
|
+
this._entries = entries.map(([key, value]) => [key, new TrieBlobINode(this.trie, value)]);
|
|
74
|
+
return this._entries;
|
|
77
75
|
}
|
|
78
76
|
/** get child ITrieNode */
|
|
79
77
|
get(char) {
|
|
@@ -87,12 +85,15 @@ class TrieBlobINode {
|
|
|
87
85
|
return idx !== undefined;
|
|
88
86
|
}
|
|
89
87
|
hasChildren() {
|
|
90
|
-
return this.
|
|
88
|
+
return this._count > 0;
|
|
91
89
|
}
|
|
92
90
|
child(keyIdx) {
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
91
|
+
if (!this._values && !this.containsChainedIndexes()) {
|
|
92
|
+
const n = this.trie.nodes[this.nodeIdx + keyIdx + 1];
|
|
93
|
+
const nodeIdx = n >>> this.trie.NodeChildRefShift;
|
|
94
|
+
return new TrieBlobINode(this.trie, nodeIdx);
|
|
95
|
+
}
|
|
96
|
+
return this.values()[keyIdx];
|
|
96
97
|
}
|
|
97
98
|
getCharToIdxMap() {
|
|
98
99
|
const m = this.charToIdx;
|
|
@@ -106,6 +107,101 @@ class TrieBlobINode {
|
|
|
106
107
|
this.charToIdx = map;
|
|
107
108
|
return map;
|
|
108
109
|
}
|
|
110
|
+
containsChainedIndexes() {
|
|
111
|
+
if (this._chained !== undefined)
|
|
112
|
+
return this._chained;
|
|
113
|
+
if (!this._count || !this.trie.isIndexDecoderNeeded) {
|
|
114
|
+
this._chained = false;
|
|
115
|
+
return false;
|
|
116
|
+
}
|
|
117
|
+
// scan the node to see if there are encoded entries.
|
|
118
|
+
let found = false;
|
|
119
|
+
const NodeMaskChildCharIndex = this.trie.NodeMaskChildCharIndex;
|
|
120
|
+
const offset = this.nodeIdx + 1;
|
|
121
|
+
const nodes = this.trie.nodes;
|
|
122
|
+
const len = this._count;
|
|
123
|
+
for (let i = 0; i < len && !found; ++i) {
|
|
124
|
+
const entry = nodes[i + offset];
|
|
125
|
+
const charIdx = entry & NodeMaskChildCharIndex;
|
|
126
|
+
found = (charIdx & SpecialCharIndexMask) === SpecialCharIndexMask;
|
|
127
|
+
}
|
|
128
|
+
this._chained = !!found;
|
|
129
|
+
return this._chained;
|
|
130
|
+
}
|
|
131
|
+
getNodesEntries() {
|
|
132
|
+
if (this._nodesEntries)
|
|
133
|
+
return this._nodesEntries;
|
|
134
|
+
if (!this.containsChainedIndexes()) {
|
|
135
|
+
const entries = Array(this._count);
|
|
136
|
+
const nodes = this.trie.nodes;
|
|
137
|
+
const offset = this.nodeIdx + 1;
|
|
138
|
+
const charIndex = this.trie.charIndex;
|
|
139
|
+
const NodeMaskChildCharIndex = this.trie.NodeMaskChildCharIndex;
|
|
140
|
+
const RefShift = this.trie.NodeChildRefShift;
|
|
141
|
+
for (let i = 0; i < this._count; ++i) {
|
|
142
|
+
const entry = nodes[offset + i];
|
|
143
|
+
const charIdx = entry & NodeMaskChildCharIndex;
|
|
144
|
+
entries[i] = [charIndex[charIdx], entry >>> RefShift];
|
|
145
|
+
}
|
|
146
|
+
this._nodesEntries = entries;
|
|
147
|
+
return entries;
|
|
148
|
+
}
|
|
149
|
+
this._nodesEntries = this.walkChainedIndexes();
|
|
150
|
+
return this._nodesEntries;
|
|
151
|
+
}
|
|
152
|
+
walkChainedIndexes() {
|
|
153
|
+
const NodeMaskChildCharIndex = this.trie.NodeMaskChildCharIndex;
|
|
154
|
+
const NodeChildRefShift = this.trie.NodeChildRefShift;
|
|
155
|
+
const NodeMaskNumChildren = this.trie.NodeMaskNumChildren;
|
|
156
|
+
const nodes = this.trie.nodes;
|
|
157
|
+
const acc = NumberSequenceByteDecoderAccumulator.create();
|
|
158
|
+
const stack = [{ nodeIdx: this.nodeIdx + 1, lastIdx: this.nodeIdx + this._count, acc }];
|
|
159
|
+
let depth = 0;
|
|
160
|
+
const entries = Array(this._count);
|
|
161
|
+
let eIdx = 0;
|
|
162
|
+
const charIndex = this.trie.charIndex;
|
|
163
|
+
while (depth >= 0) {
|
|
164
|
+
const s = stack[depth];
|
|
165
|
+
const { nodeIdx, lastIdx } = s;
|
|
166
|
+
if (nodeIdx > lastIdx) {
|
|
167
|
+
--depth;
|
|
168
|
+
continue;
|
|
169
|
+
}
|
|
170
|
+
++s.nodeIdx;
|
|
171
|
+
const entry = nodes[nodeIdx];
|
|
172
|
+
const charIdx = entry & NodeMaskChildCharIndex;
|
|
173
|
+
const acc = s.acc.clone();
|
|
174
|
+
const letterIdx = acc.decode(charIdx);
|
|
175
|
+
if (letterIdx !== undefined) {
|
|
176
|
+
const char = charIndex[letterIdx];
|
|
177
|
+
const nodeIdx = entry >>> NodeChildRefShift;
|
|
178
|
+
entries[eIdx++] = [char, nodeIdx];
|
|
179
|
+
continue;
|
|
180
|
+
}
|
|
181
|
+
const idx = entry >>> NodeChildRefShift;
|
|
182
|
+
const lIdx = idx + (nodes[idx] & NodeMaskNumChildren);
|
|
183
|
+
const ss = stack[++depth];
|
|
184
|
+
if (ss) {
|
|
185
|
+
ss.nodeIdx = idx + 1;
|
|
186
|
+
ss.lastIdx = lIdx;
|
|
187
|
+
ss.acc = acc;
|
|
188
|
+
}
|
|
189
|
+
else {
|
|
190
|
+
stack[depth] = { nodeIdx: idx + 1, lastIdx: lIdx, acc };
|
|
191
|
+
}
|
|
192
|
+
}
|
|
193
|
+
return entries;
|
|
194
|
+
}
|
|
195
|
+
get size() {
|
|
196
|
+
if (this._size === undefined) {
|
|
197
|
+
if (!this.containsChainedIndexes()) {
|
|
198
|
+
this._size = this._count;
|
|
199
|
+
return this._size;
|
|
200
|
+
}
|
|
201
|
+
this._size = this.getNodesEntries().length;
|
|
202
|
+
}
|
|
203
|
+
return this._size;
|
|
204
|
+
}
|
|
109
205
|
}
|
|
110
206
|
export class TrieBlobIRoot extends TrieBlobINode {
|
|
111
207
|
info;
|
|
@@ -2,7 +2,7 @@ import type { ITrieNodeRoot } from '../ITrieNode/ITrieNode.js';
|
|
|
2
2
|
import type { PartialTrieInfo } from '../ITrieNode/TrieInfo.js';
|
|
3
3
|
import type { TrieData } from '../TrieData.js';
|
|
4
4
|
import { TrieBlob } from './TrieBlob.js';
|
|
5
|
-
export declare function createTrieBlob(words: string[], options?: PartialTrieInfo): TrieBlob;
|
|
5
|
+
export declare function createTrieBlob(words: readonly string[], options?: PartialTrieInfo): TrieBlob;
|
|
6
6
|
export declare function createTrieBlobFromITrieNodeRoot(root: ITrieNodeRoot): TrieBlob;
|
|
7
7
|
export declare function createTrieBlobFromTrieData(trie: TrieData): TrieBlob;
|
|
8
8
|
//# sourceMappingURL=createTrieBlob.d.ts.map
|
package/dist/lib/TrieData.d.ts
CHANGED
|
@@ -2,6 +2,8 @@ import type { ITrieNode, ITrieNodeRoot } from './ITrieNode/ITrieNode.js';
|
|
|
2
2
|
import type { TrieInfo } from './ITrieNode/TrieInfo.js';
|
|
3
3
|
export interface TrieData {
|
|
4
4
|
info: Readonly<TrieInfo>;
|
|
5
|
+
/** Method used to split words into individual characters. */
|
|
6
|
+
wordToCharacters(word: string): readonly string[];
|
|
5
7
|
words(): Iterable<string>;
|
|
6
8
|
getRoot(): ITrieNodeRoot;
|
|
7
9
|
getNode(prefix: string): ITrieNode | undefined;
|
|
@@ -5,6 +5,7 @@ import { TrieNodeTrie } from './TrieNodeTrie.js';
|
|
|
5
5
|
export declare class TrieNodeBuilder implements TrieBuilder<TrieNodeTrie> {
|
|
6
6
|
private _cursor;
|
|
7
7
|
root: TrieRoot;
|
|
8
|
+
wordToCharacters: (word: string) => string[];
|
|
8
9
|
setOptions(options: Readonly<PartialTrieOptions>): Readonly<TrieOptions>;
|
|
9
10
|
build(): TrieNodeTrie;
|
|
10
11
|
getCursor(): BuilderCursor;
|
|
@@ -7,6 +7,7 @@ const EOW = Object.freeze({ f: 1, k: true });
|
|
|
7
7
|
export class TrieNodeBuilder {
|
|
8
8
|
_cursor;
|
|
9
9
|
root = { ...defaultTrieInfo, c: Object.create(null) };
|
|
10
|
+
// Splits the word with `split('')`, i.e. into UTF-16 code units, so a character outside the BMP yields two entries.
wordToCharacters = (word) => word.split('');
|
|
10
11
|
setOptions(options) {
|
|
11
12
|
const opts = mergeOptionalWithDefaults(options, this.root);
|
|
12
13
|
Object.assign(this.root, opts);
|
|
@@ -8,6 +8,7 @@ export declare class TrieNodeTrie implements TrieData {
|
|
|
8
8
|
readonly info: TrieOptions;
|
|
9
9
|
private _size;
|
|
10
10
|
constructor(root: TrieRoot);
|
|
11
|
+
wordToCharacters: (word: string) => string[];
|
|
11
12
|
get iTrieRoot(): ITrieNodeRoot;
|
|
12
13
|
getRoot(): ITrieNodeRoot;
|
|
13
14
|
getNode(prefix: string): ITrieNode | undefined;
|
package/dist/lib/utils/text.d.ts
CHANGED
|
@@ -50,4 +50,6 @@ export declare function stripAccents(characters: string): string;
|
|
|
50
50
|
* @returns - only the accents.
|
|
51
51
|
*/
|
|
52
52
|
export declare function stripNonAccents(characters: string): string;
|
|
53
|
+
export declare function isValidUtf16Character(char: string): boolean;
|
|
54
|
+
export declare function assertValidUtf16Character(char: string): void;
|
|
53
55
|
//# sourceMappingURL=text.d.ts.map
|
package/dist/lib/utils/text.js
CHANGED
|
@@ -99,4 +99,31 @@ export function stripAccents(characters) {
|
|
|
99
99
|
export function stripNonAccents(characters) {
|
|
100
100
|
return characters.normalize('NFD').replace(/[^\p{M}]/gu, '');
|
|
101
101
|
}
|
|
102
|
+
/**
 * Check that `char` is exactly one well-formed UTF-16 character:
 * either a single code unit that is not a surrogate, or a high surrogate
 * immediately followed by a low surrogate.
 * @param char - the string to check
 * @returns true only for a single, well-formed character.
 */
export function isValidUtf16Character(char) {
    const first = char.charCodeAt(0);
    switch (char.length) {
        case 1:
            // 0xd800-0xdfff is the surrogate range; a lone surrogate is not valid.
            return (first & 0xf800) !== 0xd800;
        case 2:
            // High surrogate (0xd800-0xdbff) followed by low surrogate (0xdc00-0xdfff).
            return (first & 0xfc00) === 0xd800 && (char.charCodeAt(1) & 0xfc00) === 0xdc00;
        default:
            return false;
    }
}
|
|
109
|
+
/**
 * Throw if `char` is not a single well-formed UTF-16 character.
 * The error message depends on the length of `char`:
 * a lone surrogate (length 1), an invalid surrogate pair (length 2),
 * or any other length.
 * @param char - the string to check
 * @throws Error when `isValidUtf16Character(char)` is false.
 */
export function assertValidUtf16Character(char) {
    if (isValidUtf16Character(char))
        return;
    const len = char.length;
    // Format the first two code units as 4-digit hex values for the message.
    const toHex = (unit) => '0x' + unit.charCodeAt(0).toString(16).padStart(4, '0');
    const codes = char.slice(0, 2).split('').map(toHex);
    switch (len) {
        case 1:
            throw new Error(`Invalid utf16 character, lone surrogate: ${codes[0]}`);
        case 2:
            throw new Error(`Invalid utf16 character, not a valid surrogate pair: [${codes.join(', ')}]`);
        default:
            throw new Error(`Invalid utf16 character, must be a single character, found: ${len}`);
    }
}
|
|
102
129
|
//# sourceMappingURL=text.js.map
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "cspell-trie-lib",
|
|
3
|
-
"version": "8.
|
|
3
|
+
"version": "8.4.0",
|
|
4
4
|
"description": "Trie Data Structure to support cspell.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"sideEffects": false,
|
|
@@ -47,18 +47,18 @@
|
|
|
47
47
|
},
|
|
48
48
|
"homepage": "https://github.com/streetsidesoftware/cspell#readme",
|
|
49
49
|
"dependencies": {
|
|
50
|
-
"@cspell/cspell-pipe": "8.
|
|
51
|
-
"@cspell/cspell-types": "8.
|
|
50
|
+
"@cspell/cspell-pipe": "8.4.0",
|
|
51
|
+
"@cspell/cspell-types": "8.4.0",
|
|
52
52
|
"gensequence": "^6.0.0"
|
|
53
53
|
},
|
|
54
54
|
"engines": {
|
|
55
55
|
"node": ">=18"
|
|
56
56
|
},
|
|
57
57
|
"devDependencies": {
|
|
58
|
-
"@cspell/dict-en_us": "^4.3.
|
|
58
|
+
"@cspell/dict-en_us": "^4.3.16",
|
|
59
59
|
"@cspell/dict-es-es": "^2.3.1",
|
|
60
60
|
"@cspell/dict-nl-nl": "^2.3.0",
|
|
61
61
|
"import-meta-resolve": "^4.0.0"
|
|
62
62
|
},
|
|
63
|
-
"gitHead": "
|
|
63
|
+
"gitHead": "f9ad457ca2102c6642c377417a95a4415f5ec3d8"
|
|
64
64
|
}
|