cspell-trie-lib 8.3.2 → 8.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,7 @@
1
1
  import { defaultTrieInfo } from '../constants.js';
2
2
  import { findNode } from '../ITrieNode/trie-util.js';
3
3
  import { mergeOptionalWithDefaults } from '../utils/mergeOptionalWithDefaults.js';
4
+ import { NumberSequenceByteDecoderAccumulator, NumberSequenceByteEncoderDecoder, } from './NumberSequenceByteDecoderAccumulator.js';
4
5
  import { TrieBlobInternals, TrieBlobIRoot } from './TrieBlobIRoot.js';
5
6
  const NodeHeaderNumChildrenBits = 8;
6
7
  const NodeHeaderNumChildrenShift = 0;
@@ -35,11 +36,14 @@ export class TrieBlob {
35
36
  _forbidIdx;
36
37
  _size;
37
38
  _iTrieRoot;
39
+ wordToCharacters;
38
40
  constructor(nodes, charIndex, info) {
39
41
  this.nodes = nodes;
40
42
  this.charIndex = charIndex;
41
43
  this.info = mergeOptionalWithDefaults(info);
44
+ this.wordToCharacters = (word) => [...word];
42
45
  this.charToIndexMap = Object.create(null);
46
+ Object.freeze(this.charIndex);
43
47
  for (let i = 0; i < charIndex.length; ++i) {
44
48
  const char = charIndex[i];
45
49
  this.charToIndexMap[char.normalize('NFC')] = i;
@@ -47,6 +51,15 @@ export class TrieBlob {
47
51
  }
48
52
  this._forbidIdx = this._lookupNode(0, this.info.forbiddenWordPrefix);
49
53
  }
54
+ _lookUpCharIndex = (char) => {
55
+ return this.charToIndexMap[char] || 0;
56
+ };
57
+ wordToNodeCharIndexSequence(word) {
58
+ return TrieBlob.charactersToCharIndexSequence(this.wordToCharacters(word), this._lookUpCharIndex);
59
+ }
60
+ letterToNodeCharIndexSequence(letter) {
61
+ return TrieBlob.toCharIndexSequence(this._lookUpCharIndex(letter));
62
+ }
50
63
  has(word) {
51
64
  return this._has(0, word);
52
65
  }
@@ -76,11 +89,11 @@ export class TrieBlob {
76
89
  const NodeMaskChildCharIndex = TrieBlob.NodeMaskChildCharIndex;
77
90
  const NodeChildRefShift = TrieBlob.NodeChildRefShift;
78
91
  const nodes = this.nodes;
79
- const len = word.length;
80
- const charToIndexMap = this.charToIndexMap;
92
+ const wordIndexes = this.wordToNodeCharIndexSequence(word);
93
+ const len = wordIndexes.length;
81
94
  let node = nodes[nodeIdx];
82
95
  for (let p = 0; p < len; ++p, node = nodes[nodeIdx]) {
83
- const letterIdx = charToIndexMap[word[p]];
96
+ const letterIdx = wordIndexes[p];
84
97
  const count = node & NodeMaskNumChildren;
85
98
  let i = count;
86
99
  for (; i > 0; --i) {
@@ -94,14 +107,39 @@ export class TrieBlob {
94
107
  }
95
108
  return (node & TrieBlob.NodeMaskEOW) === TrieBlob.NodeMaskEOW;
96
109
  }
110
+ /**
111
+ * Find the node index for the given character.
112
+ * @param nodeIdx - node index to start the search
113
+ * @param char - character to look for
114
+ * @returns the index of the node reached by following the character, or undefined if no such node exists.
115
+ */
97
116
  _lookupNode(nodeIdx, char) {
117
+ const indexSeq = this.letterToNodeCharIndexSequence(char);
118
+ const len = indexSeq.length;
119
+ if (!len)
120
+ return undefined;
121
+ let currNodeIdx = nodeIdx;
122
+ for (let i = 0; i < len; ++i) {
123
+ currNodeIdx = this._lookupNodeByCharIndexSeq(currNodeIdx, indexSeq[i]);
124
+ if (currNodeIdx === undefined) {
125
+ return undefined;
126
+ }
127
+ }
128
+ return currNodeIdx;
129
+ }
130
+ /**
131
+ * Find the node index for a single value of an encoded character index sequence.
132
+ * @param nodeIdx - node index to start the search
133
+ * @param char - character to look for
134
+ * @returns
135
+ */
136
+ _lookupNodeByCharIndexSeq(nodeIdx, index) {
98
137
  const NodeMaskNumChildren = TrieBlob.NodeMaskNumChildren;
99
138
  const NodeMaskChildCharIndex = TrieBlob.NodeMaskChildCharIndex;
100
139
  const NodeChildRefShift = TrieBlob.NodeChildRefShift;
101
140
  const nodes = this.nodes;
102
- const charToIndexMap = this.charToIndexMap;
103
141
  const node = nodes[nodeIdx];
104
- const letterIdx = charToIndexMap[char];
142
+ const letterIdx = index;
105
143
  const count = node & NodeMaskNumChildren;
106
144
  let i = count;
107
145
  for (; i > 0; --i) {
@@ -117,10 +155,12 @@ export class TrieBlob {
117
155
  const NodeMaskChildCharIndex = TrieBlob.NodeMaskChildCharIndex;
118
156
  const NodeChildRefShift = TrieBlob.NodeChildRefShift;
119
157
  const nodes = this.nodes;
120
- const stack = [{ nodeIdx: 0, pos: 0, word: '' }];
158
+ const stack = [
159
+ { nodeIdx: 0, pos: 0, word: '', acc: NumberSequenceByteDecoderAccumulator.create() },
160
+ ];
121
161
  let depth = 0;
122
162
  while (depth >= 0) {
123
- const { nodeIdx, pos, word } = stack[depth];
163
+ const { nodeIdx, pos, word, acc } = stack[depth];
124
164
  const node = nodes[nodeIdx];
125
165
  // pos is 0 when first entering a node
126
166
  if (!pos && node & NodeMaskEOW) {
@@ -133,13 +173,15 @@ export class TrieBlob {
133
173
  }
134
174
  const nextPos = ++stack[depth].pos;
135
175
  const entry = nodes[nodeIdx + nextPos];
136
- const charIdx = entry & NodeMaskChildCharIndex;
137
- const letter = this.charIndex[charIdx];
176
+ const nAcc = acc.clone();
177
+ const charIdx = nAcc.decode(entry & NodeMaskChildCharIndex);
178
+ const letter = (charIdx && this.charIndex[charIdx]) || '';
138
179
  ++depth;
139
180
  stack[depth] = {
140
181
  nodeIdx: entry >>> NodeChildRefShift,
141
182
  pos: 0,
142
183
  word: word + letter,
184
+ acc: nAcc,
143
185
  };
144
186
  }
145
187
  }
@@ -159,14 +201,14 @@ export class TrieBlob {
159
201
  }
160
202
  toJSON() {
161
203
  return {
162
- charIndex: this.charIndex,
163
204
  options: this.info,
164
- nodes: splitString(Buffer.from(this.nodes.buffer, 128).toString('base64')),
205
+ nodes: nodesToJson(this.nodes),
206
+ charIndex: this.charIndex,
165
207
  };
166
208
  }
167
209
  encodeBin() {
168
210
  const charIndex = Buffer.from(this.charIndex.join('\n'));
169
- const charIndexLen = (charIndex.byteLength + 3) & ~3;
211
+ const charIndexLen = (charIndex.byteLength + 3) & ~3; // round up to the nearest 4 byte boundary.
170
212
  const nodeOffset = HEADER_SIZE + charIndexLen;
171
213
  const size = nodeOffset + this.nodes.length * 4;
172
214
  const useLittle = isLittleEndian();
@@ -182,7 +224,8 @@ export class TrieBlob {
182
224
  header.setUint32(HEADER.charIndexLen, charIndex.length, useLittle);
183
225
  buffer.set(charIndex, HEADER_SIZE);
184
226
  buffer.set(nodeData, nodeOffset);
185
- // dumpBin(nodeData);
227
+ // console.log('encodeBin: %o', this.toJSON());
228
+ // console.log('encodeBin: buf %o nodes %o', buffer, this.nodes);
186
229
  return buffer;
187
230
  }
188
231
  static decodeBin(blob) {
@@ -205,14 +248,54 @@ export class TrieBlob {
205
248
  const charIndex = Buffer.from(blob.subarray(offsetCharIndex, offsetCharIndex + lenCharIndex))
206
249
  .toString('utf8')
207
250
  .split('\n');
208
- const nodes = new Uint32Array(blob.buffer).subarray(offsetNodes / 4, offsetNodes / 4 + lenNodes);
209
- return new TrieBlob(nodes, charIndex, defaultTrieInfo);
251
+ const nodes = new Uint32Array(blob.buffer, offsetNodes, lenNodes);
252
+ const trieBlob = new TrieBlob(nodes, charIndex, defaultTrieInfo);
253
+ // console.log('decodeBin: %o', trieBlob.toJSON());
254
+ return trieBlob;
210
255
  }
211
256
  static NodeMaskEOW = 0x00000100;
212
257
  static NodeMaskNumChildren = (1 << NodeHeaderNumChildrenBits) - 1;
213
258
  static NodeMaskNumChildrenShift = NodeHeaderNumChildrenShift;
214
259
  static NodeChildRefShift = 8;
260
+ /**
261
+ * Only 8 bits are reserved for the character index.
262
+ * The max index is {@link TrieBlob.SpecialCharIndexMask} - 1.
263
+ * Node chaining is used to reference higher character indexes.
264
+ * - @see {@link TrieBlob.SpecialCharIndexMask}
265
+ * - @see {@link TrieBlob.MaxCharIndex}
266
+ */
215
267
  static NodeMaskChildCharIndex = 0x000000ff;
268
+ /** SpecialCharIndexMask is used to indicate a node chain */
269
+ static SpecialCharIndexMask = 0xf8;
270
+ static MaxCharIndex = this.SpecialCharIndexMask - 1;
271
+ /**
272
+ * SpecialCharIndex8bit is used to indicate a node chain, where the final character index is 248 + the index found in the next node.
273
+ */
274
+ static SpecialCharIndex8bit = this.SpecialCharIndexMask | 0x01;
275
+ static SpecialCharIndex16bit = this.SpecialCharIndexMask | 0x02;
276
+ static SpecialCharIndex24bit = this.SpecialCharIndexMask | 0x03;
277
+ /**
278
+ * Since it is only possible to store single byte indexes, a multi-byte index is stored as a sequence of indexes chained between nodes.
279
+ * @param charIndex - character index to convert to a sequence of indexes
280
+ * @returns encoded index values.
281
+ */
282
+ static toCharIndexSequence(charIndex) {
283
+ return NumberSequenceByteEncoderDecoder.encode(charIndex);
284
+ }
285
+ static fromCharIndexSequence(charIndexes) {
286
+ return NumberSequenceByteEncoderDecoder.decodeSequence(charIndexes);
287
+ }
288
+ static charactersToCharIndexSequence(chars, charToIndexMap) {
289
+ const fn = typeof charToIndexMap === 'function' ? charToIndexMap : (c) => charToIndexMap[c];
290
+ return chars.map(fn).flatMap((c) => this.toCharIndexSequence(c));
291
+ }
292
+ static charIndexSequenceToCharacters(charIndexSequence, charIndex) {
293
+ const chars = [...this.fromCharIndexSequence(charIndexSequence)].map((c) => charIndex[c]);
294
+ return chars;
295
+ }
296
+ static nodesView(trie) {
297
+ return new Uint32Array(trie.nodes);
298
+ }
216
299
  }
217
300
  function isLittleEndian() {
218
301
  const buf = new Uint8Array([1, 2, 3, 4]);
@@ -234,11 +317,27 @@ class ErrorDecodeTrieBlob extends Error {
234
317
  super(message);
235
318
  }
236
319
  }
237
- function splitString(s, len = 64) {
238
- const splits = [];
239
- for (let i = 0; i < s.length; i += len) {
240
- splits.push(s.slice(i, i + len));
320
+ function nodesToJson(nodes) {
321
+ function nodeElement(offset) {
322
+ const node = nodes[offset];
323
+ const numChildren = node & TrieBlob.NodeMaskNumChildren;
324
+ const eow = !!(node & TrieBlob.NodeMaskEOW);
325
+ const children = [];
326
+ for (let i = 1; i <= numChildren; ++i) {
327
+ children.push({
328
+ c: ('00' + (nodes[offset + i] & TrieBlob.NodeMaskChildCharIndex).toString(16)).slice(-2),
329
+ o: nodes[offset + i] >>> TrieBlob.NodeChildRefShift,
330
+ });
331
+ }
332
+ return { id: offset, eow, n: offset + numChildren + 1, c: children };
333
+ }
334
+ const elements = [];
335
+ let offset = 0;
336
+ while (offset < nodes.length) {
337
+ const e = nodeElement(offset);
338
+ elements.push(e);
339
+ offset = e.n;
241
340
  }
242
- return splits;
341
+ return elements;
243
342
  }
244
343
  //# sourceMappingURL=TrieBlob.js.map
@@ -6,26 +6,34 @@ interface BitMaskInfo {
6
6
  readonly NodeMaskChildCharIndex: number;
7
7
  readonly NodeChildRefShift: number;
8
8
  }
9
+ type Node = number;
10
+ type NodeIndex = number;
9
11
  export declare class TrieBlobInternals implements BitMaskInfo {
10
12
  readonly nodes: Uint32Array;
11
- readonly charIndex: string[];
13
+ readonly charIndex: readonly string[];
12
14
  readonly charToIndexMap: Readonly<Record<string, number>>;
13
15
  readonly NodeMaskEOW: number;
14
16
  readonly NodeMaskNumChildren: number;
15
17
  readonly NodeMaskChildCharIndex: number;
16
18
  readonly NodeChildRefShift: number;
17
- constructor(nodes: Uint32Array, charIndex: string[], charToIndexMap: Readonly<Record<string, number>>, maskInfo: BitMaskInfo);
19
+ readonly isIndexDecoderNeeded: boolean;
20
+ constructor(nodes: Uint32Array, charIndex: readonly string[], charToIndexMap: Readonly<Record<string, number>>, maskInfo: BitMaskInfo);
18
21
  }
19
22
  declare class TrieBlobINode implements ITrieNode {
20
23
  readonly trie: TrieBlobInternals;
21
- readonly nodeIdx: number;
24
+ readonly nodeIdx: NodeIndex;
22
25
  readonly id: number;
23
- readonly size: number;
24
- readonly node: number;
26
+ readonly node: Node;
25
27
  readonly eow: boolean;
26
28
  private _keys;
27
- charToIdx: Record<string, number> | undefined;
28
- constructor(trie: TrieBlobInternals, nodeIdx: number);
29
+ private _count;
30
+ private _size;
31
+ private _chained;
32
+ private _nodesEntries;
33
+ private _entries;
34
+ private _values;
35
+ protected charToIdx: Readonly<Record<string, number>> | undefined;
36
+ constructor(trie: TrieBlobInternals, nodeIdx: NodeIndex);
29
37
  /** get keys to children */
30
38
  keys(): readonly string[];
31
39
  values(): readonly ITrieNode[];
@@ -36,6 +44,10 @@ declare class TrieBlobINode implements ITrieNode {
36
44
  hasChildren(): boolean;
37
45
  child(keyIdx: number): ITrieNode;
38
46
  getCharToIdxMap(): Record<string, number>;
47
+ private containsChainedIndexes;
48
+ private getNodesEntries;
49
+ private walkChainedIndexes;
50
+ get size(): number;
39
51
  }
40
52
  export declare class TrieBlobIRoot extends TrieBlobINode implements ITrieNodeRoot {
41
53
  readonly info: Readonly<TrieInfo>;
@@ -1,3 +1,5 @@
1
+ import { NumberSequenceByteDecoderAccumulator, NumberSequenceByteEncoderDecoder, } from './NumberSequenceByteDecoderAccumulator.js';
2
+ const SpecialCharIndexMask = NumberSequenceByteEncoderDecoder.SpecialCharIndexMask;
1
3
  export class TrieBlobInternals {
2
4
  nodes;
3
5
  charIndex;
@@ -6,6 +8,7 @@ export class TrieBlobInternals {
6
8
  NodeMaskNumChildren;
7
9
  NodeMaskChildCharIndex;
8
10
  NodeChildRefShift;
11
+ isIndexDecoderNeeded;
9
12
  constructor(nodes, charIndex, charToIndexMap, maskInfo) {
10
13
  this.nodes = nodes;
11
14
  this.charIndex = charIndex;
@@ -15,18 +18,25 @@ export class TrieBlobInternals {
15
18
  this.NodeMaskNumChildren = NodeMaskNumChildren;
16
19
  this.NodeMaskChildCharIndex = NodeMaskChildCharIndex;
17
20
  this.NodeChildRefShift = NodeChildRefShift;
21
+ this.isIndexDecoderNeeded = charIndex.length > NumberSequenceByteEncoderDecoder.MaxCharIndex;
18
22
  }
19
23
  }
20
24
  const EmptyKeys = Object.freeze([]);
21
25
  const EmptyNodes = Object.freeze([]);
26
+ const EmptyEntries = Object.freeze([]);
22
27
  class TrieBlobINode {
23
28
  trie;
24
29
  nodeIdx;
25
30
  id;
26
- size;
27
31
  node;
28
32
  eow;
29
33
  _keys;
34
+ _count;
35
+ _size;
36
+ _chained;
37
+ _nodesEntries;
38
+ _entries;
39
+ _values;
30
40
  charToIdx;
31
41
  constructor(trie, nodeIdx) {
32
42
  this.trie = trie;
@@ -34,46 +44,34 @@ class TrieBlobINode {
34
44
  const node = trie.nodes[nodeIdx];
35
45
  this.node = node;
36
46
  this.eow = !!(node & trie.NodeMaskEOW);
37
- this.size = node & trie.NodeMaskNumChildren;
47
+ this._count = node & trie.NodeMaskNumChildren;
38
48
  this.id = nodeIdx;
39
49
  }
40
50
  /** get keys to children */
41
51
  keys() {
42
52
  if (this._keys)
43
53
  return this._keys;
44
- if (!this.size)
54
+ if (!this._count)
45
55
  return EmptyKeys;
46
- const NodeMaskChildCharIndex = this.trie.NodeMaskChildCharIndex;
47
- const charIndex = this.trie.charIndex;
48
- const keys = Array(this.size);
49
- const offset = this.nodeIdx + 1;
50
- const len = this.size;
51
- for (let i = 0; i < len; ++i) {
52
- const entry = this.trie.nodes[i + offset];
53
- const charIdx = entry & NodeMaskChildCharIndex;
54
- keys[i] = charIndex[charIdx];
55
- }
56
- this._keys = keys;
57
- return keys;
56
+ this._keys = this.getNodesEntries().map(([key]) => key);
57
+ return this._keys;
58
58
  }
59
59
  values() {
60
- if (!this.size)
60
+ if (!this._count)
61
61
  return EmptyNodes;
62
- const nodes = Array(this.size);
63
- for (let i = 0; i < this.size; ++i) {
64
- nodes[i] = this.child(i);
65
- }
66
- return nodes;
62
+ if (this._values)
63
+ return this._values;
64
+ this._values = this.entries().map(([, value]) => value);
65
+ return this._values;
67
66
  }
68
67
  entries() {
69
- const keys = this.keys();
70
- const values = this.values();
71
- const len = keys.length;
72
- const entries = Array(len);
73
- for (let i = 0; i < len; ++i) {
74
- entries[i] = [keys[i], values[i]];
75
- }
76
- return entries;
68
+ if (this._entries)
69
+ return this._entries;
70
+ if (!this._count)
71
+ return EmptyEntries;
72
+ const entries = this.getNodesEntries();
73
+ this._entries = entries.map(([key, value]) => [key, new TrieBlobINode(this.trie, value)]);
74
+ return this._entries;
77
75
  }
78
76
  /** get child ITrieNode */
79
77
  get(char) {
@@ -87,12 +85,15 @@ class TrieBlobINode {
87
85
  return idx !== undefined;
88
86
  }
89
87
  hasChildren() {
90
- return this.size > 0;
88
+ return this._count > 0;
91
89
  }
92
90
  child(keyIdx) {
93
- const n = this.trie.nodes[this.nodeIdx + keyIdx + 1];
94
- const nodeIdx = n >>> this.trie.NodeChildRefShift;
95
- return new TrieBlobINode(this.trie, nodeIdx);
91
+ if (!this._values && !this.containsChainedIndexes()) {
92
+ const n = this.trie.nodes[this.nodeIdx + keyIdx + 1];
93
+ const nodeIdx = n >>> this.trie.NodeChildRefShift;
94
+ return new TrieBlobINode(this.trie, nodeIdx);
95
+ }
96
+ return this.values()[keyIdx];
96
97
  }
97
98
  getCharToIdxMap() {
98
99
  const m = this.charToIdx;
@@ -106,6 +107,101 @@ class TrieBlobINode {
106
107
  this.charToIdx = map;
107
108
  return map;
108
109
  }
110
+ containsChainedIndexes() {
111
+ if (this._chained !== undefined)
112
+ return this._chained;
113
+ if (!this._count || !this.trie.isIndexDecoderNeeded) {
114
+ this._chained = false;
115
+ return false;
116
+ }
117
+ // scan the node to see if there are encoded entries.
118
+ let found = false;
119
+ const NodeMaskChildCharIndex = this.trie.NodeMaskChildCharIndex;
120
+ const offset = this.nodeIdx + 1;
121
+ const nodes = this.trie.nodes;
122
+ const len = this._count;
123
+ for (let i = 0; i < len && !found; ++i) {
124
+ const entry = nodes[i + offset];
125
+ const charIdx = entry & NodeMaskChildCharIndex;
126
+ found = (charIdx & SpecialCharIndexMask) === SpecialCharIndexMask;
127
+ }
128
+ this._chained = !!found;
129
+ return this._chained;
130
+ }
131
+ getNodesEntries() {
132
+ if (this._nodesEntries)
133
+ return this._nodesEntries;
134
+ if (!this.containsChainedIndexes()) {
135
+ const entries = Array(this._count);
136
+ const nodes = this.trie.nodes;
137
+ const offset = this.nodeIdx + 1;
138
+ const charIndex = this.trie.charIndex;
139
+ const NodeMaskChildCharIndex = this.trie.NodeMaskChildCharIndex;
140
+ const RefShift = this.trie.NodeChildRefShift;
141
+ for (let i = 0; i < this._count; ++i) {
142
+ const entry = nodes[offset + i];
143
+ const charIdx = entry & NodeMaskChildCharIndex;
144
+ entries[i] = [charIndex[charIdx], entry >>> RefShift];
145
+ }
146
+ this._nodesEntries = entries;
147
+ return entries;
148
+ }
149
+ this._nodesEntries = this.walkChainedIndexes();
150
+ return this._nodesEntries;
151
+ }
152
+ walkChainedIndexes() {
153
+ const NodeMaskChildCharIndex = this.trie.NodeMaskChildCharIndex;
154
+ const NodeChildRefShift = this.trie.NodeChildRefShift;
155
+ const NodeMaskNumChildren = this.trie.NodeMaskNumChildren;
156
+ const nodes = this.trie.nodes;
157
+ const acc = NumberSequenceByteDecoderAccumulator.create();
158
+ const stack = [{ nodeIdx: this.nodeIdx + 1, lastIdx: this.nodeIdx + this._count, acc }];
159
+ let depth = 0;
160
+ const entries = Array(this._count);
161
+ let eIdx = 0;
162
+ const charIndex = this.trie.charIndex;
163
+ while (depth >= 0) {
164
+ const s = stack[depth];
165
+ const { nodeIdx, lastIdx } = s;
166
+ if (nodeIdx > lastIdx) {
167
+ --depth;
168
+ continue;
169
+ }
170
+ ++s.nodeIdx;
171
+ const entry = nodes[nodeIdx];
172
+ const charIdx = entry & NodeMaskChildCharIndex;
173
+ const acc = s.acc.clone();
174
+ const letterIdx = acc.decode(charIdx);
175
+ if (letterIdx !== undefined) {
176
+ const char = charIndex[letterIdx];
177
+ const nodeIdx = entry >>> NodeChildRefShift;
178
+ entries[eIdx++] = [char, nodeIdx];
179
+ continue;
180
+ }
181
+ const idx = entry >>> NodeChildRefShift;
182
+ const lIdx = idx + (nodes[idx] & NodeMaskNumChildren);
183
+ const ss = stack[++depth];
184
+ if (ss) {
185
+ ss.nodeIdx = idx + 1;
186
+ ss.lastIdx = lIdx;
187
+ ss.acc = acc;
188
+ }
189
+ else {
190
+ stack[depth] = { nodeIdx: idx + 1, lastIdx: lIdx, acc };
191
+ }
192
+ }
193
+ return entries;
194
+ }
195
+ get size() {
196
+ if (this._size === undefined) {
197
+ if (!this.containsChainedIndexes()) {
198
+ this._size = this._count;
199
+ return this._size;
200
+ }
201
+ this._size = this.getNodesEntries().length;
202
+ }
203
+ return this._size;
204
+ }
109
205
  }
110
206
  export class TrieBlobIRoot extends TrieBlobINode {
111
207
  info;
@@ -2,7 +2,7 @@ import type { ITrieNodeRoot } from '../ITrieNode/ITrieNode.js';
2
2
  import type { PartialTrieInfo } from '../ITrieNode/TrieInfo.js';
3
3
  import type { TrieData } from '../TrieData.js';
4
4
  import { TrieBlob } from './TrieBlob.js';
5
- export declare function createTrieBlob(words: string[], options?: PartialTrieInfo): TrieBlob;
5
+ export declare function createTrieBlob(words: readonly string[], options?: PartialTrieInfo): TrieBlob;
6
6
  export declare function createTrieBlobFromITrieNodeRoot(root: ITrieNodeRoot): TrieBlob;
7
7
  export declare function createTrieBlobFromTrieData(trie: TrieData): TrieBlob;
8
8
  //# sourceMappingURL=createTrieBlob.d.ts.map
@@ -2,6 +2,8 @@ import type { ITrieNode, ITrieNodeRoot } from './ITrieNode/ITrieNode.js';
2
2
  import type { TrieInfo } from './ITrieNode/TrieInfo.js';
3
3
  export interface TrieData {
4
4
  info: Readonly<TrieInfo>;
5
+ /** Method used to split words into individual characters. */
6
+ wordToCharacters(word: string): readonly string[];
5
7
  words(): Iterable<string>;
6
8
  getRoot(): ITrieNodeRoot;
7
9
  getNode(prefix: string): ITrieNode | undefined;
@@ -5,6 +5,7 @@ import { TrieNodeTrie } from './TrieNodeTrie.js';
5
5
  export declare class TrieNodeBuilder implements TrieBuilder<TrieNodeTrie> {
6
6
  private _cursor;
7
7
  root: TrieRoot;
8
+ wordToCharacters: (word: string) => string[];
8
9
  setOptions(options: Readonly<PartialTrieOptions>): Readonly<TrieOptions>;
9
10
  build(): TrieNodeTrie;
10
11
  getCursor(): BuilderCursor;
@@ -7,6 +7,7 @@ const EOW = Object.freeze({ f: 1, k: true });
7
7
  export class TrieNodeBuilder {
8
8
  _cursor;
9
9
  root = { ...defaultTrieInfo, c: Object.create(null) };
10
+ wordToCharacters = (word) => word.split('');
10
11
  setOptions(options) {
11
12
  const opts = mergeOptionalWithDefaults(options, this.root);
12
13
  Object.assign(this.root, opts);
@@ -8,6 +8,7 @@ export declare class TrieNodeTrie implements TrieData {
8
8
  readonly info: TrieOptions;
9
9
  private _size;
10
10
  constructor(root: TrieRoot);
11
+ wordToCharacters: (word: string) => string[];
11
12
  get iTrieRoot(): ITrieNodeRoot;
12
13
  getRoot(): ITrieNodeRoot;
13
14
  getNode(prefix: string): ITrieNode | undefined;
@@ -13,6 +13,7 @@ export class TrieNodeTrie {
13
13
  this.root = root;
14
14
  this.info = mergeOptionalWithDefaults(root);
15
15
  }
16
+ wordToCharacters = (word) => word.split('');
16
17
  get iTrieRoot() {
17
18
  return this._iTrieRoot || (this._iTrieRoot = trieRootToITrieRoot(this.root));
18
19
  }
@@ -50,4 +50,6 @@ export declare function stripAccents(characters: string): string;
50
50
  * @returns - only the accents.
51
51
  */
52
52
  export declare function stripNonAccents(characters: string): string;
53
+ export declare function isValidUtf16Character(char: string): boolean;
54
+ export declare function assertValidUtf16Character(char: string): void;
53
55
  //# sourceMappingURL=text.d.ts.map
@@ -99,4 +99,31 @@ export function stripAccents(characters) {
99
99
  export function stripNonAccents(characters) {
100
100
  return characters.normalize('NFD').replace(/[^\p{M}]/gu, '');
101
101
  }
102
+ export function isValidUtf16Character(char) {
103
+ const len = char.length;
104
+ const code = char.charCodeAt(0) & 0xfc00;
105
+ const valid = (len === 1 && (code & 0xf800) !== 0xd800) ||
106
+ (len === 2 && (code & 0xfc00) === 0xd800 && (char.charCodeAt(1) & 0xfc00) === 0xdc00);
107
+ return valid;
108
+ }
109
+ export function assertValidUtf16Character(char) {
110
+ if (!isValidUtf16Character(char)) {
111
+ const len = char.length;
112
+ const codes = char
113
+ .slice(0, 2)
114
+ .split('')
115
+ .map((c) => '0x' + ('0000' + c.charCodeAt(0).toString(16)).slice(-4));
116
+ let message;
117
+ if (len == 1) {
118
+ message = `Invalid utf16 character, lone surrogate: ${codes[0]}`;
119
+ }
120
+ else if (len == 2) {
121
+ message = `Invalid utf16 character, not a valid surrogate pair: [${codes.join(', ')}]`;
122
+ }
123
+ else {
124
+ message = `Invalid utf16 character, must be a single character, found: ${len}`;
125
+ }
126
+ throw new Error(message);
127
+ }
128
+ }
102
129
  //# sourceMappingURL=text.js.map
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "cspell-trie-lib",
3
- "version": "8.3.2",
3
+ "version": "8.4.0",
4
4
  "description": "Trie Data Structure to support cspell.",
5
5
  "type": "module",
6
6
  "sideEffects": false,
@@ -47,18 +47,18 @@
47
47
  },
48
48
  "homepage": "https://github.com/streetsidesoftware/cspell#readme",
49
49
  "dependencies": {
50
- "@cspell/cspell-pipe": "8.3.2",
51
- "@cspell/cspell-types": "8.3.2",
50
+ "@cspell/cspell-pipe": "8.4.0",
51
+ "@cspell/cspell-types": "8.4.0",
52
52
  "gensequence": "^6.0.0"
53
53
  },
54
54
  "engines": {
55
55
  "node": ">=18"
56
56
  },
57
57
  "devDependencies": {
58
- "@cspell/dict-en_us": "^4.3.13",
58
+ "@cspell/dict-en_us": "^4.3.16",
59
59
  "@cspell/dict-es-es": "^2.3.1",
60
60
  "@cspell/dict-nl-nl": "^2.3.0",
61
61
  "import-meta-resolve": "^4.0.0"
62
62
  },
63
- "gitHead": "98f622b2b12529f2d1ccf0f3a57991e4c08b3e3a"
63
+ "gitHead": "f9ad457ca2102c6642c377417a95a4415f5ec3d8"
64
64
  }