cspell-trie-lib 8.6.1 → 8.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/lib/Builder/TrieBuilder.d.ts +10 -0
- package/dist/lib/Builder/cursor-util.d.ts +1 -0
- package/dist/lib/Builder/cursor-util.js +19 -3
- package/dist/lib/ITrieNode/find.js +11 -12
- package/dist/lib/ITrieNode/walker/hintedWalker.js +3 -3
- package/dist/lib/ITrieNode/walker/walker.js +1 -1
- package/dist/lib/SimpleDictionaryParser.js +14 -10
- package/dist/lib/TrieBlob/FastTrieBlob.d.ts +26 -4
- package/dist/lib/TrieBlob/FastTrieBlob.js +99 -17
- package/dist/lib/TrieBlob/FastTrieBlobBuilder.d.ts +4 -1
- package/dist/lib/TrieBlob/FastTrieBlobBuilder.js +83 -12
- package/dist/lib/TrieBlob/FastTrieBlobIRoot.d.ts +17 -7
- package/dist/lib/TrieBlob/FastTrieBlobIRoot.js +129 -39
- package/dist/lib/TrieBlob/FastTrieBlobInternals.d.ts +3 -2
- package/dist/lib/TrieBlob/FastTrieBlobInternals.js +3 -0
- package/dist/lib/TrieBlob/NumberSequenceByteDecoderAccumulator.d.ts +34 -0
- package/dist/lib/TrieBlob/NumberSequenceByteDecoderAccumulator.js +129 -0
- package/dist/lib/TrieBlob/TrieBlob.d.ts +57 -4
- package/dist/lib/TrieBlob/TrieBlob.js +124 -29
- package/dist/lib/TrieBlob/TrieBlobIRoot.d.ts +19 -7
- package/dist/lib/TrieBlob/TrieBlobIRoot.js +129 -33
- package/dist/lib/TrieBlob/createTrieBlob.d.ts +1 -1
- package/dist/lib/TrieBuilder.js +18 -16
- package/dist/lib/TrieData.d.ts +2 -0
- package/dist/lib/TrieNode/TrieNodeBuilder.d.ts +1 -0
- package/dist/lib/TrieNode/TrieNodeBuilder.js +1 -0
- package/dist/lib/TrieNode/TrieNodeTrie.d.ts +1 -0
- package/dist/lib/TrieNode/TrieNodeTrie.js +1 -0
- package/dist/lib/TrieNode/find.js +9 -10
- package/dist/lib/convertToTrieRefNodes.js +2 -2
- package/dist/lib/distance/distanceAStarWeighted.js +1 -1
- package/dist/lib/distance/formatResultEx.js +1 -1
- package/dist/lib/distance/levenshtein.js +2 -5
- package/dist/lib/distance/weightedMaps.js +1 -1
- package/dist/lib/io/decode.js +1 -1
- package/dist/lib/io/importExport.js +1 -1
- package/dist/lib/io/importExportV1.js +11 -8
- package/dist/lib/io/importExportV2.js +12 -7
- package/dist/lib/io/importExportV3.js +16 -7
- package/dist/lib/io/importExportV4.js +29 -14
- package/dist/lib/io/importV3.js +2 -2
- package/dist/lib/mappers/mapDictionaryInfoToWeightMap.js +1 -1
- package/dist/lib/mappers/mapHunspellInformation.js +1 -1
- package/dist/lib/suggestions/orthography.js +2 -2
- package/dist/lib/suggestions/suggest.js +5 -3
- package/dist/lib/suggestions/suggestAStar.js +6 -6
- package/dist/lib/utils/PairingHeap.d.ts +1 -1
- package/dist/lib/utils/PairingHeap.js +1 -1
- package/dist/lib/utils/normalizeWord.js +2 -2
- package/dist/lib/utils/text.d.ts +2 -0
- package/dist/lib/utils/text.js +39 -7
- package/dist/lib/utils/timer.js +1 -1
- package/dist/lib/utils/util.js +1 -1
- package/dist/lib/walker/hintedWalker.js +3 -3
- package/dist/lib/walker/walker.js +1 -1
- package/package.json +10 -9
|
@@ -2,6 +2,16 @@ import type { PartialTrieOptions, TrieOptions } from '../trie.js';
|
|
|
2
2
|
import type { TrieData } from '../TrieData.js';
|
|
3
3
|
import type { BuilderCursor } from './BuilderCursor.js';
|
|
4
4
|
export interface TrieBuilder<T extends TrieData> {
|
|
5
|
+
/**
|
|
6
|
+
* Use this method to convert a word into an array of characters.
|
|
7
|
+
* Since `[...word]` is not equal to `word.split('')` or `word[i]` in some cases,
|
|
8
|
+
* this method is used to ensure that the characters are split correctly.
|
|
9
|
+
* @see [String.codePointAt](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/codePointAt)
|
|
10
|
+
* @see [String.charCodeAt](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/charCodeAt)
|
|
11
|
+
* @param word - The word to convert into an array of characters.
|
|
12
|
+
* @returns An array of characters, one for each character in the word.
|
|
13
|
+
*/
|
|
14
|
+
wordToCharacters(word: string): string[];
|
|
5
15
|
getCursor(): BuilderCursor;
|
|
6
16
|
build(): T;
|
|
7
17
|
setOptions(options: Readonly<PartialTrieOptions>): Readonly<TrieOptions>;
|
|
@@ -1,3 +1,4 @@
|
|
|
1
1
|
import type { BuilderCursor } from './BuilderCursor.js';
|
|
2
2
|
export declare function insertWordsAtCursor(cursor: BuilderCursor, words: Iterable<string>): void;
|
|
3
|
+
export declare function commonStringPrefixLen(a: string, b: string): number;
|
|
3
4
|
//# sourceMappingURL=cursor-util.d.ts.map
|
|
@@ -1,10 +1,11 @@
|
|
|
1
1
|
export function insertWordsAtCursor(cursor, words) {
|
|
2
2
|
let prevWord = '';
|
|
3
3
|
for (const word of words) {
|
|
4
|
-
const pLen =
|
|
4
|
+
const pLen = commonStrPrefix(prevWord, word);
|
|
5
5
|
const stepBack = prevWord.length - pLen;
|
|
6
6
|
cursor.backStep(stepBack);
|
|
7
|
-
|
|
7
|
+
const wLen = word.length;
|
|
8
|
+
for (let i = pLen; i < wLen; ++i) {
|
|
8
9
|
cursor.insertChar(word[i]);
|
|
9
10
|
}
|
|
10
11
|
cursor.markEOW();
|
|
@@ -12,7 +13,22 @@ export function insertWordsAtCursor(cursor, words) {
|
|
|
12
13
|
}
|
|
13
14
|
cursor.backStep(prevWord.length);
|
|
14
15
|
}
|
|
15
|
-
function
|
|
16
|
+
export function commonStringPrefixLen(a, b) {
|
|
17
|
+
let i = 0;
|
|
18
|
+
for (i = 0; i < a.length && a[i] === b[i]; ++i) {
|
|
19
|
+
/* empty */
|
|
20
|
+
}
|
|
21
|
+
if (i) {
|
|
22
|
+
// detect second half of a surrogate pair and backup.
|
|
23
|
+
// eslint-disable-next-line unicorn/prefer-code-point
|
|
24
|
+
const c = a.charCodeAt(i) & 0xffff;
|
|
25
|
+
if (c >= 0xdc00 && c <= 0xdfff) {
|
|
26
|
+
--i;
|
|
27
|
+
}
|
|
28
|
+
}
|
|
29
|
+
return i;
|
|
30
|
+
}
|
|
31
|
+
function commonStrPrefix(a, b) {
|
|
16
32
|
let i = 0;
|
|
17
33
|
for (i = 0; i < a.length && a[i] === b[i]; ++i) {
|
|
18
34
|
/* empty */
|
|
@@ -77,12 +77,15 @@ function _findWordNode(root, word, options) {
|
|
|
77
77
|
return result;
|
|
78
78
|
}
|
|
79
79
|
switch (compoundMode) {
|
|
80
|
-
case 'none':
|
|
80
|
+
case 'none': {
|
|
81
81
|
return options.matchCase ? __findExact() : __findCompound();
|
|
82
|
-
|
|
82
|
+
}
|
|
83
|
+
case 'compound': {
|
|
83
84
|
return __findCompound();
|
|
84
|
-
|
|
85
|
+
}
|
|
86
|
+
case 'legacy': {
|
|
85
87
|
return findLegacyCompound(root, word, options);
|
|
88
|
+
}
|
|
86
89
|
}
|
|
87
90
|
}
|
|
88
91
|
export function findLegacyCompound(root, word, options) {
|
|
@@ -120,7 +123,6 @@ export function findCompoundNode(root, word, compoundCharacter, ignoreCasePrefix
|
|
|
120
123
|
let caseMatched = true;
|
|
121
124
|
let i = 0;
|
|
122
125
|
let node;
|
|
123
|
-
// eslint-disable-next-line no-constant-condition
|
|
124
126
|
while (true) {
|
|
125
127
|
const s = stack[i];
|
|
126
128
|
const h = w[i++];
|
|
@@ -149,11 +151,9 @@ export function findCompoundNode(root, word, compoundCharacter, ignoreCasePrefix
|
|
|
149
151
|
if (!r.cr) {
|
|
150
152
|
break;
|
|
151
153
|
}
|
|
152
|
-
if (!i && !r.caseMatched) {
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
break;
|
|
156
|
-
}
|
|
154
|
+
if (!i && !r.caseMatched && w !== w.toLowerCase()) {
|
|
155
|
+
// It is not going to be found.
|
|
156
|
+
break;
|
|
157
157
|
}
|
|
158
158
|
}
|
|
159
159
|
else {
|
|
@@ -186,10 +186,10 @@ export function isEndOfWordNode(n) {
|
|
|
186
186
|
return !!n?.eow;
|
|
187
187
|
}
|
|
188
188
|
function walk(root, word) {
|
|
189
|
-
const w = word;
|
|
189
|
+
const w = [...word];
|
|
190
190
|
let n = root;
|
|
191
191
|
let i = 0;
|
|
192
|
-
while (n && i <
|
|
192
|
+
while (n && i < w.length) {
|
|
193
193
|
const h = w[i++];
|
|
194
194
|
n = n.get(h);
|
|
195
195
|
}
|
|
@@ -208,7 +208,6 @@ function findLegacyCompoundNode(roots, word, minCompoundLength) {
|
|
|
208
208
|
let caseMatched = true;
|
|
209
209
|
let i = 0;
|
|
210
210
|
let node;
|
|
211
|
-
// eslint-disable-next-line no-constant-condition
|
|
212
211
|
while (true) {
|
|
213
212
|
const s = stack[i];
|
|
214
213
|
const h = w[i++];
|
|
@@ -20,11 +20,11 @@ function* hintedWalkerNext(root, ignoreCase, hint, compoundingMethod, emitWordSe
|
|
|
20
20
|
const roots = rawRoots.map(filterRoot);
|
|
21
21
|
const compoundRoots = rawRoots.map((r) => r.get(compoundCharacter)).filter(isDefined);
|
|
22
22
|
const setOfCompoundRoots = new Set(compoundRoots);
|
|
23
|
-
const rootsForCompoundMethods = roots
|
|
23
|
+
const rootsForCompoundMethods = [...roots, ...compoundRoots];
|
|
24
24
|
const compoundMethodRoots = {
|
|
25
25
|
[CompoundWordsMethod.NONE]: [],
|
|
26
|
-
[CompoundWordsMethod.JOIN_WORDS]:
|
|
27
|
-
[CompoundWordsMethod.SEPARATE_WORDS]:
|
|
26
|
+
[CompoundWordsMethod.JOIN_WORDS]: rootsForCompoundMethods.map((r) => [JOIN_SEPARATOR, r]),
|
|
27
|
+
[CompoundWordsMethod.SEPARATE_WORDS]: rootsForCompoundMethods.map((r) => [WORD_SEPARATOR, r]),
|
|
28
28
|
};
|
|
29
29
|
function* children(n, hintOffset) {
|
|
30
30
|
if (n.hasChildren()) {
|
|
@@ -14,7 +14,7 @@ function* compoundWalker(root, compoundingMethod) {
|
|
|
14
14
|
function children(n) {
|
|
15
15
|
if (n.hasChildren()) {
|
|
16
16
|
const c = n.keys().map((k, i) => [k, n.child(i)]);
|
|
17
|
-
return n.eow && rc ? c
|
|
17
|
+
return n.eow && rc ? [...c, ...rc] : c;
|
|
18
18
|
}
|
|
19
19
|
if (n.eow) {
|
|
20
20
|
return roots[compoundingMethod];
|
|
@@ -52,18 +52,22 @@ export function createDictionaryLineParserMapper(options) {
|
|
|
52
52
|
.filter((a) => !!a);
|
|
53
53
|
for (const flag of flags) {
|
|
54
54
|
switch (flag) {
|
|
55
|
-
case 'split':
|
|
55
|
+
case 'split': {
|
|
56
56
|
split = true;
|
|
57
57
|
break;
|
|
58
|
-
|
|
58
|
+
}
|
|
59
|
+
case 'no-split': {
|
|
59
60
|
split = false;
|
|
60
61
|
break;
|
|
61
|
-
|
|
62
|
+
}
|
|
63
|
+
case 'no-generate-alternatives': {
|
|
62
64
|
stripCaseAndAccents = false;
|
|
63
65
|
break;
|
|
64
|
-
|
|
66
|
+
}
|
|
67
|
+
case 'generate-alternatives': {
|
|
65
68
|
stripCaseAndAccents = true;
|
|
66
69
|
break;
|
|
70
|
+
}
|
|
67
71
|
}
|
|
68
72
|
}
|
|
69
73
|
}
|
|
@@ -101,7 +105,7 @@ export function createDictionaryLineParserMapper(options) {
|
|
|
101
105
|
return w.startsWith(ignoreCase + ignoreCase) ? w.slice(1) : w;
|
|
102
106
|
}
|
|
103
107
|
function stripKeepCasePrefixAndQuotes(word) {
|
|
104
|
-
word = word.
|
|
108
|
+
word = word.replaceAll(/"(.*?)"/g, '$1');
|
|
105
109
|
return word[0] === keepCase ? word.slice(1) : word;
|
|
106
110
|
}
|
|
107
111
|
function _normalize(word) {
|
|
@@ -121,11 +125,11 @@ export function createDictionaryLineParserMapper(options) {
|
|
|
121
125
|
function* splitWords(lines) {
|
|
122
126
|
for (const line of lines) {
|
|
123
127
|
if (split) {
|
|
124
|
-
const lineEscaped = line.
|
|
125
|
-
? line.
|
|
128
|
+
const lineEscaped = line.includes('"')
|
|
129
|
+
? line.replaceAll(/".*?"/g, (quoted) => ' ' + quoted.replaceAll(/(\s)/g, '\\$1') + ' ')
|
|
126
130
|
: line;
|
|
127
131
|
const words = splitLine(lineEscaped, splitSeparator);
|
|
128
|
-
yield* words.map((escaped) => escaped.
|
|
132
|
+
yield* words.map((escaped) => escaped.replaceAll('\\', ''));
|
|
129
133
|
if (!splitKeepBoth)
|
|
130
134
|
continue;
|
|
131
135
|
}
|
|
@@ -189,10 +193,10 @@ function mergeOptions(base, ...partials) {
|
|
|
189
193
|
const RegExpToEncode = /\\([\s,;])/g;
|
|
190
194
|
const RegExpDecode = /<<(%[\da-f]{2})>>/gi;
|
|
191
195
|
function encodeLine(line) {
|
|
192
|
-
return line.
|
|
196
|
+
return line.replaceAll(RegExpToEncode, (_, v) => '<<' + encodeURIComponent(v) + '>>');
|
|
193
197
|
}
|
|
194
198
|
function decodeLine(line) {
|
|
195
|
-
return line.
|
|
199
|
+
return line.replaceAll(RegExpDecode, (_, v) => '\\' + decodeURIComponent(v));
|
|
196
200
|
}
|
|
197
201
|
function splitLine(line, regExp) {
|
|
198
202
|
return encodeLine(line)
|
|
@@ -6,21 +6,29 @@ import { FastTrieBlobInternals } from './FastTrieBlobInternals.js';
|
|
|
6
6
|
import { TrieBlob } from './TrieBlob.js';
|
|
7
7
|
export declare class FastTrieBlob implements TrieData {
|
|
8
8
|
private nodes;
|
|
9
|
-
private
|
|
9
|
+
private _charIndex;
|
|
10
10
|
readonly bitMasksInfo: FastTrieBlobBitMaskInfo;
|
|
11
|
-
private
|
|
11
|
+
private _charToIndexMap;
|
|
12
12
|
private _readonly;
|
|
13
13
|
private _forbidIdx;
|
|
14
14
|
private _iTrieRoot;
|
|
15
|
+
wordToCharacters: (word: string) => readonly string[];
|
|
15
16
|
readonly info: Readonly<TrieInfo>;
|
|
16
17
|
private constructor();
|
|
17
|
-
private
|
|
18
|
+
private _lookUpCharIndex;
|
|
19
|
+
private wordToNodeCharIndexSequence;
|
|
20
|
+
private letterToNodeCharIndexSequence;
|
|
18
21
|
has(word: string): boolean;
|
|
19
22
|
private _has;
|
|
20
23
|
words(): Iterable<string>;
|
|
21
24
|
toTrieBlob(): TrieBlob;
|
|
22
25
|
isReadonly(): boolean;
|
|
23
26
|
freeze(): this;
|
|
27
|
+
toJSON(): {
|
|
28
|
+
info: Readonly<TrieInfo>;
|
|
29
|
+
nodes: NodeElement[];
|
|
30
|
+
charIndex: readonly string[];
|
|
31
|
+
};
|
|
24
32
|
static create(data: FastTrieBlobInternals, options?: PartialTrieInfo): FastTrieBlob;
|
|
25
33
|
static toITrieNodeRoot(trie: FastTrieBlob): ITrieNodeRoot;
|
|
26
34
|
static NodeMaskEOW: number;
|
|
@@ -34,6 +42,20 @@ export declare class FastTrieBlob implements TrieData {
|
|
|
34
42
|
hasForbiddenWords(): boolean;
|
|
35
43
|
/** number of nodes */
|
|
36
44
|
get size(): number;
|
|
37
|
-
private
|
|
45
|
+
private _lookupCharIndexNode;
|
|
46
|
+
/** Search from nodeIdx for the node index representing the character. */
|
|
47
|
+
private _searchNodeForChar;
|
|
48
|
+
get charIndex(): readonly string[];
|
|
49
|
+
static fromTrieBlob(trie: TrieBlob): FastTrieBlob;
|
|
38
50
|
}
|
|
51
|
+
interface NodeElement {
|
|
52
|
+
id: number;
|
|
53
|
+
eow: boolean;
|
|
54
|
+
n: number;
|
|
55
|
+
c: {
|
|
56
|
+
c: number | string;
|
|
57
|
+
i: number;
|
|
58
|
+
}[];
|
|
59
|
+
}
|
|
60
|
+
export {};
|
|
39
61
|
//# sourceMappingURL=FastTrieBlob.d.ts.map
|
|
@@ -3,26 +3,35 @@ import { mergeOptionalWithDefaults } from '../utils/mergeOptionalWithDefaults.js
|
|
|
3
3
|
import { extractInfo } from './FastTrieBlobBitMaskInfo.js';
|
|
4
4
|
import { FastTrieBlobInternals } from './FastTrieBlobInternals.js';
|
|
5
5
|
import { FastTrieBlobIRoot } from './FastTrieBlobIRoot.js';
|
|
6
|
+
import { NumberSequenceByteDecoderAccumulator } from './NumberSequenceByteDecoderAccumulator.js';
|
|
6
7
|
import { TrieBlob } from './TrieBlob.js';
|
|
7
8
|
export class FastTrieBlob {
|
|
8
9
|
nodes;
|
|
9
|
-
|
|
10
|
+
_charIndex;
|
|
10
11
|
bitMasksInfo;
|
|
11
|
-
|
|
12
|
+
_charToIndexMap;
|
|
12
13
|
_readonly = false;
|
|
13
14
|
_forbidIdx;
|
|
14
15
|
_iTrieRoot;
|
|
16
|
+
wordToCharacters;
|
|
15
17
|
info;
|
|
16
|
-
constructor(nodes,
|
|
18
|
+
constructor(nodes, _charIndex, bitMasksInfo, options) {
|
|
17
19
|
this.nodes = nodes;
|
|
18
|
-
this.
|
|
20
|
+
this._charIndex = _charIndex;
|
|
19
21
|
this.bitMasksInfo = bitMasksInfo;
|
|
20
22
|
this.info = mergeOptionalWithDefaults(options);
|
|
21
|
-
this.
|
|
22
|
-
this.
|
|
23
|
+
this.wordToCharacters = (word) => [...word];
|
|
24
|
+
this._charToIndexMap = createCharToIndexMap(_charIndex);
|
|
25
|
+
this._forbidIdx = this._searchNodeForChar(0, this.info.forbiddenWordPrefix);
|
|
23
26
|
}
|
|
24
|
-
|
|
25
|
-
return this.
|
|
27
|
+
_lookUpCharIndex(char) {
|
|
28
|
+
return this._charToIndexMap[char] ?? -1;
|
|
29
|
+
}
|
|
30
|
+
wordToNodeCharIndexSequence(word) {
|
|
31
|
+
return TrieBlob.charactersToCharIndexSequence(this.wordToCharacters(word), (c) => this._lookUpCharIndex(c));
|
|
32
|
+
}
|
|
33
|
+
letterToNodeCharIndexSequence(letter) {
|
|
34
|
+
return TrieBlob.toCharIndexSequence(this._lookUpCharIndex(letter));
|
|
26
35
|
}
|
|
27
36
|
has(word) {
|
|
28
37
|
return this._has(0, word);
|
|
@@ -32,10 +41,11 @@ export class FastTrieBlob {
|
|
|
32
41
|
const NodeChildRefShift = this.bitMasksInfo.NodeChildRefShift;
|
|
33
42
|
const NodeMaskEOW = this.bitMasksInfo.NodeMaskEOW;
|
|
34
43
|
const nodes = this.nodes;
|
|
35
|
-
const
|
|
44
|
+
const charIndexes = this.wordToNodeCharIndexSequence(word);
|
|
45
|
+
const len = charIndexes.length;
|
|
36
46
|
let node = nodes[nodeIdx];
|
|
37
47
|
for (let p = 0; p < len; ++p, node = nodes[nodeIdx]) {
|
|
38
|
-
const letterIdx =
|
|
48
|
+
const letterIdx = charIndexes[p];
|
|
39
49
|
const count = node.length;
|
|
40
50
|
let i = count - 1;
|
|
41
51
|
for (; i > 0; --i) {
|
|
@@ -56,10 +66,11 @@ export class FastTrieBlob {
|
|
|
56
66
|
const NodeChildRefShift = this.bitMasksInfo.NodeChildRefShift;
|
|
57
67
|
const NodeMaskEOW = this.bitMasksInfo.NodeMaskEOW;
|
|
58
68
|
const nodes = this.nodes;
|
|
59
|
-
const
|
|
69
|
+
const accumulator = NumberSequenceByteDecoderAccumulator.create();
|
|
70
|
+
const stack = [{ nodeIdx: 0, pos: 0, word: '', accumulator }];
|
|
60
71
|
let depth = 0;
|
|
61
72
|
while (depth >= 0) {
|
|
62
|
-
const { nodeIdx, pos, word } = stack[depth];
|
|
73
|
+
const { nodeIdx, pos, word, accumulator } = stack[depth];
|
|
63
74
|
const node = nodes[nodeIdx];
|
|
64
75
|
if (!pos && node[0] & NodeMaskEOW) {
|
|
65
76
|
yield word;
|
|
@@ -71,12 +82,15 @@ export class FastTrieBlob {
|
|
|
71
82
|
const nextPos = ++stack[depth].pos;
|
|
72
83
|
const entry = node[nextPos];
|
|
73
84
|
const charIdx = entry & NodeMaskChildCharIndex;
|
|
74
|
-
const
|
|
85
|
+
const acc = accumulator.clone();
|
|
86
|
+
const letterIdx = acc.decode(charIdx);
|
|
87
|
+
const letter = (letterIdx && this._charIndex[letterIdx]) || '';
|
|
75
88
|
++depth;
|
|
76
89
|
stack[depth] = {
|
|
77
90
|
nodeIdx: entry >>> NodeChildRefShift,
|
|
78
91
|
pos: 0,
|
|
79
92
|
word: word + letter,
|
|
93
|
+
accumulator: acc,
|
|
80
94
|
};
|
|
81
95
|
}
|
|
82
96
|
}
|
|
@@ -111,7 +125,7 @@ export class FastTrieBlob {
|
|
|
111
125
|
binNodes[offset++] = (nodeToIndex[nodeRef] << refShift) | charIndex;
|
|
112
126
|
}
|
|
113
127
|
}
|
|
114
|
-
return new TrieBlob(binNodes, this.
|
|
128
|
+
return new TrieBlob(binNodes, this._charIndex, this.info);
|
|
115
129
|
}
|
|
116
130
|
isReadonly() {
|
|
117
131
|
return this._readonly;
|
|
@@ -120,11 +134,18 @@ export class FastTrieBlob {
|
|
|
120
134
|
this._readonly = true;
|
|
121
135
|
return this;
|
|
122
136
|
}
|
|
137
|
+
toJSON() {
|
|
138
|
+
return {
|
|
139
|
+
info: this.info,
|
|
140
|
+
nodes: nodesToJson(this.nodes),
|
|
141
|
+
charIndex: this._charIndex,
|
|
142
|
+
};
|
|
143
|
+
}
|
|
123
144
|
static create(data, options) {
|
|
124
145
|
return new FastTrieBlob(data.nodes, data.charIndex, extractInfo(data), options);
|
|
125
146
|
}
|
|
126
147
|
static toITrieNodeRoot(trie) {
|
|
127
|
-
return new FastTrieBlobIRoot(new FastTrieBlobInternals(trie.nodes, trie.
|
|
148
|
+
return new FastTrieBlobIRoot(new FastTrieBlobInternals(trie.nodes, trie._charIndex, trie._charToIndexMap, trie.bitMasksInfo), 0, trie.info);
|
|
128
149
|
}
|
|
129
150
|
static NodeMaskEOW = TrieBlob.NodeMaskEOW;
|
|
130
151
|
static NodeChildRefShift = TrieBlob.NodeChildRefShift;
|
|
@@ -153,12 +174,12 @@ export class FastTrieBlob {
|
|
|
153
174
|
get size() {
|
|
154
175
|
return this.nodes.length;
|
|
155
176
|
}
|
|
156
|
-
|
|
177
|
+
_lookupCharIndexNode(nodeIdx, charIndex) {
|
|
157
178
|
const NodeMaskChildCharIndex = this.bitMasksInfo.NodeMaskChildCharIndex;
|
|
158
179
|
const NodeChildRefShift = this.bitMasksInfo.NodeChildRefShift;
|
|
159
180
|
const nodes = this.nodes;
|
|
160
181
|
const node = nodes[nodeIdx];
|
|
161
|
-
const letterIdx =
|
|
182
|
+
const letterIdx = charIndex;
|
|
162
183
|
const count = node.length;
|
|
163
184
|
let i = count - 1;
|
|
164
185
|
for (; i > 0; --i) {
|
|
@@ -168,6 +189,55 @@ export class FastTrieBlob {
|
|
|
168
189
|
}
|
|
169
190
|
return 0;
|
|
170
191
|
}
|
|
192
|
+
/** Search from nodeIdx for the node index representing the character. */
|
|
193
|
+
_searchNodeForChar(nodeIdx, char) {
|
|
194
|
+
const charIndexes = this.letterToNodeCharIndexSequence(char);
|
|
195
|
+
let idx = nodeIdx;
|
|
196
|
+
for (let i = 0; i < charIndexes.length; ++i) {
|
|
197
|
+
idx = this._lookupCharIndexNode(idx, charIndexes[i]);
|
|
198
|
+
if (!idx)
|
|
199
|
+
return 0;
|
|
200
|
+
}
|
|
201
|
+
return idx;
|
|
202
|
+
}
|
|
203
|
+
get charIndex() {
|
|
204
|
+
return [...this._charIndex];
|
|
205
|
+
}
|
|
206
|
+
static fromTrieBlob(trie) {
|
|
207
|
+
const bitMasksInfo = {
|
|
208
|
+
NodeMaskEOW: TrieBlob.NodeMaskEOW,
|
|
209
|
+
NodeMaskChildCharIndex: TrieBlob.NodeMaskChildCharIndex,
|
|
210
|
+
NodeChildRefShift: TrieBlob.NodeChildRefShift,
|
|
211
|
+
};
|
|
212
|
+
const trieNodesBin = TrieBlob.nodesView(trie);
|
|
213
|
+
const nodeOffsets = [];
|
|
214
|
+
for (let offset = 0; offset < trieNodesBin.length; offset += (trieNodesBin[offset] & TrieBlob.NodeMaskNumChildren) + 1) {
|
|
215
|
+
nodeOffsets.push(offset);
|
|
216
|
+
}
|
|
217
|
+
const offsetToNodeIndex = new Map(nodeOffsets.map((offset, i) => [offset, i]));
|
|
218
|
+
const nodes = Array.from({ length: nodeOffsets.length });
|
|
219
|
+
for (let i = 0; i < nodes.length; ++i) {
|
|
220
|
+
const offset = nodeOffsets[i];
|
|
221
|
+
const n = trieNodesBin[offset];
|
|
222
|
+
const eow = n & TrieBlob.NodeMaskEOW;
|
|
223
|
+
const count = n & TrieBlob.NodeMaskNumChildren;
|
|
224
|
+
// Preallocate the array to the correct size.
|
|
225
|
+
const node = Array.from({ length: count + 1 });
|
|
226
|
+
node[0] = eow;
|
|
227
|
+
nodes[i] = node;
|
|
228
|
+
for (let j = 1; j <= count; ++j) {
|
|
229
|
+
const n = trieNodesBin[offset + j];
|
|
230
|
+
const charIndex = n & TrieBlob.NodeMaskChildCharIndex;
|
|
231
|
+
const nodeIndex = n >>> TrieBlob.NodeChildRefShift;
|
|
232
|
+
const idx = offsetToNodeIndex.get(nodeIndex);
|
|
233
|
+
if (idx === undefined) {
|
|
234
|
+
throw new Error(`Invalid node index ${nodeIndex}`);
|
|
235
|
+
}
|
|
236
|
+
node[j] = (idx << TrieBlob.NodeChildRefShift) | charIndex;
|
|
237
|
+
}
|
|
238
|
+
}
|
|
239
|
+
return new FastTrieBlob(nodes, trie.charIndex, bitMasksInfo, trie.info);
|
|
240
|
+
}
|
|
171
241
|
}
|
|
172
242
|
function createCharToIndexMap(charIndex) {
|
|
173
243
|
const map = Object.create(null);
|
|
@@ -178,4 +248,16 @@ function createCharToIndexMap(charIndex) {
|
|
|
178
248
|
}
|
|
179
249
|
return map;
|
|
180
250
|
}
|
|
251
|
+
function nodesToJson(nodes) {
|
|
252
|
+
function nodeElement(node, index) {
|
|
253
|
+
const eow = !!(node[0] & TrieBlob.NodeMaskEOW);
|
|
254
|
+
const children = node.slice(1).map((n) => ({
|
|
255
|
+
c: ('00' + (n & TrieBlob.NodeMaskChildCharIndex).toString(16)).slice(-2),
|
|
256
|
+
i: n >>> TrieBlob.NodeChildRefShift,
|
|
257
|
+
}));
|
|
258
|
+
return { id: index, eow, n: node.length, c: children };
|
|
259
|
+
}
|
|
260
|
+
const elements = nodes.map((n, i) => nodeElement(n, i));
|
|
261
|
+
return elements;
|
|
262
|
+
}
|
|
181
263
|
//# sourceMappingURL=FastTrieBlob.js.map
|
|
@@ -11,11 +11,14 @@ export declare class FastTrieBlobBuilder implements TrieBuilder<FastTrieBlob> {
|
|
|
11
11
|
private IdxEOW;
|
|
12
12
|
private _cursor;
|
|
13
13
|
private _options;
|
|
14
|
+
wordToCharacters: (word: string) => string[];
|
|
14
15
|
readonly bitMasksInfo: FastTrieBlobBitMaskInfo;
|
|
15
16
|
constructor(options?: PartialTrieInfo, bitMasksInfo?: FastTrieBlobBitMaskInfo);
|
|
16
17
|
setOptions(options: PartialTrieInfo): Readonly<TrieInfo>;
|
|
17
18
|
get options(): Readonly<TrieInfo>;
|
|
18
19
|
private getCharIndex;
|
|
20
|
+
private wordToNodeCharIndexSequence;
|
|
21
|
+
private letterToNodeCharIndexSequence;
|
|
19
22
|
insert(word: string | Iterable<string> | string[]): this;
|
|
20
23
|
getCursor(): BuilderCursor;
|
|
21
24
|
private createCursor;
|
|
@@ -24,7 +27,7 @@ export declare class FastTrieBlobBuilder implements TrieBuilder<FastTrieBlob> {
|
|
|
24
27
|
isReadonly(): boolean;
|
|
25
28
|
freeze(): this;
|
|
26
29
|
build(): FastTrieBlob;
|
|
27
|
-
static fromWordList(words: string[] | Iterable<string>, options?: PartialTrieInfo): FastTrieBlob;
|
|
30
|
+
static fromWordList(words: readonly string[] | Iterable<string>, options?: PartialTrieInfo): FastTrieBlob;
|
|
28
31
|
static fromTrieRoot(root: TrieRoot): FastTrieBlob;
|
|
29
32
|
static NodeMaskEOW: number;
|
|
30
33
|
static NodeChildRefShift: number;
|