cspell-trie-lib 8.11.0 → 8.13.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/lib/ITrie.d.ts +10 -6
- package/dist/lib/ITrie.js +18 -30
- package/dist/lib/ITrieNode/FindOptions.d.ts +2 -4
- package/dist/lib/ITrieNode/ITrieNode.d.ts +15 -9
- package/dist/lib/ITrieNode/TrieInfo.d.ts +5 -0
- package/dist/lib/ITrieNode/find.d.ts +3 -3
- package/dist/lib/ITrieNode/find.js +44 -37
- package/dist/lib/ITrieNode/trie-util.js +4 -5
- package/dist/lib/ITrieNode/walker/hintedWalker.js +9 -15
- package/dist/lib/ITrieNode/walker/walker.js +13 -9
- package/dist/lib/TrieBlob/CharIndex.d.ts +2 -2
- package/dist/lib/TrieBlob/CharIndex.js +11 -11
- package/dist/lib/TrieBlob/FastTrieBlob.d.ts +11 -14
- package/dist/lib/TrieBlob/FastTrieBlob.js +71 -85
- package/dist/lib/TrieBlob/FastTrieBlobBuilder.js +3 -3
- package/dist/lib/TrieBlob/FastTrieBlobIRoot.d.ts +18 -8
- package/dist/lib/TrieBlob/FastTrieBlobIRoot.js +54 -6
- package/dist/lib/TrieBlob/FastTrieBlobInternals.d.ts +27 -5
- package/dist/lib/TrieBlob/FastTrieBlobInternals.js +26 -4
- package/dist/lib/TrieBlob/TrieBlob.d.ts +3 -14
- package/dist/lib/TrieBlob/TrieBlob.js +110 -131
- package/dist/lib/TrieBlob/TrieBlobIRoot.d.ts +30 -7
- package/dist/lib/TrieBlob/TrieBlobIRoot.js +62 -13
- package/dist/lib/TrieData.d.ts +7 -5
- package/dist/lib/TrieNode/TrieNodeTrie.d.ts +3 -1
- package/dist/lib/TrieNode/TrieNodeTrie.js +6 -4
- package/dist/lib/TrieNode/find.js +3 -2
- package/dist/lib/TrieNode/trie.js +35 -0
- package/dist/lib/buildITrie.js +1 -1
- package/dist/lib/constants.js +3 -0
- package/dist/lib/suggestions/suggestAStar.js +27 -32
- package/dist/lib/walker/walker.js +3 -4
- package/package.json +4 -4
package/dist/lib/ITrie.d.ts
CHANGED
|
@@ -79,18 +79,23 @@ export interface ITrie {
|
|
|
79
79
|
* On the returned Iterator, calling .next(goDeeper: boolean), allows for controlling the depth.
|
|
80
80
|
*/
|
|
81
81
|
iterate(): WalkerIterator;
|
|
82
|
-
weightMap: WeightMap | undefined;
|
|
83
|
-
|
|
82
|
+
readonly weightMap: WeightMap | undefined;
|
|
83
|
+
readonly isCaseAware: boolean;
|
|
84
|
+
readonly hasForbiddenWords: boolean;
|
|
85
|
+
readonly hasCompoundWords: boolean;
|
|
86
|
+
readonly hasNonStrictWords: boolean;
|
|
84
87
|
}
|
|
85
88
|
export declare class ITrieImpl implements ITrie {
|
|
89
|
+
#private;
|
|
86
90
|
readonly data: TrieData;
|
|
87
91
|
private numNodes?;
|
|
88
92
|
private _info;
|
|
89
|
-
private _findOptionsDefaults;
|
|
90
|
-
private hasForbidden;
|
|
91
93
|
private root;
|
|
92
94
|
private count?;
|
|
93
95
|
weightMap: WeightMap | undefined;
|
|
96
|
+
readonly hasForbiddenWords: boolean;
|
|
97
|
+
readonly hasCompoundWords: boolean;
|
|
98
|
+
readonly hasNonStrictWords: boolean;
|
|
94
99
|
constructor(data: TrieData, numNodes?: number | undefined);
|
|
95
100
|
/**
|
|
96
101
|
* Number of words in the Trie, the first call to this method might be expensive.
|
|
@@ -154,11 +159,10 @@ export declare class ITrieImpl implements ITrie {
|
|
|
154
159
|
iterate(): WalkerIterator;
|
|
155
160
|
static create(words: Iterable<string> | IterableIterator<string>, info?: PartialTrieInfo): ITrie;
|
|
156
161
|
private createFindOptions;
|
|
157
|
-
private lastCreateFindOptionsMatchCaseMap;
|
|
158
|
-
private createFindOptionsMatchCase;
|
|
159
162
|
}
|
|
160
163
|
export interface FindWordOptions {
|
|
161
164
|
caseSensitive?: boolean;
|
|
162
165
|
useLegacyWordCompounds?: boolean | number;
|
|
166
|
+
checkForbidden?: boolean;
|
|
163
167
|
}
|
|
164
168
|
//# sourceMappingURL=ITrie.d.ts.map
|
package/dist/lib/ITrie.js
CHANGED
|
@@ -13,22 +13,21 @@ export class ITrieImpl {
|
|
|
13
13
|
data;
|
|
14
14
|
numNodes;
|
|
15
15
|
_info;
|
|
16
|
-
_findOptionsDefaults;
|
|
17
|
-
hasForbidden;
|
|
18
16
|
root;
|
|
19
17
|
count;
|
|
20
18
|
weightMap;
|
|
19
|
+
#optionsCompound = this.createFindOptions({ compoundMode: 'compound' });
|
|
20
|
+
hasForbiddenWords;
|
|
21
|
+
hasCompoundWords;
|
|
22
|
+
hasNonStrictWords;
|
|
21
23
|
constructor(data, numNodes) {
|
|
22
24
|
this.data = data;
|
|
23
25
|
this.numNodes = numNodes;
|
|
24
26
|
this.root = data.getRoot();
|
|
25
27
|
this._info = mergeOptionalWithDefaults(data.info);
|
|
26
|
-
this.
|
|
27
|
-
this.
|
|
28
|
-
|
|
29
|
-
compoundFix: this._info.compoundCharacter,
|
|
30
|
-
forbidPrefix: this._info.forbiddenWordPrefix,
|
|
31
|
-
};
|
|
28
|
+
this.hasForbiddenWords = data.hasForbiddenWords;
|
|
29
|
+
this.hasCompoundWords = data.hasCompoundWords;
|
|
30
|
+
this.hasNonStrictWords = data.hasNonStrictWords;
|
|
32
31
|
}
|
|
33
32
|
/**
|
|
34
33
|
* Number of words in the Trie, the first call to this method might be expensive.
|
|
@@ -54,8 +53,7 @@ export class ITrieImpl {
|
|
|
54
53
|
* @param text - text to find in the Trie
|
|
55
54
|
*/
|
|
56
55
|
find(text) {
|
|
57
|
-
|
|
58
|
-
return findWordNode(this.data.getRoot(), text, options).node;
|
|
56
|
+
return findWordNode(this.data.getRoot(), text, this.#optionsCompound).node;
|
|
59
57
|
}
|
|
60
58
|
has(word, minLegacyCompoundLength) {
|
|
61
59
|
if (this.hasWord(word, false))
|
|
@@ -73,8 +71,8 @@ export class ITrieImpl {
|
|
|
73
71
|
* @returns true if the word was found and is not forbidden.
|
|
74
72
|
*/
|
|
75
73
|
hasWord(word, caseSensitive) {
|
|
76
|
-
const f = this.findWord(word, { caseSensitive });
|
|
77
|
-
return !!f.found
|
|
74
|
+
const f = this.findWord(word, { caseSensitive, checkForbidden: false });
|
|
75
|
+
return !!f.found;
|
|
78
76
|
}
|
|
79
77
|
findWord(word, options) {
|
|
80
78
|
if (options?.useLegacyWordCompounds) {
|
|
@@ -83,19 +81,21 @@ export class ITrieImpl {
|
|
|
83
81
|
: defaultLegacyMinCompoundLength;
|
|
84
82
|
const findOptions = this.createFindOptions({
|
|
85
83
|
legacyMinCompoundLength: len,
|
|
86
|
-
matchCase: options.caseSensitive,
|
|
84
|
+
matchCase: options.caseSensitive || false,
|
|
87
85
|
});
|
|
88
86
|
return findLegacyCompound(this.root, word, findOptions);
|
|
89
87
|
}
|
|
90
|
-
|
|
91
|
-
|
|
88
|
+
return findWord(this.root, word, {
|
|
89
|
+
matchCase: options?.caseSensitive,
|
|
90
|
+
checkForbidden: options?.checkForbidden,
|
|
91
|
+
});
|
|
92
92
|
}
|
|
93
93
|
/**
|
|
94
94
|
* Determine if a word is in the forbidden word list.
|
|
95
95
|
* @param word the word to lookup.
|
|
96
96
|
*/
|
|
97
97
|
isForbiddenWord(word) {
|
|
98
|
-
return this.
|
|
98
|
+
return this.hasForbiddenWords && isForbiddenWord(this.root, word, this.info.forbiddenWordPrefix);
|
|
99
99
|
}
|
|
100
100
|
/**
|
|
101
101
|
* Provides an ordered sequence of words with the prefix of text.
|
|
@@ -165,20 +165,8 @@ export class ITrieImpl {
|
|
|
165
165
|
const root = builder.build();
|
|
166
166
|
return new ITrieImpl(root, undefined);
|
|
167
167
|
}
|
|
168
|
-
createFindOptions(options
|
|
169
|
-
const findOptions = createFindOptions(
|
|
170
|
-
...this._findOptionsDefaults,
|
|
171
|
-
...options,
|
|
172
|
-
});
|
|
173
|
-
return findOptions;
|
|
174
|
-
}
|
|
175
|
-
lastCreateFindOptionsMatchCaseMap = new Map();
|
|
176
|
-
createFindOptionsMatchCase(matchCase) {
|
|
177
|
-
const f = this.lastCreateFindOptionsMatchCaseMap.get(matchCase);
|
|
178
|
-
if (f !== undefined)
|
|
179
|
-
return f;
|
|
180
|
-
const findOptions = this.createFindOptions({ matchCase });
|
|
181
|
-
this.lastCreateFindOptionsMatchCaseMap.set(matchCase, findOptions);
|
|
168
|
+
createFindOptions(options) {
|
|
169
|
+
const findOptions = createFindOptions(options);
|
|
182
170
|
return findOptions;
|
|
183
171
|
}
|
|
184
172
|
}
|
|
@@ -3,10 +3,8 @@ import type { CompoundModes } from './CompoundModes.js';
|
|
|
3
3
|
export interface FindOptions {
|
|
4
4
|
matchCase: boolean;
|
|
5
5
|
compoundMode: CompoundModes;
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
caseInsensitivePrefix: string;
|
|
9
|
-
legacyMinCompoundLength: number;
|
|
6
|
+
legacyMinCompoundLength?: number | undefined;
|
|
7
|
+
checkForbidden?: boolean | undefined;
|
|
10
8
|
}
|
|
11
9
|
export type PartialFindOptions = PartialWithUndefined<FindOptions> | undefined;
|
|
12
10
|
//# sourceMappingURL=FindOptions.d.ts.map
|
|
@@ -34,31 +34,31 @@ export interface ITrieNode {
|
|
|
34
34
|
readonly id: ITrieNodeId;
|
|
35
35
|
/** flag End of Word */
|
|
36
36
|
readonly eow: boolean;
|
|
37
|
-
/** number of children */
|
|
38
|
-
readonly size: number;
|
|
39
37
|
/** get keys to children */
|
|
40
|
-
keys():
|
|
38
|
+
keys(): Iterable<string>;
|
|
41
39
|
/** get keys to children */
|
|
42
|
-
values():
|
|
40
|
+
values(): Iterable<ITrieNode>;
|
|
43
41
|
/** get the children as key value pairs */
|
|
44
|
-
entries():
|
|
42
|
+
entries(): Iterable<Entry>;
|
|
45
43
|
/** get child ITrieNode */
|
|
46
44
|
get(char: string): ITrieNode | undefined;
|
|
47
|
-
/** get a child
|
|
48
|
-
|
|
45
|
+
/** get a nested child ITrieNode */
|
|
46
|
+
getNode?: (chars: string) => ITrieNode | undefined;
|
|
49
47
|
/** has child */
|
|
50
48
|
has(char: string): boolean;
|
|
51
49
|
/** `true` iff this node has children */
|
|
52
50
|
hasChildren(): boolean;
|
|
51
|
+
/** check if a word exists within this node. */
|
|
52
|
+
findExact?: ((word: string) => boolean) | undefined;
|
|
53
53
|
}
|
|
54
54
|
export interface ITrieNodeRoot extends ITrieNode {
|
|
55
|
-
info: Readonly<TrieInfo>;
|
|
55
|
+
readonly info: Readonly<TrieInfo>;
|
|
56
56
|
/**
|
|
57
57
|
* converts an `id` into a node.
|
|
58
58
|
* @param id an of a ITrieNode in this Trie
|
|
59
59
|
*/
|
|
60
60
|
resolveId(id: ITrieNodeId): ITrieNode;
|
|
61
|
-
findExact
|
|
61
|
+
findExact: ((word: string) => boolean) | undefined;
|
|
62
62
|
/**
|
|
63
63
|
* Try to find a word.
|
|
64
64
|
* @param word - the normalized word to look up.
|
|
@@ -67,6 +67,12 @@ export interface ITrieNodeRoot extends ITrieNode {
|
|
|
67
67
|
*/
|
|
68
68
|
find?: ((word: string, strict: boolean) => FindResult | undefined) | undefined;
|
|
69
69
|
isForbidden?: ((word: string) => boolean) | undefined;
|
|
70
|
+
readonly forbidPrefix: string;
|
|
71
|
+
readonly compoundFix: string;
|
|
72
|
+
readonly caseInsensitivePrefix: string;
|
|
73
|
+
readonly hasForbiddenWords: boolean;
|
|
74
|
+
readonly hasCompoundWords: boolean;
|
|
75
|
+
readonly hasNonStrictWords: boolean;
|
|
70
76
|
}
|
|
71
77
|
export {};
|
|
72
78
|
//# sourceMappingURL=ITrieNode.d.ts.map
|
|
@@ -5,5 +5,10 @@ export interface TrieInfo {
|
|
|
5
5
|
forbiddenWordPrefix: string;
|
|
6
6
|
isCaseAware: boolean;
|
|
7
7
|
}
|
|
8
|
+
export interface TrieCharacteristics {
|
|
9
|
+
hasForbiddenWords: boolean;
|
|
10
|
+
hasCompoundWords: boolean;
|
|
11
|
+
hasNonStrictWords: boolean;
|
|
12
|
+
}
|
|
8
13
|
export type PartialTrieInfo = PartialWithUndefined<TrieInfo> | undefined;
|
|
9
14
|
//# sourceMappingURL=TrieInfo.d.ts.map
|
|
@@ -8,15 +8,15 @@ type Root = ITrieNodeRoot;
|
|
|
8
8
|
* @param word A pre normalized word use `normalizeWord` or `normalizeWordToLowercase`
|
|
9
9
|
* @param options
|
|
10
10
|
*/
|
|
11
|
-
export declare function
|
|
11
|
+
export declare function findWordNode(root: Root, word: string, options?: PartialFindOptions): FindFullNodeResult;
|
|
12
12
|
/**
|
|
13
13
|
*
|
|
14
14
|
* @param root Trie root node. root.c contains the compound root and forbidden root.
|
|
15
15
|
* @param word A pre normalized word use `normalizeWord` or `normalizeWordToLowercase`
|
|
16
16
|
* @param options
|
|
17
17
|
*/
|
|
18
|
-
export declare function
|
|
19
|
-
export declare function findLegacyCompound(root: Root, word: string, options:
|
|
18
|
+
export declare function findWord(root: Root, word: string, options?: PartialFindOptions): FindFullResult;
|
|
19
|
+
export declare function findLegacyCompound(root: Root, word: string, options: PartialFindOptions): FindFullNodeResult;
|
|
20
20
|
export declare function findCompoundNode(root: Root | undefined, word: string, compoundCharacter: string, ignoreCasePrefix: string): FindFullNodeResult;
|
|
21
21
|
export declare function findWordExact(root: Root | ITrieNode | undefined, word: string): boolean;
|
|
22
22
|
export declare function isEndOfWordNode(n: ITrieNode | undefined): boolean;
|
|
@@ -1,26 +1,15 @@
|
|
|
1
|
-
import { CASE_INSENSITIVE_PREFIX, COMPOUND_FIX, FORBID_PREFIX } from '../constants.js';
|
|
2
1
|
import { memorizeLastCall } from '../utils/memorizeLastCall.js';
|
|
3
|
-
import { mergeDefaults } from '../utils/mergeDefaults.js';
|
|
4
2
|
const defaultLegacyMinCompoundLength = 3;
|
|
5
3
|
const _defaultFindOptions = {
|
|
6
4
|
matchCase: false,
|
|
7
5
|
compoundMode: 'compound',
|
|
8
|
-
forbidPrefix: FORBID_PREFIX,
|
|
9
|
-
compoundFix: COMPOUND_FIX,
|
|
10
|
-
caseInsensitivePrefix: CASE_INSENSITIVE_PREFIX,
|
|
11
6
|
legacyMinCompoundLength: defaultLegacyMinCompoundLength,
|
|
12
7
|
};
|
|
8
|
+
Object.freeze(_defaultFindOptions);
|
|
13
9
|
const arrayCompoundModes = ['none', 'compound', 'legacy'];
|
|
14
10
|
const knownCompoundModes = new Map(arrayCompoundModes.map((a) => [a, a]));
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
* @param root Trie root node. root.c contains the compound root and forbidden root.
|
|
18
|
-
* @param word A pre normalized word use `normalizeWord` or `normalizeWordToLowercase`
|
|
19
|
-
* @param options
|
|
20
|
-
*/
|
|
21
|
-
export function findWord(root, word, options) {
|
|
22
|
-
return _findWord(root, word, createFindOptions(options));
|
|
23
|
-
}
|
|
11
|
+
const notFound = { found: false, compoundUsed: false, caseMatched: false, forbidden: undefined };
|
|
12
|
+
Object.freeze(notFound);
|
|
24
13
|
/**
|
|
25
14
|
*
|
|
26
15
|
* @param root Trie root node. root.c contains the compound root and forbidden root.
|
|
@@ -28,7 +17,7 @@ export function findWord(root, word, options) {
|
|
|
28
17
|
* @param options
|
|
29
18
|
*/
|
|
30
19
|
export function findWordNode(root, word, options) {
|
|
31
|
-
return _findWordNode(root, word,
|
|
20
|
+
return _findWordNode(root, word, options);
|
|
32
21
|
}
|
|
33
22
|
/**
|
|
34
23
|
*
|
|
@@ -36,14 +25,23 @@ export function findWordNode(root, word, options) {
|
|
|
36
25
|
* @param word A pre normalized word use `normalizeWord` or `normalizeWordToLowercase`
|
|
37
26
|
* @param options
|
|
38
27
|
*/
|
|
39
|
-
function
|
|
28
|
+
export function findWord(root, word, options) {
|
|
40
29
|
if (root.find) {
|
|
41
|
-
const found = root.find(word, options
|
|
30
|
+
const found = root.find(word, options?.matchCase || false);
|
|
42
31
|
if (found)
|
|
43
32
|
return found;
|
|
33
|
+
if (!root.hasCompoundWords) {
|
|
34
|
+
return notFound;
|
|
35
|
+
}
|
|
44
36
|
}
|
|
45
|
-
|
|
46
|
-
|
|
37
|
+
// return { found: false, compoundUsed: false, caseMatched: false, forbidden: false };
|
|
38
|
+
const v = _findWordNode(root, word, options);
|
|
39
|
+
return {
|
|
40
|
+
found: v.found,
|
|
41
|
+
compoundUsed: v.compoundUsed,
|
|
42
|
+
caseMatched: v.caseMatched,
|
|
43
|
+
forbidden: v.forbidden,
|
|
44
|
+
};
|
|
47
45
|
}
|
|
48
46
|
/**
|
|
49
47
|
*
|
|
@@ -53,29 +51,29 @@ function _findWord(root, word, options) {
|
|
|
53
51
|
*/
|
|
54
52
|
function _findWordNode(root, word, options) {
|
|
55
53
|
const trieInfo = root.info;
|
|
56
|
-
const
|
|
57
|
-
const
|
|
58
|
-
const
|
|
59
|
-
|
|
60
|
-
|
|
54
|
+
const matchCase = options?.matchCase || false;
|
|
55
|
+
const compoundMode = knownCompoundModes.get(options?.compoundMode) || _defaultFindOptions.compoundMode;
|
|
56
|
+
const compoundPrefix = compoundMode === 'compound' ? (trieInfo.compoundCharacter ?? root.compoundFix) : '';
|
|
57
|
+
const ignoreCasePrefix = matchCase ? '' : (trieInfo.stripCaseAndAccentsPrefix ?? root.caseInsensitivePrefix);
|
|
58
|
+
const checkForbidden = options?.checkForbidden ?? true;
|
|
61
59
|
function __findCompound() {
|
|
62
60
|
const f = findCompoundWord(root, word, compoundPrefix, ignoreCasePrefix);
|
|
63
61
|
const result = { ...f };
|
|
64
62
|
if (f.found !== false && f.compoundUsed) {
|
|
65
63
|
// If case was ignored when searching for the word, then check the forbidden
|
|
66
64
|
// in the ignore case forbidden list.
|
|
67
|
-
const r = !f.caseMatched ? walk(root,
|
|
68
|
-
result.forbidden = isForbiddenWord(r, word,
|
|
65
|
+
const r = !f.caseMatched ? walk(root, root.caseInsensitivePrefix) : root;
|
|
66
|
+
result.forbidden = checkForbidden ? isForbiddenWord(r, word, root.forbidPrefix) : undefined;
|
|
69
67
|
}
|
|
70
68
|
return result;
|
|
71
69
|
}
|
|
72
70
|
function __findExact() {
|
|
73
|
-
const n = walk(root, word);
|
|
71
|
+
const n = root.getNode ? root.getNode(word) : walk(root, word);
|
|
74
72
|
const isFound = isEndOfWordNode(n);
|
|
75
73
|
const result = {
|
|
76
74
|
found: isFound && word,
|
|
77
75
|
compoundUsed: false,
|
|
78
|
-
forbidden: isForbiddenWord(root, word,
|
|
76
|
+
forbidden: checkForbidden ? isForbiddenWord(root, word, root.forbidPrefix) : undefined,
|
|
79
77
|
node: n,
|
|
80
78
|
caseMatched: true,
|
|
81
79
|
};
|
|
@@ -83,7 +81,7 @@ function _findWordNode(root, word, options) {
|
|
|
83
81
|
}
|
|
84
82
|
switch (compoundMode) {
|
|
85
83
|
case 'none': {
|
|
86
|
-
return
|
|
84
|
+
return matchCase ? __findExact() : __findCompound();
|
|
87
85
|
}
|
|
88
86
|
case 'compound': {
|
|
89
87
|
return __findCompound();
|
|
@@ -95,10 +93,10 @@ function _findWordNode(root, word, options) {
|
|
|
95
93
|
}
|
|
96
94
|
export function findLegacyCompound(root, word, options) {
|
|
97
95
|
const roots = [root];
|
|
98
|
-
if (!options
|
|
99
|
-
roots.push(walk(root,
|
|
96
|
+
if (!options?.matchCase) {
|
|
97
|
+
roots.push(walk(root, root.caseInsensitivePrefix));
|
|
100
98
|
}
|
|
101
|
-
return findLegacyCompoundNode(roots, word, options
|
|
99
|
+
return findLegacyCompoundNode(roots, word, options?.legacyMinCompoundLength || defaultLegacyMinCompoundLength);
|
|
102
100
|
}
|
|
103
101
|
export function findCompoundNode(root, word, compoundCharacter, ignoreCasePrefix) {
|
|
104
102
|
// Approach - do a depth first search for the matching word.
|
|
@@ -108,7 +106,8 @@ export function findCompoundNode(root, word, compoundCharacter, ignoreCasePrefix
|
|
|
108
106
|
];
|
|
109
107
|
const compoundPrefix = compoundCharacter || ignoreCasePrefix;
|
|
110
108
|
const possibleCompoundPrefix = ignoreCasePrefix && compoundCharacter ? ignoreCasePrefix + compoundCharacter : '';
|
|
111
|
-
const
|
|
109
|
+
const nw = word.normalize();
|
|
110
|
+
const w = [...nw];
|
|
112
111
|
function determineRoot(s) {
|
|
113
112
|
const prefix = s.compoundPrefix;
|
|
114
113
|
let r = root;
|
|
@@ -132,7 +131,7 @@ export function findCompoundNode(root, word, compoundCharacter, ignoreCasePrefix
|
|
|
132
131
|
const s = stack[i];
|
|
133
132
|
const h = w[i++];
|
|
134
133
|
const n = s.cr || s.n;
|
|
135
|
-
const c = n?.get(h);
|
|
134
|
+
const c = (h && n?.get(h)) || undefined;
|
|
136
135
|
if (c && i < word.length) {
|
|
137
136
|
// Go deeper.
|
|
138
137
|
caseMatched = s.caseMatched;
|
|
@@ -156,7 +155,7 @@ export function findCompoundNode(root, word, compoundCharacter, ignoreCasePrefix
|
|
|
156
155
|
if (!r.cr) {
|
|
157
156
|
break;
|
|
158
157
|
}
|
|
159
|
-
if (!i && !r.caseMatched &&
|
|
158
|
+
if (!i && !r.caseMatched && nw !== nw.toLowerCase()) {
|
|
160
159
|
// It is not going to be found.
|
|
161
160
|
break;
|
|
162
161
|
}
|
|
@@ -171,7 +170,7 @@ export function findCompoundNode(root, word, compoundCharacter, ignoreCasePrefix
|
|
|
171
170
|
break;
|
|
172
171
|
}
|
|
173
172
|
}
|
|
174
|
-
const found = (i
|
|
173
|
+
const found = (i === word.length && word) || false;
|
|
175
174
|
const result = { found, compoundUsed, node, forbidden: undefined, caseMatched };
|
|
176
175
|
return result;
|
|
177
176
|
}
|
|
@@ -292,7 +291,15 @@ export function isForbiddenWord(root, word, forbiddenPrefix) {
|
|
|
292
291
|
}
|
|
293
292
|
export const createFindOptions = memorizeLastCall(_createFindOptions);
|
|
294
293
|
function _createFindOptions(options) {
|
|
295
|
-
|
|
294
|
+
if (!options)
|
|
295
|
+
return _defaultFindOptions;
|
|
296
|
+
const d = _defaultFindOptions;
|
|
297
|
+
return {
|
|
298
|
+
matchCase: options.matchCase ?? d.matchCase,
|
|
299
|
+
compoundMode: options.compoundMode ?? d.compoundMode,
|
|
300
|
+
legacyMinCompoundLength: options.legacyMinCompoundLength ?? d.legacyMinCompoundLength,
|
|
301
|
+
checkForbidden: options.checkForbidden ?? d.checkForbidden,
|
|
302
|
+
};
|
|
296
303
|
}
|
|
297
304
|
export const __testing__ = {
|
|
298
305
|
findLegacyCompoundWord,
|
|
@@ -34,8 +34,8 @@ export function countNodes(root) {
|
|
|
34
34
|
if (seen.has(n.id))
|
|
35
35
|
return;
|
|
36
36
|
seen.add(n.id);
|
|
37
|
-
for (
|
|
38
|
-
walk(
|
|
37
|
+
for (const c of n.values()) {
|
|
38
|
+
walk(c);
|
|
39
39
|
}
|
|
40
40
|
}
|
|
41
41
|
walk(root);
|
|
@@ -51,9 +51,8 @@ export function countWords(root) {
|
|
|
51
51
|
let cnt = n.eow ? 1 : 0;
|
|
52
52
|
// add the node to the set to avoid getting stuck on circular references.
|
|
53
53
|
visited.set(n, cnt);
|
|
54
|
-
const
|
|
55
|
-
|
|
56
|
-
cnt += walk(n.child(i));
|
|
54
|
+
for (const c of n.values()) {
|
|
55
|
+
cnt += walk(c);
|
|
57
56
|
}
|
|
58
57
|
visited.set(n, cnt);
|
|
59
58
|
return cnt;
|
|
@@ -42,8 +42,8 @@ function* hintedWalkerNext(root, ignoreCase, hint, compoundingMethod, emitWordSe
|
|
|
42
42
|
// We don't want to suggest the compound character.
|
|
43
43
|
hints.add(compoundCharacter);
|
|
44
44
|
// Then yield everything else.
|
|
45
|
-
|
|
46
|
-
|
|
45
|
+
const entries = n.entries();
|
|
46
|
+
yield* (Array.isArray(entries) ? entries : [...entries])
|
|
47
47
|
.filter((a) => !hints.has(a[0]))
|
|
48
48
|
.map(([letter, node]) => ({
|
|
49
49
|
letter,
|
|
@@ -109,10 +109,8 @@ class ITrieNodeFiltered {
|
|
|
109
109
|
this.srcNode = srcNode;
|
|
110
110
|
this.id = srcNode.id;
|
|
111
111
|
this.eow = srcNode.eow;
|
|
112
|
-
const
|
|
113
|
-
this.filtered =
|
|
114
|
-
.map((key, idx) => [key, idx])
|
|
115
|
-
.filter(([key, idx]) => predicate(key, idx, srcNode));
|
|
112
|
+
const entries = srcNode.entries();
|
|
113
|
+
this.filtered = (Array.isArray(entries) ? entries : [...entries]).filter(([key], idx) => predicate(key, idx, srcNode));
|
|
116
114
|
this.keyMap = new Map(this.filtered);
|
|
117
115
|
this.size = this.keyMap.size;
|
|
118
116
|
}
|
|
@@ -120,14 +118,10 @@ class ITrieNodeFiltered {
|
|
|
120
118
|
return [...this.keyMap.keys()];
|
|
121
119
|
}
|
|
122
120
|
values() {
|
|
123
|
-
return this.filtered.map(([
|
|
124
|
-
}
|
|
125
|
-
child(idx) {
|
|
126
|
-
const [_, srcIdx] = this.filtered[idx];
|
|
127
|
-
return this.srcNode.child(srcIdx);
|
|
121
|
+
return this.filtered.map(([, node]) => node);
|
|
128
122
|
}
|
|
129
123
|
entries() {
|
|
130
|
-
return this.filtered
|
|
124
|
+
return this.filtered;
|
|
131
125
|
}
|
|
132
126
|
has(char) {
|
|
133
127
|
return this.keyMap.has(char);
|
|
@@ -136,10 +130,10 @@ class ITrieNodeFiltered {
|
|
|
136
130
|
return this.size > 0;
|
|
137
131
|
}
|
|
138
132
|
get(char) {
|
|
139
|
-
const
|
|
140
|
-
if (
|
|
133
|
+
const node = this.keyMap.get(char);
|
|
134
|
+
if (node === undefined)
|
|
141
135
|
return undefined;
|
|
142
|
-
return
|
|
136
|
+
return node;
|
|
143
137
|
}
|
|
144
138
|
}
|
|
145
139
|
//# sourceMappingURL=hintedWalker.js.map
|
|
@@ -13,7 +13,8 @@ function* compoundWalker(root, compoundingMethod) {
|
|
|
13
13
|
const rc = roots[compoundingMethod].length ? roots[compoundingMethod] : undefined;
|
|
14
14
|
function children(n) {
|
|
15
15
|
if (n.hasChildren()) {
|
|
16
|
-
const
|
|
16
|
+
const entries = n.entries();
|
|
17
|
+
const c = Array.isArray(entries) ? entries : [...entries];
|
|
17
18
|
return n.eow && rc ? [...c, ...rc] : c;
|
|
18
19
|
}
|
|
19
20
|
if (n.eow) {
|
|
@@ -48,21 +49,22 @@ function* compoundWalker(root, compoundingMethod) {
|
|
|
48
49
|
function* nodeWalker(root) {
|
|
49
50
|
let depth = 0;
|
|
50
51
|
const stack = [];
|
|
51
|
-
|
|
52
|
+
const entries = root.entries();
|
|
53
|
+
stack[depth] = { t: '', n: root, c: Array.isArray(entries) ? entries : [...entries], ci: 0 };
|
|
52
54
|
while (depth >= 0) {
|
|
53
55
|
let s = stack[depth];
|
|
54
56
|
let baseText = s.t;
|
|
55
57
|
while (s.ci < s.c.length && s.n) {
|
|
56
58
|
const idx = s.ci++;
|
|
57
|
-
const char = s.c[idx];
|
|
58
|
-
const node = s.n.child(idx);
|
|
59
|
+
const [char, node] = s.c[idx];
|
|
59
60
|
const text = baseText + char;
|
|
60
61
|
const goDeeper = yield { text, node, depth };
|
|
61
62
|
if (goDeeper !== false) {
|
|
62
63
|
depth++;
|
|
63
64
|
baseText = text;
|
|
64
65
|
const s = stack[depth];
|
|
65
|
-
const
|
|
66
|
+
const entries = node.entries();
|
|
67
|
+
const c = Array.isArray(entries) ? entries : [...entries];
|
|
66
68
|
if (s) {
|
|
67
69
|
s.t = text;
|
|
68
70
|
s.n = node;
|
|
@@ -90,13 +92,14 @@ export function walkerWords(root) {
|
|
|
90
92
|
export function* walkerWordsITrie(root) {
|
|
91
93
|
let depth = 0;
|
|
92
94
|
const stack = [];
|
|
93
|
-
|
|
95
|
+
const entries = root.entries();
|
|
96
|
+
const c = Array.isArray(entries) ? entries : [...entries];
|
|
97
|
+
stack[depth] = { t: '', n: root, c, ci: 0 };
|
|
94
98
|
while (depth >= 0) {
|
|
95
99
|
let s = stack[depth];
|
|
96
100
|
let baseText = s.t;
|
|
97
101
|
while (s.ci < s.c.length && s.n) {
|
|
98
|
-
const char = s.c[s.ci++];
|
|
99
|
-
const node = s.n.get(char);
|
|
102
|
+
const [char, node] = s.c[s.ci++];
|
|
100
103
|
if (!node)
|
|
101
104
|
continue;
|
|
102
105
|
const text = baseText + char;
|
|
@@ -104,7 +107,8 @@ export function* walkerWordsITrie(root) {
|
|
|
104
107
|
yield text;
|
|
105
108
|
depth++;
|
|
106
109
|
baseText = text;
|
|
107
|
-
const
|
|
110
|
+
const entries = node.entries();
|
|
111
|
+
const c = Array.isArray(entries) ? entries : [...entries];
|
|
108
112
|
if (stack[depth]) {
|
|
109
113
|
s = stack[depth];
|
|
110
114
|
s.t = text;
|
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
import { type Utf8BE32 } from './Utf8.js';
|
|
2
2
|
export type Utf8Seq = Readonly<number[]>;
|
|
3
|
-
export type CharIndexMap =
|
|
3
|
+
export type CharIndexMap = Map<string, Utf8BE32>;
|
|
4
4
|
export type RO_CharIndexMap = Readonly<CharIndexMap>;
|
|
5
|
-
export type CharIndexSeqMap =
|
|
5
|
+
export type CharIndexSeqMap = Map<string, Utf8Seq>;
|
|
6
6
|
export type RO_CharIndexSeqMap = Readonly<CharIndexSeqMap>;
|
|
7
7
|
export declare class CharIndex {
|
|
8
8
|
#private;
|
|
@@ -10,14 +10,14 @@ export class CharIndex {
|
|
|
10
10
|
constructor(charIndex) {
|
|
11
11
|
this.charIndex = charIndex;
|
|
12
12
|
this.#charToUtf8SeqMap = buildCharIndexSequenceMap(charIndex);
|
|
13
|
-
this.#multiByteChars =
|
|
13
|
+
this.#multiByteChars = [...this.#charToUtf8SeqMap.values()].some((c) => c.length > 1);
|
|
14
14
|
}
|
|
15
15
|
getCharUtf8Seq(c) {
|
|
16
|
-
const found = this.#charToUtf8SeqMap
|
|
16
|
+
const found = this.#charToUtf8SeqMap.get(c);
|
|
17
17
|
if (found)
|
|
18
18
|
return found;
|
|
19
19
|
const s = encodeTextToUtf8(c);
|
|
20
|
-
this.#charToUtf8SeqMap
|
|
20
|
+
this.#charToUtf8SeqMap.set(c, s);
|
|
21
21
|
return s;
|
|
22
22
|
}
|
|
23
23
|
wordToUtf8Seq(word) {
|
|
@@ -39,31 +39,31 @@ export class CharIndex {
|
|
|
39
39
|
}
|
|
40
40
|
}
|
|
41
41
|
function buildCharIndexSequenceMap(charIndex) {
|
|
42
|
-
const map =
|
|
42
|
+
const map = new Map();
|
|
43
43
|
for (const key of charIndex) {
|
|
44
|
-
map
|
|
44
|
+
map.set(key, encodeTextToUtf8(key));
|
|
45
45
|
}
|
|
46
46
|
return map;
|
|
47
47
|
}
|
|
48
48
|
export class CharIndexBuilder {
|
|
49
49
|
charIndex = [];
|
|
50
|
-
charIndexMap =
|
|
51
|
-
charIndexSeqMap =
|
|
50
|
+
charIndexMap = new Map();
|
|
51
|
+
charIndexSeqMap = new Map();
|
|
52
52
|
#mapIdxToSeq = new Map();
|
|
53
53
|
constructor() {
|
|
54
54
|
this.getUtf8Value('');
|
|
55
55
|
}
|
|
56
56
|
getUtf8Value(c) {
|
|
57
|
-
const found = this.charIndexMap
|
|
57
|
+
const found = this.charIndexMap.get(c);
|
|
58
58
|
if (found !== undefined) {
|
|
59
59
|
return found;
|
|
60
60
|
}
|
|
61
61
|
const nc = c.normalize('NFC');
|
|
62
62
|
this.charIndex.push(nc);
|
|
63
63
|
const utf8 = encodeUtf8N_BE(nc.codePointAt(0) || 0);
|
|
64
|
-
this.charIndexMap
|
|
65
|
-
this.charIndexMap
|
|
66
|
-
this.charIndexMap
|
|
64
|
+
this.charIndexMap.set(c, utf8);
|
|
65
|
+
this.charIndexMap.set(nc, utf8);
|
|
66
|
+
this.charIndexMap.set(c.normalize('NFD'), utf8);
|
|
67
67
|
return utf8;
|
|
68
68
|
}
|
|
69
69
|
utf8ValueToUtf8Seq(idx) {
|