cspell-trie-lib 8.10.2 → 8.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38) hide show
  1. package/dist/lib/Builder/BuilderCursor.d.ts +4 -1
  2. package/dist/lib/Builder/cursor-util.js +9 -8
  3. package/dist/lib/ITrie.d.ts +1 -1
  4. package/dist/lib/ITrieNode/FindTypes.d.ts +7 -0
  5. package/dist/lib/ITrieNode/FindTypes.js +2 -0
  6. package/dist/lib/ITrieNode/ITrieNode.d.ts +30 -0
  7. package/dist/lib/ITrieNode/find.d.ts +2 -20
  8. package/dist/lib/ITrieNode/find.js +13 -2
  9. package/dist/lib/TrieBlob/CharIndex.d.ts +32 -0
  10. package/dist/lib/TrieBlob/CharIndex.js +116 -0
  11. package/dist/lib/TrieBlob/FastTrieBlob.d.ts +38 -10
  12. package/dist/lib/TrieBlob/FastTrieBlob.js +105 -37
  13. package/dist/lib/TrieBlob/FastTrieBlobBuilder.d.ts +19 -4
  14. package/dist/lib/TrieBlob/FastTrieBlobBuilder.js +56 -81
  15. package/dist/lib/TrieBlob/FastTrieBlobIRoot.js +9 -12
  16. package/dist/lib/TrieBlob/FastTrieBlobInternals.d.ts +12 -3
  17. package/dist/lib/TrieBlob/FastTrieBlobInternals.js +49 -5
  18. package/dist/lib/TrieBlob/TrieBlob.d.ts +23 -30
  19. package/dist/lib/TrieBlob/TrieBlob.js +217 -89
  20. package/dist/lib/TrieBlob/TrieBlobIRoot.d.ts +9 -4
  21. package/dist/lib/TrieBlob/TrieBlobIRoot.js +17 -16
  22. package/dist/lib/TrieBlob/Utf8.d.ts +55 -0
  23. package/dist/lib/TrieBlob/Utf8.js +261 -0
  24. package/dist/lib/TrieBlob/createTrieBlob.d.ts +4 -4
  25. package/dist/lib/TrieBlob/createTrieBlob.js +4 -56
  26. package/dist/lib/TrieData.d.ts +1 -0
  27. package/dist/lib/TrieNode/TrieNodeBuilder.d.ts +17 -1
  28. package/dist/lib/TrieNode/TrieNodeBuilder.js +26 -0
  29. package/dist/lib/TrieNode/find.js +4 -4
  30. package/dist/lib/TrieNode/trie-util.d.ts +8 -1
  31. package/dist/lib/TrieNode/trie-util.js +22 -1
  32. package/dist/lib/buildITrie.js +2 -1
  33. package/dist/lib/io/importV3.js +2 -2
  34. package/dist/lib/utils/isValidChar.d.ts +4 -0
  35. package/dist/lib/utils/isValidChar.js +19 -0
  36. package/package.json +8 -5
  37. package/dist/lib/TrieBlob/NumberSequenceByteDecoderAccumulator.d.ts +0 -34
  38. package/dist/lib/TrieBlob/NumberSequenceByteDecoderAccumulator.js +0 -129
@@ -1,7 +1,10 @@
1
1
  export interface BuilderCursor {
2
+ /** Insert a character in the current node. */
2
3
  insertChar(char: string): void;
4
+ /** Mark the current node as End of Word */
3
5
  markEOW(): void;
4
- reference(nodeIdx: number): void;
6
+ /** Refer to a previous node */
7
+ reference(refId: number): void;
5
8
  backStep(num: number): void;
6
9
  }
7
10
  //# sourceMappingURL=BuilderCursor.d.ts.map
@@ -1,17 +1,18 @@
1
1
  export function insertWordsAtCursor(cursor, words) {
2
- let prevWord = '';
3
- for (const word of words) {
4
- const pLen = commonStrPrefix(prevWord, word);
5
- const stepBack = prevWord.length - pLen;
2
+ let prevWordLetters = [];
3
+ for (const _word of words) {
4
+ const letters = [..._word];
5
+ const pLen = commonStrPrefix(prevWordLetters, letters);
6
+ const stepBack = prevWordLetters.length - pLen;
6
7
  cursor.backStep(stepBack);
7
- const wLen = word.length;
8
+ const wLen = letters.length;
8
9
  for (let i = pLen; i < wLen; ++i) {
9
- cursor.insertChar(word[i]);
10
+ cursor.insertChar(letters[i]);
10
11
  }
11
12
  cursor.markEOW();
12
- prevWord = word;
13
+ prevWordLetters = letters;
13
14
  }
14
- cursor.backStep(prevWord.length);
15
+ cursor.backStep(prevWordLetters.length);
15
16
  }
16
17
  export function commonStringPrefixLen(a, b) {
17
18
  let i = 0;
@@ -1,6 +1,6 @@
1
1
  import type { WeightMap } from './distance/index.js';
2
- import type { FindFullResult } from './ITrieNode/find.js';
3
2
  import type { ITrieNode } from './ITrieNode/index.js';
3
+ import { FindFullResult } from './ITrieNode/ITrieNode.js';
4
4
  import type { PartialTrieInfo, TrieInfo } from './ITrieNode/TrieInfo.js';
5
5
  import type { CompoundWordsMethod, WalkerIterator } from './ITrieNode/walker/walkerTypes.js';
6
6
  import type { SuggestionCollector, SuggestionResult } from './suggestCollector.js';
@@ -0,0 +1,7 @@
1
+ import type { FindFullResult, ITrieNode } from './ITrieNode.js';
2
+ export interface FindNodeResult {
3
+ node: ITrieNode | undefined;
4
+ }
5
+ export interface FindFullNodeResult extends FindNodeResult, FindFullResult {
6
+ }
7
+ //# sourceMappingURL=FindTypes.d.ts.map
@@ -0,0 +1,2 @@
1
+ export {};
2
+ //# sourceMappingURL=FindTypes.js.map
@@ -1,4 +1,25 @@
1
1
  import type { TrieInfo } from './TrieInfo.js';
2
+ export interface FindResult {
3
+ found: string | false;
4
+ compoundUsed: boolean;
5
+ caseMatched: boolean;
6
+ /**
7
+ * Is the word explicitly forbidden.
8
+ * - `true` - word is in the forbidden list.
9
+ * - `false` - word is not in the forbidden list.
10
+ * - `undefined` - unknown - was not checked.
11
+ */
12
+ forbidden?: boolean | undefined;
13
+ }
14
+ export interface FindFullResult extends FindResult {
15
+ /**
16
+ * Is the word explicitly forbidden.
17
+ * - `true` - word is in the forbidden list.
18
+ * - `false` - word is not in the forbidden list.
19
+ * - `undefined` - unknown - was not checked.
20
+ */
21
+ forbidden: boolean | undefined;
22
+ }
2
23
  export type ITrieNodeId = object | number | string;
3
24
  type Entry = readonly [string, ITrieNode];
4
25
  export interface ITrieNode {
@@ -37,6 +58,15 @@ export interface ITrieNodeRoot extends ITrieNode {
37
58
  * @param id an of a ITrieNode in this Trie
38
59
  */
39
60
  resolveId(id: ITrieNodeId): ITrieNode;
61
+ findExact?: ((word: string) => boolean) | undefined;
62
+ /**
63
+ * Try to find a word.
64
+ * @param word - the normalized word to look up.
65
+ * @param strict - if `true` the case and accents must match.
66
+ * @returns undefined if it did not try to find the word, otherwise a FindResult.
67
+ */
68
+ find?: ((word: string, strict: boolean) => FindResult | undefined) | undefined;
69
+ isForbidden?: ((word: string) => boolean) | undefined;
40
70
  }
41
71
  export {};
42
72
  //# sourceMappingURL=ITrieNode.d.ts.map
@@ -1,25 +1,7 @@
1
1
  import type { FindOptions, PartialFindOptions } from './FindOptions.js';
2
- import type { ITrieNode, ITrieNodeRoot } from './ITrieNode.js';
2
+ import type { FindFullNodeResult } from './FindTypes.js';
3
+ import type { FindFullResult, FindResult, ITrieNode, ITrieNodeRoot } from './ITrieNode.js';
3
4
  type Root = ITrieNodeRoot;
4
- export interface FindNodeResult {
5
- node: ITrieNode | undefined;
6
- }
7
- export interface FindResult {
8
- found: string | false;
9
- compoundUsed: boolean;
10
- caseMatched: boolean;
11
- }
12
- export interface FindFullResult extends FindResult {
13
- /**
14
- * Is the word explicitly forbidden.
15
- * - `true` - word is in the forbidden list.
16
- * - `false` - word is not in the forbidden list.
17
- * - `undefined` - unknown - was not checked.
18
- * */
19
- forbidden: boolean | undefined;
20
- }
21
- export interface FindFullNodeResult extends FindNodeResult, FindFullResult {
22
- }
23
5
  /**
24
6
  *
25
7
  * @param root Trie root node. root.c contains the compound root and forbidden root.
@@ -37,6 +37,11 @@ export function findWordNode(root, word, options) {
37
37
  * @param options
38
38
  */
39
39
  function _findWord(root, word, options) {
40
+ if (root.find) {
41
+ const found = root.find(word, options.matchCase);
42
+ if (found)
43
+ return found;
44
+ }
40
45
  const { node: _, ...result } = _findWordNode(root, word, options);
41
46
  return result;
42
47
  }
@@ -49,10 +54,10 @@ function _findWord(root, word, options) {
49
54
  function _findWordNode(root, word, options) {
50
55
  const trieInfo = root.info;
51
56
  const compoundMode = knownCompoundModes.get(options.compoundMode) || _defaultFindOptions.compoundMode;
52
- const compoundPrefix = options.compoundMode === 'compound' ? trieInfo.compoundCharacter ?? options.compoundFix : '';
57
+ const compoundPrefix = options.compoundMode === 'compound' ? (trieInfo.compoundCharacter ?? options.compoundFix) : '';
53
58
  const ignoreCasePrefix = options.matchCase
54
59
  ? ''
55
- : trieInfo.stripCaseAndAccentsPrefix ?? options.caseInsensitivePrefix;
60
+ : (trieInfo.stripCaseAndAccentsPrefix ?? options.caseInsensitivePrefix);
56
61
  function __findCompound() {
57
62
  const f = findCompoundWord(root, word, compoundPrefix, ignoreCasePrefix);
58
63
  const result = { ...f };
@@ -180,6 +185,9 @@ function findCompoundWord(root, word, compoundCharacter, ignoreCasePrefix) {
180
185
  return { found, compoundUsed, node, forbidden: undefined, caseMatched };
181
186
  }
182
187
  export function findWordExact(root, word) {
188
+ const r = root;
189
+ if (r?.findExact)
190
+ return r.findExact(word);
183
191
  return isEndOfWordNode(walk(root, word));
184
192
  }
185
193
  export function isEndOfWordNode(n) {
@@ -277,6 +285,9 @@ function findLegacyCompoundWord(roots, word, minCompoundLength) {
277
285
  return { found, compoundUsed, caseMatched };
278
286
  }
279
287
  export function isForbiddenWord(root, word, forbiddenPrefix) {
288
+ const r = root;
289
+ if (r?.isForbidden)
290
+ return r.isForbidden(word);
280
291
  return findWordExact(root?.get(forbiddenPrefix), word);
281
292
  }
282
293
  export const createFindOptions = memorizeLastCall(_createFindOptions);
@@ -0,0 +1,32 @@
1
+ import { type Utf8BE32 } from './Utf8.js';
2
+ export type Utf8Seq = Readonly<number[]>;
3
+ export type CharIndexMap = Record<string, Utf8BE32>;
4
+ export type RO_CharIndexMap = Readonly<CharIndexMap>;
5
+ export type CharIndexSeqMap = Record<string, Utf8Seq>;
6
+ export type RO_CharIndexSeqMap = Readonly<CharIndexSeqMap>;
7
+ export declare class CharIndex {
8
+ #private;
9
+ readonly charIndex: readonly string[];
10
+ constructor(charIndex: readonly string[]);
11
+ getCharUtf8Seq(c: string): Utf8Seq;
12
+ wordToUtf8Seq(word: string): Utf8Seq;
13
+ indexContainsMultiByteChars(): boolean;
14
+ get size(): number;
15
+ toJSON(): {
16
+ charIndex: readonly string[];
17
+ };
18
+ }
19
+ export declare class CharIndexBuilder {
20
+ #private;
21
+ private readonly charIndex;
22
+ readonly charIndexMap: CharIndexMap;
23
+ readonly charIndexSeqMap: CharIndexSeqMap;
24
+ constructor();
25
+ getUtf8Value(c: string): number;
26
+ utf8ValueToUtf8Seq(idx: number): number[];
27
+ charToUtf8Seq(c: string): number[];
28
+ wordToUtf8Seq(word: string): number[];
29
+ get size(): number;
30
+ build(): CharIndex;
31
+ }
32
+ //# sourceMappingURL=CharIndex.d.ts.map
@@ -0,0 +1,116 @@
1
+ import { encodeTextToUtf8, encodeUtf8N_BE } from './Utf8.js';
2
+ const emptySeq = [0];
3
+ Object.freeze(emptySeq);
4
+ export class CharIndex {
5
+ charIndex;
6
+ #charToUtf8SeqMap;
7
+ #lastWord = '';
8
+ #lastWordSeq = [];
9
+ #multiByteChars;
10
+ constructor(charIndex) {
11
+ this.charIndex = charIndex;
12
+ this.#charToUtf8SeqMap = buildCharIndexSequenceMap(charIndex);
13
+ this.#multiByteChars = Object.values(this.#charToUtf8SeqMap).some((c) => c.length > 1);
14
+ }
15
+ getCharUtf8Seq(c) {
16
+ const found = this.#charToUtf8SeqMap[c];
17
+ if (found)
18
+ return found;
19
+ const s = encodeTextToUtf8(c);
20
+ this.#charToUtf8SeqMap[c] = s;
21
+ return s;
22
+ }
23
+ wordToUtf8Seq(word) {
24
+ if (this.#lastWord === word)
25
+ return this.#lastWordSeq;
26
+ const seq = encodeTextToUtf8(word);
27
+ this.#lastWord = word;
28
+ this.#lastWordSeq = seq;
29
+ return seq;
30
+ }
31
+ indexContainsMultiByteChars() {
32
+ return this.#multiByteChars;
33
+ }
34
+ get size() {
35
+ return this.charIndex.length;
36
+ }
37
+ toJSON() {
38
+ return { charIndex: this.charIndex };
39
+ }
40
+ }
41
+ function buildCharIndexSequenceMap(charIndex) {
42
+ const map = Object.create(null);
43
+ for (const key of charIndex) {
44
+ map[key] = encodeTextToUtf8(key);
45
+ }
46
+ return map;
47
+ }
48
+ export class CharIndexBuilder {
49
+ charIndex = [];
50
+ charIndexMap = Object.create(null);
51
+ charIndexSeqMap = Object.create(null);
52
+ #mapIdxToSeq = new Map();
53
+ constructor() {
54
+ this.getUtf8Value('');
55
+ }
56
+ getUtf8Value(c) {
57
+ const found = this.charIndexMap[c];
58
+ if (found !== undefined) {
59
+ return found;
60
+ }
61
+ const nc = c.normalize('NFC');
62
+ this.charIndex.push(nc);
63
+ const utf8 = encodeUtf8N_BE(nc.codePointAt(0) || 0);
64
+ this.charIndexMap[c] = utf8;
65
+ this.charIndexMap[nc] = utf8;
66
+ this.charIndexMap[c.normalize('NFD')] = utf8;
67
+ return utf8;
68
+ }
69
+ utf8ValueToUtf8Seq(idx) {
70
+ const found = this.#mapIdxToSeq.get(idx);
71
+ if (found !== undefined) {
72
+ return found;
73
+ }
74
+ const seq = splitUtf8(idx);
75
+ this.#mapIdxToSeq.set(idx, seq);
76
+ return seq;
77
+ }
78
+ charToUtf8Seq(c) {
79
+ const idx = this.getUtf8Value(c);
80
+ return this.utf8ValueToUtf8Seq(idx);
81
+ }
82
+ wordToUtf8Seq(word) {
83
+ const seq = new Array(word.length);
84
+ let i = 0;
85
+ for (const c of word) {
86
+ const idx = this.getUtf8Value(c);
87
+ const cSep = this.utf8ValueToUtf8Seq(idx);
88
+ if (typeof cSep === 'number') {
89
+ seq[i++] = cSep;
90
+ continue;
91
+ }
92
+ for (const cIdx of cSep) {
93
+ seq[i++] = cIdx;
94
+ }
95
+ }
96
+ if (seq.length !== i)
97
+ seq.length = i;
98
+ return seq;
99
+ }
100
+ get size() {
101
+ return this.charIndex.length;
102
+ }
103
+ build() {
104
+ return new CharIndex(this.charIndex);
105
+ }
106
+ }
107
+ function splitUtf8(utf8) {
108
+ if (utf8 <= 0xff)
109
+ return [utf8];
110
+ if (utf8 <= 0xffff)
111
+ return [(utf8 >> 8) & 0xff, utf8 & 0xff];
112
+ if (utf8 <= 0xff_ffff)
113
+ return [(utf8 >> 16) & 0xff, (utf8 >> 8) & 0xff, utf8 & 0xff];
114
+ return [(utf8 >> 24) & 0xff, (utf8 >> 16) & 0xff, (utf8 >> 8) & 0xff, utf8 & 0xff].filter((v) => v);
115
+ }
116
+ //# sourceMappingURL=CharIndex.js.map
@@ -1,22 +1,25 @@
1
1
  import type { ITrieNode, ITrieNodeRoot } from '../ITrieNode/ITrieNode.js';
2
2
  import type { PartialTrieInfo, TrieInfo } from '../ITrieNode/TrieInfo.js';
3
3
  import type { TrieData } from '../TrieData.js';
4
+ import { Utf8Seq } from './CharIndex.js';
4
5
  import { type FastTrieBlobBitMaskInfo } from './FastTrieBlobBitMaskInfo.js';
5
6
  import { FastTrieBlobInternals } from './FastTrieBlobInternals.js';
6
7
  import { TrieBlob } from './TrieBlob.js';
8
+ import { Utf8Accumulator } from './Utf8.js';
9
+ type FastTrieBlobNode = number[];
7
10
  export declare class FastTrieBlob implements TrieData {
11
+ #private;
8
12
  private nodes;
9
13
  private _charIndex;
10
14
  readonly bitMasksInfo: FastTrieBlobBitMaskInfo;
11
- private _charToIndexMap;
15
+ readonly sorted: boolean;
12
16
  private _readonly;
13
17
  private _forbidIdx;
14
18
  private _iTrieRoot;
15
19
  wordToCharacters: (word: string) => readonly string[];
16
20
  readonly info: Readonly<TrieInfo>;
17
21
  private constructor();
18
- private _lookUpCharIndex;
19
- private wordToNodeCharIndexSequence;
22
+ wordToNodeCharIndexSequence(word: string): Utf8Seq;
20
23
  private letterToNodeCharIndexSequence;
21
24
  has(word: string): boolean;
22
25
  private _has;
@@ -26,8 +29,19 @@ export declare class FastTrieBlob implements TrieData {
26
29
  freeze(): this;
27
30
  toJSON(): {
28
31
  info: Readonly<TrieInfo>;
29
- nodes: NodeElement[];
30
- charIndex: readonly string[];
32
+ nodes: ({
33
+ i: number;
34
+ w: number;
35
+ c?: never;
36
+ } | {
37
+ i: number;
38
+ w: number;
39
+ c: {
40
+ i: number;
41
+ c: string | 0 | undefined;
42
+ s: string;
43
+ }[];
44
+ })[];
31
45
  };
32
46
  static create(data: FastTrieBlobInternals, options?: PartialTrieInfo): FastTrieBlob;
33
47
  static toITrieNodeRoot(trie: FastTrieBlob): ITrieNodeRoot;
@@ -40,6 +54,7 @@ export declare class FastTrieBlob implements TrieData {
40
54
  getNode(prefix: string): ITrieNode | undefined;
41
55
  isForbiddenWord(word: string): boolean;
42
56
  hasForbiddenWords(): boolean;
57
+ nodeInfo(nodeIndex: number, accumulator?: Utf8Accumulator): TrieBlobNodeInfo;
43
58
  /** number of nodes */
44
59
  get size(): number;
45
60
  private _lookupCharIndexNode;
@@ -47,15 +62,28 @@ export declare class FastTrieBlob implements TrieData {
47
62
  private _searchNodeForChar;
48
63
  get charIndex(): readonly string[];
49
64
  static fromTrieBlob(trie: TrieBlob): FastTrieBlob;
65
+ static isFastTrieBlob(obj: unknown): obj is FastTrieBlob;
50
66
  }
51
- interface NodeElement {
52
- id: number;
67
+ interface TrieBlobNodeInfo {
53
68
  eow: boolean;
54
- n: number;
55
- c: {
56
- c: number | string;
69
+ children: {
70
+ c: string;
57
71
  i: number;
72
+ cIdx: number;
58
73
  }[];
59
74
  }
75
+ export declare function nodesToJSON(nodes: Readonly<FastTrieBlobNode[]>): ({
76
+ i: number;
77
+ w: number;
78
+ c?: never;
79
+ } | {
80
+ i: number;
81
+ w: number;
82
+ c: {
83
+ i: number;
84
+ c: string | 0 | undefined;
85
+ s: string;
86
+ }[];
87
+ })[];
60
88
  export {};
61
89
  //# sourceMappingURL=FastTrieBlob.d.ts.map
@@ -1,42 +1,47 @@
1
1
  import { findNode } from '../ITrieNode/trie-util.js';
2
2
  import { mergeOptionalWithDefaults } from '../utils/mergeOptionalWithDefaults.js';
3
3
  import { extractInfo } from './FastTrieBlobBitMaskInfo.js';
4
- import { FastTrieBlobInternals } from './FastTrieBlobInternals.js';
4
+ import { assertSorted, FastTrieBlobInternals, sortNodes } from './FastTrieBlobInternals.js';
5
5
  import { FastTrieBlobIRoot } from './FastTrieBlobIRoot.js';
6
- import { NumberSequenceByteDecoderAccumulator } from './NumberSequenceByteDecoderAccumulator.js';
7
6
  import { TrieBlob } from './TrieBlob.js';
7
+ import { Utf8Accumulator } from './Utf8.js';
8
+ const useSorted = true;
9
+ const checkSorted = false;
8
10
  export class FastTrieBlob {
9
11
  nodes;
10
12
  _charIndex;
11
13
  bitMasksInfo;
12
- _charToIndexMap;
14
+ sorted;
13
15
  _readonly = false;
14
16
  _forbidIdx;
15
17
  _iTrieRoot;
16
18
  wordToCharacters;
17
19
  info;
18
- constructor(nodes, _charIndex, bitMasksInfo, options) {
20
+ constructor(nodes, _charIndex, bitMasksInfo, sorted, options) {
19
21
  this.nodes = nodes;
20
22
  this._charIndex = _charIndex;
21
23
  this.bitMasksInfo = bitMasksInfo;
24
+ this.sorted = sorted;
22
25
  this.info = mergeOptionalWithDefaults(options);
23
26
  this.wordToCharacters = (word) => [...word];
24
- this._charToIndexMap = createCharToIndexMap(_charIndex);
25
27
  this._forbidIdx = this._searchNodeForChar(0, this.info.forbiddenWordPrefix);
26
- }
27
- _lookUpCharIndex(char) {
28
- return this._charToIndexMap[char] ?? -1;
28
+ if (sorted && checkSorted) {
29
+ assertSorted(nodes, bitMasksInfo.NodeMaskChildCharIndex);
30
+ }
29
31
  }
30
32
  wordToNodeCharIndexSequence(word) {
31
- return TrieBlob.charactersToCharIndexSequence(this.wordToCharacters(word), (c) => this._lookUpCharIndex(c));
33
+ return this._charIndex.wordToUtf8Seq(word);
32
34
  }
33
35
  letterToNodeCharIndexSequence(letter) {
34
- return TrieBlob.toCharIndexSequence(this._lookUpCharIndex(letter));
36
+ return this._charIndex.getCharUtf8Seq(letter);
35
37
  }
36
38
  has(word) {
37
39
  return this._has(0, word);
38
40
  }
39
41
  _has(nodeIdx, word) {
42
+ return this.sorted && useSorted ? this.#hasSorted(nodeIdx, word) : this.#has(nodeIdx, word);
43
+ }
44
+ #has(nodeIdx, word) {
40
45
  const NodeMaskChildCharIndex = this.bitMasksInfo.NodeMaskChildCharIndex;
41
46
  const NodeChildRefShift = this.bitMasksInfo.NodeChildRefShift;
42
47
  const NodeMaskEOW = this.bitMasksInfo.NodeMaskEOW;
@@ -61,12 +66,47 @@ export class FastTrieBlob {
61
66
  }
62
67
  return !!(node[0] & NodeMaskEOW);
63
68
  }
69
+ #hasSorted(nodeIdx, word) {
70
+ const NodeMaskChildCharIndex = this.bitMasksInfo.NodeMaskChildCharIndex;
71
+ const NodeChildRefShift = this.bitMasksInfo.NodeChildRefShift;
72
+ const NodeMaskEOW = this.bitMasksInfo.NodeMaskEOW;
73
+ const nodes = this.nodes;
74
+ const charIndexes = this.wordToNodeCharIndexSequence(word);
75
+ const len = charIndexes.length;
76
+ let node = nodes[nodeIdx];
77
+ for (let p = 0; p < len; ++p, node = nodes[nodeIdx]) {
78
+ const letterIdx = charIndexes[p];
79
+ const count = node.length;
80
+ // console.error('%o', { p, letterIdx, ...this.nodeInfo(nodeIdx) });
81
+ if (count < 2)
82
+ return false;
83
+ let i = 1;
84
+ let j = count - 1;
85
+ let c = -1;
86
+ while (i < j) {
87
+ const m = (i + j) >> 1;
88
+ c = node[m] & NodeMaskChildCharIndex;
89
+ if (c < letterIdx) {
90
+ i = m + 1;
91
+ }
92
+ else {
93
+ j = m;
94
+ }
95
+ }
96
+ if (i >= count || (node[i] & NodeMaskChildCharIndex) !== letterIdx)
97
+ return false;
98
+ nodeIdx = node[i] >>> NodeChildRefShift;
99
+ if (!nodeIdx)
100
+ return false;
101
+ }
102
+ return !!(node[0] & NodeMaskEOW);
103
+ }
64
104
  *words() {
65
105
  const NodeMaskChildCharIndex = this.bitMasksInfo.NodeMaskChildCharIndex;
66
106
  const NodeChildRefShift = this.bitMasksInfo.NodeChildRefShift;
67
107
  const NodeMaskEOW = this.bitMasksInfo.NodeMaskEOW;
68
108
  const nodes = this.nodes;
69
- const accumulator = NumberSequenceByteDecoderAccumulator.create();
109
+ const accumulator = Utf8Accumulator.create();
70
110
  const stack = [{ nodeIdx: 0, pos: 0, word: '', accumulator }];
71
111
  let depth = 0;
72
112
  while (depth >= 0) {
@@ -83,8 +123,8 @@ export class FastTrieBlob {
83
123
  const entry = node[nextPos];
84
124
  const charIdx = entry & NodeMaskChildCharIndex;
85
125
  const acc = accumulator.clone();
86
- const letterIdx = acc.decode(charIdx);
87
- const letter = (letterIdx && this._charIndex[letterIdx]) || '';
126
+ const codePoint = acc.decode(charIdx);
127
+ const letter = (codePoint && String.fromCodePoint(codePoint)) || '';
88
128
  ++depth;
89
129
  stack[depth] = {
90
130
  nodeIdx: entry >>> NodeChildRefShift,
@@ -137,15 +177,15 @@ export class FastTrieBlob {
137
177
  toJSON() {
138
178
  return {
139
179
  info: this.info,
140
- nodes: nodesToJson(this.nodes),
141
- charIndex: this._charIndex,
180
+ nodes: nodesToJSON(this.nodes),
181
+ // charIndex: this._charIndex,
142
182
  };
143
183
  }
144
184
  static create(data, options) {
145
- return new FastTrieBlob(data.nodes, data.charIndex, extractInfo(data), options);
185
+ return new FastTrieBlob(data.nodes, data.charIndex, extractInfo(data), data.sorted, options);
146
186
  }
147
187
  static toITrieNodeRoot(trie) {
148
- return new FastTrieBlobIRoot(new FastTrieBlobInternals(trie.nodes, trie._charIndex, trie._charToIndexMap, trie.bitMasksInfo), 0, trie.info);
188
+ return new FastTrieBlobIRoot(new FastTrieBlobInternals(trie.nodes, trie._charIndex, trie.bitMasksInfo), 0, trie.info);
149
189
  }
150
190
  static NodeMaskEOW = TrieBlob.NodeMaskEOW;
151
191
  static NodeChildRefShift = TrieBlob.NodeChildRefShift;
@@ -170,6 +210,20 @@ export class FastTrieBlob {
170
210
  hasForbiddenWords() {
171
211
  return !!this._forbidIdx;
172
212
  }
213
+ nodeInfo(nodeIndex, accumulator) {
214
+ const acc = accumulator ?? Utf8Accumulator.create();
215
+ const n = this.nodes[nodeIndex];
216
+ const eow = !!(n[0] & this.bitMasksInfo.NodeMaskEOW);
217
+ const children = n.slice(1).map((v) => {
218
+ const cIdx = v & this.bitMasksInfo.NodeMaskChildCharIndex;
219
+ const a = acc.clone();
220
+ const codePoint = a.decode(cIdx);
221
+ const c = codePoint !== undefined ? String.fromCodePoint(codePoint) : '∎';
222
+ const i = v >>> this.bitMasksInfo.NodeChildRefShift;
223
+ return { c, i, cIdx };
224
+ });
225
+ return { eow, children };
226
+ }
173
227
  /** number of nodes */
174
228
  get size() {
175
229
  return this.nodes.length;
@@ -201,7 +255,7 @@ export class FastTrieBlob {
201
255
  return idx;
202
256
  }
203
257
  get charIndex() {
204
- return [...this._charIndex];
258
+ return [...this._charIndex.charIndex];
205
259
  }
206
260
  static fromTrieBlob(trie) {
207
261
  const bitMasksInfo = {
@@ -236,28 +290,42 @@ export class FastTrieBlob {
236
290
  node[j] = (idx << TrieBlob.NodeChildRefShift) | charIndex;
237
291
  }
238
292
  }
239
- return new FastTrieBlob(nodes, trie.charIndex, bitMasksInfo, trie.info);
293
+ return new FastTrieBlob(sortNodes(nodes, TrieBlob.NodeMaskChildCharIndex), trie.charIndex, bitMasksInfo, true, trie.info);
240
294
  }
241
- }
242
- function createCharToIndexMap(charIndex) {
243
- const map = Object.create(null);
244
- for (let i = 0; i < charIndex.length; ++i) {
245
- const char = charIndex[i];
246
- map[char.normalize('NFC')] = i;
247
- map[char.normalize('NFD')] = i;
295
+ static isFastTrieBlob(obj) {
296
+ return obj instanceof FastTrieBlob;
248
297
  }
249
- return map;
250
298
  }
251
- function nodesToJson(nodes) {
252
- function nodeElement(node, index) {
253
- const eow = !!(node[0] & TrieBlob.NodeMaskEOW);
254
- const children = node.slice(1).map((n) => ({
255
- c: ('00' + (n & TrieBlob.NodeMaskChildCharIndex).toString(16)).slice(-2),
256
- i: n >>> TrieBlob.NodeChildRefShift,
257
- }));
258
- return { id: index, eow, n: node.length, c: children };
299
+ export function nodesToJSON(nodes) {
300
+ const mapNodeToAcc = new Map();
301
+ function mapNode(node, i) {
302
+ if (node.length === 1) {
303
+ return {
304
+ i,
305
+ w: (!!(node[0] & TrieBlob.NodeMaskEOW) && 1) || 0,
306
+ };
307
+ }
308
+ const acc = mapNodeToAcc.get(node) || Utf8Accumulator.create();
309
+ function mapChild(n) {
310
+ const index = n >>> TrieBlob.NodeChildRefShift;
311
+ const seq = n & TrieBlob.NodeMaskChildCharIndex;
312
+ const cAcc = acc.clone();
313
+ const codePoint = cAcc.decode(seq);
314
+ if (codePoint === undefined) {
315
+ mapNodeToAcc.set(nodes[index], cAcc);
316
+ }
317
+ return {
318
+ i: index,
319
+ c: codePoint && String.fromCodePoint(codePoint),
320
+ s: seq.toString(16).padStart(2, '0'),
321
+ };
322
+ }
323
+ return {
324
+ i,
325
+ w: (!!(node[0] & TrieBlob.NodeMaskEOW) && 1) || 0,
326
+ c: node.slice(1).map(mapChild),
327
+ };
259
328
  }
260
- const elements = nodes.map((n, i) => nodeElement(n, i));
261
- return elements;
329
+ return nodes.map((n, i) => mapNode(n, i));
262
330
  }
263
331
  //# sourceMappingURL=FastTrieBlob.js.map