npm - cspell-trie-lib - Versions diffs - 6.6.1 → 6.7.0 - Mend

cspell-trie-lib 6.6.1 → 6.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8) hide show

package/README.md +111 -1
package/dist/lib/io/importExport.d.ts +1 -0
package/dist/lib/io/importExport.js +16 -20
package/dist/lib/io/importExportV3.d.ts +11 -1
package/dist/lib/io/importExportV3.js +61 -11
package/dist/lib/io/importExportV4.d.ts +52 -0
package/dist/lib/io/importExportV4.js +442 -0
package/package.json +6 -6

package/README.md CHANGED Viewed

@@ -14,7 +14,117 @@ The resulting trie can then be compressed into a
 npm install -S cspell-trie-lib
 ```
-## File Format
+## File Format V3
+### Header
+```
+TrieXv3
+base=10
+# Comments
+__DATA__
+```
+The header has two parts.
+- `TrieXv3` -- the format identifier.
+- `base` -- the radix used to store references; 10, 16, and 32 are common.
+  The higher the base, the smaller the file. The maximum is 36.
+### Data
+The data is a stream of characters and operators. Each character represents a node in the Trie. The operators adjust the position in the Trie.
+### Conceptual Format
+Given a sorted list of words:
+```text
+joust
+jouster
+jousting
+joy
+joyful
+joyfuller
+joyfullest
+```
+It is possible to think of the same list stored as a series of operations.
+| op    | Meaning             |
+| ----- | ------------------- |
+| `<`   | remove 1 character  |
+| `<<`  | remove 2 characters |
+| `<<<` | remove 3 characters |
+| `<2`  | remove 2 characters |
+| `<3`  | remove 3 characters |
+| `$`   | end of word         |
+| `_`   | visual place holder |
+```text
+joust$
+_____er$
+_____<<
+_____ing$
+__<<<<<<
+__y$
+___ful$
+______ler$
+________<
+________st$
+```
+Becomes:
+```text
+joust$er$<2ing$<6y$ful$ler$<st$
+```
+Trie:
+```text
+j─o┬u─s─t┬$
+   │     ├e─r─$
+   │     └i─n─g─$
+   └y┬$
+     └f─u─l┬$
+           └l─e┬r─$
+               └s─t─$
+```
+### Data Format
+| op    | Meaning                                                                                                   |
+| ----- | --------------------------------------------------------------------------------------------------------- |
+| `<`   | remove 1 character                                                                                        |
+| `<n`  | remove n characters where `n` is `[2-9]`; to remove 12 characters use `<9<3`                              |
+| `$`   | end of word                                                                                               |
+| `\`   | escape next character. All characters can be escaped. <br/> `\\` -> `\` <br/>`\#` -> `#` <br/>`\a` -> `a` |
+| `#n;` | reference to an already imported trie node where `n` is the node number                                   |
+**Sample Data**
+<!--- cspell:disable --->
+```text
+Big Apple$8races\: \{\}\[\]\(\)$9<5
+New York$7umbers \0\1\2\3\4\5\6\7\8\9$9<9
+ap#6;<rrow \<$7
+big a#5;<4urned$r$2ing$3s$$4
+chalk#56;<3u#54;<3
+eol \\n$3w \$$4scape \\\$8
+fun journey$7wal#27;<7
+journalism$tic$2$3s$$2eyer$2man$2e#103;<2$4ste#101;<i#58;<$3vialit#85;<2$4wly$$2yfuller$st$4ness$4$3lessn#120;<$4ou#125;<2ridde#103;<2er$$i#58;<3od#8;<3
+stic#27;<4$3
+lift#56;<3ong w#86;<6
+ref \#$5
+t#61;<
+wa#62;<2
+```
+<!--- cspell:enable --->
+## File Format V1
 ### Header

package/dist/lib/io/importExport.d.ts CHANGED Viewed

@@ -4,6 +4,7 @@ export interface ExportOptions {
     base?: number;
     comment?: string;
     version?: number;
+    addLineBreaksToImproveDiffs?: boolean;
 }
 /**
  * Serialize a TrieNode.

package/dist/lib/io/importExport.js CHANGED Viewed

@@ -24,12 +24,21 @@ var __importStar = (this && this.__importStar) || function (mod) {
 };
 Object.defineProperty(exports, "__esModule", { value: true });
 exports.importTrie = exports.serializeTrie = void 0;
+const cspell_pipe_1 = require("@cspell/cspell-pipe");
 const gensequence_1 = require("gensequence");
 const iv1 = __importStar(require("./importExportV1"));
 const iv2 = __importStar(require("./importExportV2"));
 const iv3 = __importStar(require("./importExportV3"));
-const serializers = [iv1.serializeTrie, iv1.serializeTrie, iv2.serializeTrie, iv3.serializeTrie];
-const deserializers = [iv1.importTrie, iv1.importTrie, iv2.importTrie, iv3.importTrie];
+const iv4 = __importStar(require("./importExportV4"));
+const serializers = [
+    iv1.serializeTrie,
+    iv1.serializeTrie,
+    iv2.serializeTrie,
+    iv3.serializeTrie,
+    iv4.serializeTrie,
+];
+const deserializers = [iv1.importTrie, iv1.importTrie, iv2.importTrie, iv3.importTrie, iv4.importTrie];
+const DEFAULT_VERSION = 3;
 /**
  * Serialize a TrieNode.
  * Note: This is destructive.  The node will no longer be usable.
@@ -37,7 +46,7 @@ const deserializers = [iv1.importTrie, iv1.importTrie, iv2.importTrie, iv3.impor
  * Considering this is the last step before exporting, it was decided to let this be destructive.
  */
 function serializeTrie(root, options = 16) {
-    const version = typeof options !== 'number' && options.version ? options.version : 0;
+    const version = typeof options !== 'number' && options.version ? options.version : DEFAULT_VERSION;
     const method = serializers[version];
     if (!method) {
         throw new Error(`Unknown version: ${version}`);
@@ -46,14 +55,9 @@ function serializeTrie(root, options = 16) {
 }
 exports.serializeTrie = serializeTrie;
 function importTrie(lines) {
-    const comment = /^\s*#/;
-    function* arrayToIterableIterator(i) {
-        yield* i;
-    }
     function parseHeaderRows(headerRows) {
         const header = headerRows.join('\n');
-        const headerReg = /\bTrieXv(\d+)/;
-        /* istanbul ignore if */
+        const headerReg = /^\s*TrieXv(\d+)/m;
         const match = header.match(headerReg);
         if (!match)
             throw new Error('Unknown file format');
@@ -61,16 +65,8 @@ function importTrie(lines) {
     }
     function readHeader(iter) {
         const headerRows = [];
-        // eslint-disable-next-line no-constant-condition
-        while (true) {
-            const next = iter.next();
-            if (next.done) {
-                break;
-            }
-            const line = next.value.trim();
-            if (!line || comment.test(line)) {
-                continue;
-            }
+        for (const entry of iter) {
+            const line = entry.trim();
             headerRows.push(line);
             if (line === iv1.DATA || line === iv2.DATA) {
                 break;
@@ -78,7 +74,7 @@ function importTrie(lines) {
         }
         return headerRows;
     }
-    const input = arrayToIterableIterator(lines);
+    const input = (0, cspell_pipe_1.toDistributableIterable)(lines);
     const headerLines = readHeader(input);
     const version = parseHeaderRows(headerLines);
     const stream = (0, gensequence_1.genSequence)(headerLines).concat(input);

package/dist/lib/io/importExportV3.d.ts CHANGED Viewed

@@ -4,10 +4,20 @@ export declare const DATA = "__DATA__";
 export interface ExportOptions {
     base?: number;
     comment?: string;
+    /**
+     * This will reduce the size of the `.trie` file by removing references to short suffixes.
+     * But it does increase the size of the trie when loaded into memory.
+     */
+    optimizeSimpleReferences?: boolean;
+    /**
+     * To improve diffs, an EOL is added before each double letter prefix.
+     * @default true
+     */
+    addLineBreaksToImproveDiffs?: boolean;
 }
 /**
  * Serialize a TrieRoot.
  */
 export declare function serializeTrie(root: TrieRoot, options?: ExportOptions | number): Sequence<string>;
-export declare function importTrie(linesX: Iterable<string>): TrieRoot;
+export declare function importTrie(linesX: Iterable<string> | string): TrieRoot;
 //# sourceMappingURL=importExportV3.d.ts.map

package/dist/lib/io/importExportV3.js CHANGED Viewed

@@ -5,12 +5,12 @@ const TrieNode_1 = require("../TrieNode");
 const gensequence_1 = require("gensequence");
 const bufferLines_1 = require("../utils/bufferLines");
 const trie_util_1 = require("../trie-util");
-const EOW = '$';
-const BACK = '<';
-const EOL = '\n';
-const LF = '\r';
-const REF = '#';
-const EOR = ';';
+const EOW = '$'; // End of word
+const BACK = '<'; // Move up the tree
+const EOL = '\n'; // End of Line (ignored)
+const LF = '\r'; // Carriage Return (ignored)
+const REF = '#'; // Start of Reference
+const EOR = ';'; // End of Reference
 const ESCAPE = '\\';
 const specialCharacters = new Set([EOW, BACK, EOL, REF, EOR, ESCAPE, LF]
     .concat('0123456789'.split(''))
@@ -21,6 +21,8 @@ const specialCharacterMap = new Map([
     ['\\', '\\\\'],
 ]);
 const characterMap = new Map([...specialCharacterMap].map((a) => [a[1], a[0]]));
+const specialPrefix = stringToCharSet('~!');
+const WORDS_PER_LINE = 20;
 exports.DATA = '__DATA__';
 function generateHeader(base, comment) {
     const header = ['#!/usr/bin/env cspell-trie reader', 'TrieXv3', 'base=' + base]
@@ -32,12 +34,15 @@ function generateHeader(base, comment) {
  * Serialize a TrieRoot.
  */
 function serializeTrie(root, options = 16) {
-    options = typeof options === 'number' ? { base: options } : options;
-    const { base = 16, comment = '' } = options;
+    options = typeof options === 'number' ? { base: options, addLineBreaksToImproveDiffs: false } : options;
+    const { base = 16, comment = '', addLineBreaksToImproveDiffs: addBreaks = true } = options;
     const radix = base > 36 ? 36 : base < 10 ? 10 : base;
     const cache = new Map();
+    const cacheShouldRef = new Map();
     let count = 0;
-    const backBuffer = { last: '', count: 0 };
+    const backBuffer = { last: '', count: 0, words: 0, eol: false };
+    const optimizeSimpleReferences = options.optimizeSimpleReferences ?? false;
+    const wordChars = [];
     function ref(n) {
         return '#' + n.toString(radix) + ';';
     }
@@ -51,6 +56,11 @@ function serializeTrie(root, options = 16) {
             backBuffer.last = BACK;
             backBuffer.count -= n;
         }
+        if (backBuffer.eol) {
+            yield EOL;
+            backBuffer.eol = false;
+            backBuffer.words = 0;
+        }
     }
     function* emit(s) {
         switch (s) {
@@ -58,25 +68,39 @@ function serializeTrie(root, options = 16) {
                 yield* flush();
                 backBuffer.last = EOW;
                 backBuffer.count = 0;
+                backBuffer.words++;
                 break;
             case BACK:
                 backBuffer.count++;
                 break;
+            case EOL:
+                backBuffer.eol = true;
+                break;
             default:
+                if (backBuffer.words >= WORDS_PER_LINE) {
+                    backBuffer.eol = true;
+                }
                 yield* flush();
+                if (s.startsWith(REF)) {
+                    backBuffer.words++;
+                }
                 yield s;
         }
     }
     function* walk(node, depth) {
         const r = cache.get(node);
-        if (r !== undefined) {
+        if (r !== undefined && (!optimizeSimpleReferences || !shouldSimpleRef(node))) {
             yield* emit(ref(r));
             return;
         }
         if (node.c) {
+            if (addBreaks && depth > 0 && depth <= 2) {
+                yield* emit(EOL);
+            }
             cache.set(node, count++);
             const c = [...node.c].sort((a, b) => (a[0] < b[0] ? -1 : 1));
             for (const [s, n] of c) {
+                wordChars[depth] = s;
                 yield* emit(escape(s));
                 yield* walk(n, depth + 1);
                 yield* emit(BACK);
@@ -88,18 +112,36 @@ function serializeTrie(root, options = 16) {
         if (node.f) {
             yield* emit(EOW);
         }
+        if (addBreaks && (depth === 2 || (depth === 3 && wordChars[0] in specialPrefix))) {
+            yield* emit(EOL);
+        }
     }
     function* serialize(node) {
         yield* walk(node, 0);
         yield* flush();
     }
-    return generateHeader(radix, comment).concat((0, bufferLines_1.bufferLines)((0, bufferLines_1.bufferLines)(serialize(root), 120, '\n'), 10, ''));
+    function _calcShouldSimpleRef(node) {
+        if (node.c?.size !== 1)
+            return false;
+        const [n] = [...node.c.values()];
+        return !!n.f && (n.c === undefined || n.c.size === 0);
+    }
+    function shouldSimpleRef(node) {
+        const r = cacheShouldRef.get(node);
+        if (r !== undefined)
+            return r;
+        const rr = _calcShouldSimpleRef(node);
+        cacheShouldRef.set(node, rr);
+        return rr;
+    }
+    return generateHeader(radix, comment).concat((0, bufferLines_1.bufferLines)(serialize(root), 1200, ''));
 }
 exports.serializeTrie = serializeTrie;
 function* toIterableIterator(iter) {
     yield* iter;
 }
 function importTrie(linesX) {
+    linesX = typeof linesX === 'string' ? linesX.split(/(?<=\n)/) : linesX;
     const root = (0, trie_util_1.trieNodeToRoot)({}, {});
     let radix = 16;
     const comment = /^\s*#/;
@@ -234,4 +276,12 @@ function parseStream(radix) {
     }
     return parserMain;
 }
+function stringToCharSet(values) {
+    const set = Object.create(null);
+    const len = values.length;
+    for (let i = 0; i < len; ++i) {
+        set[values[i]] = true;
+    }
+    return set;
+}
 //# sourceMappingURL=importExportV3.js.map

package/dist/lib/io/importExportV4.d.ts ADDED Viewed

@@ -0,0 +1,52 @@
+/**
+ * Trie file format v4
+ *
+ * Trie format v4 is very similar to v3. The v4 reader can even read v3 files.
+ * The motivation behind v4 is to reduce the cost of storing `.trie` files in git.
+ * When a word is added in v3, nearly the entire file is changed due to the absolute
+ * references. V4 adds an index sorted by the most frequently used reference to the least.
+ * Because git diff is line based, it is important to add line breaks at logical points.
+ * V3 added line breaks just to make sure the lines were not too long, V4 takes a different
+ * approach. Line breaks are added at two distinct points. First, at the start of each two
+ * letter prefix and second after approximately 20 words have been emitted.
+ *
+ * To improve readability and git diff, at the beginning of each two letter prefix,
+ * a comment is emitted.
+ *
+ * Example:
+ *
+ * ```
+ * /* ab */
+ * ```
+ */
+import { Sequence } from 'gensequence';
+import { TrieNode, TrieRoot } from '../TrieNode';
+export declare const DATA = "__DATA__";
+export interface ExportOptions {
+    base?: number;
+    comment?: string;
+    /**
+     * This will reduce the size of the `.trie` file by removing references to short suffixes.
+     * But it does increase the size of the trie when loaded into memory.
+     */
+    optimizeSimpleReferences?: boolean;
+}
+/**
+ * Serialize a TrieRoot.
+ */
+export declare function serializeTrie(root: TrieRoot, options?: ExportOptions | number): Sequence<string>;
+interface ReferenceMap {
+    /**
+     * An array of references to nodes.
+     * The most frequently referenced is first in the list.
+     * A node must be referenced by other nodes to be included.
+     */
+    refCounts: (readonly [TrieNode, number])[];
+}
+declare function buildReferenceMap(root: TrieRoot, base: number): ReferenceMap;
+export declare function importTrie(linesX: Iterable<string> | string): TrieRoot;
+export declare const __testing__: {
+    buildReferenceMap: typeof buildReferenceMap;
+};
+export {};
+//# sourceMappingURL=importExportV4.d.ts.map

package/dist/lib/io/importExportV4.js ADDED Viewed

@@ -0,0 +1,442 @@
+"use strict";
+/* eslint-disable no-irregular-whitespace */
+/**
+ * Trie file format v4
+ *
+ * Trie format v4 is very similar to v3. The v4 reader can even read v3 files.
+ * The motivation behind v4 is to reduce the cost of storing `.trie` files in git.
+ * When a word is added in v3, nearly the entire file is changed due to the absolute
+ * references. V4 adds an index sorted by the most frequently used reference to the least.
+ * Because git diff is line based, it is important to add line breaks at logical points.
+ * V3 added line breaks just to make sure the lines were not too long, V4 takes a different
+ * approach. Line breaks are added at two distinct points. First, at the start of each two
+ * letter prefix and second after approximately 20 words have been emitted.
+ *
+ * To improve readability and git diff, at the beginning of each two letter prefix,
+ * a comment is emitted.
+ *
+ * Example:
+ *
+ * ```
+ * /* ab */
+ * ```
+ */
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.__testing__ = exports.importTrie = exports.serializeTrie = exports.DATA = void 0;
+const cspell_pipe_1 = require("@cspell/cspell-pipe");
+const gensequence_1 = require("gensequence");
+const trie_util_1 = require("../trie-util");
+const TrieNode_1 = require("../TrieNode");
+const bufferLines_1 = require("../utils/bufferLines");
+const EOW = '$'; // End of word
+const BACK = '<'; // Move up the tree
+const EOL = '\n'; // End of Line (ignored)
+const LF = '\r'; // Carriage Return (ignored)
+const REF = '#'; // Start of absolute Reference
+const REF_REL = '@'; // Start of indexed Reference
+const EOR = ';'; // End of Reference
+const ESCAPE = '\\';
+const REF_INDEX_BEGIN = '[';
+const REF_INDEX_END = ']';
+const INLINE_DATA_COMMENT_LINE = '/';
+const specialCharacters = stringToCharSet([EOW, BACK, EOL, REF, REF_REL, EOR, ESCAPE, LF, REF_INDEX_BEGIN, REF_INDEX_END, INLINE_DATA_COMMENT_LINE]
+    .concat('0123456789'.split(''))
+    .concat('`~!@#$%^&*()_-+=[]{};:\'"<>,./?\\|'.split(''))
+    .join(''));
+const SPECIAL_CHARACTERS_MAP = [
+    ['\n', '\\n'],
+    ['\r', '\\r'],
+    ['\\', '\\\\'],
+];
+const specialCharacterMap = stringToCharMap(SPECIAL_CHARACTERS_MAP);
+const characterMap = stringToCharMap(SPECIAL_CHARACTERS_MAP.map((a) => [a[1], a[0]]));
+const specialPrefix = stringToCharSet('~!');
+const WORDS_PER_LINE = 20;
+exports.DATA = '__DATA__';
+function generateHeader(base, comment) {
+    const comments = comment
+        .split('\n')
+        .map((a) => '# ' + a.trimEnd())
+        .join('\n');
+    return `\
+#!/usr/bin/env cspell-trie reader
+TrieXv4
+base=${base}
+${comments}
+# Data:
+${exports.DATA}
+`;
+}
+/**
+ * Serialize a TrieRoot.
+ */
+function serializeTrie(root, options = 16) {
+    options = typeof options === 'number' ? { base: options } : options;
+    const { base = 10, comment = '' } = options;
+    const radix = base > 36 ? 36 : base < 10 ? 10 : base;
+    const cache = new Map();
+    const refMap = buildReferenceMap(root, base);
+    const nodeToIndexMap = new Map(refMap.refCounts.map(([node], index) => [node, index]));
+    let count = 0;
+    const backBuffer = { last: '', count: 0, words: 0, eol: false };
+    const wordChars = [];
+    function ref(n, idx) {
+        const r = idx === undefined || n < idx ? REF + n.toString(radix) : REF_REL + idx.toString(radix);
+        return radix === 10 ? r : r + ';';
+    }
+    function escape(s) {
+        return s in specialCharacters ? ESCAPE + (specialCharacterMap[s] || s) : s;
+    }
+    function* flush() {
+        while (backBuffer.count) {
+            const n = Math.min(9, backBuffer.count);
+            yield n > 1 ? backBuffer.last + n : backBuffer.last;
+            backBuffer.last = BACK;
+            backBuffer.count -= n;
+        }
+        if (backBuffer.eol) {
+            yield EOL;
+            backBuffer.eol = false;
+            backBuffer.words = 0;
+        }
+    }
+    function* emit(s) {
+        switch (s) {
+            case EOW:
+                yield* flush();
+                backBuffer.last = EOW;
+                backBuffer.count = 0;
+                backBuffer.words++;
+                break;
+            case BACK:
+                backBuffer.count++;
+                break;
+            case EOL:
+                backBuffer.eol = true;
+                break;
+            default:
+                if (backBuffer.words >= WORDS_PER_LINE) {
+                    backBuffer.eol = true;
+                }
+                yield* flush();
+                if (s.startsWith(REF) || s.startsWith(REF_REL)) {
+                    backBuffer.words++;
+                }
+                yield s;
+        }
+    }
+    const comment_begin = `${EOL}${INLINE_DATA_COMMENT_LINE}* `;
+    const comment_end = ` *${INLINE_DATA_COMMENT_LINE}${EOL}`;
+    function* walk(node, depth) {
+        const nodeNumber = cache.get(node);
+        const refIndex = nodeToIndexMap.get(node);
+        if (nodeNumber !== undefined) {
+            yield* emit(ref(nodeNumber, refIndex));
+            return;
+        }
+        if (node.c) {
+            if (depth > 0 && depth <= 2) {
+                const chars = wordChars.slice(0, depth).map(escape).join('');
+                yield* emit(comment_begin + chars + comment_end);
+            }
+            cache.set(node, count++);
+            const c = [...node.c].sort((a, b) => (a[0] < b[0] ? -1 : 1));
+            for (const [s, n] of c) {
+                wordChars[depth] = s;
+                yield* emit(escape(s));
+                yield* walk(n, depth + 1);
+                yield* emit(BACK);
+                if (depth === 0)
+                    yield* emit(EOL);
+            }
+        }
+        // Output EOW after children so it can be optimized on read
+        if (node.f) {
+            yield* emit(EOW);
+        }
+        if (depth === 2 || (depth === 3 && wordChars[0] in specialPrefix)) {
+            yield* emit(EOL);
+        }
+    }
+    function* serialize(node) {
+        yield* walk(node, 0);
+        yield* flush();
+    }
+    const lines = [...(0, bufferLines_1.bufferLines)(serialize(root), 1000, '')];
+    const resolvedReferences = refMap.refCounts.map(([node]) => cache.get(node) || 0);
+    // const r = refMap.refCounts.slice(0, 200).map(([node, c]) => ({ n: cache.get(node) || 0, c }));
+    // console.log('First 100: %o \n %o', r.slice(0, 100), r.slice(100, 200));
+    const reference = '[\n' +
+        resolvedReferences
+            .map((n) => n.toString(radix))
+            .join(',')
+            .replace(/.{110,130}[,]/g, '$&\n') +
+        '\n]\n';
+    return (0, gensequence_1.genSequence)([generateHeader(radix, comment), reference]).concat(lines);
+}
+exports.serializeTrie = serializeTrie;
+function buildReferenceMap(root, base) {
+    const refCount = new Map();
+    let nodeCount = 0;
+    function walk(node) {
+        const ref = refCount.get(node);
+        if (ref) {
+            ref.c++;
+            return;
+        }
+        refCount.set(node, { c: 1, n: nodeCount++ });
+        if (!node.c)
+            return;
+        for (const child of node.c.values()) {
+            walk(child);
+        }
+    }
+    walk(root);
+    // sorted highest to lowest
+    const refCountAndNode = [
+        ...(0, cspell_pipe_1.pipeSync)(refCount, (0, cspell_pipe_1.opFilter)(([_, ref]) => ref.c >= 2)),
+    ].sort((a, b) => b[1].c - a[1].c || a[1].n - b[1].n);
+    let adj = 0;
+    const baseLogScale = 1 / Math.log(base);
+    const refs = refCountAndNode
+        .filter(([_, ref], idx) => {
+        const i = idx - adj;
+        const charsIdx = Math.ceil(Math.log(i) * baseLogScale);
+        const charsNode = Math.ceil(Math.log(ref.n) * baseLogScale);
+        const savings = ref.c * (charsNode - charsIdx) - charsIdx;
+        const keep = savings > 0;
+        adj += keep ? 0 : 1;
+        return keep;
+    })
+        .map(([n, ref]) => [n, ref.c]);
+    return { refCounts: refs };
+}
+function importTrie(linesX) {
+    linesX = typeof linesX === 'string' ? linesX.split(/(?<=\n)/) : linesX;
+    let radix = 10;
+    const comment = /^\s*#/;
+    const iter = tapIterable((0, cspell_pipe_1.pipeSync)(linesX, (0, cspell_pipe_1.opConcatMap)((a) => a.split(/(?<=\n)(?!$)/))));
+    function parseHeaderRows(headerRows) {
+        const header = headerRows.slice(0, 2).join('\n');
+        const headerReg = /^TrieXv[34]\nbase=(\d+)$/;
+        /* istanbul ignore if */
+        if (!headerReg.test(header))
+            throw new Error('Unknown file format');
+        radix = Number.parseInt(header.replace(headerReg, '$1'), 10);
+    }
+    function readHeader(iter) {
+        const headerRows = [];
+        for (const value of iter) {
+            const line = value.trim();
+            if (!line || comment.test(line))
+                continue;
+            if (line === exports.DATA)
+                break;
+            headerRows.push(line);
+        }
+        parseHeaderRows(headerRows);
+    }
+    readHeader(iter);
+    const root = parseStream(radix, iter);
+    return root;
+}
+exports.importTrie = importTrie;
+const numbersSet = stringToCharSet('0123456789');
+function parseStream(radix, iter) {
+    const eow = Object.freeze({ f: 1 });
+    let refIndex = [];
+    const root = (0, trie_util_1.trieNodeToRoot)({}, {});
+    function parseReference(acc, s) {
+        const isIndexRef = s === REF_REL;
+        let ref = '';
+        function parser(acc, s) {
+            if (s === EOR || (radix === 10 && !(s in numbersSet))) {
+                const { root, nodes, stack } = acc;
+                const r = parseInt(ref, radix);
+                const top = stack[stack.length - 1];
+                const p = stack[stack.length - 2].node;
+                const n = isIndexRef ? refIndex[r] : r;
+                p.c?.set(top.s, nodes[n]);
+                const rr = { root, nodes, stack, parser: undefined };
+                return s === EOR ? rr : parserMain(rr, s);
+            }
+            ref = ref + s;
+            return acc;
+        }
+        const { nodes } = acc;
+        nodes.pop();
+        return { ...acc, nodes, parser };
+    }
+    function parseEscapeCharacter(acc, _) {
+        let prev = '';
+        const parser = function (acc, s) {
+            if (prev) {
+                s = characterMap[prev + s] || s;
+                return parseCharacter({ ...acc, parser: undefined }, s);
+            }
+            if (s === ESCAPE) {
+                prev = s;
+                return acc;
+            }
+            return parseCharacter({ ...acc, parser: undefined }, s);
+        };
+        return { ...acc, parser };
+    }
+    function parseComment(acc, s) {
+        const endOfComment = s;
+        let isEscaped = false;
+        function parser(acc, s) {
+            if (isEscaped) {
+                isEscaped = false;
+                return acc;
+            }
+            if (s === ESCAPE) {
+                isEscaped = true;
+                return acc;
+            }
+            if (s === endOfComment) {
+                return { ...acc, parser: undefined };
+            }
+            return acc;
+        }
+        return { ...acc, parser };
+    }
+    function parseCharacter(acc, s) {
+        const parser = undefined;
+        const { root, nodes, stack } = acc;
+        const top = stack[stack.length - 1];
+        const node = top.node;
+        node.c = node.c ?? new Map();
+        const n = { f: undefined, c: undefined, n: nodes.length };
+        node.c.set(s, n);
+        stack.push({ node: n, s });
+        nodes.push(n);
+        return { root, nodes, stack, parser };
+    }
+    function parseEOW(acc, _) {
+        const parser = parseBack;
+        const { root, nodes, stack } = acc;
+        const top = stack[stack.length - 1];
+        const node = top.node;
+        node.f = TrieNode_1.FLAG_WORD;
+        if (!node.c) {
+            top.node = eow;
+            const p = stack[stack.length - 2].node;
+            p.c?.set(top.s, eow);
+            nodes.pop();
+        }
+        stack.pop();
+        return { root, nodes, stack, parser };
+    }
+    const charactersBack = stringToCharSet(BACK + '23456789');
+    function parseBack(acc, s) {
+        if (!(s in charactersBack)) {
+            return parserMain({ ...acc, parser: undefined }, s);
+        }
+        let n = s === BACK ? 1 : parseInt(s, 10) - 1;
+        const { stack } = acc;
+        while (n-- > 0) {
+            stack.pop();
+        }
+        return { ...acc, parser: parseBack };
+    }
+    function parseIgnore(acc, _) {
+        return acc;
+    }
+    const parsers = createStringLookupMap([
+        [EOW, parseEOW],
+        [BACK, parseBack],
+        [REF, parseReference],
+        [REF_REL, parseReference],
+        [ESCAPE, parseEscapeCharacter],
+        [EOL, parseIgnore],
+        [LF, parseIgnore],
+        [INLINE_DATA_COMMENT_LINE, parseComment],
+    ]);
+    function parserMain(acc, s) {
+        const parser = acc.parser ?? parsers[s] ?? parseCharacter;
+        return parser(acc, s);
+    }
+    const charsetSpaces = stringToCharSet(' \r\n\t');
+    function parseReferenceIndex(acc, s) {
+        let json = '';
+        function parserStart(acc, s) {
+            if (s === REF_INDEX_BEGIN) {
+                json = json + s;
+                return { ...acc, parser };
+            }
+            if (s in charsetSpaces) {
+                return acc;
+            }
+            // A Reference Index was not found.
+            return parserMain({ ...acc, parser: undefined }, s);
+        }
+        function parser(acc, s) {
+            json = json + s;
+            if (s === REF_INDEX_END) {
+                refIndex = JSON.parse(json);
+                return { ...acc, parser: undefined };
+            }
+            return acc;
+        }
+        return parserStart({ ...acc, parser: parserStart }, s);
+    }
+    (0, gensequence_1.genSequence)(iter)
+        .concatMap((a) => a.split(''))
+        .reduce(parserMain, {
+        nodes: [root],
+        root,
+        stack: [{ node: root, s: '' }],
+        parser: parseReferenceIndex,
+    });
+    return root;
+}
+function stringToCharSet(values) {
+    const set = Object.create(null);
+    const len = values.length;
+    for (let i = 0; i < len; ++i) {
+        set[values[i]] = true;
+    }
+    return set;
+}
+function stringToCharMap(values) {
+    return createStringLookupMap(values);
+}
+function createStringLookupMap(values) {
+    const map = Object.create(null);
+    const len = values.length;
+    for (let i = 0; i < len; ++i) {
+        map[values[i][0]] = values[i][1];
+    }
+    return map;
+}
+/**
+ * Allows an iterable to be shared by multiple consumers.
+ * Each consumer takes from the iterable.
+ * @param iterable - the iterable to share
+ */
+function tapIterable(iterable) {
+    let lastValue;
+    let iter;
+    function getNext() {
+        if (lastValue && lastValue.done) {
+            return { ...lastValue };
+        }
+        iter = iter || iterable[Symbol.iterator]();
+        lastValue = iter.next();
+        return lastValue;
+    }
+    function* iterableFn() {
+        let next;
+        while (!(next = getNext()).done) {
+            yield next.value;
+        }
+    }
+    return {
+        [Symbol.iterator]: iterableFn,
+    };
+}
+exports.__testing__ = {
+    buildReferenceMap,
+};
+//# sourceMappingURL=importExportV4.js.map

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "cspell-trie-lib",
-  "version": "6.6.1",
+  "version": "6.7.0",
   "description": "Trie Data Structure to support cspell.",
   "main": "dist/index.js",
   "types": "dist/index.d.ts",
@@ -37,7 +37,8 @@
   },
   "homepage": "https://github.com/streetsidesoftware/cspell#readme",
   "dependencies": {
-    "@cspell/cspell-pipe": "^6.6.1",
+    "@cspell/cspell-pipe": "^6.7.0",
+    "@cspell/cspell-types": "^6.7.0",
     "fs-extra": "^10.1.0",
     "gensequence": "^3.1.1"
   },
@@ -45,13 +46,12 @@
     "node": ">=14"
   },
   "devDependencies": {
-    "@cspell/cspell-types": "^6.6.1",
-    "@cspell/dict-en_us": "^2.3.0",
+    "@cspell/dict-en_us": "^2.3.1",
     "@cspell/dict-es-es": "^2.2.0",
     "@types/fs-extra": "^9.0.13",
-    "@types/node": "^18.6.5",
+    "@types/node": "^18.7.6",
     "jest": "^28.1.3",
     "rimraf": "^3.0.2"
   },
-  "gitHead": "3c9c24d1cebd558ac3729d3fbf441e6ed751d8cf"
+  "gitHead": "3a7312a15d2df1507d9e01863ec5842f5a99e0cc"
 }