cspell-trie-lib 9.2.1 → 9.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.ts +0 -25
- package/dist/index.js +199 -382
- package/package.json +5 -5
package/dist/index.js
CHANGED
|
@@ -19,19 +19,18 @@ function memorizeLastCall(fn) {
|
|
|
19
19
|
|
|
20
20
|
//#endregion
|
|
21
21
|
//#region src/lib/ITrieNode/find.ts
|
|
22
|
-
const defaultLegacyMinCompoundLength$
|
|
22
|
+
const defaultLegacyMinCompoundLength$2 = 3;
|
|
23
23
|
const _defaultFindOptions$1 = {
|
|
24
24
|
matchCase: false,
|
|
25
25
|
compoundMode: "compound",
|
|
26
|
-
legacyMinCompoundLength: defaultLegacyMinCompoundLength$
|
|
26
|
+
legacyMinCompoundLength: defaultLegacyMinCompoundLength$2
|
|
27
27
|
};
|
|
28
28
|
Object.freeze(_defaultFindOptions$1);
|
|
29
|
-
const
|
|
29
|
+
const knownCompoundModes$1 = new Map([
|
|
30
30
|
"none",
|
|
31
31
|
"compound",
|
|
32
32
|
"legacy"
|
|
33
|
-
];
|
|
34
|
-
const knownCompoundModes$1 = new Map(arrayCompoundModes$1.map((a) => [a, a]));
|
|
33
|
+
].map((a) => [a, a]));
|
|
35
34
|
const notFound = {
|
|
36
35
|
found: false,
|
|
37
36
|
compoundUsed: false,
|
|
@@ -89,23 +88,18 @@ function _findWordNode$1(root, word, options) {
|
|
|
89
88
|
const checkForbidden = options?.checkForbidden ?? true;
|
|
90
89
|
function __findCompound() {
|
|
91
90
|
const f = findCompoundWord$1(root, word, compoundPrefix, ignoreCasePrefix);
|
|
92
|
-
if (f.found !== false && (mustCheckForbidden || f.compoundUsed && checkForbidden))
|
|
93
|
-
const r = !f.caseMatched ? walk$2(root, root.caseInsensitivePrefix) : root;
|
|
94
|
-
f.forbidden = isForbiddenWord$1(r, word, root.forbidPrefix);
|
|
95
|
-
}
|
|
91
|
+
if (f.found !== false && (mustCheckForbidden || f.compoundUsed && checkForbidden)) f.forbidden = isForbiddenWord$1(!f.caseMatched ? walk$2(root, root.caseInsensitivePrefix) : root, word, root.forbidPrefix);
|
|
96
92
|
return f;
|
|
97
93
|
}
|
|
98
94
|
function __findExact() {
|
|
99
95
|
const n = root.getNode ? root.getNode(word) : walk$2(root, word);
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
found: isFound && word,
|
|
96
|
+
return {
|
|
97
|
+
found: isEndOfWordNode$1(n) && word,
|
|
103
98
|
compoundUsed: false,
|
|
104
99
|
forbidden: checkForbidden ? isForbiddenWord$1(root, word, root.forbidPrefix) : void 0,
|
|
105
100
|
node: n,
|
|
106
101
|
caseMatched: true
|
|
107
102
|
};
|
|
108
|
-
return result;
|
|
109
103
|
}
|
|
110
104
|
switch (compoundMode) {
|
|
111
105
|
case "none": return matchCase ? __findExact() : __findCompound();
|
|
@@ -116,7 +110,7 @@ function _findWordNode$1(root, word, options) {
|
|
|
116
110
|
function findLegacyCompound$1(root, word, options) {
|
|
117
111
|
const roots = [root];
|
|
118
112
|
if (!options?.matchCase) roots.push(walk$2(root, root.caseInsensitivePrefix));
|
|
119
|
-
return findLegacyCompoundNode$1(roots, word, options?.legacyMinCompoundLength || defaultLegacyMinCompoundLength$
|
|
113
|
+
return findLegacyCompoundNode$1(roots, word, options?.legacyMinCompoundLength || defaultLegacyMinCompoundLength$2);
|
|
120
114
|
}
|
|
121
115
|
function findCompoundNode$1(root, word, compoundCharacter, ignoreCasePrefix) {
|
|
122
116
|
const stack = [{
|
|
@@ -179,15 +173,13 @@ function findCompoundNode$1(root, word, compoundCharacter, ignoreCasePrefix) {
|
|
|
179
173
|
break;
|
|
180
174
|
}
|
|
181
175
|
}
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
found,
|
|
176
|
+
return {
|
|
177
|
+
found: i === word.length && word || false,
|
|
185
178
|
compoundUsed,
|
|
186
179
|
node,
|
|
187
180
|
forbidden: void 0,
|
|
188
181
|
caseMatched
|
|
189
182
|
};
|
|
190
|
-
return result;
|
|
191
183
|
}
|
|
192
184
|
function findCompoundWord$1(root, word, compoundCharacter, ignoreCasePrefix) {
|
|
193
185
|
const { found, compoundUsed, node, caseMatched } = findCompoundNode$1(root, word, compoundCharacter, ignoreCasePrefix);
|
|
@@ -244,8 +236,7 @@ function findLegacyCompoundNode$1(roots, word, minCompoundLength) {
|
|
|
244
236
|
while (true) {
|
|
245
237
|
const s = stack[i];
|
|
246
238
|
const h = w[i++];
|
|
247
|
-
const
|
|
248
|
-
const c = n?.get(h);
|
|
239
|
+
const c = (s.cr || s.n)?.get(h);
|
|
249
240
|
if (c && i < wLen) stack[i] = {
|
|
250
241
|
n: c,
|
|
251
242
|
usedRoots: 0,
|
|
@@ -285,15 +276,13 @@ function findLegacyCompoundNode$1(roots, word, minCompoundLength) {
|
|
|
285
276
|
}
|
|
286
277
|
return letters.join("");
|
|
287
278
|
}
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
found,
|
|
279
|
+
return {
|
|
280
|
+
found: extractWord(),
|
|
291
281
|
compoundUsed,
|
|
292
282
|
node,
|
|
293
283
|
forbidden: void 0,
|
|
294
284
|
caseMatched
|
|
295
285
|
};
|
|
296
|
-
return result;
|
|
297
286
|
}
|
|
298
287
|
function isForbiddenWord$1(root, word, forbiddenPrefix) {
|
|
299
288
|
const r = root;
|
|
@@ -368,12 +357,11 @@ function* compoundWalker$1(root, compoundingMethod) {
|
|
|
368
357
|
while (s.ci < s.c.length) {
|
|
369
358
|
const [char, node] = s.c[s.ci++];
|
|
370
359
|
const text = baseText + char;
|
|
371
|
-
|
|
360
|
+
if ((yield {
|
|
372
361
|
text,
|
|
373
362
|
node,
|
|
374
363
|
depth
|
|
375
|
-
}
|
|
376
|
-
if (goDeeper ?? true) {
|
|
364
|
+
}) ?? true) {
|
|
377
365
|
depth++;
|
|
378
366
|
baseText = text;
|
|
379
367
|
stack[depth] = {
|
|
@@ -408,12 +396,11 @@ function* nodeWalker$1(root) {
|
|
|
408
396
|
const idx$1 = s.ci++;
|
|
409
397
|
const [char, node] = s.c[idx$1];
|
|
410
398
|
const text = baseText + char;
|
|
411
|
-
|
|
399
|
+
if ((yield {
|
|
412
400
|
text,
|
|
413
401
|
node,
|
|
414
402
|
depth
|
|
415
|
-
}
|
|
416
|
-
if (goDeeper !== false) {
|
|
403
|
+
}) !== false) {
|
|
417
404
|
depth++;
|
|
418
405
|
baseText = text;
|
|
419
406
|
const s$1 = stack[depth];
|
|
@@ -449,11 +436,10 @@ function* walkerWordsITrie(root) {
|
|
|
449
436
|
let depth = 0;
|
|
450
437
|
const stack = [];
|
|
451
438
|
const entries = root.entries();
|
|
452
|
-
const c = Array.isArray(entries) ? entries : [...entries];
|
|
453
439
|
stack[depth] = {
|
|
454
440
|
t: "",
|
|
455
441
|
n: root,
|
|
456
|
-
c,
|
|
442
|
+
c: Array.isArray(entries) ? entries : [...entries],
|
|
457
443
|
ci: 0
|
|
458
444
|
};
|
|
459
445
|
while (depth >= 0) {
|
|
@@ -467,17 +453,17 @@ function* walkerWordsITrie(root) {
|
|
|
467
453
|
depth++;
|
|
468
454
|
baseText = text;
|
|
469
455
|
const entries$1 = node.entries();
|
|
470
|
-
const c
|
|
456
|
+
const c = Array.isArray(entries$1) ? entries$1 : [...entries$1];
|
|
471
457
|
if (stack[depth]) {
|
|
472
458
|
s = stack[depth];
|
|
473
459
|
s.t = text;
|
|
474
460
|
s.n = node;
|
|
475
|
-
s.c = c
|
|
461
|
+
s.c = c;
|
|
476
462
|
s.ci = 0;
|
|
477
463
|
} else stack[depth] = {
|
|
478
464
|
t: text,
|
|
479
465
|
n: node,
|
|
480
|
-
c
|
|
466
|
+
c,
|
|
481
467
|
ci: 0
|
|
482
468
|
};
|
|
483
469
|
s = stack[depth];
|
|
@@ -542,8 +528,7 @@ function* hintedWalkerNext(root, ignoreCase, hint, compoundingMethod, emitWordSe
|
|
|
542
528
|
root.forbiddenWordPrefix
|
|
543
529
|
]);
|
|
544
530
|
function filterRoot(root$1) {
|
|
545
|
-
const
|
|
546
|
-
const c = children$1?.filter(([v]) => !(v in specialRootsPrefix));
|
|
531
|
+
const c = (root$1.c && Object.entries(root$1.c))?.filter(([v]) => !(v in specialRootsPrefix));
|
|
547
532
|
return { c: c && Object.fromEntries(c) };
|
|
548
533
|
}
|
|
549
534
|
const roots = rawRoots.map(filterRoot);
|
|
@@ -720,8 +705,7 @@ var ImplITrieRoot = class extends ImplITrieNode {
|
|
|
720
705
|
return false;
|
|
721
706
|
}
|
|
722
707
|
resolveId(id) {
|
|
723
|
-
|
|
724
|
-
return new ImplITrieNode(n);
|
|
708
|
+
return new ImplITrieNode(id);
|
|
725
709
|
}
|
|
726
710
|
get forbidPrefix() {
|
|
727
711
|
return this.root.forbiddenWordPrefix;
|
|
@@ -770,12 +754,11 @@ function* compoundWalker(root, compoundingMethod) {
|
|
|
770
754
|
while (s.ci < s.c.length) {
|
|
771
755
|
const [char, node] = s.c[s.ci++];
|
|
772
756
|
const text = baseText + char;
|
|
773
|
-
|
|
757
|
+
if ((yield {
|
|
774
758
|
text,
|
|
775
759
|
node,
|
|
776
760
|
depth
|
|
777
|
-
}
|
|
778
|
-
if (goDeeper ?? true) {
|
|
761
|
+
}) ?? true) {
|
|
779
762
|
depth++;
|
|
780
763
|
baseText = text;
|
|
781
764
|
stack[depth] = {
|
|
@@ -814,12 +797,11 @@ function* nodeWalker(root) {
|
|
|
814
797
|
const char = s.c[s.ci++];
|
|
815
798
|
const node = s.n[char];
|
|
816
799
|
const text = baseText + char;
|
|
817
|
-
|
|
800
|
+
if ((yield {
|
|
818
801
|
text,
|
|
819
802
|
node,
|
|
820
803
|
depth
|
|
821
|
-
}
|
|
822
|
-
if (goDeeper !== false) {
|
|
804
|
+
}) !== false) {
|
|
823
805
|
depth++;
|
|
824
806
|
baseText = text;
|
|
825
807
|
const s$1 = stack[depth];
|
|
@@ -904,14 +886,11 @@ const defaultSuggestionOptions = {
|
|
|
904
886
|
includeTies: true,
|
|
905
887
|
timeout: 5e3
|
|
906
888
|
};
|
|
907
|
-
const
|
|
889
|
+
const keyMapOfSuggestionOptionsStrict = {
|
|
908
890
|
changeLimit: "changeLimit",
|
|
909
891
|
compoundMethod: "compoundMethod",
|
|
910
892
|
ignoreCase: "ignoreCase",
|
|
911
|
-
compoundSeparator: "compoundSeparator"
|
|
912
|
-
};
|
|
913
|
-
const keyMapOfSuggestionOptionsStrict = {
|
|
914
|
-
...keyMapOfGenSuggestionOptionsStrict,
|
|
893
|
+
compoundSeparator: "compoundSeparator",
|
|
915
894
|
filter: "filter",
|
|
916
895
|
includeTies: "includeTies",
|
|
917
896
|
numSuggestions: "numSuggestions",
|
|
@@ -1077,8 +1056,7 @@ const visualLetterGroups = [
|
|
|
1077
1056
|
function forms(letters) {
|
|
1078
1057
|
const n = letters.normalize("NFC").replaceAll(/\p{M}/gu, "");
|
|
1079
1058
|
const na = n.normalize("NFD").replaceAll(/\p{M}/gu, "");
|
|
1080
|
-
|
|
1081
|
-
return [...s].join("");
|
|
1059
|
+
return [...new Set(n + n.toLowerCase() + n.toUpperCase() + na + na.toLowerCase() + na.toUpperCase())].join("");
|
|
1082
1060
|
}
|
|
1083
1061
|
/**
|
|
1084
1062
|
* This is a map of letters to groups mask values.
|
|
@@ -1123,8 +1101,7 @@ function addDefsToWeightMap(map, defs) {
|
|
|
1123
1101
|
}
|
|
1124
1102
|
for (const _def of defs) {
|
|
1125
1103
|
const def = normalizeDef(_def);
|
|
1126
|
-
|
|
1127
|
-
mapSets.forEach((s) => addSet(s, def));
|
|
1104
|
+
splitMap$1(def).forEach((s) => addSet(s, def));
|
|
1128
1105
|
}
|
|
1129
1106
|
return map;
|
|
1130
1107
|
}
|
|
@@ -1182,8 +1159,7 @@ function splitMapSubstrings(map) {
|
|
|
1182
1159
|
*/
|
|
1183
1160
|
function splitMap$1(def) {
|
|
1184
1161
|
const { map } = def;
|
|
1185
|
-
|
|
1186
|
-
return sets.map(splitMapSubstrings).filter((s) => s.length > 0);
|
|
1162
|
+
return map.split("|").map(splitMapSubstrings).filter((s) => s.length > 0);
|
|
1187
1163
|
}
|
|
1188
1164
|
function addToTrieCost(trie, str, cost, penalties) {
|
|
1189
1165
|
if (!str) return;
|
|
@@ -1201,8 +1177,7 @@ function addToTrieTrieCost(trie, left, right, cost, penalties) {
|
|
|
1201
1177
|
const n = t.n = t.n || Object.create(null);
|
|
1202
1178
|
t = n[c] = n[c] || Object.create(null);
|
|
1203
1179
|
}
|
|
1204
|
-
|
|
1205
|
-
addToTrieCost(trieCost, right, cost, penalties);
|
|
1180
|
+
addToTrieCost(t.t = t.t || Object.create(null), right, cost, penalties);
|
|
1206
1181
|
}
|
|
1207
1182
|
function addSetToTrieCost(trie, set, cost, penalties) {
|
|
1208
1183
|
if (cost === void 0) return;
|
|
@@ -1403,12 +1378,11 @@ function _distanceAStarWeightedEx(wordA, wordB, map, cost = 100) {
|
|
|
1403
1378
|
c,
|
|
1404
1379
|
p
|
|
1405
1380
|
};
|
|
1406
|
-
|
|
1381
|
+
[
|
|
1407
1382
|
map.calcInsDelCosts(pos),
|
|
1408
1383
|
map.calcSwapCosts(pos),
|
|
1409
1384
|
map.calcReplaceCosts(pos)
|
|
1410
|
-
]
|
|
1411
|
-
costCalculations.forEach((iter) => {
|
|
1385
|
+
].forEach((iter) => {
|
|
1412
1386
|
for (const nn of iter) candidates.add({
|
|
1413
1387
|
...nn,
|
|
1414
1388
|
f: n
|
|
@@ -1437,7 +1411,6 @@ var CandidatePool = class {
|
|
|
1437
1411
|
next() {
|
|
1438
1412
|
let n;
|
|
1439
1413
|
while (n = this.pool.dequeue()) if (!n.d) return n;
|
|
1440
|
-
return void 0;
|
|
1441
1414
|
}
|
|
1442
1415
|
add(n) {
|
|
1443
1416
|
const i = idx(n.ai, n.bi, this.bN);
|
|
@@ -1628,8 +1601,7 @@ function createPerfTimer() {
|
|
|
1628
1601
|
at: 0,
|
|
1629
1602
|
elapsed: 0
|
|
1630
1603
|
});
|
|
1631
|
-
|
|
1632
|
-
return lines.join("\n");
|
|
1604
|
+
return lineElements.map((e) => `${e.at.padStart(lengths.at)} ${e.name.padEnd(lengths.name)} ${e.elapsed.padStart(lengths.elapsed)}`).join("\n");
|
|
1633
1605
|
}
|
|
1634
1606
|
function measureFn(name, fn) {
|
|
1635
1607
|
const s = start(name);
|
|
@@ -1675,8 +1647,7 @@ function isDefined$1(a) {
|
|
|
1675
1647
|
* @returns t
|
|
1676
1648
|
*/
|
|
1677
1649
|
function cleanCopy(t) {
|
|
1678
|
-
|
|
1679
|
-
return clean$1(r);
|
|
1650
|
+
return clean$1({ ...t });
|
|
1680
1651
|
}
|
|
1681
1652
|
/**
|
|
1682
1653
|
* Remove any fields with an `undefined` value.
|
|
@@ -1714,8 +1685,7 @@ function replaceAllFactory(match, replaceWithText) {
|
|
|
1714
1685
|
const defaultMaxNumberSuggestions = 10;
|
|
1715
1686
|
const BASE_COST = 100;
|
|
1716
1687
|
const MAX_NUM_CHANGES = 5;
|
|
1717
|
-
const
|
|
1718
|
-
const MAX_ALLOWED_COST_SCALE = 1.03 * MAX_COST_SCALE;
|
|
1688
|
+
const MAX_ALLOWED_COST_SCALE = 1.03 * .5;
|
|
1719
1689
|
const collator = new Intl.Collator();
|
|
1720
1690
|
const regexSeparator = new RegExp(`[${regexQuote(WORD_SEPARATOR)}]`, "g");
|
|
1721
1691
|
const wordLengthCost = [
|
|
@@ -1730,9 +1700,7 @@ const EXTRA_WORD_COST = 5;
|
|
|
1730
1700
|
const DEFAULT_COLLECTOR_TIMEOUT = 1e3;
|
|
1731
1701
|
const symStopProcessing = Symbol("Collector Stop Processing");
|
|
1732
1702
|
function compSuggestionResults(a, b) {
|
|
1733
|
-
|
|
1734
|
-
const bPref = b.isPreferred && -1 || 0;
|
|
1735
|
-
return aPref - bPref || a.cost - b.cost || a.word.length - b.word.length || collator.compare(a.word, b.word);
|
|
1703
|
+
return (a.isPreferred && -1 || 0) - (b.isPreferred && -1 || 0) || a.cost - b.cost || a.word.length - b.word.length || collator.compare(a.word, b.word);
|
|
1736
1704
|
}
|
|
1737
1705
|
const defaultSuggestionCollectorOptions = Object.freeze({
|
|
1738
1706
|
numSuggestions: defaultMaxNumberSuggestions,
|
|
@@ -1841,12 +1809,11 @@ function suggestionCollector(wordToMatch, options) {
|
|
|
1841
1809
|
const NF = "NFD";
|
|
1842
1810
|
const nWordToMatch = wordToMatch.normalize(NF);
|
|
1843
1811
|
const rawValues = [...sugs.values()];
|
|
1844
|
-
const
|
|
1812
|
+
const sorted = (weightMap ? rawValues.map(({ word, cost, isPreferred }) => ({
|
|
1845
1813
|
word,
|
|
1846
1814
|
cost: isPreferred ? cost : editDistanceWeighted(nWordToMatch, word.normalize(NF), weightMap, 110),
|
|
1847
1815
|
isPreferred
|
|
1848
|
-
})) : rawValues;
|
|
1849
|
-
const sorted = values.sort(compSuggestionResults).map(cleanCompoundResult);
|
|
1816
|
+
})) : rawValues).sort(compSuggestionResults).map(cleanCompoundResult);
|
|
1850
1817
|
let i = Math.min(sorted.length, numSuggestions) - 1;
|
|
1851
1818
|
const limit = includeTies ? sorted.length : Math.min(sorted.length, numSuggestions);
|
|
1852
1819
|
const iCost = sorted[i].cost;
|
|
@@ -1855,7 +1822,7 @@ function suggestionCollector(wordToMatch, options) {
|
|
|
1855
1822
|
sorted.length = i;
|
|
1856
1823
|
return sorted;
|
|
1857
1824
|
}
|
|
1858
|
-
|
|
1825
|
+
return {
|
|
1859
1826
|
collect,
|
|
1860
1827
|
add: function(suggestion) {
|
|
1861
1828
|
collectSuggestion(suggestion);
|
|
@@ -1881,7 +1848,6 @@ function suggestionCollector(wordToMatch, options) {
|
|
|
1881
1848
|
symbolStopProcessing: symStopProcessing,
|
|
1882
1849
|
genSuggestionOptions
|
|
1883
1850
|
};
|
|
1884
|
-
return collector;
|
|
1885
1851
|
}
|
|
1886
1852
|
/**
|
|
1887
1853
|
* Impersonating a Collector, allows searching for multiple variants on the same word.
|
|
@@ -1987,15 +1953,13 @@ function* getSuggestionsAStar(trie, srcWord, options = {}) {
|
|
|
1987
1953
|
return;
|
|
1988
1954
|
function compareSuggestion(a, b) {
|
|
1989
1955
|
const pa = a.isPreferred && 1 || 0;
|
|
1990
|
-
|
|
1991
|
-
return pb - pa || a.cost - b.cost || Math.abs(a.word.charCodeAt(0) - srcWord.charCodeAt(0)) - Math.abs(b.word.charCodeAt(0) - srcWord.charCodeAt(0));
|
|
1956
|
+
return (b.isPreferred && 1 || 0) - pa || a.cost - b.cost || Math.abs(a.word.charCodeAt(0) - srcWord.charCodeAt(0)) - Math.abs(b.word.charCodeAt(0) - srcWord.charCodeAt(0));
|
|
1992
1957
|
}
|
|
1993
1958
|
function processPath(p) {
|
|
1994
1959
|
const len = srcLetters.length;
|
|
1995
1960
|
if (p.n.eow && p.i === len) {
|
|
1996
|
-
const word = pNodeToWord(p);
|
|
1997
1961
|
const result = {
|
|
1998
|
-
word,
|
|
1962
|
+
word: pNodeToWord(p),
|
|
1999
1963
|
cost: p.c
|
|
2000
1964
|
};
|
|
2001
1965
|
resultHeap.add(result);
|
|
@@ -2020,14 +1984,12 @@ function* getSuggestionsAStar(trie, srcWord, options = {}) {
|
|
|
2020
1984
|
storePath(t, n, i + 1, cost, "", p, "d", "");
|
|
2021
1985
|
for (const [ss, node] of n.entries()) {
|
|
2022
1986
|
if (node.id === m?.id || ss in sc) continue;
|
|
2023
|
-
const
|
|
2024
|
-
const c = sg & g ? costVis : cost;
|
|
1987
|
+
const c = sg & (visMap[ss] || 0) ? costVis : cost;
|
|
2025
1988
|
storePath(t, node, i + 1, c, ss, p, "r", ss);
|
|
2026
1989
|
}
|
|
2027
1990
|
if (n.eow && i && compoundMethod) storePath(t, root, i, costLegacyCompound, wordSeparator, p, "L", wordSeparator);
|
|
2028
1991
|
if (ns) {
|
|
2029
|
-
const
|
|
2030
|
-
const n2 = n1?.get(s);
|
|
1992
|
+
const n2 = n.get(ns)?.get(s);
|
|
2031
1993
|
if (n2) {
|
|
2032
1994
|
const ss = ns + s;
|
|
2033
1995
|
storePath(t, n2, i + 2, cost0 + opCosts.swapCost, ss, p, "s", ss);
|
|
@@ -2058,8 +2020,7 @@ function* getSuggestionsAStar(trie, srcWord, options = {}) {
|
|
|
2058
2020
|
*/
|
|
2059
2021
|
function storePath(t, n, i, c, s, p, a, ss) {
|
|
2060
2022
|
const tt = getCostTrie(t, ss);
|
|
2061
|
-
|
|
2062
|
-
if (curr <= c || c > limit) return void 0;
|
|
2023
|
+
if (tt.c[i] <= c || c > limit) return void 0;
|
|
2063
2024
|
tt.c[i] = c;
|
|
2064
2025
|
pathHeap.add({
|
|
2065
2026
|
n,
|
|
@@ -2270,12 +2231,11 @@ function caseForms(letter, locale) {
|
|
|
2270
2231
|
* @returns combined set of possible forms.
|
|
2271
2232
|
*/
|
|
2272
2233
|
function accentForms(letter) {
|
|
2273
|
-
|
|
2234
|
+
return new Set([
|
|
2274
2235
|
letter,
|
|
2275
2236
|
letter.normalize("NFC"),
|
|
2276
2237
|
letter.normalize("NFD")
|
|
2277
2238
|
]);
|
|
2278
|
-
return forms$1;
|
|
2279
2239
|
}
|
|
2280
2240
|
/**
|
|
2281
2241
|
* Remove all accents.
|
|
@@ -2296,8 +2256,7 @@ function stripNonAccents(characters) {
|
|
|
2296
2256
|
function isValidUtf16Character(char) {
|
|
2297
2257
|
const len = char.length;
|
|
2298
2258
|
const code = char.charCodeAt(0) & 64512;
|
|
2299
|
-
|
|
2300
|
-
return valid;
|
|
2259
|
+
return len === 1 && (code & 63488) !== 55296 || len === 2 && (code & 64512) === 55296 && (char.charCodeAt(1) & 64512) === 56320;
|
|
2301
2260
|
}
|
|
2302
2261
|
function assertValidUtf16Character(char) {
|
|
2303
2262
|
if (!isValidUtf16Character(char)) {
|
|
@@ -2359,17 +2318,17 @@ var Utf8Accumulator = class Utf8Accumulator {
|
|
|
2359
2318
|
if ((byte & 224) === 192) {
|
|
2360
2319
|
this.value = byte & 31;
|
|
2361
2320
|
this.remaining = 1;
|
|
2362
|
-
return
|
|
2321
|
+
return;
|
|
2363
2322
|
}
|
|
2364
2323
|
if ((byte & 240) === 224) {
|
|
2365
2324
|
this.value = byte & 15;
|
|
2366
2325
|
this.remaining = 2;
|
|
2367
|
-
return
|
|
2326
|
+
return;
|
|
2368
2327
|
}
|
|
2369
2328
|
if ((byte & 248) === 240) {
|
|
2370
2329
|
this.value = byte & 7;
|
|
2371
2330
|
this.remaining = 3;
|
|
2372
|
-
return
|
|
2331
|
+
return;
|
|
2373
2332
|
}
|
|
2374
2333
|
return this.reset();
|
|
2375
2334
|
}
|
|
@@ -2433,8 +2392,7 @@ function encodeTextToUtf8(text) {
|
|
|
2433
2392
|
|
|
2434
2393
|
//#endregion
|
|
2435
2394
|
//#region src/lib/TrieBlob/CharIndex.ts
|
|
2436
|
-
|
|
2437
|
-
Object.freeze(emptySeq);
|
|
2395
|
+
Object.freeze([0]);
|
|
2438
2396
|
var CharIndex = class {
|
|
2439
2397
|
#charToUtf8SeqMap;
|
|
2440
2398
|
#lastWord = "";
|
|
@@ -2679,8 +2637,7 @@ var FastTrieBlobINode = class FastTrieBlobINode {
|
|
|
2679
2637
|
entries() {
|
|
2680
2638
|
if (this._entries) return this._entries;
|
|
2681
2639
|
if (!this._count) return EmptyEntries$1;
|
|
2682
|
-
|
|
2683
|
-
this._entries = entries.map(([key, value]) => [key, new FastTrieBlobINode(this.trie, value)]);
|
|
2640
|
+
this._entries = this.getNodesEntries().map(([key, value]) => [key, new FastTrieBlobINode(this.trie, value)]);
|
|
2684
2641
|
return this._entries;
|
|
2685
2642
|
}
|
|
2686
2643
|
/** get child ITrieNode */
|
|
@@ -2695,16 +2652,14 @@ var FastTrieBlobINode = class FastTrieBlobINode {
|
|
|
2695
2652
|
return new FastTrieBlobINode(this.trie, idx$1);
|
|
2696
2653
|
}
|
|
2697
2654
|
has(char) {
|
|
2698
|
-
|
|
2699
|
-
return idx$1 !== void 0;
|
|
2655
|
+
return this.trie.nodeGetChild(this.id, char) !== void 0;
|
|
2700
2656
|
}
|
|
2701
2657
|
hasChildren() {
|
|
2702
2658
|
return this._count > 0;
|
|
2703
2659
|
}
|
|
2704
2660
|
child(keyIdx) {
|
|
2705
2661
|
if (!this._values && !this.containsChainedIndexes()) {
|
|
2706
|
-
const
|
|
2707
|
-
const nodeIdx = n >>> this.trie.NodeChildRefShift;
|
|
2662
|
+
const nodeIdx = this.node[keyIdx + 1] >>> this.trie.NodeChildRefShift;
|
|
2708
2663
|
return new FastTrieBlobINode(this.trie, nodeIdx);
|
|
2709
2664
|
}
|
|
2710
2665
|
return this.values()[keyIdx];
|
|
@@ -2742,8 +2697,7 @@ var FastTrieBlobINode = class FastTrieBlobINode {
|
|
|
2742
2697
|
const len = this._count;
|
|
2743
2698
|
const node = this.node;
|
|
2744
2699
|
for (let i = 1; i <= len && !found; ++i) {
|
|
2745
|
-
const
|
|
2746
|
-
const codePoint = entry & NodeMaskChildCharIndex;
|
|
2700
|
+
const codePoint = node[i] & NodeMaskChildCharIndex;
|
|
2747
2701
|
found = Utf8Accumulator.isMultiByte(codePoint);
|
|
2748
2702
|
}
|
|
2749
2703
|
this._chained = !!found;
|
|
@@ -2941,8 +2895,7 @@ var TrieBlobINode = class TrieBlobINode {
|
|
|
2941
2895
|
entries() {
|
|
2942
2896
|
if (this._entries) return this._entries;
|
|
2943
2897
|
if (!this._count) return EmptyEntries;
|
|
2944
|
-
|
|
2945
|
-
this._entries = entries.map(([key, value]) => [key, new TrieBlobINode(this.trie, value)]);
|
|
2898
|
+
this._entries = this.getNodesEntries().map(([key, value]) => [key, new TrieBlobINode(this.trie, value)]);
|
|
2946
2899
|
return this._entries;
|
|
2947
2900
|
}
|
|
2948
2901
|
/** get child ITrieNode */
|
|
@@ -2957,8 +2910,7 @@ var TrieBlobINode = class TrieBlobINode {
|
|
|
2957
2910
|
}
|
|
2958
2911
|
child(keyIdx) {
|
|
2959
2912
|
if (!this._values && !this.containsChainedIndexes()) {
|
|
2960
|
-
const
|
|
2961
|
-
const nodeIdx = n >>> this.trie.NodeChildRefShift;
|
|
2913
|
+
const nodeIdx = this.trie.nodes[this.nodeIdx + keyIdx + 1] >>> this.trie.NodeChildRefShift;
|
|
2962
2914
|
return new TrieBlobINode(this.trie, nodeIdx);
|
|
2963
2915
|
}
|
|
2964
2916
|
return this.values()[keyIdx];
|
|
@@ -3004,8 +2956,7 @@ var TrieBlobINode = class TrieBlobINode {
|
|
|
3004
2956
|
const nodes = this.trie.nodes;
|
|
3005
2957
|
const len = this._count;
|
|
3006
2958
|
for (let i = 0; i < len && !found; ++i) {
|
|
3007
|
-
const
|
|
3008
|
-
const charIdx = entry & NodeMaskChildCharIndex;
|
|
2959
|
+
const charIdx = nodes[i + offset] & NodeMaskChildCharIndex;
|
|
3009
2960
|
found = Utf8Accumulator.isMultiByte(charIdx);
|
|
3010
2961
|
}
|
|
3011
2962
|
this._chained = !!found;
|
|
@@ -3121,8 +3072,7 @@ var TrieBlobIRoot = class extends TrieBlobINode {
|
|
|
3121
3072
|
//#region src/lib/TrieBlob/TrieBlob.ts
|
|
3122
3073
|
const NodeHeaderNumChildrenBits = 8;
|
|
3123
3074
|
const NodeHeaderNumChildrenShift = 0;
|
|
3124
|
-
const
|
|
3125
|
-
const HEADER_SIZE = HEADER_SIZE_UINT32 * 4;
|
|
3075
|
+
const HEADER_SIZE = 32;
|
|
3126
3076
|
const HEADER_OFFSET = 0;
|
|
3127
3077
|
const HEADER_OFFSET_SIG = HEADER_OFFSET;
|
|
3128
3078
|
const HEADER_OFFSET_ENDIAN = HEADER_OFFSET_SIG + 8;
|
|
@@ -3130,7 +3080,6 @@ const HEADER_OFFSET_VERSION = HEADER_OFFSET_ENDIAN + 4;
|
|
|
3130
3080
|
const HEADER_OFFSET_NODES = HEADER_OFFSET_VERSION + 4;
|
|
3131
3081
|
const HEADER_OFFSET_NODES_LEN = HEADER_OFFSET_NODES + 4;
|
|
3132
3082
|
const HEADER_OFFSET_CHAR_INDEX = HEADER_OFFSET_NODES_LEN + 4;
|
|
3133
|
-
const HEADER_OFFSET_CHAR_INDEX_LEN = HEADER_OFFSET_CHAR_INDEX + 4;
|
|
3134
3083
|
const HEADER = {
|
|
3135
3084
|
header: HEADER_OFFSET,
|
|
3136
3085
|
sig: HEADER_OFFSET_SIG,
|
|
@@ -3139,7 +3088,7 @@ const HEADER = {
|
|
|
3139
3088
|
nodes: HEADER_OFFSET_NODES,
|
|
3140
3089
|
nodesLen: HEADER_OFFSET_NODES_LEN,
|
|
3141
3090
|
charIndex: HEADER_OFFSET_CHAR_INDEX,
|
|
3142
|
-
charIndexLen:
|
|
3091
|
+
charIndexLen: HEADER_OFFSET_CHAR_INDEX + 4
|
|
3143
3092
|
};
|
|
3144
3093
|
const headerSig = "TrieBlob";
|
|
3145
3094
|
const version = "00.01.00";
|
|
@@ -3193,8 +3142,7 @@ var TrieBlob = class TrieBlob {
|
|
|
3193
3142
|
*/
|
|
3194
3143
|
find(word, strict) {
|
|
3195
3144
|
if (!this.hasCompoundWords) {
|
|
3196
|
-
|
|
3197
|
-
if (found) return {
|
|
3145
|
+
if (this.#hasWord(0, word)) return {
|
|
3198
3146
|
found: word,
|
|
3199
3147
|
compoundUsed: false,
|
|
3200
3148
|
caseMatched: true
|
|
@@ -3210,13 +3158,12 @@ var TrieBlob = class TrieBlob {
|
|
|
3210
3158
|
caseMatched: false
|
|
3211
3159
|
};
|
|
3212
3160
|
}
|
|
3213
|
-
return void 0;
|
|
3214
3161
|
}
|
|
3215
3162
|
getRoot() {
|
|
3216
3163
|
return this.#iTrieRoot ??= this._getRoot();
|
|
3217
3164
|
}
|
|
3218
3165
|
_getRoot() {
|
|
3219
|
-
|
|
3166
|
+
return new TrieBlobIRoot(new TrieBlobInternals(this.nodes, this.charIndex, {
|
|
3220
3167
|
NodeMaskEOW: TrieBlob.NodeMaskEOW,
|
|
3221
3168
|
NodeMaskNumChildren: TrieBlob.NodeMaskNumChildren,
|
|
3222
3169
|
NodeMaskChildCharIndex: TrieBlob.NodeMaskChildCharIndex,
|
|
@@ -3230,8 +3177,7 @@ var TrieBlob = class TrieBlob {
|
|
|
3230
3177
|
hasCompoundWords: this.hasCompoundWords,
|
|
3231
3178
|
hasForbiddenWords: this.hasForbiddenWords,
|
|
3232
3179
|
hasNonStrictWords: this.hasNonStrictWords
|
|
3233
|
-
});
|
|
3234
|
-
return new TrieBlobIRoot(trieData, 0, this.info, { find: (word, strict) => this.find(word, strict) });
|
|
3180
|
+
}), 0, this.info, { find: (word, strict) => this.find(word, strict) });
|
|
3235
3181
|
}
|
|
3236
3182
|
getNode(prefix) {
|
|
3237
3183
|
return findNode$1(this.getRoot(), prefix);
|
|
@@ -3296,8 +3242,7 @@ var TrieBlob = class TrieBlob {
|
|
|
3296
3242
|
*/
|
|
3297
3243
|
_lookupNode(nodeIdx, char) {
|
|
3298
3244
|
const indexSeq = this.letterToNodeCharIndexSequence(char);
|
|
3299
|
-
|
|
3300
|
-
return currNodeIdx;
|
|
3245
|
+
return this.#lookupNode(nodeIdx, indexSeq);
|
|
3301
3246
|
}
|
|
3302
3247
|
*words() {
|
|
3303
3248
|
const NodeMaskNumChildren = TrieBlob.NodeMaskNumChildren;
|
|
@@ -3316,13 +3261,11 @@ var TrieBlob = class TrieBlob {
|
|
|
3316
3261
|
const { nodeIdx, pos, word, acc } = stack[depth];
|
|
3317
3262
|
const node = nodes[nodeIdx];
|
|
3318
3263
|
if (!pos && node & NodeMaskEOW) yield word;
|
|
3319
|
-
|
|
3320
|
-
if (pos >= len) {
|
|
3264
|
+
if (pos >= (node & NodeMaskNumChildren)) {
|
|
3321
3265
|
--depth;
|
|
3322
3266
|
continue;
|
|
3323
3267
|
}
|
|
3324
|
-
const
|
|
3325
|
-
const entry = nodes[nodeIdx + nextPos];
|
|
3268
|
+
const entry = nodes[nodeIdx + ++stack[depth].pos];
|
|
3326
3269
|
const nAcc = acc.clone();
|
|
3327
3270
|
const codePoint = nAcc.decode(entry & NodeMaskChildCharIndex);
|
|
3328
3271
|
const letter = codePoint && String.fromCodePoint(codePoint) || "";
|
|
@@ -3357,8 +3300,7 @@ var TrieBlob = class TrieBlob {
|
|
|
3357
3300
|
}
|
|
3358
3301
|
encodeBin() {
|
|
3359
3302
|
const charIndex = Buffer.from(this.charIndex.charIndex.join("\n"));
|
|
3360
|
-
const
|
|
3361
|
-
const nodeOffset = HEADER_SIZE + charIndexLen;
|
|
3303
|
+
const nodeOffset = HEADER_SIZE + (charIndex.byteLength + 3 & -4);
|
|
3362
3304
|
const size = nodeOffset + this.nodes.length * 4;
|
|
3363
3305
|
const useLittle = isLittleEndian();
|
|
3364
3306
|
const buffer$1 = Buffer.alloc(size);
|
|
@@ -3385,9 +3327,7 @@ var TrieBlob = class TrieBlob {
|
|
|
3385
3327
|
const offsetCharIndex = header.getUint32(HEADER.charIndex, useLittle);
|
|
3386
3328
|
const lenCharIndex = header.getUint32(HEADER.charIndexLen, useLittle);
|
|
3387
3329
|
const charIndex = Buffer.from(blob.subarray(offsetCharIndex, offsetCharIndex + lenCharIndex)).toString("utf8").split("\n");
|
|
3388
|
-
|
|
3389
|
-
const trieBlob = new TrieBlob(nodes, new CharIndex(charIndex), defaultTrieInfo);
|
|
3390
|
-
return trieBlob;
|
|
3330
|
+
return new TrieBlob(new Uint32Array(blob.buffer, offsetNodes, lenNodes), new CharIndex(charIndex), defaultTrieInfo);
|
|
3391
3331
|
}
|
|
3392
3332
|
static NodeMaskEOW = 256;
|
|
3393
3333
|
static NodeMaskNumChildren = (1 << NodeHeaderNumChildrenBits) - 1 & 65535;
|
|
@@ -3412,13 +3352,11 @@ function isLittleEndian() {
|
|
|
3412
3352
|
3,
|
|
3413
3353
|
4
|
|
3414
3354
|
]);
|
|
3415
|
-
|
|
3416
|
-
return view.getUint32(0, true) === 67305985;
|
|
3355
|
+
return new DataView(buf.buffer).getUint32(0, true) === 67305985;
|
|
3417
3356
|
}
|
|
3418
3357
|
function checkSig(blob) {
|
|
3419
3358
|
if (blob.length < HEADER_SIZE) return false;
|
|
3420
|
-
|
|
3421
|
-
if (buf.toString("utf8", 0, 8) !== headerSig) return false;
|
|
3359
|
+
if (Buffer.from(blob, 0, 8).toString("utf8", 0, 8) !== headerSig) return false;
|
|
3422
3360
|
return true;
|
|
3423
3361
|
}
|
|
3424
3362
|
var ErrorDecodeTrieBlob = class extends Error {
|
|
@@ -3475,8 +3413,7 @@ function trieBlobSort(data) {
|
|
|
3475
3413
|
last = cIdx;
|
|
3476
3414
|
}
|
|
3477
3415
|
if (i === end) continue;
|
|
3478
|
-
|
|
3479
|
-
sorted.forEach((v, i$1) => data[start + i$1] = v);
|
|
3416
|
+
data.slice(start, end).sort((a, b) => (a & MaskChildCharIndex) - (b & MaskChildCharIndex)).forEach((v, i$1) => data[start + i$1] = v);
|
|
3480
3417
|
}
|
|
3481
3418
|
}
|
|
3482
3419
|
|
|
@@ -3525,8 +3462,7 @@ var FastTrieBlob = class FastTrieBlob {
|
|
|
3525
3462
|
const charIndexes = this.wordToUtf8Seq(word);
|
|
3526
3463
|
const found = this.#lookupNode(nodeIdx, charIndexes);
|
|
3527
3464
|
if (found === void 0) return false;
|
|
3528
|
-
|
|
3529
|
-
return !!(node[0] & this.bitMasksInfo.NodeMaskEOW);
|
|
3465
|
+
return !!(this.nodes[found][0] & this.bitMasksInfo.NodeMaskEOW);
|
|
3530
3466
|
}
|
|
3531
3467
|
/**
|
|
3532
3468
|
* Find the node index for the given Utf8 character sequence.
|
|
@@ -3564,26 +3500,24 @@ var FastTrieBlob = class FastTrieBlob {
|
|
|
3564
3500
|
const NodeChildRefShift = this.bitMasksInfo.NodeChildRefShift;
|
|
3565
3501
|
const NodeMaskEOW = this.bitMasksInfo.NodeMaskEOW;
|
|
3566
3502
|
const nodes = this.nodes;
|
|
3567
|
-
const accumulator = Utf8Accumulator.create();
|
|
3568
3503
|
const stack = [{
|
|
3569
3504
|
nodeIdx: 0,
|
|
3570
3505
|
pos: 0,
|
|
3571
3506
|
word: "",
|
|
3572
|
-
accumulator
|
|
3507
|
+
accumulator: Utf8Accumulator.create()
|
|
3573
3508
|
}];
|
|
3574
3509
|
let depth = 0;
|
|
3575
3510
|
while (depth >= 0) {
|
|
3576
|
-
const { nodeIdx, pos, word, accumulator
|
|
3511
|
+
const { nodeIdx, pos, word, accumulator } = stack[depth];
|
|
3577
3512
|
const node = nodes[nodeIdx];
|
|
3578
3513
|
if (!pos && node[0] & NodeMaskEOW) yield word;
|
|
3579
3514
|
if (pos >= node.length - 1) {
|
|
3580
3515
|
--depth;
|
|
3581
3516
|
continue;
|
|
3582
3517
|
}
|
|
3583
|
-
const
|
|
3584
|
-
const entry = node[nextPos];
|
|
3518
|
+
const entry = node[++stack[depth].pos];
|
|
3585
3519
|
const charIdx = entry & NodeMaskChildCharIndex;
|
|
3586
|
-
const acc = accumulator
|
|
3520
|
+
const acc = accumulator.clone();
|
|
3587
3521
|
const codePoint = acc.decode(charIdx);
|
|
3588
3522
|
const letter = codePoint && String.fromCodePoint(codePoint) || "";
|
|
3589
3523
|
++depth;
|
|
@@ -3684,13 +3618,10 @@ var FastTrieBlob = class FastTrieBlob {
|
|
|
3684
3618
|
for (let p = 1; p < n.length; ++p) {
|
|
3685
3619
|
const v = n[p];
|
|
3686
3620
|
const cIdx = v & this.bitMasksInfo.NodeMaskChildCharIndex;
|
|
3687
|
-
const
|
|
3688
|
-
const codePoint = a.decode(cIdx);
|
|
3689
|
-
const c = codePoint !== void 0 ? String.fromCodePoint(codePoint) : "∎";
|
|
3690
|
-
const i = v >>> this.bitMasksInfo.NodeChildRefShift;
|
|
3621
|
+
const codePoint = acc.clone().decode(cIdx);
|
|
3691
3622
|
children[p] = {
|
|
3692
|
-
c,
|
|
3693
|
-
i,
|
|
3623
|
+
c: codePoint !== void 0 ? String.fromCodePoint(codePoint) : "∎",
|
|
3624
|
+
i: v >>> this.bitMasksInfo.NodeChildRefShift,
|
|
3694
3625
|
cIdx
|
|
3695
3626
|
};
|
|
3696
3627
|
}
|
|
@@ -3913,13 +3844,12 @@ var FastTrieBlobBuilder = class FastTrieBlobBuilder {
|
|
|
3913
3844
|
for (let n = num; n > 0; --n) depth = stack[depth].pDepth;
|
|
3914
3845
|
nodeIdx = stack[depth + 1].nodeIdx;
|
|
3915
3846
|
};
|
|
3916
|
-
|
|
3847
|
+
return {
|
|
3917
3848
|
insertChar,
|
|
3918
3849
|
markEOW,
|
|
3919
3850
|
reference,
|
|
3920
3851
|
backStep
|
|
3921
3852
|
};
|
|
3922
|
-
return c;
|
|
3923
3853
|
}
|
|
3924
3854
|
_insert(word) {
|
|
3925
3855
|
word = word.trim();
|
|
@@ -3935,8 +3865,7 @@ var FastTrieBlobBuilder = class FastTrieBlobBuilder {
|
|
|
3935
3865
|
for (let p = 0; p < len; ++p) {
|
|
3936
3866
|
const seq = utf8Seq[p];
|
|
3937
3867
|
const node = nodes[nodeIdx];
|
|
3938
|
-
|
|
3939
|
-
let i = count - 1;
|
|
3868
|
+
let i = node.length - 1;
|
|
3940
3869
|
for (; i > 0; --i) if ((node[i] & NodeMaskChildCharIndex) === seq) break;
|
|
3941
3870
|
if (i > 0) {
|
|
3942
3871
|
nodeIdx = node[i] >>> NodeChildRefShift;
|
|
@@ -3966,8 +3895,7 @@ var FastTrieBlobBuilder = class FastTrieBlobBuilder {
|
|
|
3966
3895
|
let node = nodes[nodeIdx];
|
|
3967
3896
|
for (let p = 0; p < len; ++p, node = nodes[nodeIdx]) {
|
|
3968
3897
|
const letterIdx = charIndexes[p];
|
|
3969
|
-
|
|
3970
|
-
let i = count - 1;
|
|
3898
|
+
let i = node.length - 1;
|
|
3971
3899
|
for (; i > 0; --i) if ((node[i] & NodeMaskChildCharIndex) === letterIdx) break;
|
|
3972
3900
|
if (i < 1) return false;
|
|
3973
3901
|
nodeIdx = node[i] >>> NodeChildRefShift;
|
|
@@ -3997,8 +3925,7 @@ var FastTrieBlobBuilder = class FastTrieBlobBuilder {
|
|
|
3997
3925
|
assert(!this.isReadonly(), "FastTrieBlobBuilder is readonly");
|
|
3998
3926
|
}
|
|
3999
3927
|
static fromWordList(words, options) {
|
|
4000
|
-
|
|
4001
|
-
return ft.insert(words).build();
|
|
3928
|
+
return new FastTrieBlobBuilder(options).insert(words).build();
|
|
4002
3929
|
}
|
|
4003
3930
|
static fromTrieRoot(root) {
|
|
4004
3931
|
const bitMasksInfo = FastTrieBlobBuilder.DefaultBitMaskInfo;
|
|
@@ -4039,8 +3966,7 @@ var FastTrieBlobBuilder = class FastTrieBlobBuilder {
|
|
|
4039
3966
|
if (pos < node.length) node = tf.nodes[node[pos] >>> NodeChildRefShift];
|
|
4040
3967
|
else {
|
|
4041
3968
|
const next = [0];
|
|
4042
|
-
|
|
4043
|
-
node[pos] = nodeIdx << NodeChildRefShift | idx$1;
|
|
3969
|
+
node[pos] = tf.nodes.push(next) - 1 << NodeChildRefShift | idx$1;
|
|
4044
3970
|
node = next;
|
|
4045
3971
|
}
|
|
4046
3972
|
}
|
|
@@ -4071,7 +3997,7 @@ function clean(t) {
|
|
|
4071
3997
|
|
|
4072
3998
|
//#endregion
|
|
4073
3999
|
//#region src/lib/ITrie.ts
|
|
4074
|
-
const defaultLegacyMinCompoundLength$
|
|
4000
|
+
const defaultLegacyMinCompoundLength$1 = 3;
|
|
4075
4001
|
var ITrieImpl = class ITrieImpl {
|
|
4076
4002
|
_info;
|
|
4077
4003
|
root;
|
|
@@ -4118,10 +4044,7 @@ var ITrieImpl = class ITrieImpl {
|
|
|
4118
4044
|
}
|
|
4119
4045
|
has(word, minLegacyCompoundLength) {
|
|
4120
4046
|
if (this.hasWord(word, false)) return true;
|
|
4121
|
-
if (minLegacyCompoundLength) {
|
|
4122
|
-
const f = this.findWord(word, { useLegacyWordCompounds: minLegacyCompoundLength });
|
|
4123
|
-
return !!f.found;
|
|
4124
|
-
}
|
|
4047
|
+
if (minLegacyCompoundLength) return !!this.findWord(word, { useLegacyWordCompounds: minLegacyCompoundLength }).found;
|
|
4125
4048
|
return false;
|
|
4126
4049
|
}
|
|
4127
4050
|
/**
|
|
@@ -4131,15 +4054,14 @@ var ITrieImpl = class ITrieImpl {
|
|
|
4131
4054
|
* @returns true if the word was found and is not forbidden.
|
|
4132
4055
|
*/
|
|
4133
4056
|
hasWord(word, caseSensitive) {
|
|
4134
|
-
|
|
4057
|
+
return !!this.findWord(word, {
|
|
4135
4058
|
caseSensitive,
|
|
4136
4059
|
checkForbidden: false
|
|
4137
|
-
});
|
|
4138
|
-
return !!f.found;
|
|
4060
|
+
}).found;
|
|
4139
4061
|
}
|
|
4140
4062
|
findWord(word, options) {
|
|
4141
4063
|
if (options?.useLegacyWordCompounds) {
|
|
4142
|
-
const len = options.useLegacyWordCompounds !== true ? options.useLegacyWordCompounds : defaultLegacyMinCompoundLength$
|
|
4064
|
+
const len = options.useLegacyWordCompounds !== true ? options.useLegacyWordCompounds : defaultLegacyMinCompoundLength$1;
|
|
4143
4065
|
const findOptions = this.createFindOptions({
|
|
4144
4066
|
legacyMinCompoundLength: len,
|
|
4145
4067
|
matchCase: options.caseSensitive || false
|
|
@@ -4228,12 +4150,10 @@ var ITrieImpl = class ITrieImpl {
|
|
|
4228
4150
|
static create(words, info) {
|
|
4229
4151
|
const builder = new FastTrieBlobBuilder(info);
|
|
4230
4152
|
builder.insert(words);
|
|
4231
|
-
|
|
4232
|
-
return new ITrieImpl(root, void 0);
|
|
4153
|
+
return new ITrieImpl(builder.build(), void 0);
|
|
4233
4154
|
}
|
|
4234
4155
|
createFindOptions(options) {
|
|
4235
|
-
|
|
4236
|
-
return findOptions;
|
|
4156
|
+
return createFindOptions$1(options);
|
|
4237
4157
|
}
|
|
4238
4158
|
};
|
|
4239
4159
|
|
|
@@ -4303,9 +4223,8 @@ function iteratorTrieWords(node) {
|
|
|
4303
4223
|
return walkerWords(node);
|
|
4304
4224
|
}
|
|
4305
4225
|
function createTrieRoot(options) {
|
|
4306
|
-
const fullOptions = mergeOptionalWithDefaults(options);
|
|
4307
4226
|
return {
|
|
4308
|
-
...
|
|
4227
|
+
...mergeOptionalWithDefaults(options),
|
|
4309
4228
|
c: Object.create(null)
|
|
4310
4229
|
};
|
|
4311
4230
|
}
|
|
@@ -4365,15 +4284,13 @@ function checkCircular(root) {
|
|
|
4365
4284
|
};
|
|
4366
4285
|
if (inStack.has(n)) {
|
|
4367
4286
|
const stack = [...inStack, n];
|
|
4368
|
-
const word = trieStackToWord(stack);
|
|
4369
|
-
const pos = stack.indexOf(n);
|
|
4370
4287
|
return {
|
|
4371
4288
|
isCircular: true,
|
|
4372
4289
|
allSeen: false,
|
|
4373
4290
|
ref: {
|
|
4374
4291
|
stack,
|
|
4375
|
-
word,
|
|
4376
|
-
pos
|
|
4292
|
+
word: trieStackToWord(stack),
|
|
4293
|
+
pos: stack.indexOf(n)
|
|
4377
4294
|
}
|
|
4378
4295
|
};
|
|
4379
4296
|
}
|
|
@@ -4412,9 +4329,8 @@ function isCircular(root) {
|
|
|
4412
4329
|
return checkCircular(root).isCircular;
|
|
4413
4330
|
}
|
|
4414
4331
|
function trieNodeToRoot(node, options) {
|
|
4415
|
-
const newOptions = mergeOptionalWithDefaults(options);
|
|
4416
4332
|
return {
|
|
4417
|
-
...
|
|
4333
|
+
...mergeOptionalWithDefaults(options),
|
|
4418
4334
|
c: node.c || Object.create(null)
|
|
4419
4335
|
};
|
|
4420
4336
|
}
|
|
@@ -4432,9 +4348,7 @@ function consolidate(root) {
|
|
|
4432
4348
|
const knownMap = /* @__PURE__ */ new Map();
|
|
4433
4349
|
if (isCircular(root)) throw new Error("Trie is circular.");
|
|
4434
4350
|
function signature$1(n) {
|
|
4435
|
-
|
|
4436
|
-
const ref = n.c ? JSON.stringify(Object.entries(n.c).map(([k, n$1]) => [k, cached.get(n$1)])) : "";
|
|
4437
|
-
return isWord + ref;
|
|
4351
|
+
return (n.f ? "*" : "") + (n.c ? JSON.stringify(Object.entries(n.c).map(([k, n$1]) => [k, cached.get(n$1)])) : "");
|
|
4438
4352
|
}
|
|
4439
4353
|
function findEow(n) {
|
|
4440
4354
|
if (n.f && !n.c) return n;
|
|
@@ -4499,21 +4413,19 @@ function consolidate(root) {
|
|
|
4499
4413
|
|
|
4500
4414
|
//#endregion
|
|
4501
4415
|
//#region src/lib/TrieNode/find.ts
|
|
4502
|
-
const defaultLegacyMinCompoundLength$1 = 3;
|
|
4503
4416
|
const _defaultFindOptions = {
|
|
4504
4417
|
matchCase: false,
|
|
4505
4418
|
compoundMode: "compound",
|
|
4506
4419
|
forbidPrefix: FORBID_PREFIX,
|
|
4507
4420
|
compoundFix: COMPOUND_FIX,
|
|
4508
4421
|
caseInsensitivePrefix: CASE_INSENSITIVE_PREFIX,
|
|
4509
|
-
legacyMinCompoundLength:
|
|
4422
|
+
legacyMinCompoundLength: 3
|
|
4510
4423
|
};
|
|
4511
|
-
const
|
|
4424
|
+
const knownCompoundModes = new Map([
|
|
4512
4425
|
"none",
|
|
4513
4426
|
"compound",
|
|
4514
4427
|
"legacy"
|
|
4515
|
-
];
|
|
4516
|
-
const knownCompoundModes = new Map(arrayCompoundModes.map((a) => [a, a]));
|
|
4428
|
+
].map((a) => [a, a]));
|
|
4517
4429
|
/**
|
|
4518
4430
|
*
|
|
4519
4431
|
* @param root Trie root node. root.c contains the compound root and forbidden root.
|
|
@@ -4555,23 +4467,18 @@ function _findWordNode(root, word, options) {
|
|
|
4555
4467
|
function __findCompound() {
|
|
4556
4468
|
const f = findCompoundWord(root, word, compoundPrefix, ignoreCasePrefix);
|
|
4557
4469
|
const result = { ...f };
|
|
4558
|
-
if (f.found !== false && f.compoundUsed)
|
|
4559
|
-
const r = !f.caseMatched ? walk$1(root, options.caseInsensitivePrefix) : root;
|
|
4560
|
-
result.forbidden = isForbiddenWord(r, word, options.forbidPrefix);
|
|
4561
|
-
}
|
|
4470
|
+
if (f.found !== false && f.compoundUsed) result.forbidden = isForbiddenWord(!f.caseMatched ? walk$1(root, options.caseInsensitivePrefix) : root, word, options.forbidPrefix);
|
|
4562
4471
|
return result;
|
|
4563
4472
|
}
|
|
4564
4473
|
function __findExact() {
|
|
4565
4474
|
const n = walk$1(root, word);
|
|
4566
|
-
|
|
4567
|
-
|
|
4568
|
-
found: isFound && word,
|
|
4475
|
+
return {
|
|
4476
|
+
found: isEndOfWordNode(n) && word,
|
|
4569
4477
|
compoundUsed: false,
|
|
4570
4478
|
forbidden: isForbiddenWord(root, word, options.forbidPrefix),
|
|
4571
4479
|
node: n,
|
|
4572
4480
|
caseMatched: true
|
|
4573
4481
|
};
|
|
4574
|
-
return result;
|
|
4575
4482
|
}
|
|
4576
4483
|
switch (compoundMode) {
|
|
4577
4484
|
case "none": return options.matchCase ? __findExact() : __findCompound();
|
|
@@ -4615,8 +4522,7 @@ function findCompoundNode(root, word, compoundCharacter, ignoreCasePrefix) {
|
|
|
4615
4522
|
while (true) {
|
|
4616
4523
|
const s = stack[i];
|
|
4617
4524
|
const h = w[i++];
|
|
4618
|
-
const
|
|
4619
|
-
const c = n?.c?.[h];
|
|
4525
|
+
const c = (s.cr || s.n)?.c?.[h];
|
|
4620
4526
|
if (c && i < word.length) {
|
|
4621
4527
|
caseMatched = s.caseMatched;
|
|
4622
4528
|
stack[i] = {
|
|
@@ -4645,15 +4551,13 @@ function findCompoundNode(root, word, compoundCharacter, ignoreCasePrefix) {
|
|
|
4645
4551
|
break;
|
|
4646
4552
|
}
|
|
4647
4553
|
}
|
|
4648
|
-
|
|
4649
|
-
|
|
4650
|
-
found,
|
|
4554
|
+
return {
|
|
4555
|
+
found: i && i === word.length && word || false,
|
|
4651
4556
|
compoundUsed,
|
|
4652
4557
|
node,
|
|
4653
4558
|
forbidden: void 0,
|
|
4654
4559
|
caseMatched
|
|
4655
4560
|
};
|
|
4656
|
-
return result;
|
|
4657
4561
|
}
|
|
4658
4562
|
function findCompoundWord(root, word, compoundCharacter, ignoreCasePrefix) {
|
|
4659
4563
|
const { found, compoundUsed, node, caseMatched } = findCompoundNode(root, word, compoundCharacter, ignoreCasePrefix);
|
|
@@ -4708,8 +4612,7 @@ function findLegacyCompoundNode(roots, word, minCompoundLength) {
|
|
|
4708
4612
|
while (true) {
|
|
4709
4613
|
const s = stack[i];
|
|
4710
4614
|
const h = w[i++];
|
|
4711
|
-
const
|
|
4712
|
-
const c = n?.c?.[h];
|
|
4615
|
+
const c = (s.cr || s.n)?.c?.[h];
|
|
4713
4616
|
if (c && i < wLen) stack[i] = {
|
|
4714
4617
|
n: c,
|
|
4715
4618
|
usedRoots: 0,
|
|
@@ -4749,15 +4652,13 @@ function findLegacyCompoundNode(roots, word, minCompoundLength) {
|
|
|
4749
4652
|
}
|
|
4750
4653
|
return letters.join("");
|
|
4751
4654
|
}
|
|
4752
|
-
|
|
4753
|
-
|
|
4754
|
-
found,
|
|
4655
|
+
return {
|
|
4656
|
+
found: extractWord(),
|
|
4755
4657
|
compoundUsed,
|
|
4756
4658
|
node,
|
|
4757
4659
|
forbidden: void 0,
|
|
4758
4660
|
caseMatched
|
|
4759
4661
|
};
|
|
4760
|
-
return result;
|
|
4761
4662
|
}
|
|
4762
4663
|
function isForbiddenWord(root, word, forbiddenPrefix) {
|
|
4763
4664
|
return findWordExact(root?.c?.[forbiddenPrefix], word);
|
|
@@ -4806,12 +4707,10 @@ var TrieNodeTrie = class TrieNodeTrie {
|
|
|
4806
4707
|
return this._size ??= countNodes(this.root);
|
|
4807
4708
|
}
|
|
4808
4709
|
static createFromWords(words, options) {
|
|
4809
|
-
|
|
4810
|
-
return new TrieNodeTrie(root);
|
|
4710
|
+
return new TrieNodeTrie(createTrieRootFromList(words, options));
|
|
4811
4711
|
}
|
|
4812
4712
|
static createFromWordsAndConsolidate(words, options) {
|
|
4813
|
-
|
|
4814
|
-
return new TrieNodeTrie(consolidate(root));
|
|
4713
|
+
return new TrieNodeTrie(consolidate(createTrieRootFromList(words, options)));
|
|
4815
4714
|
}
|
|
4816
4715
|
};
|
|
4817
4716
|
|
|
@@ -4874,11 +4773,10 @@ function convertToTrieRefNodes(root) {
|
|
|
4874
4773
|
function convert(n) {
|
|
4875
4774
|
const { f, c } = n;
|
|
4876
4775
|
const r = c ? Object.entries(c).sort((a, b) => a[0] < b[0] ? -1 : 1).map(([s, n$1]) => [s, cached.get(n$1)]) : void 0;
|
|
4877
|
-
|
|
4776
|
+
return r ? f ? {
|
|
4878
4777
|
f,
|
|
4879
4778
|
r
|
|
4880
4779
|
} : { r } : { f };
|
|
4881
|
-
return rn;
|
|
4882
4780
|
}
|
|
4883
4781
|
function* walk$3(root$1) {
|
|
4884
4782
|
cached.set(eow, count++);
|
|
@@ -4918,7 +4816,7 @@ function trieToExportString(node, base) {
|
|
|
4918
4816
|
return genSequence(walk$3(node));
|
|
4919
4817
|
}
|
|
4920
4818
|
function generateHeader$3(base, comment) {
|
|
4921
|
-
|
|
4819
|
+
return genSequence([
|
|
4922
4820
|
...[
|
|
4923
4821
|
"#!/usr/bin/env cspell-trie reader",
|
|
4924
4822
|
"TrieXv1",
|
|
@@ -4926,8 +4824,7 @@ function generateHeader$3(base, comment) {
|
|
|
4926
4824
|
],
|
|
4927
4825
|
...comment ? comment.split("\n").map((a) => "# " + a) : [],
|
|
4928
4826
|
...["# Data:"]
|
|
4929
|
-
];
|
|
4930
|
-
return genSequence(header).map((a) => a + "\n");
|
|
4827
|
+
]).map((a) => a + "\n");
|
|
4931
4828
|
}
|
|
4932
4829
|
/**
|
|
4933
4830
|
* Serialize a TrieNode.
|
|
@@ -4940,8 +4837,7 @@ function serializeTrie$4(root, options = 16) {
|
|
|
4940
4837
|
const { base = 16, comment = "" } = options;
|
|
4941
4838
|
const radix = base > 36 ? 36 : base < 10 ? 10 : base;
|
|
4942
4839
|
const rows = toReferences(root).map((node) => {
|
|
4943
|
-
|
|
4944
|
-
return row;
|
|
4840
|
+
return [...trieToExportString(node, radix), "\n"].join("").replaceAll(regExTrailingComma, "$1");
|
|
4945
4841
|
});
|
|
4946
4842
|
return generateHeader$3(radix, comment).concat(rows);
|
|
4947
4843
|
}
|
|
@@ -4976,22 +4872,20 @@ function importTrie$5(linesX) {
|
|
|
4976
4872
|
const regUnescape = /[\\](.)/g;
|
|
4977
4873
|
const flagsWord = { f: FLAG_WORD };
|
|
4978
4874
|
function splitLine$1(line) {
|
|
4979
|
-
|
|
4980
|
-
return line.replaceAll(regNotEscapedCommas, pattern).split(regUnescapeCommas).map((a) => a.replaceAll(regUnescape, "$1"));
|
|
4875
|
+
return line.replaceAll(regNotEscapedCommas, "$1__COMMA__").split(regUnescapeCommas).map((a) => a.replaceAll(regUnescape, "$1"));
|
|
4981
4876
|
}
|
|
4982
4877
|
function decodeLine$1(line, nodes) {
|
|
4983
4878
|
const isWord = line[0] === EOW$3;
|
|
4984
4879
|
line = isWord ? line.slice(1) : line;
|
|
4985
4880
|
const flags = isWord ? flagsWord : {};
|
|
4986
4881
|
const children = splitLine$1(line).filter((a) => !!a).map((a) => [a[0], Number.parseInt(a.slice(1) || "0", radix)]).map(([k, i]) => [k, nodes[i]]);
|
|
4987
|
-
const cNode = children.length ? { c: Object.fromEntries(children) } : {};
|
|
4988
4882
|
return {
|
|
4989
|
-
...
|
|
4883
|
+
...children.length ? { c: Object.fromEntries(children) } : {},
|
|
4990
4884
|
...flags
|
|
4991
4885
|
};
|
|
4992
4886
|
}
|
|
4993
4887
|
readHeader(iter);
|
|
4994
|
-
|
|
4888
|
+
return trieNodeToRoot(genSequence([DATA$4]).concat(iter).map((a) => a.replace(/\r?\n/, "")).filter((a) => !!a).reduce((acc, line) => {
|
|
4995
4889
|
const { lines, nodes } = acc;
|
|
4996
4890
|
const root = decodeLine$1(line, nodes);
|
|
4997
4891
|
nodes[lines] = root;
|
|
@@ -5004,8 +4898,7 @@ function importTrie$5(linesX) {
|
|
|
5004
4898
|
lines: 0,
|
|
5005
4899
|
nodes: [],
|
|
5006
4900
|
root: {}
|
|
5007
|
-
});
|
|
5008
|
-
return trieNodeToRoot(n.root, { isCaseAware: false });
|
|
4901
|
+
}).root, { isCaseAware: false });
|
|
5009
4902
|
}
|
|
5010
4903
|
|
|
5011
4904
|
//#endregion
|
|
@@ -5071,15 +4964,14 @@ function toLine(node, base) {
|
|
|
5071
4964
|
return node.s + flags + refs;
|
|
5072
4965
|
}
|
|
5073
4966
|
function generateHeader$2(base, comment) {
|
|
5074
|
-
|
|
4967
|
+
return genSequence([
|
|
5075
4968
|
"#!/usr/bin/env cspell-trie reader",
|
|
5076
4969
|
"TrieXv2",
|
|
5077
4970
|
"base=" + base,
|
|
5078
4971
|
...comment ? comment.split("\n").map((a) => "# " + a) : [],
|
|
5079
4972
|
"# Data:",
|
|
5080
4973
|
DATA$3
|
|
5081
|
-
];
|
|
5082
|
-
return genSequence(header);
|
|
4974
|
+
]);
|
|
5083
4975
|
}
|
|
5084
4976
|
/**
|
|
5085
4977
|
* Serialize a TrieNode.
|
|
@@ -5091,11 +4983,10 @@ function serializeTrie$3(root, options = 16) {
|
|
|
5091
4983
|
options = typeof options === "number" ? { base: options } : options;
|
|
5092
4984
|
const { base = 16, comment = "" } = options;
|
|
5093
4985
|
const radix = base > 36 ? 36 : base < 10 ? 10 : base;
|
|
5094
|
-
const
|
|
4986
|
+
const rows = flattenToReferences({
|
|
5095
4987
|
...root,
|
|
5096
4988
|
s: "^"
|
|
5097
|
-
};
|
|
5098
|
-
const rows = flattenToReferences(rootRef).map((n) => toLine(n, base));
|
|
4989
|
+
}).map((n) => toLine(n, base));
|
|
5099
4990
|
return generateHeader$2(radix, comment).concat(rows).map((a) => a + "\n");
|
|
5100
4991
|
}
|
|
5101
4992
|
function* toIterableIterator(iter) {
|
|
@@ -5138,16 +5029,15 @@ function importTrie$4(linesX) {
|
|
|
5138
5029
|
function decodeLine$1(line, nodes) {
|
|
5139
5030
|
const { letter, isWord, refs } = parseLine(line, radix);
|
|
5140
5031
|
const flags = isWord ? flagsWord : {};
|
|
5141
|
-
const children = refs.map((r) => nodes[r]).sort((a, b) => a.s < b.s ? -1 : 1).map((n
|
|
5142
|
-
const cNode = children.length ? { c: Object.fromEntries(children) } : {};
|
|
5032
|
+
const children = refs.map((r) => nodes[r]).sort((a, b) => a.s < b.s ? -1 : 1).map((n) => [n.s, n]);
|
|
5143
5033
|
return {
|
|
5144
5034
|
s: letter,
|
|
5145
|
-
...
|
|
5035
|
+
...children.length ? { c: Object.fromEntries(children) } : {},
|
|
5146
5036
|
...flags
|
|
5147
5037
|
};
|
|
5148
5038
|
}
|
|
5149
5039
|
readHeader(iter);
|
|
5150
|
-
|
|
5040
|
+
return trieNodeToRoot(genSequence(iter).map((a) => a.replace(/\r?\n/, "")).filter((a) => !!a).reduce((acc, line) => {
|
|
5151
5041
|
const { nodes } = acc;
|
|
5152
5042
|
const root = decodeLine$1(line, nodes);
|
|
5153
5043
|
nodes.push(root);
|
|
@@ -5161,8 +5051,7 @@ function importTrie$4(linesX) {
|
|
|
5161
5051
|
s: "",
|
|
5162
5052
|
c: Object.create(null)
|
|
5163
5053
|
}
|
|
5164
|
-
});
|
|
5165
|
-
return trieNodeToRoot(n.root, { isCaseAware: false });
|
|
5054
|
+
}).root, { isCaseAware: false });
|
|
5166
5055
|
}
|
|
5167
5056
|
|
|
5168
5057
|
//#endregion
|
|
@@ -5236,12 +5125,11 @@ const specialPrefix$1 = stringToCharSet$2("~!");
|
|
|
5236
5125
|
const WORDS_PER_LINE$1 = 20;
|
|
5237
5126
|
const DATA$2 = "__DATA__";
|
|
5238
5127
|
function generateHeader$1(base, comment) {
|
|
5239
|
-
const comments = comment.split("\n").map((a) => "# " + a.trimEnd()).join("\n");
|
|
5240
5128
|
return `\
|
|
5241
5129
|
#!/usr/bin/env cspell-trie reader
|
|
5242
5130
|
TrieXv4
|
|
5243
5131
|
base=${base}
|
|
5244
|
-
${
|
|
5132
|
+
${comment.split("\n").map((a) => "# " + a.trimEnd()).join("\n")}
|
|
5245
5133
|
# Data:
|
|
5246
5134
|
${DATA$2}
|
|
5247
5135
|
`;
|
|
@@ -5315,10 +5203,7 @@ function serializeTrie$2(root, options = 16) {
|
|
|
5315
5203
|
return;
|
|
5316
5204
|
}
|
|
5317
5205
|
if (node.c) {
|
|
5318
|
-
if (depth > 0 && depth <= 2)
|
|
5319
|
-
const chars = wordChars.slice(0, depth).map(escape).join("");
|
|
5320
|
-
yield* emit(comment_begin + chars + comment_end);
|
|
5321
|
-
}
|
|
5206
|
+
if (depth > 0 && depth <= 2) yield* emit(comment_begin + wordChars.slice(0, depth).map(escape).join("") + comment_end);
|
|
5322
5207
|
cache.set(node, count++);
|
|
5323
5208
|
const c = Object.entries(node.c).sort((a, b) => a[0] < b[0] ? -1 : 1);
|
|
5324
5209
|
for (const [s, n] of c) {
|
|
@@ -5337,8 +5222,7 @@ function serializeTrie$2(root, options = 16) {
|
|
|
5337
5222
|
yield* flush();
|
|
5338
5223
|
}
|
|
5339
5224
|
const lines = [...bufferLines(serialize(root), 1e3, "")];
|
|
5340
|
-
const
|
|
5341
|
-
const reference = "[\n" + resolvedReferences.map((n) => n.toString(radix)).join(",").replaceAll(/.{110,130}[,]/g, "$&\n") + "\n]\n";
|
|
5225
|
+
const reference = "[\n" + refMap.refCounts.map(([node]) => cache.get(node) || 0).map((n) => n.toString(radix)).join(",").replaceAll(/.{110,130}[,]/g, "$&\n") + "\n]\n";
|
|
5342
5226
|
return pipe([generateHeader$1(radix, comment), reference], opAppend(lines));
|
|
5343
5227
|
}
|
|
5344
5228
|
function buildReferenceMap(root, base) {
|
|
@@ -5361,16 +5245,14 @@ function buildReferenceMap(root, base) {
|
|
|
5361
5245
|
const refCountAndNode = [...pipe(refCount, opFilter(([_, ref]) => ref.c >= 2))].sort((a, b) => b[1].c - a[1].c || a[1].n - b[1].n);
|
|
5362
5246
|
let adj = 0;
|
|
5363
5247
|
const baseLogScale = 1 / Math.log(base);
|
|
5364
|
-
|
|
5248
|
+
return { refCounts: refCountAndNode.filter(([_, ref], idx$1) => {
|
|
5365
5249
|
const i = idx$1 - adj;
|
|
5366
5250
|
const charsIdx = Math.ceil(Math.log(i) * baseLogScale);
|
|
5367
5251
|
const charsNode = Math.ceil(Math.log(ref.n) * baseLogScale);
|
|
5368
|
-
const
|
|
5369
|
-
const keep = savings > 0;
|
|
5252
|
+
const keep = ref.c * (charsNode - charsIdx) - charsIdx > 0;
|
|
5370
5253
|
adj += keep ? 0 : 1;
|
|
5371
5254
|
return keep;
|
|
5372
|
-
}).map(([n, ref]) => [n, ref.c]);
|
|
5373
|
-
return { refCounts: refs };
|
|
5255
|
+
}).map(([n, ref]) => [n, ref.c]) };
|
|
5374
5256
|
}
|
|
5375
5257
|
function importTrie$3(linesX) {
|
|
5376
5258
|
linesX = typeof linesX === "string" ? linesX.split(/^/m) : linesX;
|
|
@@ -5395,8 +5277,7 @@ function importTrie$3(linesX) {
|
|
|
5395
5277
|
parseHeaderRows(headerRows);
|
|
5396
5278
|
}
|
|
5397
5279
|
readHeader(iter);
|
|
5398
|
-
|
|
5399
|
-
return root;
|
|
5280
|
+
return parseStream$1(radix, iter);
|
|
5400
5281
|
}
|
|
5401
5282
|
const numbersSet = stringToCharSet$2("0123456789");
|
|
5402
5283
|
function parseStream$1(radix, iter) {
|
|
@@ -5483,8 +5364,7 @@ function parseStream$1(radix, iter) {
|
|
|
5483
5364
|
function parseCharacter(acc, s) {
|
|
5484
5365
|
const parser = void 0;
|
|
5485
5366
|
const { root: root$1, nodes, stack } = acc;
|
|
5486
|
-
const
|
|
5487
|
-
const node = top.node;
|
|
5367
|
+
const node = stack[stack.length - 1].node;
|
|
5488
5368
|
const c = node.c ?? Object.create(null);
|
|
5489
5369
|
const n = {
|
|
5490
5370
|
f: void 0,
|
|
@@ -5553,8 +5433,7 @@ function parseStream$1(radix, iter) {
|
|
|
5553
5433
|
[INLINE_DATA_COMMENT_LINE, parseComment]
|
|
5554
5434
|
]);
|
|
5555
5435
|
function parserMain(acc, s) {
|
|
5556
|
-
|
|
5557
|
-
return parser(acc, s);
|
|
5436
|
+
return (acc.parser ?? parsers[s] ?? parseCharacter)(acc, s);
|
|
5558
5437
|
}
|
|
5559
5438
|
const charsetSpaces = stringToCharSet$2(" \r\n ");
|
|
5560
5439
|
function parseReferenceIndex(acc, s) {
|
|
@@ -5691,15 +5570,15 @@ var TrieNodeBuilder = class {
|
|
|
5691
5570
|
assertIsValidChar(char);
|
|
5692
5571
|
if (currNode.k) {
|
|
5693
5572
|
const s$1 = stack[depth];
|
|
5694
|
-
const { k: _, c: c$
|
|
5573
|
+
const { k: _, c: c$1,...copy } = currNode;
|
|
5695
5574
|
currNode = s$1.n.c[s$1.c] = copy;
|
|
5696
|
-
if (c$
|
|
5575
|
+
if (c$1) currNode.c = Object.assign(Object.create(null), c$1);
|
|
5697
5576
|
nodes.push(currNode);
|
|
5698
5577
|
}
|
|
5699
|
-
const c
|
|
5700
|
-
currNode.c = c
|
|
5578
|
+
const c = currNode.c || Object.create(null);
|
|
5579
|
+
currNode.c = c;
|
|
5701
5580
|
const n = currNode;
|
|
5702
|
-
const next = c
|
|
5581
|
+
const next = c[char] = c[char] || {};
|
|
5703
5582
|
nodes.push(next);
|
|
5704
5583
|
++depth;
|
|
5705
5584
|
const s = stack[depth];
|
|
@@ -5731,13 +5610,12 @@ var TrieNodeBuilder = class {
|
|
|
5731
5610
|
depth -= num;
|
|
5732
5611
|
currNode = stack[depth + 1].n;
|
|
5733
5612
|
};
|
|
5734
|
-
|
|
5613
|
+
return {
|
|
5735
5614
|
insertChar,
|
|
5736
5615
|
markEOW,
|
|
5737
5616
|
reference,
|
|
5738
5617
|
backStep
|
|
5739
5618
|
};
|
|
5740
|
-
return c;
|
|
5741
5619
|
}
|
|
5742
5620
|
sortChildren(node) {
|
|
5743
5621
|
const entries = Object.entries(node.c).sort((a, b) => compare(a[0], b[0]));
|
|
@@ -5759,8 +5637,7 @@ const specialCharacterMap$1 = new Map([
|
|
|
5759
5637
|
const characterMap = new Map([...specialCharacterMap$1].map((a) => [a[1], a[0]]));
|
|
5760
5638
|
const DATA$1 = "__DATA__";
|
|
5761
5639
|
function importTrieV3AsTrieRoot(srcLines) {
|
|
5762
|
-
|
|
5763
|
-
return importTrieV3WithBuilder(builder, srcLines);
|
|
5640
|
+
return importTrieV3WithBuilder(new TrieNodeBuilder(), srcLines);
|
|
5764
5641
|
}
|
|
5765
5642
|
function importTrieV3WithBuilder(builder, srcLines) {
|
|
5766
5643
|
const timer = getGlobalPerfTimer();
|
|
@@ -5776,10 +5653,7 @@ function importTrieV3WithBuilder(builder, srcLines) {
|
|
|
5776
5653
|
radix = Number.parseInt(header.replace(headerReg$2, "$1"), 10);
|
|
5777
5654
|
}
|
|
5778
5655
|
function findStartOfData(data) {
|
|
5779
|
-
for (let i = 0; i < data.length; ++i)
|
|
5780
|
-
const line = data[i];
|
|
5781
|
-
if (line.includes(DATA$1)) return i;
|
|
5782
|
-
}
|
|
5656
|
+
for (let i = 0; i < data.length; ++i) if (data[i].includes(DATA$1)) return i;
|
|
5783
5657
|
return -1;
|
|
5784
5658
|
}
|
|
5785
5659
|
function readHeader(data) {
|
|
@@ -5795,9 +5669,8 @@ function importTrieV3WithBuilder(builder, srcLines) {
|
|
|
5795
5669
|
const startOfData = findStartOfData(dataLines);
|
|
5796
5670
|
if (startOfData < 0) throw new Error("Unknown file format");
|
|
5797
5671
|
readHeader(dataLines.slice(0, startOfData));
|
|
5798
|
-
const cursor = builder.getCursor();
|
|
5799
5672
|
let node = {
|
|
5800
|
-
cursor,
|
|
5673
|
+
cursor: builder.getCursor(),
|
|
5801
5674
|
parser: void 0
|
|
5802
5675
|
};
|
|
5803
5676
|
const parser = parseStream(radix);
|
|
@@ -5879,8 +5752,7 @@ function parseStream(radix) {
|
|
|
5879
5752
|
[LF, parseIgnore]
|
|
5880
5753
|
]);
|
|
5881
5754
|
function parserMain(acc, s) {
|
|
5882
|
-
|
|
5883
|
-
return parser(acc, s);
|
|
5755
|
+
return (acc.parser ?? parsers.get(s) ?? parseCharacter)(acc, s);
|
|
5884
5756
|
}
|
|
5885
5757
|
return parserMain;
|
|
5886
5758
|
}
|
|
@@ -5931,8 +5803,7 @@ function importTrie$2(input) {
|
|
|
5931
5803
|
}
|
|
5932
5804
|
return headerRows;
|
|
5933
5805
|
}
|
|
5934
|
-
const
|
|
5935
|
-
const version$1 = parseHeaderRows(headerLines);
|
|
5806
|
+
const version$1 = parseHeaderRows(readHeader(lines));
|
|
5936
5807
|
const method = deserializers$1[version$1];
|
|
5937
5808
|
if (!method) throw new Error(`Unsupported version: ${version$1}`);
|
|
5938
5809
|
return method(lines);
|
|
@@ -5941,8 +5812,7 @@ function importTrie$2(input) {
|
|
|
5941
5812
|
//#endregion
|
|
5942
5813
|
//#region src/lib/decodeTrie.ts
|
|
5943
5814
|
function decodeTrie(raw) {
|
|
5944
|
-
|
|
5945
|
-
return new ITrieImpl(data);
|
|
5815
|
+
return new ITrieImpl(decodeTrieData(raw));
|
|
5946
5816
|
}
|
|
5947
5817
|
|
|
5948
5818
|
//#endregion
|
|
@@ -5967,15 +5837,14 @@ const specialPrefix = stringToCharSet("~!");
|
|
|
5967
5837
|
const WORDS_PER_LINE = 20;
|
|
5968
5838
|
const DATA = "__DATA__";
|
|
5969
5839
|
function generateHeader(base, comment) {
|
|
5970
|
-
|
|
5840
|
+
return [
|
|
5971
5841
|
"#!/usr/bin/env cspell-trie reader",
|
|
5972
5842
|
"TrieXv3",
|
|
5973
5843
|
"base=" + base,
|
|
5974
5844
|
...comment ? comment.split("\n").map((a) => "# " + a) : [],
|
|
5975
5845
|
"# Data:",
|
|
5976
5846
|
DATA
|
|
5977
|
-
];
|
|
5978
|
-
return header.map((a) => a + "\n");
|
|
5847
|
+
].map((a) => a + "\n");
|
|
5979
5848
|
}
|
|
5980
5849
|
/**
|
|
5981
5850
|
* Serialize a TrieRoot.
|
|
@@ -6080,8 +5949,7 @@ function serializeTrie$1(root, options = 16) {
|
|
|
6080
5949
|
return pipe(generateHeader(radix, comment), opAppend(bufferLines(serialize(root), 1200, "")));
|
|
6081
5950
|
}
|
|
6082
5951
|
function importTrie$1(srcLines) {
|
|
6083
|
-
|
|
6084
|
-
return trie.root;
|
|
5952
|
+
return importTrieV3AsTrieRoot(srcLines).root;
|
|
6085
5953
|
}
|
|
6086
5954
|
function stringToCharSet(values) {
|
|
6087
5955
|
const set = Object.create(null);
|
|
@@ -6138,8 +6006,7 @@ function importTrie(input) {
|
|
|
6138
6006
|
}
|
|
6139
6007
|
return headerRows;
|
|
6140
6008
|
}
|
|
6141
|
-
const
|
|
6142
|
-
const version$1 = parseHeaderRows(headerLines);
|
|
6009
|
+
const version$1 = parseHeaderRows(readHeader(lines));
|
|
6143
6010
|
const method = deserializers[version$1];
|
|
6144
6011
|
if (!method) throw new Error(`Unsupported version: ${version$1}`);
|
|
6145
6012
|
return method(lines);
|
|
@@ -8282,9 +8149,7 @@ function normalizeLocale(locale) {
|
|
|
8282
8149
|
if (regExTwoLetter.test(locale)) return locale.toLowerCase();
|
|
8283
8150
|
const m = locale.match(regExLocaleWithCountry);
|
|
8284
8151
|
if (!m) return locale;
|
|
8285
|
-
|
|
8286
|
-
const variant = m[2].toUpperCase();
|
|
8287
|
-
return `${lang}-${variant}`;
|
|
8152
|
+
return `${m[1].toLowerCase()}-${m[2].toUpperCase()}`;
|
|
8288
8153
|
}
|
|
8289
8154
|
function isStandardLocale(locale) {
|
|
8290
8155
|
return regExValidLocale.test(locale);
|
|
@@ -8348,39 +8213,31 @@ function mapEditCosts(costs = {}) {
|
|
|
8348
8213
|
* @param letters - letters to join
|
|
8349
8214
|
*/
|
|
8350
8215
|
function joinLetters(letters) {
|
|
8351
|
-
|
|
8352
|
-
return v.map((a) => a.length > 1 || !a.length ? `(${a})` : a).join("");
|
|
8216
|
+
return [...letters].map((a) => a.length > 1 || !a.length ? `(${a})` : a).join("");
|
|
8353
8217
|
}
|
|
8354
8218
|
|
|
8355
8219
|
//#endregion
|
|
8356
8220
|
//#region src/lib/mappers/mapToSuggestionCostDef.ts
|
|
8357
8221
|
function parseAlphabet(cs, locale, editCost) {
|
|
8358
8222
|
const { cost, penalty } = cs;
|
|
8359
|
-
const
|
|
8360
|
-
const charForms = [...pipe(characters, opMap((c) => caseForms(c, locale).sort()))];
|
|
8361
|
-
const alphabet = joinLetters([...pipe(charForms, opFlatten(), opMap((letter) => accentForms(letter)), opFlatten(), opUnique())].sort());
|
|
8362
|
-
const sugAlpha = clean$1({
|
|
8363
|
-
map: alphabet,
|
|
8364
|
-
replace: cost,
|
|
8365
|
-
insDel: cost,
|
|
8366
|
-
swap: cost,
|
|
8367
|
-
penalty
|
|
8368
|
-
});
|
|
8223
|
+
const alphabet = joinLetters([...pipe([...pipe(expandCharacterSet(cs.characters), opMap((c) => caseForms(c, locale).sort()))], opFlatten(), opMap((letter) => accentForms(letter)), opFlatten(), opUnique())].sort());
|
|
8369
8224
|
return [
|
|
8370
|
-
|
|
8225
|
+
clean$1({
|
|
8226
|
+
map: alphabet,
|
|
8227
|
+
replace: cost,
|
|
8228
|
+
insDel: cost,
|
|
8229
|
+
swap: cost,
|
|
8230
|
+
penalty
|
|
8231
|
+
}),
|
|
8371
8232
|
parseAlphabetCaps(cs.characters, locale, editCost),
|
|
8372
8233
|
...calcCostsForAccentedLetters(alphabet, locale, editCost)
|
|
8373
8234
|
];
|
|
8374
8235
|
}
|
|
8375
8236
|
function parseAlphabetCaps(alphabet, locale, editCost) {
|
|
8376
|
-
|
|
8377
|
-
|
|
8378
|
-
const caps = charForms.map((a) => joinLetters(a)).join("|");
|
|
8379
|
-
const sugCaps = {
|
|
8380
|
-
map: caps,
|
|
8237
|
+
return {
|
|
8238
|
+
map: [...pipe(expandCharacterSet(alphabet), opMap((c) => caseForms(c, locale).sort()))].map((a) => joinLetters(a)).join("|"),
|
|
8381
8239
|
replace: editCost.capsCosts
|
|
8382
8240
|
};
|
|
8383
|
-
return sugCaps;
|
|
8384
8241
|
}
|
|
8385
8242
|
function calcFirstCharacterReplaceDefs(alphabets, editCost) {
|
|
8386
8243
|
return alphabets.map((cs) => calcFirstCharacterReplace(cs, editCost));
|
|
@@ -8388,10 +8245,9 @@ function calcFirstCharacterReplaceDefs(alphabets, editCost) {
|
|
|
8388
8245
|
function calcFirstCharacterReplace(cs, editCost) {
|
|
8389
8246
|
const mapOfFirstLetters = [...pipe(expandCharacterSet(cs.characters), opUnique(), opMap((letter) => `(^${letter})`))].sort().join("") + "(^)";
|
|
8390
8247
|
const penalty = editCost.firstLetterPenalty;
|
|
8391
|
-
const cost = cs.cost - penalty;
|
|
8392
8248
|
return {
|
|
8393
8249
|
map: mapOfFirstLetters,
|
|
8394
|
-
replace: cost,
|
|
8250
|
+
replace: cs.cost - penalty,
|
|
8395
8251
|
penalty: penalty * 2
|
|
8396
8252
|
};
|
|
8397
8253
|
}
|
|
@@ -8408,8 +8264,7 @@ function parseAccents(cs, _editCost) {
|
|
|
8408
8264
|
}
|
|
8409
8265
|
function calcCostsForAccentedLetters(simpleMap, locale, costs) {
|
|
8410
8266
|
const charactersWithAccents = [...pipe(splitMap(simpleMap), opMap((char) => caseForms(char, locale)), opFlatten(), opMap((char) => [...accentForms(char)]), opFilter((forms$1) => forms$1.length > 1))];
|
|
8411
|
-
const
|
|
8412
|
-
const replaceAccentMap = [...characters].join("|");
|
|
8267
|
+
const replaceAccentMap = [...pipe(charactersWithAccents, opMap((forms$1) => new Set([...forms$1, ...forms$1.map((char) => stripAccents(char))])), opMap((forms$1) => [...forms$1].sort()), opFilter((forms$1) => forms$1.length > 1), opMap(joinLetters), opUnique())].join("|");
|
|
8413
8268
|
const cost = costs.accentCosts;
|
|
8414
8269
|
const costToReplaceAccent = !replaceAccentMap ? [] : [{
|
|
8415
8270
|
map: replaceAccentMap,
|
|
@@ -8469,20 +8324,16 @@ function hunspellInformationToSuggestionCostDef(hunInfo, locales) {
|
|
|
8469
8324
|
function parseAff(aff, costs$1) {
|
|
8470
8325
|
const regSupportedAff = /^(?:MAP|KEY|TRY|NO-TRY|ICONV|OCONV|REP)\s/;
|
|
8471
8326
|
const rejectAff = /^(?:MAP|KEY|TRY|ICONV|OCONV|REP)\s+\d+$/;
|
|
8472
|
-
|
|
8473
|
-
const defs = pipe(lines, opMap((line) => pipe(operations, opMap((fn) => fn(line, costs$1)), opMap(asArrayOf), opFlatten())), opFlatten(), opFilter(isDefined$1));
|
|
8474
|
-
return [...defs];
|
|
8327
|
+
return [...pipe(aff.split("\n").map((a) => a.replace(/#.*/, "")).map((a) => a.trim()).filter((a) => regSupportedAff.test(a)).filter((a) => !rejectAff.test(a)), opMap((line) => pipe(operations, opMap((fn) => fn(line, costs$1)), opMap(asArrayOf), opFlatten())), opFlatten(), opFilter(isDefined$1))];
|
|
8475
8328
|
}
|
|
8476
8329
|
return parseAff(hunInfo.aff, costs);
|
|
8477
8330
|
}
|
|
8478
8331
|
function calcCosts(costs = {}, locale) {
|
|
8479
8332
|
const useLocale = locale?.length ? locale.map((loc) => loc.locale) : void 0;
|
|
8480
|
-
|
|
8481
|
-
|
|
8482
|
-
...hunCosts,
|
|
8333
|
+
return {
|
|
8334
|
+
...mapHunspellCosts(costs),
|
|
8483
8335
|
locale: useLocale
|
|
8484
8336
|
};
|
|
8485
|
-
return c;
|
|
8486
8337
|
}
|
|
8487
8338
|
const regExpMap = /^(?:MAP)\s+(\S+)$/;
|
|
8488
8339
|
function affMap(line, costs) {
|
|
@@ -8501,8 +8352,7 @@ function affTry(line, costs) {
|
|
|
8501
8352
|
const m = line.match(regExpTry);
|
|
8502
8353
|
if (!m) return void 0;
|
|
8503
8354
|
const cost = costs.tryCharCost;
|
|
8504
|
-
const
|
|
8505
|
-
const characters = tryChars;
|
|
8355
|
+
const characters = m[1];
|
|
8506
8356
|
return parseAlphabet({
|
|
8507
8357
|
characters,
|
|
8508
8358
|
cost
|
|
@@ -8522,9 +8372,8 @@ const regExpNoTry = /^NO-TRY\s+(\S+)$/;
|
|
|
8522
8372
|
function affNoTry(line, costs) {
|
|
8523
8373
|
const m = line.match(regExpNoTry);
|
|
8524
8374
|
if (!m) return void 0;
|
|
8525
|
-
const map = m[1];
|
|
8526
8375
|
return {
|
|
8527
|
-
map,
|
|
8376
|
+
map: m[1],
|
|
8528
8377
|
insDel: Math.max(costs.nonAlphabetCosts - costs.tryCharCost, 0),
|
|
8529
8378
|
penalty: costs.nonAlphabetCosts + costs.tryCharCost
|
|
8530
8379
|
};
|
|
@@ -8587,9 +8436,7 @@ function affMapAccents(line, costs) {
|
|
|
8587
8436
|
}
|
|
8588
8437
|
function parseCaps(value, costs) {
|
|
8589
8438
|
const locale = costs.locale;
|
|
8590
|
-
const
|
|
8591
|
-
const withCases = letters.map((s) => caseForms(s, locale)).filter((forms$1) => forms$1.length > 1).map(joinLetters);
|
|
8592
|
-
const map = unique(withCases).join("|");
|
|
8439
|
+
const map = unique([...splitMap(value)].filter((a) => a !== "|").map((s) => caseForms(s, locale)).filter((forms$1) => forms$1.length > 1).map(joinLetters)).join("|");
|
|
8593
8440
|
const cost = costs.capsCosts;
|
|
8594
8441
|
if (!map) return void 0;
|
|
8595
8442
|
return {
|
|
@@ -8636,8 +8483,7 @@ function toCharSets(cs, defaultValue, cost, penalty) {
|
|
|
8636
8483
|
return cs;
|
|
8637
8484
|
}
|
|
8638
8485
|
function processAccents(accents, editCost) {
|
|
8639
|
-
|
|
8640
|
-
return cs.map((cs$1) => parseAccents(cs$1, editCost)).filter(isDefined$1);
|
|
8486
|
+
return toCharSets(accents, "̀-́", editCost.accentCosts).map((cs) => parseAccents(cs, editCost)).filter(isDefined$1);
|
|
8641
8487
|
}
|
|
8642
8488
|
function mapDictionaryInformationToAdjustment(dictInfo) {
|
|
8643
8489
|
if (!dictInfo.adjustments) return [];
|
|
@@ -8687,8 +8533,7 @@ function mapDictionaryInformationToWeightMap(dictInfo) {
|
|
|
8687
8533
|
//#endregion
|
|
8688
8534
|
//#region src/lib/suggestions/suggest.ts
|
|
8689
8535
|
const baseCost = opCosts.baseCost;
|
|
8690
|
-
const
|
|
8691
|
-
const postSwapCost = swapCost - baseCost;
|
|
8536
|
+
const postSwapCost = opCosts.swapCost - baseCost;
|
|
8692
8537
|
const insertSpaceCost = -1;
|
|
8693
8538
|
const mapSubCost = opCosts.visuallySimilar;
|
|
8694
8539
|
const maxCostScale = opCosts.wordLengthCostFactor;
|
|
@@ -8696,8 +8541,7 @@ const discourageInsertCost = baseCost;
|
|
|
8696
8541
|
const setOfSeparators = new Set([JOIN_SEPARATOR, WORD_SEPARATOR]);
|
|
8697
8542
|
function suggest(root, word, options = {}) {
|
|
8698
8543
|
const opts = createSuggestionOptions(options);
|
|
8699
|
-
const
|
|
8700
|
-
const collector = suggestionCollector(word, collectorOpts);
|
|
8544
|
+
const collector = suggestionCollector(word, clean(opts));
|
|
8701
8545
|
collector.collect(genSuggestions(root, word, {
|
|
8702
8546
|
...opts,
|
|
8703
8547
|
...collector.genSuggestionOptions
|
|
@@ -8707,7 +8551,6 @@ function suggest(root, word, options = {}) {
|
|
|
8707
8551
|
function* genSuggestions(root, word, options = {}) {
|
|
8708
8552
|
const roots = Array.isArray(root) ? root : [root];
|
|
8709
8553
|
for (const r of roots) yield* genCompoundableSuggestions(r, word, options);
|
|
8710
|
-
return void 0;
|
|
8711
8554
|
}
|
|
8712
8555
|
function* genCompoundableSuggestions(root, word, options = {}) {
|
|
8713
8556
|
const { compoundMethod = CompoundWordsMethod.NONE, changeLimit, ignoreCase } = createSuggestionOptions(options);
|
|
@@ -8747,8 +8590,7 @@ function* genCompoundableSuggestions(root, word, options = {}) {
|
|
|
8747
8590
|
a,
|
|
8748
8591
|
b
|
|
8749
8592
|
};
|
|
8750
|
-
const
|
|
8751
|
-
const iWalk = hintedWalker(root, ignoreCase, hint, compoundMethod, options.compoundSeparator);
|
|
8593
|
+
const iWalk = hintedWalker(root, ignoreCase, word, compoundMethod, options.compoundSeparator);
|
|
8752
8594
|
let goDeeper = true;
|
|
8753
8595
|
for (let r = iWalk.next({ goDeeper }); !stopNow && !r.done; r = iWalk.next({ goDeeper })) {
|
|
8754
8596
|
const { text, node, depth } = r.value;
|
|
@@ -8766,22 +8608,16 @@ function* genCompoundableSuggestions(root, word, options = {}) {
|
|
|
8766
8608
|
goDeeper = false;
|
|
8767
8609
|
const { i: i$1, w: w$1, m } = ht;
|
|
8768
8610
|
if (i$1 >= history.length) continue;
|
|
8769
|
-
|
|
8770
|
-
if (r$1.word.slice(0, w$1.length) !== w$1) continue;
|
|
8611
|
+
if (history[i$1].word.slice(0, w$1.length) !== w$1) continue;
|
|
8771
8612
|
const dc = mxMin - m;
|
|
8772
8613
|
for (let p = i$1; p < history.length; ++p) {
|
|
8773
8614
|
const { word: word$1, cost: hCost } = history[p];
|
|
8774
|
-
|
|
8775
|
-
if (fix !== w$1) break;
|
|
8615
|
+
if (word$1.slice(0, w$1.length) !== w$1) break;
|
|
8776
8616
|
const cost$1 = hCost + dc;
|
|
8777
|
-
if (cost$1 <= costLimit) {
|
|
8778
|
-
|
|
8779
|
-
|
|
8780
|
-
|
|
8781
|
-
word: emit,
|
|
8782
|
-
cost: cost$1
|
|
8783
|
-
});
|
|
8784
|
-
}
|
|
8617
|
+
if (cost$1 <= costLimit) updateCostLimit(yield {
|
|
8618
|
+
word: text + word$1.slice(w$1.length),
|
|
8619
|
+
cost: cost$1
|
|
8620
|
+
});
|
|
8785
8621
|
}
|
|
8786
8622
|
continue;
|
|
8787
8623
|
} else historyTags.set(tag, {
|
|
@@ -8844,7 +8680,6 @@ function* genCompoundableSuggestions(root, word, options = {}) {
|
|
|
8844
8680
|
} else updateCostLimit(yield void 0);
|
|
8845
8681
|
goDeeper = min <= costLimit;
|
|
8846
8682
|
}
|
|
8847
|
-
return void 0;
|
|
8848
8683
|
}
|
|
8849
8684
|
|
|
8850
8685
|
//#endregion
|
|
@@ -8896,10 +8731,7 @@ var Trie = class Trie {
|
|
|
8896
8731
|
}
|
|
8897
8732
|
has(word, minLegacyCompoundLength) {
|
|
8898
8733
|
if (this.hasWord(word, false)) return true;
|
|
8899
|
-
if (minLegacyCompoundLength) {
|
|
8900
|
-
const f = this.findWord(word, { useLegacyWordCompounds: minLegacyCompoundLength });
|
|
8901
|
-
return !!f.found;
|
|
8902
|
-
}
|
|
8734
|
+
if (minLegacyCompoundLength) return !!this.findWord(word, { useLegacyWordCompounds: minLegacyCompoundLength }).found;
|
|
8903
8735
|
return false;
|
|
8904
8736
|
}
|
|
8905
8737
|
/**
|
|
@@ -9010,11 +8842,10 @@ var Trie = class Trie {
|
|
|
9010
8842
|
return new Trie(root, void 0);
|
|
9011
8843
|
}
|
|
9012
8844
|
createFindOptions(options = {}) {
|
|
9013
|
-
|
|
8845
|
+
return createFindOptions({
|
|
9014
8846
|
...this._findOptionsDefaults,
|
|
9015
8847
|
...options
|
|
9016
8848
|
});
|
|
9017
|
-
return findOptions;
|
|
9018
8849
|
}
|
|
9019
8850
|
lastCreateFindOptionsMatchCaseMap = /* @__PURE__ */ new Map();
|
|
9020
8851
|
createFindOptionsMatchCase(matchCase) {
|
|
@@ -9074,7 +8905,6 @@ var SecondChanceCache = class {
|
|
|
9074
8905
|
this.map0.set(key, v);
|
|
9075
8906
|
return v;
|
|
9076
8907
|
}
|
|
9077
|
-
return void 0;
|
|
9078
8908
|
}
|
|
9079
8909
|
toArray() {
|
|
9080
8910
|
return [...this.map1, ...this.map0];
|
|
@@ -9098,8 +8928,7 @@ function buildTrie(words, trieOptions) {
|
|
|
9098
8928
|
* @param trieOptions options for the Trie
|
|
9099
8929
|
*/
|
|
9100
8930
|
function buildTrieFast(words, trieOptions) {
|
|
9101
|
-
|
|
9102
|
-
return new Trie(root, void 0);
|
|
8931
|
+
return new Trie(createTrieRootFromList(words, trieOptions), void 0);
|
|
9103
8932
|
}
|
|
9104
8933
|
const MAX_NUM_SIGS = 1e5;
|
|
9105
8934
|
const MAX_TRANSFORMS = 1e6;
|
|
@@ -9140,9 +8969,7 @@ var TrieBuilder = class {
|
|
|
9140
8969
|
const isWord = n.f ? "*" : "";
|
|
9141
8970
|
const entries = n.c ? Object.entries(n.c) : void 0;
|
|
9142
8971
|
const c = entries ? entries.map(([k, n$1]) => [k, this.cached.get(n$1)]) : void 0;
|
|
9143
|
-
|
|
9144
|
-
const sig = isWord + ref;
|
|
9145
|
-
return sig;
|
|
8972
|
+
return isWord + (c ? JSON.stringify(c) : "");
|
|
9146
8973
|
}
|
|
9147
8974
|
_canBeCached(n) {
|
|
9148
8975
|
if (!n.c) return true;
|
|
@@ -9245,8 +9072,7 @@ var TrieBuilder = class {
|
|
|
9245
9072
|
const chars = [...word];
|
|
9246
9073
|
let d = 1;
|
|
9247
9074
|
for (const s of chars) {
|
|
9248
|
-
|
|
9249
|
-
if (p?.s !== s) break;
|
|
9075
|
+
if (this.lastPath[d]?.s !== s) break;
|
|
9250
9076
|
d++;
|
|
9251
9077
|
}
|
|
9252
9078
|
if (chars.length < d) d = chars.length;
|
|
@@ -9305,20 +9131,15 @@ var TrieBuilder = class {
|
|
|
9305
9131
|
return stack.map((n) => this.debNodeInfo(n));
|
|
9306
9132
|
}
|
|
9307
9133
|
debNodeInfo(node) {
|
|
9308
|
-
const id = node.id ?? "?";
|
|
9309
|
-
const cid = this.cached.get(node) ?? "?";
|
|
9310
|
-
const f = node.f || 0;
|
|
9311
|
-
const c = node.c ? Object.fromEntries(Object.entries(node.c).map(([k, n]) => [k, {
|
|
9312
|
-
id: n.id,
|
|
9313
|
-
r: this.cached.get(n)
|
|
9314
|
-
}])) : void 0;
|
|
9315
|
-
const L = Object.isFrozen(node);
|
|
9316
9134
|
return {
|
|
9317
|
-
id,
|
|
9318
|
-
cid,
|
|
9319
|
-
f,
|
|
9320
|
-
c,
|
|
9321
|
-
|
|
9135
|
+
id: node.id ?? "?",
|
|
9136
|
+
cid: this.cached.get(node) ?? "?",
|
|
9137
|
+
f: node.f || 0,
|
|
9138
|
+
c: node.c ? Object.fromEntries(Object.entries(node.c).map(([k, n]) => [k, {
|
|
9139
|
+
id: n.id,
|
|
9140
|
+
r: this.cached.get(n)
|
|
9141
|
+
}])) : void 0,
|
|
9142
|
+
L: Object.isFrozen(node)
|
|
9322
9143
|
};
|
|
9323
9144
|
}
|
|
9324
9145
|
logDebug(methodName, contentOrFunction) {
|
|
@@ -9378,7 +9199,6 @@ const normalizeWordForCaseInsensitive = (text) => {
|
|
|
9378
9199
|
|
|
9379
9200
|
//#endregion
|
|
9380
9201
|
//#region src/lib/SimpleDictionaryParser.ts
|
|
9381
|
-
const RegExpSplit = /[\s,;]/g;
|
|
9382
9202
|
const _defaultOptions = {
|
|
9383
9203
|
commentCharacter: LINE_COMMENT,
|
|
9384
9204
|
optionalCompoundCharacter: OPTIONAL_COMPOUND_FIX,
|
|
@@ -9391,7 +9211,7 @@ const _defaultOptions = {
|
|
|
9391
9211
|
stripCaseAndAccentsOnForbidden: false,
|
|
9392
9212
|
split: false,
|
|
9393
9213
|
splitKeepBoth: false,
|
|
9394
|
-
splitSeparator:
|
|
9214
|
+
splitSeparator: /[\s,;]/g,
|
|
9395
9215
|
keepOptionalCompoundCharacter: false
|
|
9396
9216
|
};
|
|
9397
9217
|
const defaultParseDictionaryOptions = Object.freeze(_defaultOptions);
|
|
@@ -9479,9 +9299,7 @@ function createDictionaryLineParserMapper(options) {
|
|
|
9479
9299
|
function* splitWords(lines) {
|
|
9480
9300
|
for (const line of lines) {
|
|
9481
9301
|
if (split) {
|
|
9482
|
-
|
|
9483
|
-
const words = splitLine(lineEscaped, splitSeparator);
|
|
9484
|
-
yield* words.map((escaped) => escaped.replaceAll("\\", ""));
|
|
9302
|
+
yield* splitLine(line.includes("\"") ? line.replaceAll(/".*?"/g, (quoted) => " " + quoted.replaceAll(/(\s)/g, "\\$1") + " ") : line, splitSeparator).map((escaped) => escaped.replaceAll("\\", ""));
|
|
9485
9303
|
if (!splitKeepBoth) continue;
|
|
9486
9304
|
}
|
|
9487
9305
|
yield line;
|
|
@@ -9491,8 +9309,7 @@ function createDictionaryLineParserMapper(options) {
|
|
|
9491
9309
|
for (const paragraph of paragraphs) yield* paragraph.split("\n");
|
|
9492
9310
|
}
|
|
9493
9311
|
const mapCompounds = keepOptionalCompoundCharacter ? [] : [opConcatMap(mapOptionalPrefix), opConcatMap(mapOptionalSuffix)];
|
|
9494
|
-
|
|
9495
|
-
return processLines;
|
|
9312
|
+
return opCombine(opFilter(isString), splitLines, opMap(removeComments), splitWords, opMap(trim), opFilter(filterEmptyLines), ...mapCompounds, opConcatMap(mapNormalize), opMap(removeDoublePrefix));
|
|
9496
9313
|
}
|
|
9497
9314
|
/**
|
|
9498
9315
|
* Normalizes a dictionary words based upon prefix / suffixes.
|