cspell-trie-lib 9.2.0 → 9.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.ts +1 -26
- package/dist/index.js +213 -412
- package/package.json +8 -8
package/dist/index.js
CHANGED
|
@@ -19,19 +19,18 @@ function memorizeLastCall(fn) {
|
|
|
19
19
|
|
|
20
20
|
//#endregion
|
|
21
21
|
//#region src/lib/ITrieNode/find.ts
|
|
22
|
-
const defaultLegacyMinCompoundLength$
|
|
22
|
+
const defaultLegacyMinCompoundLength$2 = 3;
|
|
23
23
|
const _defaultFindOptions$1 = {
|
|
24
24
|
matchCase: false,
|
|
25
25
|
compoundMode: "compound",
|
|
26
|
-
legacyMinCompoundLength: defaultLegacyMinCompoundLength$
|
|
26
|
+
legacyMinCompoundLength: defaultLegacyMinCompoundLength$2
|
|
27
27
|
};
|
|
28
28
|
Object.freeze(_defaultFindOptions$1);
|
|
29
|
-
const
|
|
29
|
+
const knownCompoundModes$1 = new Map([
|
|
30
30
|
"none",
|
|
31
31
|
"compound",
|
|
32
32
|
"legacy"
|
|
33
|
-
];
|
|
34
|
-
const knownCompoundModes$1 = new Map(arrayCompoundModes$1.map((a) => [a, a]));
|
|
33
|
+
].map((a) => [a, a]));
|
|
35
34
|
const notFound = {
|
|
36
35
|
found: false,
|
|
37
36
|
compoundUsed: false,
|
|
@@ -89,23 +88,18 @@ function _findWordNode$1(root, word, options) {
|
|
|
89
88
|
const checkForbidden = options?.checkForbidden ?? true;
|
|
90
89
|
function __findCompound() {
|
|
91
90
|
const f = findCompoundWord$1(root, word, compoundPrefix, ignoreCasePrefix);
|
|
92
|
-
if (f.found !== false && (mustCheckForbidden || f.compoundUsed && checkForbidden))
|
|
93
|
-
const r = !f.caseMatched ? walk$2(root, root.caseInsensitivePrefix) : root;
|
|
94
|
-
f.forbidden = isForbiddenWord$1(r, word, root.forbidPrefix);
|
|
95
|
-
}
|
|
91
|
+
if (f.found !== false && (mustCheckForbidden || f.compoundUsed && checkForbidden)) f.forbidden = isForbiddenWord$1(!f.caseMatched ? walk$2(root, root.caseInsensitivePrefix) : root, word, root.forbidPrefix);
|
|
96
92
|
return f;
|
|
97
93
|
}
|
|
98
94
|
function __findExact() {
|
|
99
95
|
const n = root.getNode ? root.getNode(word) : walk$2(root, word);
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
found: isFound && word,
|
|
96
|
+
return {
|
|
97
|
+
found: isEndOfWordNode$1(n) && word,
|
|
103
98
|
compoundUsed: false,
|
|
104
99
|
forbidden: checkForbidden ? isForbiddenWord$1(root, word, root.forbidPrefix) : void 0,
|
|
105
100
|
node: n,
|
|
106
101
|
caseMatched: true
|
|
107
102
|
};
|
|
108
|
-
return result;
|
|
109
103
|
}
|
|
110
104
|
switch (compoundMode) {
|
|
111
105
|
case "none": return matchCase ? __findExact() : __findCompound();
|
|
@@ -116,7 +110,7 @@ function _findWordNode$1(root, word, options) {
|
|
|
116
110
|
function findLegacyCompound$1(root, word, options) {
|
|
117
111
|
const roots = [root];
|
|
118
112
|
if (!options?.matchCase) roots.push(walk$2(root, root.caseInsensitivePrefix));
|
|
119
|
-
return findLegacyCompoundNode$1(roots, word, options?.legacyMinCompoundLength || defaultLegacyMinCompoundLength$
|
|
113
|
+
return findLegacyCompoundNode$1(roots, word, options?.legacyMinCompoundLength || defaultLegacyMinCompoundLength$2);
|
|
120
114
|
}
|
|
121
115
|
function findCompoundNode$1(root, word, compoundCharacter, ignoreCasePrefix) {
|
|
122
116
|
const stack = [{
|
|
@@ -179,15 +173,13 @@ function findCompoundNode$1(root, word, compoundCharacter, ignoreCasePrefix) {
|
|
|
179
173
|
break;
|
|
180
174
|
}
|
|
181
175
|
}
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
found,
|
|
176
|
+
return {
|
|
177
|
+
found: i === word.length && word || false,
|
|
185
178
|
compoundUsed,
|
|
186
179
|
node,
|
|
187
180
|
forbidden: void 0,
|
|
188
181
|
caseMatched
|
|
189
182
|
};
|
|
190
|
-
return result;
|
|
191
183
|
}
|
|
192
184
|
function findCompoundWord$1(root, word, compoundCharacter, ignoreCasePrefix) {
|
|
193
185
|
const { found, compoundUsed, node, caseMatched } = findCompoundNode$1(root, word, compoundCharacter, ignoreCasePrefix);
|
|
@@ -244,8 +236,7 @@ function findLegacyCompoundNode$1(roots, word, minCompoundLength) {
|
|
|
244
236
|
while (true) {
|
|
245
237
|
const s = stack[i];
|
|
246
238
|
const h = w[i++];
|
|
247
|
-
const
|
|
248
|
-
const c = n?.get(h);
|
|
239
|
+
const c = (s.cr || s.n)?.get(h);
|
|
249
240
|
if (c && i < wLen) stack[i] = {
|
|
250
241
|
n: c,
|
|
251
242
|
usedRoots: 0,
|
|
@@ -285,15 +276,13 @@ function findLegacyCompoundNode$1(roots, word, minCompoundLength) {
|
|
|
285
276
|
}
|
|
286
277
|
return letters.join("");
|
|
287
278
|
}
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
found,
|
|
279
|
+
return {
|
|
280
|
+
found: extractWord(),
|
|
291
281
|
compoundUsed,
|
|
292
282
|
node,
|
|
293
283
|
forbidden: void 0,
|
|
294
284
|
caseMatched
|
|
295
285
|
};
|
|
296
|
-
return result;
|
|
297
286
|
}
|
|
298
287
|
function isForbiddenWord$1(root, word, forbiddenPrefix) {
|
|
299
288
|
const r = root;
|
|
@@ -368,12 +357,11 @@ function* compoundWalker$1(root, compoundingMethod) {
|
|
|
368
357
|
while (s.ci < s.c.length) {
|
|
369
358
|
const [char, node] = s.c[s.ci++];
|
|
370
359
|
const text = baseText + char;
|
|
371
|
-
|
|
360
|
+
if ((yield {
|
|
372
361
|
text,
|
|
373
362
|
node,
|
|
374
363
|
depth
|
|
375
|
-
}
|
|
376
|
-
if (goDeeper ?? true) {
|
|
364
|
+
}) ?? true) {
|
|
377
365
|
depth++;
|
|
378
366
|
baseText = text;
|
|
379
367
|
stack[depth] = {
|
|
@@ -408,12 +396,11 @@ function* nodeWalker$1(root) {
|
|
|
408
396
|
const idx$1 = s.ci++;
|
|
409
397
|
const [char, node] = s.c[idx$1];
|
|
410
398
|
const text = baseText + char;
|
|
411
|
-
|
|
399
|
+
if ((yield {
|
|
412
400
|
text,
|
|
413
401
|
node,
|
|
414
402
|
depth
|
|
415
|
-
}
|
|
416
|
-
if (goDeeper !== false) {
|
|
403
|
+
}) !== false) {
|
|
417
404
|
depth++;
|
|
418
405
|
baseText = text;
|
|
419
406
|
const s$1 = stack[depth];
|
|
@@ -449,11 +436,10 @@ function* walkerWordsITrie(root) {
|
|
|
449
436
|
let depth = 0;
|
|
450
437
|
const stack = [];
|
|
451
438
|
const entries = root.entries();
|
|
452
|
-
const c = Array.isArray(entries) ? entries : [...entries];
|
|
453
439
|
stack[depth] = {
|
|
454
440
|
t: "",
|
|
455
441
|
n: root,
|
|
456
|
-
c,
|
|
442
|
+
c: Array.isArray(entries) ? entries : [...entries],
|
|
457
443
|
ci: 0
|
|
458
444
|
};
|
|
459
445
|
while (depth >= 0) {
|
|
@@ -467,17 +453,17 @@ function* walkerWordsITrie(root) {
|
|
|
467
453
|
depth++;
|
|
468
454
|
baseText = text;
|
|
469
455
|
const entries$1 = node.entries();
|
|
470
|
-
const c
|
|
456
|
+
const c = Array.isArray(entries$1) ? entries$1 : [...entries$1];
|
|
471
457
|
if (stack[depth]) {
|
|
472
458
|
s = stack[depth];
|
|
473
459
|
s.t = text;
|
|
474
460
|
s.n = node;
|
|
475
|
-
s.c = c
|
|
461
|
+
s.c = c;
|
|
476
462
|
s.ci = 0;
|
|
477
463
|
} else stack[depth] = {
|
|
478
464
|
t: text,
|
|
479
465
|
n: node,
|
|
480
|
-
c
|
|
466
|
+
c,
|
|
481
467
|
ci: 0
|
|
482
468
|
};
|
|
483
469
|
s = stack[depth];
|
|
@@ -542,8 +528,7 @@ function* hintedWalkerNext(root, ignoreCase, hint, compoundingMethod, emitWordSe
|
|
|
542
528
|
root.forbiddenWordPrefix
|
|
543
529
|
]);
|
|
544
530
|
function filterRoot(root$1) {
|
|
545
|
-
const
|
|
546
|
-
const c = children$1?.filter(([v]) => !(v in specialRootsPrefix));
|
|
531
|
+
const c = (root$1.c && Object.entries(root$1.c))?.filter(([v]) => !(v in specialRootsPrefix));
|
|
547
532
|
return { c: c && Object.fromEntries(c) };
|
|
548
533
|
}
|
|
549
534
|
const roots = rawRoots.map(filterRoot);
|
|
@@ -720,8 +705,7 @@ var ImplITrieRoot = class extends ImplITrieNode {
|
|
|
720
705
|
return false;
|
|
721
706
|
}
|
|
722
707
|
resolveId(id) {
|
|
723
|
-
|
|
724
|
-
return new ImplITrieNode(n);
|
|
708
|
+
return new ImplITrieNode(id);
|
|
725
709
|
}
|
|
726
710
|
get forbidPrefix() {
|
|
727
711
|
return this.root.forbiddenWordPrefix;
|
|
@@ -770,12 +754,11 @@ function* compoundWalker(root, compoundingMethod) {
|
|
|
770
754
|
while (s.ci < s.c.length) {
|
|
771
755
|
const [char, node] = s.c[s.ci++];
|
|
772
756
|
const text = baseText + char;
|
|
773
|
-
|
|
757
|
+
if ((yield {
|
|
774
758
|
text,
|
|
775
759
|
node,
|
|
776
760
|
depth
|
|
777
|
-
}
|
|
778
|
-
if (goDeeper ?? true) {
|
|
761
|
+
}) ?? true) {
|
|
779
762
|
depth++;
|
|
780
763
|
baseText = text;
|
|
781
764
|
stack[depth] = {
|
|
@@ -814,12 +797,11 @@ function* nodeWalker(root) {
|
|
|
814
797
|
const char = s.c[s.ci++];
|
|
815
798
|
const node = s.n[char];
|
|
816
799
|
const text = baseText + char;
|
|
817
|
-
|
|
800
|
+
if ((yield {
|
|
818
801
|
text,
|
|
819
802
|
node,
|
|
820
803
|
depth
|
|
821
|
-
}
|
|
822
|
-
if (goDeeper !== false) {
|
|
804
|
+
}) !== false) {
|
|
823
805
|
depth++;
|
|
824
806
|
baseText = text;
|
|
825
807
|
const s$1 = stack[depth];
|
|
@@ -904,14 +886,11 @@ const defaultSuggestionOptions = {
|
|
|
904
886
|
includeTies: true,
|
|
905
887
|
timeout: 5e3
|
|
906
888
|
};
|
|
907
|
-
const
|
|
889
|
+
const keyMapOfSuggestionOptionsStrict = {
|
|
908
890
|
changeLimit: "changeLimit",
|
|
909
891
|
compoundMethod: "compoundMethod",
|
|
910
892
|
ignoreCase: "ignoreCase",
|
|
911
|
-
compoundSeparator: "compoundSeparator"
|
|
912
|
-
};
|
|
913
|
-
const keyMapOfSuggestionOptionsStrict = {
|
|
914
|
-
...keyMapOfGenSuggestionOptionsStrict,
|
|
893
|
+
compoundSeparator: "compoundSeparator",
|
|
915
894
|
filter: "filter",
|
|
916
895
|
includeTies: "includeTies",
|
|
917
896
|
numSuggestions: "numSuggestions",
|
|
@@ -1077,8 +1056,7 @@ const visualLetterGroups = [
|
|
|
1077
1056
|
function forms(letters) {
|
|
1078
1057
|
const n = letters.normalize("NFC").replaceAll(/\p{M}/gu, "");
|
|
1079
1058
|
const na = n.normalize("NFD").replaceAll(/\p{M}/gu, "");
|
|
1080
|
-
|
|
1081
|
-
return [...s].join("");
|
|
1059
|
+
return [...new Set(n + n.toLowerCase() + n.toUpperCase() + na + na.toLowerCase() + na.toUpperCase())].join("");
|
|
1082
1060
|
}
|
|
1083
1061
|
/**
|
|
1084
1062
|
* This is a map of letters to groups mask values.
|
|
@@ -1123,8 +1101,7 @@ function addDefsToWeightMap(map, defs) {
|
|
|
1123
1101
|
}
|
|
1124
1102
|
for (const _def of defs) {
|
|
1125
1103
|
const def = normalizeDef(_def);
|
|
1126
|
-
|
|
1127
|
-
mapSets.forEach((s) => addSet(s, def));
|
|
1104
|
+
splitMap$1(def).forEach((s) => addSet(s, def));
|
|
1128
1105
|
}
|
|
1129
1106
|
return map;
|
|
1130
1107
|
}
|
|
@@ -1182,8 +1159,7 @@ function splitMapSubstrings(map) {
|
|
|
1182
1159
|
*/
|
|
1183
1160
|
function splitMap$1(def) {
|
|
1184
1161
|
const { map } = def;
|
|
1185
|
-
|
|
1186
|
-
return sets.map(splitMapSubstrings).filter((s) => s.length > 0);
|
|
1162
|
+
return map.split("|").map(splitMapSubstrings).filter((s) => s.length > 0);
|
|
1187
1163
|
}
|
|
1188
1164
|
function addToTrieCost(trie, str, cost, penalties) {
|
|
1189
1165
|
if (!str) return;
|
|
@@ -1201,8 +1177,7 @@ function addToTrieTrieCost(trie, left, right, cost, penalties) {
|
|
|
1201
1177
|
const n = t.n = t.n || Object.create(null);
|
|
1202
1178
|
t = n[c] = n[c] || Object.create(null);
|
|
1203
1179
|
}
|
|
1204
|
-
|
|
1205
|
-
addToTrieCost(trieCost, right, cost, penalties);
|
|
1180
|
+
addToTrieCost(t.t = t.t || Object.create(null), right, cost, penalties);
|
|
1206
1181
|
}
|
|
1207
1182
|
function addSetToTrieCost(trie, set, cost, penalties) {
|
|
1208
1183
|
if (cost === void 0) return;
|
|
@@ -1403,12 +1378,11 @@ function _distanceAStarWeightedEx(wordA, wordB, map, cost = 100) {
|
|
|
1403
1378
|
c,
|
|
1404
1379
|
p
|
|
1405
1380
|
};
|
|
1406
|
-
|
|
1381
|
+
[
|
|
1407
1382
|
map.calcInsDelCosts(pos),
|
|
1408
1383
|
map.calcSwapCosts(pos),
|
|
1409
1384
|
map.calcReplaceCosts(pos)
|
|
1410
|
-
]
|
|
1411
|
-
costCalculations.forEach((iter) => {
|
|
1385
|
+
].forEach((iter) => {
|
|
1412
1386
|
for (const nn of iter) candidates.add({
|
|
1413
1387
|
...nn,
|
|
1414
1388
|
f: n
|
|
@@ -1437,7 +1411,6 @@ var CandidatePool = class {
|
|
|
1437
1411
|
next() {
|
|
1438
1412
|
let n;
|
|
1439
1413
|
while (n = this.pool.dequeue()) if (!n.d) return n;
|
|
1440
|
-
return void 0;
|
|
1441
1414
|
}
|
|
1442
1415
|
add(n) {
|
|
1443
1416
|
const i = idx(n.ai, n.bi, this.bN);
|
|
@@ -1628,8 +1601,7 @@ function createPerfTimer() {
|
|
|
1628
1601
|
at: 0,
|
|
1629
1602
|
elapsed: 0
|
|
1630
1603
|
});
|
|
1631
|
-
|
|
1632
|
-
return lines.join("\n");
|
|
1604
|
+
return lineElements.map((e) => `${e.at.padStart(lengths.at)} ${e.name.padEnd(lengths.name)} ${e.elapsed.padStart(lengths.elapsed)}`).join("\n");
|
|
1633
1605
|
}
|
|
1634
1606
|
function measureFn(name, fn) {
|
|
1635
1607
|
const s = start(name);
|
|
@@ -1675,8 +1647,7 @@ function isDefined$1(a) {
|
|
|
1675
1647
|
* @returns t
|
|
1676
1648
|
*/
|
|
1677
1649
|
function cleanCopy(t) {
|
|
1678
|
-
|
|
1679
|
-
return clean$1(r);
|
|
1650
|
+
return clean$1({ ...t });
|
|
1680
1651
|
}
|
|
1681
1652
|
/**
|
|
1682
1653
|
* Remove any fields with an `undefined` value.
|
|
@@ -1714,8 +1685,7 @@ function replaceAllFactory(match, replaceWithText) {
|
|
|
1714
1685
|
const defaultMaxNumberSuggestions = 10;
|
|
1715
1686
|
const BASE_COST = 100;
|
|
1716
1687
|
const MAX_NUM_CHANGES = 5;
|
|
1717
|
-
const
|
|
1718
|
-
const MAX_ALLOWED_COST_SCALE = 1.03 * MAX_COST_SCALE;
|
|
1688
|
+
const MAX_ALLOWED_COST_SCALE = 1.03 * .5;
|
|
1719
1689
|
const collator = new Intl.Collator();
|
|
1720
1690
|
const regexSeparator = new RegExp(`[${regexQuote(WORD_SEPARATOR)}]`, "g");
|
|
1721
1691
|
const wordLengthCost = [
|
|
@@ -1730,9 +1700,7 @@ const EXTRA_WORD_COST = 5;
|
|
|
1730
1700
|
const DEFAULT_COLLECTOR_TIMEOUT = 1e3;
|
|
1731
1701
|
const symStopProcessing = Symbol("Collector Stop Processing");
|
|
1732
1702
|
function compSuggestionResults(a, b) {
|
|
1733
|
-
|
|
1734
|
-
const bPref = b.isPreferred && -1 || 0;
|
|
1735
|
-
return aPref - bPref || a.cost - b.cost || a.word.length - b.word.length || collator.compare(a.word, b.word);
|
|
1703
|
+
return (a.isPreferred && -1 || 0) - (b.isPreferred && -1 || 0) || a.cost - b.cost || a.word.length - b.word.length || collator.compare(a.word, b.word);
|
|
1736
1704
|
}
|
|
1737
1705
|
const defaultSuggestionCollectorOptions = Object.freeze({
|
|
1738
1706
|
numSuggestions: defaultMaxNumberSuggestions,
|
|
@@ -1841,12 +1809,11 @@ function suggestionCollector(wordToMatch, options) {
|
|
|
1841
1809
|
const NF = "NFD";
|
|
1842
1810
|
const nWordToMatch = wordToMatch.normalize(NF);
|
|
1843
1811
|
const rawValues = [...sugs.values()];
|
|
1844
|
-
const
|
|
1812
|
+
const sorted = (weightMap ? rawValues.map(({ word, cost, isPreferred }) => ({
|
|
1845
1813
|
word,
|
|
1846
1814
|
cost: isPreferred ? cost : editDistanceWeighted(nWordToMatch, word.normalize(NF), weightMap, 110),
|
|
1847
1815
|
isPreferred
|
|
1848
|
-
})) : rawValues;
|
|
1849
|
-
const sorted = values.sort(compSuggestionResults).map(cleanCompoundResult);
|
|
1816
|
+
})) : rawValues).sort(compSuggestionResults).map(cleanCompoundResult);
|
|
1850
1817
|
let i = Math.min(sorted.length, numSuggestions) - 1;
|
|
1851
1818
|
const limit = includeTies ? sorted.length : Math.min(sorted.length, numSuggestions);
|
|
1852
1819
|
const iCost = sorted[i].cost;
|
|
@@ -1855,7 +1822,7 @@ function suggestionCollector(wordToMatch, options) {
|
|
|
1855
1822
|
sorted.length = i;
|
|
1856
1823
|
return sorted;
|
|
1857
1824
|
}
|
|
1858
|
-
|
|
1825
|
+
return {
|
|
1859
1826
|
collect,
|
|
1860
1827
|
add: function(suggestion) {
|
|
1861
1828
|
collectSuggestion(suggestion);
|
|
@@ -1881,7 +1848,6 @@ function suggestionCollector(wordToMatch, options) {
|
|
|
1881
1848
|
symbolStopProcessing: symStopProcessing,
|
|
1882
1849
|
genSuggestionOptions
|
|
1883
1850
|
};
|
|
1884
|
-
return collector;
|
|
1885
1851
|
}
|
|
1886
1852
|
/**
|
|
1887
1853
|
* Impersonating a Collector, allows searching for multiple variants on the same word.
|
|
@@ -1987,15 +1953,13 @@ function* getSuggestionsAStar(trie, srcWord, options = {}) {
|
|
|
1987
1953
|
return;
|
|
1988
1954
|
function compareSuggestion(a, b) {
|
|
1989
1955
|
const pa = a.isPreferred && 1 || 0;
|
|
1990
|
-
|
|
1991
|
-
return pb - pa || a.cost - b.cost || Math.abs(a.word.charCodeAt(0) - srcWord.charCodeAt(0)) - Math.abs(b.word.charCodeAt(0) - srcWord.charCodeAt(0));
|
|
1956
|
+
return (b.isPreferred && 1 || 0) - pa || a.cost - b.cost || Math.abs(a.word.charCodeAt(0) - srcWord.charCodeAt(0)) - Math.abs(b.word.charCodeAt(0) - srcWord.charCodeAt(0));
|
|
1992
1957
|
}
|
|
1993
1958
|
function processPath(p) {
|
|
1994
1959
|
const len = srcLetters.length;
|
|
1995
1960
|
if (p.n.eow && p.i === len) {
|
|
1996
|
-
const word = pNodeToWord(p);
|
|
1997
1961
|
const result = {
|
|
1998
|
-
word,
|
|
1962
|
+
word: pNodeToWord(p),
|
|
1999
1963
|
cost: p.c
|
|
2000
1964
|
};
|
|
2001
1965
|
resultHeap.add(result);
|
|
@@ -2020,14 +1984,12 @@ function* getSuggestionsAStar(trie, srcWord, options = {}) {
|
|
|
2020
1984
|
storePath(t, n, i + 1, cost, "", p, "d", "");
|
|
2021
1985
|
for (const [ss, node] of n.entries()) {
|
|
2022
1986
|
if (node.id === m?.id || ss in sc) continue;
|
|
2023
|
-
const
|
|
2024
|
-
const c = sg & g ? costVis : cost;
|
|
1987
|
+
const c = sg & (visMap[ss] || 0) ? costVis : cost;
|
|
2025
1988
|
storePath(t, node, i + 1, c, ss, p, "r", ss);
|
|
2026
1989
|
}
|
|
2027
1990
|
if (n.eow && i && compoundMethod) storePath(t, root, i, costLegacyCompound, wordSeparator, p, "L", wordSeparator);
|
|
2028
1991
|
if (ns) {
|
|
2029
|
-
const
|
|
2030
|
-
const n2 = n1?.get(s);
|
|
1992
|
+
const n2 = n.get(ns)?.get(s);
|
|
2031
1993
|
if (n2) {
|
|
2032
1994
|
const ss = ns + s;
|
|
2033
1995
|
storePath(t, n2, i + 2, cost0 + opCosts.swapCost, ss, p, "s", ss);
|
|
@@ -2047,7 +2009,6 @@ function* getSuggestionsAStar(trie, srcWord, options = {}) {
|
|
|
2047
2009
|
delLetters(p, weightMap$1, srcLetters, storePath);
|
|
2048
2010
|
insLetters(p, weightMap$1, srcLetters, storePath);
|
|
2049
2011
|
repLetters(p, weightMap$1, srcLetters, storePath);
|
|
2050
|
-
return;
|
|
2051
2012
|
}
|
|
2052
2013
|
/**
|
|
2053
2014
|
* Apply a cost to the current step.
|
|
@@ -2059,8 +2020,7 @@ function* getSuggestionsAStar(trie, srcWord, options = {}) {
|
|
|
2059
2020
|
*/
|
|
2060
2021
|
function storePath(t, n, i, c, s, p, a, ss) {
|
|
2061
2022
|
const tt = getCostTrie(t, ss);
|
|
2062
|
-
|
|
2063
|
-
if (curr <= c || c > limit) return void 0;
|
|
2023
|
+
if (tt.c[i] <= c || c > limit) return void 0;
|
|
2064
2024
|
tt.c[i] = c;
|
|
2065
2025
|
pathHeap.add({
|
|
2066
2026
|
n,
|
|
@@ -2271,12 +2231,11 @@ function caseForms(letter, locale) {
|
|
|
2271
2231
|
* @returns combined set of possible forms.
|
|
2272
2232
|
*/
|
|
2273
2233
|
function accentForms(letter) {
|
|
2274
|
-
|
|
2234
|
+
return new Set([
|
|
2275
2235
|
letter,
|
|
2276
2236
|
letter.normalize("NFC"),
|
|
2277
2237
|
letter.normalize("NFD")
|
|
2278
2238
|
]);
|
|
2279
|
-
return forms$1;
|
|
2280
2239
|
}
|
|
2281
2240
|
/**
|
|
2282
2241
|
* Remove all accents.
|
|
@@ -2297,8 +2256,7 @@ function stripNonAccents(characters) {
|
|
|
2297
2256
|
function isValidUtf16Character(char) {
|
|
2298
2257
|
const len = char.length;
|
|
2299
2258
|
const code = char.charCodeAt(0) & 64512;
|
|
2300
|
-
|
|
2301
|
-
return valid;
|
|
2259
|
+
return len === 1 && (code & 63488) !== 55296 || len === 2 && (code & 64512) === 55296 && (char.charCodeAt(1) & 64512) === 56320;
|
|
2302
2260
|
}
|
|
2303
2261
|
function assertValidUtf16Character(char) {
|
|
2304
2262
|
if (!isValidUtf16Character(char)) {
|
|
@@ -2360,17 +2318,17 @@ var Utf8Accumulator = class Utf8Accumulator {
|
|
|
2360
2318
|
if ((byte & 224) === 192) {
|
|
2361
2319
|
this.value = byte & 31;
|
|
2362
2320
|
this.remaining = 1;
|
|
2363
|
-
return
|
|
2321
|
+
return;
|
|
2364
2322
|
}
|
|
2365
2323
|
if ((byte & 240) === 224) {
|
|
2366
2324
|
this.value = byte & 15;
|
|
2367
2325
|
this.remaining = 2;
|
|
2368
|
-
return
|
|
2326
|
+
return;
|
|
2369
2327
|
}
|
|
2370
2328
|
if ((byte & 248) === 240) {
|
|
2371
2329
|
this.value = byte & 7;
|
|
2372
2330
|
this.remaining = 3;
|
|
2373
|
-
return
|
|
2331
|
+
return;
|
|
2374
2332
|
}
|
|
2375
2333
|
return this.reset();
|
|
2376
2334
|
}
|
|
@@ -2434,8 +2392,7 @@ function encodeTextToUtf8(text) {
|
|
|
2434
2392
|
|
|
2435
2393
|
//#endregion
|
|
2436
2394
|
//#region src/lib/TrieBlob/CharIndex.ts
|
|
2437
|
-
|
|
2438
|
-
Object.freeze(emptySeq);
|
|
2395
|
+
Object.freeze([0]);
|
|
2439
2396
|
var CharIndex = class {
|
|
2440
2397
|
#charToUtf8SeqMap;
|
|
2441
2398
|
#lastWord = "";
|
|
@@ -2680,8 +2637,7 @@ var FastTrieBlobINode = class FastTrieBlobINode {
|
|
|
2680
2637
|
entries() {
|
|
2681
2638
|
if (this._entries) return this._entries;
|
|
2682
2639
|
if (!this._count) return EmptyEntries$1;
|
|
2683
|
-
|
|
2684
|
-
this._entries = entries.map(([key, value]) => [key, new FastTrieBlobINode(this.trie, value)]);
|
|
2640
|
+
this._entries = this.getNodesEntries().map(([key, value]) => [key, new FastTrieBlobINode(this.trie, value)]);
|
|
2685
2641
|
return this._entries;
|
|
2686
2642
|
}
|
|
2687
2643
|
/** get child ITrieNode */
|
|
@@ -2696,16 +2652,14 @@ var FastTrieBlobINode = class FastTrieBlobINode {
|
|
|
2696
2652
|
return new FastTrieBlobINode(this.trie, idx$1);
|
|
2697
2653
|
}
|
|
2698
2654
|
has(char) {
|
|
2699
|
-
|
|
2700
|
-
return idx$1 !== void 0;
|
|
2655
|
+
return this.trie.nodeGetChild(this.id, char) !== void 0;
|
|
2701
2656
|
}
|
|
2702
2657
|
hasChildren() {
|
|
2703
2658
|
return this._count > 0;
|
|
2704
2659
|
}
|
|
2705
2660
|
child(keyIdx) {
|
|
2706
2661
|
if (!this._values && !this.containsChainedIndexes()) {
|
|
2707
|
-
const
|
|
2708
|
-
const nodeIdx = n >>> this.trie.NodeChildRefShift;
|
|
2662
|
+
const nodeIdx = this.node[keyIdx + 1] >>> this.trie.NodeChildRefShift;
|
|
2709
2663
|
return new FastTrieBlobINode(this.trie, nodeIdx);
|
|
2710
2664
|
}
|
|
2711
2665
|
return this.values()[keyIdx];
|
|
@@ -2743,8 +2697,7 @@ var FastTrieBlobINode = class FastTrieBlobINode {
|
|
|
2743
2697
|
const len = this._count;
|
|
2744
2698
|
const node = this.node;
|
|
2745
2699
|
for (let i = 1; i <= len && !found; ++i) {
|
|
2746
|
-
const
|
|
2747
|
-
const codePoint = entry & NodeMaskChildCharIndex;
|
|
2700
|
+
const codePoint = node[i] & NodeMaskChildCharIndex;
|
|
2748
2701
|
found = Utf8Accumulator.isMultiByte(codePoint);
|
|
2749
2702
|
}
|
|
2750
2703
|
this._chained = !!found;
|
|
@@ -2942,8 +2895,7 @@ var TrieBlobINode = class TrieBlobINode {
|
|
|
2942
2895
|
entries() {
|
|
2943
2896
|
if (this._entries) return this._entries;
|
|
2944
2897
|
if (!this._count) return EmptyEntries;
|
|
2945
|
-
|
|
2946
|
-
this._entries = entries.map(([key, value]) => [key, new TrieBlobINode(this.trie, value)]);
|
|
2898
|
+
this._entries = this.getNodesEntries().map(([key, value]) => [key, new TrieBlobINode(this.trie, value)]);
|
|
2947
2899
|
return this._entries;
|
|
2948
2900
|
}
|
|
2949
2901
|
/** get child ITrieNode */
|
|
@@ -2958,8 +2910,7 @@ var TrieBlobINode = class TrieBlobINode {
|
|
|
2958
2910
|
}
|
|
2959
2911
|
child(keyIdx) {
|
|
2960
2912
|
if (!this._values && !this.containsChainedIndexes()) {
|
|
2961
|
-
const
|
|
2962
|
-
const nodeIdx = n >>> this.trie.NodeChildRefShift;
|
|
2913
|
+
const nodeIdx = this.trie.nodes[this.nodeIdx + keyIdx + 1] >>> this.trie.NodeChildRefShift;
|
|
2963
2914
|
return new TrieBlobINode(this.trie, nodeIdx);
|
|
2964
2915
|
}
|
|
2965
2916
|
return this.values()[keyIdx];
|
|
@@ -3005,8 +2956,7 @@ var TrieBlobINode = class TrieBlobINode {
|
|
|
3005
2956
|
const nodes = this.trie.nodes;
|
|
3006
2957
|
const len = this._count;
|
|
3007
2958
|
for (let i = 0; i < len && !found; ++i) {
|
|
3008
|
-
const
|
|
3009
|
-
const charIdx = entry & NodeMaskChildCharIndex;
|
|
2959
|
+
const charIdx = nodes[i + offset] & NodeMaskChildCharIndex;
|
|
3010
2960
|
found = Utf8Accumulator.isMultiByte(charIdx);
|
|
3011
2961
|
}
|
|
3012
2962
|
this._chained = !!found;
|
|
@@ -3122,8 +3072,7 @@ var TrieBlobIRoot = class extends TrieBlobINode {
|
|
|
3122
3072
|
//#region src/lib/TrieBlob/TrieBlob.ts
|
|
3123
3073
|
const NodeHeaderNumChildrenBits = 8;
|
|
3124
3074
|
const NodeHeaderNumChildrenShift = 0;
|
|
3125
|
-
const
|
|
3126
|
-
const HEADER_SIZE = HEADER_SIZE_UINT32 * 4;
|
|
3075
|
+
const HEADER_SIZE = 32;
|
|
3127
3076
|
const HEADER_OFFSET = 0;
|
|
3128
3077
|
const HEADER_OFFSET_SIG = HEADER_OFFSET;
|
|
3129
3078
|
const HEADER_OFFSET_ENDIAN = HEADER_OFFSET_SIG + 8;
|
|
@@ -3131,7 +3080,6 @@ const HEADER_OFFSET_VERSION = HEADER_OFFSET_ENDIAN + 4;
|
|
|
3131
3080
|
const HEADER_OFFSET_NODES = HEADER_OFFSET_VERSION + 4;
|
|
3132
3081
|
const HEADER_OFFSET_NODES_LEN = HEADER_OFFSET_NODES + 4;
|
|
3133
3082
|
const HEADER_OFFSET_CHAR_INDEX = HEADER_OFFSET_NODES_LEN + 4;
|
|
3134
|
-
const HEADER_OFFSET_CHAR_INDEX_LEN = HEADER_OFFSET_CHAR_INDEX + 4;
|
|
3135
3083
|
const HEADER = {
|
|
3136
3084
|
header: HEADER_OFFSET,
|
|
3137
3085
|
sig: HEADER_OFFSET_SIG,
|
|
@@ -3140,7 +3088,7 @@ const HEADER = {
|
|
|
3140
3088
|
nodes: HEADER_OFFSET_NODES,
|
|
3141
3089
|
nodesLen: HEADER_OFFSET_NODES_LEN,
|
|
3142
3090
|
charIndex: HEADER_OFFSET_CHAR_INDEX,
|
|
3143
|
-
charIndexLen:
|
|
3091
|
+
charIndexLen: HEADER_OFFSET_CHAR_INDEX + 4
|
|
3144
3092
|
};
|
|
3145
3093
|
const headerSig = "TrieBlob";
|
|
3146
3094
|
const version = "00.01.00";
|
|
@@ -3194,8 +3142,7 @@ var TrieBlob = class TrieBlob {
|
|
|
3194
3142
|
*/
|
|
3195
3143
|
find(word, strict) {
|
|
3196
3144
|
if (!this.hasCompoundWords) {
|
|
3197
|
-
|
|
3198
|
-
if (found) return {
|
|
3145
|
+
if (this.#hasWord(0, word)) return {
|
|
3199
3146
|
found: word,
|
|
3200
3147
|
compoundUsed: false,
|
|
3201
3148
|
caseMatched: true
|
|
@@ -3211,13 +3158,12 @@ var TrieBlob = class TrieBlob {
|
|
|
3211
3158
|
caseMatched: false
|
|
3212
3159
|
};
|
|
3213
3160
|
}
|
|
3214
|
-
return void 0;
|
|
3215
3161
|
}
|
|
3216
3162
|
getRoot() {
|
|
3217
3163
|
return this.#iTrieRoot ??= this._getRoot();
|
|
3218
3164
|
}
|
|
3219
3165
|
_getRoot() {
|
|
3220
|
-
|
|
3166
|
+
return new TrieBlobIRoot(new TrieBlobInternals(this.nodes, this.charIndex, {
|
|
3221
3167
|
NodeMaskEOW: TrieBlob.NodeMaskEOW,
|
|
3222
3168
|
NodeMaskNumChildren: TrieBlob.NodeMaskNumChildren,
|
|
3223
3169
|
NodeMaskChildCharIndex: TrieBlob.NodeMaskChildCharIndex,
|
|
@@ -3231,8 +3177,7 @@ var TrieBlob = class TrieBlob {
|
|
|
3231
3177
|
hasCompoundWords: this.hasCompoundWords,
|
|
3232
3178
|
hasForbiddenWords: this.hasForbiddenWords,
|
|
3233
3179
|
hasNonStrictWords: this.hasNonStrictWords
|
|
3234
|
-
});
|
|
3235
|
-
return new TrieBlobIRoot(trieData, 0, this.info, { find: (word, strict) => this.find(word, strict) });
|
|
3180
|
+
}), 0, this.info, { find: (word, strict) => this.find(word, strict) });
|
|
3236
3181
|
}
|
|
3237
3182
|
getNode(prefix) {
|
|
3238
3183
|
return findNode$1(this.getRoot(), prefix);
|
|
@@ -3297,8 +3242,7 @@ var TrieBlob = class TrieBlob {
|
|
|
3297
3242
|
*/
|
|
3298
3243
|
_lookupNode(nodeIdx, char) {
|
|
3299
3244
|
const indexSeq = this.letterToNodeCharIndexSequence(char);
|
|
3300
|
-
|
|
3301
|
-
return currNodeIdx;
|
|
3245
|
+
return this.#lookupNode(nodeIdx, indexSeq);
|
|
3302
3246
|
}
|
|
3303
3247
|
*words() {
|
|
3304
3248
|
const NodeMaskNumChildren = TrieBlob.NodeMaskNumChildren;
|
|
@@ -3317,13 +3261,11 @@ var TrieBlob = class TrieBlob {
|
|
|
3317
3261
|
const { nodeIdx, pos, word, acc } = stack[depth];
|
|
3318
3262
|
const node = nodes[nodeIdx];
|
|
3319
3263
|
if (!pos && node & NodeMaskEOW) yield word;
|
|
3320
|
-
|
|
3321
|
-
if (pos >= len) {
|
|
3264
|
+
if (pos >= (node & NodeMaskNumChildren)) {
|
|
3322
3265
|
--depth;
|
|
3323
3266
|
continue;
|
|
3324
3267
|
}
|
|
3325
|
-
const
|
|
3326
|
-
const entry = nodes[nodeIdx + nextPos];
|
|
3268
|
+
const entry = nodes[nodeIdx + ++stack[depth].pos];
|
|
3327
3269
|
const nAcc = acc.clone();
|
|
3328
3270
|
const codePoint = nAcc.decode(entry & NodeMaskChildCharIndex);
|
|
3329
3271
|
const letter = codePoint && String.fromCodePoint(codePoint) || "";
|
|
@@ -3358,8 +3300,7 @@ var TrieBlob = class TrieBlob {
|
|
|
3358
3300
|
}
|
|
3359
3301
|
encodeBin() {
|
|
3360
3302
|
const charIndex = Buffer.from(this.charIndex.charIndex.join("\n"));
|
|
3361
|
-
const
|
|
3362
|
-
const nodeOffset = HEADER_SIZE + charIndexLen;
|
|
3303
|
+
const nodeOffset = HEADER_SIZE + (charIndex.byteLength + 3 & -4);
|
|
3363
3304
|
const size = nodeOffset + this.nodes.length * 4;
|
|
3364
3305
|
const useLittle = isLittleEndian();
|
|
3365
3306
|
const buffer$1 = Buffer.alloc(size);
|
|
@@ -3386,9 +3327,7 @@ var TrieBlob = class TrieBlob {
|
|
|
3386
3327
|
const offsetCharIndex = header.getUint32(HEADER.charIndex, useLittle);
|
|
3387
3328
|
const lenCharIndex = header.getUint32(HEADER.charIndexLen, useLittle);
|
|
3388
3329
|
const charIndex = Buffer.from(blob.subarray(offsetCharIndex, offsetCharIndex + lenCharIndex)).toString("utf8").split("\n");
|
|
3389
|
-
|
|
3390
|
-
const trieBlob = new TrieBlob(nodes, new CharIndex(charIndex), defaultTrieInfo);
|
|
3391
|
-
return trieBlob;
|
|
3330
|
+
return new TrieBlob(new Uint32Array(blob.buffer, offsetNodes, lenNodes), new CharIndex(charIndex), defaultTrieInfo);
|
|
3392
3331
|
}
|
|
3393
3332
|
static NodeMaskEOW = 256;
|
|
3394
3333
|
static NodeMaskNumChildren = (1 << NodeHeaderNumChildrenBits) - 1 & 65535;
|
|
@@ -3413,13 +3352,11 @@ function isLittleEndian() {
|
|
|
3413
3352
|
3,
|
|
3414
3353
|
4
|
|
3415
3354
|
]);
|
|
3416
|
-
|
|
3417
|
-
return view.getUint32(0, true) === 67305985;
|
|
3355
|
+
return new DataView(buf.buffer).getUint32(0, true) === 67305985;
|
|
3418
3356
|
}
|
|
3419
3357
|
function checkSig(blob) {
|
|
3420
3358
|
if (blob.length < HEADER_SIZE) return false;
|
|
3421
|
-
|
|
3422
|
-
if (buf.toString("utf8", 0, 8) !== headerSig) return false;
|
|
3359
|
+
if (Buffer.from(blob, 0, 8).toString("utf8", 0, 8) !== headerSig) return false;
|
|
3423
3360
|
return true;
|
|
3424
3361
|
}
|
|
3425
3362
|
var ErrorDecodeTrieBlob = class extends Error {
|
|
@@ -3476,8 +3413,7 @@ function trieBlobSort(data) {
|
|
|
3476
3413
|
last = cIdx;
|
|
3477
3414
|
}
|
|
3478
3415
|
if (i === end) continue;
|
|
3479
|
-
|
|
3480
|
-
sorted.forEach((v, i$1) => data[start + i$1] = v);
|
|
3416
|
+
data.slice(start, end).sort((a, b) => (a & MaskChildCharIndex) - (b & MaskChildCharIndex)).forEach((v, i$1) => data[start + i$1] = v);
|
|
3481
3417
|
}
|
|
3482
3418
|
}
|
|
3483
3419
|
|
|
@@ -3526,8 +3462,7 @@ var FastTrieBlob = class FastTrieBlob {
|
|
|
3526
3462
|
const charIndexes = this.wordToUtf8Seq(word);
|
|
3527
3463
|
const found = this.#lookupNode(nodeIdx, charIndexes);
|
|
3528
3464
|
if (found === void 0) return false;
|
|
3529
|
-
|
|
3530
|
-
return !!(node[0] & this.bitMasksInfo.NodeMaskEOW);
|
|
3465
|
+
return !!(this.nodes[found][0] & this.bitMasksInfo.NodeMaskEOW);
|
|
3531
3466
|
}
|
|
3532
3467
|
/**
|
|
3533
3468
|
* Find the node index for the given Utf8 character sequence.
|
|
@@ -3565,26 +3500,24 @@ var FastTrieBlob = class FastTrieBlob {
|
|
|
3565
3500
|
const NodeChildRefShift = this.bitMasksInfo.NodeChildRefShift;
|
|
3566
3501
|
const NodeMaskEOW = this.bitMasksInfo.NodeMaskEOW;
|
|
3567
3502
|
const nodes = this.nodes;
|
|
3568
|
-
const accumulator = Utf8Accumulator.create();
|
|
3569
3503
|
const stack = [{
|
|
3570
3504
|
nodeIdx: 0,
|
|
3571
3505
|
pos: 0,
|
|
3572
3506
|
word: "",
|
|
3573
|
-
accumulator
|
|
3507
|
+
accumulator: Utf8Accumulator.create()
|
|
3574
3508
|
}];
|
|
3575
3509
|
let depth = 0;
|
|
3576
3510
|
while (depth >= 0) {
|
|
3577
|
-
const { nodeIdx, pos, word, accumulator
|
|
3511
|
+
const { nodeIdx, pos, word, accumulator } = stack[depth];
|
|
3578
3512
|
const node = nodes[nodeIdx];
|
|
3579
3513
|
if (!pos && node[0] & NodeMaskEOW) yield word;
|
|
3580
3514
|
if (pos >= node.length - 1) {
|
|
3581
3515
|
--depth;
|
|
3582
3516
|
continue;
|
|
3583
3517
|
}
|
|
3584
|
-
const
|
|
3585
|
-
const entry = node[nextPos];
|
|
3518
|
+
const entry = node[++stack[depth].pos];
|
|
3586
3519
|
const charIdx = entry & NodeMaskChildCharIndex;
|
|
3587
|
-
const acc = accumulator
|
|
3520
|
+
const acc = accumulator.clone();
|
|
3588
3521
|
const codePoint = acc.decode(charIdx);
|
|
3589
3522
|
const letter = codePoint && String.fromCodePoint(codePoint) || "";
|
|
3590
3523
|
++depth;
|
|
@@ -3685,13 +3618,10 @@ var FastTrieBlob = class FastTrieBlob {
|
|
|
3685
3618
|
for (let p = 1; p < n.length; ++p) {
|
|
3686
3619
|
const v = n[p];
|
|
3687
3620
|
const cIdx = v & this.bitMasksInfo.NodeMaskChildCharIndex;
|
|
3688
|
-
const
|
|
3689
|
-
const codePoint = a.decode(cIdx);
|
|
3690
|
-
const c = codePoint !== void 0 ? String.fromCodePoint(codePoint) : "∎";
|
|
3691
|
-
const i = v >>> this.bitMasksInfo.NodeChildRefShift;
|
|
3621
|
+
const codePoint = acc.clone().decode(cIdx);
|
|
3692
3622
|
children[p] = {
|
|
3693
|
-
c,
|
|
3694
|
-
i,
|
|
3623
|
+
c: codePoint !== void 0 ? String.fromCodePoint(codePoint) : "∎",
|
|
3624
|
+
i: v >>> this.bitMasksInfo.NodeChildRefShift,
|
|
3695
3625
|
cIdx
|
|
3696
3626
|
};
|
|
3697
3627
|
}
|
|
@@ -3914,13 +3844,12 @@ var FastTrieBlobBuilder = class FastTrieBlobBuilder {
|
|
|
3914
3844
|
for (let n = num; n > 0; --n) depth = stack[depth].pDepth;
|
|
3915
3845
|
nodeIdx = stack[depth + 1].nodeIdx;
|
|
3916
3846
|
};
|
|
3917
|
-
|
|
3847
|
+
return {
|
|
3918
3848
|
insertChar,
|
|
3919
3849
|
markEOW,
|
|
3920
3850
|
reference,
|
|
3921
3851
|
backStep
|
|
3922
3852
|
};
|
|
3923
|
-
return c;
|
|
3924
3853
|
}
|
|
3925
3854
|
_insert(word) {
|
|
3926
3855
|
word = word.trim();
|
|
@@ -3936,8 +3865,7 @@ var FastTrieBlobBuilder = class FastTrieBlobBuilder {
|
|
|
3936
3865
|
for (let p = 0; p < len; ++p) {
|
|
3937
3866
|
const seq = utf8Seq[p];
|
|
3938
3867
|
const node = nodes[nodeIdx];
|
|
3939
|
-
|
|
3940
|
-
let i = count - 1;
|
|
3868
|
+
let i = node.length - 1;
|
|
3941
3869
|
for (; i > 0; --i) if ((node[i] & NodeMaskChildCharIndex) === seq) break;
|
|
3942
3870
|
if (i > 0) {
|
|
3943
3871
|
nodeIdx = node[i] >>> NodeChildRefShift;
|
|
@@ -3967,8 +3895,7 @@ var FastTrieBlobBuilder = class FastTrieBlobBuilder {
|
|
|
3967
3895
|
let node = nodes[nodeIdx];
|
|
3968
3896
|
for (let p = 0; p < len; ++p, node = nodes[nodeIdx]) {
|
|
3969
3897
|
const letterIdx = charIndexes[p];
|
|
3970
|
-
|
|
3971
|
-
let i = count - 1;
|
|
3898
|
+
let i = node.length - 1;
|
|
3972
3899
|
for (; i > 0; --i) if ((node[i] & NodeMaskChildCharIndex) === letterIdx) break;
|
|
3973
3900
|
if (i < 1) return false;
|
|
3974
3901
|
nodeIdx = node[i] >>> NodeChildRefShift;
|
|
@@ -3998,8 +3925,7 @@ var FastTrieBlobBuilder = class FastTrieBlobBuilder {
|
|
|
3998
3925
|
assert(!this.isReadonly(), "FastTrieBlobBuilder is readonly");
|
|
3999
3926
|
}
|
|
4000
3927
|
static fromWordList(words, options) {
|
|
4001
|
-
|
|
4002
|
-
return ft.insert(words).build();
|
|
3928
|
+
return new FastTrieBlobBuilder(options).insert(words).build();
|
|
4003
3929
|
}
|
|
4004
3930
|
static fromTrieRoot(root) {
|
|
4005
3931
|
const bitMasksInfo = FastTrieBlobBuilder.DefaultBitMaskInfo;
|
|
@@ -4040,8 +3966,7 @@ var FastTrieBlobBuilder = class FastTrieBlobBuilder {
|
|
|
4040
3966
|
if (pos < node.length) node = tf.nodes[node[pos] >>> NodeChildRefShift];
|
|
4041
3967
|
else {
|
|
4042
3968
|
const next = [0];
|
|
4043
|
-
|
|
4044
|
-
node[pos] = nodeIdx << NodeChildRefShift | idx$1;
|
|
3969
|
+
node[pos] = tf.nodes.push(next) - 1 << NodeChildRefShift | idx$1;
|
|
4045
3970
|
node = next;
|
|
4046
3971
|
}
|
|
4047
3972
|
}
|
|
@@ -4072,7 +3997,7 @@ function clean(t) {
|
|
|
4072
3997
|
|
|
4073
3998
|
//#endregion
|
|
4074
3999
|
//#region src/lib/ITrie.ts
|
|
4075
|
-
const defaultLegacyMinCompoundLength$
|
|
4000
|
+
const defaultLegacyMinCompoundLength$1 = 3;
|
|
4076
4001
|
var ITrieImpl = class ITrieImpl {
|
|
4077
4002
|
_info;
|
|
4078
4003
|
root;
|
|
@@ -4119,10 +4044,7 @@ var ITrieImpl = class ITrieImpl {
|
|
|
4119
4044
|
}
|
|
4120
4045
|
has(word, minLegacyCompoundLength) {
|
|
4121
4046
|
if (this.hasWord(word, false)) return true;
|
|
4122
|
-
if (minLegacyCompoundLength) {
|
|
4123
|
-
const f = this.findWord(word, { useLegacyWordCompounds: minLegacyCompoundLength });
|
|
4124
|
-
return !!f.found;
|
|
4125
|
-
}
|
|
4047
|
+
if (minLegacyCompoundLength) return !!this.findWord(word, { useLegacyWordCompounds: minLegacyCompoundLength }).found;
|
|
4126
4048
|
return false;
|
|
4127
4049
|
}
|
|
4128
4050
|
/**
|
|
@@ -4132,15 +4054,14 @@ var ITrieImpl = class ITrieImpl {
|
|
|
4132
4054
|
* @returns true if the word was found and is not forbidden.
|
|
4133
4055
|
*/
|
|
4134
4056
|
hasWord(word, caseSensitive) {
|
|
4135
|
-
|
|
4057
|
+
return !!this.findWord(word, {
|
|
4136
4058
|
caseSensitive,
|
|
4137
4059
|
checkForbidden: false
|
|
4138
|
-
});
|
|
4139
|
-
return !!f.found;
|
|
4060
|
+
}).found;
|
|
4140
4061
|
}
|
|
4141
4062
|
findWord(word, options) {
|
|
4142
4063
|
if (options?.useLegacyWordCompounds) {
|
|
4143
|
-
const len = options.useLegacyWordCompounds !== true ? options.useLegacyWordCompounds : defaultLegacyMinCompoundLength$
|
|
4064
|
+
const len = options.useLegacyWordCompounds !== true ? options.useLegacyWordCompounds : defaultLegacyMinCompoundLength$1;
|
|
4144
4065
|
const findOptions = this.createFindOptions({
|
|
4145
4066
|
legacyMinCompoundLength: len,
|
|
4146
4067
|
matchCase: options.caseSensitive || false
|
|
@@ -4229,12 +4150,10 @@ var ITrieImpl = class ITrieImpl {
|
|
|
4229
4150
|
static create(words, info) {
|
|
4230
4151
|
const builder = new FastTrieBlobBuilder(info);
|
|
4231
4152
|
builder.insert(words);
|
|
4232
|
-
|
|
4233
|
-
return new ITrieImpl(root, void 0);
|
|
4153
|
+
return new ITrieImpl(builder.build(), void 0);
|
|
4234
4154
|
}
|
|
4235
4155
|
createFindOptions(options) {
|
|
4236
|
-
|
|
4237
|
-
return findOptions;
|
|
4156
|
+
return createFindOptions$1(options);
|
|
4238
4157
|
}
|
|
4239
4158
|
};
|
|
4240
4159
|
|
|
@@ -4304,9 +4223,8 @@ function iteratorTrieWords(node) {
|
|
|
4304
4223
|
return walkerWords(node);
|
|
4305
4224
|
}
|
|
4306
4225
|
function createTrieRoot(options) {
|
|
4307
|
-
const fullOptions = mergeOptionalWithDefaults(options);
|
|
4308
4226
|
return {
|
|
4309
|
-
...
|
|
4227
|
+
...mergeOptionalWithDefaults(options),
|
|
4310
4228
|
c: Object.create(null)
|
|
4311
4229
|
};
|
|
4312
4230
|
}
|
|
@@ -4366,15 +4284,13 @@ function checkCircular(root) {
|
|
|
4366
4284
|
};
|
|
4367
4285
|
if (inStack.has(n)) {
|
|
4368
4286
|
const stack = [...inStack, n];
|
|
4369
|
-
const word = trieStackToWord(stack);
|
|
4370
|
-
const pos = stack.indexOf(n);
|
|
4371
4287
|
return {
|
|
4372
4288
|
isCircular: true,
|
|
4373
4289
|
allSeen: false,
|
|
4374
4290
|
ref: {
|
|
4375
4291
|
stack,
|
|
4376
|
-
word,
|
|
4377
|
-
pos
|
|
4292
|
+
word: trieStackToWord(stack),
|
|
4293
|
+
pos: stack.indexOf(n)
|
|
4378
4294
|
}
|
|
4379
4295
|
};
|
|
4380
4296
|
}
|
|
@@ -4413,9 +4329,8 @@ function isCircular(root) {
|
|
|
4413
4329
|
return checkCircular(root).isCircular;
|
|
4414
4330
|
}
|
|
4415
4331
|
function trieNodeToRoot(node, options) {
|
|
4416
|
-
const newOptions = mergeOptionalWithDefaults(options);
|
|
4417
4332
|
return {
|
|
4418
|
-
...
|
|
4333
|
+
...mergeOptionalWithDefaults(options),
|
|
4419
4334
|
c: node.c || Object.create(null)
|
|
4420
4335
|
};
|
|
4421
4336
|
}
|
|
@@ -4433,9 +4348,7 @@ function consolidate(root) {
|
|
|
4433
4348
|
const knownMap = /* @__PURE__ */ new Map();
|
|
4434
4349
|
if (isCircular(root)) throw new Error("Trie is circular.");
|
|
4435
4350
|
function signature$1(n) {
|
|
4436
|
-
|
|
4437
|
-
const ref = n.c ? JSON.stringify(Object.entries(n.c).map(([k, n$1]) => [k, cached.get(n$1)])) : "";
|
|
4438
|
-
return isWord + ref;
|
|
4351
|
+
return (n.f ? "*" : "") + (n.c ? JSON.stringify(Object.entries(n.c).map(([k, n$1]) => [k, cached.get(n$1)])) : "");
|
|
4439
4352
|
}
|
|
4440
4353
|
function findEow(n) {
|
|
4441
4354
|
if (n.f && !n.c) return n;
|
|
@@ -4500,21 +4413,19 @@ function consolidate(root) {
|
|
|
4500
4413
|
|
|
4501
4414
|
//#endregion
|
|
4502
4415
|
//#region src/lib/TrieNode/find.ts
|
|
4503
|
-
const defaultLegacyMinCompoundLength$1 = 3;
|
|
4504
4416
|
const _defaultFindOptions = {
|
|
4505
4417
|
matchCase: false,
|
|
4506
4418
|
compoundMode: "compound",
|
|
4507
4419
|
forbidPrefix: FORBID_PREFIX,
|
|
4508
4420
|
compoundFix: COMPOUND_FIX,
|
|
4509
4421
|
caseInsensitivePrefix: CASE_INSENSITIVE_PREFIX,
|
|
4510
|
-
legacyMinCompoundLength:
|
|
4422
|
+
legacyMinCompoundLength: 3
|
|
4511
4423
|
};
|
|
4512
|
-
const
|
|
4424
|
+
const knownCompoundModes = new Map([
|
|
4513
4425
|
"none",
|
|
4514
4426
|
"compound",
|
|
4515
4427
|
"legacy"
|
|
4516
|
-
];
|
|
4517
|
-
const knownCompoundModes = new Map(arrayCompoundModes.map((a) => [a, a]));
|
|
4428
|
+
].map((a) => [a, a]));
|
|
4518
4429
|
/**
|
|
4519
4430
|
*
|
|
4520
4431
|
* @param root Trie root node. root.c contains the compound root and forbidden root.
|
|
@@ -4556,23 +4467,18 @@ function _findWordNode(root, word, options) {
|
|
|
4556
4467
|
function __findCompound() {
|
|
4557
4468
|
const f = findCompoundWord(root, word, compoundPrefix, ignoreCasePrefix);
|
|
4558
4469
|
const result = { ...f };
|
|
4559
|
-
if (f.found !== false && f.compoundUsed)
|
|
4560
|
-
const r = !f.caseMatched ? walk$1(root, options.caseInsensitivePrefix) : root;
|
|
4561
|
-
result.forbidden = isForbiddenWord(r, word, options.forbidPrefix);
|
|
4562
|
-
}
|
|
4470
|
+
if (f.found !== false && f.compoundUsed) result.forbidden = isForbiddenWord(!f.caseMatched ? walk$1(root, options.caseInsensitivePrefix) : root, word, options.forbidPrefix);
|
|
4563
4471
|
return result;
|
|
4564
4472
|
}
|
|
4565
4473
|
function __findExact() {
|
|
4566
4474
|
const n = walk$1(root, word);
|
|
4567
|
-
|
|
4568
|
-
|
|
4569
|
-
found: isFound && word,
|
|
4475
|
+
return {
|
|
4476
|
+
found: isEndOfWordNode(n) && word,
|
|
4570
4477
|
compoundUsed: false,
|
|
4571
4478
|
forbidden: isForbiddenWord(root, word, options.forbidPrefix),
|
|
4572
4479
|
node: n,
|
|
4573
4480
|
caseMatched: true
|
|
4574
4481
|
};
|
|
4575
|
-
return result;
|
|
4576
4482
|
}
|
|
4577
4483
|
switch (compoundMode) {
|
|
4578
4484
|
case "none": return options.matchCase ? __findExact() : __findCompound();
|
|
@@ -4616,8 +4522,7 @@ function findCompoundNode(root, word, compoundCharacter, ignoreCasePrefix) {
|
|
|
4616
4522
|
while (true) {
|
|
4617
4523
|
const s = stack[i];
|
|
4618
4524
|
const h = w[i++];
|
|
4619
|
-
const
|
|
4620
|
-
const c = n?.c?.[h];
|
|
4525
|
+
const c = (s.cr || s.n)?.c?.[h];
|
|
4621
4526
|
if (c && i < word.length) {
|
|
4622
4527
|
caseMatched = s.caseMatched;
|
|
4623
4528
|
stack[i] = {
|
|
@@ -4646,15 +4551,13 @@ function findCompoundNode(root, word, compoundCharacter, ignoreCasePrefix) {
|
|
|
4646
4551
|
break;
|
|
4647
4552
|
}
|
|
4648
4553
|
}
|
|
4649
|
-
|
|
4650
|
-
|
|
4651
|
-
found,
|
|
4554
|
+
return {
|
|
4555
|
+
found: i && i === word.length && word || false,
|
|
4652
4556
|
compoundUsed,
|
|
4653
4557
|
node,
|
|
4654
4558
|
forbidden: void 0,
|
|
4655
4559
|
caseMatched
|
|
4656
4560
|
};
|
|
4657
|
-
return result;
|
|
4658
4561
|
}
|
|
4659
4562
|
function findCompoundWord(root, word, compoundCharacter, ignoreCasePrefix) {
|
|
4660
4563
|
const { found, compoundUsed, node, caseMatched } = findCompoundNode(root, word, compoundCharacter, ignoreCasePrefix);
|
|
@@ -4709,8 +4612,7 @@ function findLegacyCompoundNode(roots, word, minCompoundLength) {
|
|
|
4709
4612
|
while (true) {
|
|
4710
4613
|
const s = stack[i];
|
|
4711
4614
|
const h = w[i++];
|
|
4712
|
-
const
|
|
4713
|
-
const c = n?.c?.[h];
|
|
4615
|
+
const c = (s.cr || s.n)?.c?.[h];
|
|
4714
4616
|
if (c && i < wLen) stack[i] = {
|
|
4715
4617
|
n: c,
|
|
4716
4618
|
usedRoots: 0,
|
|
@@ -4750,15 +4652,13 @@ function findLegacyCompoundNode(roots, word, minCompoundLength) {
|
|
|
4750
4652
|
}
|
|
4751
4653
|
return letters.join("");
|
|
4752
4654
|
}
|
|
4753
|
-
|
|
4754
|
-
|
|
4755
|
-
found,
|
|
4655
|
+
return {
|
|
4656
|
+
found: extractWord(),
|
|
4756
4657
|
compoundUsed,
|
|
4757
4658
|
node,
|
|
4758
4659
|
forbidden: void 0,
|
|
4759
4660
|
caseMatched
|
|
4760
4661
|
};
|
|
4761
|
-
return result;
|
|
4762
4662
|
}
|
|
4763
4663
|
function isForbiddenWord(root, word, forbiddenPrefix) {
|
|
4764
4664
|
return findWordExact(root?.c?.[forbiddenPrefix], word);
|
|
@@ -4807,12 +4707,10 @@ var TrieNodeTrie = class TrieNodeTrie {
|
|
|
4807
4707
|
return this._size ??= countNodes(this.root);
|
|
4808
4708
|
}
|
|
4809
4709
|
static createFromWords(words, options) {
|
|
4810
|
-
|
|
4811
|
-
return new TrieNodeTrie(root);
|
|
4710
|
+
return new TrieNodeTrie(createTrieRootFromList(words, options));
|
|
4812
4711
|
}
|
|
4813
4712
|
static createFromWordsAndConsolidate(words, options) {
|
|
4814
|
-
|
|
4815
|
-
return new TrieNodeTrie(consolidate(root));
|
|
4713
|
+
return new TrieNodeTrie(consolidate(createTrieRootFromList(words, options)));
|
|
4816
4714
|
}
|
|
4817
4715
|
};
|
|
4818
4716
|
|
|
@@ -4875,11 +4773,10 @@ function convertToTrieRefNodes(root) {
|
|
|
4875
4773
|
function convert(n) {
|
|
4876
4774
|
const { f, c } = n;
|
|
4877
4775
|
const r = c ? Object.entries(c).sort((a, b) => a[0] < b[0] ? -1 : 1).map(([s, n$1]) => [s, cached.get(n$1)]) : void 0;
|
|
4878
|
-
|
|
4776
|
+
return r ? f ? {
|
|
4879
4777
|
f,
|
|
4880
4778
|
r
|
|
4881
4779
|
} : { r } : { f };
|
|
4882
|
-
return rn;
|
|
4883
4780
|
}
|
|
4884
4781
|
function* walk$3(root$1) {
|
|
4885
4782
|
cached.set(eow, count++);
|
|
@@ -4919,7 +4816,7 @@ function trieToExportString(node, base) {
|
|
|
4919
4816
|
return genSequence(walk$3(node));
|
|
4920
4817
|
}
|
|
4921
4818
|
function generateHeader$3(base, comment) {
|
|
4922
|
-
|
|
4819
|
+
return genSequence([
|
|
4923
4820
|
...[
|
|
4924
4821
|
"#!/usr/bin/env cspell-trie reader",
|
|
4925
4822
|
"TrieXv1",
|
|
@@ -4927,8 +4824,7 @@ function generateHeader$3(base, comment) {
|
|
|
4927
4824
|
],
|
|
4928
4825
|
...comment ? comment.split("\n").map((a) => "# " + a) : [],
|
|
4929
4826
|
...["# Data:"]
|
|
4930
|
-
];
|
|
4931
|
-
return genSequence(header).map((a) => a + "\n");
|
|
4827
|
+
]).map((a) => a + "\n");
|
|
4932
4828
|
}
|
|
4933
4829
|
/**
|
|
4934
4830
|
* Serialize a TrieNode.
|
|
@@ -4941,8 +4837,7 @@ function serializeTrie$4(root, options = 16) {
|
|
|
4941
4837
|
const { base = 16, comment = "" } = options;
|
|
4942
4838
|
const radix = base > 36 ? 36 : base < 10 ? 10 : base;
|
|
4943
4839
|
const rows = toReferences(root).map((node) => {
|
|
4944
|
-
|
|
4945
|
-
return row;
|
|
4840
|
+
return [...trieToExportString(node, radix), "\n"].join("").replaceAll(regExTrailingComma, "$1");
|
|
4946
4841
|
});
|
|
4947
4842
|
return generateHeader$3(radix, comment).concat(rows);
|
|
4948
4843
|
}
|
|
@@ -4977,22 +4872,20 @@ function importTrie$5(linesX) {
|
|
|
4977
4872
|
const regUnescape = /[\\](.)/g;
|
|
4978
4873
|
const flagsWord = { f: FLAG_WORD };
|
|
4979
4874
|
function splitLine$1(line) {
|
|
4980
|
-
|
|
4981
|
-
return line.replaceAll(regNotEscapedCommas, pattern).split(regUnescapeCommas).map((a) => a.replaceAll(regUnescape, "$1"));
|
|
4875
|
+
return line.replaceAll(regNotEscapedCommas, "$1__COMMA__").split(regUnescapeCommas).map((a) => a.replaceAll(regUnescape, "$1"));
|
|
4982
4876
|
}
|
|
4983
4877
|
function decodeLine$1(line, nodes) {
|
|
4984
4878
|
const isWord = line[0] === EOW$3;
|
|
4985
4879
|
line = isWord ? line.slice(1) : line;
|
|
4986
4880
|
const flags = isWord ? flagsWord : {};
|
|
4987
4881
|
const children = splitLine$1(line).filter((a) => !!a).map((a) => [a[0], Number.parseInt(a.slice(1) || "0", radix)]).map(([k, i]) => [k, nodes[i]]);
|
|
4988
|
-
const cNode = children.length ? { c: Object.fromEntries(children) } : {};
|
|
4989
4882
|
return {
|
|
4990
|
-
...
|
|
4883
|
+
...children.length ? { c: Object.fromEntries(children) } : {},
|
|
4991
4884
|
...flags
|
|
4992
4885
|
};
|
|
4993
4886
|
}
|
|
4994
4887
|
readHeader(iter);
|
|
4995
|
-
|
|
4888
|
+
return trieNodeToRoot(genSequence([DATA$4]).concat(iter).map((a) => a.replace(/\r?\n/, "")).filter((a) => !!a).reduce((acc, line) => {
|
|
4996
4889
|
const { lines, nodes } = acc;
|
|
4997
4890
|
const root = decodeLine$1(line, nodes);
|
|
4998
4891
|
nodes[lines] = root;
|
|
@@ -5005,8 +4898,7 @@ function importTrie$5(linesX) {
|
|
|
5005
4898
|
lines: 0,
|
|
5006
4899
|
nodes: [],
|
|
5007
4900
|
root: {}
|
|
5008
|
-
});
|
|
5009
|
-
return trieNodeToRoot(n.root, { isCaseAware: false });
|
|
4901
|
+
}).root, { isCaseAware: false });
|
|
5010
4902
|
}
|
|
5011
4903
|
|
|
5012
4904
|
//#endregion
|
|
@@ -5072,15 +4964,14 @@ function toLine(node, base) {
|
|
|
5072
4964
|
return node.s + flags + refs;
|
|
5073
4965
|
}
|
|
5074
4966
|
function generateHeader$2(base, comment) {
|
|
5075
|
-
|
|
4967
|
+
return genSequence([
|
|
5076
4968
|
"#!/usr/bin/env cspell-trie reader",
|
|
5077
4969
|
"TrieXv2",
|
|
5078
4970
|
"base=" + base,
|
|
5079
4971
|
...comment ? comment.split("\n").map((a) => "# " + a) : [],
|
|
5080
4972
|
"# Data:",
|
|
5081
4973
|
DATA$3
|
|
5082
|
-
];
|
|
5083
|
-
return genSequence(header);
|
|
4974
|
+
]);
|
|
5084
4975
|
}
|
|
5085
4976
|
/**
|
|
5086
4977
|
* Serialize a TrieNode.
|
|
@@ -5092,11 +4983,10 @@ function serializeTrie$3(root, options = 16) {
|
|
|
5092
4983
|
options = typeof options === "number" ? { base: options } : options;
|
|
5093
4984
|
const { base = 16, comment = "" } = options;
|
|
5094
4985
|
const radix = base > 36 ? 36 : base < 10 ? 10 : base;
|
|
5095
|
-
const
|
|
4986
|
+
const rows = flattenToReferences({
|
|
5096
4987
|
...root,
|
|
5097
4988
|
s: "^"
|
|
5098
|
-
};
|
|
5099
|
-
const rows = flattenToReferences(rootRef).map((n) => toLine(n, base));
|
|
4989
|
+
}).map((n) => toLine(n, base));
|
|
5100
4990
|
return generateHeader$2(radix, comment).concat(rows).map((a) => a + "\n");
|
|
5101
4991
|
}
|
|
5102
4992
|
function* toIterableIterator(iter) {
|
|
@@ -5139,16 +5029,15 @@ function importTrie$4(linesX) {
|
|
|
5139
5029
|
function decodeLine$1(line, nodes) {
|
|
5140
5030
|
const { letter, isWord, refs } = parseLine(line, radix);
|
|
5141
5031
|
const flags = isWord ? flagsWord : {};
|
|
5142
|
-
const children = refs.map((r) => nodes[r]).sort((a, b) => a.s < b.s ? -1 : 1).map((n
|
|
5143
|
-
const cNode = children.length ? { c: Object.fromEntries(children) } : {};
|
|
5032
|
+
const children = refs.map((r) => nodes[r]).sort((a, b) => a.s < b.s ? -1 : 1).map((n) => [n.s, n]);
|
|
5144
5033
|
return {
|
|
5145
5034
|
s: letter,
|
|
5146
|
-
...
|
|
5035
|
+
...children.length ? { c: Object.fromEntries(children) } : {},
|
|
5147
5036
|
...flags
|
|
5148
5037
|
};
|
|
5149
5038
|
}
|
|
5150
5039
|
readHeader(iter);
|
|
5151
|
-
|
|
5040
|
+
return trieNodeToRoot(genSequence(iter).map((a) => a.replace(/\r?\n/, "")).filter((a) => !!a).reduce((acc, line) => {
|
|
5152
5041
|
const { nodes } = acc;
|
|
5153
5042
|
const root = decodeLine$1(line, nodes);
|
|
5154
5043
|
nodes.push(root);
|
|
@@ -5162,8 +5051,7 @@ function importTrie$4(linesX) {
|
|
|
5162
5051
|
s: "",
|
|
5163
5052
|
c: Object.create(null)
|
|
5164
5053
|
}
|
|
5165
|
-
});
|
|
5166
|
-
return trieNodeToRoot(n.root, { isCaseAware: false });
|
|
5054
|
+
}).root, { isCaseAware: false });
|
|
5167
5055
|
}
|
|
5168
5056
|
|
|
5169
5057
|
//#endregion
|
|
@@ -5237,12 +5125,11 @@ const specialPrefix$1 = stringToCharSet$2("~!");
|
|
|
5237
5125
|
const WORDS_PER_LINE$1 = 20;
|
|
5238
5126
|
const DATA$2 = "__DATA__";
|
|
5239
5127
|
function generateHeader$1(base, comment) {
|
|
5240
|
-
const comments = comment.split("\n").map((a) => "# " + a.trimEnd()).join("\n");
|
|
5241
5128
|
return `\
|
|
5242
5129
|
#!/usr/bin/env cspell-trie reader
|
|
5243
5130
|
TrieXv4
|
|
5244
5131
|
base=${base}
|
|
5245
|
-
${
|
|
5132
|
+
${comment.split("\n").map((a) => "# " + a.trimEnd()).join("\n")}
|
|
5246
5133
|
# Data:
|
|
5247
5134
|
${DATA$2}
|
|
5248
5135
|
`;
|
|
@@ -5287,27 +5174,23 @@ function serializeTrie$2(root, options = 16) {
|
|
|
5287
5174
|
}
|
|
5288
5175
|
function* emit(s) {
|
|
5289
5176
|
switch (s) {
|
|
5290
|
-
case EOW:
|
|
5177
|
+
case EOW:
|
|
5291
5178
|
yield* flush();
|
|
5292
5179
|
backBuffer.last = EOW;
|
|
5293
5180
|
backBuffer.count = 0;
|
|
5294
5181
|
backBuffer.words++;
|
|
5295
5182
|
break;
|
|
5296
|
-
|
|
5297
|
-
case BACK: {
|
|
5183
|
+
case BACK:
|
|
5298
5184
|
backBuffer.count++;
|
|
5299
5185
|
break;
|
|
5300
|
-
|
|
5301
|
-
case EOL: {
|
|
5186
|
+
case EOL:
|
|
5302
5187
|
backBuffer.eol = true;
|
|
5303
5188
|
break;
|
|
5304
|
-
|
|
5305
|
-
default: {
|
|
5189
|
+
default:
|
|
5306
5190
|
if (backBuffer.words >= WORDS_PER_LINE$1) backBuffer.eol = true;
|
|
5307
5191
|
yield* flush();
|
|
5308
5192
|
if (s.startsWith(REF) || s.startsWith(REF_REL)) backBuffer.words++;
|
|
5309
5193
|
yield s;
|
|
5310
|
-
}
|
|
5311
5194
|
}
|
|
5312
5195
|
}
|
|
5313
5196
|
const comment_begin = `${EOL}${INLINE_DATA_COMMENT_LINE}* `;
|
|
@@ -5320,10 +5203,7 @@ function serializeTrie$2(root, options = 16) {
|
|
|
5320
5203
|
return;
|
|
5321
5204
|
}
|
|
5322
5205
|
if (node.c) {
|
|
5323
|
-
if (depth > 0 && depth <= 2)
|
|
5324
|
-
const chars = wordChars.slice(0, depth).map(escape).join("");
|
|
5325
|
-
yield* emit(comment_begin + chars + comment_end);
|
|
5326
|
-
}
|
|
5206
|
+
if (depth > 0 && depth <= 2) yield* emit(comment_begin + wordChars.slice(0, depth).map(escape).join("") + comment_end);
|
|
5327
5207
|
cache.set(node, count++);
|
|
5328
5208
|
const c = Object.entries(node.c).sort((a, b) => a[0] < b[0] ? -1 : 1);
|
|
5329
5209
|
for (const [s, n] of c) {
|
|
@@ -5342,8 +5222,7 @@ function serializeTrie$2(root, options = 16) {
|
|
|
5342
5222
|
yield* flush();
|
|
5343
5223
|
}
|
|
5344
5224
|
const lines = [...bufferLines(serialize(root), 1e3, "")];
|
|
5345
|
-
const
|
|
5346
|
-
const reference = "[\n" + resolvedReferences.map((n) => n.toString(radix)).join(",").replaceAll(/.{110,130}[,]/g, "$&\n") + "\n]\n";
|
|
5225
|
+
const reference = "[\n" + refMap.refCounts.map(([node]) => cache.get(node) || 0).map((n) => n.toString(radix)).join(",").replaceAll(/.{110,130}[,]/g, "$&\n") + "\n]\n";
|
|
5347
5226
|
return pipe([generateHeader$1(radix, comment), reference], opAppend(lines));
|
|
5348
5227
|
}
|
|
5349
5228
|
function buildReferenceMap(root, base) {
|
|
@@ -5366,16 +5245,14 @@ function buildReferenceMap(root, base) {
|
|
|
5366
5245
|
const refCountAndNode = [...pipe(refCount, opFilter(([_, ref]) => ref.c >= 2))].sort((a, b) => b[1].c - a[1].c || a[1].n - b[1].n);
|
|
5367
5246
|
let adj = 0;
|
|
5368
5247
|
const baseLogScale = 1 / Math.log(base);
|
|
5369
|
-
|
|
5248
|
+
return { refCounts: refCountAndNode.filter(([_, ref], idx$1) => {
|
|
5370
5249
|
const i = idx$1 - adj;
|
|
5371
5250
|
const charsIdx = Math.ceil(Math.log(i) * baseLogScale);
|
|
5372
5251
|
const charsNode = Math.ceil(Math.log(ref.n) * baseLogScale);
|
|
5373
|
-
const
|
|
5374
|
-
const keep = savings > 0;
|
|
5252
|
+
const keep = ref.c * (charsNode - charsIdx) - charsIdx > 0;
|
|
5375
5253
|
adj += keep ? 0 : 1;
|
|
5376
5254
|
return keep;
|
|
5377
|
-
}).map(([n, ref]) => [n, ref.c]);
|
|
5378
|
-
return { refCounts: refs };
|
|
5255
|
+
}).map(([n, ref]) => [n, ref.c]) };
|
|
5379
5256
|
}
|
|
5380
5257
|
function importTrie$3(linesX) {
|
|
5381
5258
|
linesX = typeof linesX === "string" ? linesX.split(/^/m) : linesX;
|
|
@@ -5400,8 +5277,7 @@ function importTrie$3(linesX) {
|
|
|
5400
5277
|
parseHeaderRows(headerRows);
|
|
5401
5278
|
}
|
|
5402
5279
|
readHeader(iter);
|
|
5403
|
-
|
|
5404
|
-
return root;
|
|
5280
|
+
return parseStream$1(radix, iter);
|
|
5405
5281
|
}
|
|
5406
5282
|
const numbersSet = stringToCharSet$2("0123456789");
|
|
5407
5283
|
function parseStream$1(radix, iter) {
|
|
@@ -5488,8 +5364,7 @@ function parseStream$1(radix, iter) {
|
|
|
5488
5364
|
function parseCharacter(acc, s) {
|
|
5489
5365
|
const parser = void 0;
|
|
5490
5366
|
const { root: root$1, nodes, stack } = acc;
|
|
5491
|
-
const
|
|
5492
|
-
const node = top.node;
|
|
5367
|
+
const node = stack[stack.length - 1].node;
|
|
5493
5368
|
const c = node.c ?? Object.create(null);
|
|
5494
5369
|
const n = {
|
|
5495
5370
|
f: void 0,
|
|
@@ -5558,8 +5433,7 @@ function parseStream$1(radix, iter) {
|
|
|
5558
5433
|
[INLINE_DATA_COMMENT_LINE, parseComment]
|
|
5559
5434
|
]);
|
|
5560
5435
|
function parserMain(acc, s) {
|
|
5561
|
-
|
|
5562
|
-
return parser(acc, s);
|
|
5436
|
+
return (acc.parser ?? parsers[s] ?? parseCharacter)(acc, s);
|
|
5563
5437
|
}
|
|
5564
5438
|
const charsetSpaces = stringToCharSet$2(" \r\n ");
|
|
5565
5439
|
function parseReferenceIndex(acc, s) {
|
|
@@ -5696,15 +5570,15 @@ var TrieNodeBuilder = class {
|
|
|
5696
5570
|
assertIsValidChar(char);
|
|
5697
5571
|
if (currNode.k) {
|
|
5698
5572
|
const s$1 = stack[depth];
|
|
5699
|
-
const { k: _, c: c$
|
|
5573
|
+
const { k: _, c: c$1,...copy } = currNode;
|
|
5700
5574
|
currNode = s$1.n.c[s$1.c] = copy;
|
|
5701
|
-
if (c$
|
|
5575
|
+
if (c$1) currNode.c = Object.assign(Object.create(null), c$1);
|
|
5702
5576
|
nodes.push(currNode);
|
|
5703
5577
|
}
|
|
5704
|
-
const c
|
|
5705
|
-
currNode.c = c
|
|
5578
|
+
const c = currNode.c || Object.create(null);
|
|
5579
|
+
currNode.c = c;
|
|
5706
5580
|
const n = currNode;
|
|
5707
|
-
const next = c
|
|
5581
|
+
const next = c[char] = c[char] || {};
|
|
5708
5582
|
nodes.push(next);
|
|
5709
5583
|
++depth;
|
|
5710
5584
|
const s = stack[depth];
|
|
@@ -5736,13 +5610,12 @@ var TrieNodeBuilder = class {
|
|
|
5736
5610
|
depth -= num;
|
|
5737
5611
|
currNode = stack[depth + 1].n;
|
|
5738
5612
|
};
|
|
5739
|
-
|
|
5613
|
+
return {
|
|
5740
5614
|
insertChar,
|
|
5741
5615
|
markEOW,
|
|
5742
5616
|
reference,
|
|
5743
5617
|
backStep
|
|
5744
5618
|
};
|
|
5745
|
-
return c;
|
|
5746
5619
|
}
|
|
5747
5620
|
sortChildren(node) {
|
|
5748
5621
|
const entries = Object.entries(node.c).sort((a, b) => compare(a[0], b[0]));
|
|
@@ -5764,8 +5637,7 @@ const specialCharacterMap$1 = new Map([
|
|
|
5764
5637
|
const characterMap = new Map([...specialCharacterMap$1].map((a) => [a[1], a[0]]));
|
|
5765
5638
|
const DATA$1 = "__DATA__";
|
|
5766
5639
|
function importTrieV3AsTrieRoot(srcLines) {
|
|
5767
|
-
|
|
5768
|
-
return importTrieV3WithBuilder(builder, srcLines);
|
|
5640
|
+
return importTrieV3WithBuilder(new TrieNodeBuilder(), srcLines);
|
|
5769
5641
|
}
|
|
5770
5642
|
function importTrieV3WithBuilder(builder, srcLines) {
|
|
5771
5643
|
const timer = getGlobalPerfTimer();
|
|
@@ -5781,10 +5653,7 @@ function importTrieV3WithBuilder(builder, srcLines) {
|
|
|
5781
5653
|
radix = Number.parseInt(header.replace(headerReg$2, "$1"), 10);
|
|
5782
5654
|
}
|
|
5783
5655
|
function findStartOfData(data) {
|
|
5784
|
-
for (let i = 0; i < data.length; ++i)
|
|
5785
|
-
const line = data[i];
|
|
5786
|
-
if (line.includes(DATA$1)) return i;
|
|
5787
|
-
}
|
|
5656
|
+
for (let i = 0; i < data.length; ++i) if (data[i].includes(DATA$1)) return i;
|
|
5788
5657
|
return -1;
|
|
5789
5658
|
}
|
|
5790
5659
|
function readHeader(data) {
|
|
@@ -5800,9 +5669,8 @@ function importTrieV3WithBuilder(builder, srcLines) {
|
|
|
5800
5669
|
const startOfData = findStartOfData(dataLines);
|
|
5801
5670
|
if (startOfData < 0) throw new Error("Unknown file format");
|
|
5802
5671
|
readHeader(dataLines.slice(0, startOfData));
|
|
5803
|
-
const cursor = builder.getCursor();
|
|
5804
5672
|
let node = {
|
|
5805
|
-
cursor,
|
|
5673
|
+
cursor: builder.getCursor(),
|
|
5806
5674
|
parser: void 0
|
|
5807
5675
|
};
|
|
5808
5676
|
const parser = parseStream(radix);
|
|
@@ -5884,8 +5752,7 @@ function parseStream(radix) {
|
|
|
5884
5752
|
[LF, parseIgnore]
|
|
5885
5753
|
]);
|
|
5886
5754
|
function parserMain(acc, s) {
|
|
5887
|
-
|
|
5888
|
-
return parser(acc, s);
|
|
5755
|
+
return (acc.parser ?? parsers.get(s) ?? parseCharacter)(acc, s);
|
|
5889
5756
|
}
|
|
5890
5757
|
return parserMain;
|
|
5891
5758
|
}
|
|
@@ -5936,8 +5803,7 @@ function importTrie$2(input) {
|
|
|
5936
5803
|
}
|
|
5937
5804
|
return headerRows;
|
|
5938
5805
|
}
|
|
5939
|
-
const
|
|
5940
|
-
const version$1 = parseHeaderRows(headerLines);
|
|
5806
|
+
const version$1 = parseHeaderRows(readHeader(lines));
|
|
5941
5807
|
const method = deserializers$1[version$1];
|
|
5942
5808
|
if (!method) throw new Error(`Unsupported version: ${version$1}`);
|
|
5943
5809
|
return method(lines);
|
|
@@ -5946,8 +5812,7 @@ function importTrie$2(input) {
|
|
|
5946
5812
|
//#endregion
|
|
5947
5813
|
//#region src/lib/decodeTrie.ts
|
|
5948
5814
|
function decodeTrie(raw) {
|
|
5949
|
-
|
|
5950
|
-
return new ITrieImpl(data);
|
|
5815
|
+
return new ITrieImpl(decodeTrieData(raw));
|
|
5951
5816
|
}
|
|
5952
5817
|
|
|
5953
5818
|
//#endregion
|
|
@@ -5972,15 +5837,14 @@ const specialPrefix = stringToCharSet("~!");
|
|
|
5972
5837
|
const WORDS_PER_LINE = 20;
|
|
5973
5838
|
const DATA = "__DATA__";
|
|
5974
5839
|
function generateHeader(base, comment) {
|
|
5975
|
-
|
|
5840
|
+
return [
|
|
5976
5841
|
"#!/usr/bin/env cspell-trie reader",
|
|
5977
5842
|
"TrieXv3",
|
|
5978
5843
|
"base=" + base,
|
|
5979
5844
|
...comment ? comment.split("\n").map((a) => "# " + a) : [],
|
|
5980
5845
|
"# Data:",
|
|
5981
5846
|
DATA
|
|
5982
|
-
];
|
|
5983
|
-
return header.map((a) => a + "\n");
|
|
5847
|
+
].map((a) => a + "\n");
|
|
5984
5848
|
}
|
|
5985
5849
|
/**
|
|
5986
5850
|
* Serialize a TrieRoot.
|
|
@@ -6024,27 +5888,23 @@ function serializeTrie$1(root, options = 16) {
|
|
|
6024
5888
|
}
|
|
6025
5889
|
function* emit(s) {
|
|
6026
5890
|
switch (s) {
|
|
6027
|
-
case EOW:
|
|
5891
|
+
case EOW:
|
|
6028
5892
|
yield* flush();
|
|
6029
5893
|
backBuffer.last = EOW;
|
|
6030
5894
|
backBuffer.count = 0;
|
|
6031
5895
|
backBuffer.words++;
|
|
6032
5896
|
break;
|
|
6033
|
-
|
|
6034
|
-
case BACK: {
|
|
5897
|
+
case BACK:
|
|
6035
5898
|
backBuffer.count++;
|
|
6036
5899
|
break;
|
|
6037
|
-
|
|
6038
|
-
case EOL: {
|
|
5900
|
+
case EOL:
|
|
6039
5901
|
backBuffer.eol = true;
|
|
6040
5902
|
break;
|
|
6041
|
-
|
|
6042
|
-
default: {
|
|
5903
|
+
default:
|
|
6043
5904
|
if (backBuffer.words >= WORDS_PER_LINE) backBuffer.eol = true;
|
|
6044
5905
|
yield* flush();
|
|
6045
5906
|
if (s.startsWith(REF)) backBuffer.words++;
|
|
6046
5907
|
yield s;
|
|
6047
|
-
}
|
|
6048
5908
|
}
|
|
6049
5909
|
}
|
|
6050
5910
|
function* walk$3(node, depth) {
|
|
@@ -6089,8 +5949,7 @@ function serializeTrie$1(root, options = 16) {
|
|
|
6089
5949
|
return pipe(generateHeader(radix, comment), opAppend(bufferLines(serialize(root), 1200, "")));
|
|
6090
5950
|
}
|
|
6091
5951
|
function importTrie$1(srcLines) {
|
|
6092
|
-
|
|
6093
|
-
return trie.root;
|
|
5952
|
+
return importTrieV3AsTrieRoot(srcLines).root;
|
|
6094
5953
|
}
|
|
6095
5954
|
function stringToCharSet(values) {
|
|
6096
5955
|
const set = Object.create(null);
|
|
@@ -6147,8 +6006,7 @@ function importTrie(input) {
|
|
|
6147
6006
|
}
|
|
6148
6007
|
return headerRows;
|
|
6149
6008
|
}
|
|
6150
|
-
const
|
|
6151
|
-
const version$1 = parseHeaderRows(headerLines);
|
|
6009
|
+
const version$1 = parseHeaderRows(readHeader(lines));
|
|
6152
6010
|
const method = deserializers[version$1];
|
|
6153
6011
|
if (!method) throw new Error(`Unsupported version: ${version$1}`);
|
|
6154
6012
|
return method(lines);
|
|
@@ -8291,9 +8149,7 @@ function normalizeLocale(locale) {
|
|
|
8291
8149
|
if (regExTwoLetter.test(locale)) return locale.toLowerCase();
|
|
8292
8150
|
const m = locale.match(regExLocaleWithCountry);
|
|
8293
8151
|
if (!m) return locale;
|
|
8294
|
-
|
|
8295
|
-
const variant = m[2].toUpperCase();
|
|
8296
|
-
return `${lang}-${variant}`;
|
|
8152
|
+
return `${m[1].toLowerCase()}-${m[2].toUpperCase()}`;
|
|
8297
8153
|
}
|
|
8298
8154
|
function isStandardLocale(locale) {
|
|
8299
8155
|
return regExValidLocale.test(locale);
|
|
@@ -8357,39 +8213,31 @@ function mapEditCosts(costs = {}) {
|
|
|
8357
8213
|
* @param letters - letters to join
|
|
8358
8214
|
*/
|
|
8359
8215
|
function joinLetters(letters) {
|
|
8360
|
-
|
|
8361
|
-
return v.map((a) => a.length > 1 || !a.length ? `(${a})` : a).join("");
|
|
8216
|
+
return [...letters].map((a) => a.length > 1 || !a.length ? `(${a})` : a).join("");
|
|
8362
8217
|
}
|
|
8363
8218
|
|
|
8364
8219
|
//#endregion
|
|
8365
8220
|
//#region src/lib/mappers/mapToSuggestionCostDef.ts
|
|
8366
8221
|
function parseAlphabet(cs, locale, editCost) {
|
|
8367
8222
|
const { cost, penalty } = cs;
|
|
8368
|
-
const
|
|
8369
|
-
const charForms = [...pipe(characters, opMap((c) => caseForms(c, locale).sort()))];
|
|
8370
|
-
const alphabet = joinLetters([...pipe(charForms, opFlatten(), opMap((letter) => accentForms(letter)), opFlatten(), opUnique())].sort());
|
|
8371
|
-
const sugAlpha = clean$1({
|
|
8372
|
-
map: alphabet,
|
|
8373
|
-
replace: cost,
|
|
8374
|
-
insDel: cost,
|
|
8375
|
-
swap: cost,
|
|
8376
|
-
penalty
|
|
8377
|
-
});
|
|
8223
|
+
const alphabet = joinLetters([...pipe([...pipe(expandCharacterSet(cs.characters), opMap((c) => caseForms(c, locale).sort()))], opFlatten(), opMap((letter) => accentForms(letter)), opFlatten(), opUnique())].sort());
|
|
8378
8224
|
return [
|
|
8379
|
-
|
|
8225
|
+
clean$1({
|
|
8226
|
+
map: alphabet,
|
|
8227
|
+
replace: cost,
|
|
8228
|
+
insDel: cost,
|
|
8229
|
+
swap: cost,
|
|
8230
|
+
penalty
|
|
8231
|
+
}),
|
|
8380
8232
|
parseAlphabetCaps(cs.characters, locale, editCost),
|
|
8381
8233
|
...calcCostsForAccentedLetters(alphabet, locale, editCost)
|
|
8382
8234
|
];
|
|
8383
8235
|
}
|
|
8384
8236
|
function parseAlphabetCaps(alphabet, locale, editCost) {
|
|
8385
|
-
|
|
8386
|
-
|
|
8387
|
-
const caps = charForms.map((a) => joinLetters(a)).join("|");
|
|
8388
|
-
const sugCaps = {
|
|
8389
|
-
map: caps,
|
|
8237
|
+
return {
|
|
8238
|
+
map: [...pipe(expandCharacterSet(alphabet), opMap((c) => caseForms(c, locale).sort()))].map((a) => joinLetters(a)).join("|"),
|
|
8390
8239
|
replace: editCost.capsCosts
|
|
8391
8240
|
};
|
|
8392
|
-
return sugCaps;
|
|
8393
8241
|
}
|
|
8394
8242
|
function calcFirstCharacterReplaceDefs(alphabets, editCost) {
|
|
8395
8243
|
return alphabets.map((cs) => calcFirstCharacterReplace(cs, editCost));
|
|
@@ -8397,10 +8245,9 @@ function calcFirstCharacterReplaceDefs(alphabets, editCost) {
|
|
|
8397
8245
|
function calcFirstCharacterReplace(cs, editCost) {
|
|
8398
8246
|
const mapOfFirstLetters = [...pipe(expandCharacterSet(cs.characters), opUnique(), opMap((letter) => `(^${letter})`))].sort().join("") + "(^)";
|
|
8399
8247
|
const penalty = editCost.firstLetterPenalty;
|
|
8400
|
-
const cost = cs.cost - penalty;
|
|
8401
8248
|
return {
|
|
8402
8249
|
map: mapOfFirstLetters,
|
|
8403
|
-
replace: cost,
|
|
8250
|
+
replace: cs.cost - penalty,
|
|
8404
8251
|
penalty: penalty * 2
|
|
8405
8252
|
};
|
|
8406
8253
|
}
|
|
@@ -8417,8 +8264,7 @@ function parseAccents(cs, _editCost) {
|
|
|
8417
8264
|
}
|
|
8418
8265
|
function calcCostsForAccentedLetters(simpleMap, locale, costs) {
|
|
8419
8266
|
const charactersWithAccents = [...pipe(splitMap(simpleMap), opMap((char) => caseForms(char, locale)), opFlatten(), opMap((char) => [...accentForms(char)]), opFilter((forms$1) => forms$1.length > 1))];
|
|
8420
|
-
const
|
|
8421
|
-
const replaceAccentMap = [...characters].join("|");
|
|
8267
|
+
const replaceAccentMap = [...pipe(charactersWithAccents, opMap((forms$1) => new Set([...forms$1, ...forms$1.map((char) => stripAccents(char))])), opMap((forms$1) => [...forms$1].sort()), opFilter((forms$1) => forms$1.length > 1), opMap(joinLetters), opUnique())].join("|");
|
|
8422
8268
|
const cost = costs.accentCosts;
|
|
8423
8269
|
const costToReplaceAccent = !replaceAccentMap ? [] : [{
|
|
8424
8270
|
map: replaceAccentMap,
|
|
@@ -8478,20 +8324,16 @@ function hunspellInformationToSuggestionCostDef(hunInfo, locales) {
|
|
|
8478
8324
|
function parseAff(aff, costs$1) {
|
|
8479
8325
|
const regSupportedAff = /^(?:MAP|KEY|TRY|NO-TRY|ICONV|OCONV|REP)\s/;
|
|
8480
8326
|
const rejectAff = /^(?:MAP|KEY|TRY|ICONV|OCONV|REP)\s+\d+$/;
|
|
8481
|
-
|
|
8482
|
-
const defs = pipe(lines, opMap((line) => pipe(operations, opMap((fn) => fn(line, costs$1)), opMap(asArrayOf), opFlatten())), opFlatten(), opFilter(isDefined$1));
|
|
8483
|
-
return [...defs];
|
|
8327
|
+
return [...pipe(aff.split("\n").map((a) => a.replace(/#.*/, "")).map((a) => a.trim()).filter((a) => regSupportedAff.test(a)).filter((a) => !rejectAff.test(a)), opMap((line) => pipe(operations, opMap((fn) => fn(line, costs$1)), opMap(asArrayOf), opFlatten())), opFlatten(), opFilter(isDefined$1))];
|
|
8484
8328
|
}
|
|
8485
8329
|
return parseAff(hunInfo.aff, costs);
|
|
8486
8330
|
}
|
|
8487
8331
|
function calcCosts(costs = {}, locale) {
|
|
8488
8332
|
const useLocale = locale?.length ? locale.map((loc) => loc.locale) : void 0;
|
|
8489
|
-
|
|
8490
|
-
|
|
8491
|
-
...hunCosts,
|
|
8333
|
+
return {
|
|
8334
|
+
...mapHunspellCosts(costs),
|
|
8492
8335
|
locale: useLocale
|
|
8493
8336
|
};
|
|
8494
|
-
return c;
|
|
8495
8337
|
}
|
|
8496
8338
|
const regExpMap = /^(?:MAP)\s+(\S+)$/;
|
|
8497
8339
|
function affMap(line, costs) {
|
|
@@ -8510,8 +8352,7 @@ function affTry(line, costs) {
|
|
|
8510
8352
|
const m = line.match(regExpTry);
|
|
8511
8353
|
if (!m) return void 0;
|
|
8512
8354
|
const cost = costs.tryCharCost;
|
|
8513
|
-
const
|
|
8514
|
-
const characters = tryChars;
|
|
8355
|
+
const characters = m[1];
|
|
8515
8356
|
return parseAlphabet({
|
|
8516
8357
|
characters,
|
|
8517
8358
|
cost
|
|
@@ -8531,9 +8372,8 @@ const regExpNoTry = /^NO-TRY\s+(\S+)$/;
|
|
|
8531
8372
|
function affNoTry(line, costs) {
|
|
8532
8373
|
const m = line.match(regExpNoTry);
|
|
8533
8374
|
if (!m) return void 0;
|
|
8534
|
-
const map = m[1];
|
|
8535
8375
|
return {
|
|
8536
|
-
map,
|
|
8376
|
+
map: m[1],
|
|
8537
8377
|
insDel: Math.max(costs.nonAlphabetCosts - costs.tryCharCost, 0),
|
|
8538
8378
|
penalty: costs.nonAlphabetCosts + costs.tryCharCost
|
|
8539
8379
|
};
|
|
@@ -8596,9 +8436,7 @@ function affMapAccents(line, costs) {
|
|
|
8596
8436
|
}
|
|
8597
8437
|
function parseCaps(value, costs) {
|
|
8598
8438
|
const locale = costs.locale;
|
|
8599
|
-
const
|
|
8600
|
-
const withCases = letters.map((s) => caseForms(s, locale)).filter((forms$1) => forms$1.length > 1).map(joinLetters);
|
|
8601
|
-
const map = unique(withCases).join("|");
|
|
8439
|
+
const map = unique([...splitMap(value)].filter((a) => a !== "|").map((s) => caseForms(s, locale)).filter((forms$1) => forms$1.length > 1).map(joinLetters)).join("|");
|
|
8602
8440
|
const cost = costs.capsCosts;
|
|
8603
8441
|
if (!map) return void 0;
|
|
8604
8442
|
return {
|
|
@@ -8645,8 +8483,7 @@ function toCharSets(cs, defaultValue, cost, penalty) {
|
|
|
8645
8483
|
return cs;
|
|
8646
8484
|
}
|
|
8647
8485
|
function processAccents(accents, editCost) {
|
|
8648
|
-
|
|
8649
|
-
return cs.map((cs$1) => parseAccents(cs$1, editCost)).filter(isDefined$1);
|
|
8486
|
+
return toCharSets(accents, "̀-́", editCost.accentCosts).map((cs) => parseAccents(cs, editCost)).filter(isDefined$1);
|
|
8650
8487
|
}
|
|
8651
8488
|
function mapDictionaryInformationToAdjustment(dictInfo) {
|
|
8652
8489
|
if (!dictInfo.adjustments) return [];
|
|
@@ -8696,8 +8533,7 @@ function mapDictionaryInformationToWeightMap(dictInfo) {
|
|
|
8696
8533
|
//#endregion
|
|
8697
8534
|
//#region src/lib/suggestions/suggest.ts
|
|
8698
8535
|
const baseCost = opCosts.baseCost;
|
|
8699
|
-
const
|
|
8700
|
-
const postSwapCost = swapCost - baseCost;
|
|
8536
|
+
const postSwapCost = opCosts.swapCost - baseCost;
|
|
8701
8537
|
const insertSpaceCost = -1;
|
|
8702
8538
|
const mapSubCost = opCosts.visuallySimilar;
|
|
8703
8539
|
const maxCostScale = opCosts.wordLengthCostFactor;
|
|
@@ -8705,8 +8541,7 @@ const discourageInsertCost = baseCost;
|
|
|
8705
8541
|
const setOfSeparators = new Set([JOIN_SEPARATOR, WORD_SEPARATOR]);
|
|
8706
8542
|
function suggest(root, word, options = {}) {
|
|
8707
8543
|
const opts = createSuggestionOptions(options);
|
|
8708
|
-
const
|
|
8709
|
-
const collector = suggestionCollector(word, collectorOpts);
|
|
8544
|
+
const collector = suggestionCollector(word, clean(opts));
|
|
8710
8545
|
collector.collect(genSuggestions(root, word, {
|
|
8711
8546
|
...opts,
|
|
8712
8547
|
...collector.genSuggestionOptions
|
|
@@ -8716,7 +8551,6 @@ function suggest(root, word, options = {}) {
|
|
|
8716
8551
|
function* genSuggestions(root, word, options = {}) {
|
|
8717
8552
|
const roots = Array.isArray(root) ? root : [root];
|
|
8718
8553
|
for (const r of roots) yield* genCompoundableSuggestions(r, word, options);
|
|
8719
|
-
return void 0;
|
|
8720
8554
|
}
|
|
8721
8555
|
function* genCompoundableSuggestions(root, word, options = {}) {
|
|
8722
8556
|
const { compoundMethod = CompoundWordsMethod.NONE, changeLimit, ignoreCase } = createSuggestionOptions(options);
|
|
@@ -8737,14 +8571,12 @@ function* genCompoundableSuggestions(root, word, options = {}) {
|
|
|
8737
8571
|
let costLimit = bc * Math.min(word.length * maxCostScale, changeLimit);
|
|
8738
8572
|
function updateCostLimit(maxCost) {
|
|
8739
8573
|
switch (typeof maxCost) {
|
|
8740
|
-
case "number":
|
|
8574
|
+
case "number":
|
|
8741
8575
|
costLimit = maxCost;
|
|
8742
8576
|
break;
|
|
8743
|
-
|
|
8744
|
-
case "symbol": {
|
|
8577
|
+
case "symbol":
|
|
8745
8578
|
stopNow = true;
|
|
8746
8579
|
break;
|
|
8747
|
-
}
|
|
8748
8580
|
}
|
|
8749
8581
|
}
|
|
8750
8582
|
const a = 0;
|
|
@@ -8758,8 +8590,7 @@ function* genCompoundableSuggestions(root, word, options = {}) {
|
|
|
8758
8590
|
a,
|
|
8759
8591
|
b
|
|
8760
8592
|
};
|
|
8761
|
-
const
|
|
8762
|
-
const iWalk = hintedWalker(root, ignoreCase, hint, compoundMethod, options.compoundSeparator);
|
|
8593
|
+
const iWalk = hintedWalker(root, ignoreCase, word, compoundMethod, options.compoundSeparator);
|
|
8763
8594
|
let goDeeper = true;
|
|
8764
8595
|
for (let r = iWalk.next({ goDeeper }); !stopNow && !r.done; r = iWalk.next({ goDeeper })) {
|
|
8765
8596
|
const { text, node, depth } = r.value;
|
|
@@ -8777,22 +8608,16 @@ function* genCompoundableSuggestions(root, word, options = {}) {
|
|
|
8777
8608
|
goDeeper = false;
|
|
8778
8609
|
const { i: i$1, w: w$1, m } = ht;
|
|
8779
8610
|
if (i$1 >= history.length) continue;
|
|
8780
|
-
|
|
8781
|
-
if (r$1.word.slice(0, w$1.length) !== w$1) continue;
|
|
8611
|
+
if (history[i$1].word.slice(0, w$1.length) !== w$1) continue;
|
|
8782
8612
|
const dc = mxMin - m;
|
|
8783
8613
|
for (let p = i$1; p < history.length; ++p) {
|
|
8784
8614
|
const { word: word$1, cost: hCost } = history[p];
|
|
8785
|
-
|
|
8786
|
-
if (fix !== w$1) break;
|
|
8615
|
+
if (word$1.slice(0, w$1.length) !== w$1) break;
|
|
8787
8616
|
const cost$1 = hCost + dc;
|
|
8788
|
-
if (cost$1 <= costLimit) {
|
|
8789
|
-
|
|
8790
|
-
|
|
8791
|
-
|
|
8792
|
-
word: emit,
|
|
8793
|
-
cost: cost$1
|
|
8794
|
-
});
|
|
8795
|
-
}
|
|
8617
|
+
if (cost$1 <= costLimit) updateCostLimit(yield {
|
|
8618
|
+
word: text + word$1.slice(w$1.length),
|
|
8619
|
+
cost: cost$1
|
|
8620
|
+
});
|
|
8796
8621
|
}
|
|
8797
8622
|
continue;
|
|
8798
8623
|
} else historyTags.set(tag, {
|
|
@@ -8855,7 +8680,6 @@ function* genCompoundableSuggestions(root, word, options = {}) {
|
|
|
8855
8680
|
} else updateCostLimit(yield void 0);
|
|
8856
8681
|
goDeeper = min <= costLimit;
|
|
8857
8682
|
}
|
|
8858
|
-
return void 0;
|
|
8859
8683
|
}
|
|
8860
8684
|
|
|
8861
8685
|
//#endregion
|
|
@@ -8907,10 +8731,7 @@ var Trie = class Trie {
|
|
|
8907
8731
|
}
|
|
8908
8732
|
has(word, minLegacyCompoundLength) {
|
|
8909
8733
|
if (this.hasWord(word, false)) return true;
|
|
8910
|
-
if (minLegacyCompoundLength) {
|
|
8911
|
-
const f = this.findWord(word, { useLegacyWordCompounds: minLegacyCompoundLength });
|
|
8912
|
-
return !!f.found;
|
|
8913
|
-
}
|
|
8734
|
+
if (minLegacyCompoundLength) return !!this.findWord(word, { useLegacyWordCompounds: minLegacyCompoundLength }).found;
|
|
8914
8735
|
return false;
|
|
8915
8736
|
}
|
|
8916
8737
|
/**
|
|
@@ -9021,11 +8842,10 @@ var Trie = class Trie {
|
|
|
9021
8842
|
return new Trie(root, void 0);
|
|
9022
8843
|
}
|
|
9023
8844
|
createFindOptions(options = {}) {
|
|
9024
|
-
|
|
8845
|
+
return createFindOptions({
|
|
9025
8846
|
...this._findOptionsDefaults,
|
|
9026
8847
|
...options
|
|
9027
8848
|
});
|
|
9028
|
-
return findOptions;
|
|
9029
8849
|
}
|
|
9030
8850
|
lastCreateFindOptionsMatchCaseMap = /* @__PURE__ */ new Map();
|
|
9031
8851
|
createFindOptionsMatchCase(matchCase) {
|
|
@@ -9085,7 +8905,6 @@ var SecondChanceCache = class {
|
|
|
9085
8905
|
this.map0.set(key, v);
|
|
9086
8906
|
return v;
|
|
9087
8907
|
}
|
|
9088
|
-
return void 0;
|
|
9089
8908
|
}
|
|
9090
8909
|
toArray() {
|
|
9091
8910
|
return [...this.map1, ...this.map0];
|
|
@@ -9094,7 +8913,6 @@ var SecondChanceCache = class {
|
|
|
9094
8913
|
|
|
9095
8914
|
//#endregion
|
|
9096
8915
|
//#region src/lib/TrieBuilder.ts
|
|
9097
|
-
const SymbolFrozenNode = Symbol();
|
|
9098
8916
|
/**
|
|
9099
8917
|
* Builds an optimized Trie from a Iterable<string>. It attempts to reduce the size of the trie
|
|
9100
8918
|
* by finding common endings.
|
|
@@ -9110,8 +8928,7 @@ function buildTrie(words, trieOptions) {
|
|
|
9110
8928
|
* @param trieOptions options for the Trie
|
|
9111
8929
|
*/
|
|
9112
8930
|
function buildTrieFast(words, trieOptions) {
|
|
9113
|
-
|
|
9114
|
-
return new Trie(root, void 0);
|
|
8931
|
+
return new Trie(createTrieRootFromList(words, trieOptions), void 0);
|
|
9115
8932
|
}
|
|
9116
8933
|
const MAX_NUM_SIGS = 1e5;
|
|
9117
8934
|
const MAX_TRANSFORMS = 1e6;
|
|
@@ -9152,9 +8969,7 @@ var TrieBuilder = class {
|
|
|
9152
8969
|
const isWord = n.f ? "*" : "";
|
|
9153
8970
|
const entries = n.c ? Object.entries(n.c) : void 0;
|
|
9154
8971
|
const c = entries ? entries.map(([k, n$1]) => [k, this.cached.get(n$1)]) : void 0;
|
|
9155
|
-
|
|
9156
|
-
const sig = isWord + ref;
|
|
9157
|
-
return sig;
|
|
8972
|
+
return isWord + (c ? JSON.stringify(c) : "");
|
|
9158
8973
|
}
|
|
9159
8974
|
_canBeCached(n) {
|
|
9160
8975
|
if (!n.c) return true;
|
|
@@ -9257,8 +9072,7 @@ var TrieBuilder = class {
|
|
|
9257
9072
|
const chars = [...word];
|
|
9258
9073
|
let d = 1;
|
|
9259
9074
|
for (const s of chars) {
|
|
9260
|
-
|
|
9261
|
-
if (p?.s !== s) break;
|
|
9075
|
+
if (this.lastPath[d]?.s !== s) break;
|
|
9262
9076
|
d++;
|
|
9263
9077
|
}
|
|
9264
9078
|
if (chars.length < d) d = chars.length;
|
|
@@ -9317,20 +9131,15 @@ var TrieBuilder = class {
|
|
|
9317
9131
|
return stack.map((n) => this.debNodeInfo(n));
|
|
9318
9132
|
}
|
|
9319
9133
|
debNodeInfo(node) {
|
|
9320
|
-
const id = node.id ?? "?";
|
|
9321
|
-
const cid = this.cached.get(node) ?? "?";
|
|
9322
|
-
const f = node.f || 0;
|
|
9323
|
-
const c = node.c ? Object.fromEntries(Object.entries(node.c).map(([k, n]) => [k, {
|
|
9324
|
-
id: n.id,
|
|
9325
|
-
r: this.cached.get(n)
|
|
9326
|
-
}])) : void 0;
|
|
9327
|
-
const L = Object.isFrozen(node);
|
|
9328
9134
|
return {
|
|
9329
|
-
id,
|
|
9330
|
-
cid,
|
|
9331
|
-
f,
|
|
9332
|
-
c,
|
|
9333
|
-
|
|
9135
|
+
id: node.id ?? "?",
|
|
9136
|
+
cid: this.cached.get(node) ?? "?",
|
|
9137
|
+
f: node.f || 0,
|
|
9138
|
+
c: node.c ? Object.fromEntries(Object.entries(node.c).map(([k, n]) => [k, {
|
|
9139
|
+
id: n.id,
|
|
9140
|
+
r: this.cached.get(n)
|
|
9141
|
+
}])) : void 0,
|
|
9142
|
+
L: Object.isFrozen(node)
|
|
9334
9143
|
};
|
|
9335
9144
|
}
|
|
9336
9145
|
logDebug(methodName, contentOrFunction) {
|
|
@@ -9390,7 +9199,6 @@ const normalizeWordForCaseInsensitive = (text) => {
|
|
|
9390
9199
|
|
|
9391
9200
|
//#endregion
|
|
9392
9201
|
//#region src/lib/SimpleDictionaryParser.ts
|
|
9393
|
-
const RegExpSplit = /[\s,;]/g;
|
|
9394
9202
|
const _defaultOptions = {
|
|
9395
9203
|
commentCharacter: LINE_COMMENT,
|
|
9396
9204
|
optionalCompoundCharacter: OPTIONAL_COMPOUND_FIX,
|
|
@@ -9403,7 +9211,7 @@ const _defaultOptions = {
|
|
|
9403
9211
|
stripCaseAndAccentsOnForbidden: false,
|
|
9404
9212
|
split: false,
|
|
9405
9213
|
splitKeepBoth: false,
|
|
9406
|
-
splitSeparator:
|
|
9214
|
+
splitSeparator: /[\s,;]/g,
|
|
9407
9215
|
keepOptionalCompoundCharacter: false
|
|
9408
9216
|
};
|
|
9409
9217
|
const defaultParseDictionaryOptions = Object.freeze(_defaultOptions);
|
|
@@ -9431,22 +9239,18 @@ function createDictionaryLineParserMapper(options) {
|
|
|
9431
9239
|
if (idxDirective >= 0) {
|
|
9432
9240
|
const flags = line.slice(idxDirective).split(/[\s,;]/g).map((s) => s.trim()).filter((a) => !!a);
|
|
9433
9241
|
for (const flag of flags) switch (flag) {
|
|
9434
|
-
case "split":
|
|
9242
|
+
case "split":
|
|
9435
9243
|
split = true;
|
|
9436
9244
|
break;
|
|
9437
|
-
|
|
9438
|
-
case "no-split": {
|
|
9245
|
+
case "no-split":
|
|
9439
9246
|
split = false;
|
|
9440
9247
|
break;
|
|
9441
|
-
|
|
9442
|
-
case "no-generate-alternatives": {
|
|
9248
|
+
case "no-generate-alternatives":
|
|
9443
9249
|
stripCaseAndAccents = false;
|
|
9444
9250
|
break;
|
|
9445
|
-
|
|
9446
|
-
case "generate-alternatives": {
|
|
9251
|
+
case "generate-alternatives":
|
|
9447
9252
|
stripCaseAndAccents = true;
|
|
9448
9253
|
break;
|
|
9449
|
-
}
|
|
9450
9254
|
}
|
|
9451
9255
|
}
|
|
9452
9256
|
return line.slice(0, idx$1).trim();
|
|
@@ -9495,9 +9299,7 @@ function createDictionaryLineParserMapper(options) {
|
|
|
9495
9299
|
function* splitWords(lines) {
|
|
9496
9300
|
for (const line of lines) {
|
|
9497
9301
|
if (split) {
|
|
9498
|
-
|
|
9499
|
-
const words = splitLine(lineEscaped, splitSeparator);
|
|
9500
|
-
yield* words.map((escaped) => escaped.replaceAll("\\", ""));
|
|
9302
|
+
yield* splitLine(line.includes("\"") ? line.replaceAll(/".*?"/g, (quoted) => " " + quoted.replaceAll(/(\s)/g, "\\$1") + " ") : line, splitSeparator).map((escaped) => escaped.replaceAll("\\", ""));
|
|
9501
9303
|
if (!splitKeepBoth) continue;
|
|
9502
9304
|
}
|
|
9503
9305
|
yield line;
|
|
@@ -9507,8 +9309,7 @@ function createDictionaryLineParserMapper(options) {
|
|
|
9507
9309
|
for (const paragraph of paragraphs) yield* paragraph.split("\n");
|
|
9508
9310
|
}
|
|
9509
9311
|
const mapCompounds = keepOptionalCompoundCharacter ? [] : [opConcatMap(mapOptionalPrefix), opConcatMap(mapOptionalSuffix)];
|
|
9510
|
-
|
|
9511
|
-
return processLines;
|
|
9312
|
+
return opCombine(opFilter(isString), splitLines, opMap(removeComments), splitWords, opMap(trim), opFilter(filterEmptyLines), ...mapCompounds, opConcatMap(mapNormalize), opMap(removeDoublePrefix));
|
|
9512
9313
|
}
|
|
9513
9314
|
/**
|
|
9514
9315
|
* Normalizes a dictionary words based upon prefix / suffixes.
|