cspell-dictionary 8.10.2 → 8.11.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -41,7 +41,7 @@ export declare class SpellingDictionaryFromTrie implements SpellingDictionary {
|
|
|
41
41
|
* @returns SpellingDictionary
|
|
42
42
|
*/
|
|
43
43
|
export declare function createSpellingDictionaryFromTrieFile(data: string | Buffer, name: string, source: string, options: SpellingDictionaryOptions): SpellingDictionary;
|
|
44
|
-
declare function outerWordForms(word: string, mapWord: (word: string) => string[]):
|
|
44
|
+
declare function outerWordForms(word: string, mapWord: (word: string) => string[]): Iterable<string>;
|
|
45
45
|
export declare const __testing__: {
|
|
46
46
|
outerWordForms: typeof outerWordForms;
|
|
47
47
|
};
|
|
@@ -1,4 +1,3 @@
|
|
|
1
|
-
import { opConcatMap, pipe } from '@cspell/cspell-pipe/sync';
|
|
2
1
|
import { CompoundWordsMethod, decodeTrie, suggestionCollector } from 'cspell-trie-lib';
|
|
3
2
|
import { autoCache, createCache01 } from '../util/AutoCache.js';
|
|
4
3
|
import { clean } from '../util/clean.js';
|
|
@@ -71,7 +70,7 @@ export class SpellingDictionaryFromTrie {
|
|
|
71
70
|
}
|
|
72
71
|
_find = findCache((word, useCompounds, ignoreCase) => this.findAnyForm(word, useCompounds, ignoreCase));
|
|
73
72
|
findAnyForm(word, useCompounds, ignoreCase) {
|
|
74
|
-
const outerForms = outerWordForms(word, this.remapWord
|
|
73
|
+
const outerForms = outerWordForms(word, this.remapWord || ((word) => [this.mapWord(word)]));
|
|
75
74
|
for (const form of outerForms) {
|
|
76
75
|
const r = this._findAnyForm(form, useCompounds, ignoreCase);
|
|
77
76
|
if (r)
|
|
@@ -168,9 +167,28 @@ function findCache(fn, size = 2000) {
|
|
|
168
167
|
}
|
|
169
168
|
return find;
|
|
170
169
|
}
|
|
171
|
-
function outerWordForms(word, mapWord) {
|
|
172
|
-
|
|
173
|
-
|
|
170
|
+
function* outerWordForms(word, mapWord) {
|
|
171
|
+
// Only generate the needed forms.
|
|
172
|
+
const sent = new Set();
|
|
173
|
+
let w = word;
|
|
174
|
+
yield w;
|
|
175
|
+
sent.add(w);
|
|
176
|
+
w = word.normalize('NFC');
|
|
177
|
+
if (!sent.has(w))
|
|
178
|
+
yield w;
|
|
179
|
+
sent.add(w);
|
|
180
|
+
w = word.normalize('NFD');
|
|
181
|
+
if (!sent.has(w))
|
|
182
|
+
yield w;
|
|
183
|
+
sent.add(w);
|
|
184
|
+
for (const f of [...sent]) {
|
|
185
|
+
for (const m of mapWord(f)) {
|
|
186
|
+
if (!sent.has(m))
|
|
187
|
+
yield m;
|
|
188
|
+
sent.add(m);
|
|
189
|
+
}
|
|
190
|
+
}
|
|
191
|
+
return;
|
|
174
192
|
}
|
|
175
193
|
export const __testing__ = { outerWordForms };
|
|
176
194
|
//# sourceMappingURL=SpellingDictionaryFromTrie.js.map
|
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
import assert from 'node:assert';
|
|
2
|
+
import { buildITrieFromWords } from 'cspell-trie-lib';
|
|
3
|
+
import { loremIpsum } from 'lorem-ipsum';
|
|
4
|
+
import { suite } from 'perf-insight';
|
|
5
|
+
import { createSpellingDictionary } from '../SpellingDictionary/createSpellingDictionary.js';
|
|
6
|
+
import { createCollection } from '../SpellingDictionary/SpellingDictionaryCollection.js';
|
|
7
|
+
suite('dictionary has', async (test) => {
|
|
8
|
+
const words = genWords(10_000);
|
|
9
|
+
const words2 = genWords(1000);
|
|
10
|
+
const words3 = genWords(1000);
|
|
11
|
+
const iTrie = buildITrieFromWords(words);
|
|
12
|
+
const dict = createSpellingDictionary(words, 'test', import.meta.url);
|
|
13
|
+
const dict2 = createSpellingDictionary(words2, 'test2', import.meta.url);
|
|
14
|
+
const dict3 = createSpellingDictionary(words3, 'test3', import.meta.url);
|
|
15
|
+
const dictCol = createCollection([dict, dict2, dict3], 'test-collection');
|
|
16
|
+
test('dictionary has 100k words', () => {
|
|
17
|
+
checkWords(dict, words);
|
|
18
|
+
});
|
|
19
|
+
test('dictionary has 100k words (2nd time)', () => {
|
|
20
|
+
checkWords(dict, words);
|
|
21
|
+
});
|
|
22
|
+
test('collection has 100k words', () => {
|
|
23
|
+
checkWords(dictCol, words);
|
|
24
|
+
});
|
|
25
|
+
test('iTrie has 100k words', () => {
|
|
26
|
+
checkWords(iTrie, words);
|
|
27
|
+
});
|
|
28
|
+
test('iTrie.hasWord has 100k words', () => {
|
|
29
|
+
const dict = { has: (word) => iTrie.hasWord(word, true) };
|
|
30
|
+
checkWords(dict, words);
|
|
31
|
+
});
|
|
32
|
+
test('iTrie.data has 100k words', () => {
|
|
33
|
+
checkWords(iTrie.data, words);
|
|
34
|
+
});
|
|
35
|
+
});
|
|
36
|
+
suite('dictionary has Not', async (test) => {
|
|
37
|
+
const words = genWords(10_000);
|
|
38
|
+
const words2 = genWords(1000);
|
|
39
|
+
const words3 = genWords(1000);
|
|
40
|
+
const missingWords = words.map((w) => w + '-x-');
|
|
41
|
+
const iTrie = buildITrieFromWords(words);
|
|
42
|
+
const dict = createSpellingDictionary(words, 'test', import.meta.url);
|
|
43
|
+
const dict2 = createSpellingDictionary(words2, 'test2', import.meta.url);
|
|
44
|
+
const dict3 = createSpellingDictionary(words3, 'test3', import.meta.url);
|
|
45
|
+
const dictCol = createCollection([dict, dict2, dict3], 'test-collection');
|
|
46
|
+
test('dictionary has 100k words', () => {
|
|
47
|
+
checkWords(dict, missingWords, false);
|
|
48
|
+
});
|
|
49
|
+
test('dictionary has 100k words (2nd time)', () => {
|
|
50
|
+
checkWords(dict, missingWords, false);
|
|
51
|
+
});
|
|
52
|
+
test('collection has 100k words', () => {
|
|
53
|
+
checkWords(dictCol, missingWords, false);
|
|
54
|
+
});
|
|
55
|
+
test('iTrie has 100k words', () => {
|
|
56
|
+
checkWords(iTrie, missingWords, false);
|
|
57
|
+
});
|
|
58
|
+
test('iTrie.hasWord has 100k words', () => {
|
|
59
|
+
const dict = { has: (word) => iTrie.hasWord(word, true) };
|
|
60
|
+
checkWords(dict, missingWords, false);
|
|
61
|
+
});
|
|
62
|
+
test('iTrie.data has 100k words', () => {
|
|
63
|
+
checkWords(iTrie.data, missingWords, false);
|
|
64
|
+
});
|
|
65
|
+
});
|
|
66
|
+
function checkWords(dict, words, expected = true, totalChecks = 100_000) {
|
|
67
|
+
let has = true;
|
|
68
|
+
const len = words.length;
|
|
69
|
+
for (let i = 0; i < totalChecks; ++i) {
|
|
70
|
+
const word = words[i % len];
|
|
71
|
+
const r = expected === dict.has(word);
|
|
72
|
+
if (!r) {
|
|
73
|
+
console.log(`Word ${expected ? 'not found' : 'found'}: ${word}`);
|
|
74
|
+
}
|
|
75
|
+
has = r && has;
|
|
76
|
+
}
|
|
77
|
+
assert(has, 'All words should be found in the dictionary');
|
|
78
|
+
}
|
|
79
|
+
function genWords(count, includeForbidden = true) {
|
|
80
|
+
const setOfWords = new Set(loremIpsum({ count }).split(' '));
|
|
81
|
+
if (includeForbidden) {
|
|
82
|
+
setOfWords.add('!forbidden');
|
|
83
|
+
setOfWords.add('!bad-word');
|
|
84
|
+
setOfWords.add('!rejection');
|
|
85
|
+
}
|
|
86
|
+
while (setOfWords.size < count) {
|
|
87
|
+
const words = [...setOfWords];
|
|
88
|
+
for (const a of words) {
|
|
89
|
+
for (const b of words) {
|
|
90
|
+
if (a !== b) {
|
|
91
|
+
setOfWords.add(a + b);
|
|
92
|
+
}
|
|
93
|
+
if (setOfWords.size >= count) {
|
|
94
|
+
break;
|
|
95
|
+
}
|
|
96
|
+
}
|
|
97
|
+
if (setOfWords.size >= count) {
|
|
98
|
+
break;
|
|
99
|
+
}
|
|
100
|
+
}
|
|
101
|
+
}
|
|
102
|
+
return [...setOfWords];
|
|
103
|
+
}
|
|
104
|
+
//# sourceMappingURL=has.perf.js.map
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "cspell-dictionary",
|
|
3
|
-
"version": "8.10.2",
|
|
3
|
+
"version": "8.11.0",
|
|
4
4
|
"description": "A spelling dictionary library useful for checking words and getting suggestions.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"sideEffects": false,
|
|
@@ -22,12 +22,15 @@
|
|
|
22
22
|
],
|
|
23
23
|
"scripts": {
|
|
24
24
|
"clean": "shx rm -rf dist temp coverage \"*.tsbuildInfo\"",
|
|
25
|
-
"build": "tsc -
|
|
25
|
+
"build": "tsc -p .",
|
|
26
26
|
"clean-build": "pnpm run clean && pnpm run build",
|
|
27
27
|
"coverage": "vitest run --coverage",
|
|
28
28
|
"test:watch": "vitest",
|
|
29
29
|
"test": "vitest run",
|
|
30
|
-
"
|
|
30
|
+
"test:perf": "NODE_ENV=production insight --register ts-node/esm --file \"**/*.perf.{mts,ts}\" -t 500",
|
|
31
|
+
"test:perf:js": "NODE_ENV=production insight -t 500",
|
|
32
|
+
"test:perf:prof": "NODE_ENV=production node --cpu-prof ../../node_modules/perf-insight/bin.mjs",
|
|
33
|
+
"watch": "tsc -p . -w "
|
|
31
34
|
},
|
|
32
35
|
"repository": {
|
|
33
36
|
"type": "git",
|
|
@@ -47,11 +50,14 @@
|
|
|
47
50
|
"node": ">=18"
|
|
48
51
|
},
|
|
49
52
|
"dependencies": {
|
|
50
|
-
"@cspell/cspell-pipe": "8.10.2",
|
|
51
|
-
"@cspell/cspell-types": "8.10.2",
|
|
52
|
-
"cspell-trie-lib": "8.10.2",
|
|
53
|
+
"@cspell/cspell-pipe": "8.11.0",
|
|
54
|
+
"@cspell/cspell-types": "8.11.0",
|
|
55
|
+
"cspell-trie-lib": "8.11.0",
|
|
53
56
|
"fast-equals": "^5.0.1",
|
|
54
57
|
"gensequence": "^7.0.0"
|
|
55
58
|
},
|
|
56
|
-
"
|
|
59
|
+
"devDependencies": {
|
|
60
|
+
"lorem-ipsum": "^2.0.8"
|
|
61
|
+
},
|
|
62
|
+
"gitHead": "2b85b2b458b1117870a4f0aee18fb45ce991848d"
|
|
57
63
|
}
|