hunspell-reader 6.22.0 → 6.23.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/affToDicInfo.d.ts +4 -0
- package/dist/affToDicInfo.js +149 -0
- package/dist/app.js +6 -165
- package/dist/commandDictInfo.d.ts +3 -0
- package/dist/commandDictInfo.js +26 -0
- package/dist/commandWords.d.ts +3 -0
- package/dist/commandWords.js +176 -0
- package/dist/index.d.ts +1 -1
- package/dist/index.js +5 -17
- package/dist/textUtils.d.ts +18 -0
- package/dist/textUtils.js +84 -0
- package/package.json +9 -7
|
@@ -0,0 +1,149 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.affToDicInfo = void 0;
|
|
4
|
+
const sync_1 = require("@cspell/cspell-pipe/sync");
|
|
5
|
+
const textUtils_1 = require("./textUtils");
|
|
6
|
+
function affToDicInfo(aff, locale) {
|
|
7
|
+
const alphabetInfo = extractAlphabet(aff, locale);
|
|
8
|
+
return {
|
|
9
|
+
...alphabetInfo,
|
|
10
|
+
...extractSuggestionEditCosts(aff, alphabetInfo),
|
|
11
|
+
locale,
|
|
12
|
+
alphabet: (0, textUtils_1.toRange)(alphabetInfo.alphabet, 5),
|
|
13
|
+
accents: (0, textUtils_1.toRange)([...alphabetInfo.accents].sort().join('')),
|
|
14
|
+
};
|
|
15
|
+
}
|
|
16
|
+
exports.affToDicInfo = affToDicInfo;
|
|
17
|
+
function extractAlphabet(aff, locale) {
|
|
18
|
+
const sources = [
|
|
19
|
+
aff.MAP,
|
|
20
|
+
aff.TRY,
|
|
21
|
+
aff.KEY,
|
|
22
|
+
aff.REP?.flatMap((rep) => [rep.match, rep.replaceWith]),
|
|
23
|
+
aff.ICONV?.flatMap((cov) => [cov.from, cov.to]),
|
|
24
|
+
aff.OCONV?.flatMap((cov) => [cov.from, cov.to]),
|
|
25
|
+
extractFxLetters(aff.PFX),
|
|
26
|
+
extractFxLetters(aff.SFX),
|
|
27
|
+
];
|
|
28
|
+
const setOfLetters = new Set(sources
|
|
29
|
+
.filter(isDefined)
|
|
30
|
+
.flatMap((a) => a)
|
|
31
|
+
.map((a) => a.normalize())
|
|
32
|
+
.flatMap((a) => [...a, ...a.toLocaleLowerCase(locale), ...a.toLocaleUpperCase(locale)])
|
|
33
|
+
.map((a) => a.trim())
|
|
34
|
+
.filter((a) => !!a));
|
|
35
|
+
const alphabet = [...setOfLetters].sort().join('').replace(/\P{L}/gu, '');
|
|
36
|
+
const accents = new Set(alphabet.normalize('NFD').replace(/\P{M}/gu, ''));
|
|
37
|
+
return { locale, alphabet, accents };
|
|
38
|
+
}
|
|
39
|
+
function isDefined(a) {
|
|
40
|
+
return a !== undefined;
|
|
41
|
+
}
|
|
42
|
+
function extractSuggestionEditCosts(aff, alphaInfo) {
|
|
43
|
+
const suggestionEditCosts = [];
|
|
44
|
+
suggestionEditCosts.push(...calcCapsAndAccentReplacements(alphaInfo), ...calcAffMapReplacements(aff), ...calcAffRepReplacements(aff));
|
|
45
|
+
return {
|
|
46
|
+
suggestionEditCosts,
|
|
47
|
+
};
|
|
48
|
+
}
|
|
49
|
+
function calcAffMapReplacements(aff) {
|
|
50
|
+
if (!aff.MAP)
|
|
51
|
+
return [];
|
|
52
|
+
const map = aff.MAP.sort().join('|');
|
|
53
|
+
return [{ map, replace: 1, description: 'Hunspell Aff Map' }];
|
|
54
|
+
}
|
|
55
|
+
function calcAffRepReplacements(aff) {
|
|
56
|
+
if (!aff.REP)
|
|
57
|
+
return [];
|
|
58
|
+
return createCostMaps(aff.REP.map((rep) => [rep.match, rep.replaceWith]), { map: '', replace: 75, description: 'Hunspell Replace Map' });
|
|
59
|
+
}
|
|
60
|
+
function calcCapsAndAccentReplacements(alphaInfo) {
|
|
61
|
+
const { locale, alphabet } = alphaInfo;
|
|
62
|
+
const letters = [...alphabet];
|
|
63
|
+
const capForms = letters.map((letter) => calcCapitalizationForms(letter, locale));
|
|
64
|
+
const accentForms = calcAccentForms(letters);
|
|
65
|
+
const mapCrossAccent = calcCrossAccentCapsMap(accentForms, locale);
|
|
66
|
+
return [
|
|
67
|
+
...createCostMaps(capForms, { map: '', replace: 1, description: 'Capitalization change.' }),
|
|
68
|
+
...createCostMaps(accentForms, { map: '', replace: 1, description: 'Replace Accents' }),
|
|
69
|
+
...createCostMaps(mapCrossAccent, { map: '', replace: 2, description: 'Capitalization and Accent change.' }),
|
|
70
|
+
];
|
|
71
|
+
}
|
|
72
|
+
function createCostMaps(formMaps, base) {
|
|
73
|
+
const forms = formMaps.map((forms) => joinCharMap(forms));
|
|
74
|
+
const mapValues = [...new Set(forms)].sort().filter((a) => !!a);
|
|
75
|
+
return [...groupsOfN(mapValues, 6)].map((mapValues) => ({ ...base, map: mapValues.join('|') }));
|
|
76
|
+
}
|
|
77
|
+
function calcCapitalizationForms(letter, locale) {
|
|
78
|
+
const forms = new Set();
|
|
79
|
+
forms.add(letter);
|
|
80
|
+
forms.add(letter.toUpperCase());
|
|
81
|
+
forms.add(letter.toLowerCase());
|
|
82
|
+
forms.add(letter.toLocaleUpperCase(locale));
|
|
83
|
+
forms.add(letter.toLocaleLowerCase(locale));
|
|
84
|
+
forms.add(letter.toLocaleUpperCase(locale).toLocaleLowerCase(locale));
|
|
85
|
+
forms.add(letter.toLocaleLowerCase(locale).toLocaleUpperCase(locale));
|
|
86
|
+
return forms;
|
|
87
|
+
}
|
|
88
|
+
function calcAccentForms(letters) {
|
|
89
|
+
const forms = new Map();
|
|
90
|
+
function getForm(letter) {
|
|
91
|
+
const f = forms.get(letter);
|
|
92
|
+
if (f)
|
|
93
|
+
return f;
|
|
94
|
+
const s = new Set();
|
|
95
|
+
forms.set(letter, s);
|
|
96
|
+
return s;
|
|
97
|
+
}
|
|
98
|
+
for (const letter of letters) {
|
|
99
|
+
const base = (0, textUtils_1.removeAccents)(letter);
|
|
100
|
+
const formCollection = getForm(base);
|
|
101
|
+
formCollection.add(base);
|
|
102
|
+
formCollection.add(letter);
|
|
103
|
+
// addAccents(base, accents, formCollection);
|
|
104
|
+
}
|
|
105
|
+
return [...forms.values()].filter((s) => s.size > 1);
|
|
106
|
+
}
|
|
107
|
+
function joinCharMap(values) {
|
|
108
|
+
return [...values]
|
|
109
|
+
.sort()
|
|
110
|
+
.map((a) => (a.length > 1 ? '(' + a + ')' : a))
|
|
111
|
+
.join('');
|
|
112
|
+
}
|
|
113
|
+
function calcCrossAccentCapsMap(accentForms, locale) {
|
|
114
|
+
function calc(form) {
|
|
115
|
+
return new Set((0, sync_1.pipe)(form, (0, sync_1.opConcatMap)((letter) => calcCapitalizationForms(letter, locale))));
|
|
116
|
+
}
|
|
117
|
+
const values = (0, sync_1.pipe)(accentForms, (0, sync_1.opMap)(calc));
|
|
118
|
+
return [...values];
|
|
119
|
+
}
|
|
120
|
+
// function addAccents(cleanLetter: string, accents: Iterable<string>, collection: Set<string>) {
|
|
121
|
+
// for (const accent of accents) {
|
|
122
|
+
// collection.add(applyAccent(cleanLetter, accent));
|
|
123
|
+
// }
|
|
124
|
+
// }
|
|
125
|
+
// function applyAccent(letter: string, accent: string): string {
|
|
126
|
+
// const withAccent = (letter + accent).normalize('NFC');
|
|
127
|
+
// return removeLooseAccents(withAccent);
|
|
128
|
+
// }
|
|
129
|
+
function extractFxLetters(fxm) {
|
|
130
|
+
if (!fxm)
|
|
131
|
+
return undefined;
|
|
132
|
+
const substations = (0, sync_1.pipe)(fxm.values(), (0, sync_1.opConcatMap)((f) => f.substitutionSets.values()), (0, sync_1.opConcatMap)((s) => s.substitutions));
|
|
133
|
+
const partials = (0, sync_1.pipe)(substations, (0, sync_1.opConcatMap)((sub) => [sub.remove, sub.attach]));
|
|
134
|
+
return [...partials];
|
|
135
|
+
}
|
|
136
|
+
function* groupsOfN(values, n) {
|
|
137
|
+
let buffer = [];
|
|
138
|
+
for (const item of values) {
|
|
139
|
+
buffer.push(item);
|
|
140
|
+
if (buffer.length >= n) {
|
|
141
|
+
yield buffer;
|
|
142
|
+
buffer = [];
|
|
143
|
+
}
|
|
144
|
+
}
|
|
145
|
+
if (buffer.length) {
|
|
146
|
+
yield buffer;
|
|
147
|
+
}
|
|
148
|
+
}
|
|
149
|
+
//# sourceMappingURL=affToDicInfo.js.map
|
package/dist/app.js
CHANGED
|
@@ -1,173 +1,14 @@
|
|
|
1
1
|
"use strict";
|
|
2
2
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
-
// cSpell:ignore findup
|
|
4
3
|
const commander_1 = require("commander");
|
|
5
|
-
const
|
|
6
|
-
const
|
|
7
|
-
const aff_1 = require("./aff");
|
|
8
|
-
const IterableHunspellReader_1 = require("./IterableHunspellReader");
|
|
9
|
-
const iterableToStream_1 = require("./iterableToStream");
|
|
10
|
-
const util_1 = require("./util");
|
|
11
|
-
const uniqueHistorySize = 500000;
|
|
4
|
+
const commandDictInfo_1 = require("./commandDictInfo");
|
|
5
|
+
const commandWords_1 = require("./commandWords");
|
|
12
6
|
// eslint-disable-next-line @typescript-eslint/no-var-requires
|
|
13
7
|
const packageInfo = require('../package.json');
|
|
14
8
|
const version = packageInfo['version'];
|
|
15
|
-
let displayHelp = true;
|
|
16
|
-
let logStream = process.stderr;
|
|
17
9
|
commander_1.program.version(version);
|
|
18
|
-
commander_1.program
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
.option('-u, --unique', 'make sure the words are unique.')
|
|
23
|
-
.option('-l, --lower_case', 'output in lower case')
|
|
24
|
-
.option('-T, --no-transform', 'Do not apply the prefix and suffix transforms. Root words only.')
|
|
25
|
-
.option('-x, --infix', 'Return words with prefix / suffix breaks. ex: "un<do>ing"')
|
|
26
|
-
.option('-r, --rules', 'Append rules used to generate word.')
|
|
27
|
-
.option('-p, --progress', 'Show progress.')
|
|
28
|
-
.option('-m, --max_depth <limit>', 'Maximum depth to apply suffix rules.')
|
|
29
|
-
.option('-n, --number <limit>', 'Limit the number of words to output.')
|
|
30
|
-
.option('--forbidden', 'include forbidden words')
|
|
31
|
-
.option('--partial_compounds', 'include words that must be part of a compound word')
|
|
32
|
-
.option('--only_forbidden', 'includes only words that are forbidden')
|
|
33
|
-
.description('Output all the words in the <hunspell.dic> file.')
|
|
34
|
-
.action(action);
|
|
35
|
-
commander_1.program.parse(process.argv);
|
|
36
|
-
if (displayHelp) {
|
|
37
|
-
commander_1.program.help();
|
|
38
|
-
}
|
|
39
|
-
function notify(message, newLine = true) {
|
|
40
|
-
message = message + (newLine ? '\n' : '');
|
|
41
|
-
logStream.write(message, 'utf-8');
|
|
42
|
-
}
|
|
43
|
-
function yesNo(value) {
|
|
44
|
-
return value ? 'Yes' : 'No';
|
|
45
|
-
}
|
|
46
|
-
function affWordToInfix(aff) {
|
|
47
|
-
return { ...aff, word: aff.prefix + '<' + aff.base + '>' + aff.suffix };
|
|
48
|
-
}
|
|
49
|
-
function mapWord(map) {
|
|
50
|
-
return (aff) => ({ ...aff, word: map(aff.word) });
|
|
51
|
-
}
|
|
52
|
-
function appendRules(aff) {
|
|
53
|
-
return { ...aff, word: aff.word + '\t[' + aff.rulesApplied + ' ]\t' + '(' + aff.dic + ')' };
|
|
54
|
-
}
|
|
55
|
-
function writeSeqToFile(seq, outFile) {
|
|
56
|
-
return new Promise((resolve, reject) => {
|
|
57
|
-
let resolved = false;
|
|
58
|
-
const out = outFile ? (0, fs_1.createWriteStream)(outFile) : process.stdout;
|
|
59
|
-
const bufferedSeq = (0, gensequence_1.genSequence)((0, util_1.batch)(seq, 500)).map((batch) => batch.join(''));
|
|
60
|
-
const dataStream = (0, iterableToStream_1.iterableToStream)(bufferedSeq);
|
|
61
|
-
const fileStream = dataStream.pipe(out);
|
|
62
|
-
const endEvents = ['finish', 'close', 'end'];
|
|
63
|
-
function resolvePromise() {
|
|
64
|
-
if (!resolved) {
|
|
65
|
-
resolved = true;
|
|
66
|
-
resolve();
|
|
67
|
-
}
|
|
68
|
-
}
|
|
69
|
-
const endHandler = () => {
|
|
70
|
-
cleanupStreams();
|
|
71
|
-
setTimeout(resolvePromise, 10);
|
|
72
|
-
};
|
|
73
|
-
const errorHandler = (e) => {
|
|
74
|
-
cleanupStreams();
|
|
75
|
-
reject(e);
|
|
76
|
-
};
|
|
77
|
-
listenToStreams();
|
|
78
|
-
function listenToStreams() {
|
|
79
|
-
endEvents.forEach((event) => fileStream.addListener(event, endHandler));
|
|
80
|
-
fileStream.addListener('error', errorHandler);
|
|
81
|
-
dataStream.addListener('end', endHandler);
|
|
82
|
-
}
|
|
83
|
-
function cleanupStreams() {
|
|
84
|
-
endEvents.forEach((event) => fileStream.removeListener(event, endHandler));
|
|
85
|
-
fileStream.removeListener('error', errorHandler);
|
|
86
|
-
dataStream.removeListener('end', endHandler);
|
|
87
|
-
}
|
|
88
|
-
});
|
|
89
|
-
}
|
|
90
|
-
function action(hunspellDicFilename, options) {
|
|
91
|
-
return actionPrime(hunspellDicFilename, options).catch((reason) => {
|
|
92
|
-
if (reason.code === 'EPIPE') {
|
|
93
|
-
console.log(reason);
|
|
94
|
-
return;
|
|
95
|
-
}
|
|
96
|
-
console.error(reason);
|
|
97
|
-
});
|
|
98
|
-
}
|
|
99
|
-
async function actionPrime(hunspellDicFilename, options) {
|
|
100
|
-
displayHelp = false;
|
|
101
|
-
const { sort = false, unique = false, output: outputFile, lower_case: lowerCase = false, transform = true, infix = false, rules = false, progress: showProgress = false, max_depth, forbidden = false, only_forbidden: onlyForbidden = false, partial_compounds: partialCompoundsAllowed = false, } = options;
|
|
102
|
-
logStream = outputFile ? process.stdout : process.stderr;
|
|
103
|
-
const log = notify;
|
|
104
|
-
log('Write words');
|
|
105
|
-
log(`Sort: ${yesNo(sort)}`);
|
|
106
|
-
log(`Unique: ${yesNo(unique)}`);
|
|
107
|
-
const baseFile = hunspellDicFilename.replace(/\.(dic|aff)$/, '');
|
|
108
|
-
const dicFile = baseFile + '.dic';
|
|
109
|
-
const affFile = baseFile + '.aff';
|
|
110
|
-
log(`Dic file: ${dicFile}`);
|
|
111
|
-
log(`Aff file: ${affFile}`);
|
|
112
|
-
log(`Generating Words...`);
|
|
113
|
-
const reader = await IterableHunspellReader_1.IterableHunspellReader.createFromFiles(affFile, dicFile);
|
|
114
|
-
if (max_depth && Number.parseInt(max_depth) >= 0) {
|
|
115
|
-
reader.maxDepth = Number.parseInt(max_depth);
|
|
116
|
-
}
|
|
117
|
-
const transformers = [];
|
|
118
|
-
const filters = [];
|
|
119
|
-
if (!forbidden && !onlyForbidden)
|
|
120
|
-
filters.push((aff) => !aff.flags.isForbiddenWord);
|
|
121
|
-
if (onlyForbidden)
|
|
122
|
-
filters.push((aff) => !!aff.flags.isForbiddenWord);
|
|
123
|
-
if (!partialCompoundsAllowed)
|
|
124
|
-
filters.push((aff) => !aff.flags.isOnlyAllowedInCompound);
|
|
125
|
-
if (infix) {
|
|
126
|
-
transformers.push(affWordToInfix);
|
|
127
|
-
}
|
|
128
|
-
if (lowerCase) {
|
|
129
|
-
transformers.push(mapWord((a) => a.toLowerCase()));
|
|
130
|
-
}
|
|
131
|
-
if (rules) {
|
|
132
|
-
transformers.push(appendRules);
|
|
133
|
-
}
|
|
134
|
-
transformers.push(mapWord((a) => a.trim()));
|
|
135
|
-
const dicSize = reader.dic.length;
|
|
136
|
-
let current = 0;
|
|
137
|
-
const calcProgress = () => '\r' + current + ' / ' + dicSize;
|
|
138
|
-
const reportProgressRate = 253;
|
|
139
|
-
const callback = showProgress
|
|
140
|
-
? () => {
|
|
141
|
-
current++;
|
|
142
|
-
!(current % reportProgressRate) && process.stderr.write(calcProgress(), 'utf-8');
|
|
143
|
-
}
|
|
144
|
-
: () => {
|
|
145
|
-
/* void */
|
|
146
|
-
};
|
|
147
|
-
const seqWords = transform ? reader.seqAffWords(callback) : reader.seqRootWords().map(aff_1.asAffWord);
|
|
148
|
-
const filterUnique = unique ? (0, util_1.uniqueFilter)(uniqueHistorySize) : (_) => true;
|
|
149
|
-
const applyTransformers = (aff) => transformers.reduce((aff, fn) => fn(aff), aff);
|
|
150
|
-
const applyFilters = (aff) => filters.reduce((cur, fn) => cur && fn(aff), true);
|
|
151
|
-
const allWords = seqWords
|
|
152
|
-
.filter(applyFilters)
|
|
153
|
-
.map(applyTransformers)
|
|
154
|
-
.map((a) => a.word)
|
|
155
|
-
.filter((a) => !!a)
|
|
156
|
-
.filter(filterUnique)
|
|
157
|
-
.map((a) => a + '\n');
|
|
158
|
-
const words = options.number ? allWords.take(Number.parseInt(options.number)) : allWords;
|
|
159
|
-
if (sort) {
|
|
160
|
-
log('Sorting...');
|
|
161
|
-
const data = words.toArray().sort().join('');
|
|
162
|
-
const fd = outputFile ? (0, fs_1.openSync)(outputFile, 'w') : 1;
|
|
163
|
-
(0, fs_1.writeSync)(fd, data);
|
|
164
|
-
}
|
|
165
|
-
else {
|
|
166
|
-
await writeSeqToFile(words, outputFile);
|
|
167
|
-
}
|
|
168
|
-
if (showProgress) {
|
|
169
|
-
console.error(calcProgress());
|
|
170
|
-
}
|
|
171
|
-
log('Done.');
|
|
172
|
-
}
|
|
10
|
+
commander_1.program.addCommand((0, commandWords_1.getCommand)());
|
|
11
|
+
commander_1.program.addCommand((0, commandDictInfo_1.getCommand)());
|
|
12
|
+
commander_1.program.showHelpAfterError();
|
|
13
|
+
commander_1.program.parseAsync(process.argv);
|
|
173
14
|
//# sourceMappingURL=app.js.map
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.getCommand = void 0;
|
|
4
|
+
// cSpell:ignore findup
|
|
5
|
+
const commander_1 = require("commander");
|
|
6
|
+
const affReader_1 = require("./affReader");
|
|
7
|
+
const affToDicInfo_1 = require("./affToDicInfo");
|
|
8
|
+
const textUtils_1 = require("./textUtils");
|
|
9
|
+
function getCommand() {
|
|
10
|
+
const commander = new commander_1.Command('cspell-dict-info');
|
|
11
|
+
commander
|
|
12
|
+
.arguments('<hunspell_aff_file> <locale>')
|
|
13
|
+
.description('Display the CSpell Dictionary Information')
|
|
14
|
+
.action(action);
|
|
15
|
+
return commander;
|
|
16
|
+
}
|
|
17
|
+
exports.getCommand = getCommand;
|
|
18
|
+
async function action(hunspellFile, locale) {
|
|
19
|
+
const baseFile = hunspellFile.replace(/\.(dic|aff)$/, '');
|
|
20
|
+
const affFile = baseFile + '.aff';
|
|
21
|
+
const aff = await (0, affReader_1.parseAffFile)(affFile);
|
|
22
|
+
const info = (0, affToDicInfo_1.affToDicInfo)(aff, locale);
|
|
23
|
+
const rawJson = JSON.stringify(info, null, 2);
|
|
24
|
+
console.log((0, textUtils_1.escapeUnicodeCode)(rawJson));
|
|
25
|
+
}
|
|
26
|
+
//# sourceMappingURL=commandDictInfo.js.map
|
|
@@ -0,0 +1,176 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.getCommand = void 0;
|
|
4
|
+
// cSpell:ignore findup
|
|
5
|
+
const commander_1 = require("commander");
|
|
6
|
+
const fs_1 = require("fs");
|
|
7
|
+
const gensequence_1 = require("gensequence");
|
|
8
|
+
const aff_1 = require("./aff");
|
|
9
|
+
const IterableHunspellReader_1 = require("./IterableHunspellReader");
|
|
10
|
+
const iterableToStream_1 = require("./iterableToStream");
|
|
11
|
+
const util_1 = require("./util");
|
|
12
|
+
const uniqueHistorySize = 500000;
|
|
13
|
+
let logStream = process.stderr;
|
|
14
|
+
function getCommand() {
|
|
15
|
+
const commander = new commander_1.Command('words');
|
|
16
|
+
commander
|
|
17
|
+
.arguments('<hunspell_dic_file>')
|
|
18
|
+
.option('-o, --output <file>', 'output file - defaults to stdout')
|
|
19
|
+
.option('-s, --sort', 'sort the list of words')
|
|
20
|
+
.option('-u, --unique', 'make sure the words are unique.')
|
|
21
|
+
.option('-l, --lower_case', 'output in lower case')
|
|
22
|
+
.option('-T, --no-transform', 'Do not apply the prefix and suffix transforms. Root words only.')
|
|
23
|
+
.option('-x, --infix', 'Return words with prefix / suffix breaks. ex: "un<do>ing"')
|
|
24
|
+
.option('-r, --rules', 'Append rules used to generate word.')
|
|
25
|
+
.option('-p, --progress', 'Show progress.')
|
|
26
|
+
.option('-m, --max_depth <limit>', 'Maximum depth to apply suffix rules.')
|
|
27
|
+
.option('-n, --number <limit>', 'Limit the number of words to output.')
|
|
28
|
+
.option('--forbidden', 'include forbidden words')
|
|
29
|
+
.option('--partial_compounds', 'include words that must be part of a compound word')
|
|
30
|
+
.option('--only_forbidden', 'includes only words that are forbidden')
|
|
31
|
+
.description('Output all the words in the <hunspell.dic> file.')
|
|
32
|
+
.action(action);
|
|
33
|
+
return commander;
|
|
34
|
+
}
|
|
35
|
+
exports.getCommand = getCommand;
|
|
36
|
+
function notify(message, newLine = true) {
|
|
37
|
+
message = message + (newLine ? '\n' : '');
|
|
38
|
+
logStream.write(message, 'utf-8');
|
|
39
|
+
}
|
|
40
|
+
function yesNo(value) {
|
|
41
|
+
return value ? 'Yes' : 'No';
|
|
42
|
+
}
|
|
43
|
+
function affWordToInfix(aff) {
|
|
44
|
+
return { ...aff, word: aff.prefix + '<' + aff.base + '>' + aff.suffix };
|
|
45
|
+
}
|
|
46
|
+
function mapWord(map) {
|
|
47
|
+
return (aff) => ({ ...aff, word: map(aff.word) });
|
|
48
|
+
}
|
|
49
|
+
function appendRules(aff) {
|
|
50
|
+
return { ...aff, word: aff.word + '\t[' + aff.rulesApplied + ' ]\t' + '(' + aff.dic + ')' };
|
|
51
|
+
}
|
|
52
|
+
function writeSeqToFile(seq, outFile) {
|
|
53
|
+
return new Promise((resolve, reject) => {
|
|
54
|
+
let resolved = false;
|
|
55
|
+
const out = outFile ? (0, fs_1.createWriteStream)(outFile) : process.stdout;
|
|
56
|
+
const bufferedSeq = (0, gensequence_1.genSequence)((0, util_1.batch)(seq, 500)).map((batch) => batch.join(''));
|
|
57
|
+
const dataStream = (0, iterableToStream_1.iterableToStream)(bufferedSeq);
|
|
58
|
+
const fileStream = dataStream.pipe(out);
|
|
59
|
+
const endEvents = ['finish', 'close', 'end'];
|
|
60
|
+
function resolvePromise() {
|
|
61
|
+
if (!resolved) {
|
|
62
|
+
resolved = true;
|
|
63
|
+
resolve();
|
|
64
|
+
}
|
|
65
|
+
}
|
|
66
|
+
const endHandler = () => {
|
|
67
|
+
cleanupStreams();
|
|
68
|
+
setTimeout(resolvePromise, 10);
|
|
69
|
+
};
|
|
70
|
+
const errorHandler = (e) => {
|
|
71
|
+
cleanupStreams();
|
|
72
|
+
reject(e);
|
|
73
|
+
};
|
|
74
|
+
listenToStreams();
|
|
75
|
+
function listenToStreams() {
|
|
76
|
+
endEvents.forEach((event) => fileStream.addListener(event, endHandler));
|
|
77
|
+
fileStream.addListener('error', errorHandler);
|
|
78
|
+
dataStream.addListener('end', endHandler);
|
|
79
|
+
}
|
|
80
|
+
function cleanupStreams() {
|
|
81
|
+
endEvents.forEach((event) => fileStream.removeListener(event, endHandler));
|
|
82
|
+
fileStream.removeListener('error', errorHandler);
|
|
83
|
+
dataStream.removeListener('end', endHandler);
|
|
84
|
+
}
|
|
85
|
+
});
|
|
86
|
+
}
|
|
87
|
+
async function action(hunspellDicFilename, options) {
|
|
88
|
+
try {
|
|
89
|
+
await actionPrime(hunspellDicFilename, options);
|
|
90
|
+
}
|
|
91
|
+
catch (err) {
|
|
92
|
+
const reason = asError(err);
|
|
93
|
+
if (reason?.code === 'EPIPE') {
|
|
94
|
+
console.log(reason);
|
|
95
|
+
return;
|
|
96
|
+
}
|
|
97
|
+
throw err;
|
|
98
|
+
}
|
|
99
|
+
}
|
|
100
|
+
function asError(err) {
|
|
101
|
+
return err && typeof err === 'object' ? err : undefined;
|
|
102
|
+
}
|
|
103
|
+
async function actionPrime(hunspellDicFilename, options) {
|
|
104
|
+
const { sort = false, unique = false, output: outputFile, lower_case: lowerCase = false, transform = true, infix = false, rules = false, progress: showProgress = false, max_depth, forbidden = false, only_forbidden: onlyForbidden = false, partial_compounds: partialCompoundsAllowed = false, } = options;
|
|
105
|
+
logStream = outputFile ? process.stdout : process.stderr;
|
|
106
|
+
const log = notify;
|
|
107
|
+
log('Write words');
|
|
108
|
+
log(`Sort: ${yesNo(sort)}`);
|
|
109
|
+
log(`Unique: ${yesNo(unique)}`);
|
|
110
|
+
const baseFile = hunspellDicFilename.replace(/\.(dic|aff)$/, '');
|
|
111
|
+
const dicFile = baseFile + '.dic';
|
|
112
|
+
const affFile = baseFile + '.aff';
|
|
113
|
+
log(`Dic file: ${dicFile}`);
|
|
114
|
+
log(`Aff file: ${affFile}`);
|
|
115
|
+
log(`Generating Words...`);
|
|
116
|
+
const reader = await IterableHunspellReader_1.IterableHunspellReader.createFromFiles(affFile, dicFile);
|
|
117
|
+
if (max_depth && Number.parseInt(max_depth) >= 0) {
|
|
118
|
+
reader.maxDepth = Number.parseInt(max_depth);
|
|
119
|
+
}
|
|
120
|
+
const transformers = [];
|
|
121
|
+
const filters = [];
|
|
122
|
+
if (!forbidden && !onlyForbidden)
|
|
123
|
+
filters.push((aff) => !aff.flags.isForbiddenWord);
|
|
124
|
+
if (onlyForbidden)
|
|
125
|
+
filters.push((aff) => !!aff.flags.isForbiddenWord);
|
|
126
|
+
if (!partialCompoundsAllowed)
|
|
127
|
+
filters.push((aff) => !aff.flags.isOnlyAllowedInCompound);
|
|
128
|
+
if (infix) {
|
|
129
|
+
transformers.push(affWordToInfix);
|
|
130
|
+
}
|
|
131
|
+
if (lowerCase) {
|
|
132
|
+
transformers.push(mapWord((a) => a.toLowerCase()));
|
|
133
|
+
}
|
|
134
|
+
if (rules) {
|
|
135
|
+
transformers.push(appendRules);
|
|
136
|
+
}
|
|
137
|
+
transformers.push(mapWord((a) => a.trim()));
|
|
138
|
+
const dicSize = reader.dic.length;
|
|
139
|
+
let current = 0;
|
|
140
|
+
const calcProgress = () => '\r' + current + ' / ' + dicSize;
|
|
141
|
+
const reportProgressRate = 253;
|
|
142
|
+
const callback = showProgress
|
|
143
|
+
? () => {
|
|
144
|
+
current++;
|
|
145
|
+
!(current % reportProgressRate) && process.stderr.write(calcProgress(), 'utf-8');
|
|
146
|
+
}
|
|
147
|
+
: () => {
|
|
148
|
+
/* void */
|
|
149
|
+
};
|
|
150
|
+
const seqWords = transform ? reader.seqAffWords(callback) : reader.seqRootWords().map(aff_1.asAffWord);
|
|
151
|
+
const filterUnique = unique ? (0, util_1.uniqueFilter)(uniqueHistorySize) : (_) => true;
|
|
152
|
+
const applyTransformers = (aff) => transformers.reduce((aff, fn) => fn(aff), aff);
|
|
153
|
+
const applyFilters = (aff) => filters.reduce((cur, fn) => cur && fn(aff), true);
|
|
154
|
+
const allWords = seqWords
|
|
155
|
+
.filter(applyFilters)
|
|
156
|
+
.map(applyTransformers)
|
|
157
|
+
.map((a) => a.word)
|
|
158
|
+
.filter((a) => !!a)
|
|
159
|
+
.filter(filterUnique)
|
|
160
|
+
.map((a) => a + '\n');
|
|
161
|
+
const words = options.number ? allWords.take(Number.parseInt(options.number)) : allWords;
|
|
162
|
+
if (sort) {
|
|
163
|
+
log('Sorting...');
|
|
164
|
+
const data = words.toArray().sort().join('');
|
|
165
|
+
const fd = outputFile ? (0, fs_1.openSync)(outputFile, 'w') : 1;
|
|
166
|
+
(0, fs_1.writeSync)(fd, data);
|
|
167
|
+
}
|
|
168
|
+
else {
|
|
169
|
+
await writeSeqToFile(words, outputFile);
|
|
170
|
+
}
|
|
171
|
+
if (showProgress) {
|
|
172
|
+
console.error(calcProgress());
|
|
173
|
+
}
|
|
174
|
+
log('Done.');
|
|
175
|
+
}
|
|
176
|
+
//# sourceMappingURL=commandWords.js.map
|
package/dist/index.d.ts
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
export type { AffInfo, AffWord } from './affDef';
|
|
2
2
|
export { parseAff, parseAffFile as readAffFile } from './affReader';
|
|
3
|
-
export
|
|
3
|
+
export { createMatchingWordsFilter, type HunspellSrcData, IterableHunspellReader, type WordInfo, } from './IterableHunspellReader';
|
|
4
4
|
export { IterableHunspellReader as HunspellReader } from './IterableHunspellReader';
|
|
5
5
|
//# sourceMappingURL=index.d.ts.map
|
package/dist/index.js
CHANGED
|
@@ -1,24 +1,12 @@
|
|
|
1
1
|
"use strict";
|
|
2
|
-
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
|
|
3
|
-
if (k2 === undefined) k2 = k;
|
|
4
|
-
var desc = Object.getOwnPropertyDescriptor(m, k);
|
|
5
|
-
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
|
|
6
|
-
desc = { enumerable: true, get: function() { return m[k]; } };
|
|
7
|
-
}
|
|
8
|
-
Object.defineProperty(o, k2, desc);
|
|
9
|
-
}) : (function(o, m, k, k2) {
|
|
10
|
-
if (k2 === undefined) k2 = k;
|
|
11
|
-
o[k2] = m[k];
|
|
12
|
-
}));
|
|
13
|
-
var __exportStar = (this && this.__exportStar) || function(m, exports) {
|
|
14
|
-
for (var p in m) if (p !== "default" && !Object.prototype.hasOwnProperty.call(exports, p)) __createBinding(exports, m, p);
|
|
15
|
-
};
|
|
16
2
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
17
|
-
exports.HunspellReader = exports.readAffFile = exports.parseAff = void 0;
|
|
3
|
+
exports.HunspellReader = exports.IterableHunspellReader = exports.createMatchingWordsFilter = exports.readAffFile = exports.parseAff = void 0;
|
|
18
4
|
var affReader_1 = require("./affReader");
|
|
19
5
|
Object.defineProperty(exports, "parseAff", { enumerable: true, get: function () { return affReader_1.parseAff; } });
|
|
20
6
|
Object.defineProperty(exports, "readAffFile", { enumerable: true, get: function () { return affReader_1.parseAffFile; } });
|
|
21
|
-
__exportStar(require("./IterableHunspellReader"), exports);
|
|
22
7
|
var IterableHunspellReader_1 = require("./IterableHunspellReader");
|
|
23
|
-
Object.defineProperty(exports, "
|
|
8
|
+
Object.defineProperty(exports, "createMatchingWordsFilter", { enumerable: true, get: function () { return IterableHunspellReader_1.createMatchingWordsFilter; } });
|
|
9
|
+
Object.defineProperty(exports, "IterableHunspellReader", { enumerable: true, get: function () { return IterableHunspellReader_1.IterableHunspellReader; } });
|
|
10
|
+
var IterableHunspellReader_2 = require("./IterableHunspellReader");
|
|
11
|
+
Object.defineProperty(exports, "HunspellReader", { enumerable: true, get: function () { return IterableHunspellReader_2.IterableHunspellReader; } });
|
|
24
12
|
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Escape Unicode Characters
|
|
3
|
+
* @param text
|
|
4
|
+
* @param regexp
|
|
5
|
+
* @returns
|
|
6
|
+
*/
|
|
7
|
+
export declare function escapeUnicodeCode(text: string, regexp?: RegExp): string;
|
|
8
|
+
/**
|
|
9
|
+
* Converts a string of letters in ranges.
|
|
10
|
+
*
|
|
11
|
+
* `abcde` => `a-e`
|
|
12
|
+
*
|
|
13
|
+
* @param letters - sorted letters
|
|
14
|
+
*/
|
|
15
|
+
export declare function toRange(letters: string, minLength?: number): string;
|
|
16
|
+
export declare function removeAccents(text: string): string;
|
|
17
|
+
export declare function removeLooseAccents(text: string): string;
|
|
18
|
+
//# sourceMappingURL=textUtils.d.ts.map
|
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.removeLooseAccents = exports.removeAccents = exports.toRange = exports.escapeUnicodeCode = void 0;
|
|
4
|
+
/**
|
|
5
|
+
* Escape Unicode Characters
|
|
6
|
+
* @param text
|
|
7
|
+
* @param regexp
|
|
8
|
+
* @returns
|
|
9
|
+
*/
|
|
10
|
+
function escapeUnicodeCode(text, regexp = /\p{M}/gu) {
|
|
11
|
+
return text.replace(regexp, replaceWithUnicode);
|
|
12
|
+
}
|
|
13
|
+
exports.escapeUnicodeCode = escapeUnicodeCode;
|
|
14
|
+
function replaceWithUnicode(substring) {
|
|
15
|
+
const start = 0x20;
|
|
16
|
+
const end = 0x7a;
|
|
17
|
+
let val = '';
|
|
18
|
+
for (let i = 0; i < substring.length; ++i) {
|
|
19
|
+
const char = substring[i];
|
|
20
|
+
const code = char.charCodeAt(0);
|
|
21
|
+
if (code >= start && code <= end) {
|
|
22
|
+
val += char;
|
|
23
|
+
continue;
|
|
24
|
+
}
|
|
25
|
+
const hex = '0000' + code.toString(16);
|
|
26
|
+
val += code < 256 ? '\\x' + hex.slice(-2) : '\\u' + hex.slice(-4);
|
|
27
|
+
}
|
|
28
|
+
return val;
|
|
29
|
+
}
|
|
30
|
+
/**
|
|
31
|
+
* Converts a string of letters in ranges.
|
|
32
|
+
*
|
|
33
|
+
* `abcde` => `a-e`
|
|
34
|
+
*
|
|
35
|
+
* @param letters - sorted letters
|
|
36
|
+
*/
|
|
37
|
+
function toRange(letters, minLength = 4) {
|
|
38
|
+
const chars = [];
|
|
39
|
+
let begin = 0;
|
|
40
|
+
let end = 0;
|
|
41
|
+
let endChar = '';
|
|
42
|
+
const minDiff = Math.max(minLength - 2, 1);
|
|
43
|
+
function fill() {
|
|
44
|
+
if (!(end - begin > 1))
|
|
45
|
+
return;
|
|
46
|
+
if (end - begin > minDiff) {
|
|
47
|
+
chars.push('-');
|
|
48
|
+
return;
|
|
49
|
+
}
|
|
50
|
+
for (let code = begin + 1; code < end; code += 1) {
|
|
51
|
+
chars.push(String.fromCharCode(code));
|
|
52
|
+
}
|
|
53
|
+
}
|
|
54
|
+
function pushRange() {
|
|
55
|
+
fill();
|
|
56
|
+
chars.push(endChar);
|
|
57
|
+
endChar = '';
|
|
58
|
+
}
|
|
59
|
+
for (let i = 0; i < letters.length; ++i) {
|
|
60
|
+
const letter = letters[i];
|
|
61
|
+
const code = letter.charCodeAt(0);
|
|
62
|
+
if (code - end === 1) {
|
|
63
|
+
end = code;
|
|
64
|
+
endChar = letter;
|
|
65
|
+
continue;
|
|
66
|
+
}
|
|
67
|
+
pushRange();
|
|
68
|
+
chars.push(letter);
|
|
69
|
+
begin = code;
|
|
70
|
+
end = code;
|
|
71
|
+
}
|
|
72
|
+
pushRange();
|
|
73
|
+
return chars.join('');
|
|
74
|
+
}
|
|
75
|
+
exports.toRange = toRange;
|
|
76
|
+
function removeAccents(text) {
|
|
77
|
+
return removeLooseAccents(text.normalize('NFD'));
|
|
78
|
+
}
|
|
79
|
+
exports.removeAccents = removeAccents;
|
|
80
|
+
function removeLooseAccents(text) {
|
|
81
|
+
return text.replace(/\p{M}/gu, '');
|
|
82
|
+
}
|
|
83
|
+
exports.removeLooseAccents = removeLooseAccents;
|
|
84
|
+
//# sourceMappingURL=textUtils.js.map
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "hunspell-reader",
|
|
3
|
-
"version": "6.
|
|
3
|
+
"version": "6.23.1",
|
|
4
4
|
"description": "A library for reading Hunspell Dictionary Files",
|
|
5
5
|
"bin": "bin.js",
|
|
6
6
|
"main": "dist/index.js",
|
|
@@ -25,7 +25,7 @@
|
|
|
25
25
|
},
|
|
26
26
|
"repository": {
|
|
27
27
|
"type": "git",
|
|
28
|
-
"url": "git+https://github.com/
|
|
28
|
+
"url": "git+https://github.com/streetsidesoftware/cspell.git"
|
|
29
29
|
},
|
|
30
30
|
"keywords": [
|
|
31
31
|
"Hunspell"
|
|
@@ -33,17 +33,19 @@
|
|
|
33
33
|
"author": "Jason Dent",
|
|
34
34
|
"license": "MIT",
|
|
35
35
|
"bugs": {
|
|
36
|
-
"url": "https://github.com/
|
|
36
|
+
"url": "https://github.com/streetsidesoftware/cspell/issues"
|
|
37
37
|
},
|
|
38
|
-
"homepage": "https://github.com/
|
|
38
|
+
"homepage": "https://github.com/streetsidesoftware/cspell/tree/main/packages/hunspell-reader#readme",
|
|
39
39
|
"devDependencies": {
|
|
40
40
|
"@types/jest": "^29.4.0",
|
|
41
|
-
"@types/node": "^18.
|
|
42
|
-
"jest": "^29.4.
|
|
41
|
+
"@types/node": "^18.13.0",
|
|
42
|
+
"jest": "^29.4.2",
|
|
43
43
|
"ts-jest": "^29.0.5",
|
|
44
44
|
"typescript": "^4.9.5"
|
|
45
45
|
},
|
|
46
46
|
"dependencies": {
|
|
47
|
+
"@cspell/cspell-pipe": "^6.23.1",
|
|
48
|
+
"@cspell/cspell-types": "^6.23.1",
|
|
47
49
|
"commander": "^10.0.0",
|
|
48
50
|
"gensequence": "^4.0.3",
|
|
49
51
|
"iconv-lite": "^0.6.3"
|
|
@@ -51,5 +53,5 @@
|
|
|
51
53
|
"engines": {
|
|
52
54
|
"node": ">=14"
|
|
53
55
|
},
|
|
54
|
-
"gitHead": "
|
|
56
|
+
"gitHead": "dace8b0625beb2766565f47bc813dc0a45480dc0"
|
|
55
57
|
}
|