hunspell-reader 6.22.0 → 6.23.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,4 @@
1
+ import type { DictionaryInformation } from '@cspell/cspell-types';
2
+ import type { AffInfo } from './affDef';
3
+ export declare function affToDicInfo(aff: AffInfo, locale: string): DictionaryInformation;
4
+ //# sourceMappingURL=affToDicInfo.d.ts.map
@@ -0,0 +1,149 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.affToDicInfo = void 0;
4
+ const sync_1 = require("@cspell/cspell-pipe/sync");
5
+ const textUtils_1 = require("./textUtils");
6
/**
 * Build CSpell dictionary information from a parsed Hunspell `.aff` file.
 *
 * The result contains everything returned by `extractAlphabet` and
 * `extractSuggestionEditCosts`, with `alphabet` and `accents` rewritten as
 * compact range strings.
 *
 * @param aff - parsed aff information
 * @param locale - locale used for case conversion and copied into the result
 * @returns the dictionary information object
 */
function affToDicInfo(aff, locale) {
    const letters = extractAlphabet(aff, locale);
    const costs = extractSuggestionEditCosts(aff, letters);
    // The alphabet is compressed with a minimum range length of 5.
    const alphabet = (0, textUtils_1.toRange)(letters.alphabet, 5);
    // Accents are collected in a Set; sort them before building ranges.
    const accentList = Array.from(letters.accents);
    accentList.sort();
    const accents = (0, textUtils_1.toRange)(accentList.join(''));
    return { ...letters, ...costs, locale, alphabet, accents };
}
16
+ exports.affToDicInfo = affToDicInfo;
17
/**
 * Collect the letters and accent marks referenced by an aff file.
 *
 * Text is gathered from MAP, TRY, KEY, REP, ICONV, OCONV and the prefix /
 * suffix substitutions. Each text is normalized, expanded with its
 * locale-aware lower and upper case forms, and split into characters.
 *
 * @param aff - parsed aff information
 * @param locale - locale used for case conversion
 * @returns `{ locale, alphabet, accents }` where `alphabet` is a sorted string
 *   of letters and `accents` is a Set of the marks found in its NFD form.
 */
function extractAlphabet(aff, locale) {
    const candidates = [
        aff.MAP,
        aff.TRY,
        aff.KEY,
        aff.REP?.flatMap((rep) => [rep.match, rep.replaceWith]),
        aff.ICONV?.flatMap((cov) => [cov.from, cov.to]),
        aff.OCONV?.flatMap((cov) => [cov.from, cov.to]),
        extractFxLetters(aff.PFX),
        extractFxLetters(aff.SFX),
    ];
    const seen = new Set();
    for (const candidate of candidates) {
        if (!isDefined(candidate))
            continue;
        // A source is either a single string or a list of strings.
        const texts = Array.isArray(candidate) ? candidate : [candidate];
        for (const text of texts) {
            const normalized = text.normalize();
            const variants = [
                normalized,
                normalized.toLocaleLowerCase(locale),
                normalized.toLocaleUpperCase(locale),
            ];
            for (const variant of variants) {
                for (const char of variant) {
                    const trimmed = char.trim();
                    if (trimmed) {
                        seen.add(trimmed);
                    }
                }
            }
        }
    }
    // Keep letters only; digits, punctuation and loose marks are dropped here.
    const alphabet = Array.from(seen).sort().join('').replace(/\P{L}/gu, '');
    // Decompose the letters and keep only the combining marks.
    const accents = new Set(alphabet.normalize('NFD').replace(/\P{M}/gu, ''));
    return { locale, alphabet, accents };
}
39
/**
 * Test whether a value is anything other than `undefined`.
 * `null` counts as defined.
 */
function isDefined(a) {
    return typeof a !== 'undefined';
}
42
/**
 * Assemble the suggestion edit costs for a dictionary.
 *
 * The list is ordered: capitalization / accent replacements first, then the
 * aff MAP replacements, then the aff REP replacements.
 *
 * @param aff - parsed aff information
 * @param alphaInfo - alphabet information as returned by `extractAlphabet`
 * @returns an object with a single `suggestionEditCosts` array
 */
function extractSuggestionEditCosts(aff, alphaInfo) {
    const suggestionEditCosts = [
        ...calcCapsAndAccentReplacements(alphaInfo),
        ...calcAffMapReplacements(aff),
        ...calcAffRepReplacements(aff),
    ];
    return { suggestionEditCosts };
}
49
/**
 * Convert the aff MAP entries into a single replacement cost map.
 *
 * The entries are sorted and joined with `|`. Sorting is done on a copy so
 * the caller's `aff.MAP` array keeps its original order.
 *
 * @param aff - parsed aff information; `aff.MAP` is an optional list of strings
 * @returns a one-element list with the cost map, or an empty list when there
 *   are no MAP entries (an entry with an empty `map` would carry no information)
 */
function calcAffMapReplacements(aff) {
    if (!aff.MAP?.length)
        return [];
    const map = [...aff.MAP].sort().join('|');
    return [{ map, replace: 1, description: 'Hunspell Aff Map' }];
}
55
/**
 * Convert the aff REP entries into replacement cost maps.
 *
 * Each REP entry contributes the pair `[match, replaceWith]`; the pairs are
 * handed to `createCostMaps` with a replace cost of 75.
 *
 * @param aff - parsed aff information; `aff.REP` is optional
 * @returns the cost maps, or an empty list when `aff.REP` is missing
 */
function calcAffRepReplacements(aff) {
    const { REP } = aff;
    if (!REP)
        return [];
    const pairs = REP.map((rep) => [rep.match, rep.replaceWith]);
    const template = { map: '', replace: 75, description: 'Hunspell Replace Map' };
    return createCostMaps(pairs, template);
}
60
/**
 * Build the cost maps for capitalization and accent changes.
 *
 * Three groups are produced, in this order: case changes (cost 1), accent
 * changes (cost 1), and combined case + accent changes (cost 2).
 *
 * @param alphaInfo - object with the `locale` and the `alphabet` string
 * @returns the list of cost maps
 */
function calcCapsAndAccentReplacements(alphaInfo) {
    const letters = Array.from(alphaInfo.alphabet);
    const caseForms = [];
    for (const letter of letters) {
        caseForms.push(calcCapitalizationForms(letter, alphaInfo.locale));
    }
    const accentForms = calcAccentForms(letters);
    const crossForms = calcCrossAccentCapsMap(accentForms, alphaInfo.locale);
    const caseCosts = createCostMaps(caseForms, { map: '', replace: 1, description: 'Capitalization change.' });
    const accentCosts = createCostMaps(accentForms, { map: '', replace: 1, description: 'Replace Accents' });
    const crossCosts = createCostMaps(crossForms, { map: '', replace: 2, description: 'Capitalization and Accent change.' });
    return caseCosts.concat(accentCosts, crossCosts);
}
72
/**
 * Turn groups of interchangeable forms into cost map entries.
 *
 * Every group is serialized with `joinCharMap`; duplicates and empty strings
 * are removed, the rest is sorted and packed six groups per entry, joined
 * with `|`.
 *
 * @param formMaps - list of groups (each an iterable of strings)
 * @param base - template copied into every entry; its `map` is replaced
 * @returns the list of cost map entries
 */
function createCostMaps(formMaps, base) {
    const distinct = new Set();
    for (const group of formMaps) {
        distinct.add(joinCharMap(group));
    }
    const keys = Array.from(distinct)
        .sort()
        .filter((key) => Boolean(key));
    const entries = [];
    for (const chunk of groupsOfN(keys, 6)) {
        entries.push({ ...base, map: chunk.join('|') });
    }
    return entries;
}
77
/**
 * Collect the case variants of a letter.
 *
 * Includes the letter itself, its default and locale-aware upper / lower
 * case forms, and the round trips (upper → lower, lower → upper), which can
 * differ from the input for letters such as `ß`.
 *
 * @param letter - the letter to expand
 * @param locale - locale passed to the locale-aware conversions
 * @returns a Set of the distinct forms, in the order they were found
 */
function calcCapitalizationForms(letter, locale) {
    const localeUpper = letter.toLocaleUpperCase(locale);
    const localeLower = letter.toLocaleLowerCase(locale);
    return new Set([
        letter,
        letter.toUpperCase(),
        letter.toLowerCase(),
        localeUpper,
        localeLower,
        localeUpper.toLocaleLowerCase(locale),
        localeLower.toLocaleUpperCase(locale),
    ]);
}
88
/**
 * Group letters by their accent-free base letter.
 *
 * @param letters - iterable of letters
 * @returns a list of Sets; each Set holds a base letter and its accented
 *   variants. Groups with a single member are left out.
 */
function calcAccentForms(letters) {
    const groupsByBase = new Map();
    for (const letter of letters) {
        const base = (0, textUtils_1.removeAccents)(letter);
        let group = groupsByBase.get(base);
        if (!group) {
            group = new Set();
            groupsByBase.set(base, group);
        }
        group.add(base);
        group.add(letter);
    }
    const result = [];
    for (const group of groupsByBase.values()) {
        if (group.size > 1) {
            result.push(group);
        }
    }
    return result;
}
107
/**
 * Serialize a group of forms into a single map string.
 *
 * The forms are sorted; any form longer than one UTF-16 code unit is wrapped
 * in parentheses so it stays distinguishable from single characters.
 *
 * @param values - iterable of strings
 * @returns the concatenated forms
 */
function joinCharMap(values) {
    const ordered = Array.from(values).sort();
    let joined = '';
    for (const form of ordered) {
        joined += form.length > 1 ? '(' + form + ')' : form;
    }
    return joined;
}
113
/**
 * Expand every accent group with the case variants of its members.
 *
 * @param accentForms - iterable of groups (each an iterable of letters)
 * @param locale - locale passed to `calcCapitalizationForms`
 * @returns a list with one Set per input group, holding the case variants
 *   of all letters of that group
 */
function calcCrossAccentCapsMap(accentForms, locale) {
    const expanded = [];
    for (const group of accentForms) {
        const variants = new Set();
        for (const letter of group) {
            for (const form of calcCapitalizationForms(letter, locale)) {
                variants.add(form);
            }
        }
        expanded.push(variants);
    }
    return expanded;
}
120
+ // function addAccents(cleanLetter: string, accents: Iterable<string>, collection: Set<string>) {
121
+ // for (const accent of accents) {
122
+ // collection.add(applyAccent(cleanLetter, accent));
123
+ // }
124
+ // }
125
+ // function applyAccent(letter: string, accent: string): string {
126
+ // const withAccent = (letter + accent).normalize('NFC');
127
+ // return removeLooseAccents(withAccent);
128
+ // }
129
/**
 * Collect the text fragments used by prefix or suffix rules.
 *
 * Walks every rule in `fxm`, every substitution set of a rule and every
 * substitution of a set, and records the `remove` and `attach` values.
 *
 * @param fxm - collection of affix rules exposing `values()` (PFX or SFX), or undefined
 * @returns the list of `remove` / `attach` values in traversal order, or
 *   `undefined` when `fxm` is missing
 */
function extractFxLetters(fxm) {
    if (!fxm)
        return undefined;
    const fragments = [];
    for (const rule of fxm.values()) {
        for (const substitutionSet of rule.substitutionSets.values()) {
            for (const substitution of substitutionSet.substitutions) {
                fragments.push(substitution.remove, substitution.attach);
            }
        }
    }
    return fragments;
}
136
/**
 * Split an iterable into consecutive arrays of `n` items.
 * The last array may be shorter; nothing is yielded for an empty input.
 *
 * @param values - iterable to split
 * @param n - number of items per group
 */
function* groupsOfN(values, n) {
    let group = [];
    for (const value of values) {
        // `push` returns the new length of the group.
        const isFull = group.push(value) >= n;
        if (isFull) {
            yield group;
            group = [];
        }
    }
    if (group.length > 0) {
        yield group;
    }
}
149
+ //# sourceMappingURL=affToDicInfo.js.map
package/dist/app.js CHANGED
@@ -1,173 +1,14 @@
1
1
  "use strict";
2
2
  Object.defineProperty(exports, "__esModule", { value: true });
3
- // cSpell:ignore findup
4
3
  const commander_1 = require("commander");
5
- const fs_1 = require("fs");
6
- const gensequence_1 = require("gensequence");
7
- const aff_1 = require("./aff");
8
- const IterableHunspellReader_1 = require("./IterableHunspellReader");
9
- const iterableToStream_1 = require("./iterableToStream");
10
- const util_1 = require("./util");
11
- const uniqueHistorySize = 500000;
4
+ const commandDictInfo_1 = require("./commandDictInfo");
5
+ const commandWords_1 = require("./commandWords");
12
6
  // eslint-disable-next-line @typescript-eslint/no-var-requires
13
7
  const packageInfo = require('../package.json');
14
8
  const version = packageInfo['version'];
15
- let displayHelp = true;
16
- let logStream = process.stderr;
17
9
  commander_1.program.version(version);
18
- commander_1.program
19
- .command('words <hunspell_dic_file>')
20
- .option('-o, --output <file>', 'output file - defaults to stdout')
21
- .option('-s, --sort', 'sort the list of words')
22
- .option('-u, --unique', 'make sure the words are unique.')
23
- .option('-l, --lower_case', 'output in lower case')
24
- .option('-T, --no-transform', 'Do not apply the prefix and suffix transforms. Root words only.')
25
- .option('-x, --infix', 'Return words with prefix / suffix breaks. ex: "un<do>ing"')
26
- .option('-r, --rules', 'Append rules used to generate word.')
27
- .option('-p, --progress', 'Show progress.')
28
- .option('-m, --max_depth <limit>', 'Maximum depth to apply suffix rules.')
29
- .option('-n, --number <limit>', 'Limit the number of words to output.')
30
- .option('--forbidden', 'include forbidden words')
31
- .option('--partial_compounds', 'include words that must be part of a compound word')
32
- .option('--only_forbidden', 'includes only words that are forbidden')
33
- .description('Output all the words in the <hunspell.dic> file.')
34
- .action(action);
35
- commander_1.program.parse(process.argv);
36
- if (displayHelp) {
37
- commander_1.program.help();
38
- }
39
- function notify(message, newLine = true) {
40
- message = message + (newLine ? '\n' : '');
41
- logStream.write(message, 'utf-8');
42
- }
43
- function yesNo(value) {
44
- return value ? 'Yes' : 'No';
45
- }
46
- function affWordToInfix(aff) {
47
- return { ...aff, word: aff.prefix + '<' + aff.base + '>' + aff.suffix };
48
- }
49
- function mapWord(map) {
50
- return (aff) => ({ ...aff, word: map(aff.word) });
51
- }
52
- function appendRules(aff) {
53
- return { ...aff, word: aff.word + '\t[' + aff.rulesApplied + ' ]\t' + '(' + aff.dic + ')' };
54
- }
55
- function writeSeqToFile(seq, outFile) {
56
- return new Promise((resolve, reject) => {
57
- let resolved = false;
58
- const out = outFile ? (0, fs_1.createWriteStream)(outFile) : process.stdout;
59
- const bufferedSeq = (0, gensequence_1.genSequence)((0, util_1.batch)(seq, 500)).map((batch) => batch.join(''));
60
- const dataStream = (0, iterableToStream_1.iterableToStream)(bufferedSeq);
61
- const fileStream = dataStream.pipe(out);
62
- const endEvents = ['finish', 'close', 'end'];
63
- function resolvePromise() {
64
- if (!resolved) {
65
- resolved = true;
66
- resolve();
67
- }
68
- }
69
- const endHandler = () => {
70
- cleanupStreams();
71
- setTimeout(resolvePromise, 10);
72
- };
73
- const errorHandler = (e) => {
74
- cleanupStreams();
75
- reject(e);
76
- };
77
- listenToStreams();
78
- function listenToStreams() {
79
- endEvents.forEach((event) => fileStream.addListener(event, endHandler));
80
- fileStream.addListener('error', errorHandler);
81
- dataStream.addListener('end', endHandler);
82
- }
83
- function cleanupStreams() {
84
- endEvents.forEach((event) => fileStream.removeListener(event, endHandler));
85
- fileStream.removeListener('error', errorHandler);
86
- dataStream.removeListener('end', endHandler);
87
- }
88
- });
89
- }
90
- function action(hunspellDicFilename, options) {
91
- return actionPrime(hunspellDicFilename, options).catch((reason) => {
92
- if (reason.code === 'EPIPE') {
93
- console.log(reason);
94
- return;
95
- }
96
- console.error(reason);
97
- });
98
- }
99
- async function actionPrime(hunspellDicFilename, options) {
100
- displayHelp = false;
101
- const { sort = false, unique = false, output: outputFile, lower_case: lowerCase = false, transform = true, infix = false, rules = false, progress: showProgress = false, max_depth, forbidden = false, only_forbidden: onlyForbidden = false, partial_compounds: partialCompoundsAllowed = false, } = options;
102
- logStream = outputFile ? process.stdout : process.stderr;
103
- const log = notify;
104
- log('Write words');
105
- log(`Sort: ${yesNo(sort)}`);
106
- log(`Unique: ${yesNo(unique)}`);
107
- const baseFile = hunspellDicFilename.replace(/\.(dic|aff)$/, '');
108
- const dicFile = baseFile + '.dic';
109
- const affFile = baseFile + '.aff';
110
- log(`Dic file: ${dicFile}`);
111
- log(`Aff file: ${affFile}`);
112
- log(`Generating Words...`);
113
- const reader = await IterableHunspellReader_1.IterableHunspellReader.createFromFiles(affFile, dicFile);
114
- if (max_depth && Number.parseInt(max_depth) >= 0) {
115
- reader.maxDepth = Number.parseInt(max_depth);
116
- }
117
- const transformers = [];
118
- const filters = [];
119
- if (!forbidden && !onlyForbidden)
120
- filters.push((aff) => !aff.flags.isForbiddenWord);
121
- if (onlyForbidden)
122
- filters.push((aff) => !!aff.flags.isForbiddenWord);
123
- if (!partialCompoundsAllowed)
124
- filters.push((aff) => !aff.flags.isOnlyAllowedInCompound);
125
- if (infix) {
126
- transformers.push(affWordToInfix);
127
- }
128
- if (lowerCase) {
129
- transformers.push(mapWord((a) => a.toLowerCase()));
130
- }
131
- if (rules) {
132
- transformers.push(appendRules);
133
- }
134
- transformers.push(mapWord((a) => a.trim()));
135
- const dicSize = reader.dic.length;
136
- let current = 0;
137
- const calcProgress = () => '\r' + current + ' / ' + dicSize;
138
- const reportProgressRate = 253;
139
- const callback = showProgress
140
- ? () => {
141
- current++;
142
- !(current % reportProgressRate) && process.stderr.write(calcProgress(), 'utf-8');
143
- }
144
- : () => {
145
- /* void */
146
- };
147
- const seqWords = transform ? reader.seqAffWords(callback) : reader.seqRootWords().map(aff_1.asAffWord);
148
- const filterUnique = unique ? (0, util_1.uniqueFilter)(uniqueHistorySize) : (_) => true;
149
- const applyTransformers = (aff) => transformers.reduce((aff, fn) => fn(aff), aff);
150
- const applyFilters = (aff) => filters.reduce((cur, fn) => cur && fn(aff), true);
151
- const allWords = seqWords
152
- .filter(applyFilters)
153
- .map(applyTransformers)
154
- .map((a) => a.word)
155
- .filter((a) => !!a)
156
- .filter(filterUnique)
157
- .map((a) => a + '\n');
158
- const words = options.number ? allWords.take(Number.parseInt(options.number)) : allWords;
159
- if (sort) {
160
- log('Sorting...');
161
- const data = words.toArray().sort().join('');
162
- const fd = outputFile ? (0, fs_1.openSync)(outputFile, 'w') : 1;
163
- (0, fs_1.writeSync)(fd, data);
164
- }
165
- else {
166
- await writeSeqToFile(words, outputFile);
167
- }
168
- if (showProgress) {
169
- console.error(calcProgress());
170
- }
171
- log('Done.');
172
- }
10
// Register the sub-commands and run the command line.
commander_1.program.addCommand((0, commandWords_1.getCommand)());
commander_1.program.addCommand((0, commandDictInfo_1.getCommand)());
commander_1.program.showHelpAfterError();
// The command actions are async and may reject. Handle the rejection here so
// a failure is reported and reflected in the exit code instead of surfacing
// as an unhandled promise rejection.
commander_1.program.parseAsync(process.argv).catch((err) => {
    console.error(err);
    process.exitCode = 1;
});
173
14
  //# sourceMappingURL=app.js.map
@@ -0,0 +1,3 @@
1
+ import { Command } from 'commander';
2
+ export declare function getCommand(): Command;
3
+ //# sourceMappingURL=commandDictInfo.d.ts.map
@@ -0,0 +1,26 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.getCommand = void 0;
4
+ // cSpell:ignore findup
5
+ const commander_1 = require("commander");
6
+ const affReader_1 = require("./affReader");
7
+ const affToDicInfo_1 = require("./affToDicInfo");
8
+ const textUtils_1 = require("./textUtils");
9
/**
 * Create the `cspell-dict-info` sub-command.
 *
 * It takes an aff file and a locale and prints the CSpell dictionary
 * information derived from them.
 *
 * @returns the configured command
 */
function getCommand() {
    const command = new commander_1.Command('cspell-dict-info');
    command.arguments('<hunspell_aff_file> <locale>');
    command.description('Display the CSpell Dictionary Information');
    command.action(action);
    return command;
}
17
+ exports.getCommand = getCommand;
18
/**
 * Print the dictionary information for a Hunspell dictionary as JSON.
 *
 * @param hunspellFile - path of the `.aff` or `.dic` file; the extension is
 *   replaced by `.aff`
 * @param locale - locale handed to `affToDicInfo`
 */
async function action(hunspellFile, locale) {
    const affFile = hunspellFile.replace(/\.(dic|aff)$/, '') + '.aff';
    const aff = await (0, affReader_1.parseAffFile)(affFile);
    const info = (0, affToDicInfo_1.affToDicInfo)(aff, locale);
    // Pretty print, then escape the characters matched by escapeUnicodeCode.
    const json = JSON.stringify(info, null, 2);
    console.log((0, textUtils_1.escapeUnicodeCode)(json));
}
26
+ //# sourceMappingURL=commandDictInfo.js.map
@@ -0,0 +1,3 @@
1
+ import { Command } from 'commander';
2
+ export declare function getCommand(): Command;
3
+ //# sourceMappingURL=commandWords.d.ts.map
@@ -0,0 +1,176 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.getCommand = void 0;
4
+ // cSpell:ignore findup
5
+ const commander_1 = require("commander");
6
+ const fs_1 = require("fs");
7
+ const gensequence_1 = require("gensequence");
8
+ const aff_1 = require("./aff");
9
+ const IterableHunspellReader_1 = require("./IterableHunspellReader");
10
+ const iterableToStream_1 = require("./iterableToStream");
11
+ const util_1 = require("./util");
12
+ const uniqueHistorySize = 500000;
13
+ let logStream = process.stderr;
14
/**
 * Create the `words` sub-command, which outputs the words of a Hunspell
 * dictionary.
 *
 * @returns the configured command
 */
function getCommand() {
    // [flags, description] pairs, registered in this order.
    const optionDefs = [
        ['-o, --output <file>', 'output file - defaults to stdout'],
        ['-s, --sort', 'sort the list of words'],
        ['-u, --unique', 'make sure the words are unique.'],
        ['-l, --lower_case', 'output in lower case'],
        ['-T, --no-transform', 'Do not apply the prefix and suffix transforms. Root words only.'],
        ['-x, --infix', 'Return words with prefix / suffix breaks. ex: "un<do>ing"'],
        ['-r, --rules', 'Append rules used to generate word.'],
        ['-p, --progress', 'Show progress.'],
        ['-m, --max_depth <limit>', 'Maximum depth to apply suffix rules.'],
        ['-n, --number <limit>', 'Limit the number of words to output.'],
        ['--forbidden', 'include forbidden words'],
        ['--partial_compounds', 'include words that must be part of a compound word'],
        ['--only_forbidden', 'includes only words that are forbidden'],
    ];
    const command = new commander_1.Command('words');
    command.arguments('<hunspell_dic_file>');
    for (const [flags, description] of optionDefs) {
        command.option(flags, description);
    }
    command.description('Output all the words in the <hunspell.dic> file.');
    command.action(action);
    return command;
}
35
+ exports.getCommand = getCommand;
36
/**
 * Write a status message to the current log stream.
 *
 * @param message - text to write
 * @param newLine - append a line break when true (default)
 */
function notify(message, newLine = true) {
    const lineEnd = newLine ? '\n' : '';
    logStream.write(message + lineEnd, 'utf-8');
}
40
/**
 * Format a flag for display: `'Yes'` for truthy values, `'No'` otherwise.
 */
function yesNo(value) {
    if (value) {
        return 'Yes';
    }
    return 'No';
}
43
/**
 * Rewrite `word` so the affix boundaries are visible: `prefix<base>suffix`.
 *
 * @param aff - word entry with `prefix`, `base` and `suffix`
 * @returns a copy of `aff` with the new `word`
 */
function affWordToInfix(aff) {
    const { prefix, base, suffix } = aff;
    const word = prefix + '<' + base + '>' + suffix;
    return { ...aff, word };
}
46
+ function mapWord(map) {
47
+ return (aff) => ({ ...aff, word: map(aff.word) });
48
+ }
49
/**
 * Append the applied rules and the dictionary entry to `word`, separated by
 * tabs: `word<TAB>[rules ]<TAB>(dic)`.
 *
 * @param aff - word entry with `word`, `rulesApplied` and `dic`
 * @returns a copy of `aff` with the annotated `word`
 */
function appendRules(aff) {
    const rules = '[' + aff.rulesApplied + ' ]';
    const origin = '(' + aff.dic + ')';
    return { ...aff, word: aff.word + '\t' + rules + '\t' + origin };
}
52
/**
 * Stream a sequence of strings to a file or to stdout.
 *
 * The sequence is joined in batches of 500 items, turned into a stream and
 * piped into the destination.
 *
 * @param seq - sequence of strings to write
 * @param outFile - destination file; stdout is used when it is not given
 * @returns a promise that resolves shortly after the first end event and
 *   rejects when the destination stream reports an error
 */
function writeSeqToFile(seq, outFile) {
    return new Promise((resolve, reject) => {
        const destination = outFile ? (0, fs_1.createWriteStream)(outFile) : process.stdout;
        const chunks = (0, gensequence_1.genSequence)((0, util_1.batch)(seq, 500)).map((lines) => lines.join(''));
        const source = (0, iterableToStream_1.iterableToStream)(chunks);
        const sink = source.pipe(destination);
        const sinkEndEvents = ['finish', 'close', 'end'];
        let settled = false;
        // Remove every listener registered below.
        const detach = () => {
            for (const eventName of sinkEndEvents) {
                sink.removeListener(eventName, onEnd);
            }
            sink.removeListener('error', onError);
            source.removeListener('end', onEnd);
        };
        const onEnd = () => {
            detach();
            // Resolve after a short delay, and only once.
            setTimeout(() => {
                if (settled)
                    return;
                settled = true;
                resolve();
            }, 10);
        };
        const onError = (e) => {
            detach();
            reject(e);
        };
        for (const eventName of sinkEndEvents) {
            sink.addListener(eventName, onEnd);
        }
        sink.addListener('error', onError);
        source.addListener('end', onEnd);
    });
}
87
/**
 * Command handler for `words`.
 *
 * Runs `actionPrime`. An error whose `code` is `'EPIPE'` is logged and
 * swallowed; every other error is rethrown.
 *
 * @param hunspellDicFilename - path of the dictionary file
 * @param options - parsed command line options
 */
async function action(hunspellDicFilename, options) {
    try {
        await actionPrime(hunspellDicFilename, options);
    }
    catch (err) {
        const reason = asError(err);
        const isBrokenPipe = reason?.code === 'EPIPE';
        if (!isBrokenPipe) {
            throw err;
        }
        console.log(reason);
    }
}
100
/**
 * Return `err` when it is a non-null object, otherwise `undefined`.
 * Lets callers read properties such as `code` from a caught value.
 */
function asError(err) {
    if (typeof err !== 'object' || !err) {
        return undefined;
    }
    return err;
}
103
/**
 * Generate the words of a Hunspell dictionary and write them out.
 *
 * The `.dic` and `.aff` file names are derived from `hunspellDicFilename` by
 * replacing its extension. Words are filtered (forbidden / compound-only),
 * transformed (infix, lower case, rules, trim), optionally made unique and
 * limited in number, then written to `options.output` or stdout.
 *
 * @param hunspellDicFilename - path of the `.dic` or `.aff` file
 * @param options - parsed command line options of the `words` command
 */
async function actionPrime(hunspellDicFilename, options) {
    const { sort = false, unique = false, output: outputFile, lower_case: lowerCase = false, transform = true, infix = false, rules = false, progress: showProgress = false, max_depth, forbidden = false, only_forbidden: onlyForbidden = false, partial_compounds: partialCompoundsAllowed = false, } = options;
    // Status messages go to stdout when the words go to a file, otherwise to
    // stderr so they do not mix with the word list.
    logStream = outputFile ? process.stdout : process.stderr;
    const log = notify;
    log('Write words');
    log(`Sort: ${yesNo(sort)}`);
    log(`Unique: ${yesNo(unique)}`);
    const baseFile = hunspellDicFilename.replace(/\.(dic|aff)$/, '');
    const dicFile = baseFile + '.dic';
    const affFile = baseFile + '.aff';
    log(`Dic file: ${dicFile}`);
    log(`Aff file: ${affFile}`);
    log(`Generating Words...`);
    const reader = await IterableHunspellReader_1.IterableHunspellReader.createFromFiles(affFile, dicFile);
    if (max_depth && Number.parseInt(max_depth) >= 0) {
        reader.maxDepth = Number.parseInt(max_depth);
    }
    const transformers = [];
    const filters = [];
    if (!forbidden && !onlyForbidden)
        filters.push((aff) => !aff.flags.isForbiddenWord);
    if (onlyForbidden)
        filters.push((aff) => !!aff.flags.isForbiddenWord);
    if (!partialCompoundsAllowed)
        filters.push((aff) => !aff.flags.isOnlyAllowedInCompound);
    if (infix) {
        transformers.push(affWordToInfix);
    }
    if (lowerCase) {
        transformers.push(mapWord((a) => a.toLowerCase()));
    }
    if (rules) {
        transformers.push(appendRules);
    }
    transformers.push(mapWord((a) => a.trim()));
    const dicSize = reader.dic.length;
    let current = 0;
    const calcProgress = () => '\r' + current + ' / ' + dicSize;
    const reportProgressRate = 253;
    const callback = showProgress
        ? () => {
            current++;
            // Only report every `reportProgressRate` calls.
            !(current % reportProgressRate) && process.stderr.write(calcProgress(), 'utf-8');
        }
        : () => {
            /* void */
        };
    const seqWords = transform ? reader.seqAffWords(callback) : reader.seqRootWords().map(aff_1.asAffWord);
    const filterUnique = unique ? (0, util_1.uniqueFilter)(uniqueHistorySize) : (_) => true;
    const applyTransformers = (aff) => transformers.reduce((aff, fn) => fn(aff), aff);
    const applyFilters = (aff) => filters.reduce((cur, fn) => cur && fn(aff), true);
    const allWords = seqWords
        .filter(applyFilters)
        .map(applyTransformers)
        .map((a) => a.word)
        .filter((a) => !!a)
        .filter(filterUnique)
        .map((a) => a + '\n');
    const words = options.number ? allWords.take(Number.parseInt(options.number)) : allWords;
    if (sort) {
        // Sorting needs the complete list in memory.
        log('Sorting...');
        const data = words.toArray().sort().join('');
        if (outputFile) {
            // writeFileSync opens the file for writing, writes all of the
            // data and closes the descriptor again.
            (0, fs_1.writeFileSync)(outputFile, data);
        }
        else {
            // File descriptor 1 is stdout.
            (0, fs_1.writeSync)(1, data);
        }
    }
    else {
        await writeSeqToFile(words, outputFile);
    }
    if (showProgress) {
        console.error(calcProgress());
    }
    log('Done.');
}
176
+ //# sourceMappingURL=commandWords.js.map
package/dist/index.d.ts CHANGED
@@ -1,5 +1,5 @@
1
1
  export type { AffInfo, AffWord } from './affDef';
2
2
  export { parseAff, parseAffFile as readAffFile } from './affReader';
3
- export * from './IterableHunspellReader';
3
+ export { createMatchingWordsFilter, type HunspellSrcData, IterableHunspellReader, type WordInfo, } from './IterableHunspellReader';
4
4
  export { IterableHunspellReader as HunspellReader } from './IterableHunspellReader';
5
5
  //# sourceMappingURL=index.d.ts.map
package/dist/index.js CHANGED
@@ -1,24 +1,12 @@
1
1
  "use strict";
2
- var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
3
- if (k2 === undefined) k2 = k;
4
- var desc = Object.getOwnPropertyDescriptor(m, k);
5
- if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
6
- desc = { enumerable: true, get: function() { return m[k]; } };
7
- }
8
- Object.defineProperty(o, k2, desc);
9
- }) : (function(o, m, k, k2) {
10
- if (k2 === undefined) k2 = k;
11
- o[k2] = m[k];
12
- }));
13
- var __exportStar = (this && this.__exportStar) || function(m, exports) {
14
- for (var p in m) if (p !== "default" && !Object.prototype.hasOwnProperty.call(exports, p)) __createBinding(exports, m, p);
15
- };
16
2
  Object.defineProperty(exports, "__esModule", { value: true });
17
- exports.HunspellReader = exports.readAffFile = exports.parseAff = void 0;
3
+ exports.HunspellReader = exports.IterableHunspellReader = exports.createMatchingWordsFilter = exports.readAffFile = exports.parseAff = void 0;
18
4
  var affReader_1 = require("./affReader");
19
5
  Object.defineProperty(exports, "parseAff", { enumerable: true, get: function () { return affReader_1.parseAff; } });
20
6
  Object.defineProperty(exports, "readAffFile", { enumerable: true, get: function () { return affReader_1.parseAffFile; } });
21
- __exportStar(require("./IterableHunspellReader"), exports);
22
7
  var IterableHunspellReader_1 = require("./IterableHunspellReader");
23
- Object.defineProperty(exports, "HunspellReader", { enumerable: true, get: function () { return IterableHunspellReader_1.IterableHunspellReader; } });
8
+ Object.defineProperty(exports, "createMatchingWordsFilter", { enumerable: true, get: function () { return IterableHunspellReader_1.createMatchingWordsFilter; } });
9
+ Object.defineProperty(exports, "IterableHunspellReader", { enumerable: true, get: function () { return IterableHunspellReader_1.IterableHunspellReader; } });
10
+ var IterableHunspellReader_2 = require("./IterableHunspellReader");
11
+ Object.defineProperty(exports, "HunspellReader", { enumerable: true, get: function () { return IterableHunspellReader_2.IterableHunspellReader; } });
24
12
  //# sourceMappingURL=index.js.map
@@ -0,0 +1,18 @@
1
+ /**
2
+ * Escape Unicode Characters
3
+ * @param text
4
+ * @param regexp
5
+ * @returns
6
+ */
7
+ export declare function escapeUnicodeCode(text: string, regexp?: RegExp): string;
8
+ /**
9
+ * Converts a string of letters into ranges.
10
+ *
11
+ * `abcde` => `a-e`
12
+ *
13
+ * @param letters - sorted letters
14
+ */
15
+ export declare function toRange(letters: string, minLength?: number): string;
16
+ export declare function removeAccents(text: string): string;
17
+ export declare function removeLooseAccents(text: string): string;
18
+ //# sourceMappingURL=textUtils.d.ts.map
@@ -0,0 +1,84 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.removeLooseAccents = exports.removeAccents = exports.toRange = exports.escapeUnicodeCode = void 0;
4
/**
 * Escape characters in a text.
 *
 * Every match of `regexp` is replaced by the output of `replaceWithUnicode`
 * for that match.
 *
 * @param text - the text to escape
 * @param regexp - selects what gets escaped; defaults to all Unicode marks (`\p{M}`)
 * @returns the escaped text
 */
function escapeUnicodeCode(text, regexp = /\p{M}/gu) {
    return text.replace(regexp, (match) => replaceWithUnicode(match));
}
13
+ exports.escapeUnicodeCode = escapeUnicodeCode;
14
/**
 * Replace the UTF-16 code units of a string with escape sequences.
 *
 * Code units from 0x20 to 0x7a are kept as they are. Others become `\xHH`
 * when below 256 and `\uHHHH` otherwise.
 *
 * @param substring - text to convert
 * @returns the converted text
 */
function replaceWithUnicode(substring) {
    const firstPlain = 0x20;
    const lastPlain = 0x7a;
    const pieces = [];
    for (const unit of substring.split('')) {
        const code = unit.charCodeAt(0);
        if (code >= firstPlain && code <= lastPlain) {
            pieces.push(unit);
            continue;
        }
        const hex = code.toString(16);
        pieces.push(code < 256 ? '\\x' + hex.padStart(2, '0') : '\\u' + hex.padStart(4, '0'));
    }
    return pieces.join('');
}
30
/**
 * Compress runs of consecutive characters into ranges.
 *
 * `abcde` => `a-e`
 *
 * A run is written as `first-last` only when it contains at least
 * `minLength` characters; shorter runs are written out in full.
 *
 * @param letters - sorted letters
 * @param minLength - shortest run that is written as a range (default 4)
 * @returns the letters with long runs replaced by ranges
 */
function toRange(letters, minLength = 4) {
    const threshold = Math.max(minLength - 2, 1);
    const out = [];
    let runStart = 0;
    let runEnd = 0;
    let runLast = '';
    // Emit what follows the first character of the current run: either a
    // dash or the characters in between, then the last character of the run.
    const flushRun = () => {
        if (runEnd - runStart > threshold) {
            out.push('-');
        }
        else {
            for (let code = runStart + 1; code < runEnd; code += 1) {
                out.push(String.fromCharCode(code));
            }
        }
        out.push(runLast);
        runLast = '';
    };
    for (const unit of letters.split('')) {
        const code = unit.charCodeAt(0);
        if (code - runEnd === 1) {
            // Extends the current run.
            runEnd = code;
            runLast = unit;
            continue;
        }
        // Starts a new run: finish the previous one first.
        flushRun();
        out.push(unit);
        runStart = code;
        runEnd = code;
    }
    flushRun();
    return out.join('');
}
75
+ exports.toRange = toRange;
76
/**
 * Remove the accents from a text.
 *
 * The text is decomposed (NFD) so accents become separate marks, which are
 * then stripped by `removeLooseAccents`.
 */
function removeAccents(text) {
    const decomposed = text.normalize('NFD');
    return removeLooseAccents(decomposed);
}
79
+ exports.removeAccents = removeAccents;
80
/**
 * Remove every Unicode mark (`\p{M}`) from a text. Precomposed accented
 * letters are left untouched.
 */
function removeLooseAccents(text) {
    // Splitting on the marks and re-joining drops them.
    return text.split(/\p{M}/u).join('');
}
83
+ exports.removeLooseAccents = removeLooseAccents;
84
+ //# sourceMappingURL=textUtils.js.map
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "hunspell-reader",
3
- "version": "6.22.0",
3
+ "version": "6.23.0",
4
4
  "description": "A library for reading Hunspell Dictionary Files",
5
5
  "bin": "bin.js",
6
6
  "main": "dist/index.js",
@@ -25,7 +25,7 @@
25
25
  },
26
26
  "repository": {
27
27
  "type": "git",
28
- "url": "git+https://github.com/Jason-Rev/hunspell-reader.git"
28
+ "url": "git+https://github.com/streetsidesoftware/cspell.git"
29
29
  },
30
30
  "keywords": [
31
31
  "Hunspell"
@@ -33,17 +33,19 @@
33
33
  "author": "Jason Dent",
34
34
  "license": "MIT",
35
35
  "bugs": {
36
- "url": "https://github.com/Jason-Rev/hunspell-reader/issues"
36
+ "url": "https://github.com/streetsidesoftware/cspell/issues"
37
37
  },
38
- "homepage": "https://github.com/Jason-Rev/hunspell-reader#readme",
38
+ "homepage": "https://github.com/streetsidesoftware/cspell/tree/main/packages/hunspell-reader#readme",
39
39
  "devDependencies": {
40
40
  "@types/jest": "^29.4.0",
41
- "@types/node": "^18.11.19",
42
- "jest": "^29.4.1",
41
+ "@types/node": "^18.13.0",
42
+ "jest": "^29.4.2",
43
43
  "ts-jest": "^29.0.5",
44
44
  "typescript": "^4.9.5"
45
45
  },
46
46
  "dependencies": {
47
+ "@cspell/cspell-pipe": "^6.23.0",
48
+ "@cspell/cspell-types": "^6.23.0",
47
49
  "commander": "^10.0.0",
48
50
  "gensequence": "^4.0.3",
49
51
  "iconv-lite": "^0.6.3"
@@ -51,5 +53,5 @@
51
53
  "engines": {
52
54
  "node": ">=14"
53
55
  },
54
- "gitHead": "a133874ed7590cbe140f5067cfa80db84b644a5d"
56
+ "gitHead": "a7cfbfa6cf42c9d9746dd8229d9f1317472e5af8"
55
57
  }