hunspell-reader 8.7.0 → 8.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/IterableHunspellReaderLegacy.js +2 -1
- package/dist/aff.js +15 -20
- package/dist/affLegacy.js +17 -14
- package/dist/affReader.js +8 -7
- package/dist/affToDicInfo.js +3 -3
- package/dist/app.js +1 -1
- package/dist/commandDictInfo.js +1 -1
- package/dist/commandWords.js +4 -4
- package/dist/converter.js +1 -1
- package/dist/iterableToStream.d.ts +1 -1
- package/dist/iterableToStream.js +9 -3
- package/dist/textUtils.js +13 -10
- package/dist/util.js +1 -1
- package/package.json +4 -4
|
@@ -1,9 +1,10 @@
|
|
|
1
|
-
import * as fs from 'fs/promises';
|
|
1
|
+
import * as fs from 'node:fs/promises';
|
|
2
2
|
import { genSequence } from 'gensequence';
|
|
3
3
|
import pkgIconvLite from 'iconv-lite';
|
|
4
4
|
import { parseAffFileToAffLegacy } from './affReader.js';
|
|
5
5
|
import { filterOrderedList } from './util.js';
|
|
6
6
|
const { decode } = pkgIconvLite;
|
|
7
|
+
// eslint-disable-next-line unicorn/text-encoding-identifier-case
|
|
7
8
|
const defaultEncoding = 'UTF-8';
|
|
8
9
|
export class IterableHunspellReaderLegacy {
|
|
9
10
|
src;
|
package/dist/aff.js
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import assert from 'assert';
|
|
1
|
+
import assert from 'node:assert';
|
|
2
2
|
import { affFlag } from './affConstants.js';
|
|
3
3
|
import { Converter } from './converter.js';
|
|
4
4
|
import { filterOrderedList, groupByField, isDefined } from './util.js';
|
|
@@ -81,13 +81,7 @@ export class Aff {
|
|
|
81
81
|
const { flags } = affWord;
|
|
82
82
|
const subRules = this.affData.getRulesForAffSubstitution(sub);
|
|
83
83
|
const rules = joinRules(affWord.rules, subRules);
|
|
84
|
-
|
|
85
|
-
if (sub.type === 'S') {
|
|
86
|
-
word = stripped + sub.attach;
|
|
87
|
-
}
|
|
88
|
-
else {
|
|
89
|
-
word = sub.attach + stripped;
|
|
90
|
-
}
|
|
84
|
+
const word = sub.type === 'S' ? stripped + sub.attach : sub.attach + stripped;
|
|
91
85
|
return this.affData.toAffixWord(affWord, word, flags, rules);
|
|
92
86
|
}
|
|
93
87
|
#applySubstitution(affWord, subs) {
|
|
@@ -338,12 +332,14 @@ class AffData {
|
|
|
338
332
|
}
|
|
339
333
|
#splitRules(rules) {
|
|
340
334
|
switch (this.affFlagType) {
|
|
341
|
-
case 'long':
|
|
342
|
-
return [...new Set(rules.
|
|
343
|
-
|
|
335
|
+
case 'long': {
|
|
336
|
+
return [...new Set(rules.replaceAll(/(..)/g, '$1//').split('//').slice(0, -1))];
|
|
337
|
+
}
|
|
338
|
+
case 'num': {
|
|
344
339
|
return [...new Set(rules.split(','))];
|
|
340
|
+
}
|
|
345
341
|
}
|
|
346
|
-
return [...new Set(rules
|
|
342
|
+
return [...new Set(rules)];
|
|
347
343
|
}
|
|
348
344
|
#processAffInfo(affInfo) {
|
|
349
345
|
const { AF = [], SFX = [], PFX = [] } = affInfo;
|
|
@@ -384,12 +380,9 @@ class AffData {
|
|
|
384
380
|
const fx = sfx || pfx;
|
|
385
381
|
if (fx) {
|
|
386
382
|
const affFx = this.#mapFx(fx);
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
else {
|
|
391
|
-
return { id, idx: index, type: 'S', flags: 0, fx: affFx };
|
|
392
|
-
}
|
|
383
|
+
return affFx.type === 'P'
|
|
384
|
+
? { id, idx: index, type: 'P', flags: 0, fx: affFx }
|
|
385
|
+
: { id, idx: index, type: 'S', flags: 0, fx: affFx };
|
|
393
386
|
}
|
|
394
387
|
return { id, idx: index, type: 'F', flags: flags || 0 };
|
|
395
388
|
}
|
|
@@ -438,10 +431,12 @@ export function toAffFlagType(FLAG) {
|
|
|
438
431
|
return 'char';
|
|
439
432
|
switch (FLAG) {
|
|
440
433
|
case 'long':
|
|
441
|
-
case 'num':
|
|
434
|
+
case 'num': {
|
|
442
435
|
return FLAG;
|
|
443
|
-
|
|
436
|
+
}
|
|
437
|
+
default: {
|
|
444
438
|
throw new Error(`Unexpected FLAG value: ${FLAG}`);
|
|
439
|
+
}
|
|
445
440
|
}
|
|
446
441
|
}
|
|
447
442
|
//# sourceMappingURL=aff.js.map
|
package/dist/affLegacy.js
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
|
+
import * as util from 'node:util';
|
|
1
2
|
import * as GS from 'gensequence';
|
|
2
|
-
import * as util from 'util';
|
|
3
3
|
import { affFlag, flagToLongStringMap, flagToStringMap } from './affConstants.js';
|
|
4
4
|
import { Converter } from './converter.js';
|
|
5
5
|
import { filterOrderedList, isDefined } from './util.js';
|
|
@@ -127,7 +127,7 @@ export class Aff {
|
|
|
127
127
|
}
|
|
128
128
|
getMatchingRules(rules) {
|
|
129
129
|
const { AF = [] } = this.affInfo;
|
|
130
|
-
const idx = regExpIsNumber.test(rules) ? parseInt(rules, 10) : -1;
|
|
130
|
+
const idx = regExpIsNumber.test(rules) ? Number.parseInt(rules, 10) : -1;
|
|
131
131
|
const rulesToSplit = AF[idx] || rules;
|
|
132
132
|
return this.separateRules(rulesToSplit)
|
|
133
133
|
.map((key) => this.rules.get(key))
|
|
@@ -135,10 +135,12 @@ export class Aff {
|
|
|
135
135
|
}
|
|
136
136
|
joinRules(rules) {
|
|
137
137
|
switch (this.affInfo.FLAG) {
|
|
138
|
-
case 'long':
|
|
138
|
+
case 'long': {
|
|
139
139
|
return rules.join('');
|
|
140
|
-
|
|
140
|
+
}
|
|
141
|
+
case 'num': {
|
|
141
142
|
return rules.join(',');
|
|
143
|
+
}
|
|
142
144
|
}
|
|
143
145
|
return rules.join('');
|
|
144
146
|
}
|
|
@@ -152,12 +154,14 @@ export class Aff {
|
|
|
152
154
|
}
|
|
153
155
|
#separateRules(rules) {
|
|
154
156
|
switch (this.affInfo.FLAG) {
|
|
155
|
-
case 'long':
|
|
156
|
-
return [...new Set(rules.
|
|
157
|
-
|
|
157
|
+
case 'long': {
|
|
158
|
+
return [...new Set(rules.replaceAll(/(..)/g, '$1//').split('//').slice(0, -1))];
|
|
159
|
+
}
|
|
160
|
+
case 'num': {
|
|
158
161
|
return [...new Set(rules.split(','))];
|
|
162
|
+
}
|
|
159
163
|
}
|
|
160
|
-
return [...new Set(rules
|
|
164
|
+
return [...new Set(rules)];
|
|
161
165
|
}
|
|
162
166
|
get iConv() {
|
|
163
167
|
return this._iConv;
|
|
@@ -182,11 +186,10 @@ export function processRules(affInfo) {
|
|
|
182
186
|
const pfxRules = [...(affInfo.PFX || [])]
|
|
183
187
|
.map(([, pfx]) => pfx)
|
|
184
188
|
.map((pfx) => ({ id: pfx.id, type: 'pfx', pfx }));
|
|
185
|
-
const flagRules =
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
];
|
|
189
|
+
const flagRules = GS.sequenceFromObject(affInfo)
|
|
190
|
+
.filter(([key, value]) => !!affFlag[key] && !!value)
|
|
191
|
+
.map(([key, value]) => ({ id: value, type: 'flag', flags: affFlag[key] }))
|
|
192
|
+
.toArray();
|
|
190
193
|
const rules = [...sfxRules, ...pfxRules, ...flagRules].reduce((acc, rule) => {
|
|
191
194
|
acc.set(rule.id, rule);
|
|
192
195
|
return acc;
|
|
@@ -205,7 +208,7 @@ export function logAffWord(affWord, message) {
|
|
|
205
208
|
export function affWordToColoredString(affWord) {
|
|
206
209
|
return util
|
|
207
210
|
.inspect({ ...affWord, flags: flagsToString(affWord.flags) }, { showHidden: false, depth: 5, colors: true })
|
|
208
|
-
.
|
|
211
|
+
.replaceAll(/(\s|\n|\r)+/g, ' ');
|
|
209
212
|
}
|
|
210
213
|
/* istanbul ignore next */
|
|
211
214
|
export function flagsToString(flags) {
|
package/dist/affReader.js
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
|
-
|
|
2
|
-
import
|
|
1
|
+
/* eslint-disable unicorn/text-encoding-identifier-case */
|
|
2
|
+
import assert from 'node:assert';
|
|
3
|
+
import { readFile } from 'node:fs/promises';
|
|
3
4
|
import { decode as decodeHtmlEntities } from 'html-entities';
|
|
4
5
|
import pkgIconvLite from 'iconv-lite';
|
|
5
6
|
import { Aff } from './aff.js';
|
|
@@ -129,7 +130,7 @@ function parseAffixCreation(line) {
|
|
|
129
130
|
const fx = {
|
|
130
131
|
id: flag,
|
|
131
132
|
type: line.option === 'SFX' ? 'SFX' : 'PFX',
|
|
132
|
-
combinable: !!
|
|
133
|
+
combinable: !!yesRegex.test(combinable),
|
|
133
134
|
count,
|
|
134
135
|
extra,
|
|
135
136
|
substitutionSets: new Map(),
|
|
@@ -181,7 +182,7 @@ function fixMatch(type, match) {
|
|
|
181
182
|
function affixMatchToRegExpString(match) {
|
|
182
183
|
if (match === '0')
|
|
183
184
|
return '';
|
|
184
|
-
return match.
|
|
185
|
+
return match.replaceAll(/([\\\-?*])/g, '\\$1');
|
|
185
186
|
}
|
|
186
187
|
function collectFx() {
|
|
187
188
|
let value;
|
|
@@ -196,8 +197,8 @@ function collectFx() {
|
|
|
196
197
|
const asPfx = collectFx;
|
|
197
198
|
const asSfx = collectFx;
|
|
198
199
|
const asString = () => collectPrimitive((v) => v, '');
|
|
199
|
-
const asBoolean = () => collectPrimitive((v) => !!parseInt(v), '1');
|
|
200
|
-
const asNumber = () => collectPrimitive(parseInt, '0');
|
|
200
|
+
const asBoolean = () => collectPrimitive((v) => !!Number.parseInt(v), '1');
|
|
201
|
+
const asNumber = () => collectPrimitive(Number.parseInt, '0');
|
|
201
202
|
function collectPrimitive(map, defaultValue = '') {
|
|
202
203
|
let primitive;
|
|
203
204
|
function getValue() {
|
|
@@ -323,7 +324,7 @@ export async function parseAffFile(filename, encoding = UTF8) {
|
|
|
323
324
|
return affInfo;
|
|
324
325
|
}
|
|
325
326
|
function convertHtmlEntities(line, index) {
|
|
326
|
-
if (line.
|
|
327
|
+
if (!line.includes('&'))
|
|
327
328
|
return line;
|
|
328
329
|
const fixed = decodeHtmlEntities(line);
|
|
329
330
|
if (fixed !== line) {
|
package/dist/affToDicInfo.js
CHANGED
|
@@ -23,13 +23,13 @@ function extractAlphabet(aff, locale) {
|
|
|
23
23
|
];
|
|
24
24
|
const setOfLetters = new Set(sources
|
|
25
25
|
.filter(isDefined)
|
|
26
|
-
.
|
|
26
|
+
.flat()
|
|
27
27
|
.map((a) => a.normalize())
|
|
28
28
|
.flatMap((a) => [...a, ...a.toLocaleLowerCase(locale), ...a.toLocaleUpperCase(locale)])
|
|
29
29
|
.map((a) => a.trim())
|
|
30
30
|
.filter((a) => !!a));
|
|
31
|
-
const alphabet = [...setOfLetters].sort().join('').
|
|
32
|
-
const accents = new Set(alphabet.normalize('NFD').
|
|
31
|
+
const alphabet = [...setOfLetters].sort().join('').replaceAll(/\P{L}/gu, '');
|
|
32
|
+
const accents = new Set(alphabet.normalize('NFD').replaceAll(/\P{M}/gu, ''));
|
|
33
33
|
return { locale, alphabet, accents };
|
|
34
34
|
}
|
|
35
35
|
function isDefined(a) {
|
package/dist/app.js
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
|
+
import { readFileSync } from 'node:fs';
|
|
1
2
|
import { program } from 'commander';
|
|
2
|
-
import { readFileSync } from 'fs';
|
|
3
3
|
import { getCommand as getDictInfoCommand } from './commandDictInfo.js';
|
|
4
4
|
import { getCommand as commandWords } from './commandWords.js';
|
|
5
5
|
const pkgRaw = readFileSync(new URL('../package.json', import.meta.url), 'utf8');
|
package/dist/commandDictInfo.js
CHANGED
|
@@ -16,7 +16,7 @@ async function action(hunspellFile, locale) {
|
|
|
16
16
|
const affFile = baseFile + '.aff';
|
|
17
17
|
const aff = await parseAffFile(affFile);
|
|
18
18
|
const info = affToDicInfo(aff, locale);
|
|
19
|
-
const rawJson = JSON.stringify(info,
|
|
19
|
+
const rawJson = JSON.stringify(info, undefined, 2);
|
|
20
20
|
console.log(escapeUnicodeCode(rawJson));
|
|
21
21
|
}
|
|
22
22
|
//# sourceMappingURL=commandDictInfo.js.map
|
package/dist/commandWords.js
CHANGED
|
@@ -1,12 +1,12 @@
|
|
|
1
1
|
// cSpell:ignore findup
|
|
2
|
+
import { createWriteStream, openSync, writeSync } from 'node:fs';
|
|
2
3
|
import { Command } from 'commander';
|
|
3
|
-
import { createWriteStream, openSync, writeSync } from 'fs';
|
|
4
4
|
import { genSequence } from 'gensequence';
|
|
5
5
|
import { asAffWord } from './affLegacy.js';
|
|
6
6
|
import { IterableHunspellReaderLegacy } from './IterableHunspellReaderLegacy.js';
|
|
7
7
|
import { iterableToStream } from './iterableToStream.js';
|
|
8
8
|
import { batch, uniqueFilter } from './util.js';
|
|
9
|
-
const uniqueHistorySize =
|
|
9
|
+
const uniqueHistorySize = 500_000;
|
|
10
10
|
let logStream = process.stderr;
|
|
11
11
|
export function getCommand() {
|
|
12
12
|
const commander = new Command('words');
|
|
@@ -31,7 +31,7 @@ export function getCommand() {
|
|
|
31
31
|
}
|
|
32
32
|
function notify(message, newLine = true) {
|
|
33
33
|
message = message + (newLine ? '\n' : '');
|
|
34
|
-
logStream.write(message, '
|
|
34
|
+
logStream.write(message, 'utf8');
|
|
35
35
|
}
|
|
36
36
|
function yesNo(value) {
|
|
37
37
|
return value ? 'Yes' : 'No';
|
|
@@ -138,7 +138,7 @@ async function actionPrime(hunspellDicFilename, options) {
|
|
|
138
138
|
const callback = showProgress
|
|
139
139
|
? () => {
|
|
140
140
|
current++;
|
|
141
|
-
!(current % reportProgressRate) && process.stderr.write(calcProgress(), '
|
|
141
|
+
!(current % reportProgressRate) && process.stderr.write(calcProgress(), 'utf8');
|
|
142
142
|
}
|
|
143
143
|
: () => {
|
|
144
144
|
/* void */
|
package/dist/converter.js
CHANGED
|
@@ -3,7 +3,7 @@ export class Converter {
|
|
|
3
3
|
_match;
|
|
4
4
|
_map;
|
|
5
5
|
constructor(convList) {
|
|
6
|
-
const match = convList.map(({ from }) => from.
|
|
6
|
+
const match = convList.map(({ from }) => from.replaceAll(regexSpecialCharacters, '\\$&')).join('|');
|
|
7
7
|
this._match = new RegExp(match, 'g');
|
|
8
8
|
this._map = Object.create(null);
|
|
9
9
|
convList.reduce((map, { from, to }) => {
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
/// <reference types="node" resolution-mode="require"/>
|
|
2
2
|
/// <reference types="node" resolution-mode="require"/>
|
|
3
|
-
import * as stream from 'stream';
|
|
3
|
+
import * as stream from 'node:stream';
|
|
4
4
|
export type Streamable = string | Buffer;
|
|
5
5
|
export type IterableLike<T> = Iterable<T> | IterableIterator<T>;
|
|
6
6
|
/**
|
package/dist/iterableToStream.js
CHANGED
|
@@ -1,9 +1,9 @@
|
|
|
1
|
-
import * as stream from 'stream';
|
|
1
|
+
import * as stream from 'node:stream';
|
|
2
2
|
/**
|
|
3
3
|
* Transform an iterable into a node readable stream.
|
|
4
4
|
*/
|
|
5
|
-
export function iterableToStream(src, options
|
|
6
|
-
return new ReadableObservableStream(src, options);
|
|
5
|
+
export function iterableToStream(src, options) {
|
|
6
|
+
return new ReadableObservableStream(src, options ?? { encoding: 'utf8' });
|
|
7
7
|
}
|
|
8
8
|
class ReadableObservableStream extends stream.Readable {
|
|
9
9
|
_source;
|
|
@@ -18,6 +18,9 @@ class ReadableObservableStream extends stream.Readable {
|
|
|
18
18
|
this.iter = this._source[Symbol.iterator]();
|
|
19
19
|
}
|
|
20
20
|
if (this.done) {
|
|
21
|
+
// See: https://nodejs.org/api/stream.html#readablepushchunk-encoding
|
|
22
|
+
// Pushing null means the stream is done.
|
|
23
|
+
// eslint-disable-next-line unicorn/no-null
|
|
21
24
|
this.push(null);
|
|
22
25
|
return;
|
|
23
26
|
}
|
|
@@ -31,6 +34,9 @@ class ReadableObservableStream extends stream.Readable {
|
|
|
31
34
|
if (r.value !== null && r.value !== undefined) {
|
|
32
35
|
this.push(r.value);
|
|
33
36
|
}
|
|
37
|
+
// See: https://nodejs.org/api/stream.html#readablepushchunk-encoding
|
|
38
|
+
// Pushing null means the stream is done.
|
|
39
|
+
// eslint-disable-next-line unicorn/no-null
|
|
34
40
|
this.push(null);
|
|
35
41
|
}
|
|
36
42
|
}
|
package/dist/textUtils.js
CHANGED
|
@@ -11,15 +11,19 @@ function replaceWithUnicode(substring) {
|
|
|
11
11
|
const start = 0x20;
|
|
12
12
|
const end = 0x7a;
|
|
13
13
|
let val = '';
|
|
14
|
-
for (
|
|
15
|
-
const
|
|
16
|
-
const code = char.charCodeAt(0);
|
|
14
|
+
for (const char of substring) {
|
|
15
|
+
const code = char.codePointAt(0) || 0;
|
|
17
16
|
if (code >= start && code <= end) {
|
|
18
17
|
val += char;
|
|
19
18
|
continue;
|
|
20
19
|
}
|
|
21
|
-
|
|
22
|
-
|
|
20
|
+
for (let i = 0; i < char.length; i += 1) {
|
|
21
|
+
// Use charCodeAt to get the value because JSON does not handle \u{10000} correctly.
|
|
22
|
+
// eslint-disable-next-line unicorn/prefer-code-point
|
|
23
|
+
const code = char.charCodeAt(i);
|
|
24
|
+
const hex = code.toString(16).toUpperCase().padStart(4, '0');
|
|
25
|
+
val += code < 256 ? '\\x' + hex.slice(-2) : hex.length === 4 ? '\\u' + hex : '\\u{' + hex + '}';
|
|
26
|
+
}
|
|
23
27
|
}
|
|
24
28
|
return val;
|
|
25
29
|
}
|
|
@@ -44,7 +48,7 @@ export function toRange(letters, minLength = 4) {
|
|
|
44
48
|
return;
|
|
45
49
|
}
|
|
46
50
|
for (let code = begin + 1; code < end; code += 1) {
|
|
47
|
-
chars.push(String.
|
|
51
|
+
chars.push(String.fromCodePoint(code));
|
|
48
52
|
}
|
|
49
53
|
}
|
|
50
54
|
function pushRange() {
|
|
@@ -52,9 +56,8 @@ export function toRange(letters, minLength = 4) {
|
|
|
52
56
|
chars.push(endChar);
|
|
53
57
|
endChar = '';
|
|
54
58
|
}
|
|
55
|
-
for (
|
|
56
|
-
const
|
|
57
|
-
const code = letter.charCodeAt(0);
|
|
59
|
+
for (const letter of letters) {
|
|
60
|
+
const code = letter.codePointAt(0) || 0;
|
|
58
61
|
if (code - end === 1) {
|
|
59
62
|
end = code;
|
|
60
63
|
endChar = letter;
|
|
@@ -72,6 +75,6 @@ export function removeAccents(text) {
|
|
|
72
75
|
return removeLooseAccents(text.normalize('NFD'));
|
|
73
76
|
}
|
|
74
77
|
export function removeLooseAccents(text) {
|
|
75
|
-
return text.
|
|
78
|
+
return text.replaceAll(/\p{M}/gu, '');
|
|
76
79
|
}
|
|
77
80
|
//# sourceMappingURL=textUtils.js.map
|
package/dist/util.js
CHANGED
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "hunspell-reader",
|
|
3
|
-
"version": "8.
|
|
3
|
+
"version": "8.8.0",
|
|
4
4
|
"description": "A library for reading Hunspell Dictionary Files",
|
|
5
5
|
"bin": "bin.js",
|
|
6
6
|
"type": "module",
|
|
@@ -44,8 +44,8 @@
|
|
|
44
44
|
},
|
|
45
45
|
"homepage": "https://github.com/streetsidesoftware/cspell/tree/main/packages/hunspell-reader#readme",
|
|
46
46
|
"dependencies": {
|
|
47
|
-
"@cspell/cspell-pipe": "^8.
|
|
48
|
-
"@cspell/cspell-types": "^8.
|
|
47
|
+
"@cspell/cspell-pipe": "^8.8.0",
|
|
48
|
+
"@cspell/cspell-types": "^8.8.0",
|
|
49
49
|
"commander": "^12.0.0",
|
|
50
50
|
"gensequence": "^7.0.0",
|
|
51
51
|
"html-entities": "^2.5.2",
|
|
@@ -54,5 +54,5 @@
|
|
|
54
54
|
"engines": {
|
|
55
55
|
"node": ">=18"
|
|
56
56
|
},
|
|
57
|
-
"gitHead": "
|
|
57
|
+
"gitHead": "a42bce675c00cb2d51809b3ae3894119ea4f5ce7"
|
|
58
58
|
}
|