hunspell-reader 8.3.2 → 8.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{IterableHunspellReader.d.ts → IterableHunspellReaderLegacy.d.ts} +4 -4
- package/dist/{IterableHunspellReader.js → IterableHunspellReaderLegacy.js} +5 -5
- package/dist/aff.d.ts +205 -26
- package/dist/aff.js +377 -221
- package/dist/affConstants.d.ts +6 -0
- package/dist/affConstants.js +50 -0
- package/dist/affDef.d.ts +18 -4
- package/dist/affLegacy.d.ts +51 -0
- package/dist/affLegacy.js +269 -0
- package/dist/affReader.d.ts +2 -0
- package/dist/affReader.js +49 -6
- package/dist/commandWords.js +3 -3
- package/dist/converter.d.ts +1 -1
- package/dist/converter.js +2 -2
- package/dist/index.d.ts +2 -2
- package/dist/index.js +2 -2
- package/dist/util.d.ts +2 -0
- package/dist/util.js +22 -0
- package/package.json +6 -5
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
/*
cspell:ignore KEEPCASE FORBIDDENWORD NEEDAFFIX
cspell:ignore COMPOUNDBEGIN COMPOUNDMIDDLE COMPOUNDEND COMPOUNDFORBIDFLAG
*/
// Maps a hunspell `.aff` option name to the word flags it implies.
// Note: COMPOUNDFLAG and COMPOUNDPERMITFLAG intentionally map to the same flag.
export const affFlag = {
    KEEPCASE: { isKeepCase: true },
    WARN: { isWarning: true },
    FORCEUCASE: { isForceUCase: true },
    FORBIDDENWORD: { isForbiddenWord: true },
    NOSUGGEST: { isNoSuggest: true },
    NEEDAFFIX: { isNeedAffix: true },
    COMPOUNDBEGIN: { canBeCompoundBegin: true },
    COMPOUNDMIDDLE: { canBeCompoundMiddle: true },
    COMPOUNDEND: { canBeCompoundEnd: true },
    COMPOUNDFLAG: { isCompoundPermitted: true },
    COMPOUNDPERMITFLAG: { isCompoundPermitted: true },
    COMPOUNDFORBIDFLAG: { isCompoundForbidden: true },
    ONLYINCOMPOUND: { isOnlyAllowedInCompound: true },
};
// Single-character code per flag, used to build compact flag signatures.
export const flagToStringMap = {
    isCompoundPermitted: 'C',
    canBeCompoundBegin: 'B',
    canBeCompoundMiddle: 'M',
    canBeCompoundEnd: 'E',
    isOnlyAllowedInCompound: 'O',
    isWarning: 'W',
    isKeepCase: 'K',
    isForceUCase: 'U',
    isForbiddenWord: 'F',
    isNoSuggest: 'N',
    isNeedAffix: 'A',
    isCompoundForbidden: '-',
};
// Human-readable name per flag, used in verbose / diagnostic output.
export const flagToLongStringMap = {
    isCompoundPermitted: 'CompoundPermitted',
    canBeCompoundBegin: 'CompoundBegin',
    canBeCompoundMiddle: 'CompoundMiddle',
    canBeCompoundEnd: 'CompoundEnd',
    isOnlyAllowedInCompound: 'OnlyInCompound',
    isWarning: 'Warning',
    isKeepCase: 'KeepCase',
    isForceUCase: 'ForceUpperCase',
    isForbiddenWord: 'Forbidden',
    isNoSuggest: 'NoSuggest',
    isNeedAffix: 'NeedAffix',
    isCompoundForbidden: 'CompoundForbidden',
};
//# sourceMappingURL=affConstants.js.map
|
package/dist/affDef.d.ts
CHANGED
|
@@ -1,22 +1,25 @@
|
|
|
1
1
|
export interface Fx {
|
|
2
|
-
type:
|
|
2
|
+
type: 'PFX' | 'SFX';
|
|
3
3
|
id: string;
|
|
4
4
|
combinable: boolean;
|
|
5
5
|
substitutionSets: Substitutions;
|
|
6
|
+
substitutionsForRegExps: SubstitutionsForRegExp[];
|
|
6
7
|
count?: string;
|
|
7
8
|
extra?: string[];
|
|
8
9
|
}
|
|
9
|
-
export type Substitutions = Map<string,
|
|
10
|
+
export type Substitutions = Map<string, SubstitutionsForRegExp>;
|
|
10
11
|
export interface Substitution {
|
|
12
|
+
type: 'P' | 'S';
|
|
11
13
|
remove: string;
|
|
12
14
|
attach: string;
|
|
13
15
|
attachRules?: string;
|
|
14
16
|
replace: RegExp;
|
|
15
17
|
extra?: string;
|
|
16
18
|
}
|
|
17
|
-
export interface
|
|
19
|
+
export interface SubstitutionsForRegExp {
|
|
18
20
|
match: RegExp;
|
|
19
21
|
substitutions: Substitution[];
|
|
22
|
+
substitutionsGroupedByRemove: Map<RegExp, Substitution[]>;
|
|
20
23
|
}
|
|
21
24
|
export interface Rep {
|
|
22
25
|
match: string;
|
|
@@ -66,13 +69,23 @@ export interface AffInfo extends AffTransformFlags {
|
|
|
66
69
|
PFX?: Map<string, Fx>;
|
|
67
70
|
SFX?: Map<string, Fx>;
|
|
68
71
|
}
|
|
69
|
-
export
|
|
72
|
+
export type Rule = FlagRule | PfxRule | SfxRule;
|
|
73
|
+
interface RuleBase {
|
|
70
74
|
id: string;
|
|
71
75
|
type: string;
|
|
72
76
|
flags?: AffWordFlags;
|
|
73
77
|
pfx?: Fx;
|
|
74
78
|
sfx?: Fx;
|
|
75
79
|
}
|
|
80
|
+
export interface FlagRule extends RuleBase {
|
|
81
|
+
flags: AffWordFlags;
|
|
82
|
+
}
|
|
83
|
+
export interface PfxRule extends RuleBase {
|
|
84
|
+
pfx: Fx;
|
|
85
|
+
}
|
|
86
|
+
export interface SfxRule extends RuleBase {
|
|
87
|
+
sfx: Fx;
|
|
88
|
+
}
|
|
76
89
|
/**
|
|
77
90
|
* AffWordFlags are the flags applied to a word after the hunspell rules have been applied.
|
|
78
91
|
* They are either `true` or `undefined`.
|
|
@@ -177,4 +190,5 @@ export interface AffWord {
|
|
|
177
190
|
prefix: string;
|
|
178
191
|
dic: string;
|
|
179
192
|
}
|
|
193
|
+
export {};
|
|
180
194
|
//# sourceMappingURL=affDef.d.ts.map
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
import type { AffInfo, AffWord, AffWordFlags, Fx, Rule } from './affDef.js';
import { Converter } from './converter.js';
/** The `word` field in a Converted AffWord has been converted using the OCONV mapping */
export type ConvertedAffWord = AffWord;
/**
 * Legacy affix engine: expands hunspell `.dic` entries into words by
 * applying the prefix/suffix rules and word flags parsed from the `.aff` file.
 */
export declare class Aff {
    #private;
    readonly affInfo: AffInfo;
    readonly filename?: string | undefined;
    /** Rule id -> parsed rule (flag, prefix, or suffix). */
    protected rules: Map<string, Rule>;
    /** Output (OCONV) character conversion. */
    protected _oConv: Converter;
    /** Input (ICONV) character conversion. */
    protected _iConv: Converter;
    private _maxSuffixDepth;
    private _mapRules;
    constructor(affInfo: AffInfo, filename?: string | undefined);
    /** Cap on recursive affix application depth. */
    get maxSuffixDepth(): number;
    set maxSuffixDepth(value: number);
    /**
     * Takes a line from a hunspell.dic file and applies the rules found in the aff file.
     * For performance reasons, only the `word` field is mapped with OCONV.
     * @param {string} line - the line from the .dic file.
     */
    applyRulesToDicEntry(line: string, maxDepth?: number): ConvertedAffWord[];
    /**
     * @internal
     */
    applyRulesToWord(affWord: AffWord, remainingDepth: number): AffWord[];
    applyAffixesToWord(affixRules: Fx[], affWord: AffWord, remainingDepth: number): AffWord[];
    applyAffixToWord(affix: Fx, affWord: AffWord, combinableSfx: string): AffWord[];
    /** Resolves a dic-entry rule string (or numeric AF alias) to Rule objects. */
    getMatchingRules(rules: string): Rule[];
    joinRules(rules: string[]): string;
    separateRules(rules: string): string[];
    get iConv(): Converter;
    get oConv(): Converter;
}
declare function signature(aff: AffWord): string;
/** Builds the rule lookup table (id -> Rule) from the parsed aff info. */
export declare function processRules(affInfo: AffInfo): Map<string, Rule>;
/** Pass-through that optionally logs the AffWord (debug aid). */
export declare function logAffWord(affWord: AffWord, message: string): AffWord;
export declare function affWordToColoredString(affWord: AffWord): string;
export declare function flagsToString(flags: AffWordFlags): string;
/** Wraps a raw word (and optional rule string) into an AffWord. */
export declare function asAffWord(word: string, rules?: string, flags?: AffWordFlags): AffWord;
/** Orders AffWords by word, then by flag signature. */
export declare function compareAff(a: AffWord, b: AffWord): 0 | 1 | -1;
/**
 * Returns a filter function that will filter adjacent AffWords
 * It compares the word and the flags.
 */
export declare function filterAff(): (t: AffWord) => boolean;
/** Exposes internal helpers for testing/diagnostics. */
export declare const debug: {
    signature: typeof signature;
};
export {};
//# sourceMappingURL=affLegacy.d.ts.map
|
|
@@ -0,0 +1,269 @@
|
|
|
1
|
+
import * as GS from 'gensequence';
|
|
2
|
+
import * as util from 'util';
|
|
3
|
+
import { affFlag, flagToLongStringMap, flagToStringMap } from './affConstants.js';
|
|
4
|
+
import { Converter } from './converter.js';
|
|
5
|
+
import { filterOrderedList, isDefined } from './util.js';
|
|
6
|
+
// Set to true to enable verbose output in logAffWord (debug aid).
const log = false;
// Default cap on recursive affix application depth (see Aff.maxSuffixDepth).
const DefaultMaxDepth = 5;
// Matches strings made up entirely of digits (numeric AF rule aliases).
const regExpIsNumber = /^\d+$/;
|
|
9
|
+
/**
 * Legacy affix engine. Given the parsed contents of a hunspell `.aff` file
 * (AffInfo), expands `.dic` entries into the list of words produced by
 * applying prefix/suffix rules and word flags.
 */
export class Aff {
    affInfo;
    filename;
    // Rule id -> parsed Rule (flag / pfx / sfx); built once in the constructor.
    rules;
    // Output (OCONV) character conversion, applied to generated words.
    _oConv;
    // Input (ICONV) character conversion, exposed via the `iConv` getter.
    _iConv;
    // Cap on recursive affix application; prevents runaway rule chains.
    _maxSuffixDepth = DefaultMaxDepth;
    // Memoizes separateRules results (rule string -> rule ids).
    _mapRules = new Map();
    constructor(affInfo, filename) {
        this.affInfo = affInfo;
        this.filename = filename;
        this.rules = processRules(affInfo);
        this._iConv = new Converter(affInfo.ICONV || []);
        this._oConv = new Converter(affInfo.OCONV || []);
    }
    get maxSuffixDepth() {
        return this._maxSuffixDepth;
    }
    set maxSuffixDepth(value) {
        this._maxSuffixDepth = value;
    }
    /**
     * Takes a line from a hunspell.dic file and applies the rules found in the aff file.
     * For performance reasons, only the `word` field is mapped with OCONV.
     * @param {string} line - the line from the .dic file.
     */
    applyRulesToDicEntry(line, maxDepth) {
        const maxSuffixDepth = maxDepth ?? this.maxSuffixDepth;
        // Keep only the first whitespace-delimited token (ignores trailing fields).
        const [lineLeft] = line.split(/\s+/, 1);
        const [word, rules = ''] = lineLeft.split('/', 2);
        const convert = this._oConv.convert;
        const results = this.applyRulesToWord(asAffWord(word, rules), maxSuffixDepth).map((affWord) => ((affWord.word = convert(affWord.word)), affWord));
        // Sort then drop adjacent duplicates (same word + same flag signature).
        results.sort(compareAff);
        const filtered = results.filter(filterAff());
        return filtered;
    }
    /**
     * @internal
     */
    applyRulesToWord(affWord, remainingDepth) {
        const compoundMin = this.affInfo.COMPOUNDMIN ?? 3;
        const { word, base, suffix, prefix, dic } = affWord;
        const allRules = this.getMatchingRules(affWord.rules);
        // Fold all pure flag rules into the word's flags.
        const { rulesApplied, flags } = reduceAffixRules(affWord, allRules);
        // Non-flag rule ids are carried forward for affix application.
        const rules = this.joinRules(allRules.filter((rule) => !rule.flags).map((rule) => rule.id));
        const affixRules = allRules.map((rule) => rule.sfx || rule.pfx).filter(isDefined);
        const wordWithFlags = { word, flags, rulesApplied, rules: '', base, suffix, prefix, dic };
        // The un-affixed word plus every affix expansion; words still flagged
        // isNeedAffix are not valid standalone words and are dropped.
        return [wordWithFlags, ...this.applyAffixesToWord(affixRules, { ...wordWithFlags, rules }, remainingDepth)]
            .filter(({ flags }) => !flags.isNeedAffix)
            .map((affWord) => adjustCompounding(affWord, compoundMin))
            .map((affWord) => logAffWord(affWord, 'applyRulesToWord'));
    }
    applyAffixesToWord(affixRules, affWord, remainingDepth) {
        if (remainingDepth <= 0) {
            return [];
        }
        // Combinable suffix rules may also be applied after a combinable prefix.
        const combinableRules = affixRules
            .filter((rule) => rule.type === 'SFX')
            .filter((rule) => rule.combinable === true)
            .map(({ id }) => id);
        const combinableSfx = this.joinRules(combinableRules);
        const r = affixRules
            .flatMap((affix) => this.applyAffixToWord(affix, affWord, combinableSfx))
            .flatMap((affWord) => this.applyRulesToWord(affWord, remainingDepth - 1));
        return r;
    }
    applyAffixToWord(affix, affWord, combinableSfx) {
        const { word } = affWord;
        // A combinable prefix passes the combinable suffixes along as pending rules.
        const combineRules = affix.type === 'PFX' && affix.combinable && !!combinableSfx ? combinableSfx : '';
        // Applying any affix satisfies NEEDAFFIX.
        const flags = affWord.flags.isNeedAffix ? removeNeedAffix(affWord.flags) : affWord.flags;
        const matchingSubstitutions = affix.substitutionsForRegExps.filter((sub) => sub.match.test(word));
        const partialAffWord = { ...affWord, flags, rules: combineRules };
        return matchingSubstitutions
            .flatMap((sub) => this.#applySubstitution(affix, partialAffWord, sub))
            .map((affWord) => logAffWord(affWord, 'applyAffixToWord'));
    }
    // Builds the new word for one substitution. `stripped` is the word with the
    // `remove` pattern already stripped by the caller. Tracks the prefix (p) and
    // suffix (s) boundary offsets so base/prefix/suffix stay consistent.
    #substituteAttach(affix, affWord, sub, stripped) {
        const { word: origWord, rulesApplied, flags, dic } = affWord;
        const rules = affWord.rules + (sub.attachRules || '');
        let word;
        let p = affWord.prefix.length;
        let s = origWord.length - affWord.suffix.length;
        if (sub.type === 'S') {
            // Suffix: attach at the end; clamp boundaries to the stripped stem.
            word = stripped + sub.attach;
            s = Math.min(stripped.length, s);
            p = Math.min(p, s);
        }
        else {
            // Prefix: attach at the front; shift boundaries by the length delta.
            word = sub.attach + stripped;
            const d = word.length - origWord.length;
            p = Math.max(p, word.length - stripped.length);
            s = Math.max(s + d, p);
        }
        const base = word.slice(p, s);
        const prefix = word.slice(0, p);
        const suffix = word.slice(s);
        return {
            word,
            rulesApplied: rulesApplied + ' ' + affix.id,
            rules,
            flags,
            base,
            suffix,
            prefix,
            dic,
        };
    }
    // Applies every substitution in `subs` whose `remove` pattern matches the word.
    #applySubstitution(affix, affWord, subs) {
        const results = [];
        for (const [replace, substitutions] of subs.substitutionsGroupedByRemove) {
            if (!replace.test(affWord.word))
                continue;
            // Strip once per `remove` pattern; shared by all grouped substitutions.
            const stripped = affWord.word.replace(replace, '');
            for (const sub of substitutions) {
                results.push(this.#substituteAttach(affix, affWord, sub, stripped));
            }
        }
        return results;
    }
    // Resolves a dic-entry rule string (or a numeric AF table alias) to Rules.
    getMatchingRules(rules) {
        const { AF = [] } = this.affInfo;
        const idx = regExpIsNumber.test(rules) ? parseInt(rules, 10) : -1;
        const rulesToSplit = AF[idx] || rules;
        return this.separateRules(rulesToSplit)
            .map((key) => this.rules.get(key))
            .filter(isDefined);
    }
    // Joins rule ids using the file's FLAG mode ('long' | 'num' | default single-char).
    joinRules(rules) {
        switch (this.affInfo.FLAG) {
            case 'long':
                return rules.join('');
            case 'num':
                return rules.join(',');
        }
        return rules.join('');
    }
    // Memoized split of a rule string into its unique rule ids.
    separateRules(rules) {
        const found = this._mapRules.get(rules);
        if (found)
            return found;
        const split = this.#separateRules(rules);
        this._mapRules.set(rules, split);
        return split;
    }
    #separateRules(rules) {
        switch (this.affInfo.FLAG) {
            case 'long':
                // Two characters per flag id.
                return [...new Set(rules.replace(/(..)/g, '$1//').split('//').slice(0, -1))];
            case 'num':
                // Comma-separated numeric ids.
                return [...new Set(rules.split(','))];
        }
        // Default: one character per flag id.
        return [...new Set(rules.split(''))];
    }
    get iConv() {
        return this._iConv;
    }
    get oConv() {
        return this._oConv;
    }
}
|
|
169
|
+
// Builds a stable identity string for an AffWord: the word plus the sorted
// single-character codes of all set flags (e.g. "cat|KN").
function signature(aff) {
    const { word, flags } = aff;
    const letters = [];
    for (const [flagName, isSet] of Object.entries(flags)) {
        if (isSet) {
            letters.push(flagToStringMap[flagName]);
        }
    }
    return word + '|' + letters.sort().join('');
}
|
|
178
|
+
// Builds the rule lookup table (rule id -> Rule) from the parsed aff info.
// Insertion order matters: suffixes, then prefixes, then flag rules — a later
// entry with the same id overwrites an earlier one, matching the original
// reduce-based construction.
export function processRules(affInfo) {
    const rules = new Map();
    for (const [, sfx] of affInfo.SFX || []) {
        rules.set(sfx.id, { id: sfx.id, type: 'sfx', sfx });
    }
    for (const [, pfx] of affInfo.PFX || []) {
        rules.set(pfx.id, { id: pfx.id, type: 'pfx', pfx });
    }
    // Any aff option listed in affFlag (KEEPCASE, NOSUGGEST, ...) whose value
    // is set becomes a flag rule keyed by that value.
    for (const [key, value] of GS.sequenceFromObject(affInfo)) {
        if (!!affFlag[key] && !!value) {
            rules.set(value, { id: value, type: 'flag', flags: affFlag[key] });
        }
    }
    return rules;
}
|
|
196
|
+
// Pass-through helper: dumps the AffWord to the console when the module-level
// `log` switch is on, then returns the word unchanged.
export function logAffWord(affWord, message) {
    /* istanbul ignore if */
    if (log) {
        const dump = util.inspect(affWord, { showHidden: false, depth: 5, colors: true });
        console.log(message + ': ' + dump);
    }
    return affWord;
}
|
|
204
|
+
/* istanbul ignore next */
// Renders an AffWord on a single line with colorized output and the flags
// expanded to their long names.
export function affWordToColoredString(affWord) {
    const printable = { ...affWord, flags: flagsToString(affWord.flags) };
    const dump = util.inspect(printable, { showHidden: false, depth: 5, colors: true });
    return dump.replace(/(\s|\n|\r)+/g, ' ');
}
|
|
210
|
+
/* istanbul ignore next */
// Converts a flags object into a sorted, colon-separated list of long flag
// names (e.g. "KeepCase:NoSuggest").
export function flagsToString(flags) {
    const names = [];
    for (const [key, value] of Object.entries(flags)) {
        if (value) {
            names.push(flagToLongStringMap[key]);
        }
    }
    return names.sort().join(':');
}
|
|
218
|
+
// Wraps a raw word (and optional rule string / flags) into a fresh AffWord.
// `dic` reconstructs the original dictionary-entry form ("word/rules").
export function asAffWord(word, rules = '', flags = {}) {
    const dic = rules ? word + '/' + rules : word;
    return {
        word,
        base: word,
        prefix: '',
        suffix: '',
        rulesApplied: '',
        rules,
        flags,
        dic,
    };
}
|
|
230
|
+
// Orders AffWords lexicographically by word, breaking ties with the flag
// signature so duplicates become adjacent for filterAff().
export function compareAff(a, b) {
    if (a.word < b.word)
        return -1;
    if (a.word > b.word)
        return 1;
    const sigA = signature(a);
    const sigB = signature(b);
    if (sigA === sigB)
        return 0;
    return sigA < sigB ? -1 : 1;
}
|
|
238
|
+
// Folds every flag rule in `allRules` into the word's flags, appending each
// rule id (space-separated) to the applied-rules trail. Non-flag rules are
// ignored here. Neither input is mutated.
function reduceAffixRules(affWord, allRules) {
    let rulesApplied = affWord.rulesApplied;
    let flags = affWord.flags;
    for (const rule of allRules) {
        if (!rule.flags)
            continue;
        rulesApplied = rulesApplied + ' ' + rule.id;
        flags = { ...flags, ...rule.flags };
    }
    return { rulesApplied, flags };
}
|
|
246
|
+
/**
 * Returns a filter function that will filter adjacent AffWords
 * It compares the word and the flags.
 */
export function filterAff() {
    const differs = (a, b) => a.word !== b.word || signature(a) !== signature(b);
    return filterOrderedList(differs);
}
|
|
253
|
+
// Diagnostic export: exposes the private `signature` helper for tests
// without making it part of the public API surface.
export const debug = {
    signature,
};
|
|
256
|
+
// Returns a copy of `flags` with the isNeedAffix flag removed; the input
// object is left untouched.
function removeNeedAffix(flags) {
    const { isNeedAffix: _removed, ...remaining } = flags;
    return remaining;
}
|
|
261
|
+
// Words shorter than COMPOUNDMIN may not participate in compounds: strip the
// isCompoundPermitted flag from such words (mutates affWord.flags in place by
// replacing it with a copy that omits the flag).
function adjustCompounding(affWord, minLength) {
    const tooShortToCompound = affWord.flags.isCompoundPermitted && affWord.word.length < minLength;
    if (tooShortToCompound) {
        const { isCompoundPermitted: _removed, ...remaining } = affWord.flags;
        affWord.flags = remaining;
    }
    return affWord;
}
|
|
269
|
+
//# sourceMappingURL=affLegacy.js.map
|
package/dist/affReader.d.ts
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import { Aff } from './aff.js';
|
|
2
2
|
import type { AffInfo, Fx } from './affDef.js';
|
|
3
|
+
import { Aff as AffLegacy } from './affLegacy.js';
|
|
3
4
|
export interface ConvEntry {
|
|
4
5
|
from: string;
|
|
5
6
|
to: string;
|
|
@@ -21,6 +22,7 @@ declare function parseAffixRule(line: AffLine): AffixRule | undefined;
|
|
|
21
22
|
export declare function parseAffFile(filename: string, encoding?: string): Promise<AffInfo>;
|
|
22
23
|
export declare function parseAff(affFileContent: string, encoding?: string): AffInfo;
|
|
23
24
|
export declare function parseAffFileToAff(filename: string, encoding?: string): Promise<Aff>;
|
|
25
|
+
export declare function parseAffFileToAffLegacy(filename: string, encoding?: string): Promise<AffLegacy>;
|
|
24
26
|
declare function parseLine(line: string): AffLine;
|
|
25
27
|
export interface AffLine {
|
|
26
28
|
option: string;
|
package/dist/affReader.js
CHANGED
|
@@ -1,8 +1,10 @@
|
|
|
1
1
|
import assert from 'assert';
|
|
2
2
|
import { readFile } from 'fs/promises';
|
|
3
|
+
import { decode as decodeHtmlEntities } from 'html-entities';
|
|
3
4
|
import pkgIconvLite from 'iconv-lite';
|
|
4
5
|
import { Aff } from './aff.js';
|
|
5
|
-
import {
|
|
6
|
+
import { Aff as AffLegacy } from './affLegacy.js';
|
|
7
|
+
import { cleanObject, insertItemIntoGroupByField, isDefined } from './util.js';
|
|
6
8
|
const { decode } = pkgIconvLite;
|
|
7
9
|
const fixRegex = {
|
|
8
10
|
SFX: { m: /$/, r: '$' },
|
|
@@ -99,18 +101,23 @@ function tablePfxOrSfx(fieldValue, line) {
|
|
|
99
101
|
substitutionSets.set(ruleAsString, {
|
|
100
102
|
match: rule.condition,
|
|
101
103
|
substitutions: [],
|
|
104
|
+
substitutionsGroupedByRemove: new Map(),
|
|
102
105
|
});
|
|
103
106
|
}
|
|
104
107
|
const substitutionSet = substitutionSets.get(ruleAsString);
|
|
105
108
|
assert(substitutionSet);
|
|
106
109
|
const [attachText, attachRules] = rule.affix.split('/', 2);
|
|
107
|
-
|
|
110
|
+
const substitution = {
|
|
111
|
+
type: rule.type === 'SFX' ? 'S' : 'P',
|
|
108
112
|
remove: rule.stripping,
|
|
109
113
|
replace: rule.replace,
|
|
110
114
|
attach: attachText,
|
|
111
115
|
attachRules,
|
|
112
116
|
extra: rule.extra,
|
|
113
|
-
}
|
|
117
|
+
};
|
|
118
|
+
substitutionSet.substitutions.push(substitution);
|
|
119
|
+
insertItemIntoGroupByField(substitutionSet.substitutionsGroupedByRemove, 'replace', substitution);
|
|
120
|
+
fixRuleSet.substitutionsForRegExps = [...substitutionSets.values()];
|
|
114
121
|
return fieldValue;
|
|
115
122
|
}
|
|
116
123
|
/**
|
|
@@ -121,11 +128,12 @@ function parseAffixCreation(line) {
|
|
|
121
128
|
const [flag, combinable, count, ...extra] = (line.value || '').split(spaceRegex);
|
|
122
129
|
const fx = {
|
|
123
130
|
id: flag,
|
|
124
|
-
type: line.option,
|
|
131
|
+
type: line.option === 'SFX' ? 'SFX' : 'PFX',
|
|
125
132
|
combinable: !!combinable.match(yesRegex),
|
|
126
133
|
count,
|
|
127
134
|
extra,
|
|
128
135
|
substitutionSets: new Map(),
|
|
136
|
+
substitutionsForRegExps: [],
|
|
129
137
|
};
|
|
130
138
|
return fx;
|
|
131
139
|
}
|
|
@@ -158,10 +166,17 @@ function cleanAffixAttach(affix) {
|
|
|
158
166
|
const attach = fix === '0' ? '' : fix;
|
|
159
167
|
return attach + (rules ? '/' + rules : '');
|
|
160
168
|
}
|
|
169
|
+
// Cache of compiled affix-condition RegExps, keyed by "type:match".
// Aff files repeat conditions heavily, so caching avoids recompiling.
const regexpCache = new Map();
// Compiles an affix condition string into an anchored RegExp, applying the
// type-specific anchor fix-up from `fixRegex` (e.g. SFX conditions are
// anchored at the end of the word).
function fixMatch(type, match) {
    const key = type + ':' + match;
    const cached = regexpCache.get(key);
    if (cached)
        return cached;
    const exp = affixMatchToRegExpString(match);
    const fix = fixRegex[type];
    const regexp = new RegExp(exp.replace(fix.m, fix.r));
    regexpCache.set(key, regexp);
    return regexp;
}
|
|
166
181
|
function affixMatchToRegExpString(match) {
|
|
167
182
|
if (match === '0')
|
|
@@ -295,8 +310,11 @@ function collectionToAffInfo(affFieldCollectionTable, encoding) {
|
|
|
295
310
|
};
|
|
296
311
|
return cleanObject(result);
|
|
297
312
|
}
|
|
313
|
+
let htmlEntitiesFound = 0;
|
|
314
|
+
let currentAffFilename = '';
|
|
298
315
|
export async function parseAffFile(filename, encoding = UTF8) {
|
|
299
316
|
const buffer = await readFile(filename);
|
|
317
|
+
currentAffFilename = filename;
|
|
300
318
|
const file = decode(buffer, encoding);
|
|
301
319
|
const affInfo = parseAff(file, encoding);
|
|
302
320
|
if (affInfo.SET && affInfo.SET.toLowerCase() !== encoding.toLowerCase()) {
|
|
@@ -304,13 +322,31 @@ export async function parseAffFile(filename, encoding = UTF8) {
|
|
|
304
322
|
}
|
|
305
323
|
return affInfo;
|
|
306
324
|
}
|
|
325
|
+
/**
 * Decodes HTML entities (e.g. `&eacute;`) found in an aff file line.
 * Reports the first 10 occurrences (with the current filename) so bad
 * dictionary sources can be identified, then goes quiet.
 * @param line - a single line from the aff file.
 * @param index - zero-based line index (reported 1-based).
 */
function convertHtmlEntities(line, index) {
    // Fast path: no '&' means no entities are possible.
    if (line.indexOf('&') < 0)
        return line;
    const fixed = decodeHtmlEntities(line);
    if (fixed !== line) {
        if (htmlEntitiesFound < 10) {
            // currentAffFilename is set by parseAffFile before parsing starts.
            const foundInFile = currentAffFilename;
            console.error('HTML Entities found in aff file at line %s:%i\n\t%o replaced with:\n\t%o', foundInFile, index + 1, line, fixed);
        }
        if (htmlEntitiesFound === 10) {
            console.error('HTML Entities found in aff...');
        }
        ++htmlEntitiesFound;
    }
    return fixed;
}
|
|
307
341
|
export function parseAff(affFileContent, encoding = UTF8) {
|
|
342
|
+
htmlEntitiesFound = 0;
|
|
308
343
|
const lines = affFileContent.split(/\r?\n/g);
|
|
309
344
|
const affFieldCollectionTable = createAffFieldTable();
|
|
310
345
|
affFieldCollectionTable.SET.addLine({ option: 'SET', value: encoding });
|
|
311
346
|
lines
|
|
312
347
|
.map((line) => line.trimStart())
|
|
313
348
|
.map((line) => line.replace(commentRegex, ''))
|
|
349
|
+
.map(convertHtmlEntities)
|
|
314
350
|
.filter((line) => line.trim() !== '')
|
|
315
351
|
.map(parseLine)
|
|
316
352
|
.forEach((line) => {
|
|
@@ -320,7 +356,14 @@ export function parseAff(affFileContent, encoding = UTF8) {
|
|
|
320
356
|
return collectionToAffInfo(affFieldCollectionTable, encoding);
|
|
321
357
|
}
|
|
322
358
|
// Parses an aff file and wraps the result in the current `Aff` implementation.
export function parseAffFileToAff(filename, encoding) {
    return parseAffFile(filename, encoding).then((affInfo) => {
        return new Aff(affInfo, filename);
    });
}
// Parses an aff file and wraps the result in the legacy `Aff` implementation.
export function parseAffFileToAffLegacy(filename, encoding) {
    return parseAffFile(filename, encoding).then((affInfo) => {
        return new AffLegacy(affInfo, filename);
    });
}
|
|
325
368
|
function parseLine(line) {
|
|
326
369
|
const result = line.match(affixLine) || ['', ''];
|
package/dist/commandWords.js
CHANGED
|
@@ -2,8 +2,8 @@
|
|
|
2
2
|
import { Command } from 'commander';
|
|
3
3
|
import { createWriteStream, openSync, writeSync } from 'fs';
|
|
4
4
|
import { genSequence } from 'gensequence';
|
|
5
|
-
import { asAffWord } from './
|
|
6
|
-
import {
|
|
5
|
+
import { asAffWord } from './affLegacy.js';
|
|
6
|
+
import { IterableHunspellReaderLegacy } from './IterableHunspellReaderLegacy.js';
|
|
7
7
|
import { iterableToStream } from './iterableToStream.js';
|
|
8
8
|
import { batch, uniqueFilter } from './util.js';
|
|
9
9
|
const uniqueHistorySize = 500000;
|
|
@@ -109,7 +109,7 @@ async function actionPrime(hunspellDicFilename, options) {
|
|
|
109
109
|
log(`Dic file: ${dicFile}`);
|
|
110
110
|
log(`Aff file: ${affFile}`);
|
|
111
111
|
log(`Generating Words...`);
|
|
112
|
-
const reader = await
|
|
112
|
+
const reader = await IterableHunspellReaderLegacy.createFromFiles(affFile, dicFile);
|
|
113
113
|
if (max_depth && Number.parseInt(max_depth) >= 0) {
|
|
114
114
|
reader.maxDepth = Number.parseInt(max_depth);
|
|
115
115
|
}
|
package/dist/converter.d.ts
CHANGED
package/dist/converter.js
CHANGED
|
@@ -11,10 +11,10 @@ export class Converter {
|
|
|
11
11
|
return map;
|
|
12
12
|
}, this._map);
|
|
13
13
|
}
|
|
14
|
-
convert(input) {
|
|
14
|
+
convert = (input) => {
|
|
15
15
|
return input.replace(this._match, (m) => {
|
|
16
16
|
return this._map[m] || '';
|
|
17
17
|
});
|
|
18
|
-
}
|
|
18
|
+
};
|
|
19
19
|
}
|
|
20
20
|
//# sourceMappingURL=converter.js.map
|
package/dist/index.d.ts
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
export type { AffInfo, AffWord } from './affDef.js';
|
|
2
2
|
export { parseAff, parseAffFile as readAffFile } from './affReader.js';
|
|
3
|
-
export { createMatchingWordsFilter, type HunspellSrcData, IterableHunspellReader, type WordInfo, } from './
|
|
4
|
-
export {
|
|
3
|
+
export { createMatchingWordsFilter, type HunspellSrcData, IterableHunspellReaderLegacy as IterableHunspellReader, IterableHunspellReaderLegacy, type WordInfo, } from './IterableHunspellReaderLegacy.js';
|
|
4
|
+
export { IterableHunspellReaderLegacy as HunspellReader } from './IterableHunspellReaderLegacy.js';
|
|
5
5
|
export { uniqueFilter } from './util.js';
|
|
6
6
|
//# sourceMappingURL=index.d.ts.map
|
package/dist/index.js
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
export { parseAff, parseAffFile as readAffFile } from './affReader.js';
|
|
2
|
-
export { createMatchingWordsFilter, IterableHunspellReader, } from './
|
|
3
|
-
export {
|
|
2
|
+
export { createMatchingWordsFilter, IterableHunspellReaderLegacy as IterableHunspellReader, IterableHunspellReaderLegacy, } from './IterableHunspellReaderLegacy.js';
|
|
3
|
+
export { IterableHunspellReaderLegacy as HunspellReader } from './IterableHunspellReaderLegacy.js';
|
|
4
4
|
export { uniqueFilter } from './util.js';
|
|
5
5
|
//# sourceMappingURL=index.js.map
|
package/dist/util.d.ts
CHANGED
|
@@ -14,4 +14,6 @@ export declare function isDefined<T>(v: T | undefined): v is T;
|
|
|
14
14
|
* @returns the same object.
|
|
15
15
|
*/
|
|
16
16
|
export declare function cleanObject<T>(obj: T): T;
|
|
17
|
+
export declare function groupByField<T, K extends keyof T>(i: Iterable<T>, field: K): Map<T[K], T[]>;
|
|
18
|
+
export declare function insertItemIntoGroupByField<T, K extends keyof T>(map: Map<T[K], T[]>, field: K, item: T): void;
|
|
17
19
|
//# sourceMappingURL=util.d.ts.map
|
package/dist/util.js
CHANGED
|
@@ -65,4 +65,26 @@ export function cleanObject(obj) {
|
|
|
65
65
|
}
|
|
66
66
|
return obj;
|
|
67
67
|
}
|
|
68
|
+
// Groups the items of an iterable by the value of one of their fields,
// preserving encounter order within each group.
export function groupByField(i, field) {
    const groups = new Map();
    for (const item of i) {
        const key = item[field];
        const group = groups.get(key);
        if (group) {
            group.push(item);
        }
        else {
            groups.set(key, [item]);
        }
    }
    return groups;
}
|
|
81
|
+
// Adds a single item to an existing group-by map (see groupByField), creating
// the group keyed by item[field] if it does not exist yet.
export function insertItemIntoGroupByField(map, field, item) {
    const key = item[field];
    const group = map.get(key);
    if (group) {
        group.push(item);
    }
    else {
        map.set(key, [item]);
    }
}
|
|
68
90
|
//# sourceMappingURL=util.js.map
|