hunspell-reader 8.3.2 → 8.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{IterableHunspellReader.d.ts → IterableHunspellReaderLegacy.d.ts} +4 -4
- package/dist/{IterableHunspellReader.js → IterableHunspellReaderLegacy.js} +5 -5
- package/dist/aff.d.ts +205 -26
- package/dist/aff.js +377 -221
- package/dist/affConstants.d.ts +6 -0
- package/dist/affConstants.js +50 -0
- package/dist/affDef.d.ts +18 -4
- package/dist/affLegacy.d.ts +51 -0
- package/dist/affLegacy.js +269 -0
- package/dist/affReader.d.ts +2 -0
- package/dist/affReader.js +49 -6
- package/dist/commandWords.js +3 -3
- package/dist/converter.d.ts +1 -1
- package/dist/converter.js +2 -2
- package/dist/index.d.ts +2 -2
- package/dist/index.js +2 -2
- package/dist/util.d.ts +2 -0
- package/dist/util.js +22 -0
- package/package.json +6 -5
package/dist/aff.js
CHANGED
|
@@ -1,19 +1,21 @@
|
|
|
1
|
-
import
|
|
2
|
-
import {
|
|
3
|
-
import * as util from 'util';
|
|
1
|
+
import assert from 'assert';
|
|
2
|
+
import { affFlag } from './affConstants.js';
|
|
4
3
|
import { Converter } from './converter.js';
|
|
5
|
-
import { filterOrderedList, isDefined } from './util.js';
|
|
6
|
-
const
|
|
4
|
+
import { filterOrderedList, groupByField, isDefined } from './util.js';
|
|
5
|
+
const debug = false;
|
|
6
|
+
function logError(msg, ...args) {
|
|
7
|
+
debug && console.error(msg, ...args);
|
|
8
|
+
}
|
|
7
9
|
const DefaultMaxDepth = 5;
|
|
8
10
|
export class Aff {
|
|
9
11
|
affInfo;
|
|
10
|
-
|
|
12
|
+
affData;
|
|
11
13
|
_oConv;
|
|
12
14
|
_iConv;
|
|
13
15
|
_maxSuffixDepth = DefaultMaxDepth;
|
|
14
|
-
constructor(affInfo) {
|
|
16
|
+
constructor(affInfo, filename) {
|
|
15
17
|
this.affInfo = affInfo;
|
|
16
|
-
this.
|
|
18
|
+
this.affData = new AffData(affInfo, filename);
|
|
17
19
|
this._iConv = new Converter(affInfo.ICONV || []);
|
|
18
20
|
this._oConv = new Converter(affInfo.OCONV || []);
|
|
19
21
|
}
|
|
@@ -29,12 +31,13 @@ export class Aff {
|
|
|
29
31
|
* @param {string} line - the line from the .dic file.
|
|
30
32
|
*/
|
|
31
33
|
applyRulesToDicEntry(line, maxDepth) {
|
|
34
|
+
const afWord = this.affData.dictLineToAffixWord(line);
|
|
32
35
|
const maxSuffixDepth = maxDepth ?? this.maxSuffixDepth;
|
|
33
|
-
const
|
|
34
|
-
const
|
|
35
|
-
const results = this.applyRulesToWord(asAffWord(word, rules), maxSuffixDepth).map((affWord) => ({
|
|
36
|
+
const convert = this._oConv.convert;
|
|
37
|
+
const results = this.applyRulesToWord(afWord, maxSuffixDepth).map((affWord) => ({
|
|
36
38
|
...affWord,
|
|
37
|
-
word:
|
|
39
|
+
word: convert(affWord.word),
|
|
40
|
+
originalWord: affWord.word,
|
|
38
41
|
}));
|
|
39
42
|
results.sort(compareAff);
|
|
40
43
|
const filtered = results.filter(filterAff());
|
|
@@ -45,103 +48,72 @@ export class Aff {
|
|
|
45
48
|
*/
|
|
46
49
|
applyRulesToWord(affWord, remainingDepth) {
|
|
47
50
|
const compoundMin = this.affInfo.COMPOUNDMIN ?? 3;
|
|
48
|
-
const { word,
|
|
49
|
-
const
|
|
50
|
-
|
|
51
|
-
.filter((
|
|
52
|
-
.
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
const rules = this.joinRules(allRules.filter((rule) => !rule.flags).map((rule) => rule.id));
|
|
57
|
-
const affixRules = allRules.map((rule) => rule.sfx || rule.pfx).filter(isDefined);
|
|
58
|
-
const wordWithFlags = { word, flags, rulesApplied, rules: '', base, suffix, prefix, dic };
|
|
59
|
-
return [wordWithFlags, ...this.applyAffixesToWord(affixRules, { ...wordWithFlags, rules }, remainingDepth)]
|
|
60
|
-
.filter(({ flags }) => !flags.isNeedAffix)
|
|
61
|
-
.map((affWord) => adjustCompounding(affWord, compoundMin))
|
|
62
|
-
.map((affWord) => logAffWord(affWord, 'applyRulesToWord'));
|
|
63
|
-
}
|
|
64
|
-
applyAffixesToWord(affixRules, affWord, remainingDepth) {
|
|
65
|
-
if (remainingDepth <= 0) {
|
|
51
|
+
const { word, flags, dict, appliedRules } = affWord;
|
|
52
|
+
const wordWithFlags = { word, rules: undefined, flags, dict, appliedRules };
|
|
53
|
+
return [wordWithFlags, ...this.applyAffixesToWord(affWord, remainingDepth)]
|
|
54
|
+
.filter(({ flags }) => !(flags & AffixFlags.isNeedAffix))
|
|
55
|
+
.map((affWord) => adjustCompounding(affWord, compoundMin));
|
|
56
|
+
}
|
|
57
|
+
applyAffixesToWord(affWord, remainingDepth) {
|
|
58
|
+
if (remainingDepth <= 0 || !affWord.rules) {
|
|
66
59
|
return [];
|
|
67
60
|
}
|
|
68
|
-
const
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
.map(({ id }) => id);
|
|
72
|
-
const combinableSfx = this.joinRules(combinableRules);
|
|
73
|
-
const r = affixRules
|
|
61
|
+
const rules = affWord.rules;
|
|
62
|
+
const combinableSfx = rules.filter((r) => r.type === 'S' && r.fx.combinable);
|
|
63
|
+
const r = affWord.rules
|
|
74
64
|
.flatMap((affix) => this.applyAffixToWord(affix, affWord, combinableSfx))
|
|
75
65
|
.flatMap((affWord) => this.applyRulesToWord(affWord, remainingDepth - 1));
|
|
76
66
|
return r;
|
|
77
67
|
}
|
|
78
|
-
applyAffixToWord(
|
|
68
|
+
applyAffixToWord(rule, affWord, combinableSfx) {
|
|
79
69
|
const { word } = affWord;
|
|
80
|
-
const combineRules =
|
|
81
|
-
const flags = affWord.flags
|
|
82
|
-
const matchingSubstitutions =
|
|
83
|
-
const
|
|
84
|
-
|
|
85
|
-
.
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
}
|
|
90
|
-
|
|
91
|
-
const {
|
|
92
|
-
const
|
|
93
|
-
const
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
if (affix.type === 'SFX') {
|
|
98
|
-
s = Math.min(stripped.length, s);
|
|
99
|
-
p = Math.min(p, s);
|
|
70
|
+
const combineRules = rule.type === 'P' && rule.fx.combinable ? combinableSfx : [];
|
|
71
|
+
const flags = affWord.flags & ~AffixFlags.isNeedAffix;
|
|
72
|
+
const matchingSubstitutions = rule.fx.substitutionsForRegExps.filter((sub) => sub.match.test(word));
|
|
73
|
+
const source = {
|
|
74
|
+
dict: affWord.dict,
|
|
75
|
+
appliedRules: affWord.appliedRules ? [...affWord.appliedRules, rule.idx] : undefined,
|
|
76
|
+
};
|
|
77
|
+
const partialAffWord = this.affData.toAffixWord(source, word, flags, combineRules);
|
|
78
|
+
return matchingSubstitutions.flatMap((sub) => this.#applySubstitution(partialAffWord, sub));
|
|
79
|
+
}
|
|
80
|
+
#substituteAttach(affWord, sub, stripped) {
|
|
81
|
+
const { flags } = affWord;
|
|
82
|
+
const subRules = this.affData.getRulesForAffSubstitution(sub);
|
|
83
|
+
const rules = joinRules(affWord.rules, subRules);
|
|
84
|
+
let word;
|
|
85
|
+
if (sub.type === 'S') {
|
|
86
|
+
word = stripped + sub.attach;
|
|
100
87
|
}
|
|
101
88
|
else {
|
|
102
|
-
|
|
103
|
-
p = Math.max(p, word.length - stripped.length);
|
|
104
|
-
s = Math.max(s + d, p);
|
|
89
|
+
word = sub.attach + stripped;
|
|
105
90
|
}
|
|
106
|
-
|
|
107
|
-
const prefix = word.slice(0, p);
|
|
108
|
-
const suffix = word.slice(s);
|
|
109
|
-
return {
|
|
110
|
-
word,
|
|
111
|
-
rulesApplied: rulesApplied + ' ' + affix.id,
|
|
112
|
-
rules,
|
|
113
|
-
flags,
|
|
114
|
-
base,
|
|
115
|
-
suffix,
|
|
116
|
-
prefix,
|
|
117
|
-
dic,
|
|
118
|
-
};
|
|
119
|
-
}
|
|
120
|
-
getMatchingRules(rules) {
|
|
121
|
-
const { AF = [] } = this.affInfo;
|
|
122
|
-
const idx = parseInt(rules, 10);
|
|
123
|
-
const rulesToSplit = AF[idx] || rules;
|
|
124
|
-
return this.separateRules(rulesToSplit)
|
|
125
|
-
.map((key) => this.rules.get(key))
|
|
126
|
-
.filter(isDefined);
|
|
91
|
+
return this.affData.toAffixWord(affWord, word, flags, rules);
|
|
127
92
|
}
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
93
|
+
#applySubstitution(affWord, subs) {
|
|
94
|
+
const results = [];
|
|
95
|
+
for (const [replace, substitutions] of subs.substitutionsGroupedByRemove) {
|
|
96
|
+
if (!replace.test(affWord.word))
|
|
97
|
+
continue;
|
|
98
|
+
const stripped = affWord.word.replace(replace, '');
|
|
99
|
+
for (const sub of substitutions) {
|
|
100
|
+
results.push(this.#substituteAttach(affWord, sub, stripped));
|
|
101
|
+
}
|
|
134
102
|
}
|
|
135
|
-
return
|
|
103
|
+
return results;
|
|
136
104
|
}
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
105
|
+
getMatchingRules(flags) {
|
|
106
|
+
const rules = this.affData.getRules(flags);
|
|
107
|
+
return rules;
|
|
108
|
+
}
|
|
109
|
+
/**
|
|
110
|
+
* Convert the applied rule indexes to AFF Letters.
|
|
111
|
+
* Requires that the affixWord was generated with trace mode turned on.
|
|
112
|
+
* @param affixWord - the generated AffixWord.
|
|
113
|
+
*/
|
|
114
|
+
getFlagsValuesForAffixWord(affixWord) {
|
|
115
|
+
const rules = this.affData.getRulesForIndexes(affixWord.appliedRules);
|
|
116
|
+
return rules?.map((r) => r.id);
|
|
145
117
|
}
|
|
146
118
|
get iConv() {
|
|
147
119
|
return this._iConv;
|
|
@@ -149,143 +121,327 @@ export class Aff {
|
|
|
149
121
|
get oConv() {
|
|
150
122
|
return this._oConv;
|
|
151
123
|
}
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
const { word, flags } = aff;
|
|
155
|
-
const sig = Object.entries(flags)
|
|
156
|
-
.filter((e) => !!e[1])
|
|
157
|
-
.map((f) => flagToStringMap[f[0]])
|
|
158
|
-
.sort()
|
|
159
|
-
.join('');
|
|
160
|
-
return word + '|' + sig;
|
|
161
|
-
}
|
|
162
|
-
export function processRules(affInfo) {
|
|
163
|
-
const sfxRules = gs(affInfo.SFX || [])
|
|
164
|
-
.map(([, sfx]) => sfx)
|
|
165
|
-
.map((sfx) => ({ id: sfx.id, type: 'sfx', sfx }));
|
|
166
|
-
const pfxRules = gs(affInfo.PFX || [])
|
|
167
|
-
.map(([, pfx]) => pfx)
|
|
168
|
-
.map((pfx) => ({ id: pfx.id, type: 'pfx', pfx }));
|
|
169
|
-
const flagRules = GS.sequenceFromObject(affInfo)
|
|
170
|
-
.filter(([key, value]) => !!affFlag[key] && !!value)
|
|
171
|
-
.map(([key, value]) => ({ id: value, type: 'flag', flags: affFlag[key] }));
|
|
172
|
-
const rules = sfxRules
|
|
173
|
-
.concat(pfxRules)
|
|
174
|
-
.concat(flagRules)
|
|
175
|
-
.reduce((acc, rule) => {
|
|
176
|
-
acc.set(rule.id, rule);
|
|
177
|
-
return acc;
|
|
178
|
-
}, new Map());
|
|
179
|
-
return rules;
|
|
180
|
-
}
|
|
181
|
-
const affFlag = {
|
|
182
|
-
KEEPCASE: { isKeepCase: true },
|
|
183
|
-
WARN: { isWarning: true },
|
|
184
|
-
FORCEUCASE: { isForceUCase: true },
|
|
185
|
-
FORBIDDENWORD: { isForbiddenWord: true },
|
|
186
|
-
NOSUGGEST: { isNoSuggest: true },
|
|
187
|
-
NEEDAFFIX: { isNeedAffix: true },
|
|
188
|
-
COMPOUNDBEGIN: { canBeCompoundBegin: true },
|
|
189
|
-
COMPOUNDMIDDLE: { canBeCompoundMiddle: true },
|
|
190
|
-
COMPOUNDEND: { canBeCompoundEnd: true },
|
|
191
|
-
COMPOUNDFLAG: { isCompoundPermitted: true },
|
|
192
|
-
COMPOUNDPERMITFLAG: { isCompoundPermitted: true },
|
|
193
|
-
COMPOUNDFORBIDFLAG: { isCompoundForbidden: true },
|
|
194
|
-
ONLYINCOMPOUND: { isOnlyAllowedInCompound: true },
|
|
195
|
-
};
|
|
196
|
-
const _FlagToStringMap = {
|
|
197
|
-
isCompoundPermitted: 'C',
|
|
198
|
-
canBeCompoundBegin: 'B',
|
|
199
|
-
canBeCompoundMiddle: 'M',
|
|
200
|
-
canBeCompoundEnd: 'E',
|
|
201
|
-
isOnlyAllowedInCompound: 'O',
|
|
202
|
-
isWarning: 'W',
|
|
203
|
-
isKeepCase: 'K',
|
|
204
|
-
isForceUCase: 'U',
|
|
205
|
-
isForbiddenWord: 'F',
|
|
206
|
-
isNoSuggest: 'N',
|
|
207
|
-
isNeedAffix: 'A',
|
|
208
|
-
isCompoundForbidden: '-',
|
|
209
|
-
};
|
|
210
|
-
const _FlagToLongStringMap = {
|
|
211
|
-
isCompoundPermitted: 'CompoundPermitted',
|
|
212
|
-
canBeCompoundBegin: 'CompoundBegin',
|
|
213
|
-
canBeCompoundMiddle: 'CompoundMiddle',
|
|
214
|
-
canBeCompoundEnd: 'CompoundEnd',
|
|
215
|
-
isOnlyAllowedInCompound: 'OnlyInCompound',
|
|
216
|
-
isWarning: 'Warning',
|
|
217
|
-
isKeepCase: 'KeepCase',
|
|
218
|
-
isForceUCase: 'ForceUpperCase',
|
|
219
|
-
isForbiddenWord: 'Forbidden',
|
|
220
|
-
isNoSuggest: 'NoSuggest',
|
|
221
|
-
isNeedAffix: 'NeedAffix',
|
|
222
|
-
isCompoundForbidden: 'CompoundForbidden',
|
|
223
|
-
};
|
|
224
|
-
const flagToStringMap = _FlagToStringMap;
|
|
225
|
-
const flagToLongStringMap = _FlagToLongStringMap;
|
|
226
|
-
export function logAffWord(affWord, message) {
|
|
227
|
-
/* istanbul ignore if */
|
|
228
|
-
if (log) {
|
|
229
|
-
const dump = util.inspect(affWord, { showHidden: false, depth: 5, colors: true });
|
|
230
|
-
console.log(`${message}: ${dump}`);
|
|
124
|
+
setTraceMode(value) {
|
|
125
|
+
this.affData.trace = value;
|
|
231
126
|
}
|
|
232
|
-
return affWord;
|
|
233
|
-
}
|
|
234
|
-
/* istanbul ignore next */
|
|
235
|
-
export function affWordToColoredString(affWord) {
|
|
236
|
-
return util
|
|
237
|
-
.inspect({ ...affWord, flags: flagsToString(affWord.flags) }, { showHidden: false, depth: 5, colors: true })
|
|
238
|
-
.replace(/(\s|\n|\r)+/g, ' ');
|
|
239
|
-
}
|
|
240
|
-
/* istanbul ignore next */
|
|
241
|
-
export function flagsToString(flags) {
|
|
242
|
-
return [...Object.entries(flags)]
|
|
243
|
-
.filter(([, v]) => !!v)
|
|
244
|
-
.map(([k]) => flagToLongStringMap[k])
|
|
245
|
-
.sort()
|
|
246
|
-
.join(':');
|
|
247
|
-
}
|
|
248
|
-
export function asAffWord(word, rules = '', flags = {}) {
|
|
249
|
-
return {
|
|
250
|
-
word,
|
|
251
|
-
base: word,
|
|
252
|
-
prefix: '',
|
|
253
|
-
suffix: '',
|
|
254
|
-
rulesApplied: '',
|
|
255
|
-
rules,
|
|
256
|
-
flags,
|
|
257
|
-
dic: rules ? word + '/' + rules : word,
|
|
258
|
-
};
|
|
259
127
|
}
|
|
260
128
|
export function compareAff(a, b) {
|
|
261
|
-
|
|
262
|
-
return a.word < b.word ? -1 : 1;
|
|
263
|
-
}
|
|
264
|
-
const sigA = signature(a);
|
|
265
|
-
const sigB = signature(b);
|
|
266
|
-
return sigA < sigB ? -1 : sigA > sigB ? 1 : 0;
|
|
129
|
+
return a.word < b.word ? -1 : a.word > b.word ? 1 : a.flags - b.flags;
|
|
267
130
|
}
|
|
268
131
|
/**
|
|
269
132
|
* Returns a filter function that will filter adjacent AffWords
|
|
270
133
|
* It compares the word and the flags.
|
|
271
134
|
*/
|
|
272
|
-
|
|
273
|
-
return filterOrderedList((a, b) => a.word !== b.word ||
|
|
274
|
-
}
|
|
275
|
-
export const debug = {
|
|
276
|
-
signature,
|
|
277
|
-
};
|
|
278
|
-
function removeNeedAffix(flags) {
|
|
279
|
-
const newFlags = { ...flags };
|
|
280
|
-
delete newFlags.isNeedAffix;
|
|
281
|
-
return newFlags;
|
|
135
|
+
function filterAff() {
|
|
136
|
+
return filterOrderedList((a, b) => a.word !== b.word || a.flags !== b.flags);
|
|
282
137
|
}
|
|
283
138
|
function adjustCompounding(affWord, minLength) {
|
|
284
|
-
if (!affWord.flags.isCompoundPermitted || affWord.word.length >= minLength) {
|
|
139
|
+
if (!(affWord.flags & AffixFlags.isCompoundPermitted) || affWord.word.length >= minLength) {
|
|
285
140
|
return affWord;
|
|
286
141
|
}
|
|
287
|
-
|
|
288
|
-
affWord.flags = flags;
|
|
142
|
+
affWord.flags &= ~AffixFlags.isCompoundPermitted;
|
|
289
143
|
return affWord;
|
|
290
144
|
}
|
|
145
|
+
export var AffixFlags;
|
|
146
|
+
(function (AffixFlags) {
|
|
147
|
+
AffixFlags[AffixFlags["none"] = 0] = "none";
|
|
148
|
+
/**
|
|
149
|
+
* COMPOUNDFLAG flag
|
|
150
|
+
*
|
|
151
|
+
* Words signed with COMPOUNDFLAG may be in compound words (except when the word is shorter than COMPOUNDMIN).
|
|
152
|
+
* Affixes with COMPOUNDFLAG also permit compounding of affixed words.
|
|
153
|
+
*
|
|
154
|
+
*/
|
|
155
|
+
AffixFlags[AffixFlags["isCompoundPermitted"] = 1] = "isCompoundPermitted";
|
|
156
|
+
/**
|
|
157
|
+
* COMPOUNDBEGIN flag
|
|
158
|
+
*
|
|
159
|
+
* Words signed with COMPOUNDBEGIN (or with a signed affix) may be first elements in compound words.
|
|
160
|
+
*
|
|
161
|
+
*/
|
|
162
|
+
AffixFlags[AffixFlags["canBeCompoundBegin"] = 2] = "canBeCompoundBegin";
|
|
163
|
+
/**
|
|
164
|
+
* COMPOUNDMIDDLE flag
|
|
165
|
+
*
|
|
166
|
+
* Words signed with COMPOUNDMIDDLE (or with a signed affix) may be middle elements in compound words.
|
|
167
|
+
*
|
|
168
|
+
*/
|
|
169
|
+
AffixFlags[AffixFlags["canBeCompoundMiddle"] = 4] = "canBeCompoundMiddle";
|
|
170
|
+
/**
|
|
171
|
+
* COMPOUNDLAST flag
|
|
172
|
+
*
|
|
173
|
+
* Words signed with COMPOUNDLAST (or with a signed affix) may be last elements in compound words.
|
|
174
|
+
*
|
|
175
|
+
*/
|
|
176
|
+
AffixFlags[AffixFlags["canBeCompoundEnd"] = 8] = "canBeCompoundEnd";
|
|
177
|
+
/**
|
|
178
|
+
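* COMPOUNDPERMITFLAG flag (NOTE(review): the member documented here is isOnlyAllowedInCompound, which the flag mapping associates with ONLYINCOMPOUND — confirm which flag this description should cover)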
|
|
179
|
+
*
|
|
180
|
+
* Prefixes are allowed at the beginning of compounds, suffixes are allowed at the end of compounds by default.
|
|
181
|
+
* Affixes with COMPOUNDPERMITFLAG may be inside of compounds.
|
|
182
|
+
*
|
|
183
|
+
*/
|
|
184
|
+
AffixFlags[AffixFlags["isOnlyAllowedInCompound"] = 16] = "isOnlyAllowedInCompound";
|
|
185
|
+
/**
|
|
186
|
+
* COMPOUNDFORBIDFLAG flag
|
|
187
|
+
*
|
|
188
|
+
* Suffixes with this flag forbid compounding of the affixed word.
|
|
189
|
+
*
|
|
190
|
+
*/
|
|
191
|
+
AffixFlags[AffixFlags["isCompoundForbidden"] = 32] = "isCompoundForbidden";
|
|
192
|
+
/**
|
|
193
|
+
* WARN flag
|
|
194
|
+
*
|
|
195
|
+
* This flag is for rare words, which are also often spelling mistakes, see option -r of command line Hunspell and FORBIDWARN.
|
|
196
|
+
*/
|
|
197
|
+
AffixFlags[AffixFlags["isWarning"] = 64] = "isWarning";
|
|
198
|
+
/**
|
|
199
|
+
* KEEPCASE flag
|
|
200
|
+
*
|
|
201
|
+
* Forbid uppercased and capitalized forms of words signed with KEEPCASE flags. Useful for special orthographies (measurements and
|
|
202
|
+
* currency often keep their case in uppercased texts) and writing systems (e.g. keeping lower case of IPA characters). Also valuable
|
|
203
|
+
* for words erroneously written in the wrong case.
|
|
204
|
+
*/
|
|
205
|
+
AffixFlags[AffixFlags["isKeepCase"] = 128] = "isKeepCase";
|
|
206
|
+
/**
|
|
207
|
+
* FORCEUCASE flag
|
|
208
|
+
*
|
|
209
|
+
* Last word part of a compound with flag FORCEUCASE forces capitalization of the whole compound word.
|
|
210
|
+
* E.g., the Dutch word "straat" (street) with the FORCEUCASE flag will be allowed only in capitalized compound forms,
|
|
211
|
+
* according to the Dutch spelling rules for proper names.
|
|
212
|
+
*/
|
|
213
|
+
AffixFlags[AffixFlags["isForceUCase"] = 256] = "isForceUCase";
|
|
214
|
+
/**
|
|
215
|
+
* FORBIDDENWORD flag
|
|
216
|
+
*
|
|
217
|
+
* This flag signs forbidden word form. Because affixed forms are also forbidden, we can subtract a subset from set of the
|
|
218
|
+
* accepted affixed and compound words. Note: useful to forbid erroneous words, generated by the compounding mechanism.
|
|
219
|
+
*/
|
|
220
|
+
AffixFlags[AffixFlags["isForbiddenWord"] = 512] = "isForbiddenWord";
|
|
221
|
+
/**
|
|
222
|
+
* NOSUGGEST flag
|
|
223
|
+
*
|
|
224
|
+
* Words signed with NOSUGGEST flag are not suggested (but still accepted when typed correctly). Proposed flag for vulgar
|
|
225
|
+
* and obscene words (see also SUBSTANDARD).
|
|
226
|
+
*/
|
|
227
|
+
AffixFlags[AffixFlags["isNoSuggest"] = 1024] = "isNoSuggest";
|
|
228
|
+
// cspell:ignore pseudoroot
|
|
229
|
+
/**
|
|
230
|
+
* NEEDAFFIX flag
|
|
231
|
+
*
|
|
232
|
+
* This flag signs virtual stems in the dictionary, words only valid when affixed. Except, if the dictionary word has a homonym
|
|
233
|
+
* or a zero affix. NEEDAFFIX works also with prefixes and prefix + suffix combinations (see tests/pseudoroot5.*).
|
|
234
|
+
*/
|
|
235
|
+
AffixFlags[AffixFlags["isNeedAffix"] = 2048] = "isNeedAffix";
|
|
236
|
+
})(AffixFlags || (AffixFlags = {}));
|
|
237
|
+
function toAffixFlags(flags) {
|
|
238
|
+
let result = 0;
|
|
239
|
+
for (const [key, value] of Object.entries(flags)) {
|
|
240
|
+
if (value) {
|
|
241
|
+
const flag = AffixFlags[key];
|
|
242
|
+
result |= flag;
|
|
243
|
+
}
|
|
244
|
+
}
|
|
245
|
+
return result;
|
|
246
|
+
}
|
|
247
|
+
class AffData {
|
|
248
|
+
affInfo;
|
|
249
|
+
filename;
|
|
250
|
+
rules = [];
|
|
251
|
+
mapToRuleIdx = new Map();
|
|
252
|
+
mapWordRulesToRuleIndexes = new Map();
|
|
253
|
+
mapWordRulesToRules = new Map();
|
|
254
|
+
affFlagType;
|
|
255
|
+
missingFlags = new Set();
|
|
256
|
+
_mapRuleIdxToRules = new WeakMap();
|
|
257
|
+
trace = false;
|
|
258
|
+
constructor(affInfo, filename) {
|
|
259
|
+
this.affInfo = affInfo;
|
|
260
|
+
this.filename = filename;
|
|
261
|
+
this.affFlagType = toAffFlagType(affInfo.FLAG);
|
|
262
|
+
this.#processAffInfo(affInfo);
|
|
263
|
+
}
|
|
264
|
+
dictLineToEntry(line) {
|
|
265
|
+
const [lineLeft] = line.split(/\s+/, 1);
|
|
266
|
+
const [word, rules = ''] = lineLeft.split('/', 2);
|
|
267
|
+
return { word, flags: rules, line };
|
|
268
|
+
}
|
|
269
|
+
dictLineToAffixWord(line) {
|
|
270
|
+
const entry = this.dictLineToEntry(line);
|
|
271
|
+
return this.toAffixWord({ dict: entry, appliedRules: this.trace ? [] : undefined }, entry.word, AffixFlags.none, this.getRules(entry.flags));
|
|
272
|
+
}
|
|
273
|
+
toAffixWord(source, word, flags, rules) {
|
|
274
|
+
const dict = source.dict;
|
|
275
|
+
let appliedRules = source.appliedRules;
|
|
276
|
+
if (!rules)
|
|
277
|
+
return { word, rules: undefined, flags, dict, appliedRules };
|
|
278
|
+
const fxRules = rules.filter((rule) => rule.type !== 'F');
|
|
279
|
+
if (appliedRules) {
|
|
280
|
+
appliedRules = [...appliedRules, ...rules.filter((r) => r.type === 'F').map((r) => r.idx)];
|
|
281
|
+
}
|
|
282
|
+
return {
|
|
283
|
+
word,
|
|
284
|
+
rules: fxRules.length ? fxRules : undefined,
|
|
285
|
+
flags: flags | this.rulesToFlags(rules),
|
|
286
|
+
appliedRules,
|
|
287
|
+
dict,
|
|
288
|
+
};
|
|
289
|
+
}
|
|
290
|
+
getRules(rules) {
|
|
291
|
+
const foundRules = this.mapWordRulesToRules.get(rules);
|
|
292
|
+
if (foundRules)
|
|
293
|
+
return foundRules;
|
|
294
|
+
const ruleIndexes = this.getRuleIndexes(rules);
|
|
295
|
+
const affRules = ruleIndexes.map((idx) => this.rules[idx]);
|
|
296
|
+
this.mapWordRulesToRules.set(rules, affRules);
|
|
297
|
+
return affRules;
|
|
298
|
+
}
|
|
299
|
+
getRuleIndexes(rules) {
|
|
300
|
+
const found = this.mapWordRulesToRuleIndexes.get(rules);
|
|
301
|
+
if (found)
|
|
302
|
+
return found;
|
|
303
|
+
const indexes = this.#getRuleIndexes(rules);
|
|
304
|
+
this.mapWordRulesToRuleIndexes.set(rules, indexes);
|
|
305
|
+
return indexes;
|
|
306
|
+
}
|
|
307
|
+
rulesToFlags(rules) {
|
|
308
|
+
return rules.reduce((acc, rule) => acc | rule.flags, AffixFlags.none);
|
|
309
|
+
}
|
|
310
|
+
getRulesForIndexes(indexes) {
|
|
311
|
+
if (!indexes)
|
|
312
|
+
return undefined;
|
|
313
|
+
let rules = this._mapRuleIdxToRules.get(indexes);
|
|
314
|
+
if (rules)
|
|
315
|
+
return rules;
|
|
316
|
+
rules = indexes.map((idx) => this.rules[idx]);
|
|
317
|
+
this._mapRuleIdxToRules.set(indexes, rules);
|
|
318
|
+
return rules;
|
|
319
|
+
}
|
|
320
|
+
getRulesForAffSubstitution(sub) {
|
|
321
|
+
return this.getRulesForIndexes(sub.attachRules);
|
|
322
|
+
}
|
|
323
|
+
#getRuleIndexes(rules) {
|
|
324
|
+
const flags = this.#splitRules(rules);
|
|
325
|
+
const indexes = flags
|
|
326
|
+
.flatMap((flag) => {
|
|
327
|
+
const found = this.mapToRuleIdx.get(flag);
|
|
328
|
+
if (found === undefined && !this.missingFlags.has(flag)) {
|
|
329
|
+
this.missingFlags.add(flag);
|
|
330
|
+
const filename = this.filename;
|
|
331
|
+
logError('Unable to resolve flag: %o, for file: %o', flag, filename);
|
|
332
|
+
// throw new Error('Unable to resolve flag');
|
|
333
|
+
}
|
|
334
|
+
return found;
|
|
335
|
+
})
|
|
336
|
+
.filter(isDefined);
|
|
337
|
+
return indexes;
|
|
338
|
+
}
|
|
339
|
+
#splitRules(rules) {
|
|
340
|
+
switch (this.affFlagType) {
|
|
341
|
+
case 'long':
|
|
342
|
+
return [...new Set(rules.replace(/(..)/g, '$1//').split('//').slice(0, -1))];
|
|
343
|
+
case 'num':
|
|
344
|
+
return [...new Set(rules.split(','))];
|
|
345
|
+
}
|
|
346
|
+
return [...new Set(rules.split(''))];
|
|
347
|
+
}
|
|
348
|
+
#processAffInfo(affInfo) {
|
|
349
|
+
const { AF = [], SFX = [], PFX = [] } = affInfo;
|
|
350
|
+
const flags = objectToKvP(affInfo)
|
|
351
|
+
.filter(isValidFlagMember)
|
|
352
|
+
.map(([key, value]) => ({ id: value, flags: toAffixFlags(affFlag[key]) }));
|
|
353
|
+
const sfxRules = [...SFX].map(([, sfx]) => sfx).map((sfx) => ({ id: sfx.id, sfx }));
|
|
354
|
+
const pfxRules = [...PFX].map(([, pfx]) => pfx).map((pfx) => ({ id: pfx.id, pfx }));
|
|
355
|
+
const rules = [...flags, ...sfxRules, ...pfxRules];
|
|
356
|
+
rules.forEach((rule, idx) => {
|
|
357
|
+
const found = this.mapToRuleIdx.get(rule.id);
|
|
358
|
+
if (found) {
|
|
359
|
+
const filename = this.filename;
|
|
360
|
+
logError('Duplicate affix rule: %o, filename: %o', rule.id, filename);
|
|
361
|
+
const toAdd = Array.isArray(found) ? found : [found];
|
|
362
|
+
toAdd.push(idx);
|
|
363
|
+
this.mapToRuleIdx.set(rule.id, toAdd);
|
|
364
|
+
return;
|
|
365
|
+
}
|
|
366
|
+
this.mapToRuleIdx.set(rule.id, idx);
|
|
367
|
+
});
|
|
368
|
+
AF.forEach((af, idx) => {
|
|
369
|
+
if (!af)
|
|
370
|
+
return;
|
|
371
|
+
const indexes = this.#getRuleIndexes(af);
|
|
372
|
+
this.mapWordRulesToRuleIndexes.set(idx.toString(), indexes);
|
|
373
|
+
});
|
|
374
|
+
this.rules = rules.map((rule, idx) => this.#mapPartialRule(rule, idx));
|
|
375
|
+
}
|
|
376
|
+
#mapPartialRule(rule, index) {
|
|
377
|
+
const { id, flags, sfx, pfx } = rule;
|
|
378
|
+
const idx = this.mapToRuleIdx.get(id);
|
|
379
|
+
// if (index !== idx) {
|
|
380
|
+
// const filename = this.affInfo.filename;
|
|
381
|
+
// logError('Unexpected index: %o !== %o, rule %o, filename: %o', index, idx, rule, filename);
|
|
382
|
+
// }
|
|
383
|
+
assert(idx !== undefined && (idx === index || (Array.isArray(idx) && idx.includes(index))));
|
|
384
|
+
const fx = sfx || pfx;
|
|
385
|
+
if (fx) {
|
|
386
|
+
const affFx = this.#mapFx(fx);
|
|
387
|
+
if (affFx.type === 'P') {
|
|
388
|
+
return { id, idx: index, type: 'P', flags: 0, fx: affFx };
|
|
389
|
+
}
|
|
390
|
+
else {
|
|
391
|
+
return { id, idx: index, type: 'S', flags: 0, fx: affFx };
|
|
392
|
+
}
|
|
393
|
+
}
|
|
394
|
+
return { id, idx: index, type: 'F', flags: flags || 0 };
|
|
395
|
+
}
|
|
396
|
+
#mapFx(fx) {
|
|
397
|
+
const { id, combinable } = fx;
|
|
398
|
+
const substitutionsForRegExps = this.#mapSubstitutionsForRegExps(fx.substitutionsForRegExps);
|
|
399
|
+
return { type: fx.type === 'PFX' ? 'P' : 'S', id, combinable, substitutionsForRegExps };
|
|
400
|
+
}
|
|
401
|
+
#mapSubstitutionsForRegExps(substitutions) {
|
|
402
|
+
return substitutions.map((sub) => this.#mapSubstitutionsForRegExp(sub));
|
|
403
|
+
}
|
|
404
|
+
#mapSubstitutionsForRegExp(subForRegExp) {
|
|
405
|
+
const { match, substitutions: subs } = subForRegExp;
|
|
406
|
+
const substitutions = subs.map((sub) => this.#mapSubstitution(sub));
|
|
407
|
+
const substitutionsGroupedByRemove = groupByField(substitutions, 'replace');
|
|
408
|
+
return { match, substitutionsGroupedByRemove };
|
|
409
|
+
}
|
|
410
|
+
#mapSubstitution(sub) {
|
|
411
|
+
const { type, remove, attach, attachRules, replace } = sub;
|
|
412
|
+
const rules = attachRules ? this.getRuleIndexes(attachRules) : undefined;
|
|
413
|
+
return { type, remove, attach, attachRules: rules, replace };
|
|
414
|
+
}
|
|
415
|
+
}
|
|
416
|
+
function joinRules(a, b) {
|
|
417
|
+
if (!a)
|
|
418
|
+
return b;
|
|
419
|
+
if (!b)
|
|
420
|
+
return a;
|
|
421
|
+
return [...a, ...b];
|
|
422
|
+
}
|
|
423
|
+
function objectToKvP(t) {
|
|
424
|
+
return Object.entries(t);
|
|
425
|
+
}
|
|
426
|
+
// type Defined<T> = Exclude<T, undefined>;
|
|
427
|
+
function isValidFlagMember(t) {
|
|
428
|
+
const [key, value] = t;
|
|
429
|
+
return key in affFlag && !!value;
|
|
430
|
+
}
|
|
431
|
+
/**
|
|
432
|
+
*
|
|
433
|
+
* @param FLAG - the FLAG value from the aff file
|
|
434
|
+
* @returns the AffFlagType or throws
|
|
435
|
+
*/
|
|
436
|
+
export function toAffFlagType(FLAG) {
|
|
437
|
+
if (!FLAG)
|
|
438
|
+
return 'char';
|
|
439
|
+
switch (FLAG) {
|
|
440
|
+
case 'long':
|
|
441
|
+
case 'num':
|
|
442
|
+
return FLAG;
|
|
443
|
+
default:
|
|
444
|
+
throw new Error(`Unexpected FLAG value: ${FLAG}`);
|
|
445
|
+
}
|
|
446
|
+
}
|
|
291
447
|
//# sourceMappingURL=aff.js.map
|
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
import type { AffTransformFlags, AffWordFlags } from './affDef.js';
|
|
2
|
+
import type { Mapping } from './types.js';
|
|
3
|
+
export declare const affFlag: Mapping<AffTransformFlags, AffWordFlags>;
|
|
4
|
+
export declare const flagToStringMap: Record<string, string | undefined>;
|
|
5
|
+
export declare const flagToLongStringMap: Record<string, string | undefined>;
|
|
6
|
+
//# sourceMappingURL=affConstants.d.ts.map
|