hunspell-reader 8.3.2 → 8.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/aff.js CHANGED
@@ -1,19 +1,21 @@
1
- import * as GS from 'gensequence';
2
- import { genSequence as gs } from 'gensequence';
3
- import * as util from 'util';
1
+ import assert from 'assert';
2
+ import { affFlag } from './affConstants.js';
4
3
  import { Converter } from './converter.js';
5
- import { filterOrderedList, isDefined } from './util.js';
6
- const log = false;
4
+ import { filterOrderedList, groupByField, isDefined } from './util.js';
5
+ const debug = false;
6
+ function logError(msg, ...args) {
7
+ debug && console.error(msg, ...args);
8
+ }
7
9
  const DefaultMaxDepth = 5;
8
10
  export class Aff {
9
11
  affInfo;
10
- rules;
12
+ affData;
11
13
  _oConv;
12
14
  _iConv;
13
15
  _maxSuffixDepth = DefaultMaxDepth;
14
- constructor(affInfo) {
16
+ constructor(affInfo, filename) {
15
17
  this.affInfo = affInfo;
16
- this.rules = processRules(affInfo);
18
+ this.affData = new AffData(affInfo, filename);
17
19
  this._iConv = new Converter(affInfo.ICONV || []);
18
20
  this._oConv = new Converter(affInfo.OCONV || []);
19
21
  }
@@ -29,12 +31,13 @@ export class Aff {
29
31
  * @param {string} line - the line from the .dic file.
30
32
  */
31
33
  applyRulesToDicEntry(line, maxDepth) {
34
+ const afWord = this.affData.dictLineToAffixWord(line);
32
35
  const maxSuffixDepth = maxDepth ?? this.maxSuffixDepth;
33
- const [lineLeft] = line.split(/\s+/, 1);
34
- const [word, rules = ''] = lineLeft.split('/', 2);
35
- const results = this.applyRulesToWord(asAffWord(word, rules), maxSuffixDepth).map((affWord) => ({
36
+ const convert = this._oConv.convert;
37
+ const results = this.applyRulesToWord(afWord, maxSuffixDepth).map((affWord) => ({
36
38
  ...affWord,
37
- word: this._oConv.convert(affWord.word),
39
+ word: convert(affWord.word),
40
+ originalWord: affWord.word,
38
41
  }));
39
42
  results.sort(compareAff);
40
43
  const filtered = results.filter(filterAff());
@@ -45,103 +48,72 @@ export class Aff {
45
48
  */
46
49
  applyRulesToWord(affWord, remainingDepth) {
47
50
  const compoundMin = this.affInfo.COMPOUNDMIN ?? 3;
48
- const { word, base, suffix, prefix, dic } = affWord;
49
- const allRules = this.getMatchingRules(affWord.rules);
50
- const { rulesApplied, flags } = allRules
51
- .filter((rule) => !!rule.flags)
52
- .reduce((acc, rule) => ({
53
- rulesApplied: [acc.rulesApplied, rule.id].join(' '),
54
- flags: { ...acc.flags, ...rule.flags },
55
- }), { rulesApplied: affWord.rulesApplied, flags: affWord.flags });
56
- const rules = this.joinRules(allRules.filter((rule) => !rule.flags).map((rule) => rule.id));
57
- const affixRules = allRules.map((rule) => rule.sfx || rule.pfx).filter(isDefined);
58
- const wordWithFlags = { word, flags, rulesApplied, rules: '', base, suffix, prefix, dic };
59
- return [wordWithFlags, ...this.applyAffixesToWord(affixRules, { ...wordWithFlags, rules }, remainingDepth)]
60
- .filter(({ flags }) => !flags.isNeedAffix)
61
- .map((affWord) => adjustCompounding(affWord, compoundMin))
62
- .map((affWord) => logAffWord(affWord, 'applyRulesToWord'));
63
- }
64
- applyAffixesToWord(affixRules, affWord, remainingDepth) {
65
- if (remainingDepth <= 0) {
51
+ const { word, flags, dict, appliedRules } = affWord;
52
+ const wordWithFlags = { word, rules: undefined, flags, dict, appliedRules };
53
+ return [wordWithFlags, ...this.applyAffixesToWord(affWord, remainingDepth)]
54
+ .filter(({ flags }) => !(flags & AffixFlags.isNeedAffix))
55
+ .map((affWord) => adjustCompounding(affWord, compoundMin));
56
+ }
57
+ applyAffixesToWord(affWord, remainingDepth) {
58
+ if (remainingDepth <= 0 || !affWord.rules) {
66
59
  return [];
67
60
  }
68
- const combinableRules = affixRules
69
- .filter((rule) => rule.type === 'SFX')
70
- .filter((rule) => rule.combinable === true)
71
- .map(({ id }) => id);
72
- const combinableSfx = this.joinRules(combinableRules);
73
- const r = affixRules
61
+ const rules = affWord.rules;
62
+ const combinableSfx = rules.filter((r) => r.type === 'S' && r.fx.combinable);
63
+ const r = affWord.rules
74
64
  .flatMap((affix) => this.applyAffixToWord(affix, affWord, combinableSfx))
75
65
  .flatMap((affWord) => this.applyRulesToWord(affWord, remainingDepth - 1));
76
66
  return r;
77
67
  }
78
- applyAffixToWord(affix, affWord, combinableSfx) {
68
+ applyAffixToWord(rule, affWord, combinableSfx) {
79
69
  const { word } = affWord;
80
- const combineRules = affix.type === 'PFX' && affix.combinable && !!combinableSfx ? combinableSfx : '';
81
- const flags = affWord.flags.isNeedAffix ? removeNeedAffix(affWord.flags) : affWord.flags;
82
- const matchingSubstitutions = [...affix.substitutionSets.values()].filter((sub) => sub.match.test(word));
83
- const partialAffWord = { ...affWord, flags, rules: combineRules };
84
- return matchingSubstitutions
85
- .flatMap((sub) => sub.substitutions)
86
- .filter((sub) => sub.remove === '0' || sub.replace.test(word))
87
- .map((sub) => this.substitute(affix, partialAffWord, sub))
88
- .map((affWord) => logAffWord(affWord, 'applyAffixToWord'));
89
- }
90
- substitute(affix, affWord, sub) {
91
- const { word: origWord, rulesApplied, flags, dic } = affWord;
92
- const rules = affWord.rules + (sub.attachRules || '');
93
- const word = origWord.replace(sub.replace, sub.attach);
94
- const stripped = origWord.replace(sub.replace, '');
95
- let p = affWord.prefix.length;
96
- let s = origWord.length - affWord.suffix.length;
97
- if (affix.type === 'SFX') {
98
- s = Math.min(stripped.length, s);
99
- p = Math.min(p, s);
70
+ const combineRules = rule.type === 'P' && rule.fx.combinable ? combinableSfx : [];
71
+ const flags = affWord.flags & ~AffixFlags.isNeedAffix;
72
+ const matchingSubstitutions = rule.fx.substitutionsForRegExps.filter((sub) => sub.match.test(word));
73
+ const source = {
74
+ dict: affWord.dict,
75
+ appliedRules: affWord.appliedRules ? [...affWord.appliedRules, rule.idx] : undefined,
76
+ };
77
+ const partialAffWord = this.affData.toAffixWord(source, word, flags, combineRules);
78
+ return matchingSubstitutions.flatMap((sub) => this.#applySubstitution(partialAffWord, sub));
79
+ }
80
+ #substituteAttach(affWord, sub, stripped) {
81
+ const { flags } = affWord;
82
+ const subRules = this.affData.getRulesForAffSubstitution(sub);
83
+ const rules = joinRules(affWord.rules, subRules);
84
+ let word;
85
+ if (sub.type === 'S') {
86
+ word = stripped + sub.attach;
100
87
  }
101
88
  else {
102
- const d = word.length - origWord.length;
103
- p = Math.max(p, word.length - stripped.length);
104
- s = Math.max(s + d, p);
89
+ word = sub.attach + stripped;
105
90
  }
106
- const base = word.slice(p, s);
107
- const prefix = word.slice(0, p);
108
- const suffix = word.slice(s);
109
- return {
110
- word,
111
- rulesApplied: rulesApplied + ' ' + affix.id,
112
- rules,
113
- flags,
114
- base,
115
- suffix,
116
- prefix,
117
- dic,
118
- };
119
- }
120
- getMatchingRules(rules) {
121
- const { AF = [] } = this.affInfo;
122
- const idx = parseInt(rules, 10);
123
- const rulesToSplit = AF[idx] || rules;
124
- return this.separateRules(rulesToSplit)
125
- .map((key) => this.rules.get(key))
126
- .filter(isDefined);
91
+ return this.affData.toAffixWord(affWord, word, flags, rules);
127
92
  }
128
- joinRules(rules) {
129
- switch (this.affInfo.FLAG) {
130
- case 'long':
131
- return rules.join('');
132
- case 'num':
133
- return rules.join(',');
93
+ #applySubstitution(affWord, subs) {
94
+ const results = [];
95
+ for (const [replace, substitutions] of subs.substitutionsGroupedByRemove) {
96
+ if (!replace.test(affWord.word))
97
+ continue;
98
+ const stripped = affWord.word.replace(replace, '');
99
+ for (const sub of substitutions) {
100
+ results.push(this.#substituteAttach(affWord, sub, stripped));
101
+ }
134
102
  }
135
- return rules.join('');
103
+ return results;
136
104
  }
137
- separateRules(rules) {
138
- switch (this.affInfo.FLAG) {
139
- case 'long':
140
- return [...new Set(rules.replace(/(..)/g, '$1//').split('//').slice(0, -1))];
141
- case 'num':
142
- return [...new Set(rules.split(','))];
143
- }
144
- return [...new Set(rules.split(''))];
105
+ getMatchingRules(flags) {
106
+ const rules = this.affData.getRules(flags);
107
+ return rules;
108
+ }
109
+ /**
110
+ * Convert the applied rule indexes to AFF Letters.
111
+ * Requires that the affixWord was generated with trace mode turned on.
112
+ * @param affixWord - the generated AffixWord.
113
+ */
114
+ getFlagsValuesForAffixWord(affixWord) {
115
+ const rules = this.affData.getRulesForIndexes(affixWord.appliedRules);
116
+ return rules?.map((r) => r.id);
145
117
  }
146
118
  get iConv() {
147
119
  return this._iConv;
@@ -149,143 +121,327 @@ export class Aff {
149
121
  get oConv() {
150
122
  return this._oConv;
151
123
  }
152
- }
153
- function signature(aff) {
154
- const { word, flags } = aff;
155
- const sig = Object.entries(flags)
156
- .filter((e) => !!e[1])
157
- .map((f) => flagToStringMap[f[0]])
158
- .sort()
159
- .join('');
160
- return word + '|' + sig;
161
- }
162
- export function processRules(affInfo) {
163
- const sfxRules = gs(affInfo.SFX || [])
164
- .map(([, sfx]) => sfx)
165
- .map((sfx) => ({ id: sfx.id, type: 'sfx', sfx }));
166
- const pfxRules = gs(affInfo.PFX || [])
167
- .map(([, pfx]) => pfx)
168
- .map((pfx) => ({ id: pfx.id, type: 'pfx', pfx }));
169
- const flagRules = GS.sequenceFromObject(affInfo)
170
- .filter(([key, value]) => !!affFlag[key] && !!value)
171
- .map(([key, value]) => ({ id: value, type: 'flag', flags: affFlag[key] }));
172
- const rules = sfxRules
173
- .concat(pfxRules)
174
- .concat(flagRules)
175
- .reduce((acc, rule) => {
176
- acc.set(rule.id, rule);
177
- return acc;
178
- }, new Map());
179
- return rules;
180
- }
181
- const affFlag = {
182
- KEEPCASE: { isKeepCase: true },
183
- WARN: { isWarning: true },
184
- FORCEUCASE: { isForceUCase: true },
185
- FORBIDDENWORD: { isForbiddenWord: true },
186
- NOSUGGEST: { isNoSuggest: true },
187
- NEEDAFFIX: { isNeedAffix: true },
188
- COMPOUNDBEGIN: { canBeCompoundBegin: true },
189
- COMPOUNDMIDDLE: { canBeCompoundMiddle: true },
190
- COMPOUNDEND: { canBeCompoundEnd: true },
191
- COMPOUNDFLAG: { isCompoundPermitted: true },
192
- COMPOUNDPERMITFLAG: { isCompoundPermitted: true },
193
- COMPOUNDFORBIDFLAG: { isCompoundForbidden: true },
194
- ONLYINCOMPOUND: { isOnlyAllowedInCompound: true },
195
- };
196
- const _FlagToStringMap = {
197
- isCompoundPermitted: 'C',
198
- canBeCompoundBegin: 'B',
199
- canBeCompoundMiddle: 'M',
200
- canBeCompoundEnd: 'E',
201
- isOnlyAllowedInCompound: 'O',
202
- isWarning: 'W',
203
- isKeepCase: 'K',
204
- isForceUCase: 'U',
205
- isForbiddenWord: 'F',
206
- isNoSuggest: 'N',
207
- isNeedAffix: 'A',
208
- isCompoundForbidden: '-',
209
- };
210
- const _FlagToLongStringMap = {
211
- isCompoundPermitted: 'CompoundPermitted',
212
- canBeCompoundBegin: 'CompoundBegin',
213
- canBeCompoundMiddle: 'CompoundMiddle',
214
- canBeCompoundEnd: 'CompoundEnd',
215
- isOnlyAllowedInCompound: 'OnlyInCompound',
216
- isWarning: 'Warning',
217
- isKeepCase: 'KeepCase',
218
- isForceUCase: 'ForceUpperCase',
219
- isForbiddenWord: 'Forbidden',
220
- isNoSuggest: 'NoSuggest',
221
- isNeedAffix: 'NeedAffix',
222
- isCompoundForbidden: 'CompoundForbidden',
223
- };
224
- const flagToStringMap = _FlagToStringMap;
225
- const flagToLongStringMap = _FlagToLongStringMap;
226
- export function logAffWord(affWord, message) {
227
- /* istanbul ignore if */
228
- if (log) {
229
- const dump = util.inspect(affWord, { showHidden: false, depth: 5, colors: true });
230
- console.log(`${message}: ${dump}`);
124
+ setTraceMode(value) {
125
+ this.affData.trace = value;
231
126
  }
232
- return affWord;
233
- }
234
- /* istanbul ignore next */
235
- export function affWordToColoredString(affWord) {
236
- return util
237
- .inspect({ ...affWord, flags: flagsToString(affWord.flags) }, { showHidden: false, depth: 5, colors: true })
238
- .replace(/(\s|\n|\r)+/g, ' ');
239
- }
240
- /* istanbul ignore next */
241
- export function flagsToString(flags) {
242
- return [...Object.entries(flags)]
243
- .filter(([, v]) => !!v)
244
- .map(([k]) => flagToLongStringMap[k])
245
- .sort()
246
- .join(':');
247
- }
248
- export function asAffWord(word, rules = '', flags = {}) {
249
- return {
250
- word,
251
- base: word,
252
- prefix: '',
253
- suffix: '',
254
- rulesApplied: '',
255
- rules,
256
- flags,
257
- dic: rules ? word + '/' + rules : word,
258
- };
259
127
  }
260
128
  export function compareAff(a, b) {
261
- if (a.word !== b.word) {
262
- return a.word < b.word ? -1 : 1;
263
- }
264
- const sigA = signature(a);
265
- const sigB = signature(b);
266
- return sigA < sigB ? -1 : sigA > sigB ? 1 : 0;
129
+ return a.word < b.word ? -1 : a.word > b.word ? 1 : a.flags - b.flags;
267
130
  }
268
131
  /**
269
132
  * Returns a filter function that will filter adjacent AffWords
270
133
  * It compares the word and the flags.
271
134
  */
272
- export function filterAff() {
273
- return filterOrderedList((a, b) => a.word !== b.word || signature(a) !== signature(b));
274
- }
275
- export const debug = {
276
- signature,
277
- };
278
- function removeNeedAffix(flags) {
279
- const newFlags = { ...flags };
280
- delete newFlags.isNeedAffix;
281
- return newFlags;
135
+ function filterAff() {
136
+ return filterOrderedList((a, b) => a.word !== b.word || a.flags !== b.flags);
282
137
  }
283
138
  function adjustCompounding(affWord, minLength) {
284
- if (!affWord.flags.isCompoundPermitted || affWord.word.length >= minLength) {
139
+ if (!(affWord.flags & AffixFlags.isCompoundPermitted) || affWord.word.length >= minLength) {
285
140
  return affWord;
286
141
  }
287
- const { isCompoundPermitted: _, ...flags } = affWord.flags;
288
- affWord.flags = flags;
142
+ affWord.flags &= ~AffixFlags.isCompoundPermitted;
289
143
  return affWord;
290
144
  }
145
+ export var AffixFlags;
146
+ (function (AffixFlags) {
147
+ AffixFlags[AffixFlags["none"] = 0] = "none";
148
+ /**
149
+ * COMPOUNDFLAG flag
150
+ *
151
+ * Words signed with COMPOUNDFLAG may be in compound words (except when word shorter than COMPOUNDMIN).
152
+ * Affixes with COMPOUNDFLAG also permits compounding of affixed words.
153
+ *
154
+ */
155
+ AffixFlags[AffixFlags["isCompoundPermitted"] = 1] = "isCompoundPermitted";
156
+ /**
157
+ * COMPOUNDBEGIN flag
158
+ *
159
+ * Words signed with COMPOUNDBEGIN (or with a signed affix) may be first elements in compound words.
160
+ *
161
+ */
162
+ AffixFlags[AffixFlags["canBeCompoundBegin"] = 2] = "canBeCompoundBegin";
163
+ /**
164
+ * COMPOUNDMIDDLE flag
165
+ *
166
+ * Words signed with COMPOUNDMIDDLE (or with a signed affix) may be middle elements in compound words.
167
+ *
168
+ */
169
+ AffixFlags[AffixFlags["canBeCompoundMiddle"] = 4] = "canBeCompoundMiddle";
170
+ /**
171
+ * COMPOUNDLAST flag
172
+ *
173
+ * Words signed with COMPOUNDLAST (or with a signed affix) may be last elements in compound words.
174
+ *
175
+ */
176
+ AffixFlags[AffixFlags["canBeCompoundEnd"] = 8] = "canBeCompoundEnd";
177
+ /**
178
+ * COMPOUNDPERMITFLAG flag
179
+ *
180
+ * Prefixes are allowed at the beginning of compounds, suffixes are allowed at the end of compounds by default.
181
+ * Affixes with COMPOUNDPERMITFLAG may be inside of compounds.
182
+ *
183
+ */
184
+ AffixFlags[AffixFlags["isOnlyAllowedInCompound"] = 16] = "isOnlyAllowedInCompound";
185
+ /**
186
+ * COMPOUNDFORBIDFLAG flag
187
+ *
188
+ * Suffixes with this flag forbid compounding of the affixed word.
189
+ *
190
+ */
191
+ AffixFlags[AffixFlags["isCompoundForbidden"] = 32] = "isCompoundForbidden";
192
+ /**
193
+ * WARN flag
194
+ *
195
+ * This flag is for rare words, which are also often spelling mistakes, see option -r of command line Hunspell and FORBIDWARN.
196
+ */
197
+ AffixFlags[AffixFlags["isWarning"] = 64] = "isWarning";
198
+ /**
199
+ * KEEPCASE flag
200
+ *
201
+ * Forbid uppercased and capitalized forms of words signed with KEEPCASE flags. Useful for special orthographies (measurements and
202
+ * currency often keep their case in uppercased texts) and writing systems (e.g. keeping lower case of IPA characters). Also valuable
203
+ * for words erroneously written in the wrong case.
204
+ */
205
+ AffixFlags[AffixFlags["isKeepCase"] = 128] = "isKeepCase";
206
+ /**
207
+ * FORCEUCASE flag
208
+ *
209
+ * Last word part of a compound with flag FORCEUCASE forces capitalization of the whole compound word.
210
+ * Eg. Dutch word "straat" (street) with FORCEUCASE flags will allowed only in capitalized compound forms,
211
+ * according to the Dutch spelling rules for proper names.
212
+ */
213
+ AffixFlags[AffixFlags["isForceUCase"] = 256] = "isForceUCase";
214
+ /**
215
+ * FORBIDDENWORD flag
216
+ *
217
+ * This flag signs forbidden word form. Because affixed forms are also forbidden, we can subtract a subset from set of the
218
+ * accepted affixed and compound words. Note: useful to forbid erroneous words, generated by the compounding mechanism.
219
+ */
220
+ AffixFlags[AffixFlags["isForbiddenWord"] = 512] = "isForbiddenWord";
221
+ /**
222
+ * NOSUGGEST flag
223
+ *
224
+ * Words signed with NOSUGGEST flag are not suggested (but still accepted when typed correctly). Proposed flag for vulgar
225
+ * and obscene words (see also SUBSTANDARD).
226
+ */
227
+ AffixFlags[AffixFlags["isNoSuggest"] = 1024] = "isNoSuggest";
228
+ // cspell:ignore pseudoroot
229
+ /**
230
+ * NEEDAFFIX flag
231
+ *
232
+ * This flag signs virtual stems in the dictionary, words only valid when affixed. Except, if the dictionary word has a homonym
233
+ * or a zero affix. NEEDAFFIX works also with prefixes and prefix + suffix combinations (see tests/pseudoroot5.*).
234
+ */
235
+ AffixFlags[AffixFlags["isNeedAffix"] = 2048] = "isNeedAffix";
236
+ })(AffixFlags || (AffixFlags = {}));
237
+ function toAffixFlags(flags) {
238
+ let result = 0;
239
+ for (const [key, value] of Object.entries(flags)) {
240
+ if (value) {
241
+ const flag = AffixFlags[key];
242
+ result |= flag;
243
+ }
244
+ }
245
+ return result;
246
+ }
247
+ class AffData {
248
+ affInfo;
249
+ filename;
250
+ rules = [];
251
+ mapToRuleIdx = new Map();
252
+ mapWordRulesToRuleIndexes = new Map();
253
+ mapWordRulesToRules = new Map();
254
+ affFlagType;
255
+ missingFlags = new Set();
256
+ _mapRuleIdxToRules = new WeakMap();
257
+ trace = false;
258
+ constructor(affInfo, filename) {
259
+ this.affInfo = affInfo;
260
+ this.filename = filename;
261
+ this.affFlagType = toAffFlagType(affInfo.FLAG);
262
+ this.#processAffInfo(affInfo);
263
+ }
264
+ dictLineToEntry(line) {
265
+ const [lineLeft] = line.split(/\s+/, 1);
266
+ const [word, rules = ''] = lineLeft.split('/', 2);
267
+ return { word, flags: rules, line };
268
+ }
269
+ dictLineToAffixWord(line) {
270
+ const entry = this.dictLineToEntry(line);
271
+ return this.toAffixWord({ dict: entry, appliedRules: this.trace ? [] : undefined }, entry.word, AffixFlags.none, this.getRules(entry.flags));
272
+ }
273
+ toAffixWord(source, word, flags, rules) {
274
+ const dict = source.dict;
275
+ let appliedRules = source.appliedRules;
276
+ if (!rules)
277
+ return { word, rules: undefined, flags, dict, appliedRules };
278
+ const fxRules = rules.filter((rule) => rule.type !== 'F');
279
+ if (appliedRules) {
280
+ appliedRules = [...appliedRules, ...rules.filter((r) => r.type === 'F').map((r) => r.idx)];
281
+ }
282
+ return {
283
+ word,
284
+ rules: fxRules.length ? fxRules : undefined,
285
+ flags: flags | this.rulesToFlags(rules),
286
+ appliedRules,
287
+ dict,
288
+ };
289
+ }
290
+ getRules(rules) {
291
+ const foundRules = this.mapWordRulesToRules.get(rules);
292
+ if (foundRules)
293
+ return foundRules;
294
+ const ruleIndexes = this.getRuleIndexes(rules);
295
+ const affRules = ruleIndexes.map((idx) => this.rules[idx]);
296
+ this.mapWordRulesToRules.set(rules, affRules);
297
+ return affRules;
298
+ }
299
+ getRuleIndexes(rules) {
300
+ const found = this.mapWordRulesToRuleIndexes.get(rules);
301
+ if (found)
302
+ return found;
303
+ const indexes = this.#getRuleIndexes(rules);
304
+ this.mapWordRulesToRuleIndexes.set(rules, indexes);
305
+ return indexes;
306
+ }
307
+ rulesToFlags(rules) {
308
+ return rules.reduce((acc, rule) => acc | rule.flags, AffixFlags.none);
309
+ }
310
+ getRulesForIndexes(indexes) {
311
+ if (!indexes)
312
+ return undefined;
313
+ let rules = this._mapRuleIdxToRules.get(indexes);
314
+ if (rules)
315
+ return rules;
316
+ rules = indexes.map((idx) => this.rules[idx]);
317
+ this._mapRuleIdxToRules.set(indexes, rules);
318
+ return rules;
319
+ }
320
+ getRulesForAffSubstitution(sub) {
321
+ return this.getRulesForIndexes(sub.attachRules);
322
+ }
323
+ #getRuleIndexes(rules) {
324
+ const flags = this.#splitRules(rules);
325
+ const indexes = flags
326
+ .flatMap((flag) => {
327
+ const found = this.mapToRuleIdx.get(flag);
328
+ if (found === undefined && !this.missingFlags.has(flag)) {
329
+ this.missingFlags.add(flag);
330
+ const filename = this.filename;
331
+ logError('Unable to resolve flag: %o, for file: %o', flag, filename);
332
+ // throw new Error('Unable to resolve flag');
333
+ }
334
+ return found;
335
+ })
336
+ .filter(isDefined);
337
+ return indexes;
338
+ }
339
+ #splitRules(rules) {
340
+ switch (this.affFlagType) {
341
+ case 'long':
342
+ return [...new Set(rules.replace(/(..)/g, '$1//').split('//').slice(0, -1))];
343
+ case 'num':
344
+ return [...new Set(rules.split(','))];
345
+ }
346
+ return [...new Set(rules.split(''))];
347
+ }
348
+ #processAffInfo(affInfo) {
349
+ const { AF = [], SFX = [], PFX = [] } = affInfo;
350
+ const flags = objectToKvP(affInfo)
351
+ .filter(isValidFlagMember)
352
+ .map(([key, value]) => ({ id: value, flags: toAffixFlags(affFlag[key]) }));
353
+ const sfxRules = [...SFX].map(([, sfx]) => sfx).map((sfx) => ({ id: sfx.id, sfx }));
354
+ const pfxRules = [...PFX].map(([, pfx]) => pfx).map((pfx) => ({ id: pfx.id, pfx }));
355
+ const rules = [...flags, ...sfxRules, ...pfxRules];
356
+ rules.forEach((rule, idx) => {
357
+ const found = this.mapToRuleIdx.get(rule.id);
358
+ if (found) {
359
+ const filename = this.filename;
360
+ logError('Duplicate affix rule: %o, filename: %o', rule.id, filename);
361
+ const toAdd = Array.isArray(found) ? found : [found];
362
+ toAdd.push(idx);
363
+ this.mapToRuleIdx.set(rule.id, toAdd);
364
+ return;
365
+ }
366
+ this.mapToRuleIdx.set(rule.id, idx);
367
+ });
368
+ AF.forEach((af, idx) => {
369
+ if (!af)
370
+ return;
371
+ const indexes = this.#getRuleIndexes(af);
372
+ this.mapWordRulesToRuleIndexes.set(idx.toString(), indexes);
373
+ });
374
+ this.rules = rules.map((rule, idx) => this.#mapPartialRule(rule, idx));
375
+ }
376
+ #mapPartialRule(rule, index) {
377
+ const { id, flags, sfx, pfx } = rule;
378
+ const idx = this.mapToRuleIdx.get(id);
379
+ // if (index !== idx) {
380
+ // const filename = this.affInfo.filename;
381
+ // logError('Unexpected index: %o !== %o, rule %o, filename: %o', index, idx, rule, filename);
382
+ // }
383
+ assert(idx !== undefined && (idx === index || (Array.isArray(idx) && idx.includes(index))));
384
+ const fx = sfx || pfx;
385
+ if (fx) {
386
+ const affFx = this.#mapFx(fx);
387
+ if (affFx.type === 'P') {
388
+ return { id, idx: index, type: 'P', flags: 0, fx: affFx };
389
+ }
390
+ else {
391
+ return { id, idx: index, type: 'S', flags: 0, fx: affFx };
392
+ }
393
+ }
394
+ return { id, idx: index, type: 'F', flags: flags || 0 };
395
+ }
396
+ #mapFx(fx) {
397
+ const { id, combinable } = fx;
398
+ const substitutionsForRegExps = this.#mapSubstitutionsForRegExps(fx.substitutionsForRegExps);
399
+ return { type: fx.type === 'PFX' ? 'P' : 'S', id, combinable, substitutionsForRegExps };
400
+ }
401
+ #mapSubstitutionsForRegExps(substitutions) {
402
+ return substitutions.map((sub) => this.#mapSubstitutionsForRegExp(sub));
403
+ }
404
+ #mapSubstitutionsForRegExp(subForRegExp) {
405
+ const { match, substitutions: subs } = subForRegExp;
406
+ const substitutions = subs.map((sub) => this.#mapSubstitution(sub));
407
+ const substitutionsGroupedByRemove = groupByField(substitutions, 'replace');
408
+ return { match, substitutionsGroupedByRemove };
409
+ }
410
+ #mapSubstitution(sub) {
411
+ const { type, remove, attach, attachRules, replace } = sub;
412
+ const rules = attachRules ? this.getRuleIndexes(attachRules) : undefined;
413
+ return { type, remove, attach, attachRules: rules, replace };
414
+ }
415
+ }
416
+ function joinRules(a, b) {
417
+ if (!a)
418
+ return b;
419
+ if (!b)
420
+ return a;
421
+ return [...a, ...b];
422
+ }
423
+ function objectToKvP(t) {
424
+ return Object.entries(t);
425
+ }
426
+ // type Defined<T> = Exclude<T, undefined>;
427
+ function isValidFlagMember(t) {
428
+ const [key, value] = t;
429
+ return key in affFlag && !!value;
430
+ }
431
+ /**
432
+ *
433
+ * @param FLAG - the FLAG value from the aff file
434
+ * @returns the AffFlagType or throws
435
+ */
436
+ export function toAffFlagType(FLAG) {
437
+ if (!FLAG)
438
+ return 'char';
439
+ switch (FLAG) {
440
+ case 'long':
441
+ case 'num':
442
+ return FLAG;
443
+ default:
444
+ throw new Error(`Unexpected FLAG value: ${FLAG}`);
445
+ }
446
+ }
291
447
  //# sourceMappingURL=aff.js.map
@@ -0,0 +1,6 @@
1
+ import type { AffTransformFlags, AffWordFlags } from './affDef.js';
2
+ import type { Mapping } from './types.js';
3
+ export declare const affFlag: Mapping<AffTransformFlags, AffWordFlags>;
4
+ export declare const flagToStringMap: Record<string, string | undefined>;
5
+ export declare const flagToLongStringMap: Record<string, string | undefined>;
6
+ //# sourceMappingURL=affConstants.d.ts.map