hunspell-reader 8.3.2 → 8.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  import type { Sequence } from 'gensequence';
2
- import type { Aff } from './aff.js';
3
2
  import type { AffWord } from './affDef.js';
3
+ import type { Aff } from './affLegacy.js';
4
4
  import type { WordInfo } from './types.js';
5
5
  export { WordInfo } from './types.js';
6
6
  export interface HunspellSrcData {
@@ -9,7 +9,7 @@ export interface HunspellSrcData {
9
9
  /** the hunspell dictionary entries complete with affix flags */
10
10
  dic: string[];
11
11
  }
12
- export declare class IterableHunspellReader implements Iterable<string> {
12
+ export declare class IterableHunspellReaderLegacy implements Iterable<string> {
13
13
  readonly src: HunspellSrcData;
14
14
  readonly aff: Aff;
15
15
  constructor(src: HunspellSrcData);
@@ -67,7 +67,7 @@ export declare class IterableHunspellReader implements Iterable<string> {
67
67
  * @param dicFile - path to dic file.
68
68
  * @returns IterableHunspellReader
69
69
  */
70
- static createFromFiles(affFile: string, dicFile: string): Promise<IterableHunspellReader>;
70
+ static createFromFiles(affFile: string, dicFile: string): Promise<IterableHunspellReaderLegacy>;
71
71
  }
72
72
  export declare function createMatchingWordsFilter(): (t: string) => boolean;
73
- //# sourceMappingURL=IterableHunspellReader.d.ts.map
73
+ //# sourceMappingURL=IterableHunspellReaderLegacy.d.ts.map
@@ -1,11 +1,11 @@
1
1
  import * as fs from 'fs/promises';
2
2
  import { genSequence } from 'gensequence';
3
3
  import pkgIconvLite from 'iconv-lite';
4
- import { parseAffFileToAff } from './affReader.js';
4
+ import { parseAffFileToAffLegacy } from './affReader.js';
5
5
  import { filterOrderedList } from './util.js';
6
6
  const { decode } = pkgIconvLite;
7
7
  const defaultEncoding = 'UTF-8';
8
- export class IterableHunspellReader {
8
+ export class IterableHunspellReaderLegacy {
9
9
  src;
10
10
  aff;
11
11
  constructor(src) {
@@ -105,7 +105,7 @@ export class IterableHunspellReader {
105
105
  * @returns IterableHunspellReader
106
106
  */
107
107
  static async createFromFiles(affFile, dicFile) {
108
- const aff = await parseAffFileToAff(affFile, defaultEncoding);
108
+ const aff = await parseAffFileToAffLegacy(affFile, defaultEncoding);
109
109
  const buffer = await fs.readFile(dicFile);
110
110
  const dicFileContent = decode(buffer, aff.affInfo.SET);
111
111
  const dic = dicFileContent
@@ -113,10 +113,10 @@ export class IterableHunspellReader {
113
113
  .slice(1) // The first entry is the count of entries.
114
114
  .map((a) => a.trim())
115
115
  .filter((line) => !!line);
116
- return new IterableHunspellReader({ aff, dic });
116
+ return new IterableHunspellReaderLegacy({ aff, dic });
117
117
  }
118
118
  }
119
119
  export function createMatchingWordsFilter() {
120
120
  return filterOrderedList((a, b) => a !== b);
121
121
  }
122
- //# sourceMappingURL=IterableHunspellReader.js.map
122
+ //# sourceMappingURL=IterableHunspellReaderLegacy.js.map
package/dist/aff.d.ts CHANGED
@@ -1,14 +1,17 @@
1
- import type { AffInfo, AffWord, AffWordFlags, Fx, Rule, Substitution } from './affDef.js';
1
+ import type { AffInfo } from './affDef.js';
2
2
  import { Converter } from './converter.js';
3
3
  /** The `word` field in a Converted AffWord has been converted using the OCONV mapping */
4
- export type ConvertedAffWord = AffWord;
4
+ export interface ConvertedAffixWord extends AffixWord {
5
+ originalWord: string;
6
+ }
5
7
  export declare class Aff {
6
- affInfo: AffInfo;
7
- protected rules: Map<string, Rule>;
8
+ #private;
9
+ readonly affInfo: AffInfo;
10
+ protected affData: AffData;
8
11
  protected _oConv: Converter;
9
12
  protected _iConv: Converter;
10
13
  private _maxSuffixDepth;
11
- constructor(affInfo: AffInfo);
14
+ constructor(affInfo: AffInfo, filename: string);
12
15
  get maxSuffixDepth(): number;
13
16
  set maxSuffixDepth(value: number);
14
17
  /**
@@ -16,34 +19,210 @@ export declare class Aff {
16
19
  * For performance reasons, only the `word` field is mapped with OCONV.
17
20
  * @param {string} line - the line from the .dic file.
18
21
  */
19
- applyRulesToDicEntry(line: string, maxDepth?: number): ConvertedAffWord[];
22
+ applyRulesToDicEntry(line: string, maxDepth?: number): ConvertedAffixWord[];
20
23
  /**
21
24
  * @internal
22
25
  */
23
- applyRulesToWord(affWord: AffWord, remainingDepth: number): AffWord[];
24
- applyAffixesToWord(affixRules: Fx[], affWord: AffWord, remainingDepth: number): AffWord[];
25
- applyAffixToWord(affix: Fx, affWord: AffWord, combinableSfx: string): AffWord[];
26
- substitute(affix: Fx, affWord: AffWord, sub: Substitution): AffWord;
27
- getMatchingRules(rules: string): Rule[];
28
- joinRules(rules: string[]): string;
29
- separateRules(rules: string): string[];
26
+ applyRulesToWord(affWord: AffixWord, remainingDepth: number): AffixWord[];
27
+ applyAffixesToWord(affWord: AffixWord, remainingDepth: number): AffixWord[];
28
+ applyAffixToWord(rule: FxRule, affWord: AffixWord, combinableSfx: FxRule[]): AffixWord[];
29
+ getMatchingRules(flags: string): AffRule[];
30
+ /**
31
+ * Convert the applied rule indexes to AFF Letters.
32
+ * Requires that the affixWord was generated with trace mode turned on.
33
+ * @param affixWord - the generated AffixWord.
34
+ */
35
+ getFlagsValuesForAffixWord(affixWord: AffixWord): string[] | undefined;
30
36
  get iConv(): Converter;
31
37
  get oConv(): Converter;
38
+ setTraceMode(value: boolean): void;
39
+ }
40
+ export declare function compareAff(a: AffixWord, b: AffixWord): number;
41
+ export declare enum AffixFlags {
42
+ none = 0,
43
+ /**
44
+ * COMPOUNDFLAG flag
45
+ *
46
+ * Words signed with COMPOUNDFLAG may be in compound words (except when word shorter than COMPOUNDMIN).
47
+ * Affixes with COMPOUNDFLAG also permit compounding of affixed words.
48
+ *
49
+ */
50
+ isCompoundPermitted = 1,
51
+ /**
52
+ * COMPOUNDBEGIN flag
53
+ *
54
+ * Words signed with COMPOUNDBEGIN (or with a signed affix) may be first elements in compound words.
55
+ *
56
+ */
57
+ canBeCompoundBegin = 2,// default false
58
+ /**
59
+ * COMPOUNDMIDDLE flag
60
+ *
61
+ * Words signed with COMPOUNDMIDDLE (or with a signed affix) may be middle elements in compound words.
62
+ *
63
+ */
64
+ canBeCompoundMiddle = 4,// default false
65
+ /**
66
+ * COMPOUNDLAST flag
67
+ *
68
+ * Words signed with COMPOUNDLAST (or with a signed affix) may be last elements in compound words.
69
+ *
70
+ */
71
+ canBeCompoundEnd = 8,// default false
72
+ /**
73
+ * COMPOUNDPERMITFLAG flag
74
+ *
75
+ * Prefixes are allowed at the beginning of compounds, suffixes are allowed at the end of compounds by default.
76
+ * Affixes with COMPOUNDPERMITFLAG may be inside of compounds.
77
+ *
78
+ */
79
+ isOnlyAllowedInCompound = 16,
80
+ /**
81
+ * COMPOUNDFORBIDFLAG flag
82
+ *
83
+ * Suffixes with this flag forbid compounding of the affixed word.
84
+ *
85
+ */
86
+ isCompoundForbidden = 32,
87
+ /**
88
+ * WARN flag
89
+ *
90
+ * This flag is for rare words, which are also often spelling mistakes, see option -r of command line Hunspell and FORBIDWARN.
91
+ */
92
+ isWarning = 64,
93
+ /**
94
+ * KEEPCASE flag
95
+ *
96
+ * Forbid uppercased and capitalized forms of words signed with KEEPCASE flags. Useful for special orthographies (measurements and
97
+ * currency often keep their case in uppercased texts) and writing systems (e.g. keeping lower case of IPA characters). Also valuable
98
+ * for words erroneously written in the wrong case.
99
+ */
100
+ isKeepCase = 128,
101
+ /**
102
+ * FORCEUCASE flag
103
+ *
104
+ * Last word part of a compound with flag FORCEUCASE forces capitalization of the whole compound word.
105
+ * E.g. the Dutch word "straat" (street) with the FORCEUCASE flag will be allowed only in capitalized compound forms,
106
+ * according to the Dutch spelling rules for proper names.
107
+ */
108
+ isForceUCase = 256,
109
+ /**
110
+ * FORBIDDENWORD flag
111
+ *
112
+ * This flag marks a forbidden word form. Because affixed forms are also forbidden, we can subtract a subset from the set of the
113
+ * accepted affixed and compound words. Note: useful to forbid erroneous words, generated by the compounding mechanism.
114
+ */
115
+ isForbiddenWord = 512,
116
+ /**
117
+ * NOSUGGEST flag
118
+ *
119
+ * Words signed with NOSUGGEST flag are not suggested (but still accepted when typed correctly). Proposed flag for vulgar
120
+ * and obscene words (see also SUBSTANDARD).
121
+ */
122
+ isNoSuggest = 1024,
123
+ /**
124
+ * NEEDAFFIX flag
125
+ *
126
+ * This flag marks virtual stems in the dictionary: words that are only valid when affixed, except when the dictionary word has a homonym
127
+ * or a zero affix. NEEDAFFIX works also with prefixes and prefix + suffix combinations (see tests/pseudoroot5.*).
128
+ */
129
+ isNeedAffix = 2048
130
+ }
131
+ type RuleIdx = number;
132
+ type SingleFlag = string;
133
+ type WordFlags = string;
134
+ type DictionaryLine = string;
135
+ interface DictionaryEntry {
136
+ word: string;
137
+ /** flags are the part after the `/`, `word/FLAGS` */
138
+ flags: string;
139
+ /** The original dictionary line. */
140
+ line: string;
141
+ }
142
+ export interface AffixWordSource {
143
+ /** Original dictionary entry */
144
+ dict: DictionaryEntry;
145
+ /** Optional applied rules, trace mode must be turned on. */
146
+ appliedRules?: number[] | undefined;
147
+ }
148
+ export interface AffixWord extends AffixWordSource {
149
+ /** The word */
150
+ word: string;
151
+ /** Rules to apply */
152
+ rules: FxRule[] | undefined;
153
+ /** Flags */
154
+ flags: AffixFlags;
155
+ }
156
+ declare class AffData {
157
+ #private;
158
+ private affInfo;
159
+ readonly filename: string;
160
+ rules: AffRule[];
161
+ mapToRuleIdx: Map<SingleFlag, RuleIdx | RuleIdx[]>;
162
+ mapWordRulesToRuleIndexes: Map<WordFlags, RuleIdx[]>;
163
+ mapWordRulesToRules: Map<WordFlags, AffRule[]>;
164
+ affFlagType: 'long' | 'num' | 'char';
165
+ missingFlags: Set<string>;
166
+ private _mapRuleIdxToRules;
167
+ trace: boolean;
168
+ constructor(affInfo: AffInfo, filename: string);
169
+ dictLineToEntry(line: DictionaryLine): DictionaryEntry;
170
+ dictLineToAffixWord(line: DictionaryLine): AffixWord;
171
+ toAffixWord(source: AffixWordSource | AffixWord, word: string, flags: AffixFlags, rules: AffRule[] | undefined): AffixWord;
172
+ getRules(rules: WordFlags): AffRule[];
173
+ getRuleIndexes(rules: WordFlags): RuleIdx[];
174
+ rulesToFlags(rules: AffRule[]): AffixFlags;
175
+ getRulesForIndexes(indexes: undefined): undefined;
176
+ getRulesForIndexes(indexes: RuleIdx[]): AffRule[];
177
+ getRulesForIndexes(indexes: RuleIdx[] | undefined): AffRule[] | undefined;
178
+ getRulesForAffSubstitution(sub: AffSubstitution): AffRule[] | undefined;
179
+ }
180
+ type AffType = 'P' | 'S';
181
+ interface AffFx {
182
+ type: AffType;
183
+ id: string;
184
+ combinable: boolean;
185
+ substitutionsForRegExps: AffSubstitutionsForRegExp[];
186
+ }
187
+ interface AffSubstitution {
188
+ type: AffType;
189
+ remove: string;
190
+ attach: string;
191
+ attachRules?: RuleIdx[];
192
+ replace: RegExp;
193
+ }
194
+ interface AffSubstitutionsForRegExp {
195
+ match: RegExp;
196
+ substitutionsGroupedByRemove: Map<RegExp, AffSubstitution[]>;
197
+ }
198
+ type AffRule = FlagRule | FxRule;
199
+ type FxRule = PfxRule | SfxRule;
200
+ type RuleType = 'S' | 'P' | 'F';
201
+ interface RuleBase {
202
+ id: string;
203
+ idx: number;
204
+ type: RuleType;
205
+ flags: AffixFlags;
206
+ px?: AffFx;
207
+ }
208
+ interface FlagRule extends RuleBase {
209
+ type: 'F';
210
+ flags: AffixFlags;
211
+ }
212
+ interface PfxRule extends RuleBase {
213
+ type: 'P';
214
+ fx: AffFx;
215
+ }
216
+ interface SfxRule extends RuleBase {
217
+ type: 'S';
218
+ fx: AffFx;
32
219
  }
33
- declare function signature(aff: AffWord): string;
34
- export declare function processRules(affInfo: AffInfo): Map<string, Rule>;
35
- export declare function logAffWord(affWord: AffWord, message: string): AffWord;
36
- export declare function affWordToColoredString(affWord: AffWord): string;
37
- export declare function flagsToString(flags: AffWordFlags): string;
38
- export declare function asAffWord(word: string, rules?: string, flags?: AffWordFlags): AffWord;
39
- export declare function compareAff(a: AffWord, b: AffWord): 0 | 1 | -1;
220
+ type AffFlagType = 'long' | 'num' | 'char';
40
221
  /**
41
- * Returns a filter function that will filter adjacent AffWords
42
- * It compares the word and the flags.
222
+ * Convert the FLAG value from the aff file into an AffFlagType.
223
+ * @param FLAG - the FLAG value from the aff file
224
+ * @returns the AffFlagType or throws
43
225
  */
44
- export declare function filterAff(): (t: AffWord) => boolean;
45
- export declare const debug: {
46
- signature: typeof signature;
47
- };
226
+ export declare function toAffFlagType(FLAG: string | undefined): AffFlagType;
48
227
  export {};
49
228
  //# sourceMappingURL=aff.d.ts.map