@cspell/cspell-tools 8.15.5 → 8.15.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -53,6 +53,11 @@
53
53
  "description": "Maximum number of nested Hunspell Rules to apply. This is needed for recursive dictionaries like Hebrew.",
54
54
  "type": "number"
55
55
  },
56
+ "minCompoundLength": {
57
+ "default": 4,
58
+ "description": "Controls the minimum length of a compound word when storing words using `storeSplitWordsAsCompounds`. The compound words are prefixed / suffixed with `*`, to allow them to be combined with other compound words. If the length is too low, then the dictionary will consider many misspelled words as correct.",
59
+ "type": "number"
60
+ },
56
61
  "split": {
57
62
  "anyOf": [
58
63
  {
@@ -68,7 +73,7 @@
68
73
  },
69
74
  "storeSplitWordsAsCompounds": {
70
75
  "default": false,
71
- "description": "Words that have been split using the `allowedSplitWords` are added to the dictionary as compoundable words. These words are prefixed / suffixed with `*`.",
76
+ "description": "Camel case words that have been split using the `allowedSplitWords` are added to the dictionary as compoundable words. These words are prefixed / suffixed with `*`.",
72
77
  "type": "boolean"
73
78
  }
74
79
  },
@@ -110,6 +115,11 @@
110
115
  "description": "Maximum number of nested Hunspell Rules to apply. This is needed for recursive dictionaries like Hebrew.",
111
116
  "type": "number"
112
117
  },
118
+ "minCompoundLength": {
119
+ "default": 4,
120
+ "description": "Controls the minimum length of a compound word when storing words using `storeSplitWordsAsCompounds`. The compound words are prefixed / suffixed with `*`, to allow them to be combined with other compound words. If the length is too low, then the dictionary will consider many misspelled words as correct.",
121
+ "type": "number"
122
+ },
113
123
  "split": {
114
124
  "anyOf": [
115
125
  {
@@ -125,7 +135,7 @@
125
135
  },
126
136
  "storeSplitWordsAsCompounds": {
127
137
  "default": false,
128
- "description": "Words that have been split using the `allowedSplitWords` are added to the dictionary as compoundable words. These words are prefixed / suffixed with `*`.",
138
+ "description": "Camel case words that have been split using the `allowedSplitWords` are added to the dictionary as compoundable words. These words are prefixed / suffixed with `*`.",
129
139
  "type": "boolean"
130
140
  }
131
141
  },
@@ -266,6 +276,11 @@
266
276
  "description": "Maximum number of nested Hunspell Rules to apply. This is needed for recursive dictionaries like Hebrew.",
267
277
  "type": "number"
268
278
  },
279
+ "minCompoundLength": {
280
+ "default": 4,
281
+ "description": "Controls the minimum length of a compound word when storing words using `storeSplitWordsAsCompounds`. The compound words are prefixed / suffixed with `*`, to allow them to be combined with other compound words. If the length is too low, then the dictionary will consider many misspelled words as correct.",
282
+ "type": "number"
283
+ },
269
284
  "removeDuplicates": {
270
285
  "default": false,
271
286
  "description": "Remove duplicate words, favor lower case words over mixed case words. Combine compound prefixes where possible.",
@@ -295,7 +310,7 @@
295
310
  },
296
311
  "storeSplitWordsAsCompounds": {
297
312
  "default": false,
298
- "description": "Words that have been split using the `allowedSplitWords` are added to the dictionary as compoundable words. These words are prefixed / suffixed with `*`.",
313
+ "description": "Camel case words that have been split using the `allowedSplitWords` are added to the dictionary as compoundable words. These words are prefixed / suffixed with `*`.",
299
314
  "type": "boolean"
300
315
  },
301
316
  "targets": {
@@ -17,6 +17,7 @@ export interface SourceReaderOptions {
17
17
  keepCase?: boolean;
18
18
  allowedSplitWords: AllowedSplitWordsCollection;
19
19
  storeSplitWordsAsCompounds: boolean | undefined;
20
+ minCompoundLength?: number | undefined;
20
21
  }
21
22
  export type AnnotatedWord = string;
22
23
  export interface SourceReader {
@@ -25,8 +25,17 @@ function splitLines(lines, options) {
25
25
  return split();
26
26
  }
27
27
  async function textFileReader(reader, options) {
28
- const { legacy, splitWords: split, allowedSplitWords, storeSplitWordsAsCompounds } = options;
29
- const words = [...parseFileLines(reader.lines, { legacy, split, allowedSplitWords, storeSplitWordsAsCompounds })];
28
+ const { legacy, splitWords: split, allowedSplitWords, storeSplitWordsAsCompounds, minCompoundLength } = options;
29
+ const parseOptions = {
30
+ legacy,
31
+ split,
32
+ splitKeepBoth: undefined,
33
+ keepCase: undefined,
34
+ allowedSplitWords,
35
+ storeSplitWordsAsCompounds,
36
+ minCompoundLength,
37
+ };
38
+ const words = [...parseFileLines(reader.lines, parseOptions)];
30
39
  return {
31
40
  size: words.length,
32
41
  words,
@@ -175,7 +175,7 @@ async function readFileList(fileList) {
175
175
  .filter((a) => !!a);
176
176
  }
177
177
  async function readFileSource(fileSource, sourceOptions) {
178
- const { filename, keepRawCase = sourceOptions.keepRawCase || false, split = sourceOptions.split || false, maxDepth, storeSplitWordsAsCompounds, } = fileSource;
178
+ const { filename, keepRawCase = sourceOptions.keepRawCase || false, split = sourceOptions.split || false, maxDepth, storeSplitWordsAsCompounds, minCompoundLength, } = fileSource;
179
179
  const legacy = split === 'legacy';
180
180
  const splitWords = legacy ? false : split;
181
181
  // console.warn('fileSource: %o,\n targetOptions %o, \n opt: %o', fileSource, targetOptions, opt);
@@ -187,6 +187,7 @@ async function readFileSource(fileSource, sourceOptions) {
187
187
  keepCase: keepRawCase,
188
188
  allowedSplitWords,
189
189
  storeSplitWordsAsCompounds,
190
+ minCompoundLength,
190
191
  };
191
192
  logWithTimestamp(`Reading ${path.basename(filename)}`);
192
193
  const stream = await streamSourceWordsFromFile(filename, readerOptions);
@@ -1,12 +1,15 @@
1
1
  import { opConcatMap, opFilter, opMap, pipe } from '@cspell/cspell-pipe/sync';
2
+ import { defaultCompileSourceOptions } from '../config/configDefaults.js';
2
3
  import { regExpSpaceOrDash, splitCamelCaseIfAllowed } from './splitCamelCaseIfAllowed.js';
3
4
  const regNonWord = /[^\p{L}\p{M}' _\d]+/giu;
4
5
  const regExpRepeatChars = /(.)\1{5}/i;
6
+ const minCompoundLength = defaultCompileSourceOptions.minCompoundLength;
5
7
  export function legacyLineToWords(line, keepCase, allowedSplitWords) {
6
8
  // Remove punctuation and non-letters.
7
9
  const filteredLine = line.replaceAll(regNonWord, '|');
8
10
  const wordGroups = filteredLine.split('|');
9
- const words = pipe(wordGroups, opConcatMap((a) => a.split(regExpSpaceOrDash)), opConcatMap((a) => splitCamelCaseIfAllowed(a, allowedSplitWords, keepCase, '')), opMap((a) => a.trim()), opFilter((a) => !!a), opFilter((s) => !regExpRepeatChars.test(s)));
11
+ const _minCompoundLength = minCompoundLength;
12
+ const words = pipe(wordGroups, opConcatMap((a) => a.split(regExpSpaceOrDash)), opConcatMap((a) => splitCamelCaseIfAllowed(a, allowedSplitWords, keepCase, '', _minCompoundLength)), opMap((a) => a.trim()), opFilter((a) => !!a), opFilter((s) => !regExpRepeatChars.test(s)));
10
13
  return words;
11
14
  }
12
15
  export function* legacyLinesToWords(lines, keepCase, allowedSplitWords) {
@@ -1,5 +1,5 @@
1
1
  import type { AllowedSplitWordsCollection } from './WordsCollection.js';
2
2
  export declare const regExpSpaceOrDash: RegExp;
3
3
  export declare const regExpIsNumber: RegExp;
4
- export declare function splitCamelCaseIfAllowed(word: string, allowedWords: AllowedSplitWordsCollection, keepCase: boolean, compoundPrefix: string): string[];
4
+ export declare function splitCamelCaseIfAllowed(word: string, allowedWords: AllowedSplitWordsCollection, keepCase: boolean, compoundPrefix: string, minCompoundLength: number): string[];
5
5
  //# sourceMappingURL=splitCamelCaseIfAllowed.d.ts.map
@@ -1,7 +1,7 @@
1
1
  import { isSingleLetter, splitCamelCaseWord } from './text.js';
2
2
  export const regExpSpaceOrDash = /[- ]+/g;
3
3
  export const regExpIsNumber = /^\d+$/;
4
- export function splitCamelCaseIfAllowed(word, allowedWords, keepCase, compoundPrefix) {
4
+ export function splitCamelCaseIfAllowed(word, allowedWords, keepCase, compoundPrefix, minCompoundLength) {
5
5
  const split = [...splitCamelCase(word)];
6
6
  if (split.length == 1)
7
7
  return adjustCases(split, allowedWords, keepCase);
@@ -14,7 +14,7 @@ export function splitCamelCaseIfAllowed(word, allowedWords, keepCase, compoundPr
14
14
  ? adjusted
15
15
  : adjusted.map((w, i) => {
16
16
  const { px, sx } = wordIndexes[i];
17
- const canCompound = w.length > 2;
17
+ const canCompound = w.length >= minCompoundLength;
18
18
  const lc = w.toLowerCase();
19
19
  const p = canCompound && isSingleLetter(px) ? compoundPrefix : '';
20
20
  const s = canCompound && isSingleLetter(sx) ? compoundPrefix : '';
@@ -46,8 +46,13 @@ function* removeDuplicates(words) {
46
46
  continue;
47
47
  }
48
48
  const mForms = removeDuplicateForms(forms);
49
+ // if (forms.some((a) => /^[*+]?col[*+]?$/.test(a))) {
50
+ // console.warn('Found col %o', { forms, mForms });
51
+ // }
49
52
  if (mForms.size <= 1) {
50
- yield* mForms.values();
53
+ for (const form of mForms.values()) {
54
+ yield* form;
55
+ }
51
56
  continue;
52
57
  }
53
58
  // Handle upper / lower mix.
@@ -55,15 +60,22 @@ function* removeDuplicates(words) {
55
60
  const lc = words[0].toLowerCase();
56
61
  const lcForm = mForms.get(lc);
57
62
  if (!lcForm) {
58
- yield* mForms.values();
63
+ for (const form of mForms.values()) {
64
+ yield* form;
65
+ }
59
66
  continue;
60
67
  }
61
68
  mForms.delete(lc);
62
- yield lcForm;
63
- for (const form of mForms.values()) {
64
- if (form.toLowerCase() === lcForm)
65
- continue;
66
- yield form;
69
+ const sLcForms = new Set(lcForm);
70
+ yield* lcForm;
71
+ if (sLcForms.has('*' + lc + '*'))
72
+ continue;
73
+ for (const forms of mForms.values()) {
74
+ for (const form of forms) {
75
+ if (sLcForms.has(form.toLowerCase()))
76
+ continue;
77
+ yield form;
78
+ }
67
79
  }
68
80
  }
69
81
  }
@@ -77,36 +89,62 @@ function* removeDuplicates(words) {
77
89
  var Flags;
78
90
  (function (Flags) {
79
91
  Flags[Flags["base"] = 0] = "base";
80
- Flags[Flags["noPfx"] = 1] = "noPfx";
81
- Flags[Flags["noSfx"] = 2] = "noSfx";
92
+ Flags[Flags["none"] = 1] = "none";
93
+ Flags[Flags["both"] = 2] = "both";
82
94
  Flags[Flags["pfx"] = 4] = "pfx";
83
95
  Flags[Flags["sfx"] = 8] = "sfx";
84
- Flags[Flags["noFix"] = 3] = "noFix";
85
- Flags[Flags["midFix"] = 12] = "midFix";
96
+ Flags[Flags["all"] = 15] = "all";
86
97
  })(Flags || (Flags = {}));
87
98
  function applyFlags(word, flags) {
88
- if (flags === Flags.noFix)
89
- return word;
90
- if (flags === (Flags.noFix | Flags.midFix))
91
- return '*' + word + '*';
92
- const p = flags & Flags.pfx ? (flags & Flags.noPfx ? '*' : '+') : '';
93
- const s = flags & Flags.sfx ? (flags & Flags.noSfx ? '*' : '+') : '';
94
- return s + word + p;
99
+ if (flags === Flags.none)
100
+ return [word];
101
+ if (flags === Flags.all)
102
+ return ['*' + word + '*'];
103
+ if (flags === Flags.both)
104
+ return ['+' + word + '+'];
105
+ if (flags === Flags.pfx)
106
+ return [word + '+'];
107
+ if (flags === Flags.sfx)
108
+ return ['+' + word];
109
+ if (flags === (Flags.none | Flags.sfx))
110
+ return ['*' + word];
111
+ if (flags === (Flags.none | Flags.pfx))
112
+ return [word + '*'];
113
+ if (flags === (Flags.none | Flags.pfx | Flags.sfx))
114
+ return [word + '*', '*' + word];
115
+ if (flags === (Flags.none | Flags.both))
116
+ return [word, '+' + word + '+'];
117
+ if (flags === (Flags.none | Flags.both | Flags.sfx))
118
+ return [word, '+' + word + '*'];
119
+ if (flags === (Flags.none | Flags.both | Flags.pfx))
120
+ return [word, '*' + word + '+'];
121
+ if (flags === (Flags.both | Flags.pfx))
122
+ return ['*' + word + '+'];
123
+ if (flags === (Flags.both | Flags.sfx))
124
+ return ['+' + word + '*'];
125
+ if (flags === (Flags.both | Flags.pfx | Flags.sfx))
126
+ return ['+' + word + '*', '*' + word + '+'];
127
+ return ['+' + word, word + '+'];
95
128
  }
96
129
  function removeDuplicateForms(forms) {
97
130
  function flags(word, flag = 0) {
98
- let f = Flags.base;
99
- const isOptPrefix = word.endsWith('*');
100
- const isPrefix = !isOptPrefix && word.endsWith('+');
101
- const isAnyPrefix = isPrefix || isOptPrefix;
102
- const isOptSuffix = word.startsWith('*');
103
- const isSuffix = !isOptSuffix && word.startsWith('+');
104
- const isAnySuffix = isSuffix || isOptSuffix;
105
- f |= isAnyPrefix ? Flags.pfx : 0;
106
- f |= !isPrefix ? Flags.noPfx : 0;
107
- f |= isAnySuffix ? Flags.sfx : 0;
108
- f |= !isSuffix ? Flags.noSfx : 0;
109
- return flag | f;
131
+ const canBePrefix = word.endsWith('*');
132
+ const mustBePrefix = !canBePrefix && word.endsWith('+');
133
+ const isPrefix = canBePrefix || mustBePrefix;
134
+ const canBeSuffix = word.startsWith('*');
135
+ const mustBeSuffix = !canBeSuffix && word.startsWith('+');
136
+ const isSuffix = canBeSuffix || mustBeSuffix;
137
+ if (canBePrefix && canBeSuffix)
138
+ return flag | Flags.all;
139
+ if (mustBePrefix && mustBeSuffix)
140
+ return flag | Flags.both;
141
+ if (!isPrefix && !isSuffix)
142
+ return flag | Flags.none;
143
+ flag |= isPrefix && !isSuffix ? Flags.pfx : 0;
144
+ flag |= isSuffix && !isPrefix ? Flags.sfx : 0;
145
+ flag |= canBePrefix && !mustBeSuffix ? Flags.none : 0;
146
+ flag |= canBeSuffix && !mustBePrefix ? Flags.none : 0;
147
+ return flag;
110
148
  }
111
149
  const m = new Map();
112
150
  for (const form of forms) {
@@ -30,6 +30,13 @@ export interface ParseFileOptions {
30
30
  * @default undefined
31
31
  */
32
32
  storeSplitWordsAsCompounds: boolean | undefined;
33
+ /**
34
+ * Controls the minimum length of a compound word when storing words using `storeSplitWordsAsCompounds`.
35
+ * The compound words are prefixed / suffixed with `*`, to allow them to be combined with other compound words.
36
+ * If the length is too low, then the dictionary will consider many misspelled words as correct.
37
+ * @default 4
38
+ */
39
+ minCompoundLength: number | undefined;
33
40
  }
34
41
  type ParseFileOptionsRequired = Required<ParseFileOptions>;
35
42
  export declare const defaultParseDictionaryOptions: ParseFileOptionsRequired;
@@ -49,6 +56,6 @@ export declare function createParseFileLineMapper(options?: Partial<ParseFileOpt
49
56
  * @param _options - defines prefixes used when parsing lines.
50
57
  * @returns words that have been normalized.
51
58
  */
52
- export declare function parseFileLines(lines: Iterable<string> | string, options: Partial<ParseFileOptions>): Iterable<string>;
59
+ export declare function parseFileLines(lines: Iterable<string> | string, options: ParseFileOptions): Iterable<string>;
53
60
  export {};
54
61
  //# sourceMappingURL=wordListParser.d.ts.map
@@ -1,6 +1,7 @@
1
1
  import { opCombine, opCombine as opPipe, opFilter, opMap } from '@cspell/cspell-pipe/sync';
2
2
  import { createDictionaryLineParser } from 'cspell-trie-lib';
3
3
  import { uniqueFilter } from 'hunspell-reader';
4
+ import { defaultCompileSourceOptions } from '../config/configDefaults.js';
4
5
  import { legacyLineToWords } from './legacyLineToWords.js';
5
6
  import { splitCamelCaseIfAllowed } from './splitCamelCaseIfAllowed.js';
6
7
  export function normalizeTargetWords(options) {
@@ -45,7 +46,8 @@ const _defaultOptions = {
45
46
  splitKeepBoth: false,
46
47
  // splitSeparator: regExpSplit,
47
48
  allowedSplitWords: { has: () => true, size: 0 },
48
- storeSplitWordsAsCompounds: undefined,
49
+ storeSplitWordsAsCompounds: defaultCompileSourceOptions.storeSplitWordsAsCompounds,
50
+ minCompoundLength: defaultCompileSourceOptions.minCompoundLength,
49
51
  };
50
52
  export const defaultParseDictionaryOptions = Object.freeze(_defaultOptions);
51
53
  export const cSpellToolDirective = 'cspell-tools:';
@@ -58,7 +60,7 @@ export const setOfCSpellDirectiveFlags = ['no-split', 'split', 'keep-case', 'no-
58
60
  */
59
61
  export function createParseFileLineMapper(options) {
60
62
  const _options = options || _defaultOptions;
61
- const { splitKeepBoth = _defaultOptions.splitKeepBoth, allowedSplitWords = _defaultOptions.allowedSplitWords, storeSplitWordsAsCompounds, } = _options;
63
+ const { splitKeepBoth = _defaultOptions.splitKeepBoth, allowedSplitWords = _defaultOptions.allowedSplitWords, storeSplitWordsAsCompounds, minCompoundLength = _defaultOptions.minCompoundLength, } = _options;
62
64
  let { legacy = _defaultOptions.legacy } = _options;
63
65
  let { split = _defaultOptions.split, keepCase = legacy ? false : _defaultOptions.keepCase } = _options;
64
66
  const compoundFix = storeSplitWordsAsCompounds ? '+' : '';
@@ -135,7 +137,7 @@ export function createParseFileLineMapper(options) {
135
137
  return lines;
136
138
  }
137
139
  function splitWordIntoWords(word) {
138
- return splitCamelCaseIfAllowed(word, allowedSplitWords, keepCase, compoundFix);
140
+ return splitCamelCaseIfAllowed(word, allowedSplitWords, keepCase, compoundFix, minCompoundLength);
139
141
  }
140
142
  function* splitWords(lines) {
141
143
  for (const line of lines) {
@@ -161,11 +161,18 @@ export interface CompileSourceOptions {
161
161
  */
162
162
  allowedSplitWords?: FilePath | FilePath[] | undefined;
163
163
  /**
164
- * Words that have been split using the `allowedSplitWords` are added to the dictionary as compoundable words.
164
+ * Camel case words that have been split using the `allowedSplitWords` are added to the dictionary as compoundable words.
165
165
  * These words are prefixed / suffixed with `*`.
166
166
  * @default false
167
167
  */
168
168
  storeSplitWordsAsCompounds?: boolean | undefined;
169
+ /**
170
+ * Controls the minimum length of a compound word when storing words using `storeSplitWordsAsCompounds`.
171
+ * The compound words are prefixed / suffixed with `*`, to allow them to be combined with other compound words.
172
+ * If the length is too low, then the dictionary will consider many misspelled words as correct.
173
+ * @default 4
174
+ */
175
+ minCompoundLength?: number | undefined;
169
176
  }
170
177
  export declare const configFileSchemaURL = "https://raw.githubusercontent.com/streetsidesoftware/cspell/main/packages/cspell-tools/cspell-tools.config.schema.json";
171
178
  //# sourceMappingURL=config.d.ts.map
@@ -0,0 +1,9 @@
1
+ export declare const defaultCompileSourceOptions: {
2
+ readonly maxDepth: undefined;
3
+ readonly split: false;
4
+ readonly keepRawCase: false;
5
+ readonly allowedSplitWords: undefined;
6
+ readonly storeSplitWordsAsCompounds: false;
7
+ readonly minCompoundLength: 4;
8
+ };
9
+ //# sourceMappingURL=configDefaults.d.ts.map
@@ -0,0 +1,9 @@
1
+ export const defaultCompileSourceOptions = {
2
+ maxDepth: undefined,
3
+ split: false,
4
+ keepRawCase: false,
5
+ allowedSplitWords: undefined,
6
+ storeSplitWordsAsCompounds: false,
7
+ minCompoundLength: 4,
8
+ };
9
+ //# sourceMappingURL=configDefaults.js.map
@@ -0,0 +1,7 @@
1
+ /**
2
+ * Make all properties in T required, but keep the original optionality of the properties.
3
+ */
4
+ export type RequireFields<T> = {
5
+ [P in keyof Required<T>]: T[P];
6
+ };
7
+ //# sourceMappingURL=types.d.ts.map
package/dist/types.js ADDED
@@ -0,0 +1,2 @@
1
+ export {};
2
+ //# sourceMappingURL=types.js.map
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@cspell/cspell-tools",
3
- "version": "8.15.5",
3
+ "version": "8.15.7",
4
4
  "description": "Tools to assist with the development of cSpell",
5
5
  "publishConfig": {
6
6
  "access": "public",
@@ -51,12 +51,12 @@
51
51
  },
52
52
  "homepage": "https://github.com/streetsidesoftware/cspell/tree/main/packages/cspell-tools#readme",
53
53
  "dependencies": {
54
- "@cspell/cspell-pipe": "8.15.5",
54
+ "@cspell/cspell-pipe": "8.15.7",
55
55
  "commander": "^12.1.0",
56
56
  "cosmiconfig": "9.0.0",
57
- "cspell-trie-lib": "8.15.5",
57
+ "cspell-trie-lib": "8.15.7",
58
58
  "glob": "^10.4.5",
59
- "hunspell-reader": "8.15.5",
59
+ "hunspell-reader": "8.15.7",
60
60
  "yaml": "^2.6.0"
61
61
  },
62
62
  "engines": {
@@ -67,5 +67,5 @@
67
67
  "ts-json-schema-generator": "^2.3.0"
68
68
  },
69
69
  "module": "bin.mjs",
70
- "gitHead": "5f974b2d4d61e2582aba5a1c87fcbc127f5f06bb"
70
+ "gitHead": "9decdc45e8fd0d6bb9c3851813c2c0728d3c7c5e"
71
71
  }