@cspell/cspell-tools 8.19.3 → 8.19.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -174,7 +174,21 @@
174
174
  "type": "array"
175
175
  },
176
176
  "excludeWordsFrom": {
177
- "description": "Words from the sources that are found in `excludeWordsFrom` files will not be added to the dictionary.",
177
+ "description": "Words from the sources that are found in `excludeWordsFrom` files will NOT be added to the dictionary.",
178
+ "items": {
179
+ "$ref": "#/definitions/FilePath"
180
+ },
181
+ "type": "array"
182
+ },
183
+ "excludeWordsMatchingRegex": {
184
+ "description": "Words from the sources that match the regex in `excludeWordsMatchingRegex` will NOT be added to the dictionary.\n\nNote: The regex must be a valid JavaScript literal regex expression including the `/` delimiters.",
185
+ "items": {
186
+ "type": "string"
187
+ },
188
+ "type": "array"
189
+ },
190
+ "excludeWordsNotFoundIn": {
191
+ "description": "Words from the sources that are NOT found in `excludeWordsNotFoundIn` files will NOT be added to the dictionary.",
178
192
  "items": {
179
193
  "$ref": "#/definitions/FilePath"
180
194
  },
@@ -4,6 +4,7 @@ import { opAwaitAsync, opMapAsync } from '@cspell/cspell-pipe/operators';
4
4
  import { opConcatMap, opMap, pipe } from '@cspell/cspell-pipe/sync';
5
5
  import { isFileListSource, isFilePath, isFileSource } from '../config/index.js';
6
6
  import { checkShasumFile, updateChecksumForFiles } from '../shasum/index.js';
7
+ import { stringToRegExp } from '../util/textRegex.js';
7
8
  import { createAllowedSplitWordsFromFiles, createWordsCollectionFromFiles } from './createWordsCollection.js';
8
9
  import { logWithTimestamp } from './logWithTimestamp.js';
9
10
  import { readTextFile } from './readers/readTextFile.js';
@@ -55,11 +56,16 @@ function resolveChecksumFile(checksumFile, root) {
55
56
  export async function compileTarget(target, options, compileOptions) {
56
57
  logWithTimestamp(`Start compile: ${target.name}`);
57
58
  const { rootDir, cwd, checksumFile, conditional } = compileOptions;
58
- const { format, sources, trieBase, sort = true, generateNonStrict = false, excludeWordsFrom } = target;
59
+ const { format, sources, trieBase, sort = true, generateNonStrict = false, excludeWordsFrom = [], excludeWordsNotFoundIn = [], excludeWordsMatchingRegex, } = target;
59
60
  const targetDirectory = path.resolve(rootDir, target.targetDirectory ?? cwd ?? process.cwd());
60
61
  const dictionaryDirectives = target.dictionaryDirectives ?? compileOptions.dictionaryDirectives;
61
62
  const removeDuplicates = target.removeDuplicates ?? false;
62
- const excludeFilter = await createExcludeFilter(excludeWordsFrom);
63
+ const excludeFromFilter = await createExcludeFilter(excludeWordsFrom);
64
+ const includeFromFilter = await createIncludeFilter(excludeWordsNotFoundIn);
65
+ const excludeRegexFilter = createExcludeRegexFilter(excludeWordsMatchingRegex);
66
+ const excludeFilter = (word) => {
67
+ return excludeFromFilter(word) && includeFromFilter(word) && excludeRegexFilter(word);
68
+ };
63
69
  const generateNonStrictTrie = target.generateNonStrict ?? true;
64
70
  const name = normalizeTargetName(target.name);
65
71
  const useTrie = format.startsWith('trie');
@@ -74,7 +80,9 @@ export async function compileTarget(target, options, compileOptions) {
74
80
  // removeDuplicates, // Add this in if we use it.
75
81
  });
76
82
  const checksumRoot = (checksumFile && path.dirname(checksumFile)) || rootDir;
77
- const deps = [...calculateDependencies(filename, filesToProcess, excludeWordsFrom, checksumRoot)];
83
+ const deps = [
84
+ ...calculateDependencies(filename, filesToProcess, [...excludeWordsFrom, ...excludeWordsNotFoundIn], checksumRoot),
85
+ ];
78
86
  if (conditional && checksumFile) {
79
87
  const check = await checkShasumFile(checksumFile, deps, checksumRoot).catch(() => undefined);
80
88
  if (check?.passed) {
@@ -215,10 +223,41 @@ function logProgress(freq = 100_000) {
215
223
  }
216
224
  return logProgress;
217
225
  }
/**
 * Build a predicate that rejects every word listed in the given files.
 *
 * @param excludeWordsFrom - List of files whose words must stay out of the dictionary.
 * @returns a predicate yielding `true` when the word may be kept, i.e. it is not in
 *   the collection read from `excludeWordsFrom`. With no files, every word is kept.
 */
async function createExcludeFilter(excludeWordsFrom) {
    if (!excludeWordsFrom?.length) {
        return () => true;
    }
    const blocked = await createWordsCollectionFromFiles(excludeWordsFrom);
    return function keepWord(word) {
        // The second argument is `true` unless the word is entirely upper case;
        // how it is interpreted is up to the collection's `has` implementation.
        const notAllCaps = word.toUpperCase() !== word;
        return !blocked.has(word, notAllCaps);
    };
}
/**
 * Build a predicate that keeps only the words listed in the given files.
 *
 * @param includeWordsFrom - List of files naming the words allowed into the dictionary.
 * @returns a predicate yielding `true` when the word is present in the collection
 *   read from `includeWordsFrom`. With no files, every word passes.
 */
async function createIncludeFilter(includeWordsFrom) {
    if (!includeWordsFrom?.length) {
        return () => true;
    }
    const allowed = await createWordsCollectionFromFiles(includeWordsFrom);
    return function keepWord(word) {
        // The second argument is `true` unless the word is entirely upper case;
        // how it is interpreted is up to the collection's `has` implementation.
        const notAllCaps = word.toUpperCase() !== word;
        return allowed.has(word, notAllCaps);
    };
}
/**
 * Build a predicate that rejects words matching any of the given regular expressions.
 *
 * @param excludeWordsMatchingRegex - List of regex patterns to exclude. Each entry is
 *   converted with `stringToRegExp`; entries it cannot convert are reported through
 *   `console.warn` and ignored.
 * @returns a function that returns true if the word does not match any of the regex patterns.
 */
function createExcludeRegexFilter(excludeWordsMatchingRegex) {
    if (!excludeWordsMatchingRegex || !excludeWordsMatchingRegex.length)
        return () => true;
    const regexes = excludeWordsMatchingRegex
        .map((a) => stringToRegExp(a))
        .filter((a, i) => {
            if (a)
                return true;
            // `map` preserves positions, so `i` still indexes the original pattern text.
            console.warn('Invalid regex: "%s"', excludeWordsMatchingRegex[i]);
            return false;
        });
    const matches = (regex, word) => {
        // The regexes are shared across every word. With the `g` or `y` flag,
        // `test()` resumes from `lastIndex`, so without this reset the result for
        // one word would depend on the word tested before it.
        regex.lastIndex = 0;
        return regex.test(word);
    };
    return (word) => !regexes.some((r) => matches(r, word));
}
224
263
  //# sourceMappingURL=compile.js.map
@@ -108,11 +108,27 @@ export interface Target extends CompileTargetOptions {
108
108
  sources: DictionarySource[];
109
109
  /**
110
110
  * Words from the sources that are found in `excludeWordsFrom` files
111
- * will not be added to the dictionary.
111
+ * will NOT be added to the dictionary.
112
112
  *
113
113
  * @since 8.3.2
114
114
  */
115
115
  excludeWordsFrom?: FilePath[] | undefined;
116
+ /**
117
+ * Words from the sources that are NOT found in `excludeWordsNotFoundIn` files
118
+ * will NOT be added to the dictionary.
119
+ *
120
+ * @since 8.19.4
121
+ */
122
+ excludeWordsNotFoundIn?: FilePath[] | undefined;
123
+ /**
124
+ * Words from the sources that match the regex in `excludeWordsMatchingRegex`
125
+ * will NOT be added to the dictionary.
126
+ *
127
+ * Note: The regex must be a valid JavaScript literal regex expression including the `/` delimiters.
128
+ *
129
+ * @since 8.19.4
130
+ */
131
+ excludeWordsMatchingRegex?: string[] | undefined;
116
132
  /**
117
133
  * Advanced: Set the trie base number. A value between 10 and 36
118
134
  * Set numeric base to use.
@@ -0,0 +1,3 @@
1
+ export declare const regExMatchRegExParts: RegExp;
2
+ export declare function stringToRegExp(pattern: string | RegExp, defaultFlags?: string, forceFlags?: string): RegExp | undefined;
3
+ //# sourceMappingURL=textRegex.d.ts.map
@@ -0,0 +1,109 @@
// cspell:ignore gimuxy
/**
 * Splits a regex literal written as text (`/pattern/flags`, optionally surrounded by
 * whitespace) into its pattern (group 1) and flags (group 2).
 * Only the flag letters `g`, `i`, `m`, `u`, `x` and `y` are recognized.
 */
export const regExMatchRegExParts = /^\s*\/([\s\S]*?)\/([gimuxy]*)\s*$/;
/**
 * Convert a string into a `RegExp`.
 *
 * - A `RegExp` instance is returned unchanged (the flag arguments are ignored).
 * - A string of the form `/pattern/flags` is split into pattern and flags.
 * - Any other string is trimmed and used as the pattern together with `defaultFlags`.
 *
 * The `x` flag is not passed to `RegExp`; it causes the pattern to be run through
 * `removeVerboseFromRegExp` first. Only `g`, `i`, `m`, `u` and `y` reach the constructor.
 *
 * @param pattern - the regular expression or its textual form.
 * @param defaultFlags - flags used when `pattern` is not in `/pattern/flags` form.
 * @param forceFlags - flags that are always added to the result.
 * @returns the regular expression, or `undefined` when the pattern is empty or invalid.
 */
export function stringToRegExp(pattern, defaultFlags = '', forceFlags = '') {
    if (pattern instanceof RegExp) {
        return pattern;
    }
    try {
        const [, pat, flag] = pattern.match(regExMatchRegExParts) || ['', pattern.trim(), defaultFlags];
        if (pat) {
            const regPattern = flag.includes('x') ? removeVerboseFromRegExp(pat) : pat;
            // Make sure the flags are unique, then drop anything `RegExp` should not see.
            const flags = [...new Set(forceFlags + flag)].join('').replaceAll(/[^gimuy]/g, '');
            return new RegExp(regPattern, flags);
        }
    }
    catch {
        // Deliberate best effort: an invalid pattern (or a non-string input) yields
        // `undefined` so the caller can decide how to report it.
    }
    return undefined;
}
// Characters treated as insignificant whitespace in a verbose pattern.
const SPACES = {
    ' ': true,
    '\n': true,
    '\r': true,
    '\t': true,
};
/**
 * Remove all whitespace and comments from a regexp string. The format follows Python's verbose mode.
 * Note: this is a best attempt. Special cases for comments: a literal `#` or a literal
 * whitespace character should be preceded by a `\`.
 *
 * All literal space must be preceded by a `\` or be in a character class `[]`.
 *
 * The pattern is scanned left to right; at each position the first handler that
 * applies (escape, character class, whitespace, comment, plain copy) consumes input.
 *
 * @param pattern - the pattern to clean
 * @returns the pattern with unescaped whitespace and `#` comments removed.
 */
function removeVerboseFromRegExp(pattern) {
    // `\x` sequences: `\#` and escaped whitespace become the bare character,
    // every other escape is copied through unchanged.
    function escape(acc) {
        const char = pattern[acc.idx];
        if (char !== '\\')
            return undefined;
        const next = pattern[++acc.idx];
        if (next === undefined) {
            // Trailing backslash with nothing to escape: keep it as is so the
            // `RegExp` constructor rejects the pattern instead of receiving the
            // text "undefined" appended to it.
            acc.result += '\\';
            return acc;
        }
        acc.idx++;
        if (next === '#') {
            acc.result += '#';
            return acc;
        }
        if (!(next in SPACES)) {
            acc.result += '\\' + next;
            return acc;
        }
        acc.result += next;
        // An escaped CR followed by LF is kept as a single CRLF pair.
        if (next === '\r' && pattern[acc.idx] === '\n') {
            acc.result += '\n';
            acc.idx++;
        }
        return acc;
    }
    // Character classes are copied verbatim up to the closing, unescaped `]`.
    function braces(acc) {
        const char = pattern[acc.idx];
        if (char !== '[')
            return undefined;
        acc.result += char;
        acc.idx++;
        // Length of the current run of backslashes; an odd run escapes the next character.
        let escCount = 0;
        while (acc.idx < pattern.length) {
            const char = pattern[acc.idx];
            acc.result += char;
            acc.idx++;
            if (char === ']' && !(escCount & 1))
                break;
            escCount = char === '\\' ? escCount + 1 : 0;
        }
        return acc;
    }
    // Unescaped whitespace is dropped.
    function spaces(acc) {
        const char = pattern[acc.idx];
        if (!(char in SPACES))
            return undefined;
        acc.idx++;
        return acc;
    }
    // `#` starts a comment that runs up to (not including) the next newline.
    function comments(acc) {
        const char = pattern[acc.idx];
        if (char !== '#')
            return undefined;
        while (acc.idx < pattern.length && pattern[acc.idx] !== '\n') {
            acc.idx++;
        }
        return acc;
    }
    // Fallback: copy one character. Always succeeds, which guarantees progress.
    function copy(acc) {
        const char = pattern[acc.idx++];
        acc.result += char;
        return acc;
    }
    const reducers = [escape, braces, spaces, comments, copy];
    const result = { idx: 0, result: '' };
    while (result.idx < pattern.length) {
        for (const r of reducers) {
            if (r(result))
                break;
        }
    }
    return result.result;
}
109
+ //# sourceMappingURL=textRegex.js.map
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@cspell/cspell-tools",
3
- "version": "8.19.3",
3
+ "version": "8.19.4",
4
4
  "description": "Tools to assist with the development of cSpell",
5
5
  "publishConfig": {
6
6
  "access": "public",
@@ -63,12 +63,12 @@
63
63
  },
64
64
  "homepage": "https://github.com/streetsidesoftware/cspell/tree/main/packages/cspell-tools#readme",
65
65
  "dependencies": {
66
- "@cspell/cspell-pipe": "8.19.3",
66
+ "@cspell/cspell-pipe": "8.19.4",
67
67
  "commander": "^13.1.0",
68
68
  "cosmiconfig": "9.0.0",
69
- "cspell-trie-lib": "8.19.3",
69
+ "cspell-trie-lib": "8.19.4",
70
70
  "glob": "^10.4.5",
71
- "hunspell-reader": "8.19.3",
71
+ "hunspell-reader": "8.19.4",
72
72
  "yaml": "^2.7.1"
73
73
  },
74
74
  "engines": {
@@ -79,5 +79,5 @@
79
79
  "ts-json-schema-generator": "^2.4.0"
80
80
  },
81
81
  "module": "bin.mjs",
82
- "gitHead": "cb6946b709da590d0c6175f9962799d18aa43e93"
82
+ "gitHead": "1bee5f5aa4429a1b1ae0e88934b093c5440b44dc"
83
83
  }