@cspell/cspell-tools 9.5.0 → 9.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (110)
  1. package/bin.mjs +1 -1
  2. package/cspell-tools.config.schema.json +29 -0
  3. package/dist/app.d.mts +35 -0
  4. package/dist/app.mjs +292 -0
  5. package/dist/build-OgMPaXPZ.mjs +1403 -0
  6. package/dist/index.d.mts +35 -0
  7. package/dist/index.mjs +3 -0
  8. package/package.json +10 -9
  9. package/dist/AppOptions.d.ts +0 -31
  10. package/dist/AppOptions.js +0 -2
  11. package/dist/FeatureFlags/FeatureFlags.d.ts +0 -34
  12. package/dist/FeatureFlags/FeatureFlags.js +0 -94
  13. package/dist/FeatureFlags/index.d.ts +0 -3
  14. package/dist/FeatureFlags/index.js +0 -3
  15. package/dist/FeatureFlags/parseFlags.d.ts +0 -3
  16. package/dist/FeatureFlags/parseFlags.js +0 -20
  17. package/dist/app.d.ts +0 -4
  18. package/dist/app.js +0 -86
  19. package/dist/build.d.ts +0 -12
  20. package/dist/build.js +0 -36
  21. package/dist/compile.d.ts +0 -5
  22. package/dist/compile.js +0 -41
  23. package/dist/compiler/CompileOptions.d.ts +0 -35
  24. package/dist/compiler/CompileOptions.js +0 -2
  25. package/dist/compiler/CompilerDefinitions.d.ts +0 -2
  26. package/dist/compiler/CompilerDefinitions.js +0 -2
  27. package/dist/compiler/Reader.d.ts +0 -3
  28. package/dist/compiler/Reader.js +0 -24
  29. package/dist/compiler/SourceReader.d.ts +0 -28
  30. package/dist/compiler/SourceReader.js +0 -44
  31. package/dist/compiler/WordsCollection.d.ts +0 -20
  32. package/dist/compiler/WordsCollection.js +0 -3
  33. package/dist/compiler/compile.d.ts +0 -26
  34. package/dist/compiler/compile.js +0 -270
  35. package/dist/compiler/createCompileRequest.d.ts +0 -4
  36. package/dist/compiler/createCompileRequest.js +0 -84
  37. package/dist/compiler/createWordsCollection.d.ts +0 -10
  38. package/dist/compiler/createWordsCollection.js +0 -111
  39. package/dist/compiler/index.d.ts +0 -4
  40. package/dist/compiler/index.js +0 -3
  41. package/dist/compiler/legacyLineToWords.d.ts +0 -4
  42. package/dist/compiler/legacyLineToWords.js +0 -20
  43. package/dist/compiler/logWithTimestamp.d.ts +0 -3
  44. package/dist/compiler/logWithTimestamp.js +0 -5
  45. package/dist/compiler/logger.d.ts +0 -4
  46. package/dist/compiler/logger.js +0 -9
  47. package/dist/compiler/readers/ReaderOptions.d.ts +0 -19
  48. package/dist/compiler/readers/ReaderOptions.js +0 -2
  49. package/dist/compiler/readers/readHunspellFiles.d.ts +0 -3
  50. package/dist/compiler/readers/readHunspellFiles.js +0 -57
  51. package/dist/compiler/readers/readTextFile.d.ts +0 -3
  52. package/dist/compiler/readers/readTextFile.js +0 -20
  53. package/dist/compiler/readers/regHunspellFile.d.ts +0 -2
  54. package/dist/compiler/readers/regHunspellFile.js +0 -2
  55. package/dist/compiler/readers/textFileReader.d.ts +0 -3
  56. package/dist/compiler/readers/textFileReader.js +0 -11
  57. package/dist/compiler/readers/trieFileReader.d.ts +0 -3
  58. package/dist/compiler/readers/trieFileReader.js +0 -16
  59. package/dist/compiler/splitCamelCaseIfAllowed.d.ts +0 -5
  60. package/dist/compiler/splitCamelCaseIfAllowed.js +0 -65
  61. package/dist/compiler/streamSourceWordsFromFile.d.ts +0 -3
  62. package/dist/compiler/streamSourceWordsFromFile.js +0 -6
  63. package/dist/compiler/text.d.ts +0 -10
  64. package/dist/compiler/text.js +0 -28
  65. package/dist/compiler/wordListCompiler.d.ts +0 -21
  66. package/dist/compiler/wordListCompiler.js +0 -205
  67. package/dist/compiler/wordListParser.d.ts +0 -61
  68. package/dist/compiler/wordListParser.js +0 -184
  69. package/dist/compiler/writeTextToFile.d.ts +0 -2
  70. package/dist/compiler/writeTextToFile.js +0 -16
  71. package/dist/config/config.d.ts +0 -200
  72. package/dist/config/config.js +0 -2
  73. package/dist/config/configDefaults.d.ts +0 -9
  74. package/dist/config/configDefaults.js +0 -9
  75. package/dist/config/configUtils.d.ts +0 -5
  76. package/dist/config/configUtils.js +0 -14
  77. package/dist/config/index.d.ts +0 -4
  78. package/dist/config/index.js +0 -3
  79. package/dist/config/normalizeConfig.d.ts +0 -8
  80. package/dist/config/normalizeConfig.js +0 -13
  81. package/dist/gzip/compressFiles.d.ts +0 -16
  82. package/dist/gzip/compressFiles.js +0 -42
  83. package/dist/gzip/gzip.d.ts +0 -3
  84. package/dist/gzip/gzip.js +0 -10
  85. package/dist/gzip/index.d.ts +0 -3
  86. package/dist/gzip/index.js +0 -3
  87. package/dist/shasum/checksum.d.ts +0 -7
  88. package/dist/shasum/checksum.js +0 -19
  89. package/dist/shasum/index.d.ts +0 -3
  90. package/dist/shasum/index.js +0 -3
  91. package/dist/shasum/shasum.d.ts +0 -38
  92. package/dist/shasum/shasum.js +0 -150
  93. package/dist/test/TestHelper.d.ts +0 -53
  94. package/dist/test/TestHelper.js +0 -121
  95. package/dist/test/console.d.ts +0 -9
  96. package/dist/test/console.js +0 -34
  97. package/dist/test/escapeRegEx.d.ts +0 -7
  98. package/dist/test/escapeRegEx.js +0 -9
  99. package/dist/test/normalizeOutput.d.ts +0 -3
  100. package/dist/test/normalizeOutput.js +0 -20
  101. package/dist/types.d.ts +0 -7
  102. package/dist/types.js +0 -2
  103. package/dist/util/errors.d.ts +0 -6
  104. package/dist/util/errors.js +0 -11
  105. package/dist/util/globP.d.ts +0 -7
  106. package/dist/util/globP.js +0 -7
  107. package/dist/util/index.d.ts +0 -2
  108. package/dist/util/index.js +0 -4
  109. package/dist/util/textRegex.d.ts +0 -3
  110. package/dist/util/textRegex.js +0 -109
package/dist/compiler/readers/readTextFile.d.ts
@@ -1,3 +0,0 @@
- export declare function readTextFile(filename: string): Promise<string>;
- export declare function readTextFileLines(filename: string): Promise<string[]>;
- //# sourceMappingURL=readTextFile.d.ts.map
package/dist/compiler/readers/readTextFile.js
@@ -1,20 +0,0 @@
- import assert from 'node:assert';
- import { Buffer } from 'node:buffer';
- import { promises as fs } from 'node:fs';
- import { decompress } from '../../gzip/index.js';
- const isGzFile = /\.gz$/;
- export function readTextFile(filename) {
- const content = fs
- .readFile(filename)
- .then(async (buffer) => (isGzFile.test(filename) ? decompress(buffer) : buffer))
- .then((buffer) => (assertIsBuffer(buffer), buffer.toString('utf8')));
- return content;
- }
- export async function readTextFileLines(filename) {
- const content = await readTextFile(filename);
- return content.split('\n');
- }
- function assertIsBuffer(value) {
- assert(Buffer.isBuffer(value));
- }
- //# sourceMappingURL=readTextFile.js.map
package/dist/compiler/readers/regHunspellFile.d.ts
@@ -1,2 +0,0 @@
- export declare const regHunspellFile: RegExp;
- //# sourceMappingURL=regHunspellFile.d.ts.map
package/dist/compiler/readers/regHunspellFile.js
@@ -1,2 +0,0 @@
- export const regHunspellFile = /\.(dic|aff)$/i;
- //# sourceMappingURL=regHunspellFile.js.map
package/dist/compiler/readers/textFileReader.d.ts
@@ -1,3 +0,0 @@
- import type { BaseReader } from './ReaderOptions.js';
- export declare function textFileReader(filename: string): Promise<BaseReader>;
- //# sourceMappingURL=textFileReader.d.ts.map
package/dist/compiler/readers/textFileReader.js
@@ -1,11 +0,0 @@
- import { readTextFile } from './readTextFile.js';
- export async function textFileReader(filename) {
- const content = await readTextFile(filename);
- const words = content.split('\n').map((s) => s.trim());
- return {
- type: 'TextFile',
- size: words.length,
- lines: words,
- };
- }
- //# sourceMappingURL=textFileReader.js.map
package/dist/compiler/readers/trieFileReader.d.ts
@@ -1,3 +0,0 @@
- import type { DictionaryReader } from './ReaderOptions.js';
- export declare function trieFileReader(filename: string): Promise<DictionaryReader>;
- //# sourceMappingURL=trieFileReader.d.ts.map
package/dist/compiler/readers/trieFileReader.js
@@ -1,16 +0,0 @@
- import { importTrie, Trie } from 'cspell-trie-lib';
- import { readTextFileLines } from './readTextFile.js';
- export async function trieFileReader(filename) {
- const trieRoot = importTrie(await readTextFileLines(filename));
- const trie = new Trie(trieRoot);
- const words = trie.words();
- return {
- type: 'Trie',
- get size() {
- return trie.size();
- },
- lines: words,
- hasWord: (word, caseSensitive) => trie.hasWord(word, caseSensitive),
- };
- }
- //# sourceMappingURL=trieFileReader.js.map
package/dist/compiler/splitCamelCaseIfAllowed.d.ts
@@ -1,5 +0,0 @@
- import type { AllowedSplitWordsCollection } from './WordsCollection.js';
- export declare const regExpSpaceOrDash: RegExp;
- export declare const regExpIsNumber: RegExp;
- export declare function splitCamelCaseIfAllowed(word: string, allowedWords: AllowedSplitWordsCollection, keepCase: boolean, compoundPrefix: string, minCompoundLength: number): string[];
- //# sourceMappingURL=splitCamelCaseIfAllowed.d.ts.map
package/dist/compiler/splitCamelCaseIfAllowed.js
@@ -1,65 +0,0 @@
- import { isSingleLetter, splitCamelCaseWord } from './text.js';
- export const regExpSpaceOrDash = /[- ]+/g;
- export const regExpIsNumber = /^\d+$/;
- export function splitCamelCaseIfAllowed(word, allowedWords, keepCase, compoundPrefix, minCompoundLength) {
- const split = [...splitCamelCase(word)];
- if (split.length == 1)
- return adjustCases(split, allowedWords, keepCase);
- const missing = split.some((w) => isUnknown(w, allowedWords));
- if (missing)
- return [word];
- const wordIndexes = calcWordIndex(word, split);
- const adjusted = adjustCases(split, allowedWords, keepCase);
- return !compoundPrefix
- ? adjusted
- : adjusted.map((w, i) => {
- const { px, sx } = wordIndexes[i];
- const canCompound = w.length >= minCompoundLength;
- const lc = w.toLowerCase();
- const p = canCompound && isSingleLetter(px) ? compoundPrefix : '';
- const s = canCompound && isSingleLetter(sx) ? compoundPrefix : '';
- if (lc.length < 4 || allowedWords.has(w, true))
- return p + w + s;
- return p + lc + s;
- });
- }
- function adjustCases(words, allowedWords, keepCase) {
- return words.map((w) => adjustCase(w, allowedWords, keepCase));
- }
- function adjustCase(word, allowedWords, keepCase) {
- const lc = word.toLowerCase();
- if (!allowedWords.has(lc, true))
- return word;
- if (lc === word)
- return word;
- if (word.slice(1).toLowerCase() === word.slice(1))
- return lc;
- if (!keepCase && word.toUpperCase() === word)
- return word.toLowerCase();
- return word;
- }
- function isUnknown(word, allowedWords) {
- if (word === 'ERROR') {
- return !allowedWords.has(word, false);
- }
- return !allowedWords.has(word, false);
- }
- function splitCamelCase(word) {
- const splitWords = splitCamelCaseWord(word).filter((word) => !regExpIsNumber.test(word));
- // We only want to preserve this: "New York" and not "Namespace DNSLookup"
- if (splitWords.length > 1 && regExpSpaceOrDash.test(word)) {
- return splitWords.flatMap((w) => w.split(regExpSpaceOrDash));
- }
- return splitWords;
- }
- function calcWordIndex(word, words) {
- let i = 0;
- return words.map((w) => {
- const j = word.indexOf(w, i);
- const k = j + w.length;
- const wIndex = { word: w, i: j, px: word[j - 1] || '', sx: word[k] || '' };
- i = k;
- return wIndex;
- });
- }
- //# sourceMappingURL=splitCamelCaseIfAllowed.js.map
package/dist/compiler/streamSourceWordsFromFile.d.ts
@@ -1,3 +0,0 @@
- import type { SourceReaderOptions } from './SourceReader.js';
- export declare function streamSourceWordsFromFile(filename: string, options: SourceReaderOptions): Promise<Iterable<string>>;
- //# sourceMappingURL=streamSourceWordsFromFile.d.ts.map
package/dist/compiler/streamSourceWordsFromFile.js
@@ -1,6 +0,0 @@
- import { createSourceReader } from './SourceReader.js';
- export async function streamSourceWordsFromFile(filename, options) {
- const reader = await createSourceReader(filename, options);
- return reader.words;
- }
- //# sourceMappingURL=streamSourceWordsFromFile.js.map
package/dist/compiler/text.d.ts
@@ -1,10 +0,0 @@
- /**
- * Split camelCase words into an array of strings.
- */
- export declare function splitCamelCaseWord(word: string): string[];
- /**
- * Split camelCase words into an array of strings, try to fix English words.
- */
- export declare function splitCamelCaseWordAutoStem(word: string): string[];
- export declare function isSingleLetter(c: string): boolean;
- //# sourceMappingURL=text.d.ts.map
package/dist/compiler/text.js
@@ -1,28 +0,0 @@
- // cSpell:ignore ings ning gimuy
- const regExUpperSOrIng = /(\p{Lu}+'?(?:s|ing|ies|es|ings|ed|ning))(?!\p{Ll})/gu;
- const regExSplitWords = /([\p{Ll}])([\p{Lu}])/gu;
- const regExSplitWords2 = /(\p{Lu})(\p{Lu}\p{Ll})/gu;
- const regExpIsLetter = /^\p{L}\p{M}{0,2}$/u;
- /**
- * Split camelCase words into an array of strings.
- */
- export function splitCamelCaseWord(word) {
- const pass1 = word.replaceAll(regExSplitWords, '$1|$2');
- const pass2 = pass1.replaceAll(regExSplitWords2, '$1|$2');
- const pass3 = pass2.replaceAll(/[\d_]+/g, '|');
- return pass3.split('|').filter((a) => !!a);
- }
- /**
- * Split camelCase words into an array of strings, try to fix English words.
- */
- export function splitCamelCaseWordAutoStem(word) {
- return splitCamelCaseWord(word.replaceAll(regExUpperSOrIng, tailToLowerCase));
- }
- function tailToLowerCase(word) {
- const letters = [...word];
- return letters[0] + letters.slice(1).join('').toLowerCase();
- }
- export function isSingleLetter(c) {
- return regExpIsLetter.test(c);
- }
- //# sourceMappingURL=text.js.map
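Note (annotation, not part of the published diff): the removed splitCamelCaseWord above splits a word in three regex passes. A minimal standalone TypeScript sketch of those passes, re-implemented here purely for illustration (it is not an export of the package):

    // Pass 1: break between a lower-case letter and the upper-case letter that follows it.
    const regExSplitWords = /([\p{Ll}])([\p{Lu}])/gu;
    // Pass 2: break between an upper-case run and a following Upper+lower pair (e.g. "XMLParser").
    const regExSplitWords2 = /(\p{Lu})(\p{Lu}\p{Ll})/gu;
    function splitCamelCaseWord(word: string): string[] {
        const pass1 = word.replaceAll(regExSplitWords, '$1|$2');
        const pass2 = pass1.replaceAll(regExSplitWords2, '$1|$2');
        const pass3 = pass2.replaceAll(/[\d_]+/g, '|'); // digits and underscores become breaks
        return pass3.split('|').filter((a) => !!a);
    }
    // splitCamelCaseWord('myXMLParser2Go') → ['my', 'XML', 'Parser', 'Go']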
package/dist/compiler/wordListCompiler.d.ts
@@ -1,21 +0,0 @@
- import type { CompileOptions } from './CompileOptions.js';
- import { WordListCompiler } from './CompilerDefinitions.js';
- export declare function compileWordListToTarget(lines: Iterable<string>, destFilename: string, options: CompileOptions): Promise<void>;
- export declare function createWordListCompiler(options: CompileOptions): WordListCompiler;
- declare function removeDuplicates(words: Iterable<string>): Iterable<string>;
- export declare function createTargetFile(destFilename: string, seq: Iterable<string> | string, compress?: boolean): Promise<void>;
- export interface TrieOptions {
- base?: number | undefined;
- trie3?: boolean | undefined;
- trie4?: boolean | undefined;
- }
- export interface CompileTrieOptions extends CompileOptions, TrieOptions {
- }
- export declare function compileTrieToTarget(words: Iterable<string>, destFilename: string, options: CompileTrieOptions): Promise<void>;
- export declare function createTrieCompiler(options: TrieOptions): WordListCompiler;
- export declare const __testing__: {
- wordListHeader: string;
- removeDuplicates: typeof removeDuplicates;
- };
- export {};
- //# sourceMappingURL=wordListCompiler.d.ts.map
package/dist/compiler/wordListCompiler.js
@@ -1,205 +0,0 @@
- import { mkdir } from 'node:fs/promises';
- import * as path from 'node:path';
- import { opAppend, opMap, pipe } from '@cspell/cspell-pipe/sync';
- import * as Trie from 'cspell-trie-lib';
- import { getLogger } from './logger.js';
- import { normalizeTargetWords } from './wordListParser.js';
- import { writeTextToFile } from './writeTextToFile.js';
- const mkdirp = async (p) => {
- await mkdir(p, { recursive: true });
- };
- // Indicate that a word list has already been processed.
- const wordListHeader = `
- # cspell-tools: keep-case no-split`;
- const wordListHeaderLines = wordListHeader.split('\n').map((a) => a.trim());
- export async function compileWordListToTarget(lines, destFilename, options) {
- const compiler = createWordListCompiler(options);
- return createTargetFile(destFilename, compiler(lines));
- }
- export function createWordListCompiler(options) {
- return (lines) => {
- const finalLines = normalize(lines, options);
- const directives = options.dictionaryDirectives ?? [];
- const directivesLines = directives.map((a) => `# cspell-dictionary: ${a}`);
- return pipe([...wordListHeaderLines, ...directivesLines, ''], opAppend(finalLines), opMap((a) => a + '\n'));
- };
- }
- function normalize(lines, options) {
- const filter = normalizeTargetWords(options);
- const cleanLines = options.removeDuplicates ? removeDuplicates(lines) : lines;
- const iter = pipe(cleanLines, filter);
- if (!options.sort)
- return iter;
- const result = new Set(iter);
- return [...result].sort();
- }
- function stripCompoundAFix(word) {
- return word.replaceAll('*', '').replaceAll('+', '');
- }
- function* removeDuplicates(words) {
- const wordSet = new Set(words);
- const wordForms = new Map();
- for (const word of wordSet) {
- const lc = stripCompoundAFix(word.toLowerCase());
- const forms = wordForms.get(lc) ?? [];
- forms.push(word);
- wordForms.set(lc, forms);
- }
- for (const forms of wordForms.values()) {
- if (forms.length <= 1) {
- yield* forms;
- continue;
- }
- const mForms = removeDuplicateForms(forms);
- // if (forms.some((a) => /^[*+]?col[*+]?$/.test(a))) {
- // console.warn('Found col %o', { forms, mForms });
- // }
- if (mForms.size <= 1) {
- for (const form of mForms.values()) {
- yield* form;
- }
- continue;
- }
- // Handle upper / lower mix.
- const words = [...mForms.keys()];
- const lc = words[0].toLowerCase();
- const lcForm = mForms.get(lc);
- if (!lcForm) {
- for (const form of mForms.values()) {
- yield* form;
- }
- continue;
- }
- mForms.delete(lc);
- const sLcForms = new Set(lcForm);
- yield* lcForm;
- if (sLcForms.has('*' + lc + '*'))
- continue;
- for (const forms of mForms.values()) {
- for (const form of forms) {
- if (sLcForms.has(form.toLowerCase()))
- continue;
- yield form;
- }
- }
- }
- }
- /**
- * solo
- * optional_prefix*
- * optional_suffix*
- * required_prefix+
- * required_suffix+
- */
- var Flags;
- (function (Flags) {
- Flags[Flags["base"] = 0] = "base";
- Flags[Flags["none"] = 1] = "none";
- Flags[Flags["both"] = 2] = "both";
- Flags[Flags["pfx"] = 4] = "pfx";
- Flags[Flags["sfx"] = 8] = "sfx";
- Flags[Flags["all"] = 15] = "all";
- })(Flags || (Flags = {}));
- function applyFlags(word, flags) {
- if (flags === Flags.none)
- return [word];
- if (flags === Flags.all)
- return ['*' + word + '*'];
- if (flags === Flags.both)
- return ['+' + word + '+'];
- if (flags === Flags.pfx)
- return [word + '+'];
- if (flags === Flags.sfx)
- return ['+' + word];
- if (flags === (Flags.none | Flags.sfx))
- return ['*' + word];
- if (flags === (Flags.none | Flags.pfx))
- return [word + '*'];
- if (flags === (Flags.none | Flags.pfx | Flags.sfx))
- return [word + '*', '*' + word];
- if (flags === (Flags.none | Flags.both)) {
- // the "correct" answer is [word, '+' + word + '+']
- // but practically it makes sense to allow all combinations.
- return ['*' + word + '*'];
- }
- if (flags === (Flags.none | Flags.both | Flags.sfx))
- return [word, '+' + word + '*'];
- if (flags === (Flags.none | Flags.both | Flags.pfx))
- return [word, '*' + word + '+'];
- if (flags === (Flags.both | Flags.pfx))
- return ['*' + word + '+'];
- if (flags === (Flags.both | Flags.sfx))
- return ['+' + word + '*'];
- if (flags === (Flags.both | Flags.pfx | Flags.sfx))
- return ['+' + word + '*', '*' + word + '+'];
- return ['+' + word, word + '+'];
- }
- function removeDuplicateForms(forms) {
- function flags(word, flag = 0) {
- const canBePrefix = word.endsWith('*');
- const mustBePrefix = !canBePrefix && word.endsWith('+');
- const isPrefix = canBePrefix || mustBePrefix;
- const canBeSuffix = word.startsWith('*');
- const mustBeSuffix = !canBeSuffix && word.startsWith('+');
- const isSuffix = canBeSuffix || mustBeSuffix;
- if (canBePrefix && canBeSuffix)
- return flag | Flags.all;
- if (mustBePrefix && mustBeSuffix)
- return flag | Flags.both;
- if (!isPrefix && !isSuffix)
- return flag | Flags.none;
- flag |= isPrefix && !isSuffix ? Flags.pfx : 0;
- flag |= isSuffix && !isPrefix ? Flags.sfx : 0;
- flag |= canBePrefix && !mustBeSuffix ? Flags.none : 0;
- flag |= canBeSuffix && !mustBePrefix ? Flags.none : 0;
- return flag;
- }
- const m = new Map();
- for (const form of forms) {
- const k = stripCompoundAFix(form);
- m.set(k, flags(form, m.get(k)));
- }
- return new Map([...m.entries()].map(([form, flag]) => {
- return [form, applyFlags(form, flag)];
- }));
- }
- export async function createTargetFile(destFilename, seq, compress) {
- const rel = path.relative(process.cwd(), destFilename).replaceAll(path.sep, '/');
- const log = getLogger();
- log(`Writing to file ${rel}${compress ? '.gz' : ''}`);
- const destDir = path.dirname(destFilename);
- await mkdirp(destDir);
- await writeTextToFile(destFilename, seq, compress);
- }
- export async function compileTrieToTarget(words, destFilename, options) {
- await createTrieTarget(destFilename, options)(words);
- }
- function createTrieTarget(destFilename, options) {
- return async (words) => {
- await createTargetFile(destFilename, createTrieCompiler(options)(words));
- const log = getLogger();
- log(`Done writing to file ${path.basename(destFilename)}`);
- };
- }
- export function createTrieCompiler(options) {
- return (words) => {
- const log = getLogger();
- log('Reading Words into Trie');
- const base = options.base ?? 32;
- const version = options.trie4 ? 4 : options.trie3 ? 3 : 1;
- const root = Trie.buildTrie(words).root;
- log('Reduce duplicate word endings');
- const trie = Trie.consolidate(root);
- log('Trie compilation complete');
- return Trie.serializeTrie(trie, {
- base,
- comment: 'Built by cspell-tools.',
- version,
- });
- };
- }
- export const __testing__ = {
- wordListHeader,
- removeDuplicates,
- };
- //# sourceMappingURL=wordListCompiler.js.map
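Note (annotation, not part of the published diff): in the removed removeDuplicates above, word forms are first grouped by their case-folded spelling with the compound markers '*' and '+' stripped, and the merged affix flags are then re-applied per group by removeDuplicateForms/applyFlags. A standalone TypeScript sketch of just that grouping step, for illustration only:

    function stripCompoundAFix(word: string): string {
        return word.replaceAll('*', '').replaceAll('+', '');
    }
    // Group raw dictionary entries that differ only in case or compound markers.
    function groupForms(words: Iterable<string>): Map<string, string[]> {
        const wordForms = new Map<string, string[]>();
        for (const word of new Set(words)) {
            const key = stripCompoundAFix(word.toLowerCase());
            const forms = wordForms.get(key) ?? [];
            forms.push(word);
            wordForms.set(key, forms);
        }
        return wordForms;
    }
    // groupForms(['Play', 'play+', '+play*']) → Map { 'play' => ['Play', 'play+', '+play*'] }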
package/dist/compiler/wordListParser.d.ts
@@ -1,61 +0,0 @@
- import { type Operator } from '@cspell/cspell-pipe/sync';
- import type { CompileOptions } from './CompileOptions.js';
- import type { AllowedSplitWordsCollection } from './WordsCollection.js';
- export declare function normalizeTargetWords(options: CompileOptions): Operator<string>;
- export interface ParseFileOptions {
- /**
- * Preserve case
- * @default true
- */
- keepCase?: boolean | undefined;
- /**
- * Tell the parser to split into words along spaces.
- * @default false
- */
- split?: boolean | undefined;
- /**
- * When splitting tells the parser to output both the split and non-split versions of the line.
- * @default false
- */
- splitKeepBoth?: boolean | undefined;
- /**
- * Use legacy splitting.
- * @default false
- */
- legacy?: boolean | undefined;
- allowedSplitWords: AllowedSplitWordsCollection;
- /**
- * Words that have been split using the `allowedSplitWords` are added to the dictionary as compoundable words.
- * These words are prefixed / suffixed with `*`.
- * @default undefined
- */
- storeSplitWordsAsCompounds: boolean | undefined;
- /**
- * Controls the minimum length of a compound word when storing words using `storeSplitWordsAsCompounds`.
- * The compound words are prefixed / suffixed with `*`, to allow them to be combined with other compound words.
- * If the length is too low, then the dictionary will consider many misspelled words as correct.
- * @default 4
- */
- minCompoundLength: number | undefined;
- }
- type ParseFileOptionsRequired = Required<ParseFileOptions>;
- export declare const defaultParseDictionaryOptions: ParseFileOptionsRequired;
- export declare const cSpellToolDirective = "cspell-tools:";
- export declare const setOfCSpellDirectiveFlags: string[];
- /**
- * Normalizes a dictionary words based upon prefix / suffixes.
- * Case insensitive versions are also generated.
- * @param options - defines prefixes used when parsing lines.
- * @returns words that have been normalized.
- */
- export declare function createParseFileLineMapper(options?: Partial<ParseFileOptions>): Operator<string>;
- /**
- * Normalizes a dictionary words based upon prefix / suffixes.
- * Case insensitive versions are also generated.
- * @param lines - one word per line
- * @param _options - defines prefixes used when parsing lines.
- * @returns words that have been normalized.
- */
- export declare function parseFileLines(lines: Iterable<string> | string, options: ParseFileOptions): Iterable<string>;
- export {};
- //# sourceMappingURL=wordListParser.d.ts.map
package/dist/compiler/wordListParser.js
@@ -1,184 +0,0 @@
- import { opCombine, opCombine as opPipe, opFilter, opMap } from '@cspell/cspell-pipe/sync';
- import { createDictionaryLineParser } from 'cspell-trie-lib';
- import { uniqueFilter } from 'hunspell-reader';
- import { defaultCompileSourceOptions } from '../config/configDefaults.js';
- import { legacyLineToWords } from './legacyLineToWords.js';
- import { splitCamelCaseIfAllowed } from './splitCamelCaseIfAllowed.js';
- export function normalizeTargetWords(options) {
- const lineParser = createDictionaryLineParser({
- stripCaseAndAccents: options.generateNonStrict,
- stripCaseAndAccentsOnForbidden: true,
- keepOptionalCompoundCharacter: true,
- });
- const operations = [
- opFilter((a) => !!a),
- lineParser,
- options.sort ? createInlineBufferedSort(10_000) : undefined,
- opFilter(uniqueFilter(10_000)),
- options.filter ? opFilter(options.filter) : undefined,
- ].filter(isDefined);
- return opCombine(...operations);
- }
- function isDefined(v) {
- return v !== undefined;
- }
- function createInlineBufferedSort(bufferSize = 1000) {
- function* inlineBufferedSort(lines) {
- const buffer = [];
- for (const line of lines) {
- buffer.push(line);
- if (buffer.length >= bufferSize) {
- buffer.sort();
- yield* buffer;
- buffer.length = 0;
- }
- }
- buffer.sort();
- yield* buffer;
- }
- return inlineBufferedSort;
- }
- const commentCharacter = '#';
- const _defaultOptions = {
- keepCase: true,
- legacy: false,
- split: false,
- splitKeepBoth: false,
- // splitSeparator: regExpSplit,
- allowedSplitWords: { has: () => true, size: 0 },
- storeSplitWordsAsCompounds: defaultCompileSourceOptions.storeSplitWordsAsCompounds,
- minCompoundLength: defaultCompileSourceOptions.minCompoundLength,
- };
- export const defaultParseDictionaryOptions = Object.freeze(_defaultOptions);
- export const cSpellToolDirective = 'cspell-tools:';
- export const setOfCSpellDirectiveFlags = ['no-split', 'split', 'keep-case', 'no-keep-case', 'legacy'];
- /**
- * Normalizes a dictionary words based upon prefix / suffixes.
- * Case insensitive versions are also generated.
- * @param options - defines prefixes used when parsing lines.
- * @returns words that have been normalized.
- */
- export function createParseFileLineMapper(options) {
- const _options = options || _defaultOptions;
- const { splitKeepBoth = _defaultOptions.splitKeepBoth, allowedSplitWords = _defaultOptions.allowedSplitWords, storeSplitWordsAsCompounds, minCompoundLength = _defaultOptions.minCompoundLength, } = _options;
- let { legacy = _defaultOptions.legacy } = _options;
- let { split = _defaultOptions.split, keepCase = legacy ? false : _defaultOptions.keepCase } = _options;
- const compoundFix = storeSplitWordsAsCompounds ? '+' : '';
- function isString(line) {
- return typeof line === 'string';
- }
- function trim(line) {
- return line.trim();
- }
- function removeComments(line) {
- const idx = line.indexOf(commentCharacter);
- if (idx < 0)
- return line;
- const idxDirective = line.indexOf(cSpellToolDirective, idx);
- if (idxDirective >= 0) {
- const flags = line
- .slice(idxDirective)
- .split(/[\s,;]/g)
- .map((s) => s.trim())
- .filter((a) => !!a);
- for (const flag of flags) {
- switch (flag) {
- case 'split': {
- split = true;
- break;
- }
- case 'no-split': {
- split = false;
- break;
- }
- case 'keep-case': {
- keepCase = true;
- legacy = false;
- break;
- }
- case 'no-keep-case': {
- keepCase = false;
- break;
- }
- case 'legacy': {
- keepCase = false;
- legacy = true;
- break;
- }
- }
- }
- }
- return line.slice(0, idx).trim();
- }
- function filterEmptyLines(line) {
- return !!line;
- }
- const regNonWordOrDigit = /[^\p{L}\p{M}'\w-]+/giu;
- function splitLine(line) {
- line = line.replace(/#.*/, ''); // remove comment
- line = line.trim();
- line = line.replaceAll(/\bU\+[0-9A-F]{4}\b/gi, '|'); // Remove Unicode Definitions
- line = line.replaceAll(/\\U[0-9A-F]{4}/gi, '|'); // Remove Unicode Definitions
- line = line.replaceAll(regNonWordOrDigit, '|');
- line = line.replaceAll(/'(?=\|)/g, ''); // remove trailing '
- line = line.replace(/'$/, ''); // remove trailing '
- line = line.replaceAll(/(?<=\|)'/g, ''); // remove leading '
- line = line.replace(/^'/, ''); // remove leading '
- line = line.replaceAll(/\s*\|\s*/g, '|'); // remove spaces around |
- line = line.replaceAll(/[|]+/g, '|'); // reduce repeated |
- line = line.replace(/^\|/, ''); // remove leading |
- line = line.replace(/\|$/, ''); // remove trailing |
- const lines = line
- .split('|')
- .map((a) => a.trim())
- .filter((a) => !!a)
- .filter((a) => !/^[0-9_-]+$/.test(a)) // pure numbers and symbols
- .filter((a) => !/^0[xo][0-9A-F]+$/i.test(a)); // c-style hex/octal digits
- return lines;
- }
- function splitWordIntoWords(word) {
- return splitCamelCaseIfAllowed(word, allowedSplitWords, keepCase, compoundFix, minCompoundLength);
- }
- function* splitWords(lines) {
- for (const line of lines) {
- if (legacy) {
- yield* legacyLineToWords(line, keepCase, allowedSplitWords);
- continue;
- }
- if (split) {
- const words = splitLine(line);
- yield* !allowedSplitWords.size ? words : words.flatMap((word) => splitWordIntoWords(word));
- if (!splitKeepBoth)
- continue;
- }
- yield line.replaceAll(/["]/g, '');
- }
- }
- function* unique(lines) {
- const known = new Set();
- for (const line of lines) {
- if (known.has(line))
- continue;
- known.add(line);
- yield line;
- }
- }
- function* splitLines(paragraphs) {
- for (const paragraph of paragraphs) {
- yield* paragraph.split('\n');
- }
- }
- const processLines = opPipe(opFilter(isString), splitLines, opMap(removeComments), splitWords, opMap(trim), opFilter(filterEmptyLines), unique);
- return processLines;
- }
- /**
- * Normalizes a dictionary words based upon prefix / suffixes.
- * Case insensitive versions are also generated.
- * @param lines - one word per line
- * @param _options - defines prefixes used when parsing lines.
- * @returns words that have been normalized.
- */
- export function parseFileLines(lines, options) {
- return createParseFileLineMapper(options)(typeof lines === 'string' ? [lines] : lines);
- }
- //# sourceMappingURL=wordListParser.js.map
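Note (annotation, not part of the published diff): the removed removeComments above scans '#' comments for the 'cspell-tools:' directive and toggles the parser state using the flags listed in setOfCSpellDirectiveFlags ('no-split', 'split', 'keep-case', 'no-keep-case', 'legacy'). A small illustrative source word list exercising those directives (example data only, not taken from the diff):

    const sampleSource = [
        '# cspell-tools: split keep-case', // switch to split mode, preserving case
        'New York',
        '# cspell-tools: no-split', // back to one entry per line
        'ice-cream',
    ].join('\n');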