@cspell/cspell-tools 6.11.1 → 6.13.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/AppOptions.d.ts +28 -0
- package/dist/AppOptions.js +3 -0
- package/dist/FeatureFlags/FeatureFlags.d.ts +34 -0
- package/dist/FeatureFlags/FeatureFlags.js +99 -0
- package/dist/FeatureFlags/index.d.ts +3 -0
- package/dist/FeatureFlags/index.js +10 -0
- package/dist/FeatureFlags/parseFlags.d.ts +3 -0
- package/dist/FeatureFlags/parseFlags.js +24 -0
- package/dist/app.d.ts +2 -21
- package/dist/app.js +25 -144
- package/dist/build.d.ts +7 -0
- package/dist/build.js +59 -0
- package/dist/compile.d.ts +4 -0
- package/dist/compile.js +24 -0
- package/dist/compiler/CompileOptions.d.ts +12 -0
- package/dist/compiler/CompileOptions.js +3 -0
- package/dist/compiler/Reader.d.ts +16 -11
- package/dist/compiler/Reader.js +38 -91
- package/dist/compiler/compile.d.ts +11 -0
- package/dist/compiler/compile.js +157 -0
- package/dist/compiler/createCompileRequest.d.ts +4 -0
- package/dist/compiler/createCompileRequest.js +86 -0
- package/dist/compiler/fileWriter.d.ts +2 -3
- package/dist/compiler/fileWriter.js +5 -7
- package/dist/compiler/globP.d.ts +2 -0
- package/dist/compiler/globP.js +16 -0
- package/dist/compiler/index.d.ts +4 -1
- package/dist/compiler/index.js +9 -15
- package/dist/compiler/iterateWordsFromFile.d.ts +1 -2
- package/dist/compiler/iterateWordsFromFile.js +1 -1
- package/dist/compiler/legacyLineToWords.d.ts +3 -0
- package/dist/compiler/legacyLineToWords.js +54 -0
- package/dist/compiler/logWithTimestamp.d.ts +3 -0
- package/dist/compiler/logWithTimestamp.js +9 -0
- package/dist/compiler/logger.d.ts +4 -0
- package/dist/compiler/logger.js +14 -0
- package/dist/compiler/readTextFile.d.ts +3 -0
- package/dist/compiler/readTextFile.js +45 -0
- package/dist/compiler/wordListCompiler.d.ts +3 -33
- package/dist/compiler/wordListCompiler.js +13 -169
- package/dist/compiler/wordListParser.d.ts +46 -0
- package/dist/compiler/wordListParser.js +171 -0
- package/dist/compiler/writeTextToFile.d.ts +3 -0
- package/dist/compiler/writeTextToFile.js +44 -0
- package/dist/config/config.d.ts +109 -0
- package/dist/config/config.js +3 -0
- package/dist/config/configUtils.d.ts +5 -0
- package/dist/config/configUtils.js +20 -0
- package/dist/config/index.d.ts +4 -0
- package/dist/config/index.js +10 -0
- package/dist/config/normalizeConfig.d.ts +8 -0
- package/dist/config/normalizeConfig.js +40 -0
- package/dist/test/TestHelper.d.ts +42 -0
- package/dist/test/TestHelper.js +131 -0
- package/dist/test/console.d.ts +10 -0
- package/dist/test/console.js +23 -0
- package/dist/test/escapeRegEx.d.ts +7 -0
- package/dist/test/escapeRegEx.js +13 -0
- package/dist/test/normalizeOutput.d.ts +3 -0
- package/dist/test/normalizeOutput.js +46 -0
- package/package.json +15 -11
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
|
|
3
|
+
if (k2 === undefined) k2 = k;
|
|
4
|
+
var desc = Object.getOwnPropertyDescriptor(m, k);
|
|
5
|
+
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
|
|
6
|
+
desc = { enumerable: true, get: function() { return m[k]; } };
|
|
7
|
+
}
|
|
8
|
+
Object.defineProperty(o, k2, desc);
|
|
9
|
+
}) : (function(o, m, k, k2) {
|
|
10
|
+
if (k2 === undefined) k2 = k;
|
|
11
|
+
o[k2] = m[k];
|
|
12
|
+
}));
|
|
13
|
+
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
|
|
14
|
+
Object.defineProperty(o, "default", { enumerable: true, value: v });
|
|
15
|
+
}) : function(o, v) {
|
|
16
|
+
o["default"] = v;
|
|
17
|
+
});
|
|
18
|
+
var __importStar = (this && this.__importStar) || function (mod) {
|
|
19
|
+
if (mod && mod.__esModule) return mod;
|
|
20
|
+
var result = {};
|
|
21
|
+
if (mod != null) for (var k in mod) if (k !== "default" && Object.prototype.hasOwnProperty.call(mod, k)) __createBinding(result, mod, k);
|
|
22
|
+
__setModuleDefault(result, mod);
|
|
23
|
+
return result;
|
|
24
|
+
};
|
|
25
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
26
|
+
exports.readTextFileLines = exports.readTextFile = void 0;
|
|
27
|
+
const fs_1 = require("fs");
|
|
28
|
+
const util_1 = require("util");
|
|
29
|
+
const zlib = __importStar(require("zlib"));
|
|
30
|
+
const gunzip = (0, util_1.promisify)(zlib.gunzip);
|
|
31
|
+
const isGzFile = /\.gz$/;
|
|
32
|
+
/**
 * Reads a file and returns its content decoded as UTF-8 text.
 *
 * Files whose name ends in `.gz` are gunzipped before being decoded.
 *
 * @param filename - path of the file to read.
 * @returns a promise resolving to the text content of the file.
 */
async function readTextFile(filename) {
    const raw = await fs_1.promises.readFile(filename);
    // Only decompress when the file name carries the `.gz` extension.
    const bytes = isGzFile.test(filename) ? await gunzip(raw) : raw;
    return bytes.toString('utf8');
}
|
|
39
|
+
exports.readTextFile = readTextFile;
|
|
40
|
+
/**
 * Reads a text file (optionally gzipped, see `readTextFile`) and splits it into lines.
 *
 * The split is on `'\n'` only, so a `'\r'` from CRLF line endings stays at the
 * end of each line.
 *
 * @param filename - path of the file to read.
 * @returns a promise resolving to the lines of the file.
 */
async function readTextFileLines(filename) {
    const text = await readTextFile(filename);
    const lines = text.split('\n');
    return lines;
}
|
|
44
|
+
exports.readTextFileLines = readTextFileLines;
|
|
45
|
+
//# sourceMappingURL=readTextFile.js.map
|
|
@@ -1,20 +1,5 @@
|
|
|
1
|
-
import {
|
|
2
|
-
|
|
3
|
-
export declare type Logger = (message?: any, ...optionalParams: any[]) => void;
|
|
4
|
-
export declare function setLogger(logger?: Logger): void;
|
|
5
|
-
declare type Normalizer = (lines: Sequence<string>) => Sequence<string>;
|
|
6
|
-
export declare function legacyNormalizeWords(lines: Sequence<string>): Sequence<string>;
|
|
7
|
-
export declare function legacyLineToWords(line: string): Sequence<string>;
|
|
8
|
-
export interface CompileOptions {
|
|
9
|
-
skipNormalization: boolean | undefined;
|
|
10
|
-
splitWords: boolean | undefined;
|
|
11
|
-
keepRawCase: boolean;
|
|
12
|
-
sort: boolean;
|
|
13
|
-
legacy: boolean | undefined;
|
|
14
|
-
}
|
|
15
|
-
declare function createNormalizer(options: CompileOptions): Normalizer;
|
|
16
|
-
export declare function compileWordList(lines: Sequence<string>, destFilename: string, options: CompileOptions): Promise<void>;
|
|
17
|
-
export declare function createWordListTarget(destFilename: string): (seq: Sequence<string>) => Promise<void>;
|
|
1
|
+
import { CompileOptions } from './CompileOptions';
|
|
2
|
+
export declare function compileWordList(lines: Iterable<string>, destFilename: string, options: CompileOptions): Promise<void>;
|
|
18
3
|
export interface TrieOptions {
|
|
19
4
|
base?: number;
|
|
20
5
|
trie3?: boolean;
|
|
@@ -22,23 +7,8 @@ export interface TrieOptions {
|
|
|
22
7
|
}
|
|
23
8
|
export interface CompileTrieOptions extends CompileOptions, TrieOptions {
|
|
24
9
|
}
|
|
25
|
-
export declare
|
|
26
|
-
export declare function compileTrie(words: Sequence<string>, destFilename: string, options: CompileTrieOptions): Promise<void>;
|
|
27
|
-
export declare function createTrieTarget(destFilename: string, options: TrieOptions): (words: Sequence<string>) => Promise<void>;
|
|
28
|
-
/**
|
|
29
|
-
* Splits a line of text into words, but does not split words.
|
|
30
|
-
* @param line text line to split.
|
|
31
|
-
* @returns array of words
|
|
32
|
-
* @example `readline.clearLine(stream, dir)` => ['readline', 'clearLine', 'stream', 'dir']
|
|
33
|
-
* @example `New York` => ['New', 'York']
|
|
34
|
-
* @example `don't` => [`don't`]
|
|
35
|
-
* @example `Event: 'SIGCONT'` => ['Event', 'SIGCONT']
|
|
36
|
-
*/
|
|
37
|
-
declare function splitLine(line: string): string[];
|
|
10
|
+
export declare function compileTrie(words: Iterable<string>, destFilename: string, options: CompileTrieOptions): Promise<void>;
|
|
38
11
|
export declare const __testing__: {
|
|
39
|
-
splitLine: typeof splitLine;
|
|
40
|
-
createNormalizer: typeof createNormalizer;
|
|
41
12
|
wordListHeader: string;
|
|
42
13
|
};
|
|
43
|
-
export {};
|
|
44
14
|
//# sourceMappingURL=wordListCompiler.d.ts.map
|
|
@@ -23,168 +23,51 @@ var __importStar = (this && this.__importStar) || function (mod) {
|
|
|
23
23
|
return result;
|
|
24
24
|
};
|
|
25
25
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
26
|
-
exports.__testing__ = exports.
|
|
27
|
-
const
|
|
28
|
-
const Text = __importStar(require("./text"));
|
|
29
|
-
const path = __importStar(require("path"));
|
|
30
|
-
const fs_extra_1 = require("fs-extra");
|
|
26
|
+
exports.__testing__ = exports.compileTrie = exports.compileWordList = void 0;
|
|
27
|
+
const sync_1 = require("@cspell/cspell-pipe/sync");
|
|
31
28
|
const Trie = __importStar(require("cspell-trie-lib"));
|
|
29
|
+
const fs_extra_1 = require("fs-extra");
|
|
30
|
+
const path = __importStar(require("path"));
|
|
32
31
|
const fileWriter_1 = require("./fileWriter");
|
|
33
|
-
const
|
|
34
|
-
const
|
|
35
|
-
const regNonWordOrSpace = /[^\p{L}\p{M}' ]+/giu;
|
|
36
|
-
const regNonWordOrDigit = /[^\p{L}\p{M}'\w-]+/giu;
|
|
37
|
-
const regExpSpaceOrDash = /[- ]+/g;
|
|
38
|
-
const regExpRepeatChars = /(.)\1{4,}/i;
|
|
32
|
+
const logger_1 = require("./logger");
|
|
33
|
+
const wordListParser_1 = require("./wordListParser");
|
|
39
34
|
// Indicate that a word list has already been processed.
|
|
40
35
|
const wordListHeader = `
|
|
41
36
|
# cspell-tools: keep-case no-split
|
|
42
37
|
`;
|
|
43
38
|
const wordListHeaderLines = wordListHeader.split('\n').map((a) => a.trim());
|
|
44
|
-
let log = defaultLogger;
|
|
45
|
-
function setLogger(logger) {
|
|
46
|
-
log = logger ?? defaultLogger;
|
|
47
|
-
}
|
|
48
|
-
exports.setLogger = setLogger;
|
|
49
|
-
function defaultLogger(message, ...optionalParams) {
|
|
50
|
-
console.log(message, ...optionalParams);
|
|
51
|
-
}
|
|
52
|
-
function legacyNormalizeWords(lines) {
|
|
53
|
-
return lines.concatMap((line) => legacyLineToWords(line));
|
|
54
|
-
}
|
|
55
|
-
exports.legacyNormalizeWords = legacyNormalizeWords;
|
|
56
|
-
function legacyLineToWords(line) {
|
|
57
|
-
// Remove punctuation and non-letters.
|
|
58
|
-
const filteredLine = line.replace(regNonWordOrSpace, '|');
|
|
59
|
-
const wordGroups = filteredLine.split('|');
|
|
60
|
-
const words = (0, gensequence_1.genSequence)(wordGroups)
|
|
61
|
-
.concatMap((a) => [a, ...a.split(regExpSpaceOrDash)])
|
|
62
|
-
.concatMap((a) => splitCamelCase(a))
|
|
63
|
-
.map((a) => a.trim())
|
|
64
|
-
.filter((a) => !!a)
|
|
65
|
-
.filter((s) => !regExpRepeatChars.test(s))
|
|
66
|
-
.map((a) => a.toLowerCase());
|
|
67
|
-
return words;
|
|
68
|
-
}
|
|
69
|
-
exports.legacyLineToWords = legacyLineToWords;
|
|
70
|
-
function splitCamelCase(word) {
|
|
71
|
-
const splitWords = Text.splitCamelCaseWord(word);
|
|
72
|
-
// We only want to preserve this: "New York" and not "Namespace DNSLookup"
|
|
73
|
-
if (splitWords.length > 1 && regExpSpaceOrDash.test(word)) {
|
|
74
|
-
return (0, gensequence_1.genSequence)(splitWords).concatMap((w) => w.split(regExpSpaceOrDash));
|
|
75
|
-
}
|
|
76
|
-
return splitWords;
|
|
77
|
-
}
|
|
78
|
-
function createNormalizer(options) {
|
|
79
|
-
const { skipNormalization = false, splitWords, keepRawCase, legacy } = options;
|
|
80
|
-
if (skipNormalization) {
|
|
81
|
-
return (lines) => lines;
|
|
82
|
-
}
|
|
83
|
-
const lineProcessor = legacy ? legacyLineToWords : splitWords ? splitLine : noSplit;
|
|
84
|
-
const wordMapper = keepRawCase ? mapWordIdentity : mapWordToDictionaryEntries;
|
|
85
|
-
const initialState = {
|
|
86
|
-
inlineSettings: {},
|
|
87
|
-
lineProcessor,
|
|
88
|
-
wordMapper,
|
|
89
|
-
};
|
|
90
|
-
const fnNormalizeLines = (lines) => normalizeWordListSeq(lines, initialState)
|
|
91
|
-
.filter((a) => !!a)
|
|
92
|
-
.pipe(createInlineBufferedSort())
|
|
93
|
-
.filter((0, util_1.uniqueFilter)(10000));
|
|
94
|
-
return fnNormalizeLines;
|
|
95
|
-
}
|
|
96
39
|
async function compileWordList(lines, destFilename, options) {
|
|
97
|
-
const
|
|
98
|
-
const
|
|
99
|
-
const header = (0, gensequence_1.genSequence)(wordListHeaderLines);
|
|
100
|
-
const finalSeq = header.concat(options.sort ? (0, gensequence_1.genSequence)(sort(seq)) : seq);
|
|
40
|
+
const filter = (0, wordListParser_1.normalizeTargetWords)(options);
|
|
41
|
+
const finalSeq = (0, sync_1.pipe)(wordListHeaderLines, (0, sync_1.opAppend)((0, sync_1.pipe)(lines, filter)));
|
|
101
42
|
return createWordListTarget(destFilename)(finalSeq);
|
|
102
43
|
}
|
|
103
44
|
exports.compileWordList = compileWordList;
|
|
104
45
|
function createWordListTarget(destFilename) {
|
|
105
46
|
const target = createTarget(destFilename);
|
|
106
|
-
return (seq) => target(seq.
|
|
47
|
+
return (seq) => target((0, sync_1.pipe)(seq, (0, sync_1.opMap)((a) => a + '\n')));
|
|
107
48
|
}
|
|
108
|
-
exports.createWordListTarget = createWordListTarget;
|
|
109
49
|
function createTarget(destFilename) {
|
|
110
50
|
const destDir = path.dirname(destFilename);
|
|
111
51
|
const pDir = (0, fs_extra_1.mkdirp)(destDir);
|
|
112
52
|
return async (seq) => {
|
|
113
53
|
await pDir;
|
|
114
|
-
|
|
54
|
+
await (0, fileWriter_1.writeSeqToFile)(seq, destFilename);
|
|
115
55
|
};
|
|
116
56
|
}
|
|
117
|
-
function mapWordToDictionaryEntries(w) {
|
|
118
|
-
return Trie.parseDictionaryLines([w]);
|
|
119
|
-
}
|
|
120
|
-
function mapWordIdentity(w) {
|
|
121
|
-
return [w];
|
|
122
|
-
}
|
|
123
|
-
function normalizeWordListSeq(lines, initialState) {
|
|
124
|
-
return (0, gensequence_1.genSequence)(normalizeWordListGen(lines, initialState));
|
|
125
|
-
}
|
|
126
|
-
function* normalizeWordListGen(lines, initialState) {
|
|
127
|
-
let state = initialState;
|
|
128
|
-
for (let line of lines) {
|
|
129
|
-
line = line.normalize('NFC');
|
|
130
|
-
state = adjustState(state, line);
|
|
131
|
-
for (const word of state.lineProcessor(line)) {
|
|
132
|
-
const w = word.trim();
|
|
133
|
-
if (!w)
|
|
134
|
-
continue;
|
|
135
|
-
yield* state.wordMapper(w);
|
|
136
|
-
}
|
|
137
|
-
}
|
|
138
|
-
}
|
|
139
|
-
function createInlineBufferedSort(bufferSize = 1000) {
|
|
140
|
-
function* inlineBufferedSort(lines) {
|
|
141
|
-
const buffer = [];
|
|
142
|
-
for (const line of lines) {
|
|
143
|
-
buffer.push(line);
|
|
144
|
-
if (buffer.length >= bufferSize) {
|
|
145
|
-
buffer.sort();
|
|
146
|
-
yield* buffer;
|
|
147
|
-
buffer.length = 0;
|
|
148
|
-
}
|
|
149
|
-
}
|
|
150
|
-
buffer.sort();
|
|
151
|
-
yield* buffer;
|
|
152
|
-
}
|
|
153
|
-
return inlineBufferedSort;
|
|
154
|
-
}
|
|
155
|
-
function adjustState(state, line) {
|
|
156
|
-
const inlineSettings = (0, inlineSettings_1.extractInlineSettings)(line);
|
|
157
|
-
if (!inlineSettings)
|
|
158
|
-
return state;
|
|
159
|
-
const r = { ...state };
|
|
160
|
-
r.inlineSettings = { ...r.inlineSettings, ...inlineSettings };
|
|
161
|
-
r.wordMapper =
|
|
162
|
-
inlineSettings.keepRawCase === undefined
|
|
163
|
-
? r.wordMapper
|
|
164
|
-
: inlineSettings.keepRawCase
|
|
165
|
-
? mapWordIdentity
|
|
166
|
-
: mapWordToDictionaryEntries;
|
|
167
|
-
r.lineProcessor = inlineSettings.split === undefined ? r.lineProcessor : inlineSettings.split ? splitLine : noSplit;
|
|
168
|
-
return r;
|
|
169
|
-
}
|
|
170
|
-
function sort(words) {
|
|
171
|
-
return [...words].sort();
|
|
172
|
-
}
|
|
173
|
-
exports.consolidate = Trie.consolidate;
|
|
174
57
|
async function compileTrie(words, destFilename, options) {
|
|
175
|
-
|
|
176
|
-
await createTrieTarget(destFilename, options)(normalizer(words));
|
|
58
|
+
await createTrieTarget(destFilename, options)(words);
|
|
177
59
|
}
|
|
178
60
|
exports.compileTrie = compileTrie;
|
|
179
61
|
function createTrieTarget(destFilename, options) {
|
|
180
62
|
const target = createTarget(destFilename);
|
|
181
63
|
return async (words) => {
|
|
64
|
+
const log = (0, logger_1.getLogger)();
|
|
182
65
|
log('Reading Words into Trie');
|
|
183
66
|
const base = options.base ?? 32;
|
|
184
67
|
const version = options.trie4 ? 4 : options.trie3 ? 3 : 1;
|
|
185
68
|
const root = Trie.buildTrie(words).root;
|
|
186
69
|
log('Reduce duplicate word endings');
|
|
187
|
-
const trie =
|
|
70
|
+
const trie = Trie.consolidate(root);
|
|
188
71
|
log(`Writing to file ${path.basename(destFilename)}`);
|
|
189
72
|
await target(Trie.serializeTrie(trie, {
|
|
190
73
|
base,
|
|
@@ -194,46 +77,7 @@ function createTrieTarget(destFilename, options) {
|
|
|
194
77
|
log(`Done writing to file ${path.basename(destFilename)}`);
|
|
195
78
|
};
|
|
196
79
|
}
|
|
197
|
-
exports.createTrieTarget = createTrieTarget;
|
|
198
|
-
/**
|
|
199
|
-
* Splits a line of text into words, but does not split words.
|
|
200
|
-
* @param line text line to split.
|
|
201
|
-
* @returns array of words
|
|
202
|
-
* @example `readline.clearLine(stream, dir)` => ['readline', 'clearLine', 'stream', 'dir']
|
|
203
|
-
* @example `New York` => ['New', 'York']
|
|
204
|
-
* @example `don't` => [`don't`]
|
|
205
|
-
* @example `Event: 'SIGCONT'` => ['Event', 'SIGCONT']
|
|
206
|
-
*/
|
|
207
|
-
function splitLine(line) {
|
|
208
|
-
line = line.replace(/#.*/, ''); // remove comment
|
|
209
|
-
line = line.trim();
|
|
210
|
-
line = line.replace(/\bU\+[0-9A-F]+\b/gi, '|'); // Remove Unicode Definitions
|
|
211
|
-
line = line.replace(regNonWordOrDigit, '|');
|
|
212
|
-
line = line.replace(/'(?=\|)/g, ''); // remove trailing '
|
|
213
|
-
line = line.replace(/'$/, ''); // remove trailing '
|
|
214
|
-
line = line.replace(/(?<=\|)'/g, ''); // remove leading '
|
|
215
|
-
line = line.replace(/^'/, ''); // remove leading '
|
|
216
|
-
line = line.replace(/\s*\|\s*/g, '|'); // remove spaces around |
|
|
217
|
-
line = line.replace(/[|]+/g, '|'); // reduce repeated |
|
|
218
|
-
line = line.replace(/^\|/, ''); // remove leading |
|
|
219
|
-
line = line.replace(/\|$/, ''); // remove trailing |
|
|
220
|
-
const lines = line
|
|
221
|
-
.split('|')
|
|
222
|
-
.map((a) => a.trim())
|
|
223
|
-
.filter((a) => !!a)
|
|
224
|
-
.filter((a) => !a.match(/^[0-9_-]+$/)) // pure numbers and symbols
|
|
225
|
-
.filter((a) => !a.match(/^[ux][0-9A-F]*$/i)) // hex digits
|
|
226
|
-
.filter((a) => !a.match(/^0[xo][0-9A-F]*$/i)); // c-style hex/octal digits
|
|
227
|
-
return lines;
|
|
228
|
-
}
|
|
229
|
-
function noSplit(line) {
|
|
230
|
-
line = line.replace(/#.*/, ''); // remove comment
|
|
231
|
-
line = line.trim();
|
|
232
|
-
return !line ? [] : [line];
|
|
233
|
-
}
|
|
234
80
|
exports.__testing__ = {
|
|
235
|
-
splitLine: splitLine,
|
|
236
|
-
createNormalizer,
|
|
237
81
|
wordListHeader,
|
|
238
82
|
};
|
|
239
83
|
//# sourceMappingURL=wordListCompiler.js.map
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
import { type Operator } from '@cspell/cspell-pipe/sync';
|
|
2
|
+
import { CompileOptions } from './CompileOptions';
|
|
3
|
+
export declare function normalizeTargetWords(options: CompileOptions): Operator<string>;
|
|
4
|
+
/**
 * Options accepted by the word-list file parser
 * (`createParseFileLineMapper` / `parseFileLines`).
 * Every field is optional; an omitted field falls back to the parser's default.
 */
export interface ParseFileOptions {
    /**
     * Preserve case.
     * When this is not given and `legacy` is enabled, the parser falls back to `false`.
     * @default true
     */
    keepCase?: boolean;
    /**
     * Tell the parser to split into words along spaces.
     * @default false
     */
    split?: boolean | undefined;
    /**
     * When splitting, tells the parser to output both the split and non-split versions of the line.
     * @default false
     */
    splitKeepBoth?: boolean | undefined;
    /**
     * Use legacy splitting.
     * @default false
     */
    legacy?: boolean;
}
|
|
26
|
+
declare type ParseFileOptionsRequired = Required<ParseFileOptions>;
|
|
27
|
+
export declare const defaultParseDictionaryOptions: ParseFileOptionsRequired;
|
|
28
|
+
export declare const cSpellToolDirective = "cspell-tools:";
|
|
29
|
+
export declare const setOfCSpellDirectiveFlags: string[];
|
|
30
|
+
/**
|
|
31
|
+
* Normalizes dictionary words based upon prefixes / suffixes.
|
|
32
|
+
* Case insensitive versions are also generated.
|
|
33
|
+
* @param options - defines prefixes used when parsing lines.
|
|
34
|
+
* @returns words that have been normalized.
|
|
35
|
+
*/
|
|
36
|
+
export declare function createParseFileLineMapper(options?: Partial<ParseFileOptions>): Operator<string>;
|
|
37
|
+
/**
|
|
38
|
+
* Normalizes dictionary words based upon prefixes / suffixes.
|
|
39
|
+
* Case insensitive versions are also generated.
|
|
40
|
+
* @param lines - one word per line
|
|
41
|
+
* @param _options - defines prefixes used when parsing lines.
|
|
42
|
+
* @returns words that have been normalized.
|
|
43
|
+
*/
|
|
44
|
+
export declare function parseFileLines(lines: Iterable<string> | string, options?: Partial<ParseFileOptions>): Iterable<string>;
|
|
45
|
+
export {};
|
|
46
|
+
//# sourceMappingURL=wordListParser.d.ts.map
|
|
@@ -0,0 +1,171 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.parseFileLines = exports.createParseFileLineMapper = exports.setOfCSpellDirectiveFlags = exports.cSpellToolDirective = exports.defaultParseDictionaryOptions = exports.normalizeTargetWords = void 0;
|
|
4
|
+
const sync_1 = require("@cspell/cspell-pipe/sync");
|
|
5
|
+
const cspell_trie_lib_1 = require("cspell-trie-lib");
|
|
6
|
+
const util_1 = require("hunspell-reader/dist/util");
|
|
7
|
+
const legacyLineToWords_1 = require("./legacyLineToWords");
|
|
8
|
+
/**
 * Builds the operator applied to words before they are written to a target file.
 *
 * The resulting pipeline, in order:
 *  1. drops falsy (empty) entries,
 *  2. runs each entry through the dictionary line parser from `cspell-trie-lib`,
 *     with `stripCaseAndAccents` taken from `options.generateNonStrict`,
 *  3. when `options.sort` is truthy, sorts the stream in chunks of 10000 entries
 *     (the sort is per chunk, not over the whole stream),
 *  4. filters the stream through `uniqueFilter(10000)`
 *     (presumably a bounded de-duplication window; confirm against hunspell-reader).
 *
 * @param options - compile options; `generateNonStrict` and `sort` are read here.
 * @returns a single operator combining the steps above.
 */
function normalizeTargetWords(options) {
    const { opCombine, opFilter } = sync_1;
    const parseLine = (0, cspell_trie_lib_1.createDictionaryLineParser)({ stripCaseAndAccents: options.generateNonStrict });
    const steps = [opFilter((entry) => !!entry), parseLine];
    if (options.sort) {
        steps.push(createInlineBufferedSort(10000));
    }
    steps.push(opFilter((0, util_1.uniqueFilter)(10000)));
    return opCombine(...steps);
}
|
|
18
|
+
exports.normalizeTargetWords = normalizeTargetWords;
|
|
19
|
+
/**
 * Tells whether a value is anything other than `undefined`.
 * `null`, `0`, `''` and `false` all count as defined.
 *
 * @param v - value to check.
 * @returns `false` only for `undefined`.
 */
function isDefined(v) {
    return typeof v !== 'undefined';
}
|
|
22
|
+
/**
 * Creates a generator function that sorts a stream in fixed-size chunks.
 *
 * Lines are collected until `bufferSize` of them are pending; that chunk is then
 * sorted with the default `Array.prototype.sort` ordering and emitted before any
 * more input is read. Whatever is left at the end is sorted and emitted as a
 * final, possibly shorter, chunk. The output is therefore ordered within each
 * chunk only, not globally.
 *
 * @param bufferSize - number of lines per sorted chunk (defaults to 1000).
 * @returns a generator function mapping an iterable of lines to the chunk-sorted lines.
 */
function createInlineBufferedSort(bufferSize = 1000) {
    return function* inlineBufferedSort(lines) {
        let pending = [];
        for (const line of lines) {
            pending.push(line);
            if (pending.length < bufferSize) {
                continue;
            }
            // Chunk is full: emit it in sorted order and start a new one.
            yield* pending.sort();
            pending = [];
        }
        // Emit the trailing partial chunk (nothing is emitted if it is empty).
        yield* pending.sort();
    };
}
|
|
38
|
+
// Character that starts a comment inside a word-list line.
const commentCharacter = '#';
// Values used for any parse option the caller leaves undefined.
const _defaultOptions = {
    keepCase: true,
    legacy: false,
    split: false,
    splitKeepBoth: false,
    // splitSeparator: regExpSplit,
};
// Exported defaults. Object.freeze freezes `_defaultOptions` itself, so both names refer to the same frozen object.
exports.defaultParseDictionaryOptions = Object.freeze(_defaultOptions);
// Marker looked for inside a comment to find inline parser directives.
exports.cSpellToolDirective = 'cspell-tools:';
// Directive flags listed for the marker above.
exports.setOfCSpellDirectiveFlags = ['no-split', 'split', 'keep-case', 'no-keep-case', 'legacy'];
|
|
49
|
+
/**
 * Builds the operator that turns raw word-list text into individual entries.
 *
 * The operator applies these steps to its input, in order:
 *  1. keeps only string items,
 *  2. splits every item on `'\n'`,
 *  3. strips `#` comments, applying any `cspell-tools:` directive flags found in
 *     the comment (`split`, `no-split`, `keep-case`, `no-keep-case`, `legacy`),
 *  4. expands each line into words according to the current mode
 *     (legacy splitting, word splitting, or the line as-is with `"` removed),
 *  5. trims each result and drops empty ones,
 *  6. removes duplicates, keeping the first occurrence.
 *
 * The mode flags live in closure variables, so a directive changes how the lines
 * handled after it are processed, and the state is shared by every use of the
 * returned operator.
 *
 * @param options - optional parse settings (`keepCase`, `split`, `splitKeepBoth`,
 *   `legacy`); undefined settings fall back to `_defaultOptions`, except that
 *   `keepCase` falls back to `false` when `legacy` is enabled.
 * @returns an operator mapping an iterable of text chunks to the parsed entries.
 */
function createParseFileLineMapper(options) {
    const { opCombine, opFilter, opMap } = sync_1;
    const settings = options || _defaultOptions;
    const {
        splitKeepBoth = _defaultOptions.splitKeepBoth,
        legacy: legacyOption = _defaultOptions.legacy,
        split: splitOption = _defaultOptions.split,
        keepCase: keepCaseOption,
    } = settings;
    // Mutable parser mode; inline directives update these while lines stream through.
    let legacy = legacyOption;
    let split = splitOption;
    let keepCase = keepCaseOption !== undefined ? keepCaseOption : legacy ? false : _defaultOptions.keepCase;
    // One handler per recognized directive flag; unknown tokens are ignored.
    const directiveHandlers = new Map([
        ['split', () => { split = true; }],
        ['no-split', () => { split = false; }],
        ['keep-case', () => { keepCase = true; legacy = false; }],
        ['no-keep-case', () => { keepCase = false; }],
        ['legacy', () => { keepCase = false; legacy = true; }],
    ]);
    /** Cuts the comment off a line, applying any directive flags found inside the comment. */
    function removeComments(line) {
        const commentStart = line.indexOf(commentCharacter);
        if (commentStart < 0) {
            return line;
        }
        // Directives only count when they appear at or after the comment character.
        const directiveStart = line.indexOf(exports.cSpellToolDirective, commentStart);
        if (directiveStart >= 0) {
            for (const token of line.slice(directiveStart).split(/[\s,;]/g)) {
                const applyFlag = directiveHandlers.get(token.trim());
                if (applyFlag) {
                    applyFlag();
                }
            }
        }
        return line.slice(0, commentStart).trim();
    }
    const regNonWordOrDigit = /[^\p{L}\p{M}'\w-]+/giu;
    // Rewrites applied in order; `|` is used as the temporary word separator.
    const splitLineRewrites = [
        { pattern: /\bU\+[0-9A-F]+\b/gi, replacement: '|' }, // Remove Unicode Definitions
        { pattern: regNonWordOrDigit, replacement: '|' },
        { pattern: /'(?=\|)/g, replacement: '' }, // remove trailing '
        { pattern: /'$/, replacement: '' }, // remove trailing '
        { pattern: /(?<=\|)'/g, replacement: '' }, // remove leading '
        { pattern: /^'/, replacement: '' }, // remove leading '
        { pattern: /\s*\|\s*/g, replacement: '|' }, // remove spaces around |
        { pattern: /[|]+/g, replacement: '|' }, // reduce repeated |
        { pattern: /^\|/, replacement: '' }, // remove leading |
        { pattern: /\|$/, replacement: '' }, // remove trailing |
    ];
    // Candidates matching any of these are not words and get dropped.
    const nonWordPatterns = [
        /^[0-9_-]+$/, // pure numbers and symbols
        /^[ux][0-9A-F]*$/i, // hex digits
        /^0[xo][0-9A-F]*$/i, // c-style hex/octal digits
    ];
    /** Splits one line of text into its words. */
    function splitLine(line) {
        let text = line.replace(/#.*/, '').trim(); // remove comment
        for (const { pattern, replacement } of splitLineRewrites) {
            text = text.replace(pattern, replacement);
        }
        return text
            .split('|')
            .map((part) => part.trim())
            .filter((part) => part !== '' && !nonWordPatterns.some((re) => re.test(part)));
    }
    /** Expands each line into words according to the current mode flags. */
    function* splitWords(lines) {
        for (const line of lines) {
            if (legacy) {
                yield* (0, legacyLineToWords_1.legacyLineToWords)(line, keepCase);
                continue;
            }
            const splitThisLine = split;
            if (splitThisLine) {
                yield* splitLine(line);
            }
            // The unsplit line is emitted when not splitting, or when both forms are wanted.
            if (!splitThisLine || splitKeepBoth) {
                yield line.replace(/["]/g, '');
            }
        }
    }
    /** Passes through only the first occurrence of each line. */
    function* unique(lines) {
        const seen = new Set();
        for (const line of lines) {
            if (!seen.has(line)) {
                seen.add(line);
                yield line;
            }
        }
    }
    /** Breaks multi-line chunks into individual lines. */
    function* splitLines(paragraphs) {
        for (const paragraph of paragraphs) {
            yield* paragraph.split('\n');
        }
    }
    return opCombine(
        opFilter((item) => typeof item === 'string'),
        splitLines,
        opMap(removeComments),
        splitWords,
        opMap((line) => line.trim()),
        opFilter((line) => !!line),
        unique
    );
}
|
|
159
|
+
exports.createParseFileLineMapper = createParseFileLineMapper;
|
|
160
|
+
/**
 * Parses word-list text into individual entries using `createParseFileLineMapper`.
 *
 * @param lines - either a single string (treated as one chunk of text) or an
 *   iterable of strings.
 * @param options - optional parse settings forwarded to `createParseFileLineMapper`.
 * @returns an iterable of the parsed entries.
 */
function parseFileLines(lines, options) {
    const mapLines = createParseFileLineMapper(options);
    // A bare string is wrapped so the mapper always receives an iterable of chunks.
    const chunks = typeof lines === 'string' ? [lines] : lines;
    return mapLines(chunks);
}
|
|
170
|
+
exports.parseFileLines = parseFileLines;
|
|
171
|
+
//# sourceMappingURL=wordListParser.js.map
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
|
|
3
|
+
if (k2 === undefined) k2 = k;
|
|
4
|
+
var desc = Object.getOwnPropertyDescriptor(m, k);
|
|
5
|
+
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
|
|
6
|
+
desc = { enumerable: true, get: function() { return m[k]; } };
|
|
7
|
+
}
|
|
8
|
+
Object.defineProperty(o, k2, desc);
|
|
9
|
+
}) : (function(o, m, k, k2) {
|
|
10
|
+
if (k2 === undefined) k2 = k;
|
|
11
|
+
o[k2] = m[k];
|
|
12
|
+
}));
|
|
13
|
+
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
|
|
14
|
+
Object.defineProperty(o, "default", { enumerable: true, value: v });
|
|
15
|
+
}) : function(o, v) {
|
|
16
|
+
o["default"] = v;
|
|
17
|
+
});
|
|
18
|
+
var __importStar = (this && this.__importStar) || function (mod) {
|
|
19
|
+
if (mod && mod.__esModule) return mod;
|
|
20
|
+
var result = {};
|
|
21
|
+
if (mod != null) for (var k in mod) if (k !== "default" && Object.prototype.hasOwnProperty.call(mod, k)) __createBinding(result, mod, k);
|
|
22
|
+
__setModuleDefault(result, mod);
|
|
23
|
+
return result;
|
|
24
|
+
};
|
|
25
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
26
|
+
exports.writeTextLinesToFile = exports.writeTextToFile = void 0;
|
|
27
|
+
const fs_1 = require("fs");
|
|
28
|
+
const util_1 = require("util");
|
|
29
|
+
const zlib = __importStar(require("zlib"));
|
|
30
|
+
const gzip = (0, util_1.promisify)(zlib.gzip);
|
|
31
|
+
const isGzFile = /\.gz$/;
|
|
32
|
+
/**
 * Writes text to a file as UTF-8.
 *
 * When the file name ends in `.gz`, the encoded bytes are gzipped before being written.
 *
 * @param filename - path of the file to write.
 * @param data - text to store.
 * @returns a promise that settles once the file has been written.
 */
async function writeTextToFile(filename, data) {
    let payload = Buffer.from(data, 'utf-8');
    if (isGzFile.test(filename)) {
        payload = await gzip(payload);
    }
    await fs_1.promises.writeFile(filename, payload);
}
|
|
38
|
+
exports.writeTextToFile = writeTextToFile;
|
|
39
|
+
/**
 * Concatenates the given lines and writes the result with `writeTextToFile`.
 *
 * The lines are joined with an empty string, so no separators are added; each
 * line must already carry its own terminator.
 *
 * @param filename - path of the file to write.
 * @param lines - array or other iterable of text fragments.
 * @returns the promise returned by `writeTextToFile`.
 */
function writeTextLinesToFile(filename, lines) {
    // Materialize non-array iterables so they can be joined.
    const fragments = Array.isArray(lines) ? lines : [...lines];
    return writeTextToFile(filename, fragments.join(''));
}
|
|
43
|
+
exports.writeTextLinesToFile = writeTextLinesToFile;
|
|
44
|
+
//# sourceMappingURL=writeTextToFile.js.map
|