@cspell/cspell-tools 6.28.0 → 6.29.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/cspell-tools.config.schema.json +54 -0
- package/dist/compiler/Reader.d.ts +2 -26
- package/dist/compiler/Reader.js +9 -117
- package/dist/compiler/SourceReader.d.ts +26 -0
- package/dist/compiler/SourceReader.js +27 -0
- package/dist/compiler/WordsCollection.d.ts +20 -0
- package/dist/compiler/WordsCollection.js +6 -0
- package/dist/compiler/compile.js +12 -5
- package/dist/compiler/createWordsCollection.d.ts +9 -0
- package/dist/compiler/createWordsCollection.js +74 -0
- package/dist/compiler/legacyLineToWords.d.ts +3 -2
- package/dist/compiler/legacyLineToWords.js +7 -39
- package/dist/compiler/readers/ReaderOptions.d.ts +15 -0
- package/dist/compiler/readers/ReaderOptions.js +3 -0
- package/dist/compiler/readers/readHunspellFiles.d.ts +3 -0
- package/dist/compiler/readers/readHunspellFiles.js +84 -0
- package/dist/compiler/readers/textFileReader.d.ts +3 -0
- package/dist/compiler/readers/textFileReader.js +15 -0
- package/dist/compiler/readers/trieFileReader.d.ts +3 -0
- package/dist/compiler/readers/trieFileReader.js +19 -0
- package/dist/compiler/splitCamelCaseIfAllowed.d.ts +5 -0
- package/dist/compiler/splitCamelCaseIfAllowed.js +66 -0
- package/dist/compiler/streamSourceWordsFromFile.d.ts +3 -0
- package/dist/compiler/streamSourceWordsFromFile.js +10 -0
- package/dist/compiler/text.js +6 -6
- package/dist/compiler/wordListParser.d.ts +3 -1
- package/dist/compiler/wordListParser.js +11 -4
- package/dist/config/config.d.ts +13 -0
- package/dist/test/TestHelper.d.ts +1 -0
- package/dist/test/TestHelper.js +7 -2
- package/package.json +6 -6
- package/dist/compiler/iterateWordsFromFile.d.ts +0 -3
- package/dist/compiler/iterateWordsFromFile.js +0 -10
- /package/dist/compiler/{readTextFile.d.ts → readers/readTextFile.d.ts} +0 -0
- /package/dist/compiler/{readTextFile.js → readers/readTextFile.js} +0 -0
|
@@ -27,6 +27,19 @@
|
|
|
27
27
|
"FileListSource": {
|
|
28
28
|
"additionalProperties": false,
|
|
29
29
|
"properties": {
|
|
30
|
+
"allowedSplitWords": {
|
|
31
|
+
"anyOf": [
|
|
32
|
+
{
|
|
33
|
+
"$ref": "#/definitions/FilePath"
|
|
34
|
+
},
|
|
35
|
+
{
|
|
36
|
+
"items": {
|
|
37
|
+
"$ref": "#/definitions/FilePath"
|
|
38
|
+
},
|
|
39
|
+
"type": "array"
|
|
40
|
+
}
|
|
41
|
+
]
|
|
42
|
+
},
|
|
30
43
|
"keepRawCase": {
|
|
31
44
|
"default": false,
|
|
32
45
|
"description": "Do not generate lower case / accent free versions of words.",
|
|
@@ -65,6 +78,19 @@
|
|
|
65
78
|
"FileSource": {
|
|
66
79
|
"additionalProperties": false,
|
|
67
80
|
"properties": {
|
|
81
|
+
"allowedSplitWords": {
|
|
82
|
+
"anyOf": [
|
|
83
|
+
{
|
|
84
|
+
"$ref": "#/definitions/FilePath"
|
|
85
|
+
},
|
|
86
|
+
{
|
|
87
|
+
"items": {
|
|
88
|
+
"$ref": "#/definitions/FilePath"
|
|
89
|
+
},
|
|
90
|
+
"type": "array"
|
|
91
|
+
}
|
|
92
|
+
]
|
|
93
|
+
},
|
|
68
94
|
"filename": {
|
|
69
95
|
"$ref": "#/definitions/FilePath"
|
|
70
96
|
},
|
|
@@ -99,6 +125,20 @@
|
|
|
99
125
|
"Target": {
|
|
100
126
|
"additionalProperties": false,
|
|
101
127
|
"properties": {
|
|
128
|
+
"allowedSplitWords": {
|
|
129
|
+
"anyOf": [
|
|
130
|
+
{
|
|
131
|
+
"$ref": "#/definitions/FilePath"
|
|
132
|
+
},
|
|
133
|
+
{
|
|
134
|
+
"items": {
|
|
135
|
+
"$ref": "#/definitions/FilePath"
|
|
136
|
+
},
|
|
137
|
+
"type": "array"
|
|
138
|
+
}
|
|
139
|
+
],
|
|
140
|
+
"description": "Words in the `allowedSplitWords` are considered correct and can be used as a basis for splitting compound words.\n\nIf entries can be split so that all the words in the entry are allowed, then only the individual words are added, otherwise the entire entry is added. This is to prevent misspellings in CamelCase words from being introduced into the dictionary."
|
|
141
|
+
},
|
|
102
142
|
"compress": {
|
|
103
143
|
"default": ": false",
|
|
104
144
|
"description": "gzip the file?",
|
|
@@ -155,6 +195,20 @@
|
|
|
155
195
|
}
|
|
156
196
|
},
|
|
157
197
|
"properties": {
|
|
198
|
+
"allowedSplitWords": {
|
|
199
|
+
"anyOf": [
|
|
200
|
+
{
|
|
201
|
+
"$ref": "#/definitions/FilePath"
|
|
202
|
+
},
|
|
203
|
+
{
|
|
204
|
+
"items": {
|
|
205
|
+
"$ref": "#/definitions/FilePath"
|
|
206
|
+
},
|
|
207
|
+
"type": "array"
|
|
208
|
+
}
|
|
209
|
+
],
|
|
210
|
+
"description": "Words in the `allowedSplitWords` are considered correct and can be used as a basis for splitting compound words.\n\nIf entries can be split so that all the words in the entry are allowed, then only the individual words are added, otherwise the entire entry is added. This is to prevent misspellings in CamelCase words from being introduced into the dictionary."
|
|
211
|
+
},
|
|
158
212
|
"generateNonStrict": {
|
|
159
213
|
"default": true,
|
|
160
214
|
"description": "Generate lower case / accent free versions of words.",
|
|
@@ -1,28 +1,4 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
* Max Hunspell recursive depth.
|
|
4
|
-
*/
|
|
5
|
-
maxDepth?: number;
|
|
6
|
-
/**
|
|
7
|
-
* split words if necessary.
|
|
8
|
-
*/
|
|
9
|
-
splitWords: boolean;
|
|
10
|
-
/**
|
|
11
|
-
* Indicate that it is an unformatted file and needs to be cleaned
|
|
12
|
-
* before processing. Applies only to text file sources.
|
|
13
|
-
* @default false
|
|
14
|
-
*/
|
|
15
|
-
legacy?: boolean;
|
|
16
|
-
keepCase?: boolean;
|
|
17
|
-
}
|
|
18
|
-
export type AnnotatedWord = string;
|
|
19
|
-
interface BaseReader {
|
|
20
|
-
size: number;
|
|
21
|
-
words: Iterable<AnnotatedWord>;
|
|
22
|
-
}
|
|
23
|
-
export interface Reader extends BaseReader, Iterable<string> {
|
|
24
|
-
}
|
|
1
|
+
import type { Reader, ReaderOptions } from './readers/ReaderOptions';
|
|
2
|
+
export declare const regHunspellFile: RegExp;
|
|
25
3
|
export declare function createReader(filename: string, options: ReaderOptions): Promise<Reader>;
|
|
26
|
-
export declare function readHunspellFiles(filename: string, options: ReaderOptions): Promise<BaseReader>;
|
|
27
|
-
export {};
|
|
28
4
|
//# sourceMappingURL=Reader.d.ts.map
|
package/dist/compiler/Reader.js
CHANGED
|
@@ -1,41 +1,14 @@
|
|
|
1
1
|
"use strict";
|
|
2
|
-
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
|
|
3
|
-
if (k2 === undefined) k2 = k;
|
|
4
|
-
var desc = Object.getOwnPropertyDescriptor(m, k);
|
|
5
|
-
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
|
|
6
|
-
desc = { enumerable: true, get: function() { return m[k]; } };
|
|
7
|
-
}
|
|
8
|
-
Object.defineProperty(o, k2, desc);
|
|
9
|
-
}) : (function(o, m, k, k2) {
|
|
10
|
-
if (k2 === undefined) k2 = k;
|
|
11
|
-
o[k2] = m[k];
|
|
12
|
-
}));
|
|
13
|
-
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
|
|
14
|
-
Object.defineProperty(o, "default", { enumerable: true, value: v });
|
|
15
|
-
}) : function(o, v) {
|
|
16
|
-
o["default"] = v;
|
|
17
|
-
});
|
|
18
|
-
var __importStar = (this && this.__importStar) || function (mod) {
|
|
19
|
-
if (mod && mod.__esModule) return mod;
|
|
20
|
-
var result = {};
|
|
21
|
-
if (mod != null) for (var k in mod) if (k !== "default" && Object.prototype.hasOwnProperty.call(mod, k)) __createBinding(result, mod, k);
|
|
22
|
-
__setModuleDefault(result, mod);
|
|
23
|
-
return result;
|
|
24
|
-
};
|
|
25
2
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
26
|
-
exports.
|
|
27
|
-
const
|
|
28
|
-
const
|
|
29
|
-
const
|
|
30
|
-
|
|
31
|
-
const wordListParser_1 = require("./wordListParser");
|
|
32
|
-
const regHunspellFile = /\.(dic|aff)$/i;
|
|
33
|
-
// cspell:word dedupe
|
|
34
|
-
const DEDUPE_SIZE = 1000;
|
|
3
|
+
exports.createReader = exports.regHunspellFile = void 0;
|
|
4
|
+
const readHunspellFiles_1 = require("./readers/readHunspellFiles");
|
|
5
|
+
const textFileReader_1 = require("./readers/textFileReader");
|
|
6
|
+
const trieFileReader_1 = require("./readers/trieFileReader");
|
|
7
|
+
exports.regHunspellFile = /\.(dic|aff)$/i;
|
|
35
8
|
// Readers first match wins
|
|
36
9
|
const readers = [
|
|
37
|
-
{ test: /\.trie\b/, method: trieFileReader },
|
|
38
|
-
{ test: regHunspellFile, method: readHunspellFiles },
|
|
10
|
+
{ test: /\.trie\b/, method: trieFileReader_1.trieFileReader },
|
|
11
|
+
{ test: exports.regHunspellFile, method: readHunspellFiles_1.readHunspellFiles },
|
|
39
12
|
];
|
|
40
13
|
function findMatchingReader(filename, options) {
|
|
41
14
|
for (const reader of readers) {
|
|
@@ -43,94 +16,13 @@ function findMatchingReader(filename, options) {
|
|
|
43
16
|
return reader.method(filename, options);
|
|
44
17
|
}
|
|
45
18
|
}
|
|
46
|
-
return
|
|
19
|
+
return (0, textFileReader_1.textFileReader)(filename);
|
|
47
20
|
}
|
|
48
21
|
async function createReader(filename, options) {
|
|
49
22
|
const baseReader = await findMatchingReader(filename, options);
|
|
50
23
|
return Object.assign(baseReader, {
|
|
51
|
-
[Symbol.iterator]: () => baseReader.
|
|
24
|
+
[Symbol.iterator]: () => baseReader.lines[Symbol.iterator](),
|
|
52
25
|
});
|
|
53
26
|
}
|
|
54
27
|
exports.createReader = createReader;
|
|
55
|
-
async function readHunspellFiles(filename, options) {
|
|
56
|
-
const dicFile = filename.replace(regHunspellFile, '.dic');
|
|
57
|
-
const affFile = filename.replace(regHunspellFile, '.aff');
|
|
58
|
-
const reader = await HR.IterableHunspellReader.createFromFiles(affFile, dicFile);
|
|
59
|
-
reader.maxDepth = options.maxDepth !== undefined ? options.maxDepth : reader.maxDepth;
|
|
60
|
-
const words = (0, sync_1.pipe)(reader.seqAffWords(), _mapAffWords, dedupeAndSort);
|
|
61
|
-
return {
|
|
62
|
-
size: reader.dic.length,
|
|
63
|
-
words,
|
|
64
|
-
};
|
|
65
|
-
}
|
|
66
|
-
exports.readHunspellFiles = readHunspellFiles;
|
|
67
|
-
async function trieFileReader(filename) {
|
|
68
|
-
const trieRoot = (0, cspell_trie_lib_1.importTrie)(await (0, readTextFile_1.readTextFileLines)(filename));
|
|
69
|
-
const trie = new cspell_trie_lib_1.Trie(trieRoot);
|
|
70
|
-
const words = trie.words();
|
|
71
|
-
return {
|
|
72
|
-
get size() {
|
|
73
|
-
return trie.size();
|
|
74
|
-
},
|
|
75
|
-
words,
|
|
76
|
-
};
|
|
77
|
-
}
|
|
78
|
-
async function textFileReader(filename, options) {
|
|
79
|
-
const content = await (0, readTextFile_1.readTextFile)(filename);
|
|
80
|
-
const words = [...(0, wordListParser_1.parseFileLines)(content, { legacy: options.legacy, split: options.splitWords })];
|
|
81
|
-
return {
|
|
82
|
-
size: words.length,
|
|
83
|
-
words,
|
|
84
|
-
};
|
|
85
|
-
}
|
|
86
|
-
// function* _stripCaseAndAccents(words: Iterable<AnnotatedWord>): Iterable<AnnotatedWord> {
|
|
87
|
-
// for (const word of words) {
|
|
88
|
-
// // Words are normalized to the compact format: e + ` => è
|
|
89
|
-
// yield word.normalize();
|
|
90
|
-
// // covert to lower case and strip accents.
|
|
91
|
-
// const n = word.toLowerCase().normalize('NFD').replace(/\p{M}/gu, '');
|
|
92
|
-
// // All words are added for case-insensitive searches.
|
|
93
|
-
// // It is a space / speed trade-off. In this case, speed is more important.
|
|
94
|
-
// yield CASE_INSENSITIVE_PREFIX + n;
|
|
95
|
-
// }
|
|
96
|
-
// }
|
|
97
|
-
function* dedupeAndSort(words) {
|
|
98
|
-
const buffer = new Set();
|
|
99
|
-
function flush() {
|
|
100
|
-
const result = [...buffer].sort();
|
|
101
|
-
buffer.clear();
|
|
102
|
-
return result;
|
|
103
|
-
}
|
|
104
|
-
for (const word of words) {
|
|
105
|
-
buffer.add(word);
|
|
106
|
-
if (buffer.size >= DEDUPE_SIZE) {
|
|
107
|
-
yield* flush();
|
|
108
|
-
}
|
|
109
|
-
}
|
|
110
|
-
yield* flush();
|
|
111
|
-
}
|
|
112
|
-
function* _mapAffWords(affWords) {
|
|
113
|
-
const hasSpecial = /[~+!]/;
|
|
114
|
-
for (const affWord of affWords) {
|
|
115
|
-
const { word, flags } = affWord;
|
|
116
|
-
// For now do not include words with special characters.
|
|
117
|
-
if (hasSpecial.test(word))
|
|
118
|
-
continue;
|
|
119
|
-
const compound = flags.isCompoundForbidden ? '' : cspell_trie_lib_1.COMPOUND_FIX;
|
|
120
|
-
const forbid = flags.isForbiddenWord ? cspell_trie_lib_1.FORBID_PREFIX : '';
|
|
121
|
-
if (!forbid) {
|
|
122
|
-
if (flags.canBeCompoundBegin || flags.isCompoundPermitted)
|
|
123
|
-
yield word + compound;
|
|
124
|
-
if (flags.canBeCompoundEnd || flags.isCompoundPermitted)
|
|
125
|
-
yield compound + word;
|
|
126
|
-
if (flags.canBeCompoundMiddle || flags.isCompoundPermitted)
|
|
127
|
-
yield compound + word + compound;
|
|
128
|
-
if (!flags.isOnlyAllowedInCompound)
|
|
129
|
-
yield word;
|
|
130
|
-
}
|
|
131
|
-
else {
|
|
132
|
-
yield forbid + word;
|
|
133
|
-
}
|
|
134
|
-
}
|
|
135
|
-
}
|
|
136
28
|
//# sourceMappingURL=Reader.js.map
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
import type { AllowedSplitWordsCollection } from './WordsCollection';
|
|
2
|
+
export interface SourceReaderOptions {
|
|
3
|
+
/**
|
|
4
|
+
* Max Hunspell recursive depth.
|
|
5
|
+
*/
|
|
6
|
+
maxDepth?: number;
|
|
7
|
+
/**
|
|
8
|
+
* split words if necessary.
|
|
9
|
+
*/
|
|
10
|
+
splitWords: boolean;
|
|
11
|
+
/**
|
|
12
|
+
* Indicate that it is an unformatted file and needs to be cleaned
|
|
13
|
+
* before processing. Applies only to text file sources.
|
|
14
|
+
* @default false
|
|
15
|
+
*/
|
|
16
|
+
legacy?: boolean;
|
|
17
|
+
keepCase?: boolean;
|
|
18
|
+
allowedSplitWords: AllowedSplitWordsCollection;
|
|
19
|
+
}
|
|
20
|
+
export type AnnotatedWord = string;
|
|
21
|
+
export interface SourceReader {
|
|
22
|
+
size: number;
|
|
23
|
+
words: Iterable<AnnotatedWord>;
|
|
24
|
+
}
|
|
25
|
+
export declare function createSourceReader(filename: string, options: SourceReaderOptions): Promise<SourceReader>;
|
|
26
|
+
//# sourceMappingURL=SourceReader.d.ts.map
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.createSourceReader = void 0;
|
|
4
|
+
const Reader_1 = require("./Reader");
|
|
5
|
+
const wordListParser_1 = require("./wordListParser");
|
|
6
|
+
async function createSourceReader(filename, options) {
|
|
7
|
+
const reader = await (0, Reader_1.createReader)(filename, options);
|
|
8
|
+
if (reader.type !== 'TextFile') {
|
|
9
|
+
return {
|
|
10
|
+
words: reader.lines,
|
|
11
|
+
get size() {
|
|
12
|
+
return reader.size;
|
|
13
|
+
},
|
|
14
|
+
};
|
|
15
|
+
}
|
|
16
|
+
return textFileReader(reader, options);
|
|
17
|
+
}
|
|
18
|
+
exports.createSourceReader = createSourceReader;
|
|
19
|
+
async function textFileReader(reader, options) {
|
|
20
|
+
const { legacy, splitWords: split, allowedSplitWords } = options;
|
|
21
|
+
const words = [...(0, wordListParser_1.parseFileLines)(reader, { legacy, split, allowedSplitWords })];
|
|
22
|
+
return {
|
|
23
|
+
size: words.length,
|
|
24
|
+
words,
|
|
25
|
+
};
|
|
26
|
+
}
|
|
27
|
+
//# sourceMappingURL=SourceReader.js.map
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
export interface WordsCollection {
|
|
2
|
+
size: number;
|
|
3
|
+
has(words: string): boolean;
|
|
4
|
+
type?: string;
|
|
5
|
+
}
|
|
6
|
+
/**
|
|
7
|
+
* Collection of words to be allowed after splitting.
|
|
8
|
+
*/
|
|
9
|
+
export interface AllowedSplitWordsCollection extends WordsCollection {
|
|
10
|
+
type?: 'AllowedSplitWordsCollection';
|
|
11
|
+
}
|
|
12
|
+
export declare const defaultAllowedSplitWords: AllowedSplitWordsCollection;
|
|
13
|
+
/**
|
|
14
|
+
* Collection of words to be excluded.
|
|
15
|
+
*/
|
|
16
|
+
export interface ExcludeWordsCollection extends WordsCollection {
|
|
17
|
+
type?: 'ExcludeWordsCollection';
|
|
18
|
+
}
|
|
19
|
+
export declare const defaultExcludeWordsCollection: ExcludeWordsCollection;
|
|
20
|
+
//# sourceMappingURL=WordsCollection.d.ts.map
|
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.defaultExcludeWordsCollection = exports.defaultAllowedSplitWords = void 0;
|
|
4
|
+
exports.defaultAllowedSplitWords = Object.freeze({ size: 0, has: () => true });
|
|
5
|
+
exports.defaultExcludeWordsCollection = Object.freeze({ size: 0, has: () => false });
|
|
6
|
+
//# sourceMappingURL=WordsCollection.js.map
|
package/dist/compiler/compile.js
CHANGED
|
@@ -29,9 +29,10 @@ const operators_1 = require("@cspell/cspell-pipe/operators");
|
|
|
29
29
|
const sync_1 = require("@cspell/cspell-pipe/sync");
|
|
30
30
|
const path = __importStar(require("path"));
|
|
31
31
|
const config_1 = require("../config");
|
|
32
|
-
const
|
|
32
|
+
const createWordsCollection_1 = require("./createWordsCollection");
|
|
33
33
|
const logWithTimestamp_1 = require("./logWithTimestamp");
|
|
34
|
-
const readTextFile_1 = require("./readTextFile");
|
|
34
|
+
const readTextFile_1 = require("./readers/readTextFile");
|
|
35
|
+
const streamSourceWordsFromFile_1 = require("./streamSourceWordsFromFile");
|
|
35
36
|
const wordListCompiler_1 = require("./wordListCompiler");
|
|
36
37
|
const wordListParser_1 = require("./wordListParser");
|
|
37
38
|
async function compile(request, options) {
|
|
@@ -54,7 +55,6 @@ async function compile(request, options) {
|
|
|
54
55
|
exports.compile = compile;
|
|
55
56
|
async function compileTarget(target, options, rootDir) {
|
|
56
57
|
(0, logWithTimestamp_1.logWithTimestamp)(`Start compile: ${target.name}`);
|
|
57
|
-
// console.log('Target: %o', target);
|
|
58
58
|
const { format, sources, trieBase, sort = true, generateNonStrict = false } = target;
|
|
59
59
|
const targetDirectory = path.resolve(rootDir, target.targetDirectory ?? process.cwd());
|
|
60
60
|
const generateNonStrictTrie = target.generateNonStrict ?? true;
|
|
@@ -141,9 +141,16 @@ async function readFileSource(fileSource, sourceOptions) {
|
|
|
141
141
|
const legacy = split === 'legacy';
|
|
142
142
|
const splitWords = legacy ? false : split;
|
|
143
143
|
// console.warn('fileSource: %o,\n targetOptions %o, \n opt: %o', fileSource, targetOptions, opt);
|
|
144
|
-
const
|
|
144
|
+
const allowedSplitWords = await (0, createWordsCollection_1.createAllowedSplitWordsFromFiles)(fileSource.allowedSplitWords || sourceOptions.allowedSplitWords);
|
|
145
|
+
const readerOptions = {
|
|
146
|
+
maxDepth,
|
|
147
|
+
legacy,
|
|
148
|
+
splitWords,
|
|
149
|
+
keepCase: keepRawCase,
|
|
150
|
+
allowedSplitWords,
|
|
151
|
+
};
|
|
145
152
|
(0, logWithTimestamp_1.logWithTimestamp)(`Reading ${path.basename(filename)}`);
|
|
146
|
-
const stream = await (0,
|
|
153
|
+
const stream = await (0, streamSourceWordsFromFile_1.streamSourceWordsFromFile)(filename, readerOptions);
|
|
147
154
|
(0, logWithTimestamp_1.logWithTimestamp)(`Done reading ${path.basename(filename)}`);
|
|
148
155
|
const f = {
|
|
149
156
|
src: filename,
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
import type { FilePath } from '../config/config';
|
|
2
|
+
import type { AllowedSplitWordsCollection, ExcludeWordsCollection, WordsCollection } from './WordsCollection';
|
|
3
|
+
export declare function createAllowedSplitWordsFromFiles(files: FilePath | FilePath[] | undefined): Promise<AllowedSplitWordsCollection>;
|
|
4
|
+
export declare function createAllowedSplitWords(words: Iterable<string> | undefined): AllowedSplitWordsCollection;
|
|
5
|
+
export declare function createWordsCollectionFromFiles(files: FilePath | FilePath[]): Promise<WordsCollection>;
|
|
6
|
+
export declare function createWordsCollection(words: Iterable<string>): WordsCollection;
|
|
7
|
+
export declare function createExcludeWordsCollectionFromFiles(files: FilePath | FilePath[] | undefined): Promise<ExcludeWordsCollection>;
|
|
8
|
+
export declare function createExcludeWordsCollection(words: Iterable<string> | undefined): ExcludeWordsCollection;
|
|
9
|
+
//# sourceMappingURL=createWordsCollection.d.ts.map
|
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.createExcludeWordsCollection = exports.createExcludeWordsCollectionFromFiles = exports.createWordsCollection = exports.createWordsCollectionFromFiles = exports.createAllowedSplitWords = exports.createAllowedSplitWordsFromFiles = void 0;
|
|
4
|
+
const Reader_1 = require("./Reader");
|
|
5
|
+
const WordsCollection_1 = require("./WordsCollection");
|
|
6
|
+
class AllowedSplitWordsImpl {
|
|
7
|
+
constructor(collection) {
|
|
8
|
+
this.words = collection;
|
|
9
|
+
this.size = collection.size;
|
|
10
|
+
}
|
|
11
|
+
has(word) {
|
|
12
|
+
return !this.size || this.words.has(word);
|
|
13
|
+
}
|
|
14
|
+
}
|
|
15
|
+
async function createAllowedSplitWordsFromFiles(files) {
|
|
16
|
+
if (!files || !files.length)
|
|
17
|
+
return WordsCollection_1.defaultAllowedSplitWords;
|
|
18
|
+
const collection = await createWordsCollectionFromFiles(files);
|
|
19
|
+
return new AllowedSplitWordsImpl(collection);
|
|
20
|
+
}
|
|
21
|
+
exports.createAllowedSplitWordsFromFiles = createAllowedSplitWordsFromFiles;
|
|
22
|
+
function createAllowedSplitWords(words) {
|
|
23
|
+
if (!words)
|
|
24
|
+
return WordsCollection_1.defaultAllowedSplitWords;
|
|
25
|
+
return new AllowedSplitWordsImpl(createWordsCollection(words));
|
|
26
|
+
}
|
|
27
|
+
exports.createAllowedSplitWords = createAllowedSplitWords;
|
|
28
|
+
async function readFile(filename) {
|
|
29
|
+
const reader = await (0, Reader_1.createReader)(filename, {});
|
|
30
|
+
return [...reader];
|
|
31
|
+
}
|
|
32
|
+
const cache = new WeakMap();
|
|
33
|
+
async function createWordsCollectionFromFiles(files) {
|
|
34
|
+
files = Array.isArray(files) ? files : [files];
|
|
35
|
+
const cached = cache.get(files);
|
|
36
|
+
if (cached)
|
|
37
|
+
return cached;
|
|
38
|
+
const sources = await Promise.all(files.map((file) => readFile(file)));
|
|
39
|
+
const collection = createWordsCollection(sources.flatMap((a) => a));
|
|
40
|
+
cache.set(files, collection);
|
|
41
|
+
return collection;
|
|
42
|
+
}
|
|
43
|
+
exports.createWordsCollectionFromFiles = createWordsCollectionFromFiles;
|
|
44
|
+
function createWordsCollection(words) {
|
|
45
|
+
if (words instanceof Set)
|
|
46
|
+
return words;
|
|
47
|
+
const arrWords = (Array.isArray(words) ? words : [...words])
|
|
48
|
+
.map((a) => a.trim())
|
|
49
|
+
.filter((a) => !!a)
|
|
50
|
+
.filter((a) => !a.startsWith('#'));
|
|
51
|
+
return new Set(arrWords);
|
|
52
|
+
}
|
|
53
|
+
exports.createWordsCollection = createWordsCollection;
|
|
54
|
+
class ExcludeWordsCollectionImpl {
|
|
55
|
+
constructor(collection) {
|
|
56
|
+
this.words = collection;
|
|
57
|
+
this.size = collection.size;
|
|
58
|
+
}
|
|
59
|
+
has(word) {
|
|
60
|
+
return this.words.has(word);
|
|
61
|
+
}
|
|
62
|
+
}
|
|
63
|
+
async function createExcludeWordsCollectionFromFiles(files) {
|
|
64
|
+
if (!files || !files.length)
|
|
65
|
+
return WordsCollection_1.defaultExcludeWordsCollection;
|
|
66
|
+
const collection = await createWordsCollectionFromFiles(files);
|
|
67
|
+
return new ExcludeWordsCollectionImpl(collection);
|
|
68
|
+
}
|
|
69
|
+
exports.createExcludeWordsCollectionFromFiles = createExcludeWordsCollectionFromFiles;
|
|
70
|
+
function createExcludeWordsCollection(words) {
|
|
71
|
+
return new ExcludeWordsCollectionImpl(words ? createWordsCollection(words) : new Set());
|
|
72
|
+
}
|
|
73
|
+
exports.createExcludeWordsCollection = createExcludeWordsCollection;
|
|
74
|
+
//# sourceMappingURL=createWordsCollection.js.map
|
|
@@ -1,3 +1,4 @@
|
|
|
1
|
-
|
|
2
|
-
export declare function
|
|
1
|
+
import type { AllowedSplitWordsCollection } from './WordsCollection';
|
|
2
|
+
export declare function legacyLineToWords(line: string, keepCase: boolean, allowedSplitWords: AllowedSplitWordsCollection): Iterable<string>;
|
|
3
|
+
export declare function legacyLinesToWords(lines: Iterable<string>, keepCase: boolean, allowedSplitWords: AllowedSplitWordsCollection): Iterable<string>;
|
|
3
4
|
//# sourceMappingURL=legacyLineToWords.d.ts.map
|
|
@@ -1,54 +1,22 @@
|
|
|
1
1
|
"use strict";
|
|
2
|
-
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
|
|
3
|
-
if (k2 === undefined) k2 = k;
|
|
4
|
-
var desc = Object.getOwnPropertyDescriptor(m, k);
|
|
5
|
-
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
|
|
6
|
-
desc = { enumerable: true, get: function() { return m[k]; } };
|
|
7
|
-
}
|
|
8
|
-
Object.defineProperty(o, k2, desc);
|
|
9
|
-
}) : (function(o, m, k, k2) {
|
|
10
|
-
if (k2 === undefined) k2 = k;
|
|
11
|
-
o[k2] = m[k];
|
|
12
|
-
}));
|
|
13
|
-
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
|
|
14
|
-
Object.defineProperty(o, "default", { enumerable: true, value: v });
|
|
15
|
-
}) : function(o, v) {
|
|
16
|
-
o["default"] = v;
|
|
17
|
-
});
|
|
18
|
-
var __importStar = (this && this.__importStar) || function (mod) {
|
|
19
|
-
if (mod && mod.__esModule) return mod;
|
|
20
|
-
var result = {};
|
|
21
|
-
if (mod != null) for (var k in mod) if (k !== "default" && Object.prototype.hasOwnProperty.call(mod, k)) __createBinding(result, mod, k);
|
|
22
|
-
__setModuleDefault(result, mod);
|
|
23
|
-
return result;
|
|
24
|
-
};
|
|
25
2
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
26
3
|
exports.legacyLinesToWords = exports.legacyLineToWords = void 0;
|
|
27
4
|
const sync_1 = require("@cspell/cspell-pipe/sync");
|
|
28
|
-
const
|
|
29
|
-
const regNonWord = /[^\p{L}\p{M}' ]+/giu;
|
|
30
|
-
const
|
|
31
|
-
|
|
32
|
-
function legacyLineToWords(line, keepCase) {
|
|
5
|
+
const splitCamelCaseIfAllowed_1 = require("./splitCamelCaseIfAllowed");
|
|
6
|
+
const regNonWord = /[^\p{L}\p{M}' \d]+/giu;
|
|
7
|
+
const regExpRepeatChars = /(.)\1{5}/i;
|
|
8
|
+
function legacyLineToWords(line, keepCase, allowedSplitWords) {
|
|
33
9
|
// Remove punctuation and non-letters.
|
|
34
10
|
const filteredLine = line.replace(regNonWord, '|');
|
|
35
11
|
const wordGroups = filteredLine.split('|');
|
|
36
|
-
const words = (0, sync_1.pipe)(wordGroups, (0, sync_1.opConcatMap)((a) => [
|
|
12
|
+
const words = (0, sync_1.pipe)(wordGroups, (0, sync_1.opConcatMap)((a) => [...a.split(splitCamelCaseIfAllowed_1.regExpSpaceOrDash)]), (0, sync_1.opConcatMap)((a) => (0, splitCamelCaseIfAllowed_1.splitCamelCaseIfAllowed)(a, allowedSplitWords, keepCase)), (0, sync_1.opMap)((a) => a.trim()), (0, sync_1.opFilter)((a) => !!a), (0, sync_1.opFilter)((s) => !regExpRepeatChars.test(s)));
|
|
37
13
|
return words;
|
|
38
14
|
}
|
|
39
15
|
exports.legacyLineToWords = legacyLineToWords;
|
|
40
|
-
function* legacyLinesToWords(lines, keepCase
|
|
16
|
+
function* legacyLinesToWords(lines, keepCase, allowedSplitWords) {
|
|
41
17
|
for (const line of lines) {
|
|
42
|
-
yield* legacyLineToWords(line, keepCase);
|
|
18
|
+
yield* legacyLineToWords(line, keepCase, allowedSplitWords);
|
|
43
19
|
}
|
|
44
20
|
}
|
|
45
21
|
exports.legacyLinesToWords = legacyLinesToWords;
|
|
46
|
-
function splitCamelCase(word) {
|
|
47
|
-
const splitWords = Text.splitCamelCaseWord(word);
|
|
48
|
-
// We only want to preserve this: "New York" and not "Namespace DNSLookup"
|
|
49
|
-
if (splitWords.length > 1 && regExpSpaceOrDash.test(word)) {
|
|
50
|
-
return (0, sync_1.pipe)(splitWords, (0, sync_1.opConcatMap)((w) => w.split(regExpSpaceOrDash)));
|
|
51
|
-
}
|
|
52
|
-
return splitWords;
|
|
53
|
-
}
|
|
54
22
|
//# sourceMappingURL=legacyLineToWords.js.map
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
export interface ReaderOptions {
|
|
2
|
+
/**
|
|
3
|
+
* Max Hunspell recursive depth.
|
|
4
|
+
*/
|
|
5
|
+
maxDepth?: number;
|
|
6
|
+
}
|
|
7
|
+
export type AnnotatedWord = string;
|
|
8
|
+
export interface BaseReader {
|
|
9
|
+
size: number;
|
|
10
|
+
type: 'Hunspell' | 'TextFile' | 'Trie';
|
|
11
|
+
lines: Iterable<AnnotatedWord>;
|
|
12
|
+
}
|
|
13
|
+
export interface Reader extends BaseReader, Iterable<string> {
|
|
14
|
+
}
|
|
15
|
+
//# sourceMappingURL=ReaderOptions.d.ts.map
|
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
|
|
3
|
+
if (k2 === undefined) k2 = k;
|
|
4
|
+
var desc = Object.getOwnPropertyDescriptor(m, k);
|
|
5
|
+
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
|
|
6
|
+
desc = { enumerable: true, get: function() { return m[k]; } };
|
|
7
|
+
}
|
|
8
|
+
Object.defineProperty(o, k2, desc);
|
|
9
|
+
}) : (function(o, m, k, k2) {
|
|
10
|
+
if (k2 === undefined) k2 = k;
|
|
11
|
+
o[k2] = m[k];
|
|
12
|
+
}));
|
|
13
|
+
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
|
|
14
|
+
Object.defineProperty(o, "default", { enumerable: true, value: v });
|
|
15
|
+
}) : function(o, v) {
|
|
16
|
+
o["default"] = v;
|
|
17
|
+
});
|
|
18
|
+
var __importStar = (this && this.__importStar) || function (mod) {
|
|
19
|
+
if (mod && mod.__esModule) return mod;
|
|
20
|
+
var result = {};
|
|
21
|
+
if (mod != null) for (var k in mod) if (k !== "default" && Object.prototype.hasOwnProperty.call(mod, k)) __createBinding(result, mod, k);
|
|
22
|
+
__setModuleDefault(result, mod);
|
|
23
|
+
return result;
|
|
24
|
+
};
|
|
25
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
26
|
+
exports.readHunspellFiles = void 0;
|
|
27
|
+
const sync_1 = require("@cspell/cspell-pipe/sync");
|
|
28
|
+
const cspell_trie_lib_1 = require("cspell-trie-lib");
|
|
29
|
+
const HR = __importStar(require("hunspell-reader"));
|
|
30
|
+
const Reader_1 = require("../Reader");
|
|
31
|
+
const DEDUPE_SIZE = 1000;
|
|
32
|
+
async function readHunspellFiles(filename, options) {
|
|
33
|
+
const dicFile = filename.replace(Reader_1.regHunspellFile, '.dic');
|
|
34
|
+
const affFile = filename.replace(Reader_1.regHunspellFile, '.aff');
|
|
35
|
+
const reader = await HR.IterableHunspellReader.createFromFiles(affFile, dicFile);
|
|
36
|
+
reader.maxDepth = options.maxDepth !== undefined ? options.maxDepth : reader.maxDepth;
|
|
37
|
+
const words = (0, sync_1.pipe)(reader.seqAffWords(), _mapAffWords, dedupeAndSort);
|
|
38
|
+
return {
|
|
39
|
+
type: 'Hunspell',
|
|
40
|
+
size: reader.dic.length,
|
|
41
|
+
lines: words,
|
|
42
|
+
};
|
|
43
|
+
}
|
|
44
|
+
exports.readHunspellFiles = readHunspellFiles;
|
|
45
|
+
function* dedupeAndSort(words) {
|
|
46
|
+
const buffer = new Set();
|
|
47
|
+
function flush() {
|
|
48
|
+
const result = [...buffer].sort();
|
|
49
|
+
buffer.clear();
|
|
50
|
+
return result;
|
|
51
|
+
}
|
|
52
|
+
for (const word of words) {
|
|
53
|
+
buffer.add(word);
|
|
54
|
+
if (buffer.size >= DEDUPE_SIZE) {
|
|
55
|
+
yield* flush();
|
|
56
|
+
}
|
|
57
|
+
}
|
|
58
|
+
yield* flush();
|
|
59
|
+
}
|
|
60
|
+
function* _mapAffWords(affWords) {
|
|
61
|
+
const hasSpecial = /[~+!]/;
|
|
62
|
+
for (const affWord of affWords) {
|
|
63
|
+
const { word, flags } = affWord;
|
|
64
|
+
// For now do not include words with special characters.
|
|
65
|
+
if (hasSpecial.test(word))
|
|
66
|
+
continue;
|
|
67
|
+
const compound = flags.isCompoundForbidden ? '' : cspell_trie_lib_1.COMPOUND_FIX;
|
|
68
|
+
const forbid = flags.isForbiddenWord ? cspell_trie_lib_1.FORBID_PREFIX : '';
|
|
69
|
+
if (!forbid) {
|
|
70
|
+
if (flags.canBeCompoundBegin || flags.isCompoundPermitted)
|
|
71
|
+
yield word + compound;
|
|
72
|
+
if (flags.canBeCompoundEnd || flags.isCompoundPermitted)
|
|
73
|
+
yield compound + word;
|
|
74
|
+
if (flags.canBeCompoundMiddle || flags.isCompoundPermitted)
|
|
75
|
+
yield compound + word + compound;
|
|
76
|
+
if (!flags.isOnlyAllowedInCompound)
|
|
77
|
+
yield word;
|
|
78
|
+
}
|
|
79
|
+
else {
|
|
80
|
+
yield forbid + word;
|
|
81
|
+
}
|
|
82
|
+
}
|
|
83
|
+
}
|
|
84
|
+
//# sourceMappingURL=readHunspellFiles.js.map
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.textFileReader = void 0;
|
|
4
|
+
const readTextFile_1 = require("./readTextFile");
|
|
5
|
+
async function textFileReader(filename) {
|
|
6
|
+
const content = await (0, readTextFile_1.readTextFile)(filename);
|
|
7
|
+
const words = content.split('\n').map((s) => s.trim());
|
|
8
|
+
return {
|
|
9
|
+
type: 'TextFile',
|
|
10
|
+
size: words.length,
|
|
11
|
+
lines: words,
|
|
12
|
+
};
|
|
13
|
+
}
|
|
14
|
+
exports.textFileReader = textFileReader;
|
|
15
|
+
//# sourceMappingURL=textFileReader.js.map
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.trieFileReader = void 0;
|
|
4
|
+
const cspell_trie_lib_1 = require("cspell-trie-lib");
|
|
5
|
+
const readTextFile_1 = require("./readTextFile");
|
|
6
|
+
async function trieFileReader(filename) {
|
|
7
|
+
const trieRoot = (0, cspell_trie_lib_1.importTrie)(await (0, readTextFile_1.readTextFileLines)(filename));
|
|
8
|
+
const trie = new cspell_trie_lib_1.Trie(trieRoot);
|
|
9
|
+
const words = trie.words();
|
|
10
|
+
return {
|
|
11
|
+
type: 'Trie',
|
|
12
|
+
get size() {
|
|
13
|
+
return trie.size();
|
|
14
|
+
},
|
|
15
|
+
lines: words,
|
|
16
|
+
};
|
|
17
|
+
}
|
|
18
|
+
exports.trieFileReader = trieFileReader;
|
|
19
|
+
//# sourceMappingURL=trieFileReader.js.map
|
|
@@ -0,0 +1,5 @@
|
|
|
1
|
+
import type { AllowedSplitWordsCollection } from './WordsCollection';
/** Matches runs of spaces or dashes (global flag). */
export declare const regExpSpaceOrDash: RegExp;
/** Matches tokens that are purely digits. */
export declare const regExpIsNumber: RegExp;
/**
 * Split a camelCase word into its parts, but only when every part is found
 * in `allowedWords`; otherwise the original word is returned as-is.
 */
export declare function splitCamelCaseIfAllowed(word: string, allowedWords: AllowedSplitWordsCollection, keepCase: boolean): string[];
//# sourceMappingURL=splitCamelCaseIfAllowed.d.ts.map
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
|
|
3
|
+
if (k2 === undefined) k2 = k;
|
|
4
|
+
var desc = Object.getOwnPropertyDescriptor(m, k);
|
|
5
|
+
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
|
|
6
|
+
desc = { enumerable: true, get: function() { return m[k]; } };
|
|
7
|
+
}
|
|
8
|
+
Object.defineProperty(o, k2, desc);
|
|
9
|
+
}) : (function(o, m, k, k2) {
|
|
10
|
+
if (k2 === undefined) k2 = k;
|
|
11
|
+
o[k2] = m[k];
|
|
12
|
+
}));
|
|
13
|
+
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
|
|
14
|
+
Object.defineProperty(o, "default", { enumerable: true, value: v });
|
|
15
|
+
}) : function(o, v) {
|
|
16
|
+
o["default"] = v;
|
|
17
|
+
});
|
|
18
|
+
var __importStar = (this && this.__importStar) || function (mod) {
|
|
19
|
+
if (mod && mod.__esModule) return mod;
|
|
20
|
+
var result = {};
|
|
21
|
+
if (mod != null) for (var k in mod) if (k !== "default" && Object.prototype.hasOwnProperty.call(mod, k)) __createBinding(result, mod, k);
|
|
22
|
+
__setModuleDefault(result, mod);
|
|
23
|
+
return result;
|
|
24
|
+
};
|
|
25
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
26
|
+
exports.splitCamelCaseIfAllowed = exports.regExpIsNumber = exports.regExpSpaceOrDash = void 0;
|
|
27
|
+
const Text = __importStar(require("./text"));
|
|
28
|
+
// Matches runs of spaces/dashes (global flag) — used to further split entries like "New York".
exports.regExpSpaceOrDash = /[- ]+/g;
// Matches purely numeric tokens, which are dropped when splitting camelCase.
exports.regExpIsNumber = /^\d+$/;
|
|
30
|
+
/**
 * Split a camelCase word into its parts, but only when every resulting part
 * is known to `allowedWords`; otherwise the original word is kept whole.
 * Case of the parts is normalized via adjustCases.
 */
function splitCamelCaseIfAllowed(word, allowedWords, keepCase) {
    const parts = [...splitCamelCase(word)];
    // A multi-part split is only accepted when every part is a known word.
    if (parts.length !== 1 && parts.some((part) => isUnknown(part, allowedWords))) {
        return [word];
    }
    return adjustCases(parts, allowedWords, keepCase);
}
exports.splitCamelCaseIfAllowed = splitCamelCaseIfAllowed;
|
|
40
|
+
// Normalize the case of each word in the list (see adjustCase).
function adjustCases(words, allowedWords, keepCase) {
    const adjusted = [];
    for (const word of words) {
        adjusted.push(adjustCase(word, allowedWords, keepCase));
    }
    return adjusted;
}
|
|
43
|
+
function adjustCase(word, allowedWords, keepCase) {
|
|
44
|
+
const lc = word.toLowerCase();
|
|
45
|
+
if (!allowedWords.has(lc))
|
|
46
|
+
return word;
|
|
47
|
+
if (lc === word)
|
|
48
|
+
return word;
|
|
49
|
+
if (word.slice(1).toLowerCase() === word.slice(1))
|
|
50
|
+
return lc;
|
|
51
|
+
if (!keepCase && word.toUpperCase() === word)
|
|
52
|
+
return word.toLowerCase();
|
|
53
|
+
return word;
|
|
54
|
+
}
|
|
55
|
+
function isUnknown(word, allowedWords) {
|
|
56
|
+
return !allowedWords.has(word) && !allowedWords.has(word.toLowerCase());
|
|
57
|
+
}
|
|
58
|
+
/**
 * Split a word on camelCase boundaries, dropping purely numeric parts.
 * When the original entry contains spaces/dashes (e.g. "New York"), the
 * parts are additionally split on those separators.
 */
function splitCamelCase(word) {
    const splitWords = Text.splitCamelCaseWord(word).filter((part) => !exports.regExpIsNumber.test(part));
    // We only want to preserve this: "New York" and not "Namespace DNSLookup"
    if (splitWords.length > 1) {
        // regExpSpaceOrDash carries the /g flag, so .test() advances its
        // lastIndex on a match. Reset it first, otherwise state left over
        // from a previous call can make this test fail intermittently.
        exports.regExpSpaceOrDash.lastIndex = 0;
        if (exports.regExpSpaceOrDash.test(word)) {
            return splitWords.flatMap((w) => w.split(exports.regExpSpaceOrDash));
        }
    }
    return splitWords;
}
|
|
66
|
+
//# sourceMappingURL=splitCamelCaseIfAllowed.js.map
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.streamSourceWordsFromFile = void 0;
|
|
4
|
+
const SourceReader_1 = require("./SourceReader");
|
|
5
|
+
async function streamSourceWordsFromFile(filename, options) {
|
|
6
|
+
const reader = await (0, SourceReader_1.createSourceReader)(filename, options);
|
|
7
|
+
return reader.words;
|
|
8
|
+
}
|
|
9
|
+
exports.streamSourceWordsFromFile = streamSourceWordsFromFile;
|
|
10
|
+
//# sourceMappingURL=streamSourceWordsFromFile.js.map
|
package/dist/compiler/text.js
CHANGED
|
@@ -3,17 +3,17 @@
|
|
|
3
3
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
4
4
|
exports.splitCamelCaseWord = void 0;
|
|
5
5
|
// Collapses ALL-CAPS suffix forms such as "ABCs" so they stay one word.
const regExUpperSOrIng = /(\p{Lu}+'?(?:s|ing|ies|es|ings|ed|ning))(?!\p{Ll})/gu;
// lower -> UPPER boundary, e.g. "camelCase".
const regExSplitWords = /([\p{Ll}])([\p{Lu}])/gu;
// UPPER run followed by a capitalized word, e.g. "DNSLookup".
const regExSplitWords2 = /(\p{Lu})(\p{Lu}\p{Ll})/gu;
/**
 * Split camelCase words into an array of strings.
 */
function splitCamelCaseWord(word) {
    // Normalize uppercase suffix forms first: "ABCs" -> "Abcs".
    const normalized = word.replace(regExUpperSOrIng, (match) => match[0] + match.slice(1).toLowerCase());
    const separated = normalized
        .replace(regExSplitWords, '$1|$2')
        .replace(regExSplitWords2, '$1|$2')
        .replace(/[\d_]+/g, '|');
    return separated.split('|').filter(Boolean);
}
|
|
18
18
|
exports.splitCamelCaseWord = splitCamelCaseWord;
|
|
19
19
|
//# sourceMappingURL=text.js.map
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import { type Operator } from '@cspell/cspell-pipe/sync';
|
|
2
2
|
import type { CompileOptions } from './CompileOptions';
|
|
3
|
+
import type { AllowedSplitWordsCollection } from './WordsCollection';
|
|
3
4
|
export declare function normalizeTargetWords(options: CompileOptions): Operator<string>;
|
|
4
5
|
export interface ParseFileOptions {
|
|
5
6
|
/**
|
|
@@ -22,6 +23,7 @@ export interface ParseFileOptions {
|
|
|
22
23
|
* @default false
|
|
23
24
|
*/
|
|
24
25
|
legacy?: boolean;
|
|
26
|
+
allowedSplitWords: AllowedSplitWordsCollection;
|
|
25
27
|
}
|
|
26
28
|
type ParseFileOptionsRequired = Required<ParseFileOptions>;
|
|
27
29
|
export declare const defaultParseDictionaryOptions: ParseFileOptionsRequired;
|
|
@@ -41,6 +43,6 @@ export declare function createParseFileLineMapper(options?: Partial<ParseFileOpt
|
|
|
41
43
|
* @param _options - defines prefixes used when parsing lines.
|
|
42
44
|
* @returns words that have been normalized.
|
|
43
45
|
*/
|
|
44
|
-
export declare function parseFileLines(lines: Iterable<string> | string, options
|
|
46
|
+
export declare function parseFileLines(lines: Iterable<string> | string, options: Partial<ParseFileOptions>): Iterable<string>;
|
|
45
47
|
export {};
|
|
46
48
|
//# sourceMappingURL=wordListParser.d.ts.map
|
|
@@ -5,6 +5,7 @@ const sync_1 = require("@cspell/cspell-pipe/sync");
|
|
|
5
5
|
const cspell_trie_lib_1 = require("cspell-trie-lib");
|
|
6
6
|
const util_1 = require("hunspell-reader/dist/util");
|
|
7
7
|
const legacyLineToWords_1 = require("./legacyLineToWords");
|
|
8
|
+
const splitCamelCaseIfAllowed_1 = require("./splitCamelCaseIfAllowed");
|
|
8
9
|
function normalizeTargetWords(options) {
|
|
9
10
|
const lineParser = (0, cspell_trie_lib_1.createDictionaryLineParser)({
|
|
10
11
|
stripCaseAndAccents: options.generateNonStrict,
|
|
@@ -45,6 +46,7 @@ const _defaultOptions = {
|
|
|
45
46
|
split: false,
|
|
46
47
|
splitKeepBoth: false,
|
|
47
48
|
// splitSeparator: regExpSplit,
|
|
49
|
+
allowedSplitWords: { has: () => true, size: 0 },
|
|
48
50
|
};
|
|
49
51
|
exports.defaultParseDictionaryOptions = Object.freeze(_defaultOptions);
|
|
50
52
|
exports.cSpellToolDirective = 'cspell-tools:';
|
|
@@ -57,10 +59,9 @@ exports.setOfCSpellDirectiveFlags = ['no-split', 'split', 'keep-case', 'no-keep-
|
|
|
57
59
|
*/
|
|
58
60
|
function createParseFileLineMapper(options) {
|
|
59
61
|
const _options = options || _defaultOptions;
|
|
60
|
-
const { splitKeepBoth = _defaultOptions.splitKeepBoth } = _options;
|
|
62
|
+
const { splitKeepBoth = _defaultOptions.splitKeepBoth, allowedSplitWords = _defaultOptions.allowedSplitWords } = _options;
|
|
61
63
|
let { legacy = _defaultOptions.legacy } = _options;
|
|
62
64
|
let { split = _defaultOptions.split, keepCase = legacy ? false : _defaultOptions.keepCase } = _options;
|
|
63
|
-
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
64
65
|
function isString(line) {
|
|
65
66
|
return typeof line === 'string';
|
|
66
67
|
}
|
|
@@ -131,11 +132,17 @@ function createParseFileLineMapper(options) {
|
|
|
131
132
|
function* splitWords(lines) {
|
|
132
133
|
for (const line of lines) {
|
|
133
134
|
if (legacy) {
|
|
134
|
-
yield* (0, legacyLineToWords_1.legacyLineToWords)(line, keepCase);
|
|
135
|
+
yield* (0, legacyLineToWords_1.legacyLineToWords)(line, keepCase, allowedSplitWords);
|
|
135
136
|
continue;
|
|
136
137
|
}
|
|
137
138
|
if (split) {
|
|
138
|
-
|
|
139
|
+
const words = splitLine(line);
|
|
140
|
+
if (!allowedSplitWords.size) {
|
|
141
|
+
yield* words;
|
|
142
|
+
}
|
|
143
|
+
else {
|
|
144
|
+
yield* words.flatMap((word) => (0, splitCamelCaseIfAllowed_1.splitCamelCaseIfAllowed)(word, allowedSplitWords, keepCase));
|
|
145
|
+
}
|
|
139
146
|
if (!splitKeepBoth)
|
|
140
147
|
continue;
|
|
141
148
|
}
|
package/dist/config/config.d.ts
CHANGED
|
@@ -39,6 +39,16 @@ export interface CompileTargetOptions {
|
|
|
39
39
|
* @default true
|
|
40
40
|
*/
|
|
41
41
|
sort?: boolean | undefined;
|
|
42
|
+
/**
|
|
43
|
+
* Words in the `allowedSplitWords` are considered correct and can be used
|
|
44
|
+
* as a basis for splitting compound words.
|
|
45
|
+
*
|
|
46
|
+
* If entries can be split so that all the words in the entry are allowed,
|
|
47
|
+
* then only the individual words are added, otherwise the entire entry is added.
|
|
48
|
+
* This is to prevent misspellings in CamelCase words from being introduced into the
|
|
49
|
+
* dictionary.
|
|
50
|
+
*/
|
|
51
|
+
allowedSplitWords?: FilePath | FilePath[] | undefined;
|
|
42
52
|
}
|
|
43
53
|
export interface Target extends CompileTargetOptions {
|
|
44
54
|
/**
|
|
@@ -66,6 +76,8 @@ export interface Target extends CompileTargetOptions {
|
|
|
66
76
|
/**
|
|
67
77
|
* Words from the sources that are found in `excludeWordsFrom` files
|
|
68
78
|
* will not be added to the dictionary.
|
|
79
|
+
*
|
|
80
|
+
* @version TBD
|
|
69
81
|
*/
|
|
70
82
|
excludeWordsFrom?: FilePath[] | undefined;
|
|
71
83
|
/**
|
|
@@ -105,5 +117,6 @@ export interface CompileSourceOptions {
|
|
|
105
117
|
* @default false
|
|
106
118
|
*/
|
|
107
119
|
keepRawCase?: boolean | undefined;
|
|
120
|
+
allowedSplitWords?: FilePath | FilePath[] | undefined;
|
|
108
121
|
}
|
|
109
122
|
//# sourceMappingURL=config.d.ts.map
|
|
@@ -39,4 +39,5 @@ export interface TestHelper {
|
|
|
39
39
|
fileExists(path: string): Promise<boolean>;
|
|
40
40
|
}
|
|
41
41
|
export declare function createTestHelper(testFilename?: string): TestHelper;
|
|
42
|
+
export declare function resolvePathToFixture(...segments: string[]): string;
|
|
42
43
|
//# sourceMappingURL=TestHelper.d.ts.map
|
package/dist/test/TestHelper.js
CHANGED
|
@@ -23,7 +23,7 @@ var __importStar = (this && this.__importStar) || function (mod) {
|
|
|
23
23
|
return result;
|
|
24
24
|
};
|
|
25
25
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
26
|
-
exports.createTestHelper = void 0;
|
|
26
|
+
exports.resolvePathToFixture = exports.createTestHelper = void 0;
|
|
27
27
|
const fs_1 = require("fs");
|
|
28
28
|
const path = __importStar(require("path"));
|
|
29
29
|
const shell = __importStar(require("shelljs"));
|
|
@@ -34,6 +34,7 @@ function createTestHelper(testFilename) {
|
|
|
34
34
|
return new TestHelperImpl(testFilename || expect.getState().testPath || 'test');
|
|
35
35
|
}
|
|
36
36
|
exports.createTestHelper = createTestHelper;
|
|
37
|
+
const fixtureDir = path.join(packageRoot, 'fixtures');
|
|
37
38
|
class TestHelperImpl {
|
|
38
39
|
constructor(testFilename) {
|
|
39
40
|
this.packageRoot = packageRoot;
|
|
@@ -41,7 +42,7 @@ class TestHelperImpl {
|
|
|
41
42
|
this.testCounter = new Map();
|
|
42
43
|
this.createTempDir = this.mkdir;
|
|
43
44
|
this.tempDir = path.join(tempDirBase, path.relative(packageRoot, testFilename));
|
|
44
|
-
this.fixtureDir =
|
|
45
|
+
this.fixtureDir = fixtureDir;
|
|
45
46
|
}
|
|
46
47
|
beginTest() {
|
|
47
48
|
const currentTestName = this.getRawTestName();
|
|
@@ -128,4 +129,8 @@ class TestHelperImpl {
|
|
|
128
129
|
}
|
|
129
130
|
}
|
|
130
131
|
}
|
|
132
|
+
/**
 * Resolve a path inside the package's `fixtures/` directory
 * (fixtureDir is a module-level constant based on the package root).
 */
function resolvePathToFixture(...segments) {
    return path.resolve(fixtureDir, ...segments);
}
exports.resolvePathToFixture = resolvePathToFixture;
|
|
131
136
|
//# sourceMappingURL=TestHelper.js.map
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@cspell/cspell-tools",
|
|
3
|
-
"version": "6.
|
|
3
|
+
"version": "6.29.1",
|
|
4
4
|
"description": "Tools to assist with the development of cSpell",
|
|
5
5
|
"typings": "dist/index.d.ts",
|
|
6
6
|
"publishConfig": {
|
|
@@ -48,13 +48,13 @@
|
|
|
48
48
|
},
|
|
49
49
|
"homepage": "https://github.com/streetsidesoftware/cspell#readme",
|
|
50
50
|
"dependencies": {
|
|
51
|
-
"@cspell/cspell-pipe": "6.
|
|
51
|
+
"@cspell/cspell-pipe": "6.29.1",
|
|
52
52
|
"commander": "^10.0.0",
|
|
53
53
|
"cosmiconfig": "^8.1.0",
|
|
54
|
-
"cspell-trie-lib": "6.
|
|
54
|
+
"cspell-trie-lib": "6.29.1",
|
|
55
55
|
"gensequence": "^5.0.2",
|
|
56
56
|
"glob": "^8.1.0",
|
|
57
|
-
"hunspell-reader": "6.
|
|
57
|
+
"hunspell-reader": "6.29.1"
|
|
58
58
|
},
|
|
59
59
|
"engines": {
|
|
60
60
|
"node": ">=14"
|
|
@@ -63,11 +63,11 @@
|
|
|
63
63
|
"@types/glob": "^8.1.0",
|
|
64
64
|
"@types/jest": "^29.4.0",
|
|
65
65
|
"@types/shelljs": "^0.8.11",
|
|
66
|
-
"jest": "^29.
|
|
66
|
+
"jest": "^29.5.0",
|
|
67
67
|
"lorem-ipsum": "^2.0.8",
|
|
68
68
|
"shelljs": "^0.8.5",
|
|
69
69
|
"ts-json-schema-generator": "^1.2.0"
|
|
70
70
|
},
|
|
71
71
|
"main": "bin.js",
|
|
72
|
-
"gitHead": "
|
|
72
|
+
"gitHead": "e524c611f3529b22a7e8ae3449a5c9a01332d44f"
|
|
73
73
|
}
|
|
@@ -1,10 +0,0 @@
|
|
|
1
|
-
"use strict";
|
|
2
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
-
exports.streamWordsFromFile = void 0;
|
|
4
|
-
const Reader_1 = require("./Reader");
|
|
5
|
-
async function streamWordsFromFile(filename, options) {
|
|
6
|
-
const reader = await (0, Reader_1.createReader)(filename, options);
|
|
7
|
-
return reader;
|
|
8
|
-
}
|
|
9
|
-
exports.streamWordsFromFile = streamWordsFromFile;
|
|
10
|
-
//# sourceMappingURL=iterateWordsFromFile.js.map
|
|
File without changes
|
|
File without changes
|