cspell-dictionary 6.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77)
  1. package/LICENSE +21 -0
  2. package/README.md +3 -0
  3. package/dist/SpellingDictionary/Dictionaries.d.ts +9 -0
  4. package/dist/SpellingDictionary/Dictionaries.js +61 -0
  5. package/dist/SpellingDictionary/SpellingDictionary.d.ts +93 -0
  6. package/dist/SpellingDictionary/SpellingDictionary.js +6 -0
  7. package/dist/SpellingDictionary/SpellingDictionaryCollection.d.ts +34 -0
  8. package/dist/SpellingDictionary/SpellingDictionaryCollection.js +111 -0
  9. package/dist/SpellingDictionary/SpellingDictionaryError.d.ts +10 -0
  10. package/dist/SpellingDictionary/SpellingDictionaryError.js +18 -0
  11. package/dist/SpellingDictionary/SpellingDictionaryFromTrie.d.ts +36 -0
  12. package/dist/SpellingDictionary/SpellingDictionaryFromTrie.js +148 -0
  13. package/dist/SpellingDictionary/SpellingDictionaryMethods.d.ts +29 -0
  14. package/dist/SpellingDictionary/SpellingDictionaryMethods.js +114 -0
  15. package/dist/SpellingDictionary/charset.d.ts +3 -0
  16. package/dist/SpellingDictionary/charset.js +16 -0
  17. package/dist/SpellingDictionary/createSpellingDictionary.d.ts +17 -0
  18. package/dist/SpellingDictionary/createSpellingDictionary.js +91 -0
  19. package/dist/SpellingDictionary/index.d.ts +4 -0
  20. package/dist/SpellingDictionary/index.js +9 -0
  21. package/dist/index.d.ts +3 -0
  22. package/dist/index.js +8 -0
  23. package/dist/util/Comparable.d.ts +20 -0
  24. package/dist/util/Comparable.js +55 -0
  25. package/dist/util/FreqCounter.d.ts +16 -0
  26. package/dist/util/FreqCounter.js +52 -0
  27. package/dist/util/IterableLike.d.ts +4 -0
  28. package/dist/util/IterableLike.js +3 -0
  29. package/dist/util/Memorizer.d.ts +65 -0
  30. package/dist/util/Memorizer.js +138 -0
  31. package/dist/util/MinHeapQueue.d.ts +23 -0
  32. package/dist/util/MinHeapQueue.js +97 -0
  33. package/dist/util/PairingHeap.d.ts +32 -0
  34. package/dist/util/PairingHeap.js +90 -0
  35. package/dist/util/TextMap.d.ts +15 -0
  36. package/dist/util/TextMap.js +62 -0
  37. package/dist/util/TextRange.d.ts +28 -0
  38. package/dist/util/TextRange.js +144 -0
  39. package/dist/util/clean.d.ts +7 -0
  40. package/dist/util/clean.js +18 -0
  41. package/dist/util/debugPerf.d.ts +9 -0
  42. package/dist/util/debugPerf.js +22 -0
  43. package/dist/util/errors.d.ts +17 -0
  44. package/dist/util/errors.js +52 -0
  45. package/dist/util/fileReader.d.ts +4 -0
  46. package/dist/util/fileReader.js +21 -0
  47. package/dist/util/iterableIteratorLib.d.ts +4 -0
  48. package/dist/util/iterableIteratorLib.js +14 -0
  49. package/dist/util/logger.d.ts +33 -0
  50. package/dist/util/logger.js +46 -0
  51. package/dist/util/memorizerWeak.d.ts +6 -0
  52. package/dist/util/memorizerWeak.js +42 -0
  53. package/dist/util/regexHelper.d.ts +7 -0
  54. package/dist/util/regexHelper.js +13 -0
  55. package/dist/util/repMap.d.ts +4 -0
  56. package/dist/util/repMap.js +38 -0
  57. package/dist/util/resolveFile.d.ts +13 -0
  58. package/dist/util/resolveFile.js +127 -0
  59. package/dist/util/search.d.ts +6 -0
  60. package/dist/util/search.js +23 -0
  61. package/dist/util/simpleCache.d.ts +46 -0
  62. package/dist/util/simpleCache.js +143 -0
  63. package/dist/util/text.d.ts +9 -0
  64. package/dist/util/text.js +55 -0
  65. package/dist/util/textRegex.d.ts +1 -0
  66. package/dist/util/textRegex.js +2 -0
  67. package/dist/util/timer.d.ts +26 -0
  68. package/dist/util/timer.js +58 -0
  69. package/dist/util/types.d.ts +7 -0
  70. package/dist/util/types.js +3 -0
  71. package/dist/util/util.d.ts +2 -0
  72. package/dist/util/util.js +8 -0
  73. package/dist/util/util.test copy.d.ts +2 -0
  74. package/dist/util/util.test copy.js +17 -0
  75. package/dist/util/wordSplitter.d.ts +46 -0
  76. package/dist/util/wordSplitter.js +326 -0
  77. package/package.json +47 -0
@@ -0,0 +1,7 @@
1
+ /**
2
+ * Like `Required`, but keeps the optional (`?`) modifier: only `undefined` is excluded from each property's type.
3
+ */
4
+ export declare type RemoveUndefined<T> = {
5
+ [P in keyof T]: Exclude<T[P], undefined>;
6
+ };
7
+ //# sourceMappingURL=types.d.ts.map
@@ -0,0 +1,3 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ //# sourceMappingURL=types.js.map
@@ -0,0 +1,2 @@
1
+ export declare function isDefined<T>(v: T | undefined): v is T; // Type guard: true when `v` is not `undefined`.
2
+ //# sourceMappingURL=util.d.ts.map
@@ -0,0 +1,8 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.isDefined = void 0;
4
/**
 * Type guard helper: reports whether a value is anything other than `undefined`.
 * `null`, `0`, `''` and `false` all count as defined.
 *
 * @param v - the value to test.
 * @returns `false` only when `v` is strictly `undefined`.
 */
function isDefined(v) {
    return !(v === undefined);
}
7
+ exports.isDefined = isDefined;
8
+ //# sourceMappingURL=util.js.map
@@ -0,0 +1,2 @@
1
+ export {};
2
+ //# sourceMappingURL=util.test%20copy.d.ts.map
@@ -0,0 +1,17 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ const clean_1 = require("./clean");
4
+ describe('Validate util', () => { // NOTE(review): this file is named 'util.test copy.js', which the `!**/*.test.*` exclusion in package.json does not match — it presumably shipped by accident; confirm.
5
+ test('tests clean up obj', () => { // Checks which keys remain after `clean` is applied to an object that has undefined values.
6
+ const obj = {
7
+ a: undefined,
8
+ b: 1,
9
+ c: true,
10
+ d: undefined,
11
+ e: 'str',
12
+ };
13
+ const cleanObj = (0, clean_1.clean)(obj);
14
+ expect([...Object.keys(cleanObj)]).toEqual(['b', 'c', 'e']); // 'a' and 'd' (undefined values) are expected to be gone
15
+ });
16
+ });
17
+ //# sourceMappingURL=util.test%20copy.js.map
@@ -0,0 +1,46 @@
1
+ import type { TextOffset } from '@cspell/cspell-types';
2
+ export declare type IsValidWordFn = (word: TextOffset) => boolean; // Callback given to `split`; its answer for a word is what ends up in `isFound`.
3
+ export interface SplitResult {
4
+ /** Original line passed to the split function */
5
+ line: TextOffset;
6
+ /** Starting point of processing - Original offset passed to the split function */
7
+ offset: number;
8
+ /** The span of text that was split */
9
+ text: TextOffset;
10
+ /** The collection of words that `text` was split into */
11
+ words: TextOffsetWithValid[];
12
+ /** The offset at which the split stopped (the offset just past the end of `text`) */
13
+ endOffset: number;
14
+ }
15
+ export interface LineSegment { // A slice of a line, described by indexes into `line.text`.
16
+ line: TextOffset; // the line the segment belongs to
17
+ relStart: number; // index into `line.text` where the segment starts
18
+ relEnd: number; // index into `line.text` where the segment ends (exclusive)
19
+ }
20
+ export interface TextOffsetWithValid extends TextOffset { // A word produced by `split`, tagged with the validity check result.
21
+ isFound: boolean; // the value the `IsValidWordFn` callback returned for this word
22
+ }
23
+ export interface SplitOptions extends WordBreakOptions { // Options accepted by `split`; adds nothing to WordBreakOptions.
24
+ }
25
+ export declare function split(line: TextOffset, offset: number, isValidWord: IsValidWordFn, options?: SplitOptions): SplitResult; // Splits the next run of text in `line` at or after `offset` into words, checking each with `isValidWord`.
26
+ declare type BreakPairs = readonly number[]; // Either a (start, end) pair, or empty to mean "no break here".
27
+ interface PossibleWordBreak {
28
+ /** offset from the start of the string */
29
+ offset: number;
30
+ /**
31
+ * break pairs (start, end)
32
+ * (the characters between the start and end are removed)
33
+ * With a pure break, start === end. An empty pair means "do not break here".
34
+ */
35
+ breaks: BreakPairs[];
36
+ }
37
+ export declare type SortedBreaks = PossibleWordBreak[]; // Possible breaks ordered by ascending `offset`.
38
+ interface WordBreakOptions {
39
+ optionalWordBreakCharacters?: string; // characters that may each be treated as an optional break (the character is dropped) or left in place
40
+ }
41
+ declare function generateWordBreaks(line: LineSegment, options: WordBreakOptions): SortedBreaks; // Collects the camel-case, symbol and optional breaks for a segment, sorted by offset.
42
+ export declare const __testing__: { // Exposes the otherwise non-exported `generateWordBreaks`; presumably for unit tests only — confirm before relying on it.
43
+ generateWordBreaks: typeof generateWordBreaks;
44
+ };
45
+ export {};
46
+ //# sourceMappingURL=wordSplitter.d.ts.map
@@ -0,0 +1,326 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.__testing__ = exports.split = void 0;
4
+ const PairingHeap_1 = require("./PairingHeap");
5
+ const regexHelper_1 = require("./regexHelper");
6
+ const textRegex_1 = require("./textRegex");
7
+ const ignoreBreak = Object.freeze([]); // Shared, frozen empty break pair: the "do not break here" option in every list of breaks.
8
/**
 * Split the next run of word text in `line`, starting at `offset`, into words.
 *
 * @param line - the line being processed (`text` plus its `offset`).
 * @param offset - offset, on the same scale as `line.offset`, at which to start looking.
 * @param isValidWord - callback that reports whether a candidate word is valid.
 * @param options - optional word break settings.
 * @returns the span that was examined, the words it was split into (each tagged
 *          with `isFound`), and the offset at which processing stopped.
 */
function split(line, offset, isValidWord, options = {}) {
    const lineOffset = line.offset;
    // Answers already obtained from `isValidWord`, keyed by a packed (offset, length) integer.
    const answers = new Map();
    /** Shift a line-relative text/offset pair back onto the scale of `line.offset`. */
    const rebaseTextOffset = (relText) => ({ ...relText, offset: relText.offset + lineOffset });
    /** Memoized form of `isValidWord` that accepts line-relative words. */
    const has = (word) => {
        const start = word.offset;
        const length = word.text.length;
        // Only cache when both numbers fit in the packed key: 20 bits of offset, 11 bits of length.
        const packable = start < 1 << 20 && length < 1 << 11;
        const key = packable ? start + (length << 20) : -1;
        if (packable) {
            const known = answers.get(key);
            if (known !== undefined) {
                return known;
            }
        }
        const found = isValidWord(rebaseTextOffset(word));
        if (key >= 0) {
            answers.set(key, found);
        }
        return found;
    };
    const relWordToSplit = findNextWordText({ text: line.text, offset: offset - lineOffset });
    const spanText = rebaseTextOffset(relWordToSplit);
    const spanEnd = spanText.offset + spanText.text.length;
    // Nothing to split: report an empty word list.
    if (!relWordToSplit.text) {
        return { line, offset, text: spanText, words: [], endOffset: spanEnd };
    }
    const relStart = relWordToSplit.offset;
    const lineSegment = {
        line,
        relStart,
        relEnd: relStart + relWordToSplit.text.length,
    };
    const possibleBreaks = generateWordBreaks(lineSegment, options);
    // No place to break: the whole span is a single word.
    if (possibleBreaks.length === 0) {
        const onlyWord = { ...spanText, isFound: isValidWord(spanText) };
        return { line, offset, text: spanText, words: [onlyWord], endOffset: spanEnd };
    }
    // Add a dummy break at the end to avoid needing to check for last break.
    possibleBreaks.push({ offset: lineSegment.relEnd, breaks: [ignoreBreak] });
    const words = splitIntoWords(lineSegment, possibleBreaks, has).map(rebaseTextOffset);
    return {
        line,
        offset,
        text: spanText,
        words,
        endOffset: lineOffset + lineSegment.relEnd,
    };
}
76
+ exports.split = split;
77
/**
 * Find the next match of `regExWordsAndDigits` in `text`.
 *
 * The search position is set through `lastIndex`, so `offset` only takes effect
 * if that expression is global or sticky (it is defined in ./textRegex, not here).
 *
 * @param param0 - `text` to search and the `offset` to start from.
 * @returns the matched text and its index; when nothing matches, an empty text
 *          with offset `offset + text.length`.
 */
function findNextWordText({ text, offset }) {
    const matcher = new RegExp(textRegex_1.regExWordsAndDigits);
    matcher.lastIndex = offset;
    const found = matcher.exec(text);
    return found
        ? { text: found[0], offset: found.index }
        : { text: '', offset: offset + text.length };
}
92
/**
 * Gather every kind of possible word break for a line segment.
 *
 * @param line - the segment to analyse.
 * @param options - supplies `optionalWordBreakCharacters`.
 * @returns camel-case, symbol and optional breaks merged and sorted by offset.
 */
function generateWordBreaks(line, options) {
    const breakGroups = [
        ...genWordBreakCamel(line),
        ...genSymbolBreaks(line),
        ...genOptionalWordBreaks(line, options.optionalWordBreakCharacters),
    ];
    return mergeSortedBreaks(...breakGroups);
}
98
/**
 * Copy a regular expression and position the copy at `offset`.
 *
 * @param reg - expression to copy; it is left untouched.
 * @param offset - value for the copy's `lastIndex`.
 * @returns a new RegExp with the same source and flags and `lastIndex === offset`.
 */
function offsetRegEx(reg, offset) {
    return Object.assign(new RegExp(reg), { lastIndex: offset });
}
103
/**
 * Generate the possible breaks that come from letter-case changes.
 *
 * @param line - the segment to analyse; matching runs from `relStart` over the text up to `relEnd`.
 * @returns two lists of breaks: one for `regExSplitWords`, one for `regExSplitWords2`.
 */
function genWordBreakCamel(line) {
    const searchText = line.line.text.slice(0, line.relEnd);
    const matchesOf = (reg) => searchText.matchAll(offsetRegEx(reg, line.relStart));
    // lower,Upper: camelCase -> camel|Case
    const lowerUpper = [];
    for (const match of matchesOf(textRegex_1.regExSplitWords)) {
        const at = match.index;
        if (at === undefined) {
            break;
        }
        const cut = at + 1;
        lowerUpper.push({ offset: at, breaks: [[cut, cut], ignoreBreak] });
    }
    // cspell:ignore ERRORC
    // Upper,Upper,lower: ERRORCodes -> ERROR|Codes, ERRORC|odes
    const upperUpperLower = [];
    for (const match of matchesOf(textRegex_1.regExSplitWords2)) {
        const at = match.index;
        if (at === undefined) {
            break;
        }
        const firstCut = at + match[1].length;
        const secondCut = firstCut + 1;
        upperUpperLower.push({
            offset: at,
            breaks: [[firstCut, firstCut], [secondCut, secondCut], ignoreBreak],
        });
    }
    return [lowerUpper, upperUpperLower];
}
131
/**
 * Run `reg` over a line segment and convert each match into a possible break.
 *
 * @param line - the segment; matching runs from `relStart` over the text up to `relEnd`.
 * @param reg - expression to match (copied, so the caller's instance is not modified).
 * @param calcBreak - converts a match into a break; falsy results are dropped.
 * @returns the breaks in match order.
 */
function calcBreaksForRegEx(line, reg, calcBreak) {
    const searchText = line.line.text.slice(0, line.relEnd);
    const matches = searchText.matchAll(offsetRegEx(reg, line.relStart));
    return Array.from(matches, (match) => calcBreak(match)).filter((candidate) => Boolean(candidate));
}
142
/**
 * Generate breaks where the matched text may either be dropped or left alone.
 *
 * @param line - the segment to analyse.
 * @param optionalBreakCharacters - extra characters to treat as optional breaks; skipped when empty or undefined.
 * @returns one list of breaks per expression: dangling quotes, trailing endings and,
 *          when requested, the extra characters.
 */
function genOptionalWordBreaks(line, optionalBreakCharacters) {
    /** A match becomes two options: remove the matched text, or do not break. */
    const toBreak = (match) => {
        const start = match.index;
        if (start === undefined) {
            return undefined;
        }
        const end = start + match[0].length;
        return { offset: start, breaks: [[start, end], ignoreBreak] };
    };
    const builtIn = [textRegex_1.regExDanglingQuote, textRegex_1.regExTrailingEndings];
    const groups = builtIn.map((reg) => calcBreaksForRegEx(line, reg, toBreak));
    if (optionalBreakCharacters) {
        const charClass = `[${(0, regexHelper_1.escapeRegEx)(optionalBreakCharacters)}]`;
        groups.push(calcBreaksForRegEx(line, new RegExp(charClass, 'gu'), toBreak));
    }
    return groups;
}
166
/**
 * Generate breaks around symbols, runs of digits and escape characters.
 *
 * @param line - the segment to analyse.
 * @returns one list of breaks per expression, in the order: possible word breaks,
 *          digits, escape characters.
 */
function genSymbolBreaks(line) {
    /**
     * A match becomes four options: remove the matched text, break before it,
     * break after it, or do not break.
     */
    const toBreak = (match) => {
        const start = match.index;
        if (start === undefined) {
            return undefined;
        }
        const end = start + match[0].length;
        return {
            offset: start,
            breaks: [[start, end], [start, start], [end, end], ignoreBreak],
        };
    };
    const patterns = [textRegex_1.regExPossibleWordBreaks, /\d+/g, textRegex_1.regExEscapeCharacters];
    return patterns.map((reg) => calcBreaksForRegEx(line, reg, toBreak));
}
188
+ function splitIntoWords(lineSeg, breaks, has) { // Searches the possible breaks for the lowest-cost way to split the segment (a word that `has` rejects costs its length) and returns the words of the best path found.
189
+ const maxIndex = lineSeg.relEnd;
190
+ const maxAttempts = 1000; // upper bound on the number of loop iterations below
191
+ const knownPathsByIndex = new Map(); // offset -> cheapest path node recorded so far for that offset
192
+ /**
193
+ * Create the set of possible candidates to consider
194
+ * @param p - previous candidate that led to this one
195
+ * @param i - offset within the string
196
+ * @param bi - current index into the set of breaks
197
+ * @param currentCost - current cost accrued
198
+ */
199
+ function makeCandidates(p, i, bi, currentCost) {
200
+ const len = maxIndex;
201
+ while (bi < breaks.length && breaks[bi].offset < i) { // skip breaks that lie before offset i
202
+ bi += 1;
203
+ }
204
+ if (bi >= breaks.length) {
205
+ return [];
206
+ }
207
+ const br = breaks[bi];
208
+ function c(bp) { // build one candidate for break pair bp
209
+ const d = bp.length < 2 ? len - i : (bp[0] - i) * 0.5 + len - bp[1]; // estimate of remaining cost: everything left when there is no pair, otherwise half the text before the break plus everything after it
210
+ const ec = currentCost + d;
211
+ return {
212
+ p,
213
+ i,
214
+ bi,
215
+ bp,
216
+ c: currentCost,
217
+ ec,
218
+ text: undefined,
219
+ };
220
+ }
221
+ return br.breaks.map(c);
222
+ }
223
+ function toTextOffset(text, offset) { // wrap text/offset and record whether `has` accepts it
224
+ const valid = has({ text, offset });
225
+ return {
226
+ text,
227
+ offset,
228
+ isFound: valid,
229
+ };
230
+ }
231
+ function compare(a, b) { // order by estimated cost `ec`; on a tie the candidate with the larger offset `i` sorts first
232
+ return a.ec - b.ec || b.i - a.i;
233
+ }
234
+ function pathToWords(node) { // collect the words along a path by following its `n` links
235
+ const results = [];
236
+ for (let p = node; p; p = p.n) {
237
+ if (p.text) {
238
+ results.push(p.text);
239
+ }
240
+ }
241
+ return results;
242
+ }
243
+ function addToKnownPaths(candidate, path) { // walk back from `candidate` through its `p` links, putting a new node in front of `path` for each; returns the new head, or undefined once an equal or cheaper path is already known
244
+ for (let can = candidate; can !== undefined; can = can.p) {
245
+ const t = can.text;
246
+ const i = can.i;
247
+ const cost = (!t || t.isFound ? 0 : t.text.length) + (path?.c ?? 0); // a missing or found word costs 0, a word that was not found costs its length; added to the cost of the rest of the path
248
+ const exitingPath = knownPathsByIndex.get(i);
249
+ // Keep going only if this is a better candidate than the existing path
250
+ if (exitingPath && exitingPath.c <= cost) {
251
+ return undefined;
252
+ }
253
+ const node = {
254
+ n: path,
255
+ i,
256
+ c: cost,
257
+ text: t,
258
+ };
259
+ knownPathsByIndex.set(i, node);
260
+ path = node;
261
+ }
262
+ return path;
263
+ }
264
+ let maxCost = lineSeg.relEnd - lineSeg.relStart; // starts at the segment length; lowered whenever a cheaper complete path is found
265
+ const candidates = new PairingHeap_1.PairingHeap(compare);
266
+ const text = lineSeg.line.text;
267
+ candidates.concat(makeCandidates(undefined, lineSeg.relStart, 0, 0));
268
+ let attempts = 0;
269
+ let bestPath;
270
+ while (maxCost && candidates.length && attempts++ < maxAttempts) { // stop when a zero-cost path exists, no candidates remain, or the attempt limit is reached
271
+ /** Best Candidate Index */
272
+ const best = candidates.dequeue();
273
+ if (!best || best.c >= maxCost) { // cannot beat the best cost found so far
274
+ continue;
275
+ }
276
+ // Does it have a split?
277
+ if (best.bp.length) {
278
+ // yes
279
+ const i = best.bp[0];
280
+ const j = best.bp[1];
281
+ const t = i > best.i ? toTextOffset(text.slice(best.i, i), best.i) : undefined; // the word between the candidate's offset and the start of the break, if any
282
+ const cost = !t || t.isFound ? 0 : t.text.length;
283
+ const mc = maxIndex - j;
284
+ best.c += cost;
285
+ best.ec = best.c + mc;
286
+ best.text = t;
287
+ const possiblePath = knownPathsByIndex.get(j);
288
+ if (possiblePath) {
289
+ // We found a known path to apply to this candidate
290
+ const f = addToKnownPaths(best, possiblePath);
291
+ bestPath = !bestPath || (f && f.c < bestPath.c) ? f : bestPath;
292
+ }
293
+ else if (best.c < maxCost) {
294
+ const c = makeCandidates(t ? best : best.p, j, best.bi + 1, best.c); // continue from the end of the break; a candidate without text is left out of the chain
295
+ candidates.concat(c);
296
+ }
297
+ }
298
+ else {
299
+ // It is a pass through
300
+ const c = makeCandidates(best.p, best.i, best.bi + 1, best.c);
301
+ candidates.concat(c);
302
+ if (!c.length) { // no breaks left: the rest of the segment forms the final word
303
+ const t = maxIndex > best.i ? toTextOffset(text.slice(best.i, maxIndex), best.i) : undefined;
304
+ const cost = !t || t.isFound ? 0 : t.text.length;
305
+ best.c += cost;
306
+ best.ec = best.c;
307
+ best.text = t;
308
+ const segText = t || best.p?.text || toTextOffset('', best.i);
309
+ const can = t ? { ...best, text: segText } : { ...best, ...best.p, text: segText };
310
+ const f = addToKnownPaths(can, undefined);
311
+ bestPath = !bestPath || (f && f.c < bestPath.c) ? f : bestPath;
312
+ }
313
+ }
314
+ if (bestPath && bestPath.c < maxCost) {
315
+ maxCost = bestPath.c;
316
+ }
317
+ }
318
+ return pathToWords(bestPath);
319
+ }
320
/**
 * Combine several lists of possible breaks into one list ordered by offset.
 *
 * @param maps - any number of lists of breaks; the input lists are not modified.
 * @returns a new array holding every break, sorted by ascending `offset`.
 */
function mergeSortedBreaks(...maps) {
    const byOffset = (left, right) => left.offset - right.offset;
    const merged = Array.prototype.concat.apply([], maps);
    return merged.sort(byOffset);
}
323
+ exports.__testing__ = { // Exposes the otherwise non-exported `generateWordBreaks`; presumably for unit tests only — confirm before relying on it.
324
+ generateWordBreaks,
325
+ };
326
+ //# sourceMappingURL=wordSplitter.js.map
package/package.json ADDED
@@ -0,0 +1,47 @@
1
+ {
2
+ "name": "cspell-dictionary",
3
+ "version": "6.9.0",
4
+ "description": "A spelling dictionary library useful for checking words and getting suggestions.",
5
+ "main": "dist/index.js",
6
+ "typings": "dist/index.d.ts",
7
+ "files": [
8
+ "dist",
9
+ "!**/__mocks__",
10
+ "!**/*.test.*",
11
+ "!**/*.spec.*",
12
+ "!**/*.map"
13
+ ],
14
+ "scripts": {
15
+ "build": "tsc -p .",
16
+ "build-dev": "tsc -p tsconfig.dev.json",
17
+ "watch": "tsc -p . -w",
18
+ "clean": "rimraf dist temp coverage .tsbuildinfo",
19
+ "clean-build": "pnpm run clean && pnpm run build",
20
+ "coverage": "jest --coverage",
21
+ "test-watch": "jest --watch",
22
+ "test": "jest"
23
+ },
24
+ "repository": {
25
+ "type": "git",
26
+ "url": "git+https://github.com/streetsidesoftware/cspell.git"
27
+ },
28
+ "keywords": [
29
+ "cspell"
30
+ ],
31
+ "author": "Jason Dent",
32
+ "license": "MIT",
33
+ "bugs": {
34
+ "url": "https://github.com/streetsidesoftware/cspell/labels/cspell-dictionary"
35
+ },
36
+ "homepage": "https://github.com/streetsidesoftware/cspell#readme",
37
+ "engines": {
38
+ "node": ">=14"
39
+ },
40
+ "dependencies": {
41
+ "@cspell/cspell-pipe": "workspace:^",
42
+ "@cspell/cspell-types": "workspace:^",
43
+ "cspell-trie-lib": "workspace:^",
44
+ "fast-equals": "^4.0.3",
45
+ "gensequence": "^4.0.2"
46
+ }
47
+ }