cspell-lib 8.13.1 → 8.13.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -25,7 +25,7 @@ export type { TraceOptions, TraceResult, TraceWordResult } from './trace.js';
25
25
  export { traceWords, traceWordsAsync } from './trace.js';
26
26
  export { getLogger, Logger, setLogger } from './util/logger.js';
27
27
  export { resolveFile } from './util/resolveFile.js';
28
- export * as Text from './util/text.js';
28
+ export * as Text from './util/textApi.js';
29
29
  export { checkText, checkTextDocument, CheckTextInfo, IncludeExcludeFlag, IncludeExcludeOptions, TextInfoItem, validateText, ValidationIssue, } from './validator.js';
30
30
  export * from '@cspell/cspell-types';
31
31
  export { asyncIterableToArray, readFileText as readFile, readFileTextSync as readFileSync, writeToFile, writeToFileIterable, writeToFileIterableP, } from 'cspell-io';
package/dist/lib/index.js CHANGED
@@ -18,7 +18,7 @@ export { DocumentValidator, shouldCheckDocument } from './textValidation/index.j
18
18
  export { traceWords, traceWordsAsync } from './trace.js';
19
19
  export { getLogger, setLogger } from './util/logger.js';
20
20
  export { resolveFile } from './util/resolveFile.js';
21
- export * as Text from './util/text.js';
21
+ export * as Text from './util/textApi.js';
22
22
  export { checkText, checkTextDocument, IncludeExcludeFlag, validateText, } from './validator.js';
23
23
  export * from '@cspell/cspell-types';
24
24
  export { asyncIterableToArray, readFileText as readFile, readFileTextSync as readFileSync, writeToFile, writeToFileIterable, writeToFileIterableP, } from 'cspell-io';
@@ -28,7 +28,9 @@ export interface WordRangeAcc {
28
28
  export type ValidationIssueRO = Readonly<ValidationIssue>;
29
29
  export type LineValidatorFn = (line: LineSegment) => Iterable<ValidationIssue>;
30
30
  export interface LineSegment {
31
+ /** A line from the document, the offset is relative to the beginning of the document. */
31
32
  line: TextOffsetRO;
33
+ /** A segment of text from the line, the offset is relative to the beginning of the document. */
32
34
  segment: TextOffsetRO;
33
35
  }
34
36
  export interface MappedTextValidationResult extends MappedText {
@@ -1,7 +1,9 @@
1
+ import assert from 'node:assert';
1
2
  import { opConcatMap, opFilter, pipe } from '@cspell/cspell-pipe/sync';
2
3
  import { createCachingDictionary } from 'cspell-dictionary';
3
4
  import * as RxPat from '../Settings/RegExpPatterns.js';
4
- import { extractPossibleWordsFromTextOffset, extractText, extractWordsFromCodeTextOffset, extractWordsFromTextOffset, } from '../util/text.js';
5
+ import { extractPossibleWordsFromTextOffset, extractText, extractWordsFromTextOffset, splitWordWithOffset, } from '../util/text.js';
6
+ import { regExpCamelCaseWordBreaksWithEnglishSuffix } from '../util/textRegex.js';
5
7
  import { split } from '../util/wordSplitter.js';
6
8
  import { defaultMinWordLength } from './defaultConstants.js';
7
9
  import { isWordValidWithEscapeRetry } from './isWordValid.js';
@@ -73,7 +75,7 @@ export function lineValidatorFactory(sDict, options) {
73
75
  }
74
76
  return issue;
75
77
  }
76
- const isFlaggedOrMinLength = rememberFilter((wo) => wo.text.length >= minWordLength || !!wo.isFlagged);
78
+ const isFlaggedOrMinLength = (wo) => wo.text.length >= minWordLength || !!wo.isFlagged;
77
79
  const isFlaggedOrNotFound = rememberFilter((wo) => wo.isFlagged || !wo.isFound);
78
80
  const isNotRepeatingChar = rememberFilter((wo) => !RxPat.regExRepeatedChar.test(wo.text));
79
81
  function checkWord(issue) {
@@ -94,17 +96,100 @@ export function lineValidatorFactory(sDict, options) {
94
96
  issue.isFound = isFlagged ? undefined : info.isFound;
95
97
  return issue;
96
98
  }
99
+ const regExUpperCaseWithTrailingCommonEnglishSuffix = /^([\p{Lu}\p{M}]{2,})['’]?(?:s|ing|ies|es|ings|ize|ed|ning)$/u; // cspell:disable-line
100
+ const regExpIsLetter = /\p{L}/u;
97
101
  const fn = (lineSegment) => {
102
+ const line = lineSegment.line;
103
+ function isWordTooShort(word, ignoreSuffix = false) {
104
+ if (word.text.length >= minWordLength)
105
+ return false;
106
+ const offset = word.offset - line.offset;
107
+ assert.equal(line.text.slice(offset, offset + word.text.length), word.text);
108
+ const prefix = [...line.text.slice(Math.max(0, offset - 2), offset)];
109
+ const hasLetterPrefix = !!prefix.length && regExpIsLetter.test(prefix[prefix.length - 1]);
110
+ if (hasLetterPrefix)
111
+ return false;
112
+ if (ignoreSuffix)
113
+ return true;
114
+ const suffix = [...line.text.slice(offset + word.text.length, offset + word.text.length + 2)];
115
+ const hasLetterSuffix = !!suffix.length && regExpIsLetter.test(suffix[0]);
116
+ return !hasLetterSuffix;
117
+ }
98
118
  function splitterIsValid(word) {
99
- return (setOfKnownSuccessfulWords.has(word.text) ||
100
- (!isWordFlagged(word) && isWordValidWithEscapeRetry(hasDict, word, lineSegment.line)));
119
+ if (setOfKnownSuccessfulWords.has(word.text))
120
+ return true;
121
+ if (isWordFlagged(word))
122
+ return false;
123
+ if (isWordValidWithEscapeRetry(hasDict, word, lineSegment.line))
124
+ return true;
125
+ if (isWordTooShort(word))
126
+ return true;
127
+ return isAllCapsWithTrailingCommonEnglishSuffixOk(word);
128
+ }
129
+ function isAllCapsWithTrailingCommonEnglishSuffixOk(tWord) {
130
+ if (!regExUpperCaseWithTrailingCommonEnglishSuffix.test(tWord.text))
131
+ return false;
132
+ const m = tWord.text.match(regExUpperCaseWithTrailingCommonEnglishSuffix);
133
+ if (!m)
134
+ return false;
135
+ const offset = tWord.offset;
136
+ const v = { offset, text: m[1], line };
137
+ const check = checkWord(v);
138
+ if (check.isFlagged)
139
+ return false;
140
+ if (check.isFound)
141
+ return true;
142
+ if (isWordTooShort(v, true))
143
+ return true;
144
+ return false;
101
145
  }
102
146
  function checkFullWord(vr) {
103
147
  if (vr.isFlagged) {
104
148
  return [vr];
105
149
  }
150
+ // English exceptions :-(
151
+ if (isAllCapsWithTrailingCommonEnglishSuffixOk(vr))
152
+ return [];
153
+ if (isWordIgnored(vr.text) || checkWord(vr).isFound) {
154
+ rememberFilter((_) => false)(vr);
155
+ return [];
156
+ }
157
+ if (vr.isFlagged)
158
+ return [vr];
159
+ const codeWordResults = checkCamelCaseWord(vr);
160
+ if (!codeWordResults.length) {
161
+ rememberFilter((_) => false)(vr);
162
+ return [];
163
+ }
164
+ return codeWordResults;
165
+ }
166
+ /**
167
+ * Break a camel case word into its parts and check each part.
168
+ *
169
+ * There are two word break patterns:
170
+ * - `regExpCamelCaseWordBreaks`
171
+ * - `regExpCamelCaseWordBreaksWithEnglishSuffix` is the default pattern with English suffixes on ALL CAPS words.
172
+ *
173
+ * Note: See [#6066](https://github.com/streetsidesoftware/cspell/pull/6066)
174
+ * Using just `regExpCamelCaseWordBreaks` misses unknown 4-letter words.
175
+ *
176
+ * The code below was tried, but it missed words.
177
+ * - `LSTM` was caught. // cspell:disable-line
178
+ * - `LSTMs` was missed because it becomes `LST` and `Ms`. // cspell:disable-line
179
+ *
180
+ * ```ts
181
+ * const results = _checkCamelCaseWord(vr, regExpCamelCaseWordBreaks);
182
+ * if (!results.length) return results;
183
+ * const resultsEnglishBreaks = _checkCamelCaseWord(vr, regExpCamelCaseWordBreaksWithEnglishSuffix);
184
+ * return results.length < resultsEnglishBreaks.length ? results : resultsEnglishBreaks;
185
+ * ```
186
+ */
187
+ function checkCamelCaseWord(vr) {
188
+ return _checkCamelCaseWord(vr, regExpCamelCaseWordBreaksWithEnglishSuffix);
189
+ }
190
+ function _checkCamelCaseWord(vr, regExpWordBreaks) {
106
191
  const codeWordResults = [];
107
- for (const wo of extractWordsFromCodeTextOffset(vr)) {
192
+ for (const wo of splitWordWithOffset(vr, regExpWordBreaks)) {
108
193
  if (setOfKnownSuccessfulWords.has(wo.text))
109
194
  continue;
110
195
  const issue = wo;
@@ -120,13 +205,8 @@ export function lineValidatorFactory(sDict, options) {
120
205
  issue.text = extractText(lineSegment.segment, issue.offset, issue.offset + issue.text.length);
121
206
  codeWordResults.push(issue);
122
207
  }
123
- if (!codeWordResults.length || isWordIgnored(vr.text) || checkWord(vr).isFound) {
124
- rememberFilter((_) => false)(vr);
125
- return [];
126
- }
127
208
  return codeWordResults;
128
209
  }
129
- const useKnownIssues = false;
130
210
  function rebaseKnownIssues(possibleWord, known) {
131
211
  const { issues } = known;
132
212
  const adjOffset = possibleWord.offset - known.possibleWord.offset;
@@ -139,9 +219,9 @@ export function lineValidatorFactory(sDict, options) {
139
219
  }
140
220
  function checkPossibleWords(possibleWord) {
141
221
  const known = setOfKnownIssues.get(possibleWord.text);
142
- if (known && !known.issues.length)
143
- return known.issues;
144
- if (known && useKnownIssues) {
222
+ if (known) {
223
+ if (!known.issues.length)
224
+ return known.issues;
145
225
  const adjusted = rebaseKnownIssues(possibleWord, known);
146
226
  return adjusted;
147
227
  }
@@ -174,7 +254,15 @@ export function lineValidatorFactory(sDict, options) {
174
254
  if (mismatches.length) {
175
255
  // Try the more expensive word splitter
176
256
  const splitResult = split(lineSegment.segment, possibleWord.offset, splitterIsValid);
177
- const nonMatching = splitResult.words.filter((w) => !w.isFound);
257
+ const nonMatching = splitResult.words
258
+ .filter((w) => !w.isFound)
259
+ .filter((w) => {
260
+ const m = w.text.match(regExUpperCaseWithTrailingCommonEnglishSuffix);
261
+ if (!m)
262
+ return true;
263
+ const v = checkWord({ ...w, text: m[1], line: lineSegment.line });
264
+ return v.isFlagged || !v.isFound;
265
+ });
178
266
  if (nonMatching.length < mismatches.length) {
179
267
  return nonMatching.map((w) => ({ ...w, line: lineSegment.line })).map(annotateIsFlagged);
180
268
  }
@@ -1,11 +1,16 @@
1
1
  import type { TextDocumentOffset, TextOffset } from '@cspell/cspell-types';
2
2
  import type { Uri } from './Uri.js';
3
3
  export { stringToRegExp } from './textRegex.js';
4
- export declare function splitCamelCaseWordWithOffset(wo: TextOffset): Array<TextOffset>;
4
+ export declare function splitCamelCaseWordWithOffset(wo: TextOffset): TextOffset[];
5
5
  /**
6
6
  * Split camelCase words into an array of strings.
7
7
  */
8
8
  export declare function splitCamelCaseWord(word: string): string[];
9
+ export declare function splitWordWithOffset(wo: TextOffset, regExpWordBreaks: RegExp): TextOffset[];
10
+ /**
11
+ * Split camelCase words into an array of strings.
12
+ */
13
+ export declare function splitWord(word: string, regExpWordBreaks: RegExp): string[];
9
14
  /**
10
15
  * This function lets you iterate over regular expression matches.
11
16
  */
@@ -1,6 +1,6 @@
1
1
  import { opConcatMap, opMap, pipe } from '@cspell/cspell-pipe/sync';
2
2
  import { binarySearch } from './search.js';
3
- import { regExAccents, regExAllLower, regExAllUpper, regExFirstUpper, regExIgnoreCharacters, regExpSplitWordBreaks, regExWords, regExWordsAndDigits, } from './textRegex.js';
3
+ import { regExAccents, regExAllLower, regExAllUpper, regExFirstUpper, regExIgnoreCharacters, regExpCamelCaseWordBreaksWithEnglishSuffix, regExWords, regExWordsAndDigits, } from './textRegex.js';
4
4
  import { toUri } from './Uri.js';
5
5
  import { scanMap } from './util.js';
6
6
  export { stringToRegExp } from './textRegex.js';
@@ -15,7 +15,19 @@ export function splitCamelCaseWordWithOffset(wo) {
15
15
  * Split camelCase words into an array of strings.
16
16
  */
17
17
  export function splitCamelCaseWord(word) {
18
- return word.split(regExpSplitWordBreaks);
18
+ return splitWord(word, regExpCamelCaseWordBreaksWithEnglishSuffix);
19
+ }
20
+ export function splitWordWithOffset(wo, regExpWordBreaks) {
21
+ return splitWord(wo.text, regExpWordBreaks).map(scanMap((last, text) => ({ text, offset: last.offset + last.text.length }), {
22
+ text: '',
23
+ offset: wo.offset,
24
+ }));
25
+ }
26
+ /**
27
+ * Split a word into an array of strings at the breaks matched by `regExpWordBreaks`.
28
+ */
29
+ export function splitWord(word, regExpWordBreaks) {
30
+ return word.split(regExpWordBreaks);
19
31
  }
20
32
  /**
21
33
  * This function lets you iterate over regular expression matches.
@@ -0,0 +1,2 @@
1
+ export { calculateTextDocumentOffsets, camelToSnake, cleanText, cleanTextOffset, extractLinesOfText, extractPossibleWordsFromTextOffset, extractText, extractWordsFromCode, extractWordsFromCodeTextOffset, extractWordsFromText, extractWordsFromTextOffset, isFirstCharacterLower, isFirstCharacterUpper, isLowerCase, isUpperCase, lcFirst, match, matchCase, matchStringToTextOffset, matchToTextOffset, removeAccents, snakeToCamel, splitCamelCaseWord, splitCamelCaseWordWithOffset, stringToRegExp, textOffset, ucFirst, } from './text.js';
2
+ //# sourceMappingURL=textApi.d.ts.map
@@ -0,0 +1,2 @@
1
+ export { calculateTextDocumentOffsets, camelToSnake, cleanText, cleanTextOffset, extractLinesOfText, extractPossibleWordsFromTextOffset, extractText, extractWordsFromCode, extractWordsFromCodeTextOffset, extractWordsFromText, extractWordsFromTextOffset, isFirstCharacterLower, isFirstCharacterUpper, isLowerCase, isUpperCase, lcFirst, match, matchCase, matchStringToTextOffset, matchToTextOffset, removeAccents, snakeToCamel, splitCamelCaseWord, splitCamelCaseWordWithOffset, stringToRegExp, textOffset, ucFirst, } from './text.js';
2
+ //# sourceMappingURL=textApi.js.map
@@ -1,7 +1,8 @@
1
1
  export declare const regExUpperSOrIng: RegExp;
2
2
  export declare const regExSplitWords: RegExp;
3
3
  export declare const regExSplitWords2: RegExp;
4
- export declare const regExpSplitWordBreaks: RegExp;
4
+ export declare const regExpCamelCaseWordBreaksWithEnglishSuffix: RegExp;
5
+ export declare const regExpCamelCaseWordBreaks: RegExp;
5
6
  export declare const regExpAllPossibleWordBreaks: RegExp;
6
7
  export declare const regExWords: RegExp;
7
8
  export declare const regExWordsAndDigits: RegExp;
@@ -1,8 +1,9 @@
1
1
  // cspell:ignore ings ning gimuy anrvtbf gimuxy
2
2
  export const regExUpperSOrIng = /([\p{Lu}\p{M}]+(?:\\?['’])?(?:s|ing|ies|es|ings|ed|ning))(?!\p{Ll})/gu;
3
3
  export const regExSplitWords = /(\p{Ll}\p{M}?)(\p{Lu})/gu;
4
- export const regExSplitWords2 = /(\p{Lu}\p{M}?)(\p{Lu}\p{M}?\p{Ll})/gu;
5
- export const regExpSplitWordBreaks = /(?<=\p{Ll}\p{M}?)(?=\p{Lu})|(?<=\p{Lu}\p{M}?)(?=\p{Lu}\p{M}?\p{Ll})(?!\p{Lu}\p{M}?(?:s|ing|ies|es|ings|ed|ning)(?!\p{Ll}))/gu;
4
+ export const regExSplitWords2 = /(\p{Lu}\p{M}?)((\p{Lu}\p{M}?)\p{Ll})/gu;
5
+ export const regExpCamelCaseWordBreaksWithEnglishSuffix = /(?<=\p{Ll}\p{M}?)(?=\p{Lu})|(?<=\p{Lu}\p{M}?)(?=\p{Lu}\p{M}?\p{Ll})(?!\p{Lu}\p{M}?(?:s|ing|ies|es|ings|ed|ning)(?!\p{Ll}))/gu;
6
+ export const regExpCamelCaseWordBreaks = /(?<=\p{Ll}\p{M}?)(?=\p{Lu})|(?<=\p{Lu}\p{M}?)(?=\p{Lu}\p{M}?\p{Ll})/gu;
6
7
  export const regExpAllPossibleWordBreaks = /(?<=\p{Ll}\p{M}?)(?=\p{Lu})|(?<=\p{Lu}\p{M}?)(?=\p{Lu}\p{M}?\p{Ll})|(?<=\p{Lu}\p{M}?\p{Lu}\p{M}?)(?=\p{Ll})|(?<=\p{L}\p{M}?)(?=\P{L})|(?<=\P{L})(?=\p{L})/gu;
7
8
  export const regExWords = /\p{L}\p{M}?(?:(?:\\?['’])?\p{L}\p{M}?)*/gu;
8
9
  // Words can be made of letters, numbers, period, underscore, dash, plus, and single quote
@@ -111,7 +111,7 @@ function genWordBreakCamel(line) {
111
111
  for (const m of text.matchAll(offsetRegEx(regExSplitWords, line.relStart))) {
112
112
  if (m.index === undefined)
113
113
  break;
114
- const i = m.index + 1;
114
+ const i = m.index + m[1].length;
115
115
  breaksCamel1.push({
116
116
  offset: m.index,
117
117
  breaks: [[i, i], ignoreBreak],
@@ -124,7 +124,7 @@ function genWordBreakCamel(line) {
124
124
  if (m.index === undefined)
125
125
  break;
126
126
  const i = m.index + m[1].length;
127
- const j = i + 1;
127
+ const j = i + m[3].length;
128
128
  breaksCamel2.push({
129
129
  offset: m.index,
130
130
  breaks: [[i, i], [j, j], ignoreBreak],
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "cspell-lib",
3
- "version": "8.13.1",
3
+ "version": "8.13.3",
4
4
  "description": "A library of useful functions used across various cspell tools.",
5
5
  "type": "module",
6
6
  "sideEffects": false,
@@ -58,21 +58,21 @@
58
58
  },
59
59
  "homepage": "https://github.com/streetsidesoftware/cspell#readme",
60
60
  "dependencies": {
61
- "@cspell/cspell-bundled-dicts": "8.13.1",
62
- "@cspell/cspell-pipe": "8.13.1",
63
- "@cspell/cspell-resolver": "8.13.1",
64
- "@cspell/cspell-types": "8.13.1",
65
- "@cspell/dynamic-import": "8.13.1",
66
- "@cspell/strong-weak-map": "8.13.1",
67
- "@cspell/url": "8.13.1",
61
+ "@cspell/cspell-bundled-dicts": "8.13.3",
62
+ "@cspell/cspell-pipe": "8.13.3",
63
+ "@cspell/cspell-resolver": "8.13.3",
64
+ "@cspell/cspell-types": "8.13.3",
65
+ "@cspell/dynamic-import": "8.13.3",
66
+ "@cspell/strong-weak-map": "8.13.3",
67
+ "@cspell/url": "8.13.3",
68
68
  "clear-module": "^4.1.2",
69
- "comment-json": "^4.2.4",
70
- "cspell-config-lib": "8.13.1",
71
- "cspell-dictionary": "8.13.1",
72
- "cspell-glob": "8.13.1",
73
- "cspell-grammar": "8.13.1",
74
- "cspell-io": "8.13.1",
75
- "cspell-trie-lib": "8.13.1",
69
+ "comment-json": "^4.2.5",
70
+ "cspell-config-lib": "8.13.3",
71
+ "cspell-dictionary": "8.13.3",
72
+ "cspell-glob": "8.13.3",
73
+ "cspell-grammar": "8.13.3",
74
+ "cspell-io": "8.13.3",
75
+ "cspell-trie-lib": "8.13.3",
76
76
  "env-paths": "^3.0.0",
77
77
  "fast-equals": "^5.0.1",
78
78
  "gensequence": "^7.0.0",
@@ -93,7 +93,7 @@
93
93
  "@cspell/dict-fr-fr": "^2.2.2",
94
94
  "@cspell/dict-html": "^4.0.5",
95
95
  "@cspell/dict-nl-nl": "^2.3.0",
96
- "@cspell/dict-python": "^4.2.3",
96
+ "@cspell/dict-python": "^4.2.4",
97
97
  "@types/configstore": "^6.0.2",
98
98
  "configstore": "^7.0.0",
99
99
  "cspell-dict-nl-nl": "^1.1.2",
@@ -101,5 +101,5 @@
101
101
  "lorem-ipsum": "^2.0.8",
102
102
  "perf-insight": "^1.2.0"
103
103
  },
104
- "gitHead": "99cdb4e3e6579c57de1014b0cd3c168188b9c1f5"
104
+ "gitHead": "e017775a1d181b20abce3c6325f2527a7554a3a9"
105
105
  }