npm - @cspell/cspell-tools - Versions diffs - 8.15.5 → 8.15.7 - Mend

@cspell/cspell-tools 8.15.5 → 8.15.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (16)

package/cspell-tools.config.schema.json +18 -3
package/dist/compiler/SourceReader.d.ts +1 -0
package/dist/compiler/SourceReader.js +11 -2
package/dist/compiler/compile.js +2 -1
package/dist/compiler/legacyLineToWords.js +4 -1
package/dist/compiler/splitCamelCaseIfAllowed.d.ts +1 -1
package/dist/compiler/splitCamelCaseIfAllowed.js +2 -2
package/dist/compiler/wordListCompiler.js +68 -30
package/dist/compiler/wordListParser.d.ts +8 -1
package/dist/compiler/wordListParser.js +5 -3
package/dist/config/config.d.ts +8 -1
package/dist/config/configDefaults.d.ts +9 -0
package/dist/config/configDefaults.js +9 -0
package/dist/types.d.ts +7 -0
package/dist/types.js +2 -0
package/package.json +5 -5

package/cspell-tools.config.schema.json CHANGED Viewed

@@ -53,6 +53,11 @@
           "description": "Maximum number of nested Hunspell Rules to apply. This is needed for recursive dictionaries like Hebrew.",
           "type": "number"
         },
+        "minCompoundLength": {
+          "default": 4,
+          "description": "Controls the minimum length of a compound word when storing words using `storeSplitWordsAsCompounds`. The compound words are prefixed / suffixed with `*`, to allow them to be combined with other compound words. If the length is too low, then the dictionary will consider many misspelled words as correct.",
+          "type": "number"
+        },
         "split": {
           "anyOf": [
             {
@@ -68,7 +73,7 @@
         },
         "storeSplitWordsAsCompounds": {
           "default": false,
-          "description": "Words that have been split using the `allowedSplitWords` are added to the dictionary as compoundable words. These words are prefixed / suffixed with `*`.",
+          "description": "Camel case words that have been split using the `allowedSplitWords` are added to the dictionary as compoundable words. These words are prefixed / suffixed with `*`.",
           "type": "boolean"
         }
       },
@@ -110,6 +115,11 @@
           "description": "Maximum number of nested Hunspell Rules to apply. This is needed for recursive dictionaries like Hebrew.",
           "type": "number"
         },
+        "minCompoundLength": {
+          "default": 4,
+          "description": "Controls the minimum length of a compound word when storing words using `storeSplitWordsAsCompounds`. The compound words are prefixed / suffixed with `*`, to allow them to be combined with other compound words. If the length is too low, then the dictionary will consider many misspelled words as correct.",
+          "type": "number"
+        },
         "split": {
           "anyOf": [
             {
@@ -125,7 +135,7 @@
         },
         "storeSplitWordsAsCompounds": {
           "default": false,
-          "description": "Words that have been split using the `allowedSplitWords` are added to the dictionary as compoundable words. These words are prefixed / suffixed with `*`.",
+          "description": "Camel case words that have been split using the `allowedSplitWords` are added to the dictionary as compoundable words. These words are prefixed / suffixed with `*`.",
           "type": "boolean"
         }
       },
@@ -266,6 +276,11 @@
       "description": "Maximum number of nested Hunspell Rules to apply. This is needed for recursive dictionaries like Hebrew.",
       "type": "number"
     },
+    "minCompoundLength": {
+      "default": 4,
+      "description": "Controls the minimum length of a compound word when storing words using `storeSplitWordsAsCompounds`. The compound words are prefixed / suffixed with `*`, to allow them to be combined with other compound words. If the length is too low, then the dictionary will consider many misspelled words as correct.",
+      "type": "number"
+    },
     "removeDuplicates": {
       "default": false,
       "description": "Remove duplicate words, favor lower case words over mixed case words. Combine compound prefixes where possible.",
@@ -295,7 +310,7 @@
     },
     "storeSplitWordsAsCompounds": {
       "default": false,
-      "description": "Words that have been split using the `allowedSplitWords` are added to the dictionary as compoundable words. These words are prefixed / suffixed with `*`.",
+      "description": "Camel case words that have been split using the `allowedSplitWords` are added to the dictionary as compoundable words. These words are prefixed / suffixed with `*`.",
       "type": "boolean"
     },
     "targets": {

package/dist/compiler/SourceReader.d.ts CHANGED Viewed

@@ -17,6 +17,7 @@ export interface SourceReaderOptions {
     keepCase?: boolean;
     allowedSplitWords: AllowedSplitWordsCollection;
     storeSplitWordsAsCompounds: boolean | undefined;
+    minCompoundLength?: number | undefined;
 }
 export type AnnotatedWord = string;
 export interface SourceReader {

package/dist/compiler/SourceReader.js CHANGED Viewed

@@ -25,8 +25,17 @@ function splitLines(lines, options) {
     return split();
 }
 async function textFileReader(reader, options) {
-    const { legacy, splitWords: split, allowedSplitWords, storeSplitWordsAsCompounds } = options;
-    const words = [...parseFileLines(reader.lines, { legacy, split, allowedSplitWords, storeSplitWordsAsCompounds })];
+    const { legacy, splitWords: split, allowedSplitWords, storeSplitWordsAsCompounds, minCompoundLength } = options;
+    const parseOptions = {
+        legacy,
+        split,
+        splitKeepBoth: undefined,
+        keepCase: undefined,
+        allowedSplitWords,
+        storeSplitWordsAsCompounds,
+        minCompoundLength,
+    };
+    const words = [...parseFileLines(reader.lines, parseOptions)];
     return {
         size: words.length,
         words,

package/dist/compiler/compile.js CHANGED Viewed

@@ -175,7 +175,7 @@ async function readFileList(fileList) {
         .filter((a) => !!a);
 }
 async function readFileSource(fileSource, sourceOptions) {
-    const { filename, keepRawCase = sourceOptions.keepRawCase || false, split = sourceOptions.split || false, maxDepth, storeSplitWordsAsCompounds, } = fileSource;
+    const { filename, keepRawCase = sourceOptions.keepRawCase || false, split = sourceOptions.split || false, maxDepth, storeSplitWordsAsCompounds, minCompoundLength, } = fileSource;
     const legacy = split === 'legacy';
     const splitWords = legacy ? false : split;
     // console.warn('fileSource: %o,\n targetOptions %o, \n opt: %o', fileSource, targetOptions, opt);
@@ -187,6 +187,7 @@ async function readFileSource(fileSource, sourceOptions) {
         keepCase: keepRawCase,
         allowedSplitWords,
         storeSplitWordsAsCompounds,
+        minCompoundLength,
     };
     logWithTimestamp(`Reading ${path.basename(filename)}`);
     const stream = await streamSourceWordsFromFile(filename, readerOptions);

package/dist/compiler/legacyLineToWords.js CHANGED Viewed

@@ -1,12 +1,15 @@
 import { opConcatMap, opFilter, opMap, pipe } from '@cspell/cspell-pipe/sync';
+import { defaultCompileSourceOptions } from '../config/configDefaults.js';
 import { regExpSpaceOrDash, splitCamelCaseIfAllowed } from './splitCamelCaseIfAllowed.js';
 const regNonWord = /[^\p{L}\p{M}' _\d]+/giu;
 const regExpRepeatChars = /(.)\1{5}/i;
+const minCompoundLength = defaultCompileSourceOptions.minCompoundLength;
 export function legacyLineToWords(line, keepCase, allowedSplitWords) {
     // Remove punctuation and non-letters.
     const filteredLine = line.replaceAll(regNonWord, '|');
     const wordGroups = filteredLine.split('|');
-    const words = pipe(wordGroups, opConcatMap((a) => a.split(regExpSpaceOrDash)), opConcatMap((a) => splitCamelCaseIfAllowed(a, allowedSplitWords, keepCase, '')), opMap((a) => a.trim()), opFilter((a) => !!a), opFilter((s) => !regExpRepeatChars.test(s)));
+    const _minCompoundLength = minCompoundLength;
+    const words = pipe(wordGroups, opConcatMap((a) => a.split(regExpSpaceOrDash)), opConcatMap((a) => splitCamelCaseIfAllowed(a, allowedSplitWords, keepCase, '', _minCompoundLength)), opMap((a) => a.trim()), opFilter((a) => !!a), opFilter((s) => !regExpRepeatChars.test(s)));
     return words;
 }
 export function* legacyLinesToWords(lines, keepCase, allowedSplitWords) {

package/dist/compiler/splitCamelCaseIfAllowed.d.ts CHANGED Viewed

@@ -1,5 +1,5 @@
 import type { AllowedSplitWordsCollection } from './WordsCollection.js';
 export declare const regExpSpaceOrDash: RegExp;
 export declare const regExpIsNumber: RegExp;
-export declare function splitCamelCaseIfAllowed(word: string, allowedWords: AllowedSplitWordsCollection, keepCase: boolean, compoundPrefix: string): string[];
+export declare function splitCamelCaseIfAllowed(word: string, allowedWords: AllowedSplitWordsCollection, keepCase: boolean, compoundPrefix: string, minCompoundLength: number): string[];
 //# sourceMappingURL=splitCamelCaseIfAllowed.d.ts.map

package/dist/compiler/splitCamelCaseIfAllowed.js CHANGED Viewed

@@ -1,7 +1,7 @@
 import { isSingleLetter, splitCamelCaseWord } from './text.js';
 export const regExpSpaceOrDash = /[- ]+/g;
 export const regExpIsNumber = /^\d+$/;
-export function splitCamelCaseIfAllowed(word, allowedWords, keepCase, compoundPrefix) {
+export function splitCamelCaseIfAllowed(word, allowedWords, keepCase, compoundPrefix, minCompoundLength) {
     const split = [...splitCamelCase(word)];
     if (split.length == 1)
         return adjustCases(split, allowedWords, keepCase);
@@ -14,7 +14,7 @@ export function splitCamelCaseIfAllowed(word, allowedWords, keepCase, compoundPr
         ? adjusted
         : adjusted.map((w, i) => {
             const { px, sx } = wordIndexes[i];
-            const canCompound = w.length > 2;
+            const canCompound = w.length >= minCompoundLength;
             const lc = w.toLowerCase();
             const p = canCompound && isSingleLetter(px) ? compoundPrefix : '';
             const s = canCompound && isSingleLetter(sx) ? compoundPrefix : '';

package/dist/compiler/wordListCompiler.js CHANGED Viewed

@@ -46,8 +46,13 @@ function* removeDuplicates(words) {
             continue;
         }
         const mForms = removeDuplicateForms(forms);
+        // if (forms.some((a) => /^[*+]?col[*+]?$/.test(a))) {
+        //     console.warn('Found col %o', { forms, mForms });
+        // }
         if (mForms.size <= 1) {
-            yield* mForms.values();
+            for (const form of mForms.values()) {
+                yield* form;
+            }
             continue;
         }
         // Handle upper / lower mix.
@@ -55,15 +60,22 @@ function* removeDuplicates(words) {
         const lc = words[0].toLowerCase();
         const lcForm = mForms.get(lc);
         if (!lcForm) {
-            yield* mForms.values();
+            for (const form of mForms.values()) {
+                yield* form;
+            }
             continue;
         }
         mForms.delete(lc);
-        yield lcForm;
-        for (const form of mForms.values()) {
-            if (form.toLowerCase() === lcForm)
-                continue;
-            yield form;
+        const sLcForms = new Set(lcForm);
+        yield* lcForm;
+        if (sLcForms.has('*' + lc + '*'))
+            continue;
+        for (const forms of mForms.values()) {
+            for (const form of forms) {
+                if (sLcForms.has(form.toLowerCase()))
+                    continue;
+                yield form;
+            }
         }
     }
 }
@@ -77,36 +89,62 @@ function* removeDuplicates(words) {
 var Flags;
 (function (Flags) {
     Flags[Flags["base"] = 0] = "base";
-    Flags[Flags["noPfx"] = 1] = "noPfx";
-    Flags[Flags["noSfx"] = 2] = "noSfx";
+    Flags[Flags["none"] = 1] = "none";
+    Flags[Flags["both"] = 2] = "both";
     Flags[Flags["pfx"] = 4] = "pfx";
     Flags[Flags["sfx"] = 8] = "sfx";
-    Flags[Flags["noFix"] = 3] = "noFix";
-    Flags[Flags["midFix"] = 12] = "midFix";
+    Flags[Flags["all"] = 15] = "all";
 })(Flags || (Flags = {}));
 function applyFlags(word, flags) {
-    if (flags === Flags.noFix)
-        return word;
-    if (flags === (Flags.noFix | Flags.midFix))
-        return '*' + word + '*';
-    const p = flags & Flags.pfx ? (flags & Flags.noPfx ? '*' : '+') : '';
-    const s = flags & Flags.sfx ? (flags & Flags.noSfx ? '*' : '+') : '';
-    return s + word + p;
+    if (flags === Flags.none)
+        return [word];
+    if (flags === Flags.all)
+        return ['*' + word + '*'];
+    if (flags === Flags.both)
+        return ['+' + word + '+'];
+    if (flags === Flags.pfx)
+        return [word + '+'];
+    if (flags === Flags.sfx)
+        return ['+' + word];
+    if (flags === (Flags.none | Flags.sfx))
+        return ['*' + word];
+    if (flags === (Flags.none | Flags.pfx))
+        return [word + '*'];
+    if (flags === (Flags.none | Flags.pfx | Flags.sfx))
+        return [word + '*', '*' + word];
+    if (flags === (Flags.none | Flags.both))
+        return [word, '+' + word + '+'];
+    if (flags === (Flags.none | Flags.both | Flags.sfx))
+        return [word, '+' + word + '*'];
+    if (flags === (Flags.none | Flags.both | Flags.pfx))
+        return [word, '*' + word + '+'];
+    if (flags === (Flags.both | Flags.pfx))
+        return ['*' + word + '+'];
+    if (flags === (Flags.both | Flags.sfx))
+        return ['+' + word + '*'];
+    if (flags === (Flags.both | Flags.pfx | Flags.sfx))
+        return ['+' + word + '*', '*' + word + '+'];
+    return ['+' + word, word + '+'];
 }
 function removeDuplicateForms(forms) {
     function flags(word, flag = 0) {
-        let f = Flags.base;
-        const isOptPrefix = word.endsWith('*');
-        const isPrefix = !isOptPrefix && word.endsWith('+');
-        const isAnyPrefix = isPrefix || isOptPrefix;
-        const isOptSuffix = word.startsWith('*');
-        const isSuffix = !isOptSuffix && word.startsWith('+');
-        const isAnySuffix = isSuffix || isOptSuffix;
-        f |= isAnyPrefix ? Flags.pfx : 0;
-        f |= !isPrefix ? Flags.noPfx : 0;
-        f |= isAnySuffix ? Flags.sfx : 0;
-        f |= !isSuffix ? Flags.noSfx : 0;
-        return flag | f;
+        const canBePrefix = word.endsWith('*');
+        const mustBePrefix = !canBePrefix && word.endsWith('+');
+        const isPrefix = canBePrefix || mustBePrefix;
+        const canBeSuffix = word.startsWith('*');
+        const mustBeSuffix = !canBeSuffix && word.startsWith('+');
+        const isSuffix = canBeSuffix || mustBeSuffix;
+        if (canBePrefix && canBeSuffix)
+            return flag | Flags.all;
+        if (mustBePrefix && mustBeSuffix)
+            return flag | Flags.both;
+        if (!isPrefix && !isSuffix)
+            return flag | Flags.none;
+        flag |= isPrefix && !isSuffix ? Flags.pfx : 0;
+        flag |= isSuffix && !isPrefix ? Flags.sfx : 0;
+        flag |= canBePrefix && !mustBeSuffix ? Flags.none : 0;
+        flag |= canBeSuffix && !mustBePrefix ? Flags.none : 0;
+        return flag;
     }
     const m = new Map();
     for (const form of forms) {

package/dist/compiler/wordListParser.d.ts CHANGED Viewed

@@ -30,6 +30,13 @@ export interface ParseFileOptions {
      * @default undefined
      */
     storeSplitWordsAsCompounds: boolean | undefined;
+    /**
+     * Controls the minimum length of a compound word when storing words using `storeSplitWordsAsCompounds`.
+     * The compound words are prefixed / suffixed with `*`, to allow them to be combined with other compound words.
+     * If the length is too low, then the dictionary will consider many misspelled words as correct.
+     * @default 4
+     */
+    minCompoundLength: number | undefined;
 }
 type ParseFileOptionsRequired = Required<ParseFileOptions>;
 export declare const defaultParseDictionaryOptions: ParseFileOptionsRequired;
@@ -49,6 +56,6 @@ export declare function createParseFileLineMapper(options?: Partial<ParseFileOpt
  * @param _options - defines prefixes used when parsing lines.
  * @returns words that have been normalized.
  */
-export declare function parseFileLines(lines: Iterable<string> | string, options: Partial<ParseFileOptions>): Iterable<string>;
+export declare function parseFileLines(lines: Iterable<string> | string, options: ParseFileOptions): Iterable<string>;
 export {};
 //# sourceMappingURL=wordListParser.d.ts.map

package/dist/compiler/wordListParser.js CHANGED Viewed

@@ -1,6 +1,7 @@
 import { opCombine, opCombine as opPipe, opFilter, opMap } from '@cspell/cspell-pipe/sync';
 import { createDictionaryLineParser } from 'cspell-trie-lib';
 import { uniqueFilter } from 'hunspell-reader';
+import { defaultCompileSourceOptions } from '../config/configDefaults.js';
 import { legacyLineToWords } from './legacyLineToWords.js';
 import { splitCamelCaseIfAllowed } from './splitCamelCaseIfAllowed.js';
 export function normalizeTargetWords(options) {
@@ -45,7 +46,8 @@ const _defaultOptions = {
     splitKeepBoth: false,
     // splitSeparator: regExpSplit,
     allowedSplitWords: { has: () => true, size: 0 },
-    storeSplitWordsAsCompounds: undefined,
+    storeSplitWordsAsCompounds: defaultCompileSourceOptions.storeSplitWordsAsCompounds,
+    minCompoundLength: defaultCompileSourceOptions.minCompoundLength,
 };
 export const defaultParseDictionaryOptions = Object.freeze(_defaultOptions);
 export const cSpellToolDirective = 'cspell-tools:';
@@ -58,7 +60,7 @@ export const setOfCSpellDirectiveFlags = ['no-split', 'split', 'keep-case', 'no-
  */
 export function createParseFileLineMapper(options) {
     const _options = options || _defaultOptions;
-    const { splitKeepBoth = _defaultOptions.splitKeepBoth, allowedSplitWords = _defaultOptions.allowedSplitWords, storeSplitWordsAsCompounds, } = _options;
+    const { splitKeepBoth = _defaultOptions.splitKeepBoth, allowedSplitWords = _defaultOptions.allowedSplitWords, storeSplitWordsAsCompounds, minCompoundLength = _defaultOptions.minCompoundLength, } = _options;
     let { legacy = _defaultOptions.legacy } = _options;
     let { split = _defaultOptions.split, keepCase = legacy ? false : _defaultOptions.keepCase } = _options;
     const compoundFix = storeSplitWordsAsCompounds ? '+' : '';
@@ -135,7 +137,7 @@ export function createParseFileLineMapper(options) {
         return lines;
     }
     function splitWordIntoWords(word) {
-        return splitCamelCaseIfAllowed(word, allowedSplitWords, keepCase, compoundFix);
+        return splitCamelCaseIfAllowed(word, allowedSplitWords, keepCase, compoundFix, minCompoundLength);
     }
     function* splitWords(lines) {
         for (const line of lines) {

package/dist/config/config.d.ts CHANGED Viewed

@@ -161,11 +161,18 @@ export interface CompileSourceOptions {
      */
     allowedSplitWords?: FilePath | FilePath[] | undefined;
     /**
-     * Words that have been split using the `allowedSplitWords` are added to the dictionary as compoundable words.
+     * Camel case words that have been split using the `allowedSplitWords` are added to the dictionary as compoundable words.
      * These words are prefixed / suffixed with `*`.
      * @default false
      */
     storeSplitWordsAsCompounds?: boolean | undefined;
+    /**
+     * Controls the minimum length of a compound word when storing words using `storeSplitWordsAsCompounds`.
+     * The compound words are prefixed / suffixed with `*`, to allow them to be combined with other compound words.
+     * If the length is too low, then the dictionary will consider many misspelled words as correct.
+     * @default 4
+     */
+    minCompoundLength?: number | undefined;
 }
 export declare const configFileSchemaURL = "https://raw.githubusercontent.com/streetsidesoftware/cspell/main/packages/cspell-tools/cspell-tools.config.schema.json";
 //# sourceMappingURL=config.d.ts.map

package/dist/config/configDefaults.d.ts ADDED Viewed

@@ -0,0 +1,9 @@
+export declare const defaultCompileSourceOptions: {
+    readonly maxDepth: undefined;
+    readonly split: false;
+    readonly keepRawCase: false;
+    readonly allowedSplitWords: undefined;
+    readonly storeSplitWordsAsCompounds: false;
+    readonly minCompoundLength: 4;
+};
+//# sourceMappingURL=configDefaults.d.ts.map

package/dist/config/configDefaults.js ADDED Viewed

@@ -0,0 +1,9 @@
+export const defaultCompileSourceOptions = {
+    maxDepth: undefined,
+    split: false,
+    keepRawCase: false,
+    allowedSplitWords: undefined,
+    storeSplitWordsAsCompounds: false,
+    minCompoundLength: 4,
+};
+//# sourceMappingURL=configDefaults.js.map

package/dist/types.d.ts ADDED Viewed

@@ -0,0 +1,7 @@
+/**
+ * Make all properties in T required, but keep the original optionality of the properties.
+ */
+export type RequireFields<T> = {
+    [P in keyof Required<T>]: T[P];
+};
+//# sourceMappingURL=types.d.ts.map

package/dist/types.js ADDED Viewed

@@ -0,0 +1,2 @@
+export {};
+//# sourceMappingURL=types.js.map

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@cspell/cspell-tools",
-  "version": "8.15.5",
+  "version": "8.15.7",
   "description": "Tools to assist with the development of cSpell",
   "publishConfig": {
     "access": "public",
@@ -51,12 +51,12 @@
   },
   "homepage": "https://github.com/streetsidesoftware/cspell/tree/main/packages/cspell-tools#readme",
   "dependencies": {
-    "@cspell/cspell-pipe": "8.15.5",
+    "@cspell/cspell-pipe": "8.15.7",
     "commander": "^12.1.0",
     "cosmiconfig": "9.0.0",
-    "cspell-trie-lib": "8.15.5",
+    "cspell-trie-lib": "8.15.7",
     "glob": "^10.4.5",
-    "hunspell-reader": "8.15.5",
+    "hunspell-reader": "8.15.7",
     "yaml": "^2.6.0"
   },
   "engines": {
@@ -67,5 +67,5 @@
     "ts-json-schema-generator": "^2.3.0"
   },
   "module": "bin.mjs",
-  "gitHead": "5f974b2d4d61e2582aba5a1c87fcbc127f5f06bb"
+  "gitHead": "9decdc45e8fd0d6bb9c3851813c2c0728d3c7c5e"
 }