npm - allprofanity - Versions diffs - 2.2.1 → 2.3.0 - Mend

allprofanity 2.2.1 → 2.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (31)

package/README.md +144 -25
package/allprofanity.config.example.json +6 -0
package/bin/init.js +1 -1
package/bin/mcp.js +6 -0
package/config.schema.json +44 -0
package/dist/algos/aho-corasick.d.ts +11 -1
package/dist/algos/aho-corasick.js +31 -6
package/dist/algos/aho-corasick.js.map +1 -1
package/dist/algos/bloom-filter.d.ts +2 -2
package/dist/algos/bloom-filter.js +6 -6
package/dist/algos/bloom-filter.js.map +1 -1
package/dist/index.d.ts +131 -11
package/dist/index.js +736 -126
package/dist/index.js.map +1 -1
package/dist/languages/hindi-words.js +2 -2
package/dist/languages/hindi-words.js.map +1 -1
package/dist/mcp/server.d.ts +30 -0
package/dist/mcp/server.js +364 -0
package/dist/mcp/server.js.map +1 -0
package/dist/mcp/stdio.d.ts +1 -0
package/dist/mcp/stdio.js +72 -0
package/dist/mcp/stdio.js.map +1 -0
package/examples-config/README.md +113 -0
package/examples-config/chat-app.json +24 -0
package/examples-config/content-moderation.json +42 -0
package/examples-config/family-friendly-max.json +33 -0
package/examples-config/high-throughput-api.json +29 -0
package/examples-config/low-latency-minimal.json +24 -0
package/examples-config/medical-professional.json +42 -0
package/examples-config/multilingual-global.json +33 -0
package/package.json +17 -7

package/dist/index.d.ts CHANGED

@@ -255,6 +255,54 @@ export interface AllProfanityOptions {
          */
         scoreThreshold?: number;
     };
+    /**
+     * Evasion-protection configuration. All passes are enabled by default and
+     * only run when their trigger characters are present in the text, so they
+     * add near-zero cost on ordinary input.
+     */
+    evasionProtection?: {
+        /**
+         * Fold unicode evasion: fullwidth forms (ｆｕｃｋ), Cyrillic/Greek
+         * homoglyphs (fυck), diacritics (fück) and invisible characters
+         * (zero-width spaces, soft hyphens) injected inside words.
+         *
+         * @default true
+         */
+        unicode?: boolean;
+        /**
+         * Collapse stretched characters ("fuuuuck" -> "fuck"). Only triggers when
+         * a run of 3+ identical characters exists.
+         *
+         * @default true
+         */
+        repeatedCharacters?: boolean;
+        /**
+         * Resolve masked characters as single-character wildcards ("f*ck",
+         * "f#ck", "f@ck"). A masked token only matches when the visible letters
+         * align exactly with a dictionary word.
+         *
+         * @default true
+         */
+        maskedCharacters?: boolean;
+        /**
+         * Detect words spelled out with uniform single separators
+         * ("f u c k", "f.u.c.k"). The joined letters must equal a dictionary
+         * word exactly, which keeps initialisms like "U S A" clean.
+         *
+         * @default true
+         */
+        separatedLetters?: boolean;
+        /**
+         * Detect unambiguous profanity stems embedded inside larger tokens
+         * ("sisfuck", "totalshitshow"). Applies only to a curated list of
+         * strong words that almost never occur in legitimate vocabulary, with
+         * built-in exceptions for the known collisions (Scunthorpe, mishit,
+         * snigger, ...), so "classic", "bass" and "Hitchcock" stay clean.
+         *
+         * @default true
+         */
+        embeddedWords?: boolean;
+    };
     /**
      * Performance optimization configuration.
      */
@@ -291,6 +339,8 @@ export interface AllProfanityOptions {
  * ```
  */
 export declare enum ProfanitySeverity {
+    /** No profanity detected */
+    NONE = 0,
     /** Mild profanity: 1 unique word or 1 total match */
     MILD = 1,
     /** Moderate profanity: 2 unique words or 2 total matches */
@@ -461,14 +511,22 @@ export declare class AllProfanity {
     private caseSensitive;
     private strictMode;
     private detectPartialWords;
+    private evasionUnicode;
+    private evasionRepeatedChars;
+    private evasionMaskedChars;
+    private evasionSeparatedLetters;
+    private evasionEmbeddedWords;
     private readonly availableLanguages;
     private readonly leetMappings;
     private readonly dynamicWords;
     private ahoCorasickAutomaton;
     private bloomFilter;
     private contextAnalyzer;
+    private contextScoreThreshold;
     private matchingAlgorithm;
     private resultCache;
+    private cacheMaxSize;
+    private leetTokensByFirstChar;
     /**
      * Creates a new AllProfanity instance with the specified configuration.
      *
@@ -514,17 +572,56 @@ export declare class AllProfanity {
      */
     private initializeAdvancedAlgorithms;
     /**
-     * Normalize leet speak to regular characters.
-     * @param text - The input text.
-     * @returns Normalized text.
+     * Normalize leet speak to regular characters, keeping a map from each
+     * normalized character back to its source range in the input text.
+     *
+     * For normalized index i, starts[i]/ends[i] give the [start, end) range in
+     * the input that produced that character. A match [s, e) in the normalized
+     * string therefore spans [starts[s], ends[e - 1]) in the input. This is what
+     * keeps positions correct when length-changing mappings like "ph" -> "f"
+     * apply.
      */
-    private normalizeLeetSpeak;
+    private normalizeLeetSpeakWithMap;
     /**
-     * Escape regex special characters in a string.
-     * @param str - The string to escape.
-     * @returns The escaped string.
+     * Fold unicode evasion tactics into ASCII with a position map: fullwidth
+     * forms, Cyrillic/Greek homoglyphs, Latin diacritics, and invisible
+     * characters injected inside words. Non-Latin scripts (Devanagari, Tamil,
+     * etc.) pass through untouched. Returns null when nothing changed.
      */
-    private escapeRegex;
+    private unicodeNormalizeWithMap;
+    /**
+     * Collapse runs of repeated characters ("fuuuuck" -> "fuck") with a
+     * position map. Only triggers when a run of 3+ identical characters
+     * exists, so ordinary doubled letters never pay for this pass.
+     * Returns null when not triggered.
+     */
+    private collapseRepeatsWithMap;
+    /**
+     * Build the list of (text, position-map) variants to scan: the base text
+     * plus unicode-folded, leet-normalized and repeat-collapsed variants, each
+     * included only when its normalization actually changed something.
+     */
+    private buildScanPasses;
+    /**
+     * Find dictionary words hidden behind masked characters ("f*ck", "f#ck").
+     * Each mask matches exactly one character and the token's visible letters
+     * must align with a dictionary word, so "c#" or "5% off" never flag.
+     */
+    private findMaskedMatches;
+    /**
+     * Find words spelled out with a uniform single separator ("f u c k",
+     * "f.u.c.k"). The joined letters must equal a dictionary word exactly:
+     * runs like "U S A" or letters inside spelled-out sentences never flag.
+     */
+    private findSeparatedMatches;
+    /**
+     * Find unambiguous profanity stems embedded inside larger tokens
+     * ("sisfuck", "totalshitshow"). Only stems from EMBEDDED_STRONG_STEMS that
+     * are currently in the dictionary are considered, and tokens listed in
+     * EMBEDDED_SAFE_WORDS or the whitelist never flag. The whole containing
+     * token is reported so cleaning masks all of it.
+     */
+    private findEmbeddedMatches;
     /**
      * Check if a match is bounded by word boundaries (strict mode).
      * @param text - The text.
@@ -548,6 +645,11 @@ export declare class AllProfanity {
      * @returns True if whitelisted, false otherwise.
      */
     private isWhitelistedMatch;
+    /**
+     * In partial-word mode, check whether the word CONTAINING the match is
+     * whitelisted: with "classic" whitelisted, the embedded "ass" must not flag.
+     */
+    private isWhitelistedContainingWord;
     /**
      * Remove overlapping matches, keeping only the longest at each start position.
      * @param matches - Array of match results.
@@ -559,13 +661,24 @@ export declare class AllProfanity {
      */
     private findMatchesWithAhoCorasick;
     /**
-     * Hybrid approach: Aho-Corasick for fast matching, Bloom Filter for validation
+     * Check whether the Bloom Filter can quickly rule out any profanity in the
+     * text. Only safe for ASCII whole-word matching: partial matches and
+     * non-ASCII scripts can match inside tokens, so they bypass the prefilter.
+     */
+    private bloomQuickReject;
+    /**
+     * Hybrid approach: Bloom Filter for quick rejection, Aho-Corasick for matching
      */
     private findMatchesHybrid;
     /**
      * Apply context analysis to filter false positives
      */
     private applyContextAnalysis;
+    /**
+     * Drop all cached detection results. Must be called whenever the word lists
+     * or any option that affects detection output changes.
+     */
+    private invalidateCache;
     /**
      * Detects profanity in the provided text and returns comprehensive analysis.
      *
@@ -661,6 +774,11 @@ export declare class AllProfanity {
      * @see {@link detect} for detailed profanity analysis
      */
     check(text: string): boolean;
+    /**
+     * Trie scan that stops at the first match surviving the whole-word,
+     * whitelist and boundary checks. Powers the fast path in check().
+     */
+    private hasMatchInPass;
     /**
      * Cleans text by replacing profanity with a placeholder character.
      *
@@ -1026,9 +1144,10 @@ export declare class AllProfanity {
      */
     getConfig(): Partial<AllProfanityOptions>;
     /**
-     * Rebuild the profanity trie from loaded dictionaries and dynamic words.
+     * Rebuild all matching structures (trie, Aho-Corasick automaton, Bloom
+     * Filter) from loaded dictionaries and dynamic words.
      */
-    private rebuildTrie;
+    private rebuildIndexes;
     /**
      * Update configuration options for the profanity filter.
      * @param options - Partial configuration object.
@@ -1043,6 +1162,7 @@ export declare class AllProfanity {
 }
 /**
  * Singleton instance of AllProfanity with default configuration.
+ * Silent so that importing the library never writes to the console.
  */
 declare const allProfanity: AllProfanity;
 export default allProfanity;