allprofanity 2.2.1 → 2.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.ts CHANGED
@@ -255,6 +255,54 @@ export interface AllProfanityOptions {
255
255
  */
256
256
  scoreThreshold?: number;
257
257
  };
258
+ /**
259
+ * Evasion-protection configuration. All passes are enabled by default and
260
+ * only run when their trigger characters are present in the text, so they
261
+ * add near-zero cost on ordinary input.
262
+ */
263
+ evasionProtection?: {
264
+ /**
265
+ * Fold unicode evasion: fullwidth forms (ｆｕｃｋ), Cyrillic/Greek
266
+ * homoglyphs (fυck), diacritics (fück) and invisible characters
267
+ * (zero-width spaces, soft hyphens) injected inside words.
268
+ *
269
+ * @default true
270
+ */
271
+ unicode?: boolean;
272
+ /**
273
+ * Collapse stretched characters ("fuuuuck" -> "fuck"). Only triggers when
274
+ * a run of 3+ identical characters exists.
275
+ *
276
+ * @default true
277
+ */
278
+ repeatedCharacters?: boolean;
279
+ /**
280
+ * Resolve masked characters as single-character wildcards ("f*ck",
281
+ * "f#ck", "f@ck"). A masked token only matches when the visible letters
282
+ * align exactly with a dictionary word.
283
+ *
284
+ * @default true
285
+ */
286
+ maskedCharacters?: boolean;
287
+ /**
288
+ * Detect words spelled out with uniform single separators
289
+ * ("f u c k", "f.u.c.k"). The joined letters must equal a dictionary
290
+ * word exactly, which keeps initialisms like "U S A" clean.
291
+ *
292
+ * @default true
293
+ */
294
+ separatedLetters?: boolean;
295
+ /**
296
+ * Detect unambiguous profanity stems embedded inside larger tokens
297
+ * ("sisfuck", "totalshitshow"). Applies only to a curated list of
298
+ * strong words that rarely occur in legitimate vocabulary, with built-in
299
+ * exceptions (Scunthorpe, mishit, snigger, ...), so "classic", "bass"
300
+ * and "Hitchcock" stay clean.
301
+ *
302
+ * @default true
303
+ */
304
+ embeddedWords?: boolean;
305
+ };
258
306
  /**
259
307
  * Performance optimization configuration.
260
308
  */
@@ -291,6 +339,8 @@ export interface AllProfanityOptions {
291
339
  * ```
292
340
  */
293
341
  export declare enum ProfanitySeverity {
342
+ /** No profanity detected */
343
+ NONE = 0,
294
344
  /** Mild profanity: 1 unique word or 1 total match */
295
345
  MILD = 1,
296
346
  /** Moderate profanity: 2 unique words or 2 total matches */
@@ -461,14 +511,22 @@ export declare class AllProfanity {
461
511
  private caseSensitive;
462
512
  private strictMode;
463
513
  private detectPartialWords;
514
+ private evasionUnicode;
515
+ private evasionRepeatedChars;
516
+ private evasionMaskedChars;
517
+ private evasionSeparatedLetters;
518
+ private evasionEmbeddedWords;
464
519
  private readonly availableLanguages;
465
520
  private readonly leetMappings;
466
521
  private readonly dynamicWords;
467
522
  private ahoCorasickAutomaton;
468
523
  private bloomFilter;
469
524
  private contextAnalyzer;
525
+ private contextScoreThreshold;
470
526
  private matchingAlgorithm;
471
527
  private resultCache;
528
+ private cacheMaxSize;
529
+ private leetTokensByFirstChar;
472
530
  /**
473
531
  * Creates a new AllProfanity instance with the specified configuration.
474
532
  *
@@ -514,17 +572,56 @@ export declare class AllProfanity {
514
572
  */
515
573
  private initializeAdvancedAlgorithms;
516
574
  /**
517
- * Normalize leet speak to regular characters.
518
- * @param text - The input text.
519
- * @returns Normalized text.
575
+ * Normalize leet speak to regular characters, keeping a map from each
576
+ * normalized character back to its source range in the input text.
577
+ *
578
+ * For normalized index i, starts[i]/ends[i] give the [start, end) range in
579
+ * the input that produced that character. A match [s, e) in the normalized
580
+ * string therefore spans [starts[s], ends[e - 1]) in the input. This is what
581
+ * keeps positions correct when length-changing mappings like "ph" -> "f"
582
+ * apply.
520
583
  */
521
- private normalizeLeetSpeak;
584
+ private normalizeLeetSpeakWithMap;
522
585
  /**
523
- * Escape regex special characters in a string.
524
- * @param str - The string to escape.
525
- * @returns The escaped string.
586
+ * Fold unicode evasion tactics into ASCII with a position map: fullwidth
587
+ * forms, Cyrillic/Greek homoglyphs, Latin diacritics, and invisible
588
+ * characters injected inside words. Non-Latin scripts (Devanagari, Tamil,
589
+ * etc.) pass through untouched. Returns null when nothing changed.
526
590
  */
527
- private escapeRegex;
591
+ private unicodeNormalizeWithMap;
592
+ /**
593
+ * Collapse runs of repeated characters ("fuuuuck" -> "fuck") with a
594
+ * position map. Only triggers when a run of 3+ identical characters
595
+ * exists, so ordinary doubled letters never pay for this pass.
596
+ * Returns null when not triggered.
597
+ */
598
+ private collapseRepeatsWithMap;
599
+ /**
600
+ * Build the list of (text, position-map) variants to scan: the base text
601
+ * plus unicode-folded, leet-normalized and repeat-collapsed variants, each
602
+ * included only when its normalization actually changed something.
603
+ */
604
+ private buildScanPasses;
605
+ /**
606
+ * Find dictionary words hidden behind masked characters ("f*ck", "f#ck").
607
+ * Each mask matches exactly one character and the token's visible letters
608
+ * must align with a dictionary word, so "c#" or "5% off" never flag.
609
+ */
610
+ private findMaskedMatches;
611
+ /**
612
+ * Find words spelled out with a uniform single separator ("f u c k",
613
+ * "f.u.c.k"). The joined letters must equal a dictionary word exactly:
614
+ * runs like "U S A" or letters inside spelled-out sentences never flag.
615
+ */
616
+ private findSeparatedMatches;
617
+ /**
618
+ * Find unambiguous profanity stems embedded inside larger tokens
619
+ * ("sisfuck", "totalshitshow"). Only stems from EMBEDDED_STRONG_STEMS that
620
+ * are currently in the dictionary are considered, and tokens listed in
621
+ * EMBEDDED_SAFE_WORDS or the whitelist never flag. The whole containing
622
+ * token is reported so cleaning masks all of it.
623
+ */
624
+ private findEmbeddedMatches;
528
625
  /**
529
626
  * Check if a match is bounded by word boundaries (strict mode).
530
627
  * @param text - The text.
@@ -548,6 +645,11 @@ export declare class AllProfanity {
548
645
  * @returns True if whitelisted, false otherwise.
549
646
  */
550
647
  private isWhitelistedMatch;
648
+ /**
649
+ * In partial-word mode, check whether the word CONTAINING the match is
650
+ * whitelisted: with "classic" whitelisted, the embedded "ass" must not flag.
651
+ */
652
+ private isWhitelistedContainingWord;
551
653
  /**
552
654
  * Remove overlapping matches, keeping only the longest at each start position.
553
655
  * @param matches - Array of match results.
@@ -559,13 +661,24 @@ export declare class AllProfanity {
559
661
  */
560
662
  private findMatchesWithAhoCorasick;
561
663
  /**
562
- * Hybrid approach: Aho-Corasick for fast matching, Bloom Filter for validation
664
+ * Check whether the Bloom Filter can quickly rule out any profanity in the
665
+ * text. Only safe for ASCII whole-word matching: partial matches and
666
+ * non-ASCII scripts can match inside tokens, so they bypass the prefilter.
667
+ */
668
+ private bloomQuickReject;
669
+ /**
670
+ * Hybrid approach: Bloom Filter for quick rejection, Aho-Corasick for matching
563
671
  */
564
672
  private findMatchesHybrid;
565
673
  /**
566
674
  * Apply context analysis to filter false positives
567
675
  */
568
676
  private applyContextAnalysis;
677
+ /**
678
+ * Drop all cached detection results. Must be called whenever the word lists
679
+ * or any option that affects detection output changes.
680
+ */
681
+ private invalidateCache;
569
682
  /**
570
683
  * Detects profanity in the provided text and returns comprehensive analysis.
571
684
  *
@@ -661,6 +774,11 @@ export declare class AllProfanity {
661
774
  * @see {@link detect} for detailed profanity analysis
662
775
  */
663
776
  check(text: string): boolean;
777
+ /**
778
+ * Trie scan that stops at the first match surviving the whole-word,
779
+ * whitelist and boundary checks. Powers the fast path in check().
780
+ */
781
+ private hasMatchInPass;
664
782
  /**
665
783
  * Cleans text by replacing profanity with a placeholder character.
666
784
  *
@@ -1026,9 +1144,10 @@ export declare class AllProfanity {
1026
1144
  */
1027
1145
  getConfig(): Partial<AllProfanityOptions>;
1028
1146
  /**
1029
- * Rebuild the profanity trie from loaded dictionaries and dynamic words.
1147
+ * Rebuild all matching structures (trie, Aho-Corasick automaton, Bloom
1148
+ * Filter) from loaded dictionaries and dynamic words.
1030
1149
  */
1031
- private rebuildTrie;
1150
+ private rebuildIndexes;
1032
1151
  /**
1033
1152
  * Update configuration options for the profanity filter.
1034
1153
  * @param options - Partial configuration object.
@@ -1043,6 +1162,7 @@ export declare class AllProfanity {
1043
1162
  }
1044
1163
  /**
1045
1164
  * Singleton instance of AllProfanity with default configuration.
1165
+ * Silent so that importing the library never writes to the console.
1046
1166
  */
1047
1167
  declare const allProfanity: AllProfanity;
1048
1168
  export default allProfanity;