allprofanity 2.2.1 → 2.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -118,6 +118,8 @@ class SilentLogger {
118
118
  */
119
119
  export var ProfanitySeverity;
120
120
  (function (ProfanitySeverity) {
121
+ /** No profanity detected */
122
+ ProfanitySeverity[ProfanitySeverity["NONE"] = 0] = "NONE";
121
123
  /** Mild profanity: 1 unique word or 1 total match */
122
124
  ProfanitySeverity[ProfanitySeverity["MILD"] = 1] = "MILD";
123
125
  /** Moderate profanity: 2 unique words or 2 total matches */
@@ -127,6 +129,99 @@ export var ProfanitySeverity;
127
129
  /** Extreme profanity: 4+ unique words or 5+ total matches */
128
130
  ProfanitySeverity[ProfanitySeverity["EXTREME"] = 4] = "EXTREME";
129
131
  })(ProfanitySeverity = ProfanitySeverity || (ProfanitySeverity = {}));
/**
 * Compose two position maps: `inner` maps its normalized text back to the
 * text that `outer` normalized, and the result maps `inner.normalized`
 * directly back to the original input.
 *
 * @param outer - Position map from the first normalization (`starts`/`ends`
 *   indexed by position in the text that `inner` consumed)
 * @param inner - Position map from the second normalization
 * @returns A map with `inner.normalized` as text and `starts`/`ends`
 *   expressed in the coordinates `outer` maps to
 * @internal
 */
function composeMaps(outer, inner) {
    const count = inner.starts.length;
    const starts = new Array(count);
    const ends = new Array(inner.ends.length);
    for (let i = 0; i < count; i++) {
        starts[i] = outer.starts[inner.starts[i]];
        // inner.ends[i] is exclusive, so the last covered outer index is one less.
        ends[i] = outer.ends[inner.ends[i] - 1];
    }
    return { normalized: inner.normalized, starts, ends };
}
/**
 * Common homoglyphs (visually identical/near-identical non-Latin characters)
 * folded to their ASCII look-alikes for evasion-resistant matching.
 *
 * Every key below is a NON-ASCII letter even where it renders exactly like
 * the ASCII value next to it; only lowercase forms are listed.
 *
 * @internal
 */
const CONFUSABLES = new Map([
    // Cyrillic
    ["а", "a"], ["в", "b"], ["е", "e"], ["к", "k"], ["м", "m"], ["н", "h"],
    ["о", "o"], ["р", "p"], ["с", "c"], ["т", "t"], ["у", "y"], ["х", "x"],
    ["і", "i"], ["ј", "j"], ["ѕ", "s"], ["ԁ", "d"], ["ԛ", "q"], ["ԝ", "w"],
    // Greek
    ["α", "a"], ["β", "b"], ["γ", "y"], ["ε", "e"], ["η", "n"], ["ι", "i"],
    ["κ", "k"], ["μ", "m"], ["ν", "v"], ["ο", "o"], ["ρ", "p"], ["σ", "s"],
    ["τ", "t"], ["υ", "u"], ["χ", "x"], ["ω", "w"],
]);
/**
 * Invisible characters commonly injected to break up profane words.
 *
 * @internal
 */
const INVISIBLE_CHARS = new Set([
    "\u200B", // zero width space
    "\u200C", // zero width non-joiner
    "\u200D", // zero width joiner
    "\uFEFF", // zero width no-break space / byte order mark
    "\u00AD", // soft hyphen
    "\u2060", // word joiner
    "\u180E", // Mongolian vowel separator
]);
/**
 * Symbols treated as single-character wildcards in masked words like "f*ck".
 *
 * @internal
 */
const MASK_CHARS = new Set(["*", "#", "@", "$", "%"]);
/**
 * Unambiguous profanity stems that are flagged even when embedded inside a
 * larger token ("sisfuck", "totalshitshow"). Only words that essentially
 * never occur inside legitimate vocabulary belong here — ambiguous stems
 * like "ass" or "cock" (class, bass, Hitchcock, peacock) must stay
 * whole-word matched.
 *
 * All entries are lowercase ASCII.
 *
 * @internal
 */
const EMBEDDED_STRONG_STEMS = [
    "fuck",
    "shit",
    "bitch",
    "cunt",
    "whore",
    "nigger",
    "nigga",
    "faggot",
    "wanker",
    "chutiya",
    "bhenchod",
    "behenchod",
    "madarchod",
    "bhosdi",
];
/**
 * Legitimate words that contain a strong stem and must never be flagged by
 * the embedded pass (the user whitelist extends this set).
 *
 * Entries are lowercase; lookups are expected to lowercase the token first.
 *
 * @internal
 */
const EMBEDDED_SAFE_WORDS = new Set([
    "scunthorpe",
    "mishit",
    "mishits",
    "mishitting",
    "shitake",
    "shitakes",
    "matsushita",
    "takeshita",
    "snigger",
    "sniggers",
    "sniggered",
    "sniggering",
]);
130
225
  /**
131
226
  * Validates that an input is a non-empty string.
132
227
  *
@@ -168,13 +263,19 @@ function validateString(input, paramName) {
168
263
  * // Logs warning: "Skipping non-string item in words: 123"
169
264
  * ```
170
265
  */
171
- function validateStringArray(input, paramName) {
266
+ function validateStringArray(input, paramName, logger) {
172
267
  if (!Array.isArray(input)) {
173
268
  throw new TypeError(`${paramName} must be an array`);
174
269
  }
175
270
  return input.filter((item) => {
176
271
  if (typeof item !== "string") {
177
- console.warn(`Skipping non-string item in ${paramName}: ${item}`);
272
+ const message = `Skipping non-string item in ${paramName}: ${item}`;
273
+ if (logger) {
274
+ logger.warn(message);
275
+ }
276
+ else {
277
+ console.warn(message);
278
+ }
178
279
  return false;
179
280
  }
180
281
  return item.trim().length > 0;
@@ -321,26 +422,42 @@ class TrieNode {
321
422
  current = nextNode;
322
423
  pos++;
323
424
  if (current.isEndOfWord) {
324
- if (!allowPartial) {
325
- const wordStart = startPos;
326
- const wordEnd = pos;
327
- matches.push({
328
- word: current.word,
329
- start: wordStart - startPos,
330
- end: wordEnd - startPos,
331
- });
332
- }
333
- else {
334
- matches.push({
335
- word: current.word,
336
- start: 0,
337
- end: pos - startPos,
338
- });
339
- }
425
+ matches.push({
426
+ word: current.word,
427
+ start: 0,
428
+ end: pos - startPos,
429
+ });
340
430
  }
341
431
  }
342
432
  return matches;
343
433
  }
434
+ /**
435
+ * Find a stored word matching the token, where mask characters match any
436
+ * single character. The token must align with a complete word exactly.
437
+ *
438
+ * @param token - The token to resolve (e.g. "f*ck")
439
+ * @param maskChars - Characters that act as single-character wildcards
440
+ * @returns The first matching dictionary word, or null
441
+ */
442
+ findWildcardMatch(token, maskChars) {
443
+ return this.wildcardHelper(token, 0, maskChars);
444
+ }
445
+ wildcardHelper(token, index, maskChars) {
446
+ if (index === token.length) {
447
+ return this.isEndOfWord ? this.word : null;
448
+ }
449
+ const char = token[index];
450
+ if (maskChars.has(char)) {
451
+ for (const child of this.children.values()) {
452
+ const result = child.wildcardHelper(token, index + 1, maskChars);
453
+ if (result)
454
+ return result;
455
+ }
456
+ return null;
457
+ }
458
+ const child = this.children.get(char);
459
+ return child ? child.wildcardHelper(token, index + 1, maskChars) : null;
460
+ }
344
461
  /**
345
462
  * Clears all words from the trie, resetting it to empty state.
346
463
  *
@@ -501,7 +618,7 @@ export class AllProfanity {
501
618
  * @see {@link AllProfanityOptions} for all available configuration options
502
619
  */
503
620
  constructor(options) {
504
- var _a, _b, _c, _d, _e;
621
+ var _a, _b, _c, _d, _e, _f, _g, _h, _j, _k, _l, _m, _o, _p, _q, _r;
505
622
  this.profanityTrie = new TrieNode();
506
623
  this.whitelistSet = new Set();
507
624
  this.loadedLanguages = new Set();
@@ -510,6 +627,11 @@ export class AllProfanity {
510
627
  this.caseSensitive = false;
511
628
  this.strictMode = false;
512
629
  this.detectPartialWords = false;
630
+ this.evasionUnicode = true;
631
+ this.evasionRepeatedChars = true;
632
+ this.evasionMaskedChars = true;
633
+ this.evasionSeparatedLetters = true;
634
+ this.evasionEmbeddedWords = true;
513
635
  this.availableLanguages = {
514
636
  english: englishBadWords || [],
515
637
  hindi: hindiBadWords || [],
@@ -547,7 +669,6 @@ export class AllProfanity {
547
669
  ["¿", "j"],
548
670
  ["|<", "k"],
549
671
  ["1<", "k"],
550
- ["7", "l"],
551
672
  ["|\\/|", "m"],
552
673
  ["/\\/\\", "m"],
553
674
  ["|\\|", "n"],
@@ -561,13 +682,11 @@ export class AllProfanity {
561
682
  ["12", "r"],
562
683
  ["5", "s"],
563
684
  ["$", "s"],
564
- ["z", "s"],
565
685
  ["7", "t"],
566
686
  ["+", "t"],
567
687
  ["†", "t"],
568
688
  ["|_|", "u"],
569
689
  ["(_)", "u"],
570
- ["v", "u"],
571
690
  ["\\/", "v"],
572
691
  ["|/", "v"],
573
692
  ["\\/\\/", "w"],
@@ -575,7 +694,6 @@ export class AllProfanity {
575
694
  ["><", "x"],
576
695
  ["}{", "x"],
577
696
  ["`/", "y"],
578
- ["j", "y"],
579
697
  ["2", "z"],
580
698
  ["7_", "z"],
581
699
  ]);
@@ -584,8 +702,11 @@ export class AllProfanity {
584
702
  this.ahoCorasickAutomaton = null;
585
703
  this.bloomFilter = null;
586
704
  this.contextAnalyzer = null;
705
+ this.contextScoreThreshold = 0.5;
587
706
  this.matchingAlgorithm = "trie";
588
707
  this.resultCache = null;
708
+ this.cacheMaxSize = 1000;
709
+ this.leetTokensByFirstChar = null;
589
710
  // Use silent logger if silent mode is enabled, otherwise use provided logger or console logger
590
711
  this.logger = (options === null || options === void 0 ? void 0 : options.logger) || ((options === null || options === void 0 ? void 0 : options.silent) ? new SilentLogger() : new ConsoleLogger());
591
712
  if ((options === null || options === void 0 ? void 0 : options.defaultPlaceholder) !== undefined) {
@@ -595,6 +716,15 @@ export class AllProfanity {
595
716
  this.caseSensitive = (_b = options === null || options === void 0 ? void 0 : options.caseSensitive) !== null && _b !== void 0 ? _b : false;
596
717
  this.strictMode = (_c = options === null || options === void 0 ? void 0 : options.strictMode) !== null && _c !== void 0 ? _c : false;
597
718
  this.detectPartialWords = (_d = options === null || options === void 0 ? void 0 : options.detectPartialWords) !== null && _d !== void 0 ? _d : false;
719
+ this.evasionUnicode = (_f = (_e = options === null || options === void 0 ? void 0 : options.evasionProtection) === null || _e === void 0 ? void 0 : _e.unicode) !== null && _f !== void 0 ? _f : true;
720
+ this.evasionRepeatedChars =
721
+ (_h = (_g = options === null || options === void 0 ? void 0 : options.evasionProtection) === null || _g === void 0 ? void 0 : _g.repeatedCharacters) !== null && _h !== void 0 ? _h : true;
722
+ this.evasionMaskedChars =
723
+ (_k = (_j = options === null || options === void 0 ? void 0 : options.evasionProtection) === null || _j === void 0 ? void 0 : _j.maskedCharacters) !== null && _k !== void 0 ? _k : true;
724
+ this.evasionSeparatedLetters =
725
+ (_m = (_l = options === null || options === void 0 ? void 0 : options.evasionProtection) === null || _l === void 0 ? void 0 : _l.separatedLetters) !== null && _m !== void 0 ? _m : true;
726
+ this.evasionEmbeddedWords =
727
+ (_p = (_o = options === null || options === void 0 ? void 0 : options.evasionProtection) === null || _o === void 0 ? void 0 : _o.embeddedWords) !== null && _p !== void 0 ? _p : true;
598
728
  if (options === null || options === void 0 ? void 0 : options.whitelistWords) {
599
729
  this.addToWhitelist(options.whitelistWords);
600
730
  }
@@ -603,7 +733,7 @@ export class AllProfanity {
603
733
  this.initializeAdvancedAlgorithms(options);
604
734
  this.loadLanguage("english");
605
735
  this.loadLanguage("hindi");
606
- if ((_e = options === null || options === void 0 ? void 0 : options.languages) === null || _e === void 0 ? void 0 : _e.length) {
736
+ if ((_q = options === null || options === void 0 ? void 0 : options.languages) === null || _q === void 0 ? void 0 : _q.length) {
607
737
  options.languages.forEach((lang) => this.loadLanguage(lang));
608
738
  }
609
739
  if (options === null || options === void 0 ? void 0 : options.customDictionaries) {
@@ -611,12 +741,15 @@ export class AllProfanity {
611
741
  this.loadCustomDictionary(name, words);
612
742
  });
613
743
  }
744
+ if (((_r = options === null || options === void 0 ? void 0 : options.ahoCorasick) === null || _r === void 0 ? void 0 : _r.prebuild) && this.ahoCorasickAutomaton) {
745
+ this.ahoCorasickAutomaton.build();
746
+ }
614
747
  }
615
748
  /**
616
749
  * Initialize advanced algorithms based on configuration
617
750
  */
618
751
  initializeAdvancedAlgorithms(options) {
619
- var _a, _b, _c, _d, _e, _f, _g, _h, _j, _k, _l, _m;
752
+ var _a, _b, _c, _d, _e, _f, _g, _h, _j, _k, _l, _m, _o;
620
753
  // Set matching algorithm
621
754
  if ((_a = options === null || options === void 0 ? void 0 : options.algorithm) === null || _a === void 0 ? void 0 : _a.matching) {
622
755
  this.matchingAlgorithm = options.algorithm.matching;
@@ -649,38 +782,362 @@ export class AllProfanity {
649
782
  if ((_l = options === null || options === void 0 ? void 0 : options.contextAnalysis) === null || _l === void 0 ? void 0 : _l.contextWindow) {
650
783
  this.contextAnalyzer.setContextWindow(options.contextAnalysis.contextWindow);
651
784
  }
785
+ if (((_m = options === null || options === void 0 ? void 0 : options.contextAnalysis) === null || _m === void 0 ? void 0 : _m.scoreThreshold) !== undefined) {
786
+ this.contextScoreThreshold = options.contextAnalysis.scoreThreshold;
787
+ }
652
788
  this.logger.info(`Context Analyzer initialized for languages: ${contextLanguages.join(", ")}`);
653
789
  }
654
790
  // Initialize result cache if enabled
655
- if ((_m = options === null || options === void 0 ? void 0 : options.performance) === null || _m === void 0 ? void 0 : _m.enableCaching) {
656
- const cacheSize = options.performance.cacheSize || 1000;
791
+ if ((_o = options === null || options === void 0 ? void 0 : options.performance) === null || _o === void 0 ? void 0 : _o.enableCaching) {
792
+ this.cacheMaxSize = options.performance.cacheSize || 1000;
657
793
  this.resultCache = new Map();
658
- this.logger.info(`Result caching enabled with size limit: ${cacheSize}`);
794
+ this.logger.info(`Result caching enabled with size limit: ${this.cacheMaxSize}`);
659
795
  }
660
796
  }
661
797
  /**
662
- * Normalize leet speak to regular characters.
663
- * @param text - The input text.
664
- * @returns Normalized text.
798
+ * Normalize leet speak to regular characters, keeping a map from each
799
+ * normalized character back to its source range in the input text.
800
+ *
801
+ * For normalized index i, starts[i]/ends[i] give the [start, end) range in
802
+ * the input that produced that character. A match [s, e) in the normalized
803
+ * string therefore spans [starts[s], ends[e - 1]) in the input. This is what
804
+ * keeps positions correct when length-changing mappings like "ph" -> "f"
805
+ * apply.
665
806
  */
666
- normalizeLeetSpeak(text) {
667
- if (!this.enableLeetSpeak)
668
- return text;
669
- let normalized = text.toLowerCase();
670
- const sortedMappings = Array.from(this.leetMappings.entries()).sort(([leetA], [leetB]) => leetB.length - leetA.length);
671
- for (const [leet, normal] of sortedMappings) {
672
- const regex = new RegExp(this.escapeRegex(leet), "g");
673
- normalized = normalized.replace(regex, normal);
807
+ normalizeLeetSpeakWithMap(text) {
808
+ // Bucket tokens by first character so each position costs one Map lookup
809
+ // instead of a scan over every mapping (longest token first per bucket).
810
+ if (!this.leetTokensByFirstChar) {
811
+ this.leetTokensByFirstChar = new Map();
812
+ for (const entry of this.leetMappings.entries()) {
813
+ const bucket = this.leetTokensByFirstChar.get(entry[0][0]);
814
+ if (bucket) {
815
+ bucket.push(entry);
816
+ }
817
+ else {
818
+ this.leetTokensByFirstChar.set(entry[0][0], [entry]);
819
+ }
820
+ }
821
+ for (const bucket of this.leetTokensByFirstChar.values()) {
822
+ bucket.sort(([leetA], [leetB]) => leetB.length - leetA.length);
823
+ }
824
+ }
825
+ // Fast path: most text contains no leet characters at all. Scan for the
826
+ // first applicable mapping before allocating the position-map arrays.
827
+ let hasLeet = false;
828
+ for (let j = 0; j < text.length && !hasLeet; j++) {
829
+ const bucket = this.leetTokensByFirstChar.get(text[j]);
830
+ if (bucket) {
831
+ for (const [leet] of bucket) {
832
+ if (leet.length === 1 || text.startsWith(leet, j)) {
833
+ hasLeet = true;
834
+ break;
835
+ }
836
+ }
837
+ }
838
+ }
839
+ if (!hasLeet) {
840
+ return { normalized: text, starts: [], ends: [] };
841
+ }
842
+ const parts = [];
843
+ const starts = [];
844
+ const ends = [];
845
+ let i = 0;
846
+ while (i < text.length) {
847
+ let consumed = 0;
848
+ let replacement = "";
849
+ const bucket = this.leetTokensByFirstChar.get(text[i]);
850
+ if (bucket) {
851
+ for (const [leet, normal] of bucket) {
852
+ if (leet.length === 1 || text.startsWith(leet, i)) {
853
+ consumed = leet.length;
854
+ replacement = normal;
855
+ break;
856
+ }
857
+ }
858
+ }
859
+ if (consumed === 0) {
860
+ consumed = 1;
861
+ replacement = text[i];
862
+ }
863
+ for (const char of replacement) {
864
+ parts.push(char);
865
+ starts.push(i);
866
+ ends.push(i + consumed);
867
+ }
868
+ i += consumed;
869
+ }
870
+ return { normalized: parts.join(""), starts, ends };
871
+ }
872
+ /**
873
+ * Fold unicode evasion tactics into ASCII with a position map: fullwidth
874
+ * forms, Cyrillic/Greek homoglyphs, Latin diacritics, and invisible
875
+ * characters injected inside words. Non-Latin scripts (Devanagari, Tamil,
876
+ * etc.) pass through untouched. Returns null when nothing changed.
877
+ */
878
+ unicodeNormalizeWithMap(text) {
879
+ // Fast path: pure ASCII text needs no folding
880
+ let needsScan = false;
881
+ for (let j = 0; j < text.length; j++) {
882
+ if (text.charCodeAt(j) > 127) {
883
+ needsScan = true;
884
+ break;
885
+ }
886
+ }
887
+ if (!needsScan)
888
+ return null;
889
+ const parts = [];
890
+ const starts = [];
891
+ const ends = [];
892
+ let changed = false;
893
+ for (let i = 0; i < text.length; i++) {
894
+ const char = text[i];
895
+ const code = text.charCodeAt(i);
896
+ if (code < 128) {
897
+ parts.push(char);
898
+ starts.push(i);
899
+ ends.push(i + 1);
900
+ continue;
901
+ }
902
+ if (INVISIBLE_CHARS.has(char)) {
903
+ changed = true;
904
+ continue;
905
+ }
906
+ // Fullwidth ASCII block (! U+FF01 .. ~ U+FF5E)
907
+ if (code >= 0xff01 && code <= 0xff5e) {
908
+ parts.push(String.fromCharCode(code - 0xfee0));
909
+ starts.push(i);
910
+ ends.push(i + 1);
911
+ changed = true;
912
+ continue;
913
+ }
914
+ const confusable = CONFUSABLES.get(char);
915
+ if (confusable) {
916
+ parts.push(confusable);
917
+ starts.push(i);
918
+ ends.push(i + 1);
919
+ changed = true;
920
+ continue;
921
+ }
922
+ // Bare combining marks (covers decomposed input like "u" + U+0308)
923
+ if (code >= 0x0300 && code <= 0x036f) {
924
+ changed = true;
925
+ continue;
926
+ }
927
+ // Latin letters with diacritics: decompose and strip the marks.
928
+ // Limited to the Latin blocks so other scripts keep their composed forms.
929
+ if (code >= 0x00c0 && code < 0x0250) {
930
+ for (const piece of char.normalize("NFD")) {
931
+ const pieceCode = piece.charCodeAt(0);
932
+ if (pieceCode >= 0x0300 && pieceCode <= 0x036f) {
933
+ changed = true;
934
+ continue;
935
+ }
936
+ const folded = this.caseSensitive ? piece : piece.toLowerCase();
937
+ parts.push(folded);
938
+ starts.push(i);
939
+ ends.push(i + 1);
940
+ if (folded !== char)
941
+ changed = true;
942
+ }
943
+ continue;
944
+ }
945
+ parts.push(char);
946
+ starts.push(i);
947
+ ends.push(i + 1);
948
+ }
949
+ if (!changed)
950
+ return null;
951
+ return { normalized: parts.join(""), starts, ends };
952
+ }
953
+ /**
954
+ * Collapse runs of repeated characters ("fuuuuck" -> "fuck") with a
955
+ * position map. Only triggers when a run of 3+ identical characters
956
+ * exists, so ordinary doubled letters never pay for this pass.
957
+ * Returns null when not triggered.
958
+ */
959
+ collapseRepeatsWithMap(text) {
960
+ let triggered = false;
961
+ for (let j = 2; j < text.length; j++) {
962
+ if (text[j] === text[j - 1] && text[j] === text[j - 2]) {
963
+ triggered = true;
964
+ break;
965
+ }
966
+ }
967
+ if (!triggered)
968
+ return null;
969
+ const parts = [];
970
+ const starts = [];
971
+ const ends = [];
972
+ let i = 0;
973
+ while (i < text.length) {
974
+ let runEnd = i + 1;
975
+ while (runEnd < text.length && text[runEnd] === text[i]) {
976
+ runEnd++;
977
+ }
978
+ parts.push(text[i]);
979
+ starts.push(i);
980
+ ends.push(runEnd);
981
+ i = runEnd;
982
+ }
983
+ return { normalized: parts.join(""), starts, ends };
984
+ }
985
+ /**
986
+ * Build the list of (text, position-map) variants to scan: the base text
987
+ * plus unicode-folded, leet-normalized and repeat-collapsed variants, each
988
+ * included only when its normalization actually changed something.
989
+ */
990
+ buildScanPasses(normalizedText) {
991
+ const passes = [
992
+ { text: normalizedText },
993
+ ];
994
+ let workText = normalizedText;
995
+ let workMap;
996
+ if (this.evasionUnicode) {
997
+ const uni = this.unicodeNormalizeWithMap(normalizedText);
998
+ if (uni) {
999
+ passes.push({ text: uni.normalized, posMap: uni });
1000
+ workText = uni.normalized;
1001
+ workMap = uni;
1002
+ }
1003
+ }
1004
+ if (this.enableLeetSpeak) {
1005
+ const leet = this.normalizeLeetSpeakWithMap(workText);
1006
+ if (leet.normalized !== workText) {
1007
+ passes.push({
1008
+ text: leet.normalized,
1009
+ posMap: workMap ? composeMaps(workMap, leet) : leet,
1010
+ });
1011
+ }
1012
+ }
1013
+ if (this.evasionRepeatedChars) {
1014
+ const collapsed = this.collapseRepeatsWithMap(workText);
1015
+ if (collapsed) {
1016
+ passes.push({
1017
+ text: collapsed.normalized,
1018
+ posMap: workMap ? composeMaps(workMap, collapsed) : collapsed,
1019
+ });
1020
+ }
1021
+ }
1022
+ return passes;
1023
+ }
1024
+ /**
1025
+ * Find dictionary words hidden behind masked characters ("f*ck", "f#ck").
1026
+ * Each mask matches exactly one character and the token's visible letters
1027
+ * must align with a dictionary word, so "c#" or "5% off" never flag.
1028
+ */
1029
+ findMaskedMatches(searchText, originalText) {
1030
+ const results = [];
1031
+ if (!/[*#@$%]/.test(searchText))
1032
+ return results;
1033
+ const tokenRegex = /[\p{L}*#@$%]+/gu;
1034
+ let tokenMatch;
1035
+ while ((tokenMatch = tokenRegex.exec(searchText)) !== null) {
1036
+ const token = tokenMatch[0];
1037
+ let maskCount = 0;
1038
+ for (const char of token) {
1039
+ if (MASK_CHARS.has(char))
1040
+ maskCount++;
1041
+ }
1042
+ if (maskCount === 0 || maskCount > 2)
1043
+ continue;
1044
+ if (MASK_CHARS.has(token[0]) ||
1045
+ MASK_CHARS.has(token[token.length - 1])) {
1046
+ continue;
1047
+ }
1048
+ const word = this.profanityTrie.findWildcardMatch(token, MASK_CHARS);
1049
+ if (!word)
1050
+ continue;
1051
+ const start = tokenMatch.index;
1052
+ const end = start + token.length;
1053
+ if (!this.detectPartialWords &&
1054
+ !this.isWholeWord(originalText, start, end)) {
1055
+ continue;
1056
+ }
1057
+ const matchedText = originalText.substring(start, end);
1058
+ if (this.isWhitelistedMatch(word, matchedText))
1059
+ continue;
1060
+ if (!this.hasWordBoundaries(originalText, start, end))
1061
+ continue;
1062
+ results.push({ word, start, end, originalWord: matchedText });
674
1063
  }
675
- return normalized;
1064
+ return results;
676
1065
  }
677
1066
  /**
678
- * Escape regex special characters in a string.
679
- * @param str - The string to escape.
680
- * @returns The escaped string.
1067
+ * Find words spelled out with a uniform single separator ("f u c k",
1068
+ * "f.u.c.k"). The joined letters must equal a dictionary word exactly:
1069
+ * runs like "U S A" or letters inside spelled-out sentences never flag.
681
1070
  */
682
- escapeRegex(str) {
683
- return str.replace(/[\\^$.*+?()[\]{}|]/g, "\\$&");
1071
+ findSeparatedMatches(searchText, originalText) {
1072
+ const results = [];
1073
+ // Single letters joined by one consistent separator, at least 3 letters,
1074
+ // not touching letters/digits on either side.
1075
+ const runRegex = /(?<![\p{L}\p{N}])\p{L}(?:([ ._\-/])\p{L})(?:\1\p{L})+(?![\p{L}\p{N}])/gu;
1076
+ let runMatch;
1077
+ while ((runMatch = runRegex.exec(searchText)) !== null) {
1078
+ const run = runMatch[0];
1079
+ const separator = runMatch[1];
1080
+ const joined = run.split(separator).join("");
1081
+ const trieMatches = this.profanityTrie.findMatches(joined, 0, false);
1082
+ const exact = trieMatches.find((m) => m.end === joined.length);
1083
+ if (!exact)
1084
+ continue;
1085
+ const start = runMatch.index;
1086
+ const end = start + run.length;
1087
+ const matchedText = originalText.substring(start, end);
1088
+ if (this.isWhitelistedMatch(exact.word, joined) ||
1089
+ this.isWhitelistedMatch(exact.word, matchedText)) {
1090
+ continue;
1091
+ }
1092
+ results.push({ word: exact.word, start, end, originalWord: matchedText });
1093
+ }
1094
+ return results;
1095
+ }
1096
+ /**
1097
+ * Find unambiguous profanity stems embedded inside larger tokens
1098
+ * ("sisfuck", "totalshitshow"). Only stems from EMBEDDED_STRONG_STEMS that
1099
+ * are currently in the dictionary are considered, and tokens listed in
1100
+ * EMBEDDED_SAFE_WORDS or the whitelist never flag. The whole containing
1101
+ * token is reported so cleaning masks all of it.
1102
+ */
1103
+ findEmbeddedMatches(searchText, originalText) {
1104
+ const results = [];
1105
+ for (const stem of EMBEDDED_STRONG_STEMS) {
1106
+ // Respect remove()/clearList(): only flag stems still in the dictionary
1107
+ const exact = this.profanityTrie
1108
+ .findMatches(stem, 0, false)
1109
+ .some((m) => m.end === stem.length);
1110
+ if (!exact)
1111
+ continue;
1112
+ let index = searchText.indexOf(stem);
1113
+ while (index !== -1) {
1114
+ // Expand to the containing token
1115
+ let tokenStart = index;
1116
+ let tokenEnd = index + stem.length;
1117
+ while (tokenStart > 0 && /\w/.test(searchText[tokenStart - 1])) {
1118
+ tokenStart--;
1119
+ }
1120
+ while (tokenEnd < searchText.length &&
1121
+ /\w/.test(searchText[tokenEnd])) {
1122
+ tokenEnd++;
1123
+ }
1124
+ const token = searchText.substring(tokenStart, tokenEnd);
1125
+ const isEmbedded = token !== stem; // exact tokens are the base pass's job
1126
+ if (isEmbedded &&
1127
+ !EMBEDDED_SAFE_WORDS.has(token.toLowerCase()) &&
1128
+ !this.isWhitelisted(token) &&
1129
+ !this.isWhitelistedMatch(stem, token)) {
1130
+ results.push({
1131
+ word: stem,
1132
+ start: tokenStart,
1133
+ end: tokenEnd,
1134
+ originalWord: originalText.substring(tokenStart, tokenEnd),
1135
+ });
1136
+ }
1137
+ index = searchText.indexOf(stem, tokenEnd);
1138
+ }
1139
+ }
1140
+ return results;
684
1141
  }
685
1142
  /**
686
1143
  * Check if a match is bounded by word boundaries (strict mode).
@@ -726,6 +1183,27 @@ export class AllProfanity {
726
1183
  this.whitelistSet.has(matchedText.toLowerCase()));
727
1184
  }
728
1185
  }
1186
+ /**
1187
+ * In partial-word mode, check whether the word CONTAINING the match is
1188
+ * whitelisted: with "classic" whitelisted, the embedded "ass" must not flag.
1189
+ */
1190
+ isWhitelistedContainingWord(originalText, start, end) {
1191
+ if (!this.detectPartialWords || this.whitelistSet.size === 0) {
1192
+ return false;
1193
+ }
1194
+ let tokenStart = start;
1195
+ let tokenEnd = end;
1196
+ while (tokenStart > 0 && /\w/.test(originalText[tokenStart - 1])) {
1197
+ tokenStart--;
1198
+ }
1199
+ while (tokenEnd < originalText.length && /\w/.test(originalText[tokenEnd])) {
1200
+ tokenEnd++;
1201
+ }
1202
+ if (tokenStart === start && tokenEnd === end) {
1203
+ return false; // match is the whole token; already covered by isWhitelistedMatch
1204
+ }
1205
+ return this.isWhitelisted(originalText.substring(tokenStart, tokenEnd));
1206
+ }
729
1207
  /**
730
1208
  * Remove overlapping matches, keeping only the longest at each start position.
731
1209
  * @param matches - Array of match results.
@@ -750,26 +1228,31 @@ export class AllProfanity {
750
1228
  /**
751
1229
  * Use Aho-Corasick algorithm for pattern matching
752
1230
  */
753
- findMatchesWithAhoCorasick(searchText, originalText) {
1231
+ findMatchesWithAhoCorasick(searchText, originalText, posMap) {
754
1232
  if (!this.ahoCorasickAutomaton) {
755
1233
  return [];
756
1234
  }
757
1235
  const ahoMatches = this.ahoCorasickAutomaton.findAll(searchText);
758
1236
  const results = [];
759
1237
  for (const match of ahoMatches) {
1238
+ const start = posMap ? posMap.starts[match.start] : match.start;
1239
+ const end = posMap ? posMap.ends[match.end - 1] : match.end;
760
1240
  if (!this.detectPartialWords &&
761
- !this.isWholeWord(originalText, match.start, match.end)) {
1241
+ !this.isWholeWord(originalText, start, end)) {
762
1242
  continue;
763
1243
  }
764
- const matchedText = originalText.substring(match.start, match.end);
1244
+ const matchedText = originalText.substring(start, end);
765
1245
  if (this.isWhitelistedMatch(match.pattern, matchedText)) {
766
1246
  continue;
767
1247
  }
768
- if (this.hasWordBoundaries(originalText, match.start, match.end)) {
1248
+ if (this.isWhitelistedContainingWord(originalText, start, end)) {
1249
+ continue;
1250
+ }
1251
+ if (this.hasWordBoundaries(originalText, start, end)) {
769
1252
  results.push({
770
1253
  word: match.pattern,
771
- start: match.start,
772
- end: match.end,
1254
+ start,
1255
+ end,
773
1256
  originalWord: matchedText,
774
1257
  });
775
1258
  }
@@ -777,25 +1260,38 @@ export class AllProfanity {
777
1260
  return results;
778
1261
  }
779
1262
  /**
780
- * Hybrid approach: Aho-Corasick for fast matching, Bloom Filter for validation
1263
+ * Check whether the Bloom Filter can quickly rule out any profanity in the
1264
+ * text. Only safe for ASCII whole-word matching: partial matches and
1265
+ * non-ASCII scripts can match inside tokens, so they bypass the prefilter.
1266
+ */
1267
+ bloomQuickReject(searchText) {
1268
+ if (!this.bloomFilter || this.detectPartialWords)
1269
+ return false;
1270
+ // eslint-disable-next-line no-control-regex
1271
+ if (!/^[\x00-\x7F]*$/.test(searchText))
1272
+ return false;
1273
+ const tokens = searchText.split(/[^\p{L}\p{N}]+/u);
1274
+ for (const token of tokens) {
1275
+ if (token.length > 0 && this.bloomFilter.mightContain(token)) {
1276
+ return false;
1277
+ }
1278
+ }
1279
+ return true;
1280
+ }
1281
+ /**
1282
+ * Hybrid approach: Bloom Filter for quick rejection, Aho-Corasick for matching
781
1283
  */
782
- findMatchesHybrid(searchText, originalText) {
1284
+ findMatchesHybrid(searchText, originalText, posMap) {
1285
+ if (this.bloomQuickReject(searchText)) {
1286
+ return [];
1287
+ }
783
1288
  // Use Aho-Corasick for primary matching if available
784
1289
  if (this.ahoCorasickAutomaton) {
785
- const matches = this.findMatchesWithAhoCorasick(searchText, originalText);
786
- // If Bloom Filter is enabled, validate matches
787
- if (this.bloomFilter) {
788
- return matches.filter((match) => this.bloomFilter.mightContain(match.word));
789
- }
790
- return matches;
1290
+ return this.findMatchesWithAhoCorasick(searchText, originalText, posMap);
791
1291
  }
792
1292
  // Fallback to Trie if Aho-Corasick not available
793
1293
  const matches = [];
794
- this.findMatches(searchText, originalText, matches);
795
- // Validate with Bloom Filter if enabled
796
- if (this.bloomFilter) {
797
- return matches.filter((match) => this.bloomFilter.mightContain(match.word));
798
- }
1294
+ this.findMatches(searchText, originalText, matches, posMap);
799
1295
  return matches;
800
1296
  }
801
1297
  /**
@@ -811,6 +1307,14 @@ export class AllProfanity {
811
1307
  return analysis.score >= scoreThreshold;
812
1308
  });
813
1309
  }
1310
+ /**
1311
+ * Drop all cached detection results. Must be called whenever the word lists
1312
+ * or any option that affects detection output changes.
1313
+ */
1314
+ invalidateCache() {
1315
+ var _a;
1316
+ (_a = this.resultCache) === null || _a === void 0 ? void 0 : _a.clear();
1317
+ }
814
1318
  /**
815
1319
  * Detects profanity in the provided text and returns comprehensive analysis.
816
1320
  *
@@ -864,61 +1368,57 @@ export class AllProfanity {
864
1368
  * @see {@link ProfanitySeverity} for severity levels
865
1369
  */
866
1370
  detect(text) {
867
- var _a;
868
1371
  const validatedText = validateString(text, "text");
869
1372
  if (validatedText.length === 0) {
870
1373
  return {
871
1374
  hasProfanity: false,
872
1375
  detectedWords: [],
873
1376
  cleanedText: validatedText,
874
- severity: ProfanitySeverity.MILD,
1377
+ severity: ProfanitySeverity.NONE,
875
1378
  positions: [],
876
1379
  };
877
1380
  }
878
- // Check cache first if enabled
879
- if ((_a = this.resultCache) === null || _a === void 0 ? void 0 : _a.has(validatedText)) {
880
- return this.resultCache.get(validatedText);
1381
+ // Check cache first if enabled (refresh recency for LRU eviction)
1382
+ if (this.resultCache) {
1383
+ const cached = this.resultCache.get(validatedText);
1384
+ if (cached) {
1385
+ this.resultCache.delete(validatedText);
1386
+ this.resultCache.set(validatedText, cached);
1387
+ return cached;
1388
+ }
881
1389
  }
882
1390
  let matches = [];
883
1391
  const normalizedText = this.caseSensitive
884
1392
  ? validatedText
885
1393
  : validatedText.toLowerCase();
886
- // Choose matching algorithm based on configuration
887
- switch (this.matchingAlgorithm) {
888
- case "aho-corasick":
889
- matches = this.findMatchesWithAhoCorasick(normalizedText, validatedText);
890
- if (this.enableLeetSpeak) {
891
- const leetNormalized = this.normalizeLeetSpeak(normalizedText);
892
- if (leetNormalized !== normalizedText) {
893
- const leetMatches = this.findMatchesWithAhoCorasick(leetNormalized, validatedText);
894
- matches.push(...leetMatches);
895
- }
896
- }
897
- break;
898
- case "hybrid":
899
- matches = this.findMatchesHybrid(normalizedText, validatedText);
900
- if (this.enableLeetSpeak) {
901
- const leetNormalized = this.normalizeLeetSpeak(normalizedText);
902
- if (leetNormalized !== normalizedText) {
903
- const leetMatches = this.findMatchesHybrid(leetNormalized, validatedText);
904
- matches.push(...leetMatches);
905
- }
906
- }
907
- break;
908
- case "trie":
909
- default:
910
- this.findMatches(normalizedText, validatedText, matches);
911
- if (this.enableLeetSpeak) {
912
- const leetNormalized = this.normalizeLeetSpeak(normalizedText);
913
- if (leetNormalized !== normalizedText) {
914
- this.findMatches(leetNormalized, validatedText, matches);
915
- }
916
- }
917
- break;
1394
+ // Scan the base text plus every triggered normalization variant
1395
+ // (unicode folding, leet speak, repeated-character collapse)
1396
+ for (const pass of this.buildScanPasses(normalizedText)) {
1397
+ switch (this.matchingAlgorithm) {
1398
+ case "aho-corasick":
1399
+ matches.push(...this.findMatchesWithAhoCorasick(pass.text, validatedText, pass.posMap));
1400
+ break;
1401
+ case "hybrid":
1402
+ matches.push(...this.findMatchesHybrid(pass.text, validatedText, pass.posMap));
1403
+ break;
1404
+ case "trie":
1405
+ default:
1406
+ this.findMatches(pass.text, validatedText, matches, pass.posMap);
1407
+ break;
1408
+ }
1409
+ }
1410
+ if (this.evasionMaskedChars) {
1411
+ matches.push(...this.findMaskedMatches(normalizedText, validatedText));
1412
+ }
1413
+ if (this.evasionSeparatedLetters) {
1414
+ matches.push(...this.findSeparatedMatches(normalizedText, validatedText));
1415
+ }
1416
+ if (this.evasionEmbeddedWords) {
1417
+ matches.push(...this.findEmbeddedMatches(normalizedText, validatedText));
918
1418
  }
919
1419
  // Apply context analysis if enabled
920
1420
  if (this.contextAnalyzer) {
921
- matches = this.applyContextAnalysis(validatedText, matches);
1421
+ matches = this.applyContextAnalysis(validatedText, matches, this.contextScoreThreshold);
922
1422
  }
923
1423
  const uniqueMatches = this.deduplicateMatches(matches);
924
1424
  const detectedWords = uniqueMatches.map((m) => m.originalWord);
@@ -935,16 +1435,15 @@ export class AllProfanity {
935
1435
  end: m.end,
936
1436
  })),
937
1437
  };
938
- // Cache result if caching is enabled
1438
+ // Cache result if caching is enabled (evict least recently used entry)
939
1439
  if (this.resultCache) {
940
- this.resultCache.set(validatedText, result);
941
- // Implement simple LRU by clearing cache when it gets too large
942
- if (this.resultCache.size > 1000) {
943
- const firstKey = this.resultCache.keys().next().value;
944
- if (firstKey !== undefined) {
945
- this.resultCache.delete(firstKey);
1440
+ if (this.resultCache.size >= this.cacheMaxSize) {
1441
+ const oldestKey = this.resultCache.keys().next().value;
1442
+ if (oldestKey !== undefined) {
1443
+ this.resultCache.delete(oldestKey);
946
1444
  }
947
1445
  }
1446
+ this.resultCache.set(validatedText, result);
948
1447
  }
949
1448
  return result;
950
1449
  }
@@ -954,12 +1453,14 @@ export class AllProfanity {
954
1453
  * @param originalText - The original text.
955
1454
  * @param matches - Array to collect matches.
956
1455
  */
957
- findMatches(searchText, originalText, matches) {
1456
+ findMatches(searchText, originalText, matches, posMap) {
958
1457
  for (let i = 0; i < searchText.length; i++) {
959
1458
  const matchResults = this.profanityTrie.findMatches(searchText, i, this.detectPartialWords);
960
1459
  for (const match of matchResults) {
961
- const start = i + match.start;
962
- const end = i + match.end;
1460
+ const searchStart = i + match.start;
1461
+ const searchEnd = i + match.end;
1462
+ const start = posMap ? posMap.starts[searchStart] : searchStart;
1463
+ const end = posMap ? posMap.ends[searchEnd - 1] : searchEnd;
963
1464
  if (!this.detectPartialWords &&
964
1465
  !this.isWholeWord(originalText, start, end)) {
965
1466
  continue;
@@ -968,6 +1469,9 @@ export class AllProfanity {
968
1469
  if (this.isWhitelistedMatch(match.word, matchedText)) {
969
1470
  continue;
970
1471
  }
1472
+ if (this.isWhitelistedContainingWord(originalText, start, end)) {
1473
+ continue;
1474
+ }
971
1475
  if (this.hasWordBoundaries(originalText, start, end)) {
972
1476
  matches.push({
973
1477
  word: match.word,
@@ -1027,7 +1531,78 @@ export class AllProfanity {
1027
1531
  * @see {@link detect} for detailed profanity analysis
1028
1532
  */
1029
1533
  check(text) {
1030
- return this.detect(text).hasProfanity;
1534
+ const validatedText = validateString(text, "text");
1535
+ if (validatedText.length === 0)
1536
+ return false;
1537
+ // Reuse a cached full result when available
1538
+ if (this.resultCache) {
1539
+ const cached = this.resultCache.get(validatedText);
1540
+ if (cached)
1541
+ return cached.hasProfanity;
1542
+ }
1543
+ // Context analysis scores matches against their surroundings; reuse the
1544
+ // full pipeline so check() and detect() can never disagree.
1545
+ if (this.contextAnalyzer) {
1546
+ return this.detect(validatedText).hasProfanity;
1547
+ }
1548
+ const normalizedText = this.caseSensitive
1549
+ ? validatedText
1550
+ : validatedText.toLowerCase();
1551
+ // Early exit on the first accepted match — unlike detect(), no positions,
1552
+ // severity or cleaned text are computed. The base text is scanned before
1553
+ // any normalization variants are built, so plainly profane text returns
1554
+ // without paying for normalization at all.
1555
+ if (this.hasMatchInPass(normalizedText, validatedText)) {
1556
+ return true;
1557
+ }
1558
+ const passes = this.buildScanPasses(normalizedText);
1559
+ for (let p = 1; p < passes.length; p++) {
1560
+ if (this.hasMatchInPass(passes[p].text, validatedText, passes[p].posMap)) {
1561
+ return true;
1562
+ }
1563
+ }
1564
+ if (this.evasionMaskedChars &&
1565
+ this.findMaskedMatches(normalizedText, validatedText).length > 0) {
1566
+ return true;
1567
+ }
1568
+ if (this.evasionSeparatedLetters &&
1569
+ this.findSeparatedMatches(normalizedText, validatedText).length > 0) {
1570
+ return true;
1571
+ }
1572
+ if (this.evasionEmbeddedWords &&
1573
+ this.findEmbeddedMatches(normalizedText, validatedText).length > 0) {
1574
+ return true;
1575
+ }
1576
+ return false;
1577
+ }
1578
+ /**
1579
+ * Trie scan that stops at the first match surviving the whole-word,
1580
+ * whitelist and boundary checks. Powers the fast path in check().
1581
+ */
1582
+ hasMatchInPass(searchText, originalText, posMap) {
1583
+ for (let i = 0; i < searchText.length; i++) {
1584
+ const matchResults = this.profanityTrie.findMatches(searchText, i, this.detectPartialWords);
1585
+ for (const match of matchResults) {
1586
+ const searchEnd = i + match.end;
1587
+ const start = posMap ? posMap.starts[i] : i;
1588
+ const end = posMap ? posMap.ends[searchEnd - 1] : searchEnd;
1589
+ if (!this.detectPartialWords &&
1590
+ !this.isWholeWord(originalText, start, end)) {
1591
+ continue;
1592
+ }
1593
+ const matchedText = originalText.substring(start, end);
1594
+ if (this.isWhitelistedMatch(match.word, matchedText)) {
1595
+ continue;
1596
+ }
1597
+ if (this.isWhitelistedContainingWord(originalText, start, end)) {
1598
+ continue;
1599
+ }
1600
+ if (this.hasWordBoundaries(originalText, start, end)) {
1601
+ return true;
1602
+ }
1603
+ }
1604
+ }
1605
+ return false;
1031
1606
  }
1032
1607
  /**
1033
1608
  * Cleans text by replacing profanity with a placeholder character.
@@ -1087,9 +1662,10 @@ export class AllProfanity {
1087
1662
  originalWord: text.substring(p.start, p.end),
1088
1663
  }))),
1089
1664
  ].sort((a, b) => b.start - a.start);
1665
+ const placeholderChar = placeholder.charAt(0);
1090
1666
  for (const pos of sortedPositions) {
1091
1667
  const originalWord = text.substring(pos.start, pos.end);
1092
- const replacement = placeholder.repeat(originalWord.length);
1668
+ const replacement = placeholderChar.repeat(originalWord.length);
1093
1669
  result =
1094
1670
  result.substring(0, pos.start) +
1095
1671
  replacement +
@@ -1211,11 +1787,12 @@ export class AllProfanity {
1211
1787
  */
1212
1788
  add(word) {
1213
1789
  const words = Array.isArray(word) ? word : [word];
1214
- const validatedWords = validateStringArray(words, "words to add");
1790
+ const validatedWords = validateStringArray(words, "words to add", this.logger);
1215
1791
  for (const w of validatedWords) {
1216
1792
  this.dynamicWords.add(w);
1217
1793
  this.addWordToTrie(w);
1218
1794
  }
1795
+ this.invalidateCache();
1219
1796
  }
1220
1797
  /**
1221
1798
  * Dynamically removes one or more words from the profanity filter at runtime.
@@ -1264,35 +1841,42 @@ export class AllProfanity {
1264
1841
  * @see {@link addToWhitelist} to exclude dictionary words without removing them
1265
1842
  */
1266
1843
  remove(word) {
1844
+ var _a;
1267
1845
  const words = Array.isArray(word) ? word : [word];
1268
- const validatedWords = validateStringArray(words, "words to remove");
1846
+ const validatedWords = validateStringArray(words, "words to remove", this.logger);
1269
1847
  for (const w of validatedWords) {
1270
1848
  const normalizedWord = this.caseSensitive ? w : w.toLowerCase();
1271
1849
  this.profanityTrie.removeWord(normalizedWord);
1272
1850
  this.dynamicWords.delete(w);
1851
+ // Bloom filter entries cannot be deleted, but stale entries only cost a
1852
+ // skipped quick-rejection — they can never produce a match by themselves.
1853
+ (_a = this.ahoCorasickAutomaton) === null || _a === void 0 ? void 0 : _a.removePattern(normalizedWord);
1273
1854
  }
1855
+ this.invalidateCache();
1274
1856
  }
1275
1857
  /**
1276
1858
  * Add words to the whitelist.
1277
1859
  * @param words - Words to whitelist.
1278
1860
  */
1279
1861
  addToWhitelist(words) {
1280
- const validatedWords = validateStringArray(words, "whitelist words");
1862
+ const validatedWords = validateStringArray(words, "whitelist words", this.logger);
1281
1863
  for (const word of validatedWords) {
1282
1864
  const normalizedWord = this.caseSensitive ? word : word.toLowerCase();
1283
1865
  this.whitelistSet.add(normalizedWord);
1284
1866
  }
1867
+ this.invalidateCache();
1285
1868
  }
1286
1869
  /**
1287
1870
  * Remove words from the whitelist.
1288
1871
  * @param words - Words to remove from whitelist.
1289
1872
  */
1290
1873
  removeFromWhitelist(words) {
1291
- const validatedWords = validateStringArray(words, "whitelist words");
1874
+ const validatedWords = validateStringArray(words, "whitelist words", this.logger);
1292
1875
  for (const word of validatedWords) {
1293
1876
  const normalizedWord = this.caseSensitive ? word : word.toLowerCase();
1294
1877
  this.whitelistSet.delete(normalizedWord);
1295
1878
  }
1879
+ this.invalidateCache();
1296
1880
  }
1297
1881
  /**
1298
1882
  * Check if a word is whitelisted.
@@ -1381,6 +1965,7 @@ export class AllProfanity {
1381
1965
  }
1382
1966
  }
1383
1967
  this.loadedLanguages.add(langKey);
1968
+ this.invalidateCache();
1384
1969
  this.logger.info(`Loaded ${addedCount} words from ${language} dictionary`);
1385
1970
  return true;
1386
1971
  }
@@ -1395,7 +1980,7 @@ export class AllProfanity {
1395
1980
  * @returns Number of successfully loaded languages.
1396
1981
  */
1397
1982
  loadLanguages(languages) {
1398
- const validatedLanguages = validateStringArray(languages, "languages");
1983
+ const validatedLanguages = validateStringArray(languages, "languages", this.logger);
1399
1984
  return validatedLanguages.reduce((count, lang) => {
1400
1985
  return this.loadLanguage(lang) ? count + 1 : count;
1401
1986
  }, 0);
@@ -1470,7 +2055,7 @@ export class AllProfanity {
1470
2055
  */
1471
2056
  loadCustomDictionary(name, words) {
1472
2057
  validateString(name, "dictionary name");
1473
- const validatedWords = validateStringArray(words, "custom dictionary words");
2058
+ const validatedWords = validateStringArray(words, "custom dictionary words", this.logger);
1474
2059
  if (validatedWords.length === 0) {
1475
2060
  this.logger.warn(`Custom dictionary '${name}' contains no valid words`);
1476
2061
  return;
@@ -1484,6 +2069,7 @@ export class AllProfanity {
1484
2069
  }
1485
2070
  this.availableLanguages[name.toLowerCase()] = validatedWords;
1486
2071
  this.loadedLanguages.add(name.toLowerCase());
2072
+ this.invalidateCache();
1487
2073
  this.logger.info(`Loaded ${addedCount} words from custom dictionary '${name}'`);
1488
2074
  }
1489
2075
  catch (error) {
@@ -1507,9 +2093,17 @@ export class AllProfanity {
1507
2093
  }
1508
2094
  // Add to Trie (always used as fallback)
1509
2095
  this.profanityTrie.addWord(normalizedWord);
1510
- // Add to Bloom Filter if enabled
2096
+ // Add to Bloom Filter if enabled. Constituent tokens of multi-word or
2097
+ // symbol-containing entries are added too, so the token-level quick
2098
+ // rejection in bloomQuickReject() can never miss a phrase.
1511
2099
  if (this.bloomFilter) {
1512
2100
  this.bloomFilter.add(normalizedWord);
2101
+ const tokens = normalizedWord.split(/[^\p{L}\p{N}]+/u);
2102
+ for (const token of tokens) {
2103
+ if (token.length > 0 && token !== normalizedWord) {
2104
+ this.bloomFilter.add(token);
2105
+ }
2106
+ }
1513
2107
  }
1514
2108
  // Add to Aho-Corasick automaton if enabled
1515
2109
  if (this.ahoCorasickAutomaton) {
@@ -1524,7 +2118,7 @@ export class AllProfanity {
1524
2118
  */
1525
2119
  calculateSeverity(matches) {
1526
2120
  if (matches.length === 0)
1527
- return ProfanitySeverity.MILD;
2121
+ return ProfanitySeverity.NONE;
1528
2122
  const uniqueWords = new Set(matches.map((m) => m.word)).size;
1529
2123
  const totalMatches = matches.length;
1530
2124
  if (totalMatches >= 5 || uniqueWords >= 4)
@@ -1539,9 +2133,13 @@ export class AllProfanity {
1539
2133
  * Clear all loaded dictionaries and dynamic words.
1540
2134
  */
1541
2135
  clearList() {
2136
+ var _a, _b;
1542
2137
  this.profanityTrie.clear();
1543
2138
  this.loadedLanguages.clear();
1544
2139
  this.dynamicWords.clear();
2140
+ (_a = this.ahoCorasickAutomaton) === null || _a === void 0 ? void 0 : _a.clear();
2141
+ (_b = this.bloomFilter) === null || _b === void 0 ? void 0 : _b.clear();
2142
+ this.invalidateCache();
1545
2143
  }
1546
2144
  /**
1547
2145
  * Set the placeholder character for filtered words.
@@ -1553,6 +2151,7 @@ export class AllProfanity {
1553
2151
  throw new Error("Placeholder cannot be empty");
1554
2152
  }
1555
2153
  this.defaultPlaceholder = placeholder.charAt(0);
2154
+ this.invalidateCache();
1556
2155
  }
1557
2156
  /**
1558
2157
  * Get the list of loaded languages.
@@ -1584,10 +2183,14 @@ export class AllProfanity {
1584
2183
  };
1585
2184
  }
1586
2185
  /**
1587
- * Rebuild the profanity trie from loaded dictionaries and dynamic words.
2186
+ * Rebuild all matching structures (trie, Aho-Corasick automaton, Bloom
2187
+ * Filter) from loaded dictionaries and dynamic words.
1588
2188
  */
1589
- rebuildTrie() {
2189
+ rebuildIndexes() {
2190
+ var _a, _b;
1590
2191
  this.profanityTrie.clear();
2192
+ (_a = this.ahoCorasickAutomaton) === null || _a === void 0 ? void 0 : _a.clear();
2193
+ (_b = this.bloomFilter) === null || _b === void 0 ? void 0 : _b.clear();
1591
2194
  for (const lang of this.loadedLanguages) {
1592
2195
  const words = this.availableLanguages[lang] || [];
1593
2196
  for (const word of words) {
@@ -1597,6 +2200,7 @@ export class AllProfanity {
1597
2200
  for (const word of this.dynamicWords) {
1598
2201
  this.addWordToTrie(word);
1599
2202
  }
2203
+ this.invalidateCache();
1600
2204
  }
1601
2205
  /**
1602
2206
  * Update configuration options for the profanity filter.
@@ -1625,8 +2229,9 @@ export class AllProfanity {
1625
2229
  this.addToWhitelist(options.whitelistWords);
1626
2230
  }
1627
2231
  if (rebuildNeeded) {
1628
- this.rebuildTrie();
2232
+ this.rebuildIndexes();
1629
2233
  }
2234
+ this.invalidateCache();
1630
2235
  }
1631
2236
  /**
1632
2237
  * Create an AllProfanity instance from a configuration object.
@@ -1643,8 +2248,12 @@ export class AllProfanity {
1643
2248
  options.ahoCorasick = config.ahoCorasick;
1644
2249
  if (config.contextAnalysis)
1645
2250
  options.contextAnalysis = config.contextAnalysis;
2251
+ if (config.evasionProtection)
2252
+ options.evasionProtection = config.evasionProtection;
1646
2253
  if (config.performance)
1647
2254
  options.performance = config.performance;
2255
+ if (config.silent !== undefined)
2256
+ options.silent = config.silent;
1648
2257
  if (config.profanityDetection) {
1649
2258
  options.enableLeetSpeak = config.profanityDetection.enableLeetSpeak;
1650
2259
  options.caseSensitive = config.profanityDetection.caseSensitive;
@@ -1675,7 +2284,8 @@ export class AllProfanity {
1675
2284
  }
/**
 * Shared default AllProfanity instance, exported as the module default.
 * Constructed with `silent: true` so that importing the library never
 * writes to the console.
 */
const allProfanity = new AllProfanity({ silent: true });
export default allProfanity;
1681
2291
  //# sourceMappingURL=index.js.map