allprofanity 2.2.0 → 2.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -23,35 +23,221 @@ export { default as tamilBadWords } from "./languages/tamil-words.js";
23
23
  export { default as teluguBadWords } from "./languages/telugu-words.js";
24
24
  export { default as brazilianBadWords } from "./languages/brazilian-words.js";
25
25
  /**
26
- * Default console logger implementation.
26
+ * Default console logger implementation for AllProfanity.
27
+ *
28
+ * @class ConsoleLogger
29
+ * @implements {Logger}
30
+ * @description Logs messages to the browser or Node.js console with an "[AllProfanity]" prefix.
31
+ * This is the default logger used when no custom logger is provided.
32
+ *
33
+ * @internal
27
34
  */
28
35
  class ConsoleLogger {
36
+ /**
37
+ * Log informational messages to console.log with [AllProfanity] prefix.
38
+ *
39
+ * @param message - The message to log
40
+ * @returns void
41
+ */
29
42
  info(message) {
30
43
  console.log(`[AllProfanity] ${message}`);
31
44
  }
45
+ /**
46
+ * Log warning messages to console.warn with [AllProfanity] prefix.
47
+ *
48
+ * @param message - The warning message to log
49
+ * @returns void
50
+ */
32
51
  warn(message) {
33
52
  console.warn(`[AllProfanity] ${message}`);
34
53
  }
54
+ /**
55
+ * Log error messages to console.error with [AllProfanity] prefix.
56
+ *
57
+ * @param message - The error message to log
58
+ * @returns void
59
+ */
35
60
  error(message) {
36
61
  console.error(`[AllProfanity] ${message}`);
37
62
  }
38
63
  }
39
64
  /**
40
- * Severity levels for profanity detection.
65
+ * Silent logger implementation that suppresses all log output.
66
+ *
67
+ * @class SilentLogger
68
+ * @implements {Logger}
69
+ * @description A no-op logger that discards all log messages. Used when `silent: true` is set
70
+ * in AllProfanityOptions and no custom `logger` is supplied, or when you want to completely disable logging.
71
+ *
72
+ * @internal
73
+ */
74
+ class SilentLogger {
75
+ /**
76
+ * No-op implementation - messages are discarded.
77
+ *
78
+ * @param _message - The message (unused)
79
+ * @returns void
80
+ */
81
+ info(_message) {
82
+ // Silent mode - no logging
83
+ }
84
+ /**
85
+ * No-op implementation - warnings are discarded.
86
+ *
87
+ * @param _message - The warning message (unused)
88
+ * @returns void
89
+ */
90
+ warn(_message) {
91
+ // Silent mode - no logging
92
+ }
93
+ /**
94
+ * No-op implementation - errors are discarded.
95
+ *
96
+ * @param _message - The error message (unused)
97
+ * @returns void
98
+ */
99
+ error(_message) {
100
+ // Silent mode - no logging
101
+ }
102
+ }
103
+ /**
104
+ * Severity levels for profanity detection results.
105
+ *
106
+ * @enum {number}
107
+ * @description Categorizes the severity of detected profanity based on the number
108
+ * of unique words and total matches found in the text.
109
+ *
110
+ * @readonly
111
+ * @example
112
+ * ```typescript
113
+ * const result = filter.detect("some text");
114
+ * if (result.severity === ProfanitySeverity.EXTREME) {
115
+ * // Handle extreme profanity
116
+ * }
117
+ * ```
41
118
  */
42
119
  export var ProfanitySeverity;
43
120
  (function (ProfanitySeverity) {
121
+ /** No profanity detected */
122
+ ProfanitySeverity[ProfanitySeverity["NONE"] = 0] = "NONE";
123
+ /** Mild profanity: 1 unique word or 1 total match */
44
124
  ProfanitySeverity[ProfanitySeverity["MILD"] = 1] = "MILD";
125
+ /** Moderate profanity: 2 unique words or 2 total matches */
45
126
  ProfanitySeverity[ProfanitySeverity["MODERATE"] = 2] = "MODERATE";
127
+ /** Severe profanity: 3 unique words or 3 total matches */
46
128
  ProfanitySeverity[ProfanitySeverity["SEVERE"] = 3] = "SEVERE";
129
+ /** Extreme profanity: 4+ unique words or 5+ total matches */
47
130
  ProfanitySeverity[ProfanitySeverity["EXTREME"] = 4] = "EXTREME";
48
131
  })(ProfanitySeverity = ProfanitySeverity || (ProfanitySeverity = {}));
49
132
  /**
50
- * Validate a string parameter.
51
- * @param input - The input to validate.
52
- * @param paramName - The name of the parameter.
53
- * @returns The validated string.
54
- * @throws {TypeError} If input is not a string.
133
+ * Compose two position maps: `inner` maps its normalized text back to the
134
+ * text that `outer` normalized, and the result maps `inner.normalized`
135
+ * directly back to the original input.
136
+ *
137
+ * @internal
138
+ */
139
+ function composeMaps(outer, inner) {
140
+ const starts = new Array(inner.starts.length);
141
+ const ends = new Array(inner.ends.length);
142
+ for (let i = 0; i < inner.starts.length; i++) {
143
+ starts[i] = outer.starts[inner.starts[i]];
144
+ ends[i] = outer.ends[inner.ends[i] - 1];
145
+ }
146
+ return { normalized: inner.normalized, starts, ends };
147
+ }
148
+ /**
149
+ * Common homoglyphs (visually identical/near-identical non-Latin characters)
150
+ * folded to their ASCII look-alikes for evasion-resistant matching.
151
+ *
152
+ * @internal
153
+ */
154
+ const CONFUSABLES = new Map([
155
+ // Cyrillic
156
+ ["а", "a"], ["в", "b"], ["е", "e"], ["к", "k"], ["м", "m"], ["н", "h"],
157
+ ["о", "o"], ["р", "p"], ["с", "c"], ["т", "t"], ["у", "y"], ["х", "x"],
158
+ ["і", "i"], ["ј", "j"], ["ѕ", "s"], ["ԁ", "d"], ["ԛ", "q"], ["ԝ", "w"],
159
+ // Greek
160
+ ["α", "a"], ["β", "b"], ["γ", "y"], ["ε", "e"], ["η", "n"], ["ι", "i"],
161
+ ["κ", "k"], ["μ", "m"], ["ν", "v"], ["ο", "o"], ["ρ", "p"], ["σ", "s"],
162
+ ["τ", "t"], ["υ", "u"], ["χ", "x"], ["ω", "w"],
163
+ ]);
164
+ /**
165
+ * Invisible characters commonly injected to break up profane words.
166
+ *
167
+ * @internal
168
+ */
169
+ const INVISIBLE_CHARS = new Set([
170
+ "\u200B",
171
+ "\u200C",
172
+ "\u200D",
173
+ "\uFEFF",
174
+ "\u00AD",
175
+ "\u2060",
176
+ "\u180E", // Mongolian vowel separator
177
+ ]);
178
+ /** Symbols treated as single-character wildcards in masked words like "f*ck". @internal */
179
+ const MASK_CHARS = new Set(["*", "#", "@", "$", "%"]);
180
+ /**
181
+ * Unambiguous profanity stems that are flagged even when embedded inside a
182
+ * larger token ("sisfuck", "totalshitshow"). Only words that essentially
183
+ * never occur inside legitimate vocabulary belong here — ambiguous stems
184
+ * like "ass" or "cock" (class, bass, Hitchcock, peacock) must stay
185
+ * whole-word matched.
186
+ *
187
+ * @internal
188
+ */
189
+ const EMBEDDED_STRONG_STEMS = [
190
+ "fuck",
191
+ "shit",
192
+ "bitch",
193
+ "cunt",
194
+ "whore",
195
+ "nigger",
196
+ "nigga",
197
+ "faggot",
198
+ "wanker",
199
+ "chutiya",
200
+ "bhenchod",
201
+ "behenchod",
202
+ "madarchod",
203
+ "bhosdi",
204
+ ];
205
+ /**
206
+ * Legitimate words that contain a strong stem and must never be flagged by
207
+ * the embedded pass (the user whitelist extends this set).
208
+ *
209
+ * @internal
210
+ */
211
+ const EMBEDDED_SAFE_WORDS = new Set([
212
+ "scunthorpe",
213
+ "mishit",
214
+ "mishits",
215
+ "mishitting",
216
+ "shitake",
217
+ "shitakes",
218
+ "matsushita",
219
+ "takeshita",
220
+ "snigger",
221
+ "sniggers",
222
+ "sniggered",
223
+ "sniggering",
224
+ ]);
225
+ /**
226
+ * Validates that an input is a string (only the type is checked).
227
+ *
228
+ * @function validateString
229
+ * @param {unknown} input - The value to validate
230
+ * @param {string} paramName - Name of the parameter being validated (used in error messages)
231
+ * @returns {string} The validated string
232
+ * @throws {TypeError} If input is not a string
233
+ *
234
+ * @internal
235
+ *
236
+ * @example
237
+ * ```typescript
238
+ * const text = validateString(userInput, 'text');
239
+ * // Returns userInput if it's a string, throws TypeError otherwise
240
+ * ```
55
241
  */
56
242
  function validateString(input, paramName) {
57
243
  if (typeof input !== "string") {
@@ -60,36 +246,86 @@ function validateString(input, paramName) {
60
246
  return input;
61
247
  }
62
248
  /**
63
- * Validate a string array parameter.
64
- * @param input - The input to validate.
65
- * @param paramName - The name of the parameter.
66
- * @returns The validated string array.
67
- * @throws {TypeError} If input is not an array.
249
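+ * Validates and filters a string array, removing non-string, empty, and whitespace-only items; each non-string item is reported through `logger.warn` when a logger is passed, otherwise through `console.warn`.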
250
+ *
251
+ * @function validateStringArray
252
+ * @param {unknown} input - The value to validate (expected to be an array)
253
+ * @param {string} paramName - Name of the parameter being validated (used in error/warning messages)
254
+ * @returns {string[]} Array of valid, non-empty strings
255
+ * @throws {TypeError} If input is not an array
256
+ *
257
+ * @internal
258
+ *
259
+ * @example
260
+ * ```typescript
261
+ * const words = validateStringArray(['word1', '', 123, 'word2'], 'words');
262
+ * // Returns: ['word1', 'word2']
263
+ * // Logs warning: "Skipping non-string item in words: 123"
264
+ * ```
68
265
  */
69
- function validateStringArray(input, paramName) {
266
+ function validateStringArray(input, paramName, logger) {
70
267
  if (!Array.isArray(input)) {
71
268
  throw new TypeError(`${paramName} must be an array`);
72
269
  }
73
270
  return input.filter((item) => {
74
271
  if (typeof item !== "string") {
75
- console.warn(`Skipping non-string item in ${paramName}: ${item}`);
272
+ const message = `Skipping non-string item in ${paramName}: ${item}`;
273
+ if (logger) {
274
+ logger.warn(message);
275
+ }
276
+ else {
277
+ console.warn(message);
278
+ }
76
279
  return false;
77
280
  }
78
281
  return item.trim().length > 0;
79
282
  });
80
283
  }
81
284
  /**
82
- * Trie node for efficient string matching.
285
+ * Trie (prefix tree) node for efficient pattern matching and word storage.
286
+ *
287
+ * @class TrieNode
288
+ * @description Implements a trie data structure for O(m) time complexity word matching,
289
+ * where m is the length of the word being searched. Each node represents a character
290
+ * in the word, and paths from root to nodes with isEndOfWord=true represent complete words.
291
+ *
292
+ * @internal
293
+ *
294
+ * @example
295
+ * ```typescript
296
+ * const trie = new TrieNode();
297
+ * trie.addWord('bad');
298
+ * trie.addWord('badword');
299
+ * const matches = trie.findMatches('badwords here', 0, false);
300
+ * // Returns matches for 'bad' and 'badword'
301
+ * ```
83
302
  */
84
303
  class TrieNode {
85
304
  constructor() {
305
+ /** Map of characters to child nodes for fast lookups */
86
306
  this.children = new Map();
307
+ /** Flag indicating if this node represents the end of a complete word */
87
308
  this.isEndOfWord = false;
309
+ /** The complete word ending at this node (only set when isEndOfWord is true) */
88
310
  this.word = "";
89
311
  }
90
312
  /**
91
- * Add a word to the trie.
92
- * @param word - The word to add.
313
+ * Adds a word to the trie structure.
314
+ *
315
+ * @param {string} word - The word to add to the trie
316
+ * @returns {void}
317
+ *
318
+ * @remarks
319
+ * - Time Complexity: O(m) where m is the length of the word
320
+ * - Space Complexity: O(m) in worst case when all characters are new
321
+ * - Supports any Unicode characters
322
+ *
323
+ * @example
324
+ * ```typescript
325
+ * const trie = new TrieNode();
326
+ * trie.addWord('hello');
327
+ * trie.addWord('world');
328
+ * ```
93
329
  */
94
330
  addWord(word) {
95
331
  let current = this;
@@ -106,13 +342,36 @@ class TrieNode {
106
342
  current.word = word;
107
343
  }
108
344
  /**
109
- * Remove a word from the trie.
110
- * @param word - The word to remove.
111
- * @returns True if the word was removed, false otherwise.
345
+ * Removes a word from the trie structure.
346
+ *
347
+ * @param {string} word - The word to remove from the trie
348
+ * @returns {boolean} True if the word existed and was removed, false if word was not found
349
+ *
350
+ * @remarks
351
+ * - Time Complexity: O(m) where m is the length of the word
352
+ * - Also removes unnecessary nodes to keep the trie optimized
353
+ * - Only removes the word marking; shared prefixes with other words are preserved
354
+ *
355
+ * @example
356
+ * ```typescript
357
+ * const trie = new TrieNode();
358
+ * trie.addWord('hello');
359
+ * trie.removeWord('hello'); // Returns: true
360
+ * trie.removeWord('world'); // Returns: false (word not in trie)
361
+ * ```
112
362
  */
113
363
  removeWord(word) {
114
364
  return this.removeHelper(word, 0);
115
365
  }
366
+ /**
367
+ * Recursive helper method for removing a word from the trie.
368
+ *
369
+ * @param {string} word - The word being removed
370
+ * @param {number} index - Current character index in the word
371
+ * @returns {boolean} True if this node should be deleted (has no children and is not end of another word)
372
+ *
373
+ * @internal
374
+ */
116
375
  removeHelper(word, index) {
117
376
  if (index === word.length) {
118
377
  if (!this.isEndOfWord)
@@ -132,11 +391,25 @@ class TrieNode {
132
391
  return false;
133
392
  }
134
393
  /**
135
- * Find all matches starting at a given position.
136
- * @param text - The text to search.
137
- * @param startPos - The start position.
138
- * @param allowPartial - Whether to allow partial word matches.
139
- * @returns Array of matches.
394
+ * Finds all word matches in text starting at a specific position.
395
+ *
396
+ * @param {string} text - The text to search for profanity
397
+ * @param {number} startPos - The starting position (0-based index) in the text
398
+ * @param {boolean} allowPartial - If true, finds partial matches within larger words
399
+ * @returns {Array<{ word: string; start: number; end: number }>} Array of match objects with word and position info
400
+ *
401
+ * @remarks
402
+ * - Time Complexity: O(k) where k is the length of the longest match from startPos
403
+ * - Returns all valid words that can be formed starting from startPos
404
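+ * - NOTE(review): the match-collection code shown here no longer branches on allowPartial, so word-boundary handling appears to be left to the caller — confirm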
405
+ *
406
+ * @example
407
+ * ```typescript
408
+ * const trie = new TrieNode();
409
+ * trie.addWord('bad');
410
+ * const matches = trie.findMatches('badword', 0, false);
411
+ * // Returns: [{ word: 'bad', start: 0, end: 3 }]
412
+ * ```
140
413
  */
141
414
  findMatches(text, startPos, allowPartial) {
142
415
  const matches = [];
@@ -149,28 +422,59 @@ class TrieNode {
149
422
  current = nextNode;
150
423
  pos++;
151
424
  if (current.isEndOfWord) {
152
- if (!allowPartial) {
153
- const wordStart = startPos;
154
- const wordEnd = pos;
155
- matches.push({
156
- word: current.word,
157
- start: wordStart - startPos,
158
- end: wordEnd - startPos,
159
- });
160
- }
161
- else {
162
- matches.push({
163
- word: current.word,
164
- start: 0,
165
- end: pos - startPos,
166
- });
167
- }
425
+ matches.push({
426
+ word: current.word,
427
+ start: 0,
428
+ end: pos - startPos,
429
+ });
168
430
  }
169
431
  }
170
432
  return matches;
171
433
  }
172
434
  /**
173
- * Clear all words from the trie.
435
+ * Find a stored word matching the token, where mask characters match any
436
+ * single character. The token must align with a complete word exactly.
437
+ *
438
+ * @param token - The token to resolve (e.g. "f*ck")
439
+ * @param maskChars - Characters that act as single-character wildcards
440
+ * @returns The first matching dictionary word, or null
441
+ */
442
+ findWildcardMatch(token, maskChars) {
443
+ return this.wildcardHelper(token, 0, maskChars);
444
+ }
445
+ wildcardHelper(token, index, maskChars) {
446
+ if (index === token.length) {
447
+ return this.isEndOfWord ? this.word : null;
448
+ }
449
+ const char = token[index];
450
+ if (maskChars.has(char)) {
451
+ for (const child of this.children.values()) {
452
+ const result = child.wildcardHelper(token, index + 1, maskChars);
453
+ if (result)
454
+ return result;
455
+ }
456
+ return null;
457
+ }
458
+ const child = this.children.get(char);
459
+ return child ? child.wildcardHelper(token, index + 1, maskChars) : null;
460
+ }
461
+ /**
462
+ * Clears all words from the trie, resetting it to empty state.
463
+ *
464
+ * @returns {void}
465
+ *
466
+ * @remarks
467
+ * - Time Complexity: O(1) - clears the root node only (JavaScript GC handles children)
468
+ * - Removes all stored words and resets the trie to initial state
469
+ *
470
+ * @example
471
+ * ```typescript
472
+ * const trie = new TrieNode();
473
+ * trie.addWord('hello');
474
+ * trie.addWord('world');
475
+ * trie.clear();
476
+ * // Trie is now empty
477
+ * ```
174
478
  */
175
479
  clear() {
176
480
  this.children.clear();
@@ -179,15 +483,142 @@ class TrieNode {
179
483
  }
180
484
  }
181
485
  /**
182
- * Main class for profanity detection and filtering.
486
+ * AllProfanity - Professional-grade multilingual profanity detection and filtering library.
487
+ *
488
+ * @class AllProfanity
489
+ * @description A comprehensive, high-performance profanity filtering system supporting 9+ languages
490
+ * with advanced features including leet speak detection, context analysis, multiple matching algorithms,
491
+ * and customizable filtering options.
492
+ *
493
+ * @remarks
494
+ * ### Features:
495
+ * - **Multi-language Support**: English, Hindi, French, German, Spanish, Bengali, Tamil, Telugu, Brazilian Portuguese
496
+ * - **Advanced Algorithms**: Trie, Aho-Corasick, Bloom Filter, and hybrid approaches
497
+ * - **Leet Speak Detection**: Automatically normalizes and detects variations like "h3ll0"
498
+ * - **Context Analysis**: Reduces false positives using surrounding word context
499
+ * - **Performance**: Built-in caching and optimized data structures
500
+ * - **Flexible**: Custom dictionaries, whitelisting, severity levels
501
+ *
502
+ * ### Default Behavior:
503
+ * - Loads English and Hindi dictionaries by default
504
+ * - Case-insensitive matching
505
+ * - Leet speak detection enabled
506
+ * - Uses Trie algorithm (fastest for most cases)
507
+ *
508
+ * @example
509
+ * ```typescript
510
+ * // Basic usage with default instance
511
+ * import allProfanity from 'allprofanity';
512
+ *
513
+ * const result = allProfanity.detect("This is some bad text");
514
+ * console.log(result.hasProfanity); // true
515
+ * console.log(result.cleanedText); // "This is some *** text"
516
+ * console.log(result.severity); // ProfanitySeverity.MILD
517
+ * ```
518
+ *
519
+ * @example
520
+ * ```typescript
521
+ * // Advanced usage with custom configuration
522
+ * import { AllProfanity, ProfanitySeverity } from 'allprofanity';
523
+ *
524
+ * const filter = new AllProfanity({
525
+ * languages: ['english', 'french', 'spanish'],
526
+ * enableLeetSpeak: true,
527
+ * strictMode: true,
528
+ * algorithm: {
529
+ * matching: 'hybrid',
530
+ * useBloomFilter: true
531
+ * },
532
+ * performance: {
533
+ * enableCaching: true,
534
+ * cacheSize: 500
535
+ * },
536
+ * whitelistWords: ['class', 'assignment']
537
+ * });
538
+ *
539
+ * const text = "This text has some b@d w0rds";
540
+ * const result = filter.detect(text);
541
+ *
542
+ * if (result.hasProfanity) {
543
+ * console.log(`Found ${result.detectedWords.length} profane words`);
544
+ * console.log(`Severity: ${ProfanitySeverity[result.severity]}`);
545
+ * console.log(`Cleaned: ${result.cleanedText}`);
546
+ * }
547
+ * ```
548
+ *
549
+ * @example
550
+ * ```typescript
551
+ * // Using individual methods
552
+ * const filter = new AllProfanity();
553
+ *
554
+ * // Simple check
555
+ * if (filter.check("some text")) {
556
+ * console.log("Contains profanity!");
557
+ * }
558
+ *
559
+ * // Clean with custom placeholder
560
+ * const cleaned = filter.clean("bad words here", "#");
561
+ *
562
+ * // Load additional languages
563
+ * filter.loadLanguage('german');
564
+ * filter.loadIndianLanguages(); // Loads hindi, bengali, tamil, telugu
565
+ *
566
+ * // Add custom words
567
+ * filter.add(['customword1', 'customword2']);
568
+ *
569
+ * // Remove words
570
+ * filter.remove(['someword']);
571
+ *
572
+ * // Whitelist words
573
+ * filter.addToWhitelist(['class', 'assignment']);
574
+ * ```
575
+ *
576
+ * @see {@link AllProfanityOptions} for all configuration options
577
+ * @see {@link ProfanityDetectionResult} for detection result format
578
+ * @see {@link ProfanitySeverity} for severity levels
183
579
  */
184
580
  export class AllProfanity {
185
581
  /**
186
- * Create an AllProfanity instance.
187
- * @param options - Profanity filter configuration options.
582
+ * Creates a new AllProfanity instance with the specified configuration.
583
+ *
584
+ * @constructor
585
+ * @param {AllProfanityOptions} [options] - Configuration options for profanity detection behavior
586
+ *
587
+ * @remarks
588
+ * ### Default Initialization:
589
+ * - Loads English and Hindi dictionaries automatically
590
+ * - Enables leet speak detection
591
+ * - Case-insensitive matching
592
+ * - Uses Trie algorithm for pattern matching
593
+ *
594
+ * ### Performance Considerations:
595
+ * - Initial load time depends on number of languages loaded
596
+ * - Aho-Corasick automaton (if enabled) is built during construction
597
+ * - Bloom Filter (if enabled) is populated during construction
598
+ *
599
+ * @throws {TypeError} If invalid options are provided
600
+ *
601
+ * @example
602
+ * ```typescript
603
+ * // Default instance
604
+ * const filter = new AllProfanity();
605
+ *
606
+ * // Custom configuration
607
+ * const filter = new AllProfanity({
608
+ * languages: ['english', 'french'],
609
+ * strictMode: true,
610
+ * defaultPlaceholder: '#',
611
+ * algorithm: { matching: 'hybrid' }
612
+ * });
613
+ *
614
+ * // Silent mode (no logging)
615
+ * const filter = new AllProfanity({ silent: true });
616
+ * ```
617
+ *
618
+ * @see {@link AllProfanityOptions} for all available configuration options
188
619
  */
189
620
  constructor(options) {
190
- var _a, _b, _c, _d, _e;
621
+ var _a, _b, _c, _d, _e, _f, _g, _h, _j, _k, _l, _m, _o, _p, _q, _r;
191
622
  this.profanityTrie = new TrieNode();
192
623
  this.whitelistSet = new Set();
193
624
  this.loadedLanguages = new Set();
@@ -196,6 +627,11 @@ export class AllProfanity {
196
627
  this.caseSensitive = false;
197
628
  this.strictMode = false;
198
629
  this.detectPartialWords = false;
630
+ this.evasionUnicode = true;
631
+ this.evasionRepeatedChars = true;
632
+ this.evasionMaskedChars = true;
633
+ this.evasionSeparatedLetters = true;
634
+ this.evasionEmbeddedWords = true;
199
635
  this.availableLanguages = {
200
636
  english: englishBadWords || [],
201
637
  hindi: hindiBadWords || [],
@@ -233,7 +669,6 @@ export class AllProfanity {
233
669
  ["¿", "j"],
234
670
  ["|<", "k"],
235
671
  ["1<", "k"],
236
- ["7", "l"],
237
672
  ["|\\/|", "m"],
238
673
  ["/\\/\\", "m"],
239
674
  ["|\\|", "n"],
@@ -247,13 +682,11 @@ export class AllProfanity {
247
682
  ["12", "r"],
248
683
  ["5", "s"],
249
684
  ["$", "s"],
250
- ["z", "s"],
251
685
  ["7", "t"],
252
686
  ["+", "t"],
253
687
  ["†", "t"],
254
688
  ["|_|", "u"],
255
689
  ["(_)", "u"],
256
- ["v", "u"],
257
690
  ["\\/", "v"],
258
691
  ["|/", "v"],
259
692
  ["\\/\\/", "w"],
@@ -261,7 +694,6 @@ export class AllProfanity {
261
694
  ["><", "x"],
262
695
  ["}{", "x"],
263
696
  ["`/", "y"],
264
- ["j", "y"],
265
697
  ["2", "z"],
266
698
  ["7_", "z"],
267
699
  ]);
@@ -270,9 +702,13 @@ export class AllProfanity {
270
702
  this.ahoCorasickAutomaton = null;
271
703
  this.bloomFilter = null;
272
704
  this.contextAnalyzer = null;
705
+ this.contextScoreThreshold = 0.5;
273
706
  this.matchingAlgorithm = "trie";
274
707
  this.resultCache = null;
275
- this.logger = (options === null || options === void 0 ? void 0 : options.logger) || new ConsoleLogger();
708
+ this.cacheMaxSize = 1000;
709
+ this.leetTokensByFirstChar = null;
710
+ // A caller-supplied logger takes precedence; otherwise use SilentLogger when `silent` is set, else ConsoleLogger
711
+ this.logger = (options === null || options === void 0 ? void 0 : options.logger) || ((options === null || options === void 0 ? void 0 : options.silent) ? new SilentLogger() : new ConsoleLogger());
276
712
  if ((options === null || options === void 0 ? void 0 : options.defaultPlaceholder) !== undefined) {
277
713
  this.setPlaceholder(options.defaultPlaceholder);
278
714
  }
@@ -280,6 +716,15 @@ export class AllProfanity {
280
716
  this.caseSensitive = (_b = options === null || options === void 0 ? void 0 : options.caseSensitive) !== null && _b !== void 0 ? _b : false;
281
717
  this.strictMode = (_c = options === null || options === void 0 ? void 0 : options.strictMode) !== null && _c !== void 0 ? _c : false;
282
718
  this.detectPartialWords = (_d = options === null || options === void 0 ? void 0 : options.detectPartialWords) !== null && _d !== void 0 ? _d : false;
719
+ this.evasionUnicode = (_f = (_e = options === null || options === void 0 ? void 0 : options.evasionProtection) === null || _e === void 0 ? void 0 : _e.unicode) !== null && _f !== void 0 ? _f : true;
720
+ this.evasionRepeatedChars =
721
+ (_h = (_g = options === null || options === void 0 ? void 0 : options.evasionProtection) === null || _g === void 0 ? void 0 : _g.repeatedCharacters) !== null && _h !== void 0 ? _h : true;
722
+ this.evasionMaskedChars =
723
+ (_k = (_j = options === null || options === void 0 ? void 0 : options.evasionProtection) === null || _j === void 0 ? void 0 : _j.maskedCharacters) !== null && _k !== void 0 ? _k : true;
724
+ this.evasionSeparatedLetters =
725
+ (_m = (_l = options === null || options === void 0 ? void 0 : options.evasionProtection) === null || _l === void 0 ? void 0 : _l.separatedLetters) !== null && _m !== void 0 ? _m : true;
726
+ this.evasionEmbeddedWords =
727
+ (_p = (_o = options === null || options === void 0 ? void 0 : options.evasionProtection) === null || _o === void 0 ? void 0 : _o.embeddedWords) !== null && _p !== void 0 ? _p : true;
283
728
  if (options === null || options === void 0 ? void 0 : options.whitelistWords) {
284
729
  this.addToWhitelist(options.whitelistWords);
285
730
  }
@@ -288,7 +733,7 @@ export class AllProfanity {
288
733
  this.initializeAdvancedAlgorithms(options);
289
734
  this.loadLanguage("english");
290
735
  this.loadLanguage("hindi");
291
- if ((_e = options === null || options === void 0 ? void 0 : options.languages) === null || _e === void 0 ? void 0 : _e.length) {
736
+ if ((_q = options === null || options === void 0 ? void 0 : options.languages) === null || _q === void 0 ? void 0 : _q.length) {
292
737
  options.languages.forEach((lang) => this.loadLanguage(lang));
293
738
  }
294
739
  if (options === null || options === void 0 ? void 0 : options.customDictionaries) {
@@ -296,12 +741,15 @@ export class AllProfanity {
296
741
  this.loadCustomDictionary(name, words);
297
742
  });
298
743
  }
744
+ if (((_r = options === null || options === void 0 ? void 0 : options.ahoCorasick) === null || _r === void 0 ? void 0 : _r.prebuild) && this.ahoCorasickAutomaton) {
745
+ this.ahoCorasickAutomaton.build();
746
+ }
299
747
  }
300
748
  /**
301
749
  * Initialize advanced algorithms based on configuration
302
750
  */
303
751
  initializeAdvancedAlgorithms(options) {
304
- var _a, _b, _c, _d, _e, _f, _g, _h, _j, _k, _l, _m;
752
+ var _a, _b, _c, _d, _e, _f, _g, _h, _j, _k, _l, _m, _o;
305
753
  // Set matching algorithm
306
754
  if ((_a = options === null || options === void 0 ? void 0 : options.algorithm) === null || _a === void 0 ? void 0 : _a.matching) {
307
755
  this.matchingAlgorithm = options.algorithm.matching;
@@ -334,38 +782,362 @@ export class AllProfanity {
334
782
  if ((_l = options === null || options === void 0 ? void 0 : options.contextAnalysis) === null || _l === void 0 ? void 0 : _l.contextWindow) {
335
783
  this.contextAnalyzer.setContextWindow(options.contextAnalysis.contextWindow);
336
784
  }
785
+ if (((_m = options === null || options === void 0 ? void 0 : options.contextAnalysis) === null || _m === void 0 ? void 0 : _m.scoreThreshold) !== undefined) {
786
+ this.contextScoreThreshold = options.contextAnalysis.scoreThreshold;
787
+ }
337
788
  this.logger.info(`Context Analyzer initialized for languages: ${contextLanguages.join(", ")}`);
338
789
  }
339
790
  // Initialize result cache if enabled
340
- if ((_m = options === null || options === void 0 ? void 0 : options.performance) === null || _m === void 0 ? void 0 : _m.enableCaching) {
341
- const cacheSize = options.performance.cacheSize || 1000;
791
+ if ((_o = options === null || options === void 0 ? void 0 : options.performance) === null || _o === void 0 ? void 0 : _o.enableCaching) {
792
+ this.cacheMaxSize = options.performance.cacheSize || 1000;
342
793
  this.resultCache = new Map();
343
- this.logger.info(`Result caching enabled with size limit: ${cacheSize}`);
794
+ this.logger.info(`Result caching enabled with size limit: ${this.cacheMaxSize}`);
344
795
  }
345
796
  }
346
797
  /**
347
- * Normalize leet speak to regular characters.
348
- * @param text - The input text.
349
- * @returns Normalized text.
798
+ * Normalize leet speak to regular characters, keeping a map from each
799
+ * normalized character back to its source range in the input text.
800
+ *
801
+ * For normalized index i, starts[i]/ends[i] give the [start, end) range in
802
+ * the input that produced that character. A match [s, e) in the normalized
803
+ * string therefore spans [starts[s], ends[e - 1]) in the input. This is what
804
+ * keeps positions correct when length-changing mappings like "ph" -> "f"
805
+ * apply.
350
806
  */
351
- normalizeLeetSpeak(text) {
352
- if (!this.enableLeetSpeak)
353
- return text;
354
- let normalized = text.toLowerCase();
355
- const sortedMappings = Array.from(this.leetMappings.entries()).sort(([leetA], [leetB]) => leetB.length - leetA.length);
356
- for (const [leet, normal] of sortedMappings) {
357
- const regex = new RegExp(this.escapeRegex(leet), "g");
358
- normalized = normalized.replace(regex, normal);
807
+ normalizeLeetSpeakWithMap(text) {
808
+ // Bucket tokens by first character so each position costs one Map lookup
809
+ // instead of a scan over every mapping (longest token first per bucket).
810
+ if (!this.leetTokensByFirstChar) {
811
+ this.leetTokensByFirstChar = new Map();
812
+ for (const entry of this.leetMappings.entries()) {
813
+ const bucket = this.leetTokensByFirstChar.get(entry[0][0]);
814
+ if (bucket) {
815
+ bucket.push(entry);
816
+ }
817
+ else {
818
+ this.leetTokensByFirstChar.set(entry[0][0], [entry]);
819
+ }
820
+ }
821
+ for (const bucket of this.leetTokensByFirstChar.values()) {
822
+ bucket.sort(([leetA], [leetB]) => leetB.length - leetA.length);
823
+ }
824
+ }
825
+ // Fast path: most text contains no leet characters at all. Scan for the
826
+ // first applicable mapping before allocating the position-map arrays.
827
+ let hasLeet = false;
828
+ for (let j = 0; j < text.length && !hasLeet; j++) {
829
+ const bucket = this.leetTokensByFirstChar.get(text[j]);
830
+ if (bucket) {
831
+ for (const [leet] of bucket) {
832
+ if (leet.length === 1 || text.startsWith(leet, j)) {
833
+ hasLeet = true;
834
+ break;
835
+ }
836
+ }
837
+ }
838
+ }
839
+ if (!hasLeet) {
840
+ return { normalized: text, starts: [], ends: [] };
841
+ }
842
+ const parts = [];
843
+ const starts = [];
844
+ const ends = [];
845
+ let i = 0;
846
+ while (i < text.length) {
847
+ let consumed = 0;
848
+ let replacement = "";
849
+ const bucket = this.leetTokensByFirstChar.get(text[i]);
850
+ if (bucket) {
851
+ for (const [leet, normal] of bucket) {
852
+ if (leet.length === 1 || text.startsWith(leet, i)) {
853
+ consumed = leet.length;
854
+ replacement = normal;
855
+ break;
856
+ }
857
+ }
858
+ }
859
+ if (consumed === 0) {
860
+ consumed = 1;
861
+ replacement = text[i];
862
+ }
863
+ for (const char of replacement) {
864
+ parts.push(char);
865
+ starts.push(i);
866
+ ends.push(i + consumed);
867
+ }
868
+ i += consumed;
869
+ }
870
+ return { normalized: parts.join(""), starts, ends };
871
+ }
872
+ /**
873
+ * Fold unicode evasion tactics into ASCII with a position map: fullwidth
874
+ * forms, Cyrillic/Greek homoglyphs, Latin diacritics, and invisible
875
+ * characters injected inside words. Non-Latin scripts (Devanagari, Tamil,
876
+ * etc.) pass through untouched. Returns null when nothing changed.
877
+ */
878
+ unicodeNormalizeWithMap(text) {
879
+ // Fast path: pure ASCII text needs no folding
880
+ let needsScan = false;
881
+ for (let j = 0; j < text.length; j++) {
882
+ if (text.charCodeAt(j) > 127) {
883
+ needsScan = true;
884
+ break;
885
+ }
886
+ }
887
+ if (!needsScan)
888
+ return null;
889
+ const parts = [];
890
+ const starts = [];
891
+ const ends = [];
892
+ let changed = false;
893
+ for (let i = 0; i < text.length; i++) {
894
+ const char = text[i];
895
+ const code = text.charCodeAt(i);
896
+ if (code < 128) {
897
+ parts.push(char);
898
+ starts.push(i);
899
+ ends.push(i + 1);
900
+ continue;
901
+ }
902
+ if (INVISIBLE_CHARS.has(char)) {
903
+ changed = true;
904
+ continue;
905
+ }
906
+ // Fullwidth ASCII block (! U+FF01 .. ~ U+FF5E)
907
+ if (code >= 0xff01 && code <= 0xff5e) {
908
+ parts.push(String.fromCharCode(code - 0xfee0));
909
+ starts.push(i);
910
+ ends.push(i + 1);
911
+ changed = true;
912
+ continue;
913
+ }
914
+ const confusable = CONFUSABLES.get(char);
915
+ if (confusable) {
916
+ parts.push(confusable);
917
+ starts.push(i);
918
+ ends.push(i + 1);
919
+ changed = true;
920
+ continue;
921
+ }
922
+ // Bare combining marks (covers decomposed input like "u" + U+0308)
923
+ if (code >= 0x0300 && code <= 0x036f) {
924
+ changed = true;
925
+ continue;
926
+ }
927
+ // Latin letters with diacritics: decompose and strip the marks.
928
+ // Limited to the Latin blocks so other scripts keep their composed forms.
929
+ if (code >= 0x00c0 && code < 0x0250) {
930
+ for (const piece of char.normalize("NFD")) {
931
+ const pieceCode = piece.charCodeAt(0);
932
+ if (pieceCode >= 0x0300 && pieceCode <= 0x036f) {
933
+ changed = true;
934
+ continue;
935
+ }
936
+ const folded = this.caseSensitive ? piece : piece.toLowerCase();
937
+ parts.push(folded);
938
+ starts.push(i);
939
+ ends.push(i + 1);
940
+ if (folded !== char)
941
+ changed = true;
942
+ }
943
+ continue;
944
+ }
945
+ parts.push(char);
946
+ starts.push(i);
947
+ ends.push(i + 1);
948
+ }
949
+ if (!changed)
950
+ return null;
951
+ return { normalized: parts.join(""), starts, ends };
952
+ }
953
+ /**
954
+ * Collapse runs of repeated characters ("fuuuuck" -> "fuck") with a
955
+ * position map. Only triggers when a run of 3+ identical characters
956
+ * exists, so ordinary doubled letters never pay for this pass.
957
+ * Returns null when not triggered.
958
+ */
959
+ collapseRepeatsWithMap(text) {
960
+ let triggered = false;
961
+ for (let j = 2; j < text.length; j++) {
962
+ if (text[j] === text[j - 1] && text[j] === text[j - 2]) {
963
+ triggered = true;
964
+ break;
965
+ }
966
+ }
967
+ if (!triggered)
968
+ return null;
969
+ const parts = [];
970
+ const starts = [];
971
+ const ends = [];
972
+ let i = 0;
973
+ while (i < text.length) {
974
+ let runEnd = i + 1;
975
+ while (runEnd < text.length && text[runEnd] === text[i]) {
976
+ runEnd++;
977
+ }
978
+ parts.push(text[i]);
979
+ starts.push(i);
980
+ ends.push(runEnd);
981
+ i = runEnd;
982
+ }
983
+ return { normalized: parts.join(""), starts, ends };
984
+ }
985
+ /**
986
+ * Build the list of (text, position-map) variants to scan: the base text
987
+ * plus unicode-folded, leet-normalized and repeat-collapsed variants, each
988
+ * included only when its normalization actually changed something.
989
+ */
990
+ buildScanPasses(normalizedText) {
991
+ const passes = [
992
+ { text: normalizedText },
993
+ ];
994
+ let workText = normalizedText;
995
+ let workMap;
996
+ if (this.evasionUnicode) {
997
+ const uni = this.unicodeNormalizeWithMap(normalizedText);
998
+ if (uni) {
999
+ passes.push({ text: uni.normalized, posMap: uni });
1000
+ workText = uni.normalized;
1001
+ workMap = uni;
1002
+ }
1003
+ }
1004
+ if (this.enableLeetSpeak) {
1005
+ const leet = this.normalizeLeetSpeakWithMap(workText);
1006
+ if (leet.normalized !== workText) {
1007
+ passes.push({
1008
+ text: leet.normalized,
1009
+ posMap: workMap ? composeMaps(workMap, leet) : leet,
1010
+ });
1011
+ }
1012
+ }
1013
+ if (this.evasionRepeatedChars) {
1014
+ const collapsed = this.collapseRepeatsWithMap(workText);
1015
+ if (collapsed) {
1016
+ passes.push({
1017
+ text: collapsed.normalized,
1018
+ posMap: workMap ? composeMaps(workMap, collapsed) : collapsed,
1019
+ });
1020
+ }
1021
+ }
1022
+ return passes;
1023
+ }
1024
+ /**
1025
+ * Find dictionary words hidden behind masked characters ("f*ck", "f#ck").
1026
+ * Each mask matches exactly one character and the token's visible letters
1027
+ * must align with a dictionary word, so "c#" or "5% off" never flag.
1028
+ */
1029
+ findMaskedMatches(searchText, originalText) {
1030
+ const results = [];
1031
+ if (!/[*#@$%]/.test(searchText))
1032
+ return results;
1033
+ const tokenRegex = /[\p{L}*#@$%]+/gu;
1034
+ let tokenMatch;
1035
+ while ((tokenMatch = tokenRegex.exec(searchText)) !== null) {
1036
+ const token = tokenMatch[0];
1037
+ let maskCount = 0;
1038
+ for (const char of token) {
1039
+ if (MASK_CHARS.has(char))
1040
+ maskCount++;
1041
+ }
1042
+ if (maskCount === 0 || maskCount > 2)
1043
+ continue;
1044
+ if (MASK_CHARS.has(token[0]) ||
1045
+ MASK_CHARS.has(token[token.length - 1])) {
1046
+ continue;
1047
+ }
1048
+ const word = this.profanityTrie.findWildcardMatch(token, MASK_CHARS);
1049
+ if (!word)
1050
+ continue;
1051
+ const start = tokenMatch.index;
1052
+ const end = start + token.length;
1053
+ if (!this.detectPartialWords &&
1054
+ !this.isWholeWord(originalText, start, end)) {
1055
+ continue;
1056
+ }
1057
+ const matchedText = originalText.substring(start, end);
1058
+ if (this.isWhitelistedMatch(word, matchedText))
1059
+ continue;
1060
+ if (!this.hasWordBoundaries(originalText, start, end))
1061
+ continue;
1062
+ results.push({ word, start, end, originalWord: matchedText });
359
1063
  }
360
- return normalized;
1064
+ return results;
1065
+ }
1066
+ /**
1067
+ * Find words spelled out with a uniform single separator ("f u c k",
1068
+ * "f.u.c.k"). The joined letters must equal a dictionary word exactly:
1069
+ * runs like "U S A" or letters inside spelled-out sentences never flag.
1070
+ */
1071
+ findSeparatedMatches(searchText, originalText) {
1072
+ const results = [];
1073
+ // Single letters joined by one consistent separator, at least 3 letters,
1074
+ // not touching letters/digits on either side.
1075
+ const runRegex = /(?<![\p{L}\p{N}])\p{L}(?:([ ._\-/])\p{L})(?:\1\p{L})+(?![\p{L}\p{N}])/gu;
1076
+ let runMatch;
1077
+ while ((runMatch = runRegex.exec(searchText)) !== null) {
1078
+ const run = runMatch[0];
1079
+ const separator = runMatch[1];
1080
+ const joined = run.split(separator).join("");
1081
+ const trieMatches = this.profanityTrie.findMatches(joined, 0, false);
1082
+ const exact = trieMatches.find((m) => m.end === joined.length);
1083
+ if (!exact)
1084
+ continue;
1085
+ const start = runMatch.index;
1086
+ const end = start + run.length;
1087
+ const matchedText = originalText.substring(start, end);
1088
+ if (this.isWhitelistedMatch(exact.word, joined) ||
1089
+ this.isWhitelistedMatch(exact.word, matchedText)) {
1090
+ continue;
1091
+ }
1092
+ results.push({ word: exact.word, start, end, originalWord: matchedText });
1093
+ }
1094
+ return results;
361
1095
  }
362
1096
  /**
363
- * Escape regex special characters in a string.
364
- * @param str - The string to escape.
365
- * @returns The escaped string.
1097
+ * Find unambiguous profanity stems embedded inside larger tokens
1098
+ * ("sisfuck", "totalshitshow"). Only stems from EMBEDDED_STRONG_STEMS that
1099
+ * are currently in the dictionary are considered, and tokens listed in
1100
+ * EMBEDDED_SAFE_WORDS or the whitelist never flag. The whole containing
1101
+ * token is reported so cleaning masks all of it.
366
1102
  */
367
- escapeRegex(str) {
368
- return str.replace(/[\\^$.*+?()[\]{}|]/g, "\\$&");
1103
+ findEmbeddedMatches(searchText, originalText) {
1104
+ const results = [];
1105
+ for (const stem of EMBEDDED_STRONG_STEMS) {
1106
+ // Respect remove()/clearList(): only flag stems still in the dictionary
1107
+ const exact = this.profanityTrie
1108
+ .findMatches(stem, 0, false)
1109
+ .some((m) => m.end === stem.length);
1110
+ if (!exact)
1111
+ continue;
1112
+ let index = searchText.indexOf(stem);
1113
+ while (index !== -1) {
1114
+ // Expand to the containing token
1115
+ let tokenStart = index;
1116
+ let tokenEnd = index + stem.length;
1117
+ while (tokenStart > 0 && /\w/.test(searchText[tokenStart - 1])) {
1118
+ tokenStart--;
1119
+ }
1120
+ while (tokenEnd < searchText.length &&
1121
+ /\w/.test(searchText[tokenEnd])) {
1122
+ tokenEnd++;
1123
+ }
1124
+ const token = searchText.substring(tokenStart, tokenEnd);
1125
+ const isEmbedded = token !== stem; // exact tokens are the base pass's job
1126
+ if (isEmbedded &&
1127
+ !EMBEDDED_SAFE_WORDS.has(token.toLowerCase()) &&
1128
+ !this.isWhitelisted(token) &&
1129
+ !this.isWhitelistedMatch(stem, token)) {
1130
+ results.push({
1131
+ word: stem,
1132
+ start: tokenStart,
1133
+ end: tokenEnd,
1134
+ originalWord: originalText.substring(tokenStart, tokenEnd),
1135
+ });
1136
+ }
1137
+ index = searchText.indexOf(stem, tokenEnd);
1138
+ }
1139
+ }
1140
+ return results;
369
1141
  }
370
1142
  /**
371
1143
  * Check if a match is bounded by word boundaries (strict mode).
@@ -411,6 +1183,27 @@ export class AllProfanity {
411
1183
  this.whitelistSet.has(matchedText.toLowerCase()));
412
1184
  }
413
1185
  }
1186
+ /**
1187
+ * In partial-word mode, check whether the word CONTAINING the match is
1188
+ * whitelisted: with "classic" whitelisted, the embedded "ass" must not flag.
1189
+ */
1190
+ isWhitelistedContainingWord(originalText, start, end) {
1191
+ if (!this.detectPartialWords || this.whitelistSet.size === 0) {
1192
+ return false;
1193
+ }
1194
+ let tokenStart = start;
1195
+ let tokenEnd = end;
1196
+ while (tokenStart > 0 && /\w/.test(originalText[tokenStart - 1])) {
1197
+ tokenStart--;
1198
+ }
1199
+ while (tokenEnd < originalText.length && /\w/.test(originalText[tokenEnd])) {
1200
+ tokenEnd++;
1201
+ }
1202
+ if (tokenStart === start && tokenEnd === end) {
1203
+ return false; // match is the whole token; already covered by isWhitelistedMatch
1204
+ }
1205
+ return this.isWhitelisted(originalText.substring(tokenStart, tokenEnd));
1206
+ }
414
1207
  /**
415
1208
  * Remove overlapping matches, keeping only the longest at each start position.
416
1209
  * @param matches - Array of match results.
@@ -435,26 +1228,31 @@ export class AllProfanity {
435
1228
  /**
436
1229
  * Use Aho-Corasick algorithm for pattern matching
437
1230
  */
438
- findMatchesWithAhoCorasick(searchText, originalText) {
1231
+ findMatchesWithAhoCorasick(searchText, originalText, posMap) {
439
1232
  if (!this.ahoCorasickAutomaton) {
440
1233
  return [];
441
1234
  }
442
1235
  const ahoMatches = this.ahoCorasickAutomaton.findAll(searchText);
443
1236
  const results = [];
444
1237
  for (const match of ahoMatches) {
1238
+ const start = posMap ? posMap.starts[match.start] : match.start;
1239
+ const end = posMap ? posMap.ends[match.end - 1] : match.end;
445
1240
  if (!this.detectPartialWords &&
446
- !this.isWholeWord(originalText, match.start, match.end)) {
1241
+ !this.isWholeWord(originalText, start, end)) {
447
1242
  continue;
448
1243
  }
449
- const matchedText = originalText.substring(match.start, match.end);
1244
+ const matchedText = originalText.substring(start, end);
450
1245
  if (this.isWhitelistedMatch(match.pattern, matchedText)) {
451
1246
  continue;
452
1247
  }
453
- if (this.hasWordBoundaries(originalText, match.start, match.end)) {
1248
+ if (this.isWhitelistedContainingWord(originalText, start, end)) {
1249
+ continue;
1250
+ }
1251
+ if (this.hasWordBoundaries(originalText, start, end)) {
454
1252
  results.push({
455
1253
  word: match.pattern,
456
- start: match.start,
457
- end: match.end,
1254
+ start,
1255
+ end,
458
1256
  originalWord: matchedText,
459
1257
  });
460
1258
  }
@@ -462,25 +1260,38 @@ export class AllProfanity {
462
1260
  return results;
463
1261
  }
464
1262
  /**
465
- * Hybrid approach: Aho-Corasick for fast matching, Bloom Filter for validation
1263
+ * Check whether the Bloom Filter can quickly rule out any profanity in the
1264
+ * text. Only safe for ASCII whole-word matching: partial matches and
1265
+ * non-ASCII scripts can match inside tokens, so they bypass the prefilter.
466
1266
  */
467
- findMatchesHybrid(searchText, originalText) {
1267
+ bloomQuickReject(searchText) {
1268
+ if (!this.bloomFilter || this.detectPartialWords)
1269
+ return false;
1270
+ // eslint-disable-next-line no-control-regex
1271
+ if (!/^[\x00-\x7F]*$/.test(searchText))
1272
+ return false;
1273
+ const tokens = searchText.split(/[^\p{L}\p{N}]+/u);
1274
+ for (const token of tokens) {
1275
+ if (token.length > 0 && this.bloomFilter.mightContain(token)) {
1276
+ return false;
1277
+ }
1278
+ }
1279
+ return true;
1280
+ }
1281
+ /**
1282
+ * Hybrid approach: Bloom Filter for quick rejection, Aho-Corasick for matching
1283
+ */
1284
+ findMatchesHybrid(searchText, originalText, posMap) {
1285
+ if (this.bloomQuickReject(searchText)) {
1286
+ return [];
1287
+ }
468
1288
  // Use Aho-Corasick for primary matching if available
469
1289
  if (this.ahoCorasickAutomaton) {
470
- const matches = this.findMatchesWithAhoCorasick(searchText, originalText);
471
- // If Bloom Filter is enabled, validate matches
472
- if (this.bloomFilter) {
473
- return matches.filter((match) => this.bloomFilter.mightContain(match.word));
474
- }
475
- return matches;
1290
+ return this.findMatchesWithAhoCorasick(searchText, originalText, posMap);
476
1291
  }
477
1292
  // Fallback to Trie if Aho-Corasick not available
478
1293
  const matches = [];
479
- this.findMatches(searchText, originalText, matches);
480
- // Validate with Bloom Filter if enabled
481
- if (this.bloomFilter) {
482
- return matches.filter((match) => this.bloomFilter.mightContain(match.word));
483
- }
1294
+ this.findMatches(searchText, originalText, matches, posMap);
484
1295
  return matches;
485
1296
  }
486
1297
  /**
@@ -497,66 +1308,117 @@ export class AllProfanity {
497
1308
  });
498
1309
  }
499
1310
  /**
500
- * Detect profanity in a given text.
501
- * @param text - The text to check.
502
- * @returns Profanity detection result.
1311
+ * Drop all cached detection results. Must be called whenever the word lists
1312
+ * or any option that affects detection output changes.
503
1313
  */
504
- detect(text) {
1314
+ invalidateCache() {
505
1315
  var _a;
1316
+ (_a = this.resultCache) === null || _a === void 0 ? void 0 : _a.clear();
1317
+ }
1318
+ /**
1319
+ * Detects profanity in the provided text and returns comprehensive analysis.
1320
+ *
1321
+ * @param {string} text - The text to analyze for profanity
1322
+ * @returns {ProfanityDetectionResult} Detailed detection result including matches, positions, severity, and cleaned text
1323
+ *
1324
+ * @throws {TypeError} If text is not a string
1325
+ *
1326
+ * @remarks
1327
+ * ### Performance:
1328
+ * - Time Complexity: O(n*m) where n is text length, m is average word length in dictionary
1329
+ * - With Bloom Filter: O(n) average case (faster early rejection)
1330
+ * - With Caching: O(1) for repeated identical text
1331
+ *
1332
+ * ### Features:
1333
+ * - Detects leet speak variations (if enabled): "h3ll0" → "hello"
1334
+ * - Respects word boundaries (strict mode) or detects partial matches
1335
+ * - Returns exact positions for highlighting/masking
1336
+ * - Calculates severity based on match count and uniqueness
1337
+ *
1338
+ * ### Caching:
1339
+ * - Results are cached if `performance.enableCaching` is true
1340
+ * - Cache uses LRU eviction when size limit is reached
1341
+ *
1342
+ * @example
1343
+ * ```typescript
1344
+ * const filter = new AllProfanity();
1345
+ * const result = filter.detect("This has bad words");
1346
+ *
1347
+ * console.log(result.hasProfanity); // true
1348
+ * console.log(result.detectedWords); // ['bad']
1349
+ * console.log(result.cleanedText); // 'This has *** words'
1350
+ * console.log(result.severity); // ProfanitySeverity.MILD
1351
+ * console.log(result.positions); // [{ word: 'bad', start: 9, end: 12 }]
1352
+ * ```
1353
+ *
1354
+ * @example
1355
+ * ```typescript
1356
+ * // With leet speak detection
1357
+ * const filter = new AllProfanity({ enableLeetSpeak: true });
1358
+ * const result = filter.detect("st0p b3ing b@d");
1359
+ *
1360
+ * if (result.hasProfanity) {
1361
+ * result.positions.forEach(pos => {
1362
+ * console.log(`Found "${pos.word}" at position ${pos.start}-${pos.end}`);
1363
+ * });
1364
+ * }
1365
+ * ```
1366
+ *
1367
+ * @see {@link ProfanityDetectionResult} for result structure
1368
+ * @see {@link ProfanitySeverity} for severity levels
1369
+ */
1370
+ detect(text) {
506
1371
  const validatedText = validateString(text, "text");
507
1372
  if (validatedText.length === 0) {
508
1373
  return {
509
1374
  hasProfanity: false,
510
1375
  detectedWords: [],
511
1376
  cleanedText: validatedText,
512
- severity: ProfanitySeverity.MILD,
1377
+ severity: ProfanitySeverity.NONE,
513
1378
  positions: [],
514
1379
  };
515
1380
  }
516
- // Check cache first if enabled
517
- if ((_a = this.resultCache) === null || _a === void 0 ? void 0 : _a.has(validatedText)) {
518
- return this.resultCache.get(validatedText);
1381
+ // Check cache first if enabled (refresh recency for LRU eviction)
1382
+ if (this.resultCache) {
1383
+ const cached = this.resultCache.get(validatedText);
1384
+ if (cached) {
1385
+ this.resultCache.delete(validatedText);
1386
+ this.resultCache.set(validatedText, cached);
1387
+ return cached;
1388
+ }
519
1389
  }
520
1390
  let matches = [];
521
1391
  const normalizedText = this.caseSensitive
522
1392
  ? validatedText
523
1393
  : validatedText.toLowerCase();
524
- // Choose matching algorithm based on configuration
525
- switch (this.matchingAlgorithm) {
526
- case "aho-corasick":
527
- matches = this.findMatchesWithAhoCorasick(normalizedText, validatedText);
528
- if (this.enableLeetSpeak) {
529
- const leetNormalized = this.normalizeLeetSpeak(normalizedText);
530
- if (leetNormalized !== normalizedText) {
531
- const leetMatches = this.findMatchesWithAhoCorasick(leetNormalized, validatedText);
532
- matches.push(...leetMatches);
533
- }
534
- }
535
- break;
536
- case "hybrid":
537
- matches = this.findMatchesHybrid(normalizedText, validatedText);
538
- if (this.enableLeetSpeak) {
539
- const leetNormalized = this.normalizeLeetSpeak(normalizedText);
540
- if (leetNormalized !== normalizedText) {
541
- const leetMatches = this.findMatchesHybrid(leetNormalized, validatedText);
542
- matches.push(...leetMatches);
543
- }
544
- }
545
- break;
546
- case "trie":
547
- default:
548
- this.findMatches(normalizedText, validatedText, matches);
549
- if (this.enableLeetSpeak) {
550
- const leetNormalized = this.normalizeLeetSpeak(normalizedText);
551
- if (leetNormalized !== normalizedText) {
552
- this.findMatches(leetNormalized, validatedText, matches);
553
- }
554
- }
555
- break;
1394
+ // Scan the base text plus every triggered normalization variant
1395
+ // (unicode folding, leet speak, repeated-character collapse)
1396
+ for (const pass of this.buildScanPasses(normalizedText)) {
1397
+ switch (this.matchingAlgorithm) {
1398
+ case "aho-corasick":
1399
+ matches.push(...this.findMatchesWithAhoCorasick(pass.text, validatedText, pass.posMap));
1400
+ break;
1401
+ case "hybrid":
1402
+ matches.push(...this.findMatchesHybrid(pass.text, validatedText, pass.posMap));
1403
+ break;
1404
+ case "trie":
1405
+ default:
1406
+ this.findMatches(pass.text, validatedText, matches, pass.posMap);
1407
+ break;
1408
+ }
1409
+ }
1410
+ if (this.evasionMaskedChars) {
1411
+ matches.push(...this.findMaskedMatches(normalizedText, validatedText));
1412
+ }
1413
+ if (this.evasionSeparatedLetters) {
1414
+ matches.push(...this.findSeparatedMatches(normalizedText, validatedText));
1415
+ }
1416
+ if (this.evasionEmbeddedWords) {
1417
+ matches.push(...this.findEmbeddedMatches(normalizedText, validatedText));
556
1418
  }
557
1419
  // Apply context analysis if enabled
558
1420
  if (this.contextAnalyzer) {
559
- matches = this.applyContextAnalysis(validatedText, matches);
1421
+ matches = this.applyContextAnalysis(validatedText, matches, this.contextScoreThreshold);
560
1422
  }
561
1423
  const uniqueMatches = this.deduplicateMatches(matches);
562
1424
  const detectedWords = uniqueMatches.map((m) => m.originalWord);
@@ -573,14 +1435,15 @@ export class AllProfanity {
573
1435
  end: m.end,
574
1436
  })),
575
1437
  };
576
- // Cache result if caching is enabled
1438
+ // Cache result if caching is enabled (evict least recently used entry)
577
1439
  if (this.resultCache) {
578
- this.resultCache.set(validatedText, result);
579
- // Implement simple LRU by clearing cache when it gets too large
580
- if (this.resultCache.size > 1000) {
581
- const firstKey = this.resultCache.keys().next().value;
582
- this.resultCache.delete(firstKey);
1440
+ if (this.resultCache.size >= this.cacheMaxSize) {
1441
+ const oldestKey = this.resultCache.keys().next().value;
1442
+ if (oldestKey !== undefined) {
1443
+ this.resultCache.delete(oldestKey);
1444
+ }
583
1445
  }
1446
+ this.resultCache.set(validatedText, result);
584
1447
  }
585
1448
  return result;
586
1449
  }
@@ -590,12 +1453,14 @@ export class AllProfanity {
590
1453
  * @param originalText - The original text.
591
1454
  * @param matches - Array to collect matches.
592
1455
  */
593
- findMatches(searchText, originalText, matches) {
1456
+ findMatches(searchText, originalText, matches, posMap) {
594
1457
  for (let i = 0; i < searchText.length; i++) {
595
1458
  const matchResults = this.profanityTrie.findMatches(searchText, i, this.detectPartialWords);
596
1459
  for (const match of matchResults) {
597
- const start = i + match.start;
598
- const end = i + match.end;
1460
+ const searchStart = i + match.start;
1461
+ const searchEnd = i + match.end;
1462
+ const start = posMap ? posMap.starts[searchStart] : searchStart;
1463
+ const end = posMap ? posMap.ends[searchEnd - 1] : searchEnd;
599
1464
  if (!this.detectPartialWords &&
600
1465
  !this.isWholeWord(originalText, start, end)) {
601
1466
  continue;
@@ -604,6 +1469,9 @@ export class AllProfanity {
604
1469
  if (this.isWhitelistedMatch(match.word, matchedText)) {
605
1470
  continue;
606
1471
  }
1472
+ if (this.isWhitelistedContainingWord(originalText, start, end)) {
1473
+ continue;
1474
+ }
607
1475
  if (this.hasWordBoundaries(originalText, start, end)) {
608
1476
  matches.push({
609
1477
  word: match.word,
@@ -636,18 +1504,149 @@ export class AllProfanity {
636
1504
  return result;
637
1505
  }
638
1506
  /**
639
- * Check if a string contains profanity.
640
- * @param text - The text to check.
641
- * @returns True if profanity is found, false otherwise.
1507
+ * Quick boolean check for profanity presence in text.
1508
+ *
1509
+ * @param {string} text - The text to check for profanity
1510
+ * @returns {boolean} True if profanity is detected, false otherwise
1511
+ *
1512
+ * @throws {TypeError} If text is not a string
1513
+ *
1514
+ * @remarks
1515
+ * - Returns the same verdict as `detect()` but stops at the first accepted match; the full `detect()` pipeline only runs when context analysis is enabled
1516
+ * - For detailed information about matches, use `detect()` instead
1517
+ * - Reuses a result already cached by `detect()` when caching is enabled; the early-exit path itself does not add cache entries
1518
+ *
1519
+ * @example
1520
+ * ```typescript
1521
+ * const filter = new AllProfanity();
1522
+ *
1523
+ * if (filter.check("This has bad words")) {
1524
+ * console.log("Profanity detected!");
1525
+ * }
1526
+ *
1527
+ * // Quick validation
1528
+ * const isClean = !filter.check(userInput);
1529
+ * ```
1530
+ *
1531
+ * @see {@link detect} for detailed profanity analysis
642
1532
  */
643
1533
  check(text) {
644
- return this.detect(text).hasProfanity;
1534
+ const validatedText = validateString(text, "text");
1535
+ if (validatedText.length === 0)
1536
+ return false;
1537
+ // Reuse a cached full result when available
1538
+ if (this.resultCache) {
1539
+ const cached = this.resultCache.get(validatedText);
1540
+ if (cached)
1541
+ return cached.hasProfanity;
1542
+ }
1543
+ // Context analysis scores matches against their surroundings; reuse the
1544
+ // full pipeline so check() and detect() can never disagree.
1545
+ if (this.contextAnalyzer) {
1546
+ return this.detect(validatedText).hasProfanity;
1547
+ }
1548
+ const normalizedText = this.caseSensitive
1549
+ ? validatedText
1550
+ : validatedText.toLowerCase();
1551
+ // Early exit on the first accepted match — unlike detect(), no positions,
1552
+ // severity or cleaned text are computed. The base text is scanned before
1553
+ // any normalization variants are built, so plainly profane text returns
1554
+ // without paying for normalization at all.
1555
+ if (this.hasMatchInPass(normalizedText, validatedText)) {
1556
+ return true;
1557
+ }
1558
+ const passes = this.buildScanPasses(normalizedText);
1559
+ for (let p = 1; p < passes.length; p++) {
1560
+ if (this.hasMatchInPass(passes[p].text, validatedText, passes[p].posMap)) {
1561
+ return true;
1562
+ }
1563
+ }
1564
+ if (this.evasionMaskedChars &&
1565
+ this.findMaskedMatches(normalizedText, validatedText).length > 0) {
1566
+ return true;
1567
+ }
1568
+ if (this.evasionSeparatedLetters &&
1569
+ this.findSeparatedMatches(normalizedText, validatedText).length > 0) {
1570
+ return true;
1571
+ }
1572
+ if (this.evasionEmbeddedWords &&
1573
+ this.findEmbeddedMatches(normalizedText, validatedText).length > 0) {
1574
+ return true;
1575
+ }
1576
+ return false;
645
1577
  }
646
1578
  /**
647
- * Clean text with a custom placeholder.
648
- * @param text - The text to clean.
649
- * @param placeholder - The placeholder to use.
650
- * @returns Cleaned text.
1579
+ * Trie scan that stops at the first match surviving the whole-word,
1580
+ * whitelist and boundary checks. Powers the fast path in check().
1581
+ */
1582
+ hasMatchInPass(searchText, originalText, posMap) {
1583
+ for (let i = 0; i < searchText.length; i++) {
1584
+ const matchResults = this.profanityTrie.findMatches(searchText, i, this.detectPartialWords);
1585
+ for (const match of matchResults) {
1586
+ const searchEnd = i + match.end;
1587
+ const start = posMap ? posMap.starts[i] : i;
1588
+ const end = posMap ? posMap.ends[searchEnd - 1] : searchEnd;
1589
+ if (!this.detectPartialWords &&
1590
+ !this.isWholeWord(originalText, start, end)) {
1591
+ continue;
1592
+ }
1593
+ const matchedText = originalText.substring(start, end);
1594
+ if (this.isWhitelistedMatch(match.word, matchedText)) {
1595
+ continue;
1596
+ }
1597
+ if (this.isWhitelistedContainingWord(originalText, start, end)) {
1598
+ continue;
1599
+ }
1600
+ if (this.hasWordBoundaries(originalText, start, end)) {
1601
+ return true;
1602
+ }
1603
+ }
1604
+ }
1605
+ return false;
1606
+ }
1607
+ /**
1608
+ * Cleans text by replacing profanity with a placeholder character.
1609
+ *
1610
+ * @param {string} text - The text to clean
1611
+ * @param {string} [placeholder] - Optional custom placeholder character (uses default if not provided)
1612
+ * @returns {string} The cleaned text with profanity replaced
1613
+ *
1614
+ * @throws {TypeError} If text is not a string
1615
+ *
1616
+ * @remarks
1617
+ * ### Character-level Replacement:
1618
+ * - Each profane character is replaced individually
1619
+ * - "bad" with placeholder "*" becomes "***"
1620
+ * - Preserves text length and structure
1621
+ *
1622
+ * ### Placeholder Behavior:
1623
+ * - If no placeholder provided, uses the instance's default placeholder
1624
+ * - If placeholder provided, uses only the first character
1625
+ * - Empty placeholder throws error
1626
+ *
1627
+ * @example
1628
+ * ```typescript
1629
+ * const filter = new AllProfanity();
1630
+ *
1631
+ * // Using default placeholder (*)
1632
+ * const cleaned = filter.clean("This has bad words");
1633
+ * console.log(cleaned); // "This has *** *****"
1634
+ *
1635
+ * // Using custom placeholder
1636
+ * const cleaned = filter.clean("This has bad words", "#");
1637
+ * console.log(cleaned); // "This has ### #####"
1638
+ * ```
1639
+ *
1640
+ * @example
1641
+ * ```typescript
1642
+ * // Clean user-generated content for display
1643
+ * const userComment = "Some inappropriate words here";
1644
+ * const safeComment = filter.clean(userComment);
1645
+ * displayComment(safeComment);
1646
+ * ```
1647
+ *
1648
+ * @see {@link cleanWithPlaceholder} for word-level replacement
1649
+ * @see {@link setPlaceholder} to change default placeholder
651
1650
  */
652
1651
  clean(text, placeholder) {
653
1652
  const detection = this.detect(text);
@@ -663,9 +1662,10 @@ export class AllProfanity {
663
1662
  originalWord: text.substring(p.start, p.end),
664
1663
  }))),
665
1664
  ].sort((a, b) => b.start - a.start);
1665
+ const placeholderChar = placeholder.charAt(0);
666
1666
  for (const pos of sortedPositions) {
667
1667
  const originalWord = text.substring(pos.start, pos.end);
668
- const replacement = placeholder.repeat(originalWord.length);
1668
+ const replacement = placeholderChar.repeat(originalWord.length);
669
1669
  result =
670
1670
  result.substring(0, pos.start) +
671
1671
  replacement +
@@ -674,10 +1674,46 @@ export class AllProfanity {
674
1674
  return result;
675
1675
  }
676
1676
  /**
677
- * Clean text by replacing each profane word with a single placeholder (word-level).
678
- * @param text - The text to clean.
679
- * @param placeholder - The placeholder to use.
680
- * @returns Word-level cleaned text.
1677
+ * Cleans text by replacing each profane word with a single placeholder string (word-level replacement).
1678
+ *
1679
+ * @param {string} text - The text to clean
1680
+ * @param {string} [placeholder="***"] - The placeholder string to use for each profane word
1681
+ * @returns {string} The cleaned text with each profane word replaced by the placeholder
1682
+ *
1683
+ * @throws {TypeError} If text is not a string
1684
+ *
1685
+ * @remarks
1686
+ * ### Word-level Replacement:
1687
+ * - Each profane word is replaced with the entire placeholder string (not character-by-character)
1688
+ * - "bad words" with placeholder "***" becomes "*** ***"
1689
+ * - Does NOT preserve original text length
1690
+ *
1691
+ * ### Difference from `clean()`:
1692
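+ * - `clean()`: Character-level replacement - "badword" becomes "*******" (one placeholder character per original character, preserves length)
1693
+ * - `cleanWithPlaceholder()`: Word-level replacement - "badword" becomes "***" (the whole placeholder string, regardless of word length)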
1694
+ *
1695
+ * @example
1696
+ * ```typescript
1697
+ * const filter = new AllProfanity();
1698
+ *
1699
+ * const text = "This has bad words"; // cleaned below with the default placeholder ("***")
1700
+ * const cleaned = filter.cleanWithPlaceholder(text);
1701
+ * console.log(cleaned); // "This has *** ***"
1702
+ *
1703
+ * // Custom placeholder
1704
+ * const cleaned2 = filter.cleanWithPlaceholder(text, "[CENSORED]");
1705
+ * console.log(cleaned2); // "This has [CENSORED] [CENSORED]"
1706
+ * ```
1707
+ *
1708
+ * @example
1709
+ * ```typescript
1710
+ * // Censoring chat messages
1711
+ * const message = "You are a badword and stupid";
1712
+ * const censored = filter.cleanWithPlaceholder(message, "[***]");
1713
+ * // Result: "You are a [***] and [***]"
1714
+ * ```
1715
+ *
1716
+ * @see {@link clean} for character-level replacement
681
1717
  */
682
1718
  cleanWithPlaceholder(text, placeholder = "***") {
683
1719
  const detection = this.detect(text);
@@ -703,51 +1739,144 @@ export class AllProfanity {
703
1739
  return result;
704
1740
  }
705
1741
  /**
706
- * Add word(s) to the profanity filter.
707
- * @param word - Word or array of words to add.
1742
+ * Dynamically adds one or more words to the profanity filter at runtime.
1743
+ *
1744
+ * @param {string | string[]} word - A single word or array of words to add to the filter
1745
+ * @returns {void}
1746
+ *
1747
+ * @remarks
1748
+ * ### Behavior:
1749
+ * - Words are added to all active data structures (Trie, Aho-Corasick, Bloom Filter)
1750
+ * - Automatically normalizes words based on caseSensitive setting
1751
+ * - Skips whitelisted words
1752
+ * - Validates and filters out non-string or empty values
1753
+ * - Changes take effect immediately for subsequent detect/check/clean calls
1754
+ *
1755
+ * ### Use Cases:
1756
+ * - Adding context-specific profanity
1757
+ * - Building dynamic word lists from user reports
1758
+ * - Customizing filters for specific communities/applications
1759
+ *
1760
+ * @example
1761
+ * ```typescript
1762
+ * const filter = new AllProfanity();
1763
+ *
1764
+ * // Add single word
1765
+ * filter.add('newbadword');
1766
+ *
1767
+ * // Add multiple words
1768
+ * filter.add(['word1', 'word2', 'word3']);
1769
+ *
1770
+ * // Now these words will be detected
1771
+ * filter.check('newbadword'); // true
1772
+ * ```
1773
+ *
1774
+ * @example
1775
+ * ```typescript
1776
+ * // Add game-specific slang dynamically
1777
+ * const filter = new AllProfanity();
1778
+ * const gamingSlang = ['noob', 'trash', 'tryhard'];
1779
+ * filter.add(gamingSlang);
1780
+ *
1781
+ * const message = "You're such a noob";
1782
+ * console.log(filter.check(message)); // true
1783
+ * ```
1784
+ *
1785
+ * @see {@link remove} to remove words
1786
+ * @see {@link loadCustomDictionary} for loading named dictionaries
708
1787
  */
709
1788
  add(word) {
710
1789
  const words = Array.isArray(word) ? word : [word];
711
- const validatedWords = validateStringArray(words, "words to add");
1790
+ const validatedWords = validateStringArray(words, "words to add", this.logger);
712
1791
  for (const w of validatedWords) {
713
1792
  this.dynamicWords.add(w);
714
1793
  this.addWordToTrie(w);
715
1794
  }
1795
+ this.invalidateCache();
716
1796
  }
717
1797
  /**
718
- * Remove word(s) from the profanity filter.
719
- * @param word - Word or array of words to remove.
1798
+ * Dynamically removes one or more words from the profanity filter at runtime.
1799
+ *
1800
+ * @param {string | string[]} word - A single word or array of words to remove from the filter
1801
+ * @returns {void}
1802
+ *
1803
+ * @remarks
1804
+ * ### Behavior:
1805
+ * - Removes words from the Trie, the dynamic words set, and the Aho-Corasick automaton (when enabled); Bloom Filter entries cannot be deleted and are left in place
1806
+ * - Normalizes words based on caseSensitive setting before removal
1807
+ * - Only removes dynamically added words, not words from loaded language dictionaries
1808
+ * - Changes take effect immediately for subsequent detect/check/clean calls
1809
+ *
1810
+ * ### Important Notes:
1811
+ * - Cannot remove words from built-in language dictionaries
1812
+ * - To exclude dictionary words, use `addToWhitelist()` instead
1813
+ * - Validates and filters out non-string or empty values
1814
+ *
1815
+ * @example
1816
+ * ```typescript
1817
+ * const filter = new AllProfanity();
1818
+ *
1819
+ * // Add then remove a word
1820
+ * filter.add('tempword');
1821
+ * filter.check('tempword'); // true
1822
+ *
1823
+ * filter.remove('tempword');
1824
+ * filter.check('tempword'); // false
1825
+ *
1826
+ * // Remove multiple words
1827
+ * filter.remove(['word1', 'word2']);
1828
+ * ```
1829
+ *
1830
+ * @example
1831
+ * ```typescript
1832
+ * // Managing custom word list
1833
+ * const filter = new AllProfanity();
1834
+ * filter.add(['custom1', 'custom2', 'custom3']);
1835
+ *
1836
+ * // Later, remove one that's no longer needed
1837
+ * filter.remove('custom2');
1838
+ * ```
1839
+ *
1840
+ * @see {@link add} to add words
1841
+ * @see {@link addToWhitelist} to exclude dictionary words without removing them
720
1842
  */
721
1843
  remove(word) {
1844
+ var _a;
722
1845
  const words = Array.isArray(word) ? word : [word];
723
- const validatedWords = validateStringArray(words, "words to remove");
1846
+ const validatedWords = validateStringArray(words, "words to remove", this.logger);
724
1847
  for (const w of validatedWords) {
725
1848
  const normalizedWord = this.caseSensitive ? w : w.toLowerCase();
726
1849
  this.profanityTrie.removeWord(normalizedWord);
727
1850
  this.dynamicWords.delete(w);
1851
+ // Bloom filter entries cannot be deleted, but stale entries only cost a
1852
+ // skipped quick-rejection — they can never produce a match by themselves.
1853
+ (_a = this.ahoCorasickAutomaton) === null || _a === void 0 ? void 0 : _a.removePattern(normalizedWord);
728
1854
  }
1855
+ this.invalidateCache();
729
1856
  }
730
1857
  /**
731
1858
  * Add words to the whitelist.
732
1859
  * @param words - Words to whitelist.
733
1860
  */
734
1861
  addToWhitelist(words) {
735
- const validatedWords = validateStringArray(words, "whitelist words");
1862
+ const validatedWords = validateStringArray(words, "whitelist words", this.logger);
736
1863
  for (const word of validatedWords) {
737
1864
  const normalizedWord = this.caseSensitive ? word : word.toLowerCase();
738
1865
  this.whitelistSet.add(normalizedWord);
739
1866
  }
1867
+ this.invalidateCache();
740
1868
  }
741
1869
  /**
742
1870
  * Remove words from the whitelist.
743
1871
  * @param words - Words to remove from whitelist.
744
1872
  */
745
1873
  removeFromWhitelist(words) {
746
- const validatedWords = validateStringArray(words, "whitelist words");
1874
+ const validatedWords = validateStringArray(words, "whitelist words", this.logger);
747
1875
  for (const word of validatedWords) {
748
1876
  const normalizedWord = this.caseSensitive ? word : word.toLowerCase();
749
1877
  this.whitelistSet.delete(normalizedWord);
750
1878
  }
1879
+ this.invalidateCache();
751
1880
  }
752
1881
  /**
753
1882
  * Check if a word is whitelisted.
@@ -759,9 +1888,60 @@ export class AllProfanity {
759
1888
  return this.whitelistSet.has(normalizedWord);
760
1889
  }
761
1890
  /**
762
- * Load a built-in language dictionary.
763
- * @param language - The language key.
764
- * @returns True if loaded, false otherwise.
1891
+ * Loads a built-in language dictionary into the profanity filter.
1892
+ *
1893
+ * @param {string} language - The language key to load (case-insensitive)
1894
+ * @returns {boolean} True if the language was loaded successfully or was already loaded, false if the language was not found
1895
+ *
1896
+ * @remarks
1897
+ * ### Available Languages:
1898
+ * - `'english'` - English profanity words
1899
+ * - `'hindi'` - Hindi profanity words
1900
+ * - `'french'` - French profanity words
1901
+ * - `'german'` - German profanity words
1902
+ * - `'spanish'` - Spanish profanity words
1903
+ * - `'bengali'` - Bengali profanity words
1904
+ * - `'tamil'` - Tamil profanity words
1905
+ * - `'telugu'` - Telugu profanity words
1906
+ * - `'brazilian'` - Brazilian Portuguese profanity words
1907
+ *
1908
+ * ### Behavior:
1909
+ * - Language keys are case-insensitive
1910
+ * - Loading is idempotent - calling multiple times for same language is safe
1911
+ * - Returns true if language loaded successfully or was already loaded
1912
+ * - Returns false if language not found
1913
+ * - Logs success/failure messages (unless silent mode enabled)
1914
+ * - Words are added to all active data structures
1915
+ *
1916
+ * ### Default Languages:
1917
+ * English and Hindi are loaded automatically in the constructor
1918
+ *
1919
+ * @example
1920
+ * ```typescript
1921
+ * const filter = new AllProfanity();
1922
+ *
1923
+ * // Load additional languages
1924
+ * filter.loadLanguage('french');
1925
+ * filter.loadLanguage('spanish');
1926
+ *
1927
+ * // Case-insensitive
1928
+ * filter.loadLanguage('GERMAN'); // Works
1929
+ *
1930
+ * // Check if loaded
1931
+ * console.log(filter.getLoadedLanguages()); // ['english', 'hindi', 'french', 'spanish', 'german']
1932
+ * ```
1933
+ *
1934
+ * @example
1935
+ * ```typescript
1936
+ * // Load all Indian languages at once
1937
+ * const filter = new AllProfanity();
1938
+ * filter.loadIndianLanguages();
1939
+ * ```
1940
+ *
1941
+ * @see {@link loadLanguages} to load multiple languages at once
1942
+ * @see {@link loadIndianLanguages} for convenience method
1943
+ * @see {@link getAvailableLanguages} to see all available languages
1944
+ * @see {@link getLoadedLanguages} to see currently loaded languages
765
1945
  */
766
1946
  loadLanguage(language) {
767
1947
  if (!language || typeof language !== "string") {
@@ -785,6 +1965,7 @@ export class AllProfanity {
785
1965
  }
786
1966
  }
787
1967
  this.loadedLanguages.add(langKey);
1968
+ this.invalidateCache();
788
1969
  this.logger.info(`Loaded ${addedCount} words from ${language} dictionary`);
789
1970
  return true;
790
1971
  }
@@ -799,7 +1980,7 @@ export class AllProfanity {
799
1980
  * @returns Number of successfully loaded languages.
800
1981
  */
801
1982
  loadLanguages(languages) {
802
- const validatedLanguages = validateStringArray(languages, "languages");
1983
+ const validatedLanguages = validateStringArray(languages, "languages", this.logger);
803
1984
  return validatedLanguages.reduce((count, lang) => {
804
1985
  return this.loadLanguage(lang) ? count + 1 : count;
805
1986
  }, 0);
@@ -813,13 +1994,68 @@ export class AllProfanity {
813
1994
  return this.loadLanguages(indianLanguages);
814
1995
  }
815
1996
  /**
816
- * Load a custom dictionary.
817
- * @param name - Name of the dictionary.
818
- * @param words - Words to add.
1997
+ * Loads a custom dictionary of profane words with a specific name.
1998
+ *
1999
+ * @param {string} name - Unique name/identifier for this custom dictionary
2000
+ * @param {string[]} words - Array of profane words to add to the dictionary
2001
+ * @returns {void}
2002
+ *
2003
+ * @throws {TypeError} If name is not a string or words is not an array
2004
+ *
2005
+ * @remarks
2006
+ * ### Behavior:
2007
+ * - Creates a new named dictionary or overwrites existing one with same name
2008
+ * - Validates and filters out non-string and empty values from words array
2009
+ * - Words are added to all active data structures (Trie, Aho-Corasick, Bloom Filter)
2010
+ * - Dictionary name is converted to lowercase for storage
2011
+ * - Logs count of loaded words (unless silent mode enabled)
2012
+ *
2013
+ * ### Use Cases:
2014
+ * - Domain-specific profanity (gaming, medical, legal, etc.)
2015
+ * - Organization-specific word lists
2016
+ * - Temporary or context-dependent filters
2017
+ * - Testing and development
2018
+ *
2019
+ * @example
2020
+ * ```typescript
2021
+ * const filter = new AllProfanity();
2022
+ *
2023
+ * // Load gaming-specific slang
2024
+ * filter.loadCustomDictionary('gaming', [
2025
+ * 'noob',
2026
+ * 'scrub',
2027
+ * 'tryhard',
2028
+ * 'trash'
2029
+ * ]);
2030
+ *
2031
+ * // Load company-specific terms
2032
+ * filter.loadCustomDictionary('company', [
2033
+ * 'competitor1',
2034
+ * 'bannedTerm1',
2035
+ * 'inappropriateJargon'
2036
+ * ]);
2037
+ *
2038
+ * console.log(filter.check('You are such a noob')); // true
2039
+ * ```
2040
+ *
2041
+ * @example
2042
+ * ```typescript
2043
+ * // Load from external source
2044
+ * const filter = new AllProfanity();
2045
+ *
2046
+ * async function loadExternalDictionary() {
2047
+ * const response = await fetch('https://example.com/custom-words.json');
2048
+ * const customWords = await response.json();
2049
+ * filter.loadCustomDictionary('external', customWords);
2050
+ * }
2051
+ * ```
2052
+ *
2053
+ * @see {@link add} for adding individual words dynamically
2054
+ * @see {@link loadLanguage} for loading built-in language dictionaries
819
2055
  */
820
2056
  loadCustomDictionary(name, words) {
821
2057
  validateString(name, "dictionary name");
822
- const validatedWords = validateStringArray(words, "custom dictionary words");
2058
+ const validatedWords = validateStringArray(words, "custom dictionary words", this.logger);
823
2059
  if (validatedWords.length === 0) {
824
2060
  this.logger.warn(`Custom dictionary '${name}' contains no valid words`);
825
2061
  return;
@@ -833,6 +2069,7 @@ export class AllProfanity {
833
2069
  }
834
2070
  this.availableLanguages[name.toLowerCase()] = validatedWords;
835
2071
  this.loadedLanguages.add(name.toLowerCase());
2072
+ this.invalidateCache();
836
2073
  this.logger.info(`Loaded ${addedCount} words from custom dictionary '${name}'`);
837
2074
  }
838
2075
  catch (error) {
@@ -856,9 +2093,17 @@ export class AllProfanity {
856
2093
  }
857
2094
  // Add to Trie (always used as fallback)
858
2095
  this.profanityTrie.addWord(normalizedWord);
859
- // Add to Bloom Filter if enabled
2096
+ // Add to Bloom Filter if enabled. Constituent tokens of multi-word or
2097
+ // symbol-containing entries are added too, so the token-level quick
2098
+ // rejection in bloomQuickReject() can never miss a phrase.
860
2099
  if (this.bloomFilter) {
861
2100
  this.bloomFilter.add(normalizedWord);
2101
+ const tokens = normalizedWord.split(/[^\p{L}\p{N}]+/u);
2102
+ for (const token of tokens) {
2103
+ if (token.length > 0 && token !== normalizedWord) {
2104
+ this.bloomFilter.add(token);
2105
+ }
2106
+ }
862
2107
  }
863
2108
  // Add to Aho-Corasick automaton if enabled
864
2109
  if (this.ahoCorasickAutomaton) {
@@ -873,7 +2118,7 @@ export class AllProfanity {
873
2118
  */
874
2119
  calculateSeverity(matches) {
875
2120
  if (matches.length === 0)
876
- return ProfanitySeverity.MILD;
2121
+ return ProfanitySeverity.NONE;
877
2122
  const uniqueWords = new Set(matches.map((m) => m.word)).size;
878
2123
  const totalMatches = matches.length;
879
2124
  if (totalMatches >= 5 || uniqueWords >= 4)
@@ -888,9 +2133,13 @@ export class AllProfanity {
888
2133
  * Clear all loaded dictionaries and dynamic words.
889
2134
  */
890
2135
  clearList() {
2136
+ var _a, _b;
891
2137
  this.profanityTrie.clear();
892
2138
  this.loadedLanguages.clear();
893
2139
  this.dynamicWords.clear();
2140
+ (_a = this.ahoCorasickAutomaton) === null || _a === void 0 ? void 0 : _a.clear();
2141
+ (_b = this.bloomFilter) === null || _b === void 0 ? void 0 : _b.clear();
2142
+ this.invalidateCache();
894
2143
  }
895
2144
  /**
896
2145
  * Set the placeholder character for filtered words.
@@ -902,6 +2151,7 @@ export class AllProfanity {
902
2151
  throw new Error("Placeholder cannot be empty");
903
2152
  }
904
2153
  this.defaultPlaceholder = placeholder.charAt(0);
2154
+ this.invalidateCache();
905
2155
  }
906
2156
  /**
907
2157
  * Get the list of loaded languages.
@@ -933,10 +2183,14 @@ export class AllProfanity {
933
2183
  };
934
2184
  }
935
2185
  /**
936
- * Rebuild the profanity trie from loaded dictionaries and dynamic words.
2186
+ * Rebuild all matching structures (trie, Aho-Corasick automaton, Bloom
2187
+ * Filter) from loaded dictionaries and dynamic words.
937
2188
  */
938
- rebuildTrie() {
2189
+ rebuildIndexes() {
2190
+ var _a, _b;
939
2191
  this.profanityTrie.clear();
2192
+ (_a = this.ahoCorasickAutomaton) === null || _a === void 0 ? void 0 : _a.clear();
2193
+ (_b = this.bloomFilter) === null || _b === void 0 ? void 0 : _b.clear();
940
2194
  for (const lang of this.loadedLanguages) {
941
2195
  const words = this.availableLanguages[lang] || [];
942
2196
  for (const word of words) {
@@ -946,6 +2200,7 @@ export class AllProfanity {
946
2200
  for (const word of this.dynamicWords) {
947
2201
  this.addWordToTrie(word);
948
2202
  }
2203
+ this.invalidateCache();
949
2204
  }
950
2205
  /**
951
2206
  * Update configuration options for the profanity filter.
@@ -974,8 +2229,9 @@ export class AllProfanity {
974
2229
  this.addToWhitelist(options.whitelistWords);
975
2230
  }
976
2231
  if (rebuildNeeded) {
977
- this.rebuildTrie();
2232
+ this.rebuildIndexes();
978
2233
  }
2234
+ this.invalidateCache();
979
2235
  }
980
2236
  /**
981
2237
  * Create an AllProfanity instance from a configuration object.
@@ -992,8 +2248,12 @@ export class AllProfanity {
992
2248
  options.ahoCorasick = config.ahoCorasick;
993
2249
  if (config.contextAnalysis)
994
2250
  options.contextAnalysis = config.contextAnalysis;
2251
+ if (config.evasionProtection)
2252
+ options.evasionProtection = config.evasionProtection;
995
2253
  if (config.performance)
996
2254
  options.performance = config.performance;
2255
+ if (config.silent !== undefined)
2256
+ options.silent = config.silent;
997
2257
  if (config.profanityDetection) {
998
2258
  options.enableLeetSpeak = config.profanityDetection.enableLeetSpeak;
999
2259
  options.caseSensitive = config.profanityDetection.caseSensitive;
@@ -1024,7 +2284,8 @@ export class AllProfanity {
1024
2284
  }
1025
2285
  /**
1026
2286
  * Singleton instance of AllProfanity with default configuration.
2287
+ * Silent so that importing the library never writes to the console.
1027
2288
  */
1028
- const allProfanity = new AllProfanity();
2289
+ const allProfanity = new AllProfanity({ silent: true });
1029
2290
  export default allProfanity;
1030
2291
  //# sourceMappingURL=index.js.map