allprofanity 2.2.0 → 2.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.ts CHANGED
@@ -8,81 +8,398 @@ export { default as tamilBadWords } from "./languages/tamil-words.js";
8
8
  export { default as teluguBadWords } from "./languages/telugu-words.js";
9
9
  export { default as brazilianBadWords } from "./languages/brazilian-words.js";
10
10
  /**
11
- * Logger interface for the library.
11
+ * Logger interface for AllProfanity library logging operations.
12
+ *
13
+ * @interface Logger
14
+ * @description Provides a contract for logging implementations used by the AllProfanity library.
15
+ * Implement this interface to provide custom logging behavior (e.g., logging to files, external services).
16
+ *
17
+ * @example
18
+ * ```typescript
19
+ * class CustomLogger implements Logger {
20
+ * info(message: string): void {
21
+ * // Custom info logging logic
22
+ * }
23
+ * warn(message: string): void {
24
+ * // Custom warning logging logic
25
+ * }
26
+ * error(message: string): void {
27
+ * // Custom error logging logic
28
+ * }
29
+ * }
30
+ * const filter = new AllProfanity({ logger: new CustomLogger() });
31
+ * ```
12
32
  */
13
33
  export interface Logger {
14
34
  /**
15
- * Log informational messages.
16
- * @param message - The message to log.
35
+ * Log informational messages about normal operations.
36
+ *
37
+ * @param message - The informational message to log
38
+ * @returns void
17
39
  */
18
40
  info(message: string): void;
19
41
  /**
20
- * Log warning messages.
21
- * @param message - The message to log.
42
+ * Log warning messages about potential issues or deprecated usage.
43
+ *
44
+ * @param message - The warning message to log
45
+ * @returns void
22
46
  */
23
47
  warn(message: string): void;
24
48
  /**
25
- * Log error messages.
26
- * @param message - The message to log.
49
+ * Log error messages about failures or critical issues.
50
+ *
51
+ * @param message - The error message to log
52
+ * @returns void
27
53
  */
28
54
  error(message: string): void;
29
55
  }
30
56
  /**
31
- * Configuration options for AllProfanity.
57
+ * Configuration options for initializing an AllProfanity instance.
58
+ *
59
+ * @interface AllProfanityOptions
60
+ * @description Comprehensive configuration object for customizing profanity detection behavior,
61
+ * algorithm selection, performance optimizations, and logging.
62
+ *
63
+ * @example
64
+ * ```typescript
65
+ * const filter = new AllProfanity({
66
+ * languages: ['english', 'french'],
67
+ * enableLeetSpeak: true,
68
+ * strictMode: true,
69
+ * algorithm: {
70
+ * matching: 'hybrid',
71
+ * useBloomFilter: true
72
+ * },
73
+ * performance: {
74
+ * enableCaching: true,
75
+ * cacheSize: 500
76
+ * }
77
+ * });
78
+ * ```
32
79
  */
33
80
  export interface AllProfanityOptions {
81
+ /**
82
+ * Array of language keys to load (e.g., 'english', 'hindi', 'french').
83
+ * Available languages: english, hindi, french, german, spanish, bengali, tamil, telugu, brazilian.
84
+ *
85
+ * @default ['english', 'hindi'] (loaded by default in constructor)
86
+ */
34
87
  languages?: string[];
88
+ /**
89
+ * Custom dictionaries to load in addition to built-in languages.
90
+ * Key is the dictionary name, value is an array of words.
91
+ *
92
+ * @example
93
+ * ```typescript
94
+ * customDictionaries: {
95
+ * 'gaming': ['noob', 'trash'],
96
+ * 'custom': ['word1', 'word2']
97
+ * }
98
+ * ```
99
+ */
35
100
  customDictionaries?: Record<string, string[]>;
101
+ /**
102
+ * Single character to use as replacement placeholder for profane characters.
103
+ *
104
+ * @default "*"
105
+ */
36
106
  defaultPlaceholder?: string;
107
+ /**
108
+ * Enable detection and normalization of leet speak variations (e.g., "h3ll0" -> "hello").
109
+ *
110
+ * @default true
111
+ */
37
112
  enableLeetSpeak?: boolean;
113
+ /**
114
+ * Enable case-sensitive matching. When false, all matching is done in lowercase.
115
+ *
116
+ * @default false
117
+ */
38
118
  caseSensitive?: boolean;
119
+ /**
120
+ * Array of words to whitelist (never flag as profanity even if in dictionaries).
121
+ *
122
+ * @example ['hello', 'class', 'assignment']
123
+ */
39
124
  whitelistWords?: string[];
125
+ /**
126
+ * Strict mode requires profanity to be surrounded by word boundaries (spaces, punctuation).
127
+ * When false, profanity embedded in other words may be detected.
128
+ *
129
+ * @default false
130
+ */
40
131
  strictMode?: boolean;
132
+ /**
133
+ * Allow detection of profanity as partial matches within larger words.
134
+ * When true, "badword" will be detected in "mybadwordhere".
135
+ *
136
+ * @default false
137
+ */
41
138
  detectPartialWords?: boolean;
139
+ /**
140
+ * Custom logger implementation for handling log messages.
141
+ * If not provided, defaults to ConsoleLogger unless silent mode is enabled.
142
+ */
42
143
  logger?: Logger;
144
+ /**
145
+ * Silent mode suppresses all logging output.
146
+ * When true, uses SilentLogger to discard all log messages.
147
+ *
148
+ * @default false
149
+ */
150
+ silent?: boolean;
151
+ /**
152
+ * Advanced algorithm configuration for pattern matching strategies.
153
+ */
43
154
  algorithm?: {
155
+ /**
156
+ * Primary matching algorithm to use.
157
+ * - 'trie': Fast prefix tree matching (default, best for most use cases)
158
+ * - 'aho-corasick': Multi-pattern matching (best for large dictionaries)
159
+ * - 'hybrid': Combines Aho-Corasick with Bloom Filter (best for extreme performance)
160
+ *
161
+ * @default "trie"
162
+ */
44
163
  matching?: "trie" | "aho-corasick" | "hybrid";
164
+ /**
165
+ * Enable Aho-Corasick automaton for multi-pattern matching.
166
+ * Automatically enabled when matching is set to 'aho-corasick' or 'hybrid'.
167
+ *
168
+ * @default false
169
+ */
45
170
  useAhoCorasick?: boolean;
171
+ /**
172
+ * Enable Bloom Filter for probabilistic quick rejection of non-profane text.
173
+ * Automatically enabled when matching is set to 'hybrid'.
174
+ *
175
+ * @default false
176
+ */
46
177
  useBloomFilter?: boolean;
178
+ /**
179
+ * Enable context analysis to reduce false positives based on surrounding words.
180
+ *
181
+ * @default false
182
+ */
47
183
  useContextAnalysis?: boolean;
48
184
  };
185
+ /**
186
+ * Bloom Filter configuration for probabilistic matching optimization.
187
+ */
49
188
  bloomFilter?: {
189
+ /**
190
+ * Enable Bloom Filter.
191
+ *
192
+ * @default false
193
+ */
50
194
  enabled?: boolean;
195
+ /**
196
+ * Expected number of items to be stored in the Bloom Filter.
197
+ * Higher values increase memory usage but reduce false positive rate.
198
+ *
199
+ * @default 10000
200
+ */
51
201
  expectedItems?: number;
202
+ /**
203
+ * Target false positive rate (probability of incorrectly identifying non-profanity as profanity).
204
+ * Lower values increase memory usage but improve accuracy.
205
+ *
206
+ * @default 0.01 (1%)
207
+ */
52
208
  falsePositiveRate?: number;
53
209
  };
210
+ /**
211
+ * Aho-Corasick automaton configuration for multi-pattern matching.
212
+ */
54
213
  ahoCorasick?: {
214
+ /**
215
+ * Enable Aho-Corasick automaton.
216
+ *
217
+ * @default false
218
+ */
55
219
  enabled?: boolean;
220
+ /**
221
+ * Pre-build the automaton during initialization.
222
+ * When false, automaton is built lazily on first use.
223
+ *
224
+ * @default false
225
+ */
56
226
  prebuild?: boolean;
57
227
  };
228
+ /**
229
+ * Context analysis configuration for reducing false positives.
230
+ */
58
231
  contextAnalysis?: {
232
+ /**
233
+ * Enable context-aware profanity detection.
234
+ *
235
+ * @default false
236
+ */
59
237
  enabled?: boolean;
238
+ /**
239
+ * Number of words before and after the detected word to analyze for context.
240
+ *
241
+ * @default 5
242
+ */
60
243
  contextWindow?: number;
244
+ /**
245
+ * Languages to use for context analysis (e.g., ['en', 'es']).
246
+ *
247
+ * @default ['en']
248
+ */
61
249
  languages?: string[];
250
+ /**
251
+ * Minimum confidence score (0-1) required to flag as profanity.
252
+ * Higher values reduce false positives but may miss some profanity.
253
+ *
254
+ * @default 0.5
255
+ */
62
256
  scoreThreshold?: number;
63
257
  };
258
+ /**
259
+ * Evasion-protection configuration. All passes are enabled by default and
260
+ * only run when their trigger characters are present in the text, so they
261
+ * add near-zero cost on ordinary input.
262
+ */
263
+ evasionProtection?: {
264
+ /**
265
+ * Fold unicode evasion: fullwidth forms (ｆｕｃｋ), Cyrillic/Greek
266
+ * homoglyphs (fυck), diacritics (fück) and invisible characters
267
+ * (zero-width spaces, soft hyphens) injected inside words.
268
+ *
269
+ * @default true
270
+ */
271
+ unicode?: boolean;
272
+ /**
273
+ * Collapse stretched characters ("fuuuuck" -> "fuck"). Only triggers when
274
+ * a run of 3+ identical characters exists.
275
+ *
276
+ * @default true
277
+ */
278
+ repeatedCharacters?: boolean;
279
+ /**
280
+ * Resolve masked characters as single-character wildcards ("f*ck",
281
+ * "f#ck", "f@ck"). A masked token only matches when the visible letters
282
+ * align exactly with a dictionary word.
283
+ *
284
+ * @default true
285
+ */
286
+ maskedCharacters?: boolean;
287
+ /**
288
+ * Detect words spelled out with uniform single separators
289
+ * ("f u c k", "f.u.c.k"). The joined letters must equal a dictionary
290
+ * word exactly, which keeps initialisms like "U S A" clean.
291
+ *
292
+ * @default true
293
+ */
294
+ separatedLetters?: boolean;
295
+ /**
296
+ * Detect unambiguous profanity stems embedded inside larger tokens
297
+ * ("sisfuck", "totalshitshow"). Applies only to a curated list of
298
+ * strong words that never occur in legitimate vocabulary, with built-in
299
+ * exceptions (Scunthorpe, mishit, snigger, ...), so "classic", "bass"
300
+ * and "Hitchcock" stay clean.
301
+ *
302
+ * @default true
303
+ */
304
+ embeddedWords?: boolean;
305
+ };
306
+ /**
307
+ * Performance optimization configuration.
308
+ */
64
309
  performance?: {
310
+ /**
311
+ * Maximum number of results to cache in LRU cache.
312
+ *
313
+ * @default 1000
314
+ */
65
315
  cacheSize?: number;
316
+ /**
317
+ * Enable result caching to speed up repeated queries.
318
+ * Stores detection results for previously seen text.
319
+ *
320
+ * @default false
321
+ */
66
322
  enableCaching?: boolean;
67
323
  };
68
324
  }
69
325
  /**
70
- * Severity levels for profanity detection.
326
+ * Severity levels for profanity detection results.
327
+ *
328
+ * @enum {number}
329
+ * @description Categorizes the severity of detected profanity based on the number
330
+ * of unique words and total matches found in the text.
331
+ *
332
+ * @readonly
333
+ * @example
334
+ * ```typescript
335
+ * const result = filter.detect("some text");
336
+ * if (result.severity === ProfanitySeverity.EXTREME) {
337
+ * // Handle extreme profanity
338
+ * }
339
+ * ```
71
340
  */
72
341
  export declare enum ProfanitySeverity {
342
+ /** No profanity detected */
343
+ NONE = 0,
344
+ /** Mild profanity: 1 unique word or 1 total match */
73
345
  MILD = 1,
346
+ /** Moderate profanity: 2 unique words or 2 total matches */
74
347
  MODERATE = 2,
348
+ /** Severe profanity: 3 unique words or 3 total matches */
75
349
  SEVERE = 3,
350
+ /** Extreme profanity: 4+ unique words or 5+ total matches */
76
351
  EXTREME = 4
77
352
  }
78
353
  /**
79
- * Detection result for profanity detection.
354
+ * Result object returned from profanity detection operations.
355
+ *
356
+ * @interface ProfanityDetectionResult
357
+ * @description Contains comprehensive information about detected profanity including
358
+ * what was found, where it was found, how severe it is, and a cleaned version of the text.
359
+ *
360
+ * @example
361
+ * ```typescript
362
+ * const result = filter.detect("This is a bad word");
363
+ * console.log(result.hasProfanity); // true
364
+ * console.log(result.detectedWords); // ['bad word']
365
+ * console.log(result.cleanedText); // 'This is a *** ****'
366
+ * console.log(result.severity); // ProfanitySeverity.MILD
367
+ * console.log(result.positions); // [{ word: 'bad word', start: 10, end: 18 }]
368
+ * ```
80
369
  */
81
370
  export interface ProfanityDetectionResult {
371
+ /**
372
+ * Whether any profanity was detected in the text.
373
+ *
374
+ * @type {boolean}
375
+ */
82
376
  hasProfanity: boolean;
377
+ /**
378
+ * Array of detected profane words/phrases as they appeared in the original text.
379
+ * Includes case and formatting from the original text.
380
+ *
381
+ * @type {string[]}
382
+ */
83
383
  detectedWords: string[];
384
+ /**
385
+ * The text with all profanity replaced by placeholder characters.
386
+ * Each profane character is replaced with the configured placeholder (default: '*').
387
+ *
388
+ * @type {string}
389
+ */
84
390
  cleanedText: string;
391
+ /**
392
+ * Severity level of detected profanity.
393
+ *
394
+ * @type {ProfanitySeverity}
395
+ */
85
396
  severity: ProfanitySeverity;
397
+ /**
398
+ * Precise positions of each detected profane word in the original text.
399
+ * Useful for highlighting or further processing.
400
+ *
401
+ * @type {Array<{ word: string; start: number; end: number }>}
402
+ */
86
403
  positions: Array<{
87
404
  word: string;
88
405
  start: number;
@@ -90,7 +407,99 @@ export interface ProfanityDetectionResult {
90
407
  }>;
91
408
  }
92
409
  /**
93
- * Main class for profanity detection and filtering.
410
+ * AllProfanity - Professional-grade multilingual profanity detection and filtering library.
411
+ *
412
+ * @class AllProfanity
413
+ * @description A comprehensive, high-performance profanity filtering system supporting 9+ languages
414
+ * with advanced features including leet speak detection, context analysis, multiple matching algorithms,
415
+ * and customizable filtering options.
416
+ *
417
+ * @remarks
418
+ * ### Features:
419
+ * - **Multi-language Support**: English, Hindi, French, German, Spanish, Bengali, Tamil, Telugu, Brazilian Portuguese
420
+ * - **Advanced Algorithms**: Trie, Aho-Corasick, Bloom Filter, and hybrid approaches
421
+ * - **Leet Speak Detection**: Automatically normalizes and detects variations like "h3ll0"
422
+ * - **Context Analysis**: Reduces false positives using surrounding word context
423
+ * - **Performance**: Built-in caching and optimized data structures
424
+ * - **Flexible**: Custom dictionaries, whitelisting, severity levels
425
+ *
426
+ * ### Default Behavior:
427
+ * - Loads English and Hindi dictionaries by default
428
+ * - Case-insensitive matching
429
+ * - Leet speak detection enabled
430
+ * - Uses Trie algorithm (fastest for most cases)
431
+ *
432
+ * @example
433
+ * ```typescript
434
+ * // Basic usage with default instance
435
+ * import allProfanity from 'allprofanity';
436
+ *
437
+ * const result = allProfanity.detect("This is some bad text");
438
+ * console.log(result.hasProfanity); // true
439
+ * console.log(result.cleanedText); // "This is some *** text"
440
+ * console.log(result.severity); // ProfanitySeverity.MILD
441
+ * ```
442
+ *
443
+ * @example
444
+ * ```typescript
445
+ * // Advanced usage with custom configuration
446
+ * import { AllProfanity, ProfanitySeverity } from 'allprofanity';
447
+ *
448
+ * const filter = new AllProfanity({
449
+ * languages: ['english', 'french', 'spanish'],
450
+ * enableLeetSpeak: true,
451
+ * strictMode: true,
452
+ * algorithm: {
453
+ * matching: 'hybrid',
454
+ * useBloomFilter: true
455
+ * },
456
+ * performance: {
457
+ * enableCaching: true,
458
+ * cacheSize: 500
459
+ * },
460
+ * whitelistWords: ['class', 'assignment']
461
+ * });
462
+ *
463
+ * const text = "This text has some b@d w0rds";
464
+ * const result = filter.detect(text);
465
+ *
466
+ * if (result.hasProfanity) {
467
+ * console.log(`Found ${result.detectedWords.length} profane words`);
468
+ * console.log(`Severity: ${ProfanitySeverity[result.severity]}`);
469
+ * console.log(`Cleaned: ${result.cleanedText}`);
470
+ * }
471
+ * ```
472
+ *
473
+ * @example
474
+ * ```typescript
475
+ * // Using individual methods
476
+ * const filter = new AllProfanity();
477
+ *
478
+ * // Simple check
479
+ * if (filter.check("some text")) {
480
+ * console.log("Contains profanity!");
481
+ * }
482
+ *
483
+ * // Clean with custom placeholder
484
+ * const cleaned = filter.clean("bad words here", "#");
485
+ *
486
+ * // Load additional languages
487
+ * filter.loadLanguage('german');
488
+ * filter.loadIndianLanguages(); // Loads hindi, bengali, tamil, telugu
489
+ *
490
+ * // Add custom words
491
+ * filter.add(['customword1', 'customword2']);
492
+ *
493
+ * // Remove words
494
+ * filter.remove(['someword']);
495
+ *
496
+ * // Whitelist words
497
+ * filter.addToWhitelist(['class', 'assignment']);
498
+ * ```
499
+ *
500
+ * @see {@link AllProfanityOptions} for all configuration options
501
+ * @see {@link ProfanityDetectionResult} for detection result format
502
+ * @see {@link ProfanitySeverity} for severity levels
94
503
  */
95
504
  export declare class AllProfanity {
96
505
  private readonly profanityTrie;
@@ -102,17 +511,60 @@ export declare class AllProfanity {
102
511
  private caseSensitive;
103
512
  private strictMode;
104
513
  private detectPartialWords;
514
+ private evasionUnicode;
515
+ private evasionRepeatedChars;
516
+ private evasionMaskedChars;
517
+ private evasionSeparatedLetters;
518
+ private evasionEmbeddedWords;
105
519
  private readonly availableLanguages;
106
520
  private readonly leetMappings;
107
521
  private readonly dynamicWords;
108
522
  private ahoCorasickAutomaton;
109
523
  private bloomFilter;
110
524
  private contextAnalyzer;
525
+ private contextScoreThreshold;
111
526
  private matchingAlgorithm;
112
527
  private resultCache;
528
+ private cacheMaxSize;
529
+ private leetTokensByFirstChar;
113
530
  /**
114
- * Create an AllProfanity instance.
115
- * @param options - Profanity filter configuration options.
531
+ * Creates a new AllProfanity instance with the specified configuration.
532
+ *
533
+ * @constructor
534
+ * @param {AllProfanityOptions} [options] - Configuration options for profanity detection behavior
535
+ *
536
+ * @remarks
537
+ * ### Default Initialization:
538
+ * - Loads English and Hindi dictionaries automatically
539
+ * - Enables leet speak detection
540
+ * - Case-insensitive matching
541
+ * - Uses Trie algorithm for pattern matching
542
+ *
543
+ * ### Performance Considerations:
544
+ * - Initial load time depends on number of languages loaded
545
+ * - Aho-Corasick automaton (if enabled) is built during construction
546
+ * - Bloom Filter (if enabled) is populated during construction
547
+ *
548
+ * @throws {TypeError} If invalid options are provided
549
+ *
550
+ * @example
551
+ * ```typescript
552
+ * // Default instance
553
+ * const filter = new AllProfanity();
554
+ *
555
+ * // Custom configuration
556
+ * const filter = new AllProfanity({
557
+ * languages: ['english', 'french'],
558
+ * strictMode: true,
559
+ * defaultPlaceholder: '#',
560
+ * algorithm: { matching: 'hybrid' }
561
+ * });
562
+ *
563
+ * // Silent mode (no logging)
564
+ * const filter = new AllProfanity({ silent: true });
565
+ * ```
566
+ *
567
+ * @see {@link AllProfanityOptions} for all available configuration options
116
568
  */
117
569
  constructor(options?: AllProfanityOptions);
118
570
  /**
@@ -120,17 +572,56 @@ export declare class AllProfanity {
120
572
  */
121
573
  private initializeAdvancedAlgorithms;
122
574
  /**
123
- * Normalize leet speak to regular characters.
124
- * @param text - The input text.
125
- * @returns Normalized text.
575
+ * Normalize leet speak to regular characters, keeping a map from each
576
+ * normalized character back to its source range in the input text.
577
+ *
578
+ * For normalized index i, starts[i]/ends[i] give the [start, end) range in
579
+ * the input that produced that character. A match [s, e) in the normalized
580
+ * string therefore spans [starts[s], ends[e - 1]) in the input. This is what
581
+ * keeps positions correct when length-changing mappings like "ph" -> "f"
582
+ * apply.
583
+ */
584
+ private normalizeLeetSpeakWithMap;
585
+ /**
586
+ * Fold unicode evasion tactics into ASCII with a position map: fullwidth
587
+ * forms, Cyrillic/Greek homoglyphs, Latin diacritics, and invisible
588
+ * characters injected inside words. Non-Latin scripts (Devanagari, Tamil,
589
+ * etc.) pass through untouched. Returns null when nothing changed.
590
+ */
591
+ private unicodeNormalizeWithMap;
592
+ /**
593
+ * Collapse runs of repeated characters ("fuuuuck" -> "fuck") with a
594
+ * position map. Only triggers when a run of 3+ identical characters
595
+ * exists, so ordinary doubled letters never pay for this pass.
596
+ * Returns null when not triggered.
126
597
  */
127
- private normalizeLeetSpeak;
598
+ private collapseRepeatsWithMap;
128
599
  /**
129
- * Escape regex special characters in a string.
130
- * @param str - The string to escape.
131
- * @returns The escaped string.
600
+ * Build the list of (text, position-map) variants to scan: the base text
601
+ * plus unicode-folded, leet-normalized and repeat-collapsed variants, each
602
+ * included only when its normalization actually changed something.
132
603
  */
133
- private escapeRegex;
604
+ private buildScanPasses;
605
+ /**
606
+ * Find dictionary words hidden behind masked characters ("f*ck", "f#ck").
607
+ * Each mask matches exactly one character and the token's visible letters
608
+ * must align with a dictionary word, so "c#" or "5% off" never flag.
609
+ */
610
+ private findMaskedMatches;
611
+ /**
612
+ * Find words spelled out with a uniform single separator ("f u c k",
613
+ * "f.u.c.k"). The joined letters must equal a dictionary word exactly:
614
+ * runs like "U S A" or letters inside spelled-out sentences never flag.
615
+ */
616
+ private findSeparatedMatches;
617
+ /**
618
+ * Find unambiguous profanity stems embedded inside larger tokens
619
+ * ("sisfuck", "totalshitshow"). Only stems from EMBEDDED_STRONG_STEMS that
620
+ * are currently in the dictionary are considered, and tokens listed in
621
+ * EMBEDDED_SAFE_WORDS or the whitelist never flag. The whole containing
622
+ * token is reported so cleaning masks all of it.
623
+ */
624
+ private findEmbeddedMatches;
134
625
  /**
135
626
  * Check if a match is bounded by word boundaries (strict mode).
136
627
  * @param text - The text.
@@ -154,6 +645,11 @@ export declare class AllProfanity {
154
645
  * @returns True if whitelisted, false otherwise.
155
646
  */
156
647
  private isWhitelistedMatch;
648
+ /**
649
+ * In partial-word mode, check whether the word CONTAINING the match is
650
+ * whitelisted: with "classic" whitelisted, the embedded "ass" must not flag.
651
+ */
652
+ private isWhitelistedContainingWord;
157
653
  /**
158
654
  * Remove overlapping matches, keeping only the longest at each start position.
159
655
  * @param matches - Array of match results.
@@ -165,7 +661,13 @@ export declare class AllProfanity {
165
661
  */
166
662
  private findMatchesWithAhoCorasick;
167
663
  /**
168
- * Hybrid approach: Aho-Corasick for fast matching, Bloom Filter for validation
664
+ * Check whether the Bloom Filter can quickly rule out any profanity in the
665
+ * text. Only safe for ASCII whole-word matching: partial matches and
666
+ * non-ASCII scripts can match inside tokens, so they bypass the prefilter.
667
+ */
668
+ private bloomQuickReject;
669
+ /**
670
+ * Hybrid approach: Bloom Filter for quick rejection, Aho-Corasick for matching
169
671
  */
170
672
  private findMatchesHybrid;
171
673
  /**
@@ -173,9 +675,61 @@ export declare class AllProfanity {
173
675
  */
174
676
  private applyContextAnalysis;
175
677
  /**
176
- * Detect profanity in a given text.
177
- * @param text - The text to check.
178
- * @returns Profanity detection result.
678
+ * Drop all cached detection results. Must be called whenever the word lists
679
+ * or any option that affects detection output changes.
680
+ */
681
+ private invalidateCache;
682
+ /**
683
+ * Detects profanity in the provided text and returns comprehensive analysis.
684
+ *
685
+ * @param {string} text - The text to analyze for profanity
686
+ * @returns {ProfanityDetectionResult} Detailed detection result including matches, positions, severity, and cleaned text
687
+ *
688
+ * @throws {TypeError} If text is not a string
689
+ *
690
+ * @remarks
691
+ * ### Performance:
692
+ * - Time Complexity: O(n*m) where n is text length, m is average word length in dictionary
693
+ * - With Bloom Filter: O(n) average case (faster early rejection)
694
+ * - With Caching: O(1) for repeated identical text
695
+ *
696
+ * ### Features:
697
+ * - Detects leet speak variations (if enabled): "h3ll0" → "hello"
698
+ * - Respects word boundaries (strict mode) or detects partial matches
699
+ * - Returns exact positions for highlighting/masking
700
+ * - Calculates severity based on match count and uniqueness
701
+ *
702
+ * ### Caching:
703
+ * - Results are cached if `performance.enableCaching` is true
704
+ * - Cache uses LRU eviction when size limit is reached
705
+ *
706
+ * @example
707
+ * ```typescript
708
+ * const filter = new AllProfanity();
709
+ * const result = filter.detect("This has bad words");
710
+ *
711
+ * console.log(result.hasProfanity); // true
712
+ * console.log(result.detectedWords); // ['bad']
713
+ * console.log(result.cleanedText); // 'This has *** words'
714
+ * console.log(result.severity); // ProfanitySeverity.MILD
715
+ * console.log(result.positions); // [{ word: 'bad', start: 9, end: 12 }]
716
+ * ```
717
+ *
718
+ * @example
719
+ * ```typescript
720
+ * // With leet speak detection
721
+ * const filter = new AllProfanity({ enableLeetSpeak: true });
722
+ * const result = filter.detect("st0p b3ing b@d");
723
+ *
724
+ * if (result.hasProfanity) {
725
+ * result.positions.forEach(pos => {
726
+ * console.log(`Found "${pos.word}" at position ${pos.start}-${pos.end}`);
727
+ * });
728
+ * }
729
+ * ```
730
+ *
731
+ * @see {@link ProfanityDetectionResult} for result structure
732
+ * @see {@link ProfanitySeverity} for severity levels
179
733
  */
180
734
  detect(text: string): ProfanityDetectionResult;
181
735
  /**
@@ -193,33 +747,219 @@ export declare class AllProfanity {
193
747
  */
194
748
  private generateCleanedText;
195
749
  /**
196
- * Check if a string contains profanity.
197
- * @param text - The text to check.
198
- * @returns True if profanity is found, false otherwise.
750
+ * Quick boolean check for profanity presence in text.
751
+ *
752
+ * @param {string} text - The text to check for profanity
753
+ * @returns {boolean} True if profanity is detected, false otherwise
754
+ *
755
+ * @throws {TypeError} If text is not a string
756
+ *
757
+ * @remarks
758
+ * - Convenience method that internally calls `detect()` and returns only the boolean result
759
+ * - For detailed information about matches, use `detect()` instead
760
+ * - Results are cached if caching is enabled (same cache as `detect()`)
761
+ *
762
+ * @example
763
+ * ```typescript
764
+ * const filter = new AllProfanity();
765
+ *
766
+ * if (filter.check("This has bad words")) {
767
+ * console.log("Profanity detected!");
768
+ * }
769
+ *
770
+ * // Quick validation
771
+ * const isClean = !filter.check(userInput);
772
+ * ```
773
+ *
774
+ * @see {@link detect} for detailed profanity analysis
199
775
  */
200
776
  check(text: string): boolean;
201
777
  /**
202
- * Clean text with a custom placeholder.
203
- * @param text - The text to clean.
204
- * @param placeholder - The placeholder to use.
205
- * @returns Cleaned text.
778
+ * Trie scan that stops at the first match surviving the whole-word,
779
+ * whitelist and boundary checks. Powers the fast path in check().
780
+ */
781
+ private hasMatchInPass;
782
+ /**
783
+ * Cleans text by replacing profanity with a placeholder character.
784
+ *
785
+ * @param {string} text - The text to clean
786
+ * @param {string} [placeholder] - Optional custom placeholder character (uses default if not provided)
787
+ * @returns {string} The cleaned text with profanity replaced
788
+ *
789
+ * @throws {TypeError} If text is not a string
790
+ *
791
+ * @remarks
792
+ * ### Character-level Replacement:
793
+ * - Each profane character is replaced individually
794
+ * - "bad" with placeholder "*" becomes "***"
795
+ * - Preserves text length and structure
796
+ *
797
+ * ### Placeholder Behavior:
798
+ * - If no placeholder provided, uses the instance's default placeholder
799
+ * - If placeholder provided, uses only the first character
800
+ * - Empty placeholder throws error
801
+ *
802
+ * @example
803
+ * ```typescript
804
+ * const filter = new AllProfanity();
805
+ *
806
+ * // Using default placeholder (*)
807
+ * const cleaned = filter.clean("This has bad words");
808
+ * console.log(cleaned); // "This has *** *****"
809
+ *
810
+ * // Using custom placeholder
811
+ * const cleaned = filter.clean("This has bad words", "#");
812
+ * console.log(cleaned); // "This has ### #####"
813
+ * ```
814
+ *
815
+ * @example
816
+ * ```typescript
817
+ * // Clean user-generated content for display
818
+ * const userComment = "Some inappropriate words here";
819
+ * const safeComment = filter.clean(userComment);
820
+ * displayComment(safeComment);
821
+ * ```
822
+ *
823
+ * @see {@link cleanWithPlaceholder} for word-level replacement
824
+ * @see {@link setPlaceholder} to change default placeholder
206
825
  */
207
826
  clean(text: string, placeholder?: string): string;
208
827
  /**
209
- * Clean text by replacing each profane word with a single placeholder (word-level).
210
- * @param text - The text to clean.
211
- * @param placeholder - The placeholder to use.
212
- * @returns Word-level cleaned text.
828
+ * Cleans text by replacing each profane word with a single placeholder string (word-level replacement).
829
+ *
830
+ * @param {string} text - The text to clean
831
+ * @param {string} [placeholder="***"] - The placeholder string to use for each profane word
832
+ * @returns {string} The cleaned text with each profane word replaced by the placeholder
833
+ *
834
+ * @throws {TypeError} If text is not a string
835
+ *
836
+ * @remarks
837
+ * ### Word-level Replacement:
838
+ * - Each profane word is replaced with the entire placeholder string (not character-by-character)
839
+ * - "bad words" with placeholder "***" becomes "*** ***"
840
+ * - Does NOT preserve original text length
841
+ *
842
+ * ### Difference from `clean()`:
843
+ * - `clean()`: Character-level replacement - "badword" becomes "*******" (preserves length)
844
+ * - `cleanWithPlaceholder()`: Word-level replacement - "badword" becomes "***" (fixed placeholder)
845
+ *
846
+ * @example
847
+ * ```typescript
848
+ * const filter = new AllProfanity();
849
+ *
850
+ * const text = "This has bad words"; // cleaned below with the default placeholder (***)
851
+ * const cleaned = filter.cleanWithPlaceholder(text);
852
+ * console.log(cleaned); // "This has *** ***"
853
+ *
854
+ * // Custom placeholder
855
+ * const cleaned2 = filter.cleanWithPlaceholder(text, "[CENSORED]");
856
+ * console.log(cleaned2); // "This has [CENSORED] [CENSORED]"
857
+ * ```
858
+ *
859
+ * @example
860
+ * ```typescript
861
+ * // Censoring chat messages
862
+ * const message = "You are a badword and stupid";
863
+ * const censored = filter.cleanWithPlaceholder(message, "[***]");
864
+ * // Result: "You are a [***] and [***]"
865
+ * ```
866
+ *
867
+ * @see {@link clean} for character-level replacement
213
868
  */
214
869
  cleanWithPlaceholder(text: string, placeholder?: string): string;
215
870
  /**
216
- * Add word(s) to the profanity filter.
217
- * @param word - Word or array of words to add.
871
+ * Dynamically adds one or more words to the profanity filter at runtime.
872
+ *
873
+ * @param {string | string[]} word - A single word or array of words to add to the filter
874
+ * @returns {void}
875
+ *
876
+ * @remarks
877
+ * ### Behavior:
878
+ * - Words are added to all active data structures (Trie, Aho-Corasick, Bloom Filter)
879
+ * - Automatically normalizes words based on caseSensitive setting
880
+ * - Skips whitelisted words
881
+ * - Validates and filters out non-string or empty values
882
+ * - Changes take effect immediately for subsequent detect/check/clean calls
883
+ *
884
+ * ### Use Cases:
885
+ * - Adding context-specific profanity
886
+ * - Building dynamic word lists from user reports
887
+ * - Customizing filters for specific communities/applications
888
+ *
889
+ * @example
890
+ * ```typescript
891
+ * const filter = new AllProfanity();
892
+ *
893
+ * // Add single word
894
+ * filter.add('newbadword');
895
+ *
896
+ * // Add multiple words
897
+ * filter.add(['word1', 'word2', 'word3']);
898
+ *
899
+ * // Now these words will be detected
900
+ * filter.check('newbadword'); // true
901
+ * ```
902
+ *
903
+ * @example
904
+ * ```typescript
905
+ * // Add game-specific slang dynamically
906
+ * const filter = new AllProfanity();
907
+ * const gamingSlang = ['noob', 'trash', 'tryhard'];
908
+ * filter.add(gamingSlang);
909
+ *
910
+ * const message = "You're such a noob";
911
+ * console.log(filter.check(message)); // true
912
+ * ```
913
+ *
914
+ * @see {@link remove} to remove words
915
+ * @see {@link loadCustomDictionary} for loading named dictionaries
218
916
  */
219
917
  add(word: string | string[]): void;
220
918
  /**
221
- * Remove word(s) from the profanity filter.
222
- * @param word - Word or array of words to remove.
919
+ * Dynamically removes one or more words from the profanity filter at runtime.
920
+ *
921
+ * @param {string | string[]} word - A single word or array of words to remove from the filter
922
+ * @returns {void}
923
+ *
924
+ * @remarks
925
+ * ### Behavior:
926
+ * - Removes words from all active data structures (Trie, dynamic words set)
927
+ * - Normalizes words based on caseSensitive setting before removal
928
+ * - Only removes dynamically added words, not words from loaded language dictionaries
929
+ * - Changes take effect immediately for subsequent detect/check/clean calls
930
+ *
931
+ * ### Important Notes:
932
+ * - Cannot remove words from built-in language dictionaries
933
+ * - To exclude dictionary words, use `addToWhitelist()` instead
934
+ * - Validates and filters out non-string or empty values
935
+ *
936
+ * @example
937
+ * ```typescript
938
+ * const filter = new AllProfanity();
939
+ *
940
+ * // Add then remove a word
941
+ * filter.add('tempword');
942
+ * filter.check('tempword'); // true
943
+ *
944
+ * filter.remove('tempword');
945
+ * filter.check('tempword'); // false
946
+ *
947
+ * // Remove multiple words
948
+ * filter.remove(['word1', 'word2']);
949
+ * ```
950
+ *
951
+ * @example
952
+ * ```typescript
953
+ * // Managing custom word list
954
+ * const filter = new AllProfanity();
955
+ * filter.add(['custom1', 'custom2', 'custom3']);
956
+ *
957
+ * // Later, remove one that's no longer needed
958
+ * filter.remove('custom2');
959
+ * ```
960
+ *
961
+ * @see {@link add} to add words
962
+ * @see {@link addToWhitelist} to exclude dictionary words without removing them
223
963
  */
224
964
  remove(word: string | string[]): void;
225
965
  /**
@@ -239,9 +979,60 @@ export declare class AllProfanity {
239
979
  */
240
980
  private isWhitelisted;
241
981
  /**
242
- * Load a built-in language dictionary.
243
- * @param language - The language key.
244
- * @returns True if loaded, false otherwise.
982
+ * Loads a built-in language dictionary into the profanity filter.
983
+ *
984
+ * @param {string} language - The language key to load (case-insensitive)
985
+ * @returns {boolean} True if the language was loaded successfully or was already loaded, false if the language was not found
986
+ *
987
+ * @remarks
988
+ * ### Available Languages:
989
+ * - `'english'` - English profanity words
990
+ * - `'hindi'` - Hindi profanity words
991
+ * - `'french'` - French profanity words
992
+ * - `'german'` - German profanity words
993
+ * - `'spanish'` - Spanish profanity words
994
+ * - `'bengali'` - Bengali profanity words
995
+ * - `'tamil'` - Tamil profanity words
996
+ * - `'telugu'` - Telugu profanity words
997
+ * - `'brazilian'` - Brazilian Portuguese profanity words
998
+ *
999
+ * ### Behavior:
1000
+ * - Language keys are case-insensitive
1001
+ * - Loading is idempotent - calling it multiple times for the same language is safe
1002
+ * - Returns true if language loaded successfully or was already loaded
1003
+ * - Returns false if language not found
1004
+ * - Logs success/failure messages (unless silent mode enabled)
1005
+ * - Words are added to all active data structures
1006
+ *
1007
+ * ### Default Languages:
1008
+ * English and Hindi are loaded automatically in the constructor
1009
+ *
1010
+ * @example
1011
+ * ```typescript
1012
+ * const filter = new AllProfanity();
1013
+ *
1014
+ * // Load additional languages
1015
+ * filter.loadLanguage('french');
1016
+ * filter.loadLanguage('spanish');
1017
+ *
1018
+ * // Case-insensitive
1019
+ * filter.loadLanguage('GERMAN'); // Works
1020
+ *
1021
+ * // Check if loaded
1022
+ * console.log(filter.getLoadedLanguages()); // ['english', 'hindi', 'french', 'spanish', 'german']
1023
+ * ```
1024
+ *
1025
+ * @example
1026
+ * ```typescript
1027
+ * // Load all Indian languages at once
1028
+ * const filter = new AllProfanity();
1029
+ * filter.loadIndianLanguages();
1030
+ * ```
1031
+ *
1032
+ * @see {@link loadLanguages} to load multiple languages at once
1033
+ * @see {@link loadIndianLanguages} for convenience method
1034
+ * @see {@link getAvailableLanguages} to see all available languages
1035
+ * @see {@link getLoadedLanguages} to see currently loaded languages
245
1036
  */
246
1037
  loadLanguage(language: string): boolean;
247
1038
  /**
@@ -256,9 +1047,64 @@ export declare class AllProfanity {
256
1047
  */
257
1048
  loadIndianLanguages(): number;
258
1049
  /**
259
- * Load a custom dictionary.
260
- * @param name - Name of the dictionary.
261
- * @param words - Words to add.
1050
+ * Loads a custom dictionary of profane words with a specific name.
1051
+ *
1052
+ * @param {string} name - Unique name/identifier for this custom dictionary
1053
+ * @param {string[]} words - Array of profane words to add to the dictionary
1054
+ * @returns {void}
1055
+ *
1056
+ * @throws {TypeError} If name is not a string or words is not an array
1057
+ *
1058
+ * @remarks
1059
+ * ### Behavior:
1060
+ * - Creates a new named dictionary or overwrites existing one with same name
1061
+ * - Validates and filters out non-string and empty values from words array
1062
+ * - Words are added to all active data structures (Trie, Aho-Corasick, Bloom Filter)
1063
+ * - Dictionary name is converted to lowercase for storage
1064
+ * - Logs count of loaded words (unless silent mode enabled)
1065
+ *
1066
+ * ### Use Cases:
1067
+ * - Domain-specific profanity (gaming, medical, legal, etc.)
1068
+ * - Organization-specific word lists
1069
+ * - Temporary or context-dependent filters
1070
+ * - Testing and development
1071
+ *
1072
+ * @example
1073
+ * ```typescript
1074
+ * const filter = new AllProfanity();
1075
+ *
1076
+ * // Load gaming-specific slang
1077
+ * filter.loadCustomDictionary('gaming', [
1078
+ * 'noob',
1079
+ * 'scrub',
1080
+ * 'tryhard',
1081
+ * 'trash'
1082
+ * ]);
1083
+ *
1084
+ * // Load company-specific terms
1085
+ * filter.loadCustomDictionary('company', [
1086
+ * 'competitor1',
1087
+ * 'bannedTerm1',
1088
+ * 'inappropriateJargon'
1089
+ * ]);
1090
+ *
1091
+ * console.log(filter.check('You are such a noob')); // true
1092
+ * ```
1093
+ *
1094
+ * @example
1095
+ * ```typescript
1096
+ * // Load from external source
1097
+ * const filter = new AllProfanity();
1098
+ *
1099
+ * async function loadExternalDictionary() {
1100
+ * const response = await fetch('https://example.com/custom-words.json');
1101
+ * const customWords = await response.json();
1102
+ * filter.loadCustomDictionary('external', customWords);
1103
+ * }
1104
+ * ```
1105
+ *
1106
+ * @see {@link add} for adding individual words dynamically
1107
+ * @see {@link loadLanguage} for loading built-in language dictionaries
262
1108
  */
263
1109
  loadCustomDictionary(name: string, words: string[]): void;
264
1110
  /**
@@ -298,9 +1144,10 @@ export declare class AllProfanity {
298
1144
  */
299
1145
  getConfig(): Partial<AllProfanityOptions>;
300
1146
  /**
301
- * Rebuild the profanity trie from loaded dictionaries and dynamic words.
1147
+ * Rebuild all matching structures (trie, Aho-Corasick automaton, Bloom
1148
+ * Filter) from loaded dictionaries and dynamic words.
302
1149
  */
303
- private rebuildTrie;
1150
+ private rebuildIndexes;
304
1151
  /**
305
1152
  * Update configuration options for the profanity filter.
306
1153
  * @param options - Partial configuration object.
@@ -315,6 +1162,7 @@ export declare class AllProfanity {
315
1162
  }
316
1163
  /**
317
1164
  * Singleton instance of AllProfanity with default configuration.
1165
+ * Silent so that importing the library never writes to the console.
318
1166
  */
319
1167
  declare const allProfanity: AllProfanity;
320
1168
  export default allProfanity;