allprofanity 2.1.0 → 2.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -1,4 +1,4 @@
1
- // Import language dictionaries (assuming these exist)
1
+ // Language dictionaries imports
2
2
  import englishBadWords from "./languages/english-words.js";
3
3
  import hindiBadWords from "./languages/hindi-words.js";
4
4
  import frenchBadWords from "./languages/french-words.js";
@@ -7,6 +7,11 @@ import spanishBadWords from "./languages/spanish-words.js";
7
7
  import bengaliBadWords from "./languages/bengali-words.js";
8
8
  import tamilBadWords from "./languages/tamil-words.js";
9
9
  import teluguBadWords from "./languages/telugu-words.js";
10
+ import brazilianBadWords from "./languages/brazilian-words.js";
11
+ // Advanced algorithm imports
12
+ import { AhoCorasick } from "./algos/aho-corasick.js";
13
+ import { BloomFilter } from "./algos/bloom-filter.js";
14
+ import { ContextAnalyzer } from "./algos/context-patterns.js";
10
15
  // Export language dictionaries for direct access
11
16
  export { default as englishBadWords } from "./languages/english-words.js";
12
17
  export { default as hindiBadWords } from "./languages/hindi-words.js";
@@ -16,8 +21,9 @@ export { default as spanishBadWords } from "./languages/spanish-words.js";
16
21
  export { default as bengaliBadWords } from "./languages/bengali-words.js";
17
22
  export { default as tamilBadWords } from "./languages/tamil-words.js";
18
23
  export { default as teluguBadWords } from "./languages/telugu-words.js";
24
+ export { default as brazilianBadWords } from "./languages/brazilian-words.js";
19
25
  /**
20
- * Default console logger implementation
26
+ * Default console logger implementation.
21
27
  */
22
28
  class ConsoleLogger {
23
29
  info(message) {
@@ -31,7 +37,7 @@ class ConsoleLogger {
31
37
  }
32
38
  }
33
39
  /**
34
- * Severity levels for profanity detection
40
+ * Severity levels for profanity detection.
35
41
  */
36
42
  export var ProfanitySeverity;
37
43
  (function (ProfanitySeverity) {
@@ -41,7 +47,11 @@ export var ProfanitySeverity;
41
47
  ProfanitySeverity[ProfanitySeverity["EXTREME"] = 4] = "EXTREME";
42
48
  })(ProfanitySeverity = ProfanitySeverity || (ProfanitySeverity = {}));
43
49
  /**
44
- * Validates input parameters
50
+ * Validate a string parameter.
51
+ * @param input - The input to validate.
52
+ * @param paramName - The name of the parameter.
53
+ * @returns The validated string.
54
+ * @throws {TypeError} If input is not a string.
45
55
  */
46
56
  function validateString(input, paramName) {
47
57
  if (typeof input !== "string") {
@@ -49,6 +59,13 @@ function validateString(input, paramName) {
49
59
  }
50
60
  return input;
51
61
  }
62
+ /**
63
+ * Validate a string array parameter.
64
+ * @param input - The input to validate.
65
+ * @param paramName - The name of the parameter.
66
+ * @returns The validated string array.
67
+ * @throws {TypeError} If input is not an array.
68
+ */
52
69
  function validateStringArray(input, paramName) {
53
70
  if (!Array.isArray(input)) {
54
71
  throw new TypeError(`${paramName} must be an array`);
@@ -62,7 +79,7 @@ function validateStringArray(input, paramName) {
62
79
  });
63
80
  }
64
81
  /**
65
- * Efficient Trie data structure for fast string matching
82
+ * Trie node for efficient string matching.
66
83
  */
67
84
  class TrieNode {
68
85
  constructor() {
@@ -71,7 +88,8 @@ class TrieNode {
71
88
  this.word = "";
72
89
  }
73
90
  /**
74
- * Add a word to the trie
91
+ * Add a word to the trie.
92
+ * @param word - The word to add.
75
93
  */
76
94
  addWord(word) {
77
95
  let current = this;
@@ -88,7 +106,9 @@ class TrieNode {
88
106
  current.word = word;
89
107
  }
90
108
  /**
91
- * Remove a word from the trie
109
+ * Remove a word from the trie.
110
+ * @param word - The word to remove.
111
+ * @returns True if the word was removed, false otherwise.
92
112
  */
93
113
  removeWord(word) {
94
114
  return this.removeHelper(word, 0);
@@ -112,7 +132,11 @@ class TrieNode {
112
132
  return false;
113
133
  }
114
134
  /**
115
- * Find all matches starting at a given position
135
+ * Find all matches starting at a given position.
136
+ * @param text - The text to search.
137
+ * @param startPos - The start position.
138
+ * @param allowPartial - Whether to allow partial word matches.
139
+ * @returns Array of matches.
116
140
  */
117
141
  findMatches(text, startPos, allowPartial) {
118
142
  const matches = [];
@@ -146,7 +170,7 @@ class TrieNode {
146
170
  return matches;
147
171
  }
148
172
  /**
149
- * Clear all words from the trie
173
+ * Clear all words from the trie.
150
174
  */
151
175
  clear() {
152
176
  this.children.clear();
@@ -155,22 +179,23 @@ class TrieNode {
155
179
  }
156
180
  }
157
181
  /**
158
- * Advanced AllProfanity - Fixed profanity filter with multi-language support
159
- * Addresses all critical issues from the original implementation
182
+ * Main class for profanity detection and filtering.
160
183
  */
161
184
  export class AllProfanity {
185
+ /**
186
+ * Create an AllProfanity instance.
187
+ * @param options - Profanity filter configuration options.
188
+ */
162
189
  constructor(options) {
163
190
  var _a, _b, _c, _d, _e;
164
191
  this.profanityTrie = new TrieNode();
165
192
  this.whitelistSet = new Set();
166
193
  this.loadedLanguages = new Set();
167
- // Configuration
168
194
  this.defaultPlaceholder = "*";
169
195
  this.enableLeetSpeak = true;
170
196
  this.caseSensitive = false;
171
197
  this.strictMode = false;
172
198
  this.detectPartialWords = false;
173
- // Available language dictionaries
174
199
  this.availableLanguages = {
175
200
  english: englishBadWords || [],
176
201
  hindi: hindiBadWords || [],
@@ -180,8 +205,8 @@ export class AllProfanity {
180
205
  bengali: bengaliBadWords || [],
181
206
  tamil: tamilBadWords || [],
182
207
  telugu: teluguBadWords || [],
208
+ brazilian: brazilianBadWords || [],
183
209
  };
184
- // Fixed leet speak mappings
185
210
  this.leetMappings = new Map([
186
211
  ["@", "a"],
187
212
  ["^", "a"],
@@ -240,10 +265,14 @@ export class AllProfanity {
240
265
  ["2", "z"],
241
266
  ["7_", "z"],
242
267
  ]);
243
- // Dynamic words added at runtime
244
268
  this.dynamicWords = new Set();
269
+ // Advanced algorithms
270
+ this.ahoCorasickAutomaton = null;
271
+ this.bloomFilter = null;
272
+ this.contextAnalyzer = null;
273
+ this.matchingAlgorithm = "trie";
274
+ this.resultCache = null;
245
275
  this.logger = (options === null || options === void 0 ? void 0 : options.logger) || new ConsoleLogger();
246
- // Validate and set configuration
247
276
  if ((options === null || options === void 0 ? void 0 : options.defaultPlaceholder) !== undefined) {
248
277
  this.setPlaceholder(options.defaultPlaceholder);
249
278
  }
@@ -251,18 +280,17 @@ export class AllProfanity {
251
280
  this.caseSensitive = (_b = options === null || options === void 0 ? void 0 : options.caseSensitive) !== null && _b !== void 0 ? _b : false;
252
281
  this.strictMode = (_c = options === null || options === void 0 ? void 0 : options.strictMode) !== null && _c !== void 0 ? _c : false;
253
282
  this.detectPartialWords = (_d = options === null || options === void 0 ? void 0 : options.detectPartialWords) !== null && _d !== void 0 ? _d : false;
254
- // Load whitelist
255
283
  if (options === null || options === void 0 ? void 0 : options.whitelistWords) {
256
284
  this.addToWhitelist(options.whitelistWords);
257
285
  }
258
- // Load default languages
286
+ // Initialize advanced algorithms BEFORE loading dictionaries
287
+ // so that words can be added to all data structures
288
+ this.initializeAdvancedAlgorithms(options);
259
289
  this.loadLanguage("english");
260
290
  this.loadLanguage("hindi");
261
- // Load additional languages
262
291
  if ((_e = options === null || options === void 0 ? void 0 : options.languages) === null || _e === void 0 ? void 0 : _e.length) {
263
292
  options.languages.forEach((lang) => this.loadLanguage(lang));
264
293
  }
265
- // Load custom dictionaries
266
294
  if (options === null || options === void 0 ? void 0 : options.customDictionaries) {
267
295
  Object.entries(options.customDictionaries).forEach(([name, words]) => {
268
296
  this.loadCustomDictionary(name, words);
@@ -270,7 +298,55 @@ export class AllProfanity {
270
298
  }
271
299
  }
272
300
  /**
273
- * Normalize text by converting leet speak to regular characters.
301
+ * Initialize the optional Bloom filter, Aho-Corasick automaton, context analyzer, and result cache based on configuration.
302
+ */
303
+ initializeAdvancedAlgorithms(options) {
304
+ var _a, _b, _c, _d, _e, _f, _g, _h, _j, _k, _l, _m;
305
+ // Set matching algorithm
306
+ if ((_a = options === null || options === void 0 ? void 0 : options.algorithm) === null || _a === void 0 ? void 0 : _a.matching) {
307
+ this.matchingAlgorithm = options.algorithm.matching;
308
+ }
309
+ // Initialize Bloom Filter if enabled
310
+ const bloomEnabled = ((_b = options === null || options === void 0 ? void 0 : options.algorithm) === null || _b === void 0 ? void 0 : _b.useBloomFilter) ||
311
+ ((_c = options === null || options === void 0 ? void 0 : options.bloomFilter) === null || _c === void 0 ? void 0 : _c.enabled) ||
312
+ this.matchingAlgorithm === "hybrid";
313
+ if (bloomEnabled) {
314
+ const expectedItems = ((_d = options === null || options === void 0 ? void 0 : options.bloomFilter) === null || _d === void 0 ? void 0 : _d.expectedItems) || 10000;
315
+ const falsePositiveRate = ((_e = options === null || options === void 0 ? void 0 : options.bloomFilter) === null || _e === void 0 ? void 0 : _e.falsePositiveRate) || 0.01;
316
+ this.bloomFilter = new BloomFilter(expectedItems, falsePositiveRate);
317
+ this.logger.info(`Bloom Filter initialized with ${expectedItems} expected items and ${(falsePositiveRate * 100).toFixed(2)}% false positive rate`);
318
+ }
319
+ // Initialize Aho-Corasick if enabled
320
+ const ahoEnabled = ((_f = options === null || options === void 0 ? void 0 : options.algorithm) === null || _f === void 0 ? void 0 : _f.useAhoCorasick) ||
321
+ ((_g = options === null || options === void 0 ? void 0 : options.ahoCorasick) === null || _g === void 0 ? void 0 : _g.enabled) ||
322
+ this.matchingAlgorithm === "aho-corasick" ||
323
+ this.matchingAlgorithm === "hybrid";
324
+ if (ahoEnabled) {
325
+ this.ahoCorasickAutomaton = new AhoCorasick([]);
326
+ this.logger.info("Aho-Corasick automaton initialized");
327
+ }
328
+ // Initialize Context Analyzer if enabled
329
+ const contextEnabled = ((_h = options === null || options === void 0 ? void 0 : options.algorithm) === null || _h === void 0 ? void 0 : _h.useContextAnalysis) ||
330
+ ((_j = options === null || options === void 0 ? void 0 : options.contextAnalysis) === null || _j === void 0 ? void 0 : _j.enabled);
331
+ if (contextEnabled) {
332
+ const contextLanguages = ((_k = options === null || options === void 0 ? void 0 : options.contextAnalysis) === null || _k === void 0 ? void 0 : _k.languages) || ["en"];
333
+ this.contextAnalyzer = new ContextAnalyzer(contextLanguages);
334
+ if ((_l = options === null || options === void 0 ? void 0 : options.contextAnalysis) === null || _l === void 0 ? void 0 : _l.contextWindow) {
335
+ this.contextAnalyzer.setContextWindow(options.contextAnalysis.contextWindow);
336
+ }
337
+ this.logger.info(`Context Analyzer initialized for languages: ${contextLanguages.join(", ")}`);
338
+ }
339
+ // Initialize result cache if enabled
340
+ if ((_m = options === null || options === void 0 ? void 0 : options.performance) === null || _m === void 0 ? void 0 : _m.enableCaching) {
341
+ const cacheSize = options.performance.cacheSize || 1000;
342
+ this.resultCache = new Map();
343
+ this.logger.info(`Result caching enabled with size limit: ${cacheSize}`);
344
+ }
345
+ }
346
+ /**
347
+ * Normalize leet speak to regular characters.
348
+ * @param text - The input text.
349
+ * @returns Normalized text.
274
350
  */
275
351
  normalizeLeetSpeak(text) {
276
352
  if (!this.enableLeetSpeak)
@@ -284,13 +360,19 @@ export class AllProfanity {
284
360
  return normalized;
285
361
  }
286
362
  /**
287
- * Properly escape regex special characters
363
+ * Escape regex special characters in a string.
364
+ * @param str - The string to escape.
365
+ * @returns The escaped string.
288
366
  */
289
367
  escapeRegex(str) {
290
368
  return str.replace(/[\\^$.*+?()[\]{}|]/g, "\\$&");
291
369
  }
292
370
  /**
293
- * Check if a position has word boundaries (for strict mode)
371
+ * Check if a match is bounded by word boundaries (strict mode).
372
+ * @param text - The text.
373
+ * @param start - Start index.
374
+ * @param end - End index.
375
+ * @returns True if match is at word boundaries, false otherwise.
294
376
  */
295
377
  hasWordBoundaries(text, start, end) {
296
378
  if (!this.strictMode)
@@ -301,27 +383,24 @@ export class AllProfanity {
301
383
  return (wordBoundaryRegex.test(beforeChar) && wordBoundaryRegex.test(afterChar));
302
384
  }
303
385
  /**
304
- * Helper method to verify whole-word matching.
386
+ * Determine if a match is a whole word.
387
+ * @param text - The text.
388
+ * @param start - Start index.
389
+ * @param end - End index.
390
+ * @returns True if whole word, false otherwise.
305
391
  */
306
392
  isWholeWord(text, start, end) {
307
- // Check left boundary
308
- if (start === 0) {
309
- // ok
310
- }
311
- else if (/\w/.test(text[start - 1])) {
393
+ if (start !== 0 && /\w/.test(text[start - 1]))
312
394
  return false;
313
- }
314
- // Check right boundary
315
- if (end === text.length) {
316
- // ok
317
- }
318
- else if (/\w/.test(text[end])) {
395
+ if (end !== text.length && /\w/.test(text[end]))
319
396
  return false;
320
- }
321
397
  return true;
322
398
  }
323
399
  /**
324
- * Check if a match is whitelisted (by actual matched substring and dictionary word)
400
+ * Check if a match is whitelisted.
401
+ * @param word - Word from dictionary.
402
+ * @param matchedText - Actual matched text.
403
+ * @returns True if whitelisted, false otherwise.
325
404
  */
326
405
  isWhitelistedMatch(word, matchedText) {
327
406
  if (this.caseSensitive) {
@@ -333,7 +412,9 @@ export class AllProfanity {
333
412
  }
334
413
  }
335
414
  /**
336
- * Remove overlapping matches, keep only the longest at each start position
415
+ * Remove overlapping matches, keeping only the longest at each start position.
416
+ * @param matches - Array of match results.
417
+ * @returns Deduplicated matches.
337
418
  */
338
419
  deduplicateMatches(matches) {
339
420
  const sorted = [...matches].sort((a, b) => {
@@ -352,9 +433,76 @@ export class AllProfanity {
352
433
  return result;
353
434
  }
354
435
  /**
355
- * Advanced profanity detection using efficient trie-based algorithm
436
+ * Find matches using the Aho-Corasick automaton; returns an empty array when the automaton has not been initialized.
437
+ */
438
+ findMatchesWithAhoCorasick(searchText, originalText) {
439
+ if (!this.ahoCorasickAutomaton) {
440
+ return [];
441
+ }
442
+ const ahoMatches = this.ahoCorasickAutomaton.findAll(searchText);
443
+ const results = [];
444
+ for (const match of ahoMatches) {
445
+ if (!this.detectPartialWords &&
446
+ !this.isWholeWord(originalText, match.start, match.end)) {
447
+ continue;
448
+ }
449
+ const matchedText = originalText.substring(match.start, match.end);
450
+ if (this.isWhitelistedMatch(match.pattern, matchedText)) {
451
+ continue;
452
+ }
453
+ if (this.hasWordBoundaries(originalText, match.start, match.end)) {
454
+ results.push({
455
+ word: match.pattern,
456
+ start: match.start,
457
+ end: match.end,
458
+ originalWord: matchedText,
459
+ });
460
+ }
461
+ }
462
+ return results;
463
+ }
464
+ /**
465
+ * Hybrid approach: Aho-Corasick for matching (falling back to the trie when no automaton exists), with each match validated against the Bloom filter when one is configured.
466
+ */
467
+ findMatchesHybrid(searchText, originalText) {
468
+ // Use Aho-Corasick for primary matching if available
469
+ if (this.ahoCorasickAutomaton) {
470
+ const matches = this.findMatchesWithAhoCorasick(searchText, originalText);
471
+ // If Bloom Filter is enabled, validate matches
472
+ if (this.bloomFilter) {
473
+ return matches.filter((match) => this.bloomFilter.mightContain(match.word));
474
+ }
475
+ return matches;
476
+ }
477
+ // Fallback to Trie if Aho-Corasick not available
478
+ const matches = [];
479
+ this.findMatches(searchText, originalText, matches);
480
+ // Validate with Bloom Filter if enabled
481
+ if (this.bloomFilter) {
482
+ return matches.filter((match) => this.bloomFilter.mightContain(match.word));
483
+ }
484
+ return matches;
485
+ }
486
+ /**
487
+ * Apply context analysis to filter false positives
488
+ */
489
+ applyContextAnalysis(text, matches, scoreThreshold = 0.5) {
490
+ if (!this.contextAnalyzer) {
491
+ return matches;
492
+ }
493
+ return matches.filter((match) => {
494
+ const analysis = this.contextAnalyzer.analyzeContext(text, match.start, match.end, match.word);
495
+ // Keep the match when its score is at or above the threshold (likely profanity)
496
+ return analysis.score >= scoreThreshold;
497
+ });
498
+ }
499
+ /**
500
+ * Detect profanity in a given text.
501
+ * @param text - The text to check.
502
+ * @returns Profanity detection result.
356
503
  */
357
504
  detect(text) {
505
+ var _a;
358
506
  const validatedText = validateString(text, "text");
359
507
  if (validatedText.length === 0) {
360
508
  return {
@@ -365,23 +513,56 @@ export class AllProfanity {
365
513
  positions: [],
366
514
  };
367
515
  }
368
- const matches = [];
516
+ // Check cache first if enabled
517
+ if ((_a = this.resultCache) === null || _a === void 0 ? void 0 : _a.has(validatedText)) {
518
+ return this.resultCache.get(validatedText);
519
+ }
520
+ let matches = [];
369
521
  const normalizedText = this.caseSensitive
370
522
  ? validatedText
371
523
  : validatedText.toLowerCase();
372
- this.findMatches(normalizedText, validatedText, matches);
373
- // Leet speak detection (normalize and search, map back to original)
374
- if (this.enableLeetSpeak) {
375
- const leetNormalized = this.normalizeLeetSpeak(normalizedText);
376
- if (leetNormalized !== normalizedText) {
377
- this.findMatches(leetNormalized, validatedText, matches);
378
- }
524
+ // Choose matching algorithm based on configuration
525
+ switch (this.matchingAlgorithm) {
526
+ case "aho-corasick":
527
+ matches = this.findMatchesWithAhoCorasick(normalizedText, validatedText);
528
+ if (this.enableLeetSpeak) {
529
+ const leetNormalized = this.normalizeLeetSpeak(normalizedText);
530
+ if (leetNormalized !== normalizedText) {
531
+ const leetMatches = this.findMatchesWithAhoCorasick(leetNormalized, validatedText);
532
+ matches.push(...leetMatches);
533
+ }
534
+ }
535
+ break;
536
+ case "hybrid":
537
+ matches = this.findMatchesHybrid(normalizedText, validatedText);
538
+ if (this.enableLeetSpeak) {
539
+ const leetNormalized = this.normalizeLeetSpeak(normalizedText);
540
+ if (leetNormalized !== normalizedText) {
541
+ const leetMatches = this.findMatchesHybrid(leetNormalized, validatedText);
542
+ matches.push(...leetMatches);
543
+ }
544
+ }
545
+ break;
546
+ case "trie":
547
+ default:
548
+ this.findMatches(normalizedText, validatedText, matches);
549
+ if (this.enableLeetSpeak) {
550
+ const leetNormalized = this.normalizeLeetSpeak(normalizedText);
551
+ if (leetNormalized !== normalizedText) {
552
+ this.findMatches(leetNormalized, validatedText, matches);
553
+ }
554
+ }
555
+ break;
556
+ }
557
+ // Apply context analysis if enabled
558
+ if (this.contextAnalyzer) {
559
+ matches = this.applyContextAnalysis(validatedText, matches);
379
560
  }
380
561
  const uniqueMatches = this.deduplicateMatches(matches);
381
562
  const detectedWords = uniqueMatches.map((m) => m.originalWord);
382
563
  const severity = this.calculateSeverity(uniqueMatches);
383
564
  const cleanedText = this.generateCleanedText(validatedText, uniqueMatches);
384
- return {
565
+ const result = {
385
566
  hasProfanity: uniqueMatches.length > 0,
386
567
  detectedWords,
387
568
  cleanedText,
@@ -392,9 +573,22 @@ export class AllProfanity {
392
573
  end: m.end,
393
574
  })),
394
575
  };
576
+ // Cache result if caching is enabled
577
+ if (this.resultCache) {
578
+ this.resultCache.set(validatedText, result);
579
+ // Bound the cache size: evict the oldest-inserted entry (FIFO, not true LRU — reads do not refresh order) once it exceeds 1000 entries
580
+ if (this.resultCache.size > 1000) {
581
+ const firstKey = this.resultCache.keys().next().value;
582
+ this.resultCache.delete(firstKey);
583
+ }
584
+ }
585
+ return result;
395
586
  }
396
587
  /**
397
588
  * Main matching function, with whole-word logic.
589
+ * @param searchText - The normalized text to search.
590
+ * @param originalText - The original text.
591
+ * @param matches - Array to collect matches.
398
592
  */
399
593
  findMatches(searchText, originalText, matches) {
400
594
  for (let i = 0; i < searchText.length; i++) {
@@ -402,12 +596,10 @@ export class AllProfanity {
402
596
  for (const match of matchResults) {
403
597
  const start = i + match.start;
404
598
  const end = i + match.end;
405
- // Only match whole words if !detectPartialWords
406
599
  if (!this.detectPartialWords &&
407
600
  !this.isWholeWord(originalText, start, end)) {
408
601
  continue;
409
602
  }
410
- // Use actual matched text for whitelist check
411
603
  const matchedText = originalText.substring(start, end);
412
604
  if (this.isWhitelistedMatch(match.word, matchedText)) {
413
605
  continue;
@@ -424,13 +616,15 @@ export class AllProfanity {
424
616
  }
425
617
  }
426
618
  /**
427
- * Generate cleaned text by replacing profane words (non-overlapping only)
619
+ * Generate cleaned text by replacing profane words.
620
+ * @param originalText - The original text.
621
+ * @param matches - Array of matches.
622
+ * @returns Cleaned text.
428
623
  */
429
624
  generateCleanedText(originalText, matches) {
430
625
  if (matches.length === 0)
431
626
  return originalText;
432
627
  let result = originalText;
433
- // Process matches in reverse order to maintain indices and avoid overlap
434
628
  const sortedMatches = [...this.deduplicateMatches(matches)].sort((a, b) => b.start - a.start);
435
629
  for (const match of sortedMatches) {
436
630
  const replacement = this.defaultPlaceholder.repeat(match.originalWord.length);
@@ -442,20 +636,24 @@ export class AllProfanity {
442
636
  return result;
443
637
  }
444
638
  /**
445
- * Simple boolean check for profanity
639
+ * Check if a string contains profanity.
640
+ * @param text - The text to check.
641
+ * @returns True if profanity is found, false otherwise.
446
642
  */
447
643
  check(text) {
448
644
  return this.detect(text).hasProfanity;
449
645
  }
450
646
  /**
451
- * Clean text with custom placeholder
647
+ * Clean text with a custom placeholder.
648
+ * @param text - The text to clean.
649
+ * @param placeholder - The placeholder to use.
650
+ * @returns Cleaned text.
452
651
  */
453
652
  clean(text, placeholder) {
454
653
  const detection = this.detect(text);
455
654
  if (!placeholder || placeholder === this.defaultPlaceholder) {
456
655
  return detection.cleanedText;
457
656
  }
458
- // Use custom placeholder
459
657
  let result = text;
460
658
  const sortedPositions = [
461
659
  ...this.deduplicateMatches(detection.positions.map((p) => ({
@@ -476,14 +674,16 @@ export class AllProfanity {
476
674
  return result;
477
675
  }
478
676
  /**
479
- * Clean text by replacing each profane word with a single placeholder (word-level)
677
+ * Clean text by replacing each profane word with a single placeholder (word-level).
678
+ * @param text - The text to clean.
679
+ * @param placeholder - The placeholder to use.
680
+ * @returns Word-level cleaned text.
480
681
  */
481
682
  cleanWithPlaceholder(text, placeholder = "***") {
482
683
  const detection = this.detect(text);
483
684
  if (detection.positions.length === 0)
484
685
  return text;
485
686
  let result = text;
486
- // Sort matches so later matches don't affect earlier ones
487
687
  const sortedPositions = [
488
688
  ...this.deduplicateMatches(detection.positions.map((p) => ({
489
689
  word: p.word,
@@ -493,7 +693,6 @@ export class AllProfanity {
493
693
  }))),
494
694
  ].sort((a, b) => b.start - a.start);
495
695
  for (const pos of sortedPositions) {
496
- // Only replace whole words!
497
696
  if (!this.isWholeWord(result, pos.start, pos.end))
498
697
  continue;
499
698
  result =
@@ -504,7 +703,8 @@ export class AllProfanity {
504
703
  return result;
505
704
  }
506
705
  /**
507
- * Add word(s) to the profanity list
706
+ * Add word(s) to the profanity filter.
707
+ * @param word - Word or array of words to add.
508
708
  */
509
709
  add(word) {
510
710
  const words = Array.isArray(word) ? word : [word];
@@ -515,7 +715,8 @@ export class AllProfanity {
515
715
  }
516
716
  }
517
717
  /**
518
- * Remove word(s) from the profanity list
718
+ * Remove word(s) from the profanity filter.
719
+ * @param word - Word or array of words to remove.
519
720
  */
520
721
  remove(word) {
521
722
  const words = Array.isArray(word) ? word : [word];
@@ -527,7 +728,8 @@ export class AllProfanity {
527
728
  }
528
729
  }
529
730
  /**
530
- * Add words to whitelist
731
+ * Add words to the whitelist.
732
+ * @param words - Words to whitelist.
531
733
  */
532
734
  addToWhitelist(words) {
533
735
  const validatedWords = validateStringArray(words, "whitelist words");
@@ -537,7 +739,8 @@ export class AllProfanity {
537
739
  }
538
740
  }
539
741
  /**
540
- * Remove words from whitelist
742
+ * Remove words from the whitelist.
743
+ * @param words - Words to remove from whitelist.
541
744
  */
542
745
  removeFromWhitelist(words) {
543
746
  const validatedWords = validateStringArray(words, "whitelist words");
@@ -547,14 +750,18 @@ export class AllProfanity {
547
750
  }
548
751
  }
549
752
  /**
550
- * Helper for whitelist checking with correct normalization
753
+ * Check if a word is whitelisted.
754
+ * @param word - The word to check.
755
+ * @returns True if whitelisted, false otherwise.
551
756
  */
552
757
  isWhitelisted(word) {
553
758
  const normalizedWord = this.caseSensitive ? word : word.toLowerCase();
554
759
  return this.whitelistSet.has(normalizedWord);
555
760
  }
556
761
  /**
557
- * Load a built-in language dictionary
762
+ * Load a built-in language dictionary.
763
+ * @param language - The language key.
764
+ * @returns True if loaded, false otherwise.
558
765
  */
559
766
  loadLanguage(language) {
560
767
  if (!language || typeof language !== "string") {
@@ -587,7 +794,9 @@ export class AllProfanity {
587
794
  }
588
795
  }
589
796
  /**
590
- * Load multiple languages at once
797
+ * Load multiple language dictionaries.
798
+ * @param languages - Array of languages to load.
799
+ * @returns Number of successfully loaded languages.
591
800
  */
592
801
  loadLanguages(languages) {
593
802
  const validatedLanguages = validateStringArray(languages, "languages");
@@ -596,14 +805,17 @@ export class AllProfanity {
596
805
  }, 0);
597
806
  }
598
807
  /**
599
- * Load all Indian languages
808
+ * Load all supported Indian languages.
809
+ * @returns Number of loaded Indian languages.
600
810
  */
601
811
  loadIndianLanguages() {
602
812
  const indianLanguages = ["hindi", "bengali", "tamil", "telugu"];
603
813
  return this.loadLanguages(indianLanguages);
604
814
  }
605
815
  /**
606
- * Load a custom dictionary
816
+ * Load a custom dictionary.
817
+ * @param name - Name of the dictionary.
818
+ * @param words - Words to add.
607
819
  */
608
820
  loadCustomDictionary(name, words) {
609
821
  validateString(name, "dictionary name");
@@ -619,7 +831,6 @@ export class AllProfanity {
619
831
  addedCount++;
620
832
  }
621
833
  }
622
- // Store for future reference
623
834
  this.availableLanguages[name.toLowerCase()] = validatedWords;
624
835
  this.loadedLanguages.add(name.toLowerCase());
625
836
  this.logger.info(`Loaded ${addedCount} words from custom dictionary '${name}'`);
@@ -629,7 +840,9 @@ export class AllProfanity {
629
840
  }
630
841
  }
631
842
  /**
632
- * Add a single word to the trie structure
843
+ * Add a single word to the trie and, when enabled, to the Bloom filter and Aho-Corasick automaton.
844
+ * @param word - The word to add.
845
+ * @returns True if added, false otherwise.
633
846
  */
634
847
  addWordToTrie(word) {
635
848
  if (!word || typeof word !== "string" || word.trim().length === 0) {
@@ -638,16 +851,25 @@ export class AllProfanity {
638
851
  const normalizedWord = this.caseSensitive
639
852
  ? word.trim()
640
853
  : word.trim().toLowerCase();
641
- // Skip if whitelisted
642
854
  if (this.isWhitelisted(normalizedWord)) {
643
855
  return false;
644
856
  }
645
- // Add to trie
857
+ // Add to Trie (always used as fallback)
646
858
  this.profanityTrie.addWord(normalizedWord);
859
+ // Add to Bloom Filter if enabled
860
+ if (this.bloomFilter) {
861
+ this.bloomFilter.add(normalizedWord);
862
+ }
863
+ // Add to Aho-Corasick automaton if enabled
864
+ if (this.ahoCorasickAutomaton) {
865
+ this.ahoCorasickAutomaton.addPattern(normalizedWord);
866
+ }
647
867
  return true;
648
868
  }
649
869
  /**
650
- * Remove overlapping matches, keep only the longest at each start position
870
+ * Calculate severity from matches.
871
+ * @param matches - Array of matches.
872
+ * @returns Severity level.
651
873
  */
652
874
  calculateSeverity(matches) {
653
875
  if (matches.length === 0)
@@ -663,7 +885,7 @@ export class AllProfanity {
663
885
  return ProfanitySeverity.MILD;
664
886
  }
665
887
  /**
666
- * Clear all loaded dictionaries
888
+ * Clear all loaded dictionaries and dynamic words.
667
889
  */
668
890
  clearList() {
669
891
  this.profanityTrie.clear();
@@ -671,7 +893,8 @@ export class AllProfanity {
671
893
  this.dynamicWords.clear();
672
894
  }
673
895
  /**
674
- * Set placeholder character
896
+ * Set the placeholder character for filtered words.
897
+ * @param placeholder - The placeholder character.
675
898
  */
676
899
  setPlaceholder(placeholder) {
677
900
  validateString(placeholder, "placeholder");
@@ -681,19 +904,22 @@ export class AllProfanity {
681
904
  this.defaultPlaceholder = placeholder.charAt(0);
682
905
  }
683
906
  /**
684
- * Get loaded languages
907
+ * Get the list of loaded languages.
908
+ * @returns Array of loaded language keys.
685
909
  */
686
910
  getLoadedLanguages() {
687
911
  return Array.from(this.loadedLanguages);
688
912
  }
689
913
  /**
690
- * Get available languages
914
+ * Get the list of available languages (built-in dictionaries plus any custom dictionaries that have been loaded).
915
+ * @returns Array of available language keys.
691
916
  */
692
917
  getAvailableLanguages() {
693
918
  return Object.keys(this.availableLanguages);
694
919
  }
695
920
  /**
696
- * Get current configuration
921
+ * Get the current configuration of the profanity filter.
922
+ * @returns Partial configuration object.
697
923
  */
698
924
  getConfig() {
699
925
  return {
@@ -707,24 +933,23 @@ export class AllProfanity {
707
933
  };
708
934
  }
709
935
  /**
710
- * Rebuilds the profanity trie from loaded language dictionaries and dynamic words.
936
+ * Rebuild the profanity trie from loaded dictionaries and dynamic words.
711
937
  */
712
938
  rebuildTrie() {
713
939
  this.profanityTrie.clear();
714
- // Re-add all loaded language words
715
940
  for (const lang of this.loadedLanguages) {
716
941
  const words = this.availableLanguages[lang] || [];
717
942
  for (const word of words) {
718
943
  this.addWordToTrie(word);
719
944
  }
720
945
  }
721
- // Re-add dynamic words
722
946
  for (const word of this.dynamicWords) {
723
947
  this.addWordToTrie(word);
724
948
  }
725
949
  }
726
950
  /**
727
- * Update configuration. Rebuild trie if needed.
951
+ * Update configuration options for the profanity filter.
952
+ * @param options - Partial configuration object.
728
953
  */
729
954
  updateConfig(options) {
730
955
  let rebuildNeeded = false;
@@ -752,8 +977,54 @@ export class AllProfanity {
752
977
  this.rebuildTrie();
753
978
  }
754
979
  }
980
+ /**
981
+ * Create an AllProfanity instance from a configuration object.
982
+ * @param config - Configuration object
983
+ * @returns A new AllProfanity instance
984
+ */
985
+ static fromConfig(config) {
986
+ const options = {};
987
+ if (config.algorithm)
988
+ options.algorithm = config.algorithm;
989
+ if (config.bloomFilter)
990
+ options.bloomFilter = config.bloomFilter;
991
+ if (config.ahoCorasick)
992
+ options.ahoCorasick = config.ahoCorasick;
993
+ if (config.contextAnalysis)
994
+ options.contextAnalysis = config.contextAnalysis;
995
+ if (config.performance)
996
+ options.performance = config.performance;
997
+ if (config.profanityDetection) {
998
+ options.enableLeetSpeak = config.profanityDetection.enableLeetSpeak;
999
+ options.caseSensitive = config.profanityDetection.caseSensitive;
1000
+ options.strictMode = config.profanityDetection.strictMode;
1001
+ options.detectPartialWords = config.profanityDetection.detectPartialWords;
1002
+ options.defaultPlaceholder = config.profanityDetection.defaultPlaceholder;
1003
+ }
1004
+ if (config.enableLeetSpeak !== undefined)
1005
+ options.enableLeetSpeak = config.enableLeetSpeak;
1006
+ if (config.caseSensitive !== undefined)
1007
+ options.caseSensitive = config.caseSensitive;
1008
+ if (config.strictMode !== undefined)
1009
+ options.strictMode = config.strictMode;
1010
+ if (config.detectPartialWords !== undefined)
1011
+ options.detectPartialWords = config.detectPartialWords;
1012
+ if (config.defaultPlaceholder !== undefined)
1013
+ options.defaultPlaceholder = config.defaultPlaceholder;
1014
+ if (config.languages)
1015
+ options.languages = config.languages;
1016
+ if (config.whitelistWords)
1017
+ options.whitelistWords = config.whitelistWords;
1018
+ if (config.customDictionaries)
1019
+ options.customDictionaries = config.customDictionaries;
1020
+ if (config.logger)
1021
+ options.logger = config.logger;
1022
+ return new AllProfanity(options);
1023
+ }
755
1024
  }
756
- // Create and export a singleton instance
1025
+ /**
1026
+ * Singleton instance of AllProfanity with default configuration.
1027
+ */
757
1028
  const allProfanity = new AllProfanity();
758
1029
  export default allProfanity;
759
1030
  //# sourceMappingURL=index.js.map