bekindprofanityfilter 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (82)
  1. package/CONTRIBUTORS.md +106 -0
  2. package/LICENSE +22 -0
  3. package/README.md +1015 -0
  4. package/allprofanity.config.example.json +35 -0
  5. package/bin/init.js +49 -0
  6. package/config.schema.json +163 -0
  7. package/dist/algos/aho-corasick.d.ts +75 -0
  8. package/dist/algos/aho-corasick.js +238 -0
  9. package/dist/algos/aho-corasick.js.map +1 -0
  10. package/dist/algos/bloom-filter.d.ts +103 -0
  11. package/dist/algos/bloom-filter.js +208 -0
  12. package/dist/algos/bloom-filter.js.map +1 -0
  13. package/dist/algos/context-patterns.d.ts +102 -0
  14. package/dist/algos/context-patterns.js +484 -0
  15. package/dist/algos/context-patterns.js.map +1 -0
  16. package/dist/index.d.ts +1332 -0
  17. package/dist/index.js +2631 -0
  18. package/dist/index.js.map +1 -0
  19. package/dist/innocence-scoring.d.ts +23 -0
  20. package/dist/innocence-scoring.js +118 -0
  21. package/dist/innocence-scoring.js.map +1 -0
  22. package/dist/language-detector.d.ts +162 -0
  23. package/dist/language-detector.js +952 -0
  24. package/dist/language-detector.js.map +1 -0
  25. package/dist/language-dicts.d.ts +60 -0
  26. package/dist/language-dicts.js +2718 -0
  27. package/dist/language-dicts.js.map +1 -0
  28. package/dist/languages/arabic-words.d.ts +10 -0
  29. package/dist/languages/arabic-words.js +1649 -0
  30. package/dist/languages/arabic-words.js.map +1 -0
  31. package/dist/languages/bengali-words.d.ts +10 -0
  32. package/dist/languages/bengali-words.js +1696 -0
  33. package/dist/languages/bengali-words.js.map +1 -0
  34. package/dist/languages/brazilian-words.d.ts +10 -0
  35. package/dist/languages/brazilian-words.js +2122 -0
  36. package/dist/languages/brazilian-words.js.map +1 -0
  37. package/dist/languages/chinese-words.d.ts +10 -0
  38. package/dist/languages/chinese-words.js +2728 -0
  39. package/dist/languages/chinese-words.js.map +1 -0
  40. package/dist/languages/english-primary-all-languages.d.ts +23 -0
  41. package/dist/languages/english-primary-all-languages.js +36894 -0
  42. package/dist/languages/english-primary-all-languages.js.map +1 -0
  43. package/dist/languages/english-words.d.ts +5 -0
  44. package/dist/languages/english-words.js +5156 -0
  45. package/dist/languages/english-words.js.map +1 -0
  46. package/dist/languages/french-words.d.ts +10 -0
  47. package/dist/languages/french-words.js +2326 -0
  48. package/dist/languages/french-words.js.map +1 -0
  49. package/dist/languages/german-words.d.ts +10 -0
  50. package/dist/languages/german-words.js +2633 -0
  51. package/dist/languages/german-words.js.map +1 -0
  52. package/dist/languages/hindi-words.d.ts +10 -0
  53. package/dist/languages/hindi-words.js +2341 -0
  54. package/dist/languages/hindi-words.js.map +1 -0
  55. package/dist/languages/innocent-words.d.ts +41 -0
  56. package/dist/languages/innocent-words.js +109 -0
  57. package/dist/languages/innocent-words.js.map +1 -0
  58. package/dist/languages/italian-words.d.ts +10 -0
  59. package/dist/languages/italian-words.js +2287 -0
  60. package/dist/languages/italian-words.js.map +1 -0
  61. package/dist/languages/japanese-words.d.ts +11 -0
  62. package/dist/languages/japanese-words.js +2557 -0
  63. package/dist/languages/japanese-words.js.map +1 -0
  64. package/dist/languages/korean-words.d.ts +10 -0
  65. package/dist/languages/korean-words.js +2509 -0
  66. package/dist/languages/korean-words.js.map +1 -0
  67. package/dist/languages/russian-words.d.ts +10 -0
  68. package/dist/languages/russian-words.js +2175 -0
  69. package/dist/languages/russian-words.js.map +1 -0
  70. package/dist/languages/spanish-words.d.ts +11 -0
  71. package/dist/languages/spanish-words.js +2536 -0
  72. package/dist/languages/spanish-words.js.map +1 -0
  73. package/dist/languages/tamil-words.d.ts +10 -0
  74. package/dist/languages/tamil-words.js +1722 -0
  75. package/dist/languages/tamil-words.js.map +1 -0
  76. package/dist/languages/telugu-words.d.ts +10 -0
  77. package/dist/languages/telugu-words.js +1739 -0
  78. package/dist/languages/telugu-words.js.map +1 -0
  79. package/dist/romanization-detector.d.ts +50 -0
  80. package/dist/romanization-detector.js +779 -0
  81. package/dist/romanization-detector.js.map +1 -0
  82. package/package.json +79 -0
package/dist/index.js ADDED
@@ -0,0 +1,2631 @@
1
+ // Consolidated all-languages dictionary import
2
+ import allLanguagesBadWords from "./languages/english-primary-all-languages.js";
3
+ // Advanced algorithm imports
4
+ import { AhoCorasick } from "./algos/aho-corasick.js";
5
+ import { BloomFilter } from "./algos/bloom-filter.js";
6
+ import { ContextAnalyzer } from "./algos/context-patterns.js";
7
+ // Cross-language innocence scoring
8
+ import { detectLanguages, scoreWord } from "./language-detector.js";
9
+ import innocentWords from "./languages/innocent-words.js";
10
+ import { adjustCertaintyForLanguage } from "./innocence-scoring.js";
11
+ // Export consolidated dictionary for direct access
12
+ export { default as allLanguagesBadWords } from "./languages/english-primary-all-languages.js";
13
/**
 * Default console logger implementation for BeKind.
 *
 * Writes every message to the Node.js / browser console, prefixed with
 * "[BeKind]" so library output is easy to distinguish from application logs.
 * Used automatically when no custom logger is supplied.
 *
 * @class ConsoleLogger
 * @implements {Logger}
 * @internal
 */
class ConsoleLogger {
  /**
   * Log an informational message via console.log with the [BeKind] prefix.
   *
   * @param message - Text to log
   * @returns void
   */
  info(message) {
    const line = `[BeKind] ${message}`;
    console.log(line);
  }
  /**
   * Log a warning message via console.warn with the [BeKind] prefix.
   *
   * @param message - Warning text to log
   * @returns void
   */
  warn(message) {
    const line = `[BeKind] ${message}`;
    console.warn(line);
  }
  /**
   * Log an error message via console.error with the [BeKind] prefix.
   *
   * @param message - Error text to log
   * @returns void
   */
  error(message) {
    const line = `[BeKind] ${message}`;
    console.error(line);
  }
}
52
/**
 * Logger implementation that discards every message.
 *
 * Used when `silent: true` is set in BeKindOptions, or whenever log output
 * should be suppressed entirely. All three methods are deliberate no-ops
 * and return undefined.
 *
 * @class SilentLogger
 * @implements {Logger}
 * @internal
 */
class SilentLogger {
  /**
   * Discard an informational message (no-op).
   * @param _message - Ignored
   * @returns void
   */
  info(_message) {
    // Intentionally empty: silent mode suppresses all output.
  }
  /**
   * Discard a warning message (no-op).
   * @param _message - Ignored
   * @returns void
   */
  warn(_message) {
    // Intentionally empty: silent mode suppresses all output.
  }
  /**
   * Discard an error message (no-op).
   * @param _message - Ignored
   * @returns void
   */
  error(_message) {
    // Intentionally empty: silent mode suppresses all output.
  }
}
91
/**
 * Severity levels for profanity detection results.
 *
 * Categorizes detected profanity by the number of unique words and total
 * matches found in the text. Follows the compiled-TypeScript enum pattern:
 * each name maps to its numeric value and each numeric value maps back to
 * its name (e.g. ProfanitySeverity.MILD === 1, ProfanitySeverity[1] === "MILD").
 *
 * @enum {number}
 * @readonly
 * @example
 * ```typescript
 * const result = filter.detect("some text");
 * if (result.severity === ProfanitySeverity.EXTREME) {
 *   // Handle extreme profanity
 * }
 * ```
 */
export var ProfanitySeverity;
(function (ProfanitySeverity) {
  const levels = [
    ["MILD", 1],     // 1 unique word or 1 total match
    ["MODERATE", 2], // 2 unique words or 2 total matches
    ["SEVERE", 3],   // 3 unique words or 3 total matches
    ["EXTREME", 4],  // 4+ unique words or 5+ total matches
  ];
  for (const [name, value] of levels) {
    // Forward (name -> value) and reverse (value -> name) mappings.
    ProfanitySeverity[(ProfanitySeverity[name] = value)] = name;
  }
})(ProfanitySeverity = ProfanitySeverity || (ProfanitySeverity = {}));
118
/**
 * Per-word severity classification for individual detected words.
 *
 * Compiled-TypeScript enum pattern with forward (name -> value) and
 * reverse (value -> name) mappings.
 *
 * @enum {number}
 */
export var WordSeverity;
(function (WordSeverity) {
  const levels = [
    // Ambivalent: mild/contextual profanity that may be acceptable (damn, hell, crap, suck)
    ["AMBIVALENT", 1],
    // Profane: should be flagged — strong profanity, slurs, explicit content
    ["PROFANE", 2],
  ];
  for (const [name, value] of levels) {
    WordSeverity[(WordSeverity[name] = value)] = name;
  }
})(WordSeverity = WordSeverity || (WordSeverity = {}));
130
/**
 * Validates that an input is a string.
 *
 * Note: only the type is checked — empty strings are accepted and returned
 * as-is. (The previous doc claimed a "non-empty" check that the code never
 * performed; the documentation now matches the actual behavior.)
 *
 * @function validateString
 * @param {unknown} input - The value to validate
 * @param {string} paramName - Name of the parameter being validated (used in the error message)
 * @returns {string} The validated string (may be empty)
 * @throws {TypeError} If input is not a string
 *
 * @internal
 *
 * @example
 * ```typescript
 * const text = validateString(userInput, 'text');
 * // Returns userInput if it's a string, throws TypeError otherwise
 * ```
 */
function validateString(input, paramName) {
  if (typeof input !== "string") {
    throw new TypeError(`${paramName} must be a string, got ${typeof input}`);
  }
  return input;
}
153
/**
 * Validates and filters a string array, removing non-string and
 * whitespace-only/empty items.
 *
 * Non-string entries are skipped with a console.warn; string entries are
 * kept only when they contain at least one non-whitespace character.
 *
 * @function validateStringArray
 * @param {unknown} input - The value to validate (expected to be an array)
 * @param {string} paramName - Name of the parameter being validated (used in error/warning messages)
 * @returns {string[]} Array of valid, non-empty strings
 * @throws {TypeError} If input is not an array
 *
 * @internal
 *
 * @example
 * ```typescript
 * const words = validateStringArray(['word1', '', 123, 'word2'], 'words');
 * // Returns: ['word1', 'word2']
 * // Logs warning: "Skipping non-string item in words: 123"
 * ```
 */
function validateStringArray(input, paramName) {
  if (!Array.isArray(input)) {
    throw new TypeError(`${paramName} must be an array`);
  }
  const valid = [];
  for (const entry of input) {
    if (typeof entry !== "string") {
      console.warn(`Skipping non-string item in ${paramName}: ${entry}`);
      continue;
    }
    if (entry.trim().length > 0) {
      valid.push(entry);
    }
  }
  return valid;
}
183
/**
 * Trie (prefix tree) node for efficient pattern matching and word storage.
 *
 * Each node maps a single UTF-16 code unit to a child node; a path from the
 * root to a node with isEndOfWord=true spells a complete stored word.
 * Insertion, removal, and lookup are O(m) in the word length m.
 *
 * Fixes over the previous revision:
 * - removeWord() now correctly reports whether the word was removed even when
 *   its path is shared with other stored words (previously it returned the
 *   internal "delete this node" flag, so removing 'hello' while 'help' was
 *   stored returned false despite succeeding).
 * - addWord() now iterates UTF-16 code units like findMatches()/removeHelper()
 *   do, so words containing astral-plane characters are stored with the same
 *   keys used during lookup (previously such words could never be matched).
 *
 * @class TrieNode
 * @internal
 *
 * @example
 * ```typescript
 * const trie = new TrieNode();
 * trie.addWord('bad');
 * trie.addWord('badword');
 * const matches = trie.findMatches('badwords here', 0, false);
 * // Returns matches for 'bad' and 'badword'
 * ```
 */
class TrieNode {
  constructor() {
    /** Map of characters (UTF-16 code units) to child nodes for fast lookups */
    this.children = new Map();
    /** True when this node terminates a complete stored word */
    this.isEndOfWord = false;
    /** The complete word ending at this node (only meaningful when isEndOfWord is true) */
    this.word = "";
  }
  /**
   * Get the child node for a given character, or undefined if absent.
   *
   * @param {string} char - Single character to look up
   * @returns {TrieNode | undefined}
   */
  getChild(char) {
    return this.children.get(char);
  }
  /**
   * Adds a word to the trie structure.
   *
   * @param {string} word - The word to add to the trie
   * @returns {void}
   *
   * @remarks
   * - Time Complexity: O(m) where m is the length of the word
   * - Space Complexity: O(m) in worst case when all characters are new
   * - Iterates UTF-16 code units so keys match findMatches()/removeHelper()
   */
  addWord(word) {
    let current = this;
    for (let i = 0; i < word.length; i++) {
      const char = word[i];
      let next = current.children.get(char);
      if (!next) {
        next = new TrieNode();
        current.children.set(char, next);
      }
      current = next;
    }
    current.isEndOfWord = true;
    current.word = word;
  }
  /**
   * Removes a word from the trie structure, pruning nodes that become
   * unnecessary. Shared prefixes with other stored words are preserved.
   *
   * @param {string} word - The word to remove from the trie
   * @returns {boolean} True if the word existed and was removed, false if word was not found
   *
   * @remarks
   * - Time Complexity: O(m) where m is the length of the word
   *
   * @example
   * ```typescript
   * const trie = new TrieNode();
   * trie.addWord('hello');
   * trie.removeWord('hello'); // Returns: true
   * trie.removeWord('world'); // Returns: false (word not in trie)
   * ```
   */
  removeWord(word) {
    const outcome = { removed: false };
    this.removeHelper(word, 0, outcome);
    return outcome.removed;
  }
  /**
   * Recursive helper method for removing a word from the trie.
   *
   * @param {string} word - The word being removed
   * @param {number} index - Current character index in the word
   * @param {{ removed: boolean }} [outcome] - Out-parameter recording whether the word was found and unmarked
   * @returns {boolean} True if this node should be deleted by its parent (no children and not the end of another word)
   *
   * @internal
   */
  removeHelper(word, index, outcome = { removed: false }) {
    if (index === word.length) {
      if (!this.isEndOfWord)
        return false;
      this.isEndOfWord = false;
      this.word = "";
      outcome.removed = true;
      // Safe to prune this node only if nothing hangs below it.
      return this.children.size === 0;
    }
    const char = word[index];
    const node = this.children.get(char);
    if (!node)
      return false;
    const shouldDeleteChild = node.removeHelper(word, index + 1, outcome);
    if (shouldDeleteChild) {
      this.children.delete(char);
      // Cascade pruning upward only if this node is now useless too.
      return this.children.size === 0 && !this.isEndOfWord;
    }
    return false;
  }
  /**
   * Finds all stored words that begin exactly at startPos in text.
   *
   * Walks the trie along text[startPos..] and records every complete word
   * encountered. Positions in the returned matches are RELATIVE to startPos
   * (start is always 0; end is the match length in UTF-16 code units).
   *
   * NOTE(review): the previous implementation had byte-identical code in both
   * the allowPartial and !allowPartial branches, so the flag has no effect in
   * this method; word-boundary filtering is presumably performed by the
   * caller — confirm before relying on allowPartial here. The duplicate
   * branch has been collapsed without changing behavior.
   *
   * @param {string} text - The text to search
   * @param {number} startPos - The starting position (0-based index) in the text
   * @param {boolean} allowPartial - Currently has no effect (see note above)
   * @returns {Array<{ word: string; start: number; end: number }>} Array of match objects
   *
   * @remarks
   * - Time Complexity: O(k) where k is the length of the longest match from startPos
   */
  findMatches(text, startPos, allowPartial) {
    const matches = [];
    let current = this;
    let pos = startPos;
    while (pos < text.length) {
      const nextNode = current.children.get(text[pos]);
      if (!nextNode)
        break;
      current = nextNode;
      pos++;
      if (current.isEndOfWord) {
        matches.push({
          word: current.word,
          start: 0,
          end: pos - startPos,
        });
      }
    }
    return matches;
  }
  /**
   * Clears all words from the trie, resetting it to empty state.
   *
   * @returns {void}
   *
   * @remarks
   * - Time Complexity: O(1) on the root; detached children are garbage-collected
   */
  clear() {
    this.children.clear();
    this.isEndOfWord = false;
    this.word = "";
  }
}
374
+ /**
375
+ * BeKind - Professional-grade multilingual profanity detection and filtering library.
376
+ *
377
+ * @class BeKind
378
+ * @description A comprehensive, high-performance profanity filtering system supporting 9+ languages
379
+ * with advanced features including leet speak detection, context analysis, multiple matching algorithms,
380
+ * and customizable filtering options.
381
+ *
382
+ * @remarks
383
+ * ### Features:
384
+ * - **Multi-language Support**: English, Hindi, French, German, Spanish, Bengali, Tamil, Telugu, Brazilian Portuguese
385
+ * - **Advanced Algorithms**: Trie, Aho-Corasick, Bloom Filter, and hybrid approaches
386
+ * - **Leet Speak Detection**: Automatically normalizes and detects variations like "h3ll0"
387
+ * - **Context Analysis**: Reduces false positives using surrounding word context
388
+ * - **Performance**: Built-in caching and optimized data structures
389
+ * - **Flexible**: Custom dictionaries, whitelisting, severity levels
390
+ *
391
+ * ### Default Behavior:
392
+ * - Loads English and Hindi dictionaries by default
393
+ * - Case-insensitive matching
394
+ * - Leet speak detection enabled
395
+ * - Uses Trie algorithm (fastest for most cases)
396
+ *
397
+ * @example
398
+ * ```typescript
399
+ * // Basic usage with default instance
400
+ * import allProfanity from 'allprofanity';
401
+ *
402
+ * const result = allProfanity.detect("This is some bad text");
403
+ * console.log(result.hasProfanity); // true
404
+ * console.log(result.cleanedText); // "This is some *** text"
405
+ * console.log(result.severity); // ProfanitySeverity.MILD
406
+ * ```
407
+ *
408
+ * @example
409
+ * ```typescript
410
+ * // Advanced usage with custom configuration
411
+ * import { BeKind, ProfanitySeverity } from 'allprofanity';
412
+ *
413
+ * const filter = new BeKind({
414
+ * languages: ['english', 'french', 'spanish'],
415
+ * enableLeetSpeak: true,
416
+ * strictMode: true,
417
+ * algorithm: {
418
+ * matching: 'hybrid',
419
+ * useBloomFilter: true
420
+ * },
421
+ * performance: {
422
+ * enableCaching: true,
423
+ * cacheSize: 500
424
+ * },
425
+ * whitelistWords: ['class', 'assignment']
426
+ * });
427
+ *
428
+ * const text = "This text has some b@d w0rds";
429
+ * const result = filter.detect(text);
430
+ *
431
+ * if (result.hasProfanity) {
432
+ * console.log(`Found ${result.detectedWords.length} profane words`);
433
+ * console.log(`Severity: ${ProfanitySeverity[result.severity]}`);
434
+ * console.log(`Cleaned: ${result.cleanedText}`);
435
+ * }
436
+ * ```
437
+ *
438
+ * @example
439
+ * ```typescript
440
+ * // Using individual methods
441
+ * const filter = new BeKind();
442
+ *
443
+ * // Simple check
444
+ * if (filter.check("some text")) {
445
+ * console.log("Contains profanity!");
446
+ * }
447
+ *
448
+ * // Clean with custom placeholder
449
+ * const cleaned = filter.clean("bad words here", "#");
450
+ *
451
+ * // Load additional languages
452
+ * filter.loadLanguage('german');
453
+ * filter.loadIndianLanguages(); // Loads hindi, bengali, tamil, telugu
454
+ *
455
+ * // Add custom words
456
+ * filter.add(['customword1', 'customword2']);
457
+ *
458
+ * // Remove words
459
+ * filter.remove(['someword']);
460
+ *
461
+ * // Whitelist words
462
+ * filter.addToWhitelist(['class', 'assignment']);
463
+ * ```
464
+ *
465
+ * @see {@link BeKindOptions} for all configuration options
466
+ * @see {@link ProfanityDetectionResult} for detection result format
467
+ * @see {@link ProfanitySeverity} for severity levels
468
+ */
469
+ export class BeKind {
470
+ /**
471
+ * Creates a new BeKind instance with the specified configuration.
472
+ *
473
+ * @constructor
474
+ * @param {BeKindOptions} [options] - Configuration options for profanity detection behavior
475
+ *
476
+ * @remarks
477
+ * ### Default Initialization:
478
+ * - Loads English and Hindi dictionaries automatically
479
+ * - Enables leet speak detection
480
+ * - Case-insensitive matching
481
+ * - Uses Trie algorithm for pattern matching
482
+ *
483
+ * ### Performance Considerations:
484
+ * - Initial load time depends on number of languages loaded
485
+ * - Aho-Corasick automaton (if enabled) is built during construction
486
+ * - Bloom Filter (if enabled) is populated during construction
487
+ *
488
+ * @throws {TypeError} If invalid options are provided
489
+ *
490
+ * @example
491
+ * ```typescript
492
+ * // Default instance
493
+ * const filter = new BeKind();
494
+ *
495
+ * // Custom configuration
496
+ * const filter = new BeKind({
497
+ * languages: ['english', 'french'],
498
+ * strictMode: true,
499
+ * defaultPlaceholder: '#',
500
+ * algorithm: { matching: 'hybrid' }
501
+ * });
502
+ *
503
+ * // Silent mode (no logging)
504
+ * const filter = new BeKind({ silent: true });
505
+ * ```
506
+ *
507
+ * @see {@link BeKindOptions} for all available configuration options
508
+ */
509
+ constructor(options) {
510
+ var _a, _b, _c, _d, _e, _f, _g;
511
+ this.profanityTrie = new TrieNode();
512
+ this.whitelistSet = new Set();
513
+ this.loadedLanguages = new Set();
514
+ this.defaultPlaceholder = "*";
515
+ this.enableLeetSpeak = true;
516
+ this.caseSensitive = false;
517
+ this.strictMode = false;
518
+ this.detectPartialWords = false;
519
+ this.embeddedProfanityDetection = false;
520
+ this.separatorTolerance = 5;
521
+ this.sensitiveMode = false;
522
+ /**
523
+ * Temporary storage for suspicious matches found during separator-tolerant detection.
524
+ * Populated by findSeparatorTolerantMatches() and consumed by detect().
525
+ */
526
+ this._suspiciousMatches = null;
527
+ this.availableLanguages = {
528
+ all: Object.keys(allLanguagesBadWords || {}),
529
+ };
530
+ /**
531
+ * Word score lookup map. Maps lowercase words to their severity and certainty scores.
532
+ * Populated from the scored word list on construction.
533
+ */
534
+ this.wordScores = (() => {
535
+ // Normalize dictionary keys to lowercase so getWordScore() lookups work
536
+ // regardless of how words are cased in the dictionary files.
537
+ const raw = allLanguagesBadWords || {};
538
+ const normalized = {};
539
+ for (const [key, value] of Object.entries(raw)) {
540
+ const lk = key.toLowerCase();
541
+ // If duplicate after lowercasing, keep the higher severity entry
542
+ if (!normalized[lk] || value.severity > normalized[lk].severity) {
543
+ normalized[lk] = value;
544
+ }
545
+ }
546
+ return normalized;
547
+ })();
548
+ /**
549
+ * Set of abhorrent words/phrases that trigger needsManualReview.
550
+ * Includes hate groups, slurs, extremist terminology, and Nazi references.
551
+ * Stored in lowercase for case-insensitive matching.
552
+ */
553
+ this.abhorrentWords = new Set([
554
+ // Nazi / Third Reich
555
+ "nazi", "nazis", "neonazi", "neo nazi", "neo-nazi", "hitler",
556
+ "heil hitler", "heilhitler", "hitler did nothing wrong",
557
+ "sieg heil", "siegheil", "third reich", "thirdreich",
558
+ "final solution", "finalsolution", "master race", "masterrace",
559
+ "gas the jews", "gasthejews", "holocaust denier", "holocaustdenier",
560
+ "holocaust denial", "holocaustdenial", "holohoax",
561
+ "lebensraum", "herrenvolk", "volkisch", "völkisch",
562
+ "judenfrei", "judenrein", "untermensch", "untermenschen",
563
+ "rassenschande", "übermensch",
564
+ // KKK and white supremacist orgs
565
+ "klan", "klansman", "klansmen", "ku klux klan", "kukluxklan", "kkk",
566
+ "united klans of america", "imperial klans of america",
567
+ "knights of the ku klux klan", "loyal white knights",
568
+ "white camelia knights", "brotherhood of klans",
569
+ "white knights of the kkk",
570
+ // White supremacy / white nationalism
571
+ "white power", "whitepower", "white pride", "whitepride",
572
+ "white supremacy", "whitesupremacy", "white supremacist", "whitesupremacist",
573
+ "white nationalist", "whitenationalist", "white nationalism", "whitenationalism",
574
+ "white ethnostate", "whiteethnostate", "ethnostate",
575
+ "white genocide", "whitegenocide", "racial purity", "racialpurity",
576
+ "race purification", "racepurification", "racial purification", "racialpurification",
577
+ "racial hygiene", "racialhygiene", "ethnic cleansing", "ethniccleansing",
578
+ "aryan nation", "aryan nations", "aryan brotherhood", "aryan circle",
579
+ "aryan guard", "aryan resistance", "aryan strikeforce",
580
+ "white aryan resistance",
581
+ // Extremist groups
582
+ "proud boys", "proudboys", "oath keepers", "oathkeepers",
583
+ "atomwaffen", "atomwaffen division", "patriot front", "patriotfront",
584
+ "vanguard america", "identity evropa", "american identity movement",
585
+ "national socialist", "national socialism", "national socialist movement",
586
+ "american nazi party", "nordic resistance movement",
587
+ "golden dawn", "casa pound", "casapound",
588
+ "generation identity", "identitarian", "identitarian movement",
589
+ "hammerskins", "hammerskin nation", "combat 18", "combat18",
590
+ "blood honour", "blood honor", "volksfront",
591
+ "stormfront", "iron march", "daily stormer", "dailystormer",
592
+ "order of nine angles", "o9a",
593
+ "rise above movement", "vinlanders social club",
594
+ "nazi low riders",
595
+ // Extremist slogans and coded language
596
+ "fourteen words", "fourteenwords", "1488", "14 88",
597
+ "rahowa", "racial holy war", "racialholywar",
598
+ "blood and soil", "bloodandsoil",
599
+ "day of the rope", "dayoftherope",
600
+ "great replacement", "greatreplacement",
601
+ "race war", "racewar",
602
+ "turner diaries", "turnerdiaries",
603
+ "right wing death squad", "rwds",
604
+ "physical removal", "physicalremoval",
605
+ "free helicopter ride", "helicopter ride",
606
+ "race realism", "racerealism", "race realist", "racerealist",
607
+ // Antisemitic
608
+ "jewish question", "jewishquestion", "jq",
609
+ "zionist occupied government", "zog",
610
+ "jewish conspiracy", "jewishconspiracy",
611
+ "protocols of the elders of zion",
612
+ "international jewry", "internationaljewry", "world jewry", "worldjewry",
613
+ "blood libel", "bloodlibel", "jewish problem", "jewishproblem",
614
+ "six million lie", "sixmillionlie",
615
+ "happy merchant", "happymerchant", "le happy merchant",
616
+ "(((them)))", "(((they)))", "(((who)))",
617
+ "oy vey shut it down",
618
+ "death to jews", "kill all jews",
619
+ // Racial slurs — anti-Black
620
+ "lynching", "lynch mob", "lynchmob",
621
+ "jungle bunny", "junglebunny", "jungle bunnies", "junglebunnies",
622
+ "porch monkey", "porchmonkey", "porch monkeys", "porchmonkeys",
623
+ "spear chucker", "spearchucker", "spear chuckers", "spearchuckers",
624
+ "moon cricket", "mooncricket", "moon crickets", "mooncrickets",
625
+ "cotton picker", "cottonpicker", "cotton pickers", "cottonpickers",
626
+ "tar baby", "tarbaby",
627
+ "race soldiers", "racesoldiers",
628
+ "mud people",
629
+ // Racial slurs — anti-Asian
630
+ "gook", "gooks", "chink", "chinks", "chinaman", "chinamen",
631
+ "zipperhead", "zipperheads", "slant eye", "slanteye",
632
+ "ching chong", "chingchong", "yellow peril", "yellowperil",
633
+ "kung flu", "kungflu",
634
+ // Racial slurs — anti-Latino
635
+ "wetback", "wetbacks", "beaner", "beaners",
636
+ "spic", "spics", "spick", "spicks",
637
+ // Racial slurs — anti-Muslim/Arab
638
+ "sand nigger", "sandnigger", "sand niggers", "sandniggers",
639
+ "towel head", "towelhead", "towel heads", "towelheads",
640
+ "raghead", "ragheads", "rag head", "rag heads",
641
+ "camel jockey", "cameljockey", "camel jockeys", "cameljockeys",
642
+ "goat fucker", "goatfucker", "goat fuckers", "goatfuckers",
643
+ "muzzie", "muzzies", "muzrat", "muzrats",
644
+ // Racial slurs — anti-Indigenous
645
+ "prairie nigger", "prairienigger", "timber nigger", "timbernigger",
646
+ "wagon burner", "wagonburner", "wagon burners", "wagonburners",
647
+ "injun", "injuns",
648
+ // Anti-LGBTQ+ hate
649
+ "death to fags", "god hates fags", "godhatesfags",
650
+ "death to gays", "kill all gays",
651
+ // Genocidal language
652
+ "death to muslims", "death to blacks", "death to whites",
653
+ "death to immigrants",
654
+ "kill all muslims", "kill all blacks", "kill all whites",
655
+ "kill all immigrants",
656
+ // Coded hate
657
+ "dindu nuffin", "dindunuffin", "dindu",
658
+ "we wuz kangz", "wewuzkangz",
659
+ "ooga booga", "oogabooga",
660
+ "remove kebab", "removekebab",
661
+ "race traitor", "race traitors", "racetraitor", "racetraitors",
662
+ "sonnenrad", "black sun", "totenkopf", "wolfsangel",
663
+ // ── Additional terms (sourced from ADL, SPLC, GLAAD, Moonshot CVE, ISD Global) ──
664
+ // Anti-Asian slurs — additional
665
+ "jap", "japs", "nip", "nips",
666
+ "coolie", "coolies",
667
+ "paki", "pakis",
668
+ "slope", "slopes", "slopehead", "slopeheads",
669
+ "wog", "wogs",
670
+ "dog eater", "dogeater", "dog eaters", "dogeaters",
671
+ "bat eater", "bateater",
672
+ "china virus", "chinavirus", "wuhan virus", "wuhanvirus",
673
+ "yellow monkey", "yellowmonkey",
674
+ "rice picker", "ricepicker", "rice pickers", "ricepickers",
675
+ // Anti-Latino slurs — additional
676
+ "greaser", "greasers",
677
+ "taco bender", "tacobender",
678
+ "border bunny", "borderbunny", "border bunnies", "borderbunnies",
679
+ "border hopper", "borderhopper", "border hoppers", "borderhoppers",
680
+ "fence hopper", "fencehopper",
681
+ "anchor baby", "anchorbaby", "anchor babies", "anchorbabies",
682
+ "pepper belly", "pepperbelly",
683
+ // Anti-Indigenous slurs — additional
684
+ "redskin", "redskins",
685
+ "squaw", "squaws",
686
+ "half breed", "halfbreed", "half breeds", "halfbreeds",
687
+ "blanket ass", "blanketass",
688
+ "timber monkey", "timbermonkey",
689
+ "red nigger", "rednigger", "bush nigger", "bushnigger",
690
+ // Antisemitic — additional
691
+ "hollowcost", "hollow cost",
692
+ "jewish bankers", "jewishbankers",
693
+ "jewish media", "jewishmedia", "jewish lobby", "jewishlobby",
694
+ "jewed", "jew down",
695
+ "nose check", "nosecheck",
696
+ "early life check", "earlylifecheck", "early life section", "earlylifesection",
697
+ "every single time", "everysingletime",
698
+ "the goyim know", "thegoyimknow",
699
+ "goyim know shut it down", "goyimknowshutitdown",
700
+ "six gorillion", "sixgorillion",
701
+ "oven dodger", "ovendodger", "oven dodgers", "ovendodgers",
702
+ "wooden doors", "woodendoors",
703
+ "holocaust industry", "holocaustindustry",
704
+ "jews will not replace us", "jewswillnotreplaceus",
705
+ "you will not replace us",
706
+ "synagogue of satan", "synagogueofsatan",
707
+ "jewish supremacy", "jewishsupremacy",
708
+ "jewish bolshevism", "jewishbolshevism", "judeo bolshevism", "judeobolshevism",
709
+ "rootless cosmopolitan", "rootlesscosmopolitan",
710
+ "christ killer", "christkiller", "christ killers", "christkillers",
711
+ "greedy jew", "greedyjew", "dirty jew", "dirtyjew",
712
+ "jew rat", "jewrat",
713
+ "sheeny", "sheenies",
714
+ "khazar milkers", "khazarmilkers",
715
+ "small hat", "small hats", "smallhat",
716
+ // Anti-Muslim/Arab — additional
717
+ "deus vult", "deusvult",
718
+ "kebab remover", "kebabremover",
719
+ "mohammedan", "mohammedans",
720
+ "death to islam", "deathtoislam",
721
+ "kill all arabs", "killallarabs",
722
+ "durka durka", "durkadurka",
723
+ "goat lover", "goatlover",
724
+ "cave dweller", "cavedweller", "cave dwellers", "cavedwellers",
725
+ "abeed",
726
+ "islamo fascist", "islamofascist", "islamo fascism", "islamofascism",
727
+ // Anti-Hindu
728
+ "pajeet", "pajeets",
729
+ "poo in loo", "pooinloo", "poo in the loo", "poointheloo",
730
+ "designated shitting street", "designatedshittingstreet",
731
+ "street shitter", "streetshitter", "street shitters", "streetshitters",
732
+ "cow worshipper", "cowworshipper",
733
+ "dot head", "dothead", "dot heads", "dotheads",
734
+ "curry muncher", "currymuncher", "curry munchers", "currymunchers",
735
+ "curry nigger", "currynigger",
736
+ "death to hindus", "kill all hindus",
737
+ // Anti-Sikh
738
+ "diaper head", "diaperhead", "diaper heads", "diaperheads",
739
+ "death to sikhs", "kill all sikhs",
740
+ // Anti-LGBTQ+ hate — eliminationist phrases
741
+ "death to trannies", "death to queers", "death to lesbians",
742
+ "death to transgenders", "death to bisexuals",
743
+ "kill all trannies", "kill all queers", "kill all lesbians",
744
+ "kill all transgenders",
745
+ "hang all fags", "hang all gays", "hang all trannies",
746
+ "burn all fags", "burn all gays",
747
+ "stone the gays", "stone the fags",
748
+ "gas the gays", "gas the fags", "gas the trannies",
749
+ // Anti-LGBTQ+ hate — religious extremist slogans
750
+ "god hates gays", "godhatesgays",
751
+ "god hates queers", "godhatesqueers",
752
+ "god hates trannies", "godhatestrannies",
753
+ "fags deserve death", "fagsdeservedeath",
754
+ "fags burn in hell", "fagsburninhell",
755
+ "gays burn in hell", "gaysburninhell",
756
+ // Anti-trans specific hate
757
+ "troon", "troons",
758
+ "troid", "troids",
759
+ "trannoid", "trannoids",
760
+ "transtrender", "transtrenders",
761
+ "trans are groomers", "transaregroomers",
762
+ "tranny groomers", "trannygroomers",
763
+ "transgender groomers", "transgendergroomers",
764
+ "trans predator", "transpredator", "trans predators", "transpredators",
765
+ "trans are pedophiles", "transarepedophiles",
766
+ "trans are degenerates", "transaredegenerates",
767
+ // Anti-trans suicide baiting
768
+ "join the 41", "jointhe41", "41 percent", "41percent",
769
+ "dilate and cope", "dilateandcope",
770
+ "you will never be a woman", "youwillneverbeawoman",
771
+ "you will never be a real woman", "youwillneverbeareawoman",
772
+ "you will never pass", "youwillneverpass",
773
+ // Anti-LGBTQ+ groomer rhetoric
774
+ "gay groomers", "gaygroomers",
775
+ "lgbtq groomers", "lgbtqgroomers", "lgbt groomers", "lgbtgroomers",
776
+ "drag queen groomers", "dragqueengroomers",
777
+ "ok groomer", "okgroomer",
778
+ "homosexual agenda", "homosexualagenda",
779
+ "gay agenda", "gayagenda", "trans agenda", "transagenda",
780
+ "coming for your children", "comingforyourchildren",
781
+ // Anti-LGBTQ+ dehumanizing slurs — additional
782
+ "carpet muncher", "carpetmuncher", "carpet munchers", "carpetmunchers",
783
+ "pillow biter", "pillowbiter", "fudge packer", "fudgepacker",
784
+ "batty boy", "battyboy", "batty man", "battyman",
785
+ "chi chi man", "chichiman",
786
+ "poof", "poofs", "poofter", "poofters",
787
+ // Anti-LGBTQ+ conversion/cure rhetoric
788
+ "pray the gay away", "praythegayaway",
789
+ "homosexuality is a disease", "homosexualityisadisease",
790
+ // Anti-LGBTQ+ coded mockery
791
+ "attack helicopter", "attackhelicopter",
792
+ "i identify as an attack helicopter",
793
+ "superstraight", "super straight",
794
+ // Modern extremist groups (post-2020, ADL/SPLC documented)
795
+ "active club", "active clubs", "activeclub",
796
+ "white lives matter", "whitelivesmatter",
797
+ "patriot prayer", "patriotprayer",
798
+ "the base", "thebase",
799
+ "feuerkrieg division", "feuerkrieg",
800
+ "terrorgram", "terrorgram collective",
801
+ "goyim defense league", "goyimdefenseleague",
802
+ "national socialist order",
803
+ "aryan freedom network",
804
+ "nationalist social club", "nsc 131", "nsc131",
805
+ "groyper", "groypers", "groyper army",
806
+ "rapewaffen", "rapewaffen division",
807
+ // Boogaloo movement (ADL documented)
808
+ "boogaloo boi", "boogaloo bois", "boogaloo boys",
809
+ "big igloo", "bigigloo",
810
+ "boojahideen",
811
+ // Accelerationist terminology (Moonshot CVE / ISD)
812
+ "siege culture", "siegeculture",
813
+ "siege pill", "siegepill", "siegepilled",
814
+ "read siege", "readsiege",
815
+ "saint tarrant", "sainttarrant",
816
+ "saint breivik", "saintbreivik",
817
+ "saint roof", "saintroof",
818
+ "saint bowers", "saintbowers",
819
+ "dotr",
820
+ // Incel extremist hate speech (ADL/academic research)
821
+ "incel rebellion", "incelrebellion",
822
+ "beta uprising", "betauprising",
823
+ "supreme gentleman", "supremegentleman",
824
+ "foid", "foids", "femoid", "femoids",
825
+ "roastie", "roasties",
826
+ // Eco-fascist terminology (ISD)
827
+ "eco fascism", "ecofascism", "eco fascist", "ecofascist",
828
+ "pine tree gang", "pinetreegang",
829
+ // Internet-era coded hate — additional
830
+ "clown world", "clownworld", "honk honk", "honkhonk", "honkler",
831
+ "despite being 13 percent", "despite 13",
832
+ "6 million wasn't enough", "6mwe",
833
+ "it's okay to be white", "iotbw",
834
+ "skull mask", "skullmask",
835
+ "white boy summer", "whiteboysummer",
836
+ "wpww", "white pride world wide",
837
+ // Coded numbers (ADL Hate Symbols Database)
838
+ "1312",
839
+ // Genocide denial — additional
840
+ "armenian genocide denial",
841
+ "rwandan genocide denial",
842
+ // Anti-immigrant hate — additional
843
+ "remigration",
844
+ "camp of the saints", "campofthesaints",
845
+ "migrant invasion",
846
+ ]);
847
+ this.leetMappings = new Map([
848
+ ["@", "a"],
849
+ ["^", "a"],
850
+ ["4", "a"],
851
+ ["8", "b"],
852
+ ["6", "b"],
853
+ ["|3", "b"],
854
+ ["(", "c"],
855
+ ["<", "c"],
856
+ ["©", "c"],
857
+ ["|)", "d"],
858
+ ["0", "o"],
859
+ ["3", "e"],
860
+ ["€", "e"],
861
+ ["|=", "f"],
862
+ ["ph", "f"],
863
+ ["9", "g"],
864
+ ["#", "h"],
865
+ ["|-|", "h"],
866
+ ["1", "i"],
867
+ ["!", "i"],
868
+ ["|", "i"],
869
+ ["_|", "j"],
870
+ ["¿", "j"],
871
+ ["|<", "k"],
872
+ ["1<", "k"],
873
+ ["7", "l"],
874
+ ["|\\/|", "m"],
875
+ ["/\\/\\", "m"],
876
+ ["|\\|", "n"],
877
+ ["//", "n"],
878
+ ["()", "o"],
879
+ ["|*", "p"],
880
+ ["|o", "p"],
881
+ ["(_,)", "q"],
882
+ ["()_", "q"],
883
+ ["|2", "r"],
884
+ ["12", "r"],
885
+ ["5", "s"],
886
+ ["$", "s"],
887
+ ["z", "s"],
888
+ ["7", "t"],
889
+ ["+", "t"],
890
+ ["†", "t"],
891
+ ["|_|", "u"],
892
+ ["(_)", "u"],
893
+ ["v", "u"],
894
+ ["\\/", "v"],
895
+ ["|/", "v"],
896
+ ["\\/\\/", "w"],
897
+ ["vv", "w"],
898
+ ["><", "x"],
899
+ ["}{", "x"],
900
+ ["`/", "y"],
901
+ ["j", "y"],
902
+ ["2", "z"],
903
+ ["7_", "z"],
904
+ ]);
905
+ this.dynamicWords = new Set();
906
+ // Advanced algorithms
907
+ this.ahoCorasickAutomaton = null;
908
+ this.bloomFilter = null;
909
+ this.contextAnalyzer = null;
910
+ this.matchingAlgorithm = "trie";
911
+ this.resultCache = null;
912
+ /**
913
+ * Leet mappings where the source is a regular letter (e.g. z→s, v→u, j→y).
914
+ * These are ambiguous because they can destroy legitimate words during
915
+ * normalization (e.g. "nazi" → "nasi"). Separated so that layered
916
+ * normalization can try symbol-only mappings first.
917
+ */
918
+ this.letterToLetterLeetKeys = new Set([...this.leetMappings.keys()].filter((k) => /^[a-zA-Z]+$/.test(k)));
919
+ // Use silent logger if silent mode is enabled, otherwise use provided logger or console logger
920
+ this.logger = (options === null || options === void 0 ? void 0 : options.logger) || ((options === null || options === void 0 ? void 0 : options.silent) ? new SilentLogger() : new ConsoleLogger());
921
+ if ((options === null || options === void 0 ? void 0 : options.defaultPlaceholder) !== undefined) {
922
+ this.setPlaceholder(options.defaultPlaceholder);
923
+ }
924
+ this.enableLeetSpeak = (_a = options === null || options === void 0 ? void 0 : options.enableLeetSpeak) !== null && _a !== void 0 ? _a : true;
925
+ this.caseSensitive = (_b = options === null || options === void 0 ? void 0 : options.caseSensitive) !== null && _b !== void 0 ? _b : false;
926
+ this.strictMode = (_c = options === null || options === void 0 ? void 0 : options.strictMode) !== null && _c !== void 0 ? _c : false;
927
+ this.detectPartialWords = (_d = options === null || options === void 0 ? void 0 : options.detectPartialWords) !== null && _d !== void 0 ? _d : false;
928
+ this.embeddedProfanityDetection = (_e = options === null || options === void 0 ? void 0 : options.embeddedProfanityDetection) !== null && _e !== void 0 ? _e : false;
929
+ this.sensitiveMode = (_f = options === null || options === void 0 ? void 0 : options.sensitiveMode) !== null && _f !== void 0 ? _f : false;
930
+ const sepTol = options === null || options === void 0 ? void 0 : options.separatorTolerance;
931
+ if (sepTol === false) {
932
+ this.separatorTolerance = 0;
933
+ }
934
+ else if (typeof sepTol === "number") {
935
+ this.separatorTolerance = Math.max(0, sepTol);
936
+ }
937
+ else {
938
+ // true or undefined → default 5
939
+ this.separatorTolerance = 5;
940
+ }
941
+ if (options === null || options === void 0 ? void 0 : options.whitelistWords) {
942
+ this.addToWhitelist(options.whitelistWords);
943
+ }
944
+ // Initialize advanced algorithms BEFORE loading dictionaries
945
+ // so that words can be added to all data structures
946
+ this.initializeAdvancedAlgorithms(options);
947
+ this.loadLanguage("all");
948
+ if ((_g = options === null || options === void 0 ? void 0 : options.languages) === null || _g === void 0 ? void 0 : _g.length) {
949
+ options.languages.forEach((lang) => this.loadLanguage(lang));
950
+ }
951
+ if (options === null || options === void 0 ? void 0 : options.customDictionaries) {
952
+ Object.entries(options.customDictionaries).forEach(([name, words]) => {
953
+ this.loadCustomDictionary(name, words);
954
+ });
955
+ }
956
+ }
957
+ /**
958
+ * Initialize advanced algorithms based on configuration
959
+ */
960
+ initializeAdvancedAlgorithms(options) {
961
+ var _a, _b, _c, _d, _e, _f, _g, _h, _j, _k, _l, _m;
962
+ // Set matching algorithm
963
+ if ((_a = options === null || options === void 0 ? void 0 : options.algorithm) === null || _a === void 0 ? void 0 : _a.matching) {
964
+ this.matchingAlgorithm = options.algorithm.matching;
965
+ }
966
+ // Initialize Bloom Filter if enabled
967
+ const bloomEnabled = ((_b = options === null || options === void 0 ? void 0 : options.algorithm) === null || _b === void 0 ? void 0 : _b.useBloomFilter) ||
968
+ ((_c = options === null || options === void 0 ? void 0 : options.bloomFilter) === null || _c === void 0 ? void 0 : _c.enabled) ||
969
+ this.matchingAlgorithm === "hybrid";
970
+ if (bloomEnabled) {
971
+ const expectedItems = ((_d = options === null || options === void 0 ? void 0 : options.bloomFilter) === null || _d === void 0 ? void 0 : _d.expectedItems) || 10000;
972
+ const falsePositiveRate = ((_e = options === null || options === void 0 ? void 0 : options.bloomFilter) === null || _e === void 0 ? void 0 : _e.falsePositiveRate) || 0.01;
973
+ this.bloomFilter = new BloomFilter(expectedItems, falsePositiveRate);
974
+ this.logger.info(`Bloom Filter initialized with ${expectedItems} expected items and ${(falsePositiveRate * 100).toFixed(2)}% false positive rate`);
975
+ }
976
+ // Initialize Aho-Corasick if enabled
977
+ const ahoEnabled = ((_f = options === null || options === void 0 ? void 0 : options.algorithm) === null || _f === void 0 ? void 0 : _f.useAhoCorasick) ||
978
+ ((_g = options === null || options === void 0 ? void 0 : options.ahoCorasick) === null || _g === void 0 ? void 0 : _g.enabled) ||
979
+ this.matchingAlgorithm === "aho-corasick" ||
980
+ this.matchingAlgorithm === "hybrid";
981
+ if (ahoEnabled) {
982
+ this.ahoCorasickAutomaton = new AhoCorasick([]);
983
+ this.logger.info("Aho-Corasick automaton initialized");
984
+ }
985
+ // Initialize Context Analyzer if enabled
986
+ const contextEnabled = ((_h = options === null || options === void 0 ? void 0 : options.algorithm) === null || _h === void 0 ? void 0 : _h.useContextAnalysis) ||
987
+ ((_j = options === null || options === void 0 ? void 0 : options.contextAnalysis) === null || _j === void 0 ? void 0 : _j.enabled);
988
+ if (contextEnabled) {
989
+ const contextLanguages = ((_k = options === null || options === void 0 ? void 0 : options.contextAnalysis) === null || _k === void 0 ? void 0 : _k.languages) || ["en"];
990
+ this.contextAnalyzer = new ContextAnalyzer(contextLanguages);
991
+ if ((_l = options === null || options === void 0 ? void 0 : options.contextAnalysis) === null || _l === void 0 ? void 0 : _l.contextWindow) {
992
+ this.contextAnalyzer.setContextWindow(options.contextAnalysis.contextWindow);
993
+ }
994
+ this.logger.info(`Context Analyzer initialized for languages: ${contextLanguages.join(", ")}`);
995
+ }
996
+ // Initialize result cache if enabled
997
+ if ((_m = options === null || options === void 0 ? void 0 : options.performance) === null || _m === void 0 ? void 0 : _m.enableCaching) {
998
+ const cacheSize = options.performance.cacheSize || 1000;
999
+ this.resultCache = new Map();
1000
+ this.logger.info(`Result caching enabled with size limit: ${cacheSize}`);
1001
+ }
1002
+ }
1003
+ /**
1004
+ * Normalize leet speak to regular characters (full pass — all mappings).
1005
+ * @param text - The input text.
1006
+ * @returns Normalized text.
1007
+ */
1008
+ normalizeLeetSpeak(text) {
1009
+ if (!this.enableLeetSpeak)
1010
+ return text;
1011
+ let normalized = text.toLowerCase();
1012
+ const sortedMappings = Array.from(this.leetMappings.entries()).sort(([leetA], [leetB]) => leetB.length - leetA.length);
1013
+ for (const [leet, normal] of sortedMappings) {
1014
+ const regex = new RegExp(this.escapeRegex(leet), "g");
1015
+ normalized = normalized.replace(regex, normal);
1016
+ }
1017
+ return normalized;
1018
+ }
1019
+ /**
1020
+ * Conservative leet normalization — only replaces non-letter characters
1021
+ * (digits, symbols, punctuation) with their letter equivalents.
1022
+ * Letter-to-letter mappings (z→s, v→u, j→y, ph→f) are skipped so that
1023
+ * real letters are preserved, avoiding collisions like "nazi" → "nasi".
1024
+ */
1025
+ normalizeLeetSpeakSymbolsOnly(text) {
1026
+ if (!this.enableLeetSpeak)
1027
+ return text;
1028
+ let normalized = text.toLowerCase();
1029
+ const sortedMappings = Array.from(this.leetMappings.entries()).sort(([leetA], [leetB]) => leetB.length - leetA.length);
1030
+ for (const [leet, normal] of sortedMappings) {
1031
+ if (this.letterToLetterLeetKeys.has(leet))
1032
+ continue;
1033
+ const regex = new RegExp(this.escapeRegex(leet), "g");
1034
+ normalized = normalized.replace(regex, normal);
1035
+ }
1036
+ return normalized;
1037
+ }
1038
+ /**
1039
+ * Returns all unique leet-normalized variants of the text that differ
1040
+ * from the base normalizedText. Runs two layers:
1041
+ * 1. Symbol-only normalization (digits/special → letters, preserves real letters)
1042
+ * 2. Full normalization (all mappings including letter→letter)
1043
+ *
1044
+ * This layered approach catches both "n4zi" (symbol-only → "nazi") and
1045
+ * "a55" (full → "ass") without one breaking the other.
1046
+ */
1047
+ getLeetVariants(normalizedText) {
1048
+ if (!this.enableLeetSpeak)
1049
+ return [];
1050
+ const variants = [];
1051
+ const seen = new Set([normalizedText]);
1052
+ // Layer 1: symbol-only (conservative) — catches n4zi→nazi, wh1te→white
1053
+ const symbolOnly = this.normalizeLeetSpeakSymbolsOnly(normalizedText);
1054
+ if (!seen.has(symbolOnly)) {
1055
+ seen.add(symbolOnly);
1056
+ variants.push(symbolOnly);
1057
+ }
1058
+ // Layer 2: full normalization — catches z→s substitutions like a55→ass
1059
+ const full = this.normalizeLeetSpeak(normalizedText);
1060
+ if (!seen.has(full)) {
1061
+ seen.add(full);
1062
+ variants.push(full);
1063
+ }
1064
+ return variants;
1065
+ }
1066
+ /**
1067
+ * Check if a character is a non-space separator (skipped freely).
1068
+ */
1069
+ static isSymbolSeparator(char) {
1070
+ return BeKind.SYMBOL_SEPARATOR_SET.has(char);
1071
+ }
1072
+ /**
1073
+ * Check if a character is whitespace (skipped with certainty penalty).
1074
+ */
1075
+ static isWhitespaceSeparator(char) {
1076
+ return char === " " || char === "\t" || char === "\n" || char === "\r";
1077
+ }
1078
+ /**
1079
+ * Check if a character is any kind of separator.
1080
+ */
1081
+ static isSeparator(char) {
1082
+ return BeKind.isSymbolSeparator(char) || BeKind.isWhitespaceSeparator(char);
1083
+ }
1084
+ /**
1085
+ * Extract surrounding context (±N words) around a match position in text.
1086
+ */
1087
+ extractSurroundingContext(text, start, end, wordCount) {
1088
+ const words = text.split(/\s+/);
1089
+ let charPos = 0;
1090
+ let startWordIdx = 0;
1091
+ let endWordIdx = words.length - 1;
1092
+ for (let i = 0; i < words.length; i++) {
1093
+ const wordStart = text.indexOf(words[i], charPos);
1094
+ const wordEnd = wordStart + words[i].length;
1095
+ if (wordEnd <= start)
1096
+ startWordIdx = i;
1097
+ if (wordStart < end)
1098
+ endWordIdx = i;
1099
+ charPos = wordEnd;
1100
+ }
1101
+ const contextStart = Math.max(0, startWordIdx - wordCount);
1102
+ const contextEnd = Math.min(words.length - 1, endWordIdx + wordCount);
1103
+ return words.slice(contextStart, contextEnd + 1).join(" ");
1104
+ }
1105
+ /**
1106
+ * Escape regex special characters in a string.
1107
+ * @param str - The string to escape.
1108
+ * @returns The escaped string.
1109
+ */
1110
+ escapeRegex(str) {
1111
+ return str.replace(/[\\^$.*+?()[\]{}|]/g, "\\$&");
1112
+ }
1113
+ /**
1114
+ * Check if a match is bounded by word boundaries (strict mode).
1115
+ * @param text - The text.
1116
+ * @param start - Start index.
1117
+ * @param end - End index.
1118
+ * @returns True if match is at word boundaries, false otherwise.
1119
+ */
1120
+ hasWordBoundaries(text, start, end) {
1121
+ if (!this.strictMode)
1122
+ return true;
1123
+ const beforeChar = start > 0 ? text[start - 1] : " ";
1124
+ const afterChar = end < text.length ? text[end] : " ";
1125
+ const wordBoundaryRegex = /[\s\p{P}\p{S}]/u;
1126
+ return (wordBoundaryRegex.test(beforeChar) && wordBoundaryRegex.test(afterChar));
1127
+ }
1128
+ isWholeWord(text, start, end) {
1129
+ // CJK scripts (Chinese, Japanese, Korean) have no spaces between words.
1130
+ // If the matched word is CJK, treat it as a whole-word hit unconditionally —
1131
+ // the language-signal guard in isHighCoverageEmbed handles cross-script FPs.
1132
+ if (BeKind.CJK_RE.test(text.slice(start, end)))
1133
+ return true;
1134
+ // Use \p{L} (Unicode letter) not \w (ASCII-only) so that accented and
1135
+ // non-Latin characters (ü, ş, ğ, é, ñ, …) are correctly recognised as
1136
+ // word characters and do not act as false word-boundaries.
1137
+ if (start !== 0 && /\p{L}/u.test(text[start - 1]))
1138
+ return false;
1139
+ if (end !== text.length && /\p{L}/u.test(text[end]))
1140
+ return false;
1141
+ return true;
1142
+ }
1143
+ /**
1144
+ * Returns the char-index bounds of the host word containing [start, end).
1145
+ * Scans outward using the same Unicode-letter definition as isWholeWord.
1146
+ */
1147
+ getHostWordBounds(text, start, end) {
1148
+ let hostStart = start;
1149
+ while (hostStart > 0 && /\p{L}/u.test(text[hostStart - 1]))
1150
+ hostStart--;
1151
+ let hostEnd = end;
1152
+ while (hostEnd < text.length && /\p{L}/u.test(text[hostEnd]))
1153
+ hostEnd++;
1154
+ return { hostStart, hostEnd };
1155
+ }
1156
+ isHighCoverageEmbed(text, matchStart, matchEnd, matchWord, docLangSignal) {
1157
+ var _a, _b;
1158
+ const matchLen = matchEnd - matchStart;
1159
+ if (matchLen < BeKind.HIGH_COVERAGE_MIN_MATCH_LEN)
1160
+ return false;
1161
+ const { hostStart, hostEnd } = this.getHostWordBounds(text, matchStart, matchEnd);
1162
+ const hostLen = hostEnd - hostStart;
1163
+ if (hostLen === 0)
1164
+ return false;
1165
+ // Graduated coverage: shorter matches need higher coverage to reduce FPs
1166
+ const coverageThreshold = matchLen <= 6
1167
+ ? BeKind.HIGH_COVERAGE_THRESHOLD_SHORT
1168
+ : BeKind.HIGH_COVERAGE_THRESHOLD_LONG;
1169
+ if (matchLen / hostLen < coverageThreshold)
1170
+ return false;
1171
+ if (matchWord) {
1172
+ const wordScore = this.wordScores[matchWord.toLowerCase()];
1173
+ if (wordScore) {
1174
+ const profaneLang = wordScore.language;
1175
+ const hostWord = text.slice(hostStart, hostEnd);
1176
+ // Word-level language signal guard
1177
+ const hostSignal = scoreWord(hostWord);
1178
+ const wordLangSignal = (_a = hostSignal[profaneLang]) !== null && _a !== void 0 ? _a : 0;
1179
+ if (wordLangSignal < BeKind.HIGH_COVERAGE_LANG_SIGNAL_MIN)
1180
+ return false;
1181
+ // Document-level language mismatch guard: if the doc is strongly one
1182
+ // language and the profane word is from a DIFFERENT language, skip.
1183
+ // e.g. English doc + French "engin" in "engine" → skip
1184
+ if (docLangSignal) {
1185
+ const docProfaneLangSignal = (_b = docLangSignal[profaneLang]) !== null && _b !== void 0 ? _b : 0;
1186
+ const docTopSignal = Math.max(...Object.values(docLangSignal), 0);
1187
+ // If profane word's language has < 10% doc signal AND another language
1188
+ // dominates the doc (> 50%), this is almost certainly a cross-language FP
1189
+ if (docProfaneLangSignal < 0.1 && docTopSignal > 0.5)
1190
+ return false;
1191
+ }
1192
+ // Innocent embed guard: check hostWords allowlist and partialDampeningFactor
1193
+ const innocentEntries = innocentWords[matchWord.toLowerCase()];
1194
+ if (innocentEntries) {
1195
+ const lowerHost = hostWord.toLowerCase();
1196
+ if (innocentEntries.some(e => { var _a; return (_a = e.hostWords) === null || _a === void 0 ? void 0 : _a.includes(lowerHost); }))
1197
+ return false;
1198
+ if (innocentEntries.some(e => { var _a; return ((_a = e.partialDampeningFactor) !== null && _a !== void 0 ? _a : 0) >= 0.5; }))
1199
+ return false;
1200
+ }
1201
+ }
1202
+ }
1203
+ return true;
1204
+ }
1205
+ /**
1206
+ * Check if a match is whitelisted.
1207
+ * @param word - Word from dictionary.
1208
+ * @param matchedText - Actual matched text.
1209
+ * @returns True if whitelisted, false otherwise.
1210
+ */
1211
+ isWhitelistedMatch(word, matchedText) {
1212
+ if (this.caseSensitive) {
1213
+ return this.whitelistSet.has(word) || this.whitelistSet.has(matchedText);
1214
+ }
1215
+ else {
1216
+ return (this.whitelistSet.has(word.toLowerCase()) ||
1217
+ this.whitelistSet.has(matchedText.toLowerCase()));
1218
+ }
1219
+ }
1220
+ /**
1221
+ * Remove overlapping matches, keeping only the longest at each start position.
1222
+ * @param matches - Array of match results.
1223
+ * @returns Deduplicated matches.
1224
+ */
1225
+ deduplicateMatches(matches) {
1226
+ const sorted = [...matches].sort((a, b) => {
1227
+ if (a.start !== b.start)
1228
+ return a.start - b.start;
1229
+ return b.end - a.end;
1230
+ });
1231
+ const result = [];
1232
+ let lastEnd = -1;
1233
+ for (const match of sorted) {
1234
+ if (match.start >= lastEnd) {
1235
+ result.push(match);
1236
+ lastEnd = match.end;
1237
+ }
1238
+ }
1239
+ return result;
1240
+ }
1241
+ /**
1242
+ * Use Aho-Corasick algorithm for pattern matching
1243
+ */
1244
+ findMatchesWithAhoCorasick(searchText, originalText) {
1245
+ if (!this.ahoCorasickAutomaton) {
1246
+ return [];
1247
+ }
1248
+ const ahoMatches = this.ahoCorasickAutomaton.findAll(searchText);
1249
+ const results = [];
1250
+ // Compute doc-level language signal once for all embed checks
1251
+ let docLangSignal;
1252
+ const getDocLang = () => {
1253
+ if (!docLangSignal) {
1254
+ const detected = detectLanguages(originalText, { maxLanguages: 3 });
1255
+ docLangSignal = {};
1256
+ for (const lang of detected.languages) {
1257
+ docLangSignal[lang.language] = lang.proportion;
1258
+ }
1259
+ }
1260
+ return docLangSignal;
1261
+ };
1262
+ for (const match of ahoMatches) {
1263
+ const isWhole = this.isWholeWord(originalText, match.start, match.end);
1264
+ if (!this.detectPartialWords && !isWhole) {
1265
+ if (!this.isHighCoverageEmbed(originalText, match.start, match.end, match.pattern, getDocLang())) {
1266
+ continue;
1267
+ }
1268
+ }
1269
+ const matchedText = originalText.substring(match.start, match.end);
1270
+ if (this.isWhitelistedMatch(match.pattern, matchedText)) {
1271
+ continue;
1272
+ }
1273
+ if (this.hasWordBoundaries(originalText, match.start, match.end)) {
1274
+ results.push({
1275
+ word: match.pattern,
1276
+ start: match.start,
1277
+ end: match.end,
1278
+ originalWord: matchedText,
1279
+ });
1280
+ }
1281
+ }
1282
+ return results;
1283
+ }
1284
+ /**
1285
+ * Hybrid approach: Aho-Corasick for fast matching, Bloom Filter for validation
1286
+ */
1287
+ findMatchesHybrid(searchText, originalText) {
1288
+ // Use Aho-Corasick for primary matching if available
1289
+ if (this.ahoCorasickAutomaton) {
1290
+ const matches = this.findMatchesWithAhoCorasick(searchText, originalText);
1291
+ // If Bloom Filter is enabled, validate matches
1292
+ if (this.bloomFilter) {
1293
+ return matches.filter((match) => this.bloomFilter.mightContain(match.word));
1294
+ }
1295
+ return matches;
1296
+ }
1297
+ // Fallback to Trie if Aho-Corasick not available
1298
+ const matches = [];
1299
+ this.findMatches(searchText, originalText, matches);
1300
+ // Validate with Bloom Filter if enabled
1301
+ if (this.bloomFilter) {
1302
+ return matches.filter((match) => this.bloomFilter.mightContain(match.word));
1303
+ }
1304
+ return matches;
1305
+ }
1306
+ /**
1307
+ * Apply context analysis to filter false positives
1308
+ */
1309
+ applyContextAnalysis(text, matches, scoreThreshold = 0.5) {
1310
+ if (!this.contextAnalyzer) {
1311
+ return matches;
1312
+ }
1313
+ return matches.filter((match) => {
1314
+ const analysis = this.contextAnalyzer.analyzeContext(text, match.start, match.end, match.word);
1315
+ // If score is above threshold, it's likely profanity
1316
+ return analysis.score >= scoreThreshold;
1317
+ });
1318
+ }
1319
+ /**
1320
+ * Detects profanity in the provided text and returns comprehensive analysis.
1321
+ *
1322
+ * @param {string} text - The text to analyze for profanity
1323
+ * @returns {ProfanityDetectionResult} Detailed detection result including matches, positions, severity, and cleaned text
1324
+ *
1325
+ * @throws {TypeError} If text is not a string
1326
+ *
1327
+ * @remarks
1328
+ * ### Performance:
1329
+ * - Time Complexity: O(n*m) where n is text length, m is average word length in dictionary
1330
+ * - With Bloom Filter: O(n) average case (faster early rejection)
1331
+ * - With Caching: O(1) for repeated identical text
1332
+ *
1333
+ * ### Features:
1334
+ * - Detects leet speak variations (if enabled): "h3ll0" → "hello"
1335
+ * - Respects word boundaries (strict mode) or detects partial matches
1336
+ * - Returns exact positions for highlighting/masking
1337
+ * - Calculates severity based on match count and uniqueness
1338
+ *
1339
+ * ### Caching:
1340
+ * - Results are cached if `performance.enableCaching` is true
1341
+ * - Cache uses LRU eviction when size limit is reached
1342
+ *
1343
+ * @example
1344
+ * ```typescript
1345
+ * const filter = new BeKind();
1346
+ * const result = filter.detect("This has bad words");
1347
+ *
1348
+ * console.log(result.hasProfanity); // true
1349
+ * console.log(result.detectedWords); // ['bad']
1350
+ * console.log(result.cleanedText); // 'This has *** words'
1351
+ * console.log(result.severity); // ProfanitySeverity.MILD
1352
+ * console.log(result.positions); // [{ word: 'bad', start: 9, end: 12 }]
1353
+ * ```
1354
+ *
1355
+ * @example
1356
+ * ```typescript
1357
+ * // With leet speak detection
1358
+ * const filter = new BeKind({ enableLeetSpeak: true });
1359
+ * const result = filter.detect("st0p b3ing b@d");
1360
+ *
1361
+ * if (result.hasProfanity) {
1362
+ * result.positions.forEach(pos => {
1363
+ * console.log(`Found "${pos.word}" at position ${pos.start}-${pos.end}`);
1364
+ * });
1365
+ * }
1366
+ * ```
1367
+ *
1368
+ * @see {@link ProfanityDetectionResult} for result structure
1369
+ * @see {@link ProfanitySeverity} for severity levels
1370
+ */
1371
+ detect(text) {
1372
+ var _a, _b, _c;
1373
+ const validatedText = validateString(text, "text");
1374
+ if (validatedText.length === 0) {
1375
+ return {
1376
+ hasProfanity: false,
1377
+ detectedWords: [],
1378
+ cleanedText: validatedText,
1379
+ severity: ProfanitySeverity.MILD,
1380
+ positions: [],
1381
+ needsManualReview: false,
1382
+ flaggedAbhorrentWords: [],
1383
+ scoredWords: [],
1384
+ maxSeverity: null,
1385
+ suspiciousPhrases: [],
1386
+ };
1387
+ }
1388
+ // Check cache first if enabled
1389
+ if ((_a = this.resultCache) === null || _a === void 0 ? void 0 : _a.has(validatedText)) {
1390
+ return this.resultCache.get(validatedText);
1391
+ }
1392
+ // Reset temporary suspicious match storage
1393
+ this._suspiciousMatches = null;
1394
+ let matches = [];
1395
+ const normalizedText = this.caseSensitive
1396
+ ? validatedText
1397
+ : validatedText.toLowerCase();
1398
+ // Choose matching algorithm based on configuration
1399
+ // Leet-speak uses layered normalization: symbol-only first, then full,
1400
+ // so that letter→letter mappings (z→s) don't clobber legitimate letters.
1401
+ const leetVariants = this.getLeetVariants(normalizedText);
1402
+ switch (this.matchingAlgorithm) {
1403
+ case "aho-corasick":
1404
+ matches = this.findMatchesWithAhoCorasick(normalizedText, validatedText);
1405
+ for (const variant of leetVariants) {
1406
+ matches.push(...this.findMatchesWithAhoCorasick(variant, validatedText));
1407
+ }
1408
+ break;
1409
+ case "hybrid":
1410
+ matches = this.findMatchesHybrid(normalizedText, validatedText);
1411
+ for (const variant of leetVariants) {
1412
+ matches.push(...this.findMatchesHybrid(variant, validatedText));
1413
+ }
1414
+ break;
1415
+ case "trie":
1416
+ default:
1417
+ this.findMatches(normalizedText, validatedText, matches);
1418
+ for (const variant of leetVariants) {
1419
+ this.findMatches(variant, validatedText, matches);
1420
+ }
1421
+ break;
1422
+ }
1423
+ // Separator-tolerant matching: re-walk the trie but skip over separators
1424
+ if (this.separatorTolerance > 0) {
1425
+ this.findSeparatorTolerantMatches(normalizedText, validatedText, matches);
1426
+ }
1427
+ // Context analysis is handled via certainty-delta in shouldFlagWithContext()
1428
+ const allUniqueMatches = this.deduplicateMatches(matches);
1429
+ // Partition: certainty:0 matches become suspicious phrases, not profanity
1430
+ const uniqueMatches = allUniqueMatches.filter((m) => {
1431
+ const score = this.getWordScore(m.word);
1432
+ return !score || score.certainty !== 0;
1433
+ });
1434
+ const suspiciousFromCertaintyZero = allUniqueMatches.filter((m) => {
1435
+ const score = this.getWordScore(m.word);
1436
+ return score && score.certainty === 0;
1437
+ });
1438
+ const detectedWords = uniqueMatches.map((m) => m.originalWord);
1439
+ const severity = this.calculateSeverity(uniqueMatches);
1440
+ const cleanedText = this.generateCleanedText(validatedText, uniqueMatches);
1441
+ // Check for abhorrent words that need manual review
1442
+ const flaggedAbhorrentWords = uniqueMatches
1443
+ .filter((m) => this.abhorrentWords.has(m.word.toLowerCase()))
1444
+ .map((m) => m.originalWord);
1445
+ const uniqueAbhorrent = [...new Set(flaggedAbhorrentWords)];
1446
+ // Lazy document-level language detection — only computed if a collision word is matched
1447
+ let docSignal = null;
1448
+ function getDocSignal() {
1449
+ if (docSignal === null) {
1450
+ docSignal = {};
1451
+ const docResult = detectLanguages(text);
1452
+ for (const lang of docResult.languages) {
1453
+ docSignal[lang.language] = lang.proportion;
1454
+ }
1455
+ }
1456
+ return docSignal;
1457
+ }
1458
+ // Build scoredWords: PROFANE if shouldFlag(), AMBIVALENT otherwise
1459
+ // For embedded/substring matches, use the decayed scores for flag determination
1460
+ const scoredWords = uniqueMatches.map((m) => {
1461
+ var _a, _b;
1462
+ let wordSev;
1463
+ if (m.isSubstringMatch && m.decayedScore) {
1464
+ const { severity, certainty } = m.decayedScore;
1465
+ const shouldFlagEmbedded = BeKind.shouldFlagWithCertainty(severity, certainty);
1466
+ wordSev = shouldFlagEmbedded ? WordSeverity.PROFANE : WordSeverity.AMBIVALENT;
1467
+ }
1468
+ else {
1469
+ // Check for cross-language innocence before standard shouldFlag
1470
+ const normalizedWord = m.word.toLowerCase();
1471
+ const innocentEntries = innocentWords[normalizedWord];
1472
+ if (innocentEntries && innocentEntries.length > 0) {
1473
+ const wordScore = this.getWordScore(m.word);
1474
+ if (wordScore) {
1475
+ const ds = getDocSignal();
1476
+ const wordSignal = scoreWord(normalizedWord);
1477
+ const DOC_WEIGHT = 1.5;
1478
+ const WORD_WEIGHT = 1.0;
1479
+ const TOTAL_WEIGHT = DOC_WEIGHT + WORD_WEIGHT;
1480
+ const amplified = {};
1481
+ for (const lang of new Set([...Object.keys(wordSignal), ...Object.keys(ds)])) {
1482
+ const lk = lang;
1483
+ amplified[lang] = (((_a = wordSignal[lk]) !== null && _a !== void 0 ? _a : 0) * WORD_WEIGHT + ((_b = ds[lk]) !== null && _b !== void 0 ? _b : 0) * DOC_WEIGHT) / TOTAL_WEIGHT;
1484
+ }
1485
+ let adjustedCertainty = adjustCertaintyForLanguage(wordScore.certainty, wordScore.language, innocentEntries, amplified);
1486
+ // Apply context-based certainty delta on top of language adjustment
1487
+ if (this.contextAnalyzer) {
1488
+ const delta = this.contextAnalyzer.getCertaintyDelta(validatedText, m.start, m.end, m.word);
1489
+ adjustedCertainty = Math.max(0, Math.min(5, adjustedCertainty + delta));
1490
+ }
1491
+ const adjustedShouldFlag = BeKind.shouldFlagWithCertainty(wordScore.severity, adjustedCertainty);
1492
+ wordSev = adjustedShouldFlag ? WordSeverity.PROFANE : WordSeverity.AMBIVALENT;
1493
+ }
1494
+ else {
1495
+ wordSev = this.shouldFlagWithContext(m.word, validatedText, m.start, m.end) ? WordSeverity.PROFANE : WordSeverity.AMBIVALENT;
1496
+ }
1497
+ }
1498
+ else {
1499
+ wordSev = this.shouldFlagWithContext(m.word, validatedText, m.start, m.end) ? WordSeverity.PROFANE : WordSeverity.AMBIVALENT;
1500
+ }
1501
+ }
1502
+ return { word: m.originalWord, severity: wordSev };
1503
+ });
1504
+ const maxSeverity = scoredWords.length > 0
1505
+ ? Math.max(...scoredWords.map((sw) => sw.severity))
1506
+ : null;
1507
+ const rawSuspicious = (_b = this._suspiciousMatches) !== null && _b !== void 0 ? _b : [];
1508
+ const suspiciousPhrases = rawSuspicious.map((sm) => {
1509
+ const score = this.getWordScore(sm.word);
1510
+ const baseScore = score
1511
+ ? { severity: score.severity, certainty: score.certainty }
1512
+ : { severity: 1, certainty: 1 };
1513
+ const context = this.extractSurroundingContext(validatedText, sm.start, sm.end, 5);
1514
+ return {
1515
+ word: sm.word,
1516
+ originalText: sm.originalWord,
1517
+ context,
1518
+ start: sm.start,
1519
+ end: sm.end,
1520
+ baseScore,
1521
+ spaceBoundaries: sm.spaceBoundaries,
1522
+ };
1523
+ });
1524
+ this._suspiciousMatches = null;
1525
+ // Append certainty:0 matches as suspicious phrases
1526
+ for (const m of suspiciousFromCertaintyZero) {
1527
+ const score = this.getWordScore(m.word);
1528
+ const context = this.extractSurroundingContext(validatedText, m.start, m.end, 5);
1529
+ suspiciousPhrases.push({
1530
+ word: m.word,
1531
+ originalText: m.originalWord,
1532
+ context,
1533
+ start: m.start,
1534
+ end: m.end,
1535
+ baseScore: { severity: (_c = score === null || score === void 0 ? void 0 : score.severity) !== null && _c !== void 0 ? _c : 1, certainty: 0 },
1536
+ spaceBoundaries: 0,
1537
+ });
1538
+ }
1539
+ // sensitiveMode (default: false) controls whether AMBIVALENT words trigger hasProfanity.
1540
+ // When sensitiveMode is true, any match (including AMBIVALENT cross-language collisions
1541
+ // like "bitte" in German text) counts as profanity.
1542
+ // When false (default), only PROFANE-scored words count.
1543
+ const hasProfane = this.sensitiveMode
1544
+ ? uniqueMatches.length > 0
1545
+ : scoredWords.some((sw) => sw.severity === WordSeverity.PROFANE);
1546
+ const result = {
1547
+ hasProfanity: hasProfane,
1548
+ detectedWords,
1549
+ cleanedText,
1550
+ severity,
1551
+ positions: uniqueMatches.map((m) => ({
1552
+ word: m.originalWord,
1553
+ start: m.start,
1554
+ end: m.end,
1555
+ })),
1556
+ needsManualReview: uniqueAbhorrent.length > 0,
1557
+ flaggedAbhorrentWords: uniqueAbhorrent,
1558
+ scoredWords,
1559
+ maxSeverity,
1560
+ suspiciousPhrases,
1561
+ };
1562
+ // Cache result if caching is enabled
1563
+ if (this.resultCache) {
1564
+ this.resultCache.set(validatedText, result);
1565
+ // Implement simple LRU by clearing cache when it gets too large
1566
+ if (this.resultCache.size > 1000) {
1567
+ const firstKey = this.resultCache.keys().next().value;
1568
+ if (firstKey !== undefined) {
1569
+ this.resultCache.delete(firstKey);
1570
+ }
1571
+ }
1572
+ }
1573
+ return result;
1574
+ }
1575
/**
 * Main matching function, with whole-word logic.
 *
 * Runs the profanity trie from every starting index of `searchText`,
 * then filters each raw hit through whole-word / high-coverage-embed /
 * whitelist / boundary checks against `originalText` before recording it.
 *
 * @param searchText - The normalized text to search.
 * @param originalText - The original text; used for boundary checks and for
 *   extracting the matched substring exactly as the user typed it.
 * @param matches - Array to collect matches (mutated in place).
 */
findMatches(searchText, originalText, matches) {
    // Ranges accepted by boundary matching; passed to embedded detection so
    // it does not re-report spans already covered here.
    const boundaryMatchedRanges = [];
    // Compute doc-level language signal once for all embed checks.
    // Lazy: detectLanguages only runs if some non-whole-word hit needs it.
    let docLangSignal;
    const getDocLang = () => {
        if (!docLangSignal) {
            const detected = detectLanguages(originalText, { maxLanguages: 3 });
            docLangSignal = {};
            for (const lang of detected.languages) {
                docLangSignal[lang.language] = lang.proportion;
            }
        }
        return docLangSignal;
    };
    for (let i = 0; i < searchText.length; i++) {
        const matchResults = this.profanityTrie.findMatches(searchText, i, this.detectPartialWords);
        for (const match of matchResults) {
            // Trie results are relative to index i; convert to absolute offsets.
            const start = i + match.start;
            const end = i + match.end;
            const isWhole = this.isWholeWord(originalText, start, end);
            if (!this.detectPartialWords && !isWhole) {
                // Partial hit inside a longer word: keep it only when the
                // high-coverage embed heuristic (with the doc language signal)
                // says the profane part dominates the host word.
                if (!this.isHighCoverageEmbed(originalText, start, end, match.word, getDocLang())) {
                    continue;
                }
            }
            const matchedText = originalText.substring(start, end);
            if (this.isWhitelistedMatch(match.word, matchedText)) {
                continue;
            }
            if (this.hasWordBoundaries(originalText, start, end)) {
                matches.push({
                    word: match.word,
                    start,
                    end,
                    originalWord: matchedText,
                });
                boundaryMatchedRanges.push({ start, end });
            }
        }
    }
    // Embedded profanity detection: find profane substrings inside words
    // that weren't caught by word-boundary matching
    if (this.embeddedProfanityDetection) {
        this.findEmbeddedMatches(searchText, originalText, matches, boundaryMatchedRanges);
    }
}
1627
/**
 * Walk the trie while tolerating separator characters between letters.
 * Catches evasion patterns: "fu ck", "c.u.n.t", "fu@ck@cu@nt@bi@tch"
 *
 * Symbol separators (@, ., -, etc.) are skipped freely.
 * Space separators reduce certainty by SPACE_CERTAINTY_PENALTY per gap.
 * Matches that drop below the flagging threshold become "suspicious" instead.
 *
 * @param searchText - Normalized text to scan.
 * @param originalText - Original text; used for whitelist checks and for
 *   capturing the matched span as typed.
 * @param matches - Matches already found by normal matching; read only to
 *   avoid re-reporting the same words. NOT appended to here — results are
 *   pushed onto this._suspiciousMatches instead.
 */
findSeparatorTolerantMatches(searchText, originalText, matches) {
    // One report per distinct (lower-cased) word across the whole text.
    const alreadyFound = new Set(matches.map((m) => m.word.toLowerCase()));
    const maxSkip = this.separatorTolerance;
    for (let i = 0; i < searchText.length; i++) {
        // Only start walks from non-separator characters at word-boundary positions
        if (BeKind.isSeparator(searchText[i]))
            continue;
        if (i > 0 && /\w/.test(searchText[i - 1]))
            continue;
        const found = this.walkTrieWithSeparators(this.profanityTrie, searchText, i, maxSkip, 0);
        for (const { word, endPos, anySeparatorSkipped, spaceBoundaries } of found) {
            // Only report if separators were actually skipped (normal matching handles the rest)
            if (!anySeparatorSkipped)
                continue;
            // Require minimum word length of 3 to avoid short false positives
            if (word.length < 3)
                continue;
            if (alreadyFound.has(word.toLowerCase()))
                continue;
            if (this.isWhitelistedMatch(word, originalText.substring(i, endPos)))
                continue;
            alreadyFound.add(word.toLowerCase());
            // All separator-tolerant matches are suspicious only for now.
            // They're captured with context for review but don't flag as profanity.
            if (!this._suspiciousMatches)
                this._suspiciousMatches = [];
            this._suspiciousMatches.push({
                word,
                start: i,
                end: endPos,
                originalWord: originalText.substring(i, endPos),
                spaceBoundaries,
            });
        }
    }
}
1671
/**
 * Recursively walk the trie from a given node, skipping separator chars.
 * Tracks space boundaries crossed (for certainty penalty) separately from
 * symbol separators (which are free to skip).
 *
 * @param node - Current trie node (callers start with the trie root).
 * @param text - Text being scanned.
 * @param pos - Current index into `text`.
 * @param maxSkip - Maximum consecutive separator chars skipped per run.
 * @param spaceBoundaries - Whitespace gaps crossed so far on this walk.
 * @param totalSkips - Total separator characters skipped so far (default 0).
 * @returns Array of { word, endPos, anySeparatorSkipped, spaceBoundaries }.
 */
walkTrieWithSeparators(node, text, pos, maxSkip, spaceBoundaries, totalSkips = 0) {
    const results = [];
    // End of text: report only if we stopped exactly on a word-ending node.
    if (pos >= text.length) {
        if (node.isEndOfWord) {
            results.push({ word: node.word, endPos: pos, anySeparatorSkipped: totalSkips > 0, spaceBoundaries });
        }
        return results;
    }
    const char = text[pos];
    // Try matching the character directly in the trie
    const nextNode = node.getChild(char);
    if (nextNode) {
        // Report complete words as soon as they're reached, then keep
        // walking — longer words sharing this prefix may also match.
        if (nextNode.isEndOfWord) {
            results.push({ word: nextNode.word, endPos: pos + 1, anySeparatorSkipped: totalSkips > 0, spaceBoundaries });
        }
        results.push(...this.walkTrieWithSeparators(nextNode, text, pos + 1, maxSkip, spaceBoundaries, totalSkips));
    }
    // If current char is a separator, skip over consecutive separators
    // (up to maxSkip) without consuming a trie edge.
    if (BeKind.isSeparator(char)) {
        let skipCount = 0;
        let skipPos = pos;
        let hasSpace = false;
        while (skipPos < text.length && BeKind.isSeparator(text[skipPos]) && skipCount < maxSkip) {
            if (BeKind.isWhitespaceSeparator(text[skipPos]))
                hasSpace = true;
            skipPos++;
            skipCount++;
        }
        if (skipPos < text.length && skipCount > 0) {
            // A skipped run containing any whitespace counts as ONE space boundary.
            const newSpaceBoundaries = spaceBoundaries + (hasSpace ? 1 : 0);
            results.push(...this.walkTrieWithSeparators(node, text, skipPos, maxSkip, newSpaceBoundaries, totalSkips + skipCount));
        }
    }
    return results;
}
1711
/**
 * Find profane substrings embedded inside larger words with certainty decay.
 *
 * Formula: decayed_c = base_c * (DECAY_RATE ^ extra_chars) * (profane_len / host_word_len)
 *
 * Multi-profanity bonus: if a host word contains multiple profane substrings,
 * certainty is boosted (sum of base severities used as multiplier, capped at c:5).
 *
 * Unusually long words (12+ chars) containing profanity get a certainty bonus
 * since legitimate words rarely exceed this length.
 *
 * @param searchText - Normalized text to scan for host words.
 * @param originalText - Original text; source of the reported substrings.
 * @param matches - Array to append decayed substring matches to (in place).
 * @param alreadyMatched - Ranges already accepted by boundary matching;
 *   hits covered by these are skipped.
 */
findEmbeddedMatches(searchText, originalText, matches, alreadyMatched) {
    // Extract individual words from text with their positions
    // (Latin + extended, Cyrillic, Arabic, CJK, Hangul letter runs).
    const wordPattern = /[a-zA-Z\u00C0-\u024F\u0400-\u04FF\u0600-\u06FF\u3000-\u9FFF\uAC00-\uD7AF]+/g;
    let wordMatch;
    while ((wordMatch = wordPattern.exec(searchText)) !== null) {
        const hostWord = wordMatch[0];
        const hostStart = wordMatch.index;
        const hostEnd = hostStart + hostWord.length;
        // Skip if this word was already fully matched by boundary detection
        const fullyMatched = alreadyMatched.some((r) => r.start <= hostStart && r.end >= hostEnd);
        if (fullyMatched)
            continue;
        // Find all profane substrings within this word
        const embeddedFinds = [];
        for (let i = 0; i < hostWord.length; i++) {
            const subMatches = this.profanityTrie.findMatches(hostWord.toLowerCase(), i, true);
            for (const sub of subMatches) {
                // Convert word-relative offsets to absolute text offsets.
                const subStart = hostStart + i + sub.start;
                const subEnd = hostStart + i + sub.end;
                // Skip if this exact range was already boundary-matched
                const alreadyCovered = alreadyMatched.some((r) => r.start === subStart && r.end === subEnd);
                if (alreadyCovered)
                    continue;
                // Only scored dictionary words participate in decay scoring.
                const score = this.wordScores[sub.word];
                if (!score)
                    continue;
                embeddedFinds.push({
                    word: sub.word,
                    start: subStart,
                    end: subEnd,
                    baseSeverity: score.severity,
                    baseCertainty: score.certainty,
                });
            }
        }
        if (embeddedFinds.length === 0)
            continue;
        // Deduplicate: keep longest match at each position
        const dedupedFinds = this.deduplicateEmbeddedFinds(embeddedFinds);
        // Multi-profanity bonus: if multiple distinct profane roots found, boost certainty
        const multiBonus = dedupedFinds.length >= 2
            ? Math.min(dedupedFinds.length * 0.5, 2.0) // +0.5 per extra root, cap +2
            : 0;
        // Unusually long word bonus (12+ chars with profanity = likely evasion)
        const lengthBonus = hostWord.length >= 12 ? 1.0 : 0;
        for (const find of dedupedFinds) {
            const profaneLen = find.word.length;
            const extraChars = hostWord.length - profaneLen;
            const decayFactor = Math.pow(BeKind.EMBEDDED_DECAY_RATE, extraChars);
            const lengthRatio = profaneLen / hostWord.length;
            let decayedCertainty = find.baseCertainty * decayFactor * lengthRatio + multiBonus + lengthBonus;
            // Clamp to [1, 5] and round to an integer certainty level.
            decayedCertainty = Math.round(Math.max(1, Math.min(5, decayedCertainty)));
            // NOTE(review): after the clamp above, decayedCertainty >= 1, so this
            // gate is dead if EMBEDDED_MIN_CERTAINTY <= 1 — confirm the constant.
            if (decayedCertainty < BeKind.EMBEDDED_MIN_CERTAINTY)
                continue;
            const matchedText = originalText.substring(find.start, find.end);
            matches.push({
                word: find.word,
                start: find.start,
                end: find.end,
                originalWord: matchedText,
                isSubstringMatch: true,
                decayedScore: { severity: find.baseSeverity, certainty: decayedCertainty },
            });
        }
    }
}
1788
+ /**
1789
+ * Deduplicate embedded finds: at overlapping positions, keep the longest match.
1790
+ */
1791
+ deduplicateEmbeddedFinds(finds) {
1792
+ // Sort by start, then by length descending
1793
+ const sorted = [...finds].sort((a, b) => a.start - b.start || (b.end - b.start) - (a.end - a.start));
1794
+ const result = [];
1795
+ let lastEnd = -1;
1796
+ for (const find of sorted) {
1797
+ // Skip if fully contained within a previous match
1798
+ if (find.start >= lastEnd || find.end > lastEnd) {
1799
+ result.push(find);
1800
+ lastEnd = Math.max(lastEnd, find.end);
1801
+ }
1802
+ }
1803
+ return result;
1804
+ }
1805
+ /**
1806
+ * Generate cleaned text by replacing profane words.
1807
+ * @param originalText - The original text.
1808
+ * @param matches - Array of matches.
1809
+ * @returns Cleaned text.
1810
+ */
1811
+ generateCleanedText(originalText, matches) {
1812
+ if (matches.length === 0)
1813
+ return originalText;
1814
+ let result = originalText;
1815
+ const sortedMatches = [...this.deduplicateMatches(matches)].sort((a, b) => b.start - a.start);
1816
+ for (const match of sortedMatches) {
1817
+ const replacement = this.defaultPlaceholder.repeat(match.originalWord.length);
1818
+ result =
1819
+ result.substring(0, match.start) +
1820
+ replacement +
1821
+ result.substring(match.end);
1822
+ }
1823
+ return result;
1824
+ }
1825
+ /**
1826
+ * Quick boolean check for profanity presence in text.
1827
+ *
1828
+ * @param {string} text - The text to check for profanity
1829
+ * @returns {boolean} True if profanity is detected, false otherwise
1830
+ *
1831
+ * @throws {TypeError} If text is not a string
1832
+ *
1833
+ * @remarks
1834
+ * - Convenience method that internally calls `detect()` and returns only the boolean result
1835
+ * - For detailed information about matches, use `detect()` instead
1836
+ * - Results are cached if caching is enabled (same cache as `detect()`)
1837
+ *
1838
+ * @example
1839
+ * ```typescript
1840
+ * const filter = new BeKind();
1841
+ *
1842
+ * if (filter.check("This has bad words")) {
1843
+ * console.log("Profanity detected!");
1844
+ * }
1845
+ *
1846
+ * // Quick validation
1847
+ * const isClean = !filter.check(userInput);
1848
+ * ```
1849
+ *
1850
+ * @see {@link detect} for detailed profanity analysis
1851
+ */
1852
+ check(text) {
1853
+ return this.detect(text).hasProfanity;
1854
+ }
1855
+ /**
1856
+ * Cleans text by replacing profanity with a placeholder character.
1857
+ *
1858
+ * @param {string} text - The text to clean
1859
+ * @param {string} [placeholder] - Optional custom placeholder character (uses default if not provided)
1860
+ * @returns {string} The cleaned text with profanity replaced
1861
+ *
1862
+ * @throws {TypeError} If text is not a string
1863
+ *
1864
+ * @remarks
1865
+ * ### Character-level Replacement:
1866
+ * - Each profane character is replaced individually
1867
+ * - "bad" with placeholder "*" becomes "***"
1868
+ * - Preserves text length and structure
1869
+ *
1870
+ * ### Placeholder Behavior:
1871
+ * - If no placeholder provided, uses the instance's default placeholder
1872
+ * - If placeholder provided, uses only the first character
1873
+ * - Empty placeholder throws error
1874
+ *
1875
+ * @example
1876
+ * ```typescript
1877
+ * const filter = new BeKind();
1878
+ *
1879
+ * // Using default placeholder (*)
1880
+ * const cleaned = filter.clean("This has bad words");
1881
+ * console.log(cleaned); // "This has *** *****"
1882
+ *
1883
+ * // Using custom placeholder
1884
+ * const cleaned = filter.clean("This has bad words", "#");
1885
+ * console.log(cleaned); // "This has ### #####"
1886
+ * ```
1887
+ *
1888
+ * @example
1889
+ * ```typescript
1890
+ * // Clean user-generated content for display
1891
+ * const userComment = "Some inappropriate words here";
1892
+ * const safeComment = filter.clean(userComment);
1893
+ * displayComment(safeComment);
1894
+ * ```
1895
+ *
1896
+ * @see {@link cleanWithPlaceholder} for word-level replacement
1897
+ * @see {@link setPlaceholder} to change default placeholder
1898
+ */
1899
+ clean(text, placeholder) {
1900
+ const detection = this.detect(text);
1901
+ if (!placeholder || placeholder === this.defaultPlaceholder) {
1902
+ return detection.cleanedText;
1903
+ }
1904
+ let result = text;
1905
+ const sortedPositions = [
1906
+ ...this.deduplicateMatches(detection.positions.map((p) => ({
1907
+ word: p.word,
1908
+ start: p.start,
1909
+ end: p.end,
1910
+ originalWord: text.substring(p.start, p.end),
1911
+ }))),
1912
+ ].sort((a, b) => b.start - a.start);
1913
+ for (const pos of sortedPositions) {
1914
+ const originalWord = text.substring(pos.start, pos.end);
1915
+ const replacement = placeholder.repeat(originalWord.length);
1916
+ result =
1917
+ result.substring(0, pos.start) +
1918
+ replacement +
1919
+ result.substring(pos.end);
1920
+ }
1921
+ return result;
1922
+ }
1923
+ /**
1924
+ * Cleans text by replacing each profane word with a single placeholder string (word-level replacement).
1925
+ *
1926
+ * @param {string} text - The text to clean
1927
+ * @param {string} [placeholder="***"] - The placeholder string to use for each profane word
1928
+ * @returns {string} The cleaned text with each profane word replaced by the placeholder
1929
+ *
1930
+ * @throws {TypeError} If text is not a string
1931
+ *
1932
+ * @remarks
1933
+ * ### Word-level Replacement:
1934
+ * - Each profane word is replaced with the entire placeholder string (not character-by-character)
1935
+ * - "bad words" with placeholder "***" becomes "*** ***"
1936
+ * - Does NOT preserve original text length
1937
+ *
1938
+ * ### Difference from `clean()`:
1939
+ * - `clean()`: Character-level replacement - "bad" becomes "***" (preserves length)
1940
+ * - `cleanWithPlaceholder()`: Word-level replacement - "bad" becomes "***" (fixed placeholder)
1941
+ *
1942
+ * @example
1943
+ * ```typescript
1944
+ * const filter = new BeKind();
1945
+ *
1946
+ * // Default placeholder (***) const text = "This has bad words";
1947
+ * const cleaned = filter.cleanWithPlaceholder(text);
1948
+ * console.log(cleaned); // "This has *** ***"
1949
+ *
1950
+ * // Custom placeholder
1951
+ * const cleaned2 = filter.cleanWithPlaceholder(text, "[CENSORED]");
1952
+ * console.log(cleaned2); // "This has [CENSORED] [CENSORED]"
1953
+ * ```
1954
+ *
1955
+ * @example
1956
+ * ```typescript
1957
+ * // Censoring chat messages
1958
+ * const message = "You are a badword and stupid";
1959
+ * const censored = filter.cleanWithPlaceholder(message, "[***]");
1960
+ * // Result: "You are a [***] and [***]"
1961
+ * ```
1962
+ *
1963
+ * @see {@link clean} for character-level replacement
1964
+ */
1965
+ cleanWithPlaceholder(text, placeholder = "***") {
1966
+ const detection = this.detect(text);
1967
+ if (detection.positions.length === 0)
1968
+ return text;
1969
+ let result = text;
1970
+ const sortedPositions = [
1971
+ ...this.deduplicateMatches(detection.positions.map((p) => ({
1972
+ word: p.word,
1973
+ start: p.start,
1974
+ end: p.end,
1975
+ originalWord: text.substring(p.start, p.end),
1976
+ }))),
1977
+ ].sort((a, b) => b.start - a.start);
1978
+ for (const pos of sortedPositions) {
1979
+ if (!this.isWholeWord(result, pos.start, pos.end))
1980
+ continue;
1981
+ result =
1982
+ result.substring(0, pos.start) +
1983
+ placeholder +
1984
+ result.substring(pos.end);
1985
+ }
1986
+ return result;
1987
+ }
1988
+ /**
1989
+ * Dynamically adds one or more words to the profanity filter at runtime.
1990
+ *
1991
+ * @param {string | string[]} word - A single word or array of words to add to the filter
1992
+ * @returns {void}
1993
+ *
1994
+ * @remarks
1995
+ * ### Behavior:
1996
+ * - Words are added to all active data structures (Trie, Aho-Corasick, Bloom Filter)
1997
+ * - Automatically normalizes words based on caseSensitive setting
1998
+ * - Skips whitelisted words
1999
+ * - Validates and filters out non-string or empty values
2000
+ * - Changes take effect immediately for subsequent detect/check/clean calls
2001
+ *
2002
+ * ### Use Cases:
2003
+ * - Adding context-specific profanity
2004
+ * - Building dynamic word lists from user reports
2005
+ * - Customizing filters for specific communities/applications
2006
+ *
2007
+ * @example
2008
+ * ```typescript
2009
+ * const filter = new BeKind();
2010
+ *
2011
+ * // Add single word
2012
+ * filter.add('newbadword');
2013
+ *
2014
+ * // Add multiple words
2015
+ * filter.add(['word1', 'word2', 'word3']);
2016
+ *
2017
+ * // Now these words will be detected
2018
+ * filter.check('newbadword'); // true
2019
+ * ```
2020
+ *
2021
+ * @example
2022
+ * ```typescript
2023
+ * // Add game-specific slang dynamically
2024
+ * const filter = new BeKind();
2025
+ * const gamingSlang = ['noob', 'trash', 'tryhard'];
2026
+ * filter.add(gamingSlang);
2027
+ *
2028
+ * const message = "You're such a noob";
2029
+ * console.log(filter.check(message)); // true
2030
+ * ```
2031
+ *
2032
+ * @see {@link remove} to remove words
2033
+ * @see {@link loadCustomDictionary} for loading named dictionaries
2034
+ */
2035
+ add(word) {
2036
+ const words = Array.isArray(word) ? word : [word];
2037
+ const validatedWords = validateStringArray(words, "words to add");
2038
+ for (const w of validatedWords) {
2039
+ this.dynamicWords.add(w);
2040
+ this.addWordToTrie(w);
2041
+ }
2042
+ }
2043
+ /**
2044
+ * Dynamically removes one or more words from the profanity filter at runtime.
2045
+ *
2046
+ * @param {string | string[]} word - A single word or array of words to remove from the filter
2047
+ * @returns {void}
2048
+ *
2049
+ * @remarks
2050
+ * ### Behavior:
2051
+ * - Removes words from all active data structures (Trie, dynamic words set)
2052
+ * - Normalizes words based on caseSensitive setting before removal
2053
+ * - Only removes dynamically added words, not words from loaded language dictionaries
2054
+ * - Changes take effect immediately for subsequent detect/check/clean calls
2055
+ *
2056
+ * ### Important Notes:
2057
+ * - Cannot remove words from built-in language dictionaries
2058
+ * - To exclude dictionary words, use `addToWhitelist()` instead
2059
+ * - Validates and filters out non-string or empty values
2060
+ *
2061
+ * @example
2062
+ * ```typescript
2063
+ * const filter = new BeKind();
2064
+ *
2065
+ * // Add then remove a word
2066
+ * filter.add('tempword');
2067
+ * filter.check('tempword'); // true
2068
+ *
2069
+ * filter.remove('tempword');
2070
+ * filter.check('tempword'); // false
2071
+ *
2072
+ * // Remove multiple words
2073
+ * filter.remove(['word1', 'word2']);
2074
+ * ```
2075
+ *
2076
+ * @example
2077
+ * ```typescript
2078
+ * // Managing custom word list
2079
+ * const filter = new BeKind();
2080
+ * filter.add(['custom1', 'custom2', 'custom3']);
2081
+ *
2082
+ * // Later, remove one that's no longer needed
2083
+ * filter.remove('custom2');
2084
+ * ```
2085
+ *
2086
+ * @see {@link add} to add words
2087
+ * @see {@link addToWhitelist} to exclude dictionary words without removing them
2088
+ */
2089
+ remove(word) {
2090
+ const words = Array.isArray(word) ? word : [word];
2091
+ const validatedWords = validateStringArray(words, "words to remove");
2092
+ for (const w of validatedWords) {
2093
+ const normalizedWord = this.caseSensitive ? w : w.toLowerCase();
2094
+ this.profanityTrie.removeWord(normalizedWord);
2095
+ this.dynamicWords.delete(w);
2096
+ }
2097
+ }
2098
+ /**
2099
+ * Add words to the whitelist.
2100
+ * @param words - Words to whitelist.
2101
+ */
2102
+ addToWhitelist(words) {
2103
+ const validatedWords = validateStringArray(words, "whitelist words");
2104
+ for (const word of validatedWords) {
2105
+ const normalizedWord = this.caseSensitive ? word : word.toLowerCase();
2106
+ this.whitelistSet.add(normalizedWord);
2107
+ }
2108
+ }
2109
+ /**
2110
+ * Remove words from the whitelist.
2111
+ * @param words - Words to remove from whitelist.
2112
+ */
2113
+ removeFromWhitelist(words) {
2114
+ const validatedWords = validateStringArray(words, "whitelist words");
2115
+ for (const word of validatedWords) {
2116
+ const normalizedWord = this.caseSensitive ? word : word.toLowerCase();
2117
+ this.whitelistSet.delete(normalizedWord);
2118
+ }
2119
+ }
2120
+ /**
2121
+ * Check if a word is whitelisted.
2122
+ * @param word - The word to check.
2123
+ * @returns True if whitelisted, false otherwise.
2124
+ */
2125
+ isWhitelisted(word) {
2126
+ const normalizedWord = this.caseSensitive ? word : word.toLowerCase();
2127
+ return this.whitelistSet.has(normalizedWord);
2128
+ }
2129
+ /**
2130
+ * Loads a built-in language dictionary into the profanity filter.
2131
+ *
2132
+ * @param {string} language - The language key to load (case-insensitive)
2133
+ * @returns {boolean} True if language was loaded successfully, false if not found or already loaded
2134
+ *
2135
+ * @remarks
2136
+ * ### Available Languages:
2137
+ * - `'english'` - English profanity words
2138
+ * - `'hindi'` - Hindi profanity words
2139
+ * - `'french'` - French profanity words
2140
+ * - `'german'` - German profanity words
2141
+ * - `'spanish'` - Spanish profanity words
2142
+ * - `'bengali'` - Bengali profanity words
2143
+ * - `'tamil'` - Tamil profanity words
2144
+ * - `'telugu'` - Telugu profanity words
2145
+ * - `'brazilian'` - Brazilian Portuguese profanity words
2146
+ *
2147
+ * ### Behavior:
2148
+ * - Language keys are case-insensitive
2149
+ * - Loading is idempotent - calling multiple times for same language is safe
2150
+ * - Returns true if language loaded successfully or was already loaded
2151
+ * - Returns false if language not found
2152
+ * - Logs success/failure messages (unless silent mode enabled)
2153
+ * - Words are added to all active data structures
2154
+ *
2155
+ * ### Default Languages:
2156
+ * English and Hindi are loaded automatically in the constructor
2157
+ *
2158
+ * @example
2159
+ * ```typescript
2160
+ * const filter = new BeKind();
2161
+ *
2162
+ * // Load additional languages
2163
+ * filter.loadLanguage('french');
2164
+ * filter.loadLanguage('spanish');
2165
+ *
2166
+ * // Case-insensitive
2167
+ * filter.loadLanguage('GERMAN'); // Works
2168
+ *
2169
+ * // Check if loaded
2170
+ * console.log(filter.getLoadedLanguages()); // ['english', 'hindi', 'french', 'spanish', 'german']
2171
+ * ```
2172
+ *
2173
+ * @example
2174
+ * ```typescript
2175
+ * // Load all Indian languages at once
2176
+ * const filter = new BeKind();
2177
+ * filter.loadIndianLanguages();
2178
+ * ```
2179
+ *
2180
+ * @see {@link loadLanguages} to load multiple languages at once
2181
+ * @see {@link loadIndianLanguages} for convenience method
2182
+ * @see {@link getAvailableLanguages} to see all available languages
2183
+ * @see {@link getLoadedLanguages} to see currently loaded languages
2184
+ */
2185
+ loadLanguage(language) {
2186
+ if (!language || typeof language !== "string") {
2187
+ this.logger.warn(`Invalid language parameter: ${language}`);
2188
+ return false;
2189
+ }
2190
+ const langKey = language.toLowerCase().trim();
2191
+ if (this.loadedLanguages.has(langKey)) {
2192
+ return true;
2193
+ }
2194
+ const words = this.availableLanguages[langKey];
2195
+ if (!words || words.length === 0) {
2196
+ this.logger.warn(`Language '${language}' not found or empty`);
2197
+ return false;
2198
+ }
2199
+ try {
2200
+ let addedCount = 0;
2201
+ for (const word of words) {
2202
+ if (this.addWordToTrie(word)) {
2203
+ addedCount++;
2204
+ }
2205
+ }
2206
+ this.loadedLanguages.add(langKey);
2207
+ this.logger.info(`Loaded ${addedCount} words from ${language} dictionary`);
2208
+ return true;
2209
+ }
2210
+ catch (error) {
2211
+ this.logger.error(`Failed to load language ${language}: ${error}`);
2212
+ return false;
2213
+ }
2214
+ }
2215
+ /**
2216
+ * Load multiple language dictionaries.
2217
+ * @param languages - Array of languages to load.
2218
+ * @returns Number of successfully loaded languages.
2219
+ */
2220
+ loadLanguages(languages) {
2221
+ const validatedLanguages = validateStringArray(languages, "languages");
2222
+ return validatedLanguages.reduce((count, lang) => {
2223
+ return this.loadLanguage(lang) ? count + 1 : count;
2224
+ }, 0);
2225
+ }
2226
+ /**
2227
+ * Load all supported Indian languages.
2228
+ * @returns Number of loaded Indian languages.
2229
+ */
2230
+ loadIndianLanguages() {
2231
+ const indianLanguages = ["hindi", "bengali", "tamil", "telugu"];
2232
+ return this.loadLanguages(indianLanguages);
2233
+ }
2234
+ /**
2235
+ * Loads a custom dictionary of profane words with a specific name.
2236
+ *
2237
+ * @param {string} name - Unique name/identifier for this custom dictionary
2238
+ * @param {string[]} words - Array of profane words to add to the dictionary
2239
+ * @returns {void}
2240
+ *
2241
+ * @throws {TypeError} If name is not a string or words is not an array
2242
+ *
2243
+ * @remarks
2244
+ * ### Behavior:
2245
+ * - Creates a new named dictionary or overwrites existing one with same name
2246
+ * - Validates and filters out non-string and empty values from words array
2247
+ * - Words are added to all active data structures (Trie, Aho-Corasick, Bloom Filter)
2248
+ * - Dictionary name is converted to lowercase for storage
2249
+ * - Logs count of loaded words (unless silent mode enabled)
2250
+ *
2251
+ * ### Use Cases:
2252
+ * - Domain-specific profanity (gaming, medical, legal, etc.)
2253
+ * - Organization-specific word lists
2254
+ * - Temporary or context-dependent filters
2255
+ * - Testing and development
2256
+ *
2257
+ * @example
2258
+ * ```typescript
2259
+ * const filter = new BeKind();
2260
+ *
2261
+ * // Load gaming-specific slang
2262
+ * filter.loadCustomDictionary('gaming', [
2263
+ * 'noob',
2264
+ * 'scrub',
2265
+ * 'tryhard',
2266
+ * 'trash'
2267
+ * ]);
2268
+ *
2269
+ * // Load company-specific terms
2270
+ * filter.loadCustomDictionary('company', [
2271
+ * 'competitor1',
2272
+ * 'bannedTerm1',
2273
+ * 'inappropriateJargon'
2274
+ * ]);
2275
+ *
2276
+ * console.log(filter.check('You are such a noob')); // true
2277
+ * ```
2278
+ *
2279
+ * @example
2280
+ * ```typescript
2281
+ * // Load from external source
2282
+ * const filter = new BeKind();
2283
+ *
2284
+ * async function loadExternalDictionary() {
2285
+ * const response = await fetch('https://example.com/custom-words.json');
2286
+ * const customWords = await response.json();
2287
+ * filter.loadCustomDictionary('external', customWords);
2288
+ * }
2289
+ * ```
2290
+ *
2291
+ * @see {@link add} for adding individual words dynamically
2292
+ * @see {@link loadLanguage} for loading built-in language dictionaries
2293
+ */
2294
+ loadCustomDictionary(name, words) {
2295
+ validateString(name, "dictionary name");
2296
+ const validatedWords = validateStringArray(words, "custom dictionary words");
2297
+ if (validatedWords.length === 0) {
2298
+ this.logger.warn(`Custom dictionary '${name}' contains no valid words`);
2299
+ return;
2300
+ }
2301
+ try {
2302
+ let addedCount = 0;
2303
+ for (const word of validatedWords) {
2304
+ if (this.addWordToTrie(word)) {
2305
+ addedCount++;
2306
+ }
2307
+ }
2308
+ this.availableLanguages[name.toLowerCase()] = validatedWords;
2309
+ this.loadedLanguages.add(name.toLowerCase());
2310
+ this.logger.info(`Loaded ${addedCount} words from custom dictionary '${name}'`);
2311
+ }
2312
+ catch (error) {
2313
+ this.logger.error(`Failed to load custom dictionary ${name}: ${error}`);
2314
+ }
2315
+ }
2316
+ /**
2317
+ * Add a single word to the trie.
2318
+ * @param word - The word to add.
2319
+ * @returns True if added, false otherwise.
2320
+ */
2321
+ addWordToTrie(word) {
2322
+ if (!word || typeof word !== "string" || word.trim().length === 0) {
2323
+ return false;
2324
+ }
2325
+ const normalizedWord = this.caseSensitive
2326
+ ? word.trim()
2327
+ : word.trim().toLowerCase();
2328
+ if (this.isWhitelisted(normalizedWord)) {
2329
+ return false;
2330
+ }
2331
+ // Add to Trie (always used as fallback)
2332
+ this.profanityTrie.addWord(normalizedWord);
2333
+ // Add to Bloom Filter if enabled
2334
+ if (this.bloomFilter) {
2335
+ this.bloomFilter.add(normalizedWord);
2336
+ }
2337
+ // Add to Aho-Corasick automaton if enabled
2338
+ if (this.ahoCorasickAutomaton) {
2339
+ this.ahoCorasickAutomaton.addPattern(normalizedWord);
2340
+ }
2341
+ return true;
2342
+ }
2343
+ /**
2344
+ * Calculate severity from matches.
2345
+ * @param matches - Array of matches.
2346
+ * @returns Severity level.
2347
+ */
2348
+ calculateSeverity(matches) {
2349
+ if (matches.length === 0)
2350
+ return ProfanitySeverity.MILD;
2351
+ const uniqueWords = new Set(matches.map((m) => m.word)).size;
2352
+ const totalMatches = matches.length;
2353
+ if (totalMatches >= 5 || uniqueWords >= 4)
2354
+ return ProfanitySeverity.EXTREME;
2355
+ if (totalMatches >= 3 || uniqueWords >= 3)
2356
+ return ProfanitySeverity.SEVERE;
2357
+ if (totalMatches >= 2 || uniqueWords >= 2)
2358
+ return ProfanitySeverity.MODERATE;
2359
+ return ProfanitySeverity.MILD;
2360
+ }
2361
+ /**
2362
+ * Get the severity (s) and certainty (c) scores for a word.
2363
+ * Returns null if the word has no score entry.
2364
+ *
2365
+ * @param word - The word to look up
2366
+ * @returns The score object or null
2367
+ */
2368
+ getWordScore(word) {
2369
+ var _a;
2370
+ const normalized = word.toLowerCase().trim();
2371
+ return (_a = this.wordScores[normalized]) !== null && _a !== void 0 ? _a : null;
2372
+ }
2373
+ /**
2374
+ * Check whether a word should be flagged based on its severity/certainty scores.
2375
+ *
2376
+ * Threshold rules:
2377
+ * - Flag if s:5 (any certainty)
2378
+ * - Flag if s:4+ AND c:2+
2379
+ * - Flag if s:3 AND c:3+
2380
+ * - Allow everything else
2381
+ *
2382
+ * @param word - The word to check
2383
+ * @returns true if the word should be flagged
2384
+ */
2385
+ /**
2386
+ * Shared threshold logic: determines whether a severity/certainty pair
2387
+ * crosses the flag threshold. Used by shouldFlag, shouldFlagWithContext,
2388
+ * and inline threshold checks.
2389
+ */
2390
+ static shouldFlagWithCertainty(severity, certainty) {
2391
+ return severity === 5 || (severity >= 4 && certainty >= 2) || (severity === 3 && certainty >= 3);
2392
+ }
2393
+ shouldFlag(word) {
2394
+ const score = this.getWordScore(word);
2395
+ if (!score)
2396
+ return false;
2397
+ return BeKind.shouldFlagWithCertainty(score.severity, score.certainty);
2398
+ }
2399
+ /**
2400
+ * Context-aware shouldFlag: for words with certainty ≤ 3, applies
2401
+ * certainty-delta adjustments from surrounding context before evaluating
2402
+ * the shouldFlag threshold. Words with certainty > 3 skip context analysis.
2403
+ */
2404
+ shouldFlagWithContext(word, text, matchStart, matchEnd) {
2405
+ const wordScore = this.getWordScore(word);
2406
+ if (!wordScore)
2407
+ return false;
2408
+ if (this.contextAnalyzer) {
2409
+ const delta = this.contextAnalyzer.getCertaintyDelta(text, matchStart, matchEnd, word);
2410
+ const adjustedCertainty = Math.max(0, Math.min(5, wordScore.certainty + delta));
2411
+ return BeKind.shouldFlagWithCertainty(wordScore.severity, adjustedCertainty);
2412
+ }
2413
+ return this.shouldFlag(word);
2414
+ }
2415
+ /**
2416
+ * Clear all loaded dictionaries and dynamic words.
2417
+ */
2418
+ clearList() {
2419
+ this.profanityTrie.clear();
2420
+ this.loadedLanguages.clear();
2421
+ this.dynamicWords.clear();
2422
+ }
2423
+ /**
2424
+ * Set the placeholder character for filtered words.
2425
+ * @param placeholder - The placeholder character.
2426
+ */
2427
+ setPlaceholder(placeholder) {
2428
+ validateString(placeholder, "placeholder");
2429
+ if (placeholder.length === 0) {
2430
+ throw new Error("Placeholder cannot be empty");
2431
+ }
2432
+ this.defaultPlaceholder = placeholder.charAt(0);
2433
+ }
2434
+ /**
2435
+ * Get the list of loaded languages.
2436
+ * @returns Array of loaded language keys.
2437
+ */
2438
+ getLoadedLanguages() {
2439
+ return Array.from(this.loadedLanguages);
2440
+ }
2441
+ /**
2442
+ * Get the list of available built-in languages.
2443
+ * @returns Array of available language keys.
2444
+ */
2445
+ getAvailableLanguages() {
2446
+ return Object.keys(this.availableLanguages);
2447
+ }
2448
+ /**
2449
+ * Get the current configuration of the profanity filter.
2450
+ * @returns Partial configuration object.
2451
+ */
2452
+ getConfig() {
2453
+ return {
2454
+ defaultPlaceholder: this.defaultPlaceholder,
2455
+ enableLeetSpeak: this.enableLeetSpeak,
2456
+ caseSensitive: this.caseSensitive,
2457
+ strictMode: this.strictMode,
2458
+ detectPartialWords: this.detectPartialWords,
2459
+ languages: this.getLoadedLanguages(),
2460
+ whitelistWords: Array.from(this.whitelistSet),
2461
+ };
2462
+ }
2463
+ /**
2464
+ * Rebuild the profanity trie from loaded dictionaries and dynamic words.
2465
+ */
2466
+ rebuildTrie() {
2467
+ this.profanityTrie.clear();
2468
+ for (const lang of this.loadedLanguages) {
2469
+ const words = this.availableLanguages[lang] || [];
2470
+ for (const word of words) {
2471
+ this.addWordToTrie(word);
2472
+ }
2473
+ }
2474
+ for (const word of this.dynamicWords) {
2475
+ this.addWordToTrie(word);
2476
+ }
2477
+ }
2478
+ /**
2479
+ * Update configuration options for the profanity filter.
2480
+ * @param options - Partial configuration object.
2481
+ */
2482
+ updateConfig(options) {
2483
+ let rebuildNeeded = false;
2484
+ if (options.defaultPlaceholder !== undefined) {
2485
+ this.setPlaceholder(options.defaultPlaceholder);
2486
+ }
2487
+ if (options.enableLeetSpeak !== undefined) {
2488
+ this.enableLeetSpeak = options.enableLeetSpeak;
2489
+ }
2490
+ if (options.caseSensitive !== undefined &&
2491
+ options.caseSensitive !== this.caseSensitive) {
2492
+ this.caseSensitive = options.caseSensitive;
2493
+ rebuildNeeded = true;
2494
+ }
2495
+ if (options.strictMode !== undefined) {
2496
+ this.strictMode = options.strictMode;
2497
+ }
2498
+ if (options.detectPartialWords !== undefined) {
2499
+ this.detectPartialWords = options.detectPartialWords;
2500
+ }
2501
+ if (options.embeddedProfanityDetection !== undefined) {
2502
+ this.embeddedProfanityDetection = options.embeddedProfanityDetection;
2503
+ }
2504
+ if (options.separatorTolerance !== undefined) {
2505
+ const sepTol = options.separatorTolerance;
2506
+ if (sepTol === false) {
2507
+ this.separatorTolerance = 0;
2508
+ }
2509
+ else if (typeof sepTol === "number") {
2510
+ this.separatorTolerance = Math.max(0, sepTol);
2511
+ }
2512
+ else {
2513
+ this.separatorTolerance = 5;
2514
+ }
2515
+ }
2516
+ if (options.whitelistWords) {
2517
+ this.addToWhitelist(options.whitelistWords);
2518
+ }
2519
+ if (rebuildNeeded) {
2520
+ this.rebuildTrie();
2521
+ }
2522
+ }
2523
+ /**
2524
+ * Create an BeKind instance from a configuration object.
2525
+ * @param config - Configuration object
2526
+ * @returns A new BeKind instance
2527
+ */
2528
+ static fromConfig(config) {
2529
+ const options = {};
2530
+ if (config.algorithm)
2531
+ options.algorithm = config.algorithm;
2532
+ if (config.bloomFilter)
2533
+ options.bloomFilter = config.bloomFilter;
2534
+ if (config.ahoCorasick)
2535
+ options.ahoCorasick = config.ahoCorasick;
2536
+ if (config.contextAnalysis)
2537
+ options.contextAnalysis = config.contextAnalysis;
2538
+ if (config.performance)
2539
+ options.performance = config.performance;
2540
+ if (config.profanityDetection) {
2541
+ options.enableLeetSpeak = config.profanityDetection.enableLeetSpeak;
2542
+ options.caseSensitive = config.profanityDetection.caseSensitive;
2543
+ options.strictMode = config.profanityDetection.strictMode;
2544
+ options.detectPartialWords = config.profanityDetection.detectPartialWords;
2545
+ options.defaultPlaceholder = config.profanityDetection.defaultPlaceholder;
2546
+ }
2547
+ if (config.enableLeetSpeak !== undefined)
2548
+ options.enableLeetSpeak = config.enableLeetSpeak;
2549
+ if (config.caseSensitive !== undefined)
2550
+ options.caseSensitive = config.caseSensitive;
2551
+ if (config.strictMode !== undefined)
2552
+ options.strictMode = config.strictMode;
2553
+ if (config.detectPartialWords !== undefined)
2554
+ options.detectPartialWords = config.detectPartialWords;
2555
+ if (config.defaultPlaceholder !== undefined)
2556
+ options.defaultPlaceholder = config.defaultPlaceholder;
2557
+ if (config.languages)
2558
+ options.languages = config.languages;
2559
+ if (config.whitelistWords)
2560
+ options.whitelistWords = config.whitelistWords;
2561
+ if (config.customDictionaries)
2562
+ options.customDictionaries = config.customDictionaries;
2563
+ if (config.logger)
2564
+ options.logger = config.logger;
2565
+ return new BeKind(options);
2566
+ }
2567
+ }
2568
/**
 * Non-space separator characters (evasion symbols like @, ., -, etc.).
 * These are skipped freely during separator-tolerant matching, with no
 * certainty penalty.
 */
BeKind.SYMBOL_SEPARATOR_SET = new Set("@._-*#~`|\\\/+^=:;,!?'\"(){}[]<>".split(""));
/**
 * Certainty penalty per space boundary crossed during separator-tolerant
 * matching. Each distinct whitespace gap reduces the matched word's
 * certainty by this amount.
 * e.g., "fu ck" → fuck (c:5) → c:5-2 = c:3 → still flags at s:3
 * e.g., "No m" → nom (c:3) → c:3-2 = c:1 → drops below threshold
 */
BeKind.SPACE_CERTAINTY_PENALTY = 2;
/**
 * Matches a single CJK character (Han / Hiragana / Katakana / Hangul).
 * Presumably used to detect scripts without word boundaries when deciding
 * whether a match counts as a whole word — TODO confirm at the call site.
 * NOTE(review): the original comment here described a whole-word helper
 * method (with @param tags) and appears to have been misplaced by the
 * build tooling.
 */
BeKind.CJK_RE = /[\p{Script=Han}\p{Script=Hiragana}\p{Script=Katakana}\p{Script=Hangul}]/u;
/**
 * High-coverage embedded-match thresholds.
 *
 * When a match is embedded (not a whole word), these constants govern
 * whether the profane substring covers a large enough fraction of its
 * host word to be flagged anyway — catching deliberate obfuscation like
 * "urASSHOLEbro", where "asshole" (7 chars) = 58 % of the 12-char host.
 *
 * Guards (all must pass):
 * 1. Match length ≥ HIGH_COVERAGE_MIN_MATCH_LEN (6) — short words
 *    (ass/shit/anal/semen) occur too often inside innocent hosts.
 * 2. Graduated coverage threshold — shorter matches need higher coverage:
 *    - 6-char matches: ≥ 85 % (only near-exact wraps like "ufucker")
 *    - 7+ char matches: ≥ 55 % (obfuscation like "urASSHOLEbro")
 * 3. Language signal — scoreWord() on the host word must show at least
 *    HIGH_COVERAGE_LANG_SIGNAL_MIN signal for the profane word's
 *    language; otherwise it is a cross-language collision (e.g. "singe"
 *    = French slur inside "singer", which scores as English → skip).
 *
 * Examples:
 *   "asshole" (7, en) in "urASSHOLEbro" (en signal) = 58 % → flagged ✓
 *   "fucker" (6, en) in "ufucker" (en signal) = 86 % → flagged ✓
 *   "raging" (6, en) in "foraging" = 75 % → below 85 % for 6-char → safe ✓
 *   "semen" (5) in "basement" → too short → safe ✓
 *   "anal" (4) in "canal" → too short → safe ✓
 *   "singe" (5, fr) in "singer" → too short → safe ✓
 *   "negro" (5, en) in "negroni" → too short → safe ✓
 */
BeKind.HIGH_COVERAGE_THRESHOLD_SHORT = 0.85; // coverage floor for 6-char matches
BeKind.HIGH_COVERAGE_THRESHOLD_LONG = 0.55; // coverage floor for 7+ char matches
BeKind.HIGH_COVERAGE_MIN_MATCH_LEN = 6; // minimum profane-substring length
BeKind.HIGH_COVERAGE_LANG_SIGNAL_MIN = 0.05; // minimum language-signal score
/**
 * Decay constant for embedded profanity detection: each extra character
 * beyond the profane root multiplies certainty by this factor.
 */
BeKind.EMBEDDED_DECAY_RATE = 0.9;
/**
 * Minimum decayed certainty required to report an embedded match at all.
 */
BeKind.EMBEDDED_MIN_CERTAINTY = 2;
2626
/**
 * Default export: a shared BeKind instance created with the default
 * configuration.
 */
const defaultFilter = new BeKind();
export default defaultFilter;
//# sourceMappingURL=index.js.map