allprofanity 2.1.1 → 2.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CONTRIBUTORS.md +106 -0
- package/README.md +354 -26
- package/allprofanity.config.example.json +35 -0
- package/bin/init.js +49 -0
- package/config.schema.json +163 -0
- package/dist/algos/aho-corasick.d.ts +75 -0
- package/dist/algos/aho-corasick.js +238 -0
- package/dist/algos/aho-corasick.js.map +1 -0
- package/dist/algos/bloom-filter.d.ts +103 -0
- package/dist/algos/bloom-filter.js +208 -0
- package/dist/algos/bloom-filter.js.map +1 -0
- package/dist/algos/context-patterns.d.ts +88 -0
- package/dist/algos/context-patterns.js +298 -0
- package/dist/algos/context-patterns.js.map +1 -0
- package/dist/index.d.ts +818 -37
- package/dist/index.js +935 -60
- package/dist/index.js.map +1 -1
- package/dist/languages/brazilian-words.d.ts +7 -0
- package/dist/languages/brazilian-words.js +207 -0
- package/dist/languages/brazilian-words.js.map +1 -0
- package/package.json +23 -7
package/dist/index.js
CHANGED
|
@@ -7,6 +7,11 @@ import spanishBadWords from "./languages/spanish-words.js";
|
|
|
7
7
|
import bengaliBadWords from "./languages/bengali-words.js";
|
|
8
8
|
import tamilBadWords from "./languages/tamil-words.js";
|
|
9
9
|
import teluguBadWords from "./languages/telugu-words.js";
|
|
10
|
+
import brazilianBadWords from "./languages/brazilian-words.js";
|
|
11
|
+
// Advanced algorithm imports
|
|
12
|
+
import { AhoCorasick } from "./algos/aho-corasick.js";
|
|
13
|
+
import { BloomFilter } from "./algos/bloom-filter.js";
|
|
14
|
+
import { ContextAnalyzer } from "./algos/context-patterns.js";
|
|
10
15
|
// Export language dictionaries for direct access
|
|
11
16
|
export { default as englishBadWords } from "./languages/english-words.js";
|
|
12
17
|
export { default as hindiBadWords } from "./languages/hindi-words.js";
|
|
@@ -16,36 +21,128 @@ export { default as spanishBadWords } from "./languages/spanish-words.js";
|
|
|
16
21
|
export { default as bengaliBadWords } from "./languages/bengali-words.js";
|
|
17
22
|
export { default as tamilBadWords } from "./languages/tamil-words.js";
|
|
18
23
|
export { default as teluguBadWords } from "./languages/telugu-words.js";
|
|
24
|
+
export { default as brazilianBadWords } from "./languages/brazilian-words.js";
|
|
19
25
|
/**
|
|
20
|
-
* Default console logger implementation.
|
|
26
|
+
* Default console logger implementation for AllProfanity.
|
|
27
|
+
*
|
|
28
|
+
* @class ConsoleLogger
|
|
29
|
+
* @implements {Logger}
|
|
30
|
+
* @description Logs messages to the browser or Node.js console with an "[AllProfanity]" prefix.
|
|
31
|
+
* This is the default logger used when no custom logger is provided.
|
|
32
|
+
*
|
|
33
|
+
* @internal
|
|
21
34
|
*/
|
|
22
35
|
class ConsoleLogger {
|
|
36
|
+
/**
|
|
37
|
+
* Log informational messages to console.log with [AllProfanity] prefix.
|
|
38
|
+
*
|
|
39
|
+
* @param message - The message to log
|
|
40
|
+
* @returns void
|
|
41
|
+
*/
|
|
23
42
|
info(message) {
|
|
24
43
|
console.log(`[AllProfanity] ${message}`);
|
|
25
44
|
}
|
|
45
|
+
/**
|
|
46
|
+
* Log warning messages to console.warn with [AllProfanity] prefix.
|
|
47
|
+
*
|
|
48
|
+
* @param message - The warning message to log
|
|
49
|
+
* @returns void
|
|
50
|
+
*/
|
|
26
51
|
warn(message) {
|
|
27
52
|
console.warn(`[AllProfanity] ${message}`);
|
|
28
53
|
}
|
|
54
|
+
/**
|
|
55
|
+
* Log error messages to console.error with [AllProfanity] prefix.
|
|
56
|
+
*
|
|
57
|
+
* @param message - The error message to log
|
|
58
|
+
* @returns void
|
|
59
|
+
*/
|
|
29
60
|
error(message) {
|
|
30
61
|
console.error(`[AllProfanity] ${message}`);
|
|
31
62
|
}
|
|
32
63
|
}
|
|
33
64
|
/**
|
|
34
|
-
*
|
|
65
|
+
* Silent logger implementation that suppresses all log output.
|
|
66
|
+
*
|
|
67
|
+
* @class SilentLogger
|
|
68
|
+
* @implements {Logger}
|
|
69
|
+
* @description A no-op logger that discards all log messages. Used when `silent: true` is set
|
|
70
|
+
* in AllProfanityOptions and no custom `logger` is supplied (a provided logger takes precedence), i.e. when you want to completely disable logging.
|
|
71
|
+
*
|
|
72
|
+
* @internal
|
|
73
|
+
*/
|
|
74
|
+
class SilentLogger {
|
|
75
|
+
/**
|
|
76
|
+
* No-op implementation - messages are discarded.
|
|
77
|
+
*
|
|
78
|
+
* @param _message - The message (unused)
|
|
79
|
+
* @returns void
|
|
80
|
+
*/
|
|
81
|
+
info(_message) {
|
|
82
|
+
// Silent mode - no logging
|
|
83
|
+
}
|
|
84
|
+
/**
|
|
85
|
+
* No-op implementation - warnings are discarded.
|
|
86
|
+
*
|
|
87
|
+
* @param _message - The warning message (unused)
|
|
88
|
+
* @returns void
|
|
89
|
+
*/
|
|
90
|
+
warn(_message) {
|
|
91
|
+
// Silent mode - no logging
|
|
92
|
+
}
|
|
93
|
+
/**
|
|
94
|
+
* No-op implementation - errors are discarded.
|
|
95
|
+
*
|
|
96
|
+
* @param _message - The error message (unused)
|
|
97
|
+
* @returns void
|
|
98
|
+
*/
|
|
99
|
+
error(_message) {
|
|
100
|
+
// Silent mode - no logging
|
|
101
|
+
}
|
|
102
|
+
}
|
|
103
|
+
/**
|
|
104
|
+
* Severity levels for profanity detection results.
|
|
105
|
+
*
|
|
106
|
+
* @enum {number}
|
|
107
|
+
* @description Categorizes the severity of detected profanity based on the number
|
|
108
|
+
* of unique words and total matches found in the text.
|
|
109
|
+
*
|
|
110
|
+
* @readonly
|
|
111
|
+
* @example
|
|
112
|
+
* ```typescript
|
|
113
|
+
* const result = filter.detect("some text");
|
|
114
|
+
* if (result.severity === ProfanitySeverity.EXTREME) {
|
|
115
|
+
* // Handle extreme profanity
|
|
116
|
+
* }
|
|
117
|
+
* ```
|
|
35
118
|
*/
|
|
36
119
|
export var ProfanitySeverity;
|
|
37
120
|
(function (ProfanitySeverity) {
|
|
121
|
+
/** Mild profanity: 1 unique word or 1 total match */
|
|
38
122
|
ProfanitySeverity[ProfanitySeverity["MILD"] = 1] = "MILD";
|
|
123
|
+
/** Moderate profanity: 2 unique words or 2 total matches */
|
|
39
124
|
ProfanitySeverity[ProfanitySeverity["MODERATE"] = 2] = "MODERATE";
|
|
125
|
+
/** Severe profanity: 3 unique words or 3 total matches */
|
|
40
126
|
ProfanitySeverity[ProfanitySeverity["SEVERE"] = 3] = "SEVERE";
|
|
127
|
+
/** Extreme profanity: 4+ unique words or 5+ total matches */
|
|
41
128
|
ProfanitySeverity[ProfanitySeverity["EXTREME"] = 4] = "EXTREME";
|
|
42
129
|
})(ProfanitySeverity = ProfanitySeverity || (ProfanitySeverity = {}));
|
|
43
130
|
/**
|
|
44
|
-
*
|
|
45
|
-
*
|
|
46
|
-
* @
|
|
47
|
-
* @
|
|
48
|
-
* @
|
|
131
|
+
* Validates that an input is a string (only the type is checked; empty strings are accepted).
|
|
132
|
+
*
|
|
133
|
+
* @function validateString
|
|
134
|
+
* @param {unknown} input - The value to validate
|
|
135
|
+
* @param {string} paramName - Name of the parameter being validated (used in error messages)
|
|
136
|
+
* @returns {string} The validated string
|
|
137
|
+
* @throws {TypeError} If input is not a string
|
|
138
|
+
*
|
|
139
|
+
* @internal
|
|
140
|
+
*
|
|
141
|
+
* @example
|
|
142
|
+
* ```typescript
|
|
143
|
+
* const text = validateString(userInput, 'text');
|
|
144
|
+
* // Returns userInput if it's a string, throws TypeError otherwise
|
|
145
|
+
* ```
|
|
49
146
|
*/
|
|
50
147
|
function validateString(input, paramName) {
|
|
51
148
|
if (typeof input !== "string") {
|
|
@@ -54,11 +151,22 @@ function validateString(input, paramName) {
|
|
|
54
151
|
return input;
|
|
55
152
|
}
|
|
56
153
|
/**
|
|
57
|
-
*
|
|
58
|
-
*
|
|
59
|
-
* @
|
|
60
|
-
* @
|
|
61
|
-
* @
|
|
154
|
+
* Validates and filters a string array, removing non-string and empty items.
|
|
155
|
+
*
|
|
156
|
+
* @function validateStringArray
|
|
157
|
+
* @param {unknown} input - The value to validate (expected to be an array)
|
|
158
|
+
* @param {string} paramName - Name of the parameter being validated (used in error/warning messages)
|
|
159
|
+
* @returns {string[]} Array of valid, non-empty strings
|
|
160
|
+
* @throws {TypeError} If input is not an array
|
|
161
|
+
*
|
|
162
|
+
* @internal
|
|
163
|
+
*
|
|
164
|
+
* @example
|
|
165
|
+
* ```typescript
|
|
166
|
+
* const words = validateStringArray(['word1', '', 123, 'word2'], 'words');
|
|
167
|
+
* // Returns: ['word1', 'word2']
|
|
168
|
+
* // Logs warning: "Skipping non-string item in words: 123"
|
|
169
|
+
* ```
|
|
62
170
|
*/
|
|
63
171
|
function validateStringArray(input, paramName) {
|
|
64
172
|
if (!Array.isArray(input)) {
|
|
@@ -73,17 +181,50 @@ function validateStringArray(input, paramName) {
|
|
|
73
181
|
});
|
|
74
182
|
}
|
|
75
183
|
/**
|
|
76
|
-
* Trie node for efficient
|
|
184
|
+
* Trie (prefix tree) node for efficient pattern matching and word storage.
|
|
185
|
+
*
|
|
186
|
+
* @class TrieNode
|
|
187
|
+
* @description Implements a trie data structure for O(m) time complexity word matching,
|
|
188
|
+
* where m is the length of the word being searched. Each node represents a character
|
|
189
|
+
* in the word, and paths from root to nodes with isEndOfWord=true represent complete words.
|
|
190
|
+
*
|
|
191
|
+
* @internal
|
|
192
|
+
*
|
|
193
|
+
* @example
|
|
194
|
+
* ```typescript
|
|
195
|
+
* const trie = new TrieNode();
|
|
196
|
+
* trie.addWord('bad');
|
|
197
|
+
* trie.addWord('badword');
|
|
198
|
+
* const matches = trie.findMatches('badwords here', 0, false);
|
|
199
|
+
* // Returns matches for 'bad' and 'badword'
|
|
200
|
+
* ```
|
|
77
201
|
*/
|
|
78
202
|
class TrieNode {
|
|
79
203
|
constructor() {
|
|
204
|
+
/** Map of characters to child nodes for fast lookups */
|
|
80
205
|
this.children = new Map();
|
|
206
|
+
/** Flag indicating if this node represents the end of a complete word */
|
|
81
207
|
this.isEndOfWord = false;
|
|
208
|
+
/** The complete word ending at this node (only set when isEndOfWord is true) */
|
|
82
209
|
this.word = "";
|
|
83
210
|
}
|
|
84
211
|
/**
|
|
85
|
-
*
|
|
86
|
-
*
|
|
212
|
+
* Adds a word to the trie structure.
|
|
213
|
+
*
|
|
214
|
+
* @param {string} word - The word to add to the trie
|
|
215
|
+
* @returns {void}
|
|
216
|
+
*
|
|
217
|
+
* @remarks
|
|
218
|
+
* - Time Complexity: O(m) where m is the length of the word
|
|
219
|
+
* - Space Complexity: O(m) in worst case when all characters are new
|
|
220
|
+
* - Supports any Unicode characters
|
|
221
|
+
*
|
|
222
|
+
* @example
|
|
223
|
+
* ```typescript
|
|
224
|
+
* const trie = new TrieNode();
|
|
225
|
+
* trie.addWord('hello');
|
|
226
|
+
* trie.addWord('world');
|
|
227
|
+
* ```
|
|
87
228
|
*/
|
|
88
229
|
addWord(word) {
|
|
89
230
|
let current = this;
|
|
@@ -100,13 +241,36 @@ class TrieNode {
|
|
|
100
241
|
current.word = word;
|
|
101
242
|
}
|
|
102
243
|
/**
|
|
103
|
-
*
|
|
104
|
-
*
|
|
105
|
-
* @
|
|
244
|
+
* Removes a word from the trie structure.
|
|
245
|
+
*
|
|
246
|
+
* @param {string} word - The word to remove from the trie
|
|
247
|
+
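* @returns {boolean} The value returned by the recursive helper for the root node. NOTE(review): the helper is documented as reporting whether a node can be pruned, which is not necessarily the same as "the word existed and was removed" - confirm before relying on this value.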
|
|
248
|
+
*
|
|
249
|
+
* @remarks
|
|
250
|
+
* - Time Complexity: O(m) where m is the length of the word
|
|
251
|
+
* - Also removes unnecessary nodes to keep the trie optimized
|
|
252
|
+
* - Only removes the word marking; shared prefixes with other words are preserved
|
|
253
|
+
*
|
|
254
|
+
* @example
|
|
255
|
+
* ```typescript
|
|
256
|
+
* const trie = new TrieNode();
|
|
257
|
+
* trie.addWord('hello');
|
|
258
|
+
* trie.removeWord('hello'); // Returns: true
|
|
259
|
+
* trie.removeWord('world'); // Returns: false (word not in trie)
|
|
260
|
+
* ```
|
|
106
261
|
*/
|
|
107
262
|
removeWord(word) {
|
|
108
263
|
return this.removeHelper(word, 0);
|
|
109
264
|
}
|
|
265
|
+
/**
|
|
266
|
+
* Recursive helper method for removing a word from the trie.
|
|
267
|
+
*
|
|
268
|
+
* @param {string} word - The word being removed
|
|
269
|
+
* @param {number} index - Current character index in the word
|
|
270
|
+
* @returns {boolean} True if this node should be deleted (has no children and is not end of another word)
|
|
271
|
+
*
|
|
272
|
+
* @internal
|
|
273
|
+
*/
|
|
110
274
|
removeHelper(word, index) {
|
|
111
275
|
if (index === word.length) {
|
|
112
276
|
if (!this.isEndOfWord)
|
|
@@ -126,11 +290,25 @@ class TrieNode {
|
|
|
126
290
|
return false;
|
|
127
291
|
}
|
|
128
292
|
/**
|
|
129
|
-
*
|
|
130
|
-
*
|
|
131
|
-
* @param
|
|
132
|
-
* @param
|
|
133
|
-
* @
|
|
293
|
+
* Finds all word matches in text starting at a specific position.
|
|
294
|
+
*
|
|
295
|
+
* @param {string} text - The text to search for profanity
|
|
296
|
+
* @param {number} startPos - The starting position (0-based index) in the text
|
|
297
|
+
* @param {boolean} allowPartial - If true, finds partial matches within larger words
|
|
298
|
+
* @returns {Array<{ word: string; start: number; end: number }>} Array of match objects with word and position info
|
|
299
|
+
*
|
|
300
|
+
* @remarks
|
|
301
|
+
* - Time Complexity: O(k) where k is the length of the longest match from startPos
|
|
302
|
+
* - Returns all valid words that can be formed starting from startPos
|
|
303
|
+
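* - When allowPartial is false, respects word boundaries (NOTE(review): the example below returns 'bad' inside 'badword' with allowPartial=false, so confirm whether boundaries are enforced here or by the caller)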
|
|
304
|
+
*
|
|
305
|
+
* @example
|
|
306
|
+
* ```typescript
|
|
307
|
+
* const trie = new TrieNode();
|
|
308
|
+
* trie.addWord('bad');
|
|
309
|
+
* const matches = trie.findMatches('badword', 0, false);
|
|
310
|
+
* // Returns: [{ word: 'bad', start: 0, end: 3 }]
|
|
311
|
+
* ```
|
|
134
312
|
*/
|
|
135
313
|
findMatches(text, startPos, allowPartial) {
|
|
136
314
|
const matches = [];
|
|
@@ -164,7 +342,22 @@ class TrieNode {
|
|
|
164
342
|
return matches;
|
|
165
343
|
}
|
|
166
344
|
/**
|
|
167
|
-
*
|
|
345
|
+
* Clears all words from the trie, resetting it to empty state.
|
|
346
|
+
*
|
|
347
|
+
* @returns {void}
|
|
348
|
+
*
|
|
349
|
+
* @remarks
|
|
350
|
+
* - Time Complexity: O(1) - clears the root node only (JavaScript GC handles children)
|
|
351
|
+
* - Removes all stored words and resets the trie to initial state
|
|
352
|
+
*
|
|
353
|
+
* @example
|
|
354
|
+
* ```typescript
|
|
355
|
+
* const trie = new TrieNode();
|
|
356
|
+
* trie.addWord('hello');
|
|
357
|
+
* trie.addWord('world');
|
|
358
|
+
* trie.clear();
|
|
359
|
+
* // Trie is now empty
|
|
360
|
+
* ```
|
|
168
361
|
*/
|
|
169
362
|
clear() {
|
|
170
363
|
this.children.clear();
|
|
@@ -173,12 +366,139 @@ class TrieNode {
|
|
|
173
366
|
}
|
|
174
367
|
}
|
|
175
368
|
/**
|
|
176
|
-
*
|
|
369
|
+
* AllProfanity - Professional-grade multilingual profanity detection and filtering library.
|
|
370
|
+
*
|
|
371
|
+
* @class AllProfanity
|
|
372
|
+
* @description A comprehensive, high-performance profanity filtering system supporting 9+ languages
|
|
373
|
+
* with advanced features including leet speak detection, context analysis, multiple matching algorithms,
|
|
374
|
+
* and customizable filtering options.
|
|
375
|
+
*
|
|
376
|
+
* @remarks
|
|
377
|
+
* ### Features:
|
|
378
|
+
* - **Multi-language Support**: English, Hindi, French, German, Spanish, Bengali, Tamil, Telugu, Brazilian Portuguese
|
|
379
|
+
* - **Advanced Algorithms**: Trie, Aho-Corasick, Bloom Filter, and hybrid approaches
|
|
380
|
+
* - **Leet Speak Detection**: Automatically normalizes and detects variations like "h3ll0"
|
|
381
|
+
* - **Context Analysis**: Reduces false positives using surrounding word context
|
|
382
|
+
* - **Performance**: Built-in caching and optimized data structures
|
|
383
|
+
* - **Flexible**: Custom dictionaries, whitelisting, severity levels
|
|
384
|
+
*
|
|
385
|
+
* ### Default Behavior:
|
|
386
|
+
* - Loads English and Hindi dictionaries by default
|
|
387
|
+
* - Case-insensitive matching
|
|
388
|
+
* - Leet speak detection enabled
|
|
389
|
+
* - Uses the Trie matching algorithm unless `algorithm.matching` is set to another value
|
|
390
|
+
*
|
|
391
|
+
* @example
|
|
392
|
+
* ```typescript
|
|
393
|
+
* // Basic usage with default instance
|
|
394
|
+
* import allProfanity from 'allprofanity';
|
|
395
|
+
*
|
|
396
|
+
* const result = allProfanity.detect("This is some bad text");
|
|
397
|
+
* console.log(result.hasProfanity); // true
|
|
398
|
+
* console.log(result.cleanedText); // "This is some *** text"
|
|
399
|
+
* console.log(result.severity); // ProfanitySeverity.MILD
|
|
400
|
+
* ```
|
|
401
|
+
*
|
|
402
|
+
* @example
|
|
403
|
+
* ```typescript
|
|
404
|
+
* // Advanced usage with custom configuration
|
|
405
|
+
* import { AllProfanity, ProfanitySeverity } from 'allprofanity';
|
|
406
|
+
*
|
|
407
|
+
* const filter = new AllProfanity({
|
|
408
|
+
* languages: ['english', 'french', 'spanish'],
|
|
409
|
+
* enableLeetSpeak: true,
|
|
410
|
+
* strictMode: true,
|
|
411
|
+
* algorithm: {
|
|
412
|
+
* matching: 'hybrid',
|
|
413
|
+
* useBloomFilter: true
|
|
414
|
+
* },
|
|
415
|
+
* performance: {
|
|
416
|
+
* enableCaching: true,
|
|
417
|
+
* cacheSize: 500
|
|
418
|
+
* },
|
|
419
|
+
* whitelistWords: ['class', 'assignment']
|
|
420
|
+
* });
|
|
421
|
+
*
|
|
422
|
+
* const text = "This text has some b@d w0rds";
|
|
423
|
+
* const result = filter.detect(text);
|
|
424
|
+
*
|
|
425
|
+
* if (result.hasProfanity) {
|
|
426
|
+
* console.log(`Found ${result.detectedWords.length} profane words`);
|
|
427
|
+
* console.log(`Severity: ${ProfanitySeverity[result.severity]}`);
|
|
428
|
+
* console.log(`Cleaned: ${result.cleanedText}`);
|
|
429
|
+
* }
|
|
430
|
+
* ```
|
|
431
|
+
*
|
|
432
|
+
* @example
|
|
433
|
+
* ```typescript
|
|
434
|
+
* // Using individual methods
|
|
435
|
+
* const filter = new AllProfanity();
|
|
436
|
+
*
|
|
437
|
+
* // Simple check
|
|
438
|
+
* if (filter.check("some text")) {
|
|
439
|
+
* console.log("Contains profanity!");
|
|
440
|
+
* }
|
|
441
|
+
*
|
|
442
|
+
* // Clean with custom placeholder
|
|
443
|
+
* const cleaned = filter.clean("bad words here", "#");
|
|
444
|
+
*
|
|
445
|
+
* // Load additional languages
|
|
446
|
+
* filter.loadLanguage('german');
|
|
447
|
+
* filter.loadIndianLanguages(); // Loads hindi, bengali, tamil, telugu
|
|
448
|
+
*
|
|
449
|
+
* // Add custom words
|
|
450
|
+
* filter.add(['customword1', 'customword2']);
|
|
451
|
+
*
|
|
452
|
+
* // Remove words
|
|
453
|
+
* filter.remove(['someword']);
|
|
454
|
+
*
|
|
455
|
+
* // Whitelist words
|
|
456
|
+
* filter.addToWhitelist(['class', 'assignment']);
|
|
457
|
+
* ```
|
|
458
|
+
*
|
|
459
|
+
* @see {@link AllProfanityOptions} for all configuration options
|
|
460
|
+
* @see {@link ProfanityDetectionResult} for detection result format
|
|
461
|
+
* @see {@link ProfanitySeverity} for severity levels
|
|
177
462
|
*/
|
|
178
463
|
export class AllProfanity {
|
|
179
464
|
/**
|
|
180
|
-
*
|
|
181
|
-
*
|
|
465
|
+
* Creates a new AllProfanity instance with the specified configuration.
|
|
466
|
+
*
|
|
467
|
+
* @constructor
|
|
468
|
+
* @param {AllProfanityOptions} [options] - Configuration options for profanity detection behavior
|
|
469
|
+
*
|
|
470
|
+
* @remarks
|
|
471
|
+
* ### Default Initialization:
|
|
472
|
+
* - Loads English and Hindi dictionaries automatically
|
|
473
|
+
* - Enables leet speak detection
|
|
474
|
+
* - Case-insensitive matching
|
|
475
|
+
* - Uses Trie algorithm for pattern matching
|
|
476
|
+
*
|
|
477
|
+
* ### Performance Considerations:
|
|
478
|
+
* - Initial load time depends on number of languages loaded
|
|
479
|
+
* - Aho-Corasick automaton (if enabled) is built during construction
|
|
480
|
+
* - Bloom Filter (if enabled) is populated during construction
|
|
481
|
+
*
|
|
482
|
+
* @throws {TypeError} If invalid options are provided
|
|
483
|
+
*
|
|
484
|
+
* @example
|
|
485
|
+
* ```typescript
|
|
486
|
+
* // Default instance
|
|
487
|
+
* const filter = new AllProfanity();
|
|
488
|
+
*
|
|
489
|
+
* // Custom configuration
|
|
490
|
+
* const filter = new AllProfanity({
|
|
491
|
+
* languages: ['english', 'french'],
|
|
492
|
+
* strictMode: true,
|
|
493
|
+
* defaultPlaceholder: '#',
|
|
494
|
+
* algorithm: { matching: 'hybrid' }
|
|
495
|
+
* });
|
|
496
|
+
*
|
|
497
|
+
* // Silent mode (no logging)
|
|
498
|
+
* const filter = new AllProfanity({ silent: true });
|
|
499
|
+
* ```
|
|
500
|
+
*
|
|
501
|
+
* @see {@link AllProfanityOptions} for all available configuration options
|
|
182
502
|
*/
|
|
183
503
|
constructor(options) {
|
|
184
504
|
var _a, _b, _c, _d, _e;
|
|
@@ -199,6 +519,7 @@ export class AllProfanity {
|
|
|
199
519
|
bengali: bengaliBadWords || [],
|
|
200
520
|
tamil: tamilBadWords || [],
|
|
201
521
|
telugu: teluguBadWords || [],
|
|
522
|
+
brazilian: brazilianBadWords || [],
|
|
202
523
|
};
|
|
203
524
|
this.leetMappings = new Map([
|
|
204
525
|
["@", "a"],
|
|
@@ -259,7 +580,14 @@ export class AllProfanity {
|
|
|
259
580
|
["7_", "z"],
|
|
260
581
|
]);
|
|
261
582
|
this.dynamicWords = new Set();
|
|
262
|
-
|
|
583
|
+
// Advanced algorithms
|
|
584
|
+
this.ahoCorasickAutomaton = null;
|
|
585
|
+
this.bloomFilter = null;
|
|
586
|
+
this.contextAnalyzer = null;
|
|
587
|
+
this.matchingAlgorithm = "trie";
|
|
588
|
+
this.resultCache = null;
|
|
589
|
+
// Prefer an explicitly provided logger; otherwise use the silent logger when silent mode is enabled, else the console logger
|
|
590
|
+
this.logger = (options === null || options === void 0 ? void 0 : options.logger) || ((options === null || options === void 0 ? void 0 : options.silent) ? new SilentLogger() : new ConsoleLogger());
|
|
263
591
|
if ((options === null || options === void 0 ? void 0 : options.defaultPlaceholder) !== undefined) {
|
|
264
592
|
this.setPlaceholder(options.defaultPlaceholder);
|
|
265
593
|
}
|
|
@@ -270,6 +598,9 @@ export class AllProfanity {
|
|
|
270
598
|
if (options === null || options === void 0 ? void 0 : options.whitelistWords) {
|
|
271
599
|
this.addToWhitelist(options.whitelistWords);
|
|
272
600
|
}
|
|
601
|
+
// Initialize advanced algorithms BEFORE loading dictionaries
|
|
602
|
+
// so that words can be added to all data structures
|
|
603
|
+
this.initializeAdvancedAlgorithms(options);
|
|
273
604
|
this.loadLanguage("english");
|
|
274
605
|
this.loadLanguage("hindi");
|
|
275
606
|
if ((_e = options === null || options === void 0 ? void 0 : options.languages) === null || _e === void 0 ? void 0 : _e.length) {
|
|
@@ -281,6 +612,52 @@ export class AllProfanity {
|
|
|
281
612
|
});
|
|
282
613
|
}
|
|
283
614
|
}
|
|
615
|
+
/**
|
|
616
|
+
* Initialize advanced algorithms based on configuration
|
|
617
|
+
*/
|
|
618
|
+
initializeAdvancedAlgorithms(options) {
|
|
619
|
+
var _a, _b, _c, _d, _e, _f, _g, _h, _j, _k, _l, _m;
|
|
620
|
+
// Set matching algorithm
|
|
621
|
+
if ((_a = options === null || options === void 0 ? void 0 : options.algorithm) === null || _a === void 0 ? void 0 : _a.matching) {
|
|
622
|
+
this.matchingAlgorithm = options.algorithm.matching;
|
|
623
|
+
}
|
|
624
|
+
// Initialize Bloom Filter if enabled
|
|
625
|
+
const bloomEnabled = ((_b = options === null || options === void 0 ? void 0 : options.algorithm) === null || _b === void 0 ? void 0 : _b.useBloomFilter) ||
|
|
626
|
+
((_c = options === null || options === void 0 ? void 0 : options.bloomFilter) === null || _c === void 0 ? void 0 : _c.enabled) ||
|
|
627
|
+
this.matchingAlgorithm === "hybrid";
|
|
628
|
+
if (bloomEnabled) {
|
|
629
|
+
const expectedItems = ((_d = options === null || options === void 0 ? void 0 : options.bloomFilter) === null || _d === void 0 ? void 0 : _d.expectedItems) || 10000;
|
|
630
|
+
const falsePositiveRate = ((_e = options === null || options === void 0 ? void 0 : options.bloomFilter) === null || _e === void 0 ? void 0 : _e.falsePositiveRate) || 0.01;
|
|
631
|
+
this.bloomFilter = new BloomFilter(expectedItems, falsePositiveRate);
|
|
632
|
+
this.logger.info(`Bloom Filter initialized with ${expectedItems} expected items and ${(falsePositiveRate * 100).toFixed(2)}% false positive rate`);
|
|
633
|
+
}
|
|
634
|
+
// Initialize Aho-Corasick if enabled
|
|
635
|
+
const ahoEnabled = ((_f = options === null || options === void 0 ? void 0 : options.algorithm) === null || _f === void 0 ? void 0 : _f.useAhoCorasick) ||
|
|
636
|
+
((_g = options === null || options === void 0 ? void 0 : options.ahoCorasick) === null || _g === void 0 ? void 0 : _g.enabled) ||
|
|
637
|
+
this.matchingAlgorithm === "aho-corasick" ||
|
|
638
|
+
this.matchingAlgorithm === "hybrid";
|
|
639
|
+
if (ahoEnabled) {
|
|
640
|
+
this.ahoCorasickAutomaton = new AhoCorasick([]);
|
|
641
|
+
this.logger.info("Aho-Corasick automaton initialized");
|
|
642
|
+
}
|
|
643
|
+
// Initialize Context Analyzer if enabled
|
|
644
|
+
const contextEnabled = ((_h = options === null || options === void 0 ? void 0 : options.algorithm) === null || _h === void 0 ? void 0 : _h.useContextAnalysis) ||
|
|
645
|
+
((_j = options === null || options === void 0 ? void 0 : options.contextAnalysis) === null || _j === void 0 ? void 0 : _j.enabled);
|
|
646
|
+
if (contextEnabled) {
|
|
647
|
+
const contextLanguages = ((_k = options === null || options === void 0 ? void 0 : options.contextAnalysis) === null || _k === void 0 ? void 0 : _k.languages) || ["en"];
|
|
648
|
+
this.contextAnalyzer = new ContextAnalyzer(contextLanguages);
|
|
649
|
+
if ((_l = options === null || options === void 0 ? void 0 : options.contextAnalysis) === null || _l === void 0 ? void 0 : _l.contextWindow) {
|
|
650
|
+
this.contextAnalyzer.setContextWindow(options.contextAnalysis.contextWindow);
|
|
651
|
+
}
|
|
652
|
+
this.logger.info(`Context Analyzer initialized for languages: ${contextLanguages.join(", ")}`);
|
|
653
|
+
}
|
|
654
|
+
// Initialize result cache if enabled
|
|
655
|
+
if ((_m = options === null || options === void 0 ? void 0 : options.performance) === null || _m === void 0 ? void 0 : _m.enableCaching) {
|
|
656
|
+
const cacheSize = options.performance.cacheSize || 1000;
|
|
657
|
+
this.resultCache = new Map();
|
|
658
|
+
this.logger.info(`Result caching enabled with size limit: ${cacheSize}`);
|
|
659
|
+
}
|
|
660
|
+
}
|
|
284
661
|
/**
|
|
285
662
|
* Normalize leet speak to regular characters.
|
|
286
663
|
* @param text - The input text.
|
|
@@ -371,11 +748,123 @@ export class AllProfanity {
|
|
|
371
748
|
return result;
|
|
372
749
|
}
|
|
373
750
|
/**
|
|
374
|
-
*
|
|
375
|
-
|
|
376
|
-
|
|
751
|
+
* Use Aho-Corasick algorithm for pattern matching
|
|
752
|
+
*/
|
|
753
|
+
findMatchesWithAhoCorasick(searchText, originalText) {
|
|
754
|
+
if (!this.ahoCorasickAutomaton) {
|
|
755
|
+
return [];
|
|
756
|
+
}
|
|
757
|
+
const ahoMatches = this.ahoCorasickAutomaton.findAll(searchText);
|
|
758
|
+
const results = [];
|
|
759
|
+
for (const match of ahoMatches) {
|
|
760
|
+
if (!this.detectPartialWords &&
|
|
761
|
+
!this.isWholeWord(originalText, match.start, match.end)) {
|
|
762
|
+
continue;
|
|
763
|
+
}
|
|
764
|
+
const matchedText = originalText.substring(match.start, match.end);
|
|
765
|
+
if (this.isWhitelistedMatch(match.pattern, matchedText)) {
|
|
766
|
+
continue;
|
|
767
|
+
}
|
|
768
|
+
if (this.hasWordBoundaries(originalText, match.start, match.end)) {
|
|
769
|
+
results.push({
|
|
770
|
+
word: match.pattern,
|
|
771
|
+
start: match.start,
|
|
772
|
+
end: match.end,
|
|
773
|
+
originalWord: matchedText,
|
|
774
|
+
});
|
|
775
|
+
}
|
|
776
|
+
}
|
|
777
|
+
return results;
|
|
778
|
+
}
|
|
779
|
+
/**
|
|
780
|
+
* Hybrid approach: Aho-Corasick for fast matching, Bloom Filter for validation
|
|
781
|
+
*/
|
|
782
|
+
findMatchesHybrid(searchText, originalText) {
|
|
783
|
+
// Use Aho-Corasick for primary matching if available
|
|
784
|
+
if (this.ahoCorasickAutomaton) {
|
|
785
|
+
const matches = this.findMatchesWithAhoCorasick(searchText, originalText);
|
|
786
|
+
// If Bloom Filter is enabled, validate matches
|
|
787
|
+
if (this.bloomFilter) {
|
|
788
|
+
return matches.filter((match) => this.bloomFilter.mightContain(match.word));
|
|
789
|
+
}
|
|
790
|
+
return matches;
|
|
791
|
+
}
|
|
792
|
+
// Fallback to Trie if Aho-Corasick not available
|
|
793
|
+
const matches = [];
|
|
794
|
+
this.findMatches(searchText, originalText, matches);
|
|
795
|
+
// Validate with Bloom Filter if enabled
|
|
796
|
+
if (this.bloomFilter) {
|
|
797
|
+
return matches.filter((match) => this.bloomFilter.mightContain(match.word));
|
|
798
|
+
}
|
|
799
|
+
return matches;
|
|
800
|
+
}
|
|
801
|
+
/**
|
|
802
|
+
* Apply context analysis to filter false positives
|
|
803
|
+
*/
|
|
804
|
+
applyContextAnalysis(text, matches, scoreThreshold = 0.5) {
|
|
805
|
+
if (!this.contextAnalyzer) {
|
|
806
|
+
return matches;
|
|
807
|
+
}
|
|
808
|
+
return matches.filter((match) => {
|
|
809
|
+
const analysis = this.contextAnalyzer.analyzeContext(text, match.start, match.end, match.word);
|
|
810
|
+
// If score is above threshold, it's likely profanity
|
|
811
|
+
return analysis.score >= scoreThreshold;
|
|
812
|
+
});
|
|
813
|
+
}
|
|
814
|
+
/**
|
|
815
|
+
* Detects profanity in the provided text and returns comprehensive analysis.
|
|
816
|
+
*
|
|
817
|
+
* @param {string} text - The text to analyze for profanity
|
|
818
|
+
* @returns {ProfanityDetectionResult} Detailed detection result including matches, positions, severity, and cleaned text
|
|
819
|
+
*
|
|
820
|
+
* @throws {TypeError} If text is not a string
|
|
821
|
+
*
|
|
822
|
+
* @remarks
|
|
823
|
+
* ### Performance:
|
|
824
|
+
* - Time Complexity: O(n*m) where n is text length, m is average word length in dictionary
|
|
825
|
+
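* - With Bloom Filter (hybrid mode): matches found by the primary matcher are post-filtered through the Bloom Filter; it is a validation step, not an early-rejection shortcut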
|
|
826
|
+
* - With Caching: O(1) for repeated identical text
|
|
827
|
+
*
|
|
828
|
+
* ### Features:
|
|
829
|
+
* - Detects leet speak variations (if enabled): "h3ll0" → "hello"
|
|
830
|
+
* - Respects word boundaries (strict mode) or detects partial matches
|
|
831
|
+
* - Returns exact positions for highlighting/masking
|
|
832
|
+
* - Calculates severity based on match count and uniqueness
|
|
833
|
+
*
|
|
834
|
+
* ### Caching:
|
|
835
|
+
* - Results are cached if `performance.enableCaching` is true
|
|
836
|
+
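* - The cache is a plain Map and cache hits do not refresh an entry's recency, so any size-based eviction is insertion-ordered (FIFO-like) rather than true LRU - TODO confirm against the eviction code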
|
|
837
|
+
*
|
|
838
|
+
* @example
|
|
839
|
+
* ```typescript
|
|
840
|
+
* const filter = new AllProfanity();
|
|
841
|
+
* const result = filter.detect("This has bad words");
|
|
842
|
+
*
|
|
843
|
+
* console.log(result.hasProfanity); // true
|
|
844
|
+
* console.log(result.detectedWords); // ['bad']
|
|
845
|
+
* console.log(result.cleanedText); // 'This has *** words'
|
|
846
|
+
* console.log(result.severity); // ProfanitySeverity.MILD
|
|
847
|
+
* console.log(result.positions); // [{ word: 'bad', start: 9, end: 12 }]
|
|
848
|
+
* ```
|
|
849
|
+
*
|
|
850
|
+
* @example
|
|
851
|
+
* ```typescript
|
|
852
|
+
* // With leet speak detection
|
|
853
|
+
* const filter = new AllProfanity({ enableLeetSpeak: true });
|
|
854
|
+
* const result = filter.detect("st0p b3ing b@d");
|
|
855
|
+
*
|
|
856
|
+
* if (result.hasProfanity) {
|
|
857
|
+
* result.positions.forEach(pos => {
|
|
858
|
+
* console.log(`Found "${pos.word}" at position ${pos.start}-${pos.end}`);
|
|
859
|
+
* });
|
|
860
|
+
* }
|
|
861
|
+
* ```
|
|
862
|
+
*
|
|
863
|
+
* @see {@link ProfanityDetectionResult} for result structure
|
|
864
|
+
* @see {@link ProfanitySeverity} for severity levels
|
|
377
865
|
*/
|
|
378
866
|
detect(text) {
|
|
867
|
+
var _a;
|
|
379
868
|
const validatedText = validateString(text, "text");
|
|
380
869
|
if (validatedText.length === 0) {
|
|
381
870
|
return {
|
|
@@ -386,22 +875,56 @@ export class AllProfanity {
|
|
|
386
875
|
positions: [],
|
|
387
876
|
};
|
|
388
877
|
}
|
|
389
|
-
|
|
878
|
+
// Check cache first if enabled
|
|
879
|
+
if ((_a = this.resultCache) === null || _a === void 0 ? void 0 : _a.has(validatedText)) {
|
|
880
|
+
return this.resultCache.get(validatedText);
|
|
881
|
+
}
|
|
882
|
+
let matches = [];
|
|
390
883
|
const normalizedText = this.caseSensitive
|
|
391
884
|
? validatedText
|
|
392
885
|
: validatedText.toLowerCase();
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
this.
|
|
398
|
-
|
|
886
|
+
// Choose matching algorithm based on configuration
|
|
887
|
+
switch (this.matchingAlgorithm) {
|
|
888
|
+
case "aho-corasick":
|
|
889
|
+
matches = this.findMatchesWithAhoCorasick(normalizedText, validatedText);
|
|
890
|
+
if (this.enableLeetSpeak) {
|
|
891
|
+
const leetNormalized = this.normalizeLeetSpeak(normalizedText);
|
|
892
|
+
if (leetNormalized !== normalizedText) {
|
|
893
|
+
const leetMatches = this.findMatchesWithAhoCorasick(leetNormalized, validatedText);
|
|
894
|
+
matches.push(...leetMatches);
|
|
895
|
+
}
|
|
896
|
+
}
|
|
897
|
+
break;
|
|
898
|
+
case "hybrid":
|
|
899
|
+
matches = this.findMatchesHybrid(normalizedText, validatedText);
|
|
900
|
+
if (this.enableLeetSpeak) {
|
|
901
|
+
const leetNormalized = this.normalizeLeetSpeak(normalizedText);
|
|
902
|
+
if (leetNormalized !== normalizedText) {
|
|
903
|
+
const leetMatches = this.findMatchesHybrid(leetNormalized, validatedText);
|
|
904
|
+
matches.push(...leetMatches);
|
|
905
|
+
}
|
|
906
|
+
}
|
|
907
|
+
break;
|
|
908
|
+
case "trie":
|
|
909
|
+
default:
|
|
910
|
+
this.findMatches(normalizedText, validatedText, matches);
|
|
911
|
+
if (this.enableLeetSpeak) {
|
|
912
|
+
const leetNormalized = this.normalizeLeetSpeak(normalizedText);
|
|
913
|
+
if (leetNormalized !== normalizedText) {
|
|
914
|
+
this.findMatches(leetNormalized, validatedText, matches);
|
|
915
|
+
}
|
|
916
|
+
}
|
|
917
|
+
break;
|
|
918
|
+
}
|
|
919
|
+
// Apply context analysis if enabled
|
|
920
|
+
if (this.contextAnalyzer) {
|
|
921
|
+
matches = this.applyContextAnalysis(validatedText, matches);
|
|
399
922
|
}
|
|
400
923
|
const uniqueMatches = this.deduplicateMatches(matches);
|
|
401
924
|
const detectedWords = uniqueMatches.map((m) => m.originalWord);
|
|
402
925
|
const severity = this.calculateSeverity(uniqueMatches);
|
|
403
926
|
const cleanedText = this.generateCleanedText(validatedText, uniqueMatches);
|
|
404
|
-
|
|
927
|
+
const result = {
|
|
405
928
|
hasProfanity: uniqueMatches.length > 0,
|
|
406
929
|
detectedWords,
|
|
407
930
|
cleanedText,
|
|
@@ -412,6 +935,18 @@ export class AllProfanity {
|
|
|
412
935
|
end: m.end,
|
|
413
936
|
})),
|
|
414
937
|
};
|
|
938
|
+
// Cache result if caching is enabled
|
|
939
|
+
if (this.resultCache) {
|
|
940
|
+
this.resultCache.set(validatedText, result);
|
|
941
|
+
// Bound the cache size: once it exceeds 1000 entries, evict the oldest-inserted key (insertion-order eviction; hits do not refresh recency)
|
|
942
|
+
if (this.resultCache.size > 1000) {
|
|
943
|
+
const firstKey = this.resultCache.keys().next().value;
|
|
944
|
+
if (firstKey !== undefined) {
|
|
945
|
+
this.resultCache.delete(firstKey);
|
|
946
|
+
}
|
|
947
|
+
}
|
|
948
|
+
}
|
|
949
|
+
return result;
|
|
415
950
|
}
|
|
416
951
|
/**
|
|
417
952
|
* Main matching function, with whole-word logic.
|
|
@@ -465,18 +1000,78 @@ export class AllProfanity {
|
|
|
465
1000
|
return result;
|
|
466
1001
|
}
|
|
467
1002
|
/**
|
|
468
|
-
*
|
|
469
|
-
*
|
|
470
|
-
* @
|
|
1003
|
+
* Quick boolean check for profanity presence in text.
|
|
1004
|
+
*
|
|
1005
|
+
* @param {string} text - The text to check for profanity
|
|
1006
|
+
* @returns {boolean} True if profanity is detected, false otherwise
|
|
1007
|
+
*
|
|
1008
|
+
* @throws {TypeError} If text is not a string
|
|
1009
|
+
*
|
|
1010
|
+
* @remarks
|
|
1011
|
+
* - Convenience method that internally calls `detect()` and returns only the boolean result
|
|
1012
|
+
* - For detailed information about matches, use `detect()` instead
|
|
1013
|
+
* - Results are cached if caching is enabled (same cache as `detect()`)
|
|
1014
|
+
*
|
|
1015
|
+
* @example
|
|
1016
|
+
* ```typescript
|
|
1017
|
+
* const filter = new AllProfanity();
|
|
1018
|
+
*
|
|
1019
|
+
* if (filter.check("This has bad words")) {
|
|
1020
|
+
* console.log("Profanity detected!");
|
|
1021
|
+
* }
|
|
1022
|
+
*
|
|
1023
|
+
* // Quick validation
|
|
1024
|
+
* const isClean = !filter.check(userInput);
|
|
1025
|
+
* ```
|
|
1026
|
+
*
|
|
1027
|
+
* @see {@link detect} for detailed profanity analysis
|
|
471
1028
|
*/
|
|
472
1029
|
check(text) {
|
|
473
1030
|
return this.detect(text).hasProfanity;
|
|
474
1031
|
}
|
|
475
1032
|
/**
|
|
476
|
-
*
|
|
477
|
-
*
|
|
478
|
-
* @param
|
|
479
|
-
* @
|
|
1033
|
+
* Cleans text by replacing profanity with a placeholder character.
|
|
1034
|
+
*
|
|
1035
|
+
* @param {string} text - The text to clean
|
|
1036
|
+
* @param {string} [placeholder] - Optional custom placeholder character (uses default if not provided)
|
|
1037
|
+
* @returns {string} The cleaned text with profanity replaced
|
|
1038
|
+
*
|
|
1039
|
+
* @throws {TypeError} If text is not a string
|
|
1040
|
+
*
|
|
1041
|
+
* @remarks
|
|
1042
|
+
* ### Character-level Replacement:
|
|
1043
|
+
* - Each profane character is replaced individually
|
|
1044
|
+
* - "bad" with placeholder "*" becomes "***"
|
|
1045
|
+
* - Preserves text length and structure
|
|
1046
|
+
*
|
|
1047
|
+
* ### Placeholder Behavior:
|
|
1048
|
+
* - If no placeholder provided, uses the instance's default placeholder
|
|
1049
|
+
* - If placeholder provided, uses only the first character
|
|
1050
|
+
* - Empty placeholder throws error
|
|
1051
|
+
*
|
|
1052
|
+
* @example
|
|
1053
|
+
* ```typescript
|
|
1054
|
+
* const filter = new AllProfanity();
|
|
1055
|
+
*
|
|
1056
|
+
* // Using default placeholder (*)
|
|
1057
|
+
* const cleaned = filter.clean("This has bad words");
|
|
1058
|
+
* console.log(cleaned); // "This has *** *****"
|
|
1059
|
+
*
|
|
1060
|
+
* // Using custom placeholder
|
|
1061
|
+
* const cleanedHash = filter.clean("This has bad words", "#");
|
|
1062
|
+
* console.log(cleanedHash); // "This has ### #####"
|
|
1063
|
+
* ```
|
|
1064
|
+
*
|
|
1065
|
+
* @example
|
|
1066
|
+
* ```typescript
|
|
1067
|
+
* // Clean user-generated content for display
|
|
1068
|
+
* const userComment = "Some inappropriate words here";
|
|
1069
|
+
* const safeComment = filter.clean(userComment);
|
|
1070
|
+
* displayComment(safeComment);
|
|
1071
|
+
* ```
|
|
1072
|
+
*
|
|
1073
|
+
* @see {@link cleanWithPlaceholder} for word-level replacement
|
|
1074
|
+
* @see {@link setPlaceholder} to change default placeholder
|
|
480
1075
|
*/
|
|
481
1076
|
clean(text, placeholder) {
|
|
482
1077
|
const detection = this.detect(text);
|
|
@@ -503,10 +1098,46 @@ export class AllProfanity {
|
|
|
503
1098
|
return result;
|
|
504
1099
|
}
|
|
505
1100
|
/**
|
|
506
|
-
*
|
|
507
|
-
*
|
|
508
|
-
* @param
|
|
509
|
-
* @
|
|
1101
|
+
* Cleans text by replacing each profane word with a single placeholder string (word-level replacement).
|
|
1102
|
+
*
|
|
1103
|
+
* @param {string} text - The text to clean
|
|
1104
|
+
* @param {string} [placeholder="***"] - The placeholder string to use for each profane word
|
|
1105
|
+
* @returns {string} The cleaned text with each profane word replaced by the placeholder
|
|
1106
|
+
*
|
|
1107
|
+
* @throws {TypeError} If text is not a string
|
|
1108
|
+
*
|
|
1109
|
+
* @remarks
|
|
1110
|
+
* ### Word-level Replacement:
|
|
1111
|
+
* - Each profane word is replaced with the entire placeholder string (not character-by-character)
|
|
1112
|
+
* - "bad words" with placeholder "***" becomes "*** ***"
|
|
1113
|
+
* - Does NOT preserve original text length
|
|
1114
|
+
*
|
|
1115
|
+
* ### Difference from `clean()`:
|
|
1116
|
+
* - `clean()`: Character-level replacement - a 5-letter word becomes "*****" (one placeholder character per letter, preserves length)
|
|
1117
|
+
* - `cleanWithPlaceholder()`: Word-level replacement - a word of any length becomes "***" (the whole placeholder string, length not preserved)
|
|
1118
|
+
*
|
|
1119
|
+
* @example
|
|
1120
|
+
* ```typescript
|
|
1121
|
+
* const filter = new AllProfanity();
|
|
1122
|
+
*
|
|
1123
|
+
* const text = "This has bad words"; // cleaned below with the default placeholder ("***")
|
|
1124
|
+
* const cleaned = filter.cleanWithPlaceholder(text);
|
|
1125
|
+
* console.log(cleaned); // "This has *** ***"
|
|
1126
|
+
*
|
|
1127
|
+
* // Custom placeholder
|
|
1128
|
+
* const cleaned2 = filter.cleanWithPlaceholder(text, "[CENSORED]");
|
|
1129
|
+
* console.log(cleaned2); // "This has [CENSORED] [CENSORED]"
|
|
1130
|
+
* ```
|
|
1131
|
+
*
|
|
1132
|
+
* @example
|
|
1133
|
+
* ```typescript
|
|
1134
|
+
* // Censoring chat messages
|
|
1135
|
+
* const message = "You are a badword and stupid";
|
|
1136
|
+
* const censored = filter.cleanWithPlaceholder(message, "[***]");
|
|
1137
|
+
* // Result: "You are a [***] and [***]"
|
|
1138
|
+
* ```
|
|
1139
|
+
*
|
|
1140
|
+
* @see {@link clean} for character-level replacement
|
|
510
1141
|
*/
|
|
511
1142
|
cleanWithPlaceholder(text, placeholder = "***") {
|
|
512
1143
|
const detection = this.detect(text);
|
|
@@ -532,8 +1163,51 @@ export class AllProfanity {
|
|
|
532
1163
|
return result;
|
|
533
1164
|
}
|
|
534
1165
|
/**
|
|
535
|
-
*
|
|
536
|
-
*
|
|
1166
|
+
* Dynamically adds one or more words to the profanity filter at runtime.
|
|
1167
|
+
*
|
|
1168
|
+
* @param {string | string[]} word - A single word or array of words to add to the filter
|
|
1169
|
+
* @returns {void}
|
|
1170
|
+
*
|
|
1171
|
+
* @remarks
|
|
1172
|
+
* ### Behavior:
|
|
1173
|
+
* - Words are added to all active data structures (Trie, Aho-Corasick, Bloom Filter)
|
|
1174
|
+
* - Automatically normalizes words based on caseSensitive setting
|
|
1175
|
+
* - Skips whitelisted words
|
|
1176
|
+
* - Validates and filters out non-string or empty values
|
|
1177
|
+
* - Changes take effect immediately for subsequent detect/check/clean calls
|
|
1178
|
+
*
|
|
1179
|
+
* ### Use Cases:
|
|
1180
|
+
* - Adding context-specific profanity
|
|
1181
|
+
* - Building dynamic word lists from user reports
|
|
1182
|
+
* - Customizing filters for specific communities/applications
|
|
1183
|
+
*
|
|
1184
|
+
* @example
|
|
1185
|
+
* ```typescript
|
|
1186
|
+
* const filter = new AllProfanity();
|
|
1187
|
+
*
|
|
1188
|
+
* // Add single word
|
|
1189
|
+
* filter.add('newbadword');
|
|
1190
|
+
*
|
|
1191
|
+
* // Add multiple words
|
|
1192
|
+
* filter.add(['word1', 'word2', 'word3']);
|
|
1193
|
+
*
|
|
1194
|
+
* // Now these words will be detected
|
|
1195
|
+
* filter.check('newbadword'); // true
|
|
1196
|
+
* ```
|
|
1197
|
+
*
|
|
1198
|
+
* @example
|
|
1199
|
+
* ```typescript
|
|
1200
|
+
* // Add game-specific slang dynamically
|
|
1201
|
+
* const filter = new AllProfanity();
|
|
1202
|
+
* const gamingSlang = ['noob', 'trash', 'tryhard'];
|
|
1203
|
+
* filter.add(gamingSlang);
|
|
1204
|
+
*
|
|
1205
|
+
* const message = "You're such a noob";
|
|
1206
|
+
* console.log(filter.check(message)); // true
|
|
1207
|
+
* ```
|
|
1208
|
+
*
|
|
1209
|
+
* @see {@link remove} to remove words
|
|
1210
|
+
* @see {@link loadCustomDictionary} for loading named dictionaries
|
|
537
1211
|
*/
|
|
538
1212
|
add(word) {
|
|
539
1213
|
const words = Array.isArray(word) ? word : [word];
|
|
@@ -544,8 +1218,50 @@ export class AllProfanity {
|
|
|
544
1218
|
}
|
|
545
1219
|
}
|
|
546
1220
|
/**
|
|
547
|
-
*
|
|
548
|
-
*
|
|
1221
|
+
* Dynamically removes one or more words from the profanity filter at runtime.
|
|
1222
|
+
*
|
|
1223
|
+
* @param {string | string[]} word - A single word or array of words to remove from the filter
|
|
1224
|
+
* @returns {void}
|
|
1225
|
+
*
|
|
1226
|
+
* @remarks
|
|
1227
|
+
* ### Behavior:
|
|
1228
|
+
* - Removes words from all active data structures (Trie, dynamic words set)
|
|
1229
|
+
* - Normalizes words based on caseSensitive setting before removal
|
|
1230
|
+
* - Only removes dynamically added words, not words from loaded language dictionaries
|
|
1231
|
+
* - Changes take effect immediately for subsequent detect/check/clean calls
|
|
1232
|
+
*
|
|
1233
|
+
* ### Important Notes:
|
|
1234
|
+
* - Cannot remove words from built-in language dictionaries
|
|
1235
|
+
* - To exclude dictionary words, use `addToWhitelist()` instead
|
|
1236
|
+
* - Validates and filters out non-string or empty values
|
|
1237
|
+
*
|
|
1238
|
+
* @example
|
|
1239
|
+
* ```typescript
|
|
1240
|
+
* const filter = new AllProfanity();
|
|
1241
|
+
*
|
|
1242
|
+
* // Add then remove a word
|
|
1243
|
+
* filter.add('tempword');
|
|
1244
|
+
* filter.check('tempword'); // true
|
|
1245
|
+
*
|
|
1246
|
+
* filter.remove('tempword');
|
|
1247
|
+
* filter.check('tempword'); // false
|
|
1248
|
+
*
|
|
1249
|
+
* // Remove multiple words
|
|
1250
|
+
* filter.remove(['word1', 'word2']);
|
|
1251
|
+
* ```
|
|
1252
|
+
*
|
|
1253
|
+
* @example
|
|
1254
|
+
* ```typescript
|
|
1255
|
+
* // Managing custom word list
|
|
1256
|
+
* const filter = new AllProfanity();
|
|
1257
|
+
* filter.add(['custom1', 'custom2', 'custom3']);
|
|
1258
|
+
*
|
|
1259
|
+
* // Later, remove one that's no longer needed
|
|
1260
|
+
* filter.remove('custom2');
|
|
1261
|
+
* ```
|
|
1262
|
+
*
|
|
1263
|
+
* @see {@link add} to add words
|
|
1264
|
+
* @see {@link addToWhitelist} to exclude dictionary words without removing them
|
|
549
1265
|
*/
|
|
550
1266
|
remove(word) {
|
|
551
1267
|
const words = Array.isArray(word) ? word : [word];
|
|
@@ -588,9 +1304,60 @@ export class AllProfanity {
|
|
|
588
1304
|
return this.whitelistSet.has(normalizedWord);
|
|
589
1305
|
}
|
|
590
1306
|
/**
|
|
591
|
-
*
|
|
592
|
-
*
|
|
593
|
-
* @
|
|
1307
|
+
* Loads a built-in language dictionary into the profanity filter.
|
|
1308
|
+
*
|
|
1309
|
+
* @param {string} language - The language key to load (case-insensitive)
|
|
1310
|
+
* @returns {boolean} True if the language was loaded successfully or was already loaded, false if the language was not found
|
|
1311
|
+
*
|
|
1312
|
+
* @remarks
|
|
1313
|
+
* ### Available Languages:
|
|
1314
|
+
* - `'english'` - English profanity words
|
|
1315
|
+
* - `'hindi'` - Hindi profanity words
|
|
1316
|
+
* - `'french'` - French profanity words
|
|
1317
|
+
* - `'german'` - German profanity words
|
|
1318
|
+
* - `'spanish'` - Spanish profanity words
|
|
1319
|
+
* - `'bengali'` - Bengali profanity words
|
|
1320
|
+
* - `'tamil'` - Tamil profanity words
|
|
1321
|
+
* - `'telugu'` - Telugu profanity words
|
|
1322
|
+
* - `'brazilian'` - Brazilian Portuguese profanity words
|
|
1323
|
+
*
|
|
1324
|
+
* ### Behavior:
|
|
1325
|
+
* - Language keys are case-insensitive
|
|
1326
|
+
* - Loading is idempotent - calling multiple times for same language is safe
|
|
1327
|
+
* - Returns true if language loaded successfully or was already loaded
|
|
1328
|
+
* - Returns false if language not found
|
|
1329
|
+
* - Logs success/failure messages (unless silent mode enabled)
|
|
1330
|
+
* - Words are added to all active data structures
|
|
1331
|
+
*
|
|
1332
|
+
* ### Default Languages:
|
|
1333
|
+
* English and Hindi are loaded automatically in the constructor
|
|
1334
|
+
*
|
|
1335
|
+
* @example
|
|
1336
|
+
* ```typescript
|
|
1337
|
+
* const filter = new AllProfanity();
|
|
1338
|
+
*
|
|
1339
|
+
* // Load additional languages
|
|
1340
|
+
* filter.loadLanguage('french');
|
|
1341
|
+
* filter.loadLanguage('spanish');
|
|
1342
|
+
*
|
|
1343
|
+
* // Case-insensitive
|
|
1344
|
+
* filter.loadLanguage('GERMAN'); // Works
|
|
1345
|
+
*
|
|
1346
|
+
* // Check if loaded
|
|
1347
|
+
* console.log(filter.getLoadedLanguages()); // ['english', 'hindi', 'french', 'spanish', 'german']
|
|
1348
|
+
* ```
|
|
1349
|
+
*
|
|
1350
|
+
* @example
|
|
1351
|
+
* ```typescript
|
|
1352
|
+
* // Load all Indian languages at once
|
|
1353
|
+
* const filter = new AllProfanity();
|
|
1354
|
+
* filter.loadIndianLanguages();
|
|
1355
|
+
* ```
|
|
1356
|
+
*
|
|
1357
|
+
* @see {@link loadLanguages} to load multiple languages at once
|
|
1358
|
+
* @see {@link loadIndianLanguages} for convenience method
|
|
1359
|
+
* @see {@link getAvailableLanguages} to see all available languages
|
|
1360
|
+
* @see {@link getLoadedLanguages} to see currently loaded languages
|
|
594
1361
|
*/
|
|
595
1362
|
loadLanguage(language) {
|
|
596
1363
|
if (!language || typeof language !== "string") {
|
|
@@ -642,9 +1409,64 @@ export class AllProfanity {
|
|
|
642
1409
|
return this.loadLanguages(indianLanguages);
|
|
643
1410
|
}
|
|
644
1411
|
/**
|
|
645
|
-
*
|
|
646
|
-
*
|
|
647
|
-
* @param
|
|
1412
|
+
* Loads a custom dictionary of profane words with a specific name.
|
|
1413
|
+
*
|
|
1414
|
+
* @param {string} name - Unique name/identifier for this custom dictionary
|
|
1415
|
+
* @param {string[]} words - Array of profane words to add to the dictionary
|
|
1416
|
+
* @returns {void}
|
|
1417
|
+
*
|
|
1418
|
+
* @throws {TypeError} If name is not a string or words is not an array
|
|
1419
|
+
*
|
|
1420
|
+
* @remarks
|
|
1421
|
+
* ### Behavior:
|
|
1422
|
+
* - Creates a new named dictionary or overwrites existing one with same name
|
|
1423
|
+
* - Validates and filters out non-string and empty values from words array
|
|
1424
|
+
* - Words are added to all active data structures (Trie, Aho-Corasick, Bloom Filter)
|
|
1425
|
+
* - Dictionary name is converted to lowercase for storage
|
|
1426
|
+
* - Logs count of loaded words (unless silent mode enabled)
|
|
1427
|
+
*
|
|
1428
|
+
* ### Use Cases:
|
|
1429
|
+
* - Domain-specific profanity (gaming, medical, legal, etc.)
|
|
1430
|
+
* - Organization-specific word lists
|
|
1431
|
+
* - Temporary or context-dependent filters
|
|
1432
|
+
* - Testing and development
|
|
1433
|
+
*
|
|
1434
|
+
* @example
|
|
1435
|
+
* ```typescript
|
|
1436
|
+
* const filter = new AllProfanity();
|
|
1437
|
+
*
|
|
1438
|
+
* // Load gaming-specific slang
|
|
1439
|
+
* filter.loadCustomDictionary('gaming', [
|
|
1440
|
+
* 'noob',
|
|
1441
|
+
* 'scrub',
|
|
1442
|
+
* 'tryhard',
|
|
1443
|
+
* 'trash'
|
|
1444
|
+
* ]);
|
|
1445
|
+
*
|
|
1446
|
+
* // Load company-specific terms
|
|
1447
|
+
* filter.loadCustomDictionary('company', [
|
|
1448
|
+
* 'competitor1',
|
|
1449
|
+
* 'bannedTerm1',
|
|
1450
|
+
* 'inappropriateJargon'
|
|
1451
|
+
* ]);
|
|
1452
|
+
*
|
|
1453
|
+
* console.log(filter.check('You are such a noob')); // true
|
|
1454
|
+
* ```
|
|
1455
|
+
*
|
|
1456
|
+
* @example
|
|
1457
|
+
* ```typescript
|
|
1458
|
+
* // Load from external source
|
|
1459
|
+
* const filter = new AllProfanity();
|
|
1460
|
+
*
|
|
1461
|
+
* async function loadExternalDictionary() {
|
|
1462
|
+
* const response = await fetch('https://example.com/custom-words.json');
|
|
1463
|
+
* const customWords = await response.json();
|
|
1464
|
+
* filter.loadCustomDictionary('external', customWords);
|
|
1465
|
+
* }
|
|
1466
|
+
* ```
|
|
1467
|
+
*
|
|
1468
|
+
* @see {@link add} for adding individual words dynamically
|
|
1469
|
+
* @see {@link loadLanguage} for loading built-in language dictionaries
|
|
648
1470
|
*/
|
|
649
1471
|
loadCustomDictionary(name, words) {
|
|
650
1472
|
validateString(name, "dictionary name");
|
|
@@ -683,7 +1505,16 @@ export class AllProfanity {
|
|
|
683
1505
|
if (this.isWhitelisted(normalizedWord)) {
|
|
684
1506
|
return false;
|
|
685
1507
|
}
|
|
1508
|
+
// Add to Trie (always used as fallback)
|
|
686
1509
|
this.profanityTrie.addWord(normalizedWord);
|
|
1510
|
+
// Add to Bloom Filter if enabled
|
|
1511
|
+
if (this.bloomFilter) {
|
|
1512
|
+
this.bloomFilter.add(normalizedWord);
|
|
1513
|
+
}
|
|
1514
|
+
// Add to Aho-Corasick automaton if enabled
|
|
1515
|
+
if (this.ahoCorasickAutomaton) {
|
|
1516
|
+
this.ahoCorasickAutomaton.addPattern(normalizedWord);
|
|
1517
|
+
}
|
|
687
1518
|
return true;
|
|
688
1519
|
}
|
|
689
1520
|
/**
|
|
@@ -797,6 +1628,50 @@ export class AllProfanity {
|
|
|
797
1628
|
this.rebuildTrie();
|
|
798
1629
|
}
|
|
799
1630
|
}
|
|
1631
|
+
/**
|
|
1632
|
+
* Create an AllProfanity instance from a configuration object.
|
|
1633
|
+
* @param config - Configuration object
|
|
1634
|
+
* @returns A new AllProfanity instance
|
|
1635
|
+
*/
|
|
1636
|
+
static fromConfig(config) {
|
|
1637
|
+
const options = {};
|
|
1638
|
+
if (config.algorithm)
|
|
1639
|
+
options.algorithm = config.algorithm;
|
|
1640
|
+
if (config.bloomFilter)
|
|
1641
|
+
options.bloomFilter = config.bloomFilter;
|
|
1642
|
+
if (config.ahoCorasick)
|
|
1643
|
+
options.ahoCorasick = config.ahoCorasick;
|
|
1644
|
+
if (config.contextAnalysis)
|
|
1645
|
+
options.contextAnalysis = config.contextAnalysis;
|
|
1646
|
+
if (config.performance)
|
|
1647
|
+
options.performance = config.performance;
|
|
1648
|
+
if (config.profanityDetection) {
|
|
1649
|
+
options.enableLeetSpeak = config.profanityDetection.enableLeetSpeak;
|
|
1650
|
+
options.caseSensitive = config.profanityDetection.caseSensitive;
|
|
1651
|
+
options.strictMode = config.profanityDetection.strictMode;
|
|
1652
|
+
options.detectPartialWords = config.profanityDetection.detectPartialWords;
|
|
1653
|
+
options.defaultPlaceholder = config.profanityDetection.defaultPlaceholder;
|
|
1654
|
+
}
|
|
1655
|
+
if (config.enableLeetSpeak !== undefined)
|
|
1656
|
+
options.enableLeetSpeak = config.enableLeetSpeak;
|
|
1657
|
+
if (config.caseSensitive !== undefined)
|
|
1658
|
+
options.caseSensitive = config.caseSensitive;
|
|
1659
|
+
if (config.strictMode !== undefined)
|
|
1660
|
+
options.strictMode = config.strictMode;
|
|
1661
|
+
if (config.detectPartialWords !== undefined)
|
|
1662
|
+
options.detectPartialWords = config.detectPartialWords;
|
|
1663
|
+
if (config.defaultPlaceholder !== undefined)
|
|
1664
|
+
options.defaultPlaceholder = config.defaultPlaceholder;
|
|
1665
|
+
if (config.languages)
|
|
1666
|
+
options.languages = config.languages;
|
|
1667
|
+
if (config.whitelistWords)
|
|
1668
|
+
options.whitelistWords = config.whitelistWords;
|
|
1669
|
+
if (config.customDictionaries)
|
|
1670
|
+
options.customDictionaries = config.customDictionaries;
|
|
1671
|
+
if (config.logger)
|
|
1672
|
+
options.logger = config.logger;
|
|
1673
|
+
return new AllProfanity(options);
|
|
1674
|
+
}
|
|
800
1675
|
}
|
|
801
1676
|
/**
|
|
802
1677
|
* Singleton instance of AllProfanity with default configuration.
|