allprofanity 2.2.0 → 2.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.ts +765 -37
- package/dist/index.js +704 -53
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
package/dist/index.js
CHANGED
|
@@ -23,35 +23,126 @@ export { default as tamilBadWords } from "./languages/tamil-words.js";
|
|
|
23
23
|
export { default as teluguBadWords } from "./languages/telugu-words.js";
|
|
24
24
|
export { default as brazilianBadWords } from "./languages/brazilian-words.js";
|
|
25
25
|
/**
|
|
26
|
-
* Default console logger implementation.
|
|
26
|
+
* Default console logger implementation for AllProfanity.
|
|
27
|
+
*
|
|
28
|
+
* @class ConsoleLogger
|
|
29
|
+
* @implements {Logger}
|
|
30
|
+
* @description Logs messages to the browser or Node.js console with an "[AllProfanity]" prefix.
|
|
31
|
+
* This is the default logger used when no custom logger is provided.
|
|
32
|
+
*
|
|
33
|
+
* @internal
|
|
27
34
|
*/
|
|
28
35
|
class ConsoleLogger {
|
|
36
|
+
/**
|
|
37
|
+
* Log informational messages to console.log with [AllProfanity] prefix.
|
|
38
|
+
*
|
|
39
|
+
* @param message - The message to log
|
|
40
|
+
* @returns void
|
|
41
|
+
*/
|
|
29
42
|
info(message) {
|
|
30
43
|
console.log(`[AllProfanity] ${message}`);
|
|
31
44
|
}
|
|
45
|
+
/**
|
|
46
|
+
* Log warning messages to console.warn with [AllProfanity] prefix.
|
|
47
|
+
*
|
|
48
|
+
* @param message - The warning message to log
|
|
49
|
+
* @returns void
|
|
50
|
+
*/
|
|
32
51
|
warn(message) {
|
|
33
52
|
console.warn(`[AllProfanity] ${message}`);
|
|
34
53
|
}
|
|
54
|
+
/**
|
|
55
|
+
* Log error messages to console.error with [AllProfanity] prefix.
|
|
56
|
+
*
|
|
57
|
+
* @param message - The error message to log
|
|
58
|
+
* @returns void
|
|
59
|
+
*/
|
|
35
60
|
error(message) {
|
|
36
61
|
console.error(`[AllProfanity] ${message}`);
|
|
37
62
|
}
|
|
38
63
|
}
|
|
39
64
|
/**
|
|
40
|
-
*
|
|
65
|
+
* Silent logger implementation that suppresses all log output.
|
|
66
|
+
*
|
|
67
|
+
* @class SilentLogger
|
|
68
|
+
* @implements {Logger}
|
|
69
|
+
* @description A no-op logger that discards all log messages. Used when `silent: true` is set
|
|
70
|
+
* in AllProfanityOptions, or when you want to completely disable logging.
|
|
71
|
+
*
|
|
72
|
+
* @internal
|
|
73
|
+
*/
|
|
74
|
+
class SilentLogger {
|
|
75
|
+
/**
|
|
76
|
+
* No-op implementation - messages are discarded.
|
|
77
|
+
*
|
|
78
|
+
* @param _message - The message (unused)
|
|
79
|
+
* @returns void
|
|
80
|
+
*/
|
|
81
|
+
info(_message) {
|
|
82
|
+
// Silent mode - no logging
|
|
83
|
+
}
|
|
84
|
+
/**
|
|
85
|
+
* No-op implementation - warnings are discarded.
|
|
86
|
+
*
|
|
87
|
+
* @param _message - The warning message (unused)
|
|
88
|
+
* @returns void
|
|
89
|
+
*/
|
|
90
|
+
warn(_message) {
|
|
91
|
+
// Silent mode - no logging
|
|
92
|
+
}
|
|
93
|
+
/**
|
|
94
|
+
* No-op implementation - errors are discarded.
|
|
95
|
+
*
|
|
96
|
+
* @param _message - The error message (unused)
|
|
97
|
+
* @returns void
|
|
98
|
+
*/
|
|
99
|
+
error(_message) {
|
|
100
|
+
// Silent mode - no logging
|
|
101
|
+
}
|
|
102
|
+
}
|
|
103
|
+
/**
|
|
104
|
+
* Severity levels for profanity detection results.
|
|
105
|
+
*
|
|
106
|
+
* @enum {number}
|
|
107
|
+
* @description Categorizes the severity of detected profanity based on the number
|
|
108
|
+
* of unique words and total matches found in the text.
|
|
109
|
+
*
|
|
110
|
+
* @readonly
|
|
111
|
+
* @example
|
|
112
|
+
* ```typescript
|
|
113
|
+
* const result = filter.detect("some text");
|
|
114
|
+
* if (result.severity === ProfanitySeverity.EXTREME) {
|
|
115
|
+
* // Handle extreme profanity
|
|
116
|
+
* }
|
|
117
|
+
* ```
|
|
41
118
|
*/
|
|
42
119
|
export var ProfanitySeverity;
|
|
43
120
|
(function (ProfanitySeverity) {
|
|
121
|
+
/** Mild profanity: 1 unique word or 1 total match */
|
|
44
122
|
ProfanitySeverity[ProfanitySeverity["MILD"] = 1] = "MILD";
|
|
123
|
+
/** Moderate profanity: 2 unique words or 2 total matches */
|
|
45
124
|
ProfanitySeverity[ProfanitySeverity["MODERATE"] = 2] = "MODERATE";
|
|
125
|
+
/** Severe profanity: 3 unique words or 3 total matches */
|
|
46
126
|
ProfanitySeverity[ProfanitySeverity["SEVERE"] = 3] = "SEVERE";
|
|
127
|
+
/** Extreme profanity: 4+ unique words or 5+ total matches */
|
|
47
128
|
ProfanitySeverity[ProfanitySeverity["EXTREME"] = 4] = "EXTREME";
|
|
48
129
|
})(ProfanitySeverity = ProfanitySeverity || (ProfanitySeverity = {}));
|
|
49
130
|
/**
|
|
50
|
-
*
|
|
51
|
-
*
|
|
52
|
-
* @
|
|
53
|
-
* @
|
|
54
|
-
* @
|
|
131
|
+
* Validates that an input is a non-empty string.
|
|
132
|
+
*
|
|
133
|
+
* @function validateString
|
|
134
|
+
* @param {unknown} input - The value to validate
|
|
135
|
+
* @param {string} paramName - Name of the parameter being validated (used in error messages)
|
|
136
|
+
* @returns {string} The validated string
|
|
137
|
+
* @throws {TypeError} If input is not a string
|
|
138
|
+
*
|
|
139
|
+
* @internal
|
|
140
|
+
*
|
|
141
|
+
* @example
|
|
142
|
+
* ```typescript
|
|
143
|
+
* const text = validateString(userInput, 'text');
|
|
144
|
+
* // Returns userInput if it's a string, throws TypeError otherwise
|
|
145
|
+
* ```
|
|
55
146
|
*/
|
|
56
147
|
function validateString(input, paramName) {
|
|
57
148
|
if (typeof input !== "string") {
|
|
@@ -60,11 +151,22 @@ function validateString(input, paramName) {
|
|
|
60
151
|
return input;
|
|
61
152
|
}
|
|
62
153
|
/**
|
|
63
|
-
*
|
|
64
|
-
*
|
|
65
|
-
* @
|
|
66
|
-
* @
|
|
67
|
-
* @
|
|
154
|
+
* Validates and filters a string array, removing non-string and empty items.
|
|
155
|
+
*
|
|
156
|
+
* @function validateStringArray
|
|
157
|
+
* @param {unknown} input - The value to validate (expected to be an array)
|
|
158
|
+
* @param {string} paramName - Name of the parameter being validated (used in error/warning messages)
|
|
159
|
+
* @returns {string[]} Array of valid, non-empty strings
|
|
160
|
+
* @throws {TypeError} If input is not an array
|
|
161
|
+
*
|
|
162
|
+
* @internal
|
|
163
|
+
*
|
|
164
|
+
* @example
|
|
165
|
+
* ```typescript
|
|
166
|
+
* const words = validateStringArray(['word1', '', 123, 'word2'], 'words');
|
|
167
|
+
* // Returns: ['word1', 'word2']
|
|
168
|
+
* // Logs warning: "Skipping non-string item in words: 123"
|
|
169
|
+
* ```
|
|
68
170
|
*/
|
|
69
171
|
function validateStringArray(input, paramName) {
|
|
70
172
|
if (!Array.isArray(input)) {
|
|
@@ -79,17 +181,50 @@ function validateStringArray(input, paramName) {
|
|
|
79
181
|
});
|
|
80
182
|
}
|
|
81
183
|
/**
|
|
82
|
-
* Trie node for efficient
|
|
184
|
+
* Trie (prefix tree) node for efficient pattern matching and word storage.
|
|
185
|
+
*
|
|
186
|
+
* @class TrieNode
|
|
187
|
+
* @description Implements a trie data structure for O(m) time complexity word matching,
|
|
188
|
+
* where m is the length of the word being searched. Each node represents a character
|
|
189
|
+
* in the word, and paths from root to nodes with isEndOfWord=true represent complete words.
|
|
190
|
+
*
|
|
191
|
+
* @internal
|
|
192
|
+
*
|
|
193
|
+
* @example
|
|
194
|
+
* ```typescript
|
|
195
|
+
* const trie = new TrieNode();
|
|
196
|
+
* trie.addWord('bad');
|
|
197
|
+
* trie.addWord('badword');
|
|
198
|
+
* const matches = trie.findMatches('badwords here', 0, false);
|
|
199
|
+
* // Returns matches for 'bad' and 'badword'
|
|
200
|
+
* ```
|
|
83
201
|
*/
|
|
84
202
|
class TrieNode {
|
|
85
203
|
constructor() {
|
|
204
|
+
/** Map of characters to child nodes for fast lookups */
|
|
86
205
|
this.children = new Map();
|
|
206
|
+
/** Flag indicating if this node represents the end of a complete word */
|
|
87
207
|
this.isEndOfWord = false;
|
|
208
|
+
/** The complete word ending at this node (only set when isEndOfWord is true) */
|
|
88
209
|
this.word = "";
|
|
89
210
|
}
|
|
90
211
|
/**
|
|
91
|
-
*
|
|
92
|
-
*
|
|
212
|
+
* Adds a word to the trie structure.
|
|
213
|
+
*
|
|
214
|
+
* @param {string} word - The word to add to the trie
|
|
215
|
+
* @returns {void}
|
|
216
|
+
*
|
|
217
|
+
* @remarks
|
|
218
|
+
* - Time Complexity: O(m) where m is the length of the word
|
|
219
|
+
* - Space Complexity: O(m) in worst case when all characters are new
|
|
220
|
+
* - Supports any Unicode characters
|
|
221
|
+
*
|
|
222
|
+
* @example
|
|
223
|
+
* ```typescript
|
|
224
|
+
* const trie = new TrieNode();
|
|
225
|
+
* trie.addWord('hello');
|
|
226
|
+
* trie.addWord('world');
|
|
227
|
+
* ```
|
|
93
228
|
*/
|
|
94
229
|
addWord(word) {
|
|
95
230
|
let current = this;
|
|
@@ -106,13 +241,36 @@ class TrieNode {
|
|
|
106
241
|
current.word = word;
|
|
107
242
|
}
|
|
108
243
|
/**
|
|
109
|
-
*
|
|
110
|
-
*
|
|
111
|
-
* @
|
|
244
|
+
* Removes a word from the trie structure.
|
|
245
|
+
*
|
|
246
|
+
* @param {string} word - The word to remove from the trie
|
|
247
|
+
* @returns {boolean} True if the word existed and was removed, false if word was not found
|
|
248
|
+
*
|
|
249
|
+
* @remarks
|
|
250
|
+
* - Time Complexity: O(m) where m is the length of the word
|
|
251
|
+
* - Also removes unnecessary nodes to keep the trie optimized
|
|
252
|
+
* - Only removes the word marking; shared prefixes with other words are preserved
|
|
253
|
+
*
|
|
254
|
+
* @example
|
|
255
|
+
* ```typescript
|
|
256
|
+
* const trie = new TrieNode();
|
|
257
|
+
* trie.addWord('hello');
|
|
258
|
+
* trie.removeWord('hello'); // Returns: true
|
|
259
|
+
* trie.removeWord('world'); // Returns: false (word not in trie)
|
|
260
|
+
* ```
|
|
112
261
|
*/
|
|
113
262
|
removeWord(word) {
|
|
114
263
|
return this.removeHelper(word, 0);
|
|
115
264
|
}
|
|
265
|
+
/**
|
|
266
|
+
* Recursive helper method for removing a word from the trie.
|
|
267
|
+
*
|
|
268
|
+
* @param {string} word - The word being removed
|
|
269
|
+
* @param {number} index - Current character index in the word
|
|
270
|
+
* @returns {boolean} True if this node should be deleted (has no children and is not end of another word)
|
|
271
|
+
*
|
|
272
|
+
* @internal
|
|
273
|
+
*/
|
|
116
274
|
removeHelper(word, index) {
|
|
117
275
|
if (index === word.length) {
|
|
118
276
|
if (!this.isEndOfWord)
|
|
@@ -132,11 +290,25 @@ class TrieNode {
|
|
|
132
290
|
return false;
|
|
133
291
|
}
|
|
134
292
|
/**
|
|
135
|
-
*
|
|
136
|
-
*
|
|
137
|
-
* @param
|
|
138
|
-
* @param
|
|
139
|
-
* @
|
|
293
|
+
* Finds all word matches in text starting at a specific position.
|
|
294
|
+
*
|
|
295
|
+
* @param {string} text - The text to search for profanity
|
|
296
|
+
* @param {number} startPos - The starting position (0-based index) in the text
|
|
297
|
+
* @param {boolean} allowPartial - If true, finds partial matches within larger words
|
|
298
|
+
* @returns {Array<{ word: string; start: number; end: number }>} Array of match objects with word and position info
|
|
299
|
+
*
|
|
300
|
+
* @remarks
|
|
301
|
+
* - Time Complexity: O(k) where k is the length of the longest match from startPos
|
|
302
|
+
* - Returns all valid words that can be formed starting from startPos
|
|
303
|
+
* - When allowPartial is false, respects word boundaries
|
|
304
|
+
*
|
|
305
|
+
* @example
|
|
306
|
+
* ```typescript
|
|
307
|
+
* const trie = new TrieNode();
|
|
308
|
+
* trie.addWord('bad');
|
|
309
|
+
* const matches = trie.findMatches('badword', 0, false);
|
|
310
|
+
* // Returns: [{ word: 'bad', start: 0, end: 3 }]
|
|
311
|
+
* ```
|
|
140
312
|
*/
|
|
141
313
|
findMatches(text, startPos, allowPartial) {
|
|
142
314
|
const matches = [];
|
|
@@ -170,7 +342,22 @@ class TrieNode {
|
|
|
170
342
|
return matches;
|
|
171
343
|
}
|
|
172
344
|
/**
|
|
173
|
-
*
|
|
345
|
+
* Clears all words from the trie, resetting it to empty state.
|
|
346
|
+
*
|
|
347
|
+
* @returns {void}
|
|
348
|
+
*
|
|
349
|
+
* @remarks
|
|
350
|
+
* - Time Complexity: O(1) - clears the root node only (JavaScript GC handles children)
|
|
351
|
+
* - Removes all stored words and resets the trie to initial state
|
|
352
|
+
*
|
|
353
|
+
* @example
|
|
354
|
+
* ```typescript
|
|
355
|
+
* const trie = new TrieNode();
|
|
356
|
+
* trie.addWord('hello');
|
|
357
|
+
* trie.addWord('world');
|
|
358
|
+
* trie.clear();
|
|
359
|
+
* // Trie is now empty
|
|
360
|
+
* ```
|
|
174
361
|
*/
|
|
175
362
|
clear() {
|
|
176
363
|
this.children.clear();
|
|
@@ -179,12 +366,139 @@ class TrieNode {
|
|
|
179
366
|
}
|
|
180
367
|
}
|
|
181
368
|
/**
|
|
182
|
-
*
|
|
369
|
+
* AllProfanity - Professional-grade multilingual profanity detection and filtering library.
|
|
370
|
+
*
|
|
371
|
+
* @class AllProfanity
|
|
372
|
+
* @description A comprehensive, high-performance profanity filtering system supporting 9+ languages
|
|
373
|
+
* with advanced features including leet speak detection, context analysis, multiple matching algorithms,
|
|
374
|
+
* and customizable filtering options.
|
|
375
|
+
*
|
|
376
|
+
* @remarks
|
|
377
|
+
* ### Features:
|
|
378
|
+
* - **Multi-language Support**: English, Hindi, French, German, Spanish, Bengali, Tamil, Telugu, Brazilian Portuguese
|
|
379
|
+
* - **Advanced Algorithms**: Trie, Aho-Corasick, Bloom Filter, and hybrid approaches
|
|
380
|
+
* - **Leet Speak Detection**: Automatically normalizes and detects variations like "h3ll0"
|
|
381
|
+
* - **Context Analysis**: Reduces false positives using surrounding word context
|
|
382
|
+
* - **Performance**: Built-in caching and optimized data structures
|
|
383
|
+
* - **Flexible**: Custom dictionaries, whitelisting, severity levels
|
|
384
|
+
*
|
|
385
|
+
* ### Default Behavior:
|
|
386
|
+
* - Loads English and Hindi dictionaries by default
|
|
387
|
+
* - Case-insensitive matching
|
|
388
|
+
* - Leet speak detection enabled
|
|
389
|
+
* - Uses Trie algorithm (fastest for most cases)
|
|
390
|
+
*
|
|
391
|
+
* @example
|
|
392
|
+
* ```typescript
|
|
393
|
+
* // Basic usage with default instance
|
|
394
|
+
* import allProfanity from 'allprofanity';
|
|
395
|
+
*
|
|
396
|
+
* const result = allProfanity.detect("This is some bad text");
|
|
397
|
+
* console.log(result.hasProfanity); // true
|
|
398
|
+
* console.log(result.cleanedText); // "This is some *** text"
|
|
399
|
+
* console.log(result.severity); // ProfanitySeverity.MILD
|
|
400
|
+
* ```
|
|
401
|
+
*
|
|
402
|
+
* @example
|
|
403
|
+
* ```typescript
|
|
404
|
+
* // Advanced usage with custom configuration
|
|
405
|
+
* import { AllProfanity, ProfanitySeverity } from 'allprofanity';
|
|
406
|
+
*
|
|
407
|
+
* const filter = new AllProfanity({
|
|
408
|
+
* languages: ['english', 'french', 'spanish'],
|
|
409
|
+
* enableLeetSpeak: true,
|
|
410
|
+
* strictMode: true,
|
|
411
|
+
* algorithm: {
|
|
412
|
+
* matching: 'hybrid',
|
|
413
|
+
* useBloomFilter: true
|
|
414
|
+
* },
|
|
415
|
+
* performance: {
|
|
416
|
+
* enableCaching: true,
|
|
417
|
+
* cacheSize: 500
|
|
418
|
+
* },
|
|
419
|
+
* whitelistWords: ['class', 'assignment']
|
|
420
|
+
* });
|
|
421
|
+
*
|
|
422
|
+
* const text = "This text has some b@d w0rds";
|
|
423
|
+
* const result = filter.detect(text);
|
|
424
|
+
*
|
|
425
|
+
* if (result.hasProfanity) {
|
|
426
|
+
* console.log(`Found ${result.detectedWords.length} profane words`);
|
|
427
|
+
* console.log(`Severity: ${ProfanitySeverity[result.severity]}`);
|
|
428
|
+
* console.log(`Cleaned: ${result.cleanedText}`);
|
|
429
|
+
* }
|
|
430
|
+
* ```
|
|
431
|
+
*
|
|
432
|
+
* @example
|
|
433
|
+
* ```typescript
|
|
434
|
+
* // Using individual methods
|
|
435
|
+
* const filter = new AllProfanity();
|
|
436
|
+
*
|
|
437
|
+
* // Simple check
|
|
438
|
+
* if (filter.check("some text")) {
|
|
439
|
+
* console.log("Contains profanity!");
|
|
440
|
+
* }
|
|
441
|
+
*
|
|
442
|
+
* // Clean with custom placeholder
|
|
443
|
+
* const cleaned = filter.clean("bad words here", "#");
|
|
444
|
+
*
|
|
445
|
+
* // Load additional languages
|
|
446
|
+
* filter.loadLanguage('german');
|
|
447
|
+
* filter.loadIndianLanguages(); // Loads hindi, bengali, tamil, telugu
|
|
448
|
+
*
|
|
449
|
+
* // Add custom words
|
|
450
|
+
* filter.add(['customword1', 'customword2']);
|
|
451
|
+
*
|
|
452
|
+
* // Remove words
|
|
453
|
+
* filter.remove(['someword']);
|
|
454
|
+
*
|
|
455
|
+
* // Whitelist words
|
|
456
|
+
* filter.addToWhitelist(['class', 'assignment']);
|
|
457
|
+
* ```
|
|
458
|
+
*
|
|
459
|
+
* @see {@link AllProfanityOptions} for all configuration options
|
|
460
|
+
* @see {@link ProfanityDetectionResult} for detection result format
|
|
461
|
+
* @see {@link ProfanitySeverity} for severity levels
|
|
183
462
|
*/
|
|
184
463
|
export class AllProfanity {
|
|
185
464
|
/**
|
|
186
|
-
*
|
|
187
|
-
*
|
|
465
|
+
* Creates a new AllProfanity instance with the specified configuration.
|
|
466
|
+
*
|
|
467
|
+
* @constructor
|
|
468
|
+
* @param {AllProfanityOptions} [options] - Configuration options for profanity detection behavior
|
|
469
|
+
*
|
|
470
|
+
* @remarks
|
|
471
|
+
* ### Default Initialization:
|
|
472
|
+
* - Loads English and Hindi dictionaries automatically
|
|
473
|
+
* - Enables leet speak detection
|
|
474
|
+
* - Case-insensitive matching
|
|
475
|
+
* - Uses Trie algorithm for pattern matching
|
|
476
|
+
*
|
|
477
|
+
* ### Performance Considerations:
|
|
478
|
+
* - Initial load time depends on number of languages loaded
|
|
479
|
+
* - Aho-Corasick automaton (if enabled) is built during construction
|
|
480
|
+
* - Bloom Filter (if enabled) is populated during construction
|
|
481
|
+
*
|
|
482
|
+
* @throws {TypeError} If invalid options are provided
|
|
483
|
+
*
|
|
484
|
+
* @example
|
|
485
|
+
* ```typescript
|
|
486
|
+
* // Default instance
|
|
487
|
+
* const filter = new AllProfanity();
|
|
488
|
+
*
|
|
489
|
+
* // Custom configuration
|
|
490
|
+
* const filter = new AllProfanity({
|
|
491
|
+
* languages: ['english', 'french'],
|
|
492
|
+
* strictMode: true,
|
|
493
|
+
* defaultPlaceholder: '#',
|
|
494
|
+
* algorithm: { matching: 'hybrid' }
|
|
495
|
+
* });
|
|
496
|
+
*
|
|
497
|
+
* // Silent mode (no logging)
|
|
498
|
+
* const filter = new AllProfanity({ silent: true });
|
|
499
|
+
* ```
|
|
500
|
+
*
|
|
501
|
+
* @see {@link AllProfanityOptions} for all available configuration options
|
|
188
502
|
*/
|
|
189
503
|
constructor(options) {
|
|
190
504
|
var _a, _b, _c, _d, _e;
|
|
@@ -272,7 +586,8 @@ export class AllProfanity {
|
|
|
272
586
|
this.contextAnalyzer = null;
|
|
273
587
|
this.matchingAlgorithm = "trie";
|
|
274
588
|
this.resultCache = null;
|
|
275
|
-
|
|
589
|
+
// Prefer an explicitly provided logger; otherwise use the silent logger when silent mode is enabled, falling back to the console logger
|
|
590
|
+
this.logger = (options === null || options === void 0 ? void 0 : options.logger) || ((options === null || options === void 0 ? void 0 : options.silent) ? new SilentLogger() : new ConsoleLogger());
|
|
276
591
|
if ((options === null || options === void 0 ? void 0 : options.defaultPlaceholder) !== undefined) {
|
|
277
592
|
this.setPlaceholder(options.defaultPlaceholder);
|
|
278
593
|
}
|
|
@@ -497,9 +812,56 @@ export class AllProfanity {
|
|
|
497
812
|
});
|
|
498
813
|
}
|
|
499
814
|
/**
|
|
500
|
-
*
|
|
501
|
-
*
|
|
502
|
-
* @
|
|
815
|
+
* Detects profanity in the provided text and returns comprehensive analysis.
|
|
816
|
+
*
|
|
817
|
+
* @param {string} text - The text to analyze for profanity
|
|
818
|
+
* @returns {ProfanityDetectionResult} Detailed detection result including matches, positions, severity, and cleaned text
|
|
819
|
+
*
|
|
820
|
+
* @throws {TypeError} If text is not a string
|
|
821
|
+
*
|
|
822
|
+
* @remarks
|
|
823
|
+
* ### Performance:
|
|
824
|
+
* - Time Complexity: O(n*m) where n is text length, m is average word length in dictionary
|
|
825
|
+
* - With Bloom Filter: O(n) average case (faster early rejection)
|
|
826
|
+
* - With Caching: O(1) for repeated identical text
|
|
827
|
+
*
|
|
828
|
+
* ### Features:
|
|
829
|
+
* - Detects leet speak variations (if enabled): "h3ll0" → "hello"
|
|
830
|
+
* - Respects word boundaries (strict mode) or detects partial matches
|
|
831
|
+
* - Returns exact positions for highlighting/masking
|
|
832
|
+
* - Calculates severity based on match count and uniqueness
|
|
833
|
+
*
|
|
834
|
+
* ### Caching:
|
|
835
|
+
* - Results are cached if `performance.enableCaching` is true
|
|
836
|
+
* - Cache uses LRU eviction when size limit is reached
|
|
837
|
+
*
|
|
838
|
+
* @example
|
|
839
|
+
* ```typescript
|
|
840
|
+
* const filter = new AllProfanity();
|
|
841
|
+
* const result = filter.detect("This has bad words");
|
|
842
|
+
*
|
|
843
|
+
* console.log(result.hasProfanity); // true
|
|
844
|
+
* console.log(result.detectedWords); // ['bad']
|
|
845
|
+
* console.log(result.cleanedText); // 'This has *** words'
|
|
846
|
+
* console.log(result.severity); // ProfanitySeverity.MILD
|
|
847
|
+
* console.log(result.positions); // [{ word: 'bad', start: 9, end: 12 }]
|
|
848
|
+
* ```
|
|
849
|
+
*
|
|
850
|
+
* @example
|
|
851
|
+
* ```typescript
|
|
852
|
+
* // With leet speak detection
|
|
853
|
+
* const filter = new AllProfanity({ enableLeetSpeak: true });
|
|
854
|
+
* const result = filter.detect("st0p b3ing b@d");
|
|
855
|
+
*
|
|
856
|
+
* if (result.hasProfanity) {
|
|
857
|
+
* result.positions.forEach(pos => {
|
|
858
|
+
* console.log(`Found "${pos.word}" at position ${pos.start}-${pos.end}`);
|
|
859
|
+
* });
|
|
860
|
+
* }
|
|
861
|
+
* ```
|
|
862
|
+
*
|
|
863
|
+
* @see {@link ProfanityDetectionResult} for result structure
|
|
864
|
+
* @see {@link ProfanitySeverity} for severity levels
|
|
503
865
|
*/
|
|
504
866
|
detect(text) {
|
|
505
867
|
var _a;
|
|
@@ -579,7 +941,9 @@ export class AllProfanity {
|
|
|
579
941
|
// Bound the cache size: once it grows past 1000 entries, evict the oldest-inserted entry (first key in iteration order)
|
|
580
942
|
if (this.resultCache.size > 1000) {
|
|
581
943
|
const firstKey = this.resultCache.keys().next().value;
|
|
582
|
-
|
|
944
|
+
if (firstKey !== undefined) {
|
|
945
|
+
this.resultCache.delete(firstKey);
|
|
946
|
+
}
|
|
583
947
|
}
|
|
584
948
|
}
|
|
585
949
|
return result;
|
|
@@ -636,18 +1000,78 @@ export class AllProfanity {
|
|
|
636
1000
|
return result;
|
|
637
1001
|
}
|
|
638
1002
|
/**
|
|
639
|
-
*
|
|
640
|
-
*
|
|
641
|
-
* @
|
|
1003
|
+
* Quick boolean check for profanity presence in text.
|
|
1004
|
+
*
|
|
1005
|
+
* @param {string} text - The text to check for profanity
|
|
1006
|
+
* @returns {boolean} True if profanity is detected, false otherwise
|
|
1007
|
+
*
|
|
1008
|
+
* @throws {TypeError} If text is not a string
|
|
1009
|
+
*
|
|
1010
|
+
* @remarks
|
|
1011
|
+
* - Convenience method that internally calls `detect()` and returns only the boolean result
|
|
1012
|
+
* - For detailed information about matches, use `detect()` instead
|
|
1013
|
+
* - Results are cached if caching is enabled (same cache as `detect()`)
|
|
1014
|
+
*
|
|
1015
|
+
* @example
|
|
1016
|
+
* ```typescript
|
|
1017
|
+
* const filter = new AllProfanity();
|
|
1018
|
+
*
|
|
1019
|
+
* if (filter.check("This has bad words")) {
|
|
1020
|
+
* console.log("Profanity detected!");
|
|
1021
|
+
* }
|
|
1022
|
+
*
|
|
1023
|
+
* // Quick validation
|
|
1024
|
+
* const isClean = !filter.check(userInput);
|
|
1025
|
+
* ```
|
|
1026
|
+
*
|
|
1027
|
+
* @see {@link detect} for detailed profanity analysis
|
|
642
1028
|
*/
|
|
643
1029
|
check(text) {
|
|
644
1030
|
return this.detect(text).hasProfanity;
|
|
645
1031
|
}
|
|
646
1032
|
/**
|
|
647
|
-
*
|
|
648
|
-
*
|
|
649
|
-
* @param
|
|
650
|
-
* @
|
|
1033
|
+
* Cleans text by replacing profanity with a placeholder character.
|
|
1034
|
+
*
|
|
1035
|
+
* @param {string} text - The text to clean
|
|
1036
|
+
* @param {string} [placeholder] - Optional custom placeholder character (uses default if not provided)
|
|
1037
|
+
* @returns {string} The cleaned text with profanity replaced
|
|
1038
|
+
*
|
|
1039
|
+
* @throws {TypeError} If text is not a string
|
|
1040
|
+
*
|
|
1041
|
+
* @remarks
|
|
1042
|
+
* ### Character-level Replacement:
|
|
1043
|
+
* - Each profane character is replaced individually
|
|
1044
|
+
* - "bad" with placeholder "*" becomes "***"
|
|
1045
|
+
* - Preserves text length and structure
|
|
1046
|
+
*
|
|
1047
|
+
* ### Placeholder Behavior:
|
|
1048
|
+
* - If no placeholder provided, uses the instance's default placeholder
|
|
1049
|
+
* - If placeholder provided, uses only the first character
|
|
1050
|
+
* - Empty placeholder throws error
|
|
1051
|
+
*
|
|
1052
|
+
* @example
|
|
1053
|
+
* ```typescript
|
|
1054
|
+
* const filter = new AllProfanity();
|
|
1055
|
+
*
|
|
1056
|
+
* // Using default placeholder (*)
|
|
1057
|
+
* const cleaned = filter.clean("This has bad words");
|
|
1058
|
+
* console.log(cleaned); // "This has *** *****"
|
|
1059
|
+
*
|
|
1060
|
+
* // Using custom placeholder
|
|
1061
|
+
* const cleaned = filter.clean("This has bad words", "#");
|
|
1062
|
+
* console.log(cleaned); // "This has ### #####"
|
|
1063
|
+
* ```
|
|
1064
|
+
*
|
|
1065
|
+
* @example
|
|
1066
|
+
* ```typescript
|
|
1067
|
+
* // Clean user-generated content for display
|
|
1068
|
+
* const userComment = "Some inappropriate words here";
|
|
1069
|
+
* const safeComment = filter.clean(userComment);
|
|
1070
|
+
* displayComment(safeComment);
|
|
1071
|
+
* ```
|
|
1072
|
+
*
|
|
1073
|
+
* @see {@link cleanWithPlaceholder} for word-level replacement
|
|
1074
|
+
* @see {@link setPlaceholder} to change default placeholder
|
|
651
1075
|
*/
|
|
652
1076
|
clean(text, placeholder) {
|
|
653
1077
|
const detection = this.detect(text);
|
|
@@ -674,10 +1098,46 @@ export class AllProfanity {
|
|
|
674
1098
|
return result;
|
|
675
1099
|
}
|
|
676
1100
|
/**
|
|
677
|
-
*
|
|
678
|
-
*
|
|
679
|
-
* @param
|
|
680
|
-
* @
|
|
1101
|
+
* Cleans text by replacing each profane word with a single placeholder string (word-level replacement).
|
|
1102
|
+
*
|
|
1103
|
+
* @param {string} text - The text to clean
|
|
1104
|
+
* @param {string} [placeholder="***"] - The placeholder string to use for each profane word
|
|
1105
|
+
* @returns {string} The cleaned text with each profane word replaced by the placeholder
|
|
1106
|
+
*
|
|
1107
|
+
* @throws {TypeError} If text is not a string
|
|
1108
|
+
*
|
|
1109
|
+
* @remarks
|
|
1110
|
+
* ### Word-level Replacement:
|
|
1111
|
+
* - Each profane word is replaced with the entire placeholder string (not character-by-character)
|
|
1112
|
+
* - "bad words" with placeholder "***" becomes "*** ***"
|
|
1113
|
+
* - Does NOT preserve original text length
|
|
1114
|
+
*
|
|
1115
|
+
* ### Difference from `clean()`:
|
|
1116
|
+
* - `clean()`: Character-level replacement - "badword" becomes "*******" (preserves length)
|
|
1117
|
+
* - `cleanWithPlaceholder()`: Word-level replacement - "badword" becomes "***" (fixed placeholder, regardless of word length)
|
|
1118
|
+
*
|
|
1119
|
+
* @example
|
|
1120
|
+
* ```typescript
|
|
1121
|
+
* const filter = new AllProfanity();
|
|
1122
|
+
*
|
|
1123
|
+
* const text = "This has bad words"; // cleaned below with the default placeholder (***)
|
|
1124
|
+
* const cleaned = filter.cleanWithPlaceholder(text);
|
|
1125
|
+
* console.log(cleaned); // "This has *** ***"
|
|
1126
|
+
*
|
|
1127
|
+
* // Custom placeholder
|
|
1128
|
+
* const cleaned2 = filter.cleanWithPlaceholder(text, "[CENSORED]");
|
|
1129
|
+
* console.log(cleaned2); // "This has [CENSORED] [CENSORED]"
|
|
1130
|
+
* ```
|
|
1131
|
+
*
|
|
1132
|
+
* @example
|
|
1133
|
+
* ```typescript
|
|
1134
|
+
* // Censoring chat messages
|
|
1135
|
+
* const message = "You are a badword and stupid";
|
|
1136
|
+
* const censored = filter.cleanWithPlaceholder(message, "[***]");
|
|
1137
|
+
* // Result: "You are a [***] and [***]"
|
|
1138
|
+
* ```
|
|
1139
|
+
*
|
|
1140
|
+
* @see {@link clean} for character-level replacement
|
|
681
1141
|
*/
|
|
682
1142
|
cleanWithPlaceholder(text, placeholder = "***") {
|
|
683
1143
|
const detection = this.detect(text);
|
|
@@ -703,8 +1163,51 @@ export class AllProfanity {
|
|
|
703
1163
|
return result;
|
|
704
1164
|
}
|
|
705
1165
|
/**
|
|
706
|
-
*
|
|
707
|
-
*
|
|
1166
|
+
* Dynamically adds one or more words to the profanity filter at runtime.
|
|
1167
|
+
*
|
|
1168
|
+
* @param {string | string[]} word - A single word or array of words to add to the filter
|
|
1169
|
+
* @returns {void}
|
|
1170
|
+
*
|
|
1171
|
+
* @remarks
|
|
1172
|
+
* ### Behavior:
|
|
1173
|
+
* - Words are added to all active data structures (Trie, Aho-Corasick, Bloom Filter)
|
|
1174
|
+
* - Automatically normalizes words based on caseSensitive setting
|
|
1175
|
+
* - Skips whitelisted words
|
|
1176
|
+
* - Validates and filters out non-string or empty values
|
|
1177
|
+
* - Changes take effect immediately for subsequent detect/check/clean calls
|
|
1178
|
+
*
|
|
1179
|
+
* ### Use Cases:
|
|
1180
|
+
* - Adding context-specific profanity
|
|
1181
|
+
* - Building dynamic word lists from user reports
|
|
1182
|
+
* - Customizing filters for specific communities/applications
|
|
1183
|
+
*
|
|
1184
|
+
* @example
|
|
1185
|
+
* ```typescript
|
|
1186
|
+
* const filter = new AllProfanity();
|
|
1187
|
+
*
|
|
1188
|
+
* // Add single word
|
|
1189
|
+
* filter.add('newbadword');
|
|
1190
|
+
*
|
|
1191
|
+
* // Add multiple words
|
|
1192
|
+
* filter.add(['word1', 'word2', 'word3']);
|
|
1193
|
+
*
|
|
1194
|
+
* // Now these words will be detected
|
|
1195
|
+
* filter.check('newbadword'); // true
|
|
1196
|
+
* ```
|
|
1197
|
+
*
|
|
1198
|
+
* @example
|
|
1199
|
+
* ```typescript
|
|
1200
|
+
* // Add game-specific slang dynamically
|
|
1201
|
+
* const filter = new AllProfanity();
|
|
1202
|
+
* const gamingSlang = ['noob', 'trash', 'tryhard'];
|
|
1203
|
+
* filter.add(gamingSlang);
|
|
1204
|
+
*
|
|
1205
|
+
* const message = "You're such a noob";
|
|
1206
|
+
* console.log(filter.check(message)); // true
|
|
1207
|
+
* ```
|
|
1208
|
+
*
|
|
1209
|
+
* @see {@link remove} to remove words
|
|
1210
|
+
* @see {@link loadCustomDictionary} for loading named dictionaries
|
|
708
1211
|
*/
|
|
709
1212
|
add(word) {
|
|
710
1213
|
const words = Array.isArray(word) ? word : [word];
|
|
@@ -715,8 +1218,50 @@ export class AllProfanity {
|
|
|
715
1218
|
}
|
|
716
1219
|
}
|
|
717
1220
|
/**
|
|
718
|
-
*
|
|
719
|
-
*
|
|
1221
|
+
* Dynamically removes one or more words from the profanity filter at runtime.
|
|
1222
|
+
*
|
|
1223
|
+
* @param {string | string[]} word - A single word or array of words to remove from the filter
|
|
1224
|
+
* @returns {void}
|
|
1225
|
+
*
|
|
1226
|
+
* @remarks
|
|
1227
|
+
* ### Behavior:
|
|
1228
|
+
* - Removes words from all active data structures (Trie, dynamic words set)
|
|
1229
|
+
* - Normalizes words based on caseSensitive setting before removal
|
|
1230
|
+
* - Only removes dynamically added words, not words from loaded language dictionaries
|
|
1231
|
+
* - Changes take effect immediately for subsequent detect/check/clean calls
|
|
1232
|
+
*
|
|
1233
|
+
* ### Important Notes:
|
|
1234
|
+
* - Cannot remove words from built-in language dictionaries
|
|
1235
|
+
* - To exclude dictionary words, use `addToWhitelist()` instead
|
|
1236
|
+
* - Validates and filters out non-string or empty values
|
|
1237
|
+
*
|
|
1238
|
+
* @example
|
|
1239
|
+
* ```typescript
|
|
1240
|
+
* const filter = new AllProfanity();
|
|
1241
|
+
*
|
|
1242
|
+
* // Add then remove a word
|
|
1243
|
+
* filter.add('tempword');
|
|
1244
|
+
* filter.check('tempword'); // true
|
|
1245
|
+
*
|
|
1246
|
+
* filter.remove('tempword');
|
|
1247
|
+
* filter.check('tempword'); // false
|
|
1248
|
+
*
|
|
1249
|
+
* // Remove multiple words
|
|
1250
|
+
* filter.remove(['word1', 'word2']);
|
|
1251
|
+
* ```
|
|
1252
|
+
*
|
|
1253
|
+
* @example
|
|
1254
|
+
* ```typescript
|
|
1255
|
+
* // Managing custom word list
|
|
1256
|
+
* const filter = new AllProfanity();
|
|
1257
|
+
* filter.add(['custom1', 'custom2', 'custom3']);
|
|
1258
|
+
*
|
|
1259
|
+
* // Later, remove one that's no longer needed
|
|
1260
|
+
* filter.remove('custom2');
|
|
1261
|
+
* ```
|
|
1262
|
+
*
|
|
1263
|
+
* @see {@link add} to add words
|
|
1264
|
+
* @see {@link addToWhitelist} to exclude dictionary words without removing them
|
|
720
1265
|
*/
|
|
721
1266
|
remove(word) {
|
|
722
1267
|
const words = Array.isArray(word) ? word : [word];
|
|
@@ -759,9 +1304,60 @@ export class AllProfanity {
|
|
|
759
1304
|
return this.whitelistSet.has(normalizedWord);
|
|
760
1305
|
}
|
|
761
1306
|
/**
|
|
762
|
-
*
|
|
763
|
-
*
|
|
764
|
-
* @
|
|
1307
|
+
* Loads a built-in language dictionary into the profanity filter.
|
|
1308
|
+
*
|
|
1309
|
+
* @param {string} language - The language key to load (case-insensitive)
|
|
1310
|
+
* @returns {boolean} True if the language was loaded successfully or was already loaded; false if the language was not found
|
|
1311
|
+
*
|
|
1312
|
+
* @remarks
|
|
1313
|
+
* ### Available Languages:
|
|
1314
|
+
* - `'english'` - English profanity words
|
|
1315
|
+
* - `'hindi'` - Hindi profanity words
|
|
1316
|
+
* - `'french'` - French profanity words
|
|
1317
|
+
* - `'german'` - German profanity words
|
|
1318
|
+
* - `'spanish'` - Spanish profanity words
|
|
1319
|
+
* - `'bengali'` - Bengali profanity words
|
|
1320
|
+
* - `'tamil'` - Tamil profanity words
|
|
1321
|
+
* - `'telugu'` - Telugu profanity words
|
|
1322
|
+
* - `'brazilian'` - Brazilian Portuguese profanity words
|
|
1323
|
+
*
|
|
1324
|
+
* ### Behavior:
|
|
1325
|
+
* - Language keys are case-insensitive
|
|
1326
|
+
* - Loading is idempotent - calling it multiple times for the same language is safe
|
|
1327
|
+
* - Returns true if language loaded successfully or was already loaded
|
|
1328
|
+
* - Returns false if language not found
|
|
1329
|
+
* - Logs success/failure messages (unless silent mode enabled)
|
|
1330
|
+
* - Words are added to all active data structures
|
|
1331
|
+
*
|
|
1332
|
+
* ### Default Languages:
|
|
1333
|
+
* English and Hindi are loaded automatically in the constructor
|
|
1334
|
+
*
|
|
1335
|
+
* @example
|
|
1336
|
+
* ```typescript
|
|
1337
|
+
* const filter = new AllProfanity();
|
|
1338
|
+
*
|
|
1339
|
+
* // Load additional languages
|
|
1340
|
+
* filter.loadLanguage('french');
|
|
1341
|
+
* filter.loadLanguage('spanish');
|
|
1342
|
+
*
|
|
1343
|
+
* // Case-insensitive
|
|
1344
|
+
* filter.loadLanguage('GERMAN'); // Works
|
|
1345
|
+
*
|
|
1346
|
+
* // Check if loaded
|
|
1347
|
+
* console.log(filter.getLoadedLanguages()); // ['english', 'hindi', 'french', 'spanish', 'german']
|
|
1348
|
+
* ```
|
|
1349
|
+
*
|
|
1350
|
+
* @example
|
|
1351
|
+
* ```typescript
|
|
1352
|
+
* // Load all Indian languages at once
|
|
1353
|
+
* const filter = new AllProfanity();
|
|
1354
|
+
* filter.loadIndianLanguages();
|
|
1355
|
+
* ```
|
|
1356
|
+
*
|
|
1357
|
+
* @see {@link loadLanguages} to load multiple languages at once
|
|
1358
|
+
* @see {@link loadIndianLanguages} for a convenience method that loads the Indian languages in one call
|
|
1359
|
+
* @see {@link getAvailableLanguages} to see all available languages
|
|
1360
|
+
* @see {@link getLoadedLanguages} to see currently loaded languages
|
|
765
1361
|
*/
|
|
766
1362
|
loadLanguage(language) {
|
|
767
1363
|
if (!language || typeof language !== "string") {
|
|
@@ -813,9 +1409,64 @@ export class AllProfanity {
|
|
|
813
1409
|
return this.loadLanguages(indianLanguages);
|
|
814
1410
|
}
|
|
815
1411
|
/**
|
|
816
|
-
*
|
|
817
|
-
*
|
|
818
|
-
* @param
|
|
1412
|
+
* Loads a custom dictionary of profane words with a specific name.
|
|
1413
|
+
*
|
|
1414
|
+
* @param {string} name - Unique name/identifier for this custom dictionary
|
|
1415
|
+
* @param {string[]} words - Array of profane words to add to the dictionary
|
|
1416
|
+
* @returns {void}
|
|
1417
|
+
*
|
|
1418
|
+
* @throws {TypeError} If name is not a string or words is not an array
|
|
1419
|
+
*
|
|
1420
|
+
* @remarks
|
|
1421
|
+
* ### Behavior:
|
|
1422
|
+
* - Creates a new named dictionary or overwrites an existing one with the same name
|
|
1423
|
+
* - Validates and filters out non-string and empty values from words array
|
|
1424
|
+
* - Words are added to all active data structures (Trie, Aho-Corasick, Bloom Filter)
|
|
1425
|
+
* - Dictionary name is converted to lowercase for storage
|
|
1426
|
+
* - Logs count of loaded words (unless silent mode enabled)
|
|
1427
|
+
*
|
|
1428
|
+
* ### Use Cases:
|
|
1429
|
+
* - Domain-specific profanity (gaming, medical, legal, etc.)
|
|
1430
|
+
* - Organization-specific word lists
|
|
1431
|
+
* - Temporary or context-dependent filters
|
|
1432
|
+
* - Testing and development
|
|
1433
|
+
*
|
|
1434
|
+
* @example
|
|
1435
|
+
* ```typescript
|
|
1436
|
+
* const filter = new AllProfanity();
|
|
1437
|
+
*
|
|
1438
|
+
* // Load gaming-specific slang
|
|
1439
|
+
* filter.loadCustomDictionary('gaming', [
|
|
1440
|
+
* 'noob',
|
|
1441
|
+
* 'scrub',
|
|
1442
|
+
* 'tryhard',
|
|
1443
|
+
* 'trash'
|
|
1444
|
+
* ]);
|
|
1445
|
+
*
|
|
1446
|
+
* // Load company-specific terms
|
|
1447
|
+
* filter.loadCustomDictionary('company', [
|
|
1448
|
+
* 'competitor1',
|
|
1449
|
+
* 'bannedTerm1',
|
|
1450
|
+
* 'inappropriateJargon'
|
|
1451
|
+
* ]);
|
|
1452
|
+
*
|
|
1453
|
+
* console.log(filter.check('You are such a noob')); // true
|
|
1454
|
+
* ```
|
|
1455
|
+
*
|
|
1456
|
+
* @example
|
|
1457
|
+
* ```typescript
|
|
1458
|
+
* // Load from external source
|
|
1459
|
+
* const filter = new AllProfanity();
|
|
1460
|
+
*
|
|
1461
|
+
* async function loadExternalDictionary() {
|
|
1462
|
+
* const response = await fetch('https://example.com/custom-words.json');
|
|
1463
|
+
* const customWords = await response.json();
|
|
1464
|
+
* filter.loadCustomDictionary('external', customWords);
|
|
1465
|
+
* }
|
|
1466
|
+
* ```
|
|
1467
|
+
*
|
|
1468
|
+
* @see {@link add} for adding individual words dynamically
|
|
1469
|
+
* @see {@link loadLanguage} for loading built-in language dictionaries
|
|
819
1470
|
*/
|
|
820
1471
|
loadCustomDictionary(name, words) {
|
|
821
1472
|
validateString(name, "dictionary name");
|