allprofanity 2.1.0 → 2.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CONTRIBUTORS.md +106 -0
- package/README.md +361 -26
- package/allprofanity.config.example.json +35 -0
- package/bin/init.js +49 -0
- package/config.schema.json +163 -0
- package/dist/algos/aho-corasick.d.ts +75 -0
- package/dist/algos/aho-corasick.js +238 -0
- package/dist/algos/aho-corasick.js.map +1 -0
- package/dist/algos/bloom-filter.d.ts +103 -0
- package/dist/algos/bloom-filter.js +208 -0
- package/dist/algos/bloom-filter.js.map +1 -0
- package/dist/algos/context-patterns.d.ts +88 -0
- package/dist/algos/context-patterns.js +298 -0
- package/dist/algos/context-patterns.js.map +1 -0
- package/dist/index.d.ts +161 -35
- package/dist/index.js +353 -82
- package/dist/index.js.map +1 -1
- package/dist/languages/brazilian-words.d.ts +7 -0
- package/dist/languages/brazilian-words.js +207 -0
- package/dist/languages/brazilian-words.js.map +1 -0
- package/package.json +23 -7
package/dist/index.js
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
//
|
|
1
|
+
// Language dictionaries imports
|
|
2
2
|
import englishBadWords from "./languages/english-words.js";
|
|
3
3
|
import hindiBadWords from "./languages/hindi-words.js";
|
|
4
4
|
import frenchBadWords from "./languages/french-words.js";
|
|
@@ -7,6 +7,11 @@ import spanishBadWords from "./languages/spanish-words.js";
|
|
|
7
7
|
import bengaliBadWords from "./languages/bengali-words.js";
|
|
8
8
|
import tamilBadWords from "./languages/tamil-words.js";
|
|
9
9
|
import teluguBadWords from "./languages/telugu-words.js";
|
|
10
|
+
import brazilianBadWords from "./languages/brazilian-words.js";
|
|
11
|
+
// Advanced algorithm imports
|
|
12
|
+
import { AhoCorasick } from "./algos/aho-corasick.js";
|
|
13
|
+
import { BloomFilter } from "./algos/bloom-filter.js";
|
|
14
|
+
import { ContextAnalyzer } from "./algos/context-patterns.js";
|
|
10
15
|
// Export language dictionaries for direct access
|
|
11
16
|
export { default as englishBadWords } from "./languages/english-words.js";
|
|
12
17
|
export { default as hindiBadWords } from "./languages/hindi-words.js";
|
|
@@ -16,8 +21,9 @@ export { default as spanishBadWords } from "./languages/spanish-words.js";
|
|
|
16
21
|
export { default as bengaliBadWords } from "./languages/bengali-words.js";
|
|
17
22
|
export { default as tamilBadWords } from "./languages/tamil-words.js";
|
|
18
23
|
export { default as teluguBadWords } from "./languages/telugu-words.js";
|
|
24
|
+
export { default as brazilianBadWords } from "./languages/brazilian-words.js";
|
|
19
25
|
/**
|
|
20
|
-
* Default console logger implementation
|
|
26
|
+
* Default console logger implementation.
|
|
21
27
|
*/
|
|
22
28
|
class ConsoleLogger {
|
|
23
29
|
info(message) {
|
|
@@ -31,7 +37,7 @@ class ConsoleLogger {
|
|
|
31
37
|
}
|
|
32
38
|
}
|
|
33
39
|
/**
|
|
34
|
-
* Severity levels for profanity detection
|
|
40
|
+
* Severity levels for profanity detection.
|
|
35
41
|
*/
|
|
36
42
|
export var ProfanitySeverity;
|
|
37
43
|
(function (ProfanitySeverity) {
|
|
@@ -41,7 +47,11 @@ export var ProfanitySeverity;
|
|
|
41
47
|
ProfanitySeverity[ProfanitySeverity["EXTREME"] = 4] = "EXTREME";
|
|
42
48
|
})(ProfanitySeverity = ProfanitySeverity || (ProfanitySeverity = {}));
|
|
43
49
|
/**
|
|
44
|
-
*
|
|
50
|
+
* Validate a string parameter.
|
|
51
|
+
* @param input - The input to validate.
|
|
52
|
+
* @param paramName - The name of the parameter.
|
|
53
|
+
* @returns The validated string.
|
|
54
|
+
* @throws {TypeError} If input is not a string.
|
|
45
55
|
*/
|
|
46
56
|
function validateString(input, paramName) {
|
|
47
57
|
if (typeof input !== "string") {
|
|
@@ -49,6 +59,13 @@ function validateString(input, paramName) {
|
|
|
49
59
|
}
|
|
50
60
|
return input;
|
|
51
61
|
}
|
|
62
|
+
/**
|
|
63
|
+
* Validate a string array parameter.
|
|
64
|
+
* @param input - The input to validate.
|
|
65
|
+
* @param paramName - The name of the parameter.
|
|
66
|
+
* @returns The validated string array.
|
|
67
|
+
* @throws {TypeError} If input is not an array.
|
|
68
|
+
*/
|
|
52
69
|
function validateStringArray(input, paramName) {
|
|
53
70
|
if (!Array.isArray(input)) {
|
|
54
71
|
throw new TypeError(`${paramName} must be an array`);
|
|
@@ -62,7 +79,7 @@ function validateStringArray(input, paramName) {
|
|
|
62
79
|
});
|
|
63
80
|
}
|
|
64
81
|
/**
|
|
65
|
-
*
|
|
82
|
+
* Trie node for efficient string matching.
|
|
66
83
|
*/
|
|
67
84
|
class TrieNode {
|
|
68
85
|
constructor() {
|
|
@@ -71,7 +88,8 @@ class TrieNode {
|
|
|
71
88
|
this.word = "";
|
|
72
89
|
}
|
|
73
90
|
/**
|
|
74
|
-
* Add a word to the trie
|
|
91
|
+
* Add a word to the trie.
|
|
92
|
+
* @param word - The word to add.
|
|
75
93
|
*/
|
|
76
94
|
addWord(word) {
|
|
77
95
|
let current = this;
|
|
@@ -88,7 +106,9 @@ class TrieNode {
|
|
|
88
106
|
current.word = word;
|
|
89
107
|
}
|
|
90
108
|
/**
|
|
91
|
-
* Remove a word from the trie
|
|
109
|
+
* Remove a word from the trie.
|
|
110
|
+
* @param word - The word to remove.
|
|
111
|
+
* @returns True if the word was removed, false otherwise.
|
|
92
112
|
*/
|
|
93
113
|
removeWord(word) {
|
|
94
114
|
return this.removeHelper(word, 0);
|
|
@@ -112,7 +132,11 @@ class TrieNode {
|
|
|
112
132
|
return false;
|
|
113
133
|
}
|
|
114
134
|
/**
|
|
115
|
-
* Find all matches starting at a given position
|
|
135
|
+
* Find all matches starting at a given position.
|
|
136
|
+
* @param text - The text to search.
|
|
137
|
+
* @param startPos - The start position.
|
|
138
|
+
* @param allowPartial - Whether to allow partial word matches.
|
|
139
|
+
* @returns Array of matches.
|
|
116
140
|
*/
|
|
117
141
|
findMatches(text, startPos, allowPartial) {
|
|
118
142
|
const matches = [];
|
|
@@ -146,7 +170,7 @@ class TrieNode {
|
|
|
146
170
|
return matches;
|
|
147
171
|
}
|
|
148
172
|
/**
|
|
149
|
-
* Clear all words from the trie
|
|
173
|
+
* Clear all words from the trie.
|
|
150
174
|
*/
|
|
151
175
|
clear() {
|
|
152
176
|
this.children.clear();
|
|
@@ -155,22 +179,23 @@ class TrieNode {
|
|
|
155
179
|
}
|
|
156
180
|
}
|
|
157
181
|
/**
|
|
158
|
-
*
|
|
159
|
-
* Addresses all critical issues from the original implementation
|
|
182
|
+
* Main class for profanity detection and filtering.
|
|
160
183
|
*/
|
|
161
184
|
export class AllProfanity {
|
|
185
|
+
/**
|
|
186
|
+
* Create an AllProfanity instance.
|
|
187
|
+
* @param options - Profanity filter configuration options.
|
|
188
|
+
*/
|
|
162
189
|
constructor(options) {
|
|
163
190
|
var _a, _b, _c, _d, _e;
|
|
164
191
|
this.profanityTrie = new TrieNode();
|
|
165
192
|
this.whitelistSet = new Set();
|
|
166
193
|
this.loadedLanguages = new Set();
|
|
167
|
-
// Configuration
|
|
168
194
|
this.defaultPlaceholder = "*";
|
|
169
195
|
this.enableLeetSpeak = true;
|
|
170
196
|
this.caseSensitive = false;
|
|
171
197
|
this.strictMode = false;
|
|
172
198
|
this.detectPartialWords = false;
|
|
173
|
-
// Available language dictionaries
|
|
174
199
|
this.availableLanguages = {
|
|
175
200
|
english: englishBadWords || [],
|
|
176
201
|
hindi: hindiBadWords || [],
|
|
@@ -180,8 +205,8 @@ export class AllProfanity {
|
|
|
180
205
|
bengali: bengaliBadWords || [],
|
|
181
206
|
tamil: tamilBadWords || [],
|
|
182
207
|
telugu: teluguBadWords || [],
|
|
208
|
+
brazilian: brazilianBadWords || [],
|
|
183
209
|
};
|
|
184
|
-
// Fixed leet speak mappings
|
|
185
210
|
this.leetMappings = new Map([
|
|
186
211
|
["@", "a"],
|
|
187
212
|
["^", "a"],
|
|
@@ -240,10 +265,14 @@ export class AllProfanity {
|
|
|
240
265
|
["2", "z"],
|
|
241
266
|
["7_", "z"],
|
|
242
267
|
]);
|
|
243
|
-
// Dynamic words added at runtime
|
|
244
268
|
this.dynamicWords = new Set();
|
|
269
|
+
// Advanced algorithms
|
|
270
|
+
this.ahoCorasickAutomaton = null;
|
|
271
|
+
this.bloomFilter = null;
|
|
272
|
+
this.contextAnalyzer = null;
|
|
273
|
+
this.matchingAlgorithm = "trie";
|
|
274
|
+
this.resultCache = null;
|
|
245
275
|
this.logger = (options === null || options === void 0 ? void 0 : options.logger) || new ConsoleLogger();
|
|
246
|
-
// Validate and set configuration
|
|
247
276
|
if ((options === null || options === void 0 ? void 0 : options.defaultPlaceholder) !== undefined) {
|
|
248
277
|
this.setPlaceholder(options.defaultPlaceholder);
|
|
249
278
|
}
|
|
@@ -251,18 +280,17 @@ export class AllProfanity {
|
|
|
251
280
|
this.caseSensitive = (_b = options === null || options === void 0 ? void 0 : options.caseSensitive) !== null && _b !== void 0 ? _b : false;
|
|
252
281
|
this.strictMode = (_c = options === null || options === void 0 ? void 0 : options.strictMode) !== null && _c !== void 0 ? _c : false;
|
|
253
282
|
this.detectPartialWords = (_d = options === null || options === void 0 ? void 0 : options.detectPartialWords) !== null && _d !== void 0 ? _d : false;
|
|
254
|
-
// Load whitelist
|
|
255
283
|
if (options === null || options === void 0 ? void 0 : options.whitelistWords) {
|
|
256
284
|
this.addToWhitelist(options.whitelistWords);
|
|
257
285
|
}
|
|
258
|
-
//
|
|
286
|
+
// Initialize advanced algorithms BEFORE loading dictionaries
|
|
287
|
+
// so that words can be added to all data structures
|
|
288
|
+
this.initializeAdvancedAlgorithms(options);
|
|
259
289
|
this.loadLanguage("english");
|
|
260
290
|
this.loadLanguage("hindi");
|
|
261
|
-
// Load additional languages
|
|
262
291
|
if ((_e = options === null || options === void 0 ? void 0 : options.languages) === null || _e === void 0 ? void 0 : _e.length) {
|
|
263
292
|
options.languages.forEach((lang) => this.loadLanguage(lang));
|
|
264
293
|
}
|
|
265
|
-
// Load custom dictionaries
|
|
266
294
|
if (options === null || options === void 0 ? void 0 : options.customDictionaries) {
|
|
267
295
|
Object.entries(options.customDictionaries).forEach(([name, words]) => {
|
|
268
296
|
this.loadCustomDictionary(name, words);
|
|
@@ -270,7 +298,55 @@ export class AllProfanity {
|
|
|
270
298
|
}
|
|
271
299
|
}
|
|
272
300
|
/**
|
|
273
|
-
*
|
|
301
|
+
* Initialize advanced algorithms based on configuration
|
|
302
|
+
*/
|
|
303
|
+
initializeAdvancedAlgorithms(options) {
|
|
304
|
+
var _a, _b, _c, _d, _e, _f, _g, _h, _j, _k, _l, _m;
|
|
305
|
+
// Set matching algorithm
|
|
306
|
+
if ((_a = options === null || options === void 0 ? void 0 : options.algorithm) === null || _a === void 0 ? void 0 : _a.matching) {
|
|
307
|
+
this.matchingAlgorithm = options.algorithm.matching;
|
|
308
|
+
}
|
|
309
|
+
// Initialize Bloom Filter if enabled
|
|
310
|
+
const bloomEnabled = ((_b = options === null || options === void 0 ? void 0 : options.algorithm) === null || _b === void 0 ? void 0 : _b.useBloomFilter) ||
|
|
311
|
+
((_c = options === null || options === void 0 ? void 0 : options.bloomFilter) === null || _c === void 0 ? void 0 : _c.enabled) ||
|
|
312
|
+
this.matchingAlgorithm === "hybrid";
|
|
313
|
+
if (bloomEnabled) {
|
|
314
|
+
const expectedItems = ((_d = options === null || options === void 0 ? void 0 : options.bloomFilter) === null || _d === void 0 ? void 0 : _d.expectedItems) || 10000;
|
|
315
|
+
const falsePositiveRate = ((_e = options === null || options === void 0 ? void 0 : options.bloomFilter) === null || _e === void 0 ? void 0 : _e.falsePositiveRate) || 0.01;
|
|
316
|
+
this.bloomFilter = new BloomFilter(expectedItems, falsePositiveRate);
|
|
317
|
+
this.logger.info(`Bloom Filter initialized with ${expectedItems} expected items and ${(falsePositiveRate * 100).toFixed(2)}% false positive rate`);
|
|
318
|
+
}
|
|
319
|
+
// Initialize Aho-Corasick if enabled
|
|
320
|
+
const ahoEnabled = ((_f = options === null || options === void 0 ? void 0 : options.algorithm) === null || _f === void 0 ? void 0 : _f.useAhoCorasick) ||
|
|
321
|
+
((_g = options === null || options === void 0 ? void 0 : options.ahoCorasick) === null || _g === void 0 ? void 0 : _g.enabled) ||
|
|
322
|
+
this.matchingAlgorithm === "aho-corasick" ||
|
|
323
|
+
this.matchingAlgorithm === "hybrid";
|
|
324
|
+
if (ahoEnabled) {
|
|
325
|
+
this.ahoCorasickAutomaton = new AhoCorasick([]);
|
|
326
|
+
this.logger.info("Aho-Corasick automaton initialized");
|
|
327
|
+
}
|
|
328
|
+
// Initialize Context Analyzer if enabled
|
|
329
|
+
const contextEnabled = ((_h = options === null || options === void 0 ? void 0 : options.algorithm) === null || _h === void 0 ? void 0 : _h.useContextAnalysis) ||
|
|
330
|
+
((_j = options === null || options === void 0 ? void 0 : options.contextAnalysis) === null || _j === void 0 ? void 0 : _j.enabled);
|
|
331
|
+
if (contextEnabled) {
|
|
332
|
+
const contextLanguages = ((_k = options === null || options === void 0 ? void 0 : options.contextAnalysis) === null || _k === void 0 ? void 0 : _k.languages) || ["en"];
|
|
333
|
+
this.contextAnalyzer = new ContextAnalyzer(contextLanguages);
|
|
334
|
+
if ((_l = options === null || options === void 0 ? void 0 : options.contextAnalysis) === null || _l === void 0 ? void 0 : _l.contextWindow) {
|
|
335
|
+
this.contextAnalyzer.setContextWindow(options.contextAnalysis.contextWindow);
|
|
336
|
+
}
|
|
337
|
+
this.logger.info(`Context Analyzer initialized for languages: ${contextLanguages.join(", ")}`);
|
|
338
|
+
}
|
|
339
|
+
// Initialize result cache if enabled
|
|
340
|
+
if ((_m = options === null || options === void 0 ? void 0 : options.performance) === null || _m === void 0 ? void 0 : _m.enableCaching) {
|
|
341
|
+
const cacheSize = options.performance.cacheSize || 1000;
|
|
342
|
+
this.resultCache = new Map();
|
|
343
|
+
this.logger.info(`Result caching enabled with size limit: ${cacheSize}`);
|
|
344
|
+
}
|
|
345
|
+
}
|
|
346
|
+
/**
|
|
347
|
+
* Normalize leet speak to regular characters.
|
|
348
|
+
* @param text - The input text.
|
|
349
|
+
* @returns Normalized text.
|
|
274
350
|
*/
|
|
275
351
|
normalizeLeetSpeak(text) {
|
|
276
352
|
if (!this.enableLeetSpeak)
|
|
@@ -284,13 +360,19 @@ export class AllProfanity {
|
|
|
284
360
|
return normalized;
|
|
285
361
|
}
|
|
286
362
|
/**
|
|
287
|
-
*
|
|
363
|
+
* Escape regex special characters in a string.
|
|
364
|
+
* @param str - The string to escape.
|
|
365
|
+
* @returns The escaped string.
|
|
288
366
|
*/
|
|
289
367
|
escapeRegex(str) {
|
|
290
368
|
return str.replace(/[\\^$.*+?()[\]{}|]/g, "\\$&");
|
|
291
369
|
}
|
|
292
370
|
/**
|
|
293
|
-
* Check if a
|
|
371
|
+
* Check if a match is bounded by word boundaries (strict mode).
|
|
372
|
+
* @param text - The text.
|
|
373
|
+
* @param start - Start index.
|
|
374
|
+
* @param end - End index.
|
|
375
|
+
* @returns True if match is at word boundaries, false otherwise.
|
|
294
376
|
*/
|
|
295
377
|
hasWordBoundaries(text, start, end) {
|
|
296
378
|
if (!this.strictMode)
|
|
@@ -301,27 +383,24 @@ export class AllProfanity {
|
|
|
301
383
|
return (wordBoundaryRegex.test(beforeChar) && wordBoundaryRegex.test(afterChar));
|
|
302
384
|
}
|
|
303
385
|
/**
|
|
304
|
-
*
|
|
386
|
+
* Determine if a match is a whole word.
|
|
387
|
+
* @param text - The text.
|
|
388
|
+
* @param start - Start index.
|
|
389
|
+
* @param end - End index.
|
|
390
|
+
* @returns True if whole word, false otherwise.
|
|
305
391
|
*/
|
|
306
392
|
isWholeWord(text, start, end) {
|
|
307
|
-
|
|
308
|
-
if (start === 0) {
|
|
309
|
-
// ok
|
|
310
|
-
}
|
|
311
|
-
else if (/\w/.test(text[start - 1])) {
|
|
393
|
+
if (start !== 0 && /\w/.test(text[start - 1]))
|
|
312
394
|
return false;
|
|
313
|
-
|
|
314
|
-
// Check right boundary
|
|
315
|
-
if (end === text.length) {
|
|
316
|
-
// ok
|
|
317
|
-
}
|
|
318
|
-
else if (/\w/.test(text[end])) {
|
|
395
|
+
if (end !== text.length && /\w/.test(text[end]))
|
|
319
396
|
return false;
|
|
320
|
-
}
|
|
321
397
|
return true;
|
|
322
398
|
}
|
|
323
399
|
/**
|
|
324
|
-
* Check if a match is whitelisted
|
|
400
|
+
* Check if a match is whitelisted.
|
|
401
|
+
* @param word - Word from dictionary.
|
|
402
|
+
* @param matchedText - Actual matched text.
|
|
403
|
+
* @returns True if whitelisted, false otherwise.
|
|
325
404
|
*/
|
|
326
405
|
isWhitelistedMatch(word, matchedText) {
|
|
327
406
|
if (this.caseSensitive) {
|
|
@@ -333,7 +412,9 @@ export class AllProfanity {
|
|
|
333
412
|
}
|
|
334
413
|
}
|
|
335
414
|
/**
|
|
336
|
-
* Remove overlapping matches,
|
|
415
|
+
* Remove overlapping matches, keeping only the longest at each start position.
|
|
416
|
+
* @param matches - Array of match results.
|
|
417
|
+
* @returns Deduplicated matches.
|
|
337
418
|
*/
|
|
338
419
|
deduplicateMatches(matches) {
|
|
339
420
|
const sorted = [...matches].sort((a, b) => {
|
|
@@ -352,9 +433,76 @@ export class AllProfanity {
|
|
|
352
433
|
return result;
|
|
353
434
|
}
|
|
354
435
|
/**
|
|
355
|
-
*
|
|
436
|
+
* Use Aho-Corasick algorithm for pattern matching
|
|
437
|
+
*/
|
|
438
|
+
findMatchesWithAhoCorasick(searchText, originalText) {
|
|
439
|
+
if (!this.ahoCorasickAutomaton) {
|
|
440
|
+
return [];
|
|
441
|
+
}
|
|
442
|
+
const ahoMatches = this.ahoCorasickAutomaton.findAll(searchText);
|
|
443
|
+
const results = [];
|
|
444
|
+
for (const match of ahoMatches) {
|
|
445
|
+
if (!this.detectPartialWords &&
|
|
446
|
+
!this.isWholeWord(originalText, match.start, match.end)) {
|
|
447
|
+
continue;
|
|
448
|
+
}
|
|
449
|
+
const matchedText = originalText.substring(match.start, match.end);
|
|
450
|
+
if (this.isWhitelistedMatch(match.pattern, matchedText)) {
|
|
451
|
+
continue;
|
|
452
|
+
}
|
|
453
|
+
if (this.hasWordBoundaries(originalText, match.start, match.end)) {
|
|
454
|
+
results.push({
|
|
455
|
+
word: match.pattern,
|
|
456
|
+
start: match.start,
|
|
457
|
+
end: match.end,
|
|
458
|
+
originalWord: matchedText,
|
|
459
|
+
});
|
|
460
|
+
}
|
|
461
|
+
}
|
|
462
|
+
return results;
|
|
463
|
+
}
|
|
464
|
+
/**
|
|
465
|
+
* Hybrid approach: Aho-Corasick for fast matching, Bloom Filter for validation
|
|
466
|
+
*/
|
|
467
|
+
findMatchesHybrid(searchText, originalText) {
|
|
468
|
+
// Use Aho-Corasick for primary matching if available
|
|
469
|
+
if (this.ahoCorasickAutomaton) {
|
|
470
|
+
const matches = this.findMatchesWithAhoCorasick(searchText, originalText);
|
|
471
|
+
// If Bloom Filter is enabled, validate matches
|
|
472
|
+
if (this.bloomFilter) {
|
|
473
|
+
return matches.filter((match) => this.bloomFilter.mightContain(match.word));
|
|
474
|
+
}
|
|
475
|
+
return matches;
|
|
476
|
+
}
|
|
477
|
+
// Fallback to Trie if Aho-Corasick not available
|
|
478
|
+
const matches = [];
|
|
479
|
+
this.findMatches(searchText, originalText, matches);
|
|
480
|
+
// Validate with Bloom Filter if enabled
|
|
481
|
+
if (this.bloomFilter) {
|
|
482
|
+
return matches.filter((match) => this.bloomFilter.mightContain(match.word));
|
|
483
|
+
}
|
|
484
|
+
return matches;
|
|
485
|
+
}
|
|
486
|
+
/**
|
|
487
|
+
* Apply context analysis to filter false positives
|
|
488
|
+
*/
|
|
489
|
+
applyContextAnalysis(text, matches, scoreThreshold = 0.5) {
|
|
490
|
+
if (!this.contextAnalyzer) {
|
|
491
|
+
return matches;
|
|
492
|
+
}
|
|
493
|
+
return matches.filter((match) => {
|
|
494
|
+
const analysis = this.contextAnalyzer.analyzeContext(text, match.start, match.end, match.word);
|
|
495
|
+
// Keep the match only when its score is at or above the threshold (i.e. it is likely profanity)
|
|
496
|
+
return analysis.score >= scoreThreshold;
|
|
497
|
+
});
|
|
498
|
+
}
|
|
499
|
+
/**
|
|
500
|
+
* Detect profanity in a given text.
|
|
501
|
+
* @param text - The text to check.
|
|
502
|
+
* @returns Profanity detection result.
|
|
356
503
|
*/
|
|
357
504
|
detect(text) {
|
|
505
|
+
var _a;
|
|
358
506
|
const validatedText = validateString(text, "text");
|
|
359
507
|
if (validatedText.length === 0) {
|
|
360
508
|
return {
|
|
@@ -365,23 +513,56 @@ export class AllProfanity {
|
|
|
365
513
|
positions: [],
|
|
366
514
|
};
|
|
367
515
|
}
|
|
368
|
-
|
|
516
|
+
// Check cache first if enabled
|
|
517
|
+
if ((_a = this.resultCache) === null || _a === void 0 ? void 0 : _a.has(validatedText)) {
|
|
518
|
+
return this.resultCache.get(validatedText);
|
|
519
|
+
}
|
|
520
|
+
let matches = [];
|
|
369
521
|
const normalizedText = this.caseSensitive
|
|
370
522
|
? validatedText
|
|
371
523
|
: validatedText.toLowerCase();
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
524
|
+
// Choose matching algorithm based on configuration
|
|
525
|
+
switch (this.matchingAlgorithm) {
|
|
526
|
+
case "aho-corasick":
|
|
527
|
+
matches = this.findMatchesWithAhoCorasick(normalizedText, validatedText);
|
|
528
|
+
if (this.enableLeetSpeak) {
|
|
529
|
+
const leetNormalized = this.normalizeLeetSpeak(normalizedText);
|
|
530
|
+
if (leetNormalized !== normalizedText) {
|
|
531
|
+
const leetMatches = this.findMatchesWithAhoCorasick(leetNormalized, validatedText);
|
|
532
|
+
matches.push(...leetMatches);
|
|
533
|
+
}
|
|
534
|
+
}
|
|
535
|
+
break;
|
|
536
|
+
case "hybrid":
|
|
537
|
+
matches = this.findMatchesHybrid(normalizedText, validatedText);
|
|
538
|
+
if (this.enableLeetSpeak) {
|
|
539
|
+
const leetNormalized = this.normalizeLeetSpeak(normalizedText);
|
|
540
|
+
if (leetNormalized !== normalizedText) {
|
|
541
|
+
const leetMatches = this.findMatchesHybrid(leetNormalized, validatedText);
|
|
542
|
+
matches.push(...leetMatches);
|
|
543
|
+
}
|
|
544
|
+
}
|
|
545
|
+
break;
|
|
546
|
+
case "trie":
|
|
547
|
+
default:
|
|
548
|
+
this.findMatches(normalizedText, validatedText, matches);
|
|
549
|
+
if (this.enableLeetSpeak) {
|
|
550
|
+
const leetNormalized = this.normalizeLeetSpeak(normalizedText);
|
|
551
|
+
if (leetNormalized !== normalizedText) {
|
|
552
|
+
this.findMatches(leetNormalized, validatedText, matches);
|
|
553
|
+
}
|
|
554
|
+
}
|
|
555
|
+
break;
|
|
556
|
+
}
|
|
557
|
+
// Apply context analysis if enabled
|
|
558
|
+
if (this.contextAnalyzer) {
|
|
559
|
+
matches = this.applyContextAnalysis(validatedText, matches);
|
|
379
560
|
}
|
|
380
561
|
const uniqueMatches = this.deduplicateMatches(matches);
|
|
381
562
|
const detectedWords = uniqueMatches.map((m) => m.originalWord);
|
|
382
563
|
const severity = this.calculateSeverity(uniqueMatches);
|
|
383
564
|
const cleanedText = this.generateCleanedText(validatedText, uniqueMatches);
|
|
384
|
-
|
|
565
|
+
const result = {
|
|
385
566
|
hasProfanity: uniqueMatches.length > 0,
|
|
386
567
|
detectedWords,
|
|
387
568
|
cleanedText,
|
|
@@ -392,9 +573,22 @@ export class AllProfanity {
|
|
|
392
573
|
end: m.end,
|
|
393
574
|
})),
|
|
394
575
|
};
|
|
576
|
+
// Cache result if caching is enabled
|
|
577
|
+
if (this.resultCache) {
|
|
578
|
+
this.resultCache.set(validatedText, result);
|
|
579
|
+
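// Bound the cache: once it holds more than 1000 entries, evict the oldest-inserted key (FIFO, not true LRU: cache hits do not refresh an entry's position; the limit is hard-coded and does not consult performance.cacheSize)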
|
|
580
|
+
if (this.resultCache.size > 1000) {
|
|
581
|
+
const firstKey = this.resultCache.keys().next().value;
|
|
582
|
+
this.resultCache.delete(firstKey);
|
|
583
|
+
}
|
|
584
|
+
}
|
|
585
|
+
return result;
|
|
395
586
|
}
|
|
396
587
|
/**
|
|
397
588
|
* Main matching function, with whole-word logic.
|
|
589
|
+
* @param searchText - The normalized text to search.
|
|
590
|
+
* @param originalText - The original text.
|
|
591
|
+
* @param matches - Array to collect matches.
|
|
398
592
|
*/
|
|
399
593
|
findMatches(searchText, originalText, matches) {
|
|
400
594
|
for (let i = 0; i < searchText.length; i++) {
|
|
@@ -402,12 +596,10 @@ export class AllProfanity {
|
|
|
402
596
|
for (const match of matchResults) {
|
|
403
597
|
const start = i + match.start;
|
|
404
598
|
const end = i + match.end;
|
|
405
|
-
// Only match whole words if !detectPartialWords
|
|
406
599
|
if (!this.detectPartialWords &&
|
|
407
600
|
!this.isWholeWord(originalText, start, end)) {
|
|
408
601
|
continue;
|
|
409
602
|
}
|
|
410
|
-
// Use actual matched text for whitelist check
|
|
411
603
|
const matchedText = originalText.substring(start, end);
|
|
412
604
|
if (this.isWhitelistedMatch(match.word, matchedText)) {
|
|
413
605
|
continue;
|
|
@@ -424,13 +616,15 @@ export class AllProfanity {
|
|
|
424
616
|
}
|
|
425
617
|
}
|
|
426
618
|
/**
|
|
427
|
-
* Generate cleaned text by replacing profane words
|
|
619
|
+
* Generate cleaned text by replacing profane words.
|
|
620
|
+
* @param originalText - The original text.
|
|
621
|
+
* @param matches - Array of matches.
|
|
622
|
+
* @returns Cleaned text.
|
|
428
623
|
*/
|
|
429
624
|
generateCleanedText(originalText, matches) {
|
|
430
625
|
if (matches.length === 0)
|
|
431
626
|
return originalText;
|
|
432
627
|
let result = originalText;
|
|
433
|
-
// Process matches in reverse order to maintain indices and avoid overlap
|
|
434
628
|
const sortedMatches = [...this.deduplicateMatches(matches)].sort((a, b) => b.start - a.start);
|
|
435
629
|
for (const match of sortedMatches) {
|
|
436
630
|
const replacement = this.defaultPlaceholder.repeat(match.originalWord.length);
|
|
@@ -442,20 +636,24 @@ export class AllProfanity {
|
|
|
442
636
|
return result;
|
|
443
637
|
}
|
|
444
638
|
/**
|
|
445
|
-
*
|
|
639
|
+
* Check if a string contains profanity.
|
|
640
|
+
* @param text - The text to check.
|
|
641
|
+
* @returns True if profanity is found, false otherwise.
|
|
446
642
|
*/
|
|
447
643
|
check(text) {
|
|
448
644
|
return this.detect(text).hasProfanity;
|
|
449
645
|
}
|
|
450
646
|
/**
|
|
451
|
-
* Clean text with custom placeholder
|
|
647
|
+
* Clean text with a custom placeholder.
|
|
648
|
+
* @param text - The text to clean.
|
|
649
|
+
* @param placeholder - The placeholder to use.
|
|
650
|
+
* @returns Cleaned text.
|
|
452
651
|
*/
|
|
453
652
|
clean(text, placeholder) {
|
|
454
653
|
const detection = this.detect(text);
|
|
455
654
|
if (!placeholder || placeholder === this.defaultPlaceholder) {
|
|
456
655
|
return detection.cleanedText;
|
|
457
656
|
}
|
|
458
|
-
// Use custom placeholder
|
|
459
657
|
let result = text;
|
|
460
658
|
const sortedPositions = [
|
|
461
659
|
...this.deduplicateMatches(detection.positions.map((p) => ({
|
|
@@ -476,14 +674,16 @@ export class AllProfanity {
|
|
|
476
674
|
return result;
|
|
477
675
|
}
|
|
478
676
|
/**
|
|
479
|
-
* Clean text by replacing each profane word with a single placeholder (word-level)
|
|
677
|
+
* Clean text by replacing each profane word with a single placeholder (word-level).
|
|
678
|
+
* @param text - The text to clean.
|
|
679
|
+
* @param placeholder - The placeholder to use.
|
|
680
|
+
* @returns Word-level cleaned text.
|
|
480
681
|
*/
|
|
481
682
|
cleanWithPlaceholder(text, placeholder = "***") {
|
|
482
683
|
const detection = this.detect(text);
|
|
483
684
|
if (detection.positions.length === 0)
|
|
484
685
|
return text;
|
|
485
686
|
let result = text;
|
|
486
|
-
// Sort matches so later matches don't affect earlier ones
|
|
487
687
|
const sortedPositions = [
|
|
488
688
|
...this.deduplicateMatches(detection.positions.map((p) => ({
|
|
489
689
|
word: p.word,
|
|
@@ -493,7 +693,6 @@ export class AllProfanity {
|
|
|
493
693
|
}))),
|
|
494
694
|
].sort((a, b) => b.start - a.start);
|
|
495
695
|
for (const pos of sortedPositions) {
|
|
496
|
-
// Only replace whole words!
|
|
497
696
|
if (!this.isWholeWord(result, pos.start, pos.end))
|
|
498
697
|
continue;
|
|
499
698
|
result =
|
|
@@ -504,7 +703,8 @@ export class AllProfanity {
|
|
|
504
703
|
return result;
|
|
505
704
|
}
|
|
506
705
|
/**
|
|
507
|
-
* Add word(s) to the profanity
|
|
706
|
+
* Add word(s) to the profanity filter.
|
|
707
|
+
* @param word - Word or array of words to add.
|
|
508
708
|
*/
|
|
509
709
|
add(word) {
|
|
510
710
|
const words = Array.isArray(word) ? word : [word];
|
|
@@ -515,7 +715,8 @@ export class AllProfanity {
|
|
|
515
715
|
}
|
|
516
716
|
}
|
|
517
717
|
/**
|
|
518
|
-
* Remove word(s) from the profanity
|
|
718
|
+
* Remove word(s) from the profanity filter.
|
|
719
|
+
* @param word - Word or array of words to remove.
|
|
519
720
|
*/
|
|
520
721
|
remove(word) {
|
|
521
722
|
const words = Array.isArray(word) ? word : [word];
|
|
@@ -527,7 +728,8 @@ export class AllProfanity {
|
|
|
527
728
|
}
|
|
528
729
|
}
|
|
529
730
|
/**
|
|
530
|
-
* Add words to whitelist
|
|
731
|
+
* Add words to the whitelist.
|
|
732
|
+
* @param words - Words to whitelist.
|
|
531
733
|
*/
|
|
532
734
|
addToWhitelist(words) {
|
|
533
735
|
const validatedWords = validateStringArray(words, "whitelist words");
|
|
@@ -537,7 +739,8 @@ export class AllProfanity {
|
|
|
537
739
|
}
|
|
538
740
|
}
|
|
539
741
|
/**
|
|
540
|
-
* Remove words from whitelist
|
|
742
|
+
* Remove words from the whitelist.
|
|
743
|
+
* @param words - Words to remove from whitelist.
|
|
541
744
|
*/
|
|
542
745
|
removeFromWhitelist(words) {
|
|
543
746
|
const validatedWords = validateStringArray(words, "whitelist words");
|
|
@@ -547,14 +750,18 @@ export class AllProfanity {
|
|
|
547
750
|
}
|
|
548
751
|
}
|
|
549
752
|
/**
|
|
550
|
-
*
|
|
753
|
+
* Check if a word is whitelisted.
|
|
754
|
+
* @param word - The word to check.
|
|
755
|
+
* @returns True if whitelisted, false otherwise.
|
|
551
756
|
*/
|
|
552
757
|
isWhitelisted(word) {
|
|
553
758
|
const normalizedWord = this.caseSensitive ? word : word.toLowerCase();
|
|
554
759
|
return this.whitelistSet.has(normalizedWord);
|
|
555
760
|
}
|
|
556
761
|
/**
|
|
557
|
-
* Load a built-in language dictionary
|
|
762
|
+
* Load a built-in language dictionary.
|
|
763
|
+
* @param language - The language key.
|
|
764
|
+
* @returns True if loaded, false otherwise.
|
|
558
765
|
*/
|
|
559
766
|
loadLanguage(language) {
|
|
560
767
|
if (!language || typeof language !== "string") {
|
|
@@ -587,7 +794,9 @@ export class AllProfanity {
|
|
|
587
794
|
}
|
|
588
795
|
}
|
|
589
796
|
/**
|
|
590
|
-
* Load multiple
|
|
797
|
+
* Load multiple language dictionaries.
|
|
798
|
+
* @param languages - Array of languages to load.
|
|
799
|
+
* @returns Number of successfully loaded languages.
|
|
591
800
|
*/
|
|
592
801
|
loadLanguages(languages) {
|
|
593
802
|
const validatedLanguages = validateStringArray(languages, "languages");
|
|
@@ -596,14 +805,17 @@ export class AllProfanity {
|
|
|
596
805
|
}, 0);
|
|
597
806
|
}
|
|
598
807
|
/**
|
|
599
|
-
* Load all Indian languages
|
|
808
|
+
* Load all supported Indian languages.
|
|
809
|
+
* @returns Number of loaded Indian languages.
|
|
600
810
|
*/
|
|
601
811
|
loadIndianLanguages() {
|
|
602
812
|
const indianLanguages = ["hindi", "bengali", "tamil", "telugu"];
|
|
603
813
|
return this.loadLanguages(indianLanguages);
|
|
604
814
|
}
|
|
605
815
|
/**
|
|
606
|
-
* Load a custom dictionary
|
|
816
|
+
* Load a custom dictionary.
|
|
817
|
+
* @param name - Name of the dictionary.
|
|
818
|
+
* @param words - Words to add.
|
|
607
819
|
*/
|
|
608
820
|
loadCustomDictionary(name, words) {
|
|
609
821
|
validateString(name, "dictionary name");
|
|
@@ -619,7 +831,6 @@ export class AllProfanity {
|
|
|
619
831
|
addedCount++;
|
|
620
832
|
}
|
|
621
833
|
}
|
|
622
|
-
// Store for future reference
|
|
623
834
|
this.availableLanguages[name.toLowerCase()] = validatedWords;
|
|
624
835
|
this.loadedLanguages.add(name.toLowerCase());
|
|
625
836
|
this.logger.info(`Loaded ${addedCount} words from custom dictionary '${name}'`);
|
|
@@ -629,7 +840,9 @@ export class AllProfanity {
|
|
|
629
840
|
}
|
|
630
841
|
}
|
|
631
842
|
/**
|
|
632
|
-
* Add a single word to the trie
|
|
843
|
+
* Add a single word to the trie.
|
|
844
|
+
* @param word - The word to add.
|
|
845
|
+
* @returns True if added, false otherwise.
|
|
633
846
|
*/
|
|
634
847
|
addWordToTrie(word) {
|
|
635
848
|
if (!word || typeof word !== "string" || word.trim().length === 0) {
|
|
@@ -638,16 +851,25 @@ export class AllProfanity {
|
|
|
638
851
|
const normalizedWord = this.caseSensitive
|
|
639
852
|
? word.trim()
|
|
640
853
|
: word.trim().toLowerCase();
|
|
641
|
-
// Skip if whitelisted
|
|
642
854
|
if (this.isWhitelisted(normalizedWord)) {
|
|
643
855
|
return false;
|
|
644
856
|
}
|
|
645
|
-
// Add to
|
|
857
|
+
// Add to Trie (always used as fallback)
|
|
646
858
|
this.profanityTrie.addWord(normalizedWord);
|
|
859
|
+
// Add to Bloom Filter if enabled
|
|
860
|
+
if (this.bloomFilter) {
|
|
861
|
+
this.bloomFilter.add(normalizedWord);
|
|
862
|
+
}
|
|
863
|
+
// Add to Aho-Corasick automaton if enabled
|
|
864
|
+
if (this.ahoCorasickAutomaton) {
|
|
865
|
+
this.ahoCorasickAutomaton.addPattern(normalizedWord);
|
|
866
|
+
}
|
|
647
867
|
return true;
|
|
648
868
|
}
|
|
649
869
|
/**
|
|
650
|
-
*
|
|
870
|
+
* Calculate severity from matches.
|
|
871
|
+
* @param matches - Array of matches.
|
|
872
|
+
* @returns Severity level.
|
|
651
873
|
*/
|
|
652
874
|
calculateSeverity(matches) {
|
|
653
875
|
if (matches.length === 0)
|
|
@@ -663,7 +885,7 @@ export class AllProfanity {
|
|
|
663
885
|
return ProfanitySeverity.MILD;
|
|
664
886
|
}
|
|
665
887
|
/**
|
|
666
|
-
* Clear all loaded dictionaries
|
|
888
|
+
* Clear all loaded dictionaries and dynamic words.
|
|
667
889
|
*/
|
|
668
890
|
clearList() {
|
|
669
891
|
this.profanityTrie.clear();
|
|
@@ -671,7 +893,8 @@ export class AllProfanity {
|
|
|
671
893
|
this.dynamicWords.clear();
|
|
672
894
|
}
|
|
673
895
|
/**
|
|
674
|
-
* Set placeholder character
|
|
896
|
+
* Set the placeholder character for filtered words.
|
|
897
|
+
* @param placeholder - The placeholder character.
|
|
675
898
|
*/
|
|
676
899
|
setPlaceholder(placeholder) {
|
|
677
900
|
validateString(placeholder, "placeholder");
|
|
@@ -681,19 +904,22 @@ export class AllProfanity {
|
|
|
681
904
|
this.defaultPlaceholder = placeholder.charAt(0);
|
|
682
905
|
}
|
|
683
906
|
/**
|
|
684
|
-
* Get loaded languages
|
|
907
|
+
* Get the list of loaded languages.
|
|
908
|
+
* @returns Array of loaded language keys.
|
|
685
909
|
*/
|
|
686
910
|
getLoadedLanguages() {
|
|
687
911
|
return Array.from(this.loadedLanguages);
|
|
688
912
|
}
|
|
689
913
|
/**
|
|
690
|
-
* Get available languages
|
|
914
|
+
* Get the list of available languages: built-in dictionaries plus any custom dictionaries registered via loadCustomDictionary.
|
|
915
|
+
* @returns Array of available language keys.
|
|
691
916
|
*/
|
|
692
917
|
getAvailableLanguages() {
|
|
693
918
|
return Object.keys(this.availableLanguages);
|
|
694
919
|
}
|
|
695
920
|
/**
|
|
696
|
-
* Get current configuration
|
|
921
|
+
* Get the current configuration of the profanity filter.
|
|
922
|
+
* @returns Partial configuration object.
|
|
697
923
|
*/
|
|
698
924
|
getConfig() {
|
|
699
925
|
return {
|
|
@@ -707,24 +933,23 @@ export class AllProfanity {
|
|
|
707
933
|
};
|
|
708
934
|
}
|
|
709
935
|
/**
|
|
710
|
-
*
|
|
936
|
+
* Rebuild the profanity trie from loaded dictionaries and dynamic words.
|
|
711
937
|
*/
|
|
712
938
|
rebuildTrie() {
|
|
713
939
|
this.profanityTrie.clear();
|
|
714
|
-
// Re-add all loaded language words
|
|
715
940
|
for (const lang of this.loadedLanguages) {
|
|
716
941
|
const words = this.availableLanguages[lang] || [];
|
|
717
942
|
for (const word of words) {
|
|
718
943
|
this.addWordToTrie(word);
|
|
719
944
|
}
|
|
720
945
|
}
|
|
721
|
-
// Re-add dynamic words
|
|
722
946
|
for (const word of this.dynamicWords) {
|
|
723
947
|
this.addWordToTrie(word);
|
|
724
948
|
}
|
|
725
949
|
}
|
|
726
950
|
/**
|
|
727
|
-
* Update configuration
|
|
951
|
+
* Update configuration options for the profanity filter.
|
|
952
|
+
* @param options - Partial configuration object.
|
|
728
953
|
*/
|
|
729
954
|
updateConfig(options) {
|
|
730
955
|
let rebuildNeeded = false;
|
|
@@ -752,8 +977,54 @@ export class AllProfanity {
|
|
|
752
977
|
this.rebuildTrie();
|
|
753
978
|
}
|
|
754
979
|
}
|
|
980
|
+
/**
|
|
981
|
+
* Create an AllProfanity instance from a configuration object.
|
|
982
|
+
* @param config - Configuration object
|
|
983
|
+
* @returns A new AllProfanity instance
|
|
984
|
+
*/
|
|
985
|
+
static fromConfig(config) {
|
|
986
|
+
const options = {};
|
|
987
|
+
if (config.algorithm)
|
|
988
|
+
options.algorithm = config.algorithm;
|
|
989
|
+
if (config.bloomFilter)
|
|
990
|
+
options.bloomFilter = config.bloomFilter;
|
|
991
|
+
if (config.ahoCorasick)
|
|
992
|
+
options.ahoCorasick = config.ahoCorasick;
|
|
993
|
+
if (config.contextAnalysis)
|
|
994
|
+
options.contextAnalysis = config.contextAnalysis;
|
|
995
|
+
if (config.performance)
|
|
996
|
+
options.performance = config.performance;
|
|
997
|
+
if (config.profanityDetection) {
|
|
998
|
+
options.enableLeetSpeak = config.profanityDetection.enableLeetSpeak;
|
|
999
|
+
options.caseSensitive = config.profanityDetection.caseSensitive;
|
|
1000
|
+
options.strictMode = config.profanityDetection.strictMode;
|
|
1001
|
+
options.detectPartialWords = config.profanityDetection.detectPartialWords;
|
|
1002
|
+
options.defaultPlaceholder = config.profanityDetection.defaultPlaceholder;
|
|
1003
|
+
}
|
|
1004
|
+
if (config.enableLeetSpeak !== undefined)
|
|
1005
|
+
options.enableLeetSpeak = config.enableLeetSpeak;
|
|
1006
|
+
if (config.caseSensitive !== undefined)
|
|
1007
|
+
options.caseSensitive = config.caseSensitive;
|
|
1008
|
+
if (config.strictMode !== undefined)
|
|
1009
|
+
options.strictMode = config.strictMode;
|
|
1010
|
+
if (config.detectPartialWords !== undefined)
|
|
1011
|
+
options.detectPartialWords = config.detectPartialWords;
|
|
1012
|
+
if (config.defaultPlaceholder !== undefined)
|
|
1013
|
+
options.defaultPlaceholder = config.defaultPlaceholder;
|
|
1014
|
+
if (config.languages)
|
|
1015
|
+
options.languages = config.languages;
|
|
1016
|
+
if (config.whitelistWords)
|
|
1017
|
+
options.whitelistWords = config.whitelistWords;
|
|
1018
|
+
if (config.customDictionaries)
|
|
1019
|
+
options.customDictionaries = config.customDictionaries;
|
|
1020
|
+
if (config.logger)
|
|
1021
|
+
options.logger = config.logger;
|
|
1022
|
+
return new AllProfanity(options);
|
|
1023
|
+
}
|
|
755
1024
|
}
|
|
756
|
-
|
|
1025
|
+
/**
|
|
1026
|
+
* Singleton instance of AllProfanity with default configuration.
|
|
1027
|
+
*/
|
|
757
1028
|
// Module-level instance built with the constructor's defaults and exposed as
// the default export.
const allProfanity = new AllProfanity();
export { allProfanity as default };
|
|
759
1030
|
//# sourceMappingURL=index.js.map
|