allprofanity 1.1.0 → 2.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +268 -218
- package/dist/index.d.ts +136 -51
- package/dist/index.js +667 -161
- package/dist/index.js.map +1 -1
- package/dist/languages/bengali-words.js +0 -1
- package/dist/languages/bengali-words.js.map +1 -1
- package/dist/languages/english-words.d.ts +2 -0
- package/dist/languages/english-words.js +256 -0
- package/dist/languages/english-words.js.map +1 -0
- package/package.json +2 -5
package/dist/index.js
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
|
-
|
|
1
|
+
// Import the built-in language dictionaries
|
|
2
|
+
import englishBadWords from "./languages/english-words.js";
|
|
2
3
|
import hindiBadWords from "./languages/hindi-words.js";
|
|
3
4
|
import frenchBadWords from "./languages/french-words.js";
|
|
4
5
|
import germanBadWords from "./languages/german-words.js";
|
|
@@ -7,6 +8,7 @@ import bengaliBadWords from "./languages/bengali-words.js";
|
|
|
7
8
|
import tamilBadWords from "./languages/tamil-words.js";
|
|
8
9
|
import teluguBadWords from "./languages/telugu-words.js";
|
|
9
10
|
// Export language dictionaries for direct access
|
|
11
|
+
export { default as englishBadWords } from "./languages/english-words.js";
|
|
10
12
|
export { default as hindiBadWords } from "./languages/hindi-words.js";
|
|
11
13
|
export { default as frenchBadWords } from "./languages/french-words.js";
|
|
12
14
|
export { default as germanBadWords } from "./languages/german-words.js";
|
|
@@ -15,239 +17,743 @@ export { default as bengaliBadWords } from "./languages/bengali-words.js";
|
|
|
15
17
|
export { default as tamilBadWords } from "./languages/tamil-words.js";
|
|
16
18
|
export { default as teluguBadWords } from "./languages/telugu-words.js";
|
|
17
19
|
/**
 * Fallback logger used when the caller does not supply one.
 *
 * Every message is written to the matching `console` method with an
 * "[AllProfanity] " prefix so library output is easy to spot.
 */
class ConsoleLogger {
    /** Prepend the library tag to a message. */
    static tag(message) {
        return `[AllProfanity] ${message}`;
    }

    /** Write an informational message via console.log. */
    info(message) {
        console.log(ConsoleLogger.tag(message));
    }

    /** Write a warning via console.warn. */
    warn(message) {
        console.warn(ConsoleLogger.tag(message));
    }

    /** Write an error via console.error. */
    error(message) {
        console.error(ConsoleLogger.tag(message));
    }
}
|
|
33
|
+
/**
 * Severity levels reported by profanity detection.
 *
 * Mirrors a TypeScript numeric enum: each name maps to its number and each
 * number maps back to its name (`ProfanitySeverity[3] === "SEVERE"`).
 * The object is frozen so consumers cannot accidentally corrupt the shared
 * lookup table.
 */
export const ProfanitySeverity = Object.freeze({
    MILD: 1,
    MODERATE: 2,
    SEVERE: 3,
    EXTREME: 4,
    1: "MILD",
    2: "MODERATE",
    3: "SEVERE",
    4: "EXTREME",
});
|
|
43
|
+
/**
 * Ensure a parameter is a string.
 *
 * @param {*} input - Value to check.
 * @param {string} paramName - Parameter name used in the error message.
 * @returns {string} The input, unchanged, when it is a string.
 * @throws {TypeError} When the input is not a string. The message names the
 *   actual kind of value; `null` and arrays are reported as such instead of
 *   the misleading "object" that a bare `typeof` would give.
 */
function validateString(input, paramName) {
    if (typeof input === "string") {
        return input;
    }
    let actual = typeof input;
    if (input === null) {
        actual = "null";
    }
    else if (Array.isArray(input)) {
        actual = "array";
    }
    throw new TypeError(`${paramName} must be a string, got ${actual}`);
}
|
|
52
|
+
/**
 * Validate an array of strings and drop unusable entries.
 *
 * Non-string items are skipped with a warning; strings that are empty or
 * whitespace-only are skipped silently. Kept strings are returned as-is
 * (they are NOT trimmed).
 *
 * @param {*} input - Value expected to be an array.
 * @param {string} paramName - Parameter name used in messages.
 * @param {{warn: function(string): void}} [logger=console] - Receives the
 *   "skipping" warnings; defaults to the global console.
 * @returns {string[]} New array holding only the non-blank string items.
 * @throws {TypeError} When the input is not an array.
 */
function validateStringArray(input, paramName, logger = console) {
    if (!Array.isArray(input)) {
        throw new TypeError(`${paramName} must be an array`);
    }
    return input.filter((item) => {
        if (typeof item === "string") {
            return item.trim().length > 0;
        }
        // Template-literal interpolation throws for Symbols and for objects
        // without a usable toString; describe such items defensively so one bad
        // entry cannot abort the whole validation.
        let description;
        try {
            description = String(item);
        }
        catch (err) {
            description = Object.prototype.toString.call(item);
        }
        logger.warn(`Skipping non-string item in ${paramName}: ${description}`);
        return false;
    });
}
|
|
64
|
+
/**
 * Node of a character trie used for fast multi-word matching.
 *
 * Edges are keyed by UTF-16 code unit in every method, so the offsets
 * reported by findMatches() are ordinary string indices and words containing
 * astral characters (e.g. emoji) match correctly.
 */
class TrieNode {
    constructor() {
        // Child nodes keyed by a single UTF-16 code unit.
        this.children = new Map();
        // True when the path from the root to this node spells a stored word.
        this.isEndOfWord = false;
        // The stored word ending here ("" when isEndOfWord is false).
        this.word = "";
    }
    /**
     * Add a word to the trie. Empty strings are ignored.
     *
     * @param {string} word - Word to store, already normalised by the caller.
     */
    addWord(word) {
        if (word.length === 0) {
            return;
        }
        let node = this;
        for (let i = 0; i < word.length; i++) {
            const unit = word[i];
            let next = node.children.get(unit);
            if (next === undefined) {
                next = new TrieNode();
                node.children.set(unit, next);
            }
            node = next;
        }
        node.isEndOfWord = true;
        node.word = word;
    }
    /**
     * Remove a word from the trie and prune branches that no longer lead to
     * any stored word.
     *
     * @param {string} word - Word to remove (same normalisation as addWord).
     * @returns {boolean} True when the word was stored and has been removed,
     *   false when it was not present.
     */
    removeWord(word) {
        // Remember the nodes along the path so empty branches can be pruned.
        const path = [this];
        let node = this;
        for (let i = 0; i < word.length; i++) {
            node = node.children.get(word[i]);
            if (node === undefined) {
                return false;
            }
            path.push(node);
        }
        if (!node.isEndOfWord) {
            return false;
        }
        node.isEndOfWord = false;
        node.word = "";
        // Walk back towards the root, deleting nodes that are now dead ends.
        for (let depth = word.length; depth > 0; depth--) {
            const current = path[depth];
            if (current.isEndOfWord || current.children.size > 0) {
                break;
            }
            path[depth - 1].children.delete(word[depth - 1]);
        }
        return true;
    }
    /**
     * Find every stored word that starts exactly at `startPos` in `text`.
     *
     * @param {string} text - Text to scan.
     * @param {number} startPos - Index in `text` where matching begins.
     * @param {boolean} [allowPartial] - Accepted for interface compatibility;
     *   it does not change the result (boundary checks are done by the caller).
     * @returns {{word: string, start: number, end: number}[]} Matches in order
     *   of increasing length. `start` is always 0 and `end` is the match
     *   length; both are relative to `startPos`.
     */
    findMatches(text, startPos, allowPartial) {
        const matches = [];
        let node = this;
        for (let pos = startPos; pos < text.length;) {
            node = node.children.get(text[pos]);
            if (node === undefined) {
                break;
            }
            pos++;
            if (node.isEndOfWord) {
                matches.push({ word: node.word, start: 0, end: pos - startPos });
            }
        }
        return matches;
    }
    /**
     * Remove every word from the trie.
     */
    clear() {
        this.children.clear();
        this.isEndOfWord = false;
        this.word = "";
    }
}
|
|
157
|
+
/**
 * Multi-language profanity filter backed by a character trie.
 *
 * Words from the built-in dictionaries, custom dictionaries and add() are
 * stored in a trie. detect() scans the input in up to two passes (the plain
 * text and, when enabled, a leet-speak-normalised copy) and reports matches
 * as index ranges of the ORIGINAL input string.
 */
export class AllProfanity {
    /**
     * @param {object} [options] - Optional configuration.
     * @param {object} [options.logger] - Object with info/warn/error methods;
     *   a console logger is used when omitted.
     * @param {string} [options.defaultPlaceholder] - Masking character
     *   (only the first character is used). Defaults to "*".
     * @param {boolean} [options.enableLeetSpeak=true] - Also scan a
     *   leet-speak-normalised copy of the text.
     * @param {boolean} [options.caseSensitive=false] - Keep letter case when
     *   storing and matching words.
     * @param {boolean} [options.strictMode=false] - Require whitespace,
     *   punctuation or a symbol on both sides of a match.
     * @param {boolean} [options.detectPartialWords=false] - Report matches
     *   inside longer words.
     * @param {string[]} [options.whitelistWords] - Words never reported.
     * @param {string[]} [options.languages] - Extra built-in languages to
     *   load (english and hindi are always loaded).
     * @param {Object<string, string[]>} [options.customDictionaries] - Extra
     *   dictionaries keyed by name.
     */
    constructor(options) {
        const opts = options === null || options === undefined ? {} : options;
        const orDefault = (value, fallback) => value === null || value === undefined ? fallback : value;
        this.profanityTrie = new TrieNode();
        this.whitelistSet = new Set();
        this.loadedLanguages = new Set();
        // Words added at runtime via add(); kept so rebuildTrie() can restore them.
        this.dynamicWords = new Set();
        this.defaultPlaceholder = "*";
        this.logger = opts.logger || new ConsoleLogger();
        // Null-prototype dictionary: names such as "constructor" or "__proto__"
        // must not resolve to inherited Object members.
        this.availableLanguages = Object.assign(Object.create(null), {
            english: englishBadWords || [],
            hindi: hindiBadWords || [],
            french: frenchBadWords || [],
            german: germanBadWords || [],
            spanish: spanishBadWords || [],
            bengali: bengaliBadWords || [],
            tamil: tamilBadWords || [],
            telugu: teluguBadWords || [],
        });
        // Leet-speak sequence -> plain letter. Longer sequences are applied
        // first (see buildLeetView), then shorter ones in the order listed here.
        this.leetMappings = new Map([
            ["@", "a"],
            ["^", "a"],
            ["4", "a"],
            ["8", "b"],
            ["6", "b"],
            ["|3", "b"],
            ["(", "c"],
            ["<", "c"],
            ["©", "c"],
            ["|)", "d"],
            ["0", "o"],
            ["3", "e"],
            ["€", "e"],
            ["|=", "f"],
            ["ph", "f"],
            ["9", "g"],
            ["#", "h"],
            ["|-|", "h"],
            ["1", "i"],
            ["!", "i"],
            ["|", "i"],
            ["_|", "j"],
            ["¿", "j"],
            ["|<", "k"],
            ["1<", "k"],
            // "7" used to be listed twice ("l" and "t"); a Map keeps only the
            // last value, so "t" has always been the effective mapping.
            ["7", "t"],
            ["|\\/|", "m"],
            ["/\\/\\", "m"],
            ["|\\|", "n"],
            ["//", "n"],
            ["()", "o"],
            ["|*", "p"],
            ["|o", "p"],
            ["(_,)", "q"],
            ["()_", "q"],
            ["|2", "r"],
            ["12", "r"],
            ["5", "s"],
            ["$", "s"],
            ["z", "s"],
            ["+", "t"],
            ["†", "t"],
            ["|_|", "u"],
            ["(_)", "u"],
            ["v", "u"],
            ["\\/", "v"],
            ["|/", "v"],
            ["\\/\\/", "w"],
            ["vv", "w"],
            ["><", "x"],
            ["}{", "x"],
            ["`/", "y"],
            ["j", "y"],
            ["2", "z"],
            ["7_", "z"],
        ]);
        if (opts.defaultPlaceholder !== undefined) {
            this.setPlaceholder(opts.defaultPlaceholder);
        }
        this.enableLeetSpeak = orDefault(opts.enableLeetSpeak, true);
        this.caseSensitive = orDefault(opts.caseSensitive, false);
        this.strictMode = orDefault(opts.strictMode, false);
        this.detectPartialWords = orDefault(opts.detectPartialWords, false);
        // The whitelist is loaded before the dictionaries because
        // addWordToTrie() skips words that are whitelisted at load time.
        if (opts.whitelistWords) {
            this.addToWhitelist(opts.whitelistWords);
        }
        // Default languages
        this.loadLanguage("english");
        this.loadLanguage("hindi");
        // Additional languages
        if (opts.languages && opts.languages.length) {
            opts.languages.forEach((lang) => this.loadLanguage(lang));
        }
        // Custom dictionaries
        if (opts.customDictionaries) {
            Object.entries(opts.customDictionaries).forEach(([name, words]) => {
                this.loadCustomDictionary(name, words);
            });
        }
    }
    /**
     * Trim a word and, unless the filter is case sensitive, lower-case it.
     * This is the single normalisation used for trie and whitelist entries.
     */
    normalizeWord(word) {
        const trimmed = word.trim();
        return this.caseSensitive ? trimmed : trimmed.toLowerCase();
    }
    /**
     * Lower-case `text` and describe how the result maps back to `text`.
     *
     * @returns {{text: string, starts: ?number[], ends: ?number[]}} `starts[i]`
     *   / `ends[i]` give the original index range that produced unit `i` of the
     *   returned text; both are null when the mapping is the identity.
     */
    lowerCaseWithOffsets(text) {
        const lowered = text.toLowerCase();
        if (lowered.length === text.length) {
            return { text: lowered, starts: null, ends: null };
        }
        // Rare case: lower-casing changed the length (e.g. "İ" becomes two code
        // units). Lower-case one code point at a time so every output unit can
        // be traced back to its source.
        const pieces = [];
        const starts = [];
        const ends = [];
        let offset = 0;
        for (const symbol of text) {
            const loweredSymbol = symbol.toLowerCase();
            for (let k = 0; k < loweredSymbol.length; k++) {
                starts.push(offset);
                ends.push(offset + symbol.length);
            }
            pieces.push(loweredSymbol);
            offset += symbol.length;
        }
        return { text: pieces.join(""), starts, ends };
    }
    /**
     * Replace every (leftmost, non-overlapping) occurrence of `leet` in a view
     * with `normal`, carrying the original-text offsets along.
     */
    replaceWithOffsets(view, leet, normal) {
        const source = view.text;
        const startOf = (index) => (view.starts ? view.starts[index] : index);
        const endOf = (index) => (view.ends ? view.ends[index] : index + 1);
        const pieces = [];
        const starts = [];
        const ends = [];
        let index = 0;
        while (index < source.length) {
            if (source.startsWith(leet, index)) {
                // The replacement covers the whole original span of the sequence.
                const spanStart = startOf(index);
                const spanEnd = endOf(index + leet.length - 1);
                for (let k = 0; k < normal.length; k++) {
                    starts.push(spanStart);
                    ends.push(spanEnd);
                }
                pieces.push(normal);
                index += leet.length;
            }
            else {
                starts.push(startOf(index));
                ends.push(endOf(index));
                pieces.push(source[index]);
                index++;
            }
        }
        return { text: pieces.join(""), starts, ends };
    }
    /**
     * Build the leet-normalised copy of `text` together with offsets back into
     * `text`. Mappings are applied one after another, longest sequence first,
     * so the output of an earlier mapping can be rewritten by a later one
     * (e.g. "\/" -> "v" -> "u").
     */
    buildLeetView(text) {
        let view = this.lowerCaseWithOffsets(text);
        const ordered = Array.from(this.leetMappings.entries()).sort(([leetA], [leetB]) => leetB.length - leetA.length);
        for (const [leet, normal] of ordered) {
            if (leet.length > 0 && view.text.includes(leet)) {
                view = this.replaceWithOffsets(view, leet, normal);
            }
        }
        return view;
    }
    /**
     * Normalize text by converting leet speak to regular characters.
     *
     * @param {string} text - Text to normalise.
     * @returns {string} Lower-cased text with leet sequences replaced, or the
     *   text unchanged when leet-speak handling is disabled.
     */
    normalizeLeetSpeak(text) {
        if (!this.enableLeetSpeak) {
            return text;
        }
        return this.buildLeetView(text).text;
    }
    /**
     * Escape regex special characters so `str` can be used literally in a
     * RegExp.
     */
    escapeRegex(str) {
        return str.replace(/[\\^$.*+?()[\]{}|]/g, "\\$&");
    }
    /**
     * Strict-mode boundary check: both neighbours of [start, end) must be
     * whitespace, punctuation or a symbol (text edges count as boundaries).
     * Always true when strict mode is off.
     */
    hasWordBoundaries(text, start, end) {
        if (!this.strictMode) {
            return true;
        }
        const boundary = /[\s\p{P}\p{S}]/u;
        const beforeChar = start > 0 ? text[start - 1] : " ";
        const afterChar = end < text.length ? text[end] : " ";
        return boundary.test(beforeChar) && boundary.test(afterChar);
    }
    /**
     * Whole-word check: true when neither neighbour of [start, end) is a word
     * character. Letters, combining marks and digits of any script count as
     * word characters (plus "_"), so the check also works for the non-Latin
     * dictionaries instead of only for ASCII text.
     */
    isWholeWord(text, start, end) {
        const wordChar = /[\p{L}\p{M}\p{N}_]/u;
        if (start > 0 && wordChar.test(text[start - 1])) {
            return false;
        }
        if (end < text.length && wordChar.test(text[end])) {
            return false;
        }
        return true;
    }
    /**
     * Check whether a match is whitelisted, either by the dictionary word or
     * by the substring actually matched in the text.
     */
    isWhitelistedMatch(word, matchedText) {
        if (this.caseSensitive) {
            return this.whitelistSet.has(word) || this.whitelistSet.has(matchedText);
        }
        return (this.whitelistSet.has(word.toLowerCase()) ||
            this.whitelistSet.has(matchedText.toLowerCase()));
    }
    /**
     * Remove overlapping matches: scanning left to right, keep the longest
     * match at each start position and drop anything that begins before the
     * previously kept match ends.
     */
    deduplicateMatches(matches) {
        const sorted = [...matches].sort((a, b) => {
            if (a.start !== b.start) {
                return a.start - b.start;
            }
            return b.end - a.end;
        });
        const result = [];
        let lastEnd = -1;
        for (const match of sorted) {
            if (match.start >= lastEnd) {
                result.push(match);
                lastEnd = match.end;
            }
        }
        return result;
    }
    /**
     * Detect profanity in `text`.
     *
     * @param {string} text - Text to scan.
     * @returns {{hasProfanity: boolean, detectedWords: string[],
     *   cleanedText: string, severity: number,
     *   positions: {word: string, start: number, end: number}[]}}
     *   `word`/`detectedWords` hold the substring found in the original text;
     *   `start`/`end` index the original text.
     * @throws {TypeError} When `text` is not a string.
     */
    detect(text) {
        const validatedText = validateString(text, "text");
        if (validatedText.length === 0) {
            return {
                hasProfanity: false,
                detectedWords: [],
                cleanedText: validatedText,
                severity: ProfanitySeverity.MILD,
                positions: [],
            };
        }
        const matches = [];
        const primary = this.caseSensitive
            ? { text: validatedText, starts: null, ends: null }
            : this.lowerCaseWithOffsets(validatedText);
        this.findMatches(primary.text, validatedText, matches, primary);
        // Leet pass. Note that it always works on a lower-cased copy, even in
        // case-sensitive mode. Its indices are translated back through the
        // offset tables because multi-character sequences change the length.
        if (this.enableLeetSpeak) {
            const leet = this.buildLeetView(validatedText);
            if (leet.text !== primary.text) {
                this.findMatches(leet.text, validatedText, matches, leet);
            }
        }
        const uniqueMatches = this.deduplicateMatches(matches);
        return {
            hasProfanity: uniqueMatches.length > 0,
            detectedWords: uniqueMatches.map((m) => m.originalWord),
            cleanedText: this.generateCleanedText(validatedText, uniqueMatches),
            severity: this.calculateSeverity(uniqueMatches),
            positions: uniqueMatches.map((m) => ({
                word: m.originalWord,
                start: m.start,
                end: m.end,
            })),
        };
    }
    /**
     * Scan `searchText` against the trie and append accepted matches.
     *
     * @param {string} searchText - Normalised text that is actually scanned.
     * @param {string} originalText - Text the reported positions refer to.
     * @param {object[]} matches - Output array; accepted matches are pushed as
     *   `{word, start, end, originalWord}`.
     * @param {{starts: ?number[], ends: ?number[]}} [offsets] - Tables mapping
     *   indices of `searchText` to `originalText`. When omitted (or null) the
     *   two texts are assumed to line up index for index.
     */
    findMatches(searchText, originalText, matches, offsets) {
        const starts = offsets ? offsets.starts : null;
        const ends = offsets ? offsets.ends : null;
        for (let i = 0; i < searchText.length; i++) {
            const found = this.profanityTrie.findMatches(searchText, i, this.detectPartialWords);
            for (const match of found) {
                const searchStart = i + match.start;
                const searchEnd = i + match.end;
                const start = starts ? starts[searchStart] : searchStart;
                const end = ends ? ends[searchEnd - 1] : searchEnd;
                // Only whole words count unless partial detection is enabled.
                if (!this.detectPartialWords &&
                    !this.isWholeWord(originalText, start, end)) {
                    continue;
                }
                // The whitelist is checked against what the user actually wrote.
                const matchedText = originalText.substring(start, end);
                if (this.isWhitelistedMatch(match.word, matchedText)) {
                    continue;
                }
                if (this.hasWordBoundaries(originalText, start, end)) {
                    matches.push({
                        word: match.word,
                        start,
                        end,
                        originalWord: matchedText,
                    });
                }
            }
        }
    }
    /**
     * Replace each range with `placeholder` repeated once per covered index.
     * Ranges are processed from the end of the text so earlier indices stay
     * valid.
     */
    maskRanges(text, ranges, placeholder) {
        const fromEnd = [...ranges].sort((a, b) => b.start - a.start);
        let result = text;
        for (const { start, end } of fromEnd) {
            result =
                result.substring(0, start) +
                    placeholder.repeat(end - start) +
                    result.substring(end);
        }
        return result;
    }
    /**
     * Generate cleaned text by masking the (non-overlapping) matches with the
     * default placeholder.
     */
    generateCleanedText(originalText, matches) {
        if (matches.length === 0) {
            return originalText;
        }
        return this.maskRanges(originalText, this.deduplicateMatches(matches), this.defaultPlaceholder);
    }
    /**
     * Simple boolean check for profanity.
     */
    check(text) {
        return this.detect(text).hasProfanity;
    }
    /**
     * Mask profanity character by character.
     *
     * @param {string} text - Text to clean.
     * @param {string} [placeholder] - Replacement repeated once per masked
     *   character; the default placeholder is used when omitted.
     * @returns {string} The cleaned text.
     */
    clean(text, placeholder) {
        const detection = this.detect(text);
        if (!placeholder || placeholder === this.defaultPlaceholder) {
            return detection.cleanedText;
        }
        return this.maskRanges(text, detection.positions, placeholder);
    }
    /**
     * Replace each profane word with a single placeholder (word level).
     *
     * Only matches that stand as whole words are replaced, even when partial
     * detection is enabled.
     *
     * @param {string} text - Text to clean.
     * @param {string} [placeholder="***"] - Replacement for each word.
     * @returns {string} The cleaned text.
     */
    cleanWithPlaceholder(text, placeholder = "***") {
        const detection = this.detect(text);
        if (detection.positions.length === 0) {
            return text;
        }
        // Work from the end so earlier indices are not shifted by replacements.
        const fromEnd = [...detection.positions].sort((a, b) => b.start - a.start);
        let result = text;
        for (const pos of fromEnd) {
            if (!this.isWholeWord(result, pos.start, pos.end)) {
                continue;
            }
            result =
                result.substring(0, pos.start) +
                    placeholder +
                    result.substring(pos.end);
        }
        return result;
    }
    /**
     * Add word(s) to the profanity list.
     *
     * @param {string|string[]} word - Word or words to add.
     */
    add(word) {
        const candidates = Array.isArray(word) ? word : [word];
        for (const entry of validateStringArray(candidates, "words to add", this.logger)) {
            this.dynamicWords.add(entry);
            this.addWordToTrie(entry);
        }
    }
    /**
     * Remove word(s) from the profanity list.
     *
     * Words are normalised exactly like on insertion (trimmed, lower-cased
     * unless case sensitive), and every runtime-added variant of the word is
     * forgotten so rebuildTrie() does not bring it back.
     *
     * @param {string|string[]} word - Word or words to remove.
     */
    remove(word) {
        const candidates = Array.isArray(word) ? word : [word];
        for (const entry of validateStringArray(candidates, "words to remove", this.logger)) {
            const target = this.normalizeWord(entry);
            this.profanityTrie.removeWord(target);
            for (const dynamicWord of this.dynamicWords) {
                if (this.normalizeWord(dynamicWord) === target) {
                    this.dynamicWords.delete(dynamicWord);
                }
            }
        }
    }
    /**
     * Add words to the whitelist (never reported as profanity).
     */
    addToWhitelist(words) {
        for (const word of validateStringArray(words, "whitelist words", this.logger)) {
            this.whitelistSet.add(this.normalizeWord(word));
        }
    }
    /**
     * Remove words from the whitelist.
     *
     * Words that were skipped while a dictionary was loaded because they were
     * whitelisted at that time are not re-inserted automatically; call
     * rebuildTrie() afterwards to restore them.
     */
    removeFromWhitelist(words) {
        for (const word of validateStringArray(words, "whitelist words", this.logger)) {
            this.whitelistSet.delete(this.normalizeWord(word));
        }
    }
    /**
     * Check whether a word is on the whitelist.
     */
    isWhitelisted(word) {
        return this.whitelistSet.has(this.normalizeWord(word));
    }
    /**
     * Load a built-in (or previously registered custom) dictionary.
     *
     * @param {string} language - Dictionary name, case-insensitive.
     * @returns {boolean} True when the dictionary is loaded (or already was),
     *   false when the name is invalid, unknown or the dictionary is empty.
     */
    loadLanguage(language) {
        if (!language || typeof language !== "string") {
            this.logger.warn(`Invalid language parameter: ${language}`);
            return false;
        }
        const langKey = language.toLowerCase().trim();
        if (this.loadedLanguages.has(langKey)) {
            return true;
        }
        const words = this.availableLanguages[langKey];
        if (!words || words.length === 0) {
            this.logger.warn(`Language '${language}' not found or empty`);
            return false;
        }
        try {
            let addedCount = 0;
            for (const word of words) {
                if (this.addWordToTrie(word)) {
                    addedCount++;
                }
            }
            this.loadedLanguages.add(langKey);
            this.logger.info(`Loaded ${addedCount} words from ${language} dictionary`);
            return true;
        }
        catch (error) {
            this.logger.error(`Failed to load language ${language}: ${error}`);
            return false;
        }
    }
    /**
     * Load multiple languages at once.
     *
     * @param {string[]} languages - Dictionary names.
     * @returns {number} How many of them are loaded after the call.
     */
    loadLanguages(languages) {
        const names = validateStringArray(languages, "languages", this.logger);
        return names.reduce((count, lang) => (this.loadLanguage(lang) ? count + 1 : count), 0);
    }
    /**
     * Load the hindi, bengali, tamil and telugu dictionaries.
     *
     * @returns {number} Number of those dictionaries that are loaded.
     */
    loadIndianLanguages() {
        return this.loadLanguages(["hindi", "bengali", "tamil", "telugu"]);
    }
    /**
     * Load a custom dictionary and register it under `name` so it is listed
     * among the available languages and restored by rebuildTrie().
     *
     * @param {string} name - Dictionary name (stored trimmed and lower-cased,
     *   matching the lookup done by loadLanguage).
     * @param {string[]} words - Words to add.
     * @throws {TypeError} When `name` is not a string or `words` is not an array.
     */
    loadCustomDictionary(name, words) {
        validateString(name, "dictionary name");
        const validatedWords = validateStringArray(words, "custom dictionary words", this.logger);
        if (validatedWords.length === 0) {
            this.logger.warn(`Custom dictionary '${name}' contains no valid words`);
            return;
        }
        try {
            let addedCount = 0;
            for (const word of validatedWords) {
                if (this.addWordToTrie(word)) {
                    addedCount++;
                }
            }
            const key = name.toLowerCase().trim();
            this.availableLanguages[key] = validatedWords;
            this.loadedLanguages.add(key);
            this.logger.info(`Loaded ${addedCount} words from custom dictionary '${name}'`);
        }
        catch (error) {
            this.logger.error(`Failed to load custom dictionary ${name}: ${error}`);
        }
    }
    /**
     * Add a single word to the trie.
     *
     * @returns {boolean} True when the word was inserted; false when it is not
     *   a non-blank string or is currently whitelisted.
     */
    addWordToTrie(word) {
        if (!word || typeof word !== "string" || word.trim().length === 0) {
            return false;
        }
        const normalizedWord = this.normalizeWord(word);
        if (this.isWhitelisted(normalizedWord)) {
            return false;
        }
        this.profanityTrie.addWord(normalizedWord);
        return true;
    }
    /**
     * Derive a severity level from the number of matches: the more matches
     * (in total or distinct dictionary words), the higher the level.
     */
    calculateSeverity(matches) {
        if (matches.length === 0) {
            return ProfanitySeverity.MILD;
        }
        const uniqueWords = new Set(matches.map((m) => m.word)).size;
        const totalMatches = matches.length;
        if (totalMatches >= 5 || uniqueWords >= 4) {
            return ProfanitySeverity.EXTREME;
        }
        if (totalMatches >= 3 || uniqueWords >= 3) {
            return ProfanitySeverity.SEVERE;
        }
        if (totalMatches >= 2 || uniqueWords >= 2) {
            return ProfanitySeverity.MODERATE;
        }
        return ProfanitySeverity.MILD;
    }
    /**
     * Clear all loaded dictionaries and runtime-added words. The whitelist and
     * the list of available dictionaries are kept.
     */
    clearList() {
        this.profanityTrie.clear();
        this.loadedLanguages.clear();
        this.dynamicWords.clear();
    }
    /**
     * Set the default masking character.
     *
     * Only the first character of `placeholder` is used; it is taken as a
     * whole code point so an emoji is not split into half a surrogate pair.
     *
     * @throws {TypeError} When `placeholder` is not a string.
     * @throws {Error} When `placeholder` is empty.
     */
    setPlaceholder(placeholder) {
        validateString(placeholder, "placeholder");
        if (placeholder.length === 0) {
            throw new Error("Placeholder cannot be empty");
        }
        this.defaultPlaceholder = String.fromCodePoint(placeholder.codePointAt(0));
    }
    /**
     * Get the names of the loaded dictionaries.
     */
    getLoadedLanguages() {
        return Array.from(this.loadedLanguages);
    }
    /**
     * Get the names of all available dictionaries (built-in and custom).
     */
    getAvailableLanguages() {
        return Object.keys(this.availableLanguages);
    }
    /**
     * Get a snapshot of the current configuration.
     */
    getConfig() {
        return {
            defaultPlaceholder: this.defaultPlaceholder,
            enableLeetSpeak: this.enableLeetSpeak,
            caseSensitive: this.caseSensitive,
            strictMode: this.strictMode,
            detectPartialWords: this.detectPartialWords,
            languages: this.getLoadedLanguages(),
            whitelistWords: Array.from(this.whitelistSet),
        };
    }
    /**
     * Rebuild the trie from the loaded dictionaries and the runtime-added
     * words, applying the current case-sensitivity and whitelist.
     */
    rebuildTrie() {
        this.profanityTrie.clear();
        for (const lang of this.loadedLanguages) {
            for (const word of this.availableLanguages[lang] || []) {
                this.addWordToTrie(word);
            }
        }
        for (const word of this.dynamicWords) {
            this.addWordToTrie(word);
        }
    }
    /**
     * Update configuration. The trie is rebuilt when case sensitivity changes
     * because stored words depend on it.
     *
     * @param {object} options - Same keys as the constructor options
     *   (defaultPlaceholder, enableLeetSpeak, caseSensitive, strictMode,
     *   detectPartialWords, whitelistWords); omitted keys are left untouched.
     */
    updateConfig(options) {
        let rebuildNeeded = false;
        if (options.defaultPlaceholder !== undefined) {
            this.setPlaceholder(options.defaultPlaceholder);
        }
        if (options.enableLeetSpeak !== undefined) {
            this.enableLeetSpeak = options.enableLeetSpeak;
        }
        if (options.caseSensitive !== undefined &&
            options.caseSensitive !== this.caseSensitive) {
            this.caseSensitive = options.caseSensitive;
            rebuildNeeded = true;
        }
        if (options.strictMode !== undefined) {
            this.strictMode = options.strictMode;
        }
        if (options.detectPartialWords !== undefined) {
            this.detectPartialWords = options.detectPartialWords;
        }
        if (options.whitelistWords) {
            this.addToWhitelist(options.whitelistWords);
        }
        if (rebuildNeeded) {
            this.rebuildTrie();
        }
    }
}
|
|
250
|
-
// Create and export a singleton instance
|
|
756
|
+
// Shared, ready-to-use filter instance created with the default options;
// it is the module's default export.
const sharedInstance = new AllProfanity();
export { sharedInstance as default };
|
|
253
759
|
//# sourceMappingURL=index.js.map
|