allprofanity 2.0.0 → 2.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +258 -245
- package/dist/index.d.ts +156 -80
- package/dist/index.js +598 -492
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
package/dist/index.js
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
// Language dictionaries imports
|
|
1
2
|
import englishBadWords from "./languages/english-words.js";
|
|
2
3
|
import hindiBadWords from "./languages/hindi-words.js";
|
|
3
4
|
import frenchBadWords from "./languages/french-words.js";
|
|
@@ -16,7 +17,21 @@ export { default as bengaliBadWords } from "./languages/bengali-words.js";
|
|
|
16
17
|
export { default as tamilBadWords } from "./languages/tamil-words.js";
|
|
17
18
|
export { default as teluguBadWords } from "./languages/telugu-words.js";
|
|
18
19
|
/**
 * Fallback logger that writes to the global console.
 *
 * Every message is emitted with an "[AllProfanity] " prefix; the three
 * methods differ only in which console channel they forward to.
 */
class ConsoleLogger {
    /**
     * Emit an informational message through console.log.
     * @param message - Text to log.
     */
    info(message) {
        const line = `[AllProfanity] ${message}`;
        console.log(line);
    }
    /**
     * Emit a warning through console.warn.
     * @param message - Text to log.
     */
    warn(message) {
        const line = `[AllProfanity] ${message}`;
        console.warn(line);
    }
    /**
     * Emit an error message through console.error.
     * @param message - Text to log.
     */
    error(message) {
        const line = `[AllProfanity] ${message}`;
        console.error(line);
    }
}
|
|
33
|
+
/**
|
|
34
|
+
* Severity levels for profanity detection.
|
|
20
35
|
*/
|
|
21
36
|
export var ProfanitySeverity;
|
|
22
37
|
(function (ProfanitySeverity) {
|
|
@@ -26,77 +41,155 @@ export var ProfanitySeverity;
|
|
|
26
41
|
ProfanitySeverity[ProfanitySeverity["EXTREME"] = 4] = "EXTREME";
|
|
27
42
|
})(ProfanitySeverity = ProfanitySeverity || (ProfanitySeverity = {}));
|
|
28
43
|
/**
 * Validate a string parameter.
 * @param input - The input to validate.
 * @param paramName - The name of the parameter (used in the error message).
 * @returns The same value, unchanged, when it is a primitive string.
 * @throws {TypeError} If input is not a string. The message names the actual
 *   kind of value received.
 */
function validateString(input, paramName) {
    if (typeof input === "string") {
        return input;
    }
    // `typeof` reports "object" for both null and arrays, which makes the
    // error misleading; name those two cases explicitly.
    let actual = typeof input;
    if (input === null) {
        actual = "null";
    }
    else if (Array.isArray(input)) {
        actual = "array";
    }
    throw new TypeError(`${paramName} must be a string, got ${actual}`);
}
|
|
56
|
+
/**
 * Validate a string array parameter.
 *
 * Non-string items are dropped with a console warning; strings that are empty
 * or whitespace-only are dropped silently. Kept strings are returned as-is
 * (they are not trimmed).
 * @param input - The input to validate.
 * @param paramName - The name of the parameter (used in messages).
 * @returns A new array containing only the non-blank string items.
 * @throws {TypeError} If input is not an array.
 */
function validateStringArray(input, paramName) {
    if (!Array.isArray(input)) {
        throw new TypeError(`${paramName} must be an array`);
    }
    return input.filter((item) => {
        if (typeof item !== "string") {
            // Interpolating a Symbol or a null-prototype object directly into
            // a template literal throws; stringify defensively so a bad item
            // is skipped instead of aborting validation.
            let shown;
            try {
                shown = String(item);
            }
            catch (_err) {
                shown = Object.prototype.toString.call(item);
            }
            console.warn(`Skipping non-string item in ${paramName}: ${shown}`);
            return false;
        }
        return item.trim().length > 0;
    });
}
|
|
75
|
+
/**
 * Trie node for efficient string matching.
 *
 * Edges are keyed by Unicode code point (one Map entry per code point), and
 * every operation walks its input the same way, so words containing
 * characters outside the Basic Multilingual Plane can be added, matched and
 * removed consistently. Offsets reported by findMatches are UTF-16 indices,
 * suitable for String.prototype.substring.
 */
class TrieNode {
    constructor() {
        this.children = new Map();
        this.isEndOfWord = false;
        this.word = "";
    }
    /**
     * Add a word to the trie.
     * @param word - The word to add.
     */
    addWord(word) {
        let current = this;
        // for...of iterates by code point, which defines the edge keys.
        for (const char of word) {
            let next = current.children.get(char);
            if (!next) {
                next = new TrieNode();
                current.children.set(char, next);
            }
            current = next;
        }
        current.isEndOfWord = true;
        current.word = word;
    }
    /**
     * Remove a word from the trie and prune nodes that become unused.
     * @param word - The word to remove.
     * @returns True if the word was present and has been removed, false otherwise.
     */
    removeWord(word) {
        // Split the same way addWord does so the path lookup uses matching keys.
        const chars = Array.from(word);
        let node = this;
        for (const char of chars) {
            node = node.children.get(char);
            if (!node) {
                return false;
            }
        }
        if (!node.isEndOfWord) {
            return false;
        }
        // The helper's return value only says whether *this* node could be
        // pruned, so it must not be used as the "was removed" answer.
        this.removeHelper(chars, 0);
        return true;
    }
    /**
     * Recursive worker for removeWord.
     * @param word - Sequence of edge keys (array of code points, or a string indexed per UTF-16 unit).
     * @param index - Position in `word` handled by this node.
     * @returns True if this node no longer carries anything and may be deleted by its parent.
     */
    removeHelper(word, index) {
        if (index === word.length) {
            if (!this.isEndOfWord) {
                return false;
            }
            this.isEndOfWord = false;
            // Drop the stored word so a surviving prefix node holds no stale data.
            this.word = "";
            return this.children.size === 0;
        }
        const char = word[index];
        const node = this.children.get(char);
        if (!node) {
            return false;
        }
        if (node.removeHelper(word, index + 1)) {
            this.children.delete(char);
            return this.children.size === 0 && !this.isEndOfWord;
        }
        return false;
    }
    /**
     * Find all stored words that begin exactly at a given position.
     * @param text - The text to search.
     * @param startPos - The start position (UTF-16 index).
     * @param allowPartial - Accepted for interface compatibility; it does not
     *   change the result here (whole-word filtering is the caller's job).
     * @returns Array of { word, start, end } with offsets relative to startPos
     *   (start is always 0), shortest match first.
     */
    findMatches(text, startPos, allowPartial) {
        const matches = [];
        let current = this;
        let pos = startPos;
        while (pos < text.length) {
            const codePoint = text.codePointAt(pos);
            // Out-of-range positions (e.g. a negative startPos) yield undefined.
            if (codePoint === undefined) {
                break;
            }
            const char = String.fromCodePoint(codePoint);
            const nextNode = current.children.get(char);
            if (!nextNode) {
                break;
            }
            current = nextNode;
            // Advance by 1 or 2 UTF-16 units depending on the code point.
            pos += char.length;
            if (current.isEndOfWord) {
                matches.push({
                    word: current.word,
                    start: 0,
                    end: pos - startPos,
                });
            }
        }
        return matches;
    }
    /**
     * Clear all words from the trie.
     */
    clear() {
        this.children.clear();
        this.isEndOfWord = false;
        this.word = "";
    }
}
|
|
175
|
+
/**
|
|
176
|
+
* Main class for profanity detection and filtering.
|
|
31
177
|
*/
|
|
32
178
|
export class AllProfanity {
|
|
33
179
|
/**
|
|
34
|
-
* Create
|
|
35
|
-
* @param options -
|
|
180
|
+
* Create an AllProfanity instance.
|
|
181
|
+
* @param options - Profanity filter configuration options.
|
|
36
182
|
*/
|
|
37
183
|
constructor(options) {
|
|
38
|
-
var _a, _b, _c, _d;
|
|
39
|
-
this.
|
|
40
|
-
this.normalizedProfanityMap = new Map();
|
|
41
|
-
this.defaultPlaceholder = "*";
|
|
42
|
-
this.loadedLanguages = new Set();
|
|
184
|
+
var _a, _b, _c, _d, _e;
|
|
185
|
+
this.profanityTrie = new TrieNode();
|
|
43
186
|
this.whitelistSet = new Set();
|
|
187
|
+
this.loadedLanguages = new Set();
|
|
188
|
+
this.defaultPlaceholder = "*";
|
|
44
189
|
this.enableLeetSpeak = true;
|
|
45
190
|
this.caseSensitive = false;
|
|
46
191
|
this.strictMode = false;
|
|
47
|
-
this.detectPartialWords =
|
|
48
|
-
// Comprehensive leet speak mapping
|
|
49
|
-
this.leetMap = {
|
|
50
|
-
a: ["4", "@", "^", "aye", "λ", "ª"],
|
|
51
|
-
b: ["8", "6", "|3", "ß", "β", "13"],
|
|
52
|
-
c: ["(", "<", "©", "¢", "see", "sea"],
|
|
53
|
-
d: ["|)", "|]", "0", "ð"],
|
|
54
|
-
e: ["3", "€", "£", "ë", "é", "è"],
|
|
55
|
-
f: ["|=", "ph", "|#", "ƒ"],
|
|
56
|
-
g: ["9", "6", "&", "gee"],
|
|
57
|
-
h: ["#", "|-|", "[-]", "}{", "ħ"],
|
|
58
|
-
i: ["1", "!", "|", "eye", "ï", "í", "ì"],
|
|
59
|
-
j: ["_|", "_/", "¿", "ĵ"],
|
|
60
|
-
k: ["|<", "1<", "l<", "|{", "ķ"],
|
|
61
|
-
l: ["1", "|", "7", "£", "ł", "ĺ"],
|
|
62
|
-
m: ["|/|", "//\\", "em", "ɱ"],
|
|
63
|
-
n: ["||", "//", "and", "ñ", "ń"],
|
|
64
|
-
o: ["0", "()", "oh", "ø", "ó", "ò", "ô"],
|
|
65
|
-
p: ["|*", "|o", "|^", "|>", "9", "þ"],
|
|
66
|
-
q: ["(_,)", "()_", "kw", "ĸ"],
|
|
67
|
-
r: ["|2", "12", ".-", "are", "ř", "ŕ"],
|
|
68
|
-
s: ["5", "$", "z", "ş", "ś", "š"],
|
|
69
|
-
t: ["7", "+", "-|-", "†", "ť", "ţ"],
|
|
70
|
-
u: ["(_)", "|_|", "v", "you", "ü", "ú", "ù"],
|
|
71
|
-
v: ["\\/", "|/", "|", "vee"],
|
|
72
|
-
w: ["\\/\\/", "vv", "dubya", "ŵ"],
|
|
73
|
-
x: ["><", "}{", "ecks", "χ"],
|
|
74
|
-
y: ["`/", "j", "why", "ÿ", "ý"],
|
|
75
|
-
z: ["2", "7_", "-/_", "zee", "ž", "ź", "ż"],
|
|
76
|
-
};
|
|
77
|
-
// Word boundary patterns
|
|
78
|
-
this.wordBoundaryChars = /[\s\.,;:!?\-_+=\[\]{}()"'\/\\]/;
|
|
79
|
-
// Common word variations and suffixes
|
|
80
|
-
this.commonSuffixes = [
|
|
81
|
-
"ing",
|
|
82
|
-
"ed",
|
|
83
|
-
"s",
|
|
84
|
-
"er",
|
|
85
|
-
"ers",
|
|
86
|
-
"est",
|
|
87
|
-
"ly",
|
|
88
|
-
"tion",
|
|
89
|
-
"ness",
|
|
90
|
-
];
|
|
91
|
-
this.commonPrefixes = [
|
|
92
|
-
"un",
|
|
93
|
-
"re",
|
|
94
|
-
"pre",
|
|
95
|
-
"dis",
|
|
96
|
-
"over",
|
|
97
|
-
"under",
|
|
98
|
-
"out",
|
|
99
|
-
];
|
|
192
|
+
this.detectPartialWords = false;
|
|
100
193
|
this.availableLanguages = {
|
|
101
194
|
english: englishBadWords || [],
|
|
102
195
|
hindi: hindiBadWords || [],
|
|
@@ -107,442 +200,301 @@ export class AllProfanity {
|
|
|
107
200
|
tamil: tamilBadWords || [],
|
|
108
201
|
telugu: teluguBadWords || [],
|
|
109
202
|
};
|
|
110
|
-
|
|
111
|
-
|
|
203
|
+
this.leetMappings = new Map([
|
|
204
|
+
["@", "a"],
|
|
205
|
+
["^", "a"],
|
|
206
|
+
["4", "a"],
|
|
207
|
+
["8", "b"],
|
|
208
|
+
["6", "b"],
|
|
209
|
+
["|3", "b"],
|
|
210
|
+
["(", "c"],
|
|
211
|
+
["<", "c"],
|
|
212
|
+
["©", "c"],
|
|
213
|
+
["|)", "d"],
|
|
214
|
+
["0", "o"],
|
|
215
|
+
["3", "e"],
|
|
216
|
+
["€", "e"],
|
|
217
|
+
["|=", "f"],
|
|
218
|
+
["ph", "f"],
|
|
219
|
+
["9", "g"],
|
|
220
|
+
["#", "h"],
|
|
221
|
+
["|-|", "h"],
|
|
222
|
+
["1", "i"],
|
|
223
|
+
["!", "i"],
|
|
224
|
+
["|", "i"],
|
|
225
|
+
["_|", "j"],
|
|
226
|
+
["¿", "j"],
|
|
227
|
+
["|<", "k"],
|
|
228
|
+
["1<", "k"],
|
|
229
|
+
["7", "l"],
|
|
230
|
+
["|\\/|", "m"],
|
|
231
|
+
["/\\/\\", "m"],
|
|
232
|
+
["|\\|", "n"],
|
|
233
|
+
["//", "n"],
|
|
234
|
+
["()", "o"],
|
|
235
|
+
["|*", "p"],
|
|
236
|
+
["|o", "p"],
|
|
237
|
+
["(_,)", "q"],
|
|
238
|
+
["()_", "q"],
|
|
239
|
+
["|2", "r"],
|
|
240
|
+
["12", "r"],
|
|
241
|
+
["5", "s"],
|
|
242
|
+
["$", "s"],
|
|
243
|
+
["z", "s"],
|
|
244
|
+
["7", "t"],
|
|
245
|
+
["+", "t"],
|
|
246
|
+
["†", "t"],
|
|
247
|
+
["|_|", "u"],
|
|
248
|
+
["(_)", "u"],
|
|
249
|
+
["v", "u"],
|
|
250
|
+
["\\/", "v"],
|
|
251
|
+
["|/", "v"],
|
|
252
|
+
["\\/\\/", "w"],
|
|
253
|
+
["vv", "w"],
|
|
254
|
+
["><", "x"],
|
|
255
|
+
["}{", "x"],
|
|
256
|
+
["`/", "y"],
|
|
257
|
+
["j", "y"],
|
|
258
|
+
["2", "z"],
|
|
259
|
+
["7_", "z"],
|
|
260
|
+
]);
|
|
261
|
+
this.dynamicWords = new Set();
|
|
262
|
+
this.logger = (options === null || options === void 0 ? void 0 : options.logger) || new ConsoleLogger();
|
|
263
|
+
if ((options === null || options === void 0 ? void 0 : options.defaultPlaceholder) !== undefined) {
|
|
112
264
|
this.setPlaceholder(options.defaultPlaceholder);
|
|
113
265
|
}
|
|
114
266
|
this.enableLeetSpeak = (_a = options === null || options === void 0 ? void 0 : options.enableLeetSpeak) !== null && _a !== void 0 ? _a : true;
|
|
115
267
|
this.caseSensitive = (_b = options === null || options === void 0 ? void 0 : options.caseSensitive) !== null && _b !== void 0 ? _b : false;
|
|
116
268
|
this.strictMode = (_c = options === null || options === void 0 ? void 0 : options.strictMode) !== null && _c !== void 0 ? _c : false;
|
|
117
|
-
this.detectPartialWords = (_d = options === null || options === void 0 ? void 0 : options.detectPartialWords) !== null && _d !== void 0 ? _d :
|
|
118
|
-
// Load whitelist if provided
|
|
269
|
+
this.detectPartialWords = (_d = options === null || options === void 0 ? void 0 : options.detectPartialWords) !== null && _d !== void 0 ? _d : false;
|
|
119
270
|
if (options === null || options === void 0 ? void 0 : options.whitelistWords) {
|
|
120
271
|
this.addToWhitelist(options.whitelistWords);
|
|
121
272
|
}
|
|
122
|
-
// Load the default English dictionary
|
|
123
273
|
this.loadLanguage("english");
|
|
124
|
-
// Load Hindi by default for backward compatibility
|
|
125
274
|
this.loadLanguage("hindi");
|
|
126
|
-
|
|
127
|
-
if (options === null || options === void 0 ? void 0 : options.languages) {
|
|
275
|
+
if ((_e = options === null || options === void 0 ? void 0 : options.languages) === null || _e === void 0 ? void 0 : _e.length) {
|
|
128
276
|
options.languages.forEach((lang) => this.loadLanguage(lang));
|
|
129
277
|
}
|
|
130
|
-
// Load any custom dictionaries
|
|
131
278
|
if (options === null || options === void 0 ? void 0 : options.customDictionaries) {
|
|
132
|
-
Object.entries(options.customDictionaries).forEach(([
|
|
133
|
-
this.loadCustomDictionary(
|
|
279
|
+
Object.entries(options.customDictionaries).forEach(([name, words]) => {
|
|
280
|
+
this.loadCustomDictionary(name, words);
|
|
134
281
|
});
|
|
135
282
|
}
|
|
136
283
|
}
|
|
137
284
|
/**
|
|
138
|
-
* Normalize
|
|
139
|
-
* @param text -
|
|
140
|
-
* @returns Normalized text
|
|
285
|
+
* Normalize leet speak to regular characters.
|
|
286
|
+
* @param text - The input text.
|
|
287
|
+
* @returns Normalized text.
|
|
141
288
|
*/
|
|
142
289
|
normalizeLeetSpeak(text) {
|
|
143
290
|
if (!this.enableLeetSpeak)
|
|
144
291
|
return text;
|
|
145
292
|
let normalized = text.toLowerCase();
|
|
146
|
-
|
|
147
|
-
const
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
{ pattern: /\[-\]/g, replacement: "h" },
|
|
151
|
-
{ pattern: /\}{\s*/g, replacement: "h" },
|
|
152
|
-
{ pattern: /\|\/\|/g, replacement: "m" },
|
|
153
|
-
{ pattern: /\/\/\\/g, replacement: "m" },
|
|
154
|
-
{ pattern: /\|\|/g, replacement: "n" },
|
|
155
|
-
{ pattern: /\/\//g, replacement: "n" },
|
|
156
|
-
{ pattern: /\|2/g, replacement: "r" },
|
|
157
|
-
{ pattern: /12/g, replacement: "r" },
|
|
158
|
-
{ pattern: /\\\/\\\//g, replacement: "w" },
|
|
159
|
-
{ pattern: /vv/g, replacement: "w" },
|
|
160
|
-
{ pattern: /><\s*/g, replacement: "x" },
|
|
161
|
-
{ pattern: /\(_\)/g, replacement: "u" },
|
|
162
|
-
{ pattern: /\|_\|/g, replacement: "u" },
|
|
163
|
-
{ pattern: /\\\//g, replacement: "v" },
|
|
164
|
-
{ pattern: /\|\//g, replacement: "v" },
|
|
165
|
-
// Single character mappings
|
|
166
|
-
{ pattern: /@/g, replacement: "a" },
|
|
167
|
-
{ pattern: /4/g, replacement: "u" },
|
|
168
|
-
{ pattern: /\^/g, replacement: "a" },
|
|
169
|
-
{ pattern: /8/g, replacement: "b" },
|
|
170
|
-
{ pattern: /6/g, replacement: "b" },
|
|
171
|
-
{ pattern: /\(/g, replacement: "c" },
|
|
172
|
-
{ pattern: /</g, replacement: "c" },
|
|
173
|
-
{ pattern: /©/g, replacement: "c" },
|
|
174
|
-
{ pattern: /¢/g, replacement: "c" },
|
|
175
|
-
{ pattern: /0/g, replacement: "o" },
|
|
176
|
-
{ pattern: /3/g, replacement: "e" },
|
|
177
|
-
{ pattern: /€/g, replacement: "e" },
|
|
178
|
-
{ pattern: /£/g, replacement: "e" },
|
|
179
|
-
{ pattern: /9/g, replacement: "g" },
|
|
180
|
-
{ pattern: /&/g, replacement: "g" },
|
|
181
|
-
{ pattern: /#/g, replacement: "h" },
|
|
182
|
-
{ pattern: /1/g, replacement: "i" },
|
|
183
|
-
{ pattern: /!/g, replacement: "i" },
|
|
184
|
-
{ pattern: /\|/g, replacement: "i" },
|
|
185
|
-
{ pattern: /7/g, replacement: "t" },
|
|
186
|
-
{ pattern: /5/g, replacement: "s" },
|
|
187
|
-
{ pattern: /\$/g, replacement: "s" },
|
|
188
|
-
{ pattern: /\+/g, replacement: "t" },
|
|
189
|
-
{ pattern: /2/g, replacement: "z" },
|
|
190
|
-
];
|
|
191
|
-
// Apply all mappings
|
|
192
|
-
for (const mapping of leetMappings) {
|
|
193
|
-
normalized = normalized.replace(mapping.pattern, mapping.replacement);
|
|
293
|
+
const sortedMappings = Array.from(this.leetMappings.entries()).sort(([leetA], [leetB]) => leetB.length - leetA.length);
|
|
294
|
+
for (const [leet, normal] of sortedMappings) {
|
|
295
|
+
const regex = new RegExp(this.escapeRegex(leet), "g");
|
|
296
|
+
normalized = normalized.replace(regex, normal);
|
|
194
297
|
}
|
|
195
298
|
return normalized;
|
|
196
299
|
}
|
|
197
|
-
escapeRegex(str) {
|
|
198
|
-
if (!str || typeof str !== "string") {
|
|
199
|
-
return "";
|
|
200
|
-
}
|
|
201
|
-
return str.replace(/[\\^$.*+?()[\]{}|\-]/g, function (match) {
|
|
202
|
-
return "\\" + match;
|
|
203
|
-
});
|
|
204
|
-
}
|
|
205
300
|
/**
|
|
206
|
-
*
|
|
301
|
+
* Escape regex special characters in a string.
|
|
302
|
+
* @param str - The string to escape.
|
|
303
|
+
* @returns The escaped string.
|
|
207
304
|
*/
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
// Add suffix variations
|
|
211
|
-
for (const suffix of this.commonSuffixes) {
|
|
212
|
-
variations.add(word + suffix);
|
|
213
|
-
// Handle words ending in 'e'
|
|
214
|
-
if (word.endsWith("e") && !suffix.startsWith("e")) {
|
|
215
|
-
variations.add(word.slice(0, -1) + suffix);
|
|
216
|
-
}
|
|
217
|
-
// Handle consonant doubling
|
|
218
|
-
if (word.length > 2 && /[bcdfghjklmnpqrstvwxyz]/.test(word.slice(-1))) {
|
|
219
|
-
variations.add(word + word.slice(-1) + suffix);
|
|
220
|
-
}
|
|
221
|
-
}
|
|
222
|
-
// Add prefix variations
|
|
223
|
-
for (const prefix of this.commonPrefixes) {
|
|
224
|
-
variations.add(prefix + word);
|
|
225
|
-
}
|
|
226
|
-
return Array.from(variations);
|
|
305
|
+
escapeRegex(str) {
|
|
306
|
+
return str.replace(/[\\^$.*+?()[\]{}|]/g, "\\$&");
|
|
227
307
|
}
|
|
228
308
|
/**
|
|
229
|
-
* Check if
|
|
309
|
+
* Check if a match is bounded by word boundaries (strict mode).
|
|
310
|
+
* @param text - The text.
|
|
311
|
+
* @param start - Start index.
|
|
312
|
+
* @param end - End index.
|
|
313
|
+
* @returns True if match is at word boundaries, false otherwise.
|
|
230
314
|
*/
|
|
231
315
|
hasWordBoundaries(text, start, end) {
|
|
232
316
|
if (!this.strictMode)
|
|
233
317
|
return true;
|
|
234
318
|
const beforeChar = start > 0 ? text[start - 1] : " ";
|
|
235
319
|
const afterChar = end < text.length ? text[end] : " ";
|
|
236
|
-
|
|
237
|
-
|
|
320
|
+
const wordBoundaryRegex = /[\s\p{P}\p{S}]/u;
|
|
321
|
+
return (wordBoundaryRegex.test(beforeChar) && wordBoundaryRegex.test(afterChar));
|
|
238
322
|
}
|
|
239
323
|
/**
|
|
240
|
-
*
|
|
324
|
+
* Determine if a match is a whole word.
|
|
325
|
+
* @param text - The text.
|
|
326
|
+
* @param start - Start index.
|
|
327
|
+
* @param end - End index.
|
|
328
|
+
* @returns True if whole word, false otherwise.
|
|
241
329
|
*/
|
|
242
|
-
|
|
243
|
-
if (
|
|
244
|
-
return
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
const uniqueWords = new Set(detectedWords).size;
|
|
249
|
-
if (totalWords >= 5 || uniqueWords >= 3)
|
|
250
|
-
return ProfanitySeverity.EXTREME;
|
|
251
|
-
if (totalWords >= 3 || uniqueWords >= 2)
|
|
252
|
-
return ProfanitySeverity.SEVERE;
|
|
253
|
-
if (totalWords >= 2)
|
|
254
|
-
return ProfanitySeverity.MODERATE;
|
|
255
|
-
return ProfanitySeverity.MILD;
|
|
330
|
+
isWholeWord(text, start, end) {
|
|
331
|
+
if (start !== 0 && /\w/.test(text[start - 1]))
|
|
332
|
+
return false;
|
|
333
|
+
if (end !== text.length && /\w/.test(text[end]))
|
|
334
|
+
return false;
|
|
335
|
+
return true;
|
|
256
336
|
}
|
|
257
337
|
/**
|
|
258
|
-
*
|
|
259
|
-
* @param
|
|
260
|
-
* @
|
|
338
|
+
* Check if a match is whitelisted.
|
|
339
|
+
* @param word - Word from dictionary.
|
|
340
|
+
* @param matchedText - Actual matched text.
|
|
341
|
+
* @returns True if whitelisted, false otherwise.
|
|
261
342
|
*/
|
|
262
|
-
|
|
263
|
-
if (this.
|
|
264
|
-
return
|
|
265
|
-
}
|
|
266
|
-
const langKey = language.toLowerCase();
|
|
267
|
-
if (this.availableLanguages[langKey] &&
|
|
268
|
-
this.availableLanguages[langKey].length > 0) {
|
|
269
|
-
const words = this.availableLanguages[langKey];
|
|
270
|
-
// Add words and their variations to the profanity set
|
|
271
|
-
for (const word of words) {
|
|
272
|
-
if (!word || typeof word !== "string")
|
|
273
|
-
continue;
|
|
274
|
-
const normalizedWord = this.caseSensitive ? word : word.toLowerCase();
|
|
275
|
-
this.profanitySet.add(normalizedWord);
|
|
276
|
-
// Store normalized leet version mapping
|
|
277
|
-
const leetNormalized = this.normalizeLeetSpeak(normalizedWord);
|
|
278
|
-
if (leetNormalized !== normalizedWord) {
|
|
279
|
-
this.normalizedProfanityMap.set(leetNormalized, normalizedWord);
|
|
280
|
-
}
|
|
281
|
-
// Generate and add variations
|
|
282
|
-
const variations = this.generateWordVariations(normalizedWord);
|
|
283
|
-
for (const variation of variations) {
|
|
284
|
-
this.profanitySet.add(variation);
|
|
285
|
-
const leetVariation = this.normalizeLeetSpeak(variation);
|
|
286
|
-
if (leetVariation !== variation) {
|
|
287
|
-
this.normalizedProfanityMap.set(leetVariation, variation);
|
|
288
|
-
}
|
|
289
|
-
}
|
|
290
|
-
}
|
|
291
|
-
this.loadedLanguages.add(langKey);
|
|
292
|
-
console.log(`AllProfanity: Added ${words.length} ${language} words to the profanity list.`);
|
|
293
|
-
return true;
|
|
343
|
+
isWhitelistedMatch(word, matchedText) {
|
|
344
|
+
if (this.caseSensitive) {
|
|
345
|
+
return this.whitelistSet.has(word) || this.whitelistSet.has(matchedText);
|
|
294
346
|
}
|
|
295
347
|
else {
|
|
296
|
-
|
|
297
|
-
|
|
348
|
+
return (this.whitelistSet.has(word.toLowerCase()) ||
|
|
349
|
+
this.whitelistSet.has(matchedText.toLowerCase()));
|
|
298
350
|
}
|
|
299
351
|
}
|
|
300
352
|
/**
|
|
301
|
-
*
|
|
302
|
-
* @param
|
|
303
|
-
* @returns
|
|
353
|
+
* Remove overlapping matches, keeping only the longest at each start position.
|
|
354
|
+
* @param matches - Array of match results.
|
|
355
|
+
* @returns Deduplicated matches.
|
|
304
356
|
*/
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
}
|
|
357
|
+
deduplicateMatches(matches) {
|
|
358
|
+
const sorted = [...matches].sort((a, b) => {
|
|
359
|
+
if (a.start !== b.start)
|
|
360
|
+
return a.start - b.start;
|
|
361
|
+
return b.end - a.end;
|
|
311
362
|
});
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
loadIndianLanguages() {
|
|
319
|
-
const indianLanguages = ["hindi", "bengali", "tamil", "telugu"];
|
|
320
|
-
return this.loadLanguages(indianLanguages);
|
|
321
|
-
}
|
|
322
|
-
/**
|
|
323
|
-
* Load a custom dictionary with a given name
|
|
324
|
-
* @param name - Name to identify this dictionary
|
|
325
|
-
* @param words - Array of profanity words
|
|
326
|
-
*/
|
|
327
|
-
loadCustomDictionary(name, words) {
|
|
328
|
-
if (!words || words.length === 0) {
|
|
329
|
-
console.warn(`AllProfanity: Custom dictionary '${name}' has no words.`);
|
|
330
|
-
return;
|
|
331
|
-
}
|
|
332
|
-
// Add to available languages for future reference
|
|
333
|
-
this.availableLanguages[name.toLowerCase()] = words;
|
|
334
|
-
// Process and add words
|
|
335
|
-
for (const word of words) {
|
|
336
|
-
if (!word || typeof word !== "string")
|
|
337
|
-
continue;
|
|
338
|
-
const normalizedWord = this.caseSensitive ? word : word.toLowerCase();
|
|
339
|
-
this.profanitySet.add(normalizedWord);
|
|
340
|
-
// Store normalized leet version mapping
|
|
341
|
-
const leetNormalized = this.normalizeLeetSpeak(normalizedWord);
|
|
342
|
-
if (leetNormalized !== normalizedWord) {
|
|
343
|
-
this.normalizedProfanityMap.set(leetNormalized, normalizedWord);
|
|
344
|
-
}
|
|
345
|
-
// Generate and add variations
|
|
346
|
-
const variations = this.generateWordVariations(normalizedWord);
|
|
347
|
-
for (const variation of variations) {
|
|
348
|
-
this.profanitySet.add(variation);
|
|
349
|
-
const leetVariation = this.normalizeLeetSpeak(variation);
|
|
350
|
-
if (leetVariation !== variation) {
|
|
351
|
-
this.normalizedProfanityMap.set(leetVariation, variation);
|
|
352
|
-
}
|
|
353
|
-
}
|
|
354
|
-
}
|
|
355
|
-
this.loadedLanguages.add(name.toLowerCase());
|
|
356
|
-
console.log(`AllProfanity: Added ${words.length} words from custom '${name}' dictionary.`);
|
|
357
|
-
}
|
|
358
|
-
/**
|
|
359
|
-
* Add words to whitelist (words that should never be flagged as profanity)
|
|
360
|
-
* @param words - Array of words to whitelist
|
|
361
|
-
*/
|
|
362
|
-
addToWhitelist(words) {
|
|
363
|
-
for (const word of words) {
|
|
364
|
-
if (word && typeof word === "string") {
|
|
365
|
-
this.whitelistSet.add(this.caseSensitive ? word : word.toLowerCase());
|
|
366
|
-
}
|
|
367
|
-
}
|
|
368
|
-
}
|
|
369
|
-
/**
|
|
370
|
-
* Remove words from whitelist
|
|
371
|
-
* @param words - Array of words to remove from whitelist
|
|
372
|
-
*/
|
|
373
|
-
removeFromWhitelist(words) {
|
|
374
|
-
for (const word of words) {
|
|
375
|
-
if (word && typeof word === "string") {
|
|
376
|
-
this.whitelistSet.delete(this.caseSensitive ? word : word.toLowerCase());
|
|
363
|
+
const result = [];
|
|
364
|
+
let lastEnd = -1;
|
|
365
|
+
for (const match of sorted) {
|
|
366
|
+
if (match.start >= lastEnd) {
|
|
367
|
+
result.push(match);
|
|
368
|
+
lastEnd = match.end;
|
|
377
369
|
}
|
|
378
370
|
}
|
|
371
|
+
return result;
|
|
379
372
|
}
|
|
380
373
|
/**
|
|
381
|
-
*
|
|
382
|
-
* @param text - The text to
|
|
383
|
-
* @returns
|
|
374
|
+
* Detect profanity in a given text.
|
|
375
|
+
* @param text - The text to check.
|
|
376
|
+
* @returns Profanity detection result.
|
|
384
377
|
*/
|
|
385
378
|
detect(text) {
|
|
386
|
-
|
|
379
|
+
const validatedText = validateString(text, "text");
|
|
380
|
+
if (validatedText.length === 0) {
|
|
387
381
|
return {
|
|
388
382
|
hasProfanity: false,
|
|
389
383
|
detectedWords: [],
|
|
390
|
-
cleanedText:
|
|
384
|
+
cleanedText: validatedText,
|
|
391
385
|
severity: ProfanitySeverity.MILD,
|
|
392
386
|
positions: [],
|
|
393
387
|
};
|
|
394
388
|
}
|
|
395
|
-
const
|
|
396
|
-
const
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
// Create regex for word boundary detection
|
|
405
|
-
const escapedWord = this.escapeRegex(profanity);
|
|
406
|
-
const wordRegex = new RegExp(`\\b${escapedWord}\\b`, this.caseSensitive ? "g" : "gi");
|
|
407
|
-
let match;
|
|
408
|
-
while ((match = wordRegex.exec(normalizedText)) !== null) {
|
|
409
|
-
if (this.hasWordBoundaries(normalizedText, match.index, match.index + match[0].length)) {
|
|
410
|
-
detectedWords.push(match[0]);
|
|
411
|
-
positions.push({
|
|
412
|
-
word: match[0],
|
|
413
|
-
start: match.index,
|
|
414
|
-
end: match.index + match[0].length,
|
|
415
|
-
});
|
|
416
|
-
}
|
|
417
|
-
}
|
|
418
|
-
}
|
|
419
|
-
catch (error) {
|
|
420
|
-
// Fallback to simple string search if regex fails
|
|
421
|
-
const index = normalizedText.indexOf(profanity);
|
|
422
|
-
if (index !== -1) {
|
|
423
|
-
detectedWords.push(profanity);
|
|
424
|
-
positions.push({
|
|
425
|
-
word: profanity,
|
|
426
|
-
start: index,
|
|
427
|
-
end: index + profanity.length,
|
|
428
|
-
});
|
|
429
|
-
}
|
|
389
|
+
const matches = [];
|
|
390
|
+
const normalizedText = this.caseSensitive
|
|
391
|
+
? validatedText
|
|
392
|
+
: validatedText.toLowerCase();
|
|
393
|
+
this.findMatches(normalizedText, validatedText, matches);
|
|
394
|
+
if (this.enableLeetSpeak) {
|
|
395
|
+
const leetNormalized = this.normalizeLeetSpeak(normalizedText);
|
|
396
|
+
if (leetNormalized !== normalizedText) {
|
|
397
|
+
this.findMatches(leetNormalized, validatedText, matches);
|
|
430
398
|
}
|
|
431
399
|
}
|
|
432
|
-
|
|
433
|
-
|
|
434
|
-
|
|
435
|
-
|
|
400
|
+
const uniqueMatches = this.deduplicateMatches(matches);
|
|
401
|
+
const detectedWords = uniqueMatches.map((m) => m.originalWord);
|
|
402
|
+
const severity = this.calculateSeverity(uniqueMatches);
|
|
403
|
+
const cleanedText = this.generateCleanedText(validatedText, uniqueMatches);
|
|
404
|
+
return {
|
|
405
|
+
hasProfanity: uniqueMatches.length > 0,
|
|
406
|
+
detectedWords,
|
|
407
|
+
cleanedText,
|
|
408
|
+
severity,
|
|
409
|
+
positions: uniqueMatches.map((m) => ({
|
|
410
|
+
word: m.originalWord,
|
|
411
|
+
start: m.start,
|
|
412
|
+
end: m.end,
|
|
413
|
+
})),
|
|
414
|
+
};
|
|
415
|
+
}
|
|
416
|
+
/**
|
|
417
|
+
* Main matching function, with whole-word logic.
|
|
418
|
+
* @param searchText - The normalized text to search.
|
|
419
|
+
* @param originalText - The original text.
|
|
420
|
+
* @param matches - Array to collect matches.
|
|
421
|
+
*/
|
|
422
|
+
findMatches(searchText, originalText, matches) {
|
|
423
|
+
for (let i = 0; i < searchText.length; i++) {
|
|
424
|
+
const matchResults = this.profanityTrie.findMatches(searchText, i, this.detectPartialWords);
|
|
425
|
+
for (const match of matchResults) {
|
|
426
|
+
const start = i + match.start;
|
|
427
|
+
const end = i + match.end;
|
|
428
|
+
if (!this.detectPartialWords &&
|
|
429
|
+
!this.isWholeWord(originalText, start, end)) {
|
|
436
430
|
continue;
|
|
437
|
-
try {
|
|
438
|
-
const escapedWord = this.escapeRegex(profanity);
|
|
439
|
-
const wordRegex = new RegExp(`\\b${escapedWord}\\b`, this.caseSensitive ? "g" : "gi");
|
|
440
|
-
let match;
|
|
441
|
-
while ((match = wordRegex.exec(leetNormalizedText)) !== null) {
|
|
442
|
-
if (this.hasWordBoundaries(leetNormalizedText, match.index, match.index + match[0].length)) {
|
|
443
|
-
// Find the original text that corresponds to this match
|
|
444
|
-
const originalMatch = normalizedText.substring(match.index, match.index + match[0].length);
|
|
445
|
-
if (!detectedWords.includes(originalMatch)) {
|
|
446
|
-
detectedWords.push(originalMatch);
|
|
447
|
-
positions.push({
|
|
448
|
-
word: originalMatch,
|
|
449
|
-
start: match.index,
|
|
450
|
-
end: match.index + match[0].length,
|
|
451
|
-
});
|
|
452
|
-
}
|
|
453
|
-
}
|
|
454
|
-
}
|
|
455
|
-
}
|
|
456
|
-
catch (error) {
|
|
457
|
-
// Fallback to simple string search
|
|
458
|
-
if (leetNormalizedText.includes(profanity)) {
|
|
459
|
-
const index = leetNormalizedText.indexOf(profanity);
|
|
460
|
-
const originalMatch = normalizedText.substring(index, index + profanity.length);
|
|
461
|
-
if (!detectedWords.includes(originalMatch)) {
|
|
462
|
-
detectedWords.push(originalMatch);
|
|
463
|
-
positions.push({
|
|
464
|
-
word: originalMatch,
|
|
465
|
-
start: index,
|
|
466
|
-
end: index + profanity.length,
|
|
467
|
-
});
|
|
468
|
-
}
|
|
469
|
-
}
|
|
470
431
|
}
|
|
471
|
-
|
|
472
|
-
|
|
473
|
-
// Partial word detection (if enabled)
|
|
474
|
-
if (this.detectPartialWords) {
|
|
475
|
-
for (const profanity of this.profanitySet) {
|
|
476
|
-
if (this.whitelistSet.has(profanity) || profanity.length < 4)
|
|
432
|
+
const matchedText = originalText.substring(start, end);
|
|
433
|
+
if (this.isWhitelistedMatch(match.word, matchedText)) {
|
|
477
434
|
continue;
|
|
478
|
-
|
|
479
|
-
|
|
480
|
-
|
|
481
|
-
|
|
482
|
-
|
|
483
|
-
|
|
484
|
-
|
|
485
|
-
|
|
486
|
-
start: index,
|
|
487
|
-
end: index + profanity.length,
|
|
488
|
-
});
|
|
489
|
-
}
|
|
435
|
+
}
|
|
436
|
+
if (this.hasWordBoundaries(originalText, start, end)) {
|
|
437
|
+
matches.push({
|
|
438
|
+
word: match.word,
|
|
439
|
+
start,
|
|
440
|
+
end,
|
|
441
|
+
originalWord: matchedText,
|
|
442
|
+
});
|
|
490
443
|
}
|
|
491
444
|
}
|
|
492
445
|
}
|
|
493
|
-
|
|
494
|
-
|
|
495
|
-
|
|
496
|
-
|
|
497
|
-
|
|
498
|
-
|
|
499
|
-
|
|
500
|
-
|
|
501
|
-
|
|
502
|
-
|
|
503
|
-
|
|
504
|
-
|
|
505
|
-
|
|
506
|
-
|
|
446
|
+
}
|
|
447
|
+
/**
|
|
448
|
+
* Generate cleaned text by replacing profane words.
|
|
449
|
+
* @param originalText - The original text.
|
|
450
|
+
* @param matches - Array of matches.
|
|
451
|
+
* @returns Cleaned text.
|
|
452
|
+
*/
|
|
453
|
+
generateCleanedText(originalText, matches) {
|
|
454
|
+
if (matches.length === 0)
|
|
455
|
+
return originalText;
|
|
456
|
+
let result = originalText;
|
|
457
|
+
const sortedMatches = [...this.deduplicateMatches(matches)].sort((a, b) => b.start - a.start);
|
|
458
|
+
for (const match of sortedMatches) {
|
|
459
|
+
const replacement = this.defaultPlaceholder.repeat(match.originalWord.length);
|
|
460
|
+
result =
|
|
461
|
+
result.substring(0, match.start) +
|
|
462
|
+
replacement +
|
|
463
|
+
result.substring(match.end);
|
|
507
464
|
}
|
|
508
|
-
|
|
509
|
-
return {
|
|
510
|
-
hasProfanity: detectedWords.length > 0,
|
|
511
|
-
detectedWords: [...new Set(detectedWords)],
|
|
512
|
-
cleanedText,
|
|
513
|
-
severity,
|
|
514
|
-
positions,
|
|
515
|
-
};
|
|
465
|
+
return result;
|
|
516
466
|
}
|
|
517
467
|
/**
|
|
518
|
-
* Check if a string contains profanity
|
|
519
|
-
* @param
|
|
520
|
-
* @returns
|
|
468
|
+
* Check if a string contains profanity.
|
|
469
|
+
* @param text - The text to check.
|
|
470
|
+
* @returns True if profanity is found, false otherwise.
|
|
521
471
|
*/
|
|
522
|
-
check(
|
|
523
|
-
return this.detect(
|
|
472
|
+
check(text) {
|
|
473
|
+
return this.detect(text).hasProfanity;
|
|
524
474
|
}
|
|
525
475
|
/**
|
|
526
|
-
* Clean
|
|
527
|
-
* @param
|
|
528
|
-
* @param placeholder -
|
|
529
|
-
* @returns
|
|
476
|
+
* Clean text with a custom placeholder.
|
|
477
|
+
* @param text - The text to clean.
|
|
478
|
+
* @param placeholder - The placeholder to use.
|
|
479
|
+
* @returns Cleaned text.
|
|
530
480
|
*/
|
|
531
|
-
clean(
|
|
532
|
-
|
|
533
|
-
|
|
534
|
-
const placeholderChar = placeholder || this.defaultPlaceholder;
|
|
535
|
-
const detection = this.detect(string);
|
|
536
|
-
// If detect() already provided cleanedText and no custom placeholder, use it
|
|
537
|
-
if (!placeholder && detection.cleanedText !== string) {
|
|
481
|
+
clean(text, placeholder) {
|
|
482
|
+
const detection = this.detect(text);
|
|
483
|
+
if (!placeholder || placeholder === this.defaultPlaceholder) {
|
|
538
484
|
return detection.cleanedText;
|
|
539
485
|
}
|
|
540
|
-
|
|
541
|
-
|
|
542
|
-
|
|
486
|
+
let result = text;
|
|
487
|
+
const sortedPositions = [
|
|
488
|
+
...this.deduplicateMatches(detection.positions.map((p) => ({
|
|
489
|
+
word: p.word,
|
|
490
|
+
start: p.start,
|
|
491
|
+
end: p.end,
|
|
492
|
+
originalWord: text.substring(p.start, p.end),
|
|
493
|
+
}))),
|
|
494
|
+
].sort((a, b) => b.start - a.start);
|
|
543
495
|
for (const pos of sortedPositions) {
|
|
544
|
-
const originalWord =
|
|
545
|
-
const replacement =
|
|
496
|
+
const originalWord = text.substring(pos.start, pos.end);
|
|
497
|
+
const replacement = placeholder.repeat(originalWord.length);
|
|
546
498
|
result =
|
|
547
499
|
result.substring(0, pos.start) +
|
|
548
500
|
replacement +
|
|
@@ -551,113 +503,243 @@ export class AllProfanity {
|
|
|
551
503
|
return result;
|
|
552
504
|
}
|
|
553
505
|
/**
|
|
554
|
-
* Clean
|
|
555
|
-
* @param
|
|
556
|
-
* @param placeholder - The placeholder to use
|
|
557
|
-
* @returns
|
|
506
|
+
* Clean text by replacing each profane word with a single placeholder (word-level).
|
|
507
|
+
* @param text - The text to clean.
|
|
508
|
+
* @param placeholder - The placeholder to use.
|
|
509
|
+
* @returns Word-level cleaned text.
|
|
558
510
|
*/
|
|
559
|
-
|
|
560
|
-
|
|
561
|
-
|
|
562
|
-
|
|
563
|
-
|
|
564
|
-
|
|
565
|
-
.
|
|
566
|
-
|
|
567
|
-
|
|
568
|
-
|
|
569
|
-
|
|
570
|
-
|
|
571
|
-
|
|
572
|
-
|
|
573
|
-
|
|
574
|
-
|
|
511
|
+
cleanWithPlaceholder(text, placeholder = "***") {
|
|
512
|
+
const detection = this.detect(text);
|
|
513
|
+
if (detection.positions.length === 0)
|
|
514
|
+
return text;
|
|
515
|
+
let result = text;
|
|
516
|
+
const sortedPositions = [
|
|
517
|
+
...this.deduplicateMatches(detection.positions.map((p) => ({
|
|
518
|
+
word: p.word,
|
|
519
|
+
start: p.start,
|
|
520
|
+
end: p.end,
|
|
521
|
+
originalWord: text.substring(p.start, p.end),
|
|
522
|
+
}))),
|
|
523
|
+
].sort((a, b) => b.start - a.start);
|
|
524
|
+
for (const pos of sortedPositions) {
|
|
525
|
+
if (!this.isWholeWord(result, pos.start, pos.end))
|
|
526
|
+
continue;
|
|
527
|
+
result =
|
|
528
|
+
result.substring(0, pos.start) +
|
|
529
|
+
placeholder +
|
|
530
|
+
result.substring(pos.end);
|
|
531
|
+
}
|
|
532
|
+
return result;
|
|
575
533
|
}
|
|
576
534
|
/**
|
|
577
|
-
*
|
|
578
|
-
* @
|
|
535
|
+
* Add word(s) to the profanity filter.
|
|
536
|
+
* @param word - Word or array of words to add.
|
|
579
537
|
*/
|
|
580
|
-
|
|
581
|
-
|
|
538
|
+
add(word) {
|
|
539
|
+
const words = Array.isArray(word) ? word : [word];
|
|
540
|
+
const validatedWords = validateStringArray(words, "words to add");
|
|
541
|
+
for (const w of validatedWords) {
|
|
542
|
+
this.dynamicWords.add(w);
|
|
543
|
+
this.addWordToTrie(w);
|
|
544
|
+
}
|
|
582
545
|
}
|
|
583
546
|
/**
|
|
584
|
-
*
|
|
585
|
-
* @param word -
|
|
547
|
+
* Remove word(s) from the profanity filter.
|
|
548
|
+
* @param word - Word or array of words to remove.
|
|
586
549
|
*/
|
|
587
|
-
|
|
550
|
+
remove(word) {
|
|
588
551
|
const words = Array.isArray(word) ? word : [word];
|
|
589
|
-
|
|
590
|
-
|
|
591
|
-
continue;
|
|
552
|
+
const validatedWords = validateStringArray(words, "words to remove");
|
|
553
|
+
for (const w of validatedWords) {
|
|
592
554
|
const normalizedWord = this.caseSensitive ? w : w.toLowerCase();
|
|
593
|
-
this.
|
|
594
|
-
|
|
595
|
-
|
|
596
|
-
|
|
597
|
-
|
|
598
|
-
|
|
599
|
-
|
|
600
|
-
|
|
601
|
-
|
|
602
|
-
|
|
555
|
+
this.profanityTrie.removeWord(normalizedWord);
|
|
556
|
+
this.dynamicWords.delete(w);
|
|
557
|
+
}
|
|
558
|
+
}
|
|
559
|
+
/**
|
|
560
|
+
* Add words to the whitelist.
|
|
561
|
+
* @param words - Words to whitelist.
|
|
562
|
+
*/
|
|
563
|
+
addToWhitelist(words) {
|
|
564
|
+
const validatedWords = validateStringArray(words, "whitelist words");
|
|
565
|
+
for (const word of validatedWords) {
|
|
566
|
+
const normalizedWord = this.caseSensitive ? word : word.toLowerCase();
|
|
567
|
+
this.whitelistSet.add(normalizedWord);
|
|
568
|
+
}
|
|
569
|
+
}
|
|
570
|
+
/**
|
|
571
|
+
* Remove words from the whitelist.
|
|
572
|
+
* @param words - Words to remove from whitelist.
|
|
573
|
+
*/
|
|
574
|
+
removeFromWhitelist(words) {
|
|
575
|
+
const validatedWords = validateStringArray(words, "whitelist words");
|
|
576
|
+
for (const word of validatedWords) {
|
|
577
|
+
const normalizedWord = this.caseSensitive ? word : word.toLowerCase();
|
|
578
|
+
this.whitelistSet.delete(normalizedWord);
|
|
579
|
+
}
|
|
580
|
+
}
|
|
581
|
+
/**
|
|
582
|
+
* Check if a word is whitelisted.
|
|
583
|
+
* @param word - The word to check.
|
|
584
|
+
* @returns True if whitelisted, false otherwise.
|
|
585
|
+
*/
|
|
586
|
+
isWhitelisted(word) {
|
|
587
|
+
const normalizedWord = this.caseSensitive ? word : word.toLowerCase();
|
|
588
|
+
return this.whitelistSet.has(normalizedWord);
|
|
589
|
+
}
|
|
590
|
+
/**
|
|
591
|
+
* Load a built-in language dictionary.
|
|
592
|
+
* @param language - The language key.
|
|
593
|
+
* @returns True if loaded, false otherwise.
|
|
594
|
+
*/
|
|
595
|
+
loadLanguage(language) {
|
|
596
|
+
if (!language || typeof language !== "string") {
|
|
597
|
+
this.logger.warn(`Invalid language parameter: ${language}`);
|
|
598
|
+
return false;
|
|
599
|
+
}
|
|
600
|
+
const langKey = language.toLowerCase().trim();
|
|
601
|
+
if (this.loadedLanguages.has(langKey)) {
|
|
602
|
+
return true;
|
|
603
|
+
}
|
|
604
|
+
const words = this.availableLanguages[langKey];
|
|
605
|
+
if (!words || words.length === 0) {
|
|
606
|
+
this.logger.warn(`Language '${language}' not found or empty`);
|
|
607
|
+
return false;
|
|
608
|
+
}
|
|
609
|
+
try {
|
|
610
|
+
let addedCount = 0;
|
|
611
|
+
for (const word of words) {
|
|
612
|
+
if (this.addWordToTrie(word)) {
|
|
613
|
+
addedCount++;
|
|
614
|
+
}
|
|
603
615
|
}
|
|
616
|
+
this.loadedLanguages.add(langKey);
|
|
617
|
+
this.logger.info(`Loaded ${addedCount} words from ${language} dictionary`);
|
|
618
|
+
return true;
|
|
619
|
+
}
|
|
620
|
+
catch (error) {
|
|
621
|
+
this.logger.error(`Failed to load language ${language}: ${error}`);
|
|
622
|
+
return false;
|
|
604
623
|
}
|
|
605
624
|
}
|
|
606
625
|
/**
|
|
607
|
-
*
|
|
608
|
-
* @param
|
|
626
|
+
* Load multiple language dictionaries.
|
|
627
|
+
* @param languages - Array of languages to load.
|
|
628
|
+
* @returns Number of successfully loaded languages.
|
|
609
629
|
*/
|
|
610
|
-
|
|
611
|
-
const
|
|
612
|
-
|
|
613
|
-
|
|
614
|
-
|
|
615
|
-
|
|
616
|
-
|
|
617
|
-
|
|
618
|
-
|
|
619
|
-
|
|
620
|
-
|
|
630
|
+
loadLanguages(languages) {
|
|
631
|
+
const validatedLanguages = validateStringArray(languages, "languages");
|
|
632
|
+
return validatedLanguages.reduce((count, lang) => {
|
|
633
|
+
return this.loadLanguage(lang) ? count + 1 : count;
|
|
634
|
+
}, 0);
|
|
635
|
+
}
|
|
636
|
+
/**
|
|
637
|
+
* Load all supported Indian languages.
|
|
638
|
+
* @returns Number of loaded Indian languages.
|
|
639
|
+
*/
|
|
640
|
+
loadIndianLanguages() {
|
|
641
|
+
const indianLanguages = ["hindi", "bengali", "tamil", "telugu"];
|
|
642
|
+
return this.loadLanguages(indianLanguages);
|
|
643
|
+
}
|
|
644
|
+
/**
|
|
645
|
+
* Load a custom dictionary.
|
|
646
|
+
* @param name - Name of the dictionary.
|
|
647
|
+
* @param words - Words to add.
|
|
648
|
+
*/
|
|
649
|
+
loadCustomDictionary(name, words) {
|
|
650
|
+
validateString(name, "dictionary name");
|
|
651
|
+
const validatedWords = validateStringArray(words, "custom dictionary words");
|
|
652
|
+
if (validatedWords.length === 0) {
|
|
653
|
+
this.logger.warn(`Custom dictionary '${name}' contains no valid words`);
|
|
654
|
+
return;
|
|
655
|
+
}
|
|
656
|
+
try {
|
|
657
|
+
let addedCount = 0;
|
|
658
|
+
for (const word of validatedWords) {
|
|
659
|
+
if (this.addWordToTrie(word)) {
|
|
660
|
+
addedCount++;
|
|
661
|
+
}
|
|
621
662
|
}
|
|
663
|
+
this.availableLanguages[name.toLowerCase()] = validatedWords;
|
|
664
|
+
this.loadedLanguages.add(name.toLowerCase());
|
|
665
|
+
this.logger.info(`Loaded ${addedCount} words from custom dictionary '${name}'`);
|
|
622
666
|
}
|
|
667
|
+
catch (error) {
|
|
668
|
+
this.logger.error(`Failed to load custom dictionary ${name}: ${error}`);
|
|
669
|
+
}
|
|
670
|
+
}
|
|
671
|
+
/**
|
|
672
|
+
* Add a single word to the trie.
|
|
673
|
+
* @param word - The word to add.
|
|
674
|
+
* @returns True if added, false otherwise.
|
|
675
|
+
*/
|
|
676
|
+
addWordToTrie(word) {
|
|
677
|
+
if (!word || typeof word !== "string" || word.trim().length === 0) {
|
|
678
|
+
return false;
|
|
679
|
+
}
|
|
680
|
+
const normalizedWord = this.caseSensitive
|
|
681
|
+
? word.trim()
|
|
682
|
+
: word.trim().toLowerCase();
|
|
683
|
+
if (this.isWhitelisted(normalizedWord)) {
|
|
684
|
+
return false;
|
|
685
|
+
}
|
|
686
|
+
this.profanityTrie.addWord(normalizedWord);
|
|
687
|
+
return true;
|
|
688
|
+
}
|
|
689
|
+
/**
|
|
690
|
+
* Calculate severity from matches.
|
|
691
|
+
* @param matches - Array of matches.
|
|
692
|
+
* @returns Severity level.
|
|
693
|
+
*/
|
|
694
|
+
calculateSeverity(matches) {
|
|
695
|
+
if (matches.length === 0)
|
|
696
|
+
return ProfanitySeverity.MILD;
|
|
697
|
+
const uniqueWords = new Set(matches.map((m) => m.word)).size;
|
|
698
|
+
const totalMatches = matches.length;
|
|
699
|
+
if (totalMatches >= 5 || uniqueWords >= 4)
|
|
700
|
+
return ProfanitySeverity.EXTREME;
|
|
701
|
+
if (totalMatches >= 3 || uniqueWords >= 3)
|
|
702
|
+
return ProfanitySeverity.SEVERE;
|
|
703
|
+
if (totalMatches >= 2 || uniqueWords >= 2)
|
|
704
|
+
return ProfanitySeverity.MODERATE;
|
|
705
|
+
return ProfanitySeverity.MILD;
|
|
623
706
|
}
|
|
624
707
|
/**
|
|
625
|
-
* Clear
|
|
708
|
+
* Clear all loaded dictionaries and dynamic words.
|
|
626
709
|
*/
|
|
627
710
|
clearList() {
|
|
628
|
-
this.
|
|
629
|
-
this.normalizedProfanityMap.clear();
|
|
711
|
+
this.profanityTrie.clear();
|
|
630
712
|
this.loadedLanguages.clear();
|
|
713
|
+
this.dynamicWords.clear();
|
|
631
714
|
}
|
|
632
715
|
/**
|
|
633
|
-
*
|
|
634
|
-
* @param placeholder -
|
|
716
|
+
* Set the placeholder character for filtered words.
|
|
717
|
+
* @param placeholder - The placeholder character.
|
|
635
718
|
*/
|
|
636
719
|
setPlaceholder(placeholder) {
|
|
637
|
-
|
|
638
|
-
|
|
639
|
-
|
|
640
|
-
}
|
|
641
|
-
else {
|
|
642
|
-
this.defaultPlaceholder = placeholder;
|
|
720
|
+
validateString(placeholder, "placeholder");
|
|
721
|
+
if (placeholder.length === 0) {
|
|
722
|
+
throw new Error("Placeholder cannot be empty");
|
|
643
723
|
}
|
|
724
|
+
this.defaultPlaceholder = placeholder.charAt(0);
|
|
644
725
|
}
|
|
645
726
|
/**
|
|
646
|
-
* Get the list of
|
|
647
|
-
* @returns
|
|
727
|
+
* Get the list of loaded languages.
|
|
728
|
+
* @returns Array of loaded language keys.
|
|
648
729
|
*/
|
|
649
730
|
getLoadedLanguages() {
|
|
650
731
|
return Array.from(this.loadedLanguages);
|
|
651
732
|
}
|
|
652
733
|
/**
|
|
653
|
-
* Get the list of available
|
|
654
|
-
* @returns
|
|
734
|
+
* Get the list of available built-in languages.
|
|
735
|
+
* @returns Array of available language keys.
|
|
655
736
|
*/
|
|
656
737
|
getAvailableLanguages() {
|
|
657
738
|
return Object.keys(this.availableLanguages);
|
|
658
739
|
}
|
|
659
740
|
/**
|
|
660
|
-
* Get current configuration
|
|
741
|
+
* Get the current configuration of the profanity filter.
|
|
742
|
+
* @returns Partial configuration object.
|
|
661
743
|
*/
|
|
662
744
|
getConfig() {
|
|
663
745
|
return {
|
|
@@ -671,17 +753,36 @@ export class AllProfanity {
|
|
|
671
753
|
};
|
|
672
754
|
}
|
|
673
755
|
/**
|
|
674
|
-
*
|
|
756
|
+
* Rebuild the profanity trie from loaded dictionaries and dynamic words.
|
|
757
|
+
*/
|
|
758
|
+
rebuildTrie() {
|
|
759
|
+
this.profanityTrie.clear();
|
|
760
|
+
for (const lang of this.loadedLanguages) {
|
|
761
|
+
const words = this.availableLanguages[lang] || [];
|
|
762
|
+
for (const word of words) {
|
|
763
|
+
this.addWordToTrie(word);
|
|
764
|
+
}
|
|
765
|
+
}
|
|
766
|
+
for (const word of this.dynamicWords) {
|
|
767
|
+
this.addWordToTrie(word);
|
|
768
|
+
}
|
|
769
|
+
}
|
|
770
|
+
/**
|
|
771
|
+
* Update configuration options for the profanity filter.
|
|
772
|
+
* @param options - Partial configuration object.
|
|
675
773
|
*/
|
|
676
774
|
updateConfig(options) {
|
|
775
|
+
let rebuildNeeded = false;
|
|
677
776
|
if (options.defaultPlaceholder !== undefined) {
|
|
678
777
|
this.setPlaceholder(options.defaultPlaceholder);
|
|
679
778
|
}
|
|
680
779
|
if (options.enableLeetSpeak !== undefined) {
|
|
681
780
|
this.enableLeetSpeak = options.enableLeetSpeak;
|
|
682
781
|
}
|
|
683
|
-
if (options.caseSensitive !== undefined
|
|
782
|
+
if (options.caseSensitive !== undefined &&
|
|
783
|
+
options.caseSensitive !== this.caseSensitive) {
|
|
684
784
|
this.caseSensitive = options.caseSensitive;
|
|
785
|
+
rebuildNeeded = true;
|
|
685
786
|
}
|
|
686
787
|
if (options.strictMode !== undefined) {
|
|
687
788
|
this.strictMode = options.strictMode;
|
|
@@ -692,9 +793,14 @@ export class AllProfanity {
|
|
|
692
793
|
if (options.whitelistWords) {
|
|
693
794
|
this.addToWhitelist(options.whitelistWords);
|
|
694
795
|
}
|
|
796
|
+
if (rebuildNeeded) {
|
|
797
|
+
this.rebuildTrie();
|
|
798
|
+
}
|
|
695
799
|
}
|
|
696
800
|
}
|
|
697
|
-
|
|
801
|
+
/**
 * Shared AllProfanity instance created with the default configuration;
 * it is this module's default export.
 */
const allProfanity = new AllProfanity();
export { allProfanity as default };
|
|
700
806
|
//# sourceMappingURL=index.js.map
|